xref: /dflybsd-src/contrib/zstd/lib/dictBuilder/divsufsort.c (revision a28cd43d19e8b720a6c852a4bbc5ae147a26165a)
1*a28cd43dSSascha Wildner /*
2*a28cd43dSSascha Wildner  * divsufsort.c for libdivsufsort-lite
3*a28cd43dSSascha Wildner  * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
4*a28cd43dSSascha Wildner  *
5*a28cd43dSSascha Wildner  * Permission is hereby granted, free of charge, to any person
6*a28cd43dSSascha Wildner  * obtaining a copy of this software and associated documentation
7*a28cd43dSSascha Wildner  * files (the "Software"), to deal in the Software without
8*a28cd43dSSascha Wildner  * restriction, including without limitation the rights to use,
9*a28cd43dSSascha Wildner  * copy, modify, merge, publish, distribute, sublicense, and/or sell
10*a28cd43dSSascha Wildner  * copies of the Software, and to permit persons to whom the
11*a28cd43dSSascha Wildner  * Software is furnished to do so, subject to the following
12*a28cd43dSSascha Wildner  * conditions:
13*a28cd43dSSascha Wildner  *
14*a28cd43dSSascha Wildner  * The above copyright notice and this permission notice shall be
15*a28cd43dSSascha Wildner  * included in all copies or substantial portions of the Software.
16*a28cd43dSSascha Wildner  *
17*a28cd43dSSascha Wildner  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18*a28cd43dSSascha Wildner  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
19*a28cd43dSSascha Wildner  * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20*a28cd43dSSascha Wildner  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
21*a28cd43dSSascha Wildner  * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
22*a28cd43dSSascha Wildner  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23*a28cd43dSSascha Wildner  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
24*a28cd43dSSascha Wildner  * OTHER DEALINGS IN THE SOFTWARE.
25*a28cd43dSSascha Wildner  */
26*a28cd43dSSascha Wildner 
27*a28cd43dSSascha Wildner /*- Compiler specifics -*/
28*a28cd43dSSascha Wildner #ifdef __clang__
29*a28cd43dSSascha Wildner #pragma clang diagnostic ignored "-Wshorten-64-to-32"
30*a28cd43dSSascha Wildner #endif
31*a28cd43dSSascha Wildner 
32*a28cd43dSSascha Wildner #if defined(_MSC_VER)
33*a28cd43dSSascha Wildner #  pragma warning(disable : 4244)
34*a28cd43dSSascha Wildner #  pragma warning(disable : 4127)    /* C4127 : Condition expression is constant */
35*a28cd43dSSascha Wildner #endif
36*a28cd43dSSascha Wildner 
37*a28cd43dSSascha Wildner 
38*a28cd43dSSascha Wildner /*- Dependencies -*/
39*a28cd43dSSascha Wildner #include <assert.h>
40*a28cd43dSSascha Wildner #include <stdio.h>
41*a28cd43dSSascha Wildner #include <stdlib.h>
42*a28cd43dSSascha Wildner 
43*a28cd43dSSascha Wildner #include "divsufsort.h"
44*a28cd43dSSascha Wildner 
45*a28cd43dSSascha Wildner /*- Constants -*/
46*a28cd43dSSascha Wildner #if defined(INLINE)
47*a28cd43dSSascha Wildner # undef INLINE
48*a28cd43dSSascha Wildner #endif
49*a28cd43dSSascha Wildner #if !defined(INLINE)
50*a28cd43dSSascha Wildner # define INLINE __inline
51*a28cd43dSSascha Wildner #endif
52*a28cd43dSSascha Wildner #if defined(ALPHABET_SIZE) && (ALPHABET_SIZE < 1)
53*a28cd43dSSascha Wildner # undef ALPHABET_SIZE
54*a28cd43dSSascha Wildner #endif
55*a28cd43dSSascha Wildner #if !defined(ALPHABET_SIZE)
56*a28cd43dSSascha Wildner # define ALPHABET_SIZE (256)
57*a28cd43dSSascha Wildner #endif
58*a28cd43dSSascha Wildner #define BUCKET_A_SIZE (ALPHABET_SIZE)
59*a28cd43dSSascha Wildner #define BUCKET_B_SIZE (ALPHABET_SIZE * ALPHABET_SIZE)
60*a28cd43dSSascha Wildner #if defined(SS_INSERTIONSORT_THRESHOLD)
61*a28cd43dSSascha Wildner # if SS_INSERTIONSORT_THRESHOLD < 1
62*a28cd43dSSascha Wildner #  undef SS_INSERTIONSORT_THRESHOLD
63*a28cd43dSSascha Wildner #  define SS_INSERTIONSORT_THRESHOLD (1)
64*a28cd43dSSascha Wildner # endif
65*a28cd43dSSascha Wildner #else
66*a28cd43dSSascha Wildner # define SS_INSERTIONSORT_THRESHOLD (8)
67*a28cd43dSSascha Wildner #endif
68*a28cd43dSSascha Wildner #if defined(SS_BLOCKSIZE)
69*a28cd43dSSascha Wildner # if SS_BLOCKSIZE < 0
70*a28cd43dSSascha Wildner #  undef SS_BLOCKSIZE
71*a28cd43dSSascha Wildner #  define SS_BLOCKSIZE (0)
72*a28cd43dSSascha Wildner # elif 32768 <= SS_BLOCKSIZE
73*a28cd43dSSascha Wildner #  undef SS_BLOCKSIZE
74*a28cd43dSSascha Wildner #  define SS_BLOCKSIZE (32767)
75*a28cd43dSSascha Wildner # endif
76*a28cd43dSSascha Wildner #else
77*a28cd43dSSascha Wildner # define SS_BLOCKSIZE (1024)
78*a28cd43dSSascha Wildner #endif
79*a28cd43dSSascha Wildner /* minstacksize = log(SS_BLOCKSIZE) / log(3) * 2 */
80*a28cd43dSSascha Wildner #if SS_BLOCKSIZE == 0
81*a28cd43dSSascha Wildner # define SS_MISORT_STACKSIZE (96)
82*a28cd43dSSascha Wildner #elif SS_BLOCKSIZE <= 4096
83*a28cd43dSSascha Wildner # define SS_MISORT_STACKSIZE (16)
84*a28cd43dSSascha Wildner #else
85*a28cd43dSSascha Wildner # define SS_MISORT_STACKSIZE (24)
86*a28cd43dSSascha Wildner #endif
87*a28cd43dSSascha Wildner #define SS_SMERGE_STACKSIZE (32)
88*a28cd43dSSascha Wildner #define TR_INSERTIONSORT_THRESHOLD (8)
89*a28cd43dSSascha Wildner #define TR_STACKSIZE (64)
90*a28cd43dSSascha Wildner 
91*a28cd43dSSascha Wildner 
92*a28cd43dSSascha Wildner /*- Macros -*/
93*a28cd43dSSascha Wildner #ifndef SWAP
94*a28cd43dSSascha Wildner # define SWAP(_a, _b) do { t = (_a); (_a) = (_b); (_b) = t; } while(0)
95*a28cd43dSSascha Wildner #endif /* SWAP */
96*a28cd43dSSascha Wildner #ifndef MIN
97*a28cd43dSSascha Wildner # define MIN(_a, _b) (((_a) < (_b)) ? (_a) : (_b))
98*a28cd43dSSascha Wildner #endif /* MIN */
99*a28cd43dSSascha Wildner #ifndef MAX
100*a28cd43dSSascha Wildner # define MAX(_a, _b) (((_a) > (_b)) ? (_a) : (_b))
101*a28cd43dSSascha Wildner #endif /* MAX */
102*a28cd43dSSascha Wildner #define STACK_PUSH(_a, _b, _c, _d)\
103*a28cd43dSSascha Wildner   do {\
104*a28cd43dSSascha Wildner     assert(ssize < STACK_SIZE);\
105*a28cd43dSSascha Wildner     stack[ssize].a = (_a), stack[ssize].b = (_b),\
106*a28cd43dSSascha Wildner     stack[ssize].c = (_c), stack[ssize++].d = (_d);\
107*a28cd43dSSascha Wildner   } while(0)
108*a28cd43dSSascha Wildner #define STACK_PUSH5(_a, _b, _c, _d, _e)\
109*a28cd43dSSascha Wildner   do {\
110*a28cd43dSSascha Wildner     assert(ssize < STACK_SIZE);\
111*a28cd43dSSascha Wildner     stack[ssize].a = (_a), stack[ssize].b = (_b),\
112*a28cd43dSSascha Wildner     stack[ssize].c = (_c), stack[ssize].d = (_d), stack[ssize++].e = (_e);\
113*a28cd43dSSascha Wildner   } while(0)
114*a28cd43dSSascha Wildner #define STACK_POP(_a, _b, _c, _d)\
115*a28cd43dSSascha Wildner   do {\
116*a28cd43dSSascha Wildner     assert(0 <= ssize);\
117*a28cd43dSSascha Wildner     if(ssize == 0) { return; }\
118*a28cd43dSSascha Wildner     (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
119*a28cd43dSSascha Wildner     (_c) = stack[ssize].c, (_d) = stack[ssize].d;\
120*a28cd43dSSascha Wildner   } while(0)
121*a28cd43dSSascha Wildner #define STACK_POP5(_a, _b, _c, _d, _e)\
122*a28cd43dSSascha Wildner   do {\
123*a28cd43dSSascha Wildner     assert(0 <= ssize);\
124*a28cd43dSSascha Wildner     if(ssize == 0) { return; }\
125*a28cd43dSSascha Wildner     (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
126*a28cd43dSSascha Wildner     (_c) = stack[ssize].c, (_d) = stack[ssize].d, (_e) = stack[ssize].e;\
127*a28cd43dSSascha Wildner   } while(0)
128*a28cd43dSSascha Wildner #define BUCKET_A(_c0) bucket_A[(_c0)]
129*a28cd43dSSascha Wildner #if ALPHABET_SIZE == 256
130*a28cd43dSSascha Wildner #define BUCKET_B(_c0, _c1) (bucket_B[((_c1) << 8) | (_c0)])
131*a28cd43dSSascha Wildner #define BUCKET_BSTAR(_c0, _c1) (bucket_B[((_c0) << 8) | (_c1)])
132*a28cd43dSSascha Wildner #else
133*a28cd43dSSascha Wildner #define BUCKET_B(_c0, _c1) (bucket_B[(_c1) * ALPHABET_SIZE + (_c0)])
134*a28cd43dSSascha Wildner #define BUCKET_BSTAR(_c0, _c1) (bucket_B[(_c0) * ALPHABET_SIZE + (_c1)])
135*a28cd43dSSascha Wildner #endif
136*a28cd43dSSascha Wildner 
137*a28cd43dSSascha Wildner 
138*a28cd43dSSascha Wildner /*- Private Functions -*/
139*a28cd43dSSascha Wildner 
140*a28cd43dSSascha Wildner static const int lg_table[256]= {
141*a28cd43dSSascha Wildner  -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
142*a28cd43dSSascha Wildner   5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
143*a28cd43dSSascha Wildner   6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
144*a28cd43dSSascha Wildner   6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
145*a28cd43dSSascha Wildner   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
146*a28cd43dSSascha Wildner   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
147*a28cd43dSSascha Wildner   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
148*a28cd43dSSascha Wildner   7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
149*a28cd43dSSascha Wildner };
150*a28cd43dSSascha Wildner 
151*a28cd43dSSascha Wildner #if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
152*a28cd43dSSascha Wildner 
153*a28cd43dSSascha Wildner static INLINE
154*a28cd43dSSascha Wildner int
ss_ilg(int n)155*a28cd43dSSascha Wildner ss_ilg(int n) {
156*a28cd43dSSascha Wildner #if SS_BLOCKSIZE == 0
157*a28cd43dSSascha Wildner   return (n & 0xffff0000) ?
158*a28cd43dSSascha Wildner           ((n & 0xff000000) ?
159*a28cd43dSSascha Wildner             24 + lg_table[(n >> 24) & 0xff] :
160*a28cd43dSSascha Wildner             16 + lg_table[(n >> 16) & 0xff]) :
161*a28cd43dSSascha Wildner           ((n & 0x0000ff00) ?
162*a28cd43dSSascha Wildner              8 + lg_table[(n >>  8) & 0xff] :
163*a28cd43dSSascha Wildner              0 + lg_table[(n >>  0) & 0xff]);
164*a28cd43dSSascha Wildner #elif SS_BLOCKSIZE < 256
165*a28cd43dSSascha Wildner   return lg_table[n];
166*a28cd43dSSascha Wildner #else
167*a28cd43dSSascha Wildner   return (n & 0xff00) ?
168*a28cd43dSSascha Wildner           8 + lg_table[(n >> 8) & 0xff] :
169*a28cd43dSSascha Wildner           0 + lg_table[(n >> 0) & 0xff];
170*a28cd43dSSascha Wildner #endif
171*a28cd43dSSascha Wildner }
172*a28cd43dSSascha Wildner 
173*a28cd43dSSascha Wildner #endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */
174*a28cd43dSSascha Wildner 
175*a28cd43dSSascha Wildner #if SS_BLOCKSIZE != 0
176*a28cd43dSSascha Wildner 
177*a28cd43dSSascha Wildner static const int sqq_table[256] = {
178*a28cd43dSSascha Wildner   0,  16,  22,  27,  32,  35,  39,  42,  45,  48,  50,  53,  55,  57,  59,  61,
179*a28cd43dSSascha Wildner  64,  65,  67,  69,  71,  73,  75,  76,  78,  80,  81,  83,  84,  86,  87,  89,
180*a28cd43dSSascha Wildner  90,  91,  93,  94,  96,  97,  98,  99, 101, 102, 103, 104, 106, 107, 108, 109,
181*a28cd43dSSascha Wildner 110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126,
182*a28cd43dSSascha Wildner 128, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142,
183*a28cd43dSSascha Wildner 143, 144, 144, 145, 146, 147, 148, 149, 150, 150, 151, 152, 153, 154, 155, 155,
184*a28cd43dSSascha Wildner 156, 157, 158, 159, 160, 160, 161, 162, 163, 163, 164, 165, 166, 167, 167, 168,
185*a28cd43dSSascha Wildner 169, 170, 170, 171, 172, 173, 173, 174, 175, 176, 176, 177, 178, 178, 179, 180,
186*a28cd43dSSascha Wildner 181, 181, 182, 183, 183, 184, 185, 185, 186, 187, 187, 188, 189, 189, 190, 191,
187*a28cd43dSSascha Wildner 192, 192, 193, 193, 194, 195, 195, 196, 197, 197, 198, 199, 199, 200, 201, 201,
188*a28cd43dSSascha Wildner 202, 203, 203, 204, 204, 205, 206, 206, 207, 208, 208, 209, 209, 210, 211, 211,
189*a28cd43dSSascha Wildner 212, 212, 213, 214, 214, 215, 215, 216, 217, 217, 218, 218, 219, 219, 220, 221,
190*a28cd43dSSascha Wildner 221, 222, 222, 223, 224, 224, 225, 225, 226, 226, 227, 227, 228, 229, 229, 230,
191*a28cd43dSSascha Wildner 230, 231, 231, 232, 232, 233, 234, 234, 235, 235, 236, 236, 237, 237, 238, 238,
192*a28cd43dSSascha Wildner 239, 240, 240, 241, 241, 242, 242, 243, 243, 244, 244, 245, 245, 246, 246, 247,
193*a28cd43dSSascha Wildner 247, 248, 248, 249, 249, 250, 250, 251, 251, 252, 252, 253, 253, 254, 254, 255
194*a28cd43dSSascha Wildner };
195*a28cd43dSSascha Wildner 
196*a28cd43dSSascha Wildner static INLINE
197*a28cd43dSSascha Wildner int
ss_isqrt(int x)198*a28cd43dSSascha Wildner ss_isqrt(int x) {
199*a28cd43dSSascha Wildner   int y, e;
200*a28cd43dSSascha Wildner 
201*a28cd43dSSascha Wildner   if(x >= (SS_BLOCKSIZE * SS_BLOCKSIZE)) { return SS_BLOCKSIZE; }
202*a28cd43dSSascha Wildner   e = (x & 0xffff0000) ?
203*a28cd43dSSascha Wildner         ((x & 0xff000000) ?
204*a28cd43dSSascha Wildner           24 + lg_table[(x >> 24) & 0xff] :
205*a28cd43dSSascha Wildner           16 + lg_table[(x >> 16) & 0xff]) :
206*a28cd43dSSascha Wildner         ((x & 0x0000ff00) ?
207*a28cd43dSSascha Wildner            8 + lg_table[(x >>  8) & 0xff] :
208*a28cd43dSSascha Wildner            0 + lg_table[(x >>  0) & 0xff]);
209*a28cd43dSSascha Wildner 
210*a28cd43dSSascha Wildner   if(e >= 16) {
211*a28cd43dSSascha Wildner     y = sqq_table[x >> ((e - 6) - (e & 1))] << ((e >> 1) - 7);
212*a28cd43dSSascha Wildner     if(e >= 24) { y = (y + 1 + x / y) >> 1; }
213*a28cd43dSSascha Wildner     y = (y + 1 + x / y) >> 1;
214*a28cd43dSSascha Wildner   } else if(e >= 8) {
215*a28cd43dSSascha Wildner     y = (sqq_table[x >> ((e - 6) - (e & 1))] >> (7 - (e >> 1))) + 1;
216*a28cd43dSSascha Wildner   } else {
217*a28cd43dSSascha Wildner     return sqq_table[x] >> 4;
218*a28cd43dSSascha Wildner   }
219*a28cd43dSSascha Wildner 
220*a28cd43dSSascha Wildner   return (x < (y * y)) ? y - 1 : y;
221*a28cd43dSSascha Wildner }
222*a28cd43dSSascha Wildner 
223*a28cd43dSSascha Wildner #endif /* SS_BLOCKSIZE != 0 */
224*a28cd43dSSascha Wildner 
225*a28cd43dSSascha Wildner 
226*a28cd43dSSascha Wildner /*---------------------------------------------------------------------------*/
227*a28cd43dSSascha Wildner 
228*a28cd43dSSascha Wildner /* Compares two suffixes. */
229*a28cd43dSSascha Wildner static INLINE
230*a28cd43dSSascha Wildner int
ss_compare(const unsigned char * T,const int * p1,const int * p2,int depth)231*a28cd43dSSascha Wildner ss_compare(const unsigned char *T,
232*a28cd43dSSascha Wildner            const int *p1, const int *p2,
233*a28cd43dSSascha Wildner            int depth) {
234*a28cd43dSSascha Wildner   const unsigned char *U1, *U2, *U1n, *U2n;
235*a28cd43dSSascha Wildner 
236*a28cd43dSSascha Wildner   for(U1 = T + depth + *p1,
237*a28cd43dSSascha Wildner       U2 = T + depth + *p2,
238*a28cd43dSSascha Wildner       U1n = T + *(p1 + 1) + 2,
239*a28cd43dSSascha Wildner       U2n = T + *(p2 + 1) + 2;
240*a28cd43dSSascha Wildner       (U1 < U1n) && (U2 < U2n) && (*U1 == *U2);
241*a28cd43dSSascha Wildner       ++U1, ++U2) {
242*a28cd43dSSascha Wildner   }
243*a28cd43dSSascha Wildner 
244*a28cd43dSSascha Wildner   return U1 < U1n ?
245*a28cd43dSSascha Wildner         (U2 < U2n ? *U1 - *U2 : 1) :
246*a28cd43dSSascha Wildner         (U2 < U2n ? -1 : 0);
247*a28cd43dSSascha Wildner }
248*a28cd43dSSascha Wildner 
249*a28cd43dSSascha Wildner 
250*a28cd43dSSascha Wildner /*---------------------------------------------------------------------------*/
251*a28cd43dSSascha Wildner 
252*a28cd43dSSascha Wildner #if (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1)
253*a28cd43dSSascha Wildner 
254*a28cd43dSSascha Wildner /* Insertionsort for small size groups */
255*a28cd43dSSascha Wildner static
256*a28cd43dSSascha Wildner void
ss_insertionsort(const unsigned char * T,const int * PA,int * first,int * last,int depth)257*a28cd43dSSascha Wildner ss_insertionsort(const unsigned char *T, const int *PA,
258*a28cd43dSSascha Wildner                  int *first, int *last, int depth) {
259*a28cd43dSSascha Wildner   int *i, *j;
260*a28cd43dSSascha Wildner   int t;
261*a28cd43dSSascha Wildner   int r;
262*a28cd43dSSascha Wildner 
263*a28cd43dSSascha Wildner   for(i = last - 2; first <= i; --i) {
264*a28cd43dSSascha Wildner     for(t = *i, j = i + 1; 0 < (r = ss_compare(T, PA + t, PA + *j, depth));) {
265*a28cd43dSSascha Wildner       do { *(j - 1) = *j; } while((++j < last) && (*j < 0));
266*a28cd43dSSascha Wildner       if(last <= j) { break; }
267*a28cd43dSSascha Wildner     }
268*a28cd43dSSascha Wildner     if(r == 0) { *j = ~*j; }
269*a28cd43dSSascha Wildner     *(j - 1) = t;
270*a28cd43dSSascha Wildner   }
271*a28cd43dSSascha Wildner }
272*a28cd43dSSascha Wildner 
273*a28cd43dSSascha Wildner #endif /* (SS_BLOCKSIZE != 1) && (SS_INSERTIONSORT_THRESHOLD != 1) */
274*a28cd43dSSascha Wildner 
275*a28cd43dSSascha Wildner 
276*a28cd43dSSascha Wildner /*---------------------------------------------------------------------------*/
277*a28cd43dSSascha Wildner 
278*a28cd43dSSascha Wildner #if (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE)
279*a28cd43dSSascha Wildner 
280*a28cd43dSSascha Wildner static INLINE
281*a28cd43dSSascha Wildner void
ss_fixdown(const unsigned char * Td,const int * PA,int * SA,int i,int size)282*a28cd43dSSascha Wildner ss_fixdown(const unsigned char *Td, const int *PA,
283*a28cd43dSSascha Wildner            int *SA, int i, int size) {
284*a28cd43dSSascha Wildner   int j, k;
285*a28cd43dSSascha Wildner   int v;
286*a28cd43dSSascha Wildner   int c, d, e;
287*a28cd43dSSascha Wildner 
288*a28cd43dSSascha Wildner   for(v = SA[i], c = Td[PA[v]]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) {
289*a28cd43dSSascha Wildner     d = Td[PA[SA[k = j++]]];
290*a28cd43dSSascha Wildner     if(d < (e = Td[PA[SA[j]]])) { k = j; d = e; }
291*a28cd43dSSascha Wildner     if(d <= c) { break; }
292*a28cd43dSSascha Wildner   }
293*a28cd43dSSascha Wildner   SA[i] = v;
294*a28cd43dSSascha Wildner }
295*a28cd43dSSascha Wildner 
296*a28cd43dSSascha Wildner /* Simple top-down heapsort. */
297*a28cd43dSSascha Wildner static
298*a28cd43dSSascha Wildner void
ss_heapsort(const unsigned char * Td,const int * PA,int * SA,int size)299*a28cd43dSSascha Wildner ss_heapsort(const unsigned char *Td, const int *PA, int *SA, int size) {
300*a28cd43dSSascha Wildner   int i, m;
301*a28cd43dSSascha Wildner   int t;
302*a28cd43dSSascha Wildner 
303*a28cd43dSSascha Wildner   m = size;
304*a28cd43dSSascha Wildner   if((size % 2) == 0) {
305*a28cd43dSSascha Wildner     m--;
306*a28cd43dSSascha Wildner     if(Td[PA[SA[m / 2]]] < Td[PA[SA[m]]]) { SWAP(SA[m], SA[m / 2]); }
307*a28cd43dSSascha Wildner   }
308*a28cd43dSSascha Wildner 
309*a28cd43dSSascha Wildner   for(i = m / 2 - 1; 0 <= i; --i) { ss_fixdown(Td, PA, SA, i, m); }
310*a28cd43dSSascha Wildner   if((size % 2) == 0) { SWAP(SA[0], SA[m]); ss_fixdown(Td, PA, SA, 0, m); }
311*a28cd43dSSascha Wildner   for(i = m - 1; 0 < i; --i) {
312*a28cd43dSSascha Wildner     t = SA[0], SA[0] = SA[i];
313*a28cd43dSSascha Wildner     ss_fixdown(Td, PA, SA, 0, i);
314*a28cd43dSSascha Wildner     SA[i] = t;
315*a28cd43dSSascha Wildner   }
316*a28cd43dSSascha Wildner }
317*a28cd43dSSascha Wildner 
318*a28cd43dSSascha Wildner 
319*a28cd43dSSascha Wildner /*---------------------------------------------------------------------------*/
320*a28cd43dSSascha Wildner 
321*a28cd43dSSascha Wildner /* Returns the median of three elements. */
322*a28cd43dSSascha Wildner static INLINE
323*a28cd43dSSascha Wildner int *
ss_median3(const unsigned char * Td,const int * PA,int * v1,int * v2,int * v3)324*a28cd43dSSascha Wildner ss_median3(const unsigned char *Td, const int *PA,
325*a28cd43dSSascha Wildner            int *v1, int *v2, int *v3) {
326*a28cd43dSSascha Wildner   int *t;
327*a28cd43dSSascha Wildner   if(Td[PA[*v1]] > Td[PA[*v2]]) { SWAP(v1, v2); }
328*a28cd43dSSascha Wildner   if(Td[PA[*v2]] > Td[PA[*v3]]) {
329*a28cd43dSSascha Wildner     if(Td[PA[*v1]] > Td[PA[*v3]]) { return v1; }
330*a28cd43dSSascha Wildner     else { return v3; }
331*a28cd43dSSascha Wildner   }
332*a28cd43dSSascha Wildner   return v2;
333*a28cd43dSSascha Wildner }
334*a28cd43dSSascha Wildner 
335*a28cd43dSSascha Wildner /* Returns the median of five elements. */
336*a28cd43dSSascha Wildner static INLINE
337*a28cd43dSSascha Wildner int *
ss_median5(const unsigned char * Td,const int * PA,int * v1,int * v2,int * v3,int * v4,int * v5)338*a28cd43dSSascha Wildner ss_median5(const unsigned char *Td, const int *PA,
339*a28cd43dSSascha Wildner            int *v1, int *v2, int *v3, int *v4, int *v5) {
340*a28cd43dSSascha Wildner   int *t;
341*a28cd43dSSascha Wildner   if(Td[PA[*v2]] > Td[PA[*v3]]) { SWAP(v2, v3); }
342*a28cd43dSSascha Wildner   if(Td[PA[*v4]] > Td[PA[*v5]]) { SWAP(v4, v5); }
343*a28cd43dSSascha Wildner   if(Td[PA[*v2]] > Td[PA[*v4]]) { SWAP(v2, v4); SWAP(v3, v5); }
344*a28cd43dSSascha Wildner   if(Td[PA[*v1]] > Td[PA[*v3]]) { SWAP(v1, v3); }
345*a28cd43dSSascha Wildner   if(Td[PA[*v1]] > Td[PA[*v4]]) { SWAP(v1, v4); SWAP(v3, v5); }
346*a28cd43dSSascha Wildner   if(Td[PA[*v3]] > Td[PA[*v4]]) { return v4; }
347*a28cd43dSSascha Wildner   return v3;
348*a28cd43dSSascha Wildner }
349*a28cd43dSSascha Wildner 
350*a28cd43dSSascha Wildner /* Returns the pivot element. */
351*a28cd43dSSascha Wildner static INLINE
352*a28cd43dSSascha Wildner int *
ss_pivot(const unsigned char * Td,const int * PA,int * first,int * last)353*a28cd43dSSascha Wildner ss_pivot(const unsigned char *Td, const int *PA, int *first, int *last) {
354*a28cd43dSSascha Wildner   int *middle;
355*a28cd43dSSascha Wildner   int t;
356*a28cd43dSSascha Wildner 
357*a28cd43dSSascha Wildner   t = last - first;
358*a28cd43dSSascha Wildner   middle = first + t / 2;
359*a28cd43dSSascha Wildner 
360*a28cd43dSSascha Wildner   if(t <= 512) {
361*a28cd43dSSascha Wildner     if(t <= 32) {
362*a28cd43dSSascha Wildner       return ss_median3(Td, PA, first, middle, last - 1);
363*a28cd43dSSascha Wildner     } else {
364*a28cd43dSSascha Wildner       t >>= 2;
365*a28cd43dSSascha Wildner       return ss_median5(Td, PA, first, first + t, middle, last - 1 - t, last - 1);
366*a28cd43dSSascha Wildner     }
367*a28cd43dSSascha Wildner   }
368*a28cd43dSSascha Wildner   t >>= 3;
369*a28cd43dSSascha Wildner   first  = ss_median3(Td, PA, first, first + t, first + (t << 1));
370*a28cd43dSSascha Wildner   middle = ss_median3(Td, PA, middle - t, middle, middle + t);
371*a28cd43dSSascha Wildner   last   = ss_median3(Td, PA, last - 1 - (t << 1), last - 1 - t, last - 1);
372*a28cd43dSSascha Wildner   return ss_median3(Td, PA, first, middle, last);
373*a28cd43dSSascha Wildner }
374*a28cd43dSSascha Wildner 
375*a28cd43dSSascha Wildner 
376*a28cd43dSSascha Wildner /*---------------------------------------------------------------------------*/
377*a28cd43dSSascha Wildner 
378*a28cd43dSSascha Wildner /* Binary partition for substrings. */
379*a28cd43dSSascha Wildner static INLINE
380*a28cd43dSSascha Wildner int *
ss_partition(const int * PA,int * first,int * last,int depth)381*a28cd43dSSascha Wildner ss_partition(const int *PA,
382*a28cd43dSSascha Wildner                     int *first, int *last, int depth) {
383*a28cd43dSSascha Wildner   int *a, *b;
384*a28cd43dSSascha Wildner   int t;
385*a28cd43dSSascha Wildner   for(a = first - 1, b = last;;) {
386*a28cd43dSSascha Wildner     for(; (++a < b) && ((PA[*a] + depth) >= (PA[*a + 1] + 1));) { *a = ~*a; }
387*a28cd43dSSascha Wildner     for(; (a < --b) && ((PA[*b] + depth) <  (PA[*b + 1] + 1));) { }
388*a28cd43dSSascha Wildner     if(b <= a) { break; }
389*a28cd43dSSascha Wildner     t = ~*b;
390*a28cd43dSSascha Wildner     *b = *a;
391*a28cd43dSSascha Wildner     *a = t;
392*a28cd43dSSascha Wildner   }
393*a28cd43dSSascha Wildner   if(first < a) { *first = ~*first; }
394*a28cd43dSSascha Wildner   return a;
395*a28cd43dSSascha Wildner }
396*a28cd43dSSascha Wildner 
397*a28cd43dSSascha Wildner /* Multikey introsort for medium size groups. */
398*a28cd43dSSascha Wildner static
399*a28cd43dSSascha Wildner void
ss_mintrosort(const unsigned char * T,const int * PA,int * first,int * last,int depth)400*a28cd43dSSascha Wildner ss_mintrosort(const unsigned char *T, const int *PA,
401*a28cd43dSSascha Wildner               int *first, int *last,
402*a28cd43dSSascha Wildner               int depth) {
403*a28cd43dSSascha Wildner #define STACK_SIZE SS_MISORT_STACKSIZE
404*a28cd43dSSascha Wildner   struct { int *a, *b, c; int d; } stack[STACK_SIZE];
405*a28cd43dSSascha Wildner   const unsigned char *Td;
406*a28cd43dSSascha Wildner   int *a, *b, *c, *d, *e, *f;
407*a28cd43dSSascha Wildner   int s, t;
408*a28cd43dSSascha Wildner   int ssize;
409*a28cd43dSSascha Wildner   int limit;
410*a28cd43dSSascha Wildner   int v, x = 0;
411*a28cd43dSSascha Wildner 
412*a28cd43dSSascha Wildner   for(ssize = 0, limit = ss_ilg(last - first);;) {
413*a28cd43dSSascha Wildner 
414*a28cd43dSSascha Wildner     if((last - first) <= SS_INSERTIONSORT_THRESHOLD) {
415*a28cd43dSSascha Wildner #if 1 < SS_INSERTIONSORT_THRESHOLD
416*a28cd43dSSascha Wildner       if(1 < (last - first)) { ss_insertionsort(T, PA, first, last, depth); }
417*a28cd43dSSascha Wildner #endif
418*a28cd43dSSascha Wildner       STACK_POP(first, last, depth, limit);
419*a28cd43dSSascha Wildner       continue;
420*a28cd43dSSascha Wildner     }
421*a28cd43dSSascha Wildner 
422*a28cd43dSSascha Wildner     Td = T + depth;
423*a28cd43dSSascha Wildner     if(limit-- == 0) { ss_heapsort(Td, PA, first, last - first); }
424*a28cd43dSSascha Wildner     if(limit < 0) {
425*a28cd43dSSascha Wildner       for(a = first + 1, v = Td[PA[*first]]; a < last; ++a) {
426*a28cd43dSSascha Wildner         if((x = Td[PA[*a]]) != v) {
427*a28cd43dSSascha Wildner           if(1 < (a - first)) { break; }
428*a28cd43dSSascha Wildner           v = x;
429*a28cd43dSSascha Wildner           first = a;
430*a28cd43dSSascha Wildner         }
431*a28cd43dSSascha Wildner       }
432*a28cd43dSSascha Wildner       if(Td[PA[*first] - 1] < v) {
433*a28cd43dSSascha Wildner         first = ss_partition(PA, first, a, depth);
434*a28cd43dSSascha Wildner       }
435*a28cd43dSSascha Wildner       if((a - first) <= (last - a)) {
436*a28cd43dSSascha Wildner         if(1 < (a - first)) {
437*a28cd43dSSascha Wildner           STACK_PUSH(a, last, depth, -1);
438*a28cd43dSSascha Wildner           last = a, depth += 1, limit = ss_ilg(a - first);
439*a28cd43dSSascha Wildner         } else {
440*a28cd43dSSascha Wildner           first = a, limit = -1;
441*a28cd43dSSascha Wildner         }
442*a28cd43dSSascha Wildner       } else {
443*a28cd43dSSascha Wildner         if(1 < (last - a)) {
444*a28cd43dSSascha Wildner           STACK_PUSH(first, a, depth + 1, ss_ilg(a - first));
445*a28cd43dSSascha Wildner           first = a, limit = -1;
446*a28cd43dSSascha Wildner         } else {
447*a28cd43dSSascha Wildner           last = a, depth += 1, limit = ss_ilg(a - first);
448*a28cd43dSSascha Wildner         }
449*a28cd43dSSascha Wildner       }
450*a28cd43dSSascha Wildner       continue;
451*a28cd43dSSascha Wildner     }
452*a28cd43dSSascha Wildner 
453*a28cd43dSSascha Wildner     /* choose pivot */
454*a28cd43dSSascha Wildner     a = ss_pivot(Td, PA, first, last);
455*a28cd43dSSascha Wildner     v = Td[PA[*a]];
456*a28cd43dSSascha Wildner     SWAP(*first, *a);
457*a28cd43dSSascha Wildner 
458*a28cd43dSSascha Wildner     /* partition */
459*a28cd43dSSascha Wildner     for(b = first; (++b < last) && ((x = Td[PA[*b]]) == v);) { }
460*a28cd43dSSascha Wildner     if(((a = b) < last) && (x < v)) {
461*a28cd43dSSascha Wildner       for(; (++b < last) && ((x = Td[PA[*b]]) <= v);) {
462*a28cd43dSSascha Wildner         if(x == v) { SWAP(*b, *a); ++a; }
463*a28cd43dSSascha Wildner       }
464*a28cd43dSSascha Wildner     }
465*a28cd43dSSascha Wildner     for(c = last; (b < --c) && ((x = Td[PA[*c]]) == v);) { }
466*a28cd43dSSascha Wildner     if((b < (d = c)) && (x > v)) {
467*a28cd43dSSascha Wildner       for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
468*a28cd43dSSascha Wildner         if(x == v) { SWAP(*c, *d); --d; }
469*a28cd43dSSascha Wildner       }
470*a28cd43dSSascha Wildner     }
471*a28cd43dSSascha Wildner     for(; b < c;) {
472*a28cd43dSSascha Wildner       SWAP(*b, *c);
473*a28cd43dSSascha Wildner       for(; (++b < c) && ((x = Td[PA[*b]]) <= v);) {
474*a28cd43dSSascha Wildner         if(x == v) { SWAP(*b, *a); ++a; }
475*a28cd43dSSascha Wildner       }
476*a28cd43dSSascha Wildner       for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
477*a28cd43dSSascha Wildner         if(x == v) { SWAP(*c, *d); --d; }
478*a28cd43dSSascha Wildner       }
479*a28cd43dSSascha Wildner     }
480*a28cd43dSSascha Wildner 
481*a28cd43dSSascha Wildner     if(a <= d) {
482*a28cd43dSSascha Wildner       c = b - 1;
483*a28cd43dSSascha Wildner 
484*a28cd43dSSascha Wildner       if((s = a - first) > (t = b - a)) { s = t; }
485*a28cd43dSSascha Wildner       for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
486*a28cd43dSSascha Wildner       if((s = d - c) > (t = last - d - 1)) { s = t; }
487*a28cd43dSSascha Wildner       for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
488*a28cd43dSSascha Wildner 
489*a28cd43dSSascha Wildner       a = first + (b - a), c = last - (d - c);
490*a28cd43dSSascha Wildner       b = (v <= Td[PA[*a] - 1]) ? a : ss_partition(PA, a, c, depth);
491*a28cd43dSSascha Wildner 
492*a28cd43dSSascha Wildner       if((a - first) <= (last - c)) {
493*a28cd43dSSascha Wildner         if((last - c) <= (c - b)) {
494*a28cd43dSSascha Wildner           STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
495*a28cd43dSSascha Wildner           STACK_PUSH(c, last, depth, limit);
496*a28cd43dSSascha Wildner           last = a;
497*a28cd43dSSascha Wildner         } else if((a - first) <= (c - b)) {
498*a28cd43dSSascha Wildner           STACK_PUSH(c, last, depth, limit);
499*a28cd43dSSascha Wildner           STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
500*a28cd43dSSascha Wildner           last = a;
501*a28cd43dSSascha Wildner         } else {
502*a28cd43dSSascha Wildner           STACK_PUSH(c, last, depth, limit);
503*a28cd43dSSascha Wildner           STACK_PUSH(first, a, depth, limit);
504*a28cd43dSSascha Wildner           first = b, last = c, depth += 1, limit = ss_ilg(c - b);
505*a28cd43dSSascha Wildner         }
506*a28cd43dSSascha Wildner       } else {
507*a28cd43dSSascha Wildner         if((a - first) <= (c - b)) {
508*a28cd43dSSascha Wildner           STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
509*a28cd43dSSascha Wildner           STACK_PUSH(first, a, depth, limit);
510*a28cd43dSSascha Wildner           first = c;
511*a28cd43dSSascha Wildner         } else if((last - c) <= (c - b)) {
512*a28cd43dSSascha Wildner           STACK_PUSH(first, a, depth, limit);
513*a28cd43dSSascha Wildner           STACK_PUSH(b, c, depth + 1, ss_ilg(c - b));
514*a28cd43dSSascha Wildner           first = c;
515*a28cd43dSSascha Wildner         } else {
516*a28cd43dSSascha Wildner           STACK_PUSH(first, a, depth, limit);
517*a28cd43dSSascha Wildner           STACK_PUSH(c, last, depth, limit);
518*a28cd43dSSascha Wildner           first = b, last = c, depth += 1, limit = ss_ilg(c - b);
519*a28cd43dSSascha Wildner         }
520*a28cd43dSSascha Wildner       }
521*a28cd43dSSascha Wildner     } else {
522*a28cd43dSSascha Wildner       limit += 1;
523*a28cd43dSSascha Wildner       if(Td[PA[*first] - 1] < v) {
524*a28cd43dSSascha Wildner         first = ss_partition(PA, first, last, depth);
525*a28cd43dSSascha Wildner         limit = ss_ilg(last - first);
526*a28cd43dSSascha Wildner       }
527*a28cd43dSSascha Wildner       depth += 1;
528*a28cd43dSSascha Wildner     }
529*a28cd43dSSascha Wildner   }
530*a28cd43dSSascha Wildner #undef STACK_SIZE
531*a28cd43dSSascha Wildner }
532*a28cd43dSSascha Wildner 
533*a28cd43dSSascha Wildner #endif /* (SS_BLOCKSIZE == 0) || (SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE) */
534*a28cd43dSSascha Wildner 
535*a28cd43dSSascha Wildner 
536*a28cd43dSSascha Wildner /*---------------------------------------------------------------------------*/
537*a28cd43dSSascha Wildner 
538*a28cd43dSSascha Wildner #if SS_BLOCKSIZE != 0
539*a28cd43dSSascha Wildner 
540*a28cd43dSSascha Wildner static INLINE
541*a28cd43dSSascha Wildner void
ss_blockswap(int * a,int * b,int n)542*a28cd43dSSascha Wildner ss_blockswap(int *a, int *b, int n) {
543*a28cd43dSSascha Wildner   int t;
544*a28cd43dSSascha Wildner   for(; 0 < n; --n, ++a, ++b) {
545*a28cd43dSSascha Wildner     t = *a, *a = *b, *b = t;
546*a28cd43dSSascha Wildner   }
547*a28cd43dSSascha Wildner }
548*a28cd43dSSascha Wildner 
549*a28cd43dSSascha Wildner static INLINE
550*a28cd43dSSascha Wildner void
ss_rotate(int * first,int * middle,int * last)551*a28cd43dSSascha Wildner ss_rotate(int *first, int *middle, int *last) {
552*a28cd43dSSascha Wildner   int *a, *b, t;
553*a28cd43dSSascha Wildner   int l, r;
554*a28cd43dSSascha Wildner   l = middle - first, r = last - middle;
555*a28cd43dSSascha Wildner   for(; (0 < l) && (0 < r);) {
556*a28cd43dSSascha Wildner     if(l == r) { ss_blockswap(first, middle, l); break; }
557*a28cd43dSSascha Wildner     if(l < r) {
558*a28cd43dSSascha Wildner       a = last - 1, b = middle - 1;
559*a28cd43dSSascha Wildner       t = *a;
560*a28cd43dSSascha Wildner       do {
561*a28cd43dSSascha Wildner         *a-- = *b, *b-- = *a;
562*a28cd43dSSascha Wildner         if(b < first) {
563*a28cd43dSSascha Wildner           *a = t;
564*a28cd43dSSascha Wildner           last = a;
565*a28cd43dSSascha Wildner           if((r -= l + 1) <= l) { break; }
566*a28cd43dSSascha Wildner           a -= 1, b = middle - 1;
567*a28cd43dSSascha Wildner           t = *a;
568*a28cd43dSSascha Wildner         }
569*a28cd43dSSascha Wildner       } while(1);
570*a28cd43dSSascha Wildner     } else {
571*a28cd43dSSascha Wildner       a = first, b = middle;
572*a28cd43dSSascha Wildner       t = *a;
573*a28cd43dSSascha Wildner       do {
574*a28cd43dSSascha Wildner         *a++ = *b, *b++ = *a;
575*a28cd43dSSascha Wildner         if(last <= b) {
576*a28cd43dSSascha Wildner           *a = t;
577*a28cd43dSSascha Wildner           first = a + 1;
578*a28cd43dSSascha Wildner           if((l -= r + 1) <= r) { break; }
579*a28cd43dSSascha Wildner           a += 1, b = middle;
580*a28cd43dSSascha Wildner           t = *a;
581*a28cd43dSSascha Wildner         }
582*a28cd43dSSascha Wildner       } while(1);
583*a28cd43dSSascha Wildner     }
584*a28cd43dSSascha Wildner   }
585*a28cd43dSSascha Wildner }
586*a28cd43dSSascha Wildner 
587*a28cd43dSSascha Wildner 
588*a28cd43dSSascha Wildner /*---------------------------------------------------------------------------*/
589*a28cd43dSSascha Wildner 
590*a28cd43dSSascha Wildner static
591*a28cd43dSSascha Wildner void
ss_inplacemerge(const unsigned char * T,const int * PA,int * first,int * middle,int * last,int depth)592*a28cd43dSSascha Wildner ss_inplacemerge(const unsigned char *T, const int *PA,
593*a28cd43dSSascha Wildner                 int *first, int *middle, int *last,
594*a28cd43dSSascha Wildner                 int depth) {
595*a28cd43dSSascha Wildner   const int *p;
596*a28cd43dSSascha Wildner   int *a, *b;
597*a28cd43dSSascha Wildner   int len, half;
598*a28cd43dSSascha Wildner   int q, r;
599*a28cd43dSSascha Wildner   int x;
600*a28cd43dSSascha Wildner 
601*a28cd43dSSascha Wildner   for(;;) {
602*a28cd43dSSascha Wildner     if(*(last - 1) < 0) { x = 1; p = PA + ~*(last - 1); }
603*a28cd43dSSascha Wildner     else                { x = 0; p = PA +  *(last - 1); }
604*a28cd43dSSascha Wildner     for(a = first, len = middle - first, half = len >> 1, r = -1;
605*a28cd43dSSascha Wildner         0 < len;
606*a28cd43dSSascha Wildner         len = half, half >>= 1) {
607*a28cd43dSSascha Wildner       b = a + half;
608*a28cd43dSSascha Wildner       q = ss_compare(T, PA + ((0 <= *b) ? *b : ~*b), p, depth);
609*a28cd43dSSascha Wildner       if(q < 0) {
610*a28cd43dSSascha Wildner         a = b + 1;
611*a28cd43dSSascha Wildner         half -= (len & 1) ^ 1;
612*a28cd43dSSascha Wildner       } else {
613*a28cd43dSSascha Wildner         r = q;
614*a28cd43dSSascha Wildner       }
615*a28cd43dSSascha Wildner     }
616*a28cd43dSSascha Wildner     if(a < middle) {
617*a28cd43dSSascha Wildner       if(r == 0) { *a = ~*a; }
618*a28cd43dSSascha Wildner       ss_rotate(a, middle, last);
619*a28cd43dSSascha Wildner       last -= middle - a;
620*a28cd43dSSascha Wildner       middle = a;
621*a28cd43dSSascha Wildner       if(first == middle) { break; }
622*a28cd43dSSascha Wildner     }
623*a28cd43dSSascha Wildner     --last;
624*a28cd43dSSascha Wildner     if(x != 0) { while(*--last < 0) { } }
625*a28cd43dSSascha Wildner     if(middle == last) { break; }
626*a28cd43dSSascha Wildner   }
627*a28cd43dSSascha Wildner }
628*a28cd43dSSascha Wildner 
629*a28cd43dSSascha Wildner 
630*a28cd43dSSascha Wildner /*---------------------------------------------------------------------------*/
631*a28cd43dSSascha Wildner 
632*a28cd43dSSascha Wildner /* Merge-forward with internal buffer. */
633*a28cd43dSSascha Wildner static
634*a28cd43dSSascha Wildner void
ss_mergeforward(const unsigned char * T,const int * PA,int * first,int * middle,int * last,int * buf,int depth)635*a28cd43dSSascha Wildner ss_mergeforward(const unsigned char *T, const int *PA,
636*a28cd43dSSascha Wildner                 int *first, int *middle, int *last,
637*a28cd43dSSascha Wildner                 int *buf, int depth) {
638*a28cd43dSSascha Wildner   int *a, *b, *c, *bufend;
639*a28cd43dSSascha Wildner   int t;
640*a28cd43dSSascha Wildner   int r;
641*a28cd43dSSascha Wildner 
642*a28cd43dSSascha Wildner   bufend = buf + (middle - first) - 1;
643*a28cd43dSSascha Wildner   ss_blockswap(buf, first, middle - first);
644*a28cd43dSSascha Wildner 
645*a28cd43dSSascha Wildner   for(t = *(a = first), b = buf, c = middle;;) {
646*a28cd43dSSascha Wildner     r = ss_compare(T, PA + *b, PA + *c, depth);
647*a28cd43dSSascha Wildner     if(r < 0) {
648*a28cd43dSSascha Wildner       do {
649*a28cd43dSSascha Wildner         *a++ = *b;
650*a28cd43dSSascha Wildner         if(bufend <= b) { *bufend = t; return; }
651*a28cd43dSSascha Wildner         *b++ = *a;
652*a28cd43dSSascha Wildner       } while(*b < 0);
653*a28cd43dSSascha Wildner     } else if(r > 0) {
654*a28cd43dSSascha Wildner       do {
655*a28cd43dSSascha Wildner         *a++ = *c, *c++ = *a;
656*a28cd43dSSascha Wildner         if(last <= c) {
657*a28cd43dSSascha Wildner           while(b < bufend) { *a++ = *b, *b++ = *a; }
658*a28cd43dSSascha Wildner           *a = *b, *b = t;
659*a28cd43dSSascha Wildner           return;
660*a28cd43dSSascha Wildner         }
661*a28cd43dSSascha Wildner       } while(*c < 0);
662*a28cd43dSSascha Wildner     } else {
663*a28cd43dSSascha Wildner       *c = ~*c;
664*a28cd43dSSascha Wildner       do {
665*a28cd43dSSascha Wildner         *a++ = *b;
666*a28cd43dSSascha Wildner         if(bufend <= b) { *bufend = t; return; }
667*a28cd43dSSascha Wildner         *b++ = *a;
668*a28cd43dSSascha Wildner       } while(*b < 0);
669*a28cd43dSSascha Wildner 
670*a28cd43dSSascha Wildner       do {
671*a28cd43dSSascha Wildner         *a++ = *c, *c++ = *a;
672*a28cd43dSSascha Wildner         if(last <= c) {
673*a28cd43dSSascha Wildner           while(b < bufend) { *a++ = *b, *b++ = *a; }
674*a28cd43dSSascha Wildner           *a = *b, *b = t;
675*a28cd43dSSascha Wildner           return;
676*a28cd43dSSascha Wildner         }
677*a28cd43dSSascha Wildner       } while(*c < 0);
678*a28cd43dSSascha Wildner     }
679*a28cd43dSSascha Wildner   }
680*a28cd43dSSascha Wildner }
681*a28cd43dSSascha Wildner 
682*a28cd43dSSascha Wildner /* Merge-backward with internal buffer. */
683*a28cd43dSSascha Wildner static
684*a28cd43dSSascha Wildner void
ss_mergebackward(const unsigned char * T,const int * PA,int * first,int * middle,int * last,int * buf,int depth)685*a28cd43dSSascha Wildner ss_mergebackward(const unsigned char *T, const int *PA,
686*a28cd43dSSascha Wildner                  int *first, int *middle, int *last,
687*a28cd43dSSascha Wildner                  int *buf, int depth) {
688*a28cd43dSSascha Wildner   const int *p1, *p2;
689*a28cd43dSSascha Wildner   int *a, *b, *c, *bufend;
690*a28cd43dSSascha Wildner   int t;
691*a28cd43dSSascha Wildner   int r;
692*a28cd43dSSascha Wildner   int x;
693*a28cd43dSSascha Wildner 
694*a28cd43dSSascha Wildner   bufend = buf + (last - middle) - 1;
695*a28cd43dSSascha Wildner   ss_blockswap(buf, middle, last - middle);
696*a28cd43dSSascha Wildner 
697*a28cd43dSSascha Wildner   x = 0;
698*a28cd43dSSascha Wildner   if(*bufend < 0)       { p1 = PA + ~*bufend; x |= 1; }
699*a28cd43dSSascha Wildner   else                  { p1 = PA +  *bufend; }
700*a28cd43dSSascha Wildner   if(*(middle - 1) < 0) { p2 = PA + ~*(middle - 1); x |= 2; }
701*a28cd43dSSascha Wildner   else                  { p2 = PA +  *(middle - 1); }
702*a28cd43dSSascha Wildner   for(t = *(a = last - 1), b = bufend, c = middle - 1;;) {
703*a28cd43dSSascha Wildner     r = ss_compare(T, p1, p2, depth);
704*a28cd43dSSascha Wildner     if(0 < r) {
705*a28cd43dSSascha Wildner       if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
706*a28cd43dSSascha Wildner       *a-- = *b;
707*a28cd43dSSascha Wildner       if(b <= buf) { *buf = t; break; }
708*a28cd43dSSascha Wildner       *b-- = *a;
709*a28cd43dSSascha Wildner       if(*b < 0) { p1 = PA + ~*b; x |= 1; }
710*a28cd43dSSascha Wildner       else       { p1 = PA +  *b; }
711*a28cd43dSSascha Wildner     } else if(r < 0) {
712*a28cd43dSSascha Wildner       if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
713*a28cd43dSSascha Wildner       *a-- = *c, *c-- = *a;
714*a28cd43dSSascha Wildner       if(c < first) {
715*a28cd43dSSascha Wildner         while(buf < b) { *a-- = *b, *b-- = *a; }
716*a28cd43dSSascha Wildner         *a = *b, *b = t;
717*a28cd43dSSascha Wildner         break;
718*a28cd43dSSascha Wildner       }
719*a28cd43dSSascha Wildner       if(*c < 0) { p2 = PA + ~*c; x |= 2; }
720*a28cd43dSSascha Wildner       else       { p2 = PA +  *c; }
721*a28cd43dSSascha Wildner     } else {
722*a28cd43dSSascha Wildner       if(x & 1) { do { *a-- = *b, *b-- = *a; } while(*b < 0); x ^= 1; }
723*a28cd43dSSascha Wildner       *a-- = ~*b;
724*a28cd43dSSascha Wildner       if(b <= buf) { *buf = t; break; }
725*a28cd43dSSascha Wildner       *b-- = *a;
726*a28cd43dSSascha Wildner       if(x & 2) { do { *a-- = *c, *c-- = *a; } while(*c < 0); x ^= 2; }
727*a28cd43dSSascha Wildner       *a-- = *c, *c-- = *a;
728*a28cd43dSSascha Wildner       if(c < first) {
729*a28cd43dSSascha Wildner         while(buf < b) { *a-- = *b, *b-- = *a; }
730*a28cd43dSSascha Wildner         *a = *b, *b = t;
731*a28cd43dSSascha Wildner         break;
732*a28cd43dSSascha Wildner       }
733*a28cd43dSSascha Wildner       if(*b < 0) { p1 = PA + ~*b; x |= 1; }
734*a28cd43dSSascha Wildner       else       { p1 = PA +  *b; }
735*a28cd43dSSascha Wildner       if(*c < 0) { p2 = PA + ~*c; x |= 2; }
736*a28cd43dSSascha Wildner       else       { p2 = PA +  *c; }
737*a28cd43dSSascha Wildner     }
738*a28cd43dSSascha Wildner   }
739*a28cd43dSSascha Wildner }
740*a28cd43dSSascha Wildner 
741*a28cd43dSSascha Wildner /* D&C based merge. */
742*a28cd43dSSascha Wildner static
743*a28cd43dSSascha Wildner void
ss_swapmerge(const unsigned char * T,const int * PA,int * first,int * middle,int * last,int * buf,int bufsize,int depth)744*a28cd43dSSascha Wildner ss_swapmerge(const unsigned char *T, const int *PA,
745*a28cd43dSSascha Wildner              int *first, int *middle, int *last,
746*a28cd43dSSascha Wildner              int *buf, int bufsize, int depth) {
747*a28cd43dSSascha Wildner #define STACK_SIZE SS_SMERGE_STACKSIZE
748*a28cd43dSSascha Wildner #define GETIDX(a) ((0 <= (a)) ? (a) : (~(a)))
749*a28cd43dSSascha Wildner #define MERGE_CHECK(a, b, c)\
750*a28cd43dSSascha Wildner   do {\
751*a28cd43dSSascha Wildner     if(((c) & 1) ||\
752*a28cd43dSSascha Wildner        (((c) & 2) && (ss_compare(T, PA + GETIDX(*((a) - 1)), PA + *(a), depth) == 0))) {\
753*a28cd43dSSascha Wildner       *(a) = ~*(a);\
754*a28cd43dSSascha Wildner     }\
755*a28cd43dSSascha Wildner     if(((c) & 4) && ((ss_compare(T, PA + GETIDX(*((b) - 1)), PA + *(b), depth) == 0))) {\
756*a28cd43dSSascha Wildner       *(b) = ~*(b);\
757*a28cd43dSSascha Wildner     }\
758*a28cd43dSSascha Wildner   } while(0)
759*a28cd43dSSascha Wildner   struct { int *a, *b, *c; int d; } stack[STACK_SIZE];
760*a28cd43dSSascha Wildner   int *l, *r, *lm, *rm;
761*a28cd43dSSascha Wildner   int m, len, half;
762*a28cd43dSSascha Wildner   int ssize;
763*a28cd43dSSascha Wildner   int check, next;
764*a28cd43dSSascha Wildner 
765*a28cd43dSSascha Wildner   for(check = 0, ssize = 0;;) {
766*a28cd43dSSascha Wildner     if((last - middle) <= bufsize) {
767*a28cd43dSSascha Wildner       if((first < middle) && (middle < last)) {
768*a28cd43dSSascha Wildner         ss_mergebackward(T, PA, first, middle, last, buf, depth);
769*a28cd43dSSascha Wildner       }
770*a28cd43dSSascha Wildner       MERGE_CHECK(first, last, check);
771*a28cd43dSSascha Wildner       STACK_POP(first, middle, last, check);
772*a28cd43dSSascha Wildner       continue;
773*a28cd43dSSascha Wildner     }
774*a28cd43dSSascha Wildner 
775*a28cd43dSSascha Wildner     if((middle - first) <= bufsize) {
776*a28cd43dSSascha Wildner       if(first < middle) {
777*a28cd43dSSascha Wildner         ss_mergeforward(T, PA, first, middle, last, buf, depth);
778*a28cd43dSSascha Wildner       }
779*a28cd43dSSascha Wildner       MERGE_CHECK(first, last, check);
780*a28cd43dSSascha Wildner       STACK_POP(first, middle, last, check);
781*a28cd43dSSascha Wildner       continue;
782*a28cd43dSSascha Wildner     }
783*a28cd43dSSascha Wildner 
784*a28cd43dSSascha Wildner     for(m = 0, len = MIN(middle - first, last - middle), half = len >> 1;
785*a28cd43dSSascha Wildner         0 < len;
786*a28cd43dSSascha Wildner         len = half, half >>= 1) {
787*a28cd43dSSascha Wildner       if(ss_compare(T, PA + GETIDX(*(middle + m + half)),
788*a28cd43dSSascha Wildner                        PA + GETIDX(*(middle - m - half - 1)), depth) < 0) {
789*a28cd43dSSascha Wildner         m += half + 1;
790*a28cd43dSSascha Wildner         half -= (len & 1) ^ 1;
791*a28cd43dSSascha Wildner       }
792*a28cd43dSSascha Wildner     }
793*a28cd43dSSascha Wildner 
794*a28cd43dSSascha Wildner     if(0 < m) {
795*a28cd43dSSascha Wildner       lm = middle - m, rm = middle + m;
796*a28cd43dSSascha Wildner       ss_blockswap(lm, middle, m);
797*a28cd43dSSascha Wildner       l = r = middle, next = 0;
798*a28cd43dSSascha Wildner       if(rm < last) {
799*a28cd43dSSascha Wildner         if(*rm < 0) {
800*a28cd43dSSascha Wildner           *rm = ~*rm;
801*a28cd43dSSascha Wildner           if(first < lm) { for(; *--l < 0;) { } next |= 4; }
802*a28cd43dSSascha Wildner           next |= 1;
803*a28cd43dSSascha Wildner         } else if(first < lm) {
804*a28cd43dSSascha Wildner           for(; *r < 0; ++r) { }
805*a28cd43dSSascha Wildner           next |= 2;
806*a28cd43dSSascha Wildner         }
807*a28cd43dSSascha Wildner       }
808*a28cd43dSSascha Wildner 
809*a28cd43dSSascha Wildner       if((l - first) <= (last - r)) {
810*a28cd43dSSascha Wildner         STACK_PUSH(r, rm, last, (next & 3) | (check & 4));
811*a28cd43dSSascha Wildner         middle = lm, last = l, check = (check & 3) | (next & 4);
812*a28cd43dSSascha Wildner       } else {
813*a28cd43dSSascha Wildner         if((next & 2) && (r == middle)) { next ^= 6; }
814*a28cd43dSSascha Wildner         STACK_PUSH(first, lm, l, (check & 3) | (next & 4));
815*a28cd43dSSascha Wildner         first = r, middle = rm, check = (next & 3) | (check & 4);
816*a28cd43dSSascha Wildner       }
817*a28cd43dSSascha Wildner     } else {
818*a28cd43dSSascha Wildner       if(ss_compare(T, PA + GETIDX(*(middle - 1)), PA + *middle, depth) == 0) {
819*a28cd43dSSascha Wildner         *middle = ~*middle;
820*a28cd43dSSascha Wildner       }
821*a28cd43dSSascha Wildner       MERGE_CHECK(first, last, check);
822*a28cd43dSSascha Wildner       STACK_POP(first, middle, last, check);
823*a28cd43dSSascha Wildner     }
824*a28cd43dSSascha Wildner   }
825*a28cd43dSSascha Wildner #undef STACK_SIZE
826*a28cd43dSSascha Wildner }
827*a28cd43dSSascha Wildner 
828*a28cd43dSSascha Wildner #endif /* SS_BLOCKSIZE != 0 */
829*a28cd43dSSascha Wildner 
830*a28cd43dSSascha Wildner 
831*a28cd43dSSascha Wildner /*---------------------------------------------------------------------------*/
832*a28cd43dSSascha Wildner 
833*a28cd43dSSascha Wildner /* Substring sort */
834*a28cd43dSSascha Wildner static
835*a28cd43dSSascha Wildner void
sssort(const unsigned char * T,const int * PA,int * first,int * last,int * buf,int bufsize,int depth,int n,int lastsuffix)836*a28cd43dSSascha Wildner sssort(const unsigned char *T, const int *PA,
837*a28cd43dSSascha Wildner        int *first, int *last,
838*a28cd43dSSascha Wildner        int *buf, int bufsize,
839*a28cd43dSSascha Wildner        int depth, int n, int lastsuffix) {
840*a28cd43dSSascha Wildner   int *a;
841*a28cd43dSSascha Wildner #if SS_BLOCKSIZE != 0
842*a28cd43dSSascha Wildner   int *b, *middle, *curbuf;
843*a28cd43dSSascha Wildner   int j, k, curbufsize, limit;
844*a28cd43dSSascha Wildner #endif
845*a28cd43dSSascha Wildner   int i;
846*a28cd43dSSascha Wildner 
847*a28cd43dSSascha Wildner   if(lastsuffix != 0) { ++first; }
848*a28cd43dSSascha Wildner 
849*a28cd43dSSascha Wildner #if SS_BLOCKSIZE == 0
850*a28cd43dSSascha Wildner   ss_mintrosort(T, PA, first, last, depth);
851*a28cd43dSSascha Wildner #else
852*a28cd43dSSascha Wildner   if((bufsize < SS_BLOCKSIZE) &&
853*a28cd43dSSascha Wildner       (bufsize < (last - first)) &&
854*a28cd43dSSascha Wildner       (bufsize < (limit = ss_isqrt(last - first)))) {
855*a28cd43dSSascha Wildner     if(SS_BLOCKSIZE < limit) { limit = SS_BLOCKSIZE; }
856*a28cd43dSSascha Wildner     buf = middle = last - limit, bufsize = limit;
857*a28cd43dSSascha Wildner   } else {
858*a28cd43dSSascha Wildner     middle = last, limit = 0;
859*a28cd43dSSascha Wildner   }
860*a28cd43dSSascha Wildner   for(a = first, i = 0; SS_BLOCKSIZE < (middle - a); a += SS_BLOCKSIZE, ++i) {
861*a28cd43dSSascha Wildner #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
862*a28cd43dSSascha Wildner     ss_mintrosort(T, PA, a, a + SS_BLOCKSIZE, depth);
863*a28cd43dSSascha Wildner #elif 1 < SS_BLOCKSIZE
864*a28cd43dSSascha Wildner     ss_insertionsort(T, PA, a, a + SS_BLOCKSIZE, depth);
865*a28cd43dSSascha Wildner #endif
866*a28cd43dSSascha Wildner     curbufsize = last - (a + SS_BLOCKSIZE);
867*a28cd43dSSascha Wildner     curbuf = a + SS_BLOCKSIZE;
868*a28cd43dSSascha Wildner     if(curbufsize <= bufsize) { curbufsize = bufsize, curbuf = buf; }
869*a28cd43dSSascha Wildner     for(b = a, k = SS_BLOCKSIZE, j = i; j & 1; b -= k, k <<= 1, j >>= 1) {
870*a28cd43dSSascha Wildner       ss_swapmerge(T, PA, b - k, b, b + k, curbuf, curbufsize, depth);
871*a28cd43dSSascha Wildner     }
872*a28cd43dSSascha Wildner   }
873*a28cd43dSSascha Wildner #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
874*a28cd43dSSascha Wildner   ss_mintrosort(T, PA, a, middle, depth);
875*a28cd43dSSascha Wildner #elif 1 < SS_BLOCKSIZE
876*a28cd43dSSascha Wildner   ss_insertionsort(T, PA, a, middle, depth);
877*a28cd43dSSascha Wildner #endif
878*a28cd43dSSascha Wildner   for(k = SS_BLOCKSIZE; i != 0; k <<= 1, i >>= 1) {
879*a28cd43dSSascha Wildner     if(i & 1) {
880*a28cd43dSSascha Wildner       ss_swapmerge(T, PA, a - k, a, middle, buf, bufsize, depth);
881*a28cd43dSSascha Wildner       a -= k;
882*a28cd43dSSascha Wildner     }
883*a28cd43dSSascha Wildner   }
884*a28cd43dSSascha Wildner   if(limit != 0) {
885*a28cd43dSSascha Wildner #if SS_INSERTIONSORT_THRESHOLD < SS_BLOCKSIZE
886*a28cd43dSSascha Wildner     ss_mintrosort(T, PA, middle, last, depth);
887*a28cd43dSSascha Wildner #elif 1 < SS_BLOCKSIZE
888*a28cd43dSSascha Wildner     ss_insertionsort(T, PA, middle, last, depth);
889*a28cd43dSSascha Wildner #endif
890*a28cd43dSSascha Wildner     ss_inplacemerge(T, PA, first, middle, last, depth);
891*a28cd43dSSascha Wildner   }
892*a28cd43dSSascha Wildner #endif
893*a28cd43dSSascha Wildner 
894*a28cd43dSSascha Wildner   if(lastsuffix != 0) {
895*a28cd43dSSascha Wildner     /* Insert last type B* suffix. */
896*a28cd43dSSascha Wildner     int PAi[2]; PAi[0] = PA[*(first - 1)], PAi[1] = n - 2;
897*a28cd43dSSascha Wildner     for(a = first, i = *(first - 1);
898*a28cd43dSSascha Wildner         (a < last) && ((*a < 0) || (0 < ss_compare(T, &(PAi[0]), PA + *a, depth)));
899*a28cd43dSSascha Wildner         ++a) {
900*a28cd43dSSascha Wildner       *(a - 1) = *a;
901*a28cd43dSSascha Wildner     }
902*a28cd43dSSascha Wildner     *(a - 1) = i;
903*a28cd43dSSascha Wildner   }
904*a28cd43dSSascha Wildner }
905*a28cd43dSSascha Wildner 
906*a28cd43dSSascha Wildner 
907*a28cd43dSSascha Wildner /*---------------------------------------------------------------------------*/
908*a28cd43dSSascha Wildner 
909*a28cd43dSSascha Wildner static INLINE
910*a28cd43dSSascha Wildner int
tr_ilg(int n)911*a28cd43dSSascha Wildner tr_ilg(int n) {
912*a28cd43dSSascha Wildner   return (n & 0xffff0000) ?
913*a28cd43dSSascha Wildner           ((n & 0xff000000) ?
914*a28cd43dSSascha Wildner             24 + lg_table[(n >> 24) & 0xff] :
915*a28cd43dSSascha Wildner             16 + lg_table[(n >> 16) & 0xff]) :
916*a28cd43dSSascha Wildner           ((n & 0x0000ff00) ?
917*a28cd43dSSascha Wildner              8 + lg_table[(n >>  8) & 0xff] :
918*a28cd43dSSascha Wildner              0 + lg_table[(n >>  0) & 0xff]);
919*a28cd43dSSascha Wildner }
920*a28cd43dSSascha Wildner 
921*a28cd43dSSascha Wildner 
922*a28cd43dSSascha Wildner /*---------------------------------------------------------------------------*/
923*a28cd43dSSascha Wildner 
924*a28cd43dSSascha Wildner /* Simple insertionsort for small size groups. */
925*a28cd43dSSascha Wildner static
926*a28cd43dSSascha Wildner void
tr_insertionsort(const int * ISAd,int * first,int * last)927*a28cd43dSSascha Wildner tr_insertionsort(const int *ISAd, int *first, int *last) {
928*a28cd43dSSascha Wildner   int *a, *b;
929*a28cd43dSSascha Wildner   int t, r;
930*a28cd43dSSascha Wildner 
931*a28cd43dSSascha Wildner   for(a = first + 1; a < last; ++a) {
932*a28cd43dSSascha Wildner     for(t = *a, b = a - 1; 0 > (r = ISAd[t] - ISAd[*b]);) {
933*a28cd43dSSascha Wildner       do { *(b + 1) = *b; } while((first <= --b) && (*b < 0));
934*a28cd43dSSascha Wildner       if(b < first) { break; }
935*a28cd43dSSascha Wildner     }
936*a28cd43dSSascha Wildner     if(r == 0) { *b = ~*b; }
937*a28cd43dSSascha Wildner     *(b + 1) = t;
938*a28cd43dSSascha Wildner   }
939*a28cd43dSSascha Wildner }
940*a28cd43dSSascha Wildner 
941*a28cd43dSSascha Wildner 
942*a28cd43dSSascha Wildner /*---------------------------------------------------------------------------*/
943*a28cd43dSSascha Wildner 
944*a28cd43dSSascha Wildner static INLINE
945*a28cd43dSSascha Wildner void
tr_fixdown(const int * ISAd,int * SA,int i,int size)946*a28cd43dSSascha Wildner tr_fixdown(const int *ISAd, int *SA, int i, int size) {
947*a28cd43dSSascha Wildner   int j, k;
948*a28cd43dSSascha Wildner   int v;
949*a28cd43dSSascha Wildner   int c, d, e;
950*a28cd43dSSascha Wildner 
951*a28cd43dSSascha Wildner   for(v = SA[i], c = ISAd[v]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) {
952*a28cd43dSSascha Wildner     d = ISAd[SA[k = j++]];
953*a28cd43dSSascha Wildner     if(d < (e = ISAd[SA[j]])) { k = j; d = e; }
954*a28cd43dSSascha Wildner     if(d <= c) { break; }
955*a28cd43dSSascha Wildner   }
956*a28cd43dSSascha Wildner   SA[i] = v;
957*a28cd43dSSascha Wildner }
958*a28cd43dSSascha Wildner 
959*a28cd43dSSascha Wildner /* Simple top-down heapsort. */
960*a28cd43dSSascha Wildner static
961*a28cd43dSSascha Wildner void
tr_heapsort(const int * ISAd,int * SA,int size)962*a28cd43dSSascha Wildner tr_heapsort(const int *ISAd, int *SA, int size) {
963*a28cd43dSSascha Wildner   int i, m;
964*a28cd43dSSascha Wildner   int t;
965*a28cd43dSSascha Wildner 
966*a28cd43dSSascha Wildner   m = size;
967*a28cd43dSSascha Wildner   if((size % 2) == 0) {
968*a28cd43dSSascha Wildner     m--;
969*a28cd43dSSascha Wildner     if(ISAd[SA[m / 2]] < ISAd[SA[m]]) { SWAP(SA[m], SA[m / 2]); }
970*a28cd43dSSascha Wildner   }
971*a28cd43dSSascha Wildner 
972*a28cd43dSSascha Wildner   for(i = m / 2 - 1; 0 <= i; --i) { tr_fixdown(ISAd, SA, i, m); }
973*a28cd43dSSascha Wildner   if((size % 2) == 0) { SWAP(SA[0], SA[m]); tr_fixdown(ISAd, SA, 0, m); }
974*a28cd43dSSascha Wildner   for(i = m - 1; 0 < i; --i) {
975*a28cd43dSSascha Wildner     t = SA[0], SA[0] = SA[i];
976*a28cd43dSSascha Wildner     tr_fixdown(ISAd, SA, 0, i);
977*a28cd43dSSascha Wildner     SA[i] = t;
978*a28cd43dSSascha Wildner   }
979*a28cd43dSSascha Wildner }
980*a28cd43dSSascha Wildner 
981*a28cd43dSSascha Wildner 
982*a28cd43dSSascha Wildner /*---------------------------------------------------------------------------*/
983*a28cd43dSSascha Wildner 
984*a28cd43dSSascha Wildner /* Returns the median of three elements. */
985*a28cd43dSSascha Wildner static INLINE
986*a28cd43dSSascha Wildner int *
tr_median3(const int * ISAd,int * v1,int * v2,int * v3)987*a28cd43dSSascha Wildner tr_median3(const int *ISAd, int *v1, int *v2, int *v3) {
988*a28cd43dSSascha Wildner   int *t;
989*a28cd43dSSascha Wildner   if(ISAd[*v1] > ISAd[*v2]) { SWAP(v1, v2); }
990*a28cd43dSSascha Wildner   if(ISAd[*v2] > ISAd[*v3]) {
991*a28cd43dSSascha Wildner     if(ISAd[*v1] > ISAd[*v3]) { return v1; }
992*a28cd43dSSascha Wildner     else { return v3; }
993*a28cd43dSSascha Wildner   }
994*a28cd43dSSascha Wildner   return v2;
995*a28cd43dSSascha Wildner }
996*a28cd43dSSascha Wildner 
997*a28cd43dSSascha Wildner /* Returns the median of five elements. */
998*a28cd43dSSascha Wildner static INLINE
999*a28cd43dSSascha Wildner int *
tr_median5(const int * ISAd,int * v1,int * v2,int * v3,int * v4,int * v5)1000*a28cd43dSSascha Wildner tr_median5(const int *ISAd,
1001*a28cd43dSSascha Wildner            int *v1, int *v2, int *v3, int *v4, int *v5) {
1002*a28cd43dSSascha Wildner   int *t;
1003*a28cd43dSSascha Wildner   if(ISAd[*v2] > ISAd[*v3]) { SWAP(v2, v3); }
1004*a28cd43dSSascha Wildner   if(ISAd[*v4] > ISAd[*v5]) { SWAP(v4, v5); }
1005*a28cd43dSSascha Wildner   if(ISAd[*v2] > ISAd[*v4]) { SWAP(v2, v4); SWAP(v3, v5); }
1006*a28cd43dSSascha Wildner   if(ISAd[*v1] > ISAd[*v3]) { SWAP(v1, v3); }
1007*a28cd43dSSascha Wildner   if(ISAd[*v1] > ISAd[*v4]) { SWAP(v1, v4); SWAP(v3, v5); }
1008*a28cd43dSSascha Wildner   if(ISAd[*v3] > ISAd[*v4]) { return v4; }
1009*a28cd43dSSascha Wildner   return v3;
1010*a28cd43dSSascha Wildner }
1011*a28cd43dSSascha Wildner 
1012*a28cd43dSSascha Wildner /* Returns the pivot element. */
1013*a28cd43dSSascha Wildner static INLINE
1014*a28cd43dSSascha Wildner int *
tr_pivot(const int * ISAd,int * first,int * last)1015*a28cd43dSSascha Wildner tr_pivot(const int *ISAd, int *first, int *last) {
1016*a28cd43dSSascha Wildner   int *middle;
1017*a28cd43dSSascha Wildner   int t;
1018*a28cd43dSSascha Wildner 
1019*a28cd43dSSascha Wildner   t = last - first;
1020*a28cd43dSSascha Wildner   middle = first + t / 2;
1021*a28cd43dSSascha Wildner 
1022*a28cd43dSSascha Wildner   if(t <= 512) {
1023*a28cd43dSSascha Wildner     if(t <= 32) {
1024*a28cd43dSSascha Wildner       return tr_median3(ISAd, first, middle, last - 1);
1025*a28cd43dSSascha Wildner     } else {
1026*a28cd43dSSascha Wildner       t >>= 2;
1027*a28cd43dSSascha Wildner       return tr_median5(ISAd, first, first + t, middle, last - 1 - t, last - 1);
1028*a28cd43dSSascha Wildner     }
1029*a28cd43dSSascha Wildner   }
1030*a28cd43dSSascha Wildner   t >>= 3;
1031*a28cd43dSSascha Wildner   first  = tr_median3(ISAd, first, first + t, first + (t << 1));
1032*a28cd43dSSascha Wildner   middle = tr_median3(ISAd, middle - t, middle, middle + t);
1033*a28cd43dSSascha Wildner   last   = tr_median3(ISAd, last - 1 - (t << 1), last - 1 - t, last - 1);
1034*a28cd43dSSascha Wildner   return tr_median3(ISAd, first, middle, last);
1035*a28cd43dSSascha Wildner }
1036*a28cd43dSSascha Wildner 
1037*a28cd43dSSascha Wildner 
1038*a28cd43dSSascha Wildner /*---------------------------------------------------------------------------*/
1039*a28cd43dSSascha Wildner 
1040*a28cd43dSSascha Wildner typedef struct _trbudget_t trbudget_t;
1041*a28cd43dSSascha Wildner struct _trbudget_t {
1042*a28cd43dSSascha Wildner   int chance;
1043*a28cd43dSSascha Wildner   int remain;
1044*a28cd43dSSascha Wildner   int incval;
1045*a28cd43dSSascha Wildner   int count;
1046*a28cd43dSSascha Wildner };
1047*a28cd43dSSascha Wildner 
1048*a28cd43dSSascha Wildner static INLINE
1049*a28cd43dSSascha Wildner void
trbudget_init(trbudget_t * budget,int chance,int incval)1050*a28cd43dSSascha Wildner trbudget_init(trbudget_t *budget, int chance, int incval) {
1051*a28cd43dSSascha Wildner   budget->chance = chance;
1052*a28cd43dSSascha Wildner   budget->remain = budget->incval = incval;
1053*a28cd43dSSascha Wildner }
1054*a28cd43dSSascha Wildner 
1055*a28cd43dSSascha Wildner static INLINE
1056*a28cd43dSSascha Wildner int
trbudget_check(trbudget_t * budget,int size)1057*a28cd43dSSascha Wildner trbudget_check(trbudget_t *budget, int size) {
1058*a28cd43dSSascha Wildner   if(size <= budget->remain) { budget->remain -= size; return 1; }
1059*a28cd43dSSascha Wildner   if(budget->chance == 0) { budget->count += size; return 0; }
1060*a28cd43dSSascha Wildner   budget->remain += budget->incval - size;
1061*a28cd43dSSascha Wildner   budget->chance -= 1;
1062*a28cd43dSSascha Wildner   return 1;
1063*a28cd43dSSascha Wildner }
1064*a28cd43dSSascha Wildner 
1065*a28cd43dSSascha Wildner 
1066*a28cd43dSSascha Wildner /*---------------------------------------------------------------------------*/
1067*a28cd43dSSascha Wildner 
1068*a28cd43dSSascha Wildner static INLINE
1069*a28cd43dSSascha Wildner void
tr_partition(const int * ISAd,int * first,int * middle,int * last,int ** pa,int ** pb,int v)1070*a28cd43dSSascha Wildner tr_partition(const int *ISAd,
1071*a28cd43dSSascha Wildner              int *first, int *middle, int *last,
1072*a28cd43dSSascha Wildner              int **pa, int **pb, int v) {
1073*a28cd43dSSascha Wildner   int *a, *b, *c, *d, *e, *f;
1074*a28cd43dSSascha Wildner   int t, s;
1075*a28cd43dSSascha Wildner   int x = 0;
1076*a28cd43dSSascha Wildner 
1077*a28cd43dSSascha Wildner   for(b = middle - 1; (++b < last) && ((x = ISAd[*b]) == v);) { }
1078*a28cd43dSSascha Wildner   if(((a = b) < last) && (x < v)) {
1079*a28cd43dSSascha Wildner     for(; (++b < last) && ((x = ISAd[*b]) <= v);) {
1080*a28cd43dSSascha Wildner       if(x == v) { SWAP(*b, *a); ++a; }
1081*a28cd43dSSascha Wildner     }
1082*a28cd43dSSascha Wildner   }
1083*a28cd43dSSascha Wildner   for(c = last; (b < --c) && ((x = ISAd[*c]) == v);) { }
1084*a28cd43dSSascha Wildner   if((b < (d = c)) && (x > v)) {
1085*a28cd43dSSascha Wildner     for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
1086*a28cd43dSSascha Wildner       if(x == v) { SWAP(*c, *d); --d; }
1087*a28cd43dSSascha Wildner     }
1088*a28cd43dSSascha Wildner   }
1089*a28cd43dSSascha Wildner   for(; b < c;) {
1090*a28cd43dSSascha Wildner     SWAP(*b, *c);
1091*a28cd43dSSascha Wildner     for(; (++b < c) && ((x = ISAd[*b]) <= v);) {
1092*a28cd43dSSascha Wildner       if(x == v) { SWAP(*b, *a); ++a; }
1093*a28cd43dSSascha Wildner     }
1094*a28cd43dSSascha Wildner     for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
1095*a28cd43dSSascha Wildner       if(x == v) { SWAP(*c, *d); --d; }
1096*a28cd43dSSascha Wildner     }
1097*a28cd43dSSascha Wildner   }
1098*a28cd43dSSascha Wildner 
1099*a28cd43dSSascha Wildner   if(a <= d) {
1100*a28cd43dSSascha Wildner     c = b - 1;
1101*a28cd43dSSascha Wildner     if((s = a - first) > (t = b - a)) { s = t; }
1102*a28cd43dSSascha Wildner     for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
1103*a28cd43dSSascha Wildner     if((s = d - c) > (t = last - d - 1)) { s = t; }
1104*a28cd43dSSascha Wildner     for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
1105*a28cd43dSSascha Wildner     first += (b - a), last -= (d - c);
1106*a28cd43dSSascha Wildner   }
1107*a28cd43dSSascha Wildner   *pa = first, *pb = last;
1108*a28cd43dSSascha Wildner }
1109*a28cd43dSSascha Wildner 
1110*a28cd43dSSascha Wildner static
1111*a28cd43dSSascha Wildner void
tr_copy(int * ISA,const int * SA,int * first,int * a,int * b,int * last,int depth)1112*a28cd43dSSascha Wildner tr_copy(int *ISA, const int *SA,
1113*a28cd43dSSascha Wildner         int *first, int *a, int *b, int *last,
1114*a28cd43dSSascha Wildner         int depth) {
1115*a28cd43dSSascha Wildner   /* sort suffixes of middle partition
1116*a28cd43dSSascha Wildner      by using sorted order of suffixes of left and right partition. */
1117*a28cd43dSSascha Wildner   int *c, *d, *e;
1118*a28cd43dSSascha Wildner   int s, v;
1119*a28cd43dSSascha Wildner 
1120*a28cd43dSSascha Wildner   v = b - SA - 1;
1121*a28cd43dSSascha Wildner   for(c = first, d = a - 1; c <= d; ++c) {
1122*a28cd43dSSascha Wildner     if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
1123*a28cd43dSSascha Wildner       *++d = s;
1124*a28cd43dSSascha Wildner       ISA[s] = d - SA;
1125*a28cd43dSSascha Wildner     }
1126*a28cd43dSSascha Wildner   }
1127*a28cd43dSSascha Wildner   for(c = last - 1, e = d + 1, d = b; e < d; --c) {
1128*a28cd43dSSascha Wildner     if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
1129*a28cd43dSSascha Wildner       *--d = s;
1130*a28cd43dSSascha Wildner       ISA[s] = d - SA;
1131*a28cd43dSSascha Wildner     }
1132*a28cd43dSSascha Wildner   }
1133*a28cd43dSSascha Wildner }
1134*a28cd43dSSascha Wildner 
1135*a28cd43dSSascha Wildner static
1136*a28cd43dSSascha Wildner void
tr_partialcopy(int * ISA,const int * SA,int * first,int * a,int * b,int * last,int depth)1137*a28cd43dSSascha Wildner tr_partialcopy(int *ISA, const int *SA,
1138*a28cd43dSSascha Wildner                int *first, int *a, int *b, int *last,
1139*a28cd43dSSascha Wildner                int depth) {
1140*a28cd43dSSascha Wildner   int *c, *d, *e;
1141*a28cd43dSSascha Wildner   int s, v;
1142*a28cd43dSSascha Wildner   int rank, lastrank, newrank = -1;
1143*a28cd43dSSascha Wildner 
1144*a28cd43dSSascha Wildner   v = b - SA - 1;
1145*a28cd43dSSascha Wildner   lastrank = -1;
1146*a28cd43dSSascha Wildner   for(c = first, d = a - 1; c <= d; ++c) {
1147*a28cd43dSSascha Wildner     if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
1148*a28cd43dSSascha Wildner       *++d = s;
1149*a28cd43dSSascha Wildner       rank = ISA[s + depth];
1150*a28cd43dSSascha Wildner       if(lastrank != rank) { lastrank = rank; newrank = d - SA; }
1151*a28cd43dSSascha Wildner       ISA[s] = newrank;
1152*a28cd43dSSascha Wildner     }
1153*a28cd43dSSascha Wildner   }
1154*a28cd43dSSascha Wildner 
1155*a28cd43dSSascha Wildner   lastrank = -1;
1156*a28cd43dSSascha Wildner   for(e = d; first <= e; --e) {
1157*a28cd43dSSascha Wildner     rank = ISA[*e];
1158*a28cd43dSSascha Wildner     if(lastrank != rank) { lastrank = rank; newrank = e - SA; }
1159*a28cd43dSSascha Wildner     if(newrank != rank) { ISA[*e] = newrank; }
1160*a28cd43dSSascha Wildner   }
1161*a28cd43dSSascha Wildner 
1162*a28cd43dSSascha Wildner   lastrank = -1;
1163*a28cd43dSSascha Wildner   for(c = last - 1, e = d + 1, d = b; e < d; --c) {
1164*a28cd43dSSascha Wildner     if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
1165*a28cd43dSSascha Wildner       *--d = s;
1166*a28cd43dSSascha Wildner       rank = ISA[s + depth];
1167*a28cd43dSSascha Wildner       if(lastrank != rank) { lastrank = rank; newrank = d - SA; }
1168*a28cd43dSSascha Wildner       ISA[s] = newrank;
1169*a28cd43dSSascha Wildner     }
1170*a28cd43dSSascha Wildner   }
1171*a28cd43dSSascha Wildner }
1172*a28cd43dSSascha Wildner 
1173*a28cd43dSSascha Wildner static
1174*a28cd43dSSascha Wildner void
tr_introsort(int * ISA,const int * ISAd,int * SA,int * first,int * last,trbudget_t * budget)1175*a28cd43dSSascha Wildner tr_introsort(int *ISA, const int *ISAd,
1176*a28cd43dSSascha Wildner              int *SA, int *first, int *last,
1177*a28cd43dSSascha Wildner              trbudget_t *budget) {
1178*a28cd43dSSascha Wildner #define STACK_SIZE TR_STACKSIZE
1179*a28cd43dSSascha Wildner   struct { const int *a; int *b, *c; int d, e; }stack[STACK_SIZE];
1180*a28cd43dSSascha Wildner   int *a, *b, *c;
1181*a28cd43dSSascha Wildner   int t;
1182*a28cd43dSSascha Wildner   int v, x = 0;
1183*a28cd43dSSascha Wildner   int incr = ISAd - ISA;
1184*a28cd43dSSascha Wildner   int limit, next;
1185*a28cd43dSSascha Wildner   int ssize, trlink = -1;
1186*a28cd43dSSascha Wildner 
1187*a28cd43dSSascha Wildner   for(ssize = 0, limit = tr_ilg(last - first);;) {
1188*a28cd43dSSascha Wildner 
1189*a28cd43dSSascha Wildner     if(limit < 0) {
1190*a28cd43dSSascha Wildner       if(limit == -1) {
1191*a28cd43dSSascha Wildner         /* tandem repeat partition */
1192*a28cd43dSSascha Wildner         tr_partition(ISAd - incr, first, first, last, &a, &b, last - SA - 1);
1193*a28cd43dSSascha Wildner 
1194*a28cd43dSSascha Wildner         /* update ranks */
1195*a28cd43dSSascha Wildner         if(a < last) {
1196*a28cd43dSSascha Wildner           for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
1197*a28cd43dSSascha Wildner         }
1198*a28cd43dSSascha Wildner         if(b < last) {
1199*a28cd43dSSascha Wildner           for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; }
1200*a28cd43dSSascha Wildner         }
1201*a28cd43dSSascha Wildner 
1202*a28cd43dSSascha Wildner         /* push */
1203*a28cd43dSSascha Wildner         if(1 < (b - a)) {
1204*a28cd43dSSascha Wildner           STACK_PUSH5(NULL, a, b, 0, 0);
1205*a28cd43dSSascha Wildner           STACK_PUSH5(ISAd - incr, first, last, -2, trlink);
1206*a28cd43dSSascha Wildner           trlink = ssize - 2;
1207*a28cd43dSSascha Wildner         }
1208*a28cd43dSSascha Wildner         if((a - first) <= (last - b)) {
1209*a28cd43dSSascha Wildner           if(1 < (a - first)) {
1210*a28cd43dSSascha Wildner             STACK_PUSH5(ISAd, b, last, tr_ilg(last - b), trlink);
1211*a28cd43dSSascha Wildner             last = a, limit = tr_ilg(a - first);
1212*a28cd43dSSascha Wildner           } else if(1 < (last - b)) {
1213*a28cd43dSSascha Wildner             first = b, limit = tr_ilg(last - b);
1214*a28cd43dSSascha Wildner           } else {
1215*a28cd43dSSascha Wildner             STACK_POP5(ISAd, first, last, limit, trlink);
1216*a28cd43dSSascha Wildner           }
1217*a28cd43dSSascha Wildner         } else {
1218*a28cd43dSSascha Wildner           if(1 < (last - b)) {
1219*a28cd43dSSascha Wildner             STACK_PUSH5(ISAd, first, a, tr_ilg(a - first), trlink);
1220*a28cd43dSSascha Wildner             first = b, limit = tr_ilg(last - b);
1221*a28cd43dSSascha Wildner           } else if(1 < (a - first)) {
1222*a28cd43dSSascha Wildner             last = a, limit = tr_ilg(a - first);
1223*a28cd43dSSascha Wildner           } else {
1224*a28cd43dSSascha Wildner             STACK_POP5(ISAd, first, last, limit, trlink);
1225*a28cd43dSSascha Wildner           }
1226*a28cd43dSSascha Wildner         }
1227*a28cd43dSSascha Wildner       } else if(limit == -2) {
1228*a28cd43dSSascha Wildner         /* tandem repeat copy */
1229*a28cd43dSSascha Wildner         a = stack[--ssize].b, b = stack[ssize].c;
1230*a28cd43dSSascha Wildner         if(stack[ssize].d == 0) {
1231*a28cd43dSSascha Wildner           tr_copy(ISA, SA, first, a, b, last, ISAd - ISA);
1232*a28cd43dSSascha Wildner         } else {
1233*a28cd43dSSascha Wildner           if(0 <= trlink) { stack[trlink].d = -1; }
1234*a28cd43dSSascha Wildner           tr_partialcopy(ISA, SA, first, a, b, last, ISAd - ISA);
1235*a28cd43dSSascha Wildner         }
1236*a28cd43dSSascha Wildner         STACK_POP5(ISAd, first, last, limit, trlink);
1237*a28cd43dSSascha Wildner       } else {
1238*a28cd43dSSascha Wildner         /* sorted partition */
1239*a28cd43dSSascha Wildner         if(0 <= *first) {
1240*a28cd43dSSascha Wildner           a = first;
1241*a28cd43dSSascha Wildner           do { ISA[*a] = a - SA; } while((++a < last) && (0 <= *a));
1242*a28cd43dSSascha Wildner           first = a;
1243*a28cd43dSSascha Wildner         }
1244*a28cd43dSSascha Wildner         if(first < last) {
1245*a28cd43dSSascha Wildner           a = first; do { *a = ~*a; } while(*++a < 0);
1246*a28cd43dSSascha Wildner           next = (ISA[*a] != ISAd[*a]) ? tr_ilg(a - first + 1) : -1;
1247*a28cd43dSSascha Wildner           if(++a < last) { for(b = first, v = a - SA - 1; b < a; ++b) { ISA[*b] = v; } }
1248*a28cd43dSSascha Wildner 
1249*a28cd43dSSascha Wildner           /* push */
1250*a28cd43dSSascha Wildner           if(trbudget_check(budget, a - first)) {
1251*a28cd43dSSascha Wildner             if((a - first) <= (last - a)) {
1252*a28cd43dSSascha Wildner               STACK_PUSH5(ISAd, a, last, -3, trlink);
1253*a28cd43dSSascha Wildner               ISAd += incr, last = a, limit = next;
1254*a28cd43dSSascha Wildner             } else {
1255*a28cd43dSSascha Wildner               if(1 < (last - a)) {
1256*a28cd43dSSascha Wildner                 STACK_PUSH5(ISAd + incr, first, a, next, trlink);
1257*a28cd43dSSascha Wildner                 first = a, limit = -3;
1258*a28cd43dSSascha Wildner               } else {
1259*a28cd43dSSascha Wildner                 ISAd += incr, last = a, limit = next;
1260*a28cd43dSSascha Wildner               }
1261*a28cd43dSSascha Wildner             }
1262*a28cd43dSSascha Wildner           } else {
1263*a28cd43dSSascha Wildner             if(0 <= trlink) { stack[trlink].d = -1; }
1264*a28cd43dSSascha Wildner             if(1 < (last - a)) {
1265*a28cd43dSSascha Wildner               first = a, limit = -3;
1266*a28cd43dSSascha Wildner             } else {
1267*a28cd43dSSascha Wildner               STACK_POP5(ISAd, first, last, limit, trlink);
1268*a28cd43dSSascha Wildner             }
1269*a28cd43dSSascha Wildner           }
1270*a28cd43dSSascha Wildner         } else {
1271*a28cd43dSSascha Wildner           STACK_POP5(ISAd, first, last, limit, trlink);
1272*a28cd43dSSascha Wildner         }
1273*a28cd43dSSascha Wildner       }
1274*a28cd43dSSascha Wildner       continue;
1275*a28cd43dSSascha Wildner     }
1276*a28cd43dSSascha Wildner 
1277*a28cd43dSSascha Wildner     if((last - first) <= TR_INSERTIONSORT_THRESHOLD) {
1278*a28cd43dSSascha Wildner       tr_insertionsort(ISAd, first, last);
1279*a28cd43dSSascha Wildner       limit = -3;
1280*a28cd43dSSascha Wildner       continue;
1281*a28cd43dSSascha Wildner     }
1282*a28cd43dSSascha Wildner 
1283*a28cd43dSSascha Wildner     if(limit-- == 0) {
1284*a28cd43dSSascha Wildner       tr_heapsort(ISAd, first, last - first);
1285*a28cd43dSSascha Wildner       for(a = last - 1; first < a; a = b) {
1286*a28cd43dSSascha Wildner         for(x = ISAd[*a], b = a - 1; (first <= b) && (ISAd[*b] == x); --b) { *b = ~*b; }
1287*a28cd43dSSascha Wildner       }
1288*a28cd43dSSascha Wildner       limit = -3;
1289*a28cd43dSSascha Wildner       continue;
1290*a28cd43dSSascha Wildner     }
1291*a28cd43dSSascha Wildner 
1292*a28cd43dSSascha Wildner     /* choose pivot */
1293*a28cd43dSSascha Wildner     a = tr_pivot(ISAd, first, last);
1294*a28cd43dSSascha Wildner     SWAP(*first, *a);
1295*a28cd43dSSascha Wildner     v = ISAd[*first];
1296*a28cd43dSSascha Wildner 
1297*a28cd43dSSascha Wildner     /* partition */
1298*a28cd43dSSascha Wildner     tr_partition(ISAd, first, first + 1, last, &a, &b, v);
1299*a28cd43dSSascha Wildner     if((last - first) != (b - a)) {
1300*a28cd43dSSascha Wildner       next = (ISA[*a] != v) ? tr_ilg(b - a) : -1;
1301*a28cd43dSSascha Wildner 
1302*a28cd43dSSascha Wildner       /* update ranks */
1303*a28cd43dSSascha Wildner       for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
1304*a28cd43dSSascha Wildner       if(b < last) { for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } }
1305*a28cd43dSSascha Wildner 
1306*a28cd43dSSascha Wildner       /* push */
1307*a28cd43dSSascha Wildner       if((1 < (b - a)) && (trbudget_check(budget, b - a))) {
1308*a28cd43dSSascha Wildner         if((a - first) <= (last - b)) {
1309*a28cd43dSSascha Wildner           if((last - b) <= (b - a)) {
1310*a28cd43dSSascha Wildner             if(1 < (a - first)) {
1311*a28cd43dSSascha Wildner               STACK_PUSH5(ISAd + incr, a, b, next, trlink);
1312*a28cd43dSSascha Wildner               STACK_PUSH5(ISAd, b, last, limit, trlink);
1313*a28cd43dSSascha Wildner               last = a;
1314*a28cd43dSSascha Wildner             } else if(1 < (last - b)) {
1315*a28cd43dSSascha Wildner               STACK_PUSH5(ISAd + incr, a, b, next, trlink);
1316*a28cd43dSSascha Wildner               first = b;
1317*a28cd43dSSascha Wildner             } else {
1318*a28cd43dSSascha Wildner               ISAd += incr, first = a, last = b, limit = next;
1319*a28cd43dSSascha Wildner             }
1320*a28cd43dSSascha Wildner           } else if((a - first) <= (b - a)) {
1321*a28cd43dSSascha Wildner             if(1 < (a - first)) {
1322*a28cd43dSSascha Wildner               STACK_PUSH5(ISAd, b, last, limit, trlink);
1323*a28cd43dSSascha Wildner               STACK_PUSH5(ISAd + incr, a, b, next, trlink);
1324*a28cd43dSSascha Wildner               last = a;
1325*a28cd43dSSascha Wildner             } else {
1326*a28cd43dSSascha Wildner               STACK_PUSH5(ISAd, b, last, limit, trlink);
1327*a28cd43dSSascha Wildner               ISAd += incr, first = a, last = b, limit = next;
1328*a28cd43dSSascha Wildner             }
1329*a28cd43dSSascha Wildner           } else {
1330*a28cd43dSSascha Wildner             STACK_PUSH5(ISAd, b, last, limit, trlink);
1331*a28cd43dSSascha Wildner             STACK_PUSH5(ISAd, first, a, limit, trlink);
1332*a28cd43dSSascha Wildner             ISAd += incr, first = a, last = b, limit = next;
1333*a28cd43dSSascha Wildner           }
1334*a28cd43dSSascha Wildner         } else {
1335*a28cd43dSSascha Wildner           if((a - first) <= (b - a)) {
1336*a28cd43dSSascha Wildner             if(1 < (last - b)) {
1337*a28cd43dSSascha Wildner               STACK_PUSH5(ISAd + incr, a, b, next, trlink);
1338*a28cd43dSSascha Wildner               STACK_PUSH5(ISAd, first, a, limit, trlink);
1339*a28cd43dSSascha Wildner               first = b;
1340*a28cd43dSSascha Wildner             } else if(1 < (a - first)) {
1341*a28cd43dSSascha Wildner               STACK_PUSH5(ISAd + incr, a, b, next, trlink);
1342*a28cd43dSSascha Wildner               last = a;
1343*a28cd43dSSascha Wildner             } else {
1344*a28cd43dSSascha Wildner               ISAd += incr, first = a, last = b, limit = next;
1345*a28cd43dSSascha Wildner             }
1346*a28cd43dSSascha Wildner           } else if((last - b) <= (b - a)) {
1347*a28cd43dSSascha Wildner             if(1 < (last - b)) {
1348*a28cd43dSSascha Wildner               STACK_PUSH5(ISAd, first, a, limit, trlink);
1349*a28cd43dSSascha Wildner               STACK_PUSH5(ISAd + incr, a, b, next, trlink);
1350*a28cd43dSSascha Wildner               first = b;
1351*a28cd43dSSascha Wildner             } else {
1352*a28cd43dSSascha Wildner               STACK_PUSH5(ISAd, first, a, limit, trlink);
1353*a28cd43dSSascha Wildner               ISAd += incr, first = a, last = b, limit = next;
1354*a28cd43dSSascha Wildner             }
1355*a28cd43dSSascha Wildner           } else {
1356*a28cd43dSSascha Wildner             STACK_PUSH5(ISAd, first, a, limit, trlink);
1357*a28cd43dSSascha Wildner             STACK_PUSH5(ISAd, b, last, limit, trlink);
1358*a28cd43dSSascha Wildner             ISAd += incr, first = a, last = b, limit = next;
1359*a28cd43dSSascha Wildner           }
1360*a28cd43dSSascha Wildner         }
1361*a28cd43dSSascha Wildner       } else {
1362*a28cd43dSSascha Wildner         if((1 < (b - a)) && (0 <= trlink)) { stack[trlink].d = -1; }
1363*a28cd43dSSascha Wildner         if((a - first) <= (last - b)) {
1364*a28cd43dSSascha Wildner           if(1 < (a - first)) {
1365*a28cd43dSSascha Wildner             STACK_PUSH5(ISAd, b, last, limit, trlink);
1366*a28cd43dSSascha Wildner             last = a;
1367*a28cd43dSSascha Wildner           } else if(1 < (last - b)) {
1368*a28cd43dSSascha Wildner             first = b;
1369*a28cd43dSSascha Wildner           } else {
1370*a28cd43dSSascha Wildner             STACK_POP5(ISAd, first, last, limit, trlink);
1371*a28cd43dSSascha Wildner           }
1372*a28cd43dSSascha Wildner         } else {
1373*a28cd43dSSascha Wildner           if(1 < (last - b)) {
1374*a28cd43dSSascha Wildner             STACK_PUSH5(ISAd, first, a, limit, trlink);
1375*a28cd43dSSascha Wildner             first = b;
1376*a28cd43dSSascha Wildner           } else if(1 < (a - first)) {
1377*a28cd43dSSascha Wildner             last = a;
1378*a28cd43dSSascha Wildner           } else {
1379*a28cd43dSSascha Wildner             STACK_POP5(ISAd, first, last, limit, trlink);
1380*a28cd43dSSascha Wildner           }
1381*a28cd43dSSascha Wildner         }
1382*a28cd43dSSascha Wildner       }
1383*a28cd43dSSascha Wildner     } else {
1384*a28cd43dSSascha Wildner       if(trbudget_check(budget, last - first)) {
1385*a28cd43dSSascha Wildner         limit = tr_ilg(last - first), ISAd += incr;
1386*a28cd43dSSascha Wildner       } else {
1387*a28cd43dSSascha Wildner         if(0 <= trlink) { stack[trlink].d = -1; }
1388*a28cd43dSSascha Wildner         STACK_POP5(ISAd, first, last, limit, trlink);
1389*a28cd43dSSascha Wildner       }
1390*a28cd43dSSascha Wildner     }
1391*a28cd43dSSascha Wildner   }
1392*a28cd43dSSascha Wildner #undef STACK_SIZE
1393*a28cd43dSSascha Wildner }
1394*a28cd43dSSascha Wildner 
1395*a28cd43dSSascha Wildner 
1396*a28cd43dSSascha Wildner 
1397*a28cd43dSSascha Wildner /*---------------------------------------------------------------------------*/
1398*a28cd43dSSascha Wildner 
1399*a28cd43dSSascha Wildner /* Tandem repeat sort */
1400*a28cd43dSSascha Wildner static
1401*a28cd43dSSascha Wildner void
trsort(int * ISA,int * SA,int n,int depth)1402*a28cd43dSSascha Wildner trsort(int *ISA, int *SA, int n, int depth) {
1403*a28cd43dSSascha Wildner   int *ISAd;
1404*a28cd43dSSascha Wildner   int *first, *last;
1405*a28cd43dSSascha Wildner   trbudget_t budget;
1406*a28cd43dSSascha Wildner   int t, skip, unsorted;
1407*a28cd43dSSascha Wildner 
1408*a28cd43dSSascha Wildner   trbudget_init(&budget, tr_ilg(n) * 2 / 3, n);
1409*a28cd43dSSascha Wildner /*  trbudget_init(&budget, tr_ilg(n) * 3 / 4, n); */
1410*a28cd43dSSascha Wildner   for(ISAd = ISA + depth; -n < *SA; ISAd += ISAd - ISA) {
1411*a28cd43dSSascha Wildner     first = SA;
1412*a28cd43dSSascha Wildner     skip = 0;
1413*a28cd43dSSascha Wildner     unsorted = 0;
1414*a28cd43dSSascha Wildner     do {
1415*a28cd43dSSascha Wildner       if((t = *first) < 0) { first -= t; skip += t; }
1416*a28cd43dSSascha Wildner       else {
1417*a28cd43dSSascha Wildner         if(skip != 0) { *(first + skip) = skip; skip = 0; }
1418*a28cd43dSSascha Wildner         last = SA + ISA[t] + 1;
1419*a28cd43dSSascha Wildner         if(1 < (last - first)) {
1420*a28cd43dSSascha Wildner           budget.count = 0;
1421*a28cd43dSSascha Wildner           tr_introsort(ISA, ISAd, SA, first, last, &budget);
1422*a28cd43dSSascha Wildner           if(budget.count != 0) { unsorted += budget.count; }
1423*a28cd43dSSascha Wildner           else { skip = first - last; }
1424*a28cd43dSSascha Wildner         } else if((last - first) == 1) {
1425*a28cd43dSSascha Wildner           skip = -1;
1426*a28cd43dSSascha Wildner         }
1427*a28cd43dSSascha Wildner         first = last;
1428*a28cd43dSSascha Wildner       }
1429*a28cd43dSSascha Wildner     } while(first < (SA + n));
1430*a28cd43dSSascha Wildner     if(skip != 0) { *(first + skip) = skip; }
1431*a28cd43dSSascha Wildner     if(unsorted == 0) { break; }
1432*a28cd43dSSascha Wildner   }
1433*a28cd43dSSascha Wildner }
1434*a28cd43dSSascha Wildner 
1435*a28cd43dSSascha Wildner 
1436*a28cd43dSSascha Wildner /*---------------------------------------------------------------------------*/
1437*a28cd43dSSascha Wildner 
1438*a28cd43dSSascha Wildner /* Sorts suffixes of type B*. */
1439*a28cd43dSSascha Wildner static
1440*a28cd43dSSascha Wildner int
sort_typeBstar(const unsigned char * T,int * SA,int * bucket_A,int * bucket_B,int n,int openMP)1441*a28cd43dSSascha Wildner sort_typeBstar(const unsigned char *T, int *SA,
1442*a28cd43dSSascha Wildner                int *bucket_A, int *bucket_B,
1443*a28cd43dSSascha Wildner                int n, int openMP) {
1444*a28cd43dSSascha Wildner   int *PAb, *ISAb, *buf;
1445*a28cd43dSSascha Wildner #ifdef LIBBSC_OPENMP
1446*a28cd43dSSascha Wildner   int *curbuf;
1447*a28cd43dSSascha Wildner   int l;
1448*a28cd43dSSascha Wildner #endif
1449*a28cd43dSSascha Wildner   int i, j, k, t, m, bufsize;
1450*a28cd43dSSascha Wildner   int c0, c1;
1451*a28cd43dSSascha Wildner #ifdef LIBBSC_OPENMP
1452*a28cd43dSSascha Wildner   int d0, d1;
1453*a28cd43dSSascha Wildner #endif
1454*a28cd43dSSascha Wildner   (void)openMP;
1455*a28cd43dSSascha Wildner 
1456*a28cd43dSSascha Wildner   /* Initialize bucket arrays. */
1457*a28cd43dSSascha Wildner   for(i = 0; i < BUCKET_A_SIZE; ++i) { bucket_A[i] = 0; }
1458*a28cd43dSSascha Wildner   for(i = 0; i < BUCKET_B_SIZE; ++i) { bucket_B[i] = 0; }
1459*a28cd43dSSascha Wildner 
1460*a28cd43dSSascha Wildner   /* Count the number of occurrences of the first one or two characters of each
1461*a28cd43dSSascha Wildner      type A, B and B* suffix. Moreover, store the beginning position of all
1462*a28cd43dSSascha Wildner      type B* suffixes into the array SA. */
1463*a28cd43dSSascha Wildner   for(i = n - 1, m = n, c0 = T[n - 1]; 0 <= i;) {
1464*a28cd43dSSascha Wildner     /* type A suffix. */
1465*a28cd43dSSascha Wildner     do { ++BUCKET_A(c1 = c0); } while((0 <= --i) && ((c0 = T[i]) >= c1));
1466*a28cd43dSSascha Wildner     if(0 <= i) {
1467*a28cd43dSSascha Wildner       /* type B* suffix. */
1468*a28cd43dSSascha Wildner       ++BUCKET_BSTAR(c0, c1);
1469*a28cd43dSSascha Wildner       SA[--m] = i;
1470*a28cd43dSSascha Wildner       /* type B suffix. */
1471*a28cd43dSSascha Wildner       for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) {
1472*a28cd43dSSascha Wildner         ++BUCKET_B(c0, c1);
1473*a28cd43dSSascha Wildner       }
1474*a28cd43dSSascha Wildner     }
1475*a28cd43dSSascha Wildner   }
1476*a28cd43dSSascha Wildner   m = n - m;
1477*a28cd43dSSascha Wildner /*
1478*a28cd43dSSascha Wildner note:
1479*a28cd43dSSascha Wildner   A type B* suffix is lexicographically smaller than a type B suffix that
1480*a28cd43dSSascha Wildner   begins with the same first two characters.
1481*a28cd43dSSascha Wildner */
1482*a28cd43dSSascha Wildner 
1483*a28cd43dSSascha Wildner   /* Calculate the index of start/end point of each bucket. */
1484*a28cd43dSSascha Wildner   for(c0 = 0, i = 0, j = 0; c0 < ALPHABET_SIZE; ++c0) {
1485*a28cd43dSSascha Wildner     t = i + BUCKET_A(c0);
1486*a28cd43dSSascha Wildner     BUCKET_A(c0) = i + j; /* start point */
1487*a28cd43dSSascha Wildner     i = t + BUCKET_B(c0, c0);
1488*a28cd43dSSascha Wildner     for(c1 = c0 + 1; c1 < ALPHABET_SIZE; ++c1) {
1489*a28cd43dSSascha Wildner       j += BUCKET_BSTAR(c0, c1);
1490*a28cd43dSSascha Wildner       BUCKET_BSTAR(c0, c1) = j; /* end point */
1491*a28cd43dSSascha Wildner       i += BUCKET_B(c0, c1);
1492*a28cd43dSSascha Wildner     }
1493*a28cd43dSSascha Wildner   }
1494*a28cd43dSSascha Wildner 
1495*a28cd43dSSascha Wildner   if(0 < m) {
1496*a28cd43dSSascha Wildner     /* Sort the type B* suffixes by their first two characters. */
1497*a28cd43dSSascha Wildner     PAb = SA + n - m; ISAb = SA + m;
1498*a28cd43dSSascha Wildner     for(i = m - 2; 0 <= i; --i) {
1499*a28cd43dSSascha Wildner       t = PAb[i], c0 = T[t], c1 = T[t + 1];
1500*a28cd43dSSascha Wildner       SA[--BUCKET_BSTAR(c0, c1)] = i;
1501*a28cd43dSSascha Wildner     }
1502*a28cd43dSSascha Wildner     t = PAb[m - 1], c0 = T[t], c1 = T[t + 1];
1503*a28cd43dSSascha Wildner     SA[--BUCKET_BSTAR(c0, c1)] = m - 1;
1504*a28cd43dSSascha Wildner 
1505*a28cd43dSSascha Wildner     /* Sort the type B* substrings using sssort. */
1506*a28cd43dSSascha Wildner #ifdef LIBBSC_OPENMP
1507*a28cd43dSSascha Wildner     if (openMP)
1508*a28cd43dSSascha Wildner     {
1509*a28cd43dSSascha Wildner         buf = SA + m;
1510*a28cd43dSSascha Wildner         c0 = ALPHABET_SIZE - 2, c1 = ALPHABET_SIZE - 1, j = m;
1511*a28cd43dSSascha Wildner #pragma omp parallel default(shared) private(bufsize, curbuf, k, l, d0, d1)
1512*a28cd43dSSascha Wildner         {
1513*a28cd43dSSascha Wildner           bufsize = (n - (2 * m)) / omp_get_num_threads();
1514*a28cd43dSSascha Wildner           curbuf = buf + omp_get_thread_num() * bufsize;
1515*a28cd43dSSascha Wildner           k = 0;
1516*a28cd43dSSascha Wildner           for(;;) {
1517*a28cd43dSSascha Wildner             #pragma omp critical(sssort_lock)
1518*a28cd43dSSascha Wildner             {
1519*a28cd43dSSascha Wildner               if(0 < (l = j)) {
1520*a28cd43dSSascha Wildner                 d0 = c0, d1 = c1;
1521*a28cd43dSSascha Wildner                 do {
1522*a28cd43dSSascha Wildner                   k = BUCKET_BSTAR(d0, d1);
1523*a28cd43dSSascha Wildner                   if(--d1 <= d0) {
1524*a28cd43dSSascha Wildner                     d1 = ALPHABET_SIZE - 1;
1525*a28cd43dSSascha Wildner                     if(--d0 < 0) { break; }
1526*a28cd43dSSascha Wildner                   }
1527*a28cd43dSSascha Wildner                 } while(((l - k) <= 1) && (0 < (l = k)));
1528*a28cd43dSSascha Wildner                 c0 = d0, c1 = d1, j = k;
1529*a28cd43dSSascha Wildner               }
1530*a28cd43dSSascha Wildner             }
1531*a28cd43dSSascha Wildner             if(l == 0) { break; }
1532*a28cd43dSSascha Wildner             sssort(T, PAb, SA + k, SA + l,
1533*a28cd43dSSascha Wildner                    curbuf, bufsize, 2, n, *(SA + k) == (m - 1));
1534*a28cd43dSSascha Wildner           }
1535*a28cd43dSSascha Wildner         }
1536*a28cd43dSSascha Wildner     }
1537*a28cd43dSSascha Wildner     else
1538*a28cd43dSSascha Wildner     {
1539*a28cd43dSSascha Wildner         buf = SA + m, bufsize = n - (2 * m);
1540*a28cd43dSSascha Wildner         for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) {
1541*a28cd43dSSascha Wildner           for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) {
1542*a28cd43dSSascha Wildner             i = BUCKET_BSTAR(c0, c1);
1543*a28cd43dSSascha Wildner             if(1 < (j - i)) {
1544*a28cd43dSSascha Wildner               sssort(T, PAb, SA + i, SA + j,
1545*a28cd43dSSascha Wildner                      buf, bufsize, 2, n, *(SA + i) == (m - 1));
1546*a28cd43dSSascha Wildner             }
1547*a28cd43dSSascha Wildner           }
1548*a28cd43dSSascha Wildner         }
1549*a28cd43dSSascha Wildner     }
1550*a28cd43dSSascha Wildner #else
1551*a28cd43dSSascha Wildner     buf = SA + m, bufsize = n - (2 * m);
1552*a28cd43dSSascha Wildner     for(c0 = ALPHABET_SIZE - 2, j = m; 0 < j; --c0) {
1553*a28cd43dSSascha Wildner       for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) {
1554*a28cd43dSSascha Wildner         i = BUCKET_BSTAR(c0, c1);
1555*a28cd43dSSascha Wildner         if(1 < (j - i)) {
1556*a28cd43dSSascha Wildner           sssort(T, PAb, SA + i, SA + j,
1557*a28cd43dSSascha Wildner                  buf, bufsize, 2, n, *(SA + i) == (m - 1));
1558*a28cd43dSSascha Wildner         }
1559*a28cd43dSSascha Wildner       }
1560*a28cd43dSSascha Wildner     }
1561*a28cd43dSSascha Wildner #endif
1562*a28cd43dSSascha Wildner 
1563*a28cd43dSSascha Wildner     /* Compute ranks of type B* substrings. */
1564*a28cd43dSSascha Wildner     for(i = m - 1; 0 <= i; --i) {
1565*a28cd43dSSascha Wildner       if(0 <= SA[i]) {
1566*a28cd43dSSascha Wildner         j = i;
1567*a28cd43dSSascha Wildner         do { ISAb[SA[i]] = i; } while((0 <= --i) && (0 <= SA[i]));
1568*a28cd43dSSascha Wildner         SA[i + 1] = i - j;
1569*a28cd43dSSascha Wildner         if(i <= 0) { break; }
1570*a28cd43dSSascha Wildner       }
1571*a28cd43dSSascha Wildner       j = i;
1572*a28cd43dSSascha Wildner       do { ISAb[SA[i] = ~SA[i]] = j; } while(SA[--i] < 0);
1573*a28cd43dSSascha Wildner       ISAb[SA[i]] = j;
1574*a28cd43dSSascha Wildner     }
1575*a28cd43dSSascha Wildner 
1576*a28cd43dSSascha Wildner     /* Construct the inverse suffix array of type B* suffixes using trsort. */
1577*a28cd43dSSascha Wildner     trsort(ISAb, SA, m, 1);
1578*a28cd43dSSascha Wildner 
1579*a28cd43dSSascha Wildner     /* Set the sorted order of tyoe B* suffixes. */
1580*a28cd43dSSascha Wildner     for(i = n - 1, j = m, c0 = T[n - 1]; 0 <= i;) {
1581*a28cd43dSSascha Wildner       for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) >= c1); --i, c1 = c0) { }
1582*a28cd43dSSascha Wildner       if(0 <= i) {
1583*a28cd43dSSascha Wildner         t = i;
1584*a28cd43dSSascha Wildner         for(--i, c1 = c0; (0 <= i) && ((c0 = T[i]) <= c1); --i, c1 = c0) { }
1585*a28cd43dSSascha Wildner         SA[ISAb[--j]] = ((t == 0) || (1 < (t - i))) ? t : ~t;
1586*a28cd43dSSascha Wildner       }
1587*a28cd43dSSascha Wildner     }
1588*a28cd43dSSascha Wildner 
1589*a28cd43dSSascha Wildner     /* Calculate the index of start/end point of each bucket. */
1590*a28cd43dSSascha Wildner     BUCKET_B(ALPHABET_SIZE - 1, ALPHABET_SIZE - 1) = n; /* end point */
1591*a28cd43dSSascha Wildner     for(c0 = ALPHABET_SIZE - 2, k = m - 1; 0 <= c0; --c0) {
1592*a28cd43dSSascha Wildner       i = BUCKET_A(c0 + 1) - 1;
1593*a28cd43dSSascha Wildner       for(c1 = ALPHABET_SIZE - 1; c0 < c1; --c1) {
1594*a28cd43dSSascha Wildner         t = i - BUCKET_B(c0, c1);
1595*a28cd43dSSascha Wildner         BUCKET_B(c0, c1) = i; /* end point */
1596*a28cd43dSSascha Wildner 
1597*a28cd43dSSascha Wildner         /* Move all type B* suffixes to the correct position. */
1598*a28cd43dSSascha Wildner         for(i = t, j = BUCKET_BSTAR(c0, c1);
1599*a28cd43dSSascha Wildner             j <= k;
1600*a28cd43dSSascha Wildner             --i, --k) { SA[i] = SA[k]; }
1601*a28cd43dSSascha Wildner       }
1602*a28cd43dSSascha Wildner       BUCKET_BSTAR(c0, c0 + 1) = i - BUCKET_B(c0, c0) + 1; /* start point */
1603*a28cd43dSSascha Wildner       BUCKET_B(c0, c0) = i; /* end point */
1604*a28cd43dSSascha Wildner     }
1605*a28cd43dSSascha Wildner   }
1606*a28cd43dSSascha Wildner 
1607*a28cd43dSSascha Wildner   return m;
1608*a28cd43dSSascha Wildner }
1609*a28cd43dSSascha Wildner 
1610*a28cd43dSSascha Wildner /* Constructs the suffix array by using the sorted order of type B* suffixes. */
1611*a28cd43dSSascha Wildner static
1612*a28cd43dSSascha Wildner void
construct_SA(const unsigned char * T,int * SA,int * bucket_A,int * bucket_B,int n,int m)1613*a28cd43dSSascha Wildner construct_SA(const unsigned char *T, int *SA,
1614*a28cd43dSSascha Wildner              int *bucket_A, int *bucket_B,
1615*a28cd43dSSascha Wildner              int n, int m) {
1616*a28cd43dSSascha Wildner   int *i, *j, *k;
1617*a28cd43dSSascha Wildner   int s;
1618*a28cd43dSSascha Wildner   int c0, c1, c2;
1619*a28cd43dSSascha Wildner 
1620*a28cd43dSSascha Wildner   if(0 < m) {
1621*a28cd43dSSascha Wildner     /* Construct the sorted order of type B suffixes by using
1622*a28cd43dSSascha Wildner        the sorted order of type B* suffixes. */
1623*a28cd43dSSascha Wildner     for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
1624*a28cd43dSSascha Wildner       /* Scan the suffix array from right to left. */
1625*a28cd43dSSascha Wildner       for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
1626*a28cd43dSSascha Wildner           j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
1627*a28cd43dSSascha Wildner           i <= j;
1628*a28cd43dSSascha Wildner           --j) {
1629*a28cd43dSSascha Wildner         if(0 < (s = *j)) {
1630*a28cd43dSSascha Wildner           assert(T[s] == c1);
1631*a28cd43dSSascha Wildner           assert(((s + 1) < n) && (T[s] <= T[s + 1]));
1632*a28cd43dSSascha Wildner           assert(T[s - 1] <= T[s]);
1633*a28cd43dSSascha Wildner           *j = ~s;
1634*a28cd43dSSascha Wildner           c0 = T[--s];
1635*a28cd43dSSascha Wildner           if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
1636*a28cd43dSSascha Wildner           if(c0 != c2) {
1637*a28cd43dSSascha Wildner             if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
1638*a28cd43dSSascha Wildner             k = SA + BUCKET_B(c2 = c0, c1);
1639*a28cd43dSSascha Wildner           }
1640*a28cd43dSSascha Wildner           assert(k < j); assert(k != NULL);
1641*a28cd43dSSascha Wildner           *k-- = s;
1642*a28cd43dSSascha Wildner         } else {
1643*a28cd43dSSascha Wildner           assert(((s == 0) && (T[s] == c1)) || (s < 0));
1644*a28cd43dSSascha Wildner           *j = ~s;
1645*a28cd43dSSascha Wildner         }
1646*a28cd43dSSascha Wildner       }
1647*a28cd43dSSascha Wildner     }
1648*a28cd43dSSascha Wildner   }
1649*a28cd43dSSascha Wildner 
1650*a28cd43dSSascha Wildner   /* Construct the suffix array by using
1651*a28cd43dSSascha Wildner      the sorted order of type B suffixes. */
1652*a28cd43dSSascha Wildner   k = SA + BUCKET_A(c2 = T[n - 1]);
1653*a28cd43dSSascha Wildner   *k++ = (T[n - 2] < c2) ? ~(n - 1) : (n - 1);
1654*a28cd43dSSascha Wildner   /* Scan the suffix array from left to right. */
1655*a28cd43dSSascha Wildner   for(i = SA, j = SA + n; i < j; ++i) {
1656*a28cd43dSSascha Wildner     if(0 < (s = *i)) {
1657*a28cd43dSSascha Wildner       assert(T[s - 1] >= T[s]);
1658*a28cd43dSSascha Wildner       c0 = T[--s];
1659*a28cd43dSSascha Wildner       if((s == 0) || (T[s - 1] < c0)) { s = ~s; }
1660*a28cd43dSSascha Wildner       if(c0 != c2) {
1661*a28cd43dSSascha Wildner         BUCKET_A(c2) = k - SA;
1662*a28cd43dSSascha Wildner         k = SA + BUCKET_A(c2 = c0);
1663*a28cd43dSSascha Wildner       }
1664*a28cd43dSSascha Wildner       assert(i < k);
1665*a28cd43dSSascha Wildner       *k++ = s;
1666*a28cd43dSSascha Wildner     } else {
1667*a28cd43dSSascha Wildner       assert(s < 0);
1668*a28cd43dSSascha Wildner       *i = ~s;
1669*a28cd43dSSascha Wildner     }
1670*a28cd43dSSascha Wildner   }
1671*a28cd43dSSascha Wildner }
1672*a28cd43dSSascha Wildner 
1673*a28cd43dSSascha Wildner /* Constructs the burrows-wheeler transformed string directly
1674*a28cd43dSSascha Wildner    by using the sorted order of type B* suffixes. */
1675*a28cd43dSSascha Wildner static
1676*a28cd43dSSascha Wildner int
construct_BWT(const unsigned char * T,int * SA,int * bucket_A,int * bucket_B,int n,int m)1677*a28cd43dSSascha Wildner construct_BWT(const unsigned char *T, int *SA,
1678*a28cd43dSSascha Wildner               int *bucket_A, int *bucket_B,
1679*a28cd43dSSascha Wildner               int n, int m) {
1680*a28cd43dSSascha Wildner   int *i, *j, *k, *orig;
1681*a28cd43dSSascha Wildner   int s;
1682*a28cd43dSSascha Wildner   int c0, c1, c2;
1683*a28cd43dSSascha Wildner 
1684*a28cd43dSSascha Wildner   if(0 < m) {
1685*a28cd43dSSascha Wildner     /* Construct the sorted order of type B suffixes by using
1686*a28cd43dSSascha Wildner        the sorted order of type B* suffixes. */
1687*a28cd43dSSascha Wildner     for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
1688*a28cd43dSSascha Wildner       /* Scan the suffix array from right to left. */
1689*a28cd43dSSascha Wildner       for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
1690*a28cd43dSSascha Wildner           j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
1691*a28cd43dSSascha Wildner           i <= j;
1692*a28cd43dSSascha Wildner           --j) {
1693*a28cd43dSSascha Wildner         if(0 < (s = *j)) {
1694*a28cd43dSSascha Wildner           assert(T[s] == c1);
1695*a28cd43dSSascha Wildner           assert(((s + 1) < n) && (T[s] <= T[s + 1]));
1696*a28cd43dSSascha Wildner           assert(T[s - 1] <= T[s]);
1697*a28cd43dSSascha Wildner           c0 = T[--s];
1698*a28cd43dSSascha Wildner           *j = ~((int)c0);
1699*a28cd43dSSascha Wildner           if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
1700*a28cd43dSSascha Wildner           if(c0 != c2) {
1701*a28cd43dSSascha Wildner             if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
1702*a28cd43dSSascha Wildner             k = SA + BUCKET_B(c2 = c0, c1);
1703*a28cd43dSSascha Wildner           }
1704*a28cd43dSSascha Wildner           assert(k < j); assert(k != NULL);
1705*a28cd43dSSascha Wildner           *k-- = s;
1706*a28cd43dSSascha Wildner         } else if(s != 0) {
1707*a28cd43dSSascha Wildner           *j = ~s;
1708*a28cd43dSSascha Wildner #ifndef NDEBUG
1709*a28cd43dSSascha Wildner         } else {
1710*a28cd43dSSascha Wildner           assert(T[s] == c1);
1711*a28cd43dSSascha Wildner #endif
1712*a28cd43dSSascha Wildner         }
1713*a28cd43dSSascha Wildner       }
1714*a28cd43dSSascha Wildner     }
1715*a28cd43dSSascha Wildner   }
1716*a28cd43dSSascha Wildner 
1717*a28cd43dSSascha Wildner   /* Construct the BWTed string by using
1718*a28cd43dSSascha Wildner      the sorted order of type B suffixes. */
1719*a28cd43dSSascha Wildner   k = SA + BUCKET_A(c2 = T[n - 1]);
1720*a28cd43dSSascha Wildner   *k++ = (T[n - 2] < c2) ? ~((int)T[n - 2]) : (n - 1);
1721*a28cd43dSSascha Wildner   /* Scan the suffix array from left to right. */
1722*a28cd43dSSascha Wildner   for(i = SA, j = SA + n, orig = SA; i < j; ++i) {
1723*a28cd43dSSascha Wildner     if(0 < (s = *i)) {
1724*a28cd43dSSascha Wildner       assert(T[s - 1] >= T[s]);
1725*a28cd43dSSascha Wildner       c0 = T[--s];
1726*a28cd43dSSascha Wildner       *i = c0;
1727*a28cd43dSSascha Wildner       if((0 < s) && (T[s - 1] < c0)) { s = ~((int)T[s - 1]); }
1728*a28cd43dSSascha Wildner       if(c0 != c2) {
1729*a28cd43dSSascha Wildner         BUCKET_A(c2) = k - SA;
1730*a28cd43dSSascha Wildner         k = SA + BUCKET_A(c2 = c0);
1731*a28cd43dSSascha Wildner       }
1732*a28cd43dSSascha Wildner       assert(i < k);
1733*a28cd43dSSascha Wildner       *k++ = s;
1734*a28cd43dSSascha Wildner     } else if(s != 0) {
1735*a28cd43dSSascha Wildner       *i = ~s;
1736*a28cd43dSSascha Wildner     } else {
1737*a28cd43dSSascha Wildner       orig = i;
1738*a28cd43dSSascha Wildner     }
1739*a28cd43dSSascha Wildner   }
1740*a28cd43dSSascha Wildner 
1741*a28cd43dSSascha Wildner   return orig - SA;
1742*a28cd43dSSascha Wildner }
1743*a28cd43dSSascha Wildner 
1744*a28cd43dSSascha Wildner /* Constructs the burrows-wheeler transformed string directly
1745*a28cd43dSSascha Wildner    by using the sorted order of type B* suffixes. */
1746*a28cd43dSSascha Wildner static
1747*a28cd43dSSascha Wildner int
construct_BWT_indexes(const unsigned char * T,int * SA,int * bucket_A,int * bucket_B,int n,int m,unsigned char * num_indexes,int * indexes)1748*a28cd43dSSascha Wildner construct_BWT_indexes(const unsigned char *T, int *SA,
1749*a28cd43dSSascha Wildner                       int *bucket_A, int *bucket_B,
1750*a28cd43dSSascha Wildner                       int n, int m,
1751*a28cd43dSSascha Wildner                       unsigned char * num_indexes, int * indexes) {
1752*a28cd43dSSascha Wildner   int *i, *j, *k, *orig;
1753*a28cd43dSSascha Wildner   int s;
1754*a28cd43dSSascha Wildner   int c0, c1, c2;
1755*a28cd43dSSascha Wildner 
1756*a28cd43dSSascha Wildner   int mod = n / 8;
1757*a28cd43dSSascha Wildner   {
1758*a28cd43dSSascha Wildner       mod |= mod >> 1;  mod |= mod >> 2;
1759*a28cd43dSSascha Wildner       mod |= mod >> 4;  mod |= mod >> 8;
1760*a28cd43dSSascha Wildner       mod |= mod >> 16; mod >>= 1;
1761*a28cd43dSSascha Wildner 
1762*a28cd43dSSascha Wildner       *num_indexes = (unsigned char)((n - 1) / (mod + 1));
1763*a28cd43dSSascha Wildner   }
1764*a28cd43dSSascha Wildner 
1765*a28cd43dSSascha Wildner   if(0 < m) {
1766*a28cd43dSSascha Wildner     /* Construct the sorted order of type B suffixes by using
1767*a28cd43dSSascha Wildner        the sorted order of type B* suffixes. */
1768*a28cd43dSSascha Wildner     for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
1769*a28cd43dSSascha Wildner       /* Scan the suffix array from right to left. */
1770*a28cd43dSSascha Wildner       for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
1771*a28cd43dSSascha Wildner           j = SA + BUCKET_A(c1 + 1) - 1, k = NULL, c2 = -1;
1772*a28cd43dSSascha Wildner           i <= j;
1773*a28cd43dSSascha Wildner           --j) {
1774*a28cd43dSSascha Wildner         if(0 < (s = *j)) {
1775*a28cd43dSSascha Wildner           assert(T[s] == c1);
1776*a28cd43dSSascha Wildner           assert(((s + 1) < n) && (T[s] <= T[s + 1]));
1777*a28cd43dSSascha Wildner           assert(T[s - 1] <= T[s]);
1778*a28cd43dSSascha Wildner 
1779*a28cd43dSSascha Wildner           if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = j - SA;
1780*a28cd43dSSascha Wildner 
1781*a28cd43dSSascha Wildner           c0 = T[--s];
1782*a28cd43dSSascha Wildner           *j = ~((int)c0);
1783*a28cd43dSSascha Wildner           if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
1784*a28cd43dSSascha Wildner           if(c0 != c2) {
1785*a28cd43dSSascha Wildner             if(0 <= c2) { BUCKET_B(c2, c1) = k - SA; }
1786*a28cd43dSSascha Wildner             k = SA + BUCKET_B(c2 = c0, c1);
1787*a28cd43dSSascha Wildner           }
1788*a28cd43dSSascha Wildner           assert(k < j); assert(k != NULL);
1789*a28cd43dSSascha Wildner           *k-- = s;
1790*a28cd43dSSascha Wildner         } else if(s != 0) {
1791*a28cd43dSSascha Wildner           *j = ~s;
1792*a28cd43dSSascha Wildner #ifndef NDEBUG
1793*a28cd43dSSascha Wildner         } else {
1794*a28cd43dSSascha Wildner           assert(T[s] == c1);
1795*a28cd43dSSascha Wildner #endif
1796*a28cd43dSSascha Wildner         }
1797*a28cd43dSSascha Wildner       }
1798*a28cd43dSSascha Wildner     }
1799*a28cd43dSSascha Wildner   }
1800*a28cd43dSSascha Wildner 
1801*a28cd43dSSascha Wildner   /* Construct the BWTed string by using
1802*a28cd43dSSascha Wildner      the sorted order of type B suffixes. */
1803*a28cd43dSSascha Wildner   k = SA + BUCKET_A(c2 = T[n - 1]);
1804*a28cd43dSSascha Wildner   if (T[n - 2] < c2) {
1805*a28cd43dSSascha Wildner     if (((n - 1) & mod) == 0) indexes[(n - 1) / (mod + 1) - 1] = k - SA;
1806*a28cd43dSSascha Wildner     *k++ = ~((int)T[n - 2]);
1807*a28cd43dSSascha Wildner   }
1808*a28cd43dSSascha Wildner   else {
1809*a28cd43dSSascha Wildner     *k++ = n - 1;
1810*a28cd43dSSascha Wildner   }
1811*a28cd43dSSascha Wildner 
1812*a28cd43dSSascha Wildner   /* Scan the suffix array from left to right. */
1813*a28cd43dSSascha Wildner   for(i = SA, j = SA + n, orig = SA; i < j; ++i) {
1814*a28cd43dSSascha Wildner     if(0 < (s = *i)) {
1815*a28cd43dSSascha Wildner       assert(T[s - 1] >= T[s]);
1816*a28cd43dSSascha Wildner 
1817*a28cd43dSSascha Wildner       if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = i - SA;
1818*a28cd43dSSascha Wildner 
1819*a28cd43dSSascha Wildner       c0 = T[--s];
1820*a28cd43dSSascha Wildner       *i = c0;
1821*a28cd43dSSascha Wildner       if(c0 != c2) {
1822*a28cd43dSSascha Wildner         BUCKET_A(c2) = k - SA;
1823*a28cd43dSSascha Wildner         k = SA + BUCKET_A(c2 = c0);
1824*a28cd43dSSascha Wildner       }
1825*a28cd43dSSascha Wildner       assert(i < k);
1826*a28cd43dSSascha Wildner       if((0 < s) && (T[s - 1] < c0)) {
1827*a28cd43dSSascha Wildner           if ((s & mod) == 0) indexes[s / (mod + 1) - 1] = k - SA;
1828*a28cd43dSSascha Wildner           *k++ = ~((int)T[s - 1]);
1829*a28cd43dSSascha Wildner       } else
1830*a28cd43dSSascha Wildner         *k++ = s;
1831*a28cd43dSSascha Wildner     } else if(s != 0) {
1832*a28cd43dSSascha Wildner       *i = ~s;
1833*a28cd43dSSascha Wildner     } else {
1834*a28cd43dSSascha Wildner       orig = i;
1835*a28cd43dSSascha Wildner     }
1836*a28cd43dSSascha Wildner   }
1837*a28cd43dSSascha Wildner 
1838*a28cd43dSSascha Wildner   return orig - SA;
1839*a28cd43dSSascha Wildner }
1840*a28cd43dSSascha Wildner 
1841*a28cd43dSSascha Wildner 
1842*a28cd43dSSascha Wildner /*---------------------------------------------------------------------------*/
1843*a28cd43dSSascha Wildner 
1844*a28cd43dSSascha Wildner /*- Function -*/
1845*a28cd43dSSascha Wildner 
1846*a28cd43dSSascha Wildner int
divsufsort(const unsigned char * T,int * SA,int n,int openMP)1847*a28cd43dSSascha Wildner divsufsort(const unsigned char *T, int *SA, int n, int openMP) {
1848*a28cd43dSSascha Wildner   int *bucket_A, *bucket_B;
1849*a28cd43dSSascha Wildner   int m;
1850*a28cd43dSSascha Wildner   int err = 0;
1851*a28cd43dSSascha Wildner 
1852*a28cd43dSSascha Wildner   /* Check arguments. */
1853*a28cd43dSSascha Wildner   if((T == NULL) || (SA == NULL) || (n < 0)) { return -1; }
1854*a28cd43dSSascha Wildner   else if(n == 0) { return 0; }
1855*a28cd43dSSascha Wildner   else if(n == 1) { SA[0] = 0; return 0; }
1856*a28cd43dSSascha Wildner   else if(n == 2) { m = (T[0] < T[1]); SA[m ^ 1] = 0, SA[m] = 1; return 0; }
1857*a28cd43dSSascha Wildner 
1858*a28cd43dSSascha Wildner   bucket_A = (int *)malloc(BUCKET_A_SIZE * sizeof(int));
1859*a28cd43dSSascha Wildner   bucket_B = (int *)malloc(BUCKET_B_SIZE * sizeof(int));
1860*a28cd43dSSascha Wildner 
1861*a28cd43dSSascha Wildner   /* Suffixsort. */
1862*a28cd43dSSascha Wildner   if((bucket_A != NULL) && (bucket_B != NULL)) {
1863*a28cd43dSSascha Wildner     m = sort_typeBstar(T, SA, bucket_A, bucket_B, n, openMP);
1864*a28cd43dSSascha Wildner     construct_SA(T, SA, bucket_A, bucket_B, n, m);
1865*a28cd43dSSascha Wildner   } else {
1866*a28cd43dSSascha Wildner     err = -2;
1867*a28cd43dSSascha Wildner   }
1868*a28cd43dSSascha Wildner 
1869*a28cd43dSSascha Wildner   free(bucket_B);
1870*a28cd43dSSascha Wildner   free(bucket_A);
1871*a28cd43dSSascha Wildner 
1872*a28cd43dSSascha Wildner   return err;
1873*a28cd43dSSascha Wildner }
1874*a28cd43dSSascha Wildner 
1875*a28cd43dSSascha Wildner int
divbwt(const unsigned char * T,unsigned char * U,int * A,int n,unsigned char * num_indexes,int * indexes,int openMP)1876*a28cd43dSSascha Wildner divbwt(const unsigned char *T, unsigned char *U, int *A, int n, unsigned char * num_indexes, int * indexes, int openMP) {
1877*a28cd43dSSascha Wildner   int *B;
1878*a28cd43dSSascha Wildner   int *bucket_A, *bucket_B;
1879*a28cd43dSSascha Wildner   int m, pidx, i;
1880*a28cd43dSSascha Wildner 
1881*a28cd43dSSascha Wildner   /* Check arguments. */
1882*a28cd43dSSascha Wildner   if((T == NULL) || (U == NULL) || (n < 0)) { return -1; }
1883*a28cd43dSSascha Wildner   else if(n <= 1) { if(n == 1) { U[0] = T[0]; } return n; }
1884*a28cd43dSSascha Wildner 
1885*a28cd43dSSascha Wildner   if((B = A) == NULL) { B = (int *)malloc((size_t)(n + 1) * sizeof(int)); }
1886*a28cd43dSSascha Wildner   bucket_A = (int *)malloc(BUCKET_A_SIZE * sizeof(int));
1887*a28cd43dSSascha Wildner   bucket_B = (int *)malloc(BUCKET_B_SIZE * sizeof(int));
1888*a28cd43dSSascha Wildner 
1889*a28cd43dSSascha Wildner   /* Burrows-Wheeler Transform. */
1890*a28cd43dSSascha Wildner   if((B != NULL) && (bucket_A != NULL) && (bucket_B != NULL)) {
1891*a28cd43dSSascha Wildner     m = sort_typeBstar(T, B, bucket_A, bucket_B, n, openMP);
1892*a28cd43dSSascha Wildner 
1893*a28cd43dSSascha Wildner     if (num_indexes == NULL || indexes == NULL) {
1894*a28cd43dSSascha Wildner         pidx = construct_BWT(T, B, bucket_A, bucket_B, n, m);
1895*a28cd43dSSascha Wildner     } else {
1896*a28cd43dSSascha Wildner         pidx = construct_BWT_indexes(T, B, bucket_A, bucket_B, n, m, num_indexes, indexes);
1897*a28cd43dSSascha Wildner     }
1898*a28cd43dSSascha Wildner 
1899*a28cd43dSSascha Wildner     /* Copy to output string. */
1900*a28cd43dSSascha Wildner     U[0] = T[n - 1];
1901*a28cd43dSSascha Wildner     for(i = 0; i < pidx; ++i) { U[i + 1] = (unsigned char)B[i]; }
1902*a28cd43dSSascha Wildner     for(i += 1; i < n; ++i) { U[i] = (unsigned char)B[i]; }
1903*a28cd43dSSascha Wildner     pidx += 1;
1904*a28cd43dSSascha Wildner   } else {
1905*a28cd43dSSascha Wildner     pidx = -2;
1906*a28cd43dSSascha Wildner   }
1907*a28cd43dSSascha Wildner 
1908*a28cd43dSSascha Wildner   free(bucket_B);
1909*a28cd43dSSascha Wildner   free(bucket_A);
1910*a28cd43dSSascha Wildner   if(A == NULL) { free(B); }
1911*a28cd43dSSascha Wildner 
1912*a28cd43dSSascha Wildner   return pidx;
1913*a28cd43dSSascha Wildner }
1914