xref: /netbsd-src/external/gpl3/gcc/dist/libphobos/src/std/internal/math/biguintnoasm.d (revision b1e838363e3c6fc78a55519254d99869742dd33c)
1181254a7Smrg /** Arbitrary precision arithmetic ('bignum') for processors with no asm support
2181254a7Smrg  *
3181254a7Smrg  * All functions operate on arrays of uints, stored LSB first.
4181254a7Smrg  * If there is a destination array, it will be the first parameter.
5181254a7Smrg  * Currently, all of these functions are subject to change, and are
6181254a7Smrg  * intended for internal use only.
7181254a7Smrg  * This module is intended only to assist development of high-speed routines
8181254a7Smrg  * on currently unsupported processors.
9181254a7Smrg  * The X86 asm version is about 30 times faster than the D version (DMD).
10181254a7Smrg  */
11181254a7Smrg 
12181254a7Smrg /*          Copyright Don Clugston 2008 - 2010.
13181254a7Smrg  * Distributed under the Boost Software License, Version 1.0.
14181254a7Smrg  *    (See accompanying file LICENSE_1_0.txt or copy at
15181254a7Smrg  *          http://www.boost.org/LICENSE_1_0.txt)
16181254a7Smrg  */
17181254a7Smrg 
18181254a7Smrg module std.internal.math.biguintnoasm;
19181254a7Smrg 
20181254a7Smrg nothrow:
21181254a7Smrg @safe:
22181254a7Smrg 
23181254a7Smrg public:
24181254a7Smrg alias BigDigit = uint; // A Bignum is an array of BigDigits.
25181254a7Smrg 
26181254a7Smrg     // Limits for when to switch between multiplication algorithms.
27181254a7Smrg enum int KARATSUBALIMIT = 10; // Minimum value for which Karatsuba is worthwhile.
28181254a7Smrg enum int KARATSUBASQUARELIMIT = 12; // Minimum value for which square Karatsuba is worthwhile
29181254a7Smrg 
30181254a7Smrg 
31181254a7Smrg /** Multi-byte addition or subtraction
32181254a7Smrg  *    dest[] = src1[] + src2[] + carry (0 or 1).
33181254a7Smrg  * or dest[] = src1[] - src2[] - carry (0 or 1).
34181254a7Smrg  * Returns carry or borrow (0 or 1).
35181254a7Smrg  * Set op == '+' for addition, '-' for subtraction.
36181254a7Smrg  */
multibyteAddSub(char op)37181254a7Smrg uint multibyteAddSub(char op)(uint[] dest, const(uint) [] src1,
38181254a7Smrg     const (uint) [] src2, uint carry) pure @nogc @safe
39181254a7Smrg {
40181254a7Smrg     ulong c = carry;
41181254a7Smrg     for (size_t i = 0; i < src2.length; ++i)
42181254a7Smrg     {
43181254a7Smrg         static if (op=='+') c = c  + src1[i] + src2[i];
44181254a7Smrg              else           c = cast(ulong) src1[i] - src2[i] - c;
45181254a7Smrg         dest[i] = cast(uint) c;
46181254a7Smrg         c = (c > 0xFFFF_FFFF);
47181254a7Smrg     }
48181254a7Smrg     return cast(uint) c;
49181254a7Smrg }
50181254a7Smrg 
51181254a7Smrg @safe unittest
52181254a7Smrg {
53181254a7Smrg     uint [] a = new uint[40];
54181254a7Smrg     uint [] b = new uint[40];
55181254a7Smrg     uint [] c = new uint[40];
56181254a7Smrg     for (size_t i = 0; i < a.length; ++i)
57181254a7Smrg     {
58181254a7Smrg         if (i&1) a[i]=cast(uint)(0x8000_0000 + i);
59181254a7Smrg         else a[i]=cast(uint) i;
60181254a7Smrg         b[i]= 0x8000_0003;
61181254a7Smrg     }
62181254a7Smrg     c[19]=0x3333_3333;
63181254a7Smrg     uint carry = multibyteAddSub!('+')(c[0 .. 18], b[0 .. 18], a[0 .. 18], 0);
64*b1e83836Smrg     assert(c[0]==0x8000_0003, "c[0] has invalid value");
65*b1e83836Smrg     assert(c[1]==4, "c[1] must be for");
66*b1e83836Smrg     assert(c[19]==0x3333_3333, "c[19] has invalid value"); // check for overrun
67*b1e83836Smrg     assert(carry == 1, "carry must be 1");
68181254a7Smrg     for (size_t i = 0; i < a.length; ++i)
69181254a7Smrg     {
70181254a7Smrg         a[i] = b[i] = c[i] = 0;
71181254a7Smrg     }
72181254a7Smrg     a[8]=0x048D159E;
73181254a7Smrg     b[8]=0x048D159E;
74181254a7Smrg     a[10]=0x1D950C84;
75181254a7Smrg     b[10]=0x1D950C84;
76181254a7Smrg     a[5] =0x44444444;
77181254a7Smrg     carry = multibyteAddSub!('-')(a[0 .. 12], a[0 .. 12], b[0 .. 12], 0);
78*b1e83836Smrg     assert(a[11] == 0, "a[11] must be 0");
79181254a7Smrg     for (size_t i = 0; i < 10; ++i)
80181254a7Smrg         if (i != 5)
81*b1e83836Smrg             assert(a[i] == 0, "a[1] must be 0");
82181254a7Smrg 
83181254a7Smrg     for (size_t q = 3; q < 36; ++q)
84181254a7Smrg     {
85181254a7Smrg         for (size_t i = 0; i< a.length; ++i)
86181254a7Smrg         {
87181254a7Smrg             a[i] = b[i] = c[i] = 0;
88181254a7Smrg         }
89181254a7Smrg         a[q-2]=0x040000;
90181254a7Smrg         b[q-2]=0x040000;
91181254a7Smrg        carry = multibyteAddSub!('-')(a[0 .. q], a[0 .. q], b[0 .. q], 0);
92*b1e83836Smrg        assert(a[q-2]==0, "a[q-2] must be 0");
93181254a7Smrg     }
94181254a7Smrg }
95181254a7Smrg 
96181254a7Smrg 
97181254a7Smrg 
98181254a7Smrg /** dest[] += carry, or dest[] -= carry.
99181254a7Smrg  *  op must be '+' or '-'
100181254a7Smrg  *  Returns final carry or borrow (0 or 1)
101181254a7Smrg  */
multibyteIncrementAssign(char op)102181254a7Smrg uint multibyteIncrementAssign(char op)(uint[] dest, uint carry)
103181254a7Smrg     pure @nogc @safe
104181254a7Smrg {
105181254a7Smrg     static if (op=='+')
106181254a7Smrg     {
107181254a7Smrg         ulong c = carry;
108181254a7Smrg         c += dest[0];
109181254a7Smrg         dest[0] = cast(uint) c;
110181254a7Smrg         if (c <= 0xFFFF_FFFF)
111181254a7Smrg             return 0;
112181254a7Smrg 
113181254a7Smrg         for (size_t i = 1; i < dest.length; ++i)
114181254a7Smrg         {
115181254a7Smrg             ++dest[i];
116181254a7Smrg             if (dest[i] != 0)
117181254a7Smrg                 return 0;
118181254a7Smrg         }
119181254a7Smrg         return 1;
120181254a7Smrg     }
121181254a7Smrg     else
122181254a7Smrg     {
123181254a7Smrg         ulong c = carry;
124181254a7Smrg         c = dest[0] - c;
125181254a7Smrg         dest[0] = cast(uint) c;
126181254a7Smrg         if (c <= 0xFFFF_FFFF)
127181254a7Smrg             return 0;
128181254a7Smrg         for (size_t i = 1; i < dest.length; ++i)
129181254a7Smrg         {
130181254a7Smrg             --dest[i];
131181254a7Smrg             if (dest[i] != 0xFFFF_FFFF)
132181254a7Smrg                 return 0;
133181254a7Smrg         }
134181254a7Smrg         return 1;
135181254a7Smrg     }
136181254a7Smrg }
137181254a7Smrg 
138181254a7Smrg /** dest[] = src[] << numbits
139181254a7Smrg  *  numbits must be in the range 1 .. 31
140181254a7Smrg  */
multibyteShl(uint[]dest,const (uint)[]src,uint numbits)141181254a7Smrg uint multibyteShl(uint [] dest, const(uint) [] src, uint numbits)
142181254a7Smrg     pure @nogc @safe
143181254a7Smrg {
144181254a7Smrg     ulong c = 0;
145181254a7Smrg     for (size_t i = 0; i < dest.length; ++i)
146181254a7Smrg     {
147181254a7Smrg         c += (cast(ulong)(src[i]) << numbits);
148181254a7Smrg         dest[i] = cast(uint) c;
149181254a7Smrg         c >>>= 32;
150181254a7Smrg    }
151181254a7Smrg    return cast(uint) c;
152181254a7Smrg }
153181254a7Smrg 
154181254a7Smrg 
155181254a7Smrg /** dest[] = src[] >> numbits
156181254a7Smrg  *  numbits must be in the range 1 .. 31
157181254a7Smrg  */
multibyteShr(uint[]dest,const (uint)[]src,uint numbits)158181254a7Smrg void multibyteShr(uint [] dest, const(uint) [] src, uint numbits)
159181254a7Smrg     pure @nogc @safe
160181254a7Smrg {
161181254a7Smrg     ulong c = 0;
162181254a7Smrg     for (ptrdiff_t i = dest.length; i != 0; --i)
163181254a7Smrg     {
164181254a7Smrg         c += (src[i-1] >>numbits) + (cast(ulong)(src[i-1]) << (64 - numbits));
165181254a7Smrg         dest[i-1] = cast(uint) c;
166181254a7Smrg         c >>>= 32;
167181254a7Smrg    }
168181254a7Smrg }
169181254a7Smrg 
170181254a7Smrg @safe unittest
171181254a7Smrg {
172181254a7Smrg 
173181254a7Smrg     uint [] aa = [0x1222_2223, 0x4555_5556, 0x8999_999A, 0xBCCC_CCCD, 0xEEEE_EEEE];
174181254a7Smrg     multibyteShr(aa[0..$-2], aa, 4);
175181254a7Smrg     assert(aa[0] == 0x6122_2222 && aa[1] == 0xA455_5555 && aa[2] == 0x0899_9999);
176181254a7Smrg     assert(aa[3] == 0xBCCC_CCCD);
177181254a7Smrg 
178181254a7Smrg     aa = [0x1222_2223, 0x4555_5556, 0x8999_999A, 0xBCCC_CCCD, 0xEEEE_EEEE];
179181254a7Smrg     multibyteShr(aa[0..$-1], aa, 4);
180181254a7Smrg     assert(aa[0] == 0x6122_2222 && aa[1] == 0xA455_5555
181181254a7Smrg         && aa[2] == 0xD899_9999 && aa[3] == 0x0BCC_CCCC);
182181254a7Smrg 
183181254a7Smrg     aa = [0xF0FF_FFFF, 0x1222_2223, 0x4555_5556, 0x8999_999A, 0xBCCC_CCCD,
184181254a7Smrg         0xEEEE_EEEE];
185181254a7Smrg     multibyteShl(aa[1 .. 4], aa[1..$], 4);
186181254a7Smrg     assert(aa[0] == 0xF0FF_FFFF && aa[1] == 0x2222_2230
187181254a7Smrg         && aa[2]==0x5555_5561 && aa[3]==0x9999_99A4 && aa[4]==0x0BCCC_CCCD);
188181254a7Smrg }
189181254a7Smrg 
190181254a7Smrg /** dest[] = src[] * multiplier + carry.
191181254a7Smrg  * Returns carry.
192181254a7Smrg  */
multibyteMul(uint[]dest,const (uint)[]src,uint multiplier,uint carry)193181254a7Smrg uint multibyteMul(uint[] dest, const(uint)[] src, uint multiplier, uint carry)
194181254a7Smrg     pure @nogc @safe
195181254a7Smrg {
196*b1e83836Smrg     assert(dest.length == src.length, "dest and src must have the same length");
197181254a7Smrg     ulong c = carry;
198181254a7Smrg     for (size_t i = 0; i < src.length; ++i)
199181254a7Smrg     {
200181254a7Smrg         c += cast(ulong)(src[i]) * multiplier;
201181254a7Smrg         dest[i] = cast(uint) c;
202181254a7Smrg         c>>=32;
203181254a7Smrg     }
204181254a7Smrg     return cast(uint) c;
205181254a7Smrg }
206181254a7Smrg 
207181254a7Smrg @safe unittest
208181254a7Smrg {
209181254a7Smrg     uint [] aa = [0xF0FF_FFFF, 0x1222_2223, 0x4555_5556, 0x8999_999A,
210181254a7Smrg         0xBCCC_CCCD, 0xEEEE_EEEE];
211181254a7Smrg     multibyteMul(aa[1 .. 4], aa[1 .. 4], 16, 0);
212181254a7Smrg     assert(aa[0] == 0xF0FF_FFFF && aa[1] == 0x2222_2230 && aa[2]==0x5555_5561
213181254a7Smrg         && aa[3]==0x9999_99A4 && aa[4]==0x0BCCC_CCCD);
214181254a7Smrg }
215181254a7Smrg 
216181254a7Smrg /**
217181254a7Smrg  * dest[] += src[] * multiplier + carry(0 .. FFFF_FFFF).
218181254a7Smrg  * Returns carry out of MSB (0 .. FFFF_FFFF).
219181254a7Smrg  */
multibyteMulAdd(char op)220181254a7Smrg uint multibyteMulAdd(char op)(uint [] dest, const(uint)[] src,
221181254a7Smrg     uint multiplier, uint carry) pure @nogc @safe
222181254a7Smrg {
223*b1e83836Smrg     assert(dest.length == src.length, "dest and src must have the same length");
224181254a7Smrg     ulong c = carry;
225181254a7Smrg     for (size_t i = 0; i < src.length; ++i)
226181254a7Smrg     {
227181254a7Smrg         static if (op=='+')
228181254a7Smrg         {
229181254a7Smrg             c += cast(ulong)(multiplier) * src[i]  + dest[i];
230181254a7Smrg             dest[i] = cast(uint) c;
231181254a7Smrg             c >>= 32;
232181254a7Smrg         }
233181254a7Smrg         else
234181254a7Smrg         {
235181254a7Smrg             c += cast(ulong) multiplier * src[i];
236181254a7Smrg             ulong t = cast(ulong) dest[i] - cast(uint) c;
237181254a7Smrg             dest[i] = cast(uint) t;
238181254a7Smrg             c = cast(uint)((c >> 32) - (t >> 32));
239181254a7Smrg         }
240181254a7Smrg     }
241181254a7Smrg     return cast(uint) c;
242181254a7Smrg }
243181254a7Smrg 
244181254a7Smrg @safe unittest
245181254a7Smrg {
246181254a7Smrg 
247181254a7Smrg     uint [] aa = [0xF0FF_FFFF, 0x1222_2223, 0x4555_5556, 0x8999_999A,
248181254a7Smrg         0xBCCC_CCCD, 0xEEEE_EEEE];
249181254a7Smrg     uint [] bb = [0x1234_1234, 0xF0F0_F0F0, 0x00C0_C0C0, 0xF0F0_F0F0,
250181254a7Smrg         0xC0C0_C0C0];
251181254a7Smrg     multibyteMulAdd!('+')(bb[1..$-1], aa[1..$-2], 16, 5);
252181254a7Smrg     assert(bb[0] == 0x1234_1234 && bb[4] == 0xC0C0_C0C0);
253181254a7Smrg     assert(bb[1] == 0x2222_2230 + 0xF0F0_F0F0 + 5
254181254a7Smrg         && bb[2] == 0x5555_5561 + 0x00C0_C0C0 + 1
255181254a7Smrg         && bb[3] == 0x9999_99A4 + 0xF0F0_F0F0 );
256181254a7Smrg }
257181254a7Smrg 
258181254a7Smrg 
259181254a7Smrg /**
260181254a7Smrg    Sets result = result[0 .. left.length] + left * right
261181254a7Smrg 
262181254a7Smrg    It is defined in this way to allow cache-efficient multiplication.
263181254a7Smrg    This function is equivalent to:
264181254a7Smrg     ----
265181254a7Smrg     for (size_t i = 0; i< right.length; ++i)
266181254a7Smrg     {
267181254a7Smrg         dest[left.length + i] = multibyteMulAdd(dest[i .. left.length+i],
268181254a7Smrg                 left, right[i], 0);
269181254a7Smrg     }
270181254a7Smrg     ----
271181254a7Smrg  */
multibyteMultiplyAccumulate(uint[]dest,const (uint)[]left,const (uint)[]right)272181254a7Smrg void multibyteMultiplyAccumulate(uint [] dest, const(uint)[] left, const(uint)
273181254a7Smrg         [] right) pure @nogc @safe
274181254a7Smrg {
275181254a7Smrg     for (size_t i = 0; i < right.length; ++i)
276181254a7Smrg     {
277181254a7Smrg         dest[left.length + i] = multibyteMulAdd!('+')(dest[i .. left.length+i],
278181254a7Smrg                 left, right[i], 0);
279181254a7Smrg     }
280181254a7Smrg }
281181254a7Smrg 
282181254a7Smrg /**  dest[] /= divisor.
283181254a7Smrg  * overflow is the initial remainder, and must be in the range 0 .. divisor-1.
284181254a7Smrg  */
multibyteDivAssign(uint[]dest,uint divisor,uint overflow)285181254a7Smrg uint multibyteDivAssign(uint [] dest, uint divisor, uint overflow)
286181254a7Smrg     pure @nogc @safe
287181254a7Smrg {
288181254a7Smrg     ulong c = cast(ulong) overflow;
289181254a7Smrg     for (ptrdiff_t i = dest.length-1; i >= 0; --i)
290181254a7Smrg     {
291181254a7Smrg         c = (c << 32) + cast(ulong)(dest[i]);
292181254a7Smrg         uint q = cast(uint)(c/divisor);
293181254a7Smrg         c -= divisor * q;
294181254a7Smrg         dest[i] = q;
295181254a7Smrg     }
296181254a7Smrg     return cast(uint) c;
297181254a7Smrg }
298181254a7Smrg 
299181254a7Smrg @safe unittest
300181254a7Smrg {
301181254a7Smrg     uint [] aa = new uint[101];
302181254a7Smrg     for (uint i = 0; i < aa.length; ++i)
303181254a7Smrg         aa[i] = 0x8765_4321 * (i+3);
304181254a7Smrg     uint overflow = multibyteMul(aa, aa, 0x8EFD_FCFB, 0x33FF_7461);
305181254a7Smrg     uint r = multibyteDivAssign(aa, 0x8EFD_FCFB, overflow);
306181254a7Smrg     for (uint i=0; i<aa.length; ++i)
307181254a7Smrg     {
308181254a7Smrg         assert(aa[i] == 0x8765_4321 * (i+3));
309181254a7Smrg     }
310181254a7Smrg     assert(r == 0x33FF_7461);
311181254a7Smrg 
312181254a7Smrg }
313181254a7Smrg // Set dest[2*i .. 2*i+1]+=src[i]*src[i]
multibyteAddDiagonalSquares(uint[]dest,const (uint)[]src)314181254a7Smrg void multibyteAddDiagonalSquares(uint[] dest, const(uint)[] src)
315181254a7Smrg     pure @nogc @safe
316181254a7Smrg {
317181254a7Smrg     ulong c = 0;
318181254a7Smrg     for (size_t i = 0; i < src.length; ++i)
319181254a7Smrg     {
320181254a7Smrg         // At this point, c is 0 or 1, since FFFF*FFFF+FFFF_FFFF = 1_0000_0000.
321181254a7Smrg         c += cast(ulong)(src[i]) * src[i] + dest[2*i];
322181254a7Smrg         dest[2*i] = cast(uint) c;
323181254a7Smrg         c = (c>>=32) + dest[2*i+1];
324181254a7Smrg         dest[2*i+1] = cast(uint) c;
325181254a7Smrg         c >>= 32;
326181254a7Smrg     }
327181254a7Smrg }
328181254a7Smrg 
329181254a7Smrg // Does half a square multiply. (square = diagonal + 2*triangle)
multibyteTriangleAccumulate(uint[]dest,const (uint)[]x)330181254a7Smrg void multibyteTriangleAccumulate(uint[] dest, const(uint)[] x)
331181254a7Smrg     pure @nogc @safe
332181254a7Smrg {
333181254a7Smrg     // x[0]*x[1...$] + x[1]*x[2..$] + ... + x[$-2]x[$-1..$]
334181254a7Smrg     dest[x.length] = multibyteMul(dest[1 .. x.length], x[1..$], x[0], 0);
335181254a7Smrg     if (x.length < 4)
336181254a7Smrg     {
337181254a7Smrg         if (x.length == 3)
338181254a7Smrg         {
339181254a7Smrg             ulong c = cast(ulong)(x[$-1]) * x[$-2]  + dest[2*x.length-3];
340181254a7Smrg             dest[2*x.length - 3] = cast(uint) c;
341181254a7Smrg             c >>= 32;
342181254a7Smrg             dest[2*x.length - 2] = cast(uint) c;
343181254a7Smrg         }
344181254a7Smrg         return;
345181254a7Smrg     }
346181254a7Smrg     for (size_t i = 2; i < x.length - 2; ++i)
347181254a7Smrg     {
348181254a7Smrg         dest[i-1+ x.length] = multibyteMulAdd!('+')(
349181254a7Smrg              dest[i+i-1 .. i+x.length-1], x[i..$], x[i-1], 0);
350181254a7Smrg     }
351181254a7Smrg         // Unroll the last two entries, to reduce loop overhead:
352181254a7Smrg     ulong  c = cast(ulong)(x[$-3]) * x[$-2] + dest[2*x.length-5];
353181254a7Smrg     dest[2*x.length-5] = cast(uint) c;
354181254a7Smrg     c >>= 32;
355181254a7Smrg     c += cast(ulong)(x[$-3]) * x[$-1] + dest[2*x.length-4];
356181254a7Smrg     dest[2*x.length-4] = cast(uint) c;
357181254a7Smrg     c >>= 32;
358181254a7Smrg     c += cast(ulong)(x[$-1]) * x[$-2];
359181254a7Smrg     dest[2*x.length-3] = cast(uint) c;
360181254a7Smrg     c >>= 32;
361181254a7Smrg     dest[2*x.length-2] = cast(uint) c;
362181254a7Smrg }
363181254a7Smrg 
multibyteSquare(BigDigit[]result,const (BigDigit)[]x)364181254a7Smrg void multibyteSquare(BigDigit[] result, const(BigDigit) [] x) pure @nogc @safe
365181254a7Smrg {
366181254a7Smrg     multibyteTriangleAccumulate(result, x);
367181254a7Smrg     result[$-1] = multibyteShl(result[1..$-1], result[1..$-1], 1); // mul by 2
368181254a7Smrg     result[0] = 0;
369181254a7Smrg     multibyteAddDiagonalSquares(result, x);
370181254a7Smrg }
371