xref: /netbsd-src/sys/external/isc/libsodium/dist/src/libsodium/crypto_core/ed25519/ref10/ed25519_ref10.c (revision 02ec6e2340107ffc512690e0a3a2be85eb0a42a9)
1 #include <stddef.h>
2 #include <stdint.h>
3 #include <stdlib.h>
4 #include <string.h>
5 
6 #include "crypto_verify_32.h"
7 #include "private/common.h"
8 #include "private/ed25519_ref10.h"
9 #include "utils.h"
10 
/*
 * Assemble the three bytes in[0..2] into an unsigned integer,
 * least-significant byte first.
 */
static inline uint64_t
load_3(const unsigned char *in)
{
    const uint64_t b0 = (uint64_t) in[0];
    const uint64_t b1 = (uint64_t) in[1];
    const uint64_t b2 = (uint64_t) in[2];

    return b0 | (b1 << 8) | (b2 << 16);
}
22 
/*
 * Assemble the four bytes in[0..3] into an unsigned integer,
 * least-significant byte first.
 */
static inline uint64_t
load_4(const unsigned char *in)
{
    const uint64_t b0 = (uint64_t) in[0];
    const uint64_t b1 = (uint64_t) in[1];
    const uint64_t b2 = (uint64_t) in[2];
    const uint64_t b3 = (uint64_t) in[3];

    return b0 | (b1 << 8) | (b2 << 16) | (b3 << 24);
}
35 
36 /*
37  * Field arithmetic:
38  * Use 5*51 bit limbs on 64-bit systems with support for 128 bit arithmetic,
39  * and 10*25.5 bit limbs elsewhere.
40  *
 * Functions used elsewhere that are candidates for inlining are defined
 * via "private/ed25519_ref10.h".
43  */
44 
45 #ifdef HAVE_TI_MODE
46 # include "fe_51/constants.h"
47 # include "fe_51/fe.h"
48 #else
49 # include "fe_25_5/constants.h"
50 # include "fe_25_5/fe.h"
51 #endif
52 
53 void
fe25519_invert(fe25519 out,const fe25519 z)54 fe25519_invert(fe25519 out, const fe25519 z)
55 {
56     fe25519 t0;
57     fe25519 t1;
58     fe25519 t2;
59     fe25519 t3;
60     int     i;
61 
62     fe25519_sq(t0, z);
63     fe25519_sq(t1, t0);
64     fe25519_sq(t1, t1);
65     fe25519_mul(t1, z, t1);
66     fe25519_mul(t0, t0, t1);
67     fe25519_sq(t2, t0);
68     fe25519_mul(t1, t1, t2);
69     fe25519_sq(t2, t1);
70     for (i = 1; i < 5; ++i) {
71         fe25519_sq(t2, t2);
72     }
73     fe25519_mul(t1, t2, t1);
74     fe25519_sq(t2, t1);
75     for (i = 1; i < 10; ++i) {
76         fe25519_sq(t2, t2);
77     }
78     fe25519_mul(t2, t2, t1);
79     fe25519_sq(t3, t2);
80     for (i = 1; i < 20; ++i) {
81         fe25519_sq(t3, t3);
82     }
83     fe25519_mul(t2, t3, t2);
84     fe25519_sq(t2, t2);
85     for (i = 1; i < 10; ++i) {
86         fe25519_sq(t2, t2);
87     }
88     fe25519_mul(t1, t2, t1);
89     fe25519_sq(t2, t1);
90     for (i = 1; i < 50; ++i) {
91         fe25519_sq(t2, t2);
92     }
93     fe25519_mul(t2, t2, t1);
94     fe25519_sq(t3, t2);
95     for (i = 1; i < 100; ++i) {
96         fe25519_sq(t3, t3);
97     }
98     fe25519_mul(t2, t3, t2);
99     fe25519_sq(t2, t2);
100     for (i = 1; i < 50; ++i) {
101         fe25519_sq(t2, t2);
102     }
103     fe25519_mul(t1, t2, t1);
104     fe25519_sq(t1, t1);
105     for (i = 1; i < 5; ++i) {
106         fe25519_sq(t1, t1);
107     }
108     fe25519_mul(out, t1, t0);
109 }
110 
111 static void
fe25519_pow22523(fe25519 out,const fe25519 z)112 fe25519_pow22523(fe25519 out, const fe25519 z)
113 {
114     fe25519 t0;
115     fe25519 t1;
116     fe25519 t2;
117     int     i;
118 
119     fe25519_sq(t0, z);
120     fe25519_sq(t1, t0);
121     fe25519_sq(t1, t1);
122     fe25519_mul(t1, z, t1);
123     fe25519_mul(t0, t0, t1);
124     fe25519_sq(t0, t0);
125     fe25519_mul(t0, t1, t0);
126     fe25519_sq(t1, t0);
127     for (i = 1; i < 5; ++i) {
128         fe25519_sq(t1, t1);
129     }
130     fe25519_mul(t0, t1, t0);
131     fe25519_sq(t1, t0);
132     for (i = 1; i < 10; ++i) {
133         fe25519_sq(t1, t1);
134     }
135     fe25519_mul(t1, t1, t0);
136     fe25519_sq(t2, t1);
137     for (i = 1; i < 20; ++i) {
138         fe25519_sq(t2, t2);
139     }
140     fe25519_mul(t1, t2, t1);
141     fe25519_sq(t1, t1);
142     for (i = 1; i < 10; ++i) {
143         fe25519_sq(t1, t1);
144     }
145     fe25519_mul(t0, t1, t0);
146     fe25519_sq(t1, t0);
147     for (i = 1; i < 50; ++i) {
148         fe25519_sq(t1, t1);
149     }
150     fe25519_mul(t1, t1, t0);
151     fe25519_sq(t2, t1);
152     for (i = 1; i < 100; ++i) {
153         fe25519_sq(t2, t2);
154     }
155     fe25519_mul(t1, t2, t1);
156     fe25519_sq(t1, t1);
157     for (i = 1; i < 50; ++i) {
158         fe25519_sq(t1, t1);
159     }
160     fe25519_mul(t0, t1, t0);
161     fe25519_sq(t0, t0);
162     fe25519_sq(t0, t0);
163     fe25519_mul(out, t0, z);
164 }
165 
166 /*
167  r = p + q
168  */
169 
170 void
ge25519_add(ge25519_p1p1 * r,const ge25519_p3 * p,const ge25519_cached * q)171 ge25519_add(ge25519_p1p1 *r, const ge25519_p3 *p, const ge25519_cached *q)
172 {
173     fe25519 t0;
174 
175     fe25519_add(r->X, p->Y, p->X);
176     fe25519_sub(r->Y, p->Y, p->X);
177     fe25519_mul(r->Z, r->X, q->YplusX);
178     fe25519_mul(r->Y, r->Y, q->YminusX);
179     fe25519_mul(r->T, q->T2d, p->T);
180     fe25519_mul(r->X, p->Z, q->Z);
181     fe25519_add(t0, r->X, r->X);
182     fe25519_sub(r->X, r->Z, r->Y);
183     fe25519_add(r->Y, r->Z, r->Y);
184     fe25519_add(r->Z, t0, r->T);
185     fe25519_sub(r->T, t0, r->T);
186 }
187 
/*
 * Recode the 256-bit little-endian scalar a[0..31] into 256 signed digits
 * r[0..255].  r is first filled with the individual bits of a; set bits
 * up to 6 positions above a non-zero digit are then folded into that
 * digit as long as the result stays within [-15, 15], borrowing from
 * higher positions when the fold is done by subtraction.
 *
 * Control flow depends on the scalar, as the name says.
 */
static void
slide_vartime(signed char *r, const unsigned char *a)
{
    int pos;
    int dist;
    int up;
    int shifted;
    int merged;

    for (pos = 0; pos < 256; pos++) {
        r[pos] = 1 & (a[pos >> 3] >> (pos & 7));
    }

    for (pos = 0; pos < 256; pos++) {
        if (r[pos] == 0) {
            continue;
        }
        for (dist = 1; dist <= 6 && pos + dist < 256; dist++) {
            if (r[pos + dist] == 0) {
                continue;
            }
            shifted = r[pos + dist] << dist;
            merged  = r[pos] + shifted;
            if (merged <= 15) {
                /* absorb the higher digit by addition */
                r[pos]        = merged;
                r[pos + dist] = 0;
                continue;
            }
            merged = r[pos] - shifted;
            if (merged < -15) {
                break;
            }
            /* absorb by subtraction and push a carry upwards */
            r[pos] = merged;
            up     = pos + dist;
            while (up < 256 && r[up] != 0) {
                r[up] = 0;
                up++;
            }
            if (up < 256) {
                r[up] = 1;
            }
        }
    }
}
230 
231 int
ge25519_frombytes(ge25519_p3 * h,const unsigned char * s)232 ge25519_frombytes(ge25519_p3 *h, const unsigned char *s)
233 {
234     fe25519 u;
235     fe25519 v;
236     fe25519 v3;
237     fe25519 vxx;
238     fe25519 m_root_check, p_root_check;
239     fe25519 negx;
240     fe25519 x_sqrtm1;
241     int     has_m_root, has_p_root;
242 
243     fe25519_frombytes(h->Y, s);
244     fe25519_1(h->Z);
245     fe25519_sq(u, h->Y);
246     fe25519_mul(v, u, d);
247     fe25519_sub(u, u, h->Z); /* u = y^2-1 */
248     fe25519_add(v, v, h->Z); /* v = dy^2+1 */
249 
250     fe25519_sq(v3, v);
251     fe25519_mul(v3, v3, v); /* v3 = v^3 */
252     fe25519_sq(h->X, v3);
253     fe25519_mul(h->X, h->X, v);
254     fe25519_mul(h->X, h->X, u); /* x = uv^7 */
255 
256     fe25519_pow22523(h->X, h->X); /* x = (uv^7)^((q-5)/8) */
257     fe25519_mul(h->X, h->X, v3);
258     fe25519_mul(h->X, h->X, u); /* x = uv^3(uv^7)^((q-5)/8) */
259 
260     fe25519_sq(vxx, h->X);
261     fe25519_mul(vxx, vxx, v);
262     fe25519_sub(m_root_check, vxx, u); /* vx^2-u */
263     fe25519_add(p_root_check, vxx, u); /* vx^2+u */
264     has_m_root = fe25519_iszero(m_root_check);
265     has_p_root = fe25519_iszero(p_root_check);
266     fe25519_mul(x_sqrtm1, h->X, sqrtm1); /* x*sqrt(-1) */
267     fe25519_cmov(h->X, x_sqrtm1, 1 - has_m_root);
268 
269     fe25519_neg(negx, h->X);
270     fe25519_cmov(h->X, negx, fe25519_isnegative(h->X) ^ (s[31] >> 7));
271     fe25519_mul(h->T, h->X, h->Y);
272 
273     return (has_m_root | has_p_root) - 1;
274 }
275 
276 int
ge25519_frombytes_negate_vartime(ge25519_p3 * h,const unsigned char * s)277 ge25519_frombytes_negate_vartime(ge25519_p3 *h, const unsigned char *s)
278 {
279     fe25519 u;
280     fe25519 v;
281     fe25519 v3;
282     fe25519 vxx;
283     fe25519 m_root_check, p_root_check;
284 
285     fe25519_frombytes(h->Y, s);
286     fe25519_1(h->Z);
287     fe25519_sq(u, h->Y);
288     fe25519_mul(v, u, d);
289     fe25519_sub(u, u, h->Z); /* u = y^2-1 */
290     fe25519_add(v, v, h->Z); /* v = dy^2+1 */
291 
292     fe25519_sq(v3, v);
293     fe25519_mul(v3, v3, v); /* v3 = v^3 */
294     fe25519_sq(h->X, v3);
295     fe25519_mul(h->X, h->X, v);
296     fe25519_mul(h->X, h->X, u); /* x = uv^7 */
297 
298     fe25519_pow22523(h->X, h->X); /* x = (uv^7)^((q-5)/8) */
299     fe25519_mul(h->X, h->X, v3);
300     fe25519_mul(h->X, h->X, u); /* x = uv^3(uv^7)^((q-5)/8) */
301 
302     fe25519_sq(vxx, h->X);
303     fe25519_mul(vxx, vxx, v);
304     fe25519_sub(m_root_check, vxx, u); /* vx^2-u */
305     if (fe25519_iszero(m_root_check) == 0) {
306         fe25519_add(p_root_check, vxx, u); /* vx^2+u */
307         if (fe25519_iszero(p_root_check) == 0) {
308             return -1;
309         }
310         fe25519_mul(h->X, h->X, sqrtm1);
311     }
312 
313     if (fe25519_isnegative(h->X) == (s[31] >> 7)) {
314         fe25519_neg(h->X, h->X);
315     }
316     fe25519_mul(h->T, h->X, h->Y);
317 
318     return 0;
319 }
320 
321 /*
322  r = p + q
323  */
324 
325 static void
ge25519_madd(ge25519_p1p1 * r,const ge25519_p3 * p,const ge25519_precomp * q)326 ge25519_madd(ge25519_p1p1 *r, const ge25519_p3 *p, const ge25519_precomp *q)
327 {
328     fe25519 t0;
329 
330     fe25519_add(r->X, p->Y, p->X);
331     fe25519_sub(r->Y, p->Y, p->X);
332     fe25519_mul(r->Z, r->X, q->yplusx);
333     fe25519_mul(r->Y, r->Y, q->yminusx);
334     fe25519_mul(r->T, q->xy2d, p->T);
335     fe25519_add(t0, p->Z, p->Z);
336     fe25519_sub(r->X, r->Z, r->Y);
337     fe25519_add(r->Y, r->Z, r->Y);
338     fe25519_add(r->Z, t0, r->T);
339     fe25519_sub(r->T, t0, r->T);
340 }
341 
342 /*
343  r = p - q
344  */
345 
346 static void
ge25519_msub(ge25519_p1p1 * r,const ge25519_p3 * p,const ge25519_precomp * q)347 ge25519_msub(ge25519_p1p1 *r, const ge25519_p3 *p, const ge25519_precomp *q)
348 {
349     fe25519 t0;
350 
351     fe25519_add(r->X, p->Y, p->X);
352     fe25519_sub(r->Y, p->Y, p->X);
353     fe25519_mul(r->Z, r->X, q->yminusx);
354     fe25519_mul(r->Y, r->Y, q->yplusx);
355     fe25519_mul(r->T, q->xy2d, p->T);
356     fe25519_add(t0, p->Z, p->Z);
357     fe25519_sub(r->X, r->Z, r->Y);
358     fe25519_add(r->Y, r->Z, r->Y);
359     fe25519_sub(r->Z, t0, r->T);
360     fe25519_add(r->T, t0, r->T);
361 }
362 
363 /*
364  r = p
365  */
366 
367 void
ge25519_p1p1_to_p2(ge25519_p2 * r,const ge25519_p1p1 * p)368 ge25519_p1p1_to_p2(ge25519_p2 *r, const ge25519_p1p1 *p)
369 {
370     fe25519_mul(r->X, p->X, p->T);
371     fe25519_mul(r->Y, p->Y, p->Z);
372     fe25519_mul(r->Z, p->Z, p->T);
373 }
374 
375 /*
376  r = p
377  */
378 
379 void
ge25519_p1p1_to_p3(ge25519_p3 * r,const ge25519_p1p1 * p)380 ge25519_p1p1_to_p3(ge25519_p3 *r, const ge25519_p1p1 *p)
381 {
382     fe25519_mul(r->X, p->X, p->T);
383     fe25519_mul(r->Y, p->Y, p->Z);
384     fe25519_mul(r->Z, p->Z, p->T);
385     fe25519_mul(r->T, p->X, p->Y);
386 }
387 
388 static void
ge25519_p2_0(ge25519_p2 * h)389 ge25519_p2_0(ge25519_p2 *h)
390 {
391     fe25519_0(h->X);
392     fe25519_1(h->Y);
393     fe25519_1(h->Z);
394 }
395 
396 /*
397  r = 2 * p
398  */
399 
400 static void
ge25519_p2_dbl(ge25519_p1p1 * r,const ge25519_p2 * p)401 ge25519_p2_dbl(ge25519_p1p1 *r, const ge25519_p2 *p)
402 {
403     fe25519 t0;
404 
405     fe25519_sq(r->X, p->X);
406     fe25519_sq(r->Z, p->Y);
407     fe25519_sq2(r->T, p->Z);
408     fe25519_add(r->Y, p->X, p->Y);
409     fe25519_sq(t0, r->Y);
410     fe25519_add(r->Y, r->Z, r->X);
411     fe25519_sub(r->Z, r->Z, r->X);
412     fe25519_sub(r->X, t0, r->Y);
413     fe25519_sub(r->T, r->T, r->Z);
414 }
415 
416 static void
ge25519_p3_0(ge25519_p3 * h)417 ge25519_p3_0(ge25519_p3 *h)
418 {
419     fe25519_0(h->X);
420     fe25519_1(h->Y);
421     fe25519_1(h->Z);
422     fe25519_0(h->T);
423 }
424 
425 static void
ge25519_cached_0(ge25519_cached * h)426 ge25519_cached_0(ge25519_cached *h)
427 {
428     fe25519_1(h->YplusX);
429     fe25519_1(h->YminusX);
430     fe25519_1(h->Z);
431     fe25519_0(h->T2d);
432 }
433 
434 /*
435  r = p
436  */
437 
438 void
ge25519_p3_to_cached(ge25519_cached * r,const ge25519_p3 * p)439 ge25519_p3_to_cached(ge25519_cached *r, const ge25519_p3 *p)
440 {
441     fe25519_add(r->YplusX, p->Y, p->X);
442     fe25519_sub(r->YminusX, p->Y, p->X);
443     fe25519_copy(r->Z, p->Z);
444     fe25519_mul(r->T2d, p->T, d2);
445 }
446 
447 static void
ge25519_p3_to_precomp(ge25519_precomp * pi,const ge25519_p3 * p)448 ge25519_p3_to_precomp(ge25519_precomp *pi, const ge25519_p3 *p)
449 {
450     fe25519 recip;
451     fe25519 x;
452     fe25519 y;
453     fe25519 xy;
454 
455     fe25519_invert(recip, p->Z);
456     fe25519_mul(x, p->X, recip);
457     fe25519_mul(y, p->Y, recip);
458     fe25519_add(pi->yplusx, y, x);
459     fe25519_sub(pi->yminusx, y, x);
460     fe25519_mul(xy, x, y);
461     fe25519_mul(pi->xy2d, xy, d2);
462 }
463 
464 /*
465  r = p
466  */
467 
468 static void
ge25519_p3_to_p2(ge25519_p2 * r,const ge25519_p3 * p)469 ge25519_p3_to_p2(ge25519_p2 *r, const ge25519_p3 *p)
470 {
471     fe25519_copy(r->X, p->X);
472     fe25519_copy(r->Y, p->Y);
473     fe25519_copy(r->Z, p->Z);
474 }
475 
476 void
ge25519_p3_tobytes(unsigned char * s,const ge25519_p3 * h)477 ge25519_p3_tobytes(unsigned char *s, const ge25519_p3 *h)
478 {
479     fe25519 recip;
480     fe25519 x;
481     fe25519 y;
482 
483     fe25519_invert(recip, h->Z);
484     fe25519_mul(x, h->X, recip);
485     fe25519_mul(y, h->Y, recip);
486     fe25519_tobytes(s, y);
487     s[31] ^= fe25519_isnegative(x) << 7;
488 }
489 
490 /*
491  r = 2 * p
492  */
493 
494 static void
ge25519_p3_dbl(ge25519_p1p1 * r,const ge25519_p3 * p)495 ge25519_p3_dbl(ge25519_p1p1 *r, const ge25519_p3 *p)
496 {
497     ge25519_p2 q;
498     ge25519_p3_to_p2(&q, p);
499     ge25519_p2_dbl(r, &q);
500 }
501 
502 static void
ge25519_precomp_0(ge25519_precomp * h)503 ge25519_precomp_0(ge25519_precomp *h)
504 {
505     fe25519_1(h->yplusx);
506     fe25519_1(h->yminusx);
507     fe25519_0(h->xy2d);
508 }
509 
/*
 * Return 1 if b == c and 0 otherwise, without branching.
 */
static unsigned char
equal(signed char b, signed char c)
{
    /* 0 when the bytes match, 1..255 otherwise */
    const unsigned char diff = (unsigned char) b ^ (unsigned char) c;
    /* 0 - 1 wraps to 4294967295; 1..255 become 0..254 */
    const uint32_t      wrapped = (uint32_t) diff - 1U;

    /* the top bit is set only in the wrapped case */
    return (unsigned char) (wrapped >> 31);
}
523 
/*
 * Return 1 if b < 0 and 0 otherwise, without branching.
 */
static unsigned char
negative(signed char b)
{
    /*
     * Converting to 64 bits maps negative values to
     * 18446744073709551488..18446744073709551615 and leaves 0..127 as is,
     * so bit 63 is set exactly for negative inputs.
     */
    const uint64_t widened = (uint64_t) b;

    return (unsigned char) (widened >> 63);
}
534 
535 static void
ge25519_cmov(ge25519_precomp * t,const ge25519_precomp * u,unsigned char b)536 ge25519_cmov(ge25519_precomp *t, const ge25519_precomp *u, unsigned char b)
537 {
538     fe25519_cmov(t->yplusx, u->yplusx, b);
539     fe25519_cmov(t->yminusx, u->yminusx, b);
540     fe25519_cmov(t->xy2d, u->xy2d, b);
541 }
542 
543 static void
ge25519_cmov_cached(ge25519_cached * t,const ge25519_cached * u,unsigned char b)544 ge25519_cmov_cached(ge25519_cached *t, const ge25519_cached *u, unsigned char b)
545 {
546     fe25519_cmov(t->YplusX, u->YplusX, b);
547     fe25519_cmov(t->YminusX, u->YminusX, b);
548     fe25519_cmov(t->Z, u->Z, b);
549     fe25519_cmov(t->T2d, u->T2d, b);
550 }
551 
552 static void
ge25519_select(ge25519_precomp * t,const ge25519_precomp precomp[8],const signed char b)553 ge25519_select(ge25519_precomp *t, const ge25519_precomp precomp[8], const signed char b)
554 {
555     ge25519_precomp     minust;
556     const unsigned char bnegative = negative(b);
557     const unsigned char babs      = b - (((-bnegative) & b) * ((signed char) 1 << 1));
558 
559     ge25519_precomp_0(t);
560     ge25519_cmov(t, &precomp[0], equal(babs, 1));
561     ge25519_cmov(t, &precomp[1], equal(babs, 2));
562     ge25519_cmov(t, &precomp[2], equal(babs, 3));
563     ge25519_cmov(t, &precomp[3], equal(babs, 4));
564     ge25519_cmov(t, &precomp[4], equal(babs, 5));
565     ge25519_cmov(t, &precomp[5], equal(babs, 6));
566     ge25519_cmov(t, &precomp[6], equal(babs, 7));
567     ge25519_cmov(t, &precomp[7], equal(babs, 8));
568     fe25519_copy(minust.yplusx, t->yminusx);
569     fe25519_copy(minust.yminusx, t->yplusx);
570     fe25519_neg(minust.xy2d, t->xy2d);
571     ge25519_cmov(t, &minust, bnegative);
572 }
573 
574 static void
ge25519_select_base(ge25519_precomp * t,const int pos,const signed char b)575 ge25519_select_base(ge25519_precomp *t, const int pos, const signed char b)
576 {
577     static const ge25519_precomp base[32][8] = { /* base[i][j] = (j+1)*256^i*B */
578 #ifdef HAVE_TI_MODE
579 # include "fe_51/base.h"
580 #else
581 # include "fe_25_5/base.h"
582 #endif
583     };
584     ge25519_select(t, base[pos], b);
585 }
586 
587 static void
ge25519_select_cached(ge25519_cached * t,const ge25519_cached cached[8],const signed char b)588 ge25519_select_cached(ge25519_cached *t, const ge25519_cached cached[8], const signed char b)
589 {
590     ge25519_cached      minust;
591     const unsigned char bnegative = negative(b);
592     const unsigned char babs      = b - (((-bnegative) & b) * ((signed char) 1 << 1));
593 
594     ge25519_cached_0(t);
595     ge25519_cmov_cached(t, &cached[0], equal(babs, 1));
596     ge25519_cmov_cached(t, &cached[1], equal(babs, 2));
597     ge25519_cmov_cached(t, &cached[2], equal(babs, 3));
598     ge25519_cmov_cached(t, &cached[3], equal(babs, 4));
599     ge25519_cmov_cached(t, &cached[4], equal(babs, 5));
600     ge25519_cmov_cached(t, &cached[5], equal(babs, 6));
601     ge25519_cmov_cached(t, &cached[6], equal(babs, 7));
602     ge25519_cmov_cached(t, &cached[7], equal(babs, 8));
603     fe25519_copy(minust.YplusX, t->YminusX);
604     fe25519_copy(minust.YminusX, t->YplusX);
605     fe25519_copy(minust.Z, t->Z);
606     fe25519_neg(minust.T2d, t->T2d);
607     ge25519_cmov_cached(t, &minust, bnegative);
608 }
609 
610 /*
611  r = p - q
612  */
613 
614 void
ge25519_sub(ge25519_p1p1 * r,const ge25519_p3 * p,const ge25519_cached * q)615 ge25519_sub(ge25519_p1p1 *r, const ge25519_p3 *p, const ge25519_cached *q)
616 {
617     fe25519 t0;
618 
619     fe25519_add(r->X, p->Y, p->X);
620     fe25519_sub(r->Y, p->Y, p->X);
621     fe25519_mul(r->Z, r->X, q->YminusX);
622     fe25519_mul(r->Y, r->Y, q->YplusX);
623     fe25519_mul(r->T, q->T2d, p->T);
624     fe25519_mul(r->X, p->Z, q->Z);
625     fe25519_add(t0, r->X, r->X);
626     fe25519_sub(r->X, r->Z, r->Y);
627     fe25519_add(r->Y, r->Z, r->Y);
628     fe25519_sub(r->Z, t0, r->T);
629     fe25519_add(r->T, t0, r->T);
630 }
631 
632 void
ge25519_tobytes(unsigned char * s,const ge25519_p2 * h)633 ge25519_tobytes(unsigned char *s, const ge25519_p2 *h)
634 {
635     fe25519 recip;
636     fe25519 x;
637     fe25519 y;
638 
639     fe25519_invert(recip, h->Z);
640     fe25519_mul(x, h->X, recip);
641     fe25519_mul(y, h->Y, recip);
642     fe25519_tobytes(s, y);
643     s[31] ^= fe25519_isnegative(x) << 7;
644 }
645 
646 /*
647  r = a * A + b * B
648  where a = a[0]+256*a[1]+...+256^31 a[31].
649  and b = b[0]+256*b[1]+...+256^31 b[31].
650  B is the Ed25519 base point (x,4/5) with x positive.
651 
652  Only used for signatures verification.
653  */
654 
655 void
ge25519_double_scalarmult_vartime(ge25519_p2 * r,const unsigned char * a,const ge25519_p3 * A,const unsigned char * b)656 ge25519_double_scalarmult_vartime(ge25519_p2 *r, const unsigned char *a,
657                                   const ge25519_p3 *A, const unsigned char *b)
658 {
659     static const ge25519_precomp Bi[8] = {
660 #ifdef HAVE_TI_MODE
661 # include "fe_51/base2.h"
662 #else
663 # include "fe_25_5/base2.h"
664 #endif
665     };
666     signed char    aslide[256];
667     signed char    bslide[256];
668     ge25519_cached Ai[8]; /* A,3A,5A,7A,9A,11A,13A,15A */
669     ge25519_p1p1   t;
670     ge25519_p3     u;
671     ge25519_p3     A2;
672     int            i;
673 
674     slide_vartime(aslide, a);
675     slide_vartime(bslide, b);
676 
677     ge25519_p3_to_cached(&Ai[0], A);
678 
679     ge25519_p3_dbl(&t, A);
680     ge25519_p1p1_to_p3(&A2, &t);
681 
682     ge25519_add(&t, &A2, &Ai[0]);
683     ge25519_p1p1_to_p3(&u, &t);
684     ge25519_p3_to_cached(&Ai[1], &u);
685 
686     ge25519_add(&t, &A2, &Ai[1]);
687     ge25519_p1p1_to_p3(&u, &t);
688     ge25519_p3_to_cached(&Ai[2], &u);
689 
690     ge25519_add(&t, &A2, &Ai[2]);
691     ge25519_p1p1_to_p3(&u, &t);
692     ge25519_p3_to_cached(&Ai[3], &u);
693 
694     ge25519_add(&t, &A2, &Ai[3]);
695     ge25519_p1p1_to_p3(&u, &t);
696     ge25519_p3_to_cached(&Ai[4], &u);
697 
698     ge25519_add(&t, &A2, &Ai[4]);
699     ge25519_p1p1_to_p3(&u, &t);
700     ge25519_p3_to_cached(&Ai[5], &u);
701 
702     ge25519_add(&t, &A2, &Ai[5]);
703     ge25519_p1p1_to_p3(&u, &t);
704     ge25519_p3_to_cached(&Ai[6], &u);
705 
706     ge25519_add(&t, &A2, &Ai[6]);
707     ge25519_p1p1_to_p3(&u, &t);
708     ge25519_p3_to_cached(&Ai[7], &u);
709 
710     ge25519_p2_0(r);
711 
712     for (i = 255; i >= 0; --i) {
713         if (aslide[i] || bslide[i]) {
714             break;
715         }
716     }
717 
718     for (; i >= 0; --i) {
719         ge25519_p2_dbl(&t, r);
720 
721         if (aslide[i] > 0) {
722             ge25519_p1p1_to_p3(&u, &t);
723             ge25519_add(&t, &u, &Ai[aslide[i] / 2]);
724         } else if (aslide[i] < 0) {
725             ge25519_p1p1_to_p3(&u, &t);
726             ge25519_sub(&t, &u, &Ai[(-aslide[i]) / 2]);
727         }
728 
729         if (bslide[i] > 0) {
730             ge25519_p1p1_to_p3(&u, &t);
731             ge25519_madd(&t, &u, &Bi[bslide[i] / 2]);
732         } else if (bslide[i] < 0) {
733             ge25519_p1p1_to_p3(&u, &t);
734             ge25519_msub(&t, &u, &Bi[(-bslide[i]) / 2]);
735         }
736 
737         ge25519_p1p1_to_p2(r, &t);
738     }
739 }
740 
741 /*
742  h = a * p
743  where a = a[0]+256*a[1]+...+256^31 a[31]
744 
745  Preconditions:
746  a[31] <= 127
747 
748  p is public
749  */
750 
751 static void __noinline
ge25519_scalarmult_cache(ge25519_cached pi[static8],const ge25519_p3 * p)752 ge25519_scalarmult_cache(ge25519_cached pi[static 8], const ge25519_p3 *p)
753 {
754     ge25519_p1p1    t1;
755     ge25519_p3      p2, p3, p4, pt;
756 
757     ge25519_p3_to_cached(&pi[1 - 1], p);   /* p */
758 
759     ge25519_p3_dbl(&t1, p);
760     ge25519_p1p1_to_p3(&p2, &t1);
761     ge25519_p3_to_cached(&pi[2 - 1], &p2); /* 2p = 2*p */
762 
763     ge25519_add(&t1, p, &pi[2 - 1]);
764     ge25519_p1p1_to_p3(&p3, &t1);
765     ge25519_p3_to_cached(&pi[3 - 1], &p3); /* 3p = 2p+p */
766 
767     ge25519_p3_dbl(&t1, &p2);
768     ge25519_p1p1_to_p3(&p4, &t1);
769     ge25519_p3_to_cached(&pi[4 - 1], &p4); /* 4p = 2*2p */
770 
771     ge25519_add(&t1, p, &pi[4 - 1]);
772     ge25519_p1p1_to_p3(&pt, &t1);
773     ge25519_p3_to_cached(&pi[5 - 1], &pt); /* 5p = 4p+p */
774 
775     ge25519_p3_dbl(&t1, &p3);
776     ge25519_p1p1_to_p3(&pt, &t1);
777     ge25519_p3_to_cached(&pi[6 - 1], &pt); /* 6p = 2*3p */
778 
779     ge25519_add(&t1, p, &pi[6 - 1]);
780     ge25519_p1p1_to_p3(&pt, &t1);
781     ge25519_p3_to_cached(&pi[7 - 1], &pt); /* 7p = 6p+p */
782 
783     ge25519_p3_dbl(&t1, &p4);
784     ge25519_p1p1_to_p3(&pt, &t1);
785     ge25519_p3_to_cached(&pi[8 - 1], &pt); /* 8p = 2*4p */
786 }
787 
788 static void __noinline
ge25519_scalarmult_cached(ge25519_p3 * h,const unsigned char * a,const ge25519_p3 * p,const ge25519_cached pi[static8])789 ge25519_scalarmult_cached(ge25519_p3 *h, const unsigned char *a,
790     const ge25519_p3 *p, const ge25519_cached pi[static 8])
791 {
792     signed char     e[64];
793     signed char     carry;
794     ge25519_p1p1    r;
795     ge25519_p2      s;
796     ge25519_cached  t;
797     int             i;
798 
799     for (i = 0; i < 32; ++i) {
800         e[2 * i + 0] = (a[i] >> 0) & 15;
801         e[2 * i + 1] = (a[i] >> 4) & 15;
802     }
803     /* each e[i] is between 0 and 15 */
804     /* e[63] is between 0 and 7 */
805 
806     carry = 0;
807     for (i = 0; i < 63; ++i) {
808         e[i] += carry;
809         carry = e[i] + 8;
810         carry >>= 4;
811         e[i] -= carry * ((signed char) 1 << 4);
812     }
813     e[63] += carry;
814     /* each e[i] is between -8 and 8 */
815 
816     ge25519_p3_0(h);
817 
818     for (i = 63; i != 0; i--) {
819         ge25519_select_cached(&t, pi, e[i]);
820         ge25519_add(&r, h, &t);
821 
822         ge25519_p1p1_to_p2(&s, &r);
823         ge25519_p2_dbl(&r, &s);
824         ge25519_p1p1_to_p2(&s, &r);
825         ge25519_p2_dbl(&r, &s);
826         ge25519_p1p1_to_p2(&s, &r);
827         ge25519_p2_dbl(&r, &s);
828         ge25519_p1p1_to_p2(&s, &r);
829         ge25519_p2_dbl(&r, &s);
830 
831         ge25519_p1p1_to_p3(h, &r);  /* *16 */
832     }
833     ge25519_select_cached(&t, pi, e[i]);
834     ge25519_add(&r, h, &t);
835 
836     ge25519_p1p1_to_p3(h, &r);
837 }
838 
839 void
ge25519_scalarmult(ge25519_p3 * h,const unsigned char * a,const ge25519_p3 * p)840 ge25519_scalarmult(ge25519_p3 *h, const unsigned char *a, const ge25519_p3 *p)
841 {
842     ge25519_cached  pi[8];
843 
844     ge25519_scalarmult_cache(pi, p);
845     ge25519_scalarmult_cached(h, a, p, pi);
846 }
847 
848 /*
849  h = a * B (with precomputation)
850  where a = a[0]+256*a[1]+...+256^31 a[31]
851  B is the Ed25519 base point (x,4/5) with x positive
852  (as bytes: 0x5866666666666666666666666666666666666666666666666666666666666666)
853 
854  Preconditions:
855  a[31] <= 127
856  */
857 
858 void
ge25519_scalarmult_base(ge25519_p3 * h,const unsigned char * a)859 ge25519_scalarmult_base(ge25519_p3 *h, const unsigned char *a)
860 {
861     signed char     e[64];
862     signed char     carry;
863     ge25519_p1p1    r;
864     ge25519_p2      s;
865     ge25519_precomp t;
866     int             i;
867 
868     for (i = 0; i < 32; ++i) {
869         e[2 * i + 0] = (a[i] >> 0) & 15;
870         e[2 * i + 1] = (a[i] >> 4) & 15;
871     }
872     /* each e[i] is between 0 and 15 */
873     /* e[63] is between 0 and 7 */
874 
875     carry = 0;
876     for (i = 0; i < 63; ++i) {
877         e[i] += carry;
878         carry = e[i] + 8;
879         carry >>= 4;
880         e[i] -= carry * ((signed char) 1 << 4);
881     }
882     e[63] += carry;
883     /* each e[i] is between -8 and 8 */
884 
885     ge25519_p3_0(h);
886 
887     for (i = 1; i < 64; i += 2) {
888         ge25519_select_base(&t, i / 2, e[i]);
889         ge25519_madd(&r, h, &t);
890         ge25519_p1p1_to_p3(h, &r);
891     }
892 
893     ge25519_p3_dbl(&r, h);
894     ge25519_p1p1_to_p2(&s, &r);
895     ge25519_p2_dbl(&r, &s);
896     ge25519_p1p1_to_p2(&s, &r);
897     ge25519_p2_dbl(&r, &s);
898     ge25519_p1p1_to_p2(&s, &r);
899     ge25519_p2_dbl(&r, &s);
900     ge25519_p1p1_to_p3(h, &r);
901 
902     for (i = 0; i < 64; i += 2) {
903         ge25519_select_base(&t, i / 2, e[i]);
904         ge25519_madd(&r, h, &t);
905         ge25519_p1p1_to_p3(h, &r);
906     }
907 }
908 
909 /* multiply by the order of the main subgroup l = 2^252+27742317777372353535851937790883648493 */
910 static void
ge25519_mul_l(ge25519_p3 * r,const ge25519_p3 * A)911 ge25519_mul_l(ge25519_p3 *r, const ge25519_p3 *A)
912 {
913     static const signed char aslide[253] = {
914         13, 0, 0, 0, 0, -1, 0, 0, 0, 0, -11, 0, 0, 0, 0, 0, 0, -5, 0, 0, 0, 0, 0, 0, -3, 0, 0, 0, 0, -13, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, -13, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 11, 0, 0, 0, 0, 0, 11, 0, 0, 0, 0, -13, 0, 0, 0, 0, 0, 0, -3, 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 3, 0, 0, 0, 0, -11, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, -1, 0, 0, 0, 0, 7, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1
915     };
916     ge25519_cached Ai[8];
917     ge25519_p1p1   t;
918     ge25519_p3     u;
919     ge25519_p3     A2;
920     int            i;
921 
922     ge25519_p3_to_cached(&Ai[0], A);
923     ge25519_p3_dbl(&t, A);
924     ge25519_p1p1_to_p3(&A2, &t);
925     ge25519_add(&t, &A2, &Ai[0]);
926     ge25519_p1p1_to_p3(&u, &t);
927     ge25519_p3_to_cached(&Ai[1], &u);
928     ge25519_add(&t, &A2, &Ai[1]);
929     ge25519_p1p1_to_p3(&u, &t);
930     ge25519_p3_to_cached(&Ai[2], &u);
931     ge25519_add(&t, &A2, &Ai[2]);
932     ge25519_p1p1_to_p3(&u, &t);
933     ge25519_p3_to_cached(&Ai[3], &u);
934     ge25519_add(&t, &A2, &Ai[3]);
935     ge25519_p1p1_to_p3(&u, &t);
936     ge25519_p3_to_cached(&Ai[4], &u);
937     ge25519_add(&t, &A2, &Ai[4]);
938     ge25519_p1p1_to_p3(&u, &t);
939     ge25519_p3_to_cached(&Ai[5], &u);
940     ge25519_add(&t, &A2, &Ai[5]);
941     ge25519_p1p1_to_p3(&u, &t);
942     ge25519_p3_to_cached(&Ai[6], &u);
943     ge25519_add(&t, &A2, &Ai[6]);
944     ge25519_p1p1_to_p3(&u, &t);
945     ge25519_p3_to_cached(&Ai[7], &u);
946 
947     ge25519_p3_0(r);
948 
949     for (i = 252; i >= 0; --i) {
950         ge25519_p3_dbl(&t, r);
951 
952         if (aslide[i] > 0) {
953             ge25519_p1p1_to_p3(&u, &t);
954             ge25519_add(&t, &u, &Ai[aslide[i] / 2]);
955         } else if (aslide[i] < 0) {
956             ge25519_p1p1_to_p3(&u, &t);
957             ge25519_sub(&t, &u, &Ai[(-aslide[i]) / 2]);
958         }
959 
960         ge25519_p1p1_to_p3(r, &t);
961     }
962 }
963 
964 int
ge25519_is_on_curve(const ge25519_p3 * p)965 ge25519_is_on_curve(const ge25519_p3 *p)
966 {
967     fe25519 x2;
968     fe25519 y2;
969     fe25519 z2;
970     fe25519 z4;
971     fe25519 t0;
972     fe25519 t1;
973 
974     fe25519_sq(x2, p->X);
975     fe25519_sq(y2, p->Y);
976     fe25519_sq(z2, p->Z);
977     fe25519_sub(t0, y2, x2);
978     fe25519_mul(t0, t0, z2);
979 
980     fe25519_mul(t1, x2, y2);
981     fe25519_mul(t1, t1, d);
982     fe25519_sq(z4, z2);
983     fe25519_add(t1, t1, z4);
984     fe25519_sub(t0, t0, t1);
985 
986     return fe25519_iszero(t0);
987 }
988 
989 int
ge25519_is_on_main_subgroup(const ge25519_p3 * p)990 ge25519_is_on_main_subgroup(const ge25519_p3 *p)
991 {
992     ge25519_p3 pl;
993 
994     ge25519_mul_l(&pl, p);
995 
996     return fe25519_iszero(pl.X);
997 }
998 
/*
 * Return 1 if the 255-bit little-endian value in s (top bit of s[31]
 * ignored) is below ed ff ... ff 7f, i.e. 2^255 - 19, and 0 otherwise.
 * No branch depends on the byte values.
 */
int
ge25519_is_canonical(const unsigned char *s)
{
    unsigned char upper; /* becomes 0xff iff s[1..31] match ff ... ff 7f */
    unsigned char lower; /* becomes 0xff iff s[0] >= 0xed */
    unsigned int  idx;

    upper = (s[31] & 0x7f) ^ 0x7f;
    for (idx = 1; idx <= 30; idx++) {
        upper |= s[idx] ^ 0xff;
    }
    /* 0 - 1 wraps, so the shifted value has its low byte all ones */
    upper = (((unsigned int) upper) - 1U) >> 8;
    /* 0xec - s[0] wraps exactly when s[0] > 0xec */
    lower = (0xed - 1U - (unsigned int) s[0]) >> 8;

    return 1 - (upper & lower & 1);
}
1015 
1016 int
ge25519_has_small_order(const unsigned char s[32])1017 ge25519_has_small_order(const unsigned char s[32])
1018 {
1019     CRYPTO_ALIGN(16)
1020     static const unsigned char blacklist[][32] = {
1021         /* 0 (order 4) */
1022         { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1023           0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1024           0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
1025         /* 1 (order 1) */
1026         { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1027           0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1028           0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
1029         /* 2707385501144840649318225287225658788936804267575313519463743609750303402022
1030            (order 8) */
1031         { 0x26, 0xe8, 0x95, 0x8f, 0xc2, 0xb2, 0x27, 0xb0, 0x45, 0xc3, 0xf4,
1032           0x89, 0xf2, 0xef, 0x98, 0xf0, 0xd5, 0xdf, 0xac, 0x05, 0xd3, 0xc6,
1033           0x33, 0x39, 0xb1, 0x38, 0x02, 0x88, 0x6d, 0x53, 0xfc, 0x05 },
1034         /* 55188659117513257062467267217118295137698188065244968500265048394206261417927
1035            (order 8) */
1036         { 0xc7, 0x17, 0x6a, 0x70, 0x3d, 0x4d, 0xd8, 0x4f, 0xba, 0x3c, 0x0b,
1037           0x76, 0x0d, 0x10, 0x67, 0x0f, 0x2a, 0x20, 0x53, 0xfa, 0x2c, 0x39,
1038           0xcc, 0xc6, 0x4e, 0xc7, 0xfd, 0x77, 0x92, 0xac, 0x03, 0x7a },
1039         /* p-1 (order 2) */
1040         { 0xec, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1041           0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1042           0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
1043         /* p (=0, order 4) */
1044         { 0xed, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1045           0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1046           0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
1047         /* p+1 (=1, order 1) */
1048         { 0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1049           0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1050           0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }
1051     };
1052     unsigned char c[7] = { 0 };
1053     unsigned int  k;
1054     size_t        i, j;
1055 
1056     COMPILER_ASSERT(7 == sizeof blacklist / sizeof blacklist[0]);
1057     for (j = 0; j < 31; j++) {
1058         for (i = 0; i < sizeof blacklist / sizeof blacklist[0]; i++) {
1059             c[i] |= s[j] ^ blacklist[i][j];
1060         }
1061     }
1062     for (i = 0; i < sizeof blacklist / sizeof blacklist[0]; i++) {
1063         c[i] |= (s[j] & 0x7f) ^ blacklist[i][j];
1064     }
1065     k = 0;
1066     for (i = 0; i < sizeof blacklist / sizeof blacklist[0]; i++) {
1067         k |= (c[i] - 1);
1068     }
1069     return (int) ((k >> 8) & 1);
1070 }
1071 
/*
 * Input:
 *   a[0]+256*a[1]+...+256^31*a[31] = a
 *   b[0]+256*b[1]+...+256^31*b[31] = b
 *   c[0]+256*c[1]+...+256^31*c[31] = c
 *
 * Output:
 *   s[0]+256*s[1]+...+256^31*s[31] = (ab+c) mod l
 *   where l = 2^252 + 27742317777372353535851937790883648493.
 *
 * Implementation outline:
 *   - a, b and c are each split into twelve limbs of 21 bits
 *     (2097151 == 2^21 - 1); the top limb keeps whatever bits remain.
 *   - ab+c is formed as a schoolbook product into limbs s0..s23.
 *   - Limbs s12 and above are folded back into the low limbs using the
 *     constants 666643, 470296, 654183, -997805, 136657, -683901, which are
 *     the signed 21-bit digits of 2^252 - l (so 2^252 is congruent to that
 *     value mod l).
 *   - Carries are subtracted as carry * ((uint64_t) 1L << 21), i.e. with an
 *     unsigned multiplier rather than a left shift of a possibly negative
 *     signed value.
 *   - The right shifts of possibly negative int64_t values rely on the
 *     compiler implementing them as arithmetic shifts.
 */

void
sc25519_muladd(unsigned char *s, const unsigned char *a,
               const unsigned char *b, const unsigned char *c)
{
    /* Unpack a into 21-bit limbs. */
    int64_t a0  = 2097151 & load_3(a);
    int64_t a1  = 2097151 & (load_4(a + 2) >> 5);
    int64_t a2  = 2097151 & (load_3(a + 5) >> 2);
    int64_t a3  = 2097151 & (load_4(a + 7) >> 7);
    int64_t a4  = 2097151 & (load_4(a + 10) >> 4);
    int64_t a5  = 2097151 & (load_3(a + 13) >> 1);
    int64_t a6  = 2097151 & (load_4(a + 15) >> 6);
    int64_t a7  = 2097151 & (load_3(a + 18) >> 3);
    int64_t a8  = 2097151 & load_3(a + 21);
    int64_t a9  = 2097151 & (load_4(a + 23) >> 5);
    int64_t a10 = 2097151 & (load_3(a + 26) >> 2);
    int64_t a11 = (load_4(a + 28) >> 7);

    /* Unpack b into 21-bit limbs. */
    int64_t b0  = 2097151 & load_3(b);
    int64_t b1  = 2097151 & (load_4(b + 2) >> 5);
    int64_t b2  = 2097151 & (load_3(b + 5) >> 2);
    int64_t b3  = 2097151 & (load_4(b + 7) >> 7);
    int64_t b4  = 2097151 & (load_4(b + 10) >> 4);
    int64_t b5  = 2097151 & (load_3(b + 13) >> 1);
    int64_t b6  = 2097151 & (load_4(b + 15) >> 6);
    int64_t b7  = 2097151 & (load_3(b + 18) >> 3);
    int64_t b8  = 2097151 & load_3(b + 21);
    int64_t b9  = 2097151 & (load_4(b + 23) >> 5);
    int64_t b10 = 2097151 & (load_3(b + 26) >> 2);
    int64_t b11 = (load_4(b + 28) >> 7);

    /* Unpack c into 21-bit limbs. */
    int64_t c0  = 2097151 & load_3(c);
    int64_t c1  = 2097151 & (load_4(c + 2) >> 5);
    int64_t c2  = 2097151 & (load_3(c + 5) >> 2);
    int64_t c3  = 2097151 & (load_4(c + 7) >> 7);
    int64_t c4  = 2097151 & (load_4(c + 10) >> 4);
    int64_t c5  = 2097151 & (load_3(c + 13) >> 1);
    int64_t c6  = 2097151 & (load_4(c + 15) >> 6);
    int64_t c7  = 2097151 & (load_3(c + 18) >> 3);
    int64_t c8  = 2097151 & load_3(c + 21);
    int64_t c9  = 2097151 & (load_4(c + 23) >> 5);
    int64_t c10 = 2097151 & (load_3(c + 26) >> 2);
    int64_t c11 = (load_4(c + 28) >> 7);

    int64_t s0;
    int64_t s1;
    int64_t s2;
    int64_t s3;
    int64_t s4;
    int64_t s5;
    int64_t s6;
    int64_t s7;
    int64_t s8;
    int64_t s9;
    int64_t s10;
    int64_t s11;
    int64_t s12;
    int64_t s13;
    int64_t s14;
    int64_t s15;
    int64_t s16;
    int64_t s17;
    int64_t s18;
    int64_t s19;
    int64_t s20;
    int64_t s21;
    int64_t s22;
    int64_t s23;

    int64_t carry0;
    int64_t carry1;
    int64_t carry2;
    int64_t carry3;
    int64_t carry4;
    int64_t carry5;
    int64_t carry6;
    int64_t carry7;
    int64_t carry8;
    int64_t carry9;
    int64_t carry10;
    int64_t carry11;
    int64_t carry12;
    int64_t carry13;
    int64_t carry14;
    int64_t carry15;
    int64_t carry16;
    int64_t carry17;
    int64_t carry18;
    int64_t carry19;
    int64_t carry20;
    int64_t carry21;
    int64_t carry22;

    /* Schoolbook product a*b with c added into the low twelve limbs. */
    s0 = c0 + a0 * b0;
    s1 = c1 + a0 * b1 + a1 * b0;
    s2 = c2 + a0 * b2 + a1 * b1 + a2 * b0;
    s3 = c3 + a0 * b3 + a1 * b2 + a2 * b1 + a3 * b0;
    s4 = c4 + a0 * b4 + a1 * b3 + a2 * b2 + a3 * b1 + a4 * b0;
    s5 = c5 + a0 * b5 + a1 * b4 + a2 * b3 + a3 * b2 + a4 * b1 + a5 * b0;
    s6 = c6 + a0 * b6 + a1 * b5 + a2 * b4 + a3 * b3 + a4 * b2 + a5 * b1 +
         a6 * b0;
    s7 = c7 + a0 * b7 + a1 * b6 + a2 * b5 + a3 * b4 + a4 * b3 + a5 * b2 +
         a6 * b1 + a7 * b0;
    s8 = c8 + a0 * b8 + a1 * b7 + a2 * b6 + a3 * b5 + a4 * b4 + a5 * b3 +
         a6 * b2 + a7 * b1 + a8 * b0;
    s9 = c9 + a0 * b9 + a1 * b8 + a2 * b7 + a3 * b6 + a4 * b5 + a5 * b4 +
         a6 * b3 + a7 * b2 + a8 * b1 + a9 * b0;
    s10 = c10 + a0 * b10 + a1 * b9 + a2 * b8 + a3 * b7 + a4 * b6 + a5 * b5 +
          a6 * b4 + a7 * b3 + a8 * b2 + a9 * b1 + a10 * b0;
    s11 = c11 + a0 * b11 + a1 * b10 + a2 * b9 + a3 * b8 + a4 * b7 + a5 * b6 +
          a6 * b5 + a7 * b4 + a8 * b3 + a9 * b2 + a10 * b1 + a11 * b0;
    s12 = a1 * b11 + a2 * b10 + a3 * b9 + a4 * b8 + a5 * b7 + a6 * b6 +
          a7 * b5 + a8 * b4 + a9 * b3 + a10 * b2 + a11 * b1;
    s13 = a2 * b11 + a3 * b10 + a4 * b9 + a5 * b8 + a6 * b7 + a7 * b6 +
          a8 * b5 + a9 * b4 + a10 * b3 + a11 * b2;
    s14 = a3 * b11 + a4 * b10 + a5 * b9 + a6 * b8 + a7 * b7 + a8 * b6 +
          a9 * b5 + a10 * b4 + a11 * b3;
    s15 = a4 * b11 + a5 * b10 + a6 * b9 + a7 * b8 + a8 * b7 + a9 * b6 +
          a10 * b5 + a11 * b4;
    s16 =
        a5 * b11 + a6 * b10 + a7 * b9 + a8 * b8 + a9 * b7 + a10 * b6 + a11 * b5;
    s17 = a6 * b11 + a7 * b10 + a8 * b9 + a9 * b8 + a10 * b7 + a11 * b6;
    s18 = a7 * b11 + a8 * b10 + a9 * b9 + a10 * b8 + a11 * b7;
    s19 = a8 * b11 + a9 * b10 + a10 * b9 + a11 * b8;
    s20 = a9 * b11 + a10 * b10 + a11 * b9;
    s21 = a10 * b11 + a11 * b10;
    s22 = a11 * b11;
    s23 = 0;

    /* Rounded carries out of the even limbs 0..22. */
    carry0 = (s0 + (int64_t) (1L << 20)) >> 21;
    s1 += carry0;
    s0 -= carry0 * ((uint64_t) 1L << 21);
    carry2 = (s2 + (int64_t) (1L << 20)) >> 21;
    s3 += carry2;
    s2 -= carry2 * ((uint64_t) 1L << 21);
    carry4 = (s4 + (int64_t) (1L << 20)) >> 21;
    s5 += carry4;
    s4 -= carry4 * ((uint64_t) 1L << 21);
    carry6 = (s6 + (int64_t) (1L << 20)) >> 21;
    s7 += carry6;
    s6 -= carry6 * ((uint64_t) 1L << 21);
    carry8 = (s8 + (int64_t) (1L << 20)) >> 21;
    s9 += carry8;
    s8 -= carry8 * ((uint64_t) 1L << 21);
    carry10 = (s10 + (int64_t) (1L << 20)) >> 21;
    s11 += carry10;
    s10 -= carry10 * ((uint64_t) 1L << 21);
    carry12 = (s12 + (int64_t) (1L << 20)) >> 21;
    s13 += carry12;
    s12 -= carry12 * ((uint64_t) 1L << 21);
    carry14 = (s14 + (int64_t) (1L << 20)) >> 21;
    s15 += carry14;
    s14 -= carry14 * ((uint64_t) 1L << 21);
    carry16 = (s16 + (int64_t) (1L << 20)) >> 21;
    s17 += carry16;
    s16 -= carry16 * ((uint64_t) 1L << 21);
    carry18 = (s18 + (int64_t) (1L << 20)) >> 21;
    s19 += carry18;
    s18 -= carry18 * ((uint64_t) 1L << 21);
    carry20 = (s20 + (int64_t) (1L << 20)) >> 21;
    s21 += carry20;
    s20 -= carry20 * ((uint64_t) 1L << 21);
    carry22 = (s22 + (int64_t) (1L << 20)) >> 21;
    s23 += carry22;
    s22 -= carry22 * ((uint64_t) 1L << 21);

    /* Rounded carries out of the odd limbs 1..21. */
    carry1 = (s1 + (int64_t) (1L << 20)) >> 21;
    s2 += carry1;
    s1 -= carry1 * ((uint64_t) 1L << 21);
    carry3 = (s3 + (int64_t) (1L << 20)) >> 21;
    s4 += carry3;
    s3 -= carry3 * ((uint64_t) 1L << 21);
    carry5 = (s5 + (int64_t) (1L << 20)) >> 21;
    s6 += carry5;
    s5 -= carry5 * ((uint64_t) 1L << 21);
    carry7 = (s7 + (int64_t) (1L << 20)) >> 21;
    s8 += carry7;
    s7 -= carry7 * ((uint64_t) 1L << 21);
    carry9 = (s9 + (int64_t) (1L << 20)) >> 21;
    s10 += carry9;
    s9 -= carry9 * ((uint64_t) 1L << 21);
    carry11 = (s11 + (int64_t) (1L << 20)) >> 21;
    s12 += carry11;
    s11 -= carry11 * ((uint64_t) 1L << 21);
    carry13 = (s13 + (int64_t) (1L << 20)) >> 21;
    s14 += carry13;
    s13 -= carry13 * ((uint64_t) 1L << 21);
    carry15 = (s15 + (int64_t) (1L << 20)) >> 21;
    s16 += carry15;
    s15 -= carry15 * ((uint64_t) 1L << 21);
    carry17 = (s17 + (int64_t) (1L << 20)) >> 21;
    s18 += carry17;
    s17 -= carry17 * ((uint64_t) 1L << 21);
    carry19 = (s19 + (int64_t) (1L << 20)) >> 21;
    s20 += carry19;
    s19 -= carry19 * ((uint64_t) 1L << 21);
    carry21 = (s21 + (int64_t) (1L << 20)) >> 21;
    s22 += carry21;
    s21 -= carry21 * ((uint64_t) 1L << 21);

    /* Fold limbs s23..s18 down by twelve positions. */
    s11 += s23 * 666643;
    s12 += s23 * 470296;
    s13 += s23 * 654183;
    s14 -= s23 * 997805;
    s15 += s23 * 136657;
    s16 -= s23 * 683901;

    s10 += s22 * 666643;
    s11 += s22 * 470296;
    s12 += s22 * 654183;
    s13 -= s22 * 997805;
    s14 += s22 * 136657;
    s15 -= s22 * 683901;

    s9 += s21 * 666643;
    s10 += s21 * 470296;
    s11 += s21 * 654183;
    s12 -= s21 * 997805;
    s13 += s21 * 136657;
    s14 -= s21 * 683901;

    s8 += s20 * 666643;
    s9 += s20 * 470296;
    s10 += s20 * 654183;
    s11 -= s20 * 997805;
    s12 += s20 * 136657;
    s13 -= s20 * 683901;

    s7 += s19 * 666643;
    s8 += s19 * 470296;
    s9 += s19 * 654183;
    s10 -= s19 * 997805;
    s11 += s19 * 136657;
    s12 -= s19 * 683901;

    s6 += s18 * 666643;
    s7 += s18 * 470296;
    s8 += s18 * 654183;
    s9 -= s18 * 997805;
    s10 += s18 * 136657;
    s11 -= s18 * 683901;

    /* Rounded carries through limbs 6..16 (even limbs first, then odd). */
    carry6 = (s6 + (int64_t) (1L << 20)) >> 21;
    s7 += carry6;
    s6 -= carry6 * ((uint64_t) 1L << 21);
    carry8 = (s8 + (int64_t) (1L << 20)) >> 21;
    s9 += carry8;
    s8 -= carry8 * ((uint64_t) 1L << 21);
    carry10 = (s10 + (int64_t) (1L << 20)) >> 21;
    s11 += carry10;
    s10 -= carry10 * ((uint64_t) 1L << 21);
    carry12 = (s12 + (int64_t) (1L << 20)) >> 21;
    s13 += carry12;
    s12 -= carry12 * ((uint64_t) 1L << 21);
    carry14 = (s14 + (int64_t) (1L << 20)) >> 21;
    s15 += carry14;
    s14 -= carry14 * ((uint64_t) 1L << 21);
    carry16 = (s16 + (int64_t) (1L << 20)) >> 21;
    s17 += carry16;
    s16 -= carry16 * ((uint64_t) 1L << 21);

    carry7 = (s7 + (int64_t) (1L << 20)) >> 21;
    s8 += carry7;
    s7 -= carry7 * ((uint64_t) 1L << 21);
    carry9 = (s9 + (int64_t) (1L << 20)) >> 21;
    s10 += carry9;
    s9 -= carry9 * ((uint64_t) 1L << 21);
    carry11 = (s11 + (int64_t) (1L << 20)) >> 21;
    s12 += carry11;
    s11 -= carry11 * ((uint64_t) 1L << 21);
    carry13 = (s13 + (int64_t) (1L << 20)) >> 21;
    s14 += carry13;
    s13 -= carry13 * ((uint64_t) 1L << 21);
    carry15 = (s15 + (int64_t) (1L << 20)) >> 21;
    s16 += carry15;
    s15 -= carry15 * ((uint64_t) 1L << 21);

    /* Fold limbs s17..s12 down by twelve positions. */
    s5 += s17 * 666643;
    s6 += s17 * 470296;
    s7 += s17 * 654183;
    s8 -= s17 * 997805;
    s9 += s17 * 136657;
    s10 -= s17 * 683901;

    s4 += s16 * 666643;
    s5 += s16 * 470296;
    s6 += s16 * 654183;
    s7 -= s16 * 997805;
    s8 += s16 * 136657;
    s9 -= s16 * 683901;

    s3 += s15 * 666643;
    s4 += s15 * 470296;
    s5 += s15 * 654183;
    s6 -= s15 * 997805;
    s7 += s15 * 136657;
    s8 -= s15 * 683901;

    s2 += s14 * 666643;
    s3 += s14 * 470296;
    s4 += s14 * 654183;
    s5 -= s14 * 997805;
    s6 += s14 * 136657;
    s7 -= s14 * 683901;

    s1 += s13 * 666643;
    s2 += s13 * 470296;
    s3 += s13 * 654183;
    s4 -= s13 * 997805;
    s5 += s13 * 136657;
    s6 -= s13 * 683901;

    s0 += s12 * 666643;
    s1 += s12 * 470296;
    s2 += s12 * 654183;
    s3 -= s12 * 997805;
    s4 += s12 * 136657;
    s5 -= s12 * 683901;
    s12 = 0;

    /* Rounded carries through limbs 0..11; the last one refills s12. */
    carry0 = (s0 + (int64_t) (1L << 20)) >> 21;
    s1 += carry0;
    s0 -= carry0 * ((uint64_t) 1L << 21);
    carry2 = (s2 + (int64_t) (1L << 20)) >> 21;
    s3 += carry2;
    s2 -= carry2 * ((uint64_t) 1L << 21);
    carry4 = (s4 + (int64_t) (1L << 20)) >> 21;
    s5 += carry4;
    s4 -= carry4 * ((uint64_t) 1L << 21);
    carry6 = (s6 + (int64_t) (1L << 20)) >> 21;
    s7 += carry6;
    s6 -= carry6 * ((uint64_t) 1L << 21);
    carry8 = (s8 + (int64_t) (1L << 20)) >> 21;
    s9 += carry8;
    s8 -= carry8 * ((uint64_t) 1L << 21);
    carry10 = (s10 + (int64_t) (1L << 20)) >> 21;
    s11 += carry10;
    s10 -= carry10 * ((uint64_t) 1L << 21);

    carry1 = (s1 + (int64_t) (1L << 20)) >> 21;
    s2 += carry1;
    s1 -= carry1 * ((uint64_t) 1L << 21);
    carry3 = (s3 + (int64_t) (1L << 20)) >> 21;
    s4 += carry3;
    s3 -= carry3 * ((uint64_t) 1L << 21);
    carry5 = (s5 + (int64_t) (1L << 20)) >> 21;
    s6 += carry5;
    s5 -= carry5 * ((uint64_t) 1L << 21);
    carry7 = (s7 + (int64_t) (1L << 20)) >> 21;
    s8 += carry7;
    s7 -= carry7 * ((uint64_t) 1L << 21);
    carry9 = (s9 + (int64_t) (1L << 20)) >> 21;
    s10 += carry9;
    s9 -= carry9 * ((uint64_t) 1L << 21);
    carry11 = (s11 + (int64_t) (1L << 20)) >> 21;
    s12 += carry11;
    s11 -= carry11 * ((uint64_t) 1L << 21);

    /* Fold the refilled s12 once more. */
    s0 += s12 * 666643;
    s1 += s12 * 470296;
    s2 += s12 * 654183;
    s3 -= s12 * 997805;
    s4 += s12 * 136657;
    s5 -= s12 * 683901;
    s12 = 0;

    /* Sequential carries without rounding through limbs 0..11. */
    carry0 = s0 >> 21;
    s1 += carry0;
    s0 -= carry0 * ((uint64_t) 1L << 21);
    carry1 = s1 >> 21;
    s2 += carry1;
    s1 -= carry1 * ((uint64_t) 1L << 21);
    carry2 = s2 >> 21;
    s3 += carry2;
    s2 -= carry2 * ((uint64_t) 1L << 21);
    carry3 = s3 >> 21;
    s4 += carry3;
    s3 -= carry3 * ((uint64_t) 1L << 21);
    carry4 = s4 >> 21;
    s5 += carry4;
    s4 -= carry4 * ((uint64_t) 1L << 21);
    carry5 = s5 >> 21;
    s6 += carry5;
    s5 -= carry5 * ((uint64_t) 1L << 21);
    carry6 = s6 >> 21;
    s7 += carry6;
    s6 -= carry6 * ((uint64_t) 1L << 21);
    carry7 = s7 >> 21;
    s8 += carry7;
    s7 -= carry7 * ((uint64_t) 1L << 21);
    carry8 = s8 >> 21;
    s9 += carry8;
    s8 -= carry8 * ((uint64_t) 1L << 21);
    carry9 = s9 >> 21;
    s10 += carry9;
    s9 -= carry9 * ((uint64_t) 1L << 21);
    carry10 = s10 >> 21;
    s11 += carry10;
    s10 -= carry10 * ((uint64_t) 1L << 21);
    carry11 = s11 >> 21;
    s12 += carry11;
    s11 -= carry11 * ((uint64_t) 1L << 21);

    /* Final fold of s12, followed by a last carry pass through limbs 0..10. */
    s0 += s12 * 666643;
    s1 += s12 * 470296;
    s2 += s12 * 654183;
    s3 -= s12 * 997805;
    s4 += s12 * 136657;
    s5 -= s12 * 683901;

    carry0 = s0 >> 21;
    s1 += carry0;
    s0 -= carry0 * ((uint64_t) 1L << 21);
    carry1 = s1 >> 21;
    s2 += carry1;
    s1 -= carry1 * ((uint64_t) 1L << 21);
    carry2 = s2 >> 21;
    s3 += carry2;
    s2 -= carry2 * ((uint64_t) 1L << 21);
    carry3 = s3 >> 21;
    s4 += carry3;
    s3 -= carry3 * ((uint64_t) 1L << 21);
    carry4 = s4 >> 21;
    s5 += carry4;
    s4 -= carry4 * ((uint64_t) 1L << 21);
    carry5 = s5 >> 21;
    s6 += carry5;
    s5 -= carry5 * ((uint64_t) 1L << 21);
    carry6 = s6 >> 21;
    s7 += carry6;
    s6 -= carry6 * ((uint64_t) 1L << 21);
    carry7 = s7 >> 21;
    s8 += carry7;
    s7 -= carry7 * ((uint64_t) 1L << 21);
    carry8 = s8 >> 21;
    s9 += carry8;
    s8 -= carry8 * ((uint64_t) 1L << 21);
    carry9 = s9 >> 21;
    s10 += carry9;
    s9 -= carry9 * ((uint64_t) 1L << 21);
    carry10 = s10 >> 21;
    s11 += carry10;
    s10 -= carry10 * ((uint64_t) 1L << 21);

    /* Repack the twelve limbs into 32 little-endian bytes. */
    s[0]  = s0 >> 0;
    s[1]  = s0 >> 8;
    s[2]  = (s0 >> 16) | (s1 * ((uint64_t) 1 << 5));
    s[3]  = s1 >> 3;
    s[4]  = s1 >> 11;
    s[5]  = (s1 >> 19) | (s2 * ((uint64_t) 1 << 2));
    s[6]  = s2 >> 6;
    s[7]  = (s2 >> 14) | (s3 * ((uint64_t) 1 << 7));
    s[8]  = s3 >> 1;
    s[9]  = s3 >> 9;
    s[10] = (s3 >> 17) | (s4 * ((uint64_t) 1 << 4));
    s[11] = s4 >> 4;
    s[12] = s4 >> 12;
    s[13] = (s4 >> 20) | (s5 * ((uint64_t) 1 << 1));
    s[14] = s5 >> 7;
    s[15] = (s5 >> 15) | (s6 * ((uint64_t) 1 << 6));
    s[16] = s6 >> 2;
    s[17] = s6 >> 10;
    s[18] = (s6 >> 18) | (s7 * ((uint64_t) 1 << 3));
    s[19] = s7 >> 5;
    s[20] = s7 >> 13;
    s[21] = s8 >> 0;
    s[22] = s8 >> 8;
    s[23] = (s8 >> 16) | (s9 * ((uint64_t) 1 << 5));
    s[24] = s9 >> 3;
    s[25] = s9 >> 11;
    s[26] = (s9 >> 19) | (s10 * ((uint64_t) 1 << 2));
    s[27] = s10 >> 6;
    s[28] = (s10 >> 14) | (s11 * ((uint64_t) 1 << 7));
    s[29] = s11 >> 1;
    s[30] = s11 >> 9;
    s[31] = s11 >> 17;
}
1559 
/*
 * Input:
 *   s[0]+256*s[1]+...+256^63*s[63] = s
 *
 * Output:
 *   s[0]+256*s[1]+...+256^31*s[31] = s mod l
 *   where l = 2^252 + 27742317777372353535851937790883648493.
 *   Overwrites s in place (only the first 32 bytes are written).
 *
 * Implementation outline:
 *   - The 64 input bytes are split into 24 limbs of 21 bits
 *     (2097151 == 2^21 - 1); the top limb keeps whatever bits remain.
 *   - Limbs s12 and above are folded back into the low limbs using the
 *     constants 666643, 470296, 654183, -997805, 136657, -683901, which are
 *     the signed 21-bit digits of 2^252 - l (so 2^252 is congruent to that
 *     value mod l).
 *   - Carries are subtracted as carry * ((uint64_t) 1L << 21), i.e. with an
 *     unsigned multiplier rather than a left shift of a possibly negative
 *     signed value.
 *   - The right shifts of possibly negative int64_t values rely on the
 *     compiler implementing them as arithmetic shifts.
 */

void
sc25519_reduce(unsigned char *s)
{
    /* Unpack the 512-bit input into 21-bit limbs. */
    int64_t s0  = 2097151 & load_3(s);
    int64_t s1  = 2097151 & (load_4(s + 2) >> 5);
    int64_t s2  = 2097151 & (load_3(s + 5) >> 2);
    int64_t s3  = 2097151 & (load_4(s + 7) >> 7);
    int64_t s4  = 2097151 & (load_4(s + 10) >> 4);
    int64_t s5  = 2097151 & (load_3(s + 13) >> 1);
    int64_t s6  = 2097151 & (load_4(s + 15) >> 6);
    int64_t s7  = 2097151 & (load_3(s + 18) >> 3);
    int64_t s8  = 2097151 & load_3(s + 21);
    int64_t s9  = 2097151 & (load_4(s + 23) >> 5);
    int64_t s10 = 2097151 & (load_3(s + 26) >> 2);
    int64_t s11 = 2097151 & (load_4(s + 28) >> 7);
    int64_t s12 = 2097151 & (load_4(s + 31) >> 4);
    int64_t s13 = 2097151 & (load_3(s + 34) >> 1);
    int64_t s14 = 2097151 & (load_4(s + 36) >> 6);
    int64_t s15 = 2097151 & (load_3(s + 39) >> 3);
    int64_t s16 = 2097151 & load_3(s + 42);
    int64_t s17 = 2097151 & (load_4(s + 44) >> 5);
    int64_t s18 = 2097151 & (load_3(s + 47) >> 2);
    int64_t s19 = 2097151 & (load_4(s + 49) >> 7);
    int64_t s20 = 2097151 & (load_4(s + 52) >> 4);
    int64_t s21 = 2097151 & (load_3(s + 55) >> 1);
    int64_t s22 = 2097151 & (load_4(s + 57) >> 6);
    int64_t s23 = (load_4(s + 60) >> 3);

    int64_t carry0;
    int64_t carry1;
    int64_t carry2;
    int64_t carry3;
    int64_t carry4;
    int64_t carry5;
    int64_t carry6;
    int64_t carry7;
    int64_t carry8;
    int64_t carry9;
    int64_t carry10;
    int64_t carry11;
    int64_t carry12;
    int64_t carry13;
    int64_t carry14;
    int64_t carry15;
    int64_t carry16;

    /* Fold limbs s23..s18 down by twelve positions. */
    s11 += s23 * 666643;
    s12 += s23 * 470296;
    s13 += s23 * 654183;
    s14 -= s23 * 997805;
    s15 += s23 * 136657;
    s16 -= s23 * 683901;

    s10 += s22 * 666643;
    s11 += s22 * 470296;
    s12 += s22 * 654183;
    s13 -= s22 * 997805;
    s14 += s22 * 136657;
    s15 -= s22 * 683901;

    s9 += s21 * 666643;
    s10 += s21 * 470296;
    s11 += s21 * 654183;
    s12 -= s21 * 997805;
    s13 += s21 * 136657;
    s14 -= s21 * 683901;

    s8 += s20 * 666643;
    s9 += s20 * 470296;
    s10 += s20 * 654183;
    s11 -= s20 * 997805;
    s12 += s20 * 136657;
    s13 -= s20 * 683901;

    s7 += s19 * 666643;
    s8 += s19 * 470296;
    s9 += s19 * 654183;
    s10 -= s19 * 997805;
    s11 += s19 * 136657;
    s12 -= s19 * 683901;

    s6 += s18 * 666643;
    s7 += s18 * 470296;
    s8 += s18 * 654183;
    s9 -= s18 * 997805;
    s10 += s18 * 136657;
    s11 -= s18 * 683901;

    /* Rounded carries through limbs 6..16 (even limbs first, then odd). */
    carry6 = (s6 + (int64_t) (1L << 20)) >> 21;
    s7 += carry6;
    s6 -= carry6 * ((uint64_t) 1L << 21);
    carry8 = (s8 + (int64_t) (1L << 20)) >> 21;
    s9 += carry8;
    s8 -= carry8 * ((uint64_t) 1L << 21);
    carry10 = (s10 + (int64_t) (1L << 20)) >> 21;
    s11 += carry10;
    s10 -= carry10 * ((uint64_t) 1L << 21);
    carry12 = (s12 + (int64_t) (1L << 20)) >> 21;
    s13 += carry12;
    s12 -= carry12 * ((uint64_t) 1L << 21);
    carry14 = (s14 + (int64_t) (1L << 20)) >> 21;
    s15 += carry14;
    s14 -= carry14 * ((uint64_t) 1L << 21);
    carry16 = (s16 + (int64_t) (1L << 20)) >> 21;
    s17 += carry16;
    s16 -= carry16 * ((uint64_t) 1L << 21);

    carry7 = (s7 + (int64_t) (1L << 20)) >> 21;
    s8 += carry7;
    s7 -= carry7 * ((uint64_t) 1L << 21);
    carry9 = (s9 + (int64_t) (1L << 20)) >> 21;
    s10 += carry9;
    s9 -= carry9 * ((uint64_t) 1L << 21);
    carry11 = (s11 + (int64_t) (1L << 20)) >> 21;
    s12 += carry11;
    s11 -= carry11 * ((uint64_t) 1L << 21);
    carry13 = (s13 + (int64_t) (1L << 20)) >> 21;
    s14 += carry13;
    s13 -= carry13 * ((uint64_t) 1L << 21);
    carry15 = (s15 + (int64_t) (1L << 20)) >> 21;
    s16 += carry15;
    s15 -= carry15 * ((uint64_t) 1L << 21);

    /* Fold limbs s17..s12 down by twelve positions. */
    s5 += s17 * 666643;
    s6 += s17 * 470296;
    s7 += s17 * 654183;
    s8 -= s17 * 997805;
    s9 += s17 * 136657;
    s10 -= s17 * 683901;

    s4 += s16 * 666643;
    s5 += s16 * 470296;
    s6 += s16 * 654183;
    s7 -= s16 * 997805;
    s8 += s16 * 136657;
    s9 -= s16 * 683901;

    s3 += s15 * 666643;
    s4 += s15 * 470296;
    s5 += s15 * 654183;
    s6 -= s15 * 997805;
    s7 += s15 * 136657;
    s8 -= s15 * 683901;

    s2 += s14 * 666643;
    s3 += s14 * 470296;
    s4 += s14 * 654183;
    s5 -= s14 * 997805;
    s6 += s14 * 136657;
    s7 -= s14 * 683901;

    s1 += s13 * 666643;
    s2 += s13 * 470296;
    s3 += s13 * 654183;
    s4 -= s13 * 997805;
    s5 += s13 * 136657;
    s6 -= s13 * 683901;

    s0 += s12 * 666643;
    s1 += s12 * 470296;
    s2 += s12 * 654183;
    s3 -= s12 * 997805;
    s4 += s12 * 136657;
    s5 -= s12 * 683901;
    s12 = 0;

    /* Rounded carries through limbs 0..11; the last one refills s12. */
    carry0 = (s0 + (int64_t) (1L << 20)) >> 21;
    s1 += carry0;
    s0 -= carry0 * ((uint64_t) 1L << 21);
    carry2 = (s2 + (int64_t) (1L << 20)) >> 21;
    s3 += carry2;
    s2 -= carry2 * ((uint64_t) 1L << 21);
    carry4 = (s4 + (int64_t) (1L << 20)) >> 21;
    s5 += carry4;
    s4 -= carry4 * ((uint64_t) 1L << 21);
    carry6 = (s6 + (int64_t) (1L << 20)) >> 21;
    s7 += carry6;
    s6 -= carry6 * ((uint64_t) 1L << 21);
    carry8 = (s8 + (int64_t) (1L << 20)) >> 21;
    s9 += carry8;
    s8 -= carry8 * ((uint64_t) 1L << 21);
    carry10 = (s10 + (int64_t) (1L << 20)) >> 21;
    s11 += carry10;
    s10 -= carry10 * ((uint64_t) 1L << 21);

    carry1 = (s1 + (int64_t) (1L << 20)) >> 21;
    s2 += carry1;
    s1 -= carry1 * ((uint64_t) 1L << 21);
    carry3 = (s3 + (int64_t) (1L << 20)) >> 21;
    s4 += carry3;
    s3 -= carry3 * ((uint64_t) 1L << 21);
    carry5 = (s5 + (int64_t) (1L << 20)) >> 21;
    s6 += carry5;
    s5 -= carry5 * ((uint64_t) 1L << 21);
    carry7 = (s7 + (int64_t) (1L << 20)) >> 21;
    s8 += carry7;
    s7 -= carry7 * ((uint64_t) 1L << 21);
    carry9 = (s9 + (int64_t) (1L << 20)) >> 21;
    s10 += carry9;
    s9 -= carry9 * ((uint64_t) 1L << 21);
    carry11 = (s11 + (int64_t) (1L << 20)) >> 21;
    s12 += carry11;
    s11 -= carry11 * ((uint64_t) 1L << 21);

    /* Fold the refilled s12 once more. */
    s0 += s12 * 666643;
    s1 += s12 * 470296;
    s2 += s12 * 654183;
    s3 -= s12 * 997805;
    s4 += s12 * 136657;
    s5 -= s12 * 683901;
    s12 = 0;

    /* Sequential carries without rounding through limbs 0..11. */
    carry0 = s0 >> 21;
    s1 += carry0;
    s0 -= carry0 * ((uint64_t) 1L << 21);
    carry1 = s1 >> 21;
    s2 += carry1;
    s1 -= carry1 * ((uint64_t) 1L << 21);
    carry2 = s2 >> 21;
    s3 += carry2;
    s2 -= carry2 * ((uint64_t) 1L << 21);
    carry3 = s3 >> 21;
    s4 += carry3;
    s3 -= carry3 * ((uint64_t) 1L << 21);
    carry4 = s4 >> 21;
    s5 += carry4;
    s4 -= carry4 * ((uint64_t) 1L << 21);
    carry5 = s5 >> 21;
    s6 += carry5;
    s5 -= carry5 * ((uint64_t) 1L << 21);
    carry6 = s6 >> 21;
    s7 += carry6;
    s6 -= carry6 * ((uint64_t) 1L << 21);
    carry7 = s7 >> 21;
    s8 += carry7;
    s7 -= carry7 * ((uint64_t) 1L << 21);
    carry8 = s8 >> 21;
    s9 += carry8;
    s8 -= carry8 * ((uint64_t) 1L << 21);
    carry9 = s9 >> 21;
    s10 += carry9;
    s9 -= carry9 * ((uint64_t) 1L << 21);
    carry10 = s10 >> 21;
    s11 += carry10;
    s10 -= carry10 * ((uint64_t) 1L << 21);
    carry11 = s11 >> 21;
    s12 += carry11;
    s11 -= carry11 * ((uint64_t) 1L << 21);

    /* Final fold of s12, followed by a last carry pass through limbs 0..10. */
    s0 += s12 * 666643;
    s1 += s12 * 470296;
    s2 += s12 * 654183;
    s3 -= s12 * 997805;
    s4 += s12 * 136657;
    s5 -= s12 * 683901;

    carry0 = s0 >> 21;
    s1 += carry0;
    s0 -= carry0 * ((uint64_t) 1L << 21);
    carry1 = s1 >> 21;
    s2 += carry1;
    s1 -= carry1 * ((uint64_t) 1L << 21);
    carry2 = s2 >> 21;
    s3 += carry2;
    s2 -= carry2 * ((uint64_t) 1L << 21);
    carry3 = s3 >> 21;
    s4 += carry3;
    s3 -= carry3 * ((uint64_t) 1L << 21);
    carry4 = s4 >> 21;
    s5 += carry4;
    s4 -= carry4 * ((uint64_t) 1L << 21);
    carry5 = s5 >> 21;
    s6 += carry5;
    s5 -= carry5 * ((uint64_t) 1L << 21);
    carry6 = s6 >> 21;
    s7 += carry6;
    s6 -= carry6 * ((uint64_t) 1L << 21);
    carry7 = s7 >> 21;
    s8 += carry7;
    s7 -= carry7 * ((uint64_t) 1L << 21);
    carry8 = s8 >> 21;
    s9 += carry8;
    s8 -= carry8 * ((uint64_t) 1L << 21);
    carry9 = s9 >> 21;
    s10 += carry9;
    s9 -= carry9 * ((uint64_t) 1L << 21);
    carry10 = s10 >> 21;
    s11 += carry10;
    s10 -= carry10 * ((uint64_t) 1L << 21);

    /* Repack the twelve limbs into the first 32 bytes of s. */
    s[0]  = s0 >> 0;
    s[1]  = s0 >> 8;
    s[2]  = (s0 >> 16) | (s1 * ((uint64_t) 1 << 5));
    s[3]  = s1 >> 3;
    s[4]  = s1 >> 11;
    s[5]  = (s1 >> 19) | (s2 * ((uint64_t) 1 << 2));
    s[6]  = s2 >> 6;
    s[7]  = (s2 >> 14) | (s3 * ((uint64_t) 1 << 7));
    s[8]  = s3 >> 1;
    s[9]  = s3 >> 9;
    s[10] = (s3 >> 17) | (s4 * ((uint64_t) 1 << 4));
    s[11] = s4 >> 4;
    s[12] = s4 >> 12;
    s[13] = (s4 >> 20) | (s5 * ((uint64_t) 1 << 1));
    s[14] = s5 >> 7;
    s[15] = (s5 >> 15) | (s6 * ((uint64_t) 1 << 6));
    s[16] = s6 >> 2;
    s[17] = s6 >> 10;
    s[18] = (s6 >> 18) | (s7 * ((uint64_t) 1 << 3));
    s[19] = s7 >> 5;
    s[20] = s7 >> 13;
    s[21] = s8 >> 0;
    s[22] = s8 >> 8;
    s[23] = (s8 >> 16) | (s9 * ((uint64_t) 1 << 5));
    s[24] = s9 >> 3;
    s[25] = s9 >> 11;
    s[26] = (s9 >> 19) | (s10 * ((uint64_t) 1 << 2));
    s[27] = s10 >> 6;
    s[28] = (s10 >> 14) | (s11 * ((uint64_t) 1 << 7));
    s[29] = s11 >> 1;
    s[30] = s11 >> 9;
    s[31] = s11 >> 17;
}
1893 
/*
 * Check that the 32-byte little-endian scalar s is strictly smaller than
 * l = 2^252 + 27742317777372353535851937790883648493.
 *
 * Returns 1 when s < l and 0 otherwise. All 32 bytes are always examined,
 * from the most significant byte down, without a data-dependent branch.
 */
int
sc25519_is_canonical(const unsigned char *s)
{
    /* 2^252+27742317777372353535851937790883648493 */
    static const unsigned char L[32] = {
        0xed, 0xd3, 0xf5, 0x5c, 0x1a, 0x63, 0x12, 0x58, 0xd6, 0x9c, 0xf7,
        0xa2, 0xde, 0xf9, 0xde, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10
    };
    unsigned char c = 0; /* set to 1 once a byte of s is found below L */
    unsigned char n = 1; /* stays 1 while all higher bytes were equal  */
    unsigned int  i = 32;

    do {
        i--;
        /* Unsigned arithmetic: a borrow in s[i] - L[i] wraps around and
         * leaves bit 8 set, without right-shifting a negative signed
         * value (which would be implementation-defined). */
        c |= (((unsigned int) s[i] - (unsigned int) L[i]) >> 8) & n;
        /* (x - 1) >> 8 has its low bit set only for x == 0, so n is
         * cleared at the first byte where s and L differ. */
        n &= (((unsigned int) (s[i] ^ L[i])) - 1U) >> 8;
    } while (i != 0);

    return (c != 0);
}
1915 
1916 static void
chi25519(fe25519 out,const fe25519 z)1917 chi25519(fe25519 out, const fe25519 z)
1918 {
1919     fe25519 t0, t1, t2, t3;
1920     int     i;
1921 
1922     fe25519_sq(t0, z);
1923     fe25519_mul(t1, t0, z);
1924     fe25519_sq(t0, t1);
1925     fe25519_sq(t2, t0);
1926     fe25519_sq(t2, t2);
1927     fe25519_mul(t2, t2, t0);
1928     fe25519_mul(t1, t2, z);
1929     fe25519_sq(t2, t1);
1930 
1931     for (i = 1; i < 5; i++) {
1932         fe25519_sq(t2, t2);
1933     }
1934     fe25519_mul(t1, t2, t1);
1935     fe25519_sq(t2, t1);
1936     for (i = 1; i < 10; i++) {
1937         fe25519_sq(t2, t2);
1938     }
1939     fe25519_mul(t2, t2, t1);
1940     fe25519_sq(t3, t2);
1941     for (i = 1; i < 20; i++) {
1942         fe25519_sq(t3, t3);
1943     }
1944     fe25519_mul(t2, t3, t2);
1945     fe25519_sq(t2, t2);
1946     for (i = 1; i < 10; i++) {
1947         fe25519_sq(t2, t2);
1948     }
1949     fe25519_mul(t1, t2, t1);
1950     fe25519_sq(t2, t1);
1951     for (i = 1; i < 50; i++) {
1952         fe25519_sq(t2, t2);
1953     }
1954     fe25519_mul(t2, t2, t1);
1955     fe25519_sq(t3, t2);
1956     for (i = 1; i < 100; i++) {
1957         fe25519_sq(t3, t3);
1958     }
1959     fe25519_mul(t2, t3, t2);
1960     fe25519_sq(t2, t2);
1961     for (i = 1; i < 50; i++) {
1962         fe25519_sq(t2, t2);
1963     }
1964     fe25519_mul(t1, t2, t1);
1965     fe25519_sq(t1, t1);
1966     for (i = 1; i < 4; i++) {
1967         fe25519_sq(t1, t1);
1968     }
1969     fe25519_mul(out, t1, t0);
1970 }
1971 
1972 void
ge25519_from_uniform(unsigned char s[32],const unsigned char r[32])1973 ge25519_from_uniform(unsigned char s[32], const unsigned char r[32])
1974 {
1975     fe25519       e;
1976     fe25519       negx;
1977     fe25519       rr2;
1978     fe25519       x, x2, x3;
1979     ge25519_p3    p3;
1980     ge25519_p1p1  p1;
1981     ge25519_p2    p2;
1982     unsigned int  e_is_minus_1;
1983     unsigned char x_sign;
1984 
1985     memcpy(s, r, 32);
1986     x_sign = s[31] & 0x80;
1987     s[31] &= 0x7f;
1988 
1989     fe25519_frombytes(rr2, s);
1990 
1991     /* elligator */
1992     fe25519_sq2(rr2, rr2);
1993     rr2[0]++;
1994     fe25519_invert(rr2, rr2);
1995     fe25519_mul(x, curve25519_A, rr2);
1996     fe25519_neg(x, x);
1997 
1998     fe25519_sq(x2, x);
1999     fe25519_mul(x3, x, x2);
2000     fe25519_add(e, x3, x);
2001     fe25519_mul(x2, x2, curve25519_A);
2002     fe25519_add(e, x2, e);
2003 
2004     chi25519(e, e);
2005 
2006     fe25519_tobytes(s, e);
2007     e_is_minus_1 = s[1] & 1;
2008     fe25519_neg(negx, x);
2009     fe25519_cmov(x, negx, e_is_minus_1);
2010     fe25519_0(x2);
2011     fe25519_cmov(x2, curve25519_A, e_is_minus_1);
2012     fe25519_sub(x, x, x2);
2013 
2014     /* yed = (x-1)/(x+1) */
2015     {
2016         fe25519 one;
2017         fe25519 x_plus_one;
2018         fe25519 x_plus_one_inv;
2019         fe25519 x_minus_one;
2020         fe25519 yed;
2021 
2022         fe25519_1(one);
2023         fe25519_add(x_plus_one, x, one);
2024         fe25519_sub(x_minus_one, x, one);
2025         fe25519_invert(x_plus_one_inv, x_plus_one);
2026         fe25519_mul(yed, x_minus_one, x_plus_one_inv);
2027         fe25519_tobytes(s, yed);
2028     }
2029 
2030     /* recover x */
2031     s[31] |= x_sign;
2032     if (ge25519_frombytes(&p3, s) != 0) {
2033         abort(); /* LCOV_EXCL_LINE */
2034     }
2035 
2036     /* multiply by the cofactor */
2037     ge25519_p3_dbl(&p1, &p3);
2038     ge25519_p1p1_to_p2(&p2, &p1);
2039     ge25519_p2_dbl(&p1, &p2);
2040     ge25519_p1p1_to_p2(&p2, &p1);
2041     ge25519_p2_dbl(&p1, &p2);
2042     ge25519_p1p1_to_p3(&p3, &p1);
2043 
2044     ge25519_p3_tobytes(s, &p3);
2045 }
2046