1 #include <stddef.h>
2 #include <stdint.h>
3 #include <stdlib.h>
4 #include <string.h>
5
6 #include "crypto_verify_32.h"
7 #include "private/common.h"
8 #include "private/ed25519_ref10.h"
9 #include "utils.h"
10
/* Read the three bytes at `in` as an unsigned little-endian integer. */
static inline uint64_t
load_3(const unsigned char *in)
{
    return ((uint64_t) in[0]) | (((uint64_t) in[1]) << 8) |
           (((uint64_t) in[2]) << 16);
}
22
/* Read the four bytes at `in` as an unsigned little-endian integer. */
static inline uint64_t
load_4(const unsigned char *in)
{
    return ((uint64_t) in[0]) | (((uint64_t) in[1]) << 8) |
           (((uint64_t) in[2]) << 16) | (((uint64_t) in[3]) << 24);
}
35
36 /*
37 * Field arithmetic:
38 * Use 5*51 bit limbs on 64-bit systems with support for 128 bit arithmetic,
39 * and 10*25.5 bit limbs elsewhere.
40 *
 * Functions used elsewhere that are candidates for inlining are defined
 * via "private/ed25519_ref10.h".
43 */
44
45 #ifdef HAVE_TI_MODE
46 # include "fe_51/constants.h"
47 # include "fe_51/fe.h"
48 #else
49 # include "fe_25_5/constants.h"
50 # include "fe_25_5/fe.h"
51 #endif
52
53 void
fe25519_invert(fe25519 out,const fe25519 z)54 fe25519_invert(fe25519 out, const fe25519 z)
55 {
56 fe25519 t0;
57 fe25519 t1;
58 fe25519 t2;
59 fe25519 t3;
60 int i;
61
62 fe25519_sq(t0, z);
63 fe25519_sq(t1, t0);
64 fe25519_sq(t1, t1);
65 fe25519_mul(t1, z, t1);
66 fe25519_mul(t0, t0, t1);
67 fe25519_sq(t2, t0);
68 fe25519_mul(t1, t1, t2);
69 fe25519_sq(t2, t1);
70 for (i = 1; i < 5; ++i) {
71 fe25519_sq(t2, t2);
72 }
73 fe25519_mul(t1, t2, t1);
74 fe25519_sq(t2, t1);
75 for (i = 1; i < 10; ++i) {
76 fe25519_sq(t2, t2);
77 }
78 fe25519_mul(t2, t2, t1);
79 fe25519_sq(t3, t2);
80 for (i = 1; i < 20; ++i) {
81 fe25519_sq(t3, t3);
82 }
83 fe25519_mul(t2, t3, t2);
84 fe25519_sq(t2, t2);
85 for (i = 1; i < 10; ++i) {
86 fe25519_sq(t2, t2);
87 }
88 fe25519_mul(t1, t2, t1);
89 fe25519_sq(t2, t1);
90 for (i = 1; i < 50; ++i) {
91 fe25519_sq(t2, t2);
92 }
93 fe25519_mul(t2, t2, t1);
94 fe25519_sq(t3, t2);
95 for (i = 1; i < 100; ++i) {
96 fe25519_sq(t3, t3);
97 }
98 fe25519_mul(t2, t3, t2);
99 fe25519_sq(t2, t2);
100 for (i = 1; i < 50; ++i) {
101 fe25519_sq(t2, t2);
102 }
103 fe25519_mul(t1, t2, t1);
104 fe25519_sq(t1, t1);
105 for (i = 1; i < 5; ++i) {
106 fe25519_sq(t1, t1);
107 }
108 fe25519_mul(out, t1, t0);
109 }
110
111 static void
fe25519_pow22523(fe25519 out,const fe25519 z)112 fe25519_pow22523(fe25519 out, const fe25519 z)
113 {
114 fe25519 t0;
115 fe25519 t1;
116 fe25519 t2;
117 int i;
118
119 fe25519_sq(t0, z);
120 fe25519_sq(t1, t0);
121 fe25519_sq(t1, t1);
122 fe25519_mul(t1, z, t1);
123 fe25519_mul(t0, t0, t1);
124 fe25519_sq(t0, t0);
125 fe25519_mul(t0, t1, t0);
126 fe25519_sq(t1, t0);
127 for (i = 1; i < 5; ++i) {
128 fe25519_sq(t1, t1);
129 }
130 fe25519_mul(t0, t1, t0);
131 fe25519_sq(t1, t0);
132 for (i = 1; i < 10; ++i) {
133 fe25519_sq(t1, t1);
134 }
135 fe25519_mul(t1, t1, t0);
136 fe25519_sq(t2, t1);
137 for (i = 1; i < 20; ++i) {
138 fe25519_sq(t2, t2);
139 }
140 fe25519_mul(t1, t2, t1);
141 fe25519_sq(t1, t1);
142 for (i = 1; i < 10; ++i) {
143 fe25519_sq(t1, t1);
144 }
145 fe25519_mul(t0, t1, t0);
146 fe25519_sq(t1, t0);
147 for (i = 1; i < 50; ++i) {
148 fe25519_sq(t1, t1);
149 }
150 fe25519_mul(t1, t1, t0);
151 fe25519_sq(t2, t1);
152 for (i = 1; i < 100; ++i) {
153 fe25519_sq(t2, t2);
154 }
155 fe25519_mul(t1, t2, t1);
156 fe25519_sq(t1, t1);
157 for (i = 1; i < 50; ++i) {
158 fe25519_sq(t1, t1);
159 }
160 fe25519_mul(t0, t1, t0);
161 fe25519_sq(t0, t0);
162 fe25519_sq(t0, t0);
163 fe25519_mul(out, t0, z);
164 }
165
166 /*
167 r = p + q
168 */
169
170 void
ge25519_add(ge25519_p1p1 * r,const ge25519_p3 * p,const ge25519_cached * q)171 ge25519_add(ge25519_p1p1 *r, const ge25519_p3 *p, const ge25519_cached *q)
172 {
173 fe25519 t0;
174
175 fe25519_add(r->X, p->Y, p->X);
176 fe25519_sub(r->Y, p->Y, p->X);
177 fe25519_mul(r->Z, r->X, q->YplusX);
178 fe25519_mul(r->Y, r->Y, q->YminusX);
179 fe25519_mul(r->T, q->T2d, p->T);
180 fe25519_mul(r->X, p->Z, q->Z);
181 fe25519_add(t0, r->X, r->X);
182 fe25519_sub(r->X, r->Z, r->Y);
183 fe25519_add(r->Y, r->Z, r->Y);
184 fe25519_add(r->Z, t0, r->T);
185 fe25519_sub(r->T, t0, r->T);
186 }
187
/*
 * Recode the 256-bit little-endian scalar `a` into 256 signed digits
 * r[0..255] with a = sum(r[i] * 2^i). Set bits within 6 positions of a
 * non-zero digit are folded into it while the result stays within
 * [-15, 15].
 *
 * Control flow depends on the scalar bits (variable time).
 */
static void
slide_vartime(signed char *r, const unsigned char *a)
{
    int pos;
    int shift;
    int j;
    int term;
    int merged;

    /* Start from the plain binary expansion, least significant bit first. */
    for (pos = 0; pos < 256; pos++) {
        r[pos] = (a[pos / 8] >> (pos % 8)) & 1;
    }

    for (pos = 0; pos < 256; pos++) {
        if (r[pos] == 0) {
            continue;
        }
        for (shift = 1; shift <= 6; shift++) {
            if (pos + shift >= 256) {
                break;
            }
            if (r[pos + shift] == 0) {
                continue;
            }
            term   = r[pos + shift] << shift;
            merged = r[pos] + term;
            if (merged <= 15) {
                /* Absorb the higher digit into the current one. */
                r[pos]         = (signed char) merged;
                r[pos + shift] = 0;
                continue;
            }
            merged = r[pos] - term;
            if (merged < -15) {
                break;
            }
            /* Subtract instead, and carry +1 upwards from pos + shift. */
            r[pos] = (signed char) merged;
            j      = pos + shift;
            while (j < 256 && r[j] != 0) {
                r[j] = 0;
                j++;
            }
            if (j < 256) {
                r[j] = 1;
            }
        }
    }
}
230
231 int
ge25519_frombytes(ge25519_p3 * h,const unsigned char * s)232 ge25519_frombytes(ge25519_p3 *h, const unsigned char *s)
233 {
234 fe25519 u;
235 fe25519 v;
236 fe25519 v3;
237 fe25519 vxx;
238 fe25519 m_root_check, p_root_check;
239 fe25519 negx;
240 fe25519 x_sqrtm1;
241 int has_m_root, has_p_root;
242
243 fe25519_frombytes(h->Y, s);
244 fe25519_1(h->Z);
245 fe25519_sq(u, h->Y);
246 fe25519_mul(v, u, d);
247 fe25519_sub(u, u, h->Z); /* u = y^2-1 */
248 fe25519_add(v, v, h->Z); /* v = dy^2+1 */
249
250 fe25519_sq(v3, v);
251 fe25519_mul(v3, v3, v); /* v3 = v^3 */
252 fe25519_sq(h->X, v3);
253 fe25519_mul(h->X, h->X, v);
254 fe25519_mul(h->X, h->X, u); /* x = uv^7 */
255
256 fe25519_pow22523(h->X, h->X); /* x = (uv^7)^((q-5)/8) */
257 fe25519_mul(h->X, h->X, v3);
258 fe25519_mul(h->X, h->X, u); /* x = uv^3(uv^7)^((q-5)/8) */
259
260 fe25519_sq(vxx, h->X);
261 fe25519_mul(vxx, vxx, v);
262 fe25519_sub(m_root_check, vxx, u); /* vx^2-u */
263 fe25519_add(p_root_check, vxx, u); /* vx^2+u */
264 has_m_root = fe25519_iszero(m_root_check);
265 has_p_root = fe25519_iszero(p_root_check);
266 fe25519_mul(x_sqrtm1, h->X, sqrtm1); /* x*sqrt(-1) */
267 fe25519_cmov(h->X, x_sqrtm1, 1 - has_m_root);
268
269 fe25519_neg(negx, h->X);
270 fe25519_cmov(h->X, negx, fe25519_isnegative(h->X) ^ (s[31] >> 7));
271 fe25519_mul(h->T, h->X, h->Y);
272
273 return (has_m_root | has_p_root) - 1;
274 }
275
276 int
ge25519_frombytes_negate_vartime(ge25519_p3 * h,const unsigned char * s)277 ge25519_frombytes_negate_vartime(ge25519_p3 *h, const unsigned char *s)
278 {
279 fe25519 u;
280 fe25519 v;
281 fe25519 v3;
282 fe25519 vxx;
283 fe25519 m_root_check, p_root_check;
284
285 fe25519_frombytes(h->Y, s);
286 fe25519_1(h->Z);
287 fe25519_sq(u, h->Y);
288 fe25519_mul(v, u, d);
289 fe25519_sub(u, u, h->Z); /* u = y^2-1 */
290 fe25519_add(v, v, h->Z); /* v = dy^2+1 */
291
292 fe25519_sq(v3, v);
293 fe25519_mul(v3, v3, v); /* v3 = v^3 */
294 fe25519_sq(h->X, v3);
295 fe25519_mul(h->X, h->X, v);
296 fe25519_mul(h->X, h->X, u); /* x = uv^7 */
297
298 fe25519_pow22523(h->X, h->X); /* x = (uv^7)^((q-5)/8) */
299 fe25519_mul(h->X, h->X, v3);
300 fe25519_mul(h->X, h->X, u); /* x = uv^3(uv^7)^((q-5)/8) */
301
302 fe25519_sq(vxx, h->X);
303 fe25519_mul(vxx, vxx, v);
304 fe25519_sub(m_root_check, vxx, u); /* vx^2-u */
305 if (fe25519_iszero(m_root_check) == 0) {
306 fe25519_add(p_root_check, vxx, u); /* vx^2+u */
307 if (fe25519_iszero(p_root_check) == 0) {
308 return -1;
309 }
310 fe25519_mul(h->X, h->X, sqrtm1);
311 }
312
313 if (fe25519_isnegative(h->X) == (s[31] >> 7)) {
314 fe25519_neg(h->X, h->X);
315 }
316 fe25519_mul(h->T, h->X, h->Y);
317
318 return 0;
319 }
320
321 /*
322 r = p + q
323 */
324
325 static void
ge25519_madd(ge25519_p1p1 * r,const ge25519_p3 * p,const ge25519_precomp * q)326 ge25519_madd(ge25519_p1p1 *r, const ge25519_p3 *p, const ge25519_precomp *q)
327 {
328 fe25519 t0;
329
330 fe25519_add(r->X, p->Y, p->X);
331 fe25519_sub(r->Y, p->Y, p->X);
332 fe25519_mul(r->Z, r->X, q->yplusx);
333 fe25519_mul(r->Y, r->Y, q->yminusx);
334 fe25519_mul(r->T, q->xy2d, p->T);
335 fe25519_add(t0, p->Z, p->Z);
336 fe25519_sub(r->X, r->Z, r->Y);
337 fe25519_add(r->Y, r->Z, r->Y);
338 fe25519_add(r->Z, t0, r->T);
339 fe25519_sub(r->T, t0, r->T);
340 }
341
342 /*
343 r = p - q
344 */
345
346 static void
ge25519_msub(ge25519_p1p1 * r,const ge25519_p3 * p,const ge25519_precomp * q)347 ge25519_msub(ge25519_p1p1 *r, const ge25519_p3 *p, const ge25519_precomp *q)
348 {
349 fe25519 t0;
350
351 fe25519_add(r->X, p->Y, p->X);
352 fe25519_sub(r->Y, p->Y, p->X);
353 fe25519_mul(r->Z, r->X, q->yminusx);
354 fe25519_mul(r->Y, r->Y, q->yplusx);
355 fe25519_mul(r->T, q->xy2d, p->T);
356 fe25519_add(t0, p->Z, p->Z);
357 fe25519_sub(r->X, r->Z, r->Y);
358 fe25519_add(r->Y, r->Z, r->Y);
359 fe25519_sub(r->Z, t0, r->T);
360 fe25519_add(r->T, t0, r->T);
361 }
362
363 /*
364 r = p
365 */
366
367 void
ge25519_p1p1_to_p2(ge25519_p2 * r,const ge25519_p1p1 * p)368 ge25519_p1p1_to_p2(ge25519_p2 *r, const ge25519_p1p1 *p)
369 {
370 fe25519_mul(r->X, p->X, p->T);
371 fe25519_mul(r->Y, p->Y, p->Z);
372 fe25519_mul(r->Z, p->Z, p->T);
373 }
374
375 /*
376 r = p
377 */
378
379 void
ge25519_p1p1_to_p3(ge25519_p3 * r,const ge25519_p1p1 * p)380 ge25519_p1p1_to_p3(ge25519_p3 *r, const ge25519_p1p1 *p)
381 {
382 fe25519_mul(r->X, p->X, p->T);
383 fe25519_mul(r->Y, p->Y, p->Z);
384 fe25519_mul(r->Z, p->Z, p->T);
385 fe25519_mul(r->T, p->X, p->Y);
386 }
387
388 static void
ge25519_p2_0(ge25519_p2 * h)389 ge25519_p2_0(ge25519_p2 *h)
390 {
391 fe25519_0(h->X);
392 fe25519_1(h->Y);
393 fe25519_1(h->Z);
394 }
395
396 /*
397 r = 2 * p
398 */
399
400 static void
ge25519_p2_dbl(ge25519_p1p1 * r,const ge25519_p2 * p)401 ge25519_p2_dbl(ge25519_p1p1 *r, const ge25519_p2 *p)
402 {
403 fe25519 t0;
404
405 fe25519_sq(r->X, p->X);
406 fe25519_sq(r->Z, p->Y);
407 fe25519_sq2(r->T, p->Z);
408 fe25519_add(r->Y, p->X, p->Y);
409 fe25519_sq(t0, r->Y);
410 fe25519_add(r->Y, r->Z, r->X);
411 fe25519_sub(r->Z, r->Z, r->X);
412 fe25519_sub(r->X, t0, r->Y);
413 fe25519_sub(r->T, r->T, r->Z);
414 }
415
416 static void
ge25519_p3_0(ge25519_p3 * h)417 ge25519_p3_0(ge25519_p3 *h)
418 {
419 fe25519_0(h->X);
420 fe25519_1(h->Y);
421 fe25519_1(h->Z);
422 fe25519_0(h->T);
423 }
424
425 static void
ge25519_cached_0(ge25519_cached * h)426 ge25519_cached_0(ge25519_cached *h)
427 {
428 fe25519_1(h->YplusX);
429 fe25519_1(h->YminusX);
430 fe25519_1(h->Z);
431 fe25519_0(h->T2d);
432 }
433
434 /*
435 r = p
436 */
437
438 void
ge25519_p3_to_cached(ge25519_cached * r,const ge25519_p3 * p)439 ge25519_p3_to_cached(ge25519_cached *r, const ge25519_p3 *p)
440 {
441 fe25519_add(r->YplusX, p->Y, p->X);
442 fe25519_sub(r->YminusX, p->Y, p->X);
443 fe25519_copy(r->Z, p->Z);
444 fe25519_mul(r->T2d, p->T, d2);
445 }
446
447 static void
ge25519_p3_to_precomp(ge25519_precomp * pi,const ge25519_p3 * p)448 ge25519_p3_to_precomp(ge25519_precomp *pi, const ge25519_p3 *p)
449 {
450 fe25519 recip;
451 fe25519 x;
452 fe25519 y;
453 fe25519 xy;
454
455 fe25519_invert(recip, p->Z);
456 fe25519_mul(x, p->X, recip);
457 fe25519_mul(y, p->Y, recip);
458 fe25519_add(pi->yplusx, y, x);
459 fe25519_sub(pi->yminusx, y, x);
460 fe25519_mul(xy, x, y);
461 fe25519_mul(pi->xy2d, xy, d2);
462 }
463
464 /*
465 r = p
466 */
467
468 static void
ge25519_p3_to_p2(ge25519_p2 * r,const ge25519_p3 * p)469 ge25519_p3_to_p2(ge25519_p2 *r, const ge25519_p3 *p)
470 {
471 fe25519_copy(r->X, p->X);
472 fe25519_copy(r->Y, p->Y);
473 fe25519_copy(r->Z, p->Z);
474 }
475
476 void
ge25519_p3_tobytes(unsigned char * s,const ge25519_p3 * h)477 ge25519_p3_tobytes(unsigned char *s, const ge25519_p3 *h)
478 {
479 fe25519 recip;
480 fe25519 x;
481 fe25519 y;
482
483 fe25519_invert(recip, h->Z);
484 fe25519_mul(x, h->X, recip);
485 fe25519_mul(y, h->Y, recip);
486 fe25519_tobytes(s, y);
487 s[31] ^= fe25519_isnegative(x) << 7;
488 }
489
490 /*
491 r = 2 * p
492 */
493
494 static void
ge25519_p3_dbl(ge25519_p1p1 * r,const ge25519_p3 * p)495 ge25519_p3_dbl(ge25519_p1p1 *r, const ge25519_p3 *p)
496 {
497 ge25519_p2 q;
498 ge25519_p3_to_p2(&q, p);
499 ge25519_p2_dbl(r, &q);
500 }
501
502 static void
ge25519_precomp_0(ge25519_precomp * h)503 ge25519_precomp_0(ge25519_precomp *h)
504 {
505 fe25519_1(h->yplusx);
506 fe25519_1(h->yminusx);
507 fe25519_0(h->xy2d);
508 }
509
/*
 * Branch-free equality test on two bytes.
 * Returns 1 when b == c and 0 otherwise.
 */
static unsigned char
equal(signed char b, signed char c)
{
    /* 0 when the bytes match, 1..255 otherwise */
    const unsigned char diff = (unsigned char) b ^ (unsigned char) c;
    uint32_t            wide = diff;

    wide -= 1; /* wraps to 4294967295 on a match; 0..254 otherwise */

    return (unsigned char) (wide >> 31); /* top bit: 1 on a match */
}
523
/*
 * Branch-free sign test.
 * Returns 1 when b < 0 and 0 otherwise.
 */
static unsigned char
negative(signed char b)
{
    /*
     * Converting to 64 bits maps negative inputs to
     * 18446744073709551361..18446744073709551615 and leaves
     * non-negative inputs in 0..255, so bit 63 is the sign.
     */
    const uint64_t widened = (uint64_t) b;

    return (unsigned char) (widened >> 63);
}
534
535 static void
ge25519_cmov(ge25519_precomp * t,const ge25519_precomp * u,unsigned char b)536 ge25519_cmov(ge25519_precomp *t, const ge25519_precomp *u, unsigned char b)
537 {
538 fe25519_cmov(t->yplusx, u->yplusx, b);
539 fe25519_cmov(t->yminusx, u->yminusx, b);
540 fe25519_cmov(t->xy2d, u->xy2d, b);
541 }
542
543 static void
ge25519_cmov_cached(ge25519_cached * t,const ge25519_cached * u,unsigned char b)544 ge25519_cmov_cached(ge25519_cached *t, const ge25519_cached *u, unsigned char b)
545 {
546 fe25519_cmov(t->YplusX, u->YplusX, b);
547 fe25519_cmov(t->YminusX, u->YminusX, b);
548 fe25519_cmov(t->Z, u->Z, b);
549 fe25519_cmov(t->T2d, u->T2d, b);
550 }
551
552 static void
ge25519_select(ge25519_precomp * t,const ge25519_precomp precomp[8],const signed char b)553 ge25519_select(ge25519_precomp *t, const ge25519_precomp precomp[8], const signed char b)
554 {
555 ge25519_precomp minust;
556 const unsigned char bnegative = negative(b);
557 const unsigned char babs = b - (((-bnegative) & b) * ((signed char) 1 << 1));
558
559 ge25519_precomp_0(t);
560 ge25519_cmov(t, &precomp[0], equal(babs, 1));
561 ge25519_cmov(t, &precomp[1], equal(babs, 2));
562 ge25519_cmov(t, &precomp[2], equal(babs, 3));
563 ge25519_cmov(t, &precomp[3], equal(babs, 4));
564 ge25519_cmov(t, &precomp[4], equal(babs, 5));
565 ge25519_cmov(t, &precomp[5], equal(babs, 6));
566 ge25519_cmov(t, &precomp[6], equal(babs, 7));
567 ge25519_cmov(t, &precomp[7], equal(babs, 8));
568 fe25519_copy(minust.yplusx, t->yminusx);
569 fe25519_copy(minust.yminusx, t->yplusx);
570 fe25519_neg(minust.xy2d, t->xy2d);
571 ge25519_cmov(t, &minust, bnegative);
572 }
573
574 static void
ge25519_select_base(ge25519_precomp * t,const int pos,const signed char b)575 ge25519_select_base(ge25519_precomp *t, const int pos, const signed char b)
576 {
577 static const ge25519_precomp base[32][8] = { /* base[i][j] = (j+1)*256^i*B */
578 #ifdef HAVE_TI_MODE
579 # include "fe_51/base.h"
580 #else
581 # include "fe_25_5/base.h"
582 #endif
583 };
584 ge25519_select(t, base[pos], b);
585 }
586
587 static void
ge25519_select_cached(ge25519_cached * t,const ge25519_cached cached[8],const signed char b)588 ge25519_select_cached(ge25519_cached *t, const ge25519_cached cached[8], const signed char b)
589 {
590 ge25519_cached minust;
591 const unsigned char bnegative = negative(b);
592 const unsigned char babs = b - (((-bnegative) & b) * ((signed char) 1 << 1));
593
594 ge25519_cached_0(t);
595 ge25519_cmov_cached(t, &cached[0], equal(babs, 1));
596 ge25519_cmov_cached(t, &cached[1], equal(babs, 2));
597 ge25519_cmov_cached(t, &cached[2], equal(babs, 3));
598 ge25519_cmov_cached(t, &cached[3], equal(babs, 4));
599 ge25519_cmov_cached(t, &cached[4], equal(babs, 5));
600 ge25519_cmov_cached(t, &cached[5], equal(babs, 6));
601 ge25519_cmov_cached(t, &cached[6], equal(babs, 7));
602 ge25519_cmov_cached(t, &cached[7], equal(babs, 8));
603 fe25519_copy(minust.YplusX, t->YminusX);
604 fe25519_copy(minust.YminusX, t->YplusX);
605 fe25519_copy(minust.Z, t->Z);
606 fe25519_neg(minust.T2d, t->T2d);
607 ge25519_cmov_cached(t, &minust, bnegative);
608 }
609
610 /*
611 r = p - q
612 */
613
614 void
ge25519_sub(ge25519_p1p1 * r,const ge25519_p3 * p,const ge25519_cached * q)615 ge25519_sub(ge25519_p1p1 *r, const ge25519_p3 *p, const ge25519_cached *q)
616 {
617 fe25519 t0;
618
619 fe25519_add(r->X, p->Y, p->X);
620 fe25519_sub(r->Y, p->Y, p->X);
621 fe25519_mul(r->Z, r->X, q->YminusX);
622 fe25519_mul(r->Y, r->Y, q->YplusX);
623 fe25519_mul(r->T, q->T2d, p->T);
624 fe25519_mul(r->X, p->Z, q->Z);
625 fe25519_add(t0, r->X, r->X);
626 fe25519_sub(r->X, r->Z, r->Y);
627 fe25519_add(r->Y, r->Z, r->Y);
628 fe25519_sub(r->Z, t0, r->T);
629 fe25519_add(r->T, t0, r->T);
630 }
631
632 void
ge25519_tobytes(unsigned char * s,const ge25519_p2 * h)633 ge25519_tobytes(unsigned char *s, const ge25519_p2 *h)
634 {
635 fe25519 recip;
636 fe25519 x;
637 fe25519 y;
638
639 fe25519_invert(recip, h->Z);
640 fe25519_mul(x, h->X, recip);
641 fe25519_mul(y, h->Y, recip);
642 fe25519_tobytes(s, y);
643 s[31] ^= fe25519_isnegative(x) << 7;
644 }
645
646 /*
647 r = a * A + b * B
648 where a = a[0]+256*a[1]+...+256^31 a[31].
649 and b = b[0]+256*b[1]+...+256^31 b[31].
650 B is the Ed25519 base point (x,4/5) with x positive.
651
652 Only used for signatures verification.
653 */
654
655 void
ge25519_double_scalarmult_vartime(ge25519_p2 * r,const unsigned char * a,const ge25519_p3 * A,const unsigned char * b)656 ge25519_double_scalarmult_vartime(ge25519_p2 *r, const unsigned char *a,
657 const ge25519_p3 *A, const unsigned char *b)
658 {
659 static const ge25519_precomp Bi[8] = {
660 #ifdef HAVE_TI_MODE
661 # include "fe_51/base2.h"
662 #else
663 # include "fe_25_5/base2.h"
664 #endif
665 };
666 signed char aslide[256];
667 signed char bslide[256];
668 ge25519_cached Ai[8]; /* A,3A,5A,7A,9A,11A,13A,15A */
669 ge25519_p1p1 t;
670 ge25519_p3 u;
671 ge25519_p3 A2;
672 int i;
673
674 slide_vartime(aslide, a);
675 slide_vartime(bslide, b);
676
677 ge25519_p3_to_cached(&Ai[0], A);
678
679 ge25519_p3_dbl(&t, A);
680 ge25519_p1p1_to_p3(&A2, &t);
681
682 ge25519_add(&t, &A2, &Ai[0]);
683 ge25519_p1p1_to_p3(&u, &t);
684 ge25519_p3_to_cached(&Ai[1], &u);
685
686 ge25519_add(&t, &A2, &Ai[1]);
687 ge25519_p1p1_to_p3(&u, &t);
688 ge25519_p3_to_cached(&Ai[2], &u);
689
690 ge25519_add(&t, &A2, &Ai[2]);
691 ge25519_p1p1_to_p3(&u, &t);
692 ge25519_p3_to_cached(&Ai[3], &u);
693
694 ge25519_add(&t, &A2, &Ai[3]);
695 ge25519_p1p1_to_p3(&u, &t);
696 ge25519_p3_to_cached(&Ai[4], &u);
697
698 ge25519_add(&t, &A2, &Ai[4]);
699 ge25519_p1p1_to_p3(&u, &t);
700 ge25519_p3_to_cached(&Ai[5], &u);
701
702 ge25519_add(&t, &A2, &Ai[5]);
703 ge25519_p1p1_to_p3(&u, &t);
704 ge25519_p3_to_cached(&Ai[6], &u);
705
706 ge25519_add(&t, &A2, &Ai[6]);
707 ge25519_p1p1_to_p3(&u, &t);
708 ge25519_p3_to_cached(&Ai[7], &u);
709
710 ge25519_p2_0(r);
711
712 for (i = 255; i >= 0; --i) {
713 if (aslide[i] || bslide[i]) {
714 break;
715 }
716 }
717
718 for (; i >= 0; --i) {
719 ge25519_p2_dbl(&t, r);
720
721 if (aslide[i] > 0) {
722 ge25519_p1p1_to_p3(&u, &t);
723 ge25519_add(&t, &u, &Ai[aslide[i] / 2]);
724 } else if (aslide[i] < 0) {
725 ge25519_p1p1_to_p3(&u, &t);
726 ge25519_sub(&t, &u, &Ai[(-aslide[i]) / 2]);
727 }
728
729 if (bslide[i] > 0) {
730 ge25519_p1p1_to_p3(&u, &t);
731 ge25519_madd(&t, &u, &Bi[bslide[i] / 2]);
732 } else if (bslide[i] < 0) {
733 ge25519_p1p1_to_p3(&u, &t);
734 ge25519_msub(&t, &u, &Bi[(-bslide[i]) / 2]);
735 }
736
737 ge25519_p1p1_to_p2(r, &t);
738 }
739 }
740
741 /*
742 h = a * p
743 where a = a[0]+256*a[1]+...+256^31 a[31]
744
745 Preconditions:
746 a[31] <= 127
747
748 p is public
749 */
750
751 static void __noinline
ge25519_scalarmult_cache(ge25519_cached pi[static8],const ge25519_p3 * p)752 ge25519_scalarmult_cache(ge25519_cached pi[static 8], const ge25519_p3 *p)
753 {
754 ge25519_p1p1 t1;
755 ge25519_p3 p2, p3, p4, pt;
756
757 ge25519_p3_to_cached(&pi[1 - 1], p); /* p */
758
759 ge25519_p3_dbl(&t1, p);
760 ge25519_p1p1_to_p3(&p2, &t1);
761 ge25519_p3_to_cached(&pi[2 - 1], &p2); /* 2p = 2*p */
762
763 ge25519_add(&t1, p, &pi[2 - 1]);
764 ge25519_p1p1_to_p3(&p3, &t1);
765 ge25519_p3_to_cached(&pi[3 - 1], &p3); /* 3p = 2p+p */
766
767 ge25519_p3_dbl(&t1, &p2);
768 ge25519_p1p1_to_p3(&p4, &t1);
769 ge25519_p3_to_cached(&pi[4 - 1], &p4); /* 4p = 2*2p */
770
771 ge25519_add(&t1, p, &pi[4 - 1]);
772 ge25519_p1p1_to_p3(&pt, &t1);
773 ge25519_p3_to_cached(&pi[5 - 1], &pt); /* 5p = 4p+p */
774
775 ge25519_p3_dbl(&t1, &p3);
776 ge25519_p1p1_to_p3(&pt, &t1);
777 ge25519_p3_to_cached(&pi[6 - 1], &pt); /* 6p = 2*3p */
778
779 ge25519_add(&t1, p, &pi[6 - 1]);
780 ge25519_p1p1_to_p3(&pt, &t1);
781 ge25519_p3_to_cached(&pi[7 - 1], &pt); /* 7p = 6p+p */
782
783 ge25519_p3_dbl(&t1, &p4);
784 ge25519_p1p1_to_p3(&pt, &t1);
785 ge25519_p3_to_cached(&pi[8 - 1], &pt); /* 8p = 2*4p */
786 }
787
788 static void __noinline
ge25519_scalarmult_cached(ge25519_p3 * h,const unsigned char * a,const ge25519_p3 * p,const ge25519_cached pi[static8])789 ge25519_scalarmult_cached(ge25519_p3 *h, const unsigned char *a,
790 const ge25519_p3 *p, const ge25519_cached pi[static 8])
791 {
792 signed char e[64];
793 signed char carry;
794 ge25519_p1p1 r;
795 ge25519_p2 s;
796 ge25519_cached t;
797 int i;
798
799 for (i = 0; i < 32; ++i) {
800 e[2 * i + 0] = (a[i] >> 0) & 15;
801 e[2 * i + 1] = (a[i] >> 4) & 15;
802 }
803 /* each e[i] is between 0 and 15 */
804 /* e[63] is between 0 and 7 */
805
806 carry = 0;
807 for (i = 0; i < 63; ++i) {
808 e[i] += carry;
809 carry = e[i] + 8;
810 carry >>= 4;
811 e[i] -= carry * ((signed char) 1 << 4);
812 }
813 e[63] += carry;
814 /* each e[i] is between -8 and 8 */
815
816 ge25519_p3_0(h);
817
818 for (i = 63; i != 0; i--) {
819 ge25519_select_cached(&t, pi, e[i]);
820 ge25519_add(&r, h, &t);
821
822 ge25519_p1p1_to_p2(&s, &r);
823 ge25519_p2_dbl(&r, &s);
824 ge25519_p1p1_to_p2(&s, &r);
825 ge25519_p2_dbl(&r, &s);
826 ge25519_p1p1_to_p2(&s, &r);
827 ge25519_p2_dbl(&r, &s);
828 ge25519_p1p1_to_p2(&s, &r);
829 ge25519_p2_dbl(&r, &s);
830
831 ge25519_p1p1_to_p3(h, &r); /* *16 */
832 }
833 ge25519_select_cached(&t, pi, e[i]);
834 ge25519_add(&r, h, &t);
835
836 ge25519_p1p1_to_p3(h, &r);
837 }
838
839 void
ge25519_scalarmult(ge25519_p3 * h,const unsigned char * a,const ge25519_p3 * p)840 ge25519_scalarmult(ge25519_p3 *h, const unsigned char *a, const ge25519_p3 *p)
841 {
842 ge25519_cached pi[8];
843
844 ge25519_scalarmult_cache(pi, p);
845 ge25519_scalarmult_cached(h, a, p, pi);
846 }
847
848 /*
849 h = a * B (with precomputation)
850 where a = a[0]+256*a[1]+...+256^31 a[31]
851 B is the Ed25519 base point (x,4/5) with x positive
852 (as bytes: 0x5866666666666666666666666666666666666666666666666666666666666666)
853
854 Preconditions:
855 a[31] <= 127
856 */
857
858 void
ge25519_scalarmult_base(ge25519_p3 * h,const unsigned char * a)859 ge25519_scalarmult_base(ge25519_p3 *h, const unsigned char *a)
860 {
861 signed char e[64];
862 signed char carry;
863 ge25519_p1p1 r;
864 ge25519_p2 s;
865 ge25519_precomp t;
866 int i;
867
868 for (i = 0; i < 32; ++i) {
869 e[2 * i + 0] = (a[i] >> 0) & 15;
870 e[2 * i + 1] = (a[i] >> 4) & 15;
871 }
872 /* each e[i] is between 0 and 15 */
873 /* e[63] is between 0 and 7 */
874
875 carry = 0;
876 for (i = 0; i < 63; ++i) {
877 e[i] += carry;
878 carry = e[i] + 8;
879 carry >>= 4;
880 e[i] -= carry * ((signed char) 1 << 4);
881 }
882 e[63] += carry;
883 /* each e[i] is between -8 and 8 */
884
885 ge25519_p3_0(h);
886
887 for (i = 1; i < 64; i += 2) {
888 ge25519_select_base(&t, i / 2, e[i]);
889 ge25519_madd(&r, h, &t);
890 ge25519_p1p1_to_p3(h, &r);
891 }
892
893 ge25519_p3_dbl(&r, h);
894 ge25519_p1p1_to_p2(&s, &r);
895 ge25519_p2_dbl(&r, &s);
896 ge25519_p1p1_to_p2(&s, &r);
897 ge25519_p2_dbl(&r, &s);
898 ge25519_p1p1_to_p2(&s, &r);
899 ge25519_p2_dbl(&r, &s);
900 ge25519_p1p1_to_p3(h, &r);
901
902 for (i = 0; i < 64; i += 2) {
903 ge25519_select_base(&t, i / 2, e[i]);
904 ge25519_madd(&r, h, &t);
905 ge25519_p1p1_to_p3(h, &r);
906 }
907 }
908
909 /* multiply by the order of the main subgroup l = 2^252+27742317777372353535851937790883648493 */
910 static void
ge25519_mul_l(ge25519_p3 * r,const ge25519_p3 * A)911 ge25519_mul_l(ge25519_p3 *r, const ge25519_p3 *A)
912 {
913 static const signed char aslide[253] = {
914 13, 0, 0, 0, 0, -1, 0, 0, 0, 0, -11, 0, 0, 0, 0, 0, 0, -5, 0, 0, 0, 0, 0, 0, -3, 0, 0, 0, 0, -13, 0, 0, 0, 0, 7, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, -13, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 11, 0, 0, 0, 0, 0, 11, 0, 0, 0, 0, -13, 0, 0, 0, 0, 0, 0, -3, 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, 3, 0, 0, 0, 0, -11, 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0, 0, -1, 0, 0, 0, 0, -1, 0, 0, 0, 0, 7, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1
915 };
916 ge25519_cached Ai[8];
917 ge25519_p1p1 t;
918 ge25519_p3 u;
919 ge25519_p3 A2;
920 int i;
921
922 ge25519_p3_to_cached(&Ai[0], A);
923 ge25519_p3_dbl(&t, A);
924 ge25519_p1p1_to_p3(&A2, &t);
925 ge25519_add(&t, &A2, &Ai[0]);
926 ge25519_p1p1_to_p3(&u, &t);
927 ge25519_p3_to_cached(&Ai[1], &u);
928 ge25519_add(&t, &A2, &Ai[1]);
929 ge25519_p1p1_to_p3(&u, &t);
930 ge25519_p3_to_cached(&Ai[2], &u);
931 ge25519_add(&t, &A2, &Ai[2]);
932 ge25519_p1p1_to_p3(&u, &t);
933 ge25519_p3_to_cached(&Ai[3], &u);
934 ge25519_add(&t, &A2, &Ai[3]);
935 ge25519_p1p1_to_p3(&u, &t);
936 ge25519_p3_to_cached(&Ai[4], &u);
937 ge25519_add(&t, &A2, &Ai[4]);
938 ge25519_p1p1_to_p3(&u, &t);
939 ge25519_p3_to_cached(&Ai[5], &u);
940 ge25519_add(&t, &A2, &Ai[5]);
941 ge25519_p1p1_to_p3(&u, &t);
942 ge25519_p3_to_cached(&Ai[6], &u);
943 ge25519_add(&t, &A2, &Ai[6]);
944 ge25519_p1p1_to_p3(&u, &t);
945 ge25519_p3_to_cached(&Ai[7], &u);
946
947 ge25519_p3_0(r);
948
949 for (i = 252; i >= 0; --i) {
950 ge25519_p3_dbl(&t, r);
951
952 if (aslide[i] > 0) {
953 ge25519_p1p1_to_p3(&u, &t);
954 ge25519_add(&t, &u, &Ai[aslide[i] / 2]);
955 } else if (aslide[i] < 0) {
956 ge25519_p1p1_to_p3(&u, &t);
957 ge25519_sub(&t, &u, &Ai[(-aslide[i]) / 2]);
958 }
959
960 ge25519_p1p1_to_p3(r, &t);
961 }
962 }
963
964 int
ge25519_is_on_curve(const ge25519_p3 * p)965 ge25519_is_on_curve(const ge25519_p3 *p)
966 {
967 fe25519 x2;
968 fe25519 y2;
969 fe25519 z2;
970 fe25519 z4;
971 fe25519 t0;
972 fe25519 t1;
973
974 fe25519_sq(x2, p->X);
975 fe25519_sq(y2, p->Y);
976 fe25519_sq(z2, p->Z);
977 fe25519_sub(t0, y2, x2);
978 fe25519_mul(t0, t0, z2);
979
980 fe25519_mul(t1, x2, y2);
981 fe25519_mul(t1, t1, d);
982 fe25519_sq(z4, z2);
983 fe25519_add(t1, t1, z4);
984 fe25519_sub(t0, t0, t1);
985
986 return fe25519_iszero(t0);
987 }
988
989 int
ge25519_is_on_main_subgroup(const ge25519_p3 * p)990 ge25519_is_on_main_subgroup(const ge25519_p3 *p)
991 {
992 ge25519_p3 pl;
993
994 ge25519_mul_l(&pl, p);
995
996 return fe25519_iszero(pl.X);
997 }
998
/*
 * Returns 1 when the 255-bit little-endian value in s (top bit of s[31]
 * ignored) is below p = 2^255 - 19, and 0 otherwise.
 *
 * The value is >= p only when s[1..30] are all 0xff, the low 7 bits of
 * s[31] are all set, and s[0] >= 0xed. Both conditions are computed
 * without branches.
 */
int
ge25519_is_canonical(const unsigned char *s)
{
    unsigned char upper_all_ones;
    unsigned char low_ge_ed;
    unsigned int  idx;

    /* Stays 0 only when every examined bit is set. */
    upper_all_ones = (s[31] & 0x7f) ^ 0x7f;
    for (idx = 1; idx <= 30; idx++) {
        upper_all_ones |= s[idx] ^ 0xff;
    }
    /* 0 - 1 wraps, so bit 8 and above are set only for an all-ones match. */
    upper_all_ones = (((unsigned int) upper_all_ones) - 1U) >> 8;
    /* 0xec - s[0] wraps only when s[0] >= 0xed. */
    low_ge_ed = (0xed - 1U - (unsigned int) s[0]) >> 8;

    return 1 - (upper_all_ones & low_ge_ed & 1);
}
1015
1016 int
ge25519_has_small_order(const unsigned char s[32])1017 ge25519_has_small_order(const unsigned char s[32])
1018 {
1019 CRYPTO_ALIGN(16)
1020 static const unsigned char blacklist[][32] = {
1021 /* 0 (order 4) */
1022 { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1023 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1024 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
1025 /* 1 (order 1) */
1026 { 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1027 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
1028 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 },
1029 /* 2707385501144840649318225287225658788936804267575313519463743609750303402022
1030 (order 8) */
1031 { 0x26, 0xe8, 0x95, 0x8f, 0xc2, 0xb2, 0x27, 0xb0, 0x45, 0xc3, 0xf4,
1032 0x89, 0xf2, 0xef, 0x98, 0xf0, 0xd5, 0xdf, 0xac, 0x05, 0xd3, 0xc6,
1033 0x33, 0x39, 0xb1, 0x38, 0x02, 0x88, 0x6d, 0x53, 0xfc, 0x05 },
1034 /* 55188659117513257062467267217118295137698188065244968500265048394206261417927
1035 (order 8) */
1036 { 0xc7, 0x17, 0x6a, 0x70, 0x3d, 0x4d, 0xd8, 0x4f, 0xba, 0x3c, 0x0b,
1037 0x76, 0x0d, 0x10, 0x67, 0x0f, 0x2a, 0x20, 0x53, 0xfa, 0x2c, 0x39,
1038 0xcc, 0xc6, 0x4e, 0xc7, 0xfd, 0x77, 0x92, 0xac, 0x03, 0x7a },
1039 /* p-1 (order 2) */
1040 { 0xec, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1041 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1042 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
1043 /* p (=0, order 4) */
1044 { 0xed, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1045 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1046 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f },
1047 /* p+1 (=1, order 1) */
1048 { 0xee, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1049 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
1050 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x7f }
1051 };
1052 unsigned char c[7] = { 0 };
1053 unsigned int k;
1054 size_t i, j;
1055
1056 COMPILER_ASSERT(7 == sizeof blacklist / sizeof blacklist[0]);
1057 for (j = 0; j < 31; j++) {
1058 for (i = 0; i < sizeof blacklist / sizeof blacklist[0]; i++) {
1059 c[i] |= s[j] ^ blacklist[i][j];
1060 }
1061 }
1062 for (i = 0; i < sizeof blacklist / sizeof blacklist[0]; i++) {
1063 c[i] |= (s[j] & 0x7f) ^ blacklist[i][j];
1064 }
1065 k = 0;
1066 for (i = 0; i < sizeof blacklist / sizeof blacklist[0]; i++) {
1067 k |= (c[i] - 1);
1068 }
1069 return (int) ((k >> 8) & 1);
1070 }
1071
/*
 * Private helpers for sc25519_muladd().
 *
 * Scalars are processed as arrays of signed 64-bit limbs in radix 2^21:
 * limb k holds the bits that start at bit position 21*k of the
 * little-endian byte string.
 */

/*
 * Split the 32-byte little-endian integer `in` into 12 limbs.
 * Limbs 0..10 are masked to 21 bits; limb 11 keeps every remaining bit
 * (25 of them).  A 3-byte load is used when the 21 wanted bits fit in it,
 * a 4-byte load otherwise.
 */
static void
sc25519_muladd_unpack(int64_t limb[12], const unsigned char *in)
{
    unsigned int k;

    for (k = 0; k < 12; k++) {
        const unsigned int   bitpos = 21U * k;
        const unsigned int   r      = bitpos & 7U;
        const unsigned char *p      = in + (bitpos >> 3);

        if (k == 11) {
            limb[k] = (load_4(p) >> r);
        } else if (r <= 3U) {
            limb[k] = 2097151 & (load_3(p) >> r);
        } else {
            limb[k] = 2097151 & (load_4(p) >> r);
        }
    }
}

/*
 * Rounding carry for limbs first, first+2, ..., last: the multiple of 2^21
 * nearest to t[i] is moved into t[i+1].  With an arithmetic right shift
 * this leaves t[i] in [-2^20, 2^20).
 */
static void
sc25519_muladd_carry_round(int64_t *t, int first, int last)
{
    int i;

    for (i = first; i <= last; i += 2) {
        const int64_t carry = (t[i] + (int64_t) (1L << 20)) >> 21;

        t[i + 1] += carry;
        /* multiply instead of shifting so a negative carry is never
         * left-shifted */
        t[i] -= carry * ((uint64_t) 1L << 21);
    }
}

/*
 * Sequential floor carry through limbs 0..last: t[i] >> 21 is pushed into
 * t[i+1] before t[i+1] itself is processed.
 */
static void
sc25519_muladd_carry_floor(int64_t *t, int last)
{
    int i;

    for (i = 0; i <= last; i++) {
        const int64_t carry = t[i] >> 21;

        t[i + 1] += carry;
        t[i] -= carry * ((uint64_t) 1L << 21);
    }
}

/*
 * Fold limbs top, top-1, ..., bottom into the six limbs that sit 12..7
 * positions below each of them.  The constants are the ones the reference
 * code uses to replace 2^252 by its residue mod l, written as signed
 * radix-2^21 digits.  Folded limbs are left untouched; callers either
 * ignore them afterwards or reset them explicitly.
 */
static void
sc25519_muladd_fold(int64_t *t, int top, int bottom)
{
    int hi;

    for (hi = top; hi >= bottom; hi--) {
        const int64_t v  = t[hi];
        int64_t      *lo = &t[hi - 12];

        lo[0] += v * 666643;
        lo[1] += v * 470296;
        lo[2] += v * 654183;
        lo[3] -= v * 997805;
        lo[4] += v * 136657;
        lo[5] -= v * 683901;
    }
}

/*
 * Serialize limbs t[0..11] into 32 little-endian bytes.  Byte j starts at
 * bit 8*j, i.e. at bit r of limb k = 8*j / 21; when fewer than 8 bits of
 * that limb remain (r > 13) the low bits of the next limb are OR-ed in.
 */
static void
sc25519_muladd_pack(unsigned char *out, const int64_t *t)
{
    unsigned int j;

    for (j = 0; j < 32; j++) {
        const unsigned int bitpos = 8U * j;
        const unsigned int k      = bitpos / 21U;
        const unsigned int r      = bitpos - 21U * k;
        uint64_t           bits   = (uint64_t) (t[k] >> r);

        if (r > 13U && k < 11U) {
            bits |= t[k + 1] * ((uint64_t) 1 << (21U - r));
        }
        out[j] = (unsigned char) bits;
    }
}

/*
 * Input:
 *   a[0]+256*a[1]+...+256^31*a[31] = a
 *   b[0]+256*b[1]+...+256^31*b[31] = b
 *   c[0]+256*c[1]+...+256^31*c[31] = c
 *
 * Output:
 *   s[0]+256*s[1]+...+256^31*s[31] = (ab+c) mod l
 *   where l = 2^252 + 27742317777372353535851937790883648493.
 *
 * All three inputs are fully read before the first byte of s is written.
 */
void
sc25519_muladd(unsigned char *s, const unsigned char *a,
               const unsigned char *b, const unsigned char *c)
{
    int64_t      x[12];
    int64_t      y[12];
    int64_t      z[12];
    int64_t      t[24];
    unsigned int i;
    unsigned int j;

    sc25519_muladd_unpack(x, a);
    sc25519_muladd_unpack(y, b);
    sc25519_muladd_unpack(z, c);

    /* t = c + a*b as a 23-limb schoolbook product; t[23] starts at 0 */
    for (i = 0; i < 12; i++) {
        t[i] = z[i];
    }
    for (i = 12; i < 24; i++) {
        t[i] = 0;
    }
    for (i = 0; i < 12; i++) {
        for (j = 0; j < 12; j++) {
            t[i + j] += x[i] * y[j];
        }
    }

    /* bring every limb back to about 21 bits: even limbs, then odd ones */
    sc25519_muladd_carry_round(t, 0, 22);
    sc25519_muladd_carry_round(t, 1, 21);

    /* eliminate limbs 23..18 */
    sc25519_muladd_fold(t, 23, 18);
    sc25519_muladd_carry_round(t, 6, 16);
    sc25519_muladd_carry_round(t, 7, 15);

    /* eliminate limbs 17..12 */
    sc25519_muladd_fold(t, 17, 12);
    t[12] = 0;
    sc25519_muladd_carry_round(t, 0, 10);
    sc25519_muladd_carry_round(t, 1, 11);

    /* the last carry may have spilled into limb 12 again: fold once more */
    sc25519_muladd_fold(t, 12, 12);
    t[12] = 0;
    sc25519_muladd_carry_floor(t, 11);

    /* final fold of limb 12 and a last carry chain up to limb 11 */
    sc25519_muladd_fold(t, 12, 12);
    sc25519_muladd_carry_floor(t, 10);

    sc25519_muladd_pack(s, t);
}
1559
/*
 * Private helpers for sc25519_reduce().
 *
 * The 512-bit input is processed as 24 signed 64-bit limbs in radix 2^21:
 * limb k holds the bits that start at bit position 21*k of the
 * little-endian byte string.
 */

/*
 * Split the 64-byte little-endian integer `in` into 24 limbs.
 * Limbs 0..22 are masked to 21 bits; limb 23 keeps every remaining bit
 * (29 of them).  A 3-byte load is used when the 21 wanted bits fit in it,
 * a 4-byte load otherwise.
 */
static void
sc25519_reduce_unpack(int64_t limb[24], const unsigned char *in)
{
    unsigned int k;

    for (k = 0; k < 24; k++) {
        const unsigned int   bitpos = 21U * k;
        const unsigned int   r      = bitpos & 7U;
        const unsigned char *p      = in + (bitpos >> 3);

        if (k == 23) {
            limb[k] = (load_4(p) >> r);
        } else if (r <= 3U) {
            limb[k] = 2097151 & (load_3(p) >> r);
        } else {
            limb[k] = 2097151 & (load_4(p) >> r);
        }
    }
}

/*
 * Rounding carry for limbs first, first+2, ..., last: the multiple of 2^21
 * nearest to t[i] is moved into t[i+1].  With an arithmetic right shift
 * this leaves t[i] in [-2^20, 2^20).
 */
static void
sc25519_reduce_carry_round(int64_t *t, int first, int last)
{
    int i;

    for (i = first; i <= last; i += 2) {
        const int64_t carry = (t[i] + (int64_t) (1L << 20)) >> 21;

        t[i + 1] += carry;
        /* multiply instead of shifting so a negative carry is never
         * left-shifted */
        t[i] -= carry * ((uint64_t) 1L << 21);
    }
}

/*
 * Sequential floor carry through limbs 0..last: t[i] >> 21 is pushed into
 * t[i+1] before t[i+1] itself is processed.
 */
static void
sc25519_reduce_carry_floor(int64_t *t, int last)
{
    int i;

    for (i = 0; i <= last; i++) {
        const int64_t carry = t[i] >> 21;

        t[i + 1] += carry;
        t[i] -= carry * ((uint64_t) 1L << 21);
    }
}

/*
 * Fold limbs top, top-1, ..., bottom into the six limbs that sit 12..7
 * positions below each of them.  The constants are the ones the reference
 * code uses to replace 2^252 by its residue mod l, written as signed
 * radix-2^21 digits.  Folded limbs are left untouched; callers either
 * ignore them afterwards or reset them explicitly.
 */
static void
sc25519_reduce_fold(int64_t *t, int top, int bottom)
{
    int hi;

    for (hi = top; hi >= bottom; hi--) {
        const int64_t v  = t[hi];
        int64_t      *lo = &t[hi - 12];

        lo[0] += v * 666643;
        lo[1] += v * 470296;
        lo[2] += v * 654183;
        lo[3] -= v * 997805;
        lo[4] += v * 136657;
        lo[5] -= v * 683901;
    }
}

/*
 * Serialize limbs t[0..11] into 32 little-endian bytes.  Byte j starts at
 * bit 8*j, i.e. at bit r of limb k = 8*j / 21; when fewer than 8 bits of
 * that limb remain (r > 13) the low bits of the next limb are OR-ed in.
 */
static void
sc25519_reduce_pack(unsigned char *out, const int64_t *t)
{
    unsigned int j;

    for (j = 0; j < 32; j++) {
        const unsigned int bitpos = 8U * j;
        const unsigned int k      = bitpos / 21U;
        const unsigned int r      = bitpos - 21U * k;
        uint64_t           bits   = (uint64_t) (t[k] >> r);

        if (r > 13U && k < 11U) {
            bits |= t[k + 1] * ((uint64_t) 1 << (21U - r));
        }
        out[j] = (unsigned char) bits;
    }
}

/*
 * Input:
 *   s[0]+256*s[1]+...+256^63*s[63] = s
 *
 * Output:
 *   s[0]+256*s[1]+...+256^31*s[31] = s mod l
 *   where l = 2^252 + 27742317777372353535851937790883648493.
 *
 * Overwrites s in place: all 64 input bytes are read first, then only the
 * first 32 bytes are written.
 */
void
sc25519_reduce(unsigned char *s)
{
    int64_t t[24];

    sc25519_reduce_unpack(t, s);

    /* eliminate limbs 23..18 */
    sc25519_reduce_fold(t, 23, 18);
    sc25519_reduce_carry_round(t, 6, 16);
    sc25519_reduce_carry_round(t, 7, 15);

    /* eliminate limbs 17..12 */
    sc25519_reduce_fold(t, 17, 12);
    t[12] = 0;
    sc25519_reduce_carry_round(t, 0, 10);
    sc25519_reduce_carry_round(t, 1, 11);

    /* the last carry may have spilled into limb 12 again: fold once more */
    sc25519_reduce_fold(t, 12, 12);
    t[12] = 0;
    sc25519_reduce_carry_floor(t, 11);

    /* final fold of limb 12 and a last carry chain up to limb 11 */
    sc25519_reduce_fold(t, 12, 12);
    sc25519_reduce_carry_floor(t, 10);

    sc25519_reduce_pack(s, t);
}
1893
/*
 * Return 1 when the 32-byte little-endian integer `s` is strictly smaller
 * than L = 2^252+27742317777372353535851937790883648493, and 0 otherwise
 * (including s == L).
 *
 * All 32 bytes are always visited, most significant first, and the result
 * is accumulated with bit operations only: there is no data-dependent
 * branch or early exit.
 */
int
sc25519_is_canonical(const unsigned char *s)
{
    /* 2^252+27742317777372353535851937790883648493, least significant
     * byte first */
    static const unsigned char L[32] = {
        0xed, 0xd3, 0xf5, 0x5c, 0x1a, 0x63, 0x12, 0x58, 0xd6, 0x9c, 0xf7,
        0xa2, 0xde, 0xf9, 0xde, 0x14, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10
    };
    unsigned char is_less  = 0; /* becomes 1 once s is known to be below L */
    unsigned char all_same = 1; /* 1 while every higher byte equalled L's */
    unsigned int  pos;

    for (pos = 32; pos-- > 0;) {
        const int delta = s[pos] - L[pos]; /* negative iff s[pos] < L[pos] */
        const int mixed = s[pos] ^ L[pos]; /* zero iff the bytes are equal */

        /* Both shifts turn "negative" into a set low bit; like the rest of
         * this file they rely on >> of a negative int being arithmetic. */
        is_less |= (delta >> 8) & all_same;
        all_same &= (mixed - 1) >> 8;
    }

    return (is_less != 0);
}
1915
1916 static void
chi25519(fe25519 out,const fe25519 z)1917 chi25519(fe25519 out, const fe25519 z)
1918 {
1919 fe25519 t0, t1, t2, t3;
1920 int i;
1921
1922 fe25519_sq(t0, z);
1923 fe25519_mul(t1, t0, z);
1924 fe25519_sq(t0, t1);
1925 fe25519_sq(t2, t0);
1926 fe25519_sq(t2, t2);
1927 fe25519_mul(t2, t2, t0);
1928 fe25519_mul(t1, t2, z);
1929 fe25519_sq(t2, t1);
1930
1931 for (i = 1; i < 5; i++) {
1932 fe25519_sq(t2, t2);
1933 }
1934 fe25519_mul(t1, t2, t1);
1935 fe25519_sq(t2, t1);
1936 for (i = 1; i < 10; i++) {
1937 fe25519_sq(t2, t2);
1938 }
1939 fe25519_mul(t2, t2, t1);
1940 fe25519_sq(t3, t2);
1941 for (i = 1; i < 20; i++) {
1942 fe25519_sq(t3, t3);
1943 }
1944 fe25519_mul(t2, t3, t2);
1945 fe25519_sq(t2, t2);
1946 for (i = 1; i < 10; i++) {
1947 fe25519_sq(t2, t2);
1948 }
1949 fe25519_mul(t1, t2, t1);
1950 fe25519_sq(t2, t1);
1951 for (i = 1; i < 50; i++) {
1952 fe25519_sq(t2, t2);
1953 }
1954 fe25519_mul(t2, t2, t1);
1955 fe25519_sq(t3, t2);
1956 for (i = 1; i < 100; i++) {
1957 fe25519_sq(t3, t3);
1958 }
1959 fe25519_mul(t2, t3, t2);
1960 fe25519_sq(t2, t2);
1961 for (i = 1; i < 50; i++) {
1962 fe25519_sq(t2, t2);
1963 }
1964 fe25519_mul(t1, t2, t1);
1965 fe25519_sq(t1, t1);
1966 for (i = 1; i < 4; i++) {
1967 fe25519_sq(t1, t1);
1968 }
1969 fe25519_mul(out, t1, t0);
1970 }
1971
1972 void
ge25519_from_uniform(unsigned char s[32],const unsigned char r[32])1973 ge25519_from_uniform(unsigned char s[32], const unsigned char r[32])
1974 {
1975 fe25519 e;
1976 fe25519 negx;
1977 fe25519 rr2;
1978 fe25519 x, x2, x3;
1979 ge25519_p3 p3;
1980 ge25519_p1p1 p1;
1981 ge25519_p2 p2;
1982 unsigned int e_is_minus_1;
1983 unsigned char x_sign;
1984
1985 memcpy(s, r, 32);
1986 x_sign = s[31] & 0x80;
1987 s[31] &= 0x7f;
1988
1989 fe25519_frombytes(rr2, s);
1990
1991 /* elligator */
1992 fe25519_sq2(rr2, rr2);
1993 rr2[0]++;
1994 fe25519_invert(rr2, rr2);
1995 fe25519_mul(x, curve25519_A, rr2);
1996 fe25519_neg(x, x);
1997
1998 fe25519_sq(x2, x);
1999 fe25519_mul(x3, x, x2);
2000 fe25519_add(e, x3, x);
2001 fe25519_mul(x2, x2, curve25519_A);
2002 fe25519_add(e, x2, e);
2003
2004 chi25519(e, e);
2005
2006 fe25519_tobytes(s, e);
2007 e_is_minus_1 = s[1] & 1;
2008 fe25519_neg(negx, x);
2009 fe25519_cmov(x, negx, e_is_minus_1);
2010 fe25519_0(x2);
2011 fe25519_cmov(x2, curve25519_A, e_is_minus_1);
2012 fe25519_sub(x, x, x2);
2013
2014 /* yed = (x-1)/(x+1) */
2015 {
2016 fe25519 one;
2017 fe25519 x_plus_one;
2018 fe25519 x_plus_one_inv;
2019 fe25519 x_minus_one;
2020 fe25519 yed;
2021
2022 fe25519_1(one);
2023 fe25519_add(x_plus_one, x, one);
2024 fe25519_sub(x_minus_one, x, one);
2025 fe25519_invert(x_plus_one_inv, x_plus_one);
2026 fe25519_mul(yed, x_minus_one, x_plus_one_inv);
2027 fe25519_tobytes(s, yed);
2028 }
2029
2030 /* recover x */
2031 s[31] |= x_sign;
2032 if (ge25519_frombytes(&p3, s) != 0) {
2033 abort(); /* LCOV_EXCL_LINE */
2034 }
2035
2036 /* multiply by the cofactor */
2037 ge25519_p3_dbl(&p1, &p3);
2038 ge25519_p1p1_to_p2(&p2, &p1);
2039 ge25519_p2_dbl(&p1, &p2);
2040 ge25519_p1p1_to_p2(&p2, &p1);
2041 ge25519_p2_dbl(&p1, &p2);
2042 ge25519_p1p1_to_p3(&p3, &p1);
2043
2044 ge25519_p3_tobytes(s, &p3);
2045 }
2046