1 /* $NetBSD: sntrup761.c,v 1.4 2024/09/24 21:32:18 christos Exp $ */ 2 /* $OpenBSD: sntrup761.c,v 1.8 2024/09/16 05:37:05 djm Exp $ */ 3 4 /* 5 * Public Domain, Authors: 6 * - Daniel J. Bernstein 7 * - Chitchanok Chuengsatiansup 8 * - Tanja Lange 9 * - Christine van Vredendaal 10 */ 11 #include "includes.h" 12 __RCSID("$NetBSD: sntrup761.c,v 1.4 2024/09/24 21:32:18 christos Exp $"); 13 14 #include <string.h> 15 #include "crypto_api.h" 16 17 #define crypto_declassify(x, y) do {} while (0) 18 19 #define int8 crypto_int8 20 #define uint8 crypto_uint8 21 #define int16 crypto_int16 22 #define uint16 crypto_uint16 23 #define int32 crypto_int32 24 #define uint32 crypto_uint32 25 #define int64 crypto_int64 26 #define uint64 crypto_uint64 27 extern volatile crypto_int16 crypto_int16_optblocker; 28 extern volatile crypto_int32 crypto_int32_optblocker; 29 extern volatile crypto_int64 crypto_int64_optblocker; 30 31 /* from supercop-20240808/cryptoint/crypto_int16.h */ 32 /* auto-generated: cd cryptoint; ./autogen */ 33 /* cryptoint 20240806 */ 34 35 #ifndef crypto_int16_h 36 #define crypto_int16_h 37 38 #define crypto_int16 int16_t 39 #define crypto_int16_unsigned uint16_t 40 41 42 43 __attribute__((unused)) 44 static inline 45 crypto_int16 crypto_int16_load(const unsigned char *crypto_int16_s) { 46 crypto_int16 crypto_int16_z = 0; 47 crypto_int16_z |= ((crypto_int16) (*crypto_int16_s++)) << 0; 48 crypto_int16_z |= ((crypto_int16) (*crypto_int16_s++)) << 8; 49 return crypto_int16_z; 50 } 51 52 __attribute__((unused)) 53 static inline 54 void crypto_int16_store(unsigned char *crypto_int16_s,crypto_int16 crypto_int16_x) { 55 *crypto_int16_s++ = crypto_int16_x >> 0; 56 *crypto_int16_s++ = crypto_int16_x >> 8; 57 } 58 59 __attribute__((unused)) 60 static inline 61 crypto_int16 crypto_int16_negative_mask(crypto_int16 crypto_int16_x) { 62 #if defined(__GNUC__) && defined(__x86_64__) 63 __asm__ ("sarw $15,%0" : "+r"(crypto_int16_x) : : "cc"); 64 return crypto_int16_x; 65 #elif defined(__GNUC__) && defined(__aarch64__) 66 crypto_int16 crypto_int16_y; 67 __asm__ ("sbfx %w0,%w1,15,1" : "=r"(crypto_int16_y) : "r"(crypto_int16_x) : ); 68 return crypto_int16_y; 69 #else 70 crypto_int16_x >>= 16-6; 71 crypto_int16_x ^= crypto_int16_optblocker; 72 crypto_int16_x >>= 5; 73 return crypto_int16_x; 74 #endif 75 } 76 77 __attribute__((unused)) 78 static inline 79 crypto_int16_unsigned crypto_int16_unsigned_topbit_01(crypto_int16_unsigned crypto_int16_x) { 80 #if defined(__GNUC__) && defined(__x86_64__) 81 __asm__ ("shrw $15,%0" : "+r"(crypto_int16_x) : : "cc"); 82 return crypto_int16_x; 83 #elif defined(__GNUC__) && defined(__aarch64__) 84 crypto_int16 crypto_int16_y; 85 __asm__ ("ubfx %w0,%w1,15,1" : "=r"(crypto_int16_y) : "r"(crypto_int16_x) : ); 86 return crypto_int16_y; 87 #else 88 crypto_int16_x >>= 16-6; 89 crypto_int16_x ^= crypto_int16_optblocker; 90 crypto_int16_x >>= 5; 91 return crypto_int16_x; 92 #endif 93 } 94 95 __attribute__((unused)) 96 static inline 97 crypto_int16 crypto_int16_negative_01(crypto_int16 crypto_int16_x) { 98 return crypto_int16_unsigned_topbit_01(crypto_int16_x); 99 } 100 101 __attribute__((unused)) 102 static inline 103 crypto_int16 crypto_int16_topbit_mask(crypto_int16 crypto_int16_x) { 104 return crypto_int16_negative_mask(crypto_int16_x); 105 } 106 107 __attribute__((unused)) 108 static inline 109 crypto_int16 crypto_int16_topbit_01(crypto_int16 crypto_int16_x) { 110 return crypto_int16_unsigned_topbit_01(crypto_int16_x); 111 } 112 113 __attribute__((unused)) 114 static inline 115 
crypto_int16 crypto_int16_bottombit_mask(crypto_int16 crypto_int16_x) { 116 #if defined(__GNUC__) && defined(__x86_64__) 117 __asm__ ("andw $1,%0" : "+r"(crypto_int16_x) : : "cc"); 118 return -crypto_int16_x; 119 #elif defined(__GNUC__) && defined(__aarch64__) 120 crypto_int16 crypto_int16_y; 121 __asm__ ("sbfx %w0,%w1,0,1" : "=r"(crypto_int16_y) : "r"(crypto_int16_x) : ); 122 return crypto_int16_y; 123 #else 124 crypto_int16_x &= 1 ^ crypto_int16_optblocker; 125 return -crypto_int16_x; 126 #endif 127 } 128 129 __attribute__((unused)) 130 static inline 131 crypto_int16 crypto_int16_bottombit_01(crypto_int16 crypto_int16_x) { 132 #if defined(__GNUC__) && defined(__x86_64__) 133 __asm__ ("andw $1,%0" : "+r"(crypto_int16_x) : : "cc"); 134 return crypto_int16_x; 135 #elif defined(__GNUC__) && defined(__aarch64__) 136 crypto_int16 crypto_int16_y; 137 __asm__ ("ubfx %w0,%w1,0,1" : "=r"(crypto_int16_y) : "r"(crypto_int16_x) : ); 138 return crypto_int16_y; 139 #else 140 crypto_int16_x &= 1 ^ crypto_int16_optblocker; 141 return crypto_int16_x; 142 #endif 143 } 144 145 __attribute__((unused)) 146 static inline 147 crypto_int16 crypto_int16_bitinrangepublicpos_mask(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_s) { 148 #if defined(__GNUC__) && defined(__x86_64__) 149 __asm__ ("sarw %%cl,%0" : "+r"(crypto_int16_x) : "c"(crypto_int16_s) : "cc"); 150 #elif defined(__GNUC__) && defined(__aarch64__) 151 __asm__ ("sxth %w0,%w0\n asr %w0,%w0,%w1" : "+&r"(crypto_int16_x) : "r"(crypto_int16_s) : ); 152 #else 153 crypto_int16_x >>= crypto_int16_s ^ crypto_int16_optblocker; 154 #endif 155 return crypto_int16_bottombit_mask(crypto_int16_x); 156 } 157 158 __attribute__((unused)) 159 static inline 160 crypto_int16 crypto_int16_bitinrangepublicpos_01(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_s) { 161 #if defined(__GNUC__) && defined(__x86_64__) 162 __asm__ ("sarw %%cl,%0" : "+r"(crypto_int16_x) : "c"(crypto_int16_s) : "cc"); 163 #elif defined(__GNUC__) && defined(__aarch64__) 164 __asm__ ("sxth %w0,%w0\n asr %w0,%w0,%w1" : "+&r"(crypto_int16_x) : "r"(crypto_int16_s) : ); 165 #else 166 crypto_int16_x >>= crypto_int16_s ^ crypto_int16_optblocker; 167 #endif 168 return crypto_int16_bottombit_01(crypto_int16_x); 169 } 170 171 __attribute__((unused)) 172 static inline 173 crypto_int16 crypto_int16_shlmod(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_s) { 174 #if defined(__GNUC__) && defined(__x86_64__) 175 crypto_int16_s &= 15; 176 __asm__ ("shlw %%cl,%0" : "+r"(crypto_int16_x) : "c"(crypto_int16_s) : "cc"); 177 #elif defined(__GNUC__) && defined(__aarch64__) 178 __asm__ ("and %w0,%w0,15\n and %w1,%w1,65535\n lsl %w1,%w1,%w0" : "+&r"(crypto_int16_s), "+r"(crypto_int16_x) : : ); 179 #else 180 int crypto_int16_k, crypto_int16_l; 181 for (crypto_int16_l = 0,crypto_int16_k = 1;crypto_int16_k < 16;++crypto_int16_l,crypto_int16_k *= 2) 182 crypto_int16_x ^= (crypto_int16_x ^ (crypto_int16_x << crypto_int16_k)) & crypto_int16_bitinrangepublicpos_mask(crypto_int16_s,crypto_int16_l); 183 #endif 184 return crypto_int16_x; 185 } 186 187 __attribute__((unused)) 188 static inline 189 crypto_int16 crypto_int16_shrmod(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_s) { 190 #if defined(__GNUC__) && defined(__x86_64__) 191 crypto_int16_s &= 15; 192 __asm__ ("sarw %%cl,%0" : "+r"(crypto_int16_x) : "c"(crypto_int16_s) : "cc"); 193 #elif defined(__GNUC__) && defined(__aarch64__) 194 __asm__ ("and %w0,%w0,15\n sxth %w1,%w1\n asr %w1,%w1,%w0" : "+&r"(crypto_int16_s), "+r"(crypto_int16_x) : : ); 195 #else 
196 int crypto_int16_k, crypto_int16_l; 197 for (crypto_int16_l = 0,crypto_int16_k = 1;crypto_int16_k < 16;++crypto_int16_l,crypto_int16_k *= 2) 198 crypto_int16_x ^= (crypto_int16_x ^ (crypto_int16_x >> crypto_int16_k)) & crypto_int16_bitinrangepublicpos_mask(crypto_int16_s,crypto_int16_l); 199 #endif 200 return crypto_int16_x; 201 } 202 203 __attribute__((unused)) 204 static inline 205 crypto_int16 crypto_int16_bitmod_mask(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_s) { 206 crypto_int16_x = crypto_int16_shrmod(crypto_int16_x,crypto_int16_s); 207 return crypto_int16_bottombit_mask(crypto_int16_x); 208 } 209 210 __attribute__((unused)) 211 static inline 212 crypto_int16 crypto_int16_bitmod_01(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_s) { 213 crypto_int16_x = crypto_int16_shrmod(crypto_int16_x,crypto_int16_s); 214 return crypto_int16_bottombit_01(crypto_int16_x); 215 } 216 217 __attribute__((unused)) 218 static inline 219 crypto_int16 crypto_int16_nonzero_mask(crypto_int16 crypto_int16_x) { 220 #if defined(__GNUC__) && defined(__x86_64__) 221 crypto_int16 crypto_int16_q,crypto_int16_z; 222 __asm__ ("xorw %0,%0\n movw $-1,%1\n testw %2,%2\n cmovnew %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x) : "cc"); 223 return crypto_int16_z; 224 #elif defined(__GNUC__) && defined(__aarch64__) 225 crypto_int16 crypto_int16_z; 226 __asm__ ("tst %w1,65535\n csetm %w0,ne" : "=r"(crypto_int16_z) : "r"(crypto_int16_x) : "cc"); 227 return crypto_int16_z; 228 #else 229 crypto_int16_x |= -crypto_int16_x; 230 return crypto_int16_negative_mask(crypto_int16_x); 231 #endif 232 } 233 234 __attribute__((unused)) 235 static inline 236 crypto_int16 crypto_int16_nonzero_01(crypto_int16 crypto_int16_x) { 237 #if defined(__GNUC__) && defined(__x86_64__) 238 crypto_int16 crypto_int16_q,crypto_int16_z; 239 __asm__ ("xorw %0,%0\n movw $1,%1\n testw %2,%2\n cmovnew %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x) : "cc"); 240 return crypto_int16_z; 241 #elif defined(__GNUC__) && defined(__aarch64__) 242 crypto_int16 crypto_int16_z; 243 __asm__ ("tst %w1,65535\n cset %w0,ne" : "=r"(crypto_int16_z) : "r"(crypto_int16_x) : "cc"); 244 return crypto_int16_z; 245 #else 246 crypto_int16_x |= -crypto_int16_x; 247 return crypto_int16_unsigned_topbit_01(crypto_int16_x); 248 #endif 249 } 250 251 __attribute__((unused)) 252 static inline 253 crypto_int16 crypto_int16_positive_mask(crypto_int16 crypto_int16_x) { 254 #if defined(__GNUC__) && defined(__x86_64__) 255 crypto_int16 crypto_int16_q,crypto_int16_z; 256 __asm__ ("xorw %0,%0\n movw $-1,%1\n testw %2,%2\n cmovgw %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x) : "cc"); 257 return crypto_int16_z; 258 #elif defined(__GNUC__) && defined(__aarch64__) 259 crypto_int16 crypto_int16_z; 260 __asm__ ("sxth %w0,%w1\n cmp %w0,0\n csetm %w0,gt" : "=r"(crypto_int16_z) : "r"(crypto_int16_x) : "cc"); 261 return crypto_int16_z; 262 #else 263 crypto_int16 crypto_int16_z = -crypto_int16_x; 264 crypto_int16_z ^= crypto_int16_x & crypto_int16_z; 265 return crypto_int16_negative_mask(crypto_int16_z); 266 #endif 267 } 268 269 __attribute__((unused)) 270 static inline 271 crypto_int16 crypto_int16_positive_01(crypto_int16 crypto_int16_x) { 272 #if defined(__GNUC__) && defined(__x86_64__) 273 crypto_int16 crypto_int16_q,crypto_int16_z; 274 __asm__ ("xorw %0,%0\n movw $1,%1\n testw %2,%2\n cmovgw %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x) : "cc"); 275 return 
crypto_int16_z; 276 #elif defined(__GNUC__) && defined(__aarch64__) 277 crypto_int16 crypto_int16_z; 278 __asm__ ("sxth %w0,%w1\n cmp %w0,0\n cset %w0,gt" : "=r"(crypto_int16_z) : "r"(crypto_int16_x) : "cc"); 279 return crypto_int16_z; 280 #else 281 crypto_int16 crypto_int16_z = -crypto_int16_x; 282 crypto_int16_z ^= crypto_int16_x & crypto_int16_z; 283 return crypto_int16_unsigned_topbit_01(crypto_int16_z); 284 #endif 285 } 286 287 __attribute__((unused)) 288 static inline 289 crypto_int16 crypto_int16_zero_mask(crypto_int16 crypto_int16_x) { 290 #if defined(__GNUC__) && defined(__x86_64__) 291 crypto_int16 crypto_int16_q,crypto_int16_z; 292 __asm__ ("xorw %0,%0\n movw $-1,%1\n testw %2,%2\n cmovew %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x) : "cc"); 293 return crypto_int16_z; 294 #elif defined(__GNUC__) && defined(__aarch64__) 295 crypto_int16 crypto_int16_z; 296 __asm__ ("tst %w1,65535\n csetm %w0,eq" : "=r"(crypto_int16_z) : "r"(crypto_int16_x) : "cc"); 297 return crypto_int16_z; 298 #else 299 return ~crypto_int16_nonzero_mask(crypto_int16_x); 300 #endif 301 } 302 303 __attribute__((unused)) 304 static inline 305 crypto_int16 crypto_int16_zero_01(crypto_int16 crypto_int16_x) { 306 #if defined(__GNUC__) && defined(__x86_64__) 307 crypto_int16 crypto_int16_q,crypto_int16_z; 308 __asm__ ("xorw %0,%0\n movw $1,%1\n testw %2,%2\n cmovew %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x) : "cc"); 309 return crypto_int16_z; 310 #elif defined(__GNUC__) && defined(__aarch64__) 311 crypto_int16 crypto_int16_z; 312 __asm__ ("tst %w1,65535\n cset %w0,eq" : "=r"(crypto_int16_z) : "r"(crypto_int16_x) : "cc"); 313 return crypto_int16_z; 314 #else 315 return 1-crypto_int16_nonzero_01(crypto_int16_x); 316 #endif 317 } 318 319 __attribute__((unused)) 320 static inline 321 crypto_int16 crypto_int16_unequal_mask(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_y) { 322 #if defined(__GNUC__) && defined(__x86_64__) 323 crypto_int16 crypto_int16_q,crypto_int16_z; 324 __asm__ ("xorw %0,%0\n movw $-1,%1\n cmpw %3,%2\n cmovnew %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc"); 325 return crypto_int16_z; 326 #elif defined(__GNUC__) && defined(__aarch64__) 327 crypto_int16 crypto_int16_z; 328 __asm__ ("and %w0,%w1,65535\n cmp %w0,%w2,uxth\n csetm %w0,ne" : "=&r"(crypto_int16_z) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc"); 329 return crypto_int16_z; 330 #else 331 return crypto_int16_nonzero_mask(crypto_int16_x ^ crypto_int16_y); 332 #endif 333 } 334 335 __attribute__((unused)) 336 static inline 337 crypto_int16 crypto_int16_unequal_01(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_y) { 338 #if defined(__GNUC__) && defined(__x86_64__) 339 crypto_int16 crypto_int16_q,crypto_int16_z; 340 __asm__ ("xorw %0,%0\n movw $1,%1\n cmpw %3,%2\n cmovnew %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc"); 341 return crypto_int16_z; 342 #elif defined(__GNUC__) && defined(__aarch64__) 343 crypto_int16 crypto_int16_z; 344 __asm__ ("and %w0,%w1,65535\n cmp %w0,%w2,uxth\n cset %w0,ne" : "=&r"(crypto_int16_z) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc"); 345 return crypto_int16_z; 346 #else 347 return crypto_int16_nonzero_01(crypto_int16_x ^ crypto_int16_y); 348 #endif 349 } 350 351 __attribute__((unused)) 352 static inline 353 crypto_int16 crypto_int16_equal_mask(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_y) { 354 #if 
defined(__GNUC__) && defined(__x86_64__) 355 crypto_int16 crypto_int16_q,crypto_int16_z; 356 __asm__ ("xorw %0,%0\n movw $-1,%1\n cmpw %3,%2\n cmovew %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc"); 357 return crypto_int16_z; 358 #elif defined(__GNUC__) && defined(__aarch64__) 359 crypto_int16 crypto_int16_z; 360 __asm__ ("and %w0,%w1,65535\n cmp %w0,%w2,uxth\n csetm %w0,eq" : "=&r"(crypto_int16_z) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc"); 361 return crypto_int16_z; 362 #else 363 return ~crypto_int16_unequal_mask(crypto_int16_x,crypto_int16_y); 364 #endif 365 } 366 367 __attribute__((unused)) 368 static inline 369 crypto_int16 crypto_int16_equal_01(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_y) { 370 #if defined(__GNUC__) && defined(__x86_64__) 371 crypto_int16 crypto_int16_q,crypto_int16_z; 372 __asm__ ("xorw %0,%0\n movw $1,%1\n cmpw %3,%2\n cmovew %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc"); 373 return crypto_int16_z; 374 #elif defined(__GNUC__) && defined(__aarch64__) 375 crypto_int16 crypto_int16_z; 376 __asm__ ("and %w0,%w1,65535\n cmp %w0,%w2,uxth\n cset %w0,eq" : "=&r"(crypto_int16_z) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc"); 377 return crypto_int16_z; 378 #else 379 return 1-crypto_int16_unequal_01(crypto_int16_x,crypto_int16_y); 380 #endif 381 } 382 383 __attribute__((unused)) 384 static inline 385 crypto_int16 crypto_int16_min(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_y) { 386 #if defined(__GNUC__) && defined(__x86_64__) 387 __asm__ ("cmpw %1,%0\n cmovgw %1,%0" : "+r"(crypto_int16_x) : "r"(crypto_int16_y) : "cc"); 388 return crypto_int16_x; 389 #elif defined(__GNUC__) && defined(__aarch64__) 390 __asm__ ("sxth %w0,%w0\n cmp %w0,%w1,sxth\n csel %w0,%w0,%w1,lt" : "+&r"(crypto_int16_x) : "r"(crypto_int16_y) : "cc"); 391 return crypto_int16_x; 392 #else 393 crypto_int16 crypto_int16_r = crypto_int16_y ^ crypto_int16_x; 394 crypto_int16 crypto_int16_z = crypto_int16_y - crypto_int16_x; 395 crypto_int16_z ^= crypto_int16_r & (crypto_int16_z ^ crypto_int16_y); 396 crypto_int16_z = crypto_int16_negative_mask(crypto_int16_z); 397 crypto_int16_z &= crypto_int16_r; 398 return crypto_int16_x ^ crypto_int16_z; 399 #endif 400 } 401 402 __attribute__((unused)) 403 static inline 404 crypto_int16 crypto_int16_max(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_y) { 405 #if defined(__GNUC__) && defined(__x86_64__) 406 __asm__ ("cmpw %1,%0\n cmovlw %1,%0" : "+r"(crypto_int16_x) : "r"(crypto_int16_y) : "cc"); 407 return crypto_int16_x; 408 #elif defined(__GNUC__) && defined(__aarch64__) 409 __asm__ ("sxth %w0,%w0\n cmp %w0,%w1,sxth\n csel %w0,%w1,%w0,lt" : "+&r"(crypto_int16_x) : "r"(crypto_int16_y) : "cc"); 410 return crypto_int16_x; 411 #else 412 crypto_int16 crypto_int16_r = crypto_int16_y ^ crypto_int16_x; 413 crypto_int16 crypto_int16_z = crypto_int16_y - crypto_int16_x; 414 crypto_int16_z ^= crypto_int16_r & (crypto_int16_z ^ crypto_int16_y); 415 crypto_int16_z = crypto_int16_negative_mask(crypto_int16_z); 416 crypto_int16_z &= crypto_int16_r; 417 return crypto_int16_y ^ crypto_int16_z; 418 #endif 419 } 420 421 __attribute__((unused)) 422 static inline 423 void crypto_int16_minmax(crypto_int16 *crypto_int16_p,crypto_int16 *crypto_int16_q) { 424 crypto_int16 crypto_int16_x = *crypto_int16_p; 425 crypto_int16 crypto_int16_y = *crypto_int16_q; 426 #if defined(__GNUC__) && defined(__x86_64__) 427 crypto_int16 crypto_int16_z; 428 
__asm__ ("cmpw %2,%1\n movw %1,%0\n cmovgw %2,%1\n cmovgw %0,%2" : "=&r"(crypto_int16_z), "+&r"(crypto_int16_x), "+r"(crypto_int16_y) : : "cc"); 429 *crypto_int16_p = crypto_int16_x; 430 *crypto_int16_q = crypto_int16_y; 431 #elif defined(__GNUC__) && defined(__aarch64__) 432 crypto_int16 crypto_int16_r, crypto_int16_s; 433 __asm__ ("sxth %w0,%w0\n cmp %w0,%w3,sxth\n csel %w1,%w0,%w3,lt\n csel %w2,%w3,%w0,lt" : "+&r"(crypto_int16_x), "=&r"(crypto_int16_r), "=r"(crypto_int16_s) : "r"(crypto_int16_y) : "cc"); 434 *crypto_int16_p = crypto_int16_r; 435 *crypto_int16_q = crypto_int16_s; 436 #else 437 crypto_int16 crypto_int16_r = crypto_int16_y ^ crypto_int16_x; 438 crypto_int16 crypto_int16_z = crypto_int16_y - crypto_int16_x; 439 crypto_int16_z ^= crypto_int16_r & (crypto_int16_z ^ crypto_int16_y); 440 crypto_int16_z = crypto_int16_negative_mask(crypto_int16_z); 441 crypto_int16_z &= crypto_int16_r; 442 crypto_int16_x ^= crypto_int16_z; 443 crypto_int16_y ^= crypto_int16_z; 444 *crypto_int16_p = crypto_int16_x; 445 *crypto_int16_q = crypto_int16_y; 446 #endif 447 } 448 449 __attribute__((unused)) 450 static inline 451 crypto_int16 crypto_int16_smaller_mask(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_y) { 452 #if defined(__GNUC__) && defined(__x86_64__) 453 crypto_int16 crypto_int16_q,crypto_int16_z; 454 __asm__ ("xorw %0,%0\n movw $-1,%1\n cmpw %3,%2\n cmovlw %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc"); 455 return crypto_int16_z; 456 #elif defined(__GNUC__) && defined(__aarch64__) 457 crypto_int16 crypto_int16_z; 458 __asm__ ("sxth %w0,%w1\n cmp %w0,%w2,sxth\n csetm %w0,lt" : "=&r"(crypto_int16_z) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc"); 459 return crypto_int16_z; 460 #else 461 crypto_int16 crypto_int16_r = crypto_int16_x ^ crypto_int16_y; 462 crypto_int16 crypto_int16_z = crypto_int16_x - crypto_int16_y; 463 crypto_int16_z ^= crypto_int16_r & (crypto_int16_z ^ crypto_int16_x); 464 return crypto_int16_negative_mask(crypto_int16_z); 465 #endif 466 } 467 468 __attribute__((unused)) 469 static inline 470 crypto_int16 crypto_int16_smaller_01(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_y) { 471 #if defined(__GNUC__) && defined(__x86_64__) 472 crypto_int16 crypto_int16_q,crypto_int16_z; 473 __asm__ ("xorw %0,%0\n movw $1,%1\n cmpw %3,%2\n cmovlw %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc"); 474 return crypto_int16_z; 475 #elif defined(__GNUC__) && defined(__aarch64__) 476 crypto_int16 crypto_int16_z; 477 __asm__ ("sxth %w0,%w1\n cmp %w0,%w2,sxth\n cset %w0,lt" : "=&r"(crypto_int16_z) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc"); 478 return crypto_int16_z; 479 #else 480 crypto_int16 crypto_int16_r = crypto_int16_x ^ crypto_int16_y; 481 crypto_int16 crypto_int16_z = crypto_int16_x - crypto_int16_y; 482 crypto_int16_z ^= crypto_int16_r & (crypto_int16_z ^ crypto_int16_x); 483 return crypto_int16_unsigned_topbit_01(crypto_int16_z); 484 #endif 485 } 486 487 __attribute__((unused)) 488 static inline 489 crypto_int16 crypto_int16_leq_mask(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_y) { 490 #if defined(__GNUC__) && defined(__x86_64__) 491 crypto_int16 crypto_int16_q,crypto_int16_z; 492 __asm__ ("xorw %0,%0\n movw $-1,%1\n cmpw %3,%2\n cmovlew %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc"); 493 return crypto_int16_z; 494 #elif defined(__GNUC__) && defined(__aarch64__) 495 crypto_int16 
crypto_int16_z; 496 __asm__ ("sxth %w0,%w1\n cmp %w0,%w2,sxth\n csetm %w0,le" : "=&r"(crypto_int16_z) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc"); 497 return crypto_int16_z; 498 #else 499 return ~crypto_int16_smaller_mask(crypto_int16_y,crypto_int16_x); 500 #endif 501 } 502 503 __attribute__((unused)) 504 static inline 505 crypto_int16 crypto_int16_leq_01(crypto_int16 crypto_int16_x,crypto_int16 crypto_int16_y) { 506 #if defined(__GNUC__) && defined(__x86_64__) 507 crypto_int16 crypto_int16_q,crypto_int16_z; 508 __asm__ ("xorw %0,%0\n movw $1,%1\n cmpw %3,%2\n cmovlew %1,%0" : "=&r"(crypto_int16_z), "=&r"(crypto_int16_q) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc"); 509 return crypto_int16_z; 510 #elif defined(__GNUC__) && defined(__aarch64__) 511 crypto_int16 crypto_int16_z; 512 __asm__ ("sxth %w0,%w1\n cmp %w0,%w2,sxth\n cset %w0,le" : "=&r"(crypto_int16_z) : "r"(crypto_int16_x), "r"(crypto_int16_y) : "cc"); 513 return crypto_int16_z; 514 #else 515 return 1-crypto_int16_smaller_01(crypto_int16_y,crypto_int16_x); 516 #endif 517 } 518 519 __attribute__((unused)) 520 static inline 521 int crypto_int16_ones_num(crypto_int16 crypto_int16_x) { 522 crypto_int16_unsigned crypto_int16_y = crypto_int16_x; 523 const crypto_int16 C0 = 0x5555; 524 const crypto_int16 C1 = 0x3333; 525 const crypto_int16 C2 = 0x0f0f; 526 crypto_int16_y -= ((crypto_int16_y >> 1) & C0); 527 crypto_int16_y = (crypto_int16_y & C1) + ((crypto_int16_y >> 2) & C1); 528 crypto_int16_y = (crypto_int16_y + (crypto_int16_y >> 4)) & C2; 529 crypto_int16_y = (crypto_int16_y + (crypto_int16_y >> 8)) & 0xff; 530 return crypto_int16_y; 531 } 532 533 __attribute__((unused)) 534 static inline 535 int crypto_int16_bottomzeros_num(crypto_int16 crypto_int16_x) { 536 #if defined(__GNUC__) && defined(__x86_64__) 537 crypto_int16 fallback = 16; 538 __asm__ ("bsfw %0,%0\n cmovew %1,%0" : "+&r"(crypto_int16_x) : "r"(fallback) : "cc"); 539 return crypto_int16_x; 540 #elif defined(__GNUC__) && defined(__aarch64__) 541 int64_t crypto_int16_y; 542 __asm__ ("orr %w0,%w1,-65536\n rbit %w0,%w0\n clz %w0,%w0" : "=r"(crypto_int16_y) : "r"(crypto_int16_x) : ); 543 return crypto_int16_y; 544 #else 545 crypto_int16 crypto_int16_y = crypto_int16_x ^ (crypto_int16_x-1); 546 crypto_int16_y = ((crypto_int16) crypto_int16_y) >> 1; 547 crypto_int16_y &= ~(crypto_int16_x & (((crypto_int16) 1) << (16-1))); 548 return crypto_int16_ones_num(crypto_int16_y); 549 #endif 550 } 551 552 #endif 553 554 /* from supercop-20240808/cryptoint/crypto_int32.h */ 555 /* auto-generated: cd cryptoint; ./autogen */ 556 /* cryptoint 20240806 */ 557 558 #ifndef crypto_int32_h 559 #define crypto_int32_h 560 561 #define crypto_int32 int32_t 562 #define crypto_int32_unsigned uint32_t 563 564 565 566 __attribute__((unused)) 567 static inline 568 crypto_int32 crypto_int32_load(const unsigned char *crypto_int32_s) { 569 crypto_int32 crypto_int32_z = 0; 570 crypto_int32_z |= ((crypto_int32) (*crypto_int32_s++)) << 0; 571 crypto_int32_z |= ((crypto_int32) (*crypto_int32_s++)) << 8; 572 crypto_int32_z |= ((crypto_int32) (*crypto_int32_s++)) << 16; 573 crypto_int32_z |= ((crypto_int32) (*crypto_int32_s++)) << 24; 574 return crypto_int32_z; 575 } 576 577 __attribute__((unused)) 578 static inline 579 void crypto_int32_store(unsigned char *crypto_int32_s,crypto_int32 crypto_int32_x) { 580 *crypto_int32_s++ = crypto_int32_x >> 0; 581 *crypto_int32_s++ = crypto_int32_x >> 8; 582 *crypto_int32_s++ = crypto_int32_x >> 16; 583 *crypto_int32_s++ = crypto_int32_x >> 24; 584 } 585 586 
__attribute__((unused)) 587 static inline 588 crypto_int32 crypto_int32_negative_mask(crypto_int32 crypto_int32_x) { 589 #if defined(__GNUC__) && defined(__x86_64__) 590 __asm__ ("sarl $31,%0" : "+r"(crypto_int32_x) : : "cc"); 591 return crypto_int32_x; 592 #elif defined(__GNUC__) && defined(__aarch64__) 593 crypto_int32 crypto_int32_y; 594 __asm__ ("asr %w0,%w1,31" : "=r"(crypto_int32_y) : "r"(crypto_int32_x) : ); 595 return crypto_int32_y; 596 #else 597 crypto_int32_x >>= 32-6; 598 crypto_int32_x ^= crypto_int32_optblocker; 599 crypto_int32_x >>= 5; 600 return crypto_int32_x; 601 #endif 602 } 603 604 __attribute__((unused)) 605 static inline 606 crypto_int32_unsigned crypto_int32_unsigned_topbit_01(crypto_int32_unsigned crypto_int32_x) { 607 #if defined(__GNUC__) && defined(__x86_64__) 608 __asm__ ("shrl $31,%0" : "+r"(crypto_int32_x) : : "cc"); 609 return crypto_int32_x; 610 #elif defined(__GNUC__) && defined(__aarch64__) 611 crypto_int32 crypto_int32_y; 612 __asm__ ("lsr %w0,%w1,31" : "=r"(crypto_int32_y) : "r"(crypto_int32_x) : ); 613 return crypto_int32_y; 614 #else 615 crypto_int32_x >>= 32-6; 616 crypto_int32_x ^= crypto_int32_optblocker; 617 crypto_int32_x >>= 5; 618 return crypto_int32_x; 619 #endif 620 } 621 622 __attribute__((unused)) 623 static inline 624 crypto_int32 crypto_int32_negative_01(crypto_int32 crypto_int32_x) { 625 return crypto_int32_unsigned_topbit_01(crypto_int32_x); 626 } 627 628 __attribute__((unused)) 629 static inline 630 crypto_int32 crypto_int32_topbit_mask(crypto_int32 crypto_int32_x) { 631 return crypto_int32_negative_mask(crypto_int32_x); 632 } 633 634 __attribute__((unused)) 635 static inline 636 crypto_int32 crypto_int32_topbit_01(crypto_int32 crypto_int32_x) { 637 return crypto_int32_unsigned_topbit_01(crypto_int32_x); 638 } 639 640 __attribute__((unused)) 641 static inline 642 crypto_int32 crypto_int32_bottombit_mask(crypto_int32 crypto_int32_x) { 643 #if defined(__GNUC__) && defined(__x86_64__) 644 __asm__ ("andl $1,%0" : "+r"(crypto_int32_x) : : "cc"); 645 return -crypto_int32_x; 646 #elif defined(__GNUC__) && defined(__aarch64__) 647 crypto_int32 crypto_int32_y; 648 __asm__ ("sbfx %w0,%w1,0,1" : "=r"(crypto_int32_y) : "r"(crypto_int32_x) : ); 649 return crypto_int32_y; 650 #else 651 crypto_int32_x &= 1 ^ crypto_int32_optblocker; 652 return -crypto_int32_x; 653 #endif 654 } 655 656 __attribute__((unused)) 657 static inline 658 crypto_int32 crypto_int32_bottombit_01(crypto_int32 crypto_int32_x) { 659 #if defined(__GNUC__) && defined(__x86_64__) 660 __asm__ ("andl $1,%0" : "+r"(crypto_int32_x) : : "cc"); 661 return crypto_int32_x; 662 #elif defined(__GNUC__) && defined(__aarch64__) 663 crypto_int32 crypto_int32_y; 664 __asm__ ("ubfx %w0,%w1,0,1" : "=r"(crypto_int32_y) : "r"(crypto_int32_x) : ); 665 return crypto_int32_y; 666 #else 667 crypto_int32_x &= 1 ^ crypto_int32_optblocker; 668 return crypto_int32_x; 669 #endif 670 } 671 672 __attribute__((unused)) 673 static inline 674 crypto_int32 crypto_int32_bitinrangepublicpos_mask(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_s) { 675 #if defined(__GNUC__) && defined(__x86_64__) 676 __asm__ ("sarl %%cl,%0" : "+r"(crypto_int32_x) : "c"(crypto_int32_s) : "cc"); 677 #elif defined(__GNUC__) && defined(__aarch64__) 678 __asm__ ("asr %w0,%w0,%w1" : "+r"(crypto_int32_x) : "r"(crypto_int32_s) : ); 679 #else 680 crypto_int32_x >>= crypto_int32_s ^ crypto_int32_optblocker; 681 #endif 682 return crypto_int32_bottombit_mask(crypto_int32_x); 683 } 684 685 __attribute__((unused)) 686 static inline 687 
crypto_int32 crypto_int32_bitinrangepublicpos_01(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_s) { 688 #if defined(__GNUC__) && defined(__x86_64__) 689 __asm__ ("sarl %%cl,%0" : "+r"(crypto_int32_x) : "c"(crypto_int32_s) : "cc"); 690 #elif defined(__GNUC__) && defined(__aarch64__) 691 __asm__ ("asr %w0,%w0,%w1" : "+r"(crypto_int32_x) : "r"(crypto_int32_s) : ); 692 #else 693 crypto_int32_x >>= crypto_int32_s ^ crypto_int32_optblocker; 694 #endif 695 return crypto_int32_bottombit_01(crypto_int32_x); 696 } 697 698 __attribute__((unused)) 699 static inline 700 crypto_int32 crypto_int32_shlmod(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_s) { 701 #if defined(__GNUC__) && defined(__x86_64__) 702 __asm__ ("shll %%cl,%0" : "+r"(crypto_int32_x) : "c"(crypto_int32_s) : "cc"); 703 #elif defined(__GNUC__) && defined(__aarch64__) 704 __asm__ ("lsl %w0,%w0,%w1" : "+r"(crypto_int32_x) : "r"(crypto_int32_s) : ); 705 #else 706 int crypto_int32_k, crypto_int32_l; 707 for (crypto_int32_l = 0,crypto_int32_k = 1;crypto_int32_k < 32;++crypto_int32_l,crypto_int32_k *= 2) 708 crypto_int32_x ^= (crypto_int32_x ^ (crypto_int32_x << crypto_int32_k)) & crypto_int32_bitinrangepublicpos_mask(crypto_int32_s,crypto_int32_l); 709 #endif 710 return crypto_int32_x; 711 } 712 713 __attribute__((unused)) 714 static inline 715 crypto_int32 crypto_int32_shrmod(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_s) { 716 #if defined(__GNUC__) && defined(__x86_64__) 717 __asm__ ("sarl %%cl,%0" : "+r"(crypto_int32_x) : "c"(crypto_int32_s) : "cc"); 718 #elif defined(__GNUC__) && defined(__aarch64__) 719 __asm__ ("asr %w0,%w0,%w1" : "+r"(crypto_int32_x) : "r"(crypto_int32_s) : ); 720 #else 721 int crypto_int32_k, crypto_int32_l; 722 for (crypto_int32_l = 0,crypto_int32_k = 1;crypto_int32_k < 32;++crypto_int32_l,crypto_int32_k *= 2) 723 crypto_int32_x ^= (crypto_int32_x ^ (crypto_int32_x >> crypto_int32_k)) & crypto_int32_bitinrangepublicpos_mask(crypto_int32_s,crypto_int32_l); 724 #endif 725 return crypto_int32_x; 726 } 727 728 __attribute__((unused)) 729 static inline 730 crypto_int32 crypto_int32_bitmod_mask(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_s) { 731 crypto_int32_x = crypto_int32_shrmod(crypto_int32_x,crypto_int32_s); 732 return crypto_int32_bottombit_mask(crypto_int32_x); 733 } 734 735 __attribute__((unused)) 736 static inline 737 crypto_int32 crypto_int32_bitmod_01(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_s) { 738 crypto_int32_x = crypto_int32_shrmod(crypto_int32_x,crypto_int32_s); 739 return crypto_int32_bottombit_01(crypto_int32_x); 740 } 741 742 __attribute__((unused)) 743 static inline 744 crypto_int32 crypto_int32_nonzero_mask(crypto_int32 crypto_int32_x) { 745 #if defined(__GNUC__) && defined(__x86_64__) 746 crypto_int32 crypto_int32_q,crypto_int32_z; 747 __asm__ ("xorl %0,%0\n movl $-1,%1\n testl %2,%2\n cmovnel %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x) : "cc"); 748 return crypto_int32_z; 749 #elif defined(__GNUC__) && defined(__aarch64__) 750 crypto_int32 crypto_int32_z; 751 __asm__ ("cmp %w1,0\n csetm %w0,ne" : "=r"(crypto_int32_z) : "r"(crypto_int32_x) : "cc"); 752 return crypto_int32_z; 753 #else 754 crypto_int32_x |= -crypto_int32_x; 755 return crypto_int32_negative_mask(crypto_int32_x); 756 #endif 757 } 758 759 __attribute__((unused)) 760 static inline 761 crypto_int32 crypto_int32_nonzero_01(crypto_int32 crypto_int32_x) { 762 #if defined(__GNUC__) && defined(__x86_64__) 763 crypto_int32 crypto_int32_q,crypto_int32_z; 764 __asm__ 
("xorl %0,%0\n movl $1,%1\n testl %2,%2\n cmovnel %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x) : "cc"); 765 return crypto_int32_z; 766 #elif defined(__GNUC__) && defined(__aarch64__) 767 crypto_int32 crypto_int32_z; 768 __asm__ ("cmp %w1,0\n cset %w0,ne" : "=r"(crypto_int32_z) : "r"(crypto_int32_x) : "cc"); 769 return crypto_int32_z; 770 #else 771 crypto_int32_x |= -crypto_int32_x; 772 return crypto_int32_unsigned_topbit_01(crypto_int32_x); 773 #endif 774 } 775 776 __attribute__((unused)) 777 static inline 778 crypto_int32 crypto_int32_positive_mask(crypto_int32 crypto_int32_x) { 779 #if defined(__GNUC__) && defined(__x86_64__) 780 crypto_int32 crypto_int32_q,crypto_int32_z; 781 __asm__ ("xorl %0,%0\n movl $-1,%1\n testl %2,%2\n cmovgl %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x) : "cc"); 782 return crypto_int32_z; 783 #elif defined(__GNUC__) && defined(__aarch64__) 784 crypto_int32 crypto_int32_z; 785 __asm__ ("cmp %w1,0\n csetm %w0,gt" : "=r"(crypto_int32_z) : "r"(crypto_int32_x) : "cc"); 786 return crypto_int32_z; 787 #else 788 crypto_int32 crypto_int32_z = -crypto_int32_x; 789 crypto_int32_z ^= crypto_int32_x & crypto_int32_z; 790 return crypto_int32_negative_mask(crypto_int32_z); 791 #endif 792 } 793 794 __attribute__((unused)) 795 static inline 796 crypto_int32 crypto_int32_positive_01(crypto_int32 crypto_int32_x) { 797 #if defined(__GNUC__) && defined(__x86_64__) 798 crypto_int32 crypto_int32_q,crypto_int32_z; 799 __asm__ ("xorl %0,%0\n movl $1,%1\n testl %2,%2\n cmovgl %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x) : "cc"); 800 return crypto_int32_z; 801 #elif defined(__GNUC__) && defined(__aarch64__) 802 crypto_int32 crypto_int32_z; 803 __asm__ ("cmp %w1,0\n cset %w0,gt" : "=r"(crypto_int32_z) : "r"(crypto_int32_x) : "cc"); 804 return crypto_int32_z; 805 #else 806 crypto_int32 crypto_int32_z = -crypto_int32_x; 807 crypto_int32_z ^= crypto_int32_x & crypto_int32_z; 808 return crypto_int32_unsigned_topbit_01(crypto_int32_z); 809 #endif 810 } 811 812 __attribute__((unused)) 813 static inline 814 crypto_int32 crypto_int32_zero_mask(crypto_int32 crypto_int32_x) { 815 #if defined(__GNUC__) && defined(__x86_64__) 816 crypto_int32 crypto_int32_q,crypto_int32_z; 817 __asm__ ("xorl %0,%0\n movl $-1,%1\n testl %2,%2\n cmovel %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x) : "cc"); 818 return crypto_int32_z; 819 #elif defined(__GNUC__) && defined(__aarch64__) 820 crypto_int32 crypto_int32_z; 821 __asm__ ("cmp %w1,0\n csetm %w0,eq" : "=r"(crypto_int32_z) : "r"(crypto_int32_x) : "cc"); 822 return crypto_int32_z; 823 #else 824 return ~crypto_int32_nonzero_mask(crypto_int32_x); 825 #endif 826 } 827 828 __attribute__((unused)) 829 static inline 830 crypto_int32 crypto_int32_zero_01(crypto_int32 crypto_int32_x) { 831 #if defined(__GNUC__) && defined(__x86_64__) 832 crypto_int32 crypto_int32_q,crypto_int32_z; 833 __asm__ ("xorl %0,%0\n movl $1,%1\n testl %2,%2\n cmovel %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x) : "cc"); 834 return crypto_int32_z; 835 #elif defined(__GNUC__) && defined(__aarch64__) 836 crypto_int32 crypto_int32_z; 837 __asm__ ("cmp %w1,0\n cset %w0,eq" : "=r"(crypto_int32_z) : "r"(crypto_int32_x) : "cc"); 838 return crypto_int32_z; 839 #else 840 return 1-crypto_int32_nonzero_01(crypto_int32_x); 841 #endif 842 } 843 844 __attribute__((unused)) 845 static inline 846 crypto_int32 crypto_int32_unequal_mask(crypto_int32 
crypto_int32_x,crypto_int32 crypto_int32_y) { 847 #if defined(__GNUC__) && defined(__x86_64__) 848 crypto_int32 crypto_int32_q,crypto_int32_z; 849 __asm__ ("xorl %0,%0\n movl $-1,%1\n cmpl %3,%2\n cmovnel %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc"); 850 return crypto_int32_z; 851 #elif defined(__GNUC__) && defined(__aarch64__) 852 crypto_int32 crypto_int32_z; 853 __asm__ ("cmp %w1,%w2\n csetm %w0,ne" : "=r"(crypto_int32_z) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc"); 854 return crypto_int32_z; 855 #else 856 return crypto_int32_nonzero_mask(crypto_int32_x ^ crypto_int32_y); 857 #endif 858 } 859 860 __attribute__((unused)) 861 static inline 862 crypto_int32 crypto_int32_unequal_01(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_y) { 863 #if defined(__GNUC__) && defined(__x86_64__) 864 crypto_int32 crypto_int32_q,crypto_int32_z; 865 __asm__ ("xorl %0,%0\n movl $1,%1\n cmpl %3,%2\n cmovnel %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc"); 866 return crypto_int32_z; 867 #elif defined(__GNUC__) && defined(__aarch64__) 868 crypto_int32 crypto_int32_z; 869 __asm__ ("cmp %w1,%w2\n cset %w0,ne" : "=r"(crypto_int32_z) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc"); 870 return crypto_int32_z; 871 #else 872 return crypto_int32_nonzero_01(crypto_int32_x ^ crypto_int32_y); 873 #endif 874 } 875 876 __attribute__((unused)) 877 static inline 878 crypto_int32 crypto_int32_equal_mask(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_y) { 879 #if defined(__GNUC__) && defined(__x86_64__) 880 crypto_int32 crypto_int32_q,crypto_int32_z; 881 __asm__ ("xorl %0,%0\n movl $-1,%1\n cmpl %3,%2\n cmovel %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc"); 882 return crypto_int32_z; 883 #elif defined(__GNUC__) && defined(__aarch64__) 884 crypto_int32 crypto_int32_z; 885 __asm__ ("cmp %w1,%w2\n csetm %w0,eq" : "=r"(crypto_int32_z) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc"); 886 return crypto_int32_z; 887 #else 888 return ~crypto_int32_unequal_mask(crypto_int32_x,crypto_int32_y); 889 #endif 890 } 891 892 __attribute__((unused)) 893 static inline 894 crypto_int32 crypto_int32_equal_01(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_y) { 895 #if defined(__GNUC__) && defined(__x86_64__) 896 crypto_int32 crypto_int32_q,crypto_int32_z; 897 __asm__ ("xorl %0,%0\n movl $1,%1\n cmpl %3,%2\n cmovel %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc"); 898 return crypto_int32_z; 899 #elif defined(__GNUC__) && defined(__aarch64__) 900 crypto_int32 crypto_int32_z; 901 __asm__ ("cmp %w1,%w2\n cset %w0,eq" : "=r"(crypto_int32_z) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc"); 902 return crypto_int32_z; 903 #else 904 return 1-crypto_int32_unequal_01(crypto_int32_x,crypto_int32_y); 905 #endif 906 } 907 908 __attribute__((unused)) 909 static inline 910 crypto_int32 crypto_int32_min(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_y) { 911 #if defined(__GNUC__) && defined(__x86_64__) 912 __asm__ ("cmpl %1,%0\n cmovgl %1,%0" : "+r"(crypto_int32_x) : "r"(crypto_int32_y) : "cc"); 913 return crypto_int32_x; 914 #elif defined(__GNUC__) && defined(__aarch64__) 915 __asm__ ("cmp %w0,%w1\n csel %w0,%w0,%w1,lt" : "+r"(crypto_int32_x) : "r"(crypto_int32_y) : "cc"); 916 return crypto_int32_x; 917 #else 918 crypto_int64 crypto_int32_r = (crypto_int64)crypto_int32_y ^ 
(crypto_int64)crypto_int32_x; 919 crypto_int64 crypto_int32_z = (crypto_int64)crypto_int32_y - (crypto_int64)crypto_int32_x; 920 crypto_int32_z ^= crypto_int32_r & (crypto_int32_z ^ crypto_int32_y); 921 crypto_int32_z = crypto_int32_negative_mask(crypto_int32_z); 922 crypto_int32_z &= crypto_int32_r; 923 return crypto_int32_x ^ crypto_int32_z; 924 #endif 925 } 926 927 __attribute__((unused)) 928 static inline 929 crypto_int32 crypto_int32_max(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_y) { 930 #if defined(__GNUC__) && defined(__x86_64__) 931 __asm__ ("cmpl %1,%0\n cmovll %1,%0" : "+r"(crypto_int32_x) : "r"(crypto_int32_y) : "cc"); 932 return crypto_int32_x; 933 #elif defined(__GNUC__) && defined(__aarch64__) 934 __asm__ ("cmp %w0,%w1\n csel %w0,%w1,%w0,lt" : "+r"(crypto_int32_x) : "r"(crypto_int32_y) : "cc"); 935 return crypto_int32_x; 936 #else 937 crypto_int64 crypto_int32_r = (crypto_int64)crypto_int32_y ^ (crypto_int64)crypto_int32_x; 938 crypto_int64 crypto_int32_z = (crypto_int64)crypto_int32_y - (crypto_int64)crypto_int32_x; 939 crypto_int32_z ^= crypto_int32_r & (crypto_int32_z ^ crypto_int32_y); 940 crypto_int32_z = crypto_int32_negative_mask(crypto_int32_z); 941 crypto_int32_z &= crypto_int32_r; 942 return crypto_int32_y ^ crypto_int32_z; 943 #endif 944 } 945 946 __attribute__((unused)) 947 static inline 948 void crypto_int32_minmax(crypto_int32 *crypto_int32_p,crypto_int32 *crypto_int32_q) { 949 crypto_int32 crypto_int32_x = *crypto_int32_p; 950 crypto_int32 crypto_int32_y = *crypto_int32_q; 951 #if defined(__GNUC__) && defined(__x86_64__) 952 crypto_int32 crypto_int32_z; 953 __asm__ ("cmpl %2,%1\n movl %1,%0\n cmovgl %2,%1\n cmovgl %0,%2" : "=&r"(crypto_int32_z), "+&r"(crypto_int32_x), "+r"(crypto_int32_y) : : "cc"); 954 *crypto_int32_p = crypto_int32_x; 955 *crypto_int32_q = crypto_int32_y; 956 #elif defined(__GNUC__) && defined(__aarch64__) 957 crypto_int32 crypto_int32_r, crypto_int32_s; 958 __asm__ ("cmp %w2,%w3\n csel %w0,%w2,%w3,lt\n csel %w1,%w3,%w2,lt" : "=&r"(crypto_int32_r), "=r"(crypto_int32_s) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc"); 959 *crypto_int32_p = crypto_int32_r; 960 *crypto_int32_q = crypto_int32_s; 961 #else 962 crypto_int64 crypto_int32_r = (crypto_int64)crypto_int32_y ^ (crypto_int64)crypto_int32_x; 963 crypto_int64 crypto_int32_z = (crypto_int64)crypto_int32_y - (crypto_int64)crypto_int32_x; 964 crypto_int32_z ^= crypto_int32_r & (crypto_int32_z ^ crypto_int32_y); 965 crypto_int32_z = crypto_int32_negative_mask(crypto_int32_z); 966 crypto_int32_z &= crypto_int32_r; 967 crypto_int32_x ^= crypto_int32_z; 968 crypto_int32_y ^= crypto_int32_z; 969 *crypto_int32_p = crypto_int32_x; 970 *crypto_int32_q = crypto_int32_y; 971 #endif 972 } 973 974 __attribute__((unused)) 975 static inline 976 crypto_int32 crypto_int32_smaller_mask(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_y) { 977 #if defined(__GNUC__) && defined(__x86_64__) 978 crypto_int32 crypto_int32_q,crypto_int32_z; 979 __asm__ ("xorl %0,%0\n movl $-1,%1\n cmpl %3,%2\n cmovll %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc"); 980 return crypto_int32_z; 981 #elif defined(__GNUC__) && defined(__aarch64__) 982 crypto_int32 crypto_int32_z; 983 __asm__ ("cmp %w1,%w2\n csetm %w0,lt" : "=r"(crypto_int32_z) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc"); 984 return crypto_int32_z; 985 #else 986 crypto_int32 crypto_int32_r = crypto_int32_x ^ crypto_int32_y; 987 crypto_int32 crypto_int32_z = crypto_int32_x - crypto_int32_y; 
988 crypto_int32_z ^= crypto_int32_r & (crypto_int32_z ^ crypto_int32_x); 989 return crypto_int32_negative_mask(crypto_int32_z); 990 #endif 991 } 992 993 __attribute__((unused)) 994 static inline 995 crypto_int32 crypto_int32_smaller_01(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_y) { 996 #if defined(__GNUC__) && defined(__x86_64__) 997 crypto_int32 crypto_int32_q,crypto_int32_z; 998 __asm__ ("xorl %0,%0\n movl $1,%1\n cmpl %3,%2\n cmovll %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc"); 999 return crypto_int32_z; 1000 #elif defined(__GNUC__) && defined(__aarch64__) 1001 crypto_int32 crypto_int32_z; 1002 __asm__ ("cmp %w1,%w2\n cset %w0,lt" : "=r"(crypto_int32_z) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc"); 1003 return crypto_int32_z; 1004 #else 1005 crypto_int32 crypto_int32_r = crypto_int32_x ^ crypto_int32_y; 1006 crypto_int32 crypto_int32_z = crypto_int32_x - crypto_int32_y; 1007 crypto_int32_z ^= crypto_int32_r & (crypto_int32_z ^ crypto_int32_x); 1008 return crypto_int32_unsigned_topbit_01(crypto_int32_z); 1009 #endif 1010 } 1011 1012 __attribute__((unused)) 1013 static inline 1014 crypto_int32 crypto_int32_leq_mask(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_y) { 1015 #if defined(__GNUC__) && defined(__x86_64__) 1016 crypto_int32 crypto_int32_q,crypto_int32_z; 1017 __asm__ ("xorl %0,%0\n movl $-1,%1\n cmpl %3,%2\n cmovlel %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc"); 1018 return crypto_int32_z; 1019 #elif defined(__GNUC__) && defined(__aarch64__) 1020 crypto_int32 crypto_int32_z; 1021 __asm__ ("cmp %w1,%w2\n csetm %w0,le" : "=r"(crypto_int32_z) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc"); 1022 return crypto_int32_z; 1023 #else 1024 return ~crypto_int32_smaller_mask(crypto_int32_y,crypto_int32_x); 1025 #endif 1026 } 1027 1028 __attribute__((unused)) 1029 static inline 1030 crypto_int32 crypto_int32_leq_01(crypto_int32 crypto_int32_x,crypto_int32 crypto_int32_y) { 1031 #if defined(__GNUC__) && defined(__x86_64__) 1032 crypto_int32 crypto_int32_q,crypto_int32_z; 1033 __asm__ ("xorl %0,%0\n movl $1,%1\n cmpl %3,%2\n cmovlel %1,%0" : "=&r"(crypto_int32_z), "=&r"(crypto_int32_q) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc"); 1034 return crypto_int32_z; 1035 #elif defined(__GNUC__) && defined(__aarch64__) 1036 crypto_int32 crypto_int32_z; 1037 __asm__ ("cmp %w1,%w2\n cset %w0,le" : "=r"(crypto_int32_z) : "r"(crypto_int32_x), "r"(crypto_int32_y) : "cc"); 1038 return crypto_int32_z; 1039 #else 1040 return 1-crypto_int32_smaller_01(crypto_int32_y,crypto_int32_x); 1041 #endif 1042 } 1043 1044 __attribute__((unused)) 1045 static inline 1046 int crypto_int32_ones_num(crypto_int32 crypto_int32_x) { 1047 crypto_int32_unsigned crypto_int32_y = crypto_int32_x; 1048 const crypto_int32 C0 = 0x55555555; 1049 const crypto_int32 C1 = 0x33333333; 1050 const crypto_int32 C2 = 0x0f0f0f0f; 1051 crypto_int32_y -= ((crypto_int32_y >> 1) & C0); 1052 crypto_int32_y = (crypto_int32_y & C1) + ((crypto_int32_y >> 2) & C1); 1053 crypto_int32_y = (crypto_int32_y + (crypto_int32_y >> 4)) & C2; 1054 crypto_int32_y += crypto_int32_y >> 8; 1055 crypto_int32_y = (crypto_int32_y + (crypto_int32_y >> 16)) & 0xff; 1056 return crypto_int32_y; 1057 } 1058 1059 __attribute__((unused)) 1060 static inline 1061 int crypto_int32_bottomzeros_num(crypto_int32 crypto_int32_x) { 1062 #if defined(__GNUC__) && defined(__x86_64__) 1063 crypto_int32 fallback = 32; 1064 __asm__ ("bsfl 
%0,%0\n cmovel %1,%0" : "+&r"(crypto_int32_x) : "r"(fallback) : "cc"); 1065 return crypto_int32_x; 1066 #elif defined(__GNUC__) && defined(__aarch64__) 1067 int64_t crypto_int32_y; 1068 __asm__ ("rbit %w0,%w1\n clz %w0,%w0" : "=r"(crypto_int32_y) : "r"(crypto_int32_x) : ); 1069 return crypto_int32_y; 1070 #else 1071 crypto_int32 crypto_int32_y = crypto_int32_x ^ (crypto_int32_x-1); 1072 crypto_int32_y = ((crypto_int32) crypto_int32_y) >> 1; 1073 crypto_int32_y &= ~(crypto_int32_x & (((crypto_int32) 1) << (32-1))); 1074 return crypto_int32_ones_num(crypto_int32_y); 1075 #endif 1076 } 1077 1078 #endif 1079 1080 /* from supercop-20240808/cryptoint/crypto_int64.h */ 1081 /* auto-generated: cd cryptoint; ./autogen */ 1082 /* cryptoint 20240806 */ 1083 1084 #ifndef crypto_int64_h 1085 #define crypto_int64_h 1086 1087 #define crypto_int64 int64_t 1088 #define crypto_int64_unsigned uint64_t 1089 1090 1091 1092 __attribute__((unused)) 1093 static inline 1094 crypto_int64 crypto_int64_load(const unsigned char *crypto_int64_s) { 1095 crypto_int64 crypto_int64_z = 0; 1096 crypto_int64_z |= ((crypto_int64) (*crypto_int64_s++)) << 0; 1097 crypto_int64_z |= ((crypto_int64) (*crypto_int64_s++)) << 8; 1098 crypto_int64_z |= ((crypto_int64) (*crypto_int64_s++)) << 16; 1099 crypto_int64_z |= ((crypto_int64) (*crypto_int64_s++)) << 24; 1100 crypto_int64_z |= ((crypto_int64) (*crypto_int64_s++)) << 32; 1101 crypto_int64_z |= ((crypto_int64) (*crypto_int64_s++)) << 40; 1102 crypto_int64_z |= ((crypto_int64) (*crypto_int64_s++)) << 48; 1103 crypto_int64_z |= ((crypto_int64) (*crypto_int64_s++)) << 56; 1104 return crypto_int64_z; 1105 } 1106 1107 __attribute__((unused)) 1108 static inline 1109 void crypto_int64_store(unsigned char *crypto_int64_s,crypto_int64 crypto_int64_x) { 1110 *crypto_int64_s++ = crypto_int64_x >> 0; 1111 *crypto_int64_s++ = crypto_int64_x >> 8; 1112 *crypto_int64_s++ = crypto_int64_x >> 16; 1113 *crypto_int64_s++ = crypto_int64_x >> 24; 1114 *crypto_int64_s++ = crypto_int64_x >> 32; 1115 *crypto_int64_s++ = crypto_int64_x >> 40; 1116 *crypto_int64_s++ = crypto_int64_x >> 48; 1117 *crypto_int64_s++ = crypto_int64_x >> 56; 1118 } 1119 1120 __attribute__((unused)) 1121 static inline 1122 crypto_int64 crypto_int64_negative_mask(crypto_int64 crypto_int64_x) { 1123 #if defined(__GNUC__) && defined(__x86_64__) 1124 __asm__ ("sarq $63,%0" : "+r"(crypto_int64_x) : : "cc"); 1125 return crypto_int64_x; 1126 #elif defined(__GNUC__) && defined(__aarch64__) 1127 crypto_int64 crypto_int64_y; 1128 __asm__ ("asr %0,%1,63" : "=r"(crypto_int64_y) : "r"(crypto_int64_x) : ); 1129 return crypto_int64_y; 1130 #else 1131 crypto_int64_x >>= 64-6; 1132 crypto_int64_x ^= crypto_int64_optblocker; 1133 crypto_int64_x >>= 5; 1134 return crypto_int64_x; 1135 #endif 1136 } 1137 1138 __attribute__((unused)) 1139 static inline 1140 crypto_int64_unsigned crypto_int64_unsigned_topbit_01(crypto_int64_unsigned crypto_int64_x) { 1141 #if defined(__GNUC__) && defined(__x86_64__) 1142 __asm__ ("shrq $63,%0" : "+r"(crypto_int64_x) : : "cc"); 1143 return crypto_int64_x; 1144 #elif defined(__GNUC__) && defined(__aarch64__) 1145 crypto_int64 crypto_int64_y; 1146 __asm__ ("lsr %0,%1,63" : "=r"(crypto_int64_y) : "r"(crypto_int64_x) : ); 1147 return crypto_int64_y; 1148 #else 1149 crypto_int64_x >>= 64-6; 1150 crypto_int64_x ^= crypto_int64_optblocker; 1151 crypto_int64_x >>= 5; 1152 return crypto_int64_x; 1153 #endif 1154 } 1155 1156 __attribute__((unused)) 1157 static inline 1158 crypto_int64 crypto_int64_negative_01(crypto_int64 
crypto_int64_x) { 1159 return crypto_int64_unsigned_topbit_01(crypto_int64_x); 1160 } 1161 1162 __attribute__((unused)) 1163 static inline 1164 crypto_int64 crypto_int64_topbit_mask(crypto_int64 crypto_int64_x) { 1165 return crypto_int64_negative_mask(crypto_int64_x); 1166 } 1167 1168 __attribute__((unused)) 1169 static inline 1170 crypto_int64 crypto_int64_topbit_01(crypto_int64 crypto_int64_x) { 1171 return crypto_int64_unsigned_topbit_01(crypto_int64_x); 1172 } 1173 1174 __attribute__((unused)) 1175 static inline 1176 crypto_int64 crypto_int64_bottombit_mask(crypto_int64 crypto_int64_x) { 1177 #if defined(__GNUC__) && defined(__x86_64__) 1178 __asm__ ("andq $1,%0" : "+r"(crypto_int64_x) : : "cc"); 1179 return -crypto_int64_x; 1180 #elif defined(__GNUC__) && defined(__aarch64__) 1181 crypto_int64 crypto_int64_y; 1182 __asm__ ("sbfx %0,%1,0,1" : "=r"(crypto_int64_y) : "r"(crypto_int64_x) : ); 1183 return crypto_int64_y; 1184 #else 1185 crypto_int64_x &= 1 ^ crypto_int64_optblocker; 1186 return -crypto_int64_x; 1187 #endif 1188 } 1189 1190 __attribute__((unused)) 1191 static inline 1192 crypto_int64 crypto_int64_bottombit_01(crypto_int64 crypto_int64_x) { 1193 #if defined(__GNUC__) && defined(__x86_64__) 1194 __asm__ ("andq $1,%0" : "+r"(crypto_int64_x) : : "cc"); 1195 return crypto_int64_x; 1196 #elif defined(__GNUC__) && defined(__aarch64__) 1197 crypto_int64 crypto_int64_y; 1198 __asm__ ("ubfx %0,%1,0,1" : "=r"(crypto_int64_y) : "r"(crypto_int64_x) : ); 1199 return crypto_int64_y; 1200 #else 1201 crypto_int64_x &= 1 ^ crypto_int64_optblocker; 1202 return crypto_int64_x; 1203 #endif 1204 } 1205 1206 __attribute__((unused)) 1207 static inline 1208 crypto_int64 crypto_int64_bitinrangepublicpos_mask(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_s) { 1209 #if defined(__GNUC__) && defined(__x86_64__) 1210 __asm__ ("sarq %%cl,%0" : "+r"(crypto_int64_x) : "c"(crypto_int64_s) : "cc"); 1211 #elif defined(__GNUC__) && defined(__aarch64__) 1212 __asm__ ("asr %0,%0,%1" : "+r"(crypto_int64_x) : "r"(crypto_int64_s) : ); 1213 #else 1214 crypto_int64_x >>= crypto_int64_s ^ crypto_int64_optblocker; 1215 #endif 1216 return crypto_int64_bottombit_mask(crypto_int64_x); 1217 } 1218 1219 __attribute__((unused)) 1220 static inline 1221 crypto_int64 crypto_int64_bitinrangepublicpos_01(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_s) { 1222 #if defined(__GNUC__) && defined(__x86_64__) 1223 __asm__ ("sarq %%cl,%0" : "+r"(crypto_int64_x) : "c"(crypto_int64_s) : "cc"); 1224 #elif defined(__GNUC__) && defined(__aarch64__) 1225 __asm__ ("asr %0,%0,%1" : "+r"(crypto_int64_x) : "r"(crypto_int64_s) : ); 1226 #else 1227 crypto_int64_x >>= crypto_int64_s ^ crypto_int64_optblocker; 1228 #endif 1229 return crypto_int64_bottombit_01(crypto_int64_x); 1230 } 1231 1232 __attribute__((unused)) 1233 static inline 1234 crypto_int64 crypto_int64_shlmod(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_s) { 1235 #if defined(__GNUC__) && defined(__x86_64__) 1236 __asm__ ("shlq %%cl,%0" : "+r"(crypto_int64_x) : "c"(crypto_int64_s) : "cc"); 1237 #elif defined(__GNUC__) && defined(__aarch64__) 1238 __asm__ ("lsl %0,%0,%1" : "+r"(crypto_int64_x) : "r"(crypto_int64_s) : ); 1239 #else 1240 int crypto_int64_k, crypto_int64_l; 1241 for (crypto_int64_l = 0,crypto_int64_k = 1;crypto_int64_k < 64;++crypto_int64_l,crypto_int64_k *= 2) 1242 crypto_int64_x ^= (crypto_int64_x ^ (crypto_int64_x << crypto_int64_k)) & crypto_int64_bitinrangepublicpos_mask(crypto_int64_s,crypto_int64_l); 1243 #endif 1244 return crypto_int64_x; 1245 
} 1246 1247 __attribute__((unused)) 1248 static inline 1249 crypto_int64 crypto_int64_shrmod(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_s) { 1250 #if defined(__GNUC__) && defined(__x86_64__) 1251 __asm__ ("sarq %%cl,%0" : "+r"(crypto_int64_x) : "c"(crypto_int64_s) : "cc"); 1252 #elif defined(__GNUC__) && defined(__aarch64__) 1253 __asm__ ("asr %0,%0,%1" : "+r"(crypto_int64_x) : "r"(crypto_int64_s) : ); 1254 #else 1255 int crypto_int64_k, crypto_int64_l; 1256 for (crypto_int64_l = 0,crypto_int64_k = 1;crypto_int64_k < 64;++crypto_int64_l,crypto_int64_k *= 2) 1257 crypto_int64_x ^= (crypto_int64_x ^ (crypto_int64_x >> crypto_int64_k)) & crypto_int64_bitinrangepublicpos_mask(crypto_int64_s,crypto_int64_l); 1258 #endif 1259 return crypto_int64_x; 1260 } 1261 1262 __attribute__((unused)) 1263 static inline 1264 crypto_int64 crypto_int64_bitmod_mask(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_s) { 1265 crypto_int64_x = crypto_int64_shrmod(crypto_int64_x,crypto_int64_s); 1266 return crypto_int64_bottombit_mask(crypto_int64_x); 1267 } 1268 1269 __attribute__((unused)) 1270 static inline 1271 crypto_int64 crypto_int64_bitmod_01(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_s) { 1272 crypto_int64_x = crypto_int64_shrmod(crypto_int64_x,crypto_int64_s); 1273 return crypto_int64_bottombit_01(crypto_int64_x); 1274 } 1275 1276 __attribute__((unused)) 1277 static inline 1278 crypto_int64 crypto_int64_nonzero_mask(crypto_int64 crypto_int64_x) { 1279 #if defined(__GNUC__) && defined(__x86_64__) 1280 crypto_int64 crypto_int64_q,crypto_int64_z; 1281 __asm__ ("xorq %0,%0\n movq $-1,%1\n testq %2,%2\n cmovneq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x) : "cc"); 1282 return crypto_int64_z; 1283 #elif defined(__GNUC__) && defined(__aarch64__) 1284 crypto_int64 crypto_int64_z; 1285 __asm__ ("cmp %1,0\n csetm %0,ne" : "=r"(crypto_int64_z) : "r"(crypto_int64_x) : "cc"); 1286 return crypto_int64_z; 1287 #else 1288 crypto_int64_x |= -crypto_int64_x; 1289 return crypto_int64_negative_mask(crypto_int64_x); 1290 #endif 1291 } 1292 1293 __attribute__((unused)) 1294 static inline 1295 crypto_int64 crypto_int64_nonzero_01(crypto_int64 crypto_int64_x) { 1296 #if defined(__GNUC__) && defined(__x86_64__) 1297 crypto_int64 crypto_int64_q,crypto_int64_z; 1298 __asm__ ("xorq %0,%0\n movq $1,%1\n testq %2,%2\n cmovneq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x) : "cc"); 1299 return crypto_int64_z; 1300 #elif defined(__GNUC__) && defined(__aarch64__) 1301 crypto_int64 crypto_int64_z; 1302 __asm__ ("cmp %1,0\n cset %0,ne" : "=r"(crypto_int64_z) : "r"(crypto_int64_x) : "cc"); 1303 return crypto_int64_z; 1304 #else 1305 crypto_int64_x |= -crypto_int64_x; 1306 return crypto_int64_unsigned_topbit_01(crypto_int64_x); 1307 #endif 1308 } 1309 1310 __attribute__((unused)) 1311 static inline 1312 crypto_int64 crypto_int64_positive_mask(crypto_int64 crypto_int64_x) { 1313 #if defined(__GNUC__) && defined(__x86_64__) 1314 crypto_int64 crypto_int64_q,crypto_int64_z; 1315 __asm__ ("xorq %0,%0\n movq $-1,%1\n testq %2,%2\n cmovgq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x) : "cc"); 1316 return crypto_int64_z; 1317 #elif defined(__GNUC__) && defined(__aarch64__) 1318 crypto_int64 crypto_int64_z; 1319 __asm__ ("cmp %1,0\n csetm %0,gt" : "=r"(crypto_int64_z) : "r"(crypto_int64_x) : "cc"); 1320 return crypto_int64_z; 1321 #else 1322 crypto_int64 crypto_int64_z = -crypto_int64_x; 1323 crypto_int64_z ^= crypto_int64_x & 
crypto_int64_z; 1324 return crypto_int64_negative_mask(crypto_int64_z); 1325 #endif 1326 } 1327 1328 __attribute__((unused)) 1329 static inline 1330 crypto_int64 crypto_int64_positive_01(crypto_int64 crypto_int64_x) { 1331 #if defined(__GNUC__) && defined(__x86_64__) 1332 crypto_int64 crypto_int64_q,crypto_int64_z; 1333 __asm__ ("xorq %0,%0\n movq $1,%1\n testq %2,%2\n cmovgq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x) : "cc"); 1334 return crypto_int64_z; 1335 #elif defined(__GNUC__) && defined(__aarch64__) 1336 crypto_int64 crypto_int64_z; 1337 __asm__ ("cmp %1,0\n cset %0,gt" : "=r"(crypto_int64_z) : "r"(crypto_int64_x) : "cc"); 1338 return crypto_int64_z; 1339 #else 1340 crypto_int64 crypto_int64_z = -crypto_int64_x; 1341 crypto_int64_z ^= crypto_int64_x & crypto_int64_z; 1342 return crypto_int64_unsigned_topbit_01(crypto_int64_z); 1343 #endif 1344 } 1345 1346 __attribute__((unused)) 1347 static inline 1348 crypto_int64 crypto_int64_zero_mask(crypto_int64 crypto_int64_x) { 1349 #if defined(__GNUC__) && defined(__x86_64__) 1350 crypto_int64 crypto_int64_q,crypto_int64_z; 1351 __asm__ ("xorq %0,%0\n movq $-1,%1\n testq %2,%2\n cmoveq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x) : "cc"); 1352 return crypto_int64_z; 1353 #elif defined(__GNUC__) && defined(__aarch64__) 1354 crypto_int64 crypto_int64_z; 1355 __asm__ ("cmp %1,0\n csetm %0,eq" : "=r"(crypto_int64_z) : "r"(crypto_int64_x) : "cc"); 1356 return crypto_int64_z; 1357 #else 1358 return ~crypto_int64_nonzero_mask(crypto_int64_x); 1359 #endif 1360 } 1361 1362 __attribute__((unused)) 1363 static inline 1364 crypto_int64 crypto_int64_zero_01(crypto_int64 crypto_int64_x) { 1365 #if defined(__GNUC__) && defined(__x86_64__) 1366 crypto_int64 crypto_int64_q,crypto_int64_z; 1367 __asm__ ("xorq %0,%0\n movq $1,%1\n testq %2,%2\n cmoveq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x) : "cc"); 1368 return crypto_int64_z; 1369 #elif defined(__GNUC__) && defined(__aarch64__) 1370 crypto_int64 crypto_int64_z; 1371 __asm__ ("cmp %1,0\n cset %0,eq" : "=r"(crypto_int64_z) : "r"(crypto_int64_x) : "cc"); 1372 return crypto_int64_z; 1373 #else 1374 return 1-crypto_int64_nonzero_01(crypto_int64_x); 1375 #endif 1376 } 1377 1378 __attribute__((unused)) 1379 static inline 1380 crypto_int64 crypto_int64_unequal_mask(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_y) { 1381 #if defined(__GNUC__) && defined(__x86_64__) 1382 crypto_int64 crypto_int64_q,crypto_int64_z; 1383 __asm__ ("xorq %0,%0\n movq $-1,%1\n cmpq %3,%2\n cmovneq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc"); 1384 return crypto_int64_z; 1385 #elif defined(__GNUC__) && defined(__aarch64__) 1386 crypto_int64 crypto_int64_z; 1387 __asm__ ("cmp %1,%2\n csetm %0,ne" : "=r"(crypto_int64_z) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc"); 1388 return crypto_int64_z; 1389 #else 1390 return crypto_int64_nonzero_mask(crypto_int64_x ^ crypto_int64_y); 1391 #endif 1392 } 1393 1394 __attribute__((unused)) 1395 static inline 1396 crypto_int64 crypto_int64_unequal_01(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_y) { 1397 #if defined(__GNUC__) && defined(__x86_64__) 1398 crypto_int64 crypto_int64_q,crypto_int64_z; 1399 __asm__ ("xorq %0,%0\n movq $1,%1\n cmpq %3,%2\n cmovneq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc"); 1400 return crypto_int64_z; 1401 #elif defined(__GNUC__) && 
__attribute__((unused))
static inline
crypto_int64 crypto_int64_unequal_01(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_y) {
#if defined(__GNUC__) && defined(__x86_64__)
  crypto_int64 crypto_int64_q,crypto_int64_z;
  __asm__ ("xorq %0,%0\n movq $1,%1\n cmpq %3,%2\n cmovneq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc");
  return crypto_int64_z;
#elif defined(__GNUC__) && defined(__aarch64__)
  crypto_int64 crypto_int64_z;
  __asm__ ("cmp %1,%2\n cset %0,ne" : "=r"(crypto_int64_z) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc");
  return crypto_int64_z;
#else
  return crypto_int64_nonzero_01(crypto_int64_x ^ crypto_int64_y);
#endif
}

__attribute__((unused))
static inline
crypto_int64 crypto_int64_equal_mask(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_y) {
#if defined(__GNUC__) && defined(__x86_64__)
  crypto_int64 crypto_int64_q,crypto_int64_z;
  __asm__ ("xorq %0,%0\n movq $-1,%1\n cmpq %3,%2\n cmoveq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc");
  return crypto_int64_z;
#elif defined(__GNUC__) && defined(__aarch64__)
  crypto_int64 crypto_int64_z;
  __asm__ ("cmp %1,%2\n csetm %0,eq" : "=r"(crypto_int64_z) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc");
  return crypto_int64_z;
#else
  return ~crypto_int64_unequal_mask(crypto_int64_x,crypto_int64_y);
#endif
}

__attribute__((unused))
static inline
crypto_int64 crypto_int64_equal_01(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_y) {
#if defined(__GNUC__) && defined(__x86_64__)
  crypto_int64 crypto_int64_q,crypto_int64_z;
  __asm__ ("xorq %0,%0\n movq $1,%1\n cmpq %3,%2\n cmoveq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc");
  return crypto_int64_z;
#elif defined(__GNUC__) && defined(__aarch64__)
  crypto_int64 crypto_int64_z;
  __asm__ ("cmp %1,%2\n cset %0,eq" : "=r"(crypto_int64_z) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc");
  return crypto_int64_z;
#else
  return 1-crypto_int64_unequal_01(crypto_int64_x,crypto_int64_y);
#endif
}

__attribute__((unused))
static inline
crypto_int64 crypto_int64_min(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_y) {
#if defined(__GNUC__) && defined(__x86_64__)
  __asm__ ("cmpq %1,%0\n cmovgq %1,%0" : "+r"(crypto_int64_x) : "r"(crypto_int64_y) : "cc");
  return crypto_int64_x;
#elif defined(__GNUC__) && defined(__aarch64__)
  __asm__ ("cmp %0,%1\n csel %0,%0,%1,lt" : "+r"(crypto_int64_x) : "r"(crypto_int64_y) : "cc");
  return crypto_int64_x;
#else
  crypto_int64 crypto_int64_r = crypto_int64_y ^ crypto_int64_x;
  crypto_int64 crypto_int64_z = crypto_int64_y - crypto_int64_x;
  crypto_int64_z ^= crypto_int64_r & (crypto_int64_z ^ crypto_int64_y);
  crypto_int64_z = crypto_int64_negative_mask(crypto_int64_z);
  crypto_int64_z &= crypto_int64_r;
  return crypto_int64_x ^ crypto_int64_z;
#endif
}
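/*
 * Note on the branch-free min/max/minmax fallbacks: they compute a sign
 * mask for "y < x" that stays correct even when the subtraction y - x
 * overflows.  With r = x ^ y, the correction z ^= r & (z ^ y) replaces the
 * sign of z by the sign of y whenever x and y have opposite signs (the
 * only case where y - x can overflow); the resulting mask, ANDed with r,
 * then conditionally swaps the two values without any data-dependent
 * branch.
 */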
__attribute__((unused))
static inline
crypto_int64 crypto_int64_max(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_y) {
#if defined(__GNUC__) && defined(__x86_64__)
  __asm__ ("cmpq %1,%0\n cmovlq %1,%0" : "+r"(crypto_int64_x) : "r"(crypto_int64_y) : "cc");
  return crypto_int64_x;
#elif defined(__GNUC__) && defined(__aarch64__)
  __asm__ ("cmp %0,%1\n csel %0,%1,%0,lt" : "+r"(crypto_int64_x) : "r"(crypto_int64_y) : "cc");
  return crypto_int64_x;
#else
  crypto_int64 crypto_int64_r = crypto_int64_y ^ crypto_int64_x;
  crypto_int64 crypto_int64_z = crypto_int64_y - crypto_int64_x;
  crypto_int64_z ^= crypto_int64_r & (crypto_int64_z ^ crypto_int64_y);
  crypto_int64_z = crypto_int64_negative_mask(crypto_int64_z);
  crypto_int64_z &= crypto_int64_r;
  return crypto_int64_y ^ crypto_int64_z;
#endif
}

__attribute__((unused))
static inline
void crypto_int64_minmax(crypto_int64 *crypto_int64_p,crypto_int64 *crypto_int64_q) {
  crypto_int64 crypto_int64_x = *crypto_int64_p;
  crypto_int64 crypto_int64_y = *crypto_int64_q;
#if defined(__GNUC__) && defined(__x86_64__)
  crypto_int64 crypto_int64_z;
  __asm__ ("cmpq %2,%1\n movq %1,%0\n cmovgq %2,%1\n cmovgq %0,%2" : "=&r"(crypto_int64_z), "+&r"(crypto_int64_x), "+r"(crypto_int64_y) : : "cc");
  *crypto_int64_p = crypto_int64_x;
  *crypto_int64_q = crypto_int64_y;
#elif defined(__GNUC__) && defined(__aarch64__)
  crypto_int64 crypto_int64_r, crypto_int64_s;
  __asm__ ("cmp %2,%3\n csel %0,%2,%3,lt\n csel %1,%3,%2,lt" : "=&r"(crypto_int64_r), "=r"(crypto_int64_s) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc");
  *crypto_int64_p = crypto_int64_r;
  *crypto_int64_q = crypto_int64_s;
#else
  crypto_int64 crypto_int64_r = crypto_int64_y ^ crypto_int64_x;
  crypto_int64 crypto_int64_z = crypto_int64_y - crypto_int64_x;
  crypto_int64_z ^= crypto_int64_r & (crypto_int64_z ^ crypto_int64_y);
  crypto_int64_z = crypto_int64_negative_mask(crypto_int64_z);
  crypto_int64_z &= crypto_int64_r;
  crypto_int64_x ^= crypto_int64_z;
  crypto_int64_y ^= crypto_int64_z;
  *crypto_int64_p = crypto_int64_x;
  *crypto_int64_q = crypto_int64_y;
#endif
}

__attribute__((unused))
static inline
crypto_int64 crypto_int64_smaller_mask(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_y) {
#if defined(__GNUC__) && defined(__x86_64__)
  crypto_int64 crypto_int64_q,crypto_int64_z;
  __asm__ ("xorq %0,%0\n movq $-1,%1\n cmpq %3,%2\n cmovlq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc");
  return crypto_int64_z;
#elif defined(__GNUC__) && defined(__aarch64__)
  crypto_int64 crypto_int64_z;
  __asm__ ("cmp %1,%2\n csetm %0,lt" : "=r"(crypto_int64_z) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc");
  return crypto_int64_z;
#else
  crypto_int64 crypto_int64_r = crypto_int64_x ^ crypto_int64_y;
  crypto_int64 crypto_int64_z = crypto_int64_x - crypto_int64_y;
  crypto_int64_z ^= crypto_int64_r & (crypto_int64_z ^ crypto_int64_x);
  return crypto_int64_negative_mask(crypto_int64_z);
#endif
}

__attribute__((unused))
static inline
crypto_int64 crypto_int64_smaller_01(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_y) {
#if defined(__GNUC__) && defined(__x86_64__)
  crypto_int64 crypto_int64_q,crypto_int64_z;
  __asm__ ("xorq %0,%0\n movq $1,%1\n cmpq %3,%2\n cmovlq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc");
  return crypto_int64_z;
#elif defined(__GNUC__) && defined(__aarch64__)
  crypto_int64 crypto_int64_z;
  __asm__ ("cmp %1,%2\n cset %0,lt" : "=r"(crypto_int64_z) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc");
  return crypto_int64_z;
#else
  crypto_int64 crypto_int64_r = crypto_int64_x ^ crypto_int64_y;
  crypto_int64 crypto_int64_z = crypto_int64_x - crypto_int64_y;
  crypto_int64_z ^= crypto_int64_r & (crypto_int64_z ^ crypto_int64_x);
  return crypto_int64_unsigned_topbit_01(crypto_int64_z);
#endif
}
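/*
 * x <= y is derived from the strict comparison with swapped arguments:
 * leq(x,y) == !smaller(y,x), which the fallbacks below express as
 * ~crypto_int64_smaller_mask(y,x) and 1-crypto_int64_smaller_01(y,x).
 */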
__attribute__((unused))
static inline
crypto_int64 crypto_int64_leq_mask(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_y) {
#if defined(__GNUC__) && defined(__x86_64__)
  crypto_int64 crypto_int64_q,crypto_int64_z;
  __asm__ ("xorq %0,%0\n movq $-1,%1\n cmpq %3,%2\n cmovleq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc");
  return crypto_int64_z;
#elif defined(__GNUC__) && defined(__aarch64__)
  crypto_int64 crypto_int64_z;
  __asm__ ("cmp %1,%2\n csetm %0,le" : "=r"(crypto_int64_z) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc");
  return crypto_int64_z;
#else
  return ~crypto_int64_smaller_mask(crypto_int64_y,crypto_int64_x);
#endif
}

__attribute__((unused))
static inline
crypto_int64 crypto_int64_leq_01(crypto_int64 crypto_int64_x,crypto_int64 crypto_int64_y) {
#if defined(__GNUC__) && defined(__x86_64__)
  crypto_int64 crypto_int64_q,crypto_int64_z;
  __asm__ ("xorq %0,%0\n movq $1,%1\n cmpq %3,%2\n cmovleq %1,%0" : "=&r"(crypto_int64_z), "=&r"(crypto_int64_q) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc");
  return crypto_int64_z;
#elif defined(__GNUC__) && defined(__aarch64__)
  crypto_int64 crypto_int64_z;
  __asm__ ("cmp %1,%2\n cset %0,le" : "=r"(crypto_int64_z) : "r"(crypto_int64_x), "r"(crypto_int64_y) : "cc");
  return crypto_int64_z;
#else
  return 1-crypto_int64_smaller_01(crypto_int64_y,crypto_int64_x);
#endif
}

__attribute__((unused))
static inline
int crypto_int64_ones_num(crypto_int64 crypto_int64_x) {
  crypto_int64_unsigned crypto_int64_y = crypto_int64_x;
  const crypto_int64 C0 = 0x5555555555555555;
  const crypto_int64 C1 = 0x3333333333333333;
  const crypto_int64 C2 = 0x0f0f0f0f0f0f0f0f;
  crypto_int64_y -= ((crypto_int64_y >> 1) & C0);
  crypto_int64_y = (crypto_int64_y & C1) + ((crypto_int64_y >> 2) & C1);
  crypto_int64_y = (crypto_int64_y + (crypto_int64_y >> 4)) & C2;
  crypto_int64_y += crypto_int64_y >> 8;
  crypto_int64_y += crypto_int64_y >> 16;
  crypto_int64_y = (crypto_int64_y + (crypto_int64_y >> 32)) & 0xff;
  return crypto_int64_y;
}

__attribute__((unused))
static inline
int crypto_int64_bottomzeros_num(crypto_int64 crypto_int64_x) {
#if defined(__GNUC__) && defined(__x86_64__)
  crypto_int64 fallback = 64;
  __asm__ ("bsfq %0,%0\n cmoveq %1,%0" : "+&r"(crypto_int64_x) : "r"(fallback) : "cc");
  return crypto_int64_x;
#elif defined(__GNUC__) && defined(__aarch64__)
  int64_t crypto_int64_y;
  __asm__ ("rbit %0,%1\n clz %0,%0" : "=r"(crypto_int64_y) : "r"(crypto_int64_x) : );
  return crypto_int64_y;
#else
  crypto_int64 crypto_int64_y = crypto_int64_x ^ (crypto_int64_x-1);
  crypto_int64_y = ((crypto_int64) crypto_int64_y) >> 1;
  crypto_int64_y &= ~(crypto_int64_x & (((crypto_int64) 1) << (64-1)));
  return crypto_int64_ones_num(crypto_int64_y);
#endif
}

#endif

/* from supercop-20240808/crypto_sort/int32/portable4/sort.c */
#define int32_MINMAX(a,b) crypto_int32_minmax(&a,&b)
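/*
 * crypto_sort_int32 below is a sorting network: the sequence of
 * compare-exchange operations (int32_MINMAX, i.e. crypto_int32_minmax)
 * depends only on the length n, never on the values being sorted, so the
 * running time and memory access pattern are independent of the (secret)
 * array contents.
 */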
static void crypto_sort_int32(void *array,long long n)
{
  long long top,p,q,r,i,j;
  int32 *x = array;

  if (n < 2) return;
  top = 1;
  while (top < n - top) top += top;

  for (p = top;p >= 1;p >>= 1) {
    i = 0;
    while (i + 2 * p <= n) {
      for (j = i;j < i + p;++j)
        int32_MINMAX(x[j],x[j+p]);
      i += 2 * p;
    }
    for (j = i;j < n - p;++j)
      int32_MINMAX(x[j],x[j+p]);

    i = 0;
    j = 0;
    for (q = top;q > p;q >>= 1) {
      if (j != i) for (;;) {
        if (j == n - q) goto done;
        int32 a = x[j + p];
        for (r = q;r > p;r >>= 1)
          int32_MINMAX(a,x[j + r]);
        x[j + p] = a;
        ++j;
        if (j == i + p) {
          i += 2 * p;
          break;
        }
      }
      while (i + p <= n - q) {
        for (j = i;j < i + p;++j) {
          int32 a = x[j + p];
          for (r = q;r > p;r >>= 1)
            int32_MINMAX(a,x[j+r]);
          x[j + p] = a;
        }
        i += 2 * p;
      }
      /* now i + p > n - q */
      j = i;
      while (j < n - q) {
        int32 a = x[j + p];
        for (r = q;r > p;r >>= 1)
          int32_MINMAX(a,x[j+r]);
        x[j + p] = a;
        ++j;
      }

      done: ;
    }
  }
}

/* from supercop-20240808/crypto_sort/uint32/useint32/sort.c */

/* can save time by vectorizing xor loops */
/* can save time by integrating xor loops with int32_sort */

static void crypto_sort_uint32(void *array,long long n)
{
  crypto_uint32 *x = array;
  long long j;
  for (j = 0;j < n;++j) x[j] ^= 0x80000000;
  crypto_sort_int32(array,n);
  for (j = 0;j < n;++j) x[j] ^= 0x80000000;
}

/* from supercop-20240808/crypto_kem/sntrup761/compact/kem.c */
// 20240806 djb: some automated conversion to cryptoint

#define p 761
#define q 4591
#define w 286
#define q12 ((q - 1) / 2)
typedef int8_t small;
typedef int16_t Fq;
#define Hash_bytes 32
#define Small_bytes ((p + 3) / 4)
typedef small Inputs[p];
#define SecretKeys_bytes (2 * Small_bytes)
#define Confirm_bytes 32

static small F3_freeze(int16_t x) { return x - 3 * ((10923 * x + 16384) >> 15); }

static Fq Fq_freeze(int32_t x) {
  const int32_t q16 = (0x10000 + q / 2) / q;
  const int32_t q20 = (0x100000 + q / 2) / q;
  const int32_t q28 = (0x10000000 + q / 2) / q;
  x -= q * ((q16 * x) >> 16);
  x -= q * ((q20 * x) >> 20);
  return x - q * ((q28 * x + 0x8000000) >> 28);
}

static int Weightw_mask(small *r) {
  int i, weight = 0;
  for (i = 0; i < p; ++i) weight += crypto_int64_bottombit_01(r[i]);
  return crypto_int16_nonzero_mask(weight - w);
}

static void uint32_divmod_uint14(uint32_t *Q, uint16_t *r, uint32_t x, uint16_t m) {
  uint32_t qpart, mask, v = 0x80000000 / m;
  qpart = (x * (uint64_t)v) >> 31;
  x -= qpart * m;
  *Q = qpart;
  qpart = (x * (uint64_t)v) >> 31;
  x -= qpart * m;
  *Q += qpart;
  x -= m;
  *Q += 1;
  mask = crypto_int32_negative_mask(x);
  x += mask & (uint32_t)m;
  *Q += mask;
  *r = x;
}

static uint16_t uint32_mod_uint14(uint32_t x, uint16_t m) {
  uint32_t Q;
  uint16_t r;
  uint32_divmod_uint14(&Q, &r, x, m);
  return r;
}
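/*
 * Encode/Decode below implement the NTRU Prime integer encoding: a sequence
 * R[0..len-1] with R[i] < M[i] is packed by combining adjacent pairs into
 * r = R[i] + M[i]*R[i+1] modulo m = M[i]*M[i+1], emitting low-order bytes
 * while m stays at or above 16384, and then recursing on the halved
 * sequence.  Decode reverses the process using uint32_divmod_uint14; the
 * control flow of both depends only on M and len, not on the encoded
 * values.
 */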
static void Encode(unsigned char *out, const uint16_t *R, const uint16_t *M, long long len) {
  if (len == 1) {
    uint16_t r = R[0], m = M[0];
    while (m > 1) {
      *out++ = r;
      r >>= 8;
      m = (m + 255) >> 8;
    }
  }
  if (len > 1) {
    uint16_t R2[(len + 1) / 2], M2[(len + 1) / 2];
    long long i;
    for (i = 0; i < len - 1; i += 2) {
      uint32_t m0 = M[i];
      uint32_t r = R[i] + R[i + 1] * m0;
      uint32_t m = M[i + 1] * m0;
      while (m >= 16384) {
        *out++ = r;
        r >>= 8;
        m = (m + 255) >> 8;
      }
      R2[i / 2] = r;
      M2[i / 2] = m;
    }
    if (i < len) {
      R2[i / 2] = R[i];
      M2[i / 2] = M[i];
    }
    Encode(out, R2, M2, (len + 1) / 2);
  }
}

static void Decode(uint16_t *out, const unsigned char *S, const uint16_t *M, long long len) {
  if (len == 1) {
    if (M[0] == 1)
      *out = 0;
    else if (M[0] <= 256)
      *out = uint32_mod_uint14(S[0], M[0]);
    else
      *out = uint32_mod_uint14(S[0] + (((uint16_t)S[1]) << 8), M[0]);
  }
  if (len > 1) {
    uint16_t R2[(len + 1) / 2], M2[(len + 1) / 2], bottomr[len / 2];
    uint32_t bottomt[len / 2];
    long long i;
    for (i = 0; i < len - 1; i += 2) {
      uint32_t m = M[i] * (uint32_t)M[i + 1];
      if (m > 256 * 16383) {
        bottomt[i / 2] = 256 * 256;
        bottomr[i / 2] = S[0] + 256 * S[1];
        S += 2;
        M2[i / 2] = (((m + 255) >> 8) + 255) >> 8;
      } else if (m >= 16384) {
        bottomt[i / 2] = 256;
        bottomr[i / 2] = S[0];
        S += 1;
        M2[i / 2] = (m + 255) >> 8;
      } else {
        bottomt[i / 2] = 1;
        bottomr[i / 2] = 0;
        M2[i / 2] = m;
      }
    }
    if (i < len) M2[i / 2] = M[i];
    Decode(R2, S, M2, (len + 1) / 2);
    for (i = 0; i < len - 1; i += 2) {
      uint32_t r1, r = bottomr[i / 2];
      uint16_t r0;
      r += bottomt[i / 2] * R2[i / 2];
      uint32_divmod_uint14(&r1, &r0, r, M[i]);
      r1 = uint32_mod_uint14(r1, M[i + 1]);
      *out++ = r0;
      *out++ = r1;
    }
    if (i < len) *out++ = R2[i / 2];
  }
}

static void R3_fromRq(small *out, const Fq *r) {
  int i;
  for (i = 0; i < p; ++i) out[i] = F3_freeze(r[i]);
}

static void R3_mult(small *h, const small *f, const small *g) {
  int16_t fg[p + p - 1];
  int i, j;
  for (i = 0; i < p + p - 1; ++i) fg[i] = 0;
  for (i = 0; i < p; ++i)
    for (j = 0; j < p; ++j) fg[i + j] += f[i] * (int16_t)g[j];
  for (i = p; i < p + p - 1; ++i) fg[i - p] += fg[i];
  for (i = p; i < p + p - 1; ++i) fg[i - p + 1] += fg[i];
  for (i = 0; i < p; ++i) h[i] = F3_freeze(fg[i]);
}

static int R3_recip(small *out, const small *in) {
  small f[p + 1], g[p + 1], v[p + 1], r[p + 1];
  int sign, swap, t, i, loop, delta = 1;
  for (i = 0; i < p + 1; ++i) v[i] = 0;
  for (i = 0; i < p + 1; ++i) r[i] = 0;
  r[0] = 1;
  for (i = 0; i < p; ++i) f[i] = 0;
  f[0] = 1;
  f[p - 1] = f[p] = -1;
  for (i = 0; i < p; ++i) g[p - 1 - i] = in[i];
  g[p] = 0;
  for (loop = 0; loop < 2 * p - 1; ++loop) {
    for (i = p; i > 0; --i) v[i] = v[i - 1];
    v[0] = 0;
    sign = -g[0] * f[0];
    swap = crypto_int16_negative_mask(-delta) & crypto_int16_nonzero_mask(g[0]);
    delta ^= swap & (delta ^ -delta);
    delta += 1;
    for (i = 0; i < p + 1; ++i) {
      t = swap & (f[i] ^ g[i]);
      f[i] ^= t;
      g[i] ^= t;
      t = swap & (v[i] ^ r[i]);
      v[i] ^= t;
      r[i] ^= t;
    }
    for (i = 0; i < p + 1; ++i) g[i] = F3_freeze(g[i] + sign * f[i]);
    for (i = 0; i < p + 1; ++i) r[i] = F3_freeze(r[i] + sign * v[i]);
    for (i = 0; i < p; ++i) g[i] = g[i + 1];
    g[p] = 0;
  }
  sign = f[0];
  for (i = 0; i < p; ++i) out[i] = sign * v[p - 1 - i];
  return crypto_int16_nonzero_mask(delta);
}
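/*
 * Rq_mult_small multiplies an element of R/q = Z_q[x]/(x^p - x - 1) by a
 * small (ternary) polynomial: schoolbook multiplication into a buffer of
 * 2p-1 coefficients, followed by reduction using x^p = x + 1, i.e. each
 * coefficient at index i >= p is folded into indices i-p and i-p+1.
 * R3_mult above performs the same computation in Z_3[x]/(x^p - x - 1).
 */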
static void Rq_mult_small(Fq *h, const Fq *f, const small *g) {
  int32_t fg[p + p - 1];
  int i, j;
  for (i = 0; i < p + p - 1; ++i) fg[i] = 0;
  for (i = 0; i < p; ++i)
    for (j = 0; j < p; ++j) fg[i + j] += f[i] * (int32_t)g[j];
  for (i = p; i < p + p - 1; ++i) fg[i - p] += fg[i];
  for (i = p; i < p + p - 1; ++i) fg[i - p + 1] += fg[i];
  for (i = 0; i < p; ++i) h[i] = Fq_freeze(fg[i]);
}

static void Rq_mult3(Fq *h, const Fq *f) {
  int i;
  for (i = 0; i < p; ++i) h[i] = Fq_freeze(3 * f[i]);
}

static Fq Fq_recip(Fq a1) {
  int i = 1;
  Fq ai = a1;
  while (i < q - 2) {
    ai = Fq_freeze(a1 * (int32_t)ai);
    i += 1;
  }
  return ai;
}

static int Rq_recip3(Fq *out, const small *in) {
  Fq f[p + 1], g[p + 1], v[p + 1], r[p + 1], scale;
  int swap, t, i, loop, delta = 1;
  int32_t f0, g0;
  for (i = 0; i < p + 1; ++i) v[i] = 0;
  for (i = 0; i < p + 1; ++i) r[i] = 0;
  r[0] = Fq_recip(3);
  for (i = 0; i < p; ++i) f[i] = 0;
  f[0] = 1;
  f[p - 1] = f[p] = -1;
  for (i = 0; i < p; ++i) g[p - 1 - i] = in[i];
  g[p] = 0;
  for (loop = 0; loop < 2 * p - 1; ++loop) {
    for (i = p; i > 0; --i) v[i] = v[i - 1];
    v[0] = 0;
    swap = crypto_int16_negative_mask(-delta) & crypto_int16_nonzero_mask(g[0]);
    delta ^= swap & (delta ^ -delta);
    delta += 1;
    for (i = 0; i < p + 1; ++i) {
      t = swap & (f[i] ^ g[i]);
      f[i] ^= t;
      g[i] ^= t;
      t = swap & (v[i] ^ r[i]);
      v[i] ^= t;
      r[i] ^= t;
    }
    f0 = f[0];
    g0 = g[0];
    for (i = 0; i < p + 1; ++i) g[i] = Fq_freeze(f0 * g[i] - g0 * f[i]);
    for (i = 0; i < p + 1; ++i) r[i] = Fq_freeze(f0 * r[i] - g0 * v[i]);
    for (i = 0; i < p; ++i) g[i] = g[i + 1];
    g[p] = 0;
  }
  scale = Fq_recip(f[0]);
  for (i = 0; i < p; ++i) out[i] = Fq_freeze(scale * (int32_t)v[p - 1 - i]);
  return crypto_int16_nonzero_mask(delta);
}

static void Round(Fq *out, const Fq *a) {
  int i;
  for (i = 0; i < p; ++i) out[i] = a[i] - F3_freeze(a[i]);
}

static void Short_fromlist(small *out, const uint32_t *in) {
  uint32_t L[p];
  int i;
  for (i = 0; i < w; ++i) L[i] = in[i] & (uint32_t)-2;
  for (i = w; i < p; ++i) L[i] = (in[i] & (uint32_t)-3) | 1;
  crypto_sort_uint32(L, p);
  for (i = 0; i < p; ++i) out[i] = (L[i] & 3) - 1;
}

static void Hash_prefix(unsigned char *out, int b, const unsigned char *in, int inlen) {
  unsigned char x[inlen + 1], h[64];
  int i;
  x[0] = b;
  for (i = 0; i < inlen; ++i) x[i + 1] = in[i];
  crypto_hash_sha512(h, x, inlen + 1);
  for (i = 0; i < 32; ++i) out[i] = h[i];
}

static uint32_t urandom32(void) {
  unsigned char c[4];
  uint32_t result = 0;
  int i;
  randombytes(c, 4);
  for (i = 0; i < 4; ++i) result += ((uint32_t)c[i]) << (8 * i);
  return result;
}

static void Short_random(small *out) {
  uint32_t L[p];
  int i;
  for (i = 0; i < p; ++i) L[i] = urandom32();
  Short_fromlist(out, L);
}

static void Small_random(small *out) {
  int i;
  for (i = 0; i < p; ++i) out[i] = (((urandom32() & 0x3fffffff) * 3) >> 30) - 1;
}

static void KeyGen(Fq *h, small *f, small *ginv) {
  small g[p];
  Fq finv[p];
  for (;;) {
    int result;
    Small_random(g);
    result = R3_recip(ginv, g);
    crypto_declassify(&result, sizeof result);
    if (result == 0) break;
  }
  Short_random(f);
  Rq_recip3(finv, f);
  Rq_mult_small(h, finv, g);
}

static void Encrypt(Fq *c, const small *r, const Fq *h) {
  Fq hr[p];
  Rq_mult_small(hr, h, r);
  Round(c, hr);
}
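/*
 * Decrypt recovers the ternary vector r from a rounded ciphertext: it
 * computes 3*f*c in R/q, reduces the result mod 3, and multiplies by the
 * stored inverse of g in R/3.  Weightw_mask returns 0 if the candidate has
 * exactly w nonzero coefficients and -1 otherwise; in the failure case the
 * masking below silently substitutes the fixed weight-w vector
 * (1,...,1,0,...,0), and the mismatch is caught later by the ciphertext
 * comparison in crypto_kem_sntrup761_dec.
 */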
static void Decrypt(small *r, const Fq *c, const small *f, const small *ginv) {
  Fq cf[p], cf3[p];
  small e[p], ev[p];
  int mask, i;
  Rq_mult_small(cf, c, f);
  Rq_mult3(cf3, cf);
  R3_fromRq(e, cf3);
  R3_mult(ev, e, ginv);
  mask = Weightw_mask(ev);
  for (i = 0; i < w; ++i) r[i] = ((ev[i] ^ 1) & ~mask) ^ 1;
  for (i = w; i < p; ++i) r[i] = ev[i] & ~mask;
}

static void Small_encode(unsigned char *s, const small *f) {
  int i, j;
  for (i = 0; i < p / 4; ++i) {
    small x = 0;
    for (j = 0;j < 4;++j) x += (*f++ + 1) << (2 * j);
    *s++ = x;
  }
  *s = *f++ + 1;
}

static void Small_decode(small *f, const unsigned char *s) {
  int i, j;
  for (i = 0; i < p / 4; ++i) {
    unsigned char x = *s++;
    for (j = 0;j < 4;++j) *f++ = ((small)((x >> (2 * j)) & 3)) - 1;
  }
  *f++ = ((small)(*s & 3)) - 1;
}

static void Rq_encode(unsigned char *s, const Fq *r) {
  uint16_t R[p], M[p];
  int i;
  for (i = 0; i < p; ++i) R[i] = r[i] + q12;
  for (i = 0; i < p; ++i) M[i] = q;
  Encode(s, R, M, p);
}

static void Rq_decode(Fq *r, const unsigned char *s) {
  uint16_t R[p], M[p];
  int i;
  for (i = 0; i < p; ++i) M[i] = q;
  Decode(R, s, M, p);
  for (i = 0; i < p; ++i) r[i] = ((Fq)R[i]) - q12;
}

static void Rounded_encode(unsigned char *s, const Fq *r) {
  uint16_t R[p], M[p];
  int i;
  for (i = 0; i < p; ++i) R[i] = ((r[i] + q12) * 10923) >> 15;
  for (i = 0; i < p; ++i) M[i] = (q + 2) / 3;
  Encode(s, R, M, p);
}

static void Rounded_decode(Fq *r, const unsigned char *s) {
  uint16_t R[p], M[p];
  int i;
  for (i = 0; i < p; ++i) M[i] = (q + 2) / 3;
  Decode(R, s, M, p);
  for (i = 0; i < p; ++i) r[i] = R[i] * 3 - q12;
}

static void ZKeyGen(unsigned char *pk, unsigned char *sk) {
  Fq h[p];
  small f[p], v[p];
  KeyGen(h, f, v);
  Rq_encode(pk, h);
  Small_encode(sk, f);
  Small_encode(sk + Small_bytes, v);
}

static void ZEncrypt(unsigned char *C, const Inputs r, const unsigned char *pk) {
  Fq h[p], c[p];
  Rq_decode(h, pk);
  Encrypt(c, r, h);
  Rounded_encode(C, c);
}

static void ZDecrypt(Inputs r, const unsigned char *C, const unsigned char *sk) {
  small f[p], v[p];
  Fq c[p];
  Small_decode(f, sk);
  Small_decode(v, sk + Small_bytes);
  Rounded_decode(c, C);
  Decrypt(r, c, f, v);
}

static void HashConfirm(unsigned char *h, const unsigned char *r, const unsigned char *cache) {
  unsigned char x[Hash_bytes * 2];
  int i;
  Hash_prefix(x, 3, r, Small_bytes);
  for (i = 0; i < Hash_bytes; ++i) x[Hash_bytes + i] = cache[i];
  Hash_prefix(h, 2, x, sizeof x);
}

static void HashSession(unsigned char *k, int b, const unsigned char *y, const unsigned char *z) {
  unsigned char x[Hash_bytes + crypto_kem_sntrup761_CIPHERTEXTBYTES];
  int i;
  Hash_prefix(x, 3, y, Small_bytes);
  for (i = 0; i < crypto_kem_sntrup761_CIPHERTEXTBYTES; ++i) x[Hash_bytes + i] = z[i];
  Hash_prefix(k, b, x, sizeof x);
}

int crypto_kem_sntrup761_keypair(unsigned char *pk, unsigned char *sk) {
  int i;
  ZKeyGen(pk, sk);
  sk += SecretKeys_bytes;
  for (i = 0; i < crypto_kem_sntrup761_PUBLICKEYBYTES; ++i) *sk++ = pk[i];
  randombytes(sk, Small_bytes);
  Hash_prefix(sk + Small_bytes, 4, pk, crypto_kem_sntrup761_PUBLICKEYBYTES);
  return 0;
}
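/*
 * Secret key layout produced above: Small_encode(f) || Small_encode(1/g)
 * || copy of the public key || rho (Small_bytes of randomness) || cache
 * (hash prefix 4 of the public key).  During decapsulation below, rho is
 * hashed in place of the decrypted r whenever the re-encrypted ciphertext
 * does not match (implicit rejection), and cache avoids rehashing the
 * public key.  Ciphertexts_diff_mask ORs together all byte differences and
 * converts the result to 0 (identical) or -1 (different) via bit 8 of
 * differentbits - 1, again without a secret-dependent branch.
 */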
static void Hide(unsigned char *c, unsigned char *r_enc, const Inputs r, const unsigned char *pk, const unsigned char *cache) {
  Small_encode(r_enc, r);
  ZEncrypt(c, r, pk);
  HashConfirm(c + crypto_kem_sntrup761_CIPHERTEXTBYTES - Confirm_bytes, r_enc, cache);
}

int crypto_kem_sntrup761_enc(unsigned char *c, unsigned char *k, const unsigned char *pk) {
  Inputs r;
  unsigned char r_enc[Small_bytes], cache[Hash_bytes];
  Hash_prefix(cache, 4, pk, crypto_kem_sntrup761_PUBLICKEYBYTES);
  Short_random(r);
  Hide(c, r_enc, r, pk, cache);
  HashSession(k, 1, r_enc, c);
  return 0;
}

static int Ciphertexts_diff_mask(const unsigned char *c, const unsigned char *c2) {
  uint16_t differentbits = 0;
  int len = crypto_kem_sntrup761_CIPHERTEXTBYTES;
  while (len-- > 0) differentbits |= (*c++) ^ (*c2++);
  return (crypto_int64_bitmod_01((differentbits - 1),8)) - 1;
}

int crypto_kem_sntrup761_dec(unsigned char *k, const unsigned char *c, const unsigned char *sk) {
  const unsigned char *pk = sk + SecretKeys_bytes;
  const unsigned char *rho = pk + crypto_kem_sntrup761_PUBLICKEYBYTES;
  const unsigned char *cache = rho + Small_bytes;
  Inputs r;
  unsigned char r_enc[Small_bytes], cnew[crypto_kem_sntrup761_CIPHERTEXTBYTES];
  int mask, i;
  ZDecrypt(r, c, sk);
  Hide(cnew, r_enc, r, pk, cache);
  mask = Ciphertexts_diff_mask(c, cnew);
  for (i = 0; i < Small_bytes; ++i) r_enc[i] ^= mask & (r_enc[i] ^ rho[i]);
  HashSession(k, 1 + mask, r_enc, c);
  return 0;
}