// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

// ----------------------------------------------------------------------------
// Square, z := x^2
// Input x[4]; output z[8]
//
//    extern void bignum_sqr_4_8_alt
//     (uint64_t z[static 8], uint64_t x[static 4]);
//
// Standard x86-64 ABI: RDI = z, RSI = x
// Microsoft x64 ABI:   RCX = z, RDX = x
//
// Registers written as scratch: rax, rdx, rcx, r8, r9, plus the flags.
// ----------------------------------------------------------------------------

#include "s2n_bignum_internal.h"

        .intel_syntax noprefix
        S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_sqr_4_8_alt)
        S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_sqr_4_8_alt)
        .text

// Argument registers: output pointer and input pointer

#define z rdi
#define x rsi

// Three scratch registers used as a rotating 3-word accumulator window.
// Which of them plays the (carry, high, low) role shifts by one position
// for each successive result word.

#define t0 rcx
#define t1 r8
#define t2 r9

// combadd1(cy,hi,lo,a): (cy,hi,lo) += a^2
// The square is formed in rdx:rax by a one-operand mul of rax by itself,
// then added with carry propagation through all three window words.
// Overwrites rax, rdx and the flags.

#define combadd1(cy,hi,lo,a)                    \
        mov     rax, a;                         \
        mul     rax;                            \
        add     lo, rax;                        \
        adc     hi, rdx;                        \
        adc     cy, 0

// combads(hi,lo,a): (hi,lo) += a^2
// Two-word short form for when no carry out of the top word is expected,
// so none is recorded. Overwrites rax, rdx and the flags.

#define combads(hi,lo,a)                        \
        mov     rax, a;                         \
        mul     rax;                            \
        add     lo, rax;                        \
        adc     hi, rdx

// A version doubling before adding, for non-square terms

// combadd2(cy,hi,lo,a,b): (cy,hi,lo) += 2 * a * b
// The product a * b is formed in rdx:rax and doubled in place; the bit
// shifted out of rdx is added to cy. The doubled product is then added
// into (hi,lo), with the carry out of hi also added to cy.
// Overwrites rax, rdx and the flags.

#define combadd2(cy,hi,lo,a,b)                  \
        mov     rax, a;                         \
        mul     QWORD PTR b;                    \
        add     rax, rax;                       \
        adc     rdx, rdx;                       \
        adc     cy, 0;                          \
        add     lo, rax;                        \
        adc     hi, rdx;                        \
        adc     cy, 0

// Entry point. Each result word z[k] is the low word of the running
// accumulator after adding every product x[i] * x[j] with i + j = k;
// products with i != j are added doubled, squares are added once.
// S2N_BN_SYMBOL and _CET_ENDBR come from s2n_bignum_internal.h
// (presumably symbol decoration and a CET landing pad; confirm there).

S2N_BN_SYMBOL(bignum_sqr_4_8_alt):
        _CET_ENDBR

// Microsoft x64: rdi and rsi are callee-saved there, so preserve them and
// copy the arguments from rcx/rdx into the registers the body uses.
// rcx doubles as t0, but it is only overwritten after this copy.

#if WINDOWS_ABI
        push    rdi
        push    rsi
        mov     rdi, rcx
        mov     rsi, rdx
#endif

// Result term 0: x[0]^2. The low word is z[0]; the high word starts the
// accumulator window in t0, with t1 cleared above it.

        mov     rax, [x]
        mul     rax

        mov     [z], rax
        mov     t0, rdx
        xor     t1, t1

// Result term 1: 2 * x[0] * x[1], window (t2,t1,t0)

        xor     t2, t2
        combadd2(t2,t1,t0,[x],[x+8])
        mov     [z+8], t0

// Result term 2: x[1]^2 + 2 * x[0] * x[2], window (t0,t2,t1)

        xor     t0, t0
        combadd1(t0,t2,t1,[x+8])
        combadd2(t0,t2,t1,[x],[x+16])
        mov     [z+16], t1

// Result term 3: 2 * x[0] * x[3] + 2 * x[1] * x[2], window (t1,t0,t2)

        xor     t1, t1
        combadd2(t1,t0,t2,[x],[x+24])
        combadd2(t1,t0,t2,[x+8],[x+16])
        mov     [z+24], t2

// Result term 4: 2 * x[1] * x[3] + x[2]^2, window (t2,t1,t0)

        xor     t2, t2
        combadd2(t2,t1,t0,[x+8],[x+24])
        combadd1(t2,t1,t0,[x+16])
        mov     [z+32], t0

// Result term 5: 2 * x[2] * x[3], window (t0,t2,t1)

        xor     t0, t0
        combadd2(t0,t2,t1,[x+16],[x+24])
        mov     [z+40], t1

// Result term 6: x[3]^2 added into the two remaining words (t0,t2).
// NOTE(review): t1 is not read again after this point, so the clear below
// looks to be kept only to mirror the pattern of the earlier terms.

        xor     t1, t1
        combads(t0,t2,[x+24])
        mov     [z+48], t2

// Result term 7: the remaining top word of the accumulator

        mov     [z+56], t0

// Return, restoring the registers saved on entry for the Microsoft ABI

#if WINDOWS_ABI
        pop     rsi
        pop     rdi
#endif
        ret

// On Linux ELF targets, emit an empty .note.GNU-stack section so the
// object is marked as not needing an executable stack.

#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif