// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

// ----------------------------------------------------------------------------
// Add, z := x + y
// Inputs x[m], y[n]; outputs function return (carry-out) and z[p]
//
//    extern uint64_t bignum_add
//     (uint64_t p, uint64_t *z,
//      uint64_t m, uint64_t *x, uint64_t n, uint64_t *y);
//
// Does the z := x + y operation, truncating modulo p words in general and
// returning a top carry (0 or 1) in the p'th place, only adding the input
// words below p (as well as m and n respectively) to get the sum and carry.
//
// Standard x86-64 ABI: RDI = p, RSI = z, RDX = m, RCX = x, R8 = n, R9 = y, returns RAX
// Microsoft x64 ABI:   RCX = p, RDX = z, R8 = m, R9 = x, [RSP+40] = n, [RSP+48] = y, returns RAX
// ----------------------------------------------------------------------------

#include "s2n_bignum_internal.h"

        .intel_syntax noprefix
        S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_add)
        S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_add)
        .text

// Argument registers, named after the C parameters they carry
// (standard x86-64 ABI assignment; the Windows entry code below
// shuffles its arguments into the same places).

#define p rdi
#define z rsi
#define m rdx
#define x rcx
#define n r8
#define y r9

// Scratch registers. k is the word index shared by every loop. acc holds
// the word currently being summed and, at the end, the carry-out that is
// returned in RAX. acc32 is the 32-bit view of acc, used to zero it with
// a plain "mov" so that the carry flag is left untouched.

#define k r10
#define acc rax
#define acc32 eax



S2N_BN_SYMBOL(bignum_add):
        _CET_ENDBR

// Microsoft x64 entry: RDI and RSI are overwritten below, so save them
// first and restore them before every return. The four register arguments
// are moved into the standard-ABI registers, and n and y are loaded from
// the stack. The two pushes lower RSP by 16, so the slots documented as
// [RSP+40] and [RSP+48] at entry are now at [RSP+56] and [RSP+64].

#if WINDOWS_ABI
        push    rdi
        push    rsi
        mov     rdi, rcx
        mov     rsi, rdx
        mov     rdx, r8
        mov     rcx, r9
        mov     r8, [rsp+56]
        mov     r9, [rsp+64]
#endif

// Start the shared word index at zero; both branches below rely on it

        xor     k, k

// Clamp the input lengths, n := min(p,n) and m := min(p,m): words at or
// beyond position p never contribute to the truncated result. Each cmovc
// fires only when the preceding unsigned compare found p smaller.
// Afterwards m <= p and n <= p, and the final compare selects the branch
// according to which (clamped) input is longer.

        cmp     p, n
        cmovc   n, p
        cmp     p, m
        cmovc   m, p
        cmp     m, n
        jc      bignum_add_ylonger

// Case p >= m >= n: x is at least as long as y.
//
// Counters on loop entry:
//   p = p - m        output words left to fill above the longer input
//   m = m - n + 1    words of x alone, plus one because the xtest entry
//                    point decrements before it tests
//   n                words where both inputs are present
//
// "test" leaves CF clear, so the carry chain starts from zero. Inside the
// loops only adc changes CF; inc and dec leave it alone, which is what
// lets the carry survive from one iteration to the next.

        sub     p, m
        sub     m, n
        inc     m
        test    n, n
        jz      bignum_add_xtest
bignum_add_xmainloop:
        mov     acc, [x+8*k]
        adc     acc, [y+8*k]
        mov     [z+8*k], acc
        inc     k
        dec     n
        jnz     bignum_add_xmainloop
        jmp     bignum_add_xtest
bignum_add_xtoploop:
        mov     acc, [x+8*k]
        adc     acc, 0
        mov     [z+8*k], acc
        inc     k
bignum_add_xtest:
        dec     m
        jnz     bignum_add_xtoploop

// Turn the final carry flag into a 0/1 value in acc. The zeroing must be
// a "mov" (not "xor") because CF is still live at this point. If output
// words remain (p != 0) the carry is stored rather than returned.

        mov     acc32, 0
        adc     acc, 0
        test    p, p
        jnz     bignum_add_tails
#if WINDOWS_ABI
        pop     rsi
        pop     rdi
#endif
        ret

// Case p >= n > m: y is strictly longer than x.
//
// Counters on loop entry:
//   p = p - n        output words left to fill above the longer input
//   n = n - m        words of y alone (nonzero, since n > m)
//   m                words where both inputs are present
//
// As before, "test" clears CF and inc/dec preserve it across iterations.

bignum_add_ylonger:

        sub     p, n
        sub     n, m
        test    m, m
        jz      bignum_add_ytoploop
bignum_add_ymainloop:
        mov     acc, [x+8*k]
        adc     acc, [y+8*k]
        mov     [z+8*k], acc
        inc     k
        dec     m
        jnz     bignum_add_ymainloop
bignum_add_ytoploop:
        mov     acc, [y+8*k]
        adc     acc, 0
        mov     [z+8*k], acc
        inc     k
        dec     n
        jnz     bignum_add_ytoploop

// Same carry extraction as in the other branch (flag-preserving zeroing)

        mov     acc32, 0
        adc     acc, 0
        test    p, p
        jnz     bignum_add_tails
#if WINDOWS_ABI
        pop     rsi
        pop     rdi
#endif
        ret

// Non-trivial tail, reached when p > max(m,n): the carry becomes the next
// output word, the return value is therefore zero, and the remaining
// p - 1 output words are filled with zeros.

bignum_add_tails:
        mov     [z+8*k], acc
        xor     acc, acc
        jmp     bignum_add_tail
bignum_add_tailloop:
        mov     [z+8*k], acc
bignum_add_tail:
        inc     k
        dec     p
        jnz     bignum_add_tailloop
#if WINDOWS_ABI
        pop     rsi
        pop     rdi
#endif
        ret

#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif