1// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 2// 3// Permission to use, copy, modify, and/or distribute this software for any 4// purpose with or without fee is hereby granted, provided that the above 5// copyright notice and this permission notice appear in all copies. 6// 7// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 8// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 9// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 10// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 11// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 12// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 13// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 14 15// ---------------------------------------------------------------------------- 16// Multiply z := x * y 17// Inputs x[m], y[n]; output z[k] 18// 19// extern void bignum_mul 20// (uint64_t k, uint64_t *z, 21// uint64_t m, uint64_t *x, uint64_t n, uint64_t *y); 22// 23// Does the "z := x * y" operation where x is m digits, y is n, result z is k. 24// Truncates the result in general unless k >= m + n 25// 26// Standard x86-64 ABI: RDI = k, RSI = z, RDX = m, RCX = x, R8 = n, R9 = y 27// Microsoft x64 ABI: RCX = k, RDX = z, R8 = m, R9 = x, [RSP+40] = n, [RSP+48] = y 28// ---------------------------------------------------------------------------- 29 30#include "s2n_bignum_internal.h" 31 32 .intel_syntax noprefix 33 S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_mul) 34 S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_mul) 35 .text 36 37// These are actually right 38 39#define p rdi 40#define z rsi 41#define n r8 42 43// These are not 44 45#define c r15 46#define h r14 47#define l r13 48#define x r12 49#define y r11 50#define i rbx 51#define k r10 52#define m rbp 53 54// These are always local scratch since multiplier result is in these 55 56#define a rax 57#define d rdx 58 59 60 61S2N_BN_SYMBOL(bignum_mul): 62 _CET_ENDBR 63 64#if WINDOWS_ABI 65 push rdi 66 push rsi 67 mov rdi, rcx 68 mov rsi, rdx 69 mov rdx, r8 70 mov rcx, r9 71 mov r8, [rsp+56] 72 mov r9, [rsp+64] 73#endif 74 75// We use too many registers, and also we need rax:rdx for multiplications 76 77 push rbx 78 push rbp 79 push r12 80 push r13 81 push r14 82 push r15 83 mov m, rdx 84 85// If the result size is zero, do nothing 86// Note that even if either or both inputs has size zero, we can't 87// just give up because we at least need to zero the output array 88// If we did a multiply-add variant, however, then we could 89 90 test p, p 91 jz end 92 93// Set initial 2-part sum to zero (we zero c inside the body) 94 95 xor h,h 96 xor l,l 97 98// Otherwise do outer loop k = 0 ... k = p - 1 99 100 xor k, k 101 102outerloop: 103 104// Zero our carry term first; we eventually want it and a zero is useful now 105// Set a = max 0 (k + 1 - n), i = min (k + 1) m 106// This defines the range a <= j < i for the inner summation 107// Note that since k < p < 2^64 we can assume k + 1 doesn't overflow 108// And since we want to increment it anyway, we might as well do it now 109 110 xor c, c // c = 0 111 inc k // k = k + 1 112 113 mov a, k // a = k + 1 114 sub a, n // a = k + 1 - n 115 cmovc a, c // a = max 0 (k + 1 - n) 116 117 mov i, m // i = m 118 cmp k, m // CF <=> k + 1 < m 119 cmovc i, k // i = min (k + 1) m 120 121// Turn i into a loop count, and skip things if it's <= 0 122// Otherwise set up initial pointers x -> x0[a] and y -> y0[k - a] 123// and then launch into the main inner loop, postdecrementing i 124 125 mov d, k 126 sub d, i 127 sub i, a 128 jbe innerend 129 lea x,[rcx+8*a] 130 lea y,[r9+8*d-8] 131 132innerloop: 133 mov rax, [y+8*i] 134 mul QWORD PTR [x] 135 add x, 8 136 add l, rax 137 adc h, rdx 138 adc c, 0 139 dec i 140 jnz innerloop 141 142innerend: 143 144 mov [z], l 145 mov l, h 146 mov h, c 147 add z, 8 148 149 cmp k, p 150 jc outerloop 151 152end: 153 pop r15 154 pop r14 155 pop r13 156 pop r12 157 pop rbp 158 pop rbx 159#if WINDOWS_ABI 160 pop rsi 161 pop rdi 162#endif 163 ret 164 165#if defined(__linux__) && defined(__ELF__) 166.section .note.GNU-stack,"",%progbits 167#endif 168