// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
//
// Permission to use, copy, modify, and/or distribute this software for any
// purpose with or without fee is hereby granted, provided that the above
// copyright notice and this permission notice appear in all copies.
//
// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

// ----------------------------------------------------------------------------
// Multiply by a single word, z := c * y
// Inputs c, y[n]; outputs function return (carry-out) and z[k]
//
//    extern uint64_t bignum_cmul
//     (uint64_t k, uint64_t *z, uint64_t c, uint64_t n, uint64_t *y);
//
// Does the "z := c * y" operation where y is n digits and the result z is
// k digits (the code below refers to this output length as p).
// Truncates the result in general unless k >= n + 1.
//
// The return value is a high/carry word that is meaningful when k >= n as
// giving the high part of the result. Since this is always zero if k > n,
// it is mainly of interest in the special case k = n, i.e. where the source
// and destination have the same nominal size, when it gives the extra word
// of the full result.
//
// Standard x86-64 ABI: RDI = k, RSI = z, RDX = c, RCX = n, R8 = y, returns RAX
// Microsoft x64 ABI:   RCX = k, RDX = z, R8 = c, R9 = n, [RSP+40] = y, returns RAX
//
// Registers written: RAX, RCX, RDX, R9, R10, R11 and the flags. Under the
// Microsoft ABI, RDI, RSI and R8 are also written, with RDI and RSI saved
// on entry and restored before returning.
// ----------------------------------------------------------------------------

#include "s2n_bignum_internal.h"

        .intel_syntax noprefix
        S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_cmul)
        S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_cmul)
        .text

// Register roles
//
//   p = output length in digits (the argument called k in the prototype)
//   z = pointer to the output digits
//   c = the single-word multiplier, copied out of rdx before any multiply
//   n = input length in digits, clamped to min(p,n) at the start
//   x = pointer to the input digits (the argument called y in the prototype)
//   i = digit index shared by the input and output accesses
//   h = high word of the latest product, i.e. the carry into the next digit

#define p rdi
#define z rsi
#define c r9
#define n rcx
#define x r8

#define i r10
#define h r11

S2N_BN_SYMBOL(bignum_cmul):
        _CET_ENDBR

// Under the Microsoft ABI, save rdi and rsi and then shuffle the arguments
// into the standard ABI registers so the rest of the code is common.
// The fifth argument is at [rsp+40] on entry, hence [rsp+56] after the
// two 8-byte pushes.

#if WINDOWS_ABI
        push    rdi
        push    rsi
        mov     rdi, rcx
        mov     rsi, rdx
        mov     rdx, r8
        mov     rcx, r9
        mov     r8, [rsp+56]
#endif

// First clamp the input size n := min(p,n) since we can never need to read
// past term number p of the input to generate p-digit output. Now we can
// assume that n <= p. The comparison is unsigned (carry set iff p < n).

        cmp     p, n
        cmovc   n, p

// Initialize current input/output pointer offset i and high part h.
// But then if n = 0 skip the multiplication and go to the tail part

        xor     h, h
        xor     i, i
        test    n, n
        jz      bignum_cmul_tail

// Move c into a safer register as multiplies overwrite rdx

        mov     c, rdx

// Initialization of the loop: [h,l] = c * x_0
// The first digit is peeled off because there is no incoming carry to add.

        mov     rax, [x]
        mul     c
        mov     [z], rax
        mov     h, rdx
        inc     i
        cmp     i, n
        jz      bignum_cmul_tail

// Main loop doing the multiplications. Each pass forms c * x_i + h as a
// two-word value in rdx:rax, stores the low word as z_i and keeps the high
// word as the next h. On entry to each pass i < n.

bignum_cmul_loop:
        mov     rax, [x+8*i]
        mul     c
        add     rax, h
        adc     rdx, 0
        mov     [z+8*i], rax
        mov     h, rdx
        inc     i
        cmp     i, n
        jc      bignum_cmul_loop

// Add a tail when the destination is longer: the pending high word becomes
// digit i of the output and h is then cleared, so that every remaining
// digit written, and the eventual return value, is zero.

bignum_cmul_tail:
        cmp     i, p
        jnc     bignum_cmul_end
        mov     [z+8*i], h
        xor     h, h
        inc     i
        cmp     i, p
        jnc     bignum_cmul_end

bignum_cmul_tloop:
        mov     [z+8*i], h
        inc     i
        cmp     i, p
        jc      bignum_cmul_tloop

// Return the high/carry word

bignum_cmul_end:
        mov     rax, h

#if WINDOWS_ABI
        pop     rsi
        pop     rdi
#endif
        ret

#if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits
#endif