xref: /openbsd-src/lib/libcrypto/bn/arch/amd64/bignum_cmul.S (revision 22787c513b4b59ee1fb13a32326a50f73cd342c1)
1// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2//
3// Permission to use, copy, modify, and/or distribute this software for any
4// purpose with or without fee is hereby granted, provided that the above
5// copyright notice and this permission notice appear in all copies.
6//
7// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
10// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
12// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
13// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
14
// ----------------------------------------------------------------------------
// Multiply by a single word, z := c * y
// Inputs c, y[n]; outputs function return (carry-out) and z[k]
//
//    extern uint64_t bignum_cmul
//     (uint64_t k, uint64_t *z, uint64_t c, uint64_t n, uint64_t *y);
//
// Does the "z := c * y" operation where y is n digits and the result z is
// k digits. Truncates the result in general unless k >= n + 1.
//
// The return value is a high/carry word that is meaningful when k >= n as
// giving the high part of the result. Since this is always zero if k > n,
// it is mainly of interest in the special case k = n, i.e. where the source
// and destination have the same nominal size, when it gives the extra word
// of the full result.
//
// Standard x86-64 ABI: RDI = k, RSI = z, RDX = c, RCX = n, R8 = y, returns RAX
// Microsoft x64 ABI:   RCX = k, RDX = z, R8 = c, R9 = n, [RSP+40] = y, returns RAX
// ----------------------------------------------------------------------------
34
35#include "s2n_bignum_internal.h"
36
37        .intel_syntax noprefix
38        S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_cmul)
39        S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_cmul)
40        .text
41
// Register roles. The first five names follow the C prototype
//
//    bignum_cmul(k, z, c, n, y)
//
// k = number of output digits, z = output pointer, n = number of input
// digits, y = input pointer. The multiplier c arrives in rdx and is copied
// to r9 before the first multiply.

#define k rdi
#define z rsi
#define c r9
#define n rcx
#define y r8

// i = index of the current digit, h = high word carried into the next digit

#define i r10
#define h r11

// Branch targets are function-prefixed assembler-local (.L) labels so that
// generic names do not end up as symbols inside the routine.

S2N_BN_SYMBOL(bignum_cmul):
        _CET_ENDBR

// Under the Microsoft x64 convention rdi and rsi are callee-saved, so save
// them and then move the arguments into the registers named above. The fifth
// argument is on the stack, at [rsp+40] on entry and therefore at [rsp+56]
// after the two pushes.

#if WINDOWS_ABI
        push    rdi
        push    rsi
        mov     rdi, rcx
        mov     rsi, rdx
        mov     rdx, r8
        mov     rcx, r9
        mov     r8, [rsp+56]
#endif

// Clamp the input length, n := min(k,n): digits of y at index k and above
// cannot influence a k-digit result, so they are never read. From here on
// n <= k.

        cmp     k, n
        cmovb   n, k

// Start at digit index i = 0 with no carried high word. If there are no
// input digits to multiply, go straight to filling the output.

        xor     h, h
        xor     i, i
        test    n, n
        jz      .Lbignum_cmul_tail

// mul writes the high half of its product to rdx, so keep c in r9

        mov     c, rdx

// First digit, handled separately as there is no carry-in yet:
// [h, z[0]] = c * y[0]

        mov     rax, [y]
        mul     c
        mov     [z], rax
        mov     h, rdx
        inc     i
        cmp     i, n
        je      .Lbignum_cmul_tail

// Remaining digits: [h, z[i]] = c * y[i] + h. The sum fits in 128 bits,
// since (2^64 - 1)^2 + (2^64 - 1) < 2^128, so the adc never carries out.

.Lbignum_cmul_loop:
        mov     rax, [y+8*i]
        mul     c
        add     rax, h
        adc     rdx, 0
        mov     [z+8*i], rax
        mov     h, rdx
        inc     i
        cmp     i, n
        jb      .Lbignum_cmul_loop

// If the output is longer than the (clamped) input, store the final high
// word as the next digit and clear h, which then serves both as the padding
// value and as the return value.

.Lbignum_cmul_tail:
        cmp     i, k
        jae     .Lbignum_cmul_end
        mov     [z+8*i], h
        xor     h, h
        inc     i
        cmp     i, k
        jae     .Lbignum_cmul_end

// Zero-fill the rest of the output, z[i] for i < k

.Lbignum_cmul_tloop:
        mov     [z+8*i], h
        inc     i
        cmp     i, k
        jb      .Lbignum_cmul_tloop

// Return h: the word carried out of the last multiplied digit when the
// multiplication filled all of z, or zero when a tail digit was written.

.Lbignum_cmul_end:
        mov     rax, h

#if WINDOWS_ABI
        pop     rsi
        pop     rdi
#endif
        ret
135
136#if defined(__linux__) && defined(__ELF__)
137.section .note.GNU-stack,"",%progbits
138#endif
139