xref: /openbsd-src/lib/libcrypto/bn/arch/amd64/bignum_add.S (revision 22787c513b4b59ee1fb13a32326a50f73cd342c1)
1// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2//
3// Permission to use, copy, modify, and/or distribute this software for any
4// purpose with or without fee is hereby granted, provided that the above
5// copyright notice and this permission notice appear in all copies.
6//
7// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
10// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
12// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
13// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
14
15// ----------------------------------------------------------------------------
16// Add, z := x + y
17// Inputs x[m], y[n]; outputs function return (carry-out) and z[p]
18//
19//    extern uint64_t bignum_add
20//     (uint64_t p, uint64_t *z,
21//      uint64_t m, uint64_t *x, uint64_t n, uint64_t *y);
22//
// Computes z := x + y truncated to p words (i.e. modulo 2^(64p)) and returns
// the carry-out (0 or 1) from the p'th word position. Only the lowest
// min(p,m) words of x and min(p,n) words of y are read; the shorter input
// is treated as zero-extended.
26//
27// Standard x86-64 ABI: RDI = p, RSI = z, RDX = m, RCX = x, R8 = n, R9 = y, returns RAX
28// Microsoft x64 ABI:   RCX = p, RDX = z, R8 = m, R9 = x, [RSP+40] = n, [RSP+48] = y, returns RAX
29// ----------------------------------------------------------------------------
30
31#include "s2n_bignum_internal.h"
32
33        .intel_syntax noprefix
34        S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_add)
35        S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_add)
36        .text
37
38#define p rdi
39#define z rsi
40#define m rdx
41#define x rcx
42#define n r8
43#define y r9
44#define i r10
45#define a rax
46
47#define ashort eax
48
49
50
51S2N_BN_SYMBOL(bignum_add):
52	_CET_ENDBR
53
54#if WINDOWS_ABI
55        push    rdi
56        push    rsi
57        mov     rdi, rcx
58        mov     rsi, rdx
59        mov     rdx, r8
60        mov     rcx, r9
61        mov     r8, [rsp+56]
62        mov     r9, [rsp+64]
63#endif
64
65// Zero the main index counter for both branches
66
67        xor     i, i
68
69// First clamp the two input sizes m := min(p,m) and n := min(p,n) since
70// we'll never need words past the p'th. Can now assume m <= p and n <= p.
71// Then compare the modified m and n and branch accordingly
72
73        cmp     p, m
74        cmovc   m, p
75        cmp     p, n
76        cmovc   n, p
77        cmp     m, n
78        jc      ylonger
79
80// The case where x is longer or of the same size (p >= m >= n)
81
82        sub     p, m
83        sub     m, n
84        inc     m
85        test    n, n
86        jz      xtest
87xmainloop:
88        mov     a, [x+8*i]
89        adc     a, [y+8*i]
90        mov     [z+8*i],a
91        inc     i
92        dec     n
93        jnz     xmainloop
94        jmp     xtest
95xtoploop:
96        mov     a, [x+8*i]
97        adc     a, 0
98        mov     [z+8*i],a
99        inc     i
100xtest:
101        dec     m
102        jnz     xtoploop
103        mov     ashort, 0
104        adc     a, 0
105        test    p, p
106        jnz     tails
107#if WINDOWS_ABI
108        pop    rsi
109        pop    rdi
110#endif
111        ret
112
113// The case where y is longer (p >= n > m)
114
115ylonger:
116
117        sub     p, n
118        sub     n, m
119        test    m, m
120        jz      ytoploop
121ymainloop:
122        mov     a, [x+8*i]
123        adc     a, [y+8*i]
124        mov     [z+8*i],a
125        inc     i
126        dec     m
127        jnz     ymainloop
128ytoploop:
129        mov     a, [y+8*i]
130        adc     a, 0
131        mov     [z+8*i],a
132        inc     i
133        dec     n
134        jnz     ytoploop
135        mov     ashort, 0
136        adc     a, 0
137        test    p, p
138        jnz     tails
139#if WINDOWS_ABI
140        pop    rsi
141        pop    rdi
142#endif
143        ret
144
145// Adding a non-trivial tail, when p > max(m,n)
146
147tails:
148        mov     [z+8*i],a
149        xor     a, a
150        jmp     tail
151tailloop:
152        mov     [z+8*i],a
153tail:
154        inc     i
155        dec     p
156        jnz     tailloop
157#if WINDOWS_ABI
158        pop    rsi
159        pop    rdi
160#endif
161        ret
162
// On Linux ELF targets, emit an empty .note.GNU-stack section (no flags set),
// the usual marker that this object does not need an executable stack.

#if defined(__ELF__) && defined(__linux__)
        .section .note.GNU-stack,"",%progbits
#endif
166