xref: /openbsd-src/lib/libcrypto/bn/arch/amd64/bignum_sub.S (revision 22787c513b4b59ee1fb13a32326a50f73cd342c1)
1// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2//
3// Permission to use, copy, modify, and/or distribute this software for any
4// purpose with or without fee is hereby granted, provided that the above
5// copyright notice and this permission notice appear in all copies.
6//
7// THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
8// WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
9// MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
10// ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
11// WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
12// ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
13// OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
14
15// ----------------------------------------------------------------------------
16// Subtract, z := x - y
17// Inputs x[m], y[n]; outputs function return (carry-out) and z[p]
18//
19//    extern uint64_t bignum_sub
20//     (uint64_t p, uint64_t *z,
21//      uint64_t m, uint64_t *x, uint64_t n, uint64_t *y);
22//
23// Does the z := x - y operation, truncating modulo p words in general and
24// returning a top borrow (0 or 1) in the p'th place, only subtracting input
25// words below p (as well as m and n respectively) to get the diff and borrow.
26//
27// Standard x86-64 ABI: RDI = p, RSI = z, RDX = m, RCX = x, R8 = n, R9 = y, returns RAX
28// Microsoft x64 ABI:   RCX = p, RDX = z, R8 = m, R9 = x, [RSP+40] = n, [RSP+48] = y, returns RAX
29// ----------------------------------------------------------------------------
30
31#include "s2n_bignum_internal.h"
32
// Assembler setup: Intel operand order without register prefixes, code
// placed in .text. The two S2N_BN_SYM_* macros are defined outside this
// file (s2n_bignum_internal.h is the only include).
// NOTE(review): presumably they set the export/visibility of the
// bignum_sub symbol -- confirm against that header.
33        .intel_syntax noprefix
34        S2N_BN_SYM_VISIBILITY_DIRECTIVE(bignum_sub)
35        S2N_BN_SYM_PRIVACY_DIRECTIVE(bignum_sub)
36        .text
37
// Argument aliases, matching the standard x86-64 ABI register assignment
// given in the header comment: p = number of output words, z = output
// pointer, m = number of words in x, x = first input pointer, n = number
// of words in y, y = second input pointer. On Windows the entry code
// moves the arguments into these same registers before the main body runs.
38#define p rdi
39#define z rsi
40#define m rdx
41#define x rcx
42#define n r8
43#define y r9
// Working registers: i is the word index shared by every loop; a holds
// the word currently being computed, then the borrow mask, and finally
// the 0/1 return value (the function returns in RAX).
44#define i r10
45#define a rax
46
// 32-bit view of a, used where a must be zeroed with a mov (writing eax
// also clears the upper half of rax).
47#define ashort eax
48
49
50
// bignum_sub: z[0..p-1] := (x - y) truncated to p words; returns the borrow
// out of word p-1 as 0 or 1 in a. x supplies m words and y supplies n words;
// words at index >= p are never read, and words an input does not have are
// treated as zero.
//
// The borrow is carried in the CPU carry flag (CF) from one sbb to the next.
// Everything between two sbb instructions is therefore restricted to
// mov/inc/dec/jcc, none of which modify CF. Do not replace inc/dec with
// add/sub or the "mov ashort, 0" with xor: either would destroy the borrow.
51S2N_BN_SYMBOL(bignum_sub):
// NOTE(review): _CET_ENDBR is defined outside this file; presumably it emits
// an indirect-branch landing pad when CET is enabled -- confirm in the header.
52	_CET_ENDBR
53
// Windows entry shim. rdi and rsi are saved here and restored before each
// ret. The four register arguments are then moved into the standard-ABI
// registers; the order matters because rdx and rcx are each read as a source
// before being overwritten as a destination. n and y are loaded from the
// stack at the documented [rsp+40] and [rsp+48], offset by the 16 bytes of
// the two pushes above.
54#if WINDOWS_ABI
55        push    rdi
56        push    rsi
57        mov     rdi, rcx
58        mov     rsi, rdx
59        mov     rdx, r8
60        mov     rcx, r9
61        mov     r8, [rsp+56]
62        mov     r9, [rsp+64]
63#endif
64
65// Zero the main index counter for both branches
66
67        xor     i, i
68
69// First clamp the two input sizes m := min(p,m) and n := min(p,n) since
70// we'll never need words past the p'th. Can now assume m <= p and n <= p.
71// Then compare the modified m and n and branch accordingly
// (Each cmp sets CF exactly when its first operand is unsigned-below the
// second, so cmovc replaces an oversized count with p, and jc is taken
// when m < n.)
72
73        cmp     p, m
74        cmovc   m, p
75        cmp     p, n
76        cmovc   n, p
77        cmp     m, n
78        jc      ylonger
79
80// The case where x is longer or of the same size (p >= m >= n)
// Counters from here on:
//   p := p - m      words of pure borrow extension to write at the end
//   m := m - n + 1  one more than the number of x-only words, because the
//                   loop at xtest decrements before it tests
//   n               words where both x and y contribute
// "test n, n" both detects n = 0 and clears CF, so the first sbb starts
// with no incoming borrow.
81
82        sub     p, m
83        sub     m, n
84        inc     m
85        test    n, n
86        jz      xtest
// Words 0..n-1: z[i] := x[i] - y[i] - borrow
87xmainloop:
88        mov     a, [x+8*i]
89        sbb     a, [y+8*i]
90        mov     [z+8*i],a
91        inc     i
92        dec     n
93        jnz     xmainloop
94        jmp     xtest
// Words n..m-1 (y exhausted): z[i] := x[i] - 0 - borrow
95xtoploop:
96        mov     a, [x+8*i]
97        sbb     a, 0
98        mov     [z+8*i],a
99        inc     i
// Loop test for xtoploop, also the entry point when there is nothing (left)
// to do in xmainloop. After the loop, "sbb a, a" turns the final borrow into
// a mask in a: 0 if no borrow, all ones if borrow. The mask is captured
// before "test p, p" overwrites CF.
100xtest:
101        dec     m
102        jnz     xtoploop
103        sbb     a, a
104        test    p, p
105        jz      tailskip
// Remaining p words (beyond both inputs): each is the borrow mask, i.e. the
// borrow propagated through zero minus zero. Also entered from the ylonger
// path with the same register state (a = mask, p = words left, i = index).
106tailloop:
107        mov     [z+8*i],a
108        inc     i
109        dec     p
110        jnz     tailloop
// Convert the 0 / all-ones mask into the 0 / 1 return value.
111tailskip:
112        neg     a
113#if WINDOWS_ABI
114        pop    rsi
115        pop    rdi
116#endif
117        ret
118
119// The case where y is longer (p >= n > m)
// Counters from here on:
//   p := p - n   words of pure borrow extension to write at the end
//   n := n - m   number of y-only words; at least 1 here since n > m
//   m            words where both x and y contribute
// "test m, m" both detects m = 0 and clears CF for the first sbb.
120
121ylonger:
122
123        sub     p, n
124        sub     n, m
125        test    m, m
126        jz      ytoploop
// Words 0..m-1: z[i] := x[i] - y[i] - borrow
127ymainloop:
128        mov     a, [x+8*i]
129        sbb     a, [y+8*i]
130        mov     [z+8*i],a
131        inc     i
132        dec     m
133        jnz     ymainloop
// Words m..n-1 (x exhausted): z[i] := 0 - y[i] - borrow. a is zeroed with a
// mov because mov leaves CF (the incoming borrow) untouched.
134ytoploop:
135        mov     ashort, 0
136        sbb     a, [y+8*i]
137        mov     [z+8*i],a
138        inc     i
139        dec     n
140        jnz     ytoploop
// Capture the final borrow as a mask, then either share the tail loop of
// the other branch (p != 0) or convert the mask to 0 / 1 and return.
141        sbb     a, a
142        test    p, p
143        jnz     tailloop
144        neg     a
145#if WINDOWS_ABI
146        pop    rsi
147        pop    rdi
148#endif
149        ret
150
// On Linux ELF builds, emit an empty .note.GNU-stack section with no flags.
// This is the GNU toolchain convention for declaring that the object does
// not require an executable stack.
151#if defined(__linux__) && defined(__ELF__)
152.section .note.GNU-stack,"",%progbits
153#endif
154