dnl  AMD64 mpn_addlshC_n -- rp[] = up[] + (vp[] << C)
dnl  AMD64 mpn_rsblshC_n -- rp[] = (vp[] << C) - up[]

dnl  Copyright 2009, 2010, 2011, 2012 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.


C	     cycles/limb
C AMD K8,K9	 2
C AMD K10	 2
C Intel P4	 ?
C Intel core2	 3
C Intel NHM	 2.75
C Intel SBR	 2.55
C Intel atom	 ?
C VIA nano	 ?
31 32C INPUT PARAMETERS 33define(`rp', `%rdi') 34define(`up', `%rsi') 35define(`vp', `%rdx') 36define(`n', `%rcx') 37 38define(M, eval(m4_lshift(1,LSH))) 39 40ABI_SUPPORT(DOS64) 41ABI_SUPPORT(STD64) 42 43ASM_START() 44 TEXT 45 ALIGN(16) 46PROLOGUE(func) 47 FUNC_ENTRY(4) 48 push %r12 49 push %r13 50 push %r14 51 push %r15 52 53 mov (vp), %r8 54 lea (,%r8,M), %r12 55 shr $RSH, %r8 56 57 mov R32(n), R32(%rax) 58 lea (rp,n,8), rp 59 lea (up,n,8), up 60 lea (vp,n,8), vp 61 neg n 62 and $3, R8(%rax) 63 je L(b00) 64 cmp $2, R8(%rax) 65 jc L(b01) 66 je L(b10) 67 68L(b11): mov 8(vp,n,8), %r10 69 lea (%r8,%r10,M), %r14 70 shr $RSH, %r10 71 mov 16(vp,n,8), %r11 72 lea (%r10,%r11,M), %r15 73 shr $RSH, %r11 74 ADDSUB (up,n,8), %r12 75 ADCSBB 8(up,n,8), %r14 76 ADCSBB 16(up,n,8), %r15 77 sbb R32(%rax), R32(%rax) C save carry for next 78 mov %r12, (rp,n,8) 79 mov %r14, 8(rp,n,8) 80 mov %r15, 16(rp,n,8) 81 add $3, n 82 js L(top) 83 jmp L(end) 84 85L(b01): mov %r8, %r11 86 ADDSUB (up,n,8), %r12 87 sbb R32(%rax), R32(%rax) C save carry for next 88 mov %r12, (rp,n,8) 89 add $1, n 90 js L(top) 91 jmp L(end) 92 93L(b10): mov 8(vp,n,8), %r11 94 lea (%r8,%r11,M), %r15 95 shr $RSH, %r11 96 ADDSUB (up,n,8), %r12 97 ADCSBB 8(up,n,8), %r15 98 sbb R32(%rax), R32(%rax) C save carry for next 99 mov %r12, (rp,n,8) 100 mov %r15, 8(rp,n,8) 101 add $2, n 102 js L(top) 103 jmp L(end) 104 105L(b00): mov 8(vp,n,8), %r9 106 mov 16(vp,n,8), %r10 107 jmp L(e00) 108 109 ALIGN(16) 110L(top): mov 16(vp,n,8), %r10 111 mov (vp,n,8), %r8 112 mov 8(vp,n,8), %r9 113 lea (%r11,%r8,M), %r12 114 shr $RSH, %r8 115L(e00): lea (%r8,%r9,M), %r13 116 shr $RSH, %r9 117 mov 24(vp,n,8), %r11 118 lea (%r9,%r10,M), %r14 119 shr $RSH, %r10 120 lea (%r10,%r11,M), %r15 121 shr $RSH, %r11 122 add R32(%rax), R32(%rax) C restore carry 123 ADCSBB (up,n,8), %r12 124 ADCSBB 8(up,n,8), %r13 125 ADCSBB 16(up,n,8), %r14 126 ADCSBB 24(up,n,8), %r15 127 mov %r12, (rp,n,8) 128 mov %r13, 8(rp,n,8) 129 mov %r14, 16(rp,n,8) 130 sbb R32(%rax), 
R32(%rax) C save carry for next 131 mov %r15, 24(rp,n,8) 132 add $4, n 133 js L(top) 134L(end): 135 136ifelse(ADDSUB,add,` 137 sub R32(%r11), R32(%rax) 138 neg R32(%rax) 139',` 140 add R32(%r11), R32(%rax) 141 movslq R32(%rax), %rax 142') 143 pop %r15 144 pop %r14 145 pop %r13 146 pop %r12 147 FUNC_EXIT() 148 ret 149EPILOGUE() 150