dnl  x86-64 mpn_divrem_1 -- mpn by limb division.

dnl  Copyright 2004, 2005, 2007, 2008, 2009, 2010, 2011, 2012 Free Software
dnl  Foundation, Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')


C Timings per CPU for the three loops of this file: normalized divisor,
C unnormalized divisor, and fraction limbs.  The unit is not stated in this
C file (presumably cycles per limb).
C
C		norm	unorm	frac
C AMD K8,K9	13	13	12
C AMD K10	13	13	12
C Intel P4	43	44	43
C Intel core2	24.5	24.5	19.5
C Intel corei	20.5	19.5	18
C Intel atom	43	46	36
C VIA nano	25.5	25.5	24

C Entry points implemented below:
C
C mp_limb_t
C mpn_divrem_1 (mp_ptr qp, mp_size_t fn,
C               mp_srcptr np, mp_size_t nn, mp_limb_t d)
C
C mp_limb_t
C mpn_preinv_divrem_1 (mp_ptr qp, mp_size_t fn,
C                      mp_srcptr np, mp_size_t nn, mp_limb_t d,
C                      mp_limb_t dinv, int cnt)

C INPUT PARAMETERS
C Names for the registers that hold the arguments once FUNC_ENTRY has run.
define(`qp',		`%rdi')
define(`fn_param',	`%rsi')
define(`up_param',	`%rdx')
define(`un_param',	`%rcx')
define(`d',		`%r8')
define(`dinv',		`%r9')		C mpn_preinv_divrem_1 only
C The shift count is passed on the stack (mpn_preinv_divrem_1 only).

C WORKING REGISTERS
C The prologues copy fn_param, up_param and un_param into fn, up and un.
C After that %rcx is reused for the shift count and %rsi for the source
C pointer.
define(`cnt',	`%rcx')
define(`up',	`%rsi')
define(`fn',	`%r12')
define(`un',	`%rbx')


C Register map for the named registers defined above:
C	     rax rbx rcx rdx rsi rdi rbp r8  r9  r10 r11 r12 r13 r14 r15
C	         un  cnt     up  qp      d   dinv        fn

ABI_SUPPORT(DOS64)
ABI_SUPPORT(STD64)

C CNTOFF(reg) addresses the stack-passed shift count of mpn_preinv_divrem_1
C relative to reg, to be used after that prologue has pushed its four
C registers.  STD64: 8 bytes of return address + 32 bytes of pushes = 40.
C The DOS64 value is larger; presumably it also covers the 32 bytes of
C shadow space, the two preceding stack arguments, and what FUNC_ENTRY
C pushes (verify against the FUNC_ENTRY definition).
IFSTD(`define(`CNTOFF', `40($1)')')
IFDOS(`define(`CNTOFF', `104($1)')')

ASM_START()
	TEXT
	ALIGN(16)
C mp_limb_t mpn_preinv_divrem_1 (qp, fn, np, nn, d, dinv, cnt)
C
C Entry point for callers that supply the inverse (dinv) and the shift
C count (cnt) themselves.  It builds the same register state as
C mpn_divrem_1 and then jumps into the loops of that function: L(nent) when
C the top bit of d is set, L(uent) otherwise.  The result is returned by the
C shared exit code at the end of mpn_divrem_1.
C NOTE(review): unlike mpn_divrem_1 there is no test for nn + fn == 0 or
C nn == 0 on this path; presumably callers guarantee nn >= 1.
PROLOGUE(mpn_preinv_divrem_1)
	FUNC_ENTRY(4)
IFDOS(`	mov	56(%rsp), %r8	')	C 5th argument (d) from the stack
IFDOS(`	mov	64(%rsp), %r9	')	C 6th argument (dinv) from the stack
	xor	R32(%rax), R32(%rax)	C rax = 0, the running remainder
	push	%r13
	push	%r12
	push	%rbp
	push	%rbx

	mov	fn_param, fn
	mov	un_param, un
	add	fn_param, un_param	C un_param = nn + fn
	mov	up_param, up		C overwrites fn_param, already copied

	lea	-8(qp,un_param,8), qp	C qp -> most significant quotient limb

	test	d, d
	js	L(nent)			C top bit set: normalized loop

	mov	CNTOFF(%rsp), R8(cnt)	C only the low byte of rcx is loaded
	shl	R8(cnt), d		C shift d left by cnt
	jmp	L(uent)
EPILOGUE()

C mp_limb_t mpn_divrem_1 (qp, fn, np, nn, d)
C
C Divides the nn limbs at np, extended by fn zero limbs below them, by the
C single limb d.  The nn + fn quotient limbs are stored from qp[nn+fn-1]
C downwards and the remainder is returned in rax.  When nn + fn == 0 nothing
C is stored and 0 is returned.
C
C The quotient limbs are produced with multiplications by dinv, the value
C returned by mpn_invert_limb for the (shifted) divisor; no div instruction
C is used.
C
C Register usage after the prologue:
C   qp   next quotient limb to store, moving downwards
C   up   source limbs		un   source limbs still to process
C   fn   fraction limbs to do	d    divisor, shifted left by the count in cl
C   dinv inverse of d		rcx  shift count (only cl is significant)
C   rax  running remainder	r13  quotient limb being formed
C   rbp, r10, r11  temporaries
	ALIGN(16)
PROLOGUE(mpn_divrem_1)
	FUNC_ENTRY(4)
IFDOS(`	mov	56(%rsp), %r8	')	C 5th argument (d) from the stack
	xor	R32(%rax), R32(%rax)	C return value 0 for the early exit
	push	%r13
	push	%r12
	push	%rbp
	push	%rbx

	mov	fn_param, fn
	mov	un_param, un
	add	fn_param, un_param	C un_param = nn + fn, sets ZF
	mov	up_param, up		C mov leaves the flags alone
	je	L(ret)			C nn + fn == 0: nothing to do

	lea	-8(qp,un_param,8), qp	C qp -> most significant quotient limb
	xor	R32(%rbp), R32(%rbp)

	test	d, d
	jns	L(unnormalized)

C Divisor has its top bit set.  The most significant source limb yields a
C quotient limb of 0 or 1, obtained with a single conditional subtraction.
L(normalized):
	test	un, un
	je	L(8)			C un == 0
	mov	-8(up,un,8), %rbp	C most significant source limb
	dec	un
	mov	%rbp, %rax
	sub	d, %rbp
	cmovc	%rax, %rbp		C limb < d: keep the limb as remainder
	sbb	R32(%rax), R32(%rax)	C rax = -1 on borrow, else 0
	inc	R32(%rax)		C quotient limb: 0 on borrow, else 1
	mov	%rax, (qp)
	lea	-8(qp), qp

C dinv = mpn_invert_limb (d).  Caller-saved registers that are still live
C are saved around the call.  Stack accounting at the call instruction:
C   STD64: 8 (return address) + 32 (prologue pushes) + 24 (pushes below)
C	   = 64 bytes, so rsp is 16-byte aligned.
C   DOS64: FUNC_ENTRY is assumed to push two registers (consistent with the
C	   56(%rsp) offset of the 5th argument used above), so the single
C	   push below leaves rsp aligned.  The 32 bytes of shadow space that
C	   the Microsoft x64 convention requires are reserved explicitly.
L(8):
IFSTD(`	push	%rdi		')
IFSTD(`	push	%rsi		')
	push	%r8
IFSTD(`	mov	d, %rdi		')
IFDOS(`	sub	$32, %rsp	')
IFDOS(`	mov	d, %rcx		')
	CALL(	mpn_invert_limb)
IFDOS(`	add	$32, %rsp	')
	pop	%r8
IFSTD(`	pop	%rsi		')
IFSTD(`	pop	%rdi		')

	mov	%rax, dinv
	mov	%rbp, %rax		C rax = remainder so far
	jmp	L(nent)

C Integer loop for a normalized divisor, one quotient limb per iteration.
C On entry to L(ntop): rax = remainder, rbp = remainder + 1.
C r13 receives the quotient estimate and rax the new remainder; L(nfx)
C handles the case where one more subtraction of d is needed.
	ALIGN(16)
L(ntop):				C	    K8-K10   P6-CNR   P6-NHM  P4
	mov	(up,un,8), %r10		C
	mul	dinv			C	      0,13   0,20      0,18   0,45
	add	%r10, %rax		C	      4      8         3      12
	adc	%rbp, %rdx		C	      5      9        10      13
	mov	%rax, %rbp		C	      5      9         4      13
	mov	%rdx, %r13		C	      6      11       12      23
	imul	d, %rdx			C	      6      11       11      23
	sub	%rdx, %r10		C	      10     16       14      33
	mov	d, %rax			C
	add	%r10, %rax		C	      11     17       15      34
	cmp	%rbp, %r10		C	      11     17       15      34
	cmovc	%r10, %rax		C	      12     18       16      35
	adc	$-1, %r13		C
	cmp	d, %rax			C
	jae	L(nfx)			C
L(nok):
	mov	%r13, (qp)		C
	sub	$8, qp			C
L(nent):
	lea	1(%rax), %rbp		C
	dec	un			C
	jns	L(ntop)			C

	xor	R32(%rcx), R32(%rcx)	C shift count 0 for the final shr
	jmp	L(87)

L(nfx):
	sub	d, %rax
	inc	%r13
	jmp	L(nok)

C Divisor has its top bit clear.  If the most significant source limb is
C smaller than d, the top quotient limb is 0 and that limb becomes the
C initial remainder.
L(unnormalized):
	test	un, un
	je	L(44)
	mov	-8(up,un,8), %rax
	cmp	d, %rax
	jae	L(44)
	mov	%rbp, (qp)		C rbp is 0 here: top quotient limb
	mov	%rax, %rbp
	lea	-8(qp), qp
C NOTE(review): mov and lea leave the flags alone, so ZF below is still that
C of the cmp above.  Reaching this point means the jae fell through, i.e.
C rax < d, so this branch looks unreachable.  It is kept unchanged.
	je	L(ret)
	dec	un
L(44):
	bsr	d, %rcx
	not	R32(%rcx)		C low 6 bits = number of leading zeros
	shl	R8(%rcx), d		C shift d until its top bit is set
	shl	R8(%rcx), %rbp		C shift the initial remainder alike

C dinv = mpn_invert_limb (d), as at L(8), but rcx is saved as well.  That
C extra push changes the stack accounting at the call instruction:
C   STD64: 8 + 32 + 32 = 72 bytes, so 8 bytes of padding are added to make
C	   rsp 16-byte aligned.
C   DOS64: two pushes leave rsp 8 bytes off alignment, so 40 bytes are
C	   reserved: 32 bytes of shadow space plus 8 bytes of padding.
	push	%rcx
IFSTD(`	push	%rdi		')
IFSTD(`	push	%rsi		')
	push	%r8
IFSTD(`	sub	$8, %rsp	')
IFSTD(`	mov	d, %rdi		')
IFDOS(`	sub	$40, %rsp	')
IFDOS(`	mov	d, %rcx		')
	CALL(	mpn_invert_limb)
IFSTD(`	add	$8, %rsp	')
IFDOS(`	add	$40, %rsp	')
	pop	%r8
IFSTD(`	pop	%rsi		')
IFSTD(`	pop	%rdi		')
	pop	%rcx

	mov	%rax, dinv
	mov	%rbp, %rax		C rax = shifted initial remainder
	test	un, un
	je	L(87)			C no integer limbs left

C Merge the bits shifted out of the top source limb into the remainder.
C Negating ecx turns the shift count cnt into 64 - cnt, since 64-bit shifts
C only use the low 6 bits of cl.
L(uent):
	dec	un
	mov	(up,un,8), %rbp
	neg	R32(%rcx)
	shr	R8(%rcx), %rbp
	neg	R32(%rcx)
	or	%rbp, %rax
	jmp	L(ent)

C Integer loop for an unnormalized divisor.  rbp holds the current source
C limb and r10 the next lower one; the shifted dividend limb is assembled
C from both, then the same quotient step as in L(ntop) is performed with
C r11 = remainder + 1.
	ALIGN(16)
L(utop):
	mov	(up,un,8), %r10
	shl	R8(%rcx), %rbp
	neg	R32(%rcx)
	shr	R8(%rcx), %r10
	neg	R32(%rcx)
	or	%r10, %rbp
	mul	dinv
	add	%rbp, %rax
	adc	%r11, %rdx
	mov	%rax, %r11
	mov	%rdx, %r13
	imul	d, %rdx
	sub	%rdx, %rbp
	mov	d, %rax
	add	%rbp, %rax
	cmp	%r11, %rbp
	cmovc	%rbp, %rax
	adc	$-1, %r13
	cmp	d, %rax
	jae	L(ufx)
L(uok):
	mov	%r13, (qp)
	sub	$8, qp
L(ent):
	mov	(up,un,8), %rbp
	dec	un
	lea	1(%rax), %r11
	jns	L(utop)

C Last integer limb: there is no lower source limb to merge in, so the
C shifted limb simply has zeros in its low bits.
L(uend):
	shl	R8(%rcx), %rbp
	mul	dinv
	add	%rbp, %rax
	adc	%r11, %rdx
	mov	%rax, %r11
	mov	%rdx, %r13
	imul	d, %rdx
	sub	%rdx, %rbp
	mov	d, %rax
	add	%rbp, %rax
	cmp	%r11, %rbp
	cmovc	%rbp, %rax
	adc	$-1, %r13
	cmp	d, %rax
	jae	L(efx)
L(eok):
	mov	%r13, (qp)
	sub	$8, qp
	jmp	L(87)

L(ufx):
	sub	d, %rax
	inc	%r13
	jmp	L(uok)
L(efx):
	sub	d, %rax
	inc	%r13
	jmp	L(eok)

C Fraction limbs.  The dividend limbs are all zero here, so the loop works
C with rbp = -d and needs neither a source load nor the extra fixup branch.
L(87):
	mov	d, %rbp
	neg	%rbp
	jmp	L(fent)

	ALIGN(16)			C	    K8-K10   P6-CNR   P6-NHM  P4
L(ftop):
	mul	dinv			C	      0,12   0,17      0,17
	add	%r11, %rdx		C	      5      8        10
	mov	%rax, %r11		C	      4      8         3
	mov	%rdx, %r13		C	      6      9        11
	imul	%rbp, %rdx		C	      6      9        11
	mov	d, %rax			C
	add	%rdx, %rax		C	      10     14       14
	cmp	%r11, %rdx		C	      10     14       14
	cmovc	%rdx, %rax		C	      11     15       15
	adc	$-1, %r13		C
	mov	%r13, (qp)		C
	sub	$8, qp			C
L(fent):
	lea	1(%rax), %r11		C
	dec	fn			C
	jns	L(ftop)			C

	shr	R8(%rcx), %rax		C undo the shift applied to d
L(ret):
	pop	%rbx
	pop	%rbp
	pop	%r12
	pop	%r13
	FUNC_EXIT()
	ret
EPILOGUE()