dnl  x86-64 mpn_divrem_2 -- Divide an mpn number by a normalized 2-limb number.

dnl  Copyright 2007, 2008, 2010, 2014 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.
include(`../config.m4')


C		cycles/limb	best
C AMD K8,K9	18
C AMD K10	18
C AMD bull
C AMD pile
C AMD bobcat
C AMD jaguar
C Intel P4	68
C Intel core	34
C Intel NHM	30.25
C Intel SBR	21.3
C Intel IBR	21.4
C Intel HWL	20.6
C Intel BWL
C Intel atom	73
C VIA nano	33


C INPUT PARAMETERS
define(`qp',		`%rdi')
define(`fn',		`%rsi')
define(`up_param',	`%rdx')
define(`un_param',	`%rcx')
define(`dp',		`%r8')

ABI_SUPPORT(DOS64)
ABI_SUPPORT(STD64)

C ---------------------------------------------------------------------------
C mpn_divrem_2 (qp, fn, up_param, un_param, dp)
C
C Syntax: AT&T operand order, m4 preprocessed.  A word C starts a comment.
C
C Divides the un_param limbs at up_param, followed by fn zero limbs at the
C low end, by the two limb divisor d1:d0 read from dp[1] and dp[0].  The
C file title says the divisor must be normalized.
C
C   In:   qp        where quotient limbs are stored
C         fn        count of extra low zero limbs to divide through
C         up_param  dividend limbs, also receives the remainder
C         un_param  count of dividend limbs
C         dp        divisor limbs, fetched from the stack under DOS64
C   Out:  rax       0 or 1, it is 1 when the top two dividend limbs were
C                   not smaller than the divisor and one divisor was
C                   subtracted up front
C         qp[un_param+fn-3] down to qp[0]   quotient limbs, high first
C         two remainder limbs at 8(%r12) and 16(%r12) on exit.  Every limb
C         load steps r12 down one limb, so with un_param >= 2 and fn >= 0
C         that works out to up_param[0] and up_param[1].
C
C Saves and restores rbx, rbp, r12, r13, r14, r15.  Calls mpn_invert_limb
C unless un_param + fn - 3 is negative.
C ---------------------------------------------------------------------------

ASM_START()
	TEXT
	ALIGN(16)
PROLOGUE(mpn_divrem_2)
	FUNC_ENTRY(4)
IFDOS(`	mov	56(%rsp), %r8	')
	push	%r15
	push	%r14
	push	%r13
	push	%r12
	lea	-24(up_param,un_param,8), %r12	C r12 = &up[un-3], top limb is 16(%r12)
	mov	fn, %r13		C r13 = fn, kept for the whole loop
	push	%rbp
	mov	qp, %rbp
	push	%rbx
	mov	8(dp), %r11		C r11 = d1, high divisor limb
	mov	16(%r12), %rbx		C rbx = up[un-1]
	mov	(dp), %r8		C r8 = d0, the dp pointer is dead from here
	mov	8(%r12), %r10		C r10 = up[un-2]

C Most significant quotient limb.  r15 becomes 1 and the divisor is taken
C off rbx:r10 when rbx:r10 >= d1:d0, otherwise r15 stays 0.
	xor	R32(%r15), R32(%r15)
	cmp	%rbx, %r11
	ja	L(msd)			C d1 > rbx: nothing to subtract
	setb	%dl			C dl = 1 when d1 < rbx
	cmp	%r10, %r8
	setbe	%al			C al = 1 when d0 <= r10
	orb	%al, %dl		C "orb" form to placate Sun tools
	je	L(msd)			C both clear: high limbs equal, low one smaller
	inc	R32(%r15)
	sub	%r8, %r10
	sbb	%r11, %rbx
L(msd):
	lea	-3(un_param,%r13), %r14	C r14 = un + fn - 3, index of first quotient limb
	test	%r14, %r14
	js	L(done)			C no further quotient limbs wanted

C Fetch the single limb inverse of d1.  r8, r10 and r11 are caller saved
C registers in both supported ABIs, so they are parked on the stack.
	push	%r8
	push	%r10
	push	%r11
IFSTD(`	mov	%r11, %rdi	')
IFDOS(`	mov	%r11, %rcx	')
IFDOS(`	sub	$32, %rsp	')
C NOTE(review): the stack should be 16 byte aligned here.  The nz sense is
C presumably right because the ASSERT macro saves flags on the stack before
C running the test, confirm against its definition outside this file.
	ASSERT(nz, `test $15, %rsp')
	CALL(	mpn_invert_limb)
IFDOS(`	add	$32, %rsp	')
	pop	%r11
	pop	%r10
	pop	%r8

C Correct the returned value v so that it also accounts for d0.  rcx:r9
C is a two limb accumulator, v is decremented until rcx turns negative.
	mov	%r11, %rdx
	mov	%rax, %rdi		C rdi = v
	imul	%rax, %rdx		C rdx = low limb of d1 * v
	mov	%rdx, %r9
	mul	%r8			C rdx:rax = v * d0
	xor	R32(%rcx), R32(%rcx)
	add	%r8, %r9		C r9 += d0
	adc	$-1, %rcx		C rcx = carry - 1
	add	%rdx, %r9		C r9 += high limb of v * d0
	adc	$0, %rcx
	js	L(ready)		C already negative: v is final
L(adj):
	dec	%rdi			C v -= 1
	sub	%r11, %r9		C rcx:r9 -= d1
	sbb	$0, %rcx
	jns	L(adj)
L(ready):

	lea	(%rbp,%r14,8), %rbp	C rbp = &qp[un+fn-3]
	mov	%r11, %rsi
	neg	%rsi			C rsi = -d1

C Register use inside the loop:
C   rbx  high remainder limb		r10  low remainder limb
C   rdi  inverse v			rsi  -d1
C   r8   d0				r11  d1
C   r9   quotient limb being built	rcx  low half of the quotient estimate
C   rbp  quotient store pointer		r12  dividend load pointer
C   r13  fn				r14  remaining count, runs down to 0
C   r15  return value			rax, rdx  scratch
C
C Bracketed tags are scheduling annotations.  A number is presumably the
C cycle slot within one iteration and ncp presumably means not on the
C critical path.

	ALIGN(16)
L(top):	mov	%rdi, %rax		C rax = v			[ncp]
	mul	%rbx			C rdx:rax = v * rbx		[0, 17]
	mov	%r10, %rcx		C rcx = low remainder limb
	add	%rax, %rcx		C rcx += low product		[4]
	adc	%rbx, %rdx		C rdx = high product + rbx + cy	[5]
	mov	%rdx, %r9		C r9 = quotient estimate q	[6]
	imul	%rsi, %rdx		C rdx = -d1 * q, low limb	[6]
	mov	%r8, %rax		C rax = d0			[ncp]
	lea	(%rdx, %r10), %rbx	C rbx = r10 - d1 * q		[10]
	xor	R32(%r10), %R32(%r10)	C next low limb defaults to 0
	mul	%r9			C rdx:rax = d0 * q		[7]
	cmp	%r14, %r13		C signed compare of fn with count
	jg	L(frac)			C fn > count: past the dividend, keep 0
	mov	(%r12), %r10		C next dividend limb
	sub	$8, %r12		C step the load pointer down
L(frac):
	sub	%r8, %r10		C rbx:r10 -= d1:d0		[ncp]
	sbb	%r11, %rbx		C				[11]
	sub	%rax, %r10		C rbx:r10 -= d0 * q		[11]
	sbb	%rdx, %rbx		C				[12]
	xor	R32(%rax), R32(%rax)	C rdx:rax = 0 unless the cmovs fire
	xor	R32(%rdx), R32(%rdx)	C
	cmp	%rcx, %rbx		C carry set when rbx < rcx	[13]
	cmovnc	%r8, %rax		C no carry: rdx:rax = d1:d0	[14]
	cmovnc	%r11, %rdx		C				[14]
	adc	$0, %r9			C carry: q += 1			[14]
	nop
	add	%rax, %r10		C add back d1:d0 or zero	[15]
	adc	%rdx, %rbx		C				[16]
	cmp	%r11, %rbx		C
	jae	L(over)			C rbx >= d1: one more check needed
L(store):
	mov	%r9, (%rbp)		C write this quotient limb
	sub	$8, %rbp		C and move to the next lower one
	dec	%r14
	jns	L(top)

C Write the two remainder limbs, restore saved registers, return r15.
L(done):
	mov	%r10, 8(%r12)
	mov	%rbx, 16(%r12)
	pop	%rbx
	pop	%rbp
	pop	%r12
	pop	%r13
	pop	%r14
	mov	%r15, %rax
	pop	%r15
	FUNC_EXIT()
	ret

C Out of line path, reached with flags from the compare of rbx with d1.
C When rbx:r10 >= d1:d0 the quotient limb is bumped and the divisor is
C subtracted once more, otherwise nothing changes.
L(over):
	seta	%dl			C dl = 1 when rbx > d1
	cmp	%r8, %r10
	setae	%al			C al = 1 when r10 >= d0
	orb	%dl, %al		C "orb" form to placate Sun tools
	je	L(store)		C rbx = d1 and r10 < d0: remainder is fine
	inc	%r9
	sub	%r8, %r10
	sbb	%r11, %rbx
	jmp	L(store)
EPILOGUE()