1dnl x86 mpn_divrem_2 -- Divide an mpn number by a normalized 2-limb number. 2 3dnl Copyright 2007, 2008 Free Software Foundation, Inc. 4 5dnl This file is part of the GNU MP Library. 6dnl 7dnl The GNU MP Library is free software; you can redistribute it and/or modify 8dnl it under the terms of either: 9dnl 10dnl * the GNU Lesser General Public License as published by the Free 11dnl Software Foundation; either version 3 of the License, or (at your 12dnl option) any later version. 13dnl 14dnl or 15dnl 16dnl * the GNU General Public License as published by the Free Software 17dnl Foundation; either version 2 of the License, or (at your option) any 18dnl later version. 19dnl 20dnl or both in parallel, as here. 21dnl 22dnl The GNU MP Library is distributed in the hope that it will be useful, but 23dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 24dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 25dnl for more details. 26dnl 27dnl You should have received copies of the GNU General Public License and the 28dnl GNU Lesser General Public License along with the GNU MP Library. If not, 29dnl see https://www.gnu.org/licenses/. 30 31include(`../config.m4') 32 33 34C norm frac 35C 486 36C P5 37C P6-13 29.2 38C P6-15 *26 39C K6 40C K7 22 41C K8 *19 42C P4-f1 43C P4-f2 *65 44C P4-f3 45C P4-f4 *72 46 47C A star means numbers not updated for the latest version of the code. 48 49 50C TODO 51C * Perhaps keep ecx or esi in stack slot, freeing up a reg for q0. 52C * The loop has not been carefully tuned. We should at the very least do 53C some local insn swapping. 54C * The code outside the main loop is what gcc generated. Clean up! 55C * Clean up stack slot usage. 
C INPUT PARAMETERS
C qp
C fn
C up_param
C un_param
C dp

C mpn_divrem_2 -- divide the limbs at up by the 2-limb divisor at dp.
C
C All arguments are read from the stack.  After the four register pushes and
C the 36-byte frame set up in the prologue they are addressed as:
C	56(%esp)  qp	quotient limbs are stored to qp[fn+un-3] ... qp[0]
C	60(%esp)  fn	loop indices below fn use a zero low limb instead of
C			loading a limb from up
C	64(%esp)  up	source limbs; on exit up[0] and up[1] receive the two
C			remainder limbs
C	68(%esp)  un
C	72(%esp)  dp	dp[0] = d0 (low divisor limb), dp[1] = d1 (high limb)
C
C Return value (eax): the msl slot, 32(%esp).  It is 1 when the two top limbs
C {up[un-1],up[un-2]} were >= {d1,d0} on entry (they are then reduced by the
C divisor once at L(35)), otherwise 0.
C
C Stack frame slots referenced by the code:
C	 0(%esp)  q0 inside the loop; scratch while di is being computed
C	 4(%esp)  -d1
C	12(%esp)  address of up[un-3]
C	16(%esp)  di, computed before the loop and used as a multiplier in it
C	20(%esp)  d0
C	24(%esp)  d1
C	32(%esp)  msl (return value)
C Slots 8 and 28 are not referenced.

C NOTE(review): the column alignment of the row below is not preserved here,
C so which registers cnt and qp belong to cannot be confirmed from this text.
C eax ebx ecx edx esi edi ebp
C cnt qp

ASM_START()
	TEXT
	ALIGN(16)
PROLOGUE(mpn_divrem_2)
	push	%ebp			C save the registers restored at L(9)
	push	%edi
	push	%esi
	push	%ebx
	sub	$36, %esp		C local frame; arguments now start at 56(%esp)
	mov	68(%esp), %ecx		C un
	mov	72(%esp), %esi		C dp
	movl	$0, 32(%esp)		C msl = 0
	lea	0(,%ecx,4), %edi	C edi = 4*un
	add	64(%esp), %edi		C up
	mov	(%esi), %ebx		C ebx = d0
	mov	4(%esi), %eax		C eax = d1
	mov	%ebx, 20(%esp)		C save d0
	sub	$12, %edi		C edi = address of up[un-3]
	mov	%eax, 24(%esp)		C save d1
	mov	%edi, 12(%esp)		C save limb pointer for the loop
	mov	8(%edi), %ebx		C ebx = up[un-1]
	mov	4(%edi), %ebp		C ebp = up[un-2]
	cmp	%eax, %ebx		C up[un-1] vs d1
	jb	L(8)			C top limb < d1: no initial reduction
	seta	%dl			C dl = (up[un-1] > d1)
	cmp	20(%esp), %ebp
	setae	%al			C al = (up[un-2] >= d0)
	orb	%dl, %al		C "orb" form to placate Sun tools
	jne	L(35)			C {ebx,ebp} >= {d1,d0}: subtract divisor once
L(8):
	mov	60(%esp), %esi		C fn
	lea	-3(%esi,%ecx), %edi	C edi = fn + un - 3, index of top quotient limb
	test	%edi, %edi
	js	L(9)			C negative: no quotient limbs, just store remainder

C Compute di.  Per the TODO list at the top of the file this part is compiler
C generated.  It starts from the quotient of (~d1 * 2^32 + 0xffffffff) / d1 and
C then adjusts it downwards using d0.
C NOTE(review): presumably di is the reciprocal of the 2-limb divisor used for
C division by multiplication -- confirm against the generic C code.
	mov	24(%esp), %edx		C edx = d1
	mov	$-1, %esi
	mov	%esi, %eax		C eax = 0xffffffff (low dividend limb)
	mov	%esi, %ecx		C ecx = -1, collects carries below
	not	%edx			C edx = ~d1 (high dividend limb)
	divl	24(%esp)		C eax = quotient by d1
	mov	%eax, %esi		C esi = candidate di
	imul	24(%esp), %eax		C eax = low limb of di * d1
	mov	%eax, (%esp)		C park it in the scratch slot
	mov	%esi, %eax
	mull	20(%esp)		C edx:eax = di * d0
	mov	(%esp), %eax		C eax = low limb of di * d1 (low product limb dropped)
	add	20(%esp), %eax		C ... + d0
	adc	$0, %ecx		C carry into ecx
	add	%eax, %edx
	adc	$0, %ecx		C carry into ecx; sets the sign flag tested below
	mov	%ecx, %eax		C mov leaves the flags alone
	js	L(32)			C ecx still negative: di needs no adjustment
L(36):	dec	%esi			C di--
	sub	24(%esp), %edx		C {eax,edx} -= d1
	sbb	$0, %eax
	jns	L(36)			C repeat until eax becomes negative
L(32):
	mov	%esi, 16(%esp)		C di
	mov	%edi, %ecx		C un (loop index, starts at fn + un - 3)
	mov	12(%esp), %esi		C up (address of up[un-3])
	mov	24(%esp), %eax
	neg	%eax
	mov	%eax, 4(%esp)		C -d1
	ALIGN(16)
	nop

C Register and stack slot usage inside the loop:
C	ebx = n2	ecx = un (index)	esi = up	ebp = n1
C	0(%esp) = q0	4(%esp) = -d1		16(%esp) = di
C	20(%esp) = d0	24(%esp) = d1		32(%esp) = msl
C	56(%esp) = qp	60(%esp) = fn
C
C eax ebx ecx edx esi edi ebp  0    4   8   12  16  20  24  28  32   56  60
C     n2  un      up      n1   q0  -d1          di  d0  d1      msl  qp  fn

C Each iteration produces one quotient limb in edi and leaves the updated
C two-limb partial remainder in {ebx,ebp}.
L(loop):
	mov	16(%esp), %eax		C di
	mul	%ebx			C edx:eax = di * n2
	add	%ebp, %eax		C low limb + n1
	mov	%eax, (%esp)		C q0
	adc	%ebx, %edx		C high limb + n2 + carry
	mov	%edx, %edi		C q
	imul	4(%esp), %edx		C edx = low limb of q * -d1
	mov	20(%esp), %eax		C eax = d0
	lea	(%edx, %ebp), %ebx	C n1 -= ...
	mul	%edi			C edx:eax = d0 * q
	xor	%ebp, %ebp		C next low limb defaults to zero
	cmp	60(%esp), %ecx
	jl	L(19)			C index < fn (signed): keep the zero limb
	mov	(%esi), %ebp		C otherwise fetch the next limb from up
	sub	$4, %esi		C and step the pointer down one limb
L(19):	sub	20(%esp), %ebp		C {ebx,ebp} -= {d1,d0}
	sbb	24(%esp), %ebx
	sub	%eax, %ebp		C {ebx,ebp} -= d0 * q
	sbb	%edx, %ebx
	mov	20(%esp), %eax		C d0
	inc	%edi			C q + 1
	xor	%edx, %edx
	cmp	(%esp), %ebx		C carry set when ebx < q0
	adc	$-1, %edx		C mask
	add	%edx, %edi		C q--
	and	%edx, %eax		C d0 or 0
	and	24(%esp), %edx		C d1 or 0
	add	%eax, %ebp		C add the masked divisor back
	adc	%edx, %ebx
	cmp	24(%esp), %ebx
	jae	L(fix)			C high remainder limb >= d1: check for one more step
L(bck):	mov	56(%esp), %edx		C edx = qp
	mov	%edi, (%edx, %ecx, 4)	C qp[index] = q
	dec	%ecx
	jns	L(loop)			C continue while index >= 0

C Store the remainder, fetch the return value and unwind the frame.
L(9):	mov	64(%esp), %esi		C up
	mov	%ebp, (%esi)		C up[0] = low remainder limb
	mov	%ebx, 4(%esi)		C up[1] = high remainder limb
	mov	32(%esp), %eax		C return msl
	add	$36, %esp
	pop	%ebx
	pop	%esi
	pop	%edi
	pop	%ebp
	ret

C Entered with the flags of the cmp of ebx against d1 still valid (jae does
C not modify them).  If {ebx,ebp} >= {d1,d0}, bump q and subtract the divisor.
L(fix):	seta	%dl			C dl = (ebx > d1)
	cmp	20(%esp), %ebp
	setae	%al			C al = (ebp >= d0)
	orb	%dl, %al		C "orb" form to placate Sun tools
	je	L(bck)			C ebx == d1 and ebp < d0: nothing to fix
	inc	%edi			C q++
	sub	20(%esp), %ebp		C {ebx,ebp} -= {d1,d0}
	sbb	24(%esp), %ebx
	jmp	L(bck)

C Initial reduction: the two top limbs were >= the divisor on entry.
L(35):	sub	20(%esp), %ebp		C {ebx,ebp} -= {d1,d0}
	sbb	24(%esp), %ebx
	movl	$1, 32(%esp)		C msl = 1
	jmp	L(8)
EPILOGUE()