dnl  x86 mpn_divrem_2 -- Divide an mpn number by a normalized 2-limb number.

dnl  Copyright 2007, 2008 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')


C		norm	frac
C 486
C P5
C P6-13		29.2
C P6-15		*26
C K6
C K7		22
C K8		*19
C P4-f1
C P4-f2		*65
C P4-f3
C P4-f4		*72

C A star means numbers not updated for the latest version of the code.


C TODO
C  * Perhaps keep ecx or esi in stack slot, freeing up a reg for q0.
C  * The loop has not been carefully tuned.  We should at the very least do
C    some local insn swapping.
C  * The code outside the main loop is what gcc generated.  Clean up!
C  * Clean up stack slot usage.
C INPUT PARAMETERS
C All arguments are passed on the stack.  The offsets below are valid once
C the prologue has pushed four registers and reserved a 36-byte frame.
C qp		56(%esp)
C fn		60(%esp)
C up_param	64(%esp)
C un_param	68(%esp)
C dp		72(%esp)
C
C RESULT
C eax returns the word kept in 32(%esp) (msl): 0, or 1 when the two top
C dividend limbs were >= the divisor on entry.  Quotient limbs are stored to
C qp[fn+un-3] down to qp[0].  The two remainder limbs are written back to
C up[0] (low) and up[1] (high).
C
C ebx, esi, edi and ebp are saved on entry and restored before returning.


C eax ebx ecx edx esi edi ebp
C cnt qp
C NOTE(review): the column alignment of the register map above was lost, so
C it no longer shows which registers cnt and qp referred to.  In the code
C below the loop counter lives in ecx and qp is reloaded from 56(%esp) into
C edx on every iteration.

ASM_START()
	TEXT
	ALIGN(16)
PROLOGUE(mpn_divrem_2)
	push	%ebp
	push	%edi
	push	%esi
	push	%ebx
	sub	$36, %esp
	mov	68(%esp), %ecx		C un
	mov	72(%esp), %esi		C dp
	movl	$0, 32(%esp)		C msl = 0
	lea	0(,%ecx,4), %edi
	add	64(%esp), %edi		C up
	mov	(%esi), %ebx		C dp[0]
	mov	4(%esi), %eax		C dp[1]
	mov	%ebx, 20(%esp)		C d0
	sub	$12, %edi		C edi = &up[un-3]
	mov	%eax, 24(%esp)		C d1
	mov	%edi, 12(%esp)
	mov	8(%edi), %ebx		C n2 = up[un-1]
	mov	4(%edi), %ebp		C n1 = up[un-2]

C If the two top dividend limbs {n2,n1} are >= {d1,d0}, subtract the divisor
C once and record a most significant quotient limb of 1 (done at L(35)).
	cmp	%eax, %ebx
	jb	L(8)			C n2 < d1
	seta	%dl			C n2 > d1
	cmp	20(%esp), %ebp
	setae	%al			C n1 >= d0
	orb	%dl, %al		C "orb" form to placate Sun tools
	jne	L(35)
L(8):
	mov	60(%esp), %esi		C fn
	lea	-3(%esi,%ecx), %edi	C edi = fn + un - 3, index of top quotient limb
	test	%edi, %edi
	js	L(9)			C negative: no quotient limbs to develop

C Compute the inverse di used by the main loop.  The divl below yields
C floor(((not d1) * 2^32 + 2^32-1) / d1).  The sequence after it decrements
C that candidate in L(36) while a sign test passes.
C NOTE(review): the second step presumably adjusts the one-limb reciprocal
C for the low divisor limb d0 -- confirm against the reference algorithm.
	mov	24(%esp), %edx
	mov	$-1, %esi
	mov	%esi, %eax
	mov	%esi, %ecx
	not	%edx
	divl	24(%esp)
	mov	%eax, %esi		C candidate inverse
	imul	24(%esp), %eax		C low word of candidate * d1
	mov	%eax, (%esp)
	mov	%esi, %eax
	mull	20(%esp)		C edx:eax = candidate * d0
	mov	(%esp), %eax
	add	20(%esp), %eax
	adc	$0, %ecx
	add	%eax, %edx
	adc	$0, %ecx
	mov	%ecx, %eax		C mov leaves flags alone, js tests the adc result
	js	L(32)
L(36):	dec	%esi
	sub	24(%esp), %edx
	sbb	$0, %eax
	jns	L(36)
L(32):
	mov	%esi, 16(%esp)		C di
	mov	%edi, %ecx		C un
	mov	12(%esp), %esi		C up
	mov	24(%esp), %eax
	neg	%eax
	mov	%eax, 4(%esp)		C -d1
	ALIGN(16)
	nop

C Register and stack slot usage inside the loop:
C
C   ebx        n2   high limb of the partial remainder
C   ebp        n1   low limb of the partial remainder
C   ecx        un   index of the quotient limb being developed, counts down
C   esi        up   pointer to the next dividend limb to fetch
C   eax, edx, edi   scratch (edi holds the quotient limb q)
C
C    0(%esp)   q0
C    4(%esp)   -d1
C   16(%esp)   di
C   20(%esp)   d0
C   24(%esp)   d1
C   32(%esp)   msl
C   56(%esp)   qp
C   60(%esp)   fn

L(loop):
	mov	16(%esp), %eax		C di
	mul	%ebx			C edx:eax = di * n2
	add	%ebp, %eax
	mov	%eax, (%esp)		C q0
	adc	%ebx, %edx
	mov	%edx, %edi		C q
	imul	4(%esp), %edx		C q * -d1
	mov	20(%esp), %eax		C d0
	lea	(%edx, %ebp), %ebx	C n1 -= ...
	mul	%edi			C edx:eax = d0 * q
	xor	%ebp, %ebp		C next dividend limb is 0 unless loaded below
	cmp	60(%esp), %ecx
	jl	L(19)			C index below fn: nothing left to read from up
	mov	(%esi), %ebp
	sub	$4, %esi
L(19):	sub	20(%esp), %ebp		C remainder -= {d1,d0}
	sbb	24(%esp), %ebx
	sub	%eax, %ebp		C remainder -= d0 * q
	sbb	%edx, %ebx
	mov	20(%esp), %eax		C d0
	inc	%edi			C q++
	xor	%edx, %edx
	cmp	(%esp), %ebx		C carry set when ebx < q0
	adc	$-1, %edx		C mask
	add	%edx, %edi		C q--
	and	%edx, %eax		C d0 or 0
	and	24(%esp), %edx		C d1 or 0
	add	%eax, %ebp		C add the divisor back when mask is all ones
	adc	%edx, %ebx
	cmp	24(%esp), %ebx
	jae	L(fix)			C high remainder limb >= d1: may need a fix-up
L(bck):	mov	56(%esp), %edx		C qp
	mov	%edi, (%edx, %ecx, 4)	C qp[index] = q
	dec	%ecx
	jns	L(loop)

C Store the remainder, fetch the return value and unwind the frame.
L(9):	mov	64(%esp), %esi		C up
	mov	%ebp, (%esi)
	mov	%ebx, 4(%esi)
	mov	32(%esp), %eax		C msl
	add	$36, %esp
	pop	%ebx
	pop	%esi
	pop	%edi
	pop	%ebp
	ret

C Fix-up path.  Entered with the flags of "cmp d1, ebx" still live.  If the
C remainder {ebx,ebp} is >= {d1,d0}, bump q and subtract the divisor once.
L(fix):	seta	%dl			C ebx > d1
	cmp	20(%esp), %ebp
	setae	%al			C ebp >= d0
	orb	%dl, %al		C "orb" form to placate Sun tools
	je	L(bck)
	inc	%edi
	sub	20(%esp), %ebp
	sbb	24(%esp), %ebx
	jmp	L(bck)

C Entry-time reduction: {n2,n1} -= {d1,d0}, msl = 1, then resume at L(8).
L(35):	sub	20(%esp), %ebp
	sbb	24(%esp), %ebx
	movl	$1, 32(%esp)
	jmp	L(8)
EPILOGUE()