dnl  x86 mpn_bdiv_q_1 -- mpn by limb exact division.

dnl  Copyright 2001, 2002, 2007, 2011 Free Software Foundation, Inc.
dnl
dnl  This file is part of the GNU MP Library.
dnl
dnl  Rearranged from mpn/x86/dive_1.asm by Marco Bodrato.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or
dnl  modify it under the terms of the GNU Lesser General Public License as
dnl  published by the Free Software Foundation; either version 3 of the
dnl  License, or (at your option) any later version.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful,
dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
dnl  Lesser General Public License for more details.
dnl
dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')


C     cycles/limb
C P54    30.0
C P55    29.0
C P6     13.0 odd divisor, 12.0 even (strangely)
C K6     14.0
C K7     12.0
C P4     42.0

MULFUNC_PROLOGUE(mpn_bdiv_q_1 mpn_pi1_bdiv_q_1)

C Stack argument slots, as byte offsets from the stack pointer at entry.
C mpn_bdiv_q_1 is called with only the first four arguments, and hands the
C inverse and shift to the shared code in registers rather than through the
C PARAM_INVERSE and PARAM_SHIFT slots.
C
C NOTE(review): defframe, FRAME_pushl and FRAME_popl come from the included
C m4 definitions and are not visible in this file.  They appear to track the
C current push depth so that the PARAM names stay valid after pushes and
C pops -- confirm against the m4 definitions.

defframe(PARAM_SHIFT,  24)
defframe(PARAM_INVERSE,20)
defframe(PARAM_DIVISOR,16)
defframe(PARAM_SIZE,   12)
defframe(PARAM_SRC,    8)
defframe(PARAM_DST,    4)

dnl  re-use parameter space
dnl  The src pointer is copied into esi before the inverse is stored in its
dnl  slot, so the argument value is not needed from memory afterwards.
define(VAR_INVERSE,`PARAM_SRC')

	TEXT

C mp_limb_t
C mpn_pi1_bdiv_q_1 (mp_ptr dst, mp_srcptr src, mp_size_t size, mp_limb_t divisor,
C		    mp_limb_t inverse, int shift)
C
C Writes size quotient limbs to dst, working from the least significant limb
C of src upwards.  Each source limb is taken from src shifted right by shift
C bits, the running carry is subtracted, and the result is multiplied by
C inverse to give a quotient limb.  The high half of quotient limb times
C divisor becomes the carry limb for the next position.
C
C src[0] is read before size is tested, so size is assumed to be at least 1.
C
C NOTE(review): mpn_bdiv_q_1 below reaches the shared code with the divisor
C slot holding the divisor without its low zero bits, eax holding the inverse
C of that value and ecx holding the number of stripped bits.  Presumably
C direct callers of this entry point must pass divisor, inverse and shift in
C the same relationship -- confirm against the callers.
C
C At ret, eax still holds the last quotient limb stored, ie. the most
C significant one.  ebx, esi, edi and ebp are restored.  ecx and edx are not
C preserved, and the PARAM_SRC stack slot is overwritten with the inverse.

	ALIGN(16)
PROLOGUE(mpn_pi1_bdiv_q_1)
deflit(`FRAME',0)

	movl	PARAM_SHIFT, %ecx
	pushl	%ebp	FRAME_pushl()

	movl	PARAM_INVERSE, %eax
	movl	PARAM_SIZE, %ebp
	pushl	%ebx	FRAME_pushl()

C Shared entry, also the target of the jmp at the end of mpn_bdiv_q_1.
C Expected state here:
C   eax  inverse
C   ecx  shift
C   ebp  size
C   caller values of ebp and ebx already pushed, in that order
C   PARAM_DIVISOR slot holds the divisor to multiply by
L(common):
	pushl	%edi	FRAME_pushl()
	pushl	%esi	FRAME_pushl()

	movl	PARAM_SRC, %esi
	movl	PARAM_DST, %edi

	leal	(%esi,%ebp,4), %esi	C src end
	leal	(%edi,%ebp,4), %edi	C dst end
	negl	%ebp			C -size

	movl	%eax, VAR_INVERSE	C src slot is free now, esi has the pointer
	movl	(%esi,%ebp,4), %eax	C src[0]

	C Start with no carry bit and no carry limb, so that the adjustments
	C at L(one) change nothing when size is 1.
	xorl	%ebx, %ebx
	xorl	%edx, %edx

	incl	%ebp
	jz	L(one)			C size==1, only the final limb to do

	movl	(%esi,%ebp,4), %edx	C src[1]

	shrdl(	%cl, %edx, %eax)	C low limb, src[1] bits shifted in on top

	movl	VAR_INVERSE, %edx
	jmp	L(entry)		C no carry to apply to the first limb


	C Main loop.  The counter in ebp has already been stepped past the limb
	C being produced, hence the -4 displacements.

	ALIGN(8)
	nop	C k6 code alignment
	nop
L(top):
	C eax	q
	C ebx	carry bit, 0 or -1
	C ecx	shift
	C edx	carry limb
	C esi	src end
	C edi	dst end
	C ebp	counter, limbs, negative

	movl	-4(%esi,%ebp,4), %eax
	subl	%ebx, %edx		C accumulate carry bit

	movl	(%esi,%ebp,4), %ebx	C next limb up, supplies the shifted-in bits

	shrdl(	%cl, %ebx, %eax)

	subl	%edx, %eax		C apply carry limb
	movl	VAR_INVERSE, %edx	C movl leaves the borrow flag intact

	sbbl	%ebx, %ebx		C ebx = 0 or -1 from the borrow of the subl

L(entry):
	imull	%edx, %eax		C q = limb * inverse, low 32 bits

	movl	%eax, -4(%edi,%ebp,4)	C store q
	movl	PARAM_DIVISOR, %edx

	mull	%edx			C edx:eax = q * divisor, edx is next carry limb

	incl	%ebp
	jnz	L(top)


	C Final limb.  There is no limb above it, so a plain shrl is used
	C instead of shrdl.

	movl	-4(%esi), %eax		C src high limb
L(one):
	shrl	%cl, %eax
	popl	%esi	FRAME_popl()

	addl	%ebx, %eax		C apply carry bit

	subl	%edx, %eax		C apply carry limb

	imull	VAR_INVERSE, %eax

	movl	%eax, -4(%edi)		C most significant quotient limb

	popl	%edi
	popl	%ebx
	popl	%ebp

	ret

EPILOGUE()

C mp_limb_t mpn_bdiv_q_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
C                           mp_limb_t divisor);
C
C Front end for the shared code above.  It strips the low zero bits from
C divisor, counting them as the shift, computes the inverse of the remaining
C odd value, and then jumps to L(common) with
C   eax  inverse
C   ecx  shift
C   ebp  size
C   PARAM_DIVISOR slot rewritten to the divisor without its low zero bits
C
C The strip loop only ends when a 1 bit is shifted out, so divisor is assumed
C to be nonzero.

	ALIGN(16)
PROLOGUE(mpn_bdiv_q_1)
deflit(`FRAME',0)

	movl	PARAM_DIVISOR, %eax
	pushl	%ebp	FRAME_pushl()

	movl	$-1, %ecx		C shift count
	movl	PARAM_SIZE, %ebp

	pushl	%ebx	FRAME_pushl()

	C Shift right until the lowest set bit falls out into the carry flag.
	C On exit ecx is the number of low zero bits and eax is the divisor
	C shifted right by one more than that.
L(strip_twos):
	incl	%ecx

	shrl	%eax
	jnc	L(strip_twos)

	leal	1(%eax,%eax), %ebx	C d without twos
	andl	$127, %eax		C d/2, 7 bits

ifdef(`PIC',`
	LEA(	binvert_limb_table, %edx)
	movzbl	(%eax,%edx), %eax		C inv 8 bits
',`
	movzbl	binvert_limb_table(%eax), %eax	C inv 8 bits
')

	C Two refinement steps of inv = 2*inv - inv*inv*d, starting from the
	C 8 bit table value.  Each step is expected to double the number of
	C correct low bits, giving a full limb as checked by the ASSERT below.

	leal	(%eax,%eax), %edx	C 2*inv
	movl	%ebx, PARAM_DIVISOR	C d without twos
	imull	%eax, %eax		C inv*inv
	imull	%ebx, %eax		C inv*inv*d
	subl	%eax, %edx		C inv = 2*inv - inv*inv*d

	leal	(%edx,%edx), %eax	C 2*inv
	imull	%edx, %edx		C inv*inv
	imull	%ebx, %edx		C inv*inv*d
	subl	%edx, %eax		C inv = 2*inv - inv*inv*d

	ASSERT(e,`	C expect d*inv == 1 mod 2^GMP_LIMB_BITS
	pushl	%eax	FRAME_pushl()
	imull	PARAM_DIVISOR, %eax
	cmpl	$1, %eax
	popl	%eax	FRAME_popl()')

	C Two registers are pushed at this point, matching the stack depth at
	C L(common) in mpn_pi1_bdiv_q_1.
	jmp	L(common)
EPILOGUE()