dnl  x86 __gmpn_addmul_1 (for 386 and 486) -- Multiply a limb vector with a
dnl  limb and add the result to a second limb vector.

dnl  Copyright 1992, 1994, 1997, 1999, 2000, 2001, 2002, 2005 Free Software
dnl  Foundation, Inc.
dnl
dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or
dnl  modify it under the terms of the GNU Lesser General Public License as
dnl  published by the Free Software Foundation; either version 3 of the
dnl  License, or (at your option) any later version.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful,
dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
dnl  Lesser General Public License for more details.
dnl
dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')

C                            cycles/limb
C P5                            14.75
C P6 model 0-8,10-12             7.5
C P6 model 9  (Banias)           6.7
C P6 model 13 (Dothan)           6.75
C P4 model 0  (Willamette)      24.0
C P4 model 1  (?)               24.0
C P4 model 2  (Northwood)       24.0
C P4 model 3  (Prescott)
C P4 model 4  (Nocona)
C Intel Atom
C AMD K6                        12.5
C AMD K7                         5.25
C AMD K8
C AMD K10


ifdef(`OPERATION_addmul_1',`
      define(M4_inst,        addl)
      define(M4_function_1,  mpn_addmul_1)

',`ifdef(`OPERATION_submul_1',`
      define(M4_inst,        subl)
      define(M4_function_1,  mpn_submul_1)

',`m4_error(`Need OPERATION_addmul_1 or OPERATION_submul_1
')')')

MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)


C mp_limb_t M4_function_1 (mp_ptr dst, mp_srcptr src, mp_size_t size,
C                          mp_limb_t mult);
C
C This one source builds either mpn_addmul_1 or mpn_submul_1, selected by
C the OPERATION_ symbol tested above.  M4_inst is addl or subl accordingly.
C
C Each src limb is multiplied by mult and the product, plus the running
C carry limb, is applied with M4_inst to the matching dst limb.  The final
C carry (or borrow) limb is returned in eax.
C
C The four arguments are read from the stack.  edi, esi, ebx and ebp are
C pushed on entry and popped again before the ret.
C
C Register roles:
C   edi        dst pointer
C   esi        src pointer
C   ecx        loop counter
C   ebx, ebp   carry limb (the two alternate inside the unrolled loop)
C   edx:eax    64-bit product from mull
C
C The leading size mod 4 limbs are processed one per iteration, then the
C remaining size/4 groups are processed four limbs per iteration.
C
C Registers are zeroed with movl $0 rather than xorl wherever a carry
C from the preceding add must survive into the following adcl, since movl
C leaves the flags alone.

define(PARAM_DST,        `FRAME+4(%esp)')
define(PARAM_SRC,        `FRAME+8(%esp)')
define(PARAM_SIZE,       `FRAME+12(%esp)')
define(PARAM_MULTIPLIER, `FRAME+16(%esp)')

	TEXT
	ALIGN(8)

PROLOGUE(M4_function_1)
deflit(`FRAME',0)

	pushl	%edi
	pushl	%esi
	pushl	%ebx
	pushl	%ebp
deflit(`FRAME',16)

	movl	PARAM_SIZE,%ecx
	movl	PARAM_SRC,%esi
	movl	PARAM_DST,%edi

	xorl	%ebx,%ebx		C carry limb starts at zero
	andl	$3,%ecx			C ecx = size mod 4
	jz	L(unroll)		C no leading limbs to do singly

C One limb per pass, ecx = leading limbs still to do (non-zero here).
L(simple):
	movl	(%esi),%eax
	mull	PARAM_MULTIPLIER	C edx:eax = src limb * mult
	addl	%ebx,%eax		C low product + carry limb
	movl	$0,%ebx			C zero without disturbing CF
	adcl	%ebx,%edx		C high product + carry out of the add
	M4_inst	%eax,(%edi)		C apply to the dst limb
	adcl	%edx,%ebx		C next carry limb = high + carry/borrow

	leal	4(%esi),%esi		C step both pointers, flags untouched
	leal	4(%edi),%edi
	decl	%ecx
	jnz	L(simple)

L(unroll):
	movl	PARAM_SIZE,%ecx
	shrl	$2,%ecx			C ecx = number of 4-limb groups
	jz	L(done)

C Four limbs per pass.  Each mull overwrites the flags, so the M4_inst for
C the previous limb is issued after the next mull, and its carry/borrow is
C folded into the new low product by the adcl that follows.
	ALIGN(8)
L(top):	movl	(%esi),%eax
	mull	PARAM_MULTIPLIER	C limb 0 product
	addl	%eax,%ebx		C ebx = low 0 + incoming carry limb
	movl	$0,%ebp
	adcl	%edx,%ebp		C ebp = high 0 + carry

	movl	4(%esi),%eax
	mull	PARAM_MULTIPLIER	C limb 1 product
	M4_inst	%ebx,(%edi)		C finish dst limb 0
	adcl	%eax,%ebp		C ebp = high 0 + low 1 + carry/borrow
	movl	$0,%ebx
	adcl	%edx,%ebx		C ebx = high 1 + carry

	movl	8(%esi),%eax
	mull	PARAM_MULTIPLIER	C limb 2 product
	M4_inst	%ebp,4(%edi)		C finish dst limb 1
	adcl	%eax,%ebx		C ebx = high 1 + low 2 + carry/borrow
	movl	$0,%ebp
	adcl	%edx,%ebp		C ebp = high 2 + carry

	movl	12(%esi),%eax
	mull	PARAM_MULTIPLIER	C limb 3 product
	M4_inst	%ebx,8(%edi)		C finish dst limb 2
	adcl	%eax,%ebp		C ebp = high 2 + low 3 + carry/borrow
	movl	$0,%ebx
	adcl	%edx,%ebx		C ebx = high 3 + carry

	M4_inst	%ebp,12(%edi)		C finish dst limb 3
	adcl	$0,%ebx			C ebx = carry limb for the next group

	leal	16(%esi),%esi
	leal	16(%edi),%edi
	decl	%ecx
	jnz	L(top)

L(done):
	movl	%ebx,%eax		C return the final carry limb

	popl	%ebp
	popl	%ebx
	popl	%esi
	popl	%edi
	ret

EPILOGUE()