dnl  x86 mpn_add_n/mpn_sub_n -- mpn addition and subtraction.

dnl  Copyright 1992, 1994, 1995, 1996, 1999, 2000, 2001, 2002 Free Software
dnl  Foundation, Inc.
dnl
dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or
dnl  modify it under the terms of the GNU Lesser General Public License as
dnl  published by the Free Software Foundation; either version 3 of the
dnl  License, or (at your option) any later version.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful,
dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
dnl  Lesser General Public License for more details.
dnl
dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')


C     cycles/limb
C P5:	3.375
C P6:	3.125
C K6:	3.5
C K7:	2.25
C P4:	8.75


C Syntax and target: AT&T operand order, 32-bit x86, preprocessed by m4.
C All arguments are read from the stack through the defframe offsets below.
C
C One source builds either the add or the subtract pair.  M4_inst is adcl
C or sbbl depending on which OPERATION_ symbol is defined, so the loop body
C is shared and only the carry/borrow instruction differs.

ifdef(`OPERATION_add_n',`
	define(M4_inst,        adcl)
	define(M4_function_n,  mpn_add_n)
	define(M4_function_nc, mpn_add_nc)

',`ifdef(`OPERATION_sub_n',`
	define(M4_inst,        sbbl)
	define(M4_function_n,  mpn_sub_n)
	define(M4_function_nc, mpn_sub_nc)

',`m4_error(`Need OPERATION_add_n or OPERATION_sub_n
')')')

MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)


C mp_limb_t M4_function_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
C                          mp_size_t size);
C mp_limb_t M4_function_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
C	                    mp_size_t size, mp_limb_t carry);
C
C Each 32-bit limb of dst is written with src1 limb M4_inst src2 limb,
C with the carry flag chained from one limb to the next.  The return value
C in eax is the final carry flag as 0 or 1 (sbbl/negl at the end).
C
C M4_function_nc seeds the carry flag from bit 0 of its carry argument;
C higher bits of that argument are ignored.  M4_function_n starts with
C the carry flag clear.
C
C Register roles, identical in both entry points:
C	edi	dst pointer
C	esi	src1 pointer
C	edx	src2 pointer
C	ecx	number of passes through the 8-limb unrolled loop
C	eax	scratch: start index, then jump target, then limb data
C edi and esi are saved and restored here.  ebp is used only as scratch
C around the carry load and is pushed/popped in place.
C
C Entering the loop in the middle:
C	index = (-size) & 7 is how many limb groups of the first pass to skip.
C	When index is nonzero the three pointers are moved back by 4*index
C	bytes and ecx is incremented, then control jumps to
C	L(oop) + 9*index - 3.  This relies on the first limb group in the loop
C	assembling to 6 bytes (no displacement) and every later group to
C	9 bytes (one-byte displacements), so the instructions in the loop
C	must not be changed without revisiting that arithmetic.
C
C NOTE(review): size is assumed to be at least 1.  With size equal to 0 the
C loop would be entered with ecx = 0 and the decl/jnz pair would wrap.

defframe(PARAM_CARRY,20)
defframe(PARAM_SIZE, 16)
defframe(PARAM_SRC2, 12)
defframe(PARAM_SRC1, 8)
defframe(PARAM_DST,  4)

	TEXT
	ALIGN(8)

PROLOGUE(M4_function_nc)
deflit(`FRAME',0)

	C FRAME_pushl/FRAME_popl come from config.m4; presumably they keep
	C the m4 FRAME offset in step with esp so PARAM_ names stay valid.
	pushl	%edi		FRAME_pushl()
	pushl	%esi		FRAME_pushl()

	movl	PARAM_DST,%edi
	movl	PARAM_SRC1,%esi
	movl	PARAM_SRC2,%edx
	movl	PARAM_SIZE,%ecx

	movl	%ecx,%eax
	shrl	$3,%ecx			C compute count for unrolled loop
	negl	%eax
	andl	$7,%eax			C get index where to start loop
	jz	L(oopgo)		C necessary special case for 0
	incl	%ecx			C adjust loop count
	shll	$2,%eax			C adjustment for pointers...
	subl	%eax,%edi		C ... since they are offset ...
	subl	%eax,%esi		C ... by a constant when we ...
	subl	%eax,%edx		C ... enter the loop
	shrl	$2,%eax			C restore previous value

ifdef(`PIC',`
	C Calculate start address in loop for PIC.  Due to limitations in
	C old gas, L(oop)-L(0a)-3 cannot be put into the leal.
	C The call pushes the address of L(0a); it is read back from the
	C stack and then discarded with the addl to esp.
	call	L(0a)
L(0a):	leal	(%eax,%eax,8),%eax
	addl	(%esp),%eax
	addl	$L(oop)-L(0a)-3,%eax
	addl	$4,%esp
',`
	C Calculate start address in loop for non-PIC.
	leal	L(oop)-3(%eax,%eax,8),%eax
')

	C These lines initialize carry from the 5th parameter.  Should be
	C possible to simplify.
	C The popl and the jmp leave the flags alone, so the carry set by the
	C shrl is still live at the first M4_inst executed in the loop.
	pushl	%ebp		FRAME_pushl()
	movl	PARAM_CARRY,%ebp
	shrl	$1,%ebp			C shift bit 0 into carry
	popl	%ebp		FRAME_popl()

	jmp	*%eax			C jump into loop

EPILOGUE()


	ALIGN(16)
PROLOGUE(M4_function_n)
deflit(`FRAME',0)

	pushl	%edi		FRAME_pushl()
	pushl	%esi		FRAME_pushl()

	movl	PARAM_DST,%edi
	movl	PARAM_SRC1,%esi
	movl	PARAM_SRC2,%edx
	movl	PARAM_SIZE,%ecx

	C Carry must be clear on entry to the loop.  On the jz path the andl
	C has just cleared it.  On the other path the shrl by 2 shifts out a
	C zero bit, because eax is a multiple of 4 at that point.
	movl	%ecx,%eax
	shrl	$3,%ecx			C compute count for unrolled loop
	negl	%eax
	andl	$7,%eax			C get index where to start loop
	jz	L(oop)			C necessary special case for 0
	incl	%ecx			C adjust loop count
	shll	$2,%eax			C adjustment for pointers...
	subl	%eax,%edi		C ... since they are offset ...
	subl	%eax,%esi		C ... by a constant when we ...
	subl	%eax,%edx		C ... enter the loop
	shrl	$2,%eax			C restore previous value

ifdef(`PIC',`
	C Calculate start address in loop for PIC.  Due to limitations in
	C some assemblers, L(oop)-L(0b)-3 cannot be put into the leal
	C The addl instructions below rewrite the flags.  The last one adds 4
	C to esp, which leaves carry clear as long as esp does not wrap.
	call	L(0b)
L(0b):	leal	(%eax,%eax,8),%eax
	addl	(%esp),%eax
	addl	$L(oop)-L(0b)-3,%eax
	addl	$4,%esp
',`
	C Calculate start address in loop for non-PIC.
	leal	L(oop)-3(%eax,%eax,8),%eax
')
	jmp	*%eax			C jump into loop

	C Entry from M4_function_nc when size is a multiple of 8: load the
	C carry flag from bit 0 of the carry argument, then fall through to
	C the top of the loop.  Stack depth here matches the nc prologue
	C (edi and esi pushed), so PARAM_CARRY resolves to the same slot.
L(oopgo):
	pushl	%ebp		FRAME_pushl()
	movl	PARAM_CARRY,%ebp
	shrl	$1,%ebp			C shift bit 0 into carry
	popl	%ebp		FRAME_popl()

	C Eight limbs per pass.  Only M4_inst writes the carry flag inside the
	C loop: movl and leal do not touch flags, and decl leaves carry alone,
	C so the carry chain runs unbroken from one pass to the next.
	ALIGN(16)
L(oop):	movl	(%esi),%eax
	M4_inst	(%edx),%eax
	movl	%eax,(%edi)
	movl	4(%esi),%eax
	M4_inst	4(%edx),%eax
	movl	%eax,4(%edi)
	movl	8(%esi),%eax
	M4_inst	8(%edx),%eax
	movl	%eax,8(%edi)
	movl	12(%esi),%eax
	M4_inst	12(%edx),%eax
	movl	%eax,12(%edi)
	movl	16(%esi),%eax
	M4_inst	16(%edx),%eax
	movl	%eax,16(%edi)
	movl	20(%esi),%eax
	M4_inst	20(%edx),%eax
	movl	%eax,20(%edi)
	movl	24(%esi),%eax
	M4_inst	24(%edx),%eax
	movl	%eax,24(%edi)
	movl	28(%esi),%eax
	M4_inst	28(%edx),%eax
	movl	%eax,28(%edi)
	leal	32(%edi),%edi
	leal	32(%esi),%esi
	leal	32(%edx),%edx
	decl	%ecx
	jnz	L(oop)

	C Turn the final carry flag into the return value: eax = 0 or 1.
	sbbl	%eax,%eax
	negl	%eax

	popl	%esi
	popl	%edi
	ret

EPILOGUE()