dnl  x86 mpn_add_n/mpn_sub_n -- mpn addition and subtraction.

dnl  Copyright 1992, 1994-1996, 1999-2002 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.
include(`../config.m4')


C     cycles/limb
C P5	3.375
C P6	3.125
C K6	3.5
C K7	2.25
C P4	8.75


ifdef(`OPERATION_add_n',`
	define(M4_inst,        adcl)
	define(M4_function_n,  mpn_add_n)
	define(M4_function_nc, mpn_add_nc)

',`ifdef(`OPERATION_sub_n',`
	define(M4_inst,        sbbl)
	define(M4_function_n,  mpn_sub_n)
	define(M4_function_nc, mpn_sub_nc)

',`m4_error(`Need OPERATION_add_n or OPERATION_sub_n
')')')

MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)


C mp_limb_t M4_function_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
C                          mp_size_t size);
C mp_limb_t M4_function_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
C                           mp_size_t size, mp_limb_t carry);
C
C Syntax and ABI: AT&T syntax, 32-bit x86, all arguments on the stack at the
C offsets given by the defframe lines below.
C
C Operation: for each of the size limbs, low to high, load a limb from src1,
C apply M4_inst (adcl or sbbl) with the matching limb of src2, and store the
C result to dst.  The carry flag links consecutive limbs.  The return value
C in %eax is the final carry (or borrow) as 0 or 1.
C
C M4_function_n starts with the carry flag clear.  M4_function_nc starts
C with the carry flag set from bit 0 of the carry argument.
C
C Register roles in both entry points:
C	%edi	dst pointer
C	%esi	src1 pointer
C	%edx	src2 pointer
C	%ecx	number of passes through the 8-limb unrolled loop
C	%eax	start index, then jump target, then limb scratch in the loop
C %edi and %esi are pushed on entry and popped before ret.  M4_function_nc
C also saves and restores %ebp around its use as scratch.
C
C Loop entry: index = (-size) & 7.  When index is nonzero the three pointers
C are moved back by 4*index bytes, the pass count is size/8 + 1, and control
C jumps to L(oop) + 9*index - 3, so the first pass handles only the
C size mod 8 leftover limbs.  That address arithmetic depends on the encoded
C length of the instructions in the loop (the first limb group uses no
C displacement, the later ones a one byte displacement), so the loop body
C must not be changed or reordered without rechecking the 9 and the 3.
C
C Carry flag: the loop bookkeeping uses only leal and decl, neither of which
C writes the carry flag, so the carry survives from one pass to the next and
C is still valid at the final sbbl.
C
C size 0 is not handled by this code: the pass count would start at zero and
C wrap around at the decl.

defframe(PARAM_CARRY,20)
defframe(PARAM_SIZE, 16)
defframe(PARAM_SRC2, 12)
defframe(PARAM_SRC1, 8)
defframe(PARAM_DST,  4)

	TEXT
	ALIGN(8)

PROLOGUE(M4_function_nc)
deflit(`FRAME',0)

	pushl	%edi		FRAME_pushl()
	pushl	%esi		FRAME_pushl()

	movl	PARAM_DST,%edi
	movl	PARAM_SRC1,%esi
	movl	PARAM_SRC2,%edx
	movl	PARAM_SIZE,%ecx

	movl	%ecx,%eax
	shrl	$3,%ecx			C compute count for unrolled loop
	negl	%eax
	andl	$7,%eax			C get index where to start loop
	jz	L(oopgo)		C necessary special case for 0
	incl	%ecx			C adjust loop count
	shll	$2,%eax			C adjustment for pointers...
	subl	%eax,%edi		C ... since they are offset ...
	subl	%eax,%esi		C ... by a constant when we ...
	subl	%eax,%edx		C ... enter the loop
	shrl	$2,%eax			C restore previous value

ifdef(`PIC',`
	C Calculate start address in loop for PIC.  Due to limitations in
	C old gas, L(oop)-L(0a)-3 cannot be put into the leal.
	C The call pushes the address of L(0a), which is then read back from
	C the stack and discarded with the addl to %esp.
	call	L(0a)
L(0a):	leal	(%eax,%eax,8),%eax
	addl	(%esp),%eax
	addl	$L(oop)-L(0a)-3,%eax
	addl	$4,%esp
',`
	C Calculate start address in loop for non-PIC.
	leal	L(oop)-3(%eax,%eax,8),%eax
')

	C Initialize the carry flag from the 5th parameter.  This has to come
	C after the address calculation above, because that calculation
	C writes the flags.  %eax holds the jump target and every other
	C caller-saved register is in use, so %ebp is borrowed as scratch.
	C pushl and popl do not change the flags.
	pushl	%ebp		FRAME_pushl()
	movl	PARAM_CARRY,%ebp
	shrl	%ebp			C shift bit 0 into carry
	popl	%ebp		FRAME_popl()

	jmp	*%eax			C jump into loop

EPILOGUE()


	ALIGN(16)
PROLOGUE(M4_function_n)
deflit(`FRAME',0)

	pushl	%edi		FRAME_pushl()
	pushl	%esi		FRAME_pushl()

	movl	PARAM_DST,%edi
	movl	PARAM_SRC1,%esi
	movl	PARAM_SRC2,%edx
	movl	PARAM_SIZE,%ecx

	C No explicit clearing of the carry flag is done on this path.  When
	C the jz is taken the andl has just cleared it.  Otherwise the
	C shrl by 2 shifts out zero bits, since %eax is 4*index at that point.
	C NOTE(review): in the PIC variant the addl instructions write the
	C flags afterwards; this relies on the final addl to %esp not
	C carrying, which holds unless the stack pointer wraps.
	movl	%ecx,%eax
	shrl	$3,%ecx			C compute count for unrolled loop
	negl	%eax
	andl	$7,%eax			C get index where to start loop
	jz	L(oop)			C necessary special case for 0
	incl	%ecx			C adjust loop count
	shll	$2,%eax			C adjustment for pointers...
	subl	%eax,%edi		C ... since they are offset ...
	subl	%eax,%esi		C ... by a constant when we ...
	subl	%eax,%edx		C ... enter the loop
	shrl	$2,%eax			C restore previous value

ifdef(`PIC',`
	C Calculate start address in loop for PIC.  Due to limitations in
	C some assemblers, L(oop)-L(0b)-3 cannot be put into the leal
	call	L(0b)
L(0b):	leal	(%eax,%eax,8),%eax
	addl	(%esp),%eax
	addl	$L(oop)-L(0b)-3,%eax
	addl	$4,%esp
',`
	C Calculate start address in loop for non-PIC.
	leal	L(oop)-3(%eax,%eax,8),%eax
')
	jmp	*%eax			C jump into loop

L(oopgo):
	C Entered only from M4_function_nc, when size is a multiple of 8, with
	C the same two registers pushed as here (so FRAME matches).
	C %eax is dead at this point: it is zero after the andl and L(oop)
	C reloads it before reading it.  It is therefore used as scratch and
	C no register needs to be saved.
	movl	PARAM_CARRY,%eax
	shrl	%eax			C shift bit 0 into carry

	C Falls through the alignment padding into the loop.
	ALIGN(16)
L(oop):	movl	(%esi),%eax
	M4_inst	(%edx),%eax
	movl	%eax,(%edi)
	movl	4(%esi),%eax
	M4_inst	4(%edx),%eax
	movl	%eax,4(%edi)
	movl	8(%esi),%eax
	M4_inst	8(%edx),%eax
	movl	%eax,8(%edi)
	movl	12(%esi),%eax
	M4_inst	12(%edx),%eax
	movl	%eax,12(%edi)
	movl	16(%esi),%eax
	M4_inst	16(%edx),%eax
	movl	%eax,16(%edi)
	movl	20(%esi),%eax
	M4_inst	20(%edx),%eax
	movl	%eax,20(%edi)
	movl	24(%esi),%eax
	M4_inst	24(%edx),%eax
	movl	%eax,24(%edi)
	movl	28(%esi),%eax
	M4_inst	28(%edx),%eax
	movl	%eax,28(%edi)
	leal	32(%edi),%edi		C leal, not addl: keeps the carry flag
	leal	32(%esi),%esi
	leal	32(%edx),%edx
	decl	%ecx			C decl leaves the carry flag alone
	jnz	L(oop)

	sbbl	%eax,%eax		C %eax = 0 or -1 from the final carry
	negl	%eax			C return value 0 or 1

	popl	%esi
	popl	%edi
	ret

EPILOGUE()