dnl  Intel Pentium mpn_add_n/mpn_sub_n -- mpn addition and subtraction.

dnl  Copyright 1992, 1994, 1995, 1996, 1999, 2000, 2002 Free Software
dnl  Foundation, Inc.
dnl
dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or
dnl  modify it under the terms of the GNU Lesser General Public License as
dnl  published by the Free Software Foundation; either version 3 of the
dnl  License, or (at your option) any later version.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful,
dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
dnl  Lesser General Public License for more details.
dnl
dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')


C P5: 2.375 cycles/limb


C Select the carry-propagating instruction (adcl or sbbl) and the two entry
C point names for the operation being built.  One of OPERATION_add_n or
C OPERATION_sub_n must be defined, otherwise m4_error is invoked.

ifdef(`OPERATION_add_n',`
	define(M4_inst, adcl)
	define(M4_function_n, mpn_add_n)
	define(M4_function_nc, mpn_add_nc)

',`ifdef(`OPERATION_sub_n',`
	define(M4_inst, sbbl)
	define(M4_function_n, mpn_sub_n)
	define(M4_function_nc, mpn_sub_nc)

',`m4_error(`Need OPERATION_add_n or OPERATION_sub_n
')')')

MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)


C mp_limb_t M4_function_n (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
C                          mp_size_t size);
C mp_limb_t M4_function_nc (mp_ptr dst, mp_srcptr src1, mp_srcptr src2,
C                           mp_size_t size, mp_limb_t carry);
C
C Both entry points share one body.  M4_function_n starts with the carry
C flag clear.  M4_function_nc shifts bit 0 of its carry argument into the
C carry flag and then jumps into the M4_function_n code at L(oop) or L(end).
C
C Register usage after the setup code:
C   edi  dst pointer
C   esi  src1 pointer
C   ebp  src2 pointer
C   ebx  src2 limb, always loaded one limb ahead of its use
C   ecx  (size-1)/8, the number of 8-limb passes through L(oop)
C   edx  (size-1) mod 8, the number of single-limb passes through L(oop2);
C        kept on the stack while L(oop) runs, since L(oop) uses edx for data
C   eax  data; on return holds the carry (or borrow) out, 0 or 1
C
C One limb is always processed at L(end2) and src2 limb 0 is loaded
C unconditionally, so size must be at least 1.
C
C The carry flag is live between successive M4_inst instructions, so the
C instructions placed between them are limited to ones that leave the carry
C flag alone: movl, leal, pushl, popl, incl, decl and the branches.

C Parameter offsets.  defframe comes from config.m4; presumably it combines
C each offset with the current FRAME value - confirm there.
defframe(PARAM_CARRY,20)
defframe(PARAM_SIZE, 16)
defframe(PARAM_SRC2, 12)
defframe(PARAM_SRC1, 8)
defframe(PARAM_DST,  4)

	TEXT
	ALIGN(8)
PROLOGUE(M4_function_nc)

C Entry point taking an initial carry (or borrow) in PARAM_CARRY.

	pushl	%edi
	pushl	%esi
	pushl	%ebx
	pushl	%ebp
C Four 4-byte registers have been pushed.
deflit(`FRAME',16)

	movl	PARAM_DST,%edi
	movl	PARAM_SRC1,%esi
	movl	PARAM_SRC2,%ebp
	movl	PARAM_SIZE,%ecx

C Preload src2 limb 0.
	movl	(%ebp),%ebx

C Split size-1 into ecx = 8-limb block count and edx = leftover limbs.
C The carry flag state after testl is replaced by the shrl on both paths
C below.
	decl	%ecx
	movl	%ecx,%edx
	shrl	$3,%ecx
	andl	$7,%edx
	testl	%ecx,%ecx		C zero carry flag
	jz	L(endgo)

C At least one full block: save the leftover count the same way
C M4_function_n does before L(oop), then join the shared loop.
	pushl	%edx
FRAME_pushl()
	movl	PARAM_CARRY,%eax
	shrl	%eax			C shift bit 0 into carry
	jmp	L(oop)

L(endgo):
C No full block.  edx was not pushed on this path, so FRAME is 16 again.
deflit(`FRAME',16)
	movl	PARAM_CARRY,%eax
	shrl	%eax			C shift bit 0 into carry
	jmp	L(end)

EPILOGUE()


	ALIGN(8)
PROLOGUE(M4_function_n)

C Entry point with no initial carry.  Also holds the code shared with
C M4_function_nc.

	pushl	%edi
	pushl	%esi
	pushl	%ebx
	pushl	%ebp
C Four 4-byte registers have been pushed.
deflit(`FRAME',16)

	movl	PARAM_DST,%edi
	movl	PARAM_SRC1,%esi
	movl	PARAM_SRC2,%ebp
	movl	PARAM_SIZE,%ecx

C Preload src2 limb 0.
	movl	(%ebp),%ebx

C Split size-1 into ecx = 8-limb block count and edx = leftover limbs.
C testl leaves the carry flag clear, which is the initial carry here.
	decl	%ecx
	movl	%ecx,%edx
	shrl	$3,%ecx
	andl	$7,%edx
	testl	%ecx,%ecx		C zero carry flag
	jz	L(end)
C Save the leftover count; L(oop) uses edx as a data register.
	pushl	%edx
FRAME_pushl()

C Main loop: 8 limbs (32 bytes) per pass, as four pairs L(1) to L(4).
C edi is advanced first, so the stores use negative offsets.
	ALIGN(8)
L(oop):	movl	28(%edi),%eax		C fetch destination cache line
	leal	32(%edi),%edi

C Limbs 0 and 1 of the block.
L(1):	movl	(%esi),%eax
	movl	4(%esi),%edx
	M4_inst	%ebx,%eax
	movl	4(%ebp),%ebx
	M4_inst	%ebx,%edx
	movl	8(%ebp),%ebx
	movl	%eax,-32(%edi)
	movl	%edx,-28(%edi)

C Limbs 2 and 3 of the block.
L(2):	movl	8(%esi),%eax
	movl	12(%esi),%edx
	M4_inst	%ebx,%eax
	movl	12(%ebp),%ebx
	M4_inst	%ebx,%edx
	movl	16(%ebp),%ebx
	movl	%eax,-24(%edi)
	movl	%edx,-20(%edi)

C Limbs 4 and 5 of the block.
L(3):	movl	16(%esi),%eax
	movl	20(%esi),%edx
	M4_inst	%ebx,%eax
	movl	20(%ebp),%ebx
	M4_inst	%ebx,%edx
	movl	24(%ebp),%ebx
	movl	%eax,-16(%edi)
	movl	%edx,-12(%edi)

C Limbs 6 and 7 of the block.  The load from 32(%ebp) fetches the first
C src2 limb of whatever comes next (next block, tail loop or final limb).
L(4):	movl	24(%esi),%eax
	movl	28(%esi),%edx
	M4_inst	%ebx,%eax
	movl	28(%ebp),%ebx
	M4_inst	%ebx,%edx
	movl	32(%ebp),%ebx
	movl	%eax,-8(%edi)
	movl	%edx,-4(%edi)

C Advance the source pointers and count down the blocks, using leal and
C decl so the carry flag reaches the next pass unchanged.
	leal	32(%esi),%esi
	leal	32(%ebp),%ebp
	decl	%ecx
	jnz	L(oop)

C Recover the leftover limb count saved before the loop.
	popl	%edx
FRAME_popl()
L(end):
C If edx is zero the decl makes it negative and the tail loop is skipped;
C otherwise the incl puts the count back.
	decl	%edx			C test %edx w/o clobbering carry
	js	L(end2)
	incl	%edx
C Tail loop: one limb per pass, edx passes.
L(oop2):
	leal	4(%edi),%edi
	movl	(%esi),%eax
	M4_inst	%ebx,%eax
	movl	4(%ebp),%ebx
	movl	%eax,-4(%edi)
	leal	4(%esi),%esi
	leal	4(%ebp),%ebp
	decl	%edx
	jnz	L(oop2)
C Final limb; its src2 value is already in ebx.
L(end2):
	movl	(%esi),%eax
	M4_inst	%ebx,%eax
	movl	%eax,(%edi)

C Turn the carry flag into the return value: sbbl gives 0 or -1, negl makes
C that 0 or 1.
	sbbl	%eax,%eax
	negl	%eax

C Restore the saved registers in reverse order of the pushes.
	popl	%ebp
	popl	%ebx
	popl	%esi
	popl	%edi
	ret

EPILOGUE()