dnl  Intel P6 mpn_add_n/mpn_sub_n -- mpn add or subtract.

dnl  Copyright 2006 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C TODO:
C  * Avoid indexed addressing, it makes us stall on the two-ported register
C    file.
C			    cycles/limb
C P6 model 0-8,10-12		3.17
C P6 model 9   (Banias)		2.15
C P6 model 13  (Dothan)		2.25

C Overview
C
C   func     combines the limb vectors at up and vp with ADCSBB (adc or sbb,
C            chosen by the OPERATION_* symbol below) and stores each result
C            limb at rp.  The initial carry is 0.
C   func_nc  does the same, but takes the initial carry from a fifth stack
C            argument.  Only bit 0 of that argument is used, since a single
C            shr moves it into the carry flag.
C
C   Both entry points return the final carry flag as 0 or 1 in %eax.
C
C   All arguments are read from the stack.  %edi, %esi and %ebx are pushed
C   on entry and popped before returning; %eax, %ecx and %edx are overwritten.
C
C   The loop is unrolled eight ways.  Every pass handles eight limbs except
C   the first, which is entered through a computed jump that skips the
C   leading ((-n) & 7) groups, so no separate remainder loop is needed.  The
C   jump target is L(ent) + 12 * (number of skipped groups), which means the
C   code relies on each three-instruction group occupying 12 bytes.
C
C   The carry chain runs through the whole loop: mov, lea, jmp and jecxz
C   leave CF alone, so ADCSBB always sees the carry of the previous limb.
C
C   NOTE(review): with n = 0 the computed jump lands on L(ent) with a zero
C   index and a full group of eight runs before jecxz is ever tested.  It
C   looks like callers are expected to pass n >= 1 -- confirm.


C Register roles.
C   rp  store pointer for result limbs
C   up  load pointer for the first source operand
C   vp  load pointer for the second source operand (memory operand of ADCSBB)
C   n   limb count on entry, then a negative limb index counting up to zero
define(`rp',	`%edi')
define(`up',	`%esi')
define(`vp',	`%ebx')
define(`n',	`%ecx')

C Select the arithmetic instruction and the exported names for this build.
ifdef(`OPERATION_add_n', `
	define(ADCSBB,	adc)
	define(func,	mpn_add_n)
	define(func_nc,	mpn_add_nc)')
ifdef(`OPERATION_sub_n', `
	define(ADCSBB,	sbb)
	define(func,	mpn_sub_n)
	define(func_nc,	mpn_sub_nc)')

MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)

ASM_START()

	TEXT
	ALIGN(16)

PROLOGUE(func)
	xor	%edx, %edx			C plain entry: carry-in is 0
L(start):
	C Shared body.  func_nc jumps here with its carry-in already in %edx.
	push	%edi
	push	%esi
	push	%ebx

	C Three pushes plus the return address put the first argument at
	C 16(%esp).
	mov	16(%esp), rp
	mov	20(%esp), up
	mov	24(%esp), vp
	mov	28(%esp), n

	C Advance each pointer by 4*n bytes, i.e. just past its last limb.
	C Limbs are then addressed as pointer + 4*index with a negative index.
	lea	(up,n,4), up
	lea	(vp,n,4), vp
	lea	(rp,n,4), rp

	neg	n
	mov	n, %eax
	and	$-8, n				C index = -count rounded down to a multiple of 8
	and	$7, %eax			C groups to skip on the first pass
	shl	$2, %eax			C 4x

	C Turn %eax into the address L(ent) + 12 * skipped groups.  The PIC
	C variant cannot name L(ent) absolutely, so it calls L(pic_calc), which
	C adds the L(ent)-L(here) distance to its own return address.
ifdef(`PIC',`
	call	L(pic_calc)
L(here):
',`
	lea	L(ent) (%eax,%eax,2), %eax	C 12x
')

	C Done last, after all flag-changing arithmetic above, so that CF
	C reaches the first ADCSBB intact.
	shr	%edx				C set cy flag
	jmp	*%eax

ifdef(`PIC',`
L(pic_calc):
	C See mpn/x86/README about old gas bugs
	lea	(%eax,%eax,2), %eax
	add	$L(ent)-L(here), %eax
	add	(%esp), %eax
	ret_internal
')

L(end):
	C Convert the final carry flag into the 0/1 return value.
	sbb	%eax, %eax			C eax = -CF
	neg	%eax				C eax = CF
	pop	%ebx
	pop	%esi
	pop	%edi
	ret

	ALIGN(16)
L(top):
	jecxz	L(end)				C index reached zero: all limbs done
L(ent):
	C Eight groups of load / ADCSBB / store, alternating between %eax and
	C %edx as the scratch register.
	C NOTE(review): Zdisp is defined outside this file.  Presumably it forces
	C an explicit zero displacement so that this first group has the same
	C encoded size as the other seven -- confirm against its definition.
Zdisp(	mov,	0,(up,n,4), %eax)
Zdisp(	ADCSBB,	0,(vp,n,4), %eax)
Zdisp(	mov,	%eax, 0,(rp,n,4))

	mov	4(up,n,4), %edx
	ADCSBB	4(vp,n,4), %edx
	mov	%edx, 4(rp,n,4)

	mov	8(up,n,4), %eax
	ADCSBB	8(vp,n,4), %eax
	mov	%eax, 8(rp,n,4)

	mov	12(up,n,4), %edx
	ADCSBB	12(vp,n,4), %edx
	mov	%edx, 12(rp,n,4)

	mov	16(up,n,4), %eax
	ADCSBB	16(vp,n,4), %eax
	mov	%eax, 16(rp,n,4)

	mov	20(up,n,4), %edx
	ADCSBB	20(vp,n,4), %edx
	mov	%edx, 20(rp,n,4)

	mov	24(up,n,4), %eax
	ADCSBB	24(vp,n,4), %eax
	mov	%eax, 24(rp,n,4)

	mov	28(up,n,4), %edx
	ADCSBB	28(vp,n,4), %edx
	mov	%edx, 28(rp,n,4)

	lea	8(n), n				C next 8 limbs; lea keeps CF for the next pass
	jmp	L(top)

EPILOGUE()

C Carry-in entry point.  Nothing has been pushed yet, so 20(%esp) is the
C fifth stack argument; it is handed to the shared body in %edx.
PROLOGUE(func_nc)
	movl	20(%esp), %edx
	jmp	L(start)
EPILOGUE()