dnl  X86 mpn_cnd_add_n, mpn_cnd_sub_n

dnl  Copyright 2013 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C                             cycles/limb
C P5                              ?
C P6 model 0-8,10-12              ?
C P6 model 9  (Banias)            ?
C P6 model 13 (Dothan)            5.4
C P4 model 0-1 (Willamette)       ?
C P4 model 2  (Northwood)        14.5
C P4 model 3-4 (Prescott)        21
C Intel atom                     11
C AMD K6                          ?
C AMD K7                          3.4
C AMD K8                          ?
C Conditional limb-vector add/subtract for 32-bit x86 (AT&T operand order).
C
C Stack arguments, in order: cnd, rp, up, vp, n.  See the GNU MP manual for
C the C-level prototype of mpn_cnd_add_n and mpn_cnd_sub_n.
C
C Effect: for each of the n limbs, rp[i] = up[i] ADDSUB (vp[i] AND mask),
C with carry or borrow propagated between limbs.  mask is all ones when
C cnd is nonzero and zero otherwise.  The final carry or borrow, 0 or 1, is
C returned in %eax.  No branch and no address computation depends on cnd.
C
C The first limb is loaded and processed before n is tested, so the code
C assumes n is at least 1.
C
C Working registers.
define(`rp',	`%edi')
define(`up',	`%esi')
define(`vp',	`%ebp')
define(`n',	`%ecx')
define(`cy',	`%edx')

C Argument slots as seen after the prologue has lowered %esp by 16 for the
C register save area.  The return address is then at 16(%esp) and the
C arguments follow it.  The cnd slot is overwritten with the mask.
define(`cnd',	 `20(%esp)')
define(`arg_rp', `24(%esp)')
define(`arg_up', `28(%esp)')
define(`arg_vp', `32(%esp)')
define(`arg_n',	 `36(%esp)')

C Pick the instruction pair and the entry point name for this build.
ifdef(`OPERATION_cnd_add_n', `
	define(ADDSUB,	add)
	define(ADCSBB,	adc)
	define(func,	mpn_cnd_add_n)')
ifdef(`OPERATION_cnd_sub_n', `
	define(ADDSUB,	sub)
	define(ADCSBB,	sbb)
	define(func,	mpn_cnd_sub_n)')

MULFUNC_PROLOGUE(mpn_cnd_add_n mpn_cnd_sub_n)

ASM_START()
	TEXT
	ALIGN(16)
PROLOGUE(func)
	C Save the four registers this routine uses besides eax, ecx, edx.
	add	$-16, %esp
	mov	%ebp, (%esp)
	mov	%ebx, 4(%esp)
	mov	%esi, 8(%esp)
	mov	%edi, 12(%esp)

	C Turn cnd into a mask: neg sets CF exactly when cnd is nonzero,
	C and sbb of a register with itself yields 0 - CF, that is 0 or -1.
	C The mask is written back over the cnd argument slot.
	mov	cnd, %eax
	neg	%eax
	sbb	%eax, %eax
	mov	%eax, cnd

	C Fetch the remaining arguments.
	mov	arg_rp, rp
	mov	arg_up, up
	mov	arg_vp, vp
	mov	arg_n, n

	C First source limbs, read before the pointers are moved.
	mov	(vp), %eax
	mov	(up), %ebx

	C Bias the pointers so that a negative index in n counts up to zero.
	C up and vp point just past their last limb.  rp points at its last
	C limb, one limb behind, because each result limb is stored on the
	C iteration after it was computed.
	lea	(vp,n,4), vp
	lea	(up,n,4), up
	lea	-4(rp,n,4), rp
	neg	n

	C Limb 0: no incoming carry.  cy captures the carry out as 0 or -1.
	and	cnd, %eax
	ADDSUB	%eax, %ebx
	sbb	cy, cy
	inc	n
	je	L(last)

	ALIGN(16)
L(again):
	C Load and mask the next vp limb, store the pending result limb,
	C then load the next up limb.
	mov	(vp,n,4), %eax
	and	cnd, %eax
	mov	%ebx, (rp,n,4)
	mov	(up,n,4), %ebx
	C Doubling cy (0 or -1) puts the saved carry back into CF.
	add	cy, cy
	ADCSBB	%eax, %ebx
	C Save the new carry before inc, whose ZF result drives the branch.
	sbb	cy, cy
	inc	n
	jne	L(again)

L(last):
	C Store the final result limb and return 0 - cy, which is 0 or 1.
	mov	%ebx, (rp)
	xor	%eax, %eax
	sub	cy, %eax

	C Restore the saved registers and release the save area.
	mov	(%esp), %ebp
	mov	4(%esp), %ebx
	mov	8(%esp), %esi
	mov	12(%esp), %edi
	add	$16, %esp
	ret
EPILOGUE()
ASM_END()