dnl  X86 mpn_cnd_add_n optimised for Intel Atom.

dnl  Copyright 2013 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C                            cycles/limb
C P5                             ?
C P6 model 0-8,10-12             ?
C P6 model 9  (Banias)           ?
C P6 model 13 (Dothan)           ?
C P4 model 0-1 (Willamette)      ?
C P4 model 2  (Northwood)        ?
C P4 model 3-4 (Prescott)        ?
C Intel atom                     4.67
C AMD K6                         ?
C AMD K7                         ?
C AMD K8                         ?
define(`rp',  `%edi')
define(`up',  `%esi')
define(`vp',  `%ebp')
define(`n',   `%ecx')
define(`cnd', `20(%esp)')
define(`w0',  `%eax')
define(`w1',  `%ebx')
define(`cy',  `%edx')

C mpn_cnd_add_n -- conditional limb-vector addition.
C
C Stack arguments, in order: cnd, rp, up, vp, n.  A nonzero cnd is widened to
C an all-ones mask and a zero cnd stays zero, then for i = 0 .. n-1
C
C	rp[i] = up[i] + (vp[i] & mask) + carry
C
C and the carry out of the last limb is returned in %eax as 0 or 1.  No
C branch depends on cnd, so the same instruction sequence runs either way.
C
C n is expected to be at least 1: with n = 0 the pair counter starts at zero
C and the closing dec/jne wraps it round instead of stopping.
C
C Register roles: w0 and w1 hold the two limbs being summed, cy holds the
C carry as 0 or -1 while CF is busy.  The instruction order matters because
C CF is carried across several instructions; keep it when editing.

ASM_START()
	TEXT
	ALIGN(16)
PROLOGUE(mpn_cnd_add_n)
	push	%edi
	push	%esi
	push	%ebx
	push	%ebp

C Widen cnd to a mask in %eax: neg sets CF exactly when cnd is nonzero, and
C sbb then produces 0 - CF.  The mask is written back over the cnd stack slot
C so that the and instructions further down can use it as a memory operand.
C The argument loads interleaved here are plain moves and leave CF alone.
	mov	cnd, %eax
	mov	24(%esp), rp
	neg	%eax
	mov	28(%esp), up
	sbb	%eax, %eax
	mov	32(%esp), vp
	mov	%eax, cnd
	mov	36(%esp), n

	xor	cy, cy			C cy starts out as zero

C Turn n into a count of limb pairs.  The bit shifted out lands in CF, so CF
C set means a single odd limb is handled first.
	shr	$1, n
	jnc	L(pair)

C Odd limb.  Pointers are advanced with lea since lea does not modify flags.
C sbb cy, cy copies the carry into cy and leaves CF as it was.
	mov	0(vp), w0
	and	cnd, w0
	lea	4(vp), vp
	add	0(up), w0
	lea	4(rp), rp
	lea	4(up), up
	sbb	cy, cy
	mov	w0, -4(rp)
	inc	n			C inc then dec: ZF reflects n, CF is kept
	dec	n
	je	L(done)

C Two limbs per pass.  The and instruction clears CF, so on entry the carry
C is parked in cy as 0 or -1 and then put back into CF by add cy, cy right
C before the adc pair.  dec does not write CF, so the carry out of the second
C adc is still live at the loop branch and at L(done).
L(pair):	sbb	cy, cy
	mov	0(vp), w0
	and	cnd, w0
	lea	8(vp), vp
	lea	8(rp), rp
	mov	-4(vp), w1
	and	cnd, w1
	add	cy, cy
	adc	0(up), w0
	lea	8(up), up
	mov	w0, -8(rp)
	adc	-4(up), w1
	dec	n
	mov	w1, -4(rp)
	jne	L(pair)

C Return the final carry.  %eax is zeroed with mov rather than xor because
C xor would clear the CF that the adc is about to read.
L(done):	mov	$0, %eax
	adc	%eax, %eax

	pop	%ebp
	pop	%ebx
	pop	%esi
	pop	%edi
	ret
EPILOGUE()
ASM_END()