1dnl X86 mpn_cnd_sub_n optimised for Intel Atom. 2 3dnl Copyright 2013 Free Software Foundation, Inc. 4 5dnl This file is part of the GNU MP Library. 6dnl 7dnl The GNU MP Library is free software; you can redistribute it and/or modify 8dnl it under the terms of either: 9dnl 10dnl * the GNU Lesser General Public License as published by the Free 11dnl Software Foundation; either version 3 of the License, or (at your 12dnl option) any later version. 13dnl 14dnl or 15dnl 16dnl * the GNU General Public License as published by the Free Software 17dnl Foundation; either version 2 of the License, or (at your option) any 18dnl later version. 19dnl 20dnl or both in parallel, as here. 21dnl 22dnl The GNU MP Library is distributed in the hope that it will be useful, but 23dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 24dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 25dnl for more details. 26dnl 27dnl You should have received copies of the GNU General Public License and the 28dnl GNU Lesser General Public License along with the GNU MP Library. If not, 29dnl see https://www.gnu.org/licenses/. 30 31include(`../config.m4') 32 33C cycles/limb 34C P5 ? 35C P6 model 0-8,10-12 ? 36C P6 model 9 (Banias) ? 37C P6 model 13 (Dothan) ? 38C P4 model 0-1 (Willamette) ? 39C P4 model 2 (Northwood) ? 40C P4 model 3-4 (Prescott) ? 41C Intel atom 5.67 42C AMD K6 ? 43C AMD K7 ? 44C AMD K8 ? 
C Register roles for the whole routine.
define(`rp',	`%edi')		C destination limb pointer
define(`up',	`%esi')		C minuend limb pointer
define(`vp',	`%ebp')		C subtrahend limb pointer
define(`n',	`%ecx')		C limb count on load, then loop counter

C Stack slots of the incoming arguments as seen after the four pushes in the
C prologue: 16 bytes of saved registers plus the 4-byte return address put
C the first argument at 20(%esp).
define(`cnd_param',	`20(%esp)')
define(`cnd_mask',	`20(%esp)')	C same slot, reused to hold the mask
define(`rp_param',	`24(%esp)')
define(`up_param',	`28(%esp)')
define(`vp_param',	`32(%esp)')
define(`n_param',	`36(%esp)')

C mpn_cnd_sub_n -- conditional limb-vector subtraction.
C
C Stack arguments, in order: cnd, rp, up, vp, n.
C
C For i = 0 .. n-1 (limbs are 4 bytes) this stores
C	rp[i] = up[i] - (vp[i] AND mask) - borrow
C where mask is all ones when cnd is non-zero and zero otherwise, and the
C borrow ripples from limb to limb.  The borrow out of the top limb (0 or 1)
C is returned in %eax.
C
C The vp limbs are always read and masked, whatever cnd is; there is no
C branch on cnd.
C
C n must be at least 1: there is no zero-length path, a count of 0 would
C wrap the loop counter.
C
C The mask is written back over the incoming cnd argument slot.
C
C %edi, %esi, %ebx and %ebp are saved and restored; %eax, %ecx, %edx and the
C flags are clobbered.
C
C Loads, pointer updates and arithmetic are deliberately interleaved (the
C file header says the code is tuned for Intel Atom), so keep the order.

ASM_START()
	TEXT
	ALIGN(16)
PROLOGUE(mpn_cnd_sub_n)
	push	%edi
	push	%esi
	push	%ebx
	push	%ebp

C Fetch the arguments while turning cnd into a 0 / -1 mask.
	mov	cnd_param, %eax
	mov	rp_param, rp
	neg	%eax			C CF = 1 iff cnd is non-zero
	mov	up_param, up
	sbb	%eax, %eax		C eax = -CF, i.e. 0 or all ones
	mov	vp_param, vp
	mov	%eax, cnd_mask		C keep the mask in memory, registers are scarce
	mov	n_param, n

	xor	%edx, %edx		C no borrow yet (used when n is odd)

C The main loop retires two limbs per pass and always leaves one limb pending
C for the tail, so it wants an odd number of limbs.  Counter = (n+1)/2; the
C bit shifted out is set exactly when n is even.
	inc	n
	shr	n
	jnc	L(enter)		C n odd: go straight to the paired code

C n even: peel off one limb first.
	mov	0(vp), %eax
	and	cnd_mask, %eax
	lea	4(vp), vp		C lea: advance without touching flags
	mov	0(up), %edx
	sub	%eax, %edx
	lea	4(rp), rp
	lea	4(up), up
	mov	%edx, -4(rp)
	sbb	%edx, %edx		C edx = -borrow, parked across the and below

C Load the pending limb pair member (masked vp limb in ebx, up limb in edx).
L(enter):
	mov	0(vp), %ebx
	and	cnd_mask, %ebx		C clobbers CF ...
	add	%edx, %edx		C ... so regenerate it: -1 + -1 carries, 0 + 0 does not
	mov	0(up), %edx
	dec	n			C dec leaves CF alone
	je	L(fin)			C only the pending limb is left

C Each pass: finish the pending limb, do the next limb, load a new pending one.
L(loop):
	sbb	%ebx, %edx		C pending limb
	mov	4(vp), %eax
	mov	%edx, 0(rp)
	sbb	%edx, %edx		C park borrow as 0 / -1
	mov	8(vp), %ebx		C vp limb for the next pending step
	lea	8(up), up
	and	cnd_mask, %ebx
	and	cnd_mask, %eax
	add	%edx, %edx		C borrow back into CF
	mov	-4(up), %edx
	lea	8(rp), rp
	sbb	%eax, %edx		C second limb of this pass
	mov	%edx, -4(rp)
	dec	n			C sets ZF for the jne, CF survives
	mov	0(up), %edx		C up limb for the next pending step
	lea	8(vp), vp
	jne	L(loop)

C Retire the final pending limb and hand back the borrow.
L(fin):
	sbb	%ebx, %edx
	mov	%edx, 0(rp)

	mov	$0, %eax		C mov, not xor: CF must survive
	adc	%eax, %eax		C eax = borrow out

	pop	%ebp
	pop	%ebx
	pop	%esi
	pop	%edi
	ret
EPILOGUE()
ASM_END()