dnl  AMD64 mpn_mul_1 optimised for AMD Bulldozer.

dnl  Copyright 2003-2005, 2007, 2008, 2011-2013 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C	     cycles/limb
C AMD K8,K9
C AMD K10
C AMD bd1	 4
C AMD bobcat
C Intel P4
C Intel core2
C Intel NHM
C Intel SBR
C Intel atom
C VIA nano

C The loop of this code is the result of running a code generation and
C optimisation tool suite written by David Harvey and Torbjorn Granlund.

C TODO
C  * Move loop code into feed-in blocks, to save insn for zeroing regs.
50 51define(`rp', `%rdi') C rcx 52define(`up', `%rsi') C rdx 53define(`n_param', `%rdx') C r8 54define(`v0', `%rcx') C r9 55 56define(`n', `%rbx') 57 58ABI_SUPPORT(DOS64) 59ABI_SUPPORT(STD64) 60 61IFDOS(` define(`up', ``%rsi'') ') dnl 62IFDOS(` define(`rp', ``%rcx'') ') dnl 63IFDOS(` define(`v0', ``%r9'') ') dnl 64IFDOS(` define(`r9', ``rdi'') ') dnl 65IFDOS(` define(`n', ``%r8'') ') dnl 66IFDOS(` define(`r8', ``rbx'') ') dnl 67 68ASM_START() 69 TEXT 70 ALIGN(16) 71PROLOGUE(mpn_mul_1c) 72IFDOS(``push %rsi '') 73IFDOS(``push %rdi '') 74IFDOS(``mov %rdx, %rsi '') 75 76 mov (up), %rax C read first u limb early 77 push %rbx 78IFSTD(` mov n_param, %r11 ') C move away n from rdx, mul uses it 79IFDOS(` mov n, %r11 ') 80 mul v0 81 82IFSTD(` add %r8, %rax ') 83IFDOS(` add 64(%rsp), %rax ') C 40 + 3*8 (3 push insns) 84 adc $0, %rdx 85 jmp L(common) 86 87EPILOGUE() 88 89 ALIGN(16) 90PROLOGUE(mpn_mul_1) 91IFDOS(``push %rsi '') 92IFDOS(``push %rdi '') 93IFDOS(``mov %rdx, %rsi '') 94 95 mov (up), %rax C read first u limb early 96 push %rbx 97IFSTD(` mov n_param, %r11 ') C move away n from rdx, mul uses it 98IFDOS(` mov n, %r11 ') 99 mul v0 100 101L(common): 102IFSTD(` mov %r11, n ') 103 104 and $3, R32(%r11) 105 lea -16(rp,n,8), rp 106 jz L(b0) 107 cmp $2, R32(%r11) 108 jb L(b1) 109 jz L(b2) 110 111L(b3): mov %rax, %r10 112 mov %rdx, %r11 113 mov 8(up), %rax 114 mul v0 115 lea (up,n,8), up 116 not n 117 jmp L(L3) 118 119L(b0): mov %rax, %r9 120 mov %rdx, %r10 121 mov 8(up), %rax 122 lea (up,n,8), up 123 neg n 124 jmp L(L0) 125 126L(b1): mov %rax, %r8 127 cmp $1, n 128 jz L(n1) 129 mov %rdx, %r9 130 lea (up,n,8), up 131 neg n 132 mov %r8, 16(rp,n,8) 133 inc n 134 jmp L(L1) 135 136L(b2): mov %rax, %r11 137 mov %rdx, %r8 138 mov 8(up), %rax 139 lea (up,n,8), up 140 neg n 141 add $2, n 142 jns L(end) 143 144 ALIGN(16) 145L(top): mul v0 146 mov %rdx, %r9 147 add %rax, %r8 148 adc $0, %r9 149 mov %r8, 8(rp,n,8) 150 mov %r11, (rp,n,8) 151L(L1): mov (up,n,8), %rax 152 mul v0 153 add 
%rax, %r9 154 mov %rdx, %r10 155 mov 8(up,n,8), %rax 156 adc $0, %r10 157L(L0): mul v0 158 add %rax, %r10 159 mov %rdx, %r11 160 mov 16(up,n,8), %rax 161 adc $0, %r11 162 mul v0 163 mov %r9, 16(rp,n,8) 164L(L3): add %rax, %r11 165 mov %r10, 24(rp,n,8) 166 mov %rdx, %r8 167 adc $0, %r8 168 add $4, n 169 mov -8(up,n,8), %rax 170 js L(top) 171 172L(end): mul v0 173 add %rax, %r8 174 adc $0, %rdx 175 mov %r11, (rp) 176L(n1): mov %r8, 8(rp) 177 mov %rdx, %rax 178 179 pop %rbx 180IFDOS(``pop %rdi '') 181IFDOS(``pop %rsi '') 182 ret 183EPILOGUE() 184ASM_END() 185