dnl  AMD64 mpn_sqr_diag_addlsh1

dnl  Contributed to the GNU project by Torbjörn Granlund.

dnl  Copyright 2011-2013 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C	     cycles/limb
C AMD K8,K9	 2.5
C AMD K10	 2.5
C AMD bull	 3.6
C AMD pile	 3.6
C AMD steam	 ?
C AMD bobcat	 4
C AMD jaguar	 ?
C Intel P4	11.5
C Intel core	 4
C Intel NHM	 3.6
C Intel SBR	 3.15
C Intel IBR	 3.0
C Intel HWL	 2.6
C Intel BWL	 ?
C Intel atom	14
C VIA nano	 3.5

C When playing with pointers, set this to $2 to fall back to conservative
C indexing in wind-down code.
C I(a,b) expands to its first argument.  The wind-down code passes the plain
C rp-relative operand first and the n-indexed operand second, so with this
C definition the plain form is the one that gets assembled.
define(`I',`$1')

C Names for the incoming argument registers.
define(`rp',	 `%rdi')
define(`tp',	 `%rsi')
define(`up_arg', `%rdx')
define(`n',	 `%rcx')

C mul overwrites rdx (up_arg), so the routine works from a copy in r11.
define(`up',	 `%r11')

ABI_SUPPORT(DOS64)
ABI_SUPPORT(STD64)

C mpn_sqr_diag_addlsh1
C
C As read from the instructions below, with n the incoming limb count:
C   - each limb up[i] is squared, and the 128-bit squares are laid end to
C     end as a 2n-limb number;
C   - the 2n-2 limbs at tp are doubled (shifted left one bit) and added to
C     that number starting one limb up;
C   - the 2n result limbs are stored at rp.
C
C Register roles:
C   rp, tp	pointers biased by 2*(n-1) limbs, addressed with index n
C   up		points at the last source limb, addressed with index n/2
C   n		negative limb index, advancing by 2 until it reaches zero
C   rax, rdx	low and high half of the most recent square
C   r8, r9	the pair of tp limbs currently being doubled and summed
C   r10		high half of the previous square plus the saved carry
C   rbx		0 or 1: carry shifted out of the previous doubling
C
C Carry scheme: the carry out of the add/adc pair is consumed as the carry
C in of the next doubling, while the bit shifted out of the doubling is
C parked in rbx and folded into r10.  Both have the same weight, so the two
C chains can share the single carry flag.
C
C NOTE(review): the address arithmetic appears to require n >= 2.  With
C n = 1 the wind-down stores would start at rp[-1] and L(mid) would read
C up[1], tp[0] and tp[1].  Confirm that callers guarantee n >= 2.
C
C FUNC_ENTRY and FUNC_EXIT come from the included configuration; presumably
C they adapt the DOS64 argument registers.  Verify against config.m4.

ASM_START()
	TEXT
	ALIGN(32)
PROLOGUE(mpn_sqr_diag_addlsh1)
	FUNC_ENTRY(4)
	push	%rbx			C rbx is restored before returning

	dec	n
	shl	n			C n = 2*(n-1)

	mov	(up_arg), %rax		C rax = up[0]

	lea	(rp,n,8), rp		C bias rp by 2*(n-1) limbs
	lea	(tp,n,8), tp		C bias tp by the same amount
	lea	(up_arg,n,4), up	C up = last source limb (scale 4: n counts in twos)
	neg	n			C n now runs from -2*(n-1) up to 0

	mul	%rax			C rdx:rax = up[0]^2
	mov	%rax, (rp,n,8)		C lowest result limb

	xor	R32(%rbx), R32(%rbx)	C no saved carry yet; xor also clears CF
	jmp	L(mid)

	ALIGN(16)
L(top):	add	%r10, %r8		C low limb += previous high half + saved carry
	adc	%rax, %r9		C high limb += low half of current square
	mov	%r8, -8(rp,n,8)		C the stores leave CF intact for L(mid)
	mov	%r9, (rp,n,8)
L(mid):	mov	8(up,n,4), %rax		C next source limb to square
	mov	(tp,n,8), %r8
	mov	8(tp,n,8), %r9
	adc	%r8, %r8		C double the pair; carry in is CF from above
	adc	%r9, %r9
	lea	(%rdx,%rbx), %r10	C lea does not touch the flags
	setc	R8(%rbx)		C park the bit shifted out of the doubling
	mul	%rax			C rdx:rax = square; CF is overwritten here
	add	$2, n
	js	L(top)			C keep going while n is still negative

C Wind-down: n is zero here, so the plain rp-relative operands selected by
C I address the same locations as the n-indexed ones.
L(end):	add	%r10, %r8
	adc	%rax, %r9
	mov	%r8, I(-8(rp),-8(rp,n,8))
	mov	%r9, I((rp),(rp,n,8))
	adc	%rbx, %rdx		C top limb = high half + saved carry + CF
	mov	%rdx, I(8(rp),8(rp,n,8))

	pop	%rbx
	FUNC_EXIT()
	ret
EPILOGUE()