dnl  AMD64 mpn_lshift -- mpn left shift, optimised for Atom.

dnl  Contributed to the GNU project by Torbjorn Granlund.

dnl  Copyright 2011, 2012 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')

C                   cycles/limb
C AMD K8,K9              ?
C AMD K10                ?
C Intel P4               ?
C Intel core2            ?
C Intel NHM              ?
C Intel SBR              ?
C Intel atom            4.5
C VIA nano               ?

C TODO
C  * Consider using 4-way unrolling.  We reach 4 c/l, but the code is 2.5
C    times larger.

C INPUT PARAMETERS
define(`rp',  `%rdi')
define(`up',  `%rsi')
define(`n',   `%rdx')
define(`cnt', `%rcx')

ABI_SUPPORT(DOS64)
ABI_SUPPORT(STD64)

ASM_START()
	TEXT
	ALIGN(16)
PROLOGUE(mpn_lshift)
	FUNC_ENTRY(4)
	lea	-8(up,n,8), up		C point up at the most significant limb
	lea	-8(rp,n,8), rp		C point rp at the most significant limb
	shr	R32(n)			C n = n/2; carry flag set iff n was odd
	mov	(up), %rax
	jnc	L(evn)

	mov	%rax, %r11		C n odd: handle the top limb alone
	shl	R8(%rcx), %r11
	neg	R8(%rcx)		C cl = 64-cnt (shifts mask cl mod 64)
	shr	R8(%rcx), %rax		C function result: out-shifted top bits
	test	n, n
	jnz	L(gt1)
	mov	%r11, (rp)		C n = 1: store the single limb, done
	FUNC_EXIT()
	ret

L(gt1):	mov	-8(up), %r8
	mov	%r8, %r10
	shr	R8(%rcx), %r8
	jmp	L(lo1)

L(evn):	mov	%rax, %r10		C n even
	neg	R8(%rcx)		C cl = 64-cnt
	shr	R8(%rcx), %rax		C function result: out-shifted top bits
	mov	-8(up), %r9
	mov	%r9, %r11
	shr	R8(%rcx), %r9
	neg	R8(%rcx)		C cl = cnt again
	dec	n
	lea	8(rp), rp
	lea	-8(up), up
	jz	L(end)

C Main loop, 2 limbs/iteration.  cl alternates between cnt and 64-cnt by
C negation, so a single count register serves both the left and right shifts.
	ALIGN(8)
L(top):	shl	R8(%rcx), %r10
	or	%r10, %r9
	shl	R8(%rcx), %r11
	neg	R8(%rcx)
	mov	-8(up), %r8
	mov	%r8, %r10
	mov	%r9, -8(rp)
	shr	R8(%rcx), %r8
	lea	-16(rp), rp
L(lo1):	mov	-16(up), %r9
	or	%r11, %r8
	mov	%r9, %r11
	shr	R8(%rcx), %r9
	lea	-16(up), up
	neg	R8(%rcx)
	mov	%r8, (rp)
	dec	n
	jg	L(top)

L(end):	shl	R8(%rcx), %r10		C combine and store the last two limbs
	or	%r10, %r9
	shl	R8(%rcx), %r11
	mov	%r9, -8(rp)
	mov	%r11, -16(rp)
	FUNC_EXIT()
	ret
EPILOGUE()
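
C The plain-C sketch below restates the routine's contract for reference; it
C is a comment only and is never assembled.  It assumes 64-bit limbs as on
C AMD64 and the documented mpn_lshift semantics: shift {up,n} left by cnt
C bits (1 <= cnt < 64), store the low limbs at {rp,n}, and return the bits
C shifted out of the top limb.  Looping from the most significant limb down
C mirrors the assembly and allows overlapping operands with rp >= up.
C
C   mp_limb_t
C   mpn_lshift (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned int cnt)
C   {
C     mp_limb_t retval = up[n - 1] >> (64 - cnt);   /* out-shifted bits */
C     for (mp_size_t i = n - 1; i > 0; i--)
C       rp[i] = (up[i] << cnt) | (up[i - 1] >> (64 - cnt));
C     rp[0] = up[0] << cnt;                         /* lowest output limb */
C     return retval;
C   }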