dnl  AMD64 mpn_lshift -- mpn left shift.

dnl  Copyright 2003, 2005, 2007, 2009, 2011, 2012, 2018 Free Software
dnl  Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')


C	     cycles/limb   cycles/limb cnt=1
C AMD K8,K9	 2.375		 1.375
C AMD K10	 2.375		 1.375
C Intel P4	 8		10.5
C Intel core2	 2.11		 4.28
C Intel corei	 ?		 ?
C Intel atom	 5.75		 3.5
C VIA nano	 3.5		 2.25


C mpn_lshift -- shift the n-limb operand at up left by cnt bits, store the n
C result limbs at rp, and return in %rax the bits shifted out at the top,
C computed as up[n-1] >> (64-cnt).
C
C Syntax is AT&T operand order (source first, destination last).
C
C Register use, as seen in the code below:
C   rp  (%rdi)	destination limb pointer
C   up  (%rsi)	source limb pointer
C   n   (%rdx)	limb count; reused as a downward-moving index and destroyed
C   cnt (%rcx)	bit count; only cl is used, as the variable shift count
C   %r8-%r11	scratch
C   %rax	return value
C
C A single count register is kept and flipped with neg between the left-shift
C count (cnt) and the right-shift count (-cnt).  A 64-bit shift by cl uses
C only the low 6 bits of cl, so -cnt acts as 64-cnt.  Every branch target
C below relies on cl being in a specific one of those two states; the
C existing comments on each neg say which.
C
C Limbs are produced from the most significant end downwards.  The entry
C code dispatches on (n+1) mod 4 and peels off 0, 1, 2 or 3 limbs so that the
C remaining count is 3 mod 4 (or jumps straight to L(ast) when only one limb
C is left).  L(top) then produces 4 limbs per iteration, and L(end) followed
C by L(ast) produces the final 3.
C
C NOTE(review): the code reads up[n-1] unconditionally, so it assumes n >= 1;
C it presumably also expects 1 <= cnt <= 63.  Neither is checked here --
C confirm against the documented mpn_lshift contract.
C NOTE(review): FUNC_ENTRY, FUNC_EXIT, R32, R8 and L come from config.m4,
C which is not visible here.  R8(%rcx) must name cl since it is used as a
C variable shift count; FUNC_ENTRY(4)/FUNC_EXIT() presumably adapt the DOS64
C argument registers to the STD64 ones used below -- confirm.

C INPUT PARAMETERS
define(`rp',	`%rdi')
define(`up',	`%rsi')
define(`n',	`%rdx')
define(`cnt',	`%rcx')

ABI_SUPPORT(DOS64)
ABI_SUPPORT(STD64)

ASM_START()
	TEXT
	ALIGN(32)
PROLOGUE(mpn_lshift)
	FUNC_ENTRY(4)
	neg	R32(%rcx)		C put rsh count in cl
	mov	-8(up,n,8), %rax	C top limb up[n-1]
	shr	R8(%rcx), %rax		C function return value

	neg	R32(%rcx)		C put lsh count in cl
	lea	1(n), R32(%r8)		C low 32 bits of n+1
	and	$3, R32(%r8)		C (n+1) mod 4, selects the entry path
	je	L(rlx)			C jump for n = 3, 7, 11, ...

	dec	R32(%r8)
	jne	L(1)
C	n = 4, 8, 12, ...
C	Peel one limb so that n becomes 3 mod 4.
	mov	-8(up,n,8), %r10
	shl	R8(%rcx), %r10
	neg	R32(%rcx)		C put rsh count in cl
	mov	-16(up,n,8), %r8
	shr	R8(%rcx), %r8
	or	%r8, %r10
	mov	%r10, -8(rp,n,8)
	dec	n
	jmp	L(rll)

L(1):	dec	R32(%r8)
	je	L(1x)			C jump for n = 1, 5, 9, 13, ...
C	n = 2, 6, 10, 14, ...
C	Peel one limb so that n becomes 1 mod 4, then fall into L(1x).
	mov	-8(up,n,8), %r10
	shl	R8(%rcx), %r10
	neg	R32(%rcx)		C put rsh count in cl
	mov	-16(up,n,8), %r8
	shr	R8(%rcx), %r8
	or	%r8, %r10
	mov	%r10, -8(rp,n,8)
	dec	n
	neg	R32(%rcx)		C put lsh count in cl
L(1x):
C	Here n is 1 mod 4 and cl holds the lsh count.
	cmp	$1, n
	je	L(ast)			C single limb left, only rp[0] to write
C	Peel two limbs so that n becomes 3 mod 4.
	mov	-8(up,n,8), %r10
	shl	R8(%rcx), %r10
	mov	-16(up,n,8), %r11
	shl	R8(%rcx), %r11
	neg	R32(%rcx)		C put rsh count in cl
	mov	-16(up,n,8), %r8
	mov	-24(up,n,8), %r9
	shr	R8(%rcx), %r8
	or	%r8, %r10
	shr	R8(%rcx), %r9
	or	%r9, %r11
	mov	%r10, -8(rp,n,8)
	mov	%r11, -16(rp,n,8)
	sub	$2, n

L(rll):	neg	R32(%rcx)		C put lsh count in cl
C	Here n is 3 mod 4 and cl holds the lsh count.  Start the left-shifted
C	halves of the two highest remaining limbs in r10 and r11.
L(rlx):	mov	-8(up,n,8), %r10
	shl	R8(%rcx), %r10
	mov	-16(up,n,8), %r11
	shl	R8(%rcx), %r11

	sub	$4, n			C				      4
	jb	L(end)			C				      2
	ALIGN(16)
C	Main loop, 4 result limbs per iteration.  On entry r10 and r11 hold
C	the left-shifted halves of result limbs n+3 and n+2, and cl holds the
C	lsh count.
L(top):
	C finish stuff from lsh block
	neg	R32(%rcx)		C put rsh count in cl
	mov	16(up,n,8), %r8
	mov	8(up,n,8), %r9
	shr	R8(%rcx), %r8
	or	%r8, %r10
	shr	R8(%rcx), %r9
	or	%r9, %r11
	mov	%r10, 24(rp,n,8)
	mov	%r11, 16(rp,n,8)
	C start two new rsh
	mov	0(up,n,8), %r8
	mov	-8(up,n,8), %r9
	shr	R8(%rcx), %r8
	shr	R8(%rcx), %r9

	C finish stuff from rsh block
	neg	R32(%rcx)		C put lsh count in cl
	mov	8(up,n,8), %r10
	mov	0(up,n,8), %r11
	shl	R8(%rcx), %r10
	or	%r10, %r8
	shl	R8(%rcx), %r11
	or	%r11, %r9
	mov	%r8, 8(rp,n,8)
	mov	%r9, 0(rp,n,8)
	C start two new lsh
	mov	-8(up,n,8), %r10
	mov	-16(up,n,8), %r11
	shl	R8(%rcx), %r10
	shl	R8(%rcx), %r11

	sub	$4, n
	jae	L(top)			C				      2
C	Wind down: three limbs remain.  r10 and r11 hold the left-shifted
C	halves of rp[2] and rp[1]; complete and store them here.
L(end):
	neg	R32(%rcx)		C put rsh count in cl
	mov	8(up), %r8
	shr	R8(%rcx), %r8
	or	%r8, %r10
	mov	(up), %r9
	shr	R8(%rcx), %r9
	or	%r9, %r11
	mov	%r10, 16(rp)
	mov	%r11, 8(rp)

	neg	R32(%rcx)		C put lsh count in cl
C	Lowest limb: nothing below it to shift in, so rp[0] = up[0] << cnt.
L(ast):	mov	(up), %r10
	shl	R8(%rcx), %r10
	mov	%r10, (rp)
	FUNC_EXIT()
	ret
EPILOGUE()