dnl  AMD64 mpn_rshift -- mpn right shift.

dnl  Copyright 2003, 2005, 2009, 2011, 2012 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')


C	     cycles/limb
C AMD K8,K9	 2.375
C AMD K10	 2.375
C Intel P4	 8
C Intel core2	 2.11
C Intel corei	 ?
C Intel atom	 5.75
C VIA nano	 3.5


C INPUT PARAMETERS
define(`rp',	`%rdi')
define(`up',	`%rsi')
define(`n',	`%rdx')
define(`cnt',	`%rcx')

C mpn_rshift (rp, up, n, cnt)
C
C Shift the n-limb number at up right by cnt bits and store the n result
C limbs at rp.  Limbs are processed from the least significant end upward.
C
C Syntax:  AT&T operand order, preprocessed by m4.
C In:      rp  = destination limb pointer
C          up  = source limb pointer
C          n   = limb count; the code reads the limb at up before testing n,
C                so n >= 1 is required
C          cnt = shift count; the complementary count is formed as -cnt and
C                the hardware masks it to 6 bits, so the result is only
C                meaningful for 1 <= cnt <= 63
C Out:     rax = least significant source limb shifted left by 64-cnt, that
C                is the bits shifted out, placed at the top of the limb
C Clobbers: rcx, r8, r9, r10, r11, flags; rp, up and n are modified.
C NOTE(review): FUNC_ENTRY(4)/FUNC_EXIT() come from config.m4; presumably they
C adapt the DOS64 argument registers to the ones named above -- confirm there.
C
C Register roles inside the function:
C   cl        toggled with neg between cnt (rsh count) and 64-cnt (lsh count)
C   n         negated limb index counting up toward zero
C   up, rp    point at the most significant limb; limbs are addressed as
C             disp(up,n,8) and disp(rp,n,8)
C   r10, r11  halves of result limbs produced with shr
C   r8, r9    halves of result limbs produced with shl

ABI_SUPPORT(DOS64)
ABI_SUPPORT(STD64)

ASM_START()
	TEXT
	ALIGN(32)
PROLOGUE(mpn_rshift)
	FUNC_ENTRY(4)
	neg	R32(%rcx)		C cl = -cnt, acts as 64-cnt (lsh count)
	mov	(up), %rax
	shl	R8(%rcx), %rax		C function return value
	neg	R32(%rcx)		C cl = cnt again (rsh count)

	lea	1(n), R32(%r8)		C r8d = n+1, used to dispatch on n mod 4

	lea	-8(up,n,8), up		C up -> most significant source limb
	lea	-8(rp,n,8), rp		C rp -> most significant result limb
	neg	n			C n = minus the number of limbs left

	and	$3, R32(%r8)
	je	L(rlx)			C jump for n = 3, 7, 11, ...

	dec	R32(%r8)
	jne	L(1)
C	n = 4, 8, 12, ...
C	Peel one limb so that the number of limbs left is 3 mod 4.
	mov	8(up,n,8), %r10
	shr	R8(%rcx), %r10
	neg	R32(%rcx)		C put lsh count in cl
	mov	16(up,n,8), %r8
	shl	R8(%rcx), %r8
	or	%r8, %r10
	mov	%r10, 8(rp,n,8)
	inc	n
	jmp	L(rll)			C rll switches cl back to the rsh count

L(1):	dec	R32(%r8)
	je	L(1x)			C jump for n = 1, 5, 9, 13, ...
C	n = 2, 6, 10, 14, ...
C	Peel one limb so that the number of limbs left is 1 mod 4.
	mov	8(up,n,8), %r10
	shr	R8(%rcx), %r10
	neg	R32(%rcx)		C put lsh count in cl
	mov	16(up,n,8), %r8
	shl	R8(%rcx), %r8
	or	%r8, %r10
	mov	%r10, 8(rp,n,8)
	inc	n
	neg	R32(%rcx)		C put rsh count in cl
L(1x):
	cmp	$-1, n
	je	L(ast)			C a single limb is left, finish it at ast
C	Peel two limbs so that the number of limbs left is 3 mod 4.
	mov	8(up,n,8), %r10
	shr	R8(%rcx), %r10
	mov	16(up,n,8), %r11
	shr	R8(%rcx), %r11
	neg	R32(%rcx)		C put lsh count in cl
	mov	16(up,n,8), %r8
	mov	24(up,n,8), %r9
	shl	R8(%rcx), %r8
	or	%r8, %r10
	shl	R8(%rcx), %r9
	or	%r9, %r11
	mov	%r10, 8(rp,n,8)
	mov	%r11, 16(rp,n,8)
	add	$2, n

C	From here the number of limbs left is 3 mod 4.  Start the first pair of
C	result limbs with their rsh halves before entering the loop.
L(rll):	neg	R32(%rcx)		C put rsh count in cl
L(rlx):	mov	8(up,n,8), %r10
	shr	R8(%rcx), %r10
	mov	16(up,n,8), %r11
	shr	R8(%rcx), %r11

C	The add carries once the negated index passes zero, which happens when
C	only the final three limbs are left.
	add	$4, n			C				      4
	jb	L(end)			C				      2
	ALIGN(16)
L(top):
	C finish the pair started with rsh: merge in the lsh halves
	neg	R32(%rcx)		C put lsh count in cl
	mov	-16(up,n,8), %r8
	mov	-8(up,n,8), %r9
	shl	R8(%rcx), %r8
	or	%r8, %r10
	shl	R8(%rcx), %r9
	or	%r9, %r11
	mov	%r10, -24(rp,n,8)
	mov	%r11, -16(rp,n,8)
	C start two new result limbs with their lsh halves
	mov	(up,n,8), %r8
	mov	8(up,n,8), %r9
	shl	R8(%rcx), %r8
	shl	R8(%rcx), %r9

	C finish the pair started with lsh: merge in the rsh halves
	neg	R32(%rcx)		C put rsh count in cl
	mov	-8(up,n,8), %r10
	mov	0(up,n,8), %r11
	shr	R8(%rcx), %r10
	or	%r10, %r8
	shr	R8(%rcx), %r11
	or	%r11, %r9
	mov	%r8, -8(rp,n,8)
	mov	%r9, 0(rp,n,8)
	C start two new result limbs with their rsh halves
	mov	8(up,n,8), %r10
	mov	16(up,n,8), %r11
	shr	R8(%rcx), %r10
	shr	R8(%rcx), %r11

	add	$4, n
	jae	L(top)			C				      2
L(end):
C	Finish the pending pair; up and rp are used without the index here and
C	address the two most significant limbs directly.
	neg	R32(%rcx)		C put lsh count in cl
	mov	-8(up), %r8
	shl	R8(%rcx), %r8
	or	%r8, %r10
	mov	(up), %r9
	shl	R8(%rcx), %r9
	or	%r9, %r11
	mov	%r10, -16(rp)
	mov	%r11, -8(rp)

	neg	R32(%rcx)		C put rsh count in cl
C	Most significant limb: nothing above it, so only the rsh half is stored.
L(ast):	mov	(up), %r10
	shr	R8(%rcx), %r10
	mov	%r10, (rp)
	FUNC_EXIT()
	ret
EPILOGUE()