dnl  AMD64 mpn_rshift -- mpn right shift.

dnl  Copyright 2003, 2005, 2009, 2011, 2012 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')


C	     cycles/limb
C AMD K8,K9	 2.375
C AMD K10	 2.375
C Intel P4	 8
C Intel core2	 2.11
C Intel corei	 ?
C Intel atom	 5.75
C VIA nano	 3.5


C mpn_rshift (rp, up, n, cnt)
C
C Shift the n-limb operand at up right by cnt bits and store n result limbs
C at rp.  Limbs are 64 bits.  Every result limb except the top one is
C   (up[i] >> cnt) | (up[i+1] << (64-cnt))
C and the top result limb is up[n-1] >> cnt.  The value returned in rax is
C up[0] << (64-cnt), i.e. the bits shifted out at the low end.
C
C Variable shifts take their count from cl only, so the code flips cl between
C cnt and -cnt with neg.  For 64-bit operands the hardware uses the count
C modulo 64, so a shift by -cnt acts as a shift by 64-cnt.
C
C Limbs are processed from the least significant end upwards.  After the
C setup, up and rp point at the most significant limb and n is negated, so
C 8(up,n,8) addresses the lowest limb not yet processed and n counts up
C towards zero.  Zero, one, two or three limbs are peeled first so that the
C number of remaining limbs is 3 mod 4; the main loop then completes four
C limbs per iteration, and the last three limbs are handled at L(end) and
C L(ast).
C
C Registers written by the instructions in this file: rax, rcx, rdx, rsi, rdi,
C r8, r9, r10, r11.
C
C NOTE(review): up[0] is read unconditionally and -cnt is relied on to act as
C 64-cnt, so the routine presumably requires n >= 1 and 1 <= cnt <= 63 --
C confirm against the GMP documentation.
C NOTE(review): FUNC_ENTRY, FUNC_EXIT, R32, R8, L, ALIGN, PROLOGUE and EPILOGUE
C come from config.m4, which is not visible here.  R32()/R8() presumably name
C the 32-bit and 8-bit views of a register, and FUNC_ENTRY(4)/FUNC_EXIT()
C presumably provide ABI glue for the four arguments -- confirm.

C INPUT PARAMETERS
define(`rp',	`%rdi')
define(`up',	`%rsi')
define(`n',	`%rdx')
define(`cnt',	`%rcx')

ABI_SUPPORT(DOS64)
ABI_SUPPORT(STD64)

ASM_START()
	TEXT
	ALIGN(32)
PROLOGUE(mpn_rshift)
	FUNC_ENTRY(4)
	neg	R32(%rcx)		C cl = -cnt, acts as lsh count 64-cnt
	mov	(up), %rax
	shl	R8(%rcx), %rax		C function return value
	neg	R32(%rcx)		C put rsh count (cnt) back in cl

	lea	1(n), R32(%r8)		C r8d = n+1, dispatched on mod 4 below

	lea	-8(up,n,8), up		C up -> most significant source limb
	lea	-8(rp,n,8), rp		C rp -> most significant result limb
	neg	n			C n = -(limbs left to process)

	and	$3, R32(%r8)
	je	L(rlx)			C jump for n = 3, 7, 11, ...

	dec	R32(%r8)
	jne	L(1)
C	n = 4, 8, 12, ...
C	Peel one limb so that the number of limbs left is 3 mod 4.
	mov	8(up,n,8), %r10
	shr	R8(%rcx), %r10		C low part: limb >> cnt
	neg	R32(%rcx)		C put lsh count in cl
	mov	16(up,n,8), %r8
	shl	R8(%rcx), %r8		C high part: next limb << (64-cnt)
	or	%r8, %r10
	mov	%r10, 8(rp,n,8)
	inc	n
	jmp	L(rll)			C L(rll) puts the rsh count back in cl

L(1):	dec	R32(%r8)
	je	L(1x)			C jump for n = 1, 5, 9, 13, ...
C	n = 2, 6, 10, 14, ...
C	Peel one limb, leaving a count that is 1 mod 4, then fall into L(1x).
	mov	8(up,n,8), %r10
	shr	R8(%rcx), %r10
	neg	R32(%rcx)		C put lsh count in cl
	mov	16(up,n,8), %r8
	shl	R8(%rcx), %r8
	or	%r8, %r10
	mov	%r10, 8(rp,n,8)
	inc	n
	neg	R32(%rcx)		C put rsh count in cl
L(1x):
C	The number of limbs left is 1 mod 4 and cl holds the rsh count.
	cmp	$-1, n
	je	L(ast)			C only the top limb is left
C	Otherwise peel two limbs so that the number left is 3 mod 4.
	mov	8(up,n,8), %r10
	shr	R8(%rcx), %r10
	mov	16(up,n,8), %r11
	shr	R8(%rcx), %r11
	neg	R32(%rcx)		C put lsh count in cl
	mov	16(up,n,8), %r8
	mov	24(up,n,8), %r9
	shl	R8(%rcx), %r8
	or	%r8, %r10
	shl	R8(%rcx), %r9
	or	%r9, %r11
	mov	%r10, 8(rp,n,8)
	mov	%r11, 16(rp,n,8)
	add	$2, n

L(rll):	neg	R32(%rcx)		C put rsh count in cl
C	The number of limbs left is 3 mod 4.  Start the two lowest results by
C	computing their low (right shifted) parts into r10 and r11.
L(rlx):	mov	8(up,n,8), %r10
	shr	R8(%rcx), %r10
	mov	16(up,n,8), %r11
	shr	R8(%rcx), %r11

C	A carry out of the add means n crossed zero, which with a count that is
C	3 mod 4 happens only when exactly three limbs were left.
	add	$4, n			C				      4
	jb	L(end)			C				      2
	ALIGN(16)
L(top):
C	Software pipelined loop, four result limbs per iteration.  On entry
C	r10 and r11 hold the right shifted parts of the next two results and
C	cl holds the rsh count.
	C finish the two results started with shr (pending in r10, r11)
	neg	R32(%rcx)		C put lsh count in cl
	mov	-16(up,n,8), %r8
	mov	-8(up,n,8), %r9
	shl	R8(%rcx), %r8
	or	%r8, %r10
	shl	R8(%rcx), %r9
	or	%r9, %r11
	mov	%r10, -24(rp,n,8)
	mov	%r11, -16(rp,n,8)
	C start two new results with their lsh parts
	mov	(up,n,8), %r8
	mov	8(up,n,8), %r9
	shl	R8(%rcx), %r8
	shl	R8(%rcx), %r9

	C finish the two results started with shl (pending in r8, r9)
	neg	R32(%rcx)		C put rsh count in cl
	mov	-8(up,n,8), %r10
	mov	0(up,n,8), %r11
	shr	R8(%rcx), %r10
	or	%r10, %r8
	shr	R8(%rcx), %r11
	or	%r11, %r9
	mov	%r8, -8(rp,n,8)
	mov	%r9, 0(rp,n,8)
	C start two new results with their rsh parts
	mov	8(up,n,8), %r10
	mov	16(up,n,8), %r11
	shr	R8(%rcx), %r10
	shr	R8(%rcx), %r11

	add	$4, n
	jae	L(top)			C				      2
L(end):
C	Three limbs are left.  Finish the two results pending in r10 and r11
C	with the left shifted parts of the two most significant source limbs.
	neg	R32(%rcx)		C put lsh count in cl
	mov	-8(up), %r8
	shl	R8(%rcx), %r8
	or	%r8, %r10
	mov	(up), %r9
	shl	R8(%rcx), %r9
	or	%r9, %r11
	mov	%r10, -16(rp)
	mov	%r11, -8(rp)

	neg	R32(%rcx)		C put rsh count in cl
C	Top limb: there is no limb above it, so it is only shifted right.
L(ast):	mov	(up), %r10
	shr	R8(%rcx), %r10
	mov	%r10, (rp)
	FUNC_EXIT()
	ret
EPILOGUE()