dnl  AMD64 mpn_lshiftc -- mpn left shift with complement.

dnl  Copyright 2003, 2005, 2006, 2009, 2011, 2012 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')


C	     cycles/limb
C AMD K8,K9	 2.75
C AMD K10	 2.75
C Intel P4	 ?
C Intel core2	 ?
C Intel corei	 ?
C Intel atom	 ?
C VIA nano	 3.75


C WHAT THE CODE COMPUTES
C
C With 64-bit limbs (all loads and stores are 8 bytes wide, indexed with
C scale 8), the instructions below produce:
C
C   rp[i] = ~((up[i] << cnt) | (up[i-1] >> (64-cnt)))   for i = n-1 ... 1
C   rp[0] = ~(up[0] << cnt)
C   rax   = up[n-1] >> (64-cnt)                          (return value)
C
C Limbs are stored from the highest index down to index 0.
C
C The shift count stays in cl for the whole routine.  A neg of ecx flips it
C between cnt (used by shl) and -cnt (used by shr); a 64-bit shift only
C looks at the low six bits of cl, so -cnt behaves as 64-cnt.
C
C NOTE(review): nothing here checks the arguments.  Presumably callers
C guarantee n >= 1 and 1 <= cnt <= 63 -- confirm against the caller contract.
C NOTE(review): FUNC_ENTRY, FUNC_EXIT, R32, R8, L, PROLOGUE and EPILOGUE are
C macros defined outside this file (via config.m4).  R32 and R8 appear to
C select the 32-bit and 8-bit names of a register; FUNC_ENTRY(4) presumably
C adapts a 4-argument call for the DOS64 ABI -- confirm in the macro source.

C INPUT PARAMETERS
C   rp   destination limb pointer (only ever stored through)
C   up   source limb pointer (only ever loaded from)
C   n    limb count, also used as the running index
C   cnt  shift count; the code below names %rcx directly, not this macro
define(`rp',	`%rdi')
define(`up',	`%rsi')
define(`n',	`%rdx')
define(`cnt',	`%rcx')

ABI_SUPPORT(DOS64)
ABI_SUPPORT(STD64)

ASM_START()
	TEXT
	ALIGN(32)
PROLOGUE(mpn_lshiftc)
	FUNC_ENTRY(4)
C Return value first: the bits shifted out of the top limb.  rax is not
C written again after this point.
	neg	R32(%rcx)		C put rsh count in cl
	mov	-8(up,n,8), %rax
	shr	R8(%rcx), %rax		C function return value

C Dispatch on (n+1) mod 4.  The paths below peel 0, 1, 2 or 3 high limbs so
C that n mod 4 = 3 whenever L(rlx) is reached.
	neg	R32(%rcx)		C put lsh count in cl
	lea	1(n), R32(%r8)
	and	$3, R32(%r8)
	je	L(rlx)			C jump for n = 3, 7, 11, ...

	dec	R32(%r8)
	jne	L(1)
C	n = 4, 8, 12, ...
C	Peel one limb: rp[n-1] = ~((up[n-1] << cnt) | (up[n-2] >> (64-cnt)))
	mov	-8(up,n,8), %r10
	shl	R8(%rcx), %r10
	neg	R32(%rcx)		C put rsh count in cl
	mov	-16(up,n,8), %r8
	shr	R8(%rcx), %r8
	or	%r8, %r10
	not	%r10
	mov	%r10, -8(rp,n,8)
	dec	n
	jmp	L(rll)			C L(rll) flips cl back to the lsh count

L(1):	dec	R32(%r8)
	je	L(1x)			C jump for n = 1, 5, 9, 13, ...
C	n = 2, 6, 10, 14, ...
C	Peel one limb as above, then fall into the n = 1, 5, 9, ... handling.
	mov	-8(up,n,8), %r10
	shl	R8(%rcx), %r10
	neg	R32(%rcx)		C put rsh count in cl
	mov	-16(up,n,8), %r8
	shr	R8(%rcx), %r8
	or	%r8, %r10
	not	%r10
	mov	%r10, -8(rp,n,8)
	dec	n
	neg	R32(%rcx)		C put lsh count in cl
L(1x):
C	Only limb 0 left: it has no lower neighbour, so finish at L(ast).
	cmp	$1, n
	je	L(ast)
C	Otherwise peel two limbs (rp[n-1] and rp[n-2]), leaving n mod 4 = 3.
	mov	-8(up,n,8), %r10
	shl	R8(%rcx), %r10
	mov	-16(up,n,8), %r11
	shl	R8(%rcx), %r11
	neg	R32(%rcx)		C put rsh count in cl
	mov	-16(up,n,8), %r8
	mov	-24(up,n,8), %r9
	shr	R8(%rcx), %r8
	or	%r8, %r10
	shr	R8(%rcx), %r9
	or	%r9, %r11
	not	%r10
	not	%r11
	mov	%r10, -8(rp,n,8)
	mov	%r11, -16(rp,n,8)
	sub	$2, n

L(rll):	neg	R32(%rcx)		C put lsh count in cl
C Prime the pipeline: r10 and r11 hold the left-shifted halves of the two
C highest remaining limbs, up[n-1] and up[n-2].
L(rlx):	mov	-8(up,n,8), %r10
	shl	R8(%rcx), %r10
	mov	-16(up,n,8), %r11
	shl	R8(%rcx), %r11

C Borrow here means only three limbs remained; L(end) and L(ast) finish them.
	sub	$4, n			C 4
	jb	L(end)			C 2
	ALIGN(16)
C Main loop, four result limbs per pass.  n was already reduced by 4, so
C index n+3 below is the limb that was at n-1 when r10 was loaded.
L(top):
	C finish stuff from lsh block: complete and store rp[n+3], rp[n+2]
	neg	R32(%rcx)		C put rsh count in cl
	mov	16(up,n,8), %r8
	mov	8(up,n,8), %r9
	shr	R8(%rcx), %r8
	or	%r8, %r10
	shr	R8(%rcx), %r9
	or	%r9, %r11
	not	%r10
	not	%r11
	mov	%r10, 24(rp,n,8)
	mov	%r11, 16(rp,n,8)
	C start two new rsh: right-shifted halves for rp[n+1], rp[n]
	mov	0(up,n,8), %r8
	mov	-8(up,n,8), %r9
	shr	R8(%rcx), %r8
	shr	R8(%rcx), %r9

	C finish stuff from rsh block: complete and store rp[n+1], rp[n]
	neg	R32(%rcx)		C put lsh count in cl
	mov	8(up,n,8), %r10
	mov	0(up,n,8), %r11
	shl	R8(%rcx), %r10
	or	%r10, %r8
	shl	R8(%rcx), %r11
	or	%r11, %r9
	not	%r8
	not	%r9
	mov	%r8, 8(rp,n,8)
	mov	%r9, 0(rp,n,8)
	C start two new lsh: left-shifted halves for the next two limbs down
	mov	-8(up,n,8), %r10
	mov	-16(up,n,8), %r11
	shl	R8(%rcx), %r10
	shl	R8(%rcx), %r11

	sub	$4, n
	jae	L(top)			C 2
C Tail: r10 and r11 hold up[2] << cnt and up[1] << cnt.  n is no longer used;
C fixed offsets complete rp[2] and rp[1].
L(end):
	neg	R32(%rcx)		C put rsh count in cl
	mov	8(up), %r8
	shr	R8(%rcx), %r8
	or	%r8, %r10
	mov	(up), %r9
	shr	R8(%rcx), %r9
	or	%r9, %r11
	not	%r10
	not	%r11
	mov	%r10, 16(rp)
	mov	%r11, 8(rp)

	neg	R32(%rcx)		C put lsh count in cl
C Lowest limb: zeros are shifted in from below and then complemented, so the
C low cnt bits of rp[0] end up set.
L(ast):	mov	(up), %r10
	shl	R8(%rcx), %r10
	not	%r10
	mov	%r10, (rp)
	FUNC_EXIT()
	ret
EPILOGUE()