dnl  AMD64 mpn_lshiftc -- mpn left shift with complement.

dnl  Copyright 2003, 2005, 2006, 2009, 2011, 2012 Free Software Foundation, Inc.
dnl
dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or
dnl  modify it under the terms of the GNU Lesser General Public License as
dnl  published by the Free Software Foundation; either version 3 of the
dnl  License, or (at your option) any later version.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful,
dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
dnl  Lesser General Public License for more details.
dnl
dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')


C	     cycles/limb
C AMD K8,K9	 2.75
C AMD K10	 2.75
C Intel P4	 ?
C Intel core2	 ?
C Intel corei	 ?
C Intel atom	 ?
C VIA nano	 3.75


C mpn_lshiftc: shift an n-limb operand left by cnt bits and store the
C bitwise complement of the result.  Limbs are 64 bits wide.
C
C For i = n-1 down to 1:
C	rp[i] = NOT ((up[i] << cnt) | (up[i-1] >> (64-cnt)))
C and finally
C	rp[0] = NOT (up[0] << cnt)
C
C Return value (%rax): up[n-1] >> (64-cnt), that is the bits shifted out
C of the most significant limb.  The return value is NOT complemented.
C
C Limbs are read and written from the most significant end downwards.
C
C The shift count lives in %cl for the whole function.  A neg of %ecx
C switches between the left shift count (cnt) and the right shift count
C (64-cnt); this works because 64-bit shifts use only the low 6 bits of
C %cl.  Every path must therefore keep track of which of the two counts
C is currently in %cl (see the rsh/lsh remarks on each neg).
C
C Registers written: %rax, %rcx, %rdx, %r8, %r9, %r10, %r11 and the flags.
C
C NOTE(review): the code presumably requires n >= 1 and 1 <= cnt <= 63.
C With cnt = 0 both shift counts would be 0, and with n = 0 the first load
C would read below up.  Confirm against the callers.
C
C NOTE(review): FUNC_ENTRY(4) and FUNC_EXIT() come from config.m4;
C presumably they adapt the four arguments for the DOS64 ABI.

C INPUT PARAMETERS
define(`rp',	`%rdi')
define(`up',	`%rsi')
define(`n',	`%rdx')
define(`cnt',	`%rcx')

ABI_SUPPORT(DOS64)
ABI_SUPPORT(STD64)

ASM_START()
	TEXT
	ALIGN(32)
PROLOGUE(mpn_lshiftc)
	FUNC_ENTRY(4)
	neg	R32(%rcx)		C put rsh count in cl
	mov	-8(up,n,8), %rax
	shr	R8(%rcx), %rax		C function return value

C Dispatch on (n+1) mod 4.  The unrolled loop wants n = 3 mod 4, so the
C blocks below first peel off 0, 1, 2 or 3 of the most significant limbs.
	neg	R32(%rcx)		C put lsh count in cl
	lea	1(n), R32(%r8)
	and	$3, R32(%r8)
	je	L(rlx)			C jump for n = 3, 7, 11, ...

	dec	R32(%r8)
	jne	L(1)
C	n = 4, 8, 12, ...
C	Peel one limb, then enter at L(rll) which restores the lsh count.
	mov	-8(up,n,8), %r10
	shl	R8(%rcx), %r10
	neg	R32(%rcx)		C put rsh count in cl
	mov	-16(up,n,8), %r8
	shr	R8(%rcx), %r8
	or	%r8, %r10
	not	%r10
	mov	%r10, -8(rp,n,8)
	dec	n
	jmp	L(rll)

L(1):	dec	R32(%r8)
	je	L(1x)			C jump for n = 1, 5, 9, 13, ...
C	n = 2, 6, 10, 14, ...
C	Peel one limb and fall through to L(1x) with the lsh count in cl.
	mov	-8(up,n,8), %r10
	shl	R8(%rcx), %r10
	neg	R32(%rcx)		C put rsh count in cl
	mov	-16(up,n,8), %r8
	shr	R8(%rcx), %r8
	or	%r8, %r10
	not	%r10
	mov	%r10, -8(rp,n,8)
	dec	n
	neg	R32(%rcx)		C put lsh count in cl
L(1x):
C	Here n = 1 mod 4 and cl holds the lsh count.  A single remaining limb
C	goes straight to the final store; otherwise peel two more limbs.
	cmp	$1, n
	je	L(ast)
	mov	-8(up,n,8), %r10
	shl	R8(%rcx), %r10
	mov	-16(up,n,8), %r11
	shl	R8(%rcx), %r11
	neg	R32(%rcx)		C put rsh count in cl
	mov	-16(up,n,8), %r8
	mov	-24(up,n,8), %r9
	shr	R8(%rcx), %r8
	or	%r8, %r10
	shr	R8(%rcx), %r9
	or	%r9, %r11
	not	%r10
	not	%r11
	mov	%r10, -8(rp,n,8)
	mov	%r11, -16(rp,n,8)
	sub	$2, n

C Here n = 3 mod 4.  Start the left shifts of the two top remaining limbs;
C their right shifted partners are merged in at L(top) or at L(end).
L(rll):	neg	R32(%rcx)		C put lsh count in cl
L(rlx):	mov	-8(up,n,8), %r10
	shl	R8(%rcx), %r10
	mov	-16(up,n,8), %r11
	shl	R8(%rcx), %r11

	sub	$4, n			C				      4
	jb	L(end)			C				      2
	ALIGN(16)
C Main loop: four result limbs per iteration.  On entry %r10 and %r11 hold
C the left shifted halves of the two highest pending limbs, cl holds the
C lsh count, and n has already been reduced by 4.
L(top):
	C finish stuff from lsh block
	neg	R32(%rcx)		C put rsh count in cl
	mov	16(up,n,8), %r8
	mov	8(up,n,8), %r9
	shr	R8(%rcx), %r8
	or	%r8, %r10
	shr	R8(%rcx), %r9
	or	%r9, %r11
	not	%r10
	not	%r11
	mov	%r10, 24(rp,n,8)
	mov	%r11, 16(rp,n,8)
	C start two new rsh
	mov	0(up,n,8), %r8
	mov	-8(up,n,8), %r9
	shr	R8(%rcx), %r8
	shr	R8(%rcx), %r9

	C finish stuff from rsh block
	neg	R32(%rcx)		C put lsh count in cl
	mov	8(up,n,8), %r10
	mov	0(up,n,8), %r11
	shl	R8(%rcx), %r10
	or	%r10, %r8
	shl	R8(%rcx), %r11
	or	%r11, %r9
	not	%r8
	not	%r9
	mov	%r8, 8(rp,n,8)
	mov	%r9, 0(rp,n,8)
	C start two new lsh
	mov	-8(up,n,8), %r10
	mov	-16(up,n,8), %r11
	shl	R8(%rcx), %r10
	shl	R8(%rcx), %r11

	sub	$4, n
	jae	L(top)			C				      2
C Wind down: three limbs remain.  %r10 and %r11 hold the left shifted
C halves for rp[2] and rp[1]; merge in the right shifted halves and store.
L(end):
	neg	R32(%rcx)		C put rsh count in cl
	mov	8(up), %r8
	shr	R8(%rcx), %r8
	or	%r8, %r10
	mov	(up), %r9
	shr	R8(%rcx), %r9
	or	%r9, %r11
	not	%r10
	not	%r11
	mov	%r10, 16(rp)
	mov	%r11, 8(rp)

	neg	R32(%rcx)		C put lsh count in cl
C Least significant limb: nothing is shifted in from below, so after the
C complement its low cnt bits are all ones.
L(ast):	mov	(up), %r10
	shl	R8(%rcx), %r10
	not	%r10
	mov	%r10, (rp)
	FUNC_EXIT()
	ret
EPILOGUE()