dnl  Intel Atom mpn_lshift -- mpn left shift.

dnl  Copyright 2011 Free Software Foundation, Inc.

dnl  Contributed to the GNU project by Torbjorn Granlund and Marco Bodrato.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or
dnl  modify it under the terms of the GNU Lesser General Public License as
dnl  published by the Free Software Foundation; either version 3 of the
dnl  License, or (at your option) any later version.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful,
dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
dnl  Lesser General Public License for more details.
dnl
dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')

C mp_limb_t mpn_lshift (mp_ptr dst, mp_srcptr src, mp_size_t size,
C			unsigned cnt);

C				  cycles/limb
C				cnt!=1	cnt==1
C P5
C P6 model 0-8,10-12
C P6 model 9  (Banias)
C P6 model 13 (Dothan)
C P4 model 0  (Willamette)
C P4 model 1  (?)
C P4 model 2  (Northwood)
C P4 model 3  (Prescott)
C P4 model 4  (Nocona)
C Intel Atom			 5	 2.5
C AMD K6
C AMD K7
C AMD K8
C AMD K10

C Syntax: AT&T operand order (source first, destination last), preprocessed
C by m4 with the macros pulled in through config.m4.
C
C Overview
C   Shifts the size-limb operand at src left by cnt bits, stores the result
C   at dst, and returns in %eax the bits shifted out of the most significant
C   limb.  Arguments are read from the stack through the PARAM_* frame
C   macros.  %edi, %esi, %ebx and %ebp are saved before use and restored
C   before returning on every path that modifies them.
C
C   Normal path   Any cnt.  Walks from the most significant limb down to the
C                 least significant one, two limbs per loop iteration.  The
C                 count register is negated back and forth so that %cl holds
C                 cnt for the shl steps and -cnt for the shr steps; this
C                 relies on 32-bit shifts using only the low five bits of
C                 %cl, which makes -cnt behave as 32-cnt.
C
C   Special path  cnt == 1 only, and only when dst <= src or
C                 dst >= src + size, so that walking upwards from the least
C                 significant limb never reads a limb that has already been
C                 overwritten.  Each limb is doubled with add/adc, four
C                 limbs per loop iteration, and the carry flag links
C                 consecutive limbs.
C
C NOTE(review): size is presumably required to be at least 1, as nothing
C here checks for size == 0 -- confirm against the mpn interface contract.

defframe(PARAM_CNT, 16)
defframe(PARAM_SIZE,12)
defframe(PARAM_SRC,  8)
defframe(PARAM_DST,  4)

dnl  re-use parameter space
dnl  Each alias below is written only after the PARAM_* value occupying the
dnl  same stack slot has been loaded into a register.
define(SAVE_UP,`PARAM_CNT')
define(VAR_COUNT,`PARAM_SIZE')
define(SAVE_EBX,`PARAM_SRC')
define(SAVE_EBP,`PARAM_DST')

define(`rp',  `%edi')
define(`up',  `%esi')
define(`cnt',  `%ecx')

ASM_START()
	TEXT
	ALIGN(8)
deflit(`FRAME',0)
PROLOGUE(mpn_lshift)
	mov	PARAM_CNT, cnt
	mov	PARAM_SIZE, %edx
	mov	up, SAVE_UP		C cnt slot is free now, park caller %esi
	mov	PARAM_SRC, up
	push	rp			FRAME_pushl()
	mov	PARAM_DST, rp

C We can use faster code for shift-by-1 under certain conditions.
	cmp	$1,cnt
	jne	L(normal)
	cmpl	rp, up
	jnc	L(special)		C jump if s_ptr >= res_ptr
	leal	(up,%edx,4),%eax
	cmpl	%eax,rp
	jnc	L(special)		C jump if res_ptr >= s_ptr + size

C General path.  Point up and rp at the most significant limb, halve size
C into VAR_COUNT, and branch on the bit shifted out: carry set means size
C is odd, carry clear means size is even.
L(normal):
	lea	-4(up,%edx,4), up
	mov	%ebx, SAVE_EBX
	lea	-4(rp,%edx,4), rp

	shr	%edx
	mov	(up), %eax
	mov	%edx, VAR_COUNT
	jnc	L(evn)

C Odd size.  %ebx = top limb << cnt, %eax = top limb >> (32-cnt), which is
C the return value.  %cl is left holding -cnt.
	mov	%eax, %ebx
	shl	%cl, %ebx
	neg	cnt
	shr	%cl, %eax
	test	%edx, %edx
	jnz	L(gt1)
	mov	%ebx, (rp)		C size == 1, single limb result
	jmp	L(quit)

C Odd size greater than 1.  Stash the return value on the stack and join the
C loop at its midpoint.
C NOTE(review): this push carries no FRAME_pushl().  Execution continues at
C L(lo1), which sits textually after the FRAME_pushl() of the even-size
C entry, so the frame macros used there presumably already account for this
C extra word -- confirm against the FRAME_pushl and defframe definitions.
L(gt1):	mov	%ebp, SAVE_EBP
	push	%eax
	mov	-4(up), %eax
	mov	%eax, %ebp
	shr	%cl, %eax
	jmp	L(lo1)

C Even size.  %eax becomes the return value, %ebp holds the top limb and
C %ebx/%edx the limb below it.  The two lea instructions do not modify the
C flags, so the jz below still tests the result of decl VAR_COUNT.
L(evn):	mov	%ebp, SAVE_EBP
	neg	cnt
	mov	%eax, %ebp
	mov	-4(up), %edx
	shr	%cl, %eax
	mov	%edx, %ebx
	shr	%cl, %edx
	neg	cnt
	decl	VAR_COUNT
	lea	4(rp), rp
	lea	-4(up), up
	jz	L(end)			C size == 2, no loop iterations needed
	push	%eax			FRAME_pushl()

C Main loop, two limbs per iteration.  On entry at L(top) %cl holds cnt; on
C entry at L(lo1) %cl holds -cnt.
	ALIGN(8)
L(top):	shl	%cl, %ebp
	or	%ebp, %edx
	shl	%cl, %ebx
	neg	cnt
	mov	-4(up), %eax
	mov	%eax, %ebp
	mov	%edx, -4(rp)
	shr	%cl, %eax
	lea	-8(rp), rp
L(lo1):	mov	-8(up), %edx
	or	%ebx, %eax
	mov	%edx, %ebx
	shr	%cl, %edx
	lea	-8(up), up
	neg	cnt
	mov	%eax, (rp)
	decl	VAR_COUNT
	jg	L(top)

	pop	%eax			FRAME_popl()
C Wind down: combine and store the last two result limbs.  %eax already
C holds the return value here.
L(end):
	shl	%cl, %ebp
	shl	%cl, %ebx
	or	%ebp, %edx
	mov	SAVE_EBP, %ebp
	mov	%edx, -4(rp)
	mov	%ebx, -8(rp)

L(quit):
	mov	SAVE_UP, up
	mov	SAVE_EBX, %ebx
	pop	rp			FRAME_popl()
	ret

C Shift-by-1 path.  Only rp has been pushed when control arrives here.
C %ebx and %ebp are never written on this path, and %ecx is reused as a limb
C register since cnt is known to be 1.
C
C %eax = (size+3)/4 is the loop counter consumed at L(goloop); the last limb
C is always stored at L(squit).  Entry point by size:
C   size == 1 (mod 4)   straight to L(goloop)
C   size == 2 (mod 4)   one limb doubled here, then L(goloop)
C   size == 3 (mod 4)   L(sentry) through L(odd), no limb doubled here
C   size == 0 (mod 4)   one limb doubled here, then L(sentry) through L(odd)
C The and, shr or add executed last before each entry leaves the carry flag
C in the state the first adc needs, because the lea, mov, dec and jump
C instructions in between do not modify it.
L(special):
deflit(`FRAME',4)
	lea	3(%edx), %eax		C size + 3
	dec	%edx			C size - 1
	mov	(up), %ecx
	shr	$2, %eax		C (size + 3) / 4
	and	$3, %edx		C (size - 1) % 4
	jz	L(goloop)		C jump if  size == 1 (mod 4)
	shr	%edx
	jnc	L(odd)			C jump if  size == 3 (mod 4)

	add	%ecx, %ecx
	lea	4(up), up
	mov	%ecx, (rp)
	mov	(up), %ecx
	lea	4(rp), rp

	dec	%edx
	jnz	L(goloop)		C jump if  size == 2 (mod 4)
C Bias both pointers by -8 so that the 8 and 12 displacements used from
C L(sentry) onwards address the current limbs.
L(odd):	lea	-8(up), up
	lea	-8(rp), rp
	jmp	L(sentry)		C reached if size == 0 or 3 (mod 4)

C Four limbs per iteration.  Every adc doubles a limb and adds the bit
C carried out of the limb below it.
L(sloop):
	adc	%ecx, %ecx
	mov	4(up), %edx
	mov	%ecx, (rp)
	adc	%edx, %edx
	mov	8(up), %ecx
	mov	%edx, 4(rp)
L(sentry):
	adc	%ecx, %ecx
	mov	12(up), %edx
	mov	%ecx, 8(rp)
	adc	%edx, %edx
	lea	16(up), up
	mov	%edx, 12(rp)
	lea	16(rp), rp
	mov	(up), %ecx
L(goloop):
	decl	%eax			C dec leaves the carry flag untouched
	jnz	L(sloop)

C Final limb.  %eax is zero here, so the last adc turns the carry out of the
C most significant limb into the return value.
L(squit):
	adc	%ecx, %ecx
	mov	%ecx, (rp)
	adc	%eax, %eax

	mov	SAVE_UP, up
	pop	rp			FRAME_popl()
	ret
EPILOGUE()
ASM_END()