dnl  PowerPC-64 mpn_lshiftc -- rp[] = ~up[] << cnt

dnl  Copyright 2003, 2005, 2010 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')

C                   cycles/limb
C POWER3/PPC630          ?
C POWER4/PPC970          ?
C POWER5                 2.25
C POWER6                 9.5
C POWER7                 2.15

C TODO
C  * Try to reduce the number of needed live registers
C  * Micro-optimise header code
C  * Keep in synch with lshift.asm and rshift.asm

C OVERVIEW
C  Shifts the n-limb operand at up left by cnt bits and stores the bitwise
C  complement of the result at rp.  Limbs are 8 bytes wide (ld/std accesses,
C  n scaled by 8).
C
C  Every stored limb except the lowest is
C      nor (up[i] << cnt, up[i-1] >> tnc)       with tnc = 64 - cnt.
C  The lowest limb is nor (up[0] << cnt, up[0] << cnt), so its low cnt bits
C  are stored as ones.  The return value is up[n-1] >> tnc, i.e. the bits
C  shifted out at the top, and it is NOT complemented.
C
C  Both operands are walked from the highest address downwards: up and rp are
C  first advanced past their last limb and then addressed with negative
C  displacements.
C
C  The main loop at L(top) produces four limbs per iteration and is software
C  pipelined (loads, shifts, nor and std of different limbs are interleaved).
C  ctr is set to (n + 3) >> 2.  The setup code dispatches on n & 3 to one of
C  L(b00), L(b01), L(b10), L(b11); each of these primes the pipeline and then
C  either enters the loop at a matching entry point or, for small n, jumps
C  directly into the wind-down code.
C
C  NOTE(review): nothing here checks n or cnt.  Presumably the caller must
C  guarantee n >= 1 and 1 <= cnt <= 63 -- confirm against the mpn interface.

C REGISTER USAGE
C  tnc (r0)        64 - cnt, the complementary right shift count
C  u0, u1          most recently loaded source limbs (r30, r31; saved on
C                  entry and restored at L(ret))
C  retval (r5)     return value; shares r5 with n, which is read for the last
C                  time before retval is written
C  r8/r9, r12/r7   left shifted / right shifted parts of the two result limbs
C                  currently in flight
C  r10, r11        first (and for b10/b11 second) source limb during setup,
C                  afterwards finished result limbs waiting for their std

C INPUT PARAMETERS
define(`rp',  `r3')
define(`up',  `r4')
define(`n',   `r5')
define(`cnt', `r6')

define(`tnc',`r0')
define(`u0',`r30')
define(`u1',`r31')
define(`retval',`r5')

ASM_START()
PROLOGUE(mpn_lshiftc)
C Save r31 and r30 below the stack pointer; r1 itself is not adjusted and the
C routine makes no calls.  NOTE(review): presumably this relies on the ABI
C permitting stores just below r1 in such a function -- confirm.
	std	r31, -8(r1)
	std	r30, -16(r1)
	subfic	tnc, cnt, 64	C tnc = 64 - cnt
	sldi	r7, n, 3	C byte count corresponding to n
	add	up, up, r7	C up = up + n (one past the last source limb)
	add	rp, rp, r7	C rp = rp + n (one past the last result limb)
	rldicl.	r30, n, 0,62	C r30 = n & 3, set cr0
	cmpdi	cr6, r30, 2	C cr6 = (n & 3) compared with 2
	addi	r31, n, 3	C compute count...
	ld	r10, -8(up)	C load 1st limb for b00...b11
	srd	retval, r10, tnc	C bits shifted out at the top; overwrites n (same r5)
	srdi	r31, r31, 2	C ...for ctr
	mtctr	r31		C copy count into ctr
	beq	cr0, L(b00)	C n & 3 == 0
	blt	cr6, L(b01)	C n & 3 == 1
	ld	r11, -16(up)	C load 2nd limb for b10 and b11
	beq	cr6, L(b10)	C n & 3 == 2, else fall through to b11

	ALIGN(16)
C n & 3 == 3.  r10 and r11 hold the two highest source limbs.
L(b11):	sld	r8, r10, cnt
	srd	r9, r11, tnc
	ld	u1, -24(up)
	addi	up, up, -24
	sld	r12, r11, cnt
	srd	r7, u1, tnc
	addi	rp, rp, 16	C bias rp for the store displacements at L(L11) and L(cj3)
	bdnz	L(gt3)		C taken when n > 3

C n == 3: complete the three result limbs and join the wind-down code.
	nor	r11, r8, r9
	sld	r8, u1, cnt
	nor	r8, r8, r8	C lowest limb: complement of the shifted limb alone
	b	L(cj3)

	ALIGN(16)
C n & 3 == 3 and n > 3: load two more limbs, then enter the loop at L(L11).
L(gt3):	ld	u0, -8(up)
	nor	r11, r8, r9
	sld	r8, u1, cnt
	srd	r9, u0, tnc
	ld	u1, -16(up)
	nor	r10, r12, r7
	b	L(L11)

	ALIGN(32)
C n & 3 == 2.  r10 and r11 hold the two highest source limbs.
L(b10):	sld	r12, r10, cnt
	addi	rp, rp, 24	C bias rp for the store displacements at L(L10) and L(cj2)
	srd	r7, r11, tnc
	bdnz	L(gt2)		C taken when n > 2

C n == 2: complete both result limbs and join the wind-down code.
	sld	r8, r11, cnt
	nor	r10, r12, r7
	nor	r8, r8, r8	C lowest limb: complement of the shifted limb alone
	b	L(cj2)

C n & 3 == 2 and n > 2: load three more limbs, then enter the loop at L(L10).
L(gt2):	ld	u0, -24(up)
	sld	r8, r11, cnt
	srd	r9, u0, tnc
	ld	u1, -32(up)
	nor	r10, r12, r7
	sld	r12, u0, cnt
	srd	r7, u1, tnc
	ld	u0, -40(up)
	nor	r11, r8, r9
	addi	up, up, -16
	b	L(L10)

	ALIGN(16)
C n & 3 == 0.  Only r10 (the highest limb) has been loaded so far.
L(b00):	ld	u1, -16(up)
	sld	r12, r10, cnt
	srd	r7, u1, tnc
	ld	u0, -24(up)
	sld	r8, u1, cnt
	srd	r9, u0, tnc
	ld	u1, -32(up)
	nor	r10, r12, r7
	sld	r12, u0, cnt
	srd	r7, u1, tnc
	addi	rp, rp, 8	C bias rp for the store displacements at L(L00) and L(cj4)
	bdz	L(cj4)		C taken when n == 4

C n & 3 == 0 and n > 4: load one more limb, then enter the loop at L(L00).
L(gt4):	addi	up, up, -32
	ld	u0, -8(up)
	nor	r11, r8, r9
	b	L(L00)

	ALIGN(16)
C n & 3 == 1.  Only r10 (the highest limb) has been loaded so far.
L(b01):	bdnz	L(gt1)		C taken when n > 1
C n == 1: the single result limb is the complement of the shifted limb.
	sld	r8, r10, cnt
	nor	r8, r8, r8
	std	r8, -8(rp)
	b	L(ret)

C n & 3 == 1 and n > 1: load four more limbs and prime both result
C registers, then fall into the loop, or go to L(end) when n == 5.
L(gt1):	ld	u0, -16(up)
	sld	r8, r10, cnt
	srd	r9, u0, tnc
	ld	u1, -24(up)
	sld	r12, u0, cnt
	srd	r7, u1, tnc
	ld	u0, -32(up)
	nor	r11, r8, r9
	sld	r8, u1, cnt
	srd	r9, u0, tnc
	ld	u1, -40(up)
	addi	up, up, -40
	nor	r10, r12, r7
	bdz	L(end)		C taken when n == 5

	ALIGN(32)
C Main loop: four result limbs per iteration.  Each labelled group shifts the
C next pair of source limbs, loads one further source limb, stores one
C finished result limb and combines the previous pair with nor.  L(L00),
C L(L11) and L(L10) are the entry points used by the setup code above.
L(top):	sld	r12, u0, cnt
	srd	r7, u1, tnc
	ld	u0, -8(up)
	std	r11, -8(rp)
	nor	r11, r8, r9
L(L00):	sld	r8, u1, cnt
	srd	r9, u0, tnc
	ld	u1, -16(up)
	std	r10, -16(rp)
	nor	r10, r12, r7
L(L11):	sld	r12, u0, cnt
	srd	r7, u1, tnc
	ld	u0, -24(up)
	std	r11, -24(rp)
	nor	r11, r8, r9
L(L10):	sld	r8, u1, cnt
	srd	r9, u0, tnc
	ld	u1, -32(up)
	addi	up, up, -32	C step both pointers down by four limbs
	std	r10, -32(rp)
	addi	rp, rp, -32
	nor	r10, r12, r7
	bdnz	L(top)

	ALIGN(32)
C Wind-down: no further loads; finish and store the limbs still in flight.
C L(cj4), L(cj3) and L(cj2) are the join points for n == 4, 3 and 2.
L(end):	sld	r12, u0, cnt
	srd	r7, u1, tnc
	std	r11, -8(rp)
L(cj4):	nor	r11, r8, r9
	sld	r8, u1, cnt
	std	r10, -16(rp)
	nor	r8, r8, r8	C lowest limb: complement of the shifted limb alone
L(cj3):	nor	r10, r12, r7
	std	r11, -24(rp)
L(cj2):	std	r10, -32(rp)
	std	r8, -40(rp)	C lowest result limb

C Restore the two saved registers and return.  When HAVE_ABI_mode32 is
C defined the result is handed back in two registers, r3 = retval >> 32 and
C r4 = retval; otherwise retval is moved to r3 unchanged.
C NOTE(review): in the mode32 case r4 still carries all 64 bits of retval;
C presumably the caller only looks at the low 32 bits -- confirm.
L(ret):	ld	r31, -8(r1)
	ld	r30, -16(r1)
ifdef(`HAVE_ABI_mode32',
`	srdi	r3, retval, 32
	mr	r4, retval
',`	mr	r3, retval')
	blr
EPILOGUE()