dnl  PowerPC-64 mpn_lshiftc -- rp[] = ~up[] << cnt

dnl  Copyright 2003, 2005, 2010 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C		   cycles/limb
C POWER3/PPC630		 ?
C POWER4/PPC970		 ?
C POWER5		 2.25
C POWER6		 9.5
C POWER7		 2.15

C TODO
C  * Try to reduce the number of needed live registers
C  * Micro-optimise header code
C  * Keep in synch with lshift.asm and rshift.asm
C  * Could the long-scheduled std insns be less scheduled?

C INPUT PARAMETERS
define(`rp',  `r3')
define(`up',  `r4')
define(`n',   `r5')
define(`cnt', `r6')

define(`tnc',`r0')
define(`u0',`r30')
define(`u1',`r31')
define(`retval',`r5')

C mpn_lshiftc
C
C Shift the n-limb operand at up left by cnt bits, complement every result
C limb, and store the n result limbs at rp.
C
C In:   rp  (r3)  destination limb pointer
C       up  (r4)  source limb pointer
C       n   (r5)  limb count; the first limb is loaded unconditionally, so
C                 n is assumed to be at least 1
C       cnt (r6)  shift count; tnc = 64 - cnt is used as the opposite shift,
C                 so cnt is presumably in the range 1..63 -- TODO confirm
C Out:  the most significant source limb shifted right by 64 - cnt, i.e. the
C       bits shifted out at the top.  This value is NOT complemented.  It is
C       returned in r3, or split as r3 = high half, r4 = low half when
C       HAVE_ABI_mode32 is defined.
C
C Processing runs from the most significant limb downwards.  Each stored limb
C is nor(hi << cnt, lo >> tnc).  The least significant limb is ~(lo << cnt),
C so the cnt bits shifted in at the bottom come out as ones.
C
C r30 and r31 are saved below the stack pointer on entry and reloaded before
C returning.  r0 and r7-r12 are used as scratch, and n (r5) is overwritten
C early since retval is the same register.
C
C The main loop handles four limbs per iteration.  The code dispatches on
C n mod 4 to one of four feed-in sequences (b00, b01, b10, b11) which enter
C the software pipeline at the matching point.

ASM_START()
PROLOGUE(mpn_lshiftc)
	std	r31, -8(r1)		C save r31 (u1)
	std	r30, -16(r1)		C save r30 (u0)
	subfic	tnc, cnt, 64		C tnc = 64 - cnt
	sldi	r7, n, 3	C byte count corresponding to n
	add	up, up, r7	C up = up + n
	add	rp, rp, r7	C rp = rp + n
	rldicl.	r30, n, 0,62	C r30 = n & 3, set cr0
	cmpdi	cr6, r30, 2
	addi	r31, n, 3	C compute count...
	ld	r10, -8(up)	C load 1st limb for b00...b11
	srd	retval, r10, tnc	C bits shifted out of the top limb
C The loop count is (n + 3) / 4.  With HAVE_ABI_mode32 the count is formed
C from bits 2..31 of n + 3 only and the upper bits are cleared, so that
C nothing above bit 31 of n can reach the value copied into ctr.
ifdef(`HAVE_ABI_mode32',
`	rldicl	r31, r31, 62,34	C ...for ctr using low 32 bits only
	mtctr	r31		C copy count into ctr
',`	srdi	r31, r31, 2	C ...for ctr
	mtctr	r31		C copy count into ctr
')
	beq	cr0, L(b00)	C n mod 4 == 0
	blt	cr6, L(b01)	C n mod 4 == 1
	ld	r11, -16(up)	C load 2nd limb for b10 and b11
	beq	cr6, L(b10)	C n mod 4 == 2

C Feed-in for n mod 4 == 3
	ALIGN(16)
L(b11):	sld	r8, r10, cnt
	srd	r9, r11, tnc
	ld	u1, -24(up)
	addi	up, up, -24
	sld	r12, r11, cnt
	srd	r7, u1, tnc
	addi	rp, rp, 16
	bdnz	L(gt3)

C n == 3: finish without entering the loop
	nor	r11, r8, r9
	sld	r8, u1, cnt
	nor	r8, r8, r8	C lowest limb is ~(u << cnt)
	b	L(cj3)

	ALIGN(16)
L(gt3):	ld	u0, -8(up)
	nor	r11, r8, r9
	sld	r8, u1, cnt
	srd	r9, u0, tnc
	ld	u1, -16(up)
	nor	r10, r12, r7
	b	L(L11)

C Feed-in for n mod 4 == 2
	ALIGN(32)
L(b10):	sld	r12, r10, cnt
	addi	rp, rp, 24
	srd	r7, r11, tnc
	bdnz	L(gt2)

C n == 2: finish without entering the loop
	sld	r8, r11, cnt
	nor	r10, r12, r7
	nor	r8, r8, r8	C lowest limb is ~(u << cnt)
	b	L(cj2)

L(gt2):	ld	u0, -24(up)
	sld	r8, r11, cnt
	srd	r9, u0, tnc
	ld	u1, -32(up)
	nor	r10, r12, r7
	sld	r12, u0, cnt
	srd	r7, u1, tnc
	ld	u0, -40(up)
	nor	r11, r8, r9
	addi	up, up, -16
	b	L(L10)

C Feed-in for n mod 4 == 0
	ALIGN(16)
L(b00):	ld	u1, -16(up)
	sld	r12, r10, cnt
	srd	r7, u1, tnc
	ld	u0, -24(up)
	sld	r8, u1, cnt
	srd	r9, u0, tnc
	ld	u1, -32(up)
	nor	r10, r12, r7
	sld	r12, u0, cnt
	srd	r7, u1, tnc
	addi	rp, rp, 8
	bdz	L(cj4)		C n == 4: go straight to the wind-down

L(gt4):	addi	up, up, -32
	ld	u0, -8(up)
	nor	r11, r8, r9
	b	L(L00)

C Feed-in for n mod 4 == 1
	ALIGN(16)
L(b01):	bdnz	L(gt1)
C n == 1: the single result limb is ~(u << cnt)
	sld	r8, r10, cnt
	nor	r8, r8, r8
	std	r8, -8(rp)
	b	L(ret)

L(gt1):	ld	u0, -16(up)
	sld	r8, r10, cnt
	srd	r9, u0, tnc
	ld	u1, -24(up)
	sld	r12, u0, cnt
	srd	r7, u1, tnc
	ld	u0, -32(up)
	nor	r11, r8, r9
	sld	r8, u1, cnt
	srd	r9, u0, tnc
	ld	u1, -40(up)
	addi	up, up, -40
	nor	r10, r12, r7
	bdz	L(end)

C Main loop, four limbs per iteration.  u0 and u1 alternate as the incoming
C source limbs.  The pairs r8,r9 and r12,r7 hold the two shifted halves of a
C result limb, which are combined and complemented by nor into r11 and r10
C and stored on a later step.  up and rp both move down by 32 bytes per
C iteration.
	ALIGN(32)
L(top):	sld	r12, u0, cnt
	srd	r7, u1, tnc
	ld	u0, -8(up)
	std	r11, -8(rp)
	nor	r11, r8, r9
L(L00):	sld	r8, u1, cnt
	srd	r9, u0, tnc
	ld	u1, -16(up)
	std	r10, -16(rp)
	nor	r10, r12, r7
L(L11):	sld	r12, u0, cnt
	srd	r7, u1, tnc
	ld	u0, -24(up)
	std	r11, -24(rp)
	nor	r11, r8, r9
L(L10):	sld	r8, u1, cnt
	srd	r9, u0, tnc
	ld	u1, -32(up)
	addi	up, up, -32
	std	r10, -32(rp)
	addi	rp, rp, -32
	nor	r10, r12, r7
	bdnz	L(top)

C Wind-down: no more loads, store the limbs still in flight.  The cj4, cj3
C and cj2 entry points are shared with the short-operand paths above.
	ALIGN(32)
L(end):	sld	r12, u0, cnt
	srd	r7, u1, tnc
	std	r11, -8(rp)
L(cj4):	nor	r11, r8, r9
	sld	r8, u1, cnt
	std	r10, -16(rp)
	nor	r8, r8, r8	C lowest limb is ~(u << cnt)
L(cj3):	nor	r10, r12, r7
	std	r11, -24(rp)
L(cj2):	std	r10, -32(rp)
	std	r8, -40(rp)

L(ret):	ld	r31, -8(r1)		C restore r31
	ld	r30, -16(r1)		C restore r30
ifdef(`HAVE_ABI_mode32',
`	srdi	r3, retval, 32
	mr	r4, retval
',`	mr	r3, retval')
	blr
EPILOGUE()