dnl  PowerPC-64 mpn_rshift -- rp[] = up[] >> cnt

dnl  Copyright 2003, 2005, 2010, 2011 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C                   cycles/limb
C POWER3/PPC630          ?
C POWER4/PPC970          ?
C POWER5                 2.25
C POWER6                 9.75
C POWER7                 2.15

C TODO
C  * Try to reduce the number of needed live registers
C  * Micro-optimise header code
C  * Keep in synch with lshift.asm and lshiftc.asm

C OVERVIEW
C  Shift the n-limb number at up right by cnt bits and store the n-limb
C  result at rp, walking both operands from the lowest address upwards.
C  Documented GMP prototype, per the GMP manual:
C    mp_limb_t mpn_rshift (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned cnt)
C
C  Each result limb is (up[i] >> cnt) | (up[i+1] << (64 - cnt)); the top
C  result limb is just up[n-1] >> cnt.  The return value is
C  up[0] << (64 - cnt), that is, the bits shifted out at the low end.
C
C  Assumptions not checked by this code:
C   * n >= 1, since up[0] is loaded unconditionally.
C   * cnt is in the range the GMP manual requires, 1 to 63.
C   * r30 and r31 are saved below the stack pointer without adjusting r1;
C     this presumes the ABI allows that for a leaf routine.  TODO confirm.
C
C  Structure: the header computes ctr = ceil(n / 4) and dispatches on n mod 4
C  to one of four feed-in sequences, b00, b01, b10 or b11.  Each of them
C  preloads limbs, biases rp and up, and then either jumps into the 4-way
C  unrolled loop at a matching entry point or, for small n, jumps straight
C  into the shared wind-down code at cj4, cj3 or cj2.
C
C  Register roles inside the loop:
C   u0, u1    most recently loaded source limbs, used alternately
C   r8, r12   source limb shifted right by cnt
C   r9, r7    next source limb shifted left by tnc
C   r10, r11  finished result limbs waiting to be stored

C INPUT PARAMETERS
define(`rp',  `r3')
define(`up',  `r4')
define(`n',   `r5')
define(`cnt', `r6')

define(`tnc',`r0')
define(`u0',`r30')
define(`u1',`r31')
define(`retval',`r5')

ASM_START()
PROLOGUE(mpn_rshift)
	std	r31, -8(r1)		C save r31, used as u1
	std	r30, -16(r1)		C save r30, used as u0
	subfic	tnc, cnt, 64		C tnc = 64 - cnt
C	sldi	r30, n, 3	C byte count corresponding to n
C	add	rp, rp, r30	C rp = rp + n
C	add	up, up, r30	C up = up + n
	rldicl.	r30, n, 0,62	C r30 = n & 3, set cr0
	cmpdi	cr6, r30, 2		C cr6 = compare n mod 4 against 2
	addi	r31, n, 3	C compute count...
	ld	r10, 0(up)	C load 1st limb for b00...b11
	sld	retval, r10, tnc	C return value, overwrites n in r5
C The mode32 variant below performs the same divide by 4 but keeps only the
C low 30 bits of the quotient.
ifdef(`HAVE_ABI_mode32',
`	rldicl	r31, r31, 62,34',	C ...branch count
`	srdi	r31, r31, 2')	C ...for ctr
	mtctr	r31		C copy count into ctr
	beq	cr0, L(b00)		C n mod 4 = 0
	blt	cr6, L(b01)		C n mod 4 = 1
	ld	r11, 8(up)	C load 2nd limb for b10 and b11
	beq	cr6, L(b10)		C n mod 4 = 2, else fall into b11

C n mod 4 = 3.  rp is biased by -16 so that the stores at 16, 24 and 32
C in the shared tail hit the first three result limbs when n = 3.
	ALIGN(16)
L(b11):	srd	r8, r10, cnt
	sld	r9, r11, tnc
	ld	u1, 16(up)
	addi	up, up, 24
	srd	r12, r11, cnt
	sld	r7, u1, tnc
	addi	rp, rp, -16
	bdnz	L(gt3)			C more than 3 limbs

	or	r11, r8, r9		C n = 3: finish via the tail at cj3
	srd	r8, u1, cnt
	b	L(cj3)

C n = 7, 11, ...: preload two more limbs and enter the loop at L11.
	ALIGN(16)
L(gt3):	ld	u0, 0(up)
	or	r11, r8, r9
	srd	r8, u1, cnt
	sld	r9, u0, tnc
	ld	u1, 8(up)
	or	r10, r12, r7
	b	L(L11)

C n mod 4 = 2.  rp is biased by -24 so that the stores at 24 and 32 in the
C shared tail hit the two result limbs when n = 2.
	ALIGN(32)
L(b10):	srd	r12, r10, cnt
	addi	rp, rp, -24
	sld	r7, r11, tnc
	bdnz	L(gt2)			C more than 2 limbs

	srd	r8, r11, cnt		C n = 2: finish via the tail at cj2
	or	r10, r12, r7
	b	L(cj2)

C n = 6, 10, ...: preload three more limbs and enter the loop at L10.
L(gt2):	ld	u0, 16(up)
	srd	r8, r11, cnt
	sld	r9, u0, tnc
	ld	u1, 24(up)
	or	r10, r12, r7
	srd	r12, u0, cnt
	sld	r7, u1, tnc
	ld	u0, 32(up)
	or	r11, r8, r9
	addi	up, up, 16
	b	L(L10)

C n mod 4 = 0.  rp is biased by -8; with n = 4 the counter reaches zero and
C the four results are completed by the tail starting at cj4.
	ALIGN(16)
L(b00):	ld	u1, 8(up)
	srd	r12, r10, cnt
	sld	r7, u1, tnc
	ld	u0, 16(up)
	srd	r8, u1, cnt
	sld	r9, u0, tnc
	ld	u1, 24(up)
	or	r10, r12, r7
	srd	r12, u0, cnt
	sld	r7, u1, tnc
	addi	rp, rp, -8
	bdz	L(cj4)			C n = 4

C n = 8, 12, ...: step up past the four preloaded limbs, enter at L00.
L(gt4):	addi	up, up, 32
	ld	u0, 0(up)
	or	r11, r8, r9
	b	L(L00)

C n mod 4 = 1.  With n = 1 the single result limb is up[0] >> cnt.
	ALIGN(16)
L(b01):	bdnz	L(gt1)			C more than 1 limb
	srd	r8, r10, cnt
	std	r8, 0(rp)
	b	L(ret)

C n = 5, 9, ...: preload four more limbs, then fall into the loop, or go
C directly to the wind-down code when n = 5.
L(gt1):	ld	u0, 8(up)
	srd	r8, r10, cnt
	sld	r9, u0, tnc
	ld	u1, 16(up)
	srd	r12, u0, cnt
	sld	r7, u1, tnc
	ld	u0, 24(up)
	or	r11, r8, r9
	srd	r8, u1, cnt
	sld	r9, u0, tnc
	ld	u1, 32(up)
	addi	up, up, 40
	or	r10, r12, r7
	bdz	L(end)			C n = 5

C Main loop: four limbs loaded and four result limbs stored per iteration.
C L00, L11 and L10 are the mid-loop entry points used by the feed-in code.
	ALIGN(32)
L(top):	srd	r12, u0, cnt
	sld	r7, u1, tnc
	ld	u0, 0(up)
	std	r11, 0(rp)
	or	r11, r8, r9
L(L00):	srd	r8, u1, cnt
	sld	r9, u0, tnc
	ld	u1, 8(up)
	std	r10, 8(rp)
	or	r10, r12, r7
L(L11):	srd	r12, u0, cnt
	sld	r7, u1, tnc
	ld	u0, 16(up)
	std	r11, 16(rp)
	or	r11, r8, r9
L(L10):	srd	r8, u1, cnt
	sld	r9, u0, tnc
	ld	u1, 24(up)
	addi	up, up, 32
	std	r10, 24(rp)
	addi	rp, rp, 32
	or	r10, r12, r7
	bdnz	L(top)

C Wind-down: no more loads.  Combine the limbs still in registers and store
C the remaining results; the final store is the top limb, which receives
C no bits from above.  cj4, cj3 and cj2 are entry points for n = 4, 3, 2.
	ALIGN(32)
L(end):	srd	r12, u0, cnt
	sld	r7, u1, tnc
	std	r11, 0(rp)
L(cj4):	or	r11, r8, r9
	srd	r8, u1, cnt
	std	r10, 8(rp)
L(cj3):	or	r10, r12, r7
	std	r11, 16(rp)
L(cj2):	std	r10, 24(rp)
	std	r8, 32(rp)

C Restore the saved registers and return retval.  In the mode32 variant the
C 64-bit value is returned split: upper 32 bits in r3, and retval copied
C unchanged into r4.
L(ret):	ld	r31, -8(r1)
	ld	r30, -16(r1)
ifdef(`HAVE_ABI_mode32',
`	srdi	r3, retval, 32
	mr	r4, retval
',`	mr	r3, retval')
	blr
EPILOGUE()