dnl  PowerPC-64 mpn_lshift -- rp[] = up[] << cnt

dnl  Copyright 2003, 2005, 2010, 2013 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')

C                   cycles/limb
C POWER3/PPC630          ?
C POWER4/PPC970          ?
C POWER5                 2.25
C POWER6                 4

C TODO
C  * Micro-optimise header code
C  * Perhaps do 4-way unrolling, for 2.5 c/l on POWER6.  The code is 4236
C    bytes, 4-way code would become about 50% larger.

C OVERVIEW
C  Shift the n-limb operand at up[] left by cnt bits and store the n result
C  limbs at rp[].  The return value is the most significant source limb
C  shifted right by 64-cnt, i.e. the bits pushed out at the top.
C  (Presumed C prototype, to be confirmed against the GMP headers:
C   mp_limb_t mpn_lshift (mp_ptr rp, mp_srcptr up, mp_size_t n, unsigned cnt).)
C
C  Limbs are processed from the most significant end downwards, two limbs
C  per loop iteration.
C
C  Dispatch scheme: the SHIFT macro below is instantiated once for every
C  shift count 1..63, each instance using immediate shift amounts.  Every
C  instance is exactly 16 instructions = 64 bytes, so the entry point for a
C  given cnt is found by address arithmetic from L(e1) and reached with
C  mtlr/blr.  The return address of the caller is parked in r12 meanwhile.
C  Any change to the SHIFT body must keep both its 64-byte size and the
C  20-byte distance from L(oN) to L(eN), or the constants -64 and -84 in the
C  entry code become wrong.
C
C  Restrictions visible from the code: only blocks for cnt = 1..63 exist, so
C  other values of cnt branch outside the table.  n = 0 is not special-cased.

C INPUT PARAMETERS
define(`rp_param',  `r3')
define(`up',        `r4')
define(`n',         `r5')
define(`cnt',       `r6')

C WORKING REGISTERS
C  tnc     64 - cnt, used for the return value computation
C  retval  shares r3 with rp_param, hence rp is moved to r7 first
C  rp      running result pointer
C  r8,r9   source limbs;  r10,r11  result limbs / scratch;  r12  saved LR
define(`tnc',`r0')
define(`retval',`r3')
define(`rp',  `r7')

ASM_START()
PROLOGUE(mpn_lshift)

ifdef(`HAVE_ABI_mode32',`
	rldicl	n, n, 0,32		C FIXME: avoid this zero extend
')
	mflr	r12			C save return address, LR is used for dispatch
	sldi	r8, n, 3		C r8 = n * 8 = operand size in bytes
	sldi	r10, cnt, 6		C multiply cnt by size of a SHIFT block
	LEAL(	r11, L(e1))		C address of L(e1) label in SHIFT(1)
	add	up, up, r8		C make up point at end of up[]
	add	r11, r11, r10		C r11 = L(e1) + 64*cnt, one block past L(eN);
					C corrected by -64 or -84 below
	srdi	r10, n, 1		C r10 = n / 2 = loop count
	add	rp, rp_param, r8	C make rp point at end of rp[]
	subfic	tnc, cnt, 64		C tnc = 64 - cnt
	rlwinm.	r8, n, 0,31,31		C extract bit 0
	mtctr	r10
	beq	L(evn)			C bit 0 clear: n is even

C n is odd: enter the unrolled code at L(oN)
L(odd):	ld	r9, -8(up)		C most significant limb
	cmpdi	cr0, n, 1		C n = 1?
	beq	L(1)
	ld	r8, -16(up)		C second most significant limb
	addi	r11, r11, -84		C L(o1) - L(e1) - 64
	mtlr	r11
	srd	r3, r9, tnc		C retval
	addi	up, up, 8		C bias pointers for the mid-loop entry
	addi	rp, rp, -8
	blr				C branch to L(oN)

C n is even: enter the unrolled code at L(eN)
L(evn):	ld	r8, -8(up)		C most significant limb
	ld	r9, -16(up)		C second most significant limb
	addi	r11, r11, -64		C r11 = L(eN)
	mtlr	r11
	srd	r3, r8, tnc		C retval
	blr				C branch to L(eN)

C n = 1: single limb, no table dispatch needed
L(1):	srd	r3, r9, tnc		C retval
	sld	r8, r9, cnt
	std	r8, -8(rp)
	mtlr	r12			C restore return address
ifdef(`HAVE_ABI_mode32',
`	mr	r4, r3			C return the limb as a pair, low half in r4
	srdi	r3, r3, 32		C and high half in r3
')
	blr


C SHIFT(c): one 64-byte block handling shift count c with immediate operands.
C Entry points: L(oc) for odd n, L(ec) for even n, L(loc) is the loop top.
C The two trailing nops pad the block to 16 instructions.
define(SHIFT,`
L(lo$1):ld	r8, -24(up)
	std	r11, -8(rp)
	addi	rp, rp, -16
L(o$1):	srdi	r10, r8, eval(64-$1)
	rldimi	r10, r9, $1, 0
	ld	r9, -32(up)
	addi	up, up, -16
	std	r10, 0(rp)
L(e$1):	srdi	r11, r9, eval(64-$1)
	rldimi	r11, r8, $1, 0
	bdnz	L(lo$1)
	std	r11, -8(rp)
	sldi	r10, r9, $1
	b	L(com)
	nop
	nop
')

	ALIGN(64)
forloop(`i',1,63,`SHIFT(i)')

C Common tail: store the least significant result limb and return
L(com):	std	r10, -16(rp)
	mtlr	r12			C restore return address
ifdef(`HAVE_ABI_mode32',
`	mr	r4, r3			C return the limb as a pair, low half in r4
	srdi	r3, r3, 32		C and high half in r3
')
	blr
EPILOGUE()
ASM_END()