dnl  PowerPC-64 mpn_lshiftc -- rp[] = ~up[] << cnt

dnl  Copyright 2003, 2005, 2010, 2013 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')

C                   cycles/limb
C POWER3/PPC630          ?
C POWER4/PPC970          ?
C POWER5                 2.25
C POWER6                 4

C TODO
C  * Micro-optimise header code
C  * Perhaps do 4-way unrolling, for 2.5 c/l on POWER6.  The code is 4236
C    bytes, 4-way code would become about 50% larger.

C OVERVIEW
C  Shift the n-limb operand at up left by cnt bits, complement every result
C  limb, and store the n result limbs at rp.  The return value is the bits
C  shifted out of the most significant limb, i.e. up[n-1] >> (64-cnt); the
C  return value itself is not complemented.
C
C  Only SHIFT(1) through SHIFT(63) are generated below, so cnt is expected to
C  be in the range 1..63.  The code reads up[n-1] unconditionally, so n is
C  presumably required to be at least 1 (confirm against the callers).
C
C  Processing runs from the most significant limb downwards.  There is one
C  2-way unrolled loop per shift count, each occupying exactly 16 instructions
C  (64 bytes).  The entry code computes the address of the wanted entry point
C  inside the block for cnt, puts it in LR and jumps there with blr.  The
C  caller return address is kept in r12 meanwhile and moved back to LR before
C  the real return.
C
C REGISTER USE
C  r0  (tnc)     64 - cnt
C  r3  (retval)  return value (incoming rp is copied to r7 first)
C  r7  (rp)      destination pointer, walks downwards
C  r8, r9        source limbs
C  r10, r11      result limbs awaiting complement and store
C  r12           saved LR
C  ctr           n/2, iteration count for the unrolled loop

C INPUT PARAMETERS
define(`rp_param',  `r3')
define(`up',  `r4')
define(`n',  `r5')
define(`cnt',  `r6')

define(`tnc',`r0')
define(`retval',`r3')
define(`rp',  `r7')

ASM_START()
PROLOGUE(mpn_lshiftc)

ifdef(`HAVE_ABI_mode32',`
	rldicl	n, n, 0,32		C FIXME: avoid this zero extend
')
	mflr	r12			C save return address, LR is reused below
	sldi	r8, n, 3		C operand size in bytes
	sldi	r10, cnt, 6		C multiply cnt by size of a SHIFT block
	LEAL(	r11, L(e1))		C address of L(e1) label in SHIFT(1)
	add	up, up, r8		C make up point at end of up[]
	add	r11, r11, r10		C L(e1) + 64*cnt, adjusted below to L(eN) or L(oN)
	srdi	r10, n, 1		C n/2
	add	rp, rp_param, r8	C make rp point at end of rp[]
	subfic	tnc, cnt, 64		C tnc = 64 - cnt
	rlwinm.	r8, n, 0,31,31		C extract bit 0
	mtctr	r10			C loop count = n/2
	beq	L(evn)			C bit 0 clear, n is even

L(odd):	ld	r9, -8(up)		C most significant limb
	cmpdi	cr0, n, 1		C n = 1?
	beq	L(1)
	ld	r8, -16(up)		C second most significant limb
	addi	r11, r11, -88		C L(o1) - L(e1) - 64
	mtlr	r11
	srd	r3, r9, tnc		C retval
	addi	up, up, 8		C bias pointers to suit the fixed
	addi	rp, rp, -8		C offsets used from L(oN) onwards
	blr				C branch to L(oN)

L(evn):	ld	r8, -8(up)		C most significant limb
	ld	r9, -16(up)		C second most significant limb
	addi	r11, r11, -64		C step back one block to L(eN)
	mtlr	r11
	srd	r3, r8, tnc		C retval
	blr				C branch to L(eN)

C Single limb operand: shift, complement, store, return.
L(1):	srd	r3, r9, tnc		C retval
	sld	r8, r9, cnt
	nor	r8, r8, r8		C complement
	std	r8, -8(rp)
	mtlr	r12			C restore return address
ifdef(`HAVE_ABI_mode32',
`	mr	r4, r3			C low half of retval goes in r4
	srdi	r3, r3, 32		C high half of retval goes in r3
')
	blr


C SHIFT(c) expands to the unrolled loop for shift count c.  Each expansion
C must stay at exactly 16 instructions, since the entry code locates a block
C by adding 64*cnt to the address of L(e1), and finds L(oN) at 24 bytes
C before L(eN).  The final nop is never executed, it only pads the block.
define(SHIFT,`
L(lo$1):ld	r8, -24(up)		C next source limb
	nor	r11, r11, r11		C complement pending result limb
	std	r11, -8(rp)
	addi	rp, rp, -16
L(o$1):	srdi	r10, r8, eval(64-$1)	C bits carried in from the lower limb
	rldimi	r10, r9, $1, 0		C merge with upper limb shifted left
	ld	r9, -32(up)		C next source limb
	addi	up, up, -16
	nor	r10, r10, r10		C complement
	std	r10, 0(rp)
L(e$1):	srdi	r11, r9, eval(64-$1)	C bits carried in from the lower limb
	rldimi	r11, r8, $1, 0		C merge with upper limb shifted left
	bdnz	L(lo$1)
	sldi	r10, r9, $1		C least significant result limb
	b	L(com)
	nop
')

	ALIGN(64)
forloop(`i',1,63,`SHIFT(i)')

C Common exit: complement and store the two remaining result limbs.
L(com):	nor	r11, r11, r11
	nor	r10, r10, r10
	std	r11, -8(rp)
	std	r10, -16(rp)
	mtlr	r12			C restore return address
ifdef(`HAVE_ABI_mode32',
`	mr	r4, r3			C low half of retval goes in r4
	srdi	r3, r3, 32		C high half of retval goes in r3
')
	blr
EPILOGUE()
ASM_END()