dnl  PowerPC-32 mpn_lshift -- Shift a number left.

dnl  Copyright 1995, 1998, 2000, 2002, 2003, 2004, 2005 Free Software
dnl  Foundation, Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')

C                cycles/limb
C 603e:            ?
C 604e:            3.0
C 75x (G3):        3.0
C 7400,7410 (G4):  3.0
C 7445,7455 (G4+): 2.5
C 7447,7457 (G4+): 2.25
C power4/ppc970:   2.5
C power5:          2.5

C INPUT PARAMETERS
C rp	r3
C up	r4
C n	r5
C cnt	r6

C OVERVIEW
C Shift the n-limb number at up left by cnt bits and store the n result
C limbs at rp.  Limbs are 32-bit words (word loads/stores, n scaled by 4).
C The value returned in r3 is the top limb shifted right by 32-cnt, i.e.
C the bits pushed out of the most significant limb.
C
C Both pointers are first advanced to one past the last limb and then walk
C downwards, so limbs are processed from most to least significant.
C
C Two code paths:
C   n <= 30   two-way unrolled loop using volatile registers only.
C   n >  30   L(BIG): four-way unrolled, software-pipelined loop that also
C             uses r24-r31, which are saved and restored around it.
C
C NOTE(review): the first limb is loaded unconditionally, so the code looks
C like it assumes n >= 1 -- confirm against callers.
C NOTE(review): presumably 1 <= cnt <= 31, since 32-cnt is used as a shift
C count -- confirm against the documented contract.

ASM_START()
PROLOGUE(mpn_lshift)
	cmpwi	cr0, r5, 30	C more than 30 limbs?
	slwi	r0, r5, 2	C r0 = n * 4 = operand size in bytes
	add	r4, r4, r0	C make r4 point at end of up
	add	r7, r3, r0	C make r7 point at end of rp
	bgt	L(BIG)		C branch if more than 30 limbs

C Small operand path.
C   r8       = 32 - cnt
C   r10, r11 = alternate as current limb and next lower limb
C   r9, r12  = scratch for the two shifted halves
	mtctr	r5		C copy size into CTR
	subfic	r8, r6, 32	C r8 = 32 - cnt
	lwzu	r11, -4(r4)	C load first (most significant) up limb
	srw	r3, r11, r8	C compute function return value
	bdz	L(end1)		C n == 1: only the final store remains

C Each half of the loop emits one result limb:
C   (current << cnt) | (next lower >> (32 - cnt))
L(oop):	lwzu	r10, -4(r4)	C next lower limb
	slw	r9, r11, r6
	srw	r12, r10, r8
	or	r9, r9, r12
	stwu	r9, -4(r7)
	bdz	L(end2)		C out of limbs, r10 holds the lowest one
	lwzu	r11, -4(r4)	C next lower limb, roles of r10/r11 swapped
	slw	r9, r10, r6
	srw	r12, r11, r8
	or	r9, r9, r12
	stwu	r9, -4(r7)
	bdnz	L(oop)

C Lowest result limb: nothing below it, so only the left shift is stored.
L(end1):
	slw	r0, r11, r6	C lowest limb is in r11
	stw	r0, -4(r7)
	blr
L(end2):
	slw	r0, r10, r6	C lowest limb is in r10
	stw	r0, -4(r7)
	blr

C Large operand path.
C   r0       = left-shifted part of the previous limb, still to be merged
C   r28-r31  = limbs loaded from up
C   r24-r27  = right-shifted parts, then the merged result limbs
C   r9-r12   = left-shifted parts
C r24-r31 (8 words, 32 bytes) are stored just below the stack pointer; r1
C itself is not adjusted.
C NOTE(review): this relies on the 32 bytes below r1 being safe to use --
C confirm for the target ABI.
L(BIG):
	stmw	r24, -32(r1)	C save registers we are supposed to preserve
	lwzu	r9, -4(r4)	C most significant limb
	subfic	r8, r6, 32	C r8 = 32 - cnt
	srw	r3, r9, r8	C compute function return value
	slw	r0, r9, r6	C pending high part of the top result limb
	addi	r5, r5, -1	C r5 = n - 1 limbs still to be loaded

C Handle (n-1) mod 4 limbs one at a time so that the number of limbs left
C for the unrolled loop is a multiple of 4.
	andi.	r10, r5, 3	C count for spill loop
	beq	L(e)		C already a multiple of 4
	mtctr	r10
	lwzu	r28, -4(r4)
	bdz	L(xe0)		C only one spill limb

L(loop0):
	slw	r12, r28, r6
	srw	r24, r28, r8
	lwzu	r28, -4(r4)	C fetch next spill limb
	or	r24, r0, r24
	stwu	r24, -4(r7)
	mr	r0, r12		C carry left-shifted part to the next limb
	bdnz	L(loop0)	C taken at most once!

C Last spill limb (already in r28); no further load here.
L(xe0):	slw	r12, r28, r6
	srw	r24, r28, r8
	or	r24, r0, r24
	stwu	r24, -4(r7)
	mr	r0, r12

C Loads run one group of four limbs ahead of the stores.  The loop count is
C therefore one less than the number of groups, and the last group, which
C the loop has already loaded, is finished by the code after the loop.
L(e):	srwi	r5, r5, 2	C count for unrolled loop
	addi	r5, r5, -1	C last group is handled after the loop
	mtctr	r5
	lwz	r28, -4(r4)	C preload the first group
	lwz	r29, -8(r4)
	lwz	r30, -12(r4)
	lwzu	r31, -16(r4)	C update form steps r4 down by one group

L(loopU):
	slw	r9, r28, r6
	srw	r24, r28, r8
	lwz	r28, -4(r4)	C reload r28 with its limb from the next group
	slw	r10, r29, r6
	srw	r25, r29, r8
	lwz	r29, -8(r4)
	slw	r11, r30, r6
	srw	r26, r30, r8
	lwz	r30, -12(r4)
	slw	r12, r31, r6
	srw	r27, r31, r8
	lwzu	r31, -16(r4)	C update form steps r4 down by one group
	or	r24, r0, r24	C merge with part carried from previous group
	stw	r24, -4(r7)
	or	r25, r9, r25
	stw	r25, -8(r7)
	or	r26, r10, r26
	stw	r26, -12(r7)
	or	r27, r11, r27
	stwu	r27, -16(r7)	C update form steps r7 down by one group
	mr	r0, r12		C carry left-shifted part to the next group
	bdnz	L(loopU)

C Final group: same computation as the loop body, without further loads.
	slw	r9, r28, r6
	srw	r24, r28, r8
	slw	r10, r29, r6
	srw	r25, r29, r8
	slw	r11, r30, r6
	srw	r26, r30, r8
	slw	r12, r31, r6
	srw	r27, r31, r8
	or	r24, r0, r24
	stw	r24, -4(r7)
	or	r25, r9, r25
	stw	r25, -8(r7)
	or	r26, r10, r26
	stw	r26, -12(r7)
	or	r27, r11, r27
	stw	r27, -16(r7)

	stw	r12, -20(r7)	C lowest result limb: left shift only
	lmw	r24, -32(r1)	C restore registers
	blr
EPILOGUE()