dnl Alpha mpn_lshift -- Shift a number left.

dnl Copyright 1994, 1995, 2000, 2003, 2009 Free Software Foundation, Inc.

dnl This file is part of the GNU MP Library.

dnl The GNU MP Library is free software; you can redistribute it and/or modify
dnl it under the terms of the GNU Lesser General Public License as published
dnl by the Free Software Foundation; either version 3 of the License, or (at
dnl your option) any later version.

dnl The GNU MP Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
dnl License for more details.

dnl You should have received a copy of the GNU Lesser General Public License
dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.

include(`../config.m4')

C cycles/limb
C EV4: ?
C EV5: 3.25
C EV6: 1.75

C INPUT PARAMETERS
C rp r16
C up r17
C n r18
C cnt r19

C OVERVIEW
C Shifts the n-limb number at up left by cnt bits and stores n result limbs
C at rp.  Limbs are read and written from the highest address down to the
C lowest.  r0 is set to the bits shifted out of the top limb and is not
C modified afterwards.
C
C r20 = 0 - cnt serves as the right-shift count.  Alpha shift instructions
C take the count from the low 6 bits of the operand, so this acts as 64 - cnt.
C NOTE(review): that identity only holds for cnt in 1..63, and up[n-1] is
C loaded unconditionally, so n >= 1 looks required -- confirm with callers.
C
C STRUCTURE
C 1. L(top0) produces (n-1) mod 4 result limbs, one per iteration.
C 2. The remaining multiple-of-4 limbs go through the 4-way unrolled,
C    software-pipelined loop at L(top), bracketed by warm-up and cool-down
C    code.  L(end1) drains the pipeline when exactly 4 limbs were left,
C    L(end2) drains it in every other case.
C 3. Every path finishes by storing r24 = up[0] << cnt as the lowest limb.
C
C REGISTER USE
C r16      result pointer, starts at rp + 8*n and moves downwards
C r17      source pointer, starts at up + 8*n and moves downwards
C r18      limbs still to be handled by the unrolled code (multiple of 4)
C r28      iteration count of the one-limb loop
C r1-r4    source limbs loaded from memory
C r5-r8    right-shifted parts, then the combined result limbs
C r21-r24  left-shifted parts waiting to be combined with the next limb


ASM_START()
PROLOGUE(mpn_lshift)
	s8addq	r18,r17,r17	C make r17 point at end of s1
	ldq	r4,-8(r17)	C load first limb
	subq	r31,r19,r20	C r20 = -cnt, the right-shift count
	s8addq	r18,r16,r16	C make r16 point at end of RES
	subq	r18,1,r18	C r18 = n-1, limbs left after the first one
	and	r18,4-1,r28	C number of limbs in first loop
	srl	r4,r20,r0	C compute function result

	beq	r28,L(L0)	C (n-1) mod 4 == 0: skip the one-limb loop
	subq	r18,r28,r18	C r18 = limbs left for the unrolled code

	ALIGN(8)
C one-limb loop: each pass combines the current limb r4 with the next lower
C limb r3 and stores one result limb
L(top0):
	ldq	r3,-16(r17)	C next lower source limb
	subq	r16,8,r16
	sll	r4,r19,r5	C high part from the current limb
	subq	r17,8,r17
	subq	r28,1,r28
	srl	r3,r20,r6	C low part from the next lower limb
	bis	r3,r3,r4	C r4 = r3, the lower limb becomes current
	bis	r5,r6,r8
	stq	r8,0(r16)
	bne	r28,L(top0)

L(L0):	sll	r4,r19,r24	C pending high part of the next result limb
	beq	r18,L(end)	C nothing left: r24 is the lowest result limb
C warm up phase 1
	ldq	r1,-16(r17)
	subq	r18,4,r18
	ldq	r2,-24(r17)
	ldq	r3,-32(r17)
	ldq	r4,-40(r17)
C warm up phase 2
	srl	r1,r20,r7
	sll	r1,r19,r21
	srl	r2,r20,r8
	beq	r18,L(end1)	C only these 4 limbs were left
	ldq	r1,-48(r17)
	sll	r2,r19,r22
	ldq	r2,-56(r17)
	srl	r3,r20,r5
	bis	r7,r24,r7
	sll	r3,r19,r23
	bis	r8,r21,r8
	srl	r4,r20,r6
	ldq	r3,-64(r17)
	sll	r4,r19,r24
	ldq	r4,-72(r17)
	subq	r18,4,r18
	beq	r18,L(end2)	C exactly 8 limbs were left: skip the main loop
	ALIGN(16)
C main loop
C On entry r7 and r8 hold finished result limbs, r5 and r6 hold right-shifted
C parts still to be combined with r22 and r23, r24 holds the pending high
C part, and r1-r4 hold the next four source limbs.  Each pass stores four
C result limbs and loads four more source limbs.
L(top):	stq	r7,-8(r16)
	bis	r5,r22,r5
	stq	r8,-16(r16)
	bis	r6,r23,r6

	srl	r1,r20,r7
	subq	r18,4,r18
	sll	r1,r19,r21
	unop	C ldq	r31,-96(r17)

	srl	r2,r20,r8
	ldq	r1,-80(r17)
	sll	r2,r19,r22
	ldq	r2,-88(r17)

	stq	r5,-24(r16)
	bis	r7,r24,r7
	stq	r6,-32(r16)
	bis	r8,r21,r8

	srl	r3,r20,r5
	unop	C ldq	r31,-96(r17)
	sll	r3,r19,r23
	subq	r16,32,r16	C four result limbs consumed

	srl	r4,r20,r6
	ldq	r3,-96(r17)
	sll	r4,r19,r24
	ldq	r4,-104(r17)

	subq	r17,32,r17	C four source limbs consumed
	bne	r18,L(top)
C cool down phase 2/1
C same work as one main loop pass, but without further loads or pointer
C updates
L(end2):
	stq	r7,-8(r16)
	bis	r5,r22,r5
	stq	r8,-16(r16)
	bis	r6,r23,r6
	srl	r1,r20,r7
	sll	r1,r19,r21
	srl	r2,r20,r8
	sll	r2,r19,r22
	stq	r5,-24(r16)
	bis	r7,r24,r7
	stq	r6,-32(r16)
	bis	r8,r21,r8
	srl	r3,r20,r5
	sll	r3,r19,r23
	srl	r4,r20,r6
	sll	r4,r19,r24
C cool down phase 2/2
	stq	r7,-40(r16)
	bis	r5,r22,r5
	stq	r8,-48(r16)
	bis	r6,r23,r6
	stq	r5,-56(r16)
	stq	r6,-64(r16)
C cool down phase 2/3
	stq	r24,-72(r16)	C lowest result limb
	ret	r31,(r26),1	C return through the address in r26

C cool down phase 1/1
C reached from warm up phase 2 with r1-r4 loaded and r7, r21, r8 computed
L(end1):
	sll	r2,r19,r22
	srl	r3,r20,r5
	bis	r7,r24,r7
	sll	r3,r19,r23
	bis	r8,r21,r8
	srl	r4,r20,r6
	sll	r4,r19,r24
C cool down phase 1/2
	stq	r7,-8(r16)
	bis	r5,r22,r5
	stq	r8,-16(r16)
	bis	r6,r23,r6
	stq	r5,-24(r16)
	stq	r6,-32(r16)
	stq	r24,-40(r16)	C lowest result limb
	ret	r31,(r26),1	C return through the address in r26

C no limbs were left for the unrolled code: store the lowest result limb
L(end):	stq	r24,-8(r16)
	ret	r31,(r26),1	C return through the address in r26
EPILOGUE(mpn_lshift)
ASM_END()