dnl  PA64 mpn_addlsh1_n/mpn_sublsh1_n -- rp[] = up[] +- (vp[] << 1).

dnl  Copyright 2003 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')

C                   cycles/limb
C 8000,8200:            2
C 8500,8600,8700:       1.75

C TODO
C  * Write special feed-in code for each (n mod 8).  (See the ia64 code.)
C  * Try to make this run at closer to 1.5 c/l.
C  * Set up register aliases (define(`u0',`%r19')).
C  * Explicitly align loop.

C ---------------------------------------------------------------------------
C func is mpn_addlsh1_n or mpn_sublsh1_n, selected by OPERATION_addlsh1_n or
C OPERATION_sublsh1_n.  It stores up[i] +- the left-shifted-by-one vp[] limbs
C into rp[] for n limbs of 64 bits each.
C
C In:      rp = %r26   result pointer
C          up = %r25   first source pointer
C          vp = %r24   source that is shifted left one bit
C          n  = %r23   limb count
C Out:     RETREG      bit shifted out of the last vp limb combined with the
C                      final carry or borrow.  The subtract variant negates
C                      the raw value before returning it.
C Scratch: %r1 saved cy, %r19-%r22 up limbs, %r28 previous vp limb,
C          %r29 result limb, %r31 shifted limb.
C Saved:   %r3 always, %r4-%r9 only on the unrolled path, in a 0x100 byte
C          frame at %r30.
C
C NOTE(review): with n = 0 the code still branches to the unrolled tail and
C reads and writes 8 limbs, so callers presumably guarantee n >= 1 -- confirm.
C ---------------------------------------------------------------------------

dnl  INPUT PARAMETERS
define(`rp',`%r26')
define(`up',`%r25')
define(`vp',`%r24')
define(`n',`%r23')

C ADCSBC is the carry-propagating add or subtract used for every limb.
C INITC loads the initial saved cy value: 0 for add, 1 for subtract.
C NOTE(review): the 1 presumably means no borrow pending -- confirm against
C the PA-RISC definition of the carry/borrow bit.
ifdef(`OPERATION_addlsh1_n',`
  define(ADCSBC,        `add,dc')
  define(INITC,         `ldi    0,')
  define(func, mpn_addlsh1_n)
')
ifdef(`OPERATION_sublsh1_n',`
  define(ADCSBC,        `sub,db')
  define(INITC,         `ldi    1,')
  define(func, mpn_sublsh1_n)
')

MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n)

C ABI selection.  The 2.0w build returns in %r28.  The 2.0n build returns in
C %r29 and additionally zeroes %r28 through CLRRET1.
C NOTE(review): presumably %r28:%r29 is the 64-bit return pair in 2.0n.
ifdef(`HAVE_ABI_2_0w',`
  define(LEVEL,         `.level 2.0w')
  define(RETREG,        `%r28')
  define(CLRRET1,       `dnl')
')
ifdef(`HAVE_ABI_2_0n',`
  define(LEVEL,         `.level 2.0')
  define(RETREG,        `%r29')
  define(CLRRET1,       `ldi    0, %r28')
')

        LEVEL
PROLOGUE(func)
C Store %r3 and advance %r30 by 0x100 in one instruction.  The other register
C saves below use negative offsets from the advanced %r30.
        std,ma          %r3, 0x100(%r30)        C save reg

        INITC           %r1                     C init saved cy

C Primitive code for the first (n mod 8) limbs:
        extrd,u         n, 63, 3, %r22          C count for loop0
        comib,=         0, %r22, L(unrolled)    C skip loop0?
C The copy sits directly after the branch and clears the previous vp limb, so
C the first shift brings in a zero bit.
        copy            %r0, %r28
LDEF(loop0)
C One limb per iteration.  %r31 = current vp limb shifted left by one with
C the top bit of the previous vp limb, kept in %r28, as its low bit.
        ldd             0(vp), %r21
        ldo             8(vp), vp
        ldd             0(up), %r19
        ldo             8(up), up
        shrpd           %r21, %r28, 63, %r31
C Adding -1 to the saved cy, which is 0 or 1, carries out exactly when it is 1.
        addi            -1, %r1, %r0            C restore cy
        ADCSBC          %r19, %r31, %r29
        std             %r29, 0(rp)
C NOTE(review): cy is parked in %r1 here, presumably because the loop control
C arithmetic that follows does not preserve the carry/borrow bit -- confirm.
        add,dc          %r0, %r0, %r1           C save cy
        copy            %r21, %r28
        addib,>         -1, %r22, L(loop0)
        ldo             8(rp), rp

C Take 8 off n.  If the result is not negative at least one full block of 8
C limbs remains, so continue with the unrolled code.
        addib,>=        -8, n, L(unrolled)
        addi            -1, %r1, %r0            C restore cy

C n was below 8: build the return value and leave.  %r28 becomes the bit
C shifted out of the last vp limb, then cy is folded in.
        shrpd           %r0, %r28, 63, %r28
        ADCSBC          %r0, %r28, RETREG
ifdef(`OPERATION_sublsh1_n',
`       sub             %r0, RETREG, RETREG')
        CLRRET1

C Return.  The load after the branch steps %r30 back by 0x100 and reloads %r3.
        bve             (%r2)
        ldd,mb          -0x100(%r30), %r3


LDEF(unrolled)
C Feed-in for the 8-limb blocks.  Saves %r4-%r9 while loading vp[0..7] into
C %r4-%r9, %r3 and %r28, shifting each limb against its predecessor as soon as
C both are available.  Afterwards the shifted limbs 0..7 are held in
C %r31, %r4, %r5, %r6, %r7, %r8, %r9, %r3 and the unshifted vp[7] stays in
C %r28 for the next block.  up[0..3] go to %r19-%r22.
        std             %r4, -0xf8(%r30)        C save reg
        ldd             0(vp), %r4
        std             %r5, -0xf0(%r30)        C save reg
        ldd             8(vp), %r5
        std             %r6, -0xe8(%r30)        C save reg
        ldd             16(vp), %r6
        std             %r7, -0xe0(%r30)        C save reg

        ldd             24(vp), %r7
        shrpd           %r4, %r28, 63, %r31
        std             %r8, -0xd8(%r30)        C save reg
        ldd             32(vp), %r8
        shrpd           %r5, %r4, 63, %r4
        std             %r9, -0xd0(%r30)        C save reg
        ldd             40(vp), %r9
        shrpd           %r6, %r5, 63, %r5
        ldd             48(vp), %r3
        shrpd           %r7, %r6, 63, %r6
        ldd             56(vp), %r28
        shrpd           %r8, %r7, 63, %r7
        ldd             0(up), %r19
        shrpd           %r9, %r8, 63, %r8
        ldd             8(up), %r20
        shrpd           %r3, %r9, 63, %r9
        ldd             16(up), %r21
        shrpd           %r28, %r3, 63, %r3
        ldd             24(up), %r22

        nop                                     C alignment FIXME
C Take 8 off n.  If nothing is left after this block go straight to the tail.
        addib,<=        -8, n, L(end)
        addi            -1, %r1, %r0            C restore cy
LDEF(loop)
C Eight dependent ADCSBC operations form one carry chain.  The first four
C consume up[0..3] and reload %r19-%r22 with up[4..7].  The last four start
C fetching the next block by loading vp[8..10] into %r4-%r6 once the shifted
C values held there have been consumed.
        ADCSBC          %r19, %r31, %r29
        ldd             32(up), %r19
        std             %r29, 0(rp)
        ADCSBC          %r20, %r4, %r29
        ldd             40(up), %r20
        std             %r29, 8(rp)
        ADCSBC          %r21, %r5, %r29
        ldd             48(up), %r21
        std             %r29, 16(rp)
        ADCSBC          %r22, %r6, %r29
        ldd             56(up), %r22
        std             %r29, 24(rp)
        ADCSBC          %r19, %r7, %r29
        ldd             64(vp), %r4
        std             %r29, 32(rp)
        ADCSBC          %r20, %r8, %r29
        ldd             72(vp), %r5
        std             %r29, 40(rp)
        ADCSBC          %r21, %r9, %r29
        ldd             80(vp), %r6
        std             %r29, 48(rp)
        ADCSBC          %r22, %r3, %r29
        std             %r29, 56(rp)

        add,dc          %r0, %r0, %r1           C save cy

C Finish loading the next block, vp[11..15] and up[8..11], and shift it.
C %r28 still holds the last vp limb of the block just processed when the
C first shift reads it, and is then overwritten with vp[15].
        ldd             88(vp), %r7
        shrpd           %r4, %r28, 63, %r31
        ldd             96(vp), %r8
        shrpd           %r5, %r4, 63, %r4
        ldd             104(vp), %r9
        shrpd           %r6, %r5, 63, %r5
        ldd             112(vp), %r3
        shrpd           %r7, %r6, 63, %r6
        ldd             120(vp), %r28
        shrpd           %r8, %r7, 63, %r7
        ldd             64(up), %r19
        shrpd           %r9, %r8, 63, %r8
        ldd             72(up), %r20
        shrpd           %r3, %r9, 63, %r9
        ldd             80(up), %r21
        shrpd           %r28, %r3, 63, %r3
        ldd             88(up), %r22

C Advance all three pointers by 8 limbs, which is 64 bytes.
        ldo             64(vp), vp
        ldo             64(rp), rp
        ldo             64(up), up
        addib,>         -8, n, L(loop)
        addi            -1, %r1, %r0            C restore cy
LDEF(end)
C Tail: the same carry chain for the final block, without fetching a further
C block.  The saved registers are reloaded from the frame, each one only
C after the ADCSBC that last reads it.
        ADCSBC          %r19, %r31, %r29
        ldd             32(up), %r19
        std             %r29, 0(rp)
        ADCSBC          %r20, %r4, %r29
        ldd             40(up), %r20
        std             %r29, 8(rp)
        ADCSBC          %r21, %r5, %r29
        ldd             48(up), %r21
        std             %r29, 16(rp)
        ADCSBC          %r22, %r6, %r29
        ldd             56(up), %r22
        std             %r29, 24(rp)
        ADCSBC          %r19, %r7, %r29
        ldd             -0xf8(%r30), %r4        C restore reg
        std             %r29, 32(rp)
        ADCSBC          %r20, %r8, %r29
        ldd             -0xf0(%r30), %r5        C restore reg
        std             %r29, 40(rp)
        ADCSBC          %r21, %r9, %r29
        ldd             -0xe8(%r30), %r6        C restore reg
        std             %r29, 48(rp)
        ADCSBC          %r22, %r3, %r29
        ldd             -0xe0(%r30), %r7        C restore reg
        std             %r29, 56(rp)

C Return value: the bit shifted out of the last vp limb, combined with the cy
C still live from the chain above.  No cy restore is needed here because
C nothing between the last ADCSBC and this one is marked as disturbing it.
        shrpd           %r0, %r28, 63, %r28
        ldd             -0xd8(%r30), %r8        C restore reg
        ADCSBC          %r0, %r28, RETREG
ifdef(`OPERATION_sublsh1_n',
`       sub             %r0, RETREG, RETREG')
        CLRRET1

        ldd             -0xd0(%r30), %r9        C restore reg
        bve             (%r2)
        ldd,mb          -0x100(%r30), %r3       C restore reg
EPILOGUE()