dnl  Alpha mpn_addlsh2_n/mpn_sublsh2_n -- rp[] = up[] +- (vp[] << 2).

dnl  Copyright 2003, 2013 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C      cycles/limb
C EV4:     ?
C EV5:     6
C EV6:     3.75

C TODO
C  * Tune to reach 3.5 c/l on ev6 and 5.75 c/l on ev5.
40 41define(`rp',`r16') 42define(`up',`r17') 43define(`vp',`r18') 44define(`n', `r19') 45 46define(`u0', `r8') 47define(`u1', `r1') 48define(`v0', `r4') 49define(`v1', `r5') 50 51define(`cy0', `r0') 52define(`cy1', `r20') 53define(`cy', `r22') 54define(`rr', `r24') 55define(`ps', `r25') 56define(`sl', `r28') 57 58ifdef(`OPERATION_addlsh2_n',` 59 define(ADDSUB, addq) 60 define(CARRY, `cmpult $1,$2,$3') 61 define(func, mpn_addlsh2_n) 62') 63ifdef(`OPERATION_sublsh2_n',` 64 define(ADDSUB, subq) 65 define(CARRY, `cmpult $2,$1,$3') 66 define(func, mpn_sublsh2_n) 67') 68 69MULFUNC_PROLOGUE(mpn_addlsh2_n mpn_sublsh2_n) 70 71ASM_START() 72PROLOGUE(func) 73 and n, 2, cy0 74 blbs n, L(bx1) 75L(bx0): ldq v1, 0(vp) 76 ldq u1, 0(up) 77 bis r31, r31, r2 78 bne cy0, L(b10) 79 80L(b00): lda vp, 48(vp) 81 lda up, -16(up) 82 lda rp, -8(rp) 83 s4addq v1, r31, sl 84 br r31, L(lo0) 85 86L(b10): lda vp, 32(vp) 87 lda rp, 8(rp) 88 lda cy0, 0(r31) 89 br r31, L(lo2) 90 91L(bx1): ldq v0, 0(vp) 92 ldq u0, 0(up) 93 lda cy1, 0(r31) 94 bis r31, r31, r3 95 nop 96 beq cy0, L(b01) 97 98L(b11): lda vp, 40(vp) 99 lda up, -24(up) 100 lda rp, 16(rp) 101 br r31, L(lo3) 102 103L(b01): lda n, -4(n) 104 ble n, L(end) 105 lda vp, 24(vp) 106 lda up, -8(up) 107 108 ALIGN(16) 109L(top): s4addq v0, r3, sl C combined vlimb 110 ldq v1, -16(vp) 111 ADDSUB u0, sl, ps C ulimb + (vlimb << 1) 112 ldq u1, 16(up) 113 srl v0, 62, r2 C high v bits 114 ADDSUB ps, cy1, rr C consume carry from previous operation 115 CARRY( ps, u0, cy0) C carry out #2 116 stq rr, 0(rp) 117 CARRY( rr, ps, cy) C carry out #3 118 lda vp, 32(vp) C bookkeeping 119 addq cy, cy0, cy0 C final carry out 120 s4addq v1, r2, sl 121L(lo0): ldq v0, -40(vp) 122 ADDSUB u1, sl, ps 123 ldq u0, 24(up) 124 srl v1, 62, r3 125 ADDSUB ps, cy0, rr 126 CARRY( ps, u1, cy1) 127 stq rr, 8(rp) 128 CARRY( rr, ps, cy) 129 lda rp, 32(rp) C bookkeeping 130 addq cy, cy1, cy1 131L(lo3): s4addq v0, r3, sl 132 ldq v1, -32(vp) 133 ADDSUB u0, sl, ps 134 ldq u1, 32(up) 135 srl v0, 
62, r2 136 ADDSUB ps, cy1, rr 137 CARRY( ps, u0, cy0) 138 stq rr, -16(rp) 139 CARRY( rr, ps, cy) 140 lda up, 32(up) C bookkeeping 141 addq cy, cy0, cy0 142L(lo2): s4addq v1, r2, sl 143 ldq v0, -24(vp) 144 ADDSUB u1, sl, ps 145 ldq u0, 8(up) 146 srl v1, 62, r3 147 ADDSUB ps, cy0, rr 148 CARRY( ps, u1, cy1) 149 stq rr, -8(rp) 150 CARRY( rr, ps, cy) 151 lda n, -4(n) C bookkeeping 152 addq cy, cy1, cy1 153 bgt n, L(top) 154 155L(end): s4addq v0, r3, sl 156 ADDSUB u0, sl, ps 157 srl v0, 62, r2 158 ADDSUB ps, cy1, rr 159 CARRY( ps, u0, cy0) 160 stq rr, 0(rp) 161 CARRY( rr, ps, cy) 162 addq cy, cy0, cy0 163 addq cy0, r2, r0 164 165 ret r31,(r26),1 166EPILOGUE() 167ASM_END() 168