dnl  Alpha mpn_mod_34lsub1.

dnl  Copyright 2002 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')

C      cycles/limb
C EV4:     4 (?)
C EV5:     2.67
C EV6:     1.67


dnl  INPUT PARAMETERS
dnl  up		r16
dnl  n		r17

C Purpose: add up the n 64-bit limbs at up in a way that preserves their value
C modulo 2^48-1, and leave the sum in r0.  The sum is only folded, it is not
C reduced to the range below 2^48-1.
C
C Limb i is added into accumulator i mod 3.  Because 2^64 = 2^16, 2^128 = 2^32
C and 2^192 = 2^0 modulo 2^48-1, the accumulators carry these weights:
C
C	a0	limbs 0, 3, 6, ...	weight 2^0
C	a1	limbs 1, 4, 7, ...	weight 2^16
C	a2	limbs 2, 5, 8, ...	weight 2^32
C
C Carries out of a0, a1, a2 are counted in c0, c1, c2.  A carry is worth 2^64
C times the weight of its accumulator, so c0 has weight 2^16, c1 has weight
C 2^32 and c2 has weight 2^96 = 2^0.
C
C r17 is kept biased throughout: it holds the number of limbs not yet loaded
C minus 3, so a sign test tells whether another group of three is available.
C
C Registers written: r0-r6 and r17-r25, plus r16 which is advanced as the limb
C pointer.  No memory is written.

C l0-l2 hold the limbs most recently loaded, a0-a2 are the running sums and
C c0-c2 the carry counts.
define(`l0',`r18')
define(`l1',`r19')
define(`l2',`r20')
define(`a0',`r21')
define(`a1',`r22')
define(`a2',`r23')
define(`c0',`r24')
define(`c1',`r5')
define(`c2',`r6')

ASM_START()
PROLOGUE(mpn_mod_34lsub1)
C Clear the three carry counters, r31 always reads as zero
	bis	r31, r31, c0
	bis	r31, r31, c1
	bis	r31, r31, c2

	lda	r17, -3(r17)		C r17 = n - 3
	bge	r17, $L_3_or_more
C Fewer than 3 limbs: start from zero sums and let the tail code add them
	bis	r31, r31, a0
	bis	r31, r31, a1
	bis	r31, r31, a2
	br	r31, $L_012

C At least 3 limbs: the first three initialise the sums directly
$L_3_or_more:
	ldq	a0, 0(r16)
	ldq	a1, 8(r16)
	ldq	a2, 16(r16)
	lda	r16, 24(r16)
	lda	r17, -3(r17)
	blt	r17, $L_012		C fewer than 3 limbs left

C Load the second group of three and start the pipeline: only the a0 add is
C issued here, its carry test and the a1, a2 adds are done either at the top
C of the loop or at $L_end
$L_6_or_more:
	ldq	l0, 0(r16)
	ldq	l1, 8(r16)
	ldq	l2, 16(r16)
	addq	l0, a0, a0

	lda	r16, 24(r16)
	lda	r17, -3(r17)
	blt	r17, $L_end		C no further full group to load

	ALIGN(16)
C Main loop
C Each iteration finishes the group loaded previously and loads the next one.
C After an add, cmpult sum, addend gives 1 exactly when the add wrapped, and
C that bit is added to the matching carry counter.  Each compare comes before
C the load that overwrites its addend register.
$L_9_or_more:
$Loop:	cmpult	a0, l0, r0		C carry from the pending a0 add
	ldq	l0, 0(r16)
	addq	r0, c0, c0
	addq	l1, a1, a1
	cmpult	a1, l1, r0
	ldq	l1, 8(r16)
	addq	r0, c1, c1
	addq	l2, a2, a2
	cmpult	a2, l2, r0
	ldq	l2, 16(r16)
	addq	r0, c2, c2
	addq	l0, a0, a0		C carry test is left pending
	lda	r16, 24(r16)
	lda	r17, -3(r17)
	bge	r17, $Loop

C Drain the pipeline: finish the group held in l0, l1, l2 without loading
$L_end:	cmpult	a0, l0, r0
	addq	r0, c0, c0
	addq	l1, a1, a1
	cmpult	a1, l1, r0
	addq	r0, c1, c1
	addq	l2, a2, a2
	cmpult	a2, l2, r0
	addq	r0, c2, c2

C Handle the last (n mod 3) limbs
C After the lda below r17 is the number of limbs left minus 1: negative means
C none, zero means exactly one, positive means two
$L_012:	lda	r17, 2(r17)
	blt	r17, $L_0
	ldq	l0, 0(r16)
	addq	l0, a0, a0
	cmpult	a0, l0, r0
	addq	r0, c0, c0
	beq	r17, $L_0
	ldq	l1, 8(r16)
	addq	l1, a1, a1
	cmpult	a1, l1, r0
	addq	r0, c1, c1

C Align and sum our 3 main accumulators and 3 carry accumulators
C Each register is split where its weighted value crosses bit 48, and the part
C above is wrapped around to bit 0:
C	weight 2^0	low 48 bits, plus value >> 48
C	weight 2^16	low 32 bits << 16, plus value >> 32
C	weight 2^32	low 16 bits << 32, plus value >> 16
C When HAVE_LIMB_LITTLE_ENDIAN is defined the mask and shift is one insll or
C inswl instruction, otherwise it is zapnot followed by sll with r25 as a
C temporary.  a1, a2 and c0 are reused as scratch once their values have been
C consumed.
$L_0:	srl	a0, 48, r2
	srl	a1, 32, r4
ifdef(`HAVE_LIMB_LITTLE_ENDIAN',
`	insll	a1, 2, r1',		C (a1 & 0xffffffff) << 16
`	zapnot	a1, 15, r25
	sll	r25, 16, r1')
	zapnot	a0, 63, r0		C a0 & 0xffffffffffff
	srl	a2, 16, a1		C a1 is scratch from here on
ifdef(`HAVE_LIMB_LITTLE_ENDIAN',
`	inswl	a2, 4, r3',		C (a2 & 0xffff) << 32
`	zapnot	a2, 3, r25
	sll	r25, 32, r3')
	addq	r1, r4, r1
	addq	r0, r2, r0
	srl	c0, 32, a2		C a2 is scratch from here on
ifdef(`HAVE_LIMB_LITTLE_ENDIAN',
`	insll	c0, 2, r4',		C (c0 & 0xffffffff) << 16
`	zapnot	c0, 15, r25
	sll	r25, 16, r4')
	addq	r0, r1, r0
	addq	r3, a1, r3
	addq	r0, r3, r0
	srl	c1, 16, c0		C c0 is scratch from here on
ifdef(`HAVE_LIMB_LITTLE_ENDIAN',
`	inswl	c1, 4, r2',		C (c1 & 0xffff) << 32
`	zapnot	c1, 3, r25
	sll	r25, 32, r2')
	addq	r4, a2, r4
C	srl	c2, 48, r3		C This will be 0 in practice
	zapnot	c2, 63, r1		C r1 = c2 & 0xffffffffffff
	addq	r0, r4, r0
	addq	r2, c0, r2
	addq	r0, r2, r0
C	addq	r1, r3, r1
	addq	r0, r1, r0

	ret	r31, (r26), 1		C return via r26, the folded sum is in r0
EPILOGUE(mpn_mod_34lsub1)
ASM_END()