dnl  Alpha mpn_add_n -- Add two limb vectors of the same length > 0 and
dnl  store sum in a third limb vector.

dnl  Copyright 1995, 1999, 2000, 2005, 2011 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')

C      cycles/limb
C EV4:     ?
C EV5:     4.75
C EV6:     3

dnl  INPUT PARAMETERS
dnl  res_ptr	r16
dnl  s1_ptr	r17
dnl  s2_ptr	r18
dnl  size	r19

C Register roles inside the routine:
C   r25      running carry between limbs, handed back in r0 on return
C   r28      raw sum of the current pair of limbs, before the carry is added
C   r8       carry out of the raw sum
C   r0-r3    limbs fetched through r18
C   r4-r7    limbs fetched through r17
C   r20-r23  finished result limbs waiting to be stored through r16
C
C Every limb is handled the same way: raw = a + b, res = raw + cy, and the
C new cy is the OR of the two unsigned overflow tests raw < b and res < raw.
C The instruction order below is a hand-made schedule, so keep it intact.

ASM_START()

C mpn_add_nc: seed the carry register from r20 and join the common body.
C NOTE: r20 is presumably the carry-in argument -- confirm against callers.
PROLOGUE(mpn_add_nc)
	bis	r20,r31,r25		C cy = r20
	br	L(com)
EPILOGUE()

C mpn_add_n: same body, starting from a zero carry.
PROLOGUE(mpn_add_n)
	bis	r31,r31,r25		C cy = 0
L(com):	subq	r19,4,r19		C count -= 4
	blt	r19,L(end2)		C under 4 limbs: only the tail loop runs

C Prime the 4-limb pipeline: fetch the first group and do adds 1 and 2
	ldq	r0,0(r18)
	ldq	r4,0(r17)
	ldq	r1,8(r18)
	ldq	r5,8(r17)
	addq	r17,32,r17		C advance s1_ptr past this group
	addq	r0,r4,r28		C limb 1: raw sum
	ldq	r2,16(r18)
	addq	r28,r25,r20		C limb 1: add carry in
	ldq	r3,24(r18)
	cmpult	r28,r4,r8		C limb 1: overflow of raw sum
	ldq	r6,-16(r17)
	cmpult	r20,r28,r25		C limb 1: overflow of carry add
	ldq	r7,-8(r17)
	bis	r8,r25,r25		C limb 1: carry out
	subq	r19,4,r19		C count -= 4
	addq	r1,r5,r28		C limb 2: raw sum
	addq	r18,32,r18		C advance s2_ptr past this group
	addq	r28,r25,r21		C limb 2: add carry in
	cmpult	r28,r5,r8		C limb 2: overflow of raw sum
	blt	r19,L(end1)		C no further full group: drain the pipeline

C Main loop: finish limbs 3 and 4 of the current group, store all four
C results, and begin limbs 1 and 2 of the next group.
	ALIGN(16)
L(top):	cmpult	r21,r28,r25		C limb 2: overflow of carry add
	ldq	r0,0(r18)
	bis	r8,r25,r25		C limb 2: carry out
	ldq	r1,8(r18)
	addq	r2,r6,r28		C limb 3: raw sum
	ldq	r4,0(r17)
	addq	r28,r25,r22		C limb 3: add carry in
	ldq	r5,8(r17)
	cmpult	r28,r6,r8		C limb 3: overflow of raw sum
	cmpult	r22,r28,r25		C limb 3: overflow of carry add
	stq	r20,0(r16)
	bis	r8,r25,r25		C limb 3: carry out
	stq	r21,8(r16)
	addq	r3,r7,r28		C limb 4: raw sum
	addq	r28,r25,r23		C limb 4: add carry in
	cmpult	r28,r7,r8		C limb 4: overflow of raw sum
	cmpult	r23,r28,r25		C limb 4: overflow of carry add
	addq	r17,32,r17		C advance s1_ptr past the next group
	bis	r8,r25,r25		C limb 4: carry out
	addq	r16,32,r16		C advance res_ptr past this group
	addq	r0,r4,r28		C limb 1: raw sum
	ldq	r2,16(r18)
	addq	r28,r25,r20		C limb 1: add carry in
	ldq	r3,24(r18)
	cmpult	r28,r4,r8		C limb 1: overflow of raw sum
	ldq	r6,-16(r17)
	cmpult	r20,r28,r25		C limb 1: overflow of carry add
	ldq	r7,-8(r17)
	bis	r8,r25,r25		C limb 1: carry out
	subq	r19,4,r19		C count -= 4
	stq	r22,-16(r16)
	addq	r1,r5,r28		C limb 2: raw sum
	stq	r23,-8(r16)
	addq	r28,r25,r21		C limb 2: add carry in
	addq	r18,32,r18		C advance s2_ptr past the next group
	cmpult	r28,r5,r8		C limb 2: overflow of raw sum
	bge	r19,L(top)

C Drain the pipeline: complete and store the group already in registers
L(end1):
	cmpult	r21,r28,r25		C limb 2: overflow of carry add
	bis	r8,r25,r25		C limb 2: carry out
	addq	r2,r6,r28		C limb 3: raw sum
	addq	r28,r25,r22		C limb 3: add carry in
	cmpult	r28,r6,r8		C limb 3: overflow of raw sum
	cmpult	r22,r28,r25		C limb 3: overflow of carry add
	stq	r20,0(r16)
	bis	r8,r25,r25		C limb 3: carry out
	stq	r21,8(r16)
	addq	r3,r7,r28		C limb 4: raw sum
	addq	r28,r25,r23		C limb 4: add carry in
	cmpult	r28,r7,r8		C limb 4: overflow of raw sum
	cmpult	r23,r28,r25		C limb 4: overflow of carry add
	bis	r8,r25,r25		C limb 4: carry out
	addq	r16,32,r16		C advance res_ptr past this group
	stq	r22,-16(r16)
	stq	r23,-8(r16)

C Tail: undo the last count adjustment; 0 to 3 limbs are left
L(end2):
	addq	r19,4,r19		C count = limbs still to do
	beq	r19,L(done)
C Prime the one-limb loop with the first leftover pair
	ldq	r0,0(r18)
	ldq	r4,0(r17)
	subq	r19,1,r19
	beq	r19,L(end0)		C exactly one limb left: skip the loop

C One limb per pass; the next pair is fetched while this one is summed
	ALIGN(16)
L(top0):
	addq	r0,r4,r28		C raw sum
	ldq	r0,8(r18)
	cmpult	r28,r4,r8		C overflow of raw sum, using r4 before its reload
	ldq	r4,8(r17)
	addq	r28,r25,r20		C add carry in
	addq	r18,8,r18
	addq	r17,8,r17
	stq	r20,0(r16)
	cmpult	r20,r28,r25		C overflow of carry add
	subq	r19,1,r19		C count -= 1
	bis	r8,r25,r25		C carry out
	addq	r16,8,r16
	bne	r19,L(top0)

C Last limb: its operands are already in r0 and r4
L(end0):
	addq	r0,r4,r28		C raw sum
	addq	r28,r25,r20		C add carry in
	cmpult	r28,r4,r8		C overflow of raw sum
	cmpult	r20,r28,r25		C overflow of carry add
	stq	r20,0(r16)
	bis	r8,r25,r25		C carry out

L(done):
	bis	r25,r31,r0		C return value = final carry
	ret	r31,(r26),1
EPILOGUE()
ASM_END()