dnl  Alpha mpn_sub_n -- Subtract two limb vectors of the same length > 0
dnl  and store difference in a third limb vector.

dnl  Copyright 1995, 1999, 2000, 2005, 2011 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')

C      cycles/limb
C EV4:     ?
C EV5:     4.75
C EV6:     3

dnl  INPUT PARAMETERS
dnl  res_ptr	r16
dnl  s1_ptr	r17
dnl  s2_ptr	r18
dnl  size	r19
dnl
dnl  mpn_sub_nc additionally reads r20 as the initial borrow (presumably the
dnl  fifth argument under the Alpha calling convention -- confirm with callers).
dnl
dnl  RETURN VALUE
dnl  r0		final borrow, copied from r25 at $Lret
dnl
dnl  REGISTER USE
dnl  r0-r3	limbs loaded through s2_ptr
dnl  r4-r7	limbs loaded through s1_ptr
dnl  r8		borrow out of the main subtract (s1 limb below s2 limb)
dnl  r20-r23	result limbs waiting to be stored through res_ptr
dnl  r25	running borrow (cy)
dnl  r28	s1 limb minus s2 limb, before the running borrow is taken off
dnl
dnl  Every limb is handled with the same four-instruction pattern:
dnl	subq	s1,s2,r28	main subtract
dnl	subq	r28,r25,res	carry subtract
dnl	cmpult	s1,s2,r8	borrow out of the main subtract
dnl	cmpult	r28,r25,r25	borrow out of the carry subtract
dnl  followed by a bis that merges r8 and r25 into the new running borrow.
dnl  The instructions of neighbouring limbs are interleaved with the loads and
dnl  stores, so the instruction order below is deliberate and is kept as is.

ASM_START()
PROLOGUE(mpn_sub_nc)
	bis	r31,r20,r25		C seed cy from r20
	br	L(com)			C continue in the body shared with mpn_sub_n
EPILOGUE()
PROLOGUE(mpn_sub_n)
	bis	r31,r31,r25		C clear cy
L(com):	subq	r19,4,r19		C decr loop cnt
	blt	r19,$Lend2		C if less than 4 limbs, goto 2nd loop
C Start software pipeline for 1st loop: load the first group of 4 limbs from
C each source and work limbs 1 and 2 up to their pending carry compare
	ldq	r0,0(r18)
	ldq	r4,0(r17)
	ldq	r1,8(r18)
	ldq	r5,8(r17)
	addq	r17,32,r17		C update s1_ptr
	subq	r4,r0,r28		C 1st main subtract
	ldq	r2,16(r18)
	subq	r28,r25,r20		C 1st carry subtract
	ldq	r3,24(r18)
	cmpult	r4,r0,r8		C borrow out of 1st main subtract
	ldq	r6,-16(r17)		C s1_ptr was already advanced, hence -16
	cmpult	r28,r25,r25		C borrow out of 1st carry subtract
	ldq	r7,-8(r17)
	bis	r8,r25,r25		C combine cy from the two subtracts
	subq	r19,4,r19		C decr loop cnt
	subq	r5,r1,r28		C 2nd main subtract
	addq	r18,32,r18		C update s2_ptr
	subq	r28,r25,r21		C 2nd carry subtract
	cmpult	r5,r1,r8		C borrow out of 2nd main subtract
	blt	r19,$Lend1		C if less than 4 limbs remain, jump
C 1st loop handles groups of 4 limbs in a software pipeline.  Each pass
C finishes limbs 3 and 4 of the group in flight, stores all 4 results, and
C loads the next group and works its limbs 1 and 2 up to the same point the
C pipeline start reaches.
	ALIGN(16)
$Loop:	cmpult	r28,r25,r25		C borrow out of 2nd carry subtract
	ldq	r0,0(r18)
	bis	r8,r25,r25		C combine cy from the two subtracts
	ldq	r1,8(r18)
	subq	r6,r2,r28		C 3rd main subtract
	ldq	r4,0(r17)
	subq	r28,r25,r22		C 3rd carry subtract
	ldq	r5,8(r17)
	cmpult	r6,r2,r8		C borrow out of 3rd main subtract
	cmpult	r28,r25,r25		C borrow out of 3rd carry subtract
	stq	r20,0(r16)		C store 1st result limb
	bis	r8,r25,r25		C combine cy from the two subtracts
	stq	r21,8(r16)		C store 2nd result limb
	subq	r7,r3,r28		C 4th main subtract
	subq	r28,r25,r23		C 4th carry subtract
	cmpult	r7,r3,r8		C borrow out of 4th main subtract
	cmpult	r28,r25,r25		C borrow out of 4th carry subtract
	addq	r17,32,r17		C update s1_ptr
	bis	r8,r25,r25		C combine cy from the two subtracts
	addq	r16,32,r16		C update res_ptr
	subq	r4,r0,r28		C 1st main subtract
	ldq	r2,16(r18)
	subq	r28,r25,r20		C 1st carry subtract
	ldq	r3,24(r18)
	cmpult	r4,r0,r8		C borrow out of 1st main subtract
	ldq	r6,-16(r17)
	cmpult	r28,r25,r25		C borrow out of 1st carry subtract
	ldq	r7,-8(r17)
	bis	r8,r25,r25		C combine cy from the two subtracts
	subq	r19,4,r19		C decr loop cnt
	stq	r22,-16(r16)		C store 3rd result limb of previous group
	subq	r5,r1,r28		C 2nd main subtract
	stq	r23,-8(r16)		C store 4th result limb of previous group
	subq	r28,r25,r21		C 2nd carry subtract
	addq	r18,32,r18		C update s2_ptr
	cmpult	r5,r1,r8		C borrow out of 2nd main subtract
	bge	r19,$Loop
C Finish software pipeline for 1st loop: complete and store the group that is
C still in flight, without loading anything further
$Lend1:	cmpult	r28,r25,r25		C borrow out of 2nd carry subtract
	bis	r8,r25,r25		C combine cy from the two subtracts
	subq	r6,r2,r28		C 3rd main subtract
	subq	r28,r25,r22		C 3rd carry subtract
	cmpult	r6,r2,r8		C borrow out of 3rd main subtract
	cmpult	r28,r25,r25		C borrow out of 3rd carry subtract
	stq	r20,0(r16)		C store 1st result limb
	bis	r8,r25,r25		C combine cy from the two subtracts
	stq	r21,8(r16)		C store 2nd result limb
	subq	r7,r3,r28		C 4th main subtract
	subq	r28,r25,r23		C 4th carry subtract
	cmpult	r7,r3,r8		C borrow out of 4th main subtract
	cmpult	r28,r25,r25		C borrow out of 4th carry subtract
	bis	r8,r25,r25		C combine cy from the two subtracts
	addq	r16,32,r16		C update res_ptr
	stq	r22,-16(r16)		C store 3rd result limb
	stq	r23,-8(r16)		C store 4th result limb
$Lend2:	addq	r19,4,r19		C restore loop cnt
	beq	r19,$Lret		C nothing left, return cy
C Start software pipeline for 2nd loop
	ldq	r0,0(r18)
	ldq	r4,0(r17)
	subq	r19,1,r19
	beq	r19,$Lend0		C exactly one limb left, skip the loop
C 2nd loop handles remaining 1-3 limbs, one limb per pass; the last of them
C is always finished at $Lend0
	ALIGN(16)
$Loop0:	subq	r4,r0,r28		C main subtract
	cmpult	r4,r0,r8		C borrow out of main subtract
	ldq	r0,8(r18)		C next s2 limb
	ldq	r4,8(r17)		C next s1 limb
	subq	r28,r25,r20		C carry subtract
	addq	r18,8,r18		C update s2_ptr
	addq	r17,8,r17		C update s1_ptr
	stq	r20,0(r16)
	cmpult	r28,r25,r25		C borrow out of carry subtract
	subq	r19,1,r19		C decr loop cnt
	bis	r8,r25,r25		C combine cy from the two subtracts
	addq	r16,8,r16		C update res_ptr
	bne	r19,$Loop0
$Lend0:	subq	r4,r0,r28		C main subtract
	subq	r28,r25,r20		C carry subtract
	cmpult	r4,r0,r8		C borrow out of main subtract
	cmpult	r28,r25,r25		C borrow out of carry subtract
	stq	r20,0(r16)
	bis	r8,r25,r25		C combine cy from the two subtracts

$Lret:	bis	r25,r31,r0		C return cy
	ret	r31,(r26),1
EPILOGUE()
ASM_END()