dnl  Alpha mpn_sub_n -- Subtract two limb vectors of the same length > 0
dnl  and store difference in a third limb vector.

dnl  Copyright 1995, 1999, 2000, 2005, 2011 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C      cycles/limb
C EV4:     ?
C EV5:     4.75
C EV6:     3

dnl  INPUT PARAMETERS
dnl  res_ptr  r16
dnl  s1_ptr   r17
dnl  s2_ptr   r18
dnl  size     r19
dnl  cy_in    r20   (read by mpn_sub_nc only; presumably 0 or 1, confirm
dnl                  against callers)
dnl
dnl  RETURN VALUE
dnl  r0 = borrow out of the most significant limb (copied from r25)
dnl
dnl  REGISTER ROLES INSIDE THE ROUTINE
dnl  r0-r3    limbs loaded through s2_ptr (subtrahend)
dnl  r4-r7    limbs loaded through s1_ptr (minuend)
dnl  r28      s1 limb minus s2 limb, before the incoming borrow is applied
dnl  r8       borrow produced by the main subtract (s1 limb < s2 limb)
dnl  r25      running borrow; after each limb it holds the OR of r8 and the
dnl           borrow produced by subtracting the previous r25 from r28
dnl  r20-r23  finished difference limbs waiting to be stored
dnl
dnl  The size register is biased by -4 on entry so that its sign tells whether
dnl  a full group of four limbs remains; the bias is removed at $Lend2 before
dnl  the one-limb-at-a-time tail loop.  Instruction order in the 4-limb loop is
dnl  a deliberate software-pipeline schedule (loads for the next group are
dnl  interleaved with arithmetic for the current one); do not reorder casually.

ASM_START()

C mpn_sub_nc: same as mpn_sub_n but starts with the borrow supplied in r20.
PROLOGUE(mpn_sub_nc)
        bis     r31,r20,r25             C cy = cy_in  (r31 reads as zero)
        br      L(com)                  C join the common code in mpn_sub_n
EPILOGUE()

C mpn_sub_n: {res_ptr,size} = {s1_ptr,size} - {s2_ptr,size}, returns borrow.
PROLOGUE(mpn_sub_n)
        bis     r31,r31,r25             C clear cy
L(com): subq    r19,4,r19               C decr loop cnt
        blt     r19,$Lend2              C if less than 4 limbs, goto 2nd loop
C Start software pipeline for 1st loop
        ldq     r0,0(r18)
        ldq     r4,0(r17)
        ldq     r1,8(r18)
        ldq     r5,8(r17)
        addq    r17,32,r17              C update s1_ptr
        subq    r4,r0,r28               C 1st main subtract
        ldq     r2,16(r18)
        subq    r28,r25,r20             C 1st carry subtract
        ldq     r3,24(r18)
        cmpult  r4,r0,r8                C compute cy from last subtract
        ldq     r6,-16(r17)             C s1 limb 2 (s1_ptr already advanced)
        cmpult  r28,r25,r25             C compute cy from last subtract
        ldq     r7,-8(r17)              C s1 limb 3 (s1_ptr already advanced)
        bis     r8,r25,r25              C combine cy from the two subtracts
        subq    r19,4,r19               C decr loop cnt
        subq    r5,r1,r28               C 2nd main subtract
        addq    r18,32,r18              C update s2_ptr
        subq    r28,r25,r21             C 2nd carry subtract
        cmpult  r5,r1,r8                C compute cy from last subtract
        blt     r19,$Lend1              C if less than 4 limbs remain, jump
C 1st loop handles groups of 4 limbs in a software pipeline
        ALIGN(16)
$Loop:  cmpult  r28,r25,r25             C compute cy from last subtract
        ldq     r0,0(r18)
        bis     r8,r25,r25              C combine cy from the two subtracts
        ldq     r1,8(r18)
        subq    r6,r2,r28               C 3rd main subtract
        ldq     r4,0(r17)
        subq    r28,r25,r22             C 3rd carry subtract
        ldq     r5,8(r17)
        cmpult  r6,r2,r8                C compute cy from last subtract
        cmpult  r28,r25,r25             C compute cy from last subtract
        stq     r20,0(r16)
        bis     r8,r25,r25              C combine cy from the two subtracts
        stq     r21,8(r16)
        subq    r7,r3,r28               C 4th main subtract
        subq    r28,r25,r23             C 4th carry subtract
        cmpult  r7,r3,r8                C compute cy from last subtract
        cmpult  r28,r25,r25             C compute cy from last subtract
        addq    r17,32,r17              C update s1_ptr
        bis     r8,r25,r25              C combine cy from the two subtracts
        addq    r16,32,r16              C update res_ptr
        subq    r4,r0,r28               C 1st main subtract
        ldq     r2,16(r18)
        subq    r28,r25,r20             C 1st carry subtract
        ldq     r3,24(r18)
        cmpult  r4,r0,r8                C compute cy from last subtract
        ldq     r6,-16(r17)
        cmpult  r28,r25,r25             C compute cy from last subtract
        ldq     r7,-8(r17)
        bis     r8,r25,r25              C combine cy from the two subtracts
        subq    r19,4,r19               C decr loop cnt
        stq     r22,-16(r16)            C limbs 3 and 4 of the group just done
        subq    r5,r1,r28               C 2nd main subtract
        stq     r23,-8(r16)
        subq    r28,r25,r21             C 2nd carry subtract
        addq    r18,32,r18              C update s2_ptr
        cmpult  r5,r1,r8                C compute cy from last subtract
        bge     r19,$Loop
C Finish software pipeline for 1st loop
C Limbs 1 and 2 of the last group are computed but not yet stored; limbs 3
C and 4 are loaded but not yet subtracted.  Drain both here.
$Lend1: cmpult  r28,r25,r25             C compute cy from last subtract
        bis     r8,r25,r25              C combine cy from the two subtracts
        subq    r6,r2,r28               C 3rd main subtract
        subq    r28,r25,r22             C 3rd carry subtract
        cmpult  r6,r2,r8                C compute cy from last subtract
        cmpult  r28,r25,r25             C compute cy from last subtract
        stq     r20,0(r16)
        bis     r8,r25,r25              C combine cy from the two subtracts
        stq     r21,8(r16)
        subq    r7,r3,r28               C 4th main subtract
        subq    r28,r25,r23             C 4th carry subtract
        cmpult  r7,r3,r8                C compute cy from last subtract
        cmpult  r28,r25,r25             C compute cy from last subtract
        bis     r8,r25,r25              C combine cy from the two subtracts
        addq    r16,32,r16              C update res_ptr
        stq     r22,-16(r16)
        stq     r23,-8(r16)
$Lend2: addq    r19,4,r19               C restore loop cnt
        beq     r19,$Lret               C nothing left, return current cy
C Start software pipeline for 2nd loop
        ldq     r0,0(r18)
        ldq     r4,0(r17)
        subq    r19,1,r19
        beq     r19,$Lend0              C exactly one limb left
C 2nd loop handles remaining 1-3 limbs
        ALIGN(16)
$Loop0: subq    r4,r0,r28               C main subtract
        cmpult  r4,r0,r8                C compute cy from last subtract
        ldq     r0,8(r18)               C preload next s2 limb
        ldq     r4,8(r17)               C preload next s1 limb
        subq    r28,r25,r20             C carry subtract
        addq    r18,8,r18
        addq    r17,8,r17
        stq     r20,0(r16)
        cmpult  r28,r25,r25             C compute cy from last subtract
        subq    r19,1,r19               C decr loop cnt
        bis     r8,r25,r25              C combine cy from the two subtracts
        addq    r16,8,r16
        bne     r19,$Loop0
C Final limb: already loaded into r0/r4, nothing further to preload
$Lend0: subq    r4,r0,r28               C main subtract
        subq    r28,r25,r20             C carry subtract
        cmpult  r4,r0,r8                C compute cy from last subtract
        cmpult  r28,r25,r25             C compute cy from last subtract
        stq     r20,0(r16)
        bis     r8,r25,r25              C combine cy from the two subtracts

$Lret:  bis     r25,r31,r0              C return cy
        ret     r31,(r26),1
EPILOGUE()
ASM_END()