dnl  Alpha mpn_sub_n -- Subtract two limb vectors of the same length > 0
dnl  and store difference in a third limb vector.

dnl  Copyright 1995, 1999, 2000, 2005 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')

C      cycles/limb
C EV4:     ?
C EV5:     4.75
C EV6:     3

dnl  INPUT PARAMETERS
dnl  res_ptr	r16
dnl  s1_ptr	r17
dnl  s2_ptr	r18
dnl  size	r19

dnl  RETURN VALUE
dnl  r0		borrow (cy) out of the most significant limb

dnl  REGISTER USE
dnl  r0-r3	limbs loaded from s2_ptr (subtrahend)
dnl  r4-r7	limbs loaded from s1_ptr (minuend)
dnl  r20-r23	difference limbs waiting to be stored through res_ptr
dnl  r28	raw difference s1 - s2, before the incoming borrow is applied
dnl  r8		borrow out of the raw subtract, i.e. s1 < s2
dnl  r25	running borrow (cy); copied to r0 on return
dnl
dnl  Every limb is computed as
dnl	t   = s1 - s2		(main subtract)
dnl	res = t - cy		(carry subtract)
dnl	cy  = (s1 < s2) | (t < cy)
dnl  The first limb of the pipelined path skips the carry subtract because
dnl  cy is known to be zero there.
dnl
dnl  Sizes of 4 or more go through the 4-way software pipelined loop, which
dnl  loads the next group of limbs while finishing the current one.  The
dnl  remaining 0-3 limbs (or all limbs when size < 4) go through the simple
dnl  one limb per iteration loop at the end.

ASM_START()
PROLOGUE(mpn_sub_n)
	bis	r31,r31,r25		C clear cy
	subq	r19,4,r19		C decr loop cnt
	blt	r19,$Lend2		C if less than 4 limbs, goto 2nd loop
C Start software pipeline for 1st loop
	ldq	r0,0(r18)
	ldq	r4,0(r17)
	ldq	r1,8(r18)
	ldq	r5,8(r17)
	addq	r17,32,r17		C update s1_ptr
	ldq	r2,16(r18)
	subq	r4,r0,r20		C 1st main subtract (cy is 0, so this is final)
	ldq	r3,24(r18)
	subq	r19,4,r19		C decr loop cnt
	ldq	r6,-16(r17)
	cmpult	r4,r0,r25		C compute cy from last subtract
	ldq	r7,-8(r17)
	subq	r5,r1,r28		C 2nd main subtract
	addq	r18,32,r18		C update s2_ptr
	subq	r28,r25,r21		C 2nd carry subtract
	cmpult	r5,r1,r8		C compute cy from last subtract
	blt	r19,$Lend1		C if less than 4 limbs remain, jump
C 1st loop handles groups of 4 limbs in a software pipeline
	ALIGN(16)
$Loop:	cmpult	r28,r25,r25		C compute cy from last subtract
	ldq	r0,0(r18)
	bis	r8,r25,r25		C combine cy from the two subtracts
	ldq	r1,8(r18)
	subq	r6,r2,r28		C 3rd main subtract
	ldq	r4,0(r17)
	subq	r28,r25,r22		C 3rd carry subtract
	ldq	r5,8(r17)
	cmpult	r6,r2,r8		C compute cy from last subtract
	cmpult	r28,r25,r25		C compute cy from last subtract
	stq	r20,0(r16)
	bis	r8,r25,r25		C combine cy from the two subtracts
	stq	r21,8(r16)
	subq	r7,r3,r28		C 4th main subtract
	subq	r28,r25,r23		C 4th carry subtract
	cmpult	r7,r3,r8		C compute cy from last subtract
	cmpult	r28,r25,r25		C compute cy from last subtract
	addq	r17,32,r17		C update s1_ptr
	bis	r8,r25,r25		C combine cy from the two subtracts
	addq	r16,32,r16		C update res_ptr
	subq	r4,r0,r28		C 1st main subtract
	ldq	r2,16(r18)
	subq	r28,r25,r20		C 1st carry subtract
	ldq	r3,24(r18)
	cmpult	r4,r0,r8		C compute cy from last subtract
	ldq	r6,-16(r17)
	cmpult	r28,r25,r25		C compute cy from last subtract
	ldq	r7,-8(r17)
	bis	r8,r25,r25		C combine cy from the two subtracts
	subq	r19,4,r19		C decr loop cnt
	stq	r22,-16(r16)
	subq	r5,r1,r28		C 2nd main subtract
	stq	r23,-8(r16)
	subq	r28,r25,r21		C 2nd carry subtract
	addq	r18,32,r18		C update s2_ptr
	cmpult	r5,r1,r8		C compute cy from last subtract
	bge	r19,$Loop
C Finish software pipeline for 1st loop
$Lend1:	cmpult	r28,r25,r25		C compute cy from last subtract
	bis	r8,r25,r25		C combine cy from the two subtracts
	subq	r6,r2,r28		C 3rd main subtract
	subq	r28,r25,r22		C 3rd carry subtract
	cmpult	r6,r2,r8		C compute cy from last subtract
	cmpult	r28,r25,r25		C compute cy from last subtract
	stq	r20,0(r16)
	bis	r8,r25,r25		C combine cy from the two subtracts
	stq	r21,8(r16)
	subq	r7,r3,r28		C 4th main subtract
	subq	r28,r25,r23		C 4th carry subtract
	cmpult	r7,r3,r8		C compute cy from last subtract
	cmpult	r28,r25,r25		C compute cy from last subtract
	bis	r8,r25,r25		C combine cy from the two subtracts
	addq	r16,32,r16		C update res_ptr
	stq	r22,-16(r16)
	stq	r23,-8(r16)
$Lend2:	addq	r19,4,r19		C restore loop cnt
	beq	r19,$Lret		C no limbs left, just return cy
C Start software pipeline for 2nd loop
	ldq	r0,0(r18)
	ldq	r4,0(r17)
	subq	r19,1,r19
	beq	r19,$Lend0		C exactly one limb left, skip the loop
C 2nd loop handles remaining 1-3 limbs
	ALIGN(16)
$Loop0:	subq	r4,r0,r28		C main subtract
	cmpult	r4,r0,r8		C compute cy from last subtract
	ldq	r0,8(r18)		C fetch next s2 limb
	ldq	r4,8(r17)		C fetch next s1 limb
	subq	r28,r25,r20		C carry subtract
	addq	r18,8,r18
	addq	r17,8,r17
	stq	r20,0(r16)
	cmpult	r28,r25,r25		C compute cy from last subtract
	subq	r19,1,r19		C decr loop cnt
	bis	r8,r25,r25		C combine cy from the two subtracts
	addq	r16,8,r16
	bne	r19,$Loop0
$Lend0:	subq	r4,r0,r28		C main subtract
	subq	r28,r25,r20		C carry subtract
	cmpult	r4,r0,r8		C compute cy from last subtract
	cmpult	r28,r25,r25		C compute cy from last subtract
	stq	r20,0(r16)
	bis	r8,r25,r25		C combine cy from the two subtracts

$Lret:	bis	r25,r31,r0		C return cy
	ret	r31,(r26),1
EPILOGUE(mpn_sub_n)
ASM_END()