dnl  PowerPC-32 mpn_submul_1 -- Multiply a limb vector with a limb and subtract
dnl  the result from a second limb vector.

dnl  Copyright 1995, 1997, 1998, 2000, 2002, 2005 Free Software Foundation,
dnl  Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C                cycles/limb
C 603e:            ?
C 604e:            7.5
C 75x (G3):        9.3-15
C 7400,7410 (G4):  9.3-15
C 744x,745x (G4+): 10.5
C power4/ppc970:   6.75
C power5:          6.5

C INPUT PARAMETERS
C rp    r3
C up    r4
C n     r5
C vl    r6

C This is optimized for the PPC604.  See addmul_1.asm for additional comments.

C Operation
C   Multiplies the n-limb vector at up by the single limb vl, subtracts the
C   product from the n-limb vector at rp in place, and leaves the outgoing
C   carry limb (last high product word plus the pending borrow) in r3.
C
C   n must be at least 1: the first limb is loaded and processed before the
C   loop counter is ever tested.
C
C Carry convention
C   subfc/subfe leave CA set when there is NO borrow.  After each subtract
C   step the code therefore inverts CA, so that the following adde/addze
C   folds the borrow into the running carry limb.
C
C Code paths
C   n <= 9   One limb per pass.  Uses r0 and r7-r10 only, no stack frame.
C   n >  9   First limb on its own, then (n-1)/4 passes of a 4-way unrolled
C            loop, then (n-1) mod 4 single-limb passes.  r30 and r31 are
C            saved in a 16-byte stack frame and restored before returning.

ASM_START()
PROLOGUE(mpn_submul_1)
        cmpwi   cr0,r5,9        C more than 9 limbs?
        bgt     cr0,L(big)      C branch if more than 9 limbs

C Short operands: simple one-limb-per-pass loop.
        mtctr   r5              C ctr = n
        lwz     r0,0(r4)        C r0 = up[0]
        mullw   r7,r0,r6        C low word of up[0]*vl
        mulhwu  r10,r0,r6       C high word of up[0]*vl, the first cy_limb
        lwz     r9,0(r3)        C r9 = rp[0]
        subfc   r8,r7,r9        C r8 = rp[0] - low word
        addc    r7,r7,r8        C invert cy (r7 is junk)
        addi    r3,r3,-4        C bias rp so 4(r3) addresses the current limb
        bdz     L(end)          C n = 1: only the final store remains
L(loop):
        lwzu    r0,4(r4)        C r0 = next up limb, advance up
        stwu    r8,4(r3)        C store previous result limb, advance rp
        mullw   r8,r0,r6        C low word of product
        adde    r7,r8,r10       C low word + cy_limb + borrow
        mulhwu  r10,r0,r6       C high word of product
        lwz     r9,4(r3)        C r9 = current rp limb
        addze   r10,r10         C new cy_limb = high word + carry of the adde
        subfc   r8,r7,r9        C r8 = rp limb - r7
        addc    r7,r7,r8        C invert cy (r7 is junk)
        bdnz    L(loop)
L(end): stw     r8,4(r3)        C store the last result limb
        addze   r3,r10          C return cy_limb + borrow
        blr

C Long operands: needs r30/r31 as extra scratch, so build a small frame.
L(big): stwu    r1,-16(r1)      C allocate a 16-byte frame
        addi    r5,r5,-1        C r5 = n-1, limbs left after the first one
        stw     r30,8(r1)       C save r30
        srwi    r0,r5,2         C r0 = (n-1)/4
        stw     r31,12(r1)      C save r31
        mtctr   r0              C ctr = number of 4-limb passes

C First limb, handled alone so the unrolled loop starts with a valid
C cy_limb in r0 and the borrow in CA.
        lwz     r7,0(r4)        C r7 = up[0]
        mullw   r8,r7,r6        C low word of up[0]*vl
        mulhwu  r0,r7,r6        C r0 = cy_limb
        lwz     r7,0(r3)        C r7 = rp[0]
        subfc   r7,r8,r7        C r7 = rp[0] - low word
        addc    r8,r8,r7        C invert cy (r8 is junk)
        stw     r7,0(r3)        C store rp[0]

C Four limbs per pass.  The four low words are chained with the previous
C high words through adde, then subtracted from rp with one subfc/subfe
C chain, so CA needs inverting only once per pass.
L(loopU):
        lwz     r7,4(r4)
        lwz     r12,8(r4)
        lwz     r30,12(r4)
        lwzu    r31,16(r4)      C fourth limb, advance up by 4 limbs
        mullw   r8,r7,r6
        mullw   r9,r12,r6
        mullw   r10,r30,r6
        mullw   r11,r31,r6
        adde    r8,r8,r0        C add cy_limb
        mulhwu  r0,r7,r6
        lwz     r7,4(r3)
        adde    r9,r9,r0
        mulhwu  r0,r12,r6
        lwz     r12,8(r3)
        adde    r10,r10,r0
        mulhwu  r0,r30,r6
        lwz     r30,12(r3)
        adde    r11,r11,r0
        mulhwu  r0,r31,r6
        lwz     r31,16(r3)
        addze   r0,r0           C new cy_limb
        subfc   r7,r8,r7
        stw     r7,4(r3)
        subfe   r12,r9,r12
        stw     r12,8(r3)
        subfe   r30,r10,r30
        stw     r30,12(r3)
        subfe   r31,r11,r31
        stwu    r31,16(r3)      C fourth result limb, advance rp by 4 limbs
        subfe   r11,r11,r11     C invert ...
        addic   r11,r11,1       C ... carry
        bdnz    L(loopU)

        andi.   r31,r5,3        C (n-1) mod 4 limbs still to do
        mtctr   r31
        beq     cr0,L(endx)     C nothing left over

C Leftover limbs, one per pass.
L(loopE):
        lwzu    r7,4(r4)        C r7 = next up limb, advance up
        mullw   r8,r7,r6
        adde    r8,r8,r0        C add cy_limb
        mulhwu  r0,r7,r6
        lwz     r7,4(r3)        C r7 = current rp limb
        addze   r0,r0           C new cy_limb
        subfc   r7,r8,r7
        addc    r8,r8,r7        C invert cy (r8 is junk)
        stwu    r7,4(r3)        C store result limb, advance rp
        bdnz    L(loopE)
L(endx):
        addze   r3,r0           C return cy_limb + borrow
        lwz     r30,8(r1)       C restore r30
        lwz     r31,12(r1)      C restore r31
        addi    r1,r1,16        C release the frame
        blr
EPILOGUE(mpn_submul_1)