dnl  PowerPC-64 mpn_addmul_1 and mpn_submul_1.

dnl  Copyright 1999, 2000, 2001, 2003, 2004, 2005, 2006, 2010, 2011, 2012
dnl  Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')

C                   mpn_addmul_1    mpn_submul_1
C                   cycles/limb     cycles/limb
C POWER3/PPC630		6-18		6-18
C POWER4/PPC970		 8		 8.3
C POWER5		 8		 8.25
C POWER6		16.25		16.75
C POWER7		 3.77		 4.9

C TODO
C  * Try to reduce the number of needed live registers
C  * Add support for _1c entry points

C OVERVIEW
C  Each limb loaded from up is multiplied by the 64-bit value in r6; the
C  low and high product halves are chained together and the chained value is
C  added to (addmul) or subtracted from (submul) the limb loaded from rp,
C  and the result is stored back to rp.  On return r3 holds the last high
C  product limb plus the final carry/borrow.
C
C  L(top) handles 4 limbs per pass and L(end) always handles the last 2.
C  The entry code dispatches on n mod 4 to a block which consumes enough
C  leading limbs to leave a remainder of the form 4k+2:
C    n mod 4 = 3  L(b11)  consumes 1 limb
C    n mod 4 = 0  L(b00)  consumes 2 limbs
C    n mod 4 = 1  L(b01)  consumes 3 limbs (n = 1 is finished on the spot)
C    n mod 4 = 2  L(b10)  consumes none
C  ctr is preset to (n + 3) >> 2 and is decremented once by each bdnz/bdz.
C
C  Register roles on entry to L(top) and L(end):
C    r9, r27  the next two limbs from up, already loaded
C    r12      high product limb carried in from the previous block
C    CA       carry (addmul) or borrow (submul) from the previous block
C
C  r27-r31 are stored at negative offsets from r1 without adjusting r1
C  (presumably relying on the ABI protected area below the stack pointer;
C  confirm against the target ABI).
C
C  Submul only: subfc/subfe leave CA = 1 when there was NO borrow.  The pair
C    subfe r11, r11, r11    gives r11 = CA - 1  (0 or -1)
C    addic r11, r11, 1      sets CA exactly when r11 was -1
C  therefore inverts CA, so that CA = 1 means borrow and can be absorbed by
C  the following adde/addze.  SM(...) emits its argument only for submul.

C INPUT PARAMETERS
C  vl is referenced below by its register name r6.
define(`rp', `r3')
define(`up', `r4')
define(`n',  `r5')
define(`vl', `r6')

ifdef(`OPERATION_addmul_1',`
  define(ADDSUBC,	adde)
  define(ADDSUB,	addc)
  define(func,		mpn_addmul_1)
  define(func_nc,	mpn_addmul_1c)	C FIXME: not really supported
  define(SM,		`')
')
ifdef(`OPERATION_submul_1',`
  define(ADDSUBC,	subfe)
  define(ADDSUB,	subfc)
  define(func,		mpn_submul_1)
  define(func_nc,	mpn_submul_1c)	C FIXME: not really supported
  define(SM,		`$1')
')

MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)

ASM_START()
PROLOGUE(func)
C Save r27-r31, interleaved with the computation of n mod 4 and of ctr.
	std	r31, -8(r1)
	rldicl.	r0, n, 0,62	C r0 = n & 3, set cr0
	std	r30, -16(r1)
	cmpdi	cr6, r0, 2
	std	r29, -24(r1)
	addi	n, n, 3		C compute count...
	std	r28, -32(r1)
	srdi	n, n, 2		C ...for ctr
	std	r27, -40(r1)
	mtctr	n		C copy count into ctr
	beq	cr0, L(b00)	C n mod 4 = 0
	blt	cr6, L(b01)	C n mod 4 = 1
	beq	cr6, L(b10)	C n mod 4 = 2

C n mod 4 = 3: do one limb, preload the next two, join the loop at L(bot).
L(b11):	ld	r9, 0(up)
	ld	r28, 0(rp)
	mulld	r0, r9, r6
	mulhdu	r12, r9, r6	C high limb becomes the carry-in limb
	ADDSUB	r0, r0, r28
	std	r0, 0(rp)
	addi	rp, rp, 8
	ld	r9, 8(up)
	ld	r27, 16(up)
	addi	up, up, 24
SM(`	subfe	r11, r11, r11 ')
	b	L(bot)

C n mod 4 = 0: do two limbs, preload the next two, join the loop at L(bot).
	ALIGN(16)
L(b00):	ld	r9, 0(up)
	ld	r27, 8(up)
	ld	r28, 0(rp)
	ld	r29, 8(rp)
	mulld	r0, r9, r6
	mulhdu	r5, r9, r6
	mulld	r7, r27, r6
	mulhdu	r8, r27, r6
	addc	r7, r7, r5	C chain the two products
	addze	r12, r8		C r12 = top limb of the chain
	ADDSUB	r0, r0, r28
	std	r0, 0(rp)
	ADDSUBC	r7, r7, r29
	std	r7, 8(rp)
	addi	rp, rp, 16
	ld	r9, 16(up)
	ld	r27, 24(up)
	addi	up, up, 32
SM(`	subfe	r11, r11, r11 ')
	b	L(bot)

C n mod 4 = 1: if ctr was 1 then n = 1 and the single limb is done right
C here.  That path returns without reloading r27-r31, none of which has
C been written on it.
	ALIGN(16)
L(b01):	bdnz	L(gt1)
	ld	r9, 0(up)
	ld	r11, 0(rp)
	mulld	r0, r9, r6
	mulhdu	r8, r9, r6
	ADDSUB	r0, r0, r11
	std	r0, 0(rp)
SM(`	subfe	r11, r11, r11 ')
SM(`	addic	r11, r11, 1 ')
	addze	r3, r8		C return high limb + carry/borrow
	blr
C n = 4k+1 with k > 0: do three limbs, preload the next two, join at L(bot).
L(gt1):	ld	r9, 0(up)
	ld	r27, 8(up)
	mulld	r0, r9, r6
	mulhdu	r5, r9, r6
	mulld	r7, r27, r6
	mulhdu	r8, r27, r6
	ld	r9, 16(up)
	ld	r28, 0(rp)
	ld	r29, 8(rp)
	ld	r30, 16(rp)
	mulld	r11, r9, r6
	mulhdu	r10, r9, r6
	addc	r7, r7, r5	C chain the three products
	adde	r11, r11, r8
	addze	r12, r10	C r12 = top limb of the chain
	ADDSUB	r0, r0, r28
	std	r0, 0(rp)
	ADDSUBC	r7, r7, r29
	std	r7, 8(rp)
	ADDSUBC	r11, r11, r30
	std	r11, 16(rp)
	addi	rp, rp, 24
	ld	r9, 24(up)
	ld	r27, 32(up)
	addi	up, up, 40
SM(`	subfe	r11, r11, r11 ')
	b	L(bot)

C n mod 4 = 2: nothing to peel off.  Clear CA and the carry-in limb, preload
C two limbs, and go straight to L(end) when n = 2.
L(b10):	addic	r0, r0, 0	C adding 0 cannot carry, so CA = 0
	li	r12, 0		C cy_limb = 0
	ld	r9, 0(up)
	ld	r27, 8(up)
	bdz	L(end)
	addi	up, up, 16

C Main loop, 4 limbs per pass.  The trailing register numbers in the
C comments are the original annotations of the registers involved.
	ALIGN(16)
L(top):	mulld	r0, r9, r6
	mulhdu	r5, r9, r6	C 9
	mulld	r7, r27, r6
	mulhdu	r8, r27, r6	C 27
	ld	r9, 0(up)
	ld	r28, 0(rp)
	ld	r27, 8(up)
	ld	r29, 8(rp)
	adde	r0, r0, r12	C 0 12
	adde	r7, r7, r5	C 5 7
	mulld	r5, r9, r6
	mulhdu	r10, r9, r6	C 9
	mulld	r11, r27, r6
	mulhdu	r12, r27, r6	C 27
	ld	r9, 16(up)
	ld	r30, 16(rp)
	ld	r27, 24(up)
	ld	r31, 24(rp)
	adde	r5, r5, r8	C 8 5
	adde	r11, r11, r10	C 10 11
	addze	r12, r12	C 12
	ADDSUB	r0, r0, r28	C 0 28
	std	r0, 0(rp)	C 0
	ADDSUBC	r7, r7, r29	C 7 29
	std	r7, 8(rp)	C 7
	ADDSUBC	r5, r5, r30	C 5 30
	std	r5, 16(rp)	C 5
	ADDSUBC	r11, r11, r31	C 11 31
	std	r11, 24(rp)	C 11
	addi	up, up, 32
SM(`	subfe	r11, r11, r11 ')
	addi	rp, rp, 32
C Common join point: every entry block has already executed its own subfe,
C so only the addic half of the CA inversion remains to be done here.
L(bot):
SM(`	addic	r11, r11, 1 ')
	bdnz	L(top)

C Final two limbs (already in r9 and r27), then restore r27-r31 and return.
L(end):	mulld	r0, r9, r6
	mulhdu	r5, r9, r6
	mulld	r7, r27, r6
	mulhdu	r8, r27, r6
	ld	r28, 0(rp)
	ld	r29, 8(rp)
	adde	r0, r0, r12
	adde	r7, r7, r5
	addze	r8, r8
	ADDSUB	r0, r0, r28
	std	r0, 0(rp)
	ADDSUBC	r7, r7, r29
	std	r7, 8(rp)
SM(`	subfe	r11, r11, r11 ')
SM(`	addic	r11, r11, 1 ')
	addze	r3, r8		C return high limb + carry/borrow
	ld	r31, -8(r1)
	ld	r30, -16(r1)
	ld	r29, -24(r1)
	ld	r28, -32(r1)
	ld	r27, -40(r1)
	blr
EPILOGUE()