dnl  PowerPC-64 mpn_addmul_1 and mpn_submul_1.

dnl  Copyright 1999-2001, 2003-2006, 2010-2012 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C                   mpn_addmul_1    mpn_submul_1
C                   cycles/limb     cycles/limb
C POWER3/PPC630		6-18		6-18
C POWER4/PPC970		 8		 8.3
C POWER5		 8		 8.25
C POWER6		16.25		16.75
C POWER7		 3.77		 4.9

C TODO
C  * Try to reduce the number of needed live registers
C  * Add support for _1c entry points

C OVERVIEW
C  For each of the n limbs the code multiplies up[i] by vl, folds in the
C  carry limb from the previous product, and then adds the low limb to
C  rp[i] (addmul_1) or subtracts it from rp[i] (submul_1), storing the
C  result back to rp[i].  The value left in r3 on return is the last high
C  product limb plus the outstanding carry (addmul_1) or borrow (submul_1).
C
C  Structure:
C   * ctr is loaded with (n+3)/4.  The entry code dispatches on n mod 4:
C       n mod 4 = 0  ->  L(b00): two limbs done up front
C       n mod 4 = 1  ->  L(b01): n = 1 is finished and returned right there,
C                        otherwise L(gt1) does three limbs up front
C       n mod 4 = 2  ->  L(b10): nothing up front, carry state cleared
C       n mod 4 = 3  ->  L(b11): one limb done up front
C   * L(top) handles four limbs per iteration.
C   * L(end) always handles the final two limbs (every path except n = 1).
C
C  Carry convention: on arrival at L(top) and L(end), r12 holds the pending
C  high product limb and CA holds the pending carry to be added with it.
C  For addmul_1 the CA left by the last ADDSUB/ADDSUBC is already that carry.
C  For submul_1 CA is the inverse of the borrow, so each SM(...) pair fixes
C  it up:  subfe r11,r11,r11  gives r11 = CA-1 (0 for no borrow, -1 for
C  borrow), and  addic r11,r11,1  then sets CA exactly when r11 was -1.
C
C  Register notes:
C   * r6 is vl.  r5 (n) is reused as scratch once the count is in ctr.
C   * r9 and r27 hold the two up limbs preloaded for the next multiply pair.
C   * r27-r31 are stored at -40(r1)..-8(r1) on entry and reloaded before the
C     return at L(end); r1 itself is never adjusted, so this depends on the
C     area just below the stack pointer being safe to use in a leaf routine.
C     The n = 1 return path touches none of r27-r31 and skips the reloads.
C   * n = 0 is not handled: ctr would be loaded with zero and the first
C     bdnz in L(bot) would wrap.  NOTE(review): presumably callers always
C     pass n >= 1 -- confirm against the caller contract.

C INPUT PARAMETERS
define(`rp', `r3')
define(`up', `r4')
define(`n',  `r5')
define(`vl', `r6')

ifdef(`OPERATION_addmul_1',`
  define(ADDSUBC,	adde)
  define(ADDSUB,	addc)
  define(func,		mpn_addmul_1)
  define(func_nc,	mpn_addmul_1c)	C FIXME: not really supported
  define(SM,		`')
')
ifdef(`OPERATION_submul_1',`
  define(ADDSUBC,	subfe)
  define(ADDSUB,	subfc)
  define(func,		mpn_submul_1)
  define(func_nc,	mpn_submul_1c)	C FIXME: not really supported
  define(SM,		`$1')
')

MULFUNC_PROLOGUE(mpn_addmul_1 mpn_submul_1)

ASM_START()
PROLOGUE(func)
C Register saves are interleaved with the count/dispatch computation.
	std	r31, -8(r1)
	rldicl.	r0, n, 0,62	C r0 = n & 3, set cr0
	std	r30, -16(r1)
	cmpdi	cr6, r0, 2
	std	r29, -24(r1)
	addi	n, n, 3		C compute count...
	std	r28, -32(r1)
	srdi	n, n, 2		C ...for ctr
	std	r27, -40(r1)
	mtctr	n		C copy count into ctr
	beq	cr0, L(b00)	C n mod 4 = 0
	blt	cr6, L(b01)	C n mod 4 = 1
	beq	cr6, L(b10)	C n mod 4 = 2

C n mod 4 = 3: do one limb, preload the next two, join at L(bot).
L(b11):	ld	r9, 0(up)
	ld	r28, 0(rp)
	mulld	r0, r9, r6
	mulhdu	r12, r9, r6
	ADDSUB	r0, r0, r28
	std	r0, 0(rp)
	addi	rp, rp, 8
	ld	r9, 8(up)
	ld	r27, 16(up)
	addi	up, up, 24
SM(`	subfe	r11, r11, r11 ')
	b	L(bot)

C n mod 4 = 0: do two limbs, preload the next two, join at L(bot).
	ALIGN(16)
L(b00):	ld	r9, 0(up)
	ld	r27, 8(up)
	ld	r28, 0(rp)
	ld	r29, 8(rp)
	mulld	r0, r9, r6
	mulhdu	r5, r9, r6
	mulld	r7, r27, r6
	mulhdu	r8, r27, r6
	addc	r7, r7, r5
	addze	r12, r8
	ADDSUB	r0, r0, r28
	std	r0, 0(rp)
	ADDSUBC	r7, r7, r29
	std	r7, 8(rp)
	addi	rp, rp, 16
	ld	r9, 16(up)
	ld	r27, 24(up)
	addi	up, up, 32
SM(`	subfe	r11, r11, r11 ')
	b	L(bot)

C n mod 4 = 1: the bdnz consumes one count; it falls through only for n = 1.
	ALIGN(16)
L(b01):	bdnz	L(gt1)
C n = 1: single limb, return directly (no callee-saved register was used).
	ld	r9, 0(up)
	ld	r11, 0(rp)
	mulld	r0, r9, r6
	mulhdu	r8, r9, r6
	ADDSUB	r0, r0, r11
	std	r0, 0(rp)
SM(`	subfe	r11, r11, r11 ')
SM(`	addic	r11, r11, 1 ')
	addze	r3, r8
	blr
C n = 5, 9, ...: do three limbs, preload the next two, join at L(bot).
L(gt1):	ld	r9, 0(up)
	ld	r27, 8(up)
	mulld	r0, r9, r6
	mulhdu	r5, r9, r6
	mulld	r7, r27, r6
	mulhdu	r8, r27, r6
	ld	r9, 16(up)
	ld	r28, 0(rp)
	ld	r29, 8(rp)
	ld	r30, 16(rp)
	mulld	r11, r9, r6
	mulhdu	r10, r9, r6
	addc	r7, r7, r5
	adde	r11, r11, r8
	addze	r12, r10
	ADDSUB	r0, r0, r28
	std	r0, 0(rp)
	ADDSUBC	r7, r7, r29
	std	r7, 8(rp)
	ADDSUBC	r11, r11, r30
	std	r11, 16(rp)
	addi	rp, rp, 24
	ld	r9, 24(up)
	ld	r27, 32(up)
	addi	up, up, 40
SM(`	subfe	r11, r11, r11 ')
	b	L(bot)

C n mod 4 = 2: nothing to do up front.  The addic adds zero to r0 (= 2 here)
C and so leaves CA clear; r12 is zeroed to match.
L(b10):	addic	r0, r0, 0
	li	r12, 0		C cy_limb = 0
	ld	r9, 0(up)
	ld	r27, 8(up)
	bdz	L(end)
	addi	up, up, 16

C Main loop, four limbs per iteration.  The trailing register numbers in the
C comments are the original scheduling annotations.
	ALIGN(16)
L(top):	mulld	r0, r9, r6
	mulhdu	r5, r9, r6	C 9
	mulld	r7, r27, r6
	mulhdu	r8, r27, r6	C 27
	ld	r9, 0(up)
	ld	r28, 0(rp)
	ld	r27, 8(up)
	ld	r29, 8(rp)
	adde	r0, r0, r12	C 0 12
	adde	r7, r7, r5	C 5 7
	mulld	r5, r9, r6
	mulhdu	r10, r9, r6	C 9
	mulld	r11, r27, r6
	mulhdu	r12, r27, r6	C 27
	ld	r9, 16(up)
	ld	r30, 16(rp)
	ld	r27, 24(up)
	ld	r31, 24(rp)
	adde	r5, r5, r8	C 8 5
	adde	r11, r11, r10	C 10 11
	addze	r12, r12	C 12
	ADDSUB	r0, r0, r28	C 0 28
	std	r0, 0(rp)	C 0
	ADDSUBC	r7, r7, r29	C 7 29
	std	r7, 8(rp)	C 7
	ADDSUBC	r5, r5, r30	C 5 30
	std	r5, 16(rp)	C 5
	ADDSUBC	r11, r11, r31	C 11 31
	std	r11, 24(rp)	C 11
	addi	up, up, 32
SM(`	subfe	r11, r11, r11 ')
	addi	rp, rp, 32
C Common join point: every path arriving here has just executed the subfe
C half of the submul_1 borrow fix-up; the addic half is shared.
L(bot):
SM(`	addic	r11, r11, 1 ')
	bdnz	L(top)

C Final two limbs (already preloaded in r9 and r27), then form the return
C value and reload the saved registers.
L(end):	mulld	r0, r9, r6
	mulhdu	r5, r9, r6
	mulld	r7, r27, r6
	mulhdu	r8, r27, r6
	ld	r28, 0(rp)
	ld	r29, 8(rp)
	adde	r0, r0, r12
	adde	r7, r7, r5
	addze	r8, r8
	ADDSUB	r0, r0, r28
	std	r0, 0(rp)
	ADDSUBC	r7, r7, r29
	std	r7, 8(rp)
SM(`	subfe	r11, r11, r11 ')
SM(`	addic	r11, r11, 1 ')
	addze	r3, r8
	ld	r31, -8(r1)
	ld	r30, -16(r1)
	ld	r29, -24(r1)
	ld	r28, -32(r1)
	ld	r27, -40(r1)
	blr
EPILOGUE()