1dnl PowerPC-64 mpn_add_n/mpn_sub_n -- mpn addition and subtraction. 2 3dnl Copyright 1999, 2000, 2001, 2003, 2004, 2005, 2007, 2011 Free Software 4dnl Foundation, Inc. 5 6dnl This file is part of the GNU MP Library. 7 8dnl The GNU MP Library is free software; you can redistribute it and/or modify 9dnl it under the terms of the GNU Lesser General Public License as published 10dnl by the Free Software Foundation; either version 3 of the License, or (at 11dnl your option) any later version. 12 13dnl The GNU MP Library is distributed in the hope that it will be useful, but 14dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public 16dnl License for more details. 17 18dnl You should have received a copy of the GNU Lesser General Public License 19dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. 20 21include(`../config.m4') 22 23C cycles/limb 24C POWER3/PPC630 1.5 25C POWER4/PPC970 2 26C POWER5 2 27C POWER6 2.63 28C POWER7 2.25-2.87 29 30C This code is a little bit slower for POWER3/PPC630 than the simple code used 31C previously, but it is much faster for POWER4/PPC970. The reason for the 32C POWER3/PPC630 slowdown can be attributed to the saving and restoring of 4 33C registers. 
C INPUT PARAMETERS
C rp	r3
C up	r4
C vp	r5
C n	r6
C
C The func_nc entry point additionally reads r7, which it passes to SETCBR
C to initialise the carry bit before joining the common code.
C
C The return value is produced in r3 from the final state of the carry bit
C (see the subfe/GENRVAL pair at L(ret)).

C Operation selection.  Exactly one of OPERATION_add_n and OPERATION_sub_n is
C expected to be defined when this file is processed (presumably by the build
C system; verify against the Makefile rules).  The macros mean:
C   ADDSUBC   limb operation with carry in and carry out (adde or subfe)
C   ADDSUB    limb operation with carry out only (addc or subfc); it is
C             defined here but not referenced anywhere in the code below
C   func      name of the entry point without carry input
C   func_nc   name of the entry point with carry input in r7
C   GENRVAL   last step of the return value computation, applied to r3
C   SETCBR    set the carry bit from the register given as argument:
C             add: addic rX,-1 sets CA exactly when the register is nonzero
C             sub: subfic rX,0 sets CA exactly when the register is zero
C                  (CA set means no borrow for subfe)
C   CLRCB     put the carry bit in the no carry (add) or no borrow (sub)
C             state; the sub variant adds -1 to r1, which sets CA as long as
C             r1 is nonzero (presumably always true for the stack pointer)
C All of these clobber r0, which is dead at that point.
ifdef(`OPERATION_add_n',`
  define(ADDSUBC,	adde)
  define(ADDSUB,	addc)
  define(func,		mpn_add_n)
  define(func_nc,	mpn_add_nc)
  define(GENRVAL,	`addi	r3, r3, 1')
  define(SETCBR,	`addic	r0, $1, -1')
  define(CLRCB,		`addic	r0, r0, 0')
')
ifdef(`OPERATION_sub_n',`
  define(ADDSUBC,	subfe)
  define(ADDSUB,	subfc)
  define(func,		mpn_sub_n)
  define(func_nc,	mpn_sub_nc)
  define(GENRVAL,	`neg	r3, r3')
  define(SETCBR,	`subfic	r0, $1, 0')
  define(CLRCB,		`addic	r0, r1, -1')
')

MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)

ASM_START()

C func_nc: same as func, but the carry bit is first initialised from r7.
C Control then continues at L(ent) inside func, so both entry points share
C one body and one return path.
PROLOGUE(func_nc)
	SETCBR(r7)
	b	L(ent)
EPILOGUE()

C func: process n limbs from up (r4) and vp (r5), storing the results at
C rp (r3).  Every limb operation is an ADDSUBC, so the carry bit links all
C limbs together; nothing between the first and the last ADDSUBC may modify
C CA.  The instructions used in between (ld, std, addi, bdnz, bdz, b) leave
C CA alone.
C
C Register use:
C   r0, r6-r12   limbs loaded from up and vp (r6 holds n only until mtctr)
C   r28-r31      result limbs; saved on entry and restored at L(ret)
C   ctr          number of passes still to run, (n + 3) >> 2 initially
C
C NOTE(review): n = 0 is not treated specially; ctr would be loaded with 0
C and the bdz at L(go) would not branch.  Presumably callers guarantee
C n >= 1 -- confirm against the mpn interface documentation.
PROLOGUE(func)
	CLRCB			C start with no carry / no borrow
C r28-r31 are stored at negative offsets from r1 without moving r1
C (presumably relying on the ABI protected area below the stack pointer --
C confirm for the target ABI).
L(ent):	std	r31, -8(r1)
	std	r30, -16(r1)
	std	r29, -24(r1)
	std	r28, -32(r1)

C Dispatch on n mod 4.  The first pass handles the n mod 4 odd limbs (or a
C full group of 4 when n mod 4 is 0); every later pass handles 4 limbs.
	rldicl.	r0, r6, 0,62	C r0 = n & 3, set cr0
	cmpdi	cr6, r0, 2	C cr6 = compare (n & 3) with 2
	addi	r6, r6, 3	C compute count...
	srdi	r6, r6, 2	C ...for ctr
	mtctr	r6		C copy count into ctr
	beq	cr0, L(b00)	C n & 3 == 0
	blt	cr6, L(b01)	C n & 3 == 1
	beq	cr6, L(b10)	C n & 3 == 2
				C otherwise n & 3 == 3, fall through

C n mod 4 == 3: handle three limbs, then continue with groups of four if the
C decremented ctr is still nonzero.
L(b11):	ld	r8, 0(r4)	C load s1 limb
	ld	r9, 0(r5)	C load s2 limb
	ld	r10, 8(r4)	C load s1 limb
	ld	r11, 8(r5)	C load s2 limb
	ld	r12, 16(r4)	C load s1 limb
	addi	r4, r4, 24	C up += 3 limbs
	ld	r0, 16(r5)	C load s2 limb
	addi	r5, r5, 24	C vp += 3 limbs
	ADDSUBC	r29, r9, r8
	ADDSUBC	r30, r11, r10
	ADDSUBC	r31, r0, r12
	std	r29, 0(r3)
	std	r30, 8(r3)
	std	r31, 16(r3)
	addi	r3, r3, 24	C rp += 3 limbs
	bdnz	L(go)		C more groups of 4 remain
	b	L(ret)

C n mod 4 == 1: handle a single limb.
L(b01):	ld	r12, 0(r4)	C load s1 limb
	addi	r4, r4, 8	C up += 1 limb
	ld	r0, 0(r5)	C load s2 limb
	addi	r5, r5, 8	C vp += 1 limb
	ADDSUBC	r31, r0, r12	C add
	std	r31, 0(r3)
	addi	r3, r3, 8	C rp += 1 limb
	bdnz	L(go)		C more groups of 4 remain
	b	L(ret)

C n mod 4 == 2: handle two limbs.
L(b10):	ld	r10, 0(r4)	C load s1 limb
	ld	r11, 0(r5)	C load s2 limb
	ld	r12, 8(r4)	C load s1 limb
	addi	r4, r4, 16	C up += 2 limbs
	ld	r0, 8(r5)	C load s2 limb
	addi	r5, r5, 16	C vp += 2 limbs
	ADDSUBC	r30, r11, r10	C add
	ADDSUBC	r31, r0, r12	C add
	std	r30, 0(r3)
	std	r31, 8(r3)
	addi	r3, r3, 16	C rp += 2 limbs
	bdnz	L(go)		C more groups of 4 remain
	b	L(ret)

C n mod 4 == 0: nothing to peel off, go straight to the groups of four.
C The INITCY mentioned below is not defined in the visible part of this
C file; the carry bit has already been set up by CLRCB or SETCBR.
L(b00):	C INITCY		C clear/set cy
C Preload one group of four limb pairs.  If this is the last group (ctr
C reaches 0) skip the loop and finish at L(end); the pointers are not
C advanced in that case because nothing more is loaded.
L(go):	ld	r6, 0(r4)	C load s1 limb
	ld	r7, 0(r5)	C load s2 limb
	ld	r8, 8(r4)	C load s1 limb
	ld	r9, 8(r5)	C load s2 limb
	ld	r10, 16(r4)	C load s1 limb
	ld	r11, 16(r5)	C load s2 limb
	ld	r12, 24(r4)	C load s1 limb
	ld	r0, 24(r5)	C load s2 limb
	bdz	L(end)

	addi	r4, r4, 32	C up += 4 limbs
	addi	r5, r5, 32	C vp += 4 limbs

C Main loop, 4 limbs per pass.  Each ADDSUBC consumes a limb pair loaded
C during the previous pass (or at L(go)), and the same registers are
C reloaded with the next group right after they have been consumed.
	ALIGN(16)
L(top):	ADDSUBC	r28, r7, r6
	ld	r6, 0(r4)	C load s1 limb
	ld	r7, 0(r5)	C load s2 limb
	ADDSUBC	r29, r9, r8
	ld	r8, 8(r4)	C load s1 limb
	ld	r9, 8(r5)	C load s2 limb
	ADDSUBC	r30, r11, r10
	ld	r10, 16(r4)	C load s1 limb
	ld	r11, 16(r5)	C load s2 limb
	ADDSUBC	r31, r0, r12
	ld	r12, 24(r4)	C load s1 limb
	ld	r0, 24(r5)	C load s2 limb
	std	r28, 0(r3)
	addi	r4, r4, 32	C up += 4 limbs
	std	r29, 8(r3)
	addi	r5, r5, 32	C vp += 4 limbs
	std	r30, 16(r3)
	std	r31, 24(r3)
	addi	r3, r3, 32	C rp += 4 limbs
	bdnz	L(top)		C decrement ctr and loop back

C Last group: the limbs are already in registers, only compute and store.
L(end):	ADDSUBC	r28, r7, r6
	ADDSUBC	r29, r9, r8
	ADDSUBC	r30, r11, r10
	ADDSUBC	r31, r0, r12
	std	r28, 0(r3)
	std	r29, 8(r3)
	std	r30, 16(r3)
	std	r31, 24(r3)

C Common exit.  Restore the saved registers, then turn the carry bit into
C the return value.  The loads do not modify CA.
L(ret):	ld	r31, -8(r1)
	ld	r30, -16(r1)
	ld	r29, -24(r1)
	ld	r28, -32(r1)

C subfe with identical source registers gives CA - 1 whatever r0 holds:
C 0 if CA is set, -1 if CA is clear.  GENRVAL then maps this to 0 or 1:
C   add: r3 + 1  = 1 when CA is set (carry out), else 0
C   sub: -r3     = 1 when CA is clear (borrow out), else 0
	subfe	r3, r0, r0	C r3 = CA - 1
	GENRVAL
	blr
EPILOGUE()