dnl  PowerPC-64 mpn_add_n/mpn_sub_n -- mpn addition and subtraction.

dnl  Copyright 1999-2001, 2003-2005, 2007, 2011 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C                  cycles/limb
C POWER3/PPC630          1.5
C POWER4/PPC970          2
C POWER5                 2
C POWER6                 2.63
C POWER7               2.25-2.87

C This code is a little bit slower for POWER3/PPC630 than the simple code used
C previously, but it is much faster for POWER4/PPC970.  The reason for the
C POWER3/PPC630 slowdown can be attributed to the saving and restoring of 4
C registers.
C INPUT PARAMETERS
C rp	r3	destination, written with std
C up	r4	first source, called s1 in the comments below
C vp	r5	second source, called s2 in the comments below
C n	r6	number of limbs, must be at least 1 (see the note on func)
C
C The func_nc entry point additionally reads a carry/borrow-in from r7.

C OPERATION SELECTION
C
C Depending on which of OPERATION_add_n / OPERATION_sub_n is defined, the
C macros below pick the instructions that differ between the two routines.
C Everything is driven by the CA (carry) bit.  For addition CA=1 means a
C carry is pending.  For subtraction the subf* instructions compute
C not(a) + b + CA, so CA=1 means NO borrow and CA=0 means a borrow is pending.
C
C ADDSUBC	limb operation consuming and producing CA (adde / subfe).
C		Called as ADDSUBC dst, s2, s1 which yields s1 + s2 + CA for
C		add and s1 - s2 - borrow for sub.
C ADDSUB	carry-out-only variant (addc / subfc).  It is defined here but
C		not referenced anywhere in the code visible in this file.
C func		name of the plain entry point.
C func_nc	name of the entry point that takes a carry/borrow-in.
C GENRVAL	converts r3 = CA - 1, computed at L(ret), into the return
C		value: add computes r3 + 1 = CA, sub computes -r3 = 1 - CA.
C SETCBR(reg)	loads CA from the carry/borrow-in held in reg, r0 is scratch.
C		add: reg + (-1) carries exactly when reg is nonzero.
C		sub: 0 - reg leaves CA=1 (no borrow) exactly when reg is zero.
C CLRCB		puts CA in the no-carry / no-borrow state, r0 is scratch.
C		add: r0 + 0 never carries, so CA=0.
C		sub: r1 + (-1) carries whenever r1 is nonzero, giving CA=1.
C		r1 is the base of the register save area used below, so it is
C		expected to be a valid nonzero address.

ifdef(`OPERATION_add_n',`
  define(ADDSUBC,	adde)
  define(ADDSUB,	addc)
  define(func,		mpn_add_n)
  define(func_nc,	mpn_add_nc)
  define(GENRVAL,	`addi	r3, r3, 1')
  define(SETCBR,	`addic	r0, $1, -1')
  define(CLRCB,		`addic	r0, r0, 0')
')
ifdef(`OPERATION_sub_n',`
  define(ADDSUBC,	subfe)
  define(ADDSUB,	subfc)
  define(func,		mpn_sub_n)
  define(func_nc,	mpn_sub_nc)
  define(GENRVAL,	`neg	r3, r3')
  define(SETCBR,	`subfic	r0, $1, 0')
  define(CLRCB,		`addic	r0, r1, -1')
')

MULFUNC_PROLOGUE(mpn_add_n mpn_add_nc mpn_sub_n mpn_sub_nc)

ASM_START()

C func_nc: variant with carry/borrow-in.  The value in r7 is turned into the
C CA bit and control then joins func at L(ent), skipping only the CLRCB.
PROLOGUE(func_nc)
	SETCBR(r7)		C CA = carry/borrow-in taken from r7
	b	L(ent)		C continue in the shared body inside func
EPILOGUE()

C func: store up[i] + vp[i] (add) or up[i] - vp[i] (sub) to rp[i] for the n
C limbs, propagating carry/borrow through CA, and return the final
C carry/borrow (0 or 1) in r3.
C
C Structure:
C   1. n mod 4 leading limbs (0 to 3) are handled by L(b00)/L(b01)/L(b10)/L(b11).
C   2. The remaining limbs are processed four at a time by a software
C      pipelined loop: the loads for the next group are interleaved with the
C      ADDSUBC and std of the current group.  ctr = (n + 3) >> 2 counts the
C      leading partial group plus the groups of four.
C
C Register use: r0 and r6-r12 hold loaded limbs (r6 and r7 are free for this
C once n is in ctr and the carry-in has been consumed), r28-r31 hold results.
C r28-r31 are saved on entry and restored at L(ret).
C
C Correctness depends on nothing between the ADDSUBC steps writing CA; the
C ld, std, addi, srdi, cmpdi, mtctr and bd* instructions used here leave it
C alone.  Keep that in mind before inserting or replacing instructions.
C
C n == 0 is not handled: ctr would be loaded with 0, the n mod 4 == 0 path
C would be taken, and the first bdz would wrap ctr instead of terminating.
PROLOGUE(func)
	CLRCB			C start with no carry / no borrow
L(ent):	std	r31, -8(r1)	C save r28-r31 at negative offsets from r1;
	std	r30, -16(r1)	C r1 itself is not adjusted.
	std	r29, -24(r1)	C NOTE(review): relies on the target ABI allowing
	std	r28, -32(r1)	C stores below r1 in a leaf routine, confirm.

	rldicl.	r0, r6, 0,62	C r0 = n & 3, set cr0
	cmpdi	cr6, r0, 2	C cr6 = compare (n & 3) against 2
	addi	r6, r6, 3	C compute count...
	srdi	r6, r6, 2	C ...for ctr: (n + 3) >> 2
	mtctr	r6		C copy count into ctr
	beq	cr0, L(b00)	C n & 3 == 0: no leading limbs
	blt	cr6, L(b01)	C n & 3 == 1: one leading limb
	beq	cr6, L(b10)	C n & 3 == 2: two leading limbs
				C otherwise fall through, n & 3 == 3

C Three leading limbs.
L(b11):	ld	r8, 0(r4)	C load s1 limb
	ld	r9, 0(r5)	C load s2 limb
	ld	r10, 8(r4)	C load s1 limb
	ld	r11, 8(r5)	C load s2 limb
	ld	r12, 16(r4)	C load s1 limb
	addi	r4, r4, 24	C up += 3 limbs
	ld	r0, 16(r5)	C load s2 limb
	addi	r5, r5, 24	C vp += 3 limbs
	ADDSUBC	r29, r9, r8
	ADDSUBC	r30, r11, r10
	ADDSUBC	r31, r0, r12
	std	r29, 0(r3)
	std	r30, 8(r3)
	std	r31, 16(r3)
	addi	r3, r3, 24	C rp += 3 limbs
	bdnz	L(go)		C groups of four remain
	b	L(ret)		C n was exactly 3

C One leading limb.
L(b01):	ld	r12, 0(r4)	C load s1 limb
	addi	r4, r4, 8	C up += 1 limb
	ld	r0, 0(r5)	C load s2 limb
	addi	r5, r5, 8	C vp += 1 limb
	ADDSUBC	r31, r0, r12	C add or subtract with carry
	std	r31, 0(r3)
	addi	r3, r3, 8	C rp += 1 limb
	bdnz	L(go)		C groups of four remain
	b	L(ret)		C n was exactly 1

C Two leading limbs.
L(b10):	ld	r10, 0(r4)	C load s1 limb
	ld	r11, 0(r5)	C load s2 limb
	ld	r12, 8(r4)	C load s1 limb
	addi	r4, r4, 16	C up += 2 limbs
	ld	r0, 8(r5)	C load s2 limb
	addi	r5, r5, 16	C vp += 2 limbs
	ADDSUBC	r30, r11, r10	C add or subtract with carry
	ADDSUBC	r31, r0, r12	C add or subtract with carry
	std	r30, 0(r3)
	std	r31, 8(r3)
	addi	r3, r3, 16	C rp += 2 limbs
	bdnz	L(go)		C groups of four remain
	b	L(ret)		C n was exactly 2

C No leading limbs: CA still holds the value set up on entry, so go straight
C to the four-limb code.
L(b00):
C Pipeline fill: load the first group of four limb pairs.  The pointers are
C only advanced if another group follows.
L(go):	ld	r6, 0(r4)	C load s1 limb
	ld	r7, 0(r5)	C load s2 limb
	ld	r8, 8(r4)	C load s1 limb
	ld	r9, 8(r5)	C load s2 limb
	ld	r10, 16(r4)	C load s1 limb
	ld	r11, 16(r5)	C load s2 limb
	ld	r12, 24(r4)	C load s1 limb
	ld	r0, 24(r5)	C load s2 limb
	bdz	L(end)		C this was the last group, just drain it

	addi	r4, r4, 32	C up += 4 limbs
	addi	r5, r5, 32	C vp += 4 limbs

C Main loop: combine the four limb pairs already in registers while loading
C the next four pairs, then store the four results.
	ALIGN(16)
L(top):	ADDSUBC	r28, r7, r6
	ld	r6, 0(r4)	C load s1 limb
	ld	r7, 0(r5)	C load s2 limb
	ADDSUBC	r29, r9, r8
	ld	r8, 8(r4)	C load s1 limb
	ld	r9, 8(r5)	C load s2 limb
	ADDSUBC	r30, r11, r10
	ld	r10, 16(r4)	C load s1 limb
	ld	r11, 16(r5)	C load s2 limb
	ADDSUBC	r31, r0, r12
	ld	r12, 24(r4)	C load s1 limb
	ld	r0, 24(r5)	C load s2 limb
	std	r28, 0(r3)
	addi	r4, r4, 32	C up += 4 limbs
	std	r29, 8(r3)
	addi	r5, r5, 32	C vp += 4 limbs
	std	r30, 16(r3)
	std	r31, 24(r3)
	addi	r3, r3, 32	C rp += 4 limbs
	bdnz	L(top)		C decrement ctr and loop back

C Pipeline drain: the last four limb pairs are already loaded.
L(end):	ADDSUBC	r28, r7, r6
	ADDSUBC	r29, r9, r8
	ADDSUBC	r30, r11, r10
	ADDSUBC	r31, r0, r12
	std	r28, 0(r3)
	std	r29, 8(r3)
	std	r30, 16(r3)
	std	r31, 24(r3)

C Common exit: restore the saved registers and turn CA into the return value.
L(ret):	ld	r31, -8(r1)
	ld	r30, -16(r1)
	ld	r29, -24(r1)
	ld	r28, -32(r1)

	subfe	r3, r0, r0	C r3 = not(r0) + r0 + CA = CA - 1, for any r0
	GENRVAL			C add: r3 = CA    sub: r3 = 1 - CA
	blr
EPILOGUE()