dnl  PowerPC-64 mpn_rsh1add_n, mpn_rsh1sub_n

dnl  Copyright 2003, 2005, 2010, 2013 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C		   cycles/limb
C POWER3/PPC630		 ?
C POWER4/PPC970		 2.9
C POWER5		 ?
C POWER6		 3.5
C POWER7		 2.25

C OPERATION
C   func(rp, up, vp, n) adds (rsh1add_n) or subtracts (rsh1sub_n, computing
C   up[] - vp[]) the two n-limb operands with carry propagation, shifts the
C   result right by one bit, and stores n limbs at rp.  The carry (add) or
C   borrow (sub) out of the top limb becomes the most significant bit of the
C   top result limb.  The bit shifted out at the bottom (bit 0 of the lowest
C   sum/difference limb) is returned in r3.
C
C INPUT REGISTERS
C   rp = r3   result pointer
C   up = r4   first source pointer
C   vp = r5   second source pointer
C   n  = r6   limb count; limb 0 of both sources is loaded unconditionally,
C             so this code assumes n >= 1 (TODO confirm against callers)
C
C WORKING REGISTERS
C   u0, v0    most recently loaded source limbs
C   x0, x1    two newest sum/difference limbs (alternating)
C   s0, s1    previous sum/difference limb already shifted right by one,
C             waiting for its top bit from the next limb
C   r11       return value
C   CTR       loop count, (n+1)/4
C   cr6       n compared with 2, selects the short tails n1 and n2
C   r0, r7-r12, CTR, cr0 and cr6 are overwritten; nothing is saved or restored.
C
C CARRY HANDLING
C   ADDSUBC starts the carry chain and every later limb uses ADDSUBE, so CA
C   must survive from one ADDSUBE to the next.  All pointer and counter
C   updates in between therefore use addi, and all tests use cmpdi/andi.,
C   which leave CA alone.
C
C   INITCY is defined for both variants but is not referenced in the code
C   below.

define(`rp',	`r3')
define(`up',	`r4')
define(`vp',	`r5')
define(`n',	`r6')

ifdef(`OPERATION_rsh1add_n', `
  define(`ADDSUBC',	`addc')
  define(`ADDSUBE',	`adde')
  define(INITCY,	`addic	$1, r1, 0')
  define(`func',	mpn_rsh1add_n)')
ifdef(`OPERATION_rsh1sub_n', `
  define(`ADDSUBC',	`subfc')
  define(`ADDSUBE',	`subfe')
  define(INITCY,	`addic	$1, r1, -1')
  define(`func',	mpn_rsh1sub_n)')

define(`s0',	`r9')
define(`s1',	`r7')
define(`x0',	`r0')
define(`x1',	`r12')
define(`u0',	`r8')
define(`v0',	`r10')

MULFUNC_PROLOGUE(mpn_rsh1add_n mpn_rsh1sub_n)

ASM_START()
PROLOGUE(func)
	ld	u0, 0(up)		C limb 0 of each operand
	ld	v0, 0(vp)

	cmpdi	cr6, n, 2		C cr6 = n vs 2, tested by the ble below

	addi	r0, n, 1		C r0 is free here; it only becomes x0 later
	srdi	r0, r0, 2
	mtctr	r0			C CTR = (n+1)/4

	andi.	r0, n, 1
	bne	cr0, L(bx1)		C odd n

C Even n: form limbs 0 and 1 (and limb 2 when n > 2) before entering the loop.
L(bx0):	ADDSUBC	x1, v0, u0		C limb 0, starts the carry chain
	ld	u0, 8(up)
	ld	v0, 8(vp)
	ADDSUBE	x0, v0, u0		C limb 1
	ble	cr6, L(n2)		C n = 2
	ld	u0, 16(up)
	ld	v0, 16(vp)
	srdi	s0, x1, 1
	rldicl	r11, x1, 0, 63		C return value
	ADDSUBE	x1, v0, u0		C limb 2
	andi.	n, n, 2
	bne	cr0, L(b10)
L(b00):	addi	rp, rp, -24		C bias rp so 24(rp) at lo0 is result limb 0
	b	L(lo0)
L(b10):	addi	up, up, 16
	addi	vp, vp, 16
	addi	rp, rp, -8		C bias rp so 8(rp) at lo2 is result limb 0
	b	L(lo2)

C Odd n: form limb 0 (and limbs 1 and 2 when n > 2) before entering the loop.
	ALIGN(16)
L(bx1):	ADDSUBC	x0, v0, u0		C limb 0, starts the carry chain
	ble	cr6, L(n1)		C n = 1 (the only odd n not above 2)
	ld	u0, 8(up)
	ld	v0, 8(vp)
	ADDSUBE	x1, v0, u0		C limb 1
	ld	u0, 16(up)
	ld	v0, 16(vp)
	srdi	s1, x0, 1
	rldicl	r11, x0, 0, 63		C return value
	ADDSUBE	x0, v0, u0		C limb 2
	andi.	n, n, 2
	bne	cr0, L(b11)
L(b01):	addi	up, up, 8
	addi	vp, vp, 8
	addi	rp, rp, -16		C bias rp so 16(rp) at lo1 is result limb 0
	b	L(lo1)
L(b11):	addi	up, up, 24
	addi	vp, vp, 24
	bdz	L(end)			C CTR reaches 0 here when n = 3

C Main loop, four limbs per iteration.  Each quarter loads the next source
C limbs, shifts the newer pending sum right by one, inserts its low bit as the
C top bit of the older shifted sum, stores that finished limb, and then
C extends the carry chain by one limb.
	ALIGN(32)
L(top):	ld	u0, 0(up)
	ld	v0, 0(vp)
	srdi	s0, x1, 1
	rldimi	s1, x1, 63, 0
	std	s1, 0(rp)
	ADDSUBE	x1, v0, u0
L(lo2):	ld	u0, 8(up)
	ld	v0, 8(vp)
	srdi	s1, x0, 1
	rldimi	s0, x0, 63, 0
	std	s0, 8(rp)
	ADDSUBE	x0, v0, u0
L(lo1):	ld	u0, 16(up)
	ld	v0, 16(vp)
	srdi	s0, x1, 1
	rldimi	s1, x1, 63, 0
	std	s1, 16(rp)
	ADDSUBE	x1, v0, u0
L(lo0):	ld	u0, 24(up)
	ld	v0, 24(vp)
	srdi	s1, x0, 1
	rldimi	s0, x0, 63, 0
	std	s0, 24(rp)
	ADDSUBE	x0, v0, u0
	addi	up, up, 32
	addi	vp, vp, 32
	addi	rp, rp, 32
	bdnz	L(top)

C Wind down: flush the three limbs still held in registers.  The final
C ADDSUBE leaves the carry (add) or borrow (sub) in bit 0 of x1, which
C rldimi then places in the top bit of the last stored limb.
L(end):	srdi	s0, x1, 1
	rldimi	s1, x1, 63, 0
	std	s1, 0(rp)
L(cj2):	srdi	s1, x0, 1
	rldimi	s0, x0, 63, 0
	std	s0, 8(rp)
L(cj1):	ADDSUBE	x1, x1, x1		C pseudo-depends on x1
	rldimi	s1, x1, 63, 0
	std	s1, 16(rp)
	mr	r3, r11
	blr

C n = 1: a single limb; only bit 0 of the ADDSUBE result is used.
L(n1):	srdi	s1, x0, 1
	rldicl	r11, x0, 0, 63		C return value
	ADDSUBE	x1, x1, x1		C pseudo-depends on x1
	rldimi	s1, x1, 63, 0
	std	s1, 0(rp)
	mr	r3, r11
	blr

C n = 2: bias rp so the shared tail at cj2 stores result limbs 0 and 1.
L(n2):	addi	rp, rp, -8
	srdi	s0, x1, 1
	rldicl	r11, x1, 0, 63		C return value
	b	L(cj2)
EPILOGUE()