dnl  PowerPC-64 mpn_sec_tabselect.

dnl  Contributed to the GNU project by Torbjörn Granlund.

dnl  Copyright 2011-2013 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C                   cycles/limb
C POWER3/PPC630          1.75
C POWER4/PPC970          2.0
C POWER5                 ?
C POWER6                 5.0
C POWER7                 1.75

C Copy entry number which (counting from 0) out of a table of nents entries
C of n limbs each, storing the n limbs at rp.
C
C Every entry of the table is loaded and ANDed with a mask that is all ones
C only for the wanted entry; the masked limbs are ORed into accumulators.
C Neither the branches taken nor the addresses touched depend on which.
C
C The table is walked column-wise: four limbs of every entry per pass while
C at least four limbs remain, then one two-limb pass if bit 1 of n is set,
C then one single-limb pass if bit 0 of n is set.
C
C NOTE: every pass is a bdnz loop with CTR = nents, so nents = 0 is not
C handled here.

C Incoming arguments
define(`rp',     `r3')		C destination, n limbs are stored here
define(`tp',     `r4')		C table pointer, advanced while scanning
define(`n',      `r5')		C limbs per table entry
define(`nents',  `r6')		C number of table entries
define(`which',  `r7')		C index of the entry to extract

C Working registers
define(`idx',    `r8')		C counts down from which once per entry
define(`cnt',    `r9')		C limbs left for the 4-limb passes, minus 4
define(`tbase',  `r10')		C tp at the start of the current pass
define(`sel',    `r11')		C -1 for the wanted entry, 0 otherwise
define(`stride', `r12')		C size of one entry in bytes

C r0 and r27 hold freshly loaded limbs, r28-r31 are the accumulators.


ASM_START()
PROLOGUE(mpn_sec_tabselect)
	addic.	cnt, n, -4		C cnt = n - 4, cr0 is tested by the blt below
	std	r31, -8(r1)		C park r27-r31 just below r1; r1 itself is
	std	r30, -16(r1)		C not moved, so this relies on the area below
	std	r29, -24(r1)		C the stack pointer being usable by a leaf
	std	r28, -32(r1)		C routine
	std	r27, -40(r1)
	sldi	stride, n, 3		C stride = 8 * n bytes

	blt	cr0, L(tail)		C n < 4: no 4-limb pass at all

C ---- 4-limb passes ------------------------------------------------------
L(quad):
	mtctr	nents			C one iteration per table entry
	mr	tbase, tp		C remember where this pass started
	li	r28, 0			C clear the four accumulators
	li	r29, 0
	li	r30, 0
	li	r31, 0
	addic.	cnt, cnt, -4		C cr0 for the bge at the bottom; no record
					C form instruction is executed in between
	mr	idx, which		C restart the countdown for this pass

	ALIGN(16)
L(scan4):
	addic	idx, idx, -1		C CA = 1 iff idx was nonzero
	subfe	sel, sel, sel		C sel = CA - 1
	ld	r0, 0(tp)		C limbs 0 and 1 of this entry
	ld	r27, 8(tp)
	and	r0, r0, sel		C keep them only for the wanted entry
	and	r27, r27, sel
	or	r28, r28, r0
	or	r29, r29, r27
	ld	r0, 16(tp)		C limbs 2 and 3 of this entry
	ld	r27, 24(tp)
	and	r0, r0, sel
	and	r27, r27, sel
	or	r30, r30, r0
	or	r31, r31, r27
	add	tp, tp, stride		C same limbs of the next entry
	bdnz	L(scan4)

	std	r28, 0(rp)		C emit the four selected limbs
	std	r29, 8(rp)
	std	r30, 16(rp)
	std	r31, 24(rp)
	addi	tp, tbase, 32		C first entry again, four limbs further on
	addi	rp, rp, 32
	bge	cr0, L(quad)		C another full group of four limbs remains
L(tail):

C ---- 2-limb pass, taken when bit 1 of n is set --------------------------
	rldicl.	r0, n, 63, 63		C r0 = bit 1 of n
	beq	cr0, L(chk1)
L(pair):
	mtctr	nents
	mr	tbase, tp
	li	r28, 0
	li	r29, 0
	mr	idx, which
	ALIGN(16)
L(scan2):
	addic	idx, idx, -1		C CA = 1 iff idx was nonzero
	subfe	sel, sel, sel		C sel = CA - 1
	ld	r0, 0(tp)
	ld	r27, 8(tp)
	and	r0, r0, sel
	and	r27, r27, sel
	or	r28, r28, r0
	or	r29, r29, r27
	add	tp, tp, stride
	bdnz	L(scan2)
	std	r28, 0(rp)
	std	r29, 8(rp)
	addi	tp, tbase, 16		C first entry again, two limbs further on
	addi	rp, rp, 16

C ---- 1-limb pass, taken when bit 0 of n is set --------------------------
L(chk1):
	rldicl.	r0, n, 0, 63		C r0 = bit 0 of n
	beq	cr0, L(done)
L(single):
	mtctr	nents
	mr	tbase, tp		C not read again after this pass
	li	r28, 0
	mr	idx, which
	ALIGN(16)
L(scan1):
	addic	idx, idx, -1		C CA = 1 iff idx was nonzero
	subfe	sel, sel, sel		C sel = CA - 1
	ld	r0, 0(tp)
	and	r0, r0, sel
	or	r28, r28, r0
	add	tp, tp, stride
	bdnz	L(scan1)
	std	r28, 0(rp)

C ---- reload the parked registers and return -----------------------------
L(done):
	ld	r31, -8(r1)
	ld	r30, -16(r1)
	ld	r29, -24(r1)
	ld	r28, -32(r1)
	ld	r27, -40(r1)
	blr
EPILOGUE()