dnl  SPARC v9 mpn_sec_tabselect.

dnl  Contributed to the GNU project by Torbjörn Granlund and David Miller.

dnl  Copyright 2013 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C                  cycles/limb
C UltraSPARC 1&2:        2      hopefully
C UltraSPARC 3:          3
C UltraSPARC T1:        17
C UltraSPARC T3:         ?
C UltraSPARC T4/T5:      2.25   hopefully

C INPUT PARAMETERS
C After the save below, the five incoming arguments are visible in %i0-%i4.
define(`rp',     `%i0')         C destination, receives n limbs
define(`tp',     `%i1')         C table base; also the running entry pointer
define(`n',      `%i2')         C limbs per table entry
define(`nents',  `%i3')         C number of entries in the table
define(`which',  `%i4')         C index of the entry to copy out

C Loop state.
define(`i',      `%g1')         C entries left to scan in the current column
define(`j',      `%g3')         C limbs left for the 4-wide loop, biased by -4
define(`stride', `%g4')         C byte distance between entries = 8 * n
define(`tporig', `%g5')         C address of the current column in entry 0
define(`mask',   `%o0')         C all ones for the selected entry, else zero

C Accumulators (dataN) and freshly loaded limbs (tN).
define(`data0',  `%l0')
define(`data1',  `%l1')
define(`data2',  `%l2')
define(`data3',  `%l3')
define(`t0',     `%l4')
define(`t1',     `%l5')
define(`t2',     `%l6')
define(`t3',     `%l7')

C mpn_sec_tabselect (rp, tp, n, nents, which)
C
C Store at rp[0..n-1] the n-limb entry number which of the table at tp.
C Consecutive entries are stride = 8*n bytes apart.
C
C Every one of the nents entries is loaded for every limb position.  Each
C loaded limb is ANDed with a mask that is all ones only in the iteration
C where a down-counter started at which passes through zero, and the result
C is ORed into an accumulator.  The value of which therefore never feeds a
C branch condition or a load address.
C
C The limbs are processed in column blocks: 4 limbs per pass while at least
C 4 remain, then a 2-limb block if bit 1 of n is set, then a 1-limb block
C if bit 0 of n is set.
C
C NOTE(review): the inner loops are bottom-tested and decrement i before
C testing it, so nents = 0 is not handled; callers presumably guarantee
C nents >= 1.
C
C On SPARC the instruction following a branch sits in its delay slot and is
C executed whether or not the branch is taken; such instructions are shown
C here with one extra space of indentation.

ASM_START()
        REGISTER(%g2,#scratch)
        REGISTER(%g3,#scratch)
PROLOGUE(mpn_sec_tabselect)
        save    %sp, -176, %sp          C new register window and stack frame

        sllx    n, 3, stride            C stride = n * 8 bytes
        sub     n, 4, j
        brlz    j, L(outer_end)         C n < 4: skip the 4-limb loop
         mov    tp, tporig              C delay slot: needed on both paths

C ---- 4 limbs per pass ----------------------------------------------------
L(outer_loop):
        clr     data0
        clr     data1
        clr     data2
        clr     data3
        mov     tporig, tp              C restart at entry 0, current column
        mov     nents, i
        mov     which, %o1              C down-counter, reaches 0 at entry which

L(top): subcc   %o1, 1, %o1             C set carry iff o1 = 0
        ldx     [tp + 0], t0
        subc    %g0, %g0, mask          C mask = 0 - 0 - carry: -1 or 0
        ldx     [tp + 8], t1
        sub     i, 1, i
        ldx     [tp + 16], t2
        ldx     [tp + 24], t3
        add     tp, stride, tp          C advance to the next entry
        and     t0, mask, t0
        and     t1, mask, t1
        or      t0, data0, data0
        and     t2, mask, t2
        or      t1, data1, data1
        and     t3, mask, t3
        or      t2, data2, data2
        brnz    i, L(top)
         or     t3, data3, data3        C delay slot

        stx     data0, [rp + 0]
        subcc   j, 4, j
        stx     data1, [rp + 8]
        stx     data2, [rp + 16]
        stx     data3, [rp + 24]
        add     tporig, (4 * 8), tporig C next column block in the table

        brgez   j, L(outer_loop)        C tests the register j, not the flags
         add    rp, (4 * 8), rp         C delay slot
L(outer_end):


C ---- 2-limb tail, taken when bit 1 of n is set ---------------------------
        andcc   n, 2, %g0
        be      L(b0x)
         nop
L(b1x): clr     data0
        clr     data1
        mov     tporig, tp
        mov     nents, i
        mov     which, %o1

L(tp2): subcc   %o1, 1, %o1             C carry iff o1 was 0
        ldx     [tp + 0], t0
        subc    %g0, %g0, mask          C mask = -carry
        ldx     [tp + 8], t1
        sub     i, 1, i
        add     tp, stride, tp
        and     t0, mask, t0
        and     t1, mask, t1
        or      t0, data0, data0
        brnz    i, L(tp2)
         or     t1, data1, data1        C delay slot

        stx     data0, [rp + 0]
        stx     data1, [rp + 8]
        add     tporig, (2 * 8), tporig C step past the two limbs just done
        add     rp, (2 * 8), rp


C ---- 1-limb tail, taken when bit 0 of n is set ---------------------------
L(b0x): andcc   n, 1, %g0
        be      L(b00)
         nop
L(b01): clr     data0
        mov     tporig, tp
        mov     nents, i
        mov     which, %o1

L(tp1): subcc   %o1, 1, %o1             C carry iff o1 was 0
        ldx     [tp + 0], t0
        subc    %g0, %g0, mask          C mask = -carry
        sub     i, 1, i
        add     tp, stride, tp
        and     t0, mask, t0
        brnz    i, L(tp1)
         or     t0, data0, data0        C delay slot

        stx     data0, [rp + 0]

L(b00): ret
         restore                        C delay slot: pop the register window
EPILOGUE()