dnl  SPARC v9 mpn_addlsh_n and mpn_sublsh_n for T3/T4/T5.

dnl  Contributed to the GNU project by Torbjörn Granlund.

dnl  Copyright 2013 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C		   cycles/limb
C UltraSPARC T3:	11
C UltraSPARC T4:	 4

C For sublsh_n we combine the two shifted limbs using xnor, using the identity
C (a xor not b) = (not (a xor b)) which equals (not (a or b)) when (a and b) =
C 0 as it is in our usage.  This gives us the ones complement for free.
C Unfortunately, the same trick will not work for rsblsh_n, which will instead
C require a separate negation.
C
C FIXME: Add rsblsh_n to this file.
C Register roles.  The incoming arguments are reached through the in
C registers once the save below has executed.
define(`rp',  `%i0')		C limbs are stored through this pointer
define(`up',  `%i1')		C limbs loaded here are the unshifted operand
define(`vp',  `%i2')		C limbs loaded here are shifted left by cnt
define(`n',   `%i3')		C limb count on entry, later a byte index
define(`cnt', `%i4')		C left shift count

define(`rcnt', `%o5')		C 64 - cnt, the matching right shift count

C Operation specific pieces.
C   INITCY  sets the initial carry flag: clear for add, set for sub.
C   MERGE   joins the bits carried over from the previous limb with the
C           current shifted limb; xnor also complements the result.
C   func    is the exported symbol name.
ifdef(`OPERATION_addlsh_n',`
  define(`INITCY', `cmp	%g0, 0')
  define(`MERGE',  `or')
  define(`func',   `mpn_addlsh_n')
')
ifdef(`OPERATION_sublsh_n',`
  define(`INITCY', `cmp	%g0, 1')
  define(`MERGE',  `xnor')
  define(`func',   `mpn_sublsh_n')
')

C Two pointers per operand, 8 bytes apart, serve the two limbs handled
C per loop iteration.  The 0 pointers reuse the argument registers.
define(`rp0',  `rp')
define(`rp1',  `%o2')
define(`up0',  `up')
define(`up1',  `%o3')
define(`vp0',  `vp')
define(`vp1',  `%o4')

C addxccc and addxc are macros defined outside this block; presumably
C they emit the add-with-64-bit-carry instructions of T3 and later.

MULFUNC_PROLOGUE(mpn_addlsh_n mpn_sublsh_n)
ASM_START()
	REGISTER(%g2,#scratch)
	REGISTER(%g3,#scratch)
PROLOGUE(func)
	save	%sp, -176, %sp
	mov	64, rcnt
	sub	rcnt, cnt, rcnt		C rcnt = 64 - cnt

	andcc	n, 1, %g0		C Z set when the limb count is even
	sllx	n, 3, n			C limb count to byte count
	add	n, -16, n
	add	up, n, up0		C bias all pointers toward the end so
	add	vp, n, vp0		C that a negative index can count up
	add	rp, n, rp0
	add	up0, 8, up1
	add	vp0, 8, vp1
	add	rp0, -8, rp1
	add	rp0, -16, rp0
	sub	%g0, n, n		C index = 16 - 8 * limb count
	be	L(even)
	 INITCY				C delay slot: runs on both paths

C Odd limb count: handle one limb ahead of the two-limb loop.
L(odd):	ldx	[vp0 + n], %l1
	clr	%l2			C nothing carried in from below
	ldx	[up0 + n], %l5
	sllx	%l1, cnt, %g3
	brgez	n, L(tail)		C single limb: go straight to wind-down
	 add	n, 8, n
	ldx	[vp0 + n], %l0
	b	L(in1)
	 sllx	%l1, cnt, %g3

C Even limb count: enter the loop at its second half.
L(even):	ldx	[vp0 + n], %l0
	clr	%l3			C nothing carried in from below
	ldx	[up0 + n], %l4
	ldx	[vp1 + n], %l1
	b	L(in0)
	 sllx	%l0, cnt, %g1

C Main loop, two limbs per iteration.  Loads for the next limbs are
C interleaved with the arithmetic on the current ones.
L(loop):	addxccc(%l6, %l4, %o0)
	ldx	[vp0 + n], %l0
	sllx	%l1, cnt, %g3
	stx	%o0, [rp0 + n]
L(in1):	srlx	%l1, rcnt, %l3		C bits that move into the next limb
	MERGE	%l2, %g3, %l7
	ldx	[up0 + n], %l4
	addxccc(%l7, %l5, %o1)
	ldx	[vp1 + n], %l1
	sllx	%l0, cnt, %g1
	stx	%o1, [rp1 + n]
L(in0):	srlx	%l0, rcnt, %l2		C bits that move into the next limb
	MERGE	%l3, %g1, %l6
	ldx	[up1 + n], %l5
	brlz,pt	n, L(loop)
	 add	n, 16, n

C Wind-down: finish the limbs still in flight.
	addxccc(%l6, %l4, %o0)
	sllx	%l1, cnt, %g3
	stx	%o0, [rp0 + n]
L(tail):	srlx	%l1, rcnt, %l3
	MERGE	%l2, %g3, %l7
	addxccc(%l7, %l5, %o1)
	stx	%o1, [rp1 + n]

C Return value in %i0, which the caller sees as %o0 after restore.
C   add: bits shifted out of the top limb plus the final carry.
C   sub: bits shifted out of the top limb plus one minus the final carry.
ifdef(`OPERATION_addlsh_n',
`	addxc(	%l3, %g0, %i0)')
ifdef(`OPERATION_sublsh_n',
`	addxc(	%g0, %g0, %g1)
	add	%g1, -1, %g1
	sub	%l3, %g1, %i0')

	ret
	 restore
EPILOGUE()