dnl  IA-64 mpn_popcount -- mpn population count.

dnl  Copyright 2000, 2001, 2002, 2003, 2004, 2005 Free Software Foundation,
dnl  Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')

C           cycles/limb
C Itanium:       1.5
C Itanium 2:     1

C mpn_popcount
C
C Sums the popcnt of each of the n 8-byte limbs read from up and leaves the
C total in s (r8) when it returns through b0.
C
C NOTE(review): the first limb is loaded before n is examined and there is no
C path for n = 0, so this presumably relies on callers passing n >= 1 --
C confirm against the callers.
C
C NOTE(review): .prologue is not followed by .save/.body directives for the
C copy of ar.lc kept in r2 -- confirm whether unwind info is wanted here.

C INPUT PARAMETERS
define(`up', `r32')
define(`n', `r33')

C WORKING REGISTERS
C   u0-u3   limbs that have been loaded but not yet counted
C   c0-c3   popcnt results that have not yet been added to the total
C   s       running total
C   r2      copy of ar.lc taken on entry, written back before the last return
C   r9      prefetch address, starts at up+512 and steps 32 bytes per lfetch
C   r10     first limb
C   r14     n mod 4, selects one of the four entry paths
C   p15     set when 4 < n, p14 is its complement
define(`u0',`r16') define(`u1',`r17') define(`u2',`r18') define(`u3',`r19')
define(`c0',`r28') define(`c1',`r29') define(`c2',`r30') define(`c3',`r31')
define(`s',`r8')


ASM_START()
PROLOGUE(mpn_popcount)
        .prologue
ifdef(`HAVE_ABI_32',
`       addp4           up = 0, up              C                       M I
        zxt4            n = n                   C                       I
        ;;
')

C Load the first limb, classify n mod 4 and pre-bias n by -5.  Each path below
C shifts the biased n right by 2 and, when it is going to loop, writes the
C result to ar.lc as the br.cloop count.
 {.mmi; add             r9 = 512, up            C prefetch pointer      M I
        ld8             r10 = [up], 8           C load first limb       M01
        mov.i           r2 = ar.lc              C save ar.lc            I0
}{.mmi; and             r14 = 3, n              C                       M I
        cmp.lt          p15, p14 = 4, n         C small count?          M I
        add             n = -5, n               C                       M I
        ;;
}{.mmi; cmp.eq          p6, p0 = 1, r14         C                       M I
        cmp.eq          p7, p0 = 2, r14         C                       M I
        cmp.eq          p8, p0 = 3, r14         C                       M I
}{.bbb
  (p6)  br.dptk         .Lb01                   C                       B
  (p7)  br.dptk         .Lb10                   C                       B
  (p8)  br.dptk         .Lb11                   C                       B
}


C n mod 4 = 0.  When p15 is clear the four limbs loaded here are all there is
C and they are summed through .Lcj4; otherwise a fifth limb is loaded at
C .grt4 and the loop is joined at .LL00.
.Lb00:  ld8             u1 = [up], 8            C                       M01
        shr.u           n = n, 2                C                       I0
        mov             s = 0                   C                       M I
        ;;
        ld8             u2 = [up], 8            C                       M01
        popcnt          c0 = r10                C                       I0
        mov.i           ar.lc = n               C                       I0
        ;;
        ld8             u3 = [up], 8            C                       M01
        popcnt          c1 = u1                 C                       I0
  (p15) br.cond.dptk    .grt4                   C                       B
        ;;
        nop.m           0                       C                       -
        nop.m           0                       C                       -
        popcnt          c2 = u2                 C                       I0
        ;;
        mov             s = c0                  C                       M I
        popcnt          c3 = u3                 C                       I0
        br              .Lcj4                   C                       B

.grt4:  ld8             u0 = [up], 8            C                       M01
        popcnt          c2 = u2                 C                       I0
        br              .LL00                   C                       B


C n mod 4 = 1.  When p14 is set (n not above 4) the first limb is the only
C one, so its count is returned at once; ar.lc has not been written on this
C path.  Otherwise four more limbs are loaded and the loop is entered from the
C top, or skipped to .Lend when br.cloop falls through.  c3 is cleared because
C .Loop and .Lend add it before this path has produced a count for it.
.Lb01:
        popcnt          s = r10                 C                       I0
  (p14) br.ret.sptk.many b0                     C                       B

.grt1:  ld8             u0 = [up], 8            C                       M01
        shr.u           n = n, 2                C                       I0
        ;;
        ld8             u1 = [up], 8            C                       M01
        mov.i           ar.lc = n               C                       I0
        ;;
        ld8             u2 = [up], 8            C                       M01
        popcnt          c0 = u0                 C                       I0
        mov             c3 = 0                  C                       I0

        ;;
        ld8             u3 = [up], 8            C                       M01
        popcnt          c1 = u1                 C                       I0
        br.cloop.dptk   .Loop                   C                       B
        br              .Lend                   C                       B


C n mod 4 = 2.  When p15 is clear both limbs are counted here and combined at
C .Lcj2.  Otherwise the first limb count is parked in c2, the total starts at
C zero and the loop is joined at .LL10.
.Lb10:  ld8             u3 = [up], 8            C                       M01
        shr.u           n = n, 2                C                       I0
  (p15) br.cond.dptk    .grt2                   C                       B

        popcnt          s = r10                 C                       I0
        ;;
        popcnt          c3 = u3                 C                       I0
        br              .Lcj2                   C                       B

.grt2:  ld8             u0 = [up], 8            C                       M01
        mov.i           ar.lc = n               C                       I0
        popcnt          c2 = r10                C                       I0
        ;;
        ld8             u1 = [up], 8            C                       M01
        popcnt          c3 = u3                 C                       I0
        mov             s = 0                   C                       M I
        ;;
        ld8             u2 = [up], 8            C                       M01
        popcnt          c0 = u0                 C                       I0
        br              .LL10                   C                       B


C n mod 4 = 3.  The total starts as the count of the first limb (the mov of 0
C into s is overwritten by the popcnt in the next group).  When p15 is clear
C the other two limbs are counted and combined from .Lcj3.  Otherwise the loop
C is joined at .LL11, with c1 cleared because .LL11 adds it before this path
C has produced a count for it.
.Lb11:  ld8             u2 = [up], 8            C                       M01
        shr.u           n = n, 2                C                       I0
        mov             s = 0                   C                       M I
        ;;
        ld8             u3 = [up], 8            C                       M01
        popcnt          s = r10                 C                       I0
  (p15) br.cond.dptk    .grt3                   C                       B

        popcnt          c2 = u2                 C                       I0
        ;;
        popcnt          c3 = u3                 C                       I0
        br              .Lcj3                   C                       B

.grt3:  ld8             u0 = [up], 8            C                       M01
        popcnt          c2 = u2                 C                       I0
        mov.i           ar.lc = n               C                       I0
        mov             c1 = 0
        ;;
        ld8             u1 = [up], 8            C                       M01
        popcnt          c3 = u3                 C                       I0
        br              .LL11                   C                       B


C Main loop, four limbs per pass.  Every group loads one limb, counts a limb
C loaded in an earlier group and adds a count produced in an earlier group.
C .LL00, .LL11 and .LL10 are the mid-loop entry points used by the paths
C above.  The lfetch advances r9 by 32 bytes, the size of the four limbs
C consumed per pass.
.Loop:  ld8             u0 = [up], 8            C                       M01
        popcnt          c2 = u2                 C                       I0
        add             s = s, c3               C                       M I
        ;;
.LL00:  ld8             u1 = [up], 8            C                       M01
        popcnt          c3 = u3                 C                       I0
        add             s = s, c0               C                       M I
        ;;
.LL11:  ld8             u2 = [up], 8            C                       M01
        popcnt          c0 = u0                 C                       I0
        add             s = s, c1               C                       M I
        ;;
.LL10:  ld8             u3 = [up], 8            C                       M01
        popcnt          c1 = u1                 C                       I0
        add             s = s, c2               C                       M I
        lfetch          [r9], 32                C                       M01
        nop.m           0                       C                       -
        br.cloop.dptk   .Loop                   C                       B
        ;;

C Wind down: count the two limbs still held in u2 and u3 and add every
C outstanding count.  .Lcj4, .Lcj3 and .Lcj2 are where the short paths join
C with three, two or one counts left to add.  ar.lc is put back from r2
C before returning.
.Lend:  popcnt          c2 = u2                 C                       I0
        add             s = s, c3               C                       M I
        ;;
        popcnt          c3 = u3                 C                       I0
        add             s = s, c0               C                       M I
        ;;
.Lcj4:  add             s = s, c1               C                       M I
        ;;
.Lcj3:  add             s = s, c2               C                       M I
        ;;
.Lcj2:  add             s = s, c3               C                       M I
        mov.i           ar.lc = r2              C                       I0
        br.ret.sptk.many b0                     C                       B
EPILOGUE()
ASM_END()