dnl  IA-64 mpn_and_n, mpn_andn_n, mpn_nand_n, mpn_ior_n, mpn_iorn_n,
dnl  mpn_nior_n, mpn_xor_n, mpn_xnor_n -- mpn bitwise logical operations.

dnl  Contributed to the GNU project by Torbjorn Granlund.

dnl  Copyright 2003, 2004, 2005 Free Software Foundation, Inc.
dnl
dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.
dnl
dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')

C         cycles/limb
C Itanium:      2
C Itanium 2:    1

C TODO
C  * Use rp,rpx scheme of aors_n.asm to allow parallel stores (useful in
C    wind-down code).
C INPUT PARAMETERS
C   rp  r32  address the results are stored to (st8, post-increment 8)
C   up  r33  address of the first source operand (ld8, post-increment 8)
C   vp  r34  address of the second source operand (ld8, post-increment 8)
C   n   r35  number of 8-byte limbs to process
define(`rp', `r32')
define(`up', `r33')
define(`vp', `r34')
define(`n', `r35')

C OPERATION SELECTION
C Whichever OPERATION_xxx symbol is defined picks the entry point name and
C the two macros that make up the per-limb computation:
C   logop(d, a, b)   the two-input instruction applied to a limb pair
C   notormov(d, s)   the second step: a plain copy (mov) for the operations
C                    that need no final inversion, or sub d = -1, s which is
C                    the bitwise complement of s
C andcm d = a, b computes a AND NOT b.  iorn_n therefore uses andcm with the
C operands swapped and complements the result: NOT(v AND NOT u) = u OR NOT v.
ifdef(`OPERATION_and_n',
`	define(`func',`mpn_and_n')
	define(`logop',		`and	$1 = $2, $3')
	define(`notormov',	`mov	$1 = $2')')
ifdef(`OPERATION_andn_n',
`	define(`func',`mpn_andn_n')
	define(`logop',		`andcm	$1 = $2, $3')
	define(`notormov',	`mov	$1 = $2')')
ifdef(`OPERATION_nand_n',
`	define(`func',`mpn_nand_n')
	define(`logop',		`and	$1 = $2, $3')
	define(`notormov',	`sub	$1 = -1, $2')')
ifdef(`OPERATION_ior_n',
`	define(`func',`mpn_ior_n')
	define(`logop',		`or	$1 = $2, $3')
	define(`notormov',	`mov	$1 = $2')')
ifdef(`OPERATION_iorn_n',
`	define(`func',`mpn_iorn_n')
	define(`logop',		`andcm	$1 = $3, $2')
	define(`notormov',	`sub	$1 = -1, $2')')
ifdef(`OPERATION_nior_n',
`	define(`func',`mpn_nior_n')
	define(`logop',		`or	$1 = $2, $3')
	define(`notormov',	`sub	$1 = -1, $2')')
ifdef(`OPERATION_xor_n',
`	define(`func',`mpn_xor_n')
	define(`logop',		`xor	$1 = $2, $3')
	define(`notormov',	`mov	$1 = $2')')
ifdef(`OPERATION_xnor_n',
`	define(`func',`mpn_xnor_n')
	define(`logop',		`xor	$1 = $2, $3')
	define(`notormov',	`sub	$1 = -1, $2')')

MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)

C REGISTER USE
C   r2        copy of ar.lc taken on entry, written back before returning
C   r10, r11  first limb of up and of vp, loaded before the dispatch
C   r16-r19   limbs loaded from up
C   r20-r23   limbs loaded from vp
C   r14, r15  logop results, used alternately
C   r8, r9    notormov results, used alternately, these are what is stored
C   r14       also holds n mod 4 during the dispatch, before its first logop
C   p15       true when n > 4
C   p6,p7,p8  true when n mod 4 is 1, 2, 3 respectively
C
C The first limb pair is loaded unconditionally and n mod 4 = 0 is treated as
C at least four limbs, so the code assumes n >= 1.

ASM_START()
PROLOGUE(func)
	.prologue
	.save	ar.lc, r2
	.body
ifdef(`HAVE_ABI_32',
`	addp4	rp = 0, rp		C M I
	addp4	up = 0, up		C M I
	addp4	vp = 0, vp		C M I
	zxt4	n = n			C I
	;;
')

C Load limb 0, save ar.lc, then split n into n mod 4 (r14) and n / 4 (n).
{.mmi
	ld8	r10 = [up], 8		C M
	ld8	r11 = [vp], 8		C M
	mov.i	r2 = ar.lc		C I0
}
{.mmi
	and	r14 = 3, n		C M I
	cmp.lt	p15, p14 = 4, n		C M I
	shr.u	n = n, 2		C I0
	;;
}

C Dispatch on n mod 4.  Remainder 0 falls through to .Lb00.
{.mmi
	cmp.eq	p6, p0 = 1, r14		C M I
	cmp.eq	p7, p0 = 2, r14		C M I
	cmp.eq	p8, p0 = 3, r14		C M I
}
{.bbb
  (p6)	br.dptk	.Lb01			C B
  (p7)	br.dptk	.Lb10			C B
  (p8)	br.dptk	.Lb11			C B
}

C n mod 4 = 0.  With n = 4 go straight to the 4-store tail, otherwise set the
C loop count to n/4 - 2 and join the pipeline at .LL00.
.Lb00:	ld8	r17 = [up], 8		C M
	ld8	r21 = [vp], 8		C M
	add	n = -2, n		C M I
	;;
	ld8	r18 = [up], 8		C M
	ld8	r22 = [vp], 8		C M
	;;
	ld8	r19 = [up], 8		C M
	ld8	r23 = [vp], 8		C M
  (p15)	br.cond.dpnt	.grt4		C B

	logop(	r14, r10, r11)		C M I
	;;
	logop(	r15, r17, r21)		C M I
	notormov(	r8, r14)	C M I
	br	.Lcj4			C B

.grt4:	logop(	r14, r10, r11)		C M I
	ld8	r16 = [up], 8		C M
	ld8	r20 = [vp], 8		C M
	;;
	logop(	r15, r17, r21)		C M I
	ld8	r17 = [up], 8		C M
	mov.i	ar.lc = n		C I0
	notormov(	r8, r14)	C M I
	ld8	r21 = [vp], 8		C M
	br	.LL00			C B

C n mod 4 = 1.  With n = 1 only a single store is needed.  Otherwise the loop
C count starts at n/4 - 1 and the br.cloop to .grt5 consumes one count: if it
C is not taken the 5-store tail finishes the job, if taken the pipeline is
C joined at .LL01.
.Lb01:	add	n = -1, n		C M I
	logop(	r15, r10, r11)		C M I
  (p15)	br.cond.dpnt	.grt1		C B
	;;

	notormov(	r9, r15)	C M I
	br	.Lcj1			C B

.grt1:	ld8	r16 = [up], 8		C M
	ld8	r20 = [vp], 8		C M
	;;
	ld8	r17 = [up], 8		C M
	ld8	r21 = [vp], 8		C M
	mov.i	ar.lc = n		C I0
	;;
	ld8	r18 = [up], 8		C M
	ld8	r22 = [vp], 8		C M
	;;
	ld8	r19 = [up], 8		C M
	ld8	r23 = [vp], 8		C M
	br.cloop.dptk	.grt5		C B
	;;

	logop(	r14, r16, r20)		C M I
	notormov(	r9, r15)	C M I
	br	.Lcj5			C B

.grt5:	logop(	r14, r16, r20)		C M I
	ld8	r16 = [up], 8		C M
	notormov(	r9, r15)	C M I
	ld8	r20 = [vp], 8		C M
	br	.LL01			C B

C n mod 4 = 2.  With n = 2 use the 2-store tail.  Otherwise the loop count is
C n/4 - 1 and the br.cloop either enters the loop at its top or, when the
C count is exhausted, drops to the 6-store tail.
.Lb10:	ld8	r19 = [up], 8		C M
	ld8	r23 = [vp], 8		C M
  (p15)	br.cond.dpnt	.grt2		C B

	logop(	r14, r10, r11)		C M I
	;;
	logop(	r15, r19, r23)		C M I
	notormov(	r8, r14)	C M I
	br	.Lcj2			C B

.grt2:	ld8	r16 = [up], 8		C M
	ld8	r20 = [vp], 8		C M
	add	n = -1, n		C M I
	;;
	ld8	r17 = [up], 8		C M
	ld8	r21 = [vp], 8		C M
	logop(	r14, r10, r11)		C M I
	;;
	ld8	r18 = [up], 8		C M
	ld8	r22 = [vp], 8		C M
	mov.i	ar.lc = n		C I0
	;;
	logop(	r15, r19, r23)		C M I
	ld8	r19 = [up], 8		C M
	notormov(	r8, r14)	C M I
	ld8	r23 = [vp], 8		C M
	br.cloop.dptk	.Loop		C B
	br	.Lcj6			C B

C n mod 4 = 3.  With n = 3 use the 3-store tail, otherwise set the loop count
C to n/4 - 1 and join the pipeline at .LL11.
.Lb11:	ld8	r18 = [up], 8		C M
	ld8	r22 = [vp], 8		C M
	add	n = -1, n		C M I
	;;
	ld8	r19 = [up], 8		C M
	ld8	r23 = [vp], 8		C M
	logop(	r15, r10, r11)		C M I
  (p15)	br.cond.dpnt	.grt3		C B
	;;

	logop(	r14, r18, r22)		C M I
	notormov(	r9, r15)	C M I
	br	.Lcj3			C B

.grt3:	ld8	r16 = [up], 8		C M
	ld8	r20 = [vp], 8		C M
	;;
	ld8	r17 = [up], 8		C M
	ld8	r21 = [vp], 8		C M
	mov.i	ar.lc = n		C I0
	;;
	logop(	r14, r18, r22)		C M I
	ld8	r18 = [up], 8		C M
	notormov(	r9, r15)	C M I
	ld8	r22 = [vp], 8		C M
	br	.LL11			C B

C *** MAIN LOOP START ***
C Four limbs per iteration, one per stage.  Every stage stores a finished
C limb, runs notormov on the previous logop result, runs logop on an already
C loaded limb pair and reloads that same register pair for a later pass.
C The feed-in code above enters at .Loop, .LL01, .LL00 or .LL11.
	ALIGN(32)
.Loop:	st8	[rp] = r8, 8		C M
	logop(	r14, r16, r20)		C M I
	notormov(	r9, r15)	C M I
	ld8	r16 = [up], 8		C M
	ld8	r20 = [vp], 8		C M
	nop.b	0
	;;
.LL01:	st8	[rp] = r9, 8		C M
	logop(	r15, r17, r21)		C M I
	notormov(	r8, r14)	C M I
	ld8	r17 = [up], 8		C M
	ld8	r21 = [vp], 8		C M
	nop.b	0
	;;
.LL00:	st8	[rp] = r8, 8		C M
	logop(	r14, r18, r22)		C M I
	notormov(	r9, r15)	C M I
	ld8	r18 = [up], 8		C M
	ld8	r22 = [vp], 8		C M
	nop.b	0
	;;
.LL11:	st8	[rp] = r9, 8		C M
	logop(	r15, r19, r23)		C M I
	notormov(	r8, r14)	C M I
	ld8	r19 = [up], 8		C M
	ld8	r23 = [vp], 8		C M
	br.cloop.dptk	.Loop ;;	C B
C *** MAIN LOOP END ***

C Wind-down.  No further loads are issued.  Entering at .LcjK performs the
C final K stores, so the feed-in paths for short operands jump directly to
C the matching label.
.Lcj6:	st8	[rp] = r8, 8		C M
	logop(	r14, r16, r20)		C M I
	notormov(	r9, r15)	C M I
	;;
.Lcj5:	st8	[rp] = r9, 8		C M
	logop(	r15, r17, r21)		C M I
	notormov(	r8, r14)	C M I
	;;
.Lcj4:	st8	[rp] = r8, 8		C M
	logop(	r14, r18, r22)		C M I
	notormov(	r9, r15)	C M I
	;;
.Lcj3:	st8	[rp] = r9, 8		C M
	logop(	r15, r19, r23)		C M I
	notormov(	r8, r14)	C M I
	;;
.Lcj2:	st8	[rp] = r8, 8		C M
	notormov(	r9, r15)	C M I
	;;
C Last store, then put the entry value of ar.lc back and return.
.Lcj1:	st8	[rp] = r9, 8		C M
	mov.i	ar.lc = r2		C I0
	br.ret.sptk.many b0		C B
EPILOGUE()
ASM_END()