1dnl IA-64 mpn_and_n, mpn_andn_n, mpn_nand_n, mpn_ior_n, mpn_iorn_n, 2dnl mpn_nior_n, mpn_xor_n, mpn_xnor_n -- mpn bitwise logical operations. 3 4dnl Contributed to the GNU project by Torbjorn Granlund. 5 6dnl Copyright 2003-2005 Free Software Foundation, Inc. 7 8dnl This file is part of the GNU MP Library. 9dnl 10dnl The GNU MP Library is free software; you can redistribute it and/or modify 11dnl it under the terms of either: 12dnl 13dnl * the GNU Lesser General Public License as published by the Free 14dnl Software Foundation; either version 3 of the License, or (at your 15dnl option) any later version. 16dnl 17dnl or 18dnl 19dnl * the GNU General Public License as published by the Free Software 20dnl Foundation; either version 2 of the License, or (at your option) any 21dnl later version. 22dnl 23dnl or both in parallel, as here. 24dnl 25dnl The GNU MP Library is distributed in the hope that it will be useful, but 26dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 27dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 28dnl for more details. 29dnl 30dnl You should have received copies of the GNU General Public License and the 31dnl GNU Lesser General Public License along with the GNU MP Library. If not, 32dnl see https://www.gnu.org/licenses/. 33 34include(`../config.m4') 35 36C cycles/limb 37C Itanium: 2 38C Itanium 2: 1 39 40C TODO 41C * Use rp,rpx scheme of aors_n.asm to allow parallel stores (useful in 42C wind-down code). 
C INPUT PARAMETERS
C   rp (r32)  destination pointer, post-incremented by 8 on every st8
C   up (r33)  first source pointer, post-incremented by 8 on every ld8
C   vp (r34)  second source pointer, post-incremented by 8 on every ld8
C   n  (r35)  number of 8-byte limbs to process
define(`rp', `r32')
define(`up', `r33')
define(`vp', `r34')
define(`n', `r35')

C OPERATION SELECTION
C Each ifdef below defines three macros for one OPERATION_xxx symbol:
C   func      name of the generated function
C   logop     two-input instruction, used as logop(dst, src1, src2)
C   notormov  second step, used as notormov(dst, src); either a plain mov or
C             sub dst = -1, src (that is -1 - src, the bitwise complement)
C Every result limb is notormov(logop(u, v)), with u loaded through up and v
C loaded through vp:
C   and_n   and(u, v)
C   andn_n  andcm(u, v)
C   nand_n  complement of and(u, v)
C   ior_n   or(u, v)
C   iorn_n  complement of andcm(v, u); note the swapped source operands
C   nior_n  complement of or(u, v)
C   xor_n   xor(u, v)
C   xnor_n  complement of xor(u, v)
C NOTE(review): presumably the build defines exactly one OPERATION_xxx symbol;
C nothing in this file enforces that -- confirm against the build rules.
ifdef(`OPERATION_and_n',
`	define(`func',`mpn_and_n')
	define(`logop', `and $1 = $2, $3')
	define(`notormov', `mov $1 = $2')')
ifdef(`OPERATION_andn_n',
`	define(`func',`mpn_andn_n')
	define(`logop', `andcm $1 = $2, $3')
	define(`notormov', `mov $1 = $2')')
ifdef(`OPERATION_nand_n',
`	define(`func',`mpn_nand_n')
	define(`logop', `and $1 = $2, $3')
	define(`notormov', `sub $1 = -1, $2')')
ifdef(`OPERATION_ior_n',
`	define(`func',`mpn_ior_n')
	define(`logop', `or $1 = $2, $3')
	define(`notormov', `mov $1 = $2')')
ifdef(`OPERATION_iorn_n',
`	define(`func',`mpn_iorn_n')
	define(`logop', `andcm $1 = $3, $2')
	define(`notormov', `sub $1 = -1, $2')')
ifdef(`OPERATION_nior_n',
`	define(`func',`mpn_nior_n')
	define(`logop', `or $1 = $2, $3')
	define(`notormov', `sub $1 = -1, $2')')
ifdef(`OPERATION_xor_n',
`	define(`func',`mpn_xor_n')
	define(`logop', `xor $1 = $2, $3')
	define(`notormov', `mov $1 = $2')')
ifdef(`OPERATION_xnor_n',
`	define(`func',`mpn_xnor_n')
	define(`logop', `xor $1 = $2, $3')
	define(`notormov', `sub $1 = -1, $2')')

C NOTE(review): MULFUNC_PROLOGUE is defined outside this file; it is handed the
C names of all eight functions that the ifdefs above can select.
MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)

ASM_START()

C func (rp, up, vp, n)
C
C Overview, as read from the code below:
C   * ar.lc is copied to r2 on entry and written back from r2 at .Lcj1, just
C     before the return.
C   * The first limb pair is loaded into r10/r11 before n is examined.
C   * r14 = n AND 3 selects one of four feed-in paths (.Lb00, .Lb01, .Lb10,
C     .Lb11).  p15 is set when 4 < n.  n itself is replaced by n shifted right
C     by 2, and the feed-in paths derive the ar.lc loop count from it.
C   * Results alternate between r8 and r9, intermediate logop values between
C     r14 and r15, and loaded pairs rotate through r16/r20, r17/r21, r18/r22
C     and r19/r23.
C   * Operands too short for the loop jump directly into the wind-down code
C     at .Lcj1 ... .Lcj5.
C NOTE(review): with n = 0 the code takes the .Lb00 path with p15 clear and
C executes four st8 instructions, so n >= 1 is presumably a precondition
C guaranteed by callers -- confirm.
C The trailing M / I / I0 / B letters in the per-instruction comments
C presumably name the execution unit types each instruction can issue to.
PROLOGUE(func)
	.prologue
	.save	ar.lc, r2
	.body
C With HAVE_ABI_32 defined, the three pointers go through addp4 and n through
C zxt4 before anything else uses them.
ifdef(`HAVE_ABI_32',
`	addp4	rp = 0, rp			C			M I
	addp4	up = 0, up			C			M I
	addp4	vp = 0, vp			C			M I
	nop.m		0
	nop.m		0
	zxt4	n = n				C			I
	;;
')
C Load limb pair 0 and save ar.lc.
{.mmi
	ld8		r10 = [up], 8		C			M
	ld8		r11 = [vp], 8		C			M
	mov.i		r2 = ar.lc		C			I0
}
C All three instructions read n within one instruction group, ahead of the
C stop, so and and cmp.lt see the unshifted count.
{.mmi
	and		r14 = 3, n		C			M I
	cmp.lt		p15, p14 = 4, n		C			M I
	shr.u		n = n, 2		C			I0
	;;
}
C Dispatch on n AND 3; the value 0 falls through to .Lb00.
{.mmi
	cmp.eq		p6, p0 = 1, r14		C			M I
	cmp.eq		p7, p0 = 2, r14		C			M I
	cmp.eq		p8, p0 = 3, r14		C			M I
}
{.bbb
  (p6)	br.dptk		.Lb01			C			B
  (p7)	br.dptk		.Lb10			C			B
  (p8)	br.dptk		.Lb11			C			B
}

C n AND 3 = 0.  Three more pairs are loaded.  With p15 clear the code computes
C two limbs here and finishes through .Lcj4 (four stores).  With p15 set, .grt4
C loads two further pairs, sets ar.lc to the shifted count minus 2 and enters
C the loop at .LL00.
.Lb00:	ld8		r17 = [up], 8		C			M
	ld8		r21 = [vp], 8		C			M
	add		n = -2, n		C			M I
	;;
	ld8		r18 = [up], 8		C			M
	ld8		r22 = [vp], 8		C			M
	;;
	ld8		r19 = [up], 8		C			M
	ld8		r23 = [vp], 8		C			M
  (p15)	br.cond.dpnt	.grt4			C			B

	logop(		r14, r10, r11)		C			M I
	;;
	logop(		r15, r17, r21)		C			M I
	notormov(	r8, r14)		C			M I
	br		.Lcj4			C			B

.grt4:	logop(		r14, r10, r11)		C			M I
	ld8		r16 = [up], 8		C			M
	ld8		r20 = [vp], 8		C			M
	;;
	logop(		r15, r17, r21)		C			M I
	ld8		r17 = [up], 8		C			M
	mov.i		ar.lc = n		C			I0
	notormov(	r8, r14)		C			M I
	ld8		r21 = [vp], 8		C			M
	br		.LL00			C			B

C n AND 3 = 1.  With p15 clear the single limb is finished through .Lcj1.
C Otherwise .grt1 loads four more pairs and sets ar.lc to the shifted count
C minus 1; br.cloop then either continues at .grt5, which enters the loop at
C .LL01, or falls through to finish at .Lcj5 (five stores).
.Lb01:	add		n = -1, n		C			M I
	logop(		r15, r10, r11)		C			M I
  (p15)	br.cond.dpnt	.grt1			C			B
	;;

	notormov(	r9, r15)		C			M I
	br		.Lcj1			C			B

.grt1:	ld8		r16 = [up], 8		C			M
	ld8		r20 = [vp], 8		C			M
	;;
	ld8		r17 = [up], 8		C			M
	ld8		r21 = [vp], 8		C			M
	mov.i		ar.lc = n		C			I0
	;;
	ld8		r18 = [up], 8		C			M
	ld8		r22 = [vp], 8		C			M
	;;
	ld8		r19 = [up], 8		C			M
	ld8		r23 = [vp], 8		C			M
	br.cloop.dptk	.grt5			C			B
	;;

	logop(		r14, r16, r20)		C			M I
	notormov(	r9, r15)		C			M I
	br		.Lcj5			C			B

.grt5:	logop(		r14, r16, r20)		C			M I
	ld8		r16 = [up], 8		C			M
	notormov(	r9, r15)		C			M I
	ld8		r20 = [vp], 8		C			M
	br		.LL01			C			B

C n AND 3 = 2.  One more pair is loaded.  With p15 clear both limbs are
C computed here and finished through .Lcj2.  Otherwise .grt2 loads four more
C pairs and sets ar.lc to the shifted count minus 1; br.cloop then either
C enters the loop at its top or falls through to the branch to .Lcj6 (six
C stores).
.Lb10:	ld8		r19 = [up], 8		C			M
	ld8		r23 = [vp], 8		C			M
  (p15)	br.cond.dpnt	.grt2			C			B

	logop(		r14, r10, r11)		C			M I
	;;
	logop(		r15, r19, r23)		C			M I
	notormov(	r8, r14)		C			M I
	br		.Lcj2			C			B

.grt2:	ld8		r16 = [up], 8		C			M
	ld8		r20 = [vp], 8		C			M
	add		n = -1, n		C			M I
	;;
	ld8		r17 = [up], 8		C			M
	ld8		r21 = [vp], 8		C			M
	logop(		r14, r10, r11)		C			M I
	;;
	ld8		r18 = [up], 8		C			M
	ld8		r22 = [vp], 8		C			M
	mov.i		ar.lc = n		C			I0
	;;
	logop(		r15, r19, r23)		C			M I
	ld8		r19 = [up], 8		C			M
	notormov(	r8, r14)		C			M I
	ld8		r23 = [vp], 8		C			M
	br.cloop.dptk	.Loop			C			B
	br		.Lcj6			C			B

C n AND 3 = 3.  Two more pairs are loaded.  With p15 clear the three limbs are
C finished through .Lcj3.  Otherwise .grt3 loads three further pairs, sets
C ar.lc to the shifted count minus 1 and enters the loop at .LL11.
.Lb11:	ld8		r18 = [up], 8		C			M
	ld8		r22 = [vp], 8		C			M
	add		n = -1, n		C			M I
	;;
	ld8		r19 = [up], 8		C			M
	ld8		r23 = [vp], 8		C			M
	logop(		r15, r10, r11)		C			M I
  (p15)	br.cond.dpnt	.grt3			C			B
	;;

	logop(		r14, r18, r22)		C			M I
	notormov(	r9, r15)		C			M I
	br		.Lcj3			C			B

.grt3:	ld8		r16 = [up], 8		C			M
	ld8		r20 = [vp], 8		C			M
	;;
	ld8		r17 = [up], 8		C			M
	ld8		r21 = [vp], 8		C			M
	mov.i		ar.lc = n		C			I0
	;;
	logop(		r14, r18, r22)		C			M I
	ld8		r18 = [up], 8		C			M
	notormov(	r9, r15)		C			M I
	ld8		r22 = [vp], 8		C			M
	br		.LL11			C			B

C Four stages per pass, one limb per stage.  Each stage
C   - stores the finished limb held in r8 or r9,
C   - runs logop on an already loaded pair into r14 or r15,
C   - runs notormov on the other one of r14/r15 into r9 or r8,
C   - loads the next pair into the register pair it has just consumed.
C The feed-in paths enter at .Loop, .LL01, .LL00 or .LL11, and br.cloop repeats
C the pass while ar.lc is nonzero.
C *** MAIN LOOP START ***
	ALIGN(32)
.Loop:	st8		[rp] = r8, 8		C			M
	logop(		r14, r16, r20)		C			M I
	notormov(	r9, r15)		C			M I
	ld8		r16 = [up], 8		C			M
	ld8		r20 = [vp], 8		C			M
	nop.b		0
	;;
.LL01:	st8		[rp] = r9, 8		C			M
	logop(		r15, r17, r21)		C			M I
	notormov(	r8, r14)		C			M I
	ld8		r17 = [up], 8		C			M
	ld8		r21 = [vp], 8		C			M
	nop.b		0
	;;
.LL00:	st8		[rp] = r8, 8		C			M
	logop(		r14, r18, r22)		C			M I
	notormov(	r9, r15)		C			M I
	ld8		r18 = [up], 8		C			M
	ld8		r22 = [vp], 8		C			M
	nop.b		0
	;;
.LL11:	st8		[rp] = r9, 8		C			M
	logop(		r15, r19, r23)		C			M I
	notormov(	r8, r14)		C			M I
	ld8		r19 = [up], 8		C			M
	ld8		r23 = [vp], 8		C			M
	br.cloop.dptk	.Loop ;;		C			B
C *** MAIN LOOP END ***

C Wind-down: the same stages as the loop but with no further loads.  Entering
C at .LcjN executes N st8 instructions before the return.  .Lcj1 also restores
C ar.lc from r2.
.Lcj6:	st8		[rp] = r8, 8		C			M
	logop(		r14, r16, r20)		C			M I
	notormov(	r9, r15)		C			M I
	;;
.Lcj5:	st8		[rp] = r9, 8		C			M
	logop(		r15, r17, r21)		C			M I
	notormov(	r8, r14)		C			M I
	;;
.Lcj4:	st8		[rp] = r8, 8		C			M
	logop(		r14, r18, r22)		C			M I
	notormov(	r9, r15)		C			M I
	;;
.Lcj3:	st8		[rp] = r9, 8		C			M
	logop(		r15, r19, r23)		C			M I
	notormov(	r8, r14)		C			M I
	;;
.Lcj2:	st8		[rp] = r8, 8		C			M
	notormov(	r9, r15)		C			M I
	;;
.Lcj1:	st8		[rp] = r9, 8		C			M
	mov.i		ar.lc = r2		C			I0
	br.ret.sptk.many b0			C			B
EPILOGUE()
ASM_END()