dnl  IA-64 mpn_copyi -- copy limb vector, incrementing.

dnl  Copyright 2001, 2002, 2004 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')

C           cycles/limb
C Itanium:      1
C Itanium 2:    0.5

C INPUT PARAMETERS
C rp = r32
C sp = r33
C n  = r34

C OVERVIEW
C Limbs are 8 bytes wide and are moved with ld8/st8.  The copy runs as two
C interleaved streams, each stepping 16 bytes per access:
C   r33 -> r32   one set of alternate limbs
C   r21 -> r20   the other set, 8 bytes away from the first
C Four limbs are kept in flight in r16..r19.  The loop body is unrolled four
C times and has four entry points, .Li00 .Li11 .Li10 .Li01, one for each
C value of n mod 4.  After the loop, the tail at .Lend stores whatever is
C still held in r16..r19; short operands with n <= 3 jump directly to the
C matching tail label .Ls11 .Ls10 .Ls01 .Ls00.
C
C REGISTER USE
C r2        caller value of ar.lc, restored before returning
C r14       n mod 4, selects the entry path
C r15       n-4 shifted right by 2, written to ar.lc as the loop count
C r16..r19  limbs loaded but not yet stored
C r20, r21  second destination and source pointers
C p14       set when n <= 3, meaning no pass through the loop is needed
C p8        set when n mod 4 = 1
C p10       set when n mod 4 = 2
C p12       set when n mod 4 = 3

ASM_START()
PROLOGUE(mpn_copyi)
        .prologue
        .save   ar.lc, r2
        .body
C With the 32-bit ABI the pointers are converted with addp4 and n is
C sign-extended from 32 bits before use.
ifdef(`HAVE_ABI_32',
`       addp4           r32 = 0, r32
        addp4           r33 = 0, r33
        sxt4            r34 = r34
        ;;
')
{.mmi
        nop             0
        nop             0
        mov.i           r2 = ar.lc              C save ar.lc, restored at .Ls00
}
C All three instructions below are in one instruction group, so the and and
C the cmp.ge both read the original n while the add writes n-4.
{.mmi
        and             r14 = 3, r34            C r14 = n mod 4
        cmp.ge          p14, p15 = 3, r34       C p14 = n <= 3
        add             r34 = -4, r34           C r34 = n - 4
        ;;
}
{.mmi
        cmp.eq          p8, p0 = 1, r14
        cmp.eq          p10, p0 = 2, r14
        cmp.eq          p12, p0 = 3, r14
}
C Dispatch on n mod 4; the remaining case, n mod 4 = 0, falls through.
{.bbb
  (p8)  br.dptk         .Lb01
  (p10) br.dptk         .Lb10
  (p12) br.dptk         .Lb11
}

.Lb00:  C  n = 0, 4, 8, 12, ...
  (p14) br.dptk         .Ls00                   C n = 0, nothing to copy
        ;;
        add             r21 = 8, r33
        ld8             r16 = [r33], 16
        shr             r15 = r34, 2
        ;;
        ld8             r17 = [r21], 16
        mov.i           ar.lc = r15
        ld8             r18 = [r33], 16
        add             r20 = 8, r32
        ;;
        ld8             r19 = [r21], 16
        br.cloop.dptk   .Loop                   C enter the loop if the count is nonzero
        ;;
        br.sptk         .Lend                   C else just store the four loaded limbs
        ;;

.Lb01:  C  n = 1, 5, 9, 13, ...
        add             r21 = 0, r33            C r21/r20 take the first limb,
        add             r20 = 0, r32
        add             r33 = 8, r33            C r33/r32 start one limb further on
        add             r32 = 8, r32
        ;;
        ld8             r19 = [r21], 16
        shr             r15 = r34, 2
  (p14) br.dptk         .Ls01                   C n = 1, store the single limb
        ;;
        ld8             r16 = [r33], 16
        mov.i           ar.lc = r15
        ;;
        ld8             r17 = [r21], 16
        ld8             r18 = [r33], 16
        br.sptk         .Li01
        ;;

.Lb10:  C  n = 2, 6, 10, 14, ...
        add             r21 = 8, r33
        add             r20 = 8, r32
        ld8             r18 = [r33], 16
        shr             r15 = r34, 2
        ;;
        ld8             r19 = [r21], 16
        mov.i           ar.lc = r15
  (p14) br.dptk         .Ls10                   C n = 2, store the two limbs
        ;;
        ld8             r16 = [r33], 16
        ld8             r17 = [r21], 16
        br.sptk         .Li10
        ;;

.Lb11:  C  n = 3, 7, 11, 15, ...
        add             r21 = 0, r33            C r21/r20 take the first limb,
        add             r20 = 0, r32
        add             r33 = 8, r33            C r33/r32 start one limb further on
        add             r32 = 8, r32
        ;;
        ld8             r17 = [r21], 16
        shr             r15 = r34, 2
        ;;
        ld8             r18 = [r33], 16
        mov.i           ar.lc = r15
        ld8             r19 = [r21], 16
  (p14) br.dptk         .Ls11                   C n = 3, store the three limbs
        ;;
        ld8             r16 = [r33], 16
        br.sptk         .Li11
        ;;

C Main loop.  Each bundle stores one limb already held in a register and
C loads its replacement, so one pass moves four limbs.  The pointers
C alternate between the r32/r33 stream and the r20/r21 stream.
        ALIGN(32)
.Loop:
.Li00:
{.mmb
        st8             [r32] = r16, 16
        ld8             r16 = [r33], 16
        nop.b           0
}
.Li11:
{.mmb
        st8             [r20] = r17, 16
        ld8             r17 = [r21], 16
        nop.b           0
        ;;
}
.Li10:
{.mmb
        st8             [r32] = r18, 16
        ld8             r18 = [r33], 16
        nop.b           0
}
.Li01:
{.mmb
        st8             [r20] = r19, 16
        ld8             r19 = [r21], 16
        br.cloop.dptk   .Loop
        ;;
}
C Tail.  Store the limbs still held in r16..r19.  The short-operand paths
C enter part way down so that only the registers they loaded are stored.
.Lend:  st8             [r32] = r16, 16
.Ls11:  st8             [r20] = r17, 16
        ;;
.Ls10:  st8             [r32] = r18, 16
.Ls01:  st8             [r20] = r19, 16
.Ls00:  mov.i           ar.lc = r2              C restore the saved ar.lc
        br.ret.sptk.many b0
EPILOGUE()
ASM_END()