dnl  IA-64 mpn_copyi -- copy limb vector, incrementing.

dnl  Contributed to the GNU project by Torbjorn Granlund.

dnl  Copyright 2001, 2002, 2004 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')

C         cycles/limb
C Itanium:    1
C Itanium 2:  0.5

C INPUT PARAMETERS
C rp = r32
C sp = r33
C n = r34

C mpn_copyi copies n limbs (8 bytes each) from sp to rp, walking both
C vectors from the lowest address upwards.
C
C REGISTER USAGE
C r2        copy of ar.lc taken on entry, written back before returning
C r14       n mod 4, selects one of the four entry paths below
C r15       (n - 4) >> 2, loaded into ar.lc as the loop counter
C r16-r19   limbs in flight between their load and their store
C r33, r21  load pointers, 8 bytes apart, each advancing 16 bytes per load
C r32, r20  store pointers, 8 bytes apart, each advancing 16 bytes per store
C p14       set when n <= 3; the entry paths then branch straight to the
C           tail stores instead of entering the loop
C p15       complement of p14, not referenced in this routine
C
C The main loop is unrolled four ways.  Each n mod 4 entry path preloads
C as many of r16-r19 as it needs and then jumps into the loop at the
C matching .Li label, or to the matching .Ls label when n <= 3.

ASM_START()
PROLOGUE(mpn_copyi)
	.prologue
	.save ar.lc, r2
	.body
C For the 32-bit ABI, widen both pointer arguments with addp4 and
C sign-extend the count with sxt4 before they are used.
ifdef(`HAVE_ABI_32',
`	addp4	r32 = 0, r32
	addp4	r33 = 0, r33
	sxt4	r34 = r34
	;;
')
{.mmi
	nop	0
	nop	0
	mov.i	r2 = ar.lc		C keep ar.lc so it can be restored at .Ls00
}
{.mmi
	and	r14 = 3, r34		C r14 = n mod 4
	cmp.ge	p14, p15 = 3, r34	C p14 = (n <= 3)
	add	r34 = -4, r34		C r34 = n - 4, basis for the loop count
	;;
}
{.mmi
	cmp.eq	p8, p0 = 1, r14		C p8:  n mod 4 = 1
	cmp.eq	p10, p0 = 2, r14	C p10: n mod 4 = 2
	cmp.eq	p12, p0 = 3, r14	C p12: n mod 4 = 3
}
{.bbb
  (p8)	br.dptk	.Lb01
  (p10)	br.dptk	.Lb10
  (p12)	br.dptk	.Lb11
}
C None of the three branches taken: n mod 4 = 0, fall through.

.Lb00:	C  n = 0, 4, 8, 12, ...
  (p14)	br.dptk	.Ls00			C n <= 3 here means nothing to copy
	;;
	add	r21 = 8, r33		C second load pointer = sp + 8
	ld8	r16 = [r33], 16
	shr	r15 = r34, 2		C r15 = (n - 4) / 4
	;;
	ld8	r17 = [r21], 16
	mov.i	ar.lc = r15
	ld8	r18 = [r33], 16
	add	r20 = 8, r32		C second store pointer = rp + 8
	;;
	ld8	r19 = [r21], 16
	br.cloop.dptk .Loop		C taken only while ar.lc is nonzero
	;;
	br.sptk	.Lend			C ar.lc was 0: just store r16-r19
	;;

.Lb01:	C  n = 1, 5, 9, 13, ...
	add	r21 = 0, r33		C r21 loads the first limb at sp
	add	r20 = 0, r32		C r20 stores the first limb at rp
	add	r33 = 8, r33		C r33 and r32 start one limb further on
	add	r32 = 8, r32
	;;
	ld8	r19 = [r21], 16
	shr	r15 = r34, 2
  (p14)	br.dptk	.Ls01			C n <= 3: only r19 is left to store
	;;
	ld8	r16 = [r33], 16
	mov.i	ar.lc = r15
	;;
	ld8	r17 = [r21], 16
	ld8	r18 = [r33], 16
	br.sptk	.Li01			C enter the loop at its last bundle
	;;

.Lb10:	C  n = 2, 6, 10, 14, ...
	add	r21 = 8, r33
	add	r20 = 8, r32
	ld8	r18 = [r33], 16
	shr	r15 = r34, 2
	;;
	ld8	r19 = [r21], 16
	mov.i	ar.lc = r15		C written before the p14 test; on the p14
					C path ar.lc is overwritten again at .Ls00
  (p14)	br.dptk	.Ls10			C n <= 3: store r18 and r19 only
	;;
	ld8	r16 = [r33], 16
	ld8	r17 = [r21], 16
	br.sptk	.Li10			C enter the loop at its third bundle
	;;

.Lb11:	C  n = 3, 7, 11, 15, ...
	add	r21 = 0, r33		C r21 loads the first limb at sp
	add	r20 = 0, r32		C r20 stores the first limb at rp
	add	r33 = 8, r33
	add	r32 = 8, r32
	;;
	ld8	r17 = [r21], 16
	shr	r15 = r34, 2
	;;
	ld8	r18 = [r33], 16
	mov.i	ar.lc = r15		C written before the p14 test; on the p14
					C path ar.lc is overwritten again at .Ls00
	ld8	r19 = [r21], 16
  (p14)	br.dptk	.Ls11			C n <= 3: store r17, r18 and r19 only
	;;
	ld8	r16 = [r33], 16
	br.sptk	.Li11			C enter the loop at its second bundle
	;;

C Main loop.  One full pass stores the four limbs held in r16-r19 and
C reloads the same registers with the next four.  Stores alternate
C between r32 and r20, loads between r33 and r21.  The .Li labels are the
C entry points used by the setup code above.
	ALIGN(32)
.Loop:
.Li00:
{.mmb
	st8	[r32] = r16, 16
	ld8	r16 = [r33], 16
	nop.b	0
}
.Li11:
{.mmb
	st8	[r20] = r17, 16
	ld8	r17 = [r21], 16
	nop.b	0
	;;
}
.Li10:
{.mmb
	st8	[r32] = r18, 16
	ld8	r18 = [r33], 16
	nop.b	0
}
.Li01:
{.mmb
	st8	[r20] = r19, 16
	ld8	r19 = [r21], 16
	br.cloop.dptk .Loop
	;;
}
C Tail.  Falling out of the loop, r16-r19 still hold four limbs that
C have been loaded but not yet stored.  .Ls11, .Ls10 and .Ls01 are the
C entry points for the n <= 3 cases with three, two and one limb pending,
C and .Ls00 is the common exit.
.Lend:	st8	[r32] = r16, 16
.Ls11:	st8	[r20] = r17, 16
	;;
.Ls10:	st8	[r32] = r18, 16
.Ls01:	st8	[r20] = r19, 16
.Ls00:	mov.i	ar.lc = r2		C restore the ar.lc value saved on entry
	br.ret.sptk.many b0
EPILOGUE()
ASM_END()