dnl  IA-64 mpn_copyd -- copy limb vector, decrementing.

dnl  Contributed to the GNU project by Torbjorn Granlund.

dnl  Copyright 2001, 2002, 2004 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.

dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of the GNU Lesser General Public License as published
dnl  by the Free Software Foundation; either version 3 of the License, or (at
dnl  your option) any later version.

dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
dnl  License for more details.

dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')

C         cycles/limb
C Itanium:    1
C Itanium 2:  0.5

C INPUT PARAMETERS
C rp = r32
C sp = r33
C n = r34

C OVERVIEW
C Copies n 8-byte limbs from sp to rp, starting at the highest address and
C working downwards.  Both pointers are first advanced by 8*n bytes, then all
C loads and stores post-decrement their address register by 16.
C
C Register roles in the code below:
C   r33, r21   source pointers, interleaved, each stepping by -16
C   r32, r20   destination pointers, interleaved, each stepping by -16
C   r16-r19    limbs in flight between a load and the matching store
C   r14        n mod 4, selects the entry point
C   r15        (n - 4) >> 2, written to ar.lc as the loop count
C   r2         caller value of ar.lc, put back before returning
C   p14        set when n <= 3, meaning the main loop is skipped

ASM_START()
PROLOGUE(mpn_copyd)
	.prologue
	.save ar.lc, r2
	.body
C With the 32-bit ABI the two pointers are widened with addp4 and n is
C sign-extended with sxt4 before anything else uses them.
ifdef(`HAVE_ABI_32',
`	addp4	r32 = 0, r32
	addp4	r33 = 0, r33
	sxt4	r34 = r34
	;;
')
C r32 = rp + 8*n and r33 = sp + 8*n, i.e. just past the top limb of each
C operand.  ar.lc is saved in r2 so it can be restored at .Ls00.
{.mmi
	shladd	r32 = r34, 3, r32
	shladd	r33 = r34, 3, r33
	mov.i	r2 = ar.lc
}
C r14 = n mod 4, p14 = (3 >= n), r34 = n - 4.  These all read the original n
C because they sit in the same instruction group.
{.mmi
	and	r14 = 3, r34
	cmp.ge	p14, p15 = 3, r34
	add	r34 = -4, r34
	;;
}
C Dispatch on n mod 4.  Residue 0 falls through to .Lb00.
{.mmi
	cmp.eq	p8, p0 = 1, r14
	cmp.eq	p10, p0 = 2, r14
	cmp.eq	p12, p0 = 3, r14
}
{.bbb
  (p8)	br.dptk	.Lb01
  (p10)	br.dptk	.Lb10
  (p12)	br.dptk	.Lb11
}

C Entry for n mod 4 = 0.  With p14 set n must be 0, so leave through .Ls00
C without touching memory.  Otherwise preload the top four limbs into
C r16, r17, r18, r19 and enter the loop at its top, or go straight to the
C drain code at .Lend when ar.lc is zero and br.cloop falls through.
.Lb00:	C  n = 0, 4, 8, 12, ...
	add	r32 = -8, r32
	add	r33 = -8, r33
  (p14)	br.dptk	.Ls00
	;;
	add	r21 = -8, r33
	ld8	r16 = [r33], -16
	shr	r15 = r34, 2
	;;
	ld8	r17 = [r21], -16
	mov.i	ar.lc = r15
	ld8	r18 = [r33], -16
	add	r20 = -8, r32
	;;
	ld8	r19 = [r21], -16
	br.cloop.dptk .Loop
	;;
	br.sptk	.Lend
	;;

C Entry for n mod 4 = 1.  Here r21/r20 address the top limb and r33/r32 the
C one below it.  The top limb goes into r19.  For n = 1 (p14 set) only the
C final store at .Ls01 is needed.  Otherwise r16, r17, r18 are preloaded and
C the loop is entered at its last bundle, .Li01.
.Lb01:	C  n = 1, 5, 9, 13, ...
	add	r21 = -8, r33
	add	r20 = -8, r32
	add	r33 = -16, r33
	add	r32 = -16, r32
	;;
	ld8	r19 = [r21], -16
	shr	r15 = r34, 2
  (p14)	br.dptk	.Ls01
	;;
	ld8	r16 = [r33], -16
	mov.i	ar.lc = r15
	;;
	ld8	r17 = [r21], -16
	ld8	r18 = [r33], -16
	br.sptk	.Li01
	;;

C Entry for n mod 4 = 2.  Here r33/r32 address the top limb and r21/r20 the
C one below it.  All five adds and shifts are in one instruction group, so
C r21 and r20 are computed from the values r33 and r32 had on entry.  The top
C two limbs go into r18 and r19.  For n = 2 (p14 set) the last two stores at
C .Ls10 finish the job.  Otherwise r16 and r17 are preloaded and the loop is
C entered at .Li10.
.Lb10:	C  n = 2,6, 10, 14, ...
	add	r21 = -16, r33
	shr	r15 = r34, 2
	add	r20 = -16, r32
	add	r32 = -8, r32
	add	r33 = -8, r33
	;;
	ld8	r18 = [r33], -16
	ld8	r19 = [r21], -16
	mov.i	ar.lc = r15
  (p14)	br.dptk	.Ls10
	;;
	ld8	r16 = [r33], -16
	ld8	r17 = [r21], -16
	br.sptk	.Li10
	;;

C Entry for n mod 4 = 3.  Pointer layout as in .Lb01.  The top three limbs go
C into r17, r18, r19.  For n = 3 (p14 set) the last three stores at .Ls11
C finish the job.  Otherwise r16 is preloaded and the loop is entered at
C .Li11.
.Lb11:	C  n = 3, 7, 11, 15, ...
	add	r21 = -8, r33
	add	r20 = -8, r32
	add	r33 = -16, r33
	add	r32 = -16, r32
	;;
	ld8	r17 = [r21], -16
	shr	r15 = r34, 2
	;;
	ld8	r18 = [r33], -16
	mov.i	ar.lc = r15
	ld8	r19 = [r21], -16
  (p14)	br.dptk	.Ls11
	;;
	ld8	r16 = [r33], -16
	br.sptk	.Li11
	;;

C Main loop, four limbs per pass.  Each bundle stores a limb loaded earlier
C and reloads the same register with a limb further down.  The bundles
C alternate between the r32/r33 and r20/r21 pointer pairs.  The .Li labels
C are the mid-loop entry points used by the residue cases above.
	ALIGN(32)
.Loop:
.Li00:
{.mmb
	st8	[r32] = r16, -16
	ld8	r16 = [r33], -16
	nop.b	0
}
.Li11:
{.mmb
	st8	[r20] = r17, -16
	ld8	r17 = [r21], -16
	nop.b	0
	;;
}
.Li10:
{.mmb
	st8	[r32] = r18, -16
	ld8	r18 = [r33], -16
	nop.b	0
}
.Li01:
{.mmb
	st8	[r20] = r19, -16
	ld8	r19 = [r21], -16
	br.cloop.dptk .Loop
	;;
}
C Drain: store the limbs still held in r16-r19.  The short cases n = 3, 2, 1
C enter at .Ls11, .Ls10, .Ls01 respectively, and n = 0 enters at .Ls00.
C Finally ar.lc is restored from r2 and control returns through b0.
.Lend:	st8	[r32] = r16, -16
.Ls11:	st8	[r20] = r17, -16
	;;
.Ls10:	st8	[r32] = r18, -16
.Ls01:	st8	[r20] = r19, -16
.Ls00:	mov.i	ar.lc = r2
	br.ret.sptk.many b0
EPILOGUE()
ASM_END()