dnl  IA-64 mpn_copyi -- copy limb vector, incrementing.

dnl  Contributed to the GNU project by Torbjorn Granlund.

dnl  Copyright 2001, 2002, 2004 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')

C           cycles/limb
C Itanium:    1
C Itanium 2:  0.5

C INPUT PARAMETERS
C rp = r32
C sp = r33
C n = r34

C OPERATION
C Copies the n 8-byte limbs at sp to rp, lowest address first.
C
C The work is split over two interleaved streams.  Each stream has its own
C load pointer and store pointer, every access post-increments its pointer
C by 16 bytes, and the two streams start 8 bytes apart, so together they
C touch consecutive limbs.
C
C REGISTER USE
C r2        copy of ar.lc taken on entry, written back before returning
C r14       n mod 4, selects the entry path (.Lb00/.Lb01/.Lb10/.Lb11)
C r15       (n - 4) >> 2, the value placed in ar.lc
C r16-r19   the four limbs currently loaded but not yet stored
C r33, r21  load pointers of the two streams
C r32, r20  store pointers of the two streams
C r34       n on entry, n - 4 after the second bundle
C p14       true when n <= 3, i.e. the loop and its preloads are skipped
C p8, p10, p12   true when n mod 4 is 1, 2, 3 respectively

ASM_START()
PROLOGUE(mpn_copyi)
	.prologue
	.save ar.lc, r2
	.body
ifdef(`HAVE_ABI_32',
`	addp4		r32 = 0, r32
	addp4		r33 = 0, r33
	sxt4		r34 = r34
	;;
')
C Keep the incoming loop counter so it can be restored at .Ls00.
{.mmi
	nop		0
	nop		0
	mov.i		r2 = ar.lc
}
C Classify n: residue mod 4 in r14, small-count flag in p14, and bias n by
C -4 since four limbs are always handled outside the loop body.
{.mmi
	and		r14 = 3, r34
	cmp.ge		p14, p15 = 3, r34
	add		r34 = -4, r34
	;;
}
{.mmi
	cmp.eq		p8, p0 = 1, r14
	cmp.eq		p10, p0 = 2, r14
	cmp.eq		p12, p0 = 3, r14
}
C Residue 0 takes none of these branches and falls through into .Lb00.
{.bbb
  (p8)	br.dptk		.Lb01
  (p10)	br.dptk		.Lb10
  (p12)	br.dptk		.Lb11
}

.Lb00:	C  n = 0, 4, 8, 12, ...
C With residue 0 and n <= 3 there is nothing to copy (n = 0 for
C non-negative n), so go straight to the exit.
  (p14)	br.dptk		.Ls00
	;;
	add		r21 = 8, r33
	ld8		r16 = [r33], 16
	shr		r15 = r34, 2
	;;
	ld8		r17 = [r21], 16
	mov.i		ar.lc = r15
	ld8		r18 = [r33], 16
	add		r20 = 8, r32
	;;
	ld8		r19 = [r21], 16
C All four limb registers are now loaded.  Enter the loop if ar.lc is
C nonzero, otherwise only the four tail stores remain.
	br.cloop.dptk	.Loop
	;;
	br.sptk		.Lend
	;;

.Lb01:	C  n = 1, 5, 9, 13, ...
C The odd leading limb goes through the r21/r20 stream, so that stream
C starts at offset 0 and the r33/r32 stream is moved up by 8.
	add		r21 = 0, r33
	add		r20 = 0, r32
	add		r33 = 8, r33
	add		r32 = 8, r32
	;;
	ld8		r19 = [r21], 16
	shr		r15 = r34, 2
  (p14)	br.dptk		.Ls01
	;;
	ld8		r16 = [r33], 16
	mov.i		ar.lc = r15
	;;
	ld8		r17 = [r21], 16
	ld8		r18 = [r33], 16
	br.sptk		.Li01
	;;

.Lb10:	C  n = 2,6, 10, 14, ...
	add		r21 = 8, r33
	add		r20 = 8, r32
	ld8		r18 = [r33], 16
	shr		r15 = r34, 2
	;;
	ld8		r19 = [r21], 16
C ar.lc is written before the small-count test.  When p14 is true the
C value is not used as a loop count and ar.lc is rewritten from r2 at
C .Ls00.
	mov.i		ar.lc = r15
  (p14)	br.dptk		.Ls10
	;;
	ld8		r16 = [r33], 16
	ld8		r17 = [r21], 16
	br.sptk		.Li10
	;;

.Lb11:	C  n = 3, 7, 11, 15, ...
C Same stream layout as .Lb01: r21/r20 at offset 0, r33/r32 at offset 8.
	add		r21 = 0, r33
	add		r20 = 0, r32
	add		r33 = 8, r33
	add		r32 = 8, r32
	;;
	ld8		r17 = [r21], 16
	shr		r15 = r34, 2
	;;
	ld8		r18 = [r33], 16
	mov.i		ar.lc = r15
	ld8		r19 = [r21], 16
  (p14)	br.dptk		.Ls11
	;;
	ld8		r16 = [r33], 16
	br.sptk		.Li11
	;;

C Main loop: four bundles, each storing one previously loaded limb and
C reloading the same register with the limb four positions further on.
C .Li00/.Li11/.Li10/.Li01 are the entry points for the four residues; the
C entry paths above preload exactly the registers that the bundles from the
C chosen entry point onward will store.
	ALIGN(32)
.Loop:
.Li00:
{.mmb
	st8		[r32] = r16, 16
	ld8		r16 = [r33], 16
	nop.b		0
}
.Li11:
{.mmb
	st8		[r20] = r17, 16
	ld8		r17 = [r21], 16
	nop.b		0
	;;
}
.Li10:
{.mmb
	st8		[r32] = r18, 16
	ld8		r18 = [r33], 16
	nop.b		0
}
.Li01:
{.mmb
	st8		[r20] = r19, 16
	ld8		r19 = [r21], 16
	br.cloop.dptk	.Loop
	;;
}
C Tail: store whatever is still held in r16-r19.  The .Ls11/.Ls10/.Ls01
C labels let the n <= 3 paths store only the limbs they loaded, and .Ls00
C stores nothing.
.Lend:	st8		[r32] = r16, 16
.Ls11:	st8		[r20] = r17, 16
	;;
.Ls10:	st8		[r32] = r18, 16
.Ls01:	st8		[r20] = r19, 16
C Put back the loop counter saved in r2 and return.
.Ls00:	mov.i		ar.lc = r2
	br.ret.sptk.many b0
EPILOGUE()
ASM_END()