1dnl Intel Pentium mpn_copyd -- copy limb vector, decrementing. 2 3dnl Copyright 1996, 2001, 2002, 2006 Free Software Foundation, Inc. 4 5dnl This file is part of the GNU MP Library. 6dnl 7dnl The GNU MP Library is free software; you can redistribute it and/or modify 8dnl it under the terms of either: 9dnl 10dnl * the GNU Lesser General Public License as published by the Free 11dnl Software Foundation; either version 3 of the License, or (at your 12dnl option) any later version. 13dnl 14dnl or 15dnl 16dnl * the GNU General Public License as published by the Free Software 17dnl Foundation; either version 2 of the License, or (at your option) any 18dnl later version. 19dnl 20dnl or both in parallel, as here. 21dnl 22dnl The GNU MP Library is distributed in the hope that it will be useful, but 23dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 24dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 25dnl for more details. 26dnl 27dnl You should have received copies of the GNU General Public License and the 28dnl GNU Lesser General Public License along with the GNU MP Library. If not, 29dnl see https://www.gnu.org/licenses/. 30 31include(`../config.m4') 32 33 34C P5: 1.25 cycles/limb 35 36 37C void mpn_copyd (mp_ptr dst, mp_srcptr src, mp_size_t size); 38C 39C See comments in copyi.asm. 
C void mpn_copyd (mp_ptr dst, mp_srcptr src, mp_size_t size);
C
C Copy size limbs from src to dst, starting at the highest-addressed limb
C and working downwards.  Limbs are 4 bytes here: every index is scaled
C by 4 and every displacement is a multiple of 4.
C
C Layout of the routine:
C   1. main loop, 8 limbs per pass, entered only when size >= 8
C   2. fix-up for 4 leftover limbs
C   3. fix-up for 2 leftover limbs
C   4. fix-up for 1 leftover limb
C
C Register use: eax, ecx and edx are overwritten without being saved; esi
C and edi are pushed on entry and popped before the return.
C
C The defframe macro is defined outside this file; the numbers are
C presumably byte offsets of the arguments from the stack pointer at entry
C (TODO confirm against the macro definition).

defframe(PARAM_SIZE,12)
defframe(PARAM_SRC, 8)
defframe(PARAM_DST, 4)

	TEXT
	ALIGN(8)
PROLOGUE(mpn_copyd)
deflit(`FRAME',0)

	movl	PARAM_SRC, %eax
	movl	PARAM_SIZE, %ecx

	C FRAME_pushl is defined outside this file; presumably it keeps the
	C frame bookkeeping in step with each push so that PARAM_DST, which
	C is read after the pushes, still resolves correctly (TODO confirm).
	pushl	%esi	FRAME_pushl()
	pushl	%edi	FRAME_pushl()

	leal	-4(%eax,%ecx,4), %eax		C &src[size-1]
	movl	PARAM_DST, %edx

	C Bias the counter by 7 so that one signed test separates the
	C sizes that can fill a whole 8-limb pass from those that cannot.
	subl	$7, %ecx			C size-7
	jle	L(end)				C size <= 7, tail code only

	C The value loaded here is never used (esi is overwritten by the
	C next load); the access only touches the top of dst.
	movl	28-4(%edx,%ecx,4), %esi		C prefetch cache, dst[size-1]
	C NOTE(review): the purpose of this nop is not stated in the file;
	C presumably alignment or pairing of the loop top -- confirm.
	nop

L(top):
	C eax	src, decrementing; points at the highest limb not yet copied
	C ebx
	C ecx	counter, limbs; equals (limbs remaining) - 7, positive here
	C edx	dst base, never modified; stores are indexed through ecx
	C esi	scratch
	C edi	scratch
	C ebp

	C Touches the lowest dst limb this pass will write, eight limbs
	C below the previous touch; the loaded value is discarded.
	movl	28-32(%edx,%ecx,4), %esi	C prefetch dst cache line
	C This subl sets the flags tested by the jg at the bottom of the
	C loop; nothing in between (only movl and leal) alters them.
	subl	$8, %ecx

	C With the counter already lowered by 8, displacements 56 down to
	C 28 address the eight highest dst limbs still to be written.
	movl	(%eax), %esi			C read words pairwise
	movl	-4(%eax), %edi
	movl	%esi, 56(%edx,%ecx,4)		C store words pairwise
	movl	%edi, 52(%edx,%ecx,4)

	movl	-8(%eax), %esi
	movl	-12(%eax), %edi
	movl	%esi, 48(%edx,%ecx,4)
	movl	%edi, 44(%edx,%ecx,4)

	movl	-16(%eax), %esi
	movl	-20(%eax), %edi
	movl	%esi, 40(%edx,%ecx,4)
	movl	%edi, 36(%edx,%ecx,4)

	movl	-24(%eax), %esi
	movl	-28(%eax), %edi
	movl	%esi, 32(%edx,%ecx,4)
	movl	%edi, 28(%edx,%ecx,4)

	C leal rather than subl so the flags from the counter update
	C survive until the branch.
	leal	-32(%eax), %eax			C src pointer down 8 limbs
	jg	L(top)				C loop while 8 or more limbs remain


L(end):
	C ecx	-7 to 0, representing respectively 0 to 7 limbs remaining
	C eax	src end; the highest-addressed limb not yet copied
	C edx	dst base; remaining stores are indexed through ecx

	C After this add, ecx = (limbs remaining) - 3.
	addl	$4, %ecx
	jle	L(no4)				C 3 or fewer remain, skip

	C Copy the four highest remaining limbs.
	movl	(%eax), %esi
	movl	-4(%eax), %edi
	movl	%esi, 8(%edx,%ecx,4)
	movl	%edi, 4(%edx,%ecx,4)

	movl	-8(%eax), %esi
	movl	-12(%eax), %edi
	movl	%esi, (%edx,%ecx,4)
	movl	%edi, -4(%edx,%ecx,4)

	subl	$16, %eax			C src pointer down 4 limbs
	subl	$4, %ecx			C again (limbs remaining) - 3
L(no4):

	C After this add, ecx = (limbs remaining) - 1.
	addl	$2, %ecx
	jle	L(no2)				C 1 or 0 remain, skip

	C Copy the two highest remaining limbs.
	movl	(%eax), %esi
	movl	-4(%eax), %edi
	movl	%esi, (%edx,%ecx,4)
	movl	%edi, -4(%edx,%ecx,4)

	subl	$8, %eax			C src pointer down 2 limbs
	subl	$2, %ecx			C again (limbs remaining) - 1
L(no2):

	C Flags come from the addl when the branch above was taken and from
	C the subl otherwise.  In both cases ecx = (limbs remaining) - 1, so
	C the zero flag is set exactly when one limb is left.
	jnz	L(done)

	C One limb left: at this point eax addresses src[0] and the target
	C is dst[0].  ecx is free to serve as scratch, the count being done.
	movl	(%eax), %ecx
	movl	%ecx, (%edx)	C risk of cache bank clash here

L(done):
	C Restore the saved registers in reverse order of the pushes.
	popl	%edi
	popl	%esi

	ret

EPILOGUE()
147