xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/x86/copyi.asm (revision 413d532bcc3f62d122e56d92e13ac64825a40baf)
1dnl  x86 mpn_copyi -- copy limb vector, incrementing.
2
3dnl  Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
4dnl
5dnl  This file is part of the GNU MP Library.
6dnl
7dnl  The GNU MP Library is free software; you can redistribute it and/or
8dnl  modify it under the terms of the GNU Lesser General Public License as
9dnl  published by the Free Software Foundation; either version 3 of the
10dnl  License, or (at your option) any later version.
11dnl
12dnl  The GNU MP Library is distributed in the hope that it will be useful,
13dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
14dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15dnl  Lesser General Public License for more details.
16dnl
17dnl  You should have received a copy of the GNU Lesser General Public License
18dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
19
20include(`../config.m4')
21
22
23C     cycles/limb  startup (approx)
24C P5	  1.0	      35
25C P6	  0.75	      45
26C K6	  1.0	      30
27C K7	  1.3	      65
28C P4	  1.0	     120
29C
30C (Startup time includes some function call overheads.)
31
32
33C void mpn_copyi (mp_ptr dst, mp_srcptr src, mp_size_t size);
34C
35C Copy src,size to dst,size, working from low to high addresses.
36C
37C The code here is very generic and can be expected to be reasonable on all
38C the x86 family.
39C
40C P6 -  An MMX based copy was tried, but was found to be slower than a rep
41C       movs in all cases.  The fastest MMX found was 0.8 cycles/limb (when
42C       fully aligned).  A rep movs seems to have a startup time of about 15
43C       cycles, but doing something special for small sizes could lead to a
44C       branch misprediction that would destroy any saving.  For now a plain
45C       rep movs seems ok.
46C
47C K62 - We used to have a big chunk of code doing an MMX copy at 0.56 c/l if
48C       aligned or a 1.0 rep movs if not.  But that seemed excessive since
49C       it only got an advantage half the time, and even then only showed it
50C       above 50 limbs or so.
51
C Stack arguments of mpn_copyi, matching the prototype (dst, src, size).
C NOTE(review): defframe and deflit are defined in the included m4 files,
C not in this file.  The offsets are presumably relative to %esp at function
C entry, with the return address at offset 0 -- confirm against the x86 m4
C macro definitions.
52defframe(PARAM_SIZE,12)	C third argument: size, in limbs
53defframe(PARAM_SRC, 8)	C second argument: src
54defframe(PARAM_DST, 4)	C first argument: dst
55deflit(`FRAME',0)	C presumably: no bytes pushed since entry -- TODO confirm
56
57	TEXT	C NOTE(review): macro from the included m4 files, presumably selects the code section
58	ALIGN(32)	C presumably aligns the function entry to 32 bytes -- confirm
59
60	C eax	saved esi (caller value, put back before ret)
61	C ebx	(not used)
62	C ecx	counter (size, consumed by rep movsl)
63	C edx	saved edi (caller value, put back before ret)
64	C esi	src
65	C edi	dst
66	C ebp	(not used)
	C
	C esi and edi are parked in eax and edx rather than on the stack, so
	C nothing is pushed or popped anywhere in this function.
67
68PROLOGUE(mpn_copyi)
69
70	movl	PARAM_SIZE, %ecx	C ecx = size, the repeat count for rep movsl
71	movl	%esi, %eax	C keep the caller esi in eax
72
73	movl	PARAM_SRC, %esi	C esi = src, the movsl read pointer
74	movl	%edi, %edx	C keep the caller edi in edx
75
76	movl	PARAM_DST, %edi	C edi = dst, the movsl write pointer
77
78	cld	C clear DF so movsl steps upwards; better safe than sorry, see mpn/x86/README
79
80	rep
81	movsl	C copy ecx 4-byte words from (%esi) to (%edi); ecx = 0 copies nothing
82
83	movl	%eax, %esi	C put back the caller esi
84	movl	%edx, %edi	C put back the caller edi
85
86	ret	C void function: no return value is set up; DF is left clear
87
88EPILOGUE()
89