xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/x86/pentium4/sse2/add_n.asm (revision f14316bcbc544b96a93e884bc5c2b15fd60e22ae)
dnl  Intel Pentium-4 mpn_add_n -- mpn addition.

dnl  Copyright 2001, 2002 Free Software Foundation, Inc.
dnl
dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or
dnl  modify it under the terms of the GNU Lesser General Public License as
dnl  published by the Free Software Foundation; either version 3 of the
dnl  License, or (at your option) any later version.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful,
dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
dnl  Lesser General Public License for more details.
dnl
dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')


C					cycles/limb
C			     dst!=src1,2  dst==src1  dst==src2
C P6 model 0-8,10-12		-
C P6 model 9   (Banias)		?
C P6 model 13  (Dothan)		?
C P4 model 0-1 (Willamette)	?
C P4 model 2   (Northwood)	4	     6		6
C P4 model 3-4 (Prescott)	4.25	     7.5	7.5

dnl  Stack parameter slots, in argument order dst, src1, src2, size, carry.
dnl  PARAM_CARRY is read only by the mpn_add_nc entry point; mpn_add_n
dnl  starts with a zero carry instead.
defframe(PARAM_CARRY,20)
defframe(PARAM_SIZE, 16)
defframe(PARAM_SRC2, 12)
defframe(PARAM_SRC1, 8)
defframe(PARAM_DST,  4)

dnl  re-use parameter space
dnl
dnl  SAVE_EBX is the same stack slot as PARAM_SRC1.  The code loads src1
dnl  into a register first and only then stores ebx there, so no extra
dnl  stack space is needed to preserve ebx.
define(SAVE_EBX,`PARAM_SRC1')

41	TEXT
42	ALIGN(8)
43
44PROLOGUE(mpn_add_nc)
45deflit(`FRAME',0)
46	movd	PARAM_CARRY, %mm0
47	jmp	L(start_nc)
48EPILOGUE()
49
50	ALIGN(8)
51PROLOGUE(mpn_add_n)
52deflit(`FRAME',0)
53	pxor	%mm0, %mm0
54L(start_nc):
55	mov	PARAM_SRC1, %eax
56	mov	%ebx, SAVE_EBX
57	mov	PARAM_SRC2, %ebx
58	mov	PARAM_DST, %edx
59	mov	PARAM_SIZE, %ecx
60
61	lea	(%eax,%ecx,4), %eax	C src1 end
62	lea	(%ebx,%ecx,4), %ebx	C src2 end
63	lea	(%edx,%ecx,4), %edx	C dst end
64	neg	%ecx			C -size
65
66L(top):
67	C eax	src1 end
68	C ebx	src2 end
69	C ecx	counter, limbs, negative
70	C edx	dst end
71	C mm0	carry bit
72
73	movd	(%eax,%ecx,4), %mm1
74	movd	(%ebx,%ecx,4), %mm2
75	paddq	%mm2, %mm1
76
77	paddq	%mm1, %mm0
78	movd	%mm0, (%edx,%ecx,4)
79
80	psrlq	$32, %mm0
81
82	add	$1, %ecx
83	jnz	L(top)
84
85	movd	%mm0, %eax
86	mov	SAVE_EBX, %ebx
87	emms
88	ret
89
90EPILOGUE()
91