xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/x86/pentium4/sse2/add_n.asm (revision 867d70fc718005c0918b8b8b2f9d7f2d52d0a0db)
1dnl  Intel Pentium-4 mpn_add_n -- mpn addition.
2
3dnl  Copyright 2001, 2002 Free Software Foundation, Inc.
4
5dnl  This file is part of the GNU MP Library.
6dnl
7dnl  The GNU MP Library is free software; you can redistribute it and/or modify
8dnl  it under the terms of either:
9dnl
10dnl    * the GNU Lesser General Public License as published by the Free
11dnl      Software Foundation; either version 3 of the License, or (at your
12dnl      option) any later version.
13dnl
14dnl  or
15dnl
16dnl    * the GNU General Public License as published by the Free Software
17dnl      Foundation; either version 2 of the License, or (at your option) any
18dnl      later version.
19dnl
20dnl  or both in parallel, as here.
21dnl
22dnl  The GNU MP Library is distributed in the hope that it will be useful, but
23dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
25dnl  for more details.
26dnl
27dnl  You should have received copies of the GNU General Public License and the
28dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
29dnl  see https://www.gnu.org/licenses/.
30
31include(`../config.m4')
32
33
34C					cycles/limb
35C			     dst!=src1,2  dst==src1  dst==src2
36C P6 model 0-8,10-12		-
37C P6 model 9   (Banias)		?
38C P6 model 13  (Dothan)		?
39C P4 model 0-1 (Willamette)	?
40C P4 model 2   (Northwood)	4	     6		6
41C P4 model 3-4 (Prescott)	4.25	     7.5	7.5
42
dnl  Argument slots, listed by increasing frame offset, one 4-byte slot each.
dnl  PARAM_CARRY is only read by the mpn_add_nc entry point.
defframe(PARAM_DST,  4)
defframe(PARAM_SRC1, 8)
defframe(PARAM_SRC2, 12)
defframe(PARAM_SIZE, 16)
defframe(PARAM_CARRY,20)

dnl  ebx is spilled into the src1 argument slot instead of a fresh stack
dnl  location, so src1 has to be read out of that slot before ebx goes in.
define(SAVE_EBX,`PARAM_SRC1')

	TEXT
	ALIGN(8)
54
C mpn_add_nc -- mpn_add_n with an explicit carry-in.
C
C Reads the same four stack arguments as mpn_add_n, plus PARAM_CARRY, which
C seeds the running carry held in mm0.  Everything else is shared: control
C passes to the start_nc label inside mpn_add_n, which also does the return.
PROLOGUE(mpn_add_nc)
deflit(`FRAME',0)
	movd	PARAM_CARRY, %mm0	C mm0 = carry-in, upper 32 bits zero
	jmp	L(start_nc)		C continue in the shared body
EPILOGUE()

	ALIGN(8)
C mpn_add_n -- add two limb vectors of equal length.
C
C Stack arguments: PARAM_DST, PARAM_SRC1, PARAM_SRC2, PARAM_SIZE.
C Adds size 32-bit limbs of src1 and src2, lowest address first, stores each
C result limb to dst, and returns the carry out of the last limb in eax.
C
C The loop tests its counter only after a limb has been processed, so a size
C of zero is not catered for.
C
C Overwrites eax, ecx, edx and mm0-mm2.  ebx is kept in the PARAM_SRC1 slot
C while the loop runs and is restored before returning.  emms is executed on
C the way out.
PROLOGUE(mpn_add_n)
deflit(`FRAME',0)
	pxor	%mm0, %mm0		C plain entry: no carry-in

L(start_nc):
	C mpn_add_nc arrives here with its carry-in already in mm0.
	C
	C Each pointer is moved to one past its final limb, so that a single
	C negative index rising to zero addresses all three vectors and also
	C serves as the loop counter.

	mov	PARAM_SIZE, %ecx

	mov	PARAM_DST, %edx
	lea	(%edx,%ecx,4), %edx	C dst end

	mov	PARAM_SRC1, %eax	C read before the slot is reused below
	mov	%ebx, SAVE_EBX
	lea	(%eax,%ecx,4), %eax	C src1 end

	mov	PARAM_SRC2, %ebx
	lea	(%ebx,%ecx,4), %ebx	C src2 end

	neg	%ecx			C index = -size

L(limb):
	C eax	one past the last src1 limb
	C ebx	one past the last src2 limb
	C ecx	negative limb index, rising towards zero
	C edx	one past the last dst limb
	C mm0	carry out of the previous limb

	movd	(%eax,%ecx,4), %mm1	C src1 limb, zero extended
	movd	(%ebx,%ecx,4), %mm2	C src2 limb, zero extended
	paddq	%mm2, %mm1		C 64-bit add, so nothing is lost

	paddq	%mm1, %mm0		C add carry: low half is the result limb
	movd	%mm0, (%edx,%ecx,4)	C store the low 32 bits

	psrlq	$32, %mm0		C keep only the carry for the next limb

	add	$1, %ecx
	jnz	L(limb)

	mov	SAVE_EBX, %ebx
	movd	%mm0, %eax		C return value: carry out
	emms
	ret

EPILOGUE()
102