xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/x86/pentium4/sse2/sub_n.asm (revision 4b004442778f1201b2161e87fd65ba87aae6601a)
1dnl  Intel Pentium-4 mpn_sub_n -- mpn subtraction.
2
3dnl  Copyright 2001, 2002 Free Software Foundation, Inc.
4
5dnl  This file is part of the GNU MP Library.
6dnl
7dnl  The GNU MP Library is free software; you can redistribute it and/or modify
8dnl  it under the terms of either:
9dnl
10dnl    * the GNU Lesser General Public License as published by the Free
11dnl      Software Foundation; either version 3 of the License, or (at your
12dnl      option) any later version.
13dnl
14dnl  or
15dnl
16dnl    * the GNU General Public License as published by the Free Software
17dnl      Foundation; either version 2 of the License, or (at your option) any
18dnl      later version.
19dnl
20dnl  or both in parallel, as here.
21dnl
22dnl  The GNU MP Library is distributed in the hope that it will be useful, but
23dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
25dnl  for more details.
26dnl
27dnl  You should have received copies of the GNU General Public License and the
28dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
29dnl  see https://www.gnu.org/licenses/.
30
31include(`../config.m4')
32
33
34C					cycles/limb
35C			     dst!=src1,2  dst==src1  dst==src2
36C P6 model 0-8,10-12		-
37C P6 model 9   (Banias)		?
38C P6 model 13  (Dothan)		?
39C P4 model 0-1 (Willamette)	?
40C P4 model 2   (Northwood)	4	     6		6
41C P4 model 3-4 (Prescott)	4.25	     7.5	7.5
42
dnl  Argument slots used by the two entry points below.  PARAM_CARRY is read
dnl  only by mpn_sub_nc.
dnl  NOTE(review): defframe is defined outside this file; the numbers are
dnl  presumably byte offsets from the stack pointer at entry (FRAME is 0 in
dnl  both entry points) -- confirm against its definition.
43defframe(PARAM_CARRY,20)
44defframe(PARAM_SIZE, 16)
45defframe(PARAM_SRC2, 12)
46defframe(PARAM_SRC1, 8)
47defframe(PARAM_DST,  4)
48
49dnl  re-use parameter space: ebx is saved in the PARAM_SRC1 slot, which is
dnl  no longer needed once the src1 pointer has been loaded into eax.
50define(SAVE_EBX,`PARAM_SRC1')
51
52	TEXT
53	ALIGN(8)
54
C mpn_sub_nc: entry point that takes an explicit borrow-in.  It loads
C PARAM_CARRY into mm0 and joins mpn_sub_n at L(start_nc), so the
C subtraction loop and both return paths are shared with mpn_sub_n.
55PROLOGUE(mpn_sub_nc)
56deflit(`FRAME',0)
57	movd	PARAM_CARRY, %mm0	C mm0 = borrow-in supplied by the caller
58	jmp	L(start_nc)		C enter mpn_sub_n after its pxor of mm0
59EPILOGUE()
60
61	ALIGN(8)
C mpn_sub_n: dst[i] = src1[i] - src2[i] - borrow for i = 0 .. size-1,
C one 32-bit limb at a time, starting with a borrow of zero.  The final
C borrow (0 or 1) is left in eax.  mpn_sub_nc enters at L(start_nc) with
C its own borrow-in already in mm0.
C
C Each limb is loaded with movd into a 64-bit MMX register and subtracted
C with psubq, so a borrow out of the low 32 bits shows up in bit 63 of
C the result; psrlq $63 reduces that to a 0/1 borrow for the next limb.
C The loop handles two limbs per pass and swaps the roles of mm0 and mm1,
C so the borrow never has to be copied between registers.
C
C NOTE(review): the first limb is processed before the counter is tested,
C so size is presumably required to be at least 1 -- confirm against the
C mpn interface documentation.
62PROLOGUE(mpn_sub_n)
63deflit(`FRAME',0)
64	pxor	%mm0, %mm0		C borrow-in = 0
65L(start_nc):
66	mov	PARAM_SRC1, %eax	C read src1 before its slot is reused
67	mov	%ebx, SAVE_EBX		C save ebx in the PARAM_SRC1 slot
68	mov	PARAM_SRC2, %ebx
69	mov	PARAM_DST, %edx
70	mov	PARAM_SIZE, %ecx
71
	C Point each register just past the end of its vector and index
	C with a negative limb count that runs up to zero.
72	lea	(%eax,%ecx,4), %eax	C src1 end
73	lea	(%ebx,%ecx,4), %ebx	C src2 end
74	lea	(%edx,%ecx,4), %edx	C dst end
75	neg	%ecx			C -size
76
77L(top):
78	C eax	src1 end
79	C ebx	src2 end
80	C ecx	counter, limbs, negative
81	C edx	dst end
82	C mm0	carry bit
83
	C First limb of the pair: borrow comes in via mm0, goes out via mm1.
84	movd	(%eax,%ecx,4), %mm1	C mm1 = src1 limb
85	movd	(%ebx,%ecx,4), %mm2	C mm2 = src2 limb
86	psubq	%mm2, %mm1		C mm1 = src1 - src2 (64-bit)
87
88	psubq	%mm0, %mm1		C subtract incoming borrow
89	movd	%mm1, (%edx,%ecx,4)	C store low 32 bits to dst
90
91	psrlq	$63, %mm1		C mm1 = borrow out (bit 63 of difference)
92
93	add	$1, %ecx
94	jz	L(done_mm1)		C that was the last limb; borrow is in mm1
95
	C Second limb of the pair: borrow comes in via mm1, goes out via mm0.
96	movd	(%eax,%ecx,4), %mm0	C mm0 = src1 limb
97	movd	(%ebx,%ecx,4), %mm2	C mm2 = src2 limb
98	psubq	%mm2, %mm0		C mm0 = src1 - src2 (64-bit)
99
100	psubq	%mm1, %mm0		C subtract incoming borrow
101	movd	%mm0, (%edx,%ecx,4)	C store low 32 bits to dst
102
103	psrlq	$63, %mm0		C mm0 = borrow out (bit 63 of difference)
104
105	add	$1, %ecx
106	jnz	L(top)			C more limbs remain
107
	C Exit after the second limb of a pair: final borrow is in mm0.
108	movd	%mm0, %eax		C eax = final borrow
109	mov	SAVE_EBX, %ebx		C restore ebx
110	emms				C leave MMX state before returning
111	ret
112
	C Exit after the first limb of a pair: final borrow is in mm1.
113L(done_mm1):
114	movd	%mm1, %eax		C eax = final borrow
115	mov	SAVE_EBX, %ebx		C restore ebx
116	emms				C leave MMX state before returning
117	ret
118
119EPILOGUE()
120