dnl  xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/x86/pentium4/sse2/sub_n.asm (revision 230b95665bbd3a9d1a53658a36b1053f8382a519)
dnl  Intel Pentium-4 mpn_sub_n -- mpn subtraction.

dnl  Copyright 2001, 2002 Free Software Foundation, Inc.
dnl
dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or
dnl  modify it under the terms of the GNU Lesser General Public License as
dnl  published by the Free Software Foundation; either version 3 of the
dnl  License, or (at your option) any later version.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful,
dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
dnl  Lesser General Public License for more details.
dnl
dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')


C					cycles/limb
C			     dst!=src1,2  dst==src1  dst==src2
C P6 model 0-8,10-12		-
C P6 model 9   (Banias)		?
C P6 model 13  (Dothan)		?
C P4 model 0-1 (Willamette)	?
C P4 model 2   (Northwood)	4	     6		6
C P4 model 3-4 (Prescott)	4.25	     7.5	7.5

dnl  Argument slots, in the order dst, src1, src2, size, carry.  defframe is
dnl  supplied by the included m4 definitions; the numbers look like byte
dnl  offsets from the stack pointer at function entry, with 4 being the
dnl  first argument above the return address (NOTE(review): confirm against
dnl  the defframe definition).  PARAM_CARRY is only read by mpn_sub_nc.
defframe(PARAM_CARRY,20)
defframe(PARAM_SIZE, 16)
defframe(PARAM_SRC2, 12)
defframe(PARAM_SRC1, 8)
defframe(PARAM_DST,  4)

dnl  re-use parameter space
dnl
dnl  The caller's ebx is parked in the src1 argument slot once that argument
dnl  has been loaded into a register, so no extra stack space is needed.
define(SAVE_EBX,`PARAM_SRC1')

	TEXT
	ALIGN(8)

C mpn_sub_nc -- subtraction with an initial borrow.
C
C Takes the same dst, src1, src2 and size arguments as mpn_sub_n plus a
C fifth carry argument.  The carry is placed in mm0, the register the main
C loop uses as its incoming borrow, and control joins mpn_sub_n at its
C start_nc label, so everything else including the return is shared.
C The carry is presumably expected to be 0 or 1; nothing here checks it.

PROLOGUE(mpn_sub_nc)
deflit(`FRAME',0)
	movd	PARAM_CARRY, %mm0	C mm0 = borrow in, upper half zero
	jmp	L(start_nc)
EPILOGUE()

C mpn_sub_n -- subtract the size-limb vector at src2 from the one at src1.
C
C In:   dst, src1, src2, size on the stack, see the defframe list.
C Out:  difference limbs stored at dst, borrow out of the top limb
C       returned in eax as 0 or 1.
C Uses: eax, ecx, edx, mm0, mm1, mm2 and flags.  ebx is used too but is
C       saved on entry and restored on both exit paths.
C
C size must be at least 1, a limb is processed before the counter is
C first tested.
C
C Method: each 32-bit limb is loaded with movd, which zero-extends it to
C 64 bits, and the subtraction is done with the 64-bit psubq.  The low
C half of the result is the difference limb.  Because both operands are
C below 2^32 and the incoming borrow is 0 or 1, the 64-bit result is
C negative exactly when a borrow occurs, so shifting right by 63 leaves
C the outgoing borrow as 0 or 1.
C
C The loop handles two limbs per iteration and ping-pongs the borrow
C between mm0 and mm1, which avoids a register copy per limb.  There are
C therefore two exits, one for each register holding the final borrow.

	ALIGN(8)
PROLOGUE(mpn_sub_n)
deflit(`FRAME',0)
	pxor	%mm0, %mm0		C no incoming borrow
L(start_nc):
	C mpn_sub_nc enters here with its carry already in mm0

	mov	PARAM_SRC1, %eax
	mov	%ebx, SAVE_EBX		C src1 slot is free now, park ebx there
	mov	PARAM_SRC2, %ebx
	mov	PARAM_DST, %edx
	mov	PARAM_SIZE, %ecx

	C Point each register just past the end of its vector and index with
	C a negative counter running up to zero, so the add that steps the
	C index also produces the loop termination flag.

	lea	(%eax,%ecx,4), %eax	C src1 end
	lea	(%ebx,%ecx,4), %ebx	C src2 end
	lea	(%edx,%ecx,4), %edx	C dst end
	neg	%ecx			C -size

L(top):
	C eax	src1 end
	C ebx	src2 end
	C ecx	counter, limbs, negative
	C edx	dst end
	C mm0	carry bit

	C first limb of the pair: borrow in from mm0, borrow out to mm1

	movd	(%eax,%ecx,4), %mm1
	movd	(%ebx,%ecx,4), %mm2
	psubq	%mm2, %mm1		C src1 limb - src2 limb, 64 bits wide

	psubq	%mm0, %mm1		C less the incoming borrow
	movd	%mm1, (%edx,%ecx,4)	C low 32 bits are the result limb

	psrlq	$63, %mm1		C sign bit becomes the borrow, 0 or 1

	add	$1, %ecx
	jz	L(done_mm1)		C odd size, final borrow is in mm1

	C second limb of the pair: borrow in from mm1, borrow out to mm0

	movd	(%eax,%ecx,4), %mm0
	movd	(%ebx,%ecx,4), %mm2
	psubq	%mm2, %mm0

	psubq	%mm1, %mm0
	movd	%mm0, (%edx,%ecx,4)

	psrlq	$63, %mm0

	add	$1, %ecx
	jnz	L(top)

	C even size, final borrow is in mm0

	movd	%mm0, %eax		C return value
	mov	SAVE_EBX, %ebx
	emms				C leave MMX state before returning
	ret

L(done_mm1):
	movd	%mm1, %eax		C return value
	mov	SAVE_EBX, %ebx
	emms				C leave MMX state before returning
	ret

EPILOGUE()