xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/x86/k6/mmx/com.asm (revision ce54336801cf28877c3414aa2fcb251dddd543a2)
1dnl  AMD K6-2 mpn_com -- mpn bitwise one's complement.
2
3dnl  Copyright 1999-2002 Free Software Foundation, Inc.
4
5dnl  This file is part of the GNU MP Library.
6dnl
7dnl  The GNU MP Library is free software; you can redistribute it and/or modify
8dnl  it under the terms of either:
9dnl
10dnl    * the GNU Lesser General Public License as published by the Free
11dnl      Software Foundation; either version 3 of the License, or (at your
12dnl      option) any later version.
13dnl
14dnl  or
15dnl
16dnl    * the GNU General Public License as published by the Free Software
17dnl      Foundation; either version 2 of the License, or (at your option) any
18dnl      later version.
19dnl
20dnl  or both in parallel, as here.
21dnl
22dnl  The GNU MP Library is distributed in the hope that it will be useful, but
23dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
25dnl  for more details.
26dnl
27dnl  You should have received copies of the GNU General Public License and the
28dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
29dnl  see https://www.gnu.org/licenses/.
30
31include(`../config.m4')
32
33NAILS_SUPPORT(0-31)
34
35
36C    alignment dst/src, A=0mod8 N=4mod8
37C       A/A   A/N   N/A   N/N
38C K6-2  1.0   1.18  1.18  1.18  cycles/limb
39C K6    1.5   1.85  1.75  1.85
40
41
42C void mpn_com (mp_ptr dst, mp_srcptr src, mp_size_t size);
43C
44C Take the bitwise ones-complement of src,size and write it to dst,size.
45
C mpn_com: write the bitwise complement of the limbs at src to dst.
C The three arguments are read from the stack through the PARAM_* names
C declared below; their offsets follow the prototype order (dst at 4,
C src at 8, size at 12).
C NOTE(review): defframe, deflit, FRAME_pushl, PROLOGUE and EPILOGUE come
C from the included m4 definitions and are not visible in this file -
C presumably FRAME tracks bytes pushed so PARAM_* stay valid - confirm.
46defframe(PARAM_SIZE,12)
47defframe(PARAM_SRC, 8)
48defframe(PARAM_DST, 4)
49
50	TEXT
51	ALIGN(16)
52PROLOGUE(mpn_com)
53deflit(`FRAME',0)
54
	C ecx = size, eax = src, edx = dst.
55	movl	PARAM_SIZE, %ecx
56	movl	PARAM_SRC, %eax
57	movl	PARAM_DST, %edx
	C ecx = floor(size/2), the number of 8-byte steps for the MMX loop.
	C The bit shifted out (set when size is odd) goes to the carry flag
	C and is only consumed by the jnc after the loop. ZF is set when the
	C shifted count is zero, so jnz is taken for size >= 2.
58	shrl	%ecx
59	jnz	L(two_or_more)
60
	C floor(size/2) == 0: complement a single 4-byte limb with integer
	C instructions only, so no MMX state is touched on this path.
	C NOTE(review): size == 0 would also reach here and still read and
	C write one limb; presumably callers always pass size >= 1 - confirm.
	C NOTE(review): notl_or_xorl_GMP_NUMB_MASK is defined elsewhere; going
	C by its name it presumably emits notl, or an xorl with the numb mask
	C when nail bits are configured - confirm.
61	movl	(%eax), %eax
62	notl_or_xorl_GMP_NUMB_MASK(	%eax)
63	movl	%eax, (%edx)
64	ret
65
66
67L(two_or_more):
	C ebx keeps a copy of floor(size/2) for the odd-limb tail, because
	C the loop counts ecx down to zero. It is saved here and restored
	C before returning. mm7 is the xor mask applied to each 8-byte pair.
68	pushl	%ebx	FRAME_pushl()
69	pcmpeqd	%mm7, %mm7		C all ones
70
71	movl	%ecx, %ebx
	C When GMP_NAIL_BITS is nonzero, the top GMP_NAIL_BITS of each 32-bit
	C half of the mask are shifted to zero, so the xor leaves those bits
	C of every limb unchanged instead of inverting them.
72ifelse(GMP_NAIL_BITS,0,,
73`	psrld	$GMP_NAIL_BITS, %mm7')	C clear nails
74
75
76
	C Each pass complements one 8-byte pair of limbs, from the highest
	C pair down to the lowest: ecx runs from floor(size/2) down to 1 and
	C the -8 displacement turns that into byte offsets 8*(ecx-1).
77	ALIGN(8)
78L(top):
79	C eax	src
80	C ebx	floor(size/2)
81	C ecx	counter
82	C edx	dst
83	C
84	C mm0	scratch
85	C mm7	mask
86
87	movq	-8(%eax,%ecx,8), %mm0
88	pxor	%mm7, %mm0
89	movq	%mm0, -8(%edx,%ecx,8)
90	loop	L(top)
91
92
	C The carry tested here is still the one left by the shrl at entry:
	C pushl, movl, the MMX instructions and loop do not write the flags.
	C Carry set means size was odd, so one limb remains at limb index
	C 2*ebx, which is byte offset 8*ebx from each base pointer. The src
	C pointer in eax is no longer needed and is overwritten by the load.
93	jnc	L(no_extra)
94	movl	(%eax,%ebx,8), %eax
95	notl_or_xorl_GMP_NUMB_MASK(	%eax)
96	movl	%eax, (%edx,%ebx,8)
97L(no_extra):
98
	C Restore the saved ebx.
	C NOTE(review): emms_or_femms is defined elsewhere; presumably it emits
	C emms or femms to leave MMX state before returning - confirm.
99	popl	%ebx
100	emms_or_femms
101	ret
102
103EPILOGUE()
104