dnl  xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/x86/pentium/logops_n.asm (revision ce54336801cf28877c3414aa2fcb251dddd543a2)
dnl  Intel Pentium mpn_and_n,...,mpn_xnor_n -- bitwise logical operations.

dnl  Copyright 2001, 2002 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.

include(`../config.m4')


C P5: 3.0 c/l  and, ior, xor
C     3.5 c/l  andn, iorn, nand, nior, xnor


C M4_choose_op(name, want_post, insn, want_pre)
C
C If OPERATION_<name> is defined, select that operation for this build by
C defining:
C
C   M4_function    mpn_<name>, the entry point emitted by this file
C   M4op           insn, the two-operand instruction applied to each limb
C   M4_want_pre    want_pre, "yes" when the yp limb is to be complemented
C                  before M4op is applied
C   M4_want_post   want_post, "yes" when the result is to be complemented
C                  after M4op is applied
C
C If OPERATION_<name> is not defined the call expands to nothing.
C
C M4pre(text) and M4post(text) expand to text when the corresponding flag
C is "yes", and to nothing otherwise, so the complement instructions only
C appear in the variants that need them.

define(M4_choose_op,
`ifdef(`OPERATION_$1',`
define(`M4_function', `mpn_$1')
define(`M4_want_pre', `$4')
define(`M4op',        `$3')
define(`M4_want_post',`$2')
')')
define(M4pre, `ifelse(M4_want_pre, yes,`$1')')
define(M4post,`ifelse(M4_want_post,yes,`$1')')

C             name    post  insn  pre
M4_choose_op( and_n,     , andl,    )
M4_choose_op( andn_n,    , andl, yes)
M4_choose_op( nand_n, yes, andl,    )
M4_choose_op( ior_n,     ,  orl,    )
M4_choose_op( iorn_n,    ,  orl, yes)
M4_choose_op( nior_n, yes,  orl,    )
M4_choose_op( xor_n,     , xorl,    )
M4_choose_op( xnor_n, yes, xorl,    )

C None of the OPERATION_ symbols above was defined, so there is nothing to
C build.
ifdef(`M4_function',,
`m4_error(`Unrecognised or undefined OPERATION symbol
')')

MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)

NAILS_SUPPORT(0-31)


C void M4_function (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size);
C
C Store at wp the limb-by-limb result of the selected logical operation on
C the size limbs at xp and yp.  For each limb the yp value is complemented
C first when M4_want_pre is yes, M4op then combines it with the xp value,
C and the result is complemented when M4_want_post is yes.  Complements are
C done by xor with GMP_NUMB_MASK.
C
C There is no test for size==0 anywhere below, so size must be at least 1.
C
C Parameters are read from the stack at the offsets given by the defframe
C lines.  ebx, esi, edi and ebp are saved on entry and restored on exit.
C
C Nothing complicated here, just some care to avoid data cache bank clashes
C and AGIs.
C
C We're one register short of being able to do a simple 4 loads, 2 ops, 2
C stores.  Instead %ebp is juggled a bit and nops are introduced to keep the
C pairings as intended.  An in-place operation would free up a register, for
C an 0.5 c/l speedup, if that's worth bothering with.
C
C This code seems best for P55 too.  Data alignment is a big problem for MMX
C and the pairing restrictions on movq and integer instructions make life
C difficult.
C
C Structure: when size is odd the top limb is processed on its own first.
C The remaining limbs are then processed two at a time from the high end
C down to the low end, with ecx counting the limb pairs still to do.  The
C second M4op of each pair is deferred to L(top), or to the code after the
C loop for the last pair, since ebp has to be reloaded in between.

defframe(PARAM_SIZE,16)
defframe(PARAM_YP,  12)
defframe(PARAM_XP,   8)
defframe(PARAM_WP,   4)

	TEXT
	ALIGN(8)

PROLOGUE(M4_function)
deflit(`FRAME',0)

	pushl	%ebx	FRAME_pushl()
	pushl	%esi	FRAME_pushl()

	pushl	%edi	FRAME_pushl()
	pushl	%ebp	FRAME_pushl()

	movl	PARAM_SIZE, %ecx
	movl	PARAM_XP, %ebx

	movl	PARAM_YP, %esi
	movl	PARAM_WP, %edi

	shrl	%ecx			C ecx = size/2, carry = low bit of size
	jnc	L(entry)		C even size, only limb pairs to do

	C Odd size: process the top limb, at index 2*ecx, on its own.

	movl	(%ebx,%ecx,8), %eax	C risk of data cache bank clash here
	movl	(%esi,%ecx,8), %edx

M4pre(`	notl_or_xorl_GMP_NUMB_MASK(%edx)')

	M4op	%edx, %eax

M4post(`xorl	$GMP_NUMB_MASK, %eax')
	orl	%ecx, %ecx		C set ZF when no limb pairs remain

	movl	%eax, (%edi,%ecx,8)	C movl leaves the flags from orl intact
	jz	L(done)			C size was 1

	jmp	L(entry)


L(top):
	C eax	high limb of the previous pair, M4op already applied
	C ebx	xp
	C ecx	counter, limb pairs, decrementing
	C edx	yp low limb of the previous pair, M4op still to be applied
	C esi	yp
	C edi	wp
	C ebp	xp low limb of the previous pair

	M4op	%ebp, %edx
	nop

M4post(`xorl	$GMP_NUMB_MASK, %eax')
M4post(`xorl	$GMP_NUMB_MASK, %edx')

	C ecx has already been decremented, so these offsets address the
	C pair whose operands were loaded on the previous pass.

	movl	%eax, 4(%edi,%ecx,8)
	movl	%edx, (%edi,%ecx,8)

L(entry):
	C Load the pair at limb indices 2*ecx-1 (high) and 2*ecx-2 (low).

	movl	-4(%ebx,%ecx,8), %ebp	C xp high limb
	nop

	movl	-4(%esi,%ecx,8), %eax	C yp high limb
	movl	-8(%esi,%ecx,8), %edx	C yp low limb

M4pre(`	xorl	$GMP_NUMB_MASK, %eax')
M4pre(`	xorl	$GMP_NUMB_MASK, %edx')

	M4op	%ebp, %eax		C high limb done, apart from M4post
	movl	-8(%ebx,%ecx,8), %ebp	C xp low limb, reusing ebp

	decl	%ecx
	jnz	L(top)


	C Last pair: same finishing steps as at L(top).  ecx is zero here,
	C so the stores below go to the two lowest limbs of wp.

	M4op	%ebp, %edx
	nop

M4post(`xorl	$GMP_NUMB_MASK, %eax')
M4post(`xorl	$GMP_NUMB_MASK, %edx')

	movl	%eax, 4(%edi,%ecx,8)
	movl	%edx, (%edi,%ecx,8)


L(done):
	popl	%ebp
	popl	%edi

	popl	%esi
	popl	%ebx

	ret

EPILOGUE()