xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/x86/pentium/logops_n.asm (revision 75f6d617e282811cb173c2ccfbf5df0dd71f7045)
dnl  Intel Pentium mpn_and_n,...,mpn_xnor_n -- bitwise logical operations.

dnl  Copyright 2001, 2002 Free Software Foundation, Inc.
dnl
dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or
dnl  modify it under the terms of the GNU Lesser General Public License as
dnl  published by the Free Software Foundation; either version 3 of the
dnl  License, or (at your option) any later version.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful,
dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
dnl  Lesser General Public License for more details.
dnl
dnl  You should have received a copy of the GNU Lesser General Public License
dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.

include(`../config.m4')


C P5: 3.0 c/l  and, ior, xor
C     3.5 c/l  andn, iorn, nand, nior, xnor


dnl  Usage: M4_choose_op(name, post, op, pre)
dnl
dnl  Does nothing unless OPERATION_<name> is defined.  When it is, the
dnl  following are set up for the rest of the file:
dnl
dnl    M4_function    mpn_<name>, the entry point being built
dnl    M4op           <op>, the two-operand instruction combining the limbs
dnl    M4_want_post   <post>, "yes" to complement each result after <op>
dnl    M4_want_pre    <pre>, "yes" to complement each yp limb before <op>
dnl
dnl  Note the argument order: the post flag is the 2nd argument and the pre
dnl  flag is the 4th, so a table row reads like "NOT (x op NOT y)" with the
dnl  unwanted NOTs left blank.
dnl
define(`M4_choose_op',
`ifdef(`OPERATION_$1',`
define(`M4_function', `mpn_$1')
define(`M4_want_pre', `$4')
define(`M4op',        `$3')
define(`M4_want_post',`$2')
')')

dnl  Usage: M4pre(text)
dnl         M4post(text)
dnl
dnl  Expand to text when the corresponding M4_want_pre or M4_want_post flag
dnl  is "yes", and to nothing otherwise.
dnl
define(`M4pre', `ifelse(M4_want_pre, yes,`$1')')
define(`M4post',`ifelse(M4_want_post,yes,`$1')')

dnl           name    post   op   pre
M4_choose_op( and_n,     , andl,    )
M4_choose_op( andn_n,    , andl, yes)
M4_choose_op( nand_n, yes, andl,    )
M4_choose_op( ior_n,     ,  orl,    )
M4_choose_op( iorn_n,    ,  orl, yes)
M4_choose_op( nior_n, yes,  orl,    )
M4_choose_op( xor_n,     , xorl,    )
M4_choose_op( xnor_n, yes, xorl,    )

dnl  M4_function is still undefined here if no row above matched, ie. if no
dnl  OPERATION_<name> symbol from the table was defined.
ifdef(`M4_function',,
`m4_error(`Unrecognised or undefined OPERATION symbol
')')

dnl  NOTE(review): the two macros below are defined outside this file.  The
dnl  first presumably announces every entry point this source can be built
dnl  as, the second the range of nail bit counts it supports -- confirm
dnl  against their definitions.
MULFUNC_PROLOGUE(mpn_and_n mpn_andn_n mpn_nand_n mpn_ior_n mpn_iorn_n mpn_nior_n mpn_xor_n mpn_xnor_n)

NAILS_SUPPORT(0-31)


C void M4_function (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size);
C
C Store the limb-by-limb bitwise combination of {xp,size} and {yp,size} to
C {wp,size}:
C
C	wp[i] = post (xp[i] OP pre (yp[i]))
C
C OP is the instruction selected as M4op.  pre and post are an exclusive-or
C with GMP_NUMB_MASK, emitted only where M4pre or M4post expand their
C argument.  The single odd limb uses notl_or_xorl_GMP_NUMB_MASK for pre,
C presumably the same complement in whichever form is cheaper (that macro is
C defined elsewhere, confirm there).
C
C Limbs are 4 bytes and are processed in pairs from the high end of the
C operands down to the low end.  An odd size has its highest limb done
C separately first.
C
C size must be at least 1.  A size of 0 is not handled: shrl then gives no
C carry and a zero pair count, so L(entry) would be run on limbs below the
C start of the operands.
C
C Nothing complicated here, just some care to avoid data cache bank clashes
C and AGIs.
C
C We're one register short of being able to do a simple 4 loads, 2 ops, 2
C stores.  Instead %ebp is juggled a bit and nops are introduced to keep the
C pairings as intended.  An in-place operation would free up a register, for
C an 0.5 c/l speedup, if that's worth bothering with.
C
C This code seems best for P55 too.  Data alignment is a big problem for MMX
C and the pairing restrictions on movq and integer instructions make life
C difficult.

C Stack parameters, by offset in bytes.
C NOTE(review): FRAME and FRAME_pushl presumably keep these correct across
C the pushes below -- confirm against the defframe definition.
defframe(PARAM_SIZE,16)
defframe(PARAM_YP,  12)
defframe(PARAM_XP,   8)
defframe(PARAM_WP,   4)

	TEXT
	ALIGN(8)

PROLOGUE(M4_function)
deflit(`FRAME',0)

	C Save the four registers used below, restored in reverse order at
	C L(done).
	pushl	%ebx	FRAME_pushl()
	pushl	%esi	FRAME_pushl()

	pushl	%edi	FRAME_pushl()
	pushl	%ebp	FRAME_pushl()

	movl	PARAM_SIZE, %ecx
	movl	PARAM_XP, %ebx

	movl	PARAM_YP, %esi
	movl	PARAM_WP, %edi

	C ecx = size/2, the number of limb pairs.  Carry is set if size is odd.
	shrl	%ecx
	jnc	L(entry)

	C Odd size: do the highest limb, index 2*ecx, on its own.
	movl	(%ebx,%ecx,8), %eax	C risk of data cache bank clash here
	movl	(%esi,%ecx,8), %edx

M4pre(`	notl_or_xorl_GMP_NUMB_MASK(%edx)')

	M4op	%edx, %eax

M4post(`	xorl	$GMP_NUMB_MASK, %eax')
	C Set ZF from the pair count.  The movl store which follows leaves the
	C flags alone, so the jz after it still tests this result.
	orl	%ecx, %ecx

	movl	%eax, (%edi,%ecx,8)
	jz	L(done)

	jmp	L(entry)


L(top):
	C eax	high limb of the pair begun at L(entry), op done, post not yet
	C ebx	xp
	C ecx	counter, limb pairs, decrementing
	C edx	low yp limb of that pair, pre done
	C esi	yp
	C edi	wp
	C ebp	low xp limb of that pair

	C Finish the pair: op on the low limb, post on both, then store to
	C wp[2*ecx+1] and wp[2*ecx].  The nop is a pairing filler.
	M4op	%ebp, %edx
	nop

M4post(`	xorl	$GMP_NUMB_MASK, %eax')
M4post(`	xorl	$GMP_NUMB_MASK, %edx')

	movl	%eax, 4(%edi,%ecx,8)
	movl	%edx, (%edi,%ecx,8)

L(entry):
	C Begin the next pair down, limbs 2*ecx-1 (high) and 2*ecx-2 (low).
	C ebp holds the high xp limb first and is then reused for the low one.
	movl	-4(%ebx,%ecx,8), %ebp
	nop

	movl	-4(%esi,%ecx,8), %eax
	movl	-8(%esi,%ecx,8), %edx

M4pre(`	xorl	$GMP_NUMB_MASK, %eax')
M4pre(`	xorl	$GMP_NUMB_MASK, %edx')

	M4op	%ebp, %eax
	movl	-8(%ebx,%ecx,8), %ebp

	decl	%ecx
	jnz	L(top)


	C Last pair, ecx is now 0: the same finish as at L(top), storing to
	C wp[1] and wp[0].
	M4op	%ebp, %edx
	nop

M4post(`	xorl	$GMP_NUMB_MASK, %eax')
M4post(`	xorl	$GMP_NUMB_MASK, %edx')

	movl	%eax, 4(%edi,%ecx,8)
	movl	%edx, (%edi,%ecx,8)


L(done):
	popl	%ebp
	popl	%edi

	popl	%esi
	popl	%ebx

	ret

EPILOGUE()
166