xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/x86/sec_tabselect.asm (revision ce54336801cf28877c3414aa2fcb251dddd543a2)
1dnl  x86 mpn_sec_tabselect.
2
3dnl  Copyright 2011 Free Software Foundation, Inc.
4
5dnl  This file is part of the GNU MP Library.
6dnl
7dnl  The GNU MP Library is free software; you can redistribute it and/or modify
8dnl  it under the terms of either:
9dnl
10dnl    * the GNU Lesser General Public License as published by the Free
11dnl      Software Foundation; either version 3 of the License, or (at your
12dnl      option) any later version.
13dnl
14dnl  or
15dnl
16dnl    * the GNU General Public License as published by the Free Software
17dnl      Foundation; either version 2 of the License, or (at your option) any
18dnl      later version.
19dnl
20dnl  or both in parallel, as here.
21dnl
22dnl  The GNU MP Library is distributed in the hope that it will be useful, but
23dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
25dnl  for more details.
26dnl
27dnl  You should have received copies of the GNU General Public License and the
28dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
29dnl  see https://www.gnu.org/licenses/.
30
31include(`../config.m4')
32
33
34C			    cycles/limb
35C P5				 ?
36C P6 model 0-8,10-12		 ?
37C P6 model 9  (Banias)		 ?
38C P6 model 13 (Dothan)		 ?
39C P4 model 0  (Willamette)	 ?
40C P4 model 1  (?)		 ?
41C P4 model 2  (Northwood)	 4.5
42C P4 model 3  (Prescott)	 ?
43C P4 model 4  (Nocona)		 ?
44C Intel Atom			 ?
45C AMD K6			 ?
46C AMD K7			 3.4
47C AMD K8			 ?
48C AMD K10			 ?
49
50C NOTES
51C  * This has not been tuned for any specific processor.  Its speed should not
52C    be too bad, though.
53C  * Using SSE2 could result in many-fold speedup.
54
C mpn_sec_tabselect (mp_limb_t *rp, mp_limb_t *tp, mp_size_t n, mp_size_t nents, mp_size_t which)
C
C The table at tp holds nents entries of n 4-byte limbs each, stored back to
C back.  Entry number "which" (counting from 0) ends up in rp[0..n-1].
C
C Every entry is visited.  For entry k each limb of rp is rewritten as
C     rp[j] = (tp[j] AND maskp) OR (rp[j] AND maskn)
C where maskp is all ones only when which == k and maskn is its complement.
C The branches taken and the addresses touched therefore depend only on n
C and nents, never on which.  If which is outside 0..nents-1 no entry
C matches and rp is merely rewritten with its own contents.
C
C Both loops are tested at the bottom, so n and nents must be at least 1.
C
C Stack after the four pushes in the prologue:
C     16(%esp) return address
C     20(%esp) rp       reused as maskp once rp is in a register
C     24(%esp) tp
C     28(%esp) n
C     32(%esp) nents    reused as maskn once nents is in a register
C     36(%esp) which    biased in place by -nents

C Register operands.
define(`rp',     `%edi')
define(`tp',     `%esi')
define(`n',      `%ebx')
define(`nents',  `%ecx')
define(`i',      `%ebp')

C Stack operands.
define(`which',  `36(%esp)')
define(`maskp',  `20(%esp)')
define(`maskn',  `32(%esp)')

ASM_START()
	TEXT
	ALIGN(16)
PROLOGUE(mpn_sec_tabselect)
	push	%edi
	push	%esi
	push	%ebx
	push	%ebp

	mov	28(%esp), n
	mov	32(%esp), nents
	mov	20(%esp), rp
	mov	24(%esp), tp

	lea	(tp,n,4), tp		C tp = just past the end of entry 0
	lea	(rp,n,4), rp		C rp = just past the end of the result
	sub	nents, which		C bias: which -= nents

L(outer):
	mov	nents, %eax
	add	which, %eax		C eax = which - k, k = entries done so far
	neg	%eax			C CF = 1 iff which != k
	sbb	%eax, %eax		C eax = -1 if which != k, else 0
	mov	%eax, maskn		C maskn keeps the limb already in rp
	not	%eax
	mov	%eax, maskp		C maskp lets the table limb through

	xor	i, i
	sub	n, i			C i = -n, counts up towards 0

	ALIGN(16)
L(top):	mov	(rp,i,4), %edx
	mov	(tp,i,4), %eax
	and	maskn, %edx
	and	maskp, %eax
	or	%edx, %eax
	mov	%eax, (rp,i,4)
	inc	i
	js	L(top)			C loop while i is still negative

	mov	n, %eax
	lea	(tp,%eax,4), tp		C step tp to the end of the next entry
	dec	nents
	jne	L(outer)

	pop	%ebp
	pop	%ebx
	pop	%esi
	pop	%edi
	ret
EPILOGUE()
116