xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/x86/fat/fat_entry.asm (revision ce54336801cf28877c3414aa2fcb251dddd543a2)
1dnl  x86 fat binary entrypoints.
2
3dnl  Copyright 2003, 2012, 2014 Free Software Foundation, Inc.
4
5dnl  This file is part of the GNU MP Library.
6dnl
7dnl  The GNU MP Library is free software; you can redistribute it and/or modify
8dnl  it under the terms of either:
9dnl
10dnl    * the GNU Lesser General Public License as published by the Free
11dnl      Software Foundation; either version 3 of the License, or (at your
12dnl      option) any later version.
13dnl
14dnl  or
15dnl
16dnl    * the GNU General Public License as published by the Free Software
17dnl      Foundation; either version 2 of the License, or (at your option) any
18dnl      later version.
19dnl
20dnl  or both in parallel, as here.
21dnl
22dnl  The GNU MP Library is distributed in the hope that it will be useful, but
23dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
24dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
25dnl  for more details.
26dnl
27dnl  You should have received copies of the GNU General Public License and the
28dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
29dnl  see https://www.gnu.org/licenses/.
30
31include(`../config.m4')
32
33
34dnl  Forcibly disable profiling.
35dnl
36dnl  The entrypoints and inits are small enough not to worry about; the real
37dnl  routines they jump to will carry any profiling.  Also, because the code
38dnl  here ends with a jump, it would not work properly with the "instrument"
39dnl  profiling scheme anyway.
40
41define(`WANT_PROFILING',no)
42
43
44	TEXT
45
46
47dnl  Usage: FAT_ENTRY(name, offset)
48dnl
49dnl  Emit a fat binary entrypoint function of the given name.  This is the
50dnl  normal entry for applications, eg. __gmpn_add_n.
51dnl
52dnl  The code simply jumps through the function pointer in __gmpn_cpuvec at
53dnl  the given "offset" (in bytes).
54dnl
55dnl  For non-PIC, the jumps are 6 bytes each (opcode, modrm and a 32-bit
56dnl  absolute address), aligning them to 8 should be fine for all x86s.
57dnl
58dnl  For PIC, the jumps are 20 bytes each, and are best aligned to 16 to
59dnl  ensure at least the first two instructions don't cross a cache line
60dnl  boundary.
61dnl
62dnl  Note the extra `' ahead of PROLOGUE obscures it from the HAVE_NATIVE
63dnl  grepping in configure, stopping that code trying to eval something with
64dnl  $1 in it.
65
dnl  FAT_ENTRY(name, offset) expands to a function "name" whose whole body is
dnl  an indirect jump through the pointer found "offset" bytes into
dnl  __gmpn_cpuvec.  One of three instruction sequences is chosen at m4 time:
dnl  Darwin PIC, other PIC (via the GOT), or non-PIC (absolute address).
dnl  The PIC variants use edx as scratch.
66define(FAT_ENTRY,
67m4_assert_numargs(2)
68`	ALIGN(ifdef(`PIC',16,8))
69`'PROLOGUE($1)dnl
70ifdef(`PIC',`dnl
71ifdef(`DARWIN',`
72	call	L(movl_eip_edx)		C edx = address of the next instruction
73	movl	L(___gmpn_cpuvec)$non_lazy_ptr-.(%edx), %edx	C edx = pointer read from the non-lazy pointer slot
74	jmp	*m4_empty_if_zero($2)(%edx)	C tail-jump through the slot at the given offset
75',`dnl
76	call	L(movl_eip_edx)		C edx = address of the entry_here label below
77L(entry_here$2):
78	addl	$_GLOBAL_OFFSET_TABLE_+[.-L(entry_here$2)], %edx	C edx = GOT base
79	movl	GSYM_PREFIX`'__gmpn_cpuvec@GOT(%edx), %edx	C edx = address of __gmpn_cpuvec, read from its GOT entry
80	jmp	*m4_empty_if_zero($2)(%edx)	C tail-jump through the slot at the given offset
81')
82',`dnl non-PIC
83	jmp	*GSYM_PREFIX`'__gmpn_cpuvec+$2	C tail-jump through the slot, addressed absolutely
84')
85EPILOGUE()
86')
87
88
89dnl  FAT_ENTRY for each CPUVEC_FUNCS_LIST
90dnl
91
dnl  Emit one FAT_ENTRY per name in CPUVEC_FUNCS_LIST.  CPUVEC_offset starts
dnl  at 0 and is redefined to itself plus 4 after each entry, so successive
dnl  names get byte offsets 0, 4, 8, ... into __gmpn_cpuvec.
92define(`CPUVEC_offset',0)
93foreach(i,
94`FAT_ENTRY(MPN(i),CPUVEC_offset)
95define(`CPUVEC_offset',eval(CPUVEC_offset + 4))',
96CPUVEC_FUNCS_LIST)
97
dnl  PIC builds only.  movl_eip_edx is the helper called by the PIC forms of
dnl  FAT_ENTRY: it copies its own return address into edx and returns.
dnl  On Darwin this block also emits the non-lazy pointer slot for
dnl  ___gmpn_cpuvec that the Darwin code paths load from, then switches back
dnl  to the text section.
98ifdef(`PIC',`
99	ALIGN(8)
100L(movl_eip_edx):
101	movl	(%esp), %edx		C edx = return address, i.e. the address just after the call
102	ret_internal
103ifdef(`DARWIN',`
104	.section	__IMPORT,__pointers,non_lazy_symbol_pointers
105L(___gmpn_cpuvec)$non_lazy_ptr:
106	.indirect_symbol	___gmpn_cpuvec
107	.long	0		C placeholder, presumably filled in at load time - TODO confirm
108	TEXT
109')
110')
111
112
113dnl  Usage: FAT_INIT(name, offset)
114dnl
115dnl  Emit a fat binary initializer function of the given name.  These
116dnl  functions are the initial values for the pointers in __gmpn_cpuvec.
117dnl
118dnl  The code simply calls __gmpn_cpuvec_init, and then jumps back through
119dnl  the __gmpn_cpuvec pointer, at the given "offset" (in bytes).
120dnl  __gmpn_cpuvec_init will have stored the address of the selected
121dnl  implementation there.
122dnl
123dnl  Only one of these routines will be executed, and only once, since after
124dnl  that all the __gmpn_cpuvec pointers go to real routines.  So there's no
125dnl  need for anything special here, just something small and simple.  To
126dnl  keep code size down, "fat_init" is a shared bit of code, arrived at
127dnl  with the offset in %al.  %al is used since the movb instruction is 2
128dnl  bytes where a movl to %eax would be 5.  This limits offsets to 0..255.
129dnl
130dnl  Note having `PROLOGUE in FAT_INIT obscures that PROLOGUE from the
131dnl  HAVE_NATIVE grepping in configure, preventing that code trying to eval
132dnl  something with $1 in it.
133
dnl  FAT_INIT(name, offset) expands to a function "name" that loads the byte
dnl  offset into al and jumps to the shared fat_init code.  Because the
dnl  offset travels in an 8-bit register it must fit in one byte.
134define(FAT_INIT,
135m4_assert_numargs(2)
136`PROLOGUE($1)dnl
137	movb	$`'$2, %al		C al = byte offset of this function slot in __gmpn_cpuvec
138	jmp	L(fat_init)		C shared code does the init and the final dispatch
139EPILOGUE()
140')
141
C Shared tail for every FAT_INIT stub.
C
C Entered by a jump, with al holding a byte offset into __gmpn_cpuvec.
C The offset is zero-extended and kept on the stack across a call to
C __gmpn_cpuvec_init, then popped and used to jump through the pointer at
C that offset in __gmpn_cpuvec.  Three variants are chosen at m4 time:
C Darwin PIC, other PIC (GOT based, ebx saved and restored), and non-PIC.
142L(fat_init):
143	C al	__gmpn_cpuvec byte offset
144
145	movzbl	%al, %eax		C widen the 8-bit offset to 32 bits
146	pushl	%eax			C keep the offset across the init call
147
148ifdef(`PIC',`dnl
149ifdef(`DARWIN',`
150	sub	$8, %esp		C extra stack space around the call, presumably for alignment - TODO confirm
151	CALL(	__gmpn_cpuvec_init)
152	add	$8, %esp
153	call	L(movl_eip_edx)		C edx = address of the next instruction
154	movl	L(___gmpn_cpuvec)$non_lazy_ptr-.(%edx), %edx	C edx = pointer read from the non-lazy pointer slot
155',`dnl
156	pushl	%ebx			C ebx is used for the GOT base below, so preserve it
157	call	L(movl_eip_ebx)		C ebx = address of the init_here label
158L(init_here):
159	addl	$_GLOBAL_OFFSET_TABLE_+[.-L(init_here)], %ebx	C ebx = GOT base
160	CALL(	__gmpn_cpuvec_init)
161	movl	GSYM_PREFIX`'__gmpn_cpuvec@GOT(%ebx), %edx	C edx = address of __gmpn_cpuvec, read from its GOT entry
162	popl	%ebx
163')
164	popl	%eax			C eax = saved byte offset
165	jmp	*(%edx,%eax)		C jump through the pointer at edx + eax
166
	C Helper called only from the non-Darwin PIC path above: copies its own
	C return address into ebx.
167L(movl_eip_ebx):
168	movl	(%esp), %ebx
169	ret_internal
170',`dnl non-PIC
171	sub	$8, %esp		C needed on Darwin, harmless elsewhere
172	CALL(	__gmpn_cpuvec_init)
173	add	$8, %esp		C needed on Darwin, harmless elsewhere
174	popl	%eax			C eax = saved byte offset
175	jmp	*GSYM_PREFIX`'__gmpn_cpuvec(%eax)	C jump through the slot, addressed absolutely
176')
177
178dnl  FAT_INIT for each CPUVEC_FUNCS_LIST
179dnl
180
dnl  Emit one FAT_INIT per name in CPUVEC_FUNCS_LIST, named MPN(<name>_init).
dnl  CPUVEC_offset is reset to 0 and advanced by 4 per entry, the same
dnl  progression used by the FAT_ENTRY loop, so each initializer passes the
dnl  offset matching its entrypoint.
181define(`CPUVEC_offset',0)
182foreach(i,
183`FAT_INIT(MPN(i`'_init),CPUVEC_offset)
184define(`CPUVEC_offset',eval(CPUVEC_offset + 4))',
185CPUVEC_FUNCS_LIST)
186
187
188
189C long __gmpn_cpuid (char dst[12], int id);
190C
191C This is called only once, so just something simple and compact is fine.
192
defframe(PARAM_DST, 4)
defframe(PARAM_ID,  8)
deflit(`FRAME',0)

PROLOGUE(__gmpn_cpuid)
	C Execute cpuid for the leaf given in PARAM_ID and write ebx, edx, ecx to
	C dst[0..3], dst[4..7], dst[8..11] respectively.  eax is returned exactly
	C as cpuid leaves it.  ebx and esi are saved and restored around the body.
	pushl	%ebx		FRAME_pushl()
	pushl	%esi		FRAME_pushl()

	movl	PARAM_DST, %esi		C fetch dst up front, cpuid does not touch esi
	movl	PARAM_ID, %eax
	cpuid

	movl	%ecx, 8(%esi)
	movl	%edx, 4(%esi)
	movl	%ebx, (%esi)

	popl	%esi
	popl	%ebx
	ret
EPILOGUE()
210
211
212C int __gmpn_cpuid_available (void);
213C
214C Return non-zero if the cpuid instruction is available, which means late
215C model 80486 and higher.  80386 and early 80486 don't have cpuid.
216C
217C The test follows Intel AP-485 application note, namely that if bit 21 is
218C modifiable then cpuid is supported.  This test is reentrant and thread
219C safe, since of course any interrupt or context switch will preserve the
220C flags while we're tinkering with them.
221C
222C This is called only once, so just something simple and compact is fine.
223
PROLOGUE(__gmpn_cpuid_available)
	C Returns 1 in eax if bit 21 (ID) of the flags register can be toggled,
	C 0 otherwise.  Takes no arguments.  Uses ecx and edx as scratch.  The
	C flags image read on entry is written back before returning, so the
	C ID bit is not left flipped (arithmetic flags are still clobbered by
	C the final compare).
	pushf
	popl	%ecx		C ecx = flags on entry

	movl	%ecx, %edx
	xorl	$0x200000, %edx	C flip bit 21 (ID)
	pushl	%edx
	popf			C try to install the flipped value
	pushf
	popl	%edx		C edx = flags as actually retained

	pushl	%ecx
	popf			C put the entry flags back

	xorl	%eax, %eax	C must precede the compare, xor clobbers flags
	cmpl	%ecx, %edx
	setne	%al		C 1 if the bit changed, so cpuid is available
	ret
EPILOGUE()
243ASM_END()
244