xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/x86_64/fat/fat_entry.asm (revision 1897181a7231d5fc7ab48994d1447fcbc4e13a49)
1dnl  x86 fat binary entrypoints.
2
3dnl  Contributed to the GNU project by Kevin Ryde (original x86_32 code) and
4dnl  Torbjorn Granlund (port to x86_64)
5
6dnl  Copyright 2003, 2009, 2011 Free Software Foundation, Inc.
7dnl
8dnl  This file is part of the GNU MP Library.
9dnl
10dnl  The GNU MP Library is free software; you can redistribute it and/or
11dnl  modify it under the terms of the GNU Lesser General Public License as
12dnl  published by the Free Software Foundation; either version 3 of the
13dnl  License, or (at your option) any later version.
14dnl
15dnl  The GNU MP Library is distributed in the hope that it will be useful,
16dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
17dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18dnl  Lesser General Public License for more details.
19dnl
20dnl  You should have received a copy of the GNU Lesser General Public License
21dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
22
23include(`../config.m4')
24
25
26dnl  Forcibly disable profiling.
27dnl
28dnl  The entrypoints and inits are small enough not to worry about, the real
29dnl  routines arrived at will have any profiling.  Also, the way the code
30dnl  here ends with a jump means we won't work properly with the
31dnl  "instrument" profiling scheme anyway.
32
dnl  Redefine WANT_PROFILING after config.m4 has been included.
define(`WANT_PROFILING',`no')
34
35
dnl  We define PIC_OR_DARWIN as a helper symbol, then use it for suppressing
dnl  normal, fast call code, since that triggers problems on darwin.
38dnl
39dnl  FIXME: There might be a more elegant solution, adding less overhead.
40
dnl  PIC_OR_DARWIN ends up defined (with an empty value) when DARWIN is
dnl  defined, or failing that when PIC is defined.
ifdef(`DARWIN',
`define(`PIC_OR_DARWIN')',
`ifdef(`PIC',
`define(`PIC_OR_DARWIN')')')


dnl  TEXT comes from the included m4 definitions; presumably it selects the
dnl  code section for everything emitted below.
	TEXT
48
49
50dnl  Usage: FAT_ENTRY(name, offset)
51dnl
52dnl  Emit a fat binary entrypoint function of the given name.  This is the
53dnl  normal entry for applications, eg. __gmpn_add_n.
54dnl
55dnl  The code simply jumps through the function pointer in __gmpn_cpuvec at
56dnl  the given "offset" (in bytes).
57dnl
58dnl  For non-PIC, the jumps are 5 bytes each, aligning them to 8 should be
59dnl  fine for all x86s.
60dnl
61dnl  For PIC, the jumps are 20 bytes each, and are best aligned to 16 to
62dnl  ensure at least the first two instructions don't cross a cache line
63dnl  boundary.
64dnl
65dnl  Note the extra `' ahead of PROLOGUE obscures it from the HAVE_NATIVE
66dnl  grepping in configure, stopping that code trying to eval something with
67dnl  $1 in it.
68
dnl  FAT_ENTRY(name, offset): emit function "name", whose whole body is an
dnl  indirect jump through the pointer stored "offset" bytes into
dnl  __gmpn_cpuvec.  The empty quote pair in front of PROLOGUE must stay, it
dnl  hides this PROLOGUE from the HAVE_NATIVE grepping in configure.
define(FAT_ENTRY,
m4_assert_numargs(2)
`	ALIGN(ifdef(`PIC',`16',`8'))
`'PROLOGUE($1)
ifdef(`PIC_OR_DARWIN',`dnl PIC or Darwin: fetch the table address, then jump via the slot
	LEA(	GSYM_PREFIX`'__gmpn_cpuvec, %rax)
	jmp	*$2(%rax)
',`dnl non-PIC: jump via the slot at its link-time address
	jmp	*GSYM_PREFIX`'__gmpn_cpuvec+$2
')
EPILOGUE()
')
81
82
83dnl  FAT_ENTRY for each CPUVEC_FUNCS_LIST
84dnl
85
dnl  Walk CPUVEC_FUNCS_LIST, emitting one FAT_ENTRY per name.  CPUVEC_offset
dnl  is the byte offset handed to FAT_ENTRY; it starts at 0 and grows by 8
dnl  after each function.
define(`CPUVEC_offset',`0')
foreach(cpuvec_fn,
`FAT_ENTRY(MPN(cpuvec_fn),CPUVEC_offset)
define(`CPUVEC_offset',eval(CPUVEC_offset+8))',
CPUVEC_FUNCS_LIST)
91
92
dnl  Usage: FAT_INIT(name, index)
dnl
dnl  Emit a fat binary initializer function of the given name.  These
dnl  functions are the initial values for the pointers in __gmpn_cpuvec.
dnl
dnl  The code simply calls __gmpn_cpuvec_init, and then jumps back through
dnl  the __gmpn_cpuvec pointer selected by "index".  The index is a slot
dnl  number (0, 1, 2, ...), not a byte offset; it is scaled by 8, the size of
dnl  one pointer, in the final indirect jump.  __gmpn_cpuvec_init will have
dnl  stored the address of the selected implementation there.
dnl
dnl  Only one of these routines will be executed, and only once, since after
dnl  that all the __gmpn_cpuvec pointers go to real routines.  So there's no
dnl  need for anything special here, just something small and simple.  To
dnl  keep code size down, "fat_init" is a shared bit of code, arrived at
dnl  with the slot index in %al.  %al is used since the byte move is shorter
dnl  than the %eax form.
dnl
dnl  Passing a slot index rather than a byte offset matters because %al only
dnl  holds 0..255: a byte offset would stop fitting once CPUVEC_FUNCS_LIST
dnl  grows beyond 32 entries, whereas an index is good for 256 entries.
dnl
dnl  Note that having PROLOGUE inside the quoted FAT_INIT body obscures that
dnl  PROLOGUE from the HAVE_NATIVE grepping in configure, preventing that code
dnl  trying to eval something with $1 in it.
dnl
dnl  We need to preserve parameter registers over the __gmpn_cpuvec_init call.

define(FAT_INIT,
m4_assert_numargs(2)
`PROLOGUE($1)
	mov	$`'$2, %al
	jmp	L(fat_init)
EPILOGUE()
')

L(fat_init):
	C al	__gmpn_cpuvec slot index

	C Widen the index so it can be used as a 64-bit index register later.
	movzbl	%al, %eax

	C Save the six integer argument registers and the index across the
	C call.  These seven 8-byte pushes, together with the return address
	C already on the stack, leave rsp 16-byte aligned at the CALL,
	C assuming the caller had rsp 16-byte aligned at its own call.
	push	%rdi
	push	%rsi
	push	%rdx
	push	%rcx
	push	%r8
	push	%r9
	push	%rax
	CALL(	__gmpn_cpuvec_init)
	pop	%rax
	pop	%r9
	pop	%r8
	pop	%rcx
	pop	%rdx
	pop	%rsi
	pop	%rdi

	C Tail-jump through slot number rax of __gmpn_cpuvec, 8 bytes per slot.
ifdef(`PIC_OR_DARWIN',`
	LEA(	GSYM_PREFIX`'__gmpn_cpuvec, %r10)
	jmp	*(%r10,%rax,8)
',`dnl non-PIC
	jmp	*GSYM_PREFIX`'__gmpn_cpuvec(,%rax,8)
')

dnl  FAT_INIT for each CPUVEC_FUNCS_LIST
dnl
dnl  CPUVEC_offset is reused here as a slot counter: it restarts at 0 and
dnl  advances by 1 per function, matching the index FAT_INIT expects.

define(`CPUVEC_offset',0)
foreach(i,
`FAT_INIT(MPN(i`'_init),CPUVEC_offset)
define(`CPUVEC_offset',eval(CPUVEC_offset + 1))',
CPUVEC_FUNCS_LIST)
158
159
160
161C long __gmpn_cpuid (char dst[12], int id);
162C
163C This is called only once, so just something simple and compact is fine.
164
165
PROLOGUE(__gmpn_cpuid)
	C rdi	dst, receives the ebx, edx and ecx results, 4 bytes each
	C esi	id, the cpuid leaf to query
	C
	C The value left in eax by cpuid is what the caller gets back.

	C cpuid overwrites rbx, so park it in r8 and restore it before returning.
	mov	%rbx, %r8
	mov	%esi, %eax
	C Select sub-leaf 0 explicitly.  Leaves that take a sub-leaf in ecx
	C would otherwise see whatever the caller happened to leave there.
	xor	%ecx, %ecx
	cpuid
	mov	%ebx, (%rdi)
	mov	%edx, 4(%rdi)
	mov	%ecx, 8(%rdi)
	mov	%r8, %rbx
	ret
EPILOGUE()
176