dnl  xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/x86_64/x86_64-defs.m4 (revision bdc22b2e01993381dcefeff2bc9b56ca75a4235c)
dnl  Discard all output while the definitions below are being read; the
dnl  plain divert at the end of the file turns output back on.
divert(-1)
2
dnl  m4 macros for amd64 assembler.

dnl  Copyright 1999-2005, 2008, 2009, 2011-2013 Free Software Foundation, Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.
32
33
dnl  Usage: CPUVEC_FUNCS_LIST
dnl
dnl  A list of the functions from gmp-impl.h x86 struct cpuvec_t, in the
dnl  order they appear in that structure.
dnl
dnl  The expansion is a comma-separated list of individually quoted names,
dnl  one per function.  The macro name is quoted in the define so that
dnl  reading this file a second time redefines the list rather than
dnl  expanding it.

define(`CPUVEC_FUNCS_LIST',
``add_n',
`addlsh1_n',
`addlsh2_n',
`addmul_1',
`addmul_2',
`bdiv_dbm1c',
`cnd_add_n',
`cnd_sub_n',
`com',
`copyd',
`copyi',
`divexact_1',
`divrem_1',
`gcd_1',
`lshift',
`lshiftc',
`mod_1',
`mod_1_1p',
`mod_1_1p_cps',
`mod_1s_2p',
`mod_1s_2p_cps',
`mod_1s_4p',
`mod_1s_4p_cps',
`mod_34lsub1',
`modexact_1c_odd',
`mul_1',
`mul_basecase',
`mullo_basecase',
`preinv_divrem_1',
`preinv_mod_1',
`redc_1',
`redc_2',
`rshift',
`sqr_basecase',
`sub_n',
`sublsh1_n',
`submul_1'')
77
78
dnl  Called: PROLOGUE_cpu(GSYM_PREFIX`'foo)
dnl
dnl  Emits the start of a function: a GLOBL directive for the symbol, a
dnl  TYPE(symbol,`function') annotation and the label itself.  Exactly one
dnl  argument is required (checked by m4_assert_numargs).
dnl
dnl  In the amd64 code we use explicit TEXT and ALIGN() calls in the code,
dnl  since different alignments are wanted in various circumstances.  So for
dnl  instance,
dnl
dnl                  TEXT
dnl                  ALIGN(16)
dnl          PROLOGUE(mpn_add_n)
dnl                  ...
dnl          EPILOGUE()

define(`PROLOGUE_cpu',
m4_assert_numargs(1)
`	GLOBL	$1
	TYPE($1,`function')
$1:
')
97
98
dnl  Usage: ASSERT([cond][,instructions])
dnl
dnl  If WANT_ASSERT is 1, output the given instructions and expect the given
dnl  flags condition to then be satisfied.  For example,
dnl
dnl         ASSERT(ne, `cmpq %rax, %rbx')
dnl
dnl  The instructions can be omitted to just assert a flags condition with
dnl  no extra calculation.  For example,
dnl
dnl         ASSERT(nc)
dnl
dnl  When `instructions' is not empty, a pushfq/popfq is added for
dnl  convenience to preserve the flags, but the instructions themselves must
dnl  preserve any registers that matter.
dnl
dnl  The condition can be omitted to just output the given instructions when
dnl  assertion checking is wanted.  In this case the pushfq/popfq is omitted.
dnl  For example,
dnl
dnl         ASSERT(, `movq %rax, VAR_KEEPVAL')
dnl
dnl  When a condition is given, the check is a `j<cond>' that jumps over a
dnl  ud2 instruction to the local label L(ASSERT_ok<N>).  N is the current
dnl  value of ASSERT_counter, which is incremented after each such expansion
dnl  so that every assertion gets its own label.  When WANT_ASSERT is not 1
dnl  nothing is output.  WANT_ASSERT must be defined (checked by
dnl  m4_assert_defined).

define(`ASSERT',
m4_assert_numargs_range(1,2)
m4_assert_defined(`WANT_ASSERT')
`ifelse(WANT_ASSERT,1,
`ifelse(`$1',,
`	$2',
`ifelse(`$2',,,
`	pushfq')
	$2
	`j$1'	L(ASSERT_ok`'ASSERT_counter)
	ud2	C assertion failed
L(ASSERT_ok`'ASSERT_counter):
ifelse(`$2',,,`	popfq')
define(`ASSERT_counter',incr(ASSERT_counter))')')')

define(`ASSERT_counter',1)
137
dnl  Usage: LEA(symbol,reg)
dnl
dnl  Load the address of `symbol' into the register `reg'.  When PIC is
dnl  defined the address is fetched with a %rip-relative mov from
dnl  symbol@GOTPCREL; otherwise it is loaded as an immediate with movabs.
dnl  The `$' of the immediate is quoted separately so that it is not read
dnl  together with the following $1 when the macro is defined.

define(`LEA',`dnl
ifdef(`PIC',
	`mov	$1@GOTPCREL(%rip), $2'
,
	`movabs	`$'$1, $2')
')
144
145
dnl  Usage: DEF_OBJECT(name[,alignment])
dnl
dnl  Start a data object: switch to the RODATA section, ALIGN to `alignment'
dnl  (2 when only the name is given) and emit the label `name:'.  One or two
dnl  arguments are accepted (checked by m4_assert_numargs_range).

define(`DEF_OBJECT',
m4_assert_numargs_range(1,2)
`	RODATA
	ALIGN(ifelse($#,1,2,$2))
$1:
')
152
dnl  Usage: END_OBJECT(name)
dnl
dnl  Emit SIZE(name,.-name), i.e. give the object's size as the distance
dnl  from its label to the current location.  Exactly one argument is
dnl  required (checked by m4_assert_numargs).

define(`END_OBJECT',
m4_assert_numargs(1)
`	SIZE(`$1',.-`$1')')
156
157
dnl  Usage: R32(reg64)
dnl
dnl  Expand to the 32-bit name of the given 64-bit general purpose register,
dnl  for example R32(%rcx) gives %ecx and R32(%r9) gives %r9d.  All sixteen
dnl  registers are covered, including %rsp.  An operand that is not one of
dnl  the names listed here expands to nothing.

define(`R32',
	`ifelse($1,`%rax',`%eax',
		$1,`%rbx',`%ebx',
		$1,`%rcx',`%ecx',
		$1,`%rdx',`%edx',
		$1,`%rsi',`%esi',
		$1,`%rdi',`%edi',
		$1,`%rbp',`%ebp',
		$1,`%rsp',`%esp',
		$1,`%r8',`%r8d',
		$1,`%r9',`%r9d',
		$1,`%r10',`%r10d',
		$1,`%r11',`%r11d',
		$1,`%r12',`%r12d',
		$1,`%r13',`%r13d',
		$1,`%r14',`%r14d',
		$1,`%r15',`%r15d')')
dnl  Usage: R8(reg64)
dnl
dnl  Expand to the name of the low 8 bits of the given 64-bit general
dnl  purpose register, for example R8(%rcx) gives %cl and R8(%r9) gives
dnl  %r9b.  All sixteen registers are covered, including %rsp.  An operand
dnl  that is not one of the names listed here expands to nothing.

define(`R8',
	`ifelse($1,`%rax',`%al',
		$1,`%rbx',`%bl',
		$1,`%rcx',`%cl',
		$1,`%rdx',`%dl',
		$1,`%rsi',`%sil',
		$1,`%rdi',`%dil',
		$1,`%rbp',`%bpl',
		$1,`%rsp',`%spl',
		$1,`%r8',`%r8b',
		$1,`%r9',`%r9b',
		$1,`%r10',`%r10b',
		$1,`%r11',`%r11b',
		$1,`%r12',`%r12b',
		$1,`%r13',`%r13b',
		$1,`%r14',`%r14b',
		$1,`%r15',`%r15b')')
190
191
dnl  Usage: CALL(funcname)
dnl
dnl  Emit a call to GSYM_PREFIX`'funcname.  When PIC is defined the target
dnl  is written as funcname@PLT; otherwise it is a plain call to the symbol.
dnl  The expansion ends with a newline.

define(`CALL',`dnl
ifdef(`PIC',
	`call	GSYM_PREFIX`'$1@PLT'
,
	`call	GSYM_PREFIX`'$1'
)')
201
202
dnl  Usage: JUMPTABSECT
dnl
dnl  Section directive used for jump tables: section .data.rel.ro.local with
dnl  flags "aw" and type @progbits.

define(`JUMPTABSECT', `.section	.data.rel.ro.local,"aw",@progbits')
204
205
dnl  Usage: JMPENT(targlabel,tablabel)
dnl
dnl  Emit one jump table entry.  When PIC is defined the entry is the
dnl  difference targlabel-tablabel as a .long; otherwise it is targlabel
dnl  itself as a .quad and tablabel is not used.  The dnl after each
dnl  alternative removes the newline, so no trailing newline is produced.

define(`JMPENT',`dnl
ifdef(`PIC',
	`.long	$1-$2'dnl
,
	`.quad	$1'dnl
)')
214
215
dnl  These macros are defined just for DOS64, where they provide calling
dnl  sequence glue code.
dnl
dnl  In this file both are defined with an empty body, so they expand to
dnl  nothing whatever arguments they are given.

define(`FUNC_ENTRY',`')
define(`FUNC_EXIT',`')
221
222
dnl  Target ABI macros.
dnl
dnl  Usage: IFDOS(text)
dnl         IFSTD(text)
dnl         IFELF(text)
dnl
dnl  As defined here, IFDOS discards its argument while IFSTD and IFELF
dnl  expand to their argument unchanged.

define(`IFDOS',   `')
define(`IFSTD',   `$1')
define(`IFELF',   `$1')
228
229
dnl  Usage: PROTECT(symbol)
dnl
dnl  Used for private GMP symbols that should never be overridden by users.
dnl  This can save reloc entries and improve shlib sharing as well as
dnl  application startup times.
dnl
dnl  Expands to a `.hidden symbol' directive.

define(`PROTECT',  `.hidden $1')
237
238
dnl  Usage: x86_lookup(target, key,value, key,value, ...)
dnl
dnl  Look for `target' among the `key' parameters.
dnl
dnl  x86_lookup expands to the corresponding `value', or generates an error
dnl  if `target' isn't found.
dnl
dnl  The pairs are examined in order.  If the first key is not equal to
dnl  target, the macro calls itself with that key,value pair shifted off.
dnl  Once fewer than three arguments remain there is nothing left to
dnl  compare and m4_error is called.

define(`x86_lookup',
m4_assert_numargs_range(1,999)
`ifelse(eval($#<3),1,
`m4_error(`unrecognised part of x86 instruction: $1
')',
`ifelse(`$1',`$2', `$3',
`x86_lookup(`$1',shift(shift(shift($@))))')')')
253
254
dnl  Usage: x86_opcode_regxmm(reg)
dnl
dnl  Validate the given xmm register, and return its number, 0 to 15.
dnl
dnl  The register is looked up with x86_lookup in x86_opcode_regxmm_list,
dnl  so anything other than %xmm0 ... %xmm15 produces x86_lookup's error.

define(`x86_opcode_regxmm',
m4_assert_numargs(1)
`x86_lookup(`$1',x86_opcode_regxmm_list)')

define(`x86_opcode_regxmm_list',
``%xmm0',0,
`%xmm1',1,
`%xmm2',2,
`%xmm3',3,
`%xmm4',4,
`%xmm5',5,
`%xmm6',6,
`%xmm7',7,
`%xmm8',8,
`%xmm9',9,
`%xmm10',10,
`%xmm11',11,
`%xmm12',12,
`%xmm13',13,
`%xmm14',14,
`%xmm15',15')
280
dnl  Usage: palignr($imm,%srcreg,%dstreg)
dnl
dnl  Emit a palignr instruction, using a .byte sequence, since obsolete but
dnl  still distributed versions of gas don't know SSSE3 instructions.
dnl
dnl  Both registers must be xmm registers (validated by x86_opcode_regxmm).
dnl  The bytes produced are, in order:
dnl
dnl    0x66
dnl    0x40 + 4*(dst/8) + (src/8)    only when either register is xmm8 or
dnl                                  above
dnl    0x0f,0x3a,0x0f
dnl    0xc0 + 8*(dst%8) + (src%8)
dnl    imm                           the first argument with its leading
dnl                                  character (the `$') removed by substr

define(`palignr',
m4_assert_numargs(3)
`.byte	0x66,dnl
ifelse(eval(x86_opcode_regxmm($3) >= 8 || x86_opcode_regxmm($2) >= 8),1,
       `eval(0x40+x86_opcode_regxmm($3)/8*4+x86_opcode_regxmm($2)/8),')dnl
0x0f,0x3a,0x0f,dnl
eval(0xc0+x86_opcode_regxmm($3)%8*8+x86_opcode_regxmm($2)%8),dnl
substr($1,1)')
294
295
dnl  Usage
dnl
dnl    regnum(op)   raw operand index (so slightly misnamed)
dnl    regnumh(op)  high bit of register operand number
dnl    ix(op)       0 for reg operand, 1 for plain pointer operand.
dnl
dnl  oplist maps each operand spelling to its index: the sixteen registers
dnl  %rax ... %r15 are 0 to 15, and the same registers written in
dnl  parentheses, (%rax) ... (%r15), are 16 to 31.  regnum looks the operand
dnl  up with x86_lookup, regnumh is (index/8) & 1 and ix is index/16.

define(`regnum',`x86_lookup(`$1',oplist)')
define(`regnumh',`eval(regnum($1)/8 & 1)')
define(`ix',`eval(regnum($1)/16)')
define(`oplist',
``%rax',   0, `%rcx',   1, `%rdx',   2,  `%rbx',   3,
 `%rsp',   4, `%rbp',   5, `%rsi',   6,  `%rdi',   7,
 `%r8',    8, `%r9',    9, `%r10',  10,  `%r11',  11,
 `%r12',  12, `%r13',  13, `%r14',  14,  `%r15',  15,
 `(%rax)',16, `(%rcx)',17, `(%rdx)',18,  `(%rbx)',19,
 `(%rsp)',20, `(%rbp)',21, `(%rsi)',22,  `(%rdi)',23,
 `(%r8)', 24, `(%r9)', 25, `(%r10)',26,  `(%r11)',27,
 `(%r12)',28, `(%r13)',29, `(%r14)',30,  `(%r15)',31')
315
316
dnl  Usage
dnl
dnl     mulx(reg1,reg2,reg3)
dnl
dnl  or
dnl
dnl     mulx((reg1),reg2,reg3)
dnl
dnl  where reg1 is any register but rsp,rbp,r12,r13, or
dnl
dnl     mulx(off,(reg1),reg2,reg3)
dnl
dnl  where reg1 is any register but rsp,r12.
dnl
dnl  The exceptions are due to special coding needed for some registers; rsp
dnl  and r12 need an extra byte 0x24 at the end while rbp and r13 lack the
dnl  offset-less form.
dnl
dnl  Other addressing forms are not handled.  Invalid forms are not properly
dnl  detected.  Offsets that don't fit one byte are not handled correctly.
dnl
dnl  The instruction is emitted as a .byte sequence.  Writing src for the
dnl  first register operand (reg1), mid for the next and dst for the last,
dnl  the bytes are:
dnl
dnl    0xc4
dnl    0xe2 with bit 5 flipped if regnumh(src) and bit 7 flipped if
dnl         regnumh(dst)
dnl    0xfb - 8*regnum(mid)
dnl    0xf6
dnl    three argument form: 0xc0 + (src & 7) + 8*(dst & 7), without the 0xc0
dnl                         when src is written in parentheses
dnl    four argument form:  0x40 + (src & 7) + 8*(dst & 7), followed by the
dnl                         offset reduced to one byte as (off + 256) % 256
dnl
dnl  Any other number of arguments produces just the leading 0xc4 byte.

define(`mulx',`dnl
.byte	0xc4`'dnl
ifelse(`$#',3,`dnl
,eval(0xe2^32*regnumh($1)^128*regnumh($3))`'dnl
,eval(0xfb-8*regnum($2))`'dnl
,0xf6`'dnl
,eval(0xc0+(7 & regnum($1))+8*(7 & regnum($3))-0xc0*ix($1))`'dnl
',`$#',4,`dnl
,eval(0xe2^32*regnumh($2)^128*regnumh($4))`'dnl
,eval(0xfb-8*regnum($3))`'dnl
,0xf6`'dnl
,eval(0x40+(7 & regnum($2))+8*(7 & regnum($4)))`'dnl
,eval(($1 + 256) % 256)`'dnl
')')
352
dnl  Usage
dnl
dnl     adcx(reg1,reg2)
dnl     adox(reg1,reg2)
dnl
dnl  or
dnl
dnl     adcx((reg1),reg2)
dnl     adox((reg1),reg2)
dnl
dnl  where reg1 is any register but rsp,rbp,r12,r13, or
dnl
dnl     adcx(off,(reg1),reg2)
dnl     adox(off,(reg1),reg2)
dnl
dnl  where reg1 is any register but rsp,r12.
dnl
dnl  The exceptions are due to special coding needed for some registers; rsp
dnl  and r12 need an extra byte 0x24 at the end while rbp and r13 lack the
dnl  offset-less form.
dnl
dnl  Other addressing forms are not handled.  Invalid forms are not properly
dnl  detected.  Offsets that don't fit one byte are not handled correctly.
dnl
dnl  The instructions are emitted as .byte sequences.  adcx starts with 0x66
dnl  and adox with 0xf3; the remainder is shared and comes from adx.  Writing
dnl  src for the first register operand (reg1) and dst for the last, adx
dnl  produces:
dnl
dnl    adx_helper:          0x48 + regnumh(src) + 4*regnumh(dst),
dnl                         0x0f, 0x38, 0xf6
dnl    two argument form:   0xc0 + (src & 7) + 8*(dst & 7), without the 0xc0
dnl                         when src is written in parentheses
dnl    three argument form: 0x40 + (src & 7) + 8*(dst & 7), followed by the
dnl                         offset reduced to one byte as (off + 256) % 256
dnl
dnl  Any other number of arguments makes adx expand to nothing, leaving only
dnl  the first byte.

define(`adx_helper',`dnl
,eval(0x48+regnumh($1)+4*regnumh($2))`'dnl
,0x0f`'dnl
,0x38`'dnl
,0xf6`'dnl
')

define(`adx',`dnl
ifelse(`$#',2,`dnl
adx_helper($1,$2)dnl
,eval(0xc0+(7 & regnum($1))+8*(7 & regnum($2))-0xc0*ix($1))`'dnl
',`$#',3,`dnl
adx_helper($2,$3)dnl
,eval(0x40+(7 & regnum($2))+8*(7 & regnum($3)))`'dnl
,eval(($1 + 256) % 256)`'dnl
')')

define(`adcx',`dnl
.byte	0x66`'dnl
adx($@)')

define(`adox',`dnl
.byte	0xf3`'dnl
adx($@)')
401
dnl  The definitions are complete: switch output back on.  The trailing dnl
dnl  swallows the newline that follows the divert.
divert`'dnl