divert(-1)
dnl  xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/x86_64/x86_64-defs.m4 (revision dd3ee07da436799d8de85f3055253118b76bf345)
2
dnl  m4 macros for amd64 assembler.

dnl  Copyright 1999-2005, 2008, 2009, 2011-2013, 2017 Free Software Foundation,
dnl  Inc.

dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.
33
34
dnl  Usage: CPUVEC_FUNCS_LIST
dnl
dnl  A list of the functions from gmp-impl.h x86 struct cpuvec_t, in the
dnl  order they appear in that structure.
dnl
dnl  The expansion is a comma separated sequence of individually quoted
dnl  names, one per line, with no mpn_ or other prefix attached.

define(CPUVEC_FUNCS_LIST,
``add_n',
`addlsh1_n',
`addlsh2_n',
`addmul_1',
`addmul_2',
`bdiv_dbm1c',
`cnd_add_n',
`cnd_sub_n',
`com',
`copyd',
`copyi',
`divexact_1',
`divrem_1',
`gcd_11',
`lshift',
`lshiftc',
`mod_1',
`mod_1_1p',
`mod_1_1p_cps',
`mod_1s_2p',
`mod_1s_2p_cps',
`mod_1s_4p',
`mod_1s_4p_cps',
`mod_34lsub1',
`modexact_1c_odd',
`mul_1',
`mul_basecase',
`mullo_basecase',
`preinv_divrem_1',
`preinv_mod_1',
`redc_1',
`redc_2',
`rshift',
`sqr_basecase',
`sub_n',
`sublsh1_n',
`submul_1'')
78
79
dnl  Called: PROLOGUE_cpu(GSYM_PREFIX`'foo)
dnl
dnl  In the amd64 code we use explicit TEXT and ALIGN() calls in the code,
dnl  since different alignments are wanted in various circumstances.  So for
dnl  instance,
dnl
dnl                  TEXT
dnl                  ALIGN(16)
dnl          PROLOGUE(mpn_add_n)
dnl                  ...
dnl          EPILOGUE()
dnl
dnl  The expansion is, in order: a GLOBL directive for the symbol, a
dnl  TYPE(symbol,function) annotation, the COFF_TYPE(symbol) annotation, and
dnl  finally the label "symbol:" itself.  No section or alignment directive
dnl  is emitted here.

define(`PROLOGUE_cpu',
m4_assert_numargs(1)
`	GLOBL	$1
	TYPE($1,`function')
	COFF_TYPE($1)
$1:
')
99
100
dnl  Usage: COFF_TYPE(GSYM_PREFIX`'foo)
dnl
dnl  Emit COFF style ".def ... .endef" type information for a function, when
dnl  supported.  The argument should include any GSYM_PREFIX.
dnl
dnl  See autoconf macro GMP_ASM_COFF_TYPE for HAVE_COFF_TYPE.
dnl
dnl  HAVE_COFF_TYPE must be defined; the directives are produced only when
dnl  its value is exactly "yes", otherwise the expansion is empty.

define(COFF_TYPE,
m4_assert_numargs(1)
m4_assert_defined(`HAVE_COFF_TYPE')
`ifelse(HAVE_COFF_TYPE,yes,
	`.def	$1
	.scl	2
	.type	32
	.endef')')
116
117
dnl  Usage: ASSERT([cond][,instructions])
dnl
dnl  If WANT_ASSERT is 1, output the given instructions and expect the given
dnl  flags condition to then be satisfied.  For example,
dnl
dnl         ASSERT(ne, `cmpq %rax, %rbx')
dnl
dnl  The instructions can be omitted to just assert a flags condition with
dnl  no extra calculation.  For example,
dnl
dnl         ASSERT(nc)
dnl
dnl  When `instructions' is not empty, a pushfq/popfq is added for
dnl  convenience to preserve the flags, but the instructions themselves must
dnl  preserve any registers that matter.
dnl
dnl  The condition can be omitted to just output the given instructions when
dnl  assertion checking is wanted.  In this case the pushfq/popfq is omitted.
dnl  For example,
dnl
dnl         ASSERT(, `movq %rax, VAR_KEEPVAL')
dnl
dnl  A failed assertion falls through to a ud2 instruction.  Each checked
dnl  assertion uses its own local label, numbered by ASSERT_counter, which is
dnl  incremented after every such expansion.  When WANT_ASSERT is not 1 the
dnl  expansion is empty.

define(ASSERT,
m4_assert_numargs_range(1,2)
m4_assert_defined(`WANT_ASSERT')
`ifelse(WANT_ASSERT,1,
`ifelse(`$1',,
`	$2',
`ifelse(`$2',,,
`	pushfq')
	$2
	`j$1'	L(ASSERT_ok`'ASSERT_counter)
	ud2	C assertion failed
L(ASSERT_ok`'ASSERT_counter):
ifelse(`$2',,,`	popfq')
define(`ASSERT_counter',incr(ASSERT_counter))')')')

define(ASSERT_counter,1)
156
dnl  Usage: LEA(symbol,reg)
dnl
dnl  LEA - load effective address
dnl
dnl  Load the address of symbol into reg.  When PIC is defined the address
dnl  is fetched with a mov from symbol@GOTPCREL(%rip); otherwise a plain
dnl  %rip-relative lea is used.
dnl
dnl  FIXME: We should never create a GOT entry and therefore use the simpler 2nd
dnl  variant always. We need to understand what happens for not-yet-hidden
dnl  symbols first.
dnl
define(`LEA',`dnl
ifdef(`PIC',
	`mov	$1@GOTPCREL(%rip), $2'
,
	`lea	$1(%rip), $2')
')
169
170
dnl  Usage: DEF_OBJECT(name,align[,section])
dnl
dnl  Start a data object.  The expansion selects a section (the third
dnl  argument when exactly three arguments are given, RODATA otherwise),
dnl  emits ALIGN(align) and then the label "name:".  Pair with
dnl  END_OBJECT(name).

define(`DEF_OBJECT',
m4_assert_numargs_range(2,3)
`	ifelse($#,3,`$3',`RODATA')
	ALIGN($2)
$1:
')
177
dnl  Usage: END_OBJECT(name)
dnl
dnl  Finish a data object by emitting SIZE(name,.-name), i.e. the distance
dnl  from the label "name" to the current location.

define(`END_OBJECT',
m4_assert_numargs(1)
`	SIZE(`$1',.-`$1')')
181
182
dnl  Usage: R32(reg64)
dnl
dnl  Expand to the 32-bit name of the given 64-bit general register, for
dnl  instance R32(%rax) gives %eax and R32(%r10) gives %r10d.  The table has
dnl  no entry for %rsp.  An argument that matches no entry gives an empty
dnl  expansion, without any error.

define(`R32',
	`ifelse($1,`%rax',`%eax',
		$1,`%rbx',`%ebx',
		$1,`%rcx',`%ecx',
		$1,`%rdx',`%edx',
		$1,`%rsi',`%esi',
		$1,`%rdi',`%edi',
		$1,`%rbp',`%ebp',
		$1,`%r8',`%r8d',
		$1,`%r9',`%r9d',
		$1,`%r10',`%r10d',
		$1,`%r11',`%r11d',
		$1,`%r12',`%r12d',
		$1,`%r13',`%r13d',
		$1,`%r14',`%r14d',
		$1,`%r15',`%r15d')')
dnl  Usage: R8(reg64)
dnl
dnl  Expand to the low-byte name of the given 64-bit general register, for
dnl  instance R8(%rax) gives %al and R8(%r10) gives %r10b.  The table has
dnl  no entry for %rsp.  An argument that matches no entry gives an empty
dnl  expansion, without any error.

define(`R8',
	`ifelse($1,`%rax',`%al',
		$1,`%rbx',`%bl',
		$1,`%rcx',`%cl',
		$1,`%rdx',`%dl',
		$1,`%rsi',`%sil',
		$1,`%rdi',`%dil',
		$1,`%rbp',`%bpl',
		$1,`%r8',`%r8b',
		$1,`%r9',`%r9b',
		$1,`%r10',`%r10b',
		$1,`%r11',`%r11b',
		$1,`%r12',`%r12b',
		$1,`%r13',`%r13b',
		$1,`%r14',`%r14b',
		$1,`%r15',`%r15b')')
215
216
dnl  Usage: CALL(funcname)
dnl
dnl  Emit a call to funcname, with GSYM_PREFIX prepended.  When PIC is
dnl  defined the call goes through the PLT (funcname@PLT).

define(`CALL',`dnl
ifdef(`PIC',
	`call	GSYM_PREFIX`'$1@PLT'
,
	`call	GSYM_PREFIX`'$1'
)')
226
dnl  Usage: TCALL(funcname)
dnl
dnl  Like CALL, but emit a jmp instead of a call, so control does not come
dnl  back to the instruction after the expansion.  GSYM_PREFIX is prepended,
dnl  and when PIC is defined the jump goes through the PLT (funcname@PLT).

define(`TCALL',`dnl
ifdef(`PIC',
	`jmp	GSYM_PREFIX`'$1@PLT'
,
	`jmp	GSYM_PREFIX`'$1'
)')
233
234
dnl  Usage: JUMPTABSECT
dnl
dnl  Expand to the .section directive selecting .data.rel.ro.local, flagged
dnl  allocatable ("a") with type @progbits.  Judging by the name, this is
dnl  where jump tables built from JMPENT entries are meant to be placed.

define(`JUMPTABSECT', `.section	.data.rel.ro.local,"a",@progbits')
236
237
dnl  Usage: JMPENT(targlabel,tablabel)
dnl
dnl  Emit one jump table entry.  When PIC is defined the entry is a .long
dnl  holding targlabel-tablabel, i.e. an offset relative to the table label;
dnl  otherwise it is a .quad holding targlabel itself and tablabel is unused.

define(`JMPENT',`dnl
ifdef(`PIC',
	`.long	$1-$2'dnl
,
	`.quad	$1'dnl
)')
246
247
dnl  These macros are defined just for DOS64, where they provide calling
dnl  sequence glue code.
dnl
dnl  In this file both expand to nothing, whatever arguments are given.

define(`FUNC_ENTRY',`')
define(`FUNC_EXIT',`')
253
254
dnl  Target ABI macros.
dnl
dnl  Each takes a piece of text as its argument.  In this file IFSTD and
dnl  IFELF expand to that text unchanged, while IFDOS discards it.

define(`IFDOS',   `')
define(`IFSTD',   `$1')
define(`IFELF',   `$1')
260
261
dnl  Usage: PROTECT(symbol)
dnl
dnl  Used for private GMP symbols that should never be overridden by users.
dnl  This can save reloc entries and improve shlib sharing as well as
dnl  application startup times.
dnl
dnl  The expansion is a ".hidden symbol" directive.

define(`PROTECT',  `.hidden $1')
269
270
dnl  Usage: x86_lookup(target, key,value, key,value, ...)
dnl
dnl  Look for `target' among the `key' parameters.
dnl
dnl  x86_lookup expands to the corresponding `value', or generates an error
dnl  if `target' isn't found.
dnl
dnl  The search compares target against the first key, and on a mismatch
dnl  recurses with that key,value pair shifted off.  Once fewer than three
dnl  arguments remain there is no pair left to test and m4_error is invoked.

define(x86_lookup,
m4_assert_numargs_range(1,999)
`ifelse(eval($#<3),1,
`m4_error(`unrecognised part of x86 instruction: $1
')',
`ifelse(`$1',`$2', `$3',
`x86_lookup(`$1',shift(shift(shift($@))))')')')
285
286
dnl  Usage: x86_opcode_regxmm(reg)
dnl
dnl  Validate the given xmm register, and return its number, 0 to 15.
dnl
dnl  The register is looked up in x86_opcode_regxmm_list with x86_lookup, so
dnl  anything other than %xmm0 ... %xmm15 produces an x86_lookup error.

define(x86_opcode_regxmm,
m4_assert_numargs(1)
`x86_lookup(`$1',x86_opcode_regxmm_list)')
294
dnl  Usage: x86_opcode_regxmm_list
dnl
dnl  The key,value table behind x86_opcode_regxmm: each xmm register name
dnl  followed by its register number, for %xmm0 through %xmm15.

define(x86_opcode_regxmm_list,
``%xmm0',0,
`%xmm1',1,
`%xmm2',2,
`%xmm3',3,
`%xmm4',4,
`%xmm5',5,
`%xmm6',6,
`%xmm7',7,
`%xmm8',8,
`%xmm9',9,
`%xmm10',10,
`%xmm11',11,
`%xmm12',12,
`%xmm13',13,
`%xmm14',14,
`%xmm15',15')
312
dnl  Usage: palignr($imm,%srcreg,%dstreg)
dnl
dnl  Emit a palignr instruction, using a .byte sequence, since obsolete but
dnl  still distributed versions of gas don't know SSSE3 instructions.
dnl
dnl  Both register operands must be xmm registers (%xmm0 ... %xmm15).  The
dnl  bytes produced are
dnl
dnl     0x66
dnl     0x40 + 4*(dst/8) + (src/8)     only if either register is 8 or above
dnl     0x0f,0x3a,0x0f
dnl     0xc0 + 8*(dst%8) + (src%8)
dnl     imm
dnl
dnl  The immediate is the first argument with its first character removed,
dnl  so it has to be written with the leading "$".

define(`palignr',
m4_assert_numargs(3)
`.byte	0x66,dnl
ifelse(eval(x86_opcode_regxmm($3) >= 8 || x86_opcode_regxmm($2) >= 8),1,
       `eval(0x40+x86_opcode_regxmm($3)/8*4+x86_opcode_regxmm($2)/8),')dnl
0x0f,0x3a,0x0f,dnl
eval(0xc0+x86_opcode_regxmm($3)%8*8+x86_opcode_regxmm($2)%8),dnl
substr($1,1)')
326
327
dnl  Usage
dnl
dnl    regnum(op)   raw operand index (so slightly misnamed)
dnl    regnumh(op)  high bit of register operand number
dnl    ix(op)       0 for reg operand, 1 for plain pointer operand.
dnl
dnl  oplist is the lookup table: the sixteen 64-bit general registers map to
dnl  0 ... 15, and the same registers written in parentheses, e.g. (%rax),
dnl  map to 16 ... 31.  Hence regnumh is bit 3 of the index and ix is the
dnl  index divided by 16.  An operand not in oplist produces an x86_lookup
dnl  error.

define(`regnum',`x86_lookup(`$1',oplist)')
define(`regnumh',`eval(regnum($1)/8 & 1)')
define(`ix',`eval(regnum($1)/16)')
define(`oplist',
``%rax',   0, `%rcx',   1, `%rdx',   2,  `%rbx',   3,
 `%rsp',   4, `%rbp',   5, `%rsi',   6,  `%rdi',   7,
 `%r8',    8, `%r9',    9, `%r10',  10,  `%r11',  11,
 `%r12',  12, `%r13',  13, `%r14',  14,  `%r15',  15,
 `(%rax)',16, `(%rcx)',17, `(%rdx)',18,  `(%rbx)',19,
 `(%rsp)',20, `(%rbp)',21, `(%rsi)',22,  `(%rdi)',23,
 `(%r8)', 24, `(%r9)', 25, `(%r10)',26,  `(%r11)',27,
 `(%r12)',28, `(%r13)',29, `(%r14)',30,  `(%r15)',31')
347
dnl  Usage (by mulx, shlx, shrx, sarx)
dnl
dnl     reg1,reg2,reg3,opc1,opc2
dnl
dnl  or
dnl
dnl     (reg1),reg2,reg3,opc1,opc2
dnl
dnl  where reg1 is any register but rsp,rbp,r12,r13, or
dnl
dnl     off,(reg1),reg2,reg3,opc1,opc2
dnl
dnl  where reg1 is any register but rsp,r12.
dnl
dnl  The exceptions are due to special coding needed for some registers; rsp
dnl  and r12 need an extra byte 0x24 at the end while rbp and r13 lack the
dnl  offset-less form.
dnl
dnl  Other addressing forms are not handled.  Invalid forms are not properly
dnl  detected.  Offsets that don't fit one byte are not handled correctly.
dnl
dnl  opc1 and opc2 are given as two hex digits without the 0x.  The bytes
dnl  produced are
dnl
dnl     0xc4
dnl     0xe2, with bit 5 cleared if reg1 is r8-r15 and bit 7 cleared if reg3
dnl           is r8-r15
dnl     opc1 - 8*(number of reg2)
dnl     opc2
dnl     ModRM: low 3 bits of reg1, plus 8 times the low 3 bits of reg3, plus
dnl            0xc0 for the plain register form, 0 for the (reg1) form, or
dnl            0x40 for the off,(reg1) form
dnl     off reduced modulo 256, for the off,(reg1) form only
dnl
dnl  With any number of arguments other than 5 or 6 only the 0xc4 is
dnl  emitted.

define(`c4_helper',`dnl
.byte	0xc4`'dnl
ifelse(`$#',5,`dnl
,eval(0xe2^32*regnumh($1)^128*regnumh($3))`'dnl
,eval(0x$4-8*regnum($2))`'dnl
,0x$5`'dnl
,eval(0xc0+(7 & regnum($1))+8*(7 & regnum($3))-0xc0*ix($1))`'dnl
',`$#',6,`dnl
,eval(0xe2^32*regnumh($2)^128*regnumh($4))`'dnl
,eval(0x$5-8*regnum($3))`'dnl
,0x$6`'dnl
,eval(0x40+(7 & regnum($2))+8*(7 & regnum($4)))`'dnl
,eval(($1 + 256) % 256)`'dnl
')')
385
386
dnl  Usage
dnl
dnl     mulx(reg1,reg2,reg3)
dnl
dnl  or
dnl
dnl     mulx((reg1),reg2,reg3)
dnl
dnl  where reg1 is any register but rsp,rbp,r12,r13, or
dnl
dnl     mulx(off,(reg1),reg2,reg3)
dnl
dnl  where reg1 is any register but rsp,r12.
dnl
dnl  The operands are handed to c4_helper in the order written, followed by
dnl  the opcode bytes fb,f6.  Exactly three arguments select the first two
dnl  forms; any other count is treated as the offset form.

define(`mulx',`dnl
ifelse(`$#',3,`dnl
c4_helper($1,$2,$3,fb,f6)',`dnl         format 1,2
c4_helper($1,$2,$3,$4,fb,f6)'dnl	format 3
)')
406
407
dnl  Usage
dnl
dnl     shlx(reg1,reg2,reg3)
dnl     shrx(reg1,reg2,reg3)
dnl     sarx(reg1,reg2,reg3)
dnl
dnl  or
dnl
dnl     shlx(reg1,(reg2),reg3)
dnl     shrx(reg1,(reg2),reg3)
dnl     sarx(reg1,(reg2),reg3)
dnl
dnl  where reg2 is any register but rsp,rbp,r12,r13, or
dnl
dnl     shlx(reg1,off,(reg2),reg3)
dnl     shrx(reg1,off,(reg2),reg3)
dnl     sarx(reg1,off,(reg2),reg3)
dnl
dnl  where reg2 is any register but rsp,r12.
dnl
dnl  shlx hands its operands to c4_helper with opcode bytes f9,f7.  reg2 (or
dnl  the memory operand) goes in c4_helper's first register slot, reg1 in
dnl  the second and reg3 in the third.
dnl
dnl  In the four operand form the macro looks at the first operand.  If it
dnl  starts with "%" the operands are taken in the documented order
dnl  reg1,off,(reg2),reg3.  Otherwise they are taken as off,reg1,(reg2),reg3,
dnl  which is the only four operand order earlier versions of this macro
dnl  could encode and is kept working for compatibility.

define(`shlx',`dnl
ifelse(`$#',3,`dnl
c4_helper($2,$1,$3,f9,f7)',dnl		formats 1 and 2
index(`$1',%),0,`dnl
c4_helper($2,$3,$1,$4,f9,f7)',dnl	format 3 as documented
`dnl
c4_helper($1,$3,$2,$4,f9,f7)'dnl	format 3 with the offset first
)')
430
dnl  Usage
dnl
dnl     shrx(reg1,reg2,reg3)
dnl     shrx(reg1,(reg2),reg3)
dnl     shrx(reg1,off,(reg2),reg3)
dnl
dnl  Same operand handling as shlx, with opcode bytes fb,f7.  In the four
dnl  operand form a first operand starting with "%" selects the documented
dnl  order reg1,off,(reg2),reg3.  Otherwise the operands are taken as
dnl  off,reg1,(reg2),reg3, the only four operand order earlier versions of
dnl  this macro could encode, kept working for compatibility.

define(`shrx',`dnl
ifelse(`$#',3,`dnl
c4_helper($2,$1,$3,fb,f7)',dnl		formats 1 and 2
index(`$1',%),0,`dnl
c4_helper($2,$3,$1,$4,fb,f7)',dnl	format 3 as documented
`dnl
c4_helper($1,$3,$2,$4,fb,f7)'dnl	format 3 with the offset first
)')
436
dnl  Usage
dnl
dnl     sarx(reg1,reg2,reg3)
dnl     sarx(reg1,(reg2),reg3)
dnl     sarx(reg1,off,(reg2),reg3)
dnl
dnl  Same operand handling as shlx and shrx, with opcode bytes fa,f7.  In
dnl  the four operand form a first operand starting with "%" selects the
dnl  order reg1,off,(reg2),reg3.  Otherwise the operands are taken as
dnl  off,reg1,(reg2),reg3, the only four operand order earlier versions of
dnl  this macro could encode, kept working for compatibility.

define(`sarx',`dnl
ifelse(`$#',3,`dnl
c4_helper($2,$1,$3,fa,f7)',dnl		formats 1 and 2
index(`$1',%),0,`dnl
c4_helper($2,$3,$1,$4,fa,f7)',dnl	format 3 as documented
`dnl
c4_helper($1,$3,$2,$4,fa,f7)'dnl	format 3 with the offset first
)')
442
443
dnl  Usage
dnl
dnl     adcx(reg1,reg2)
dnl     adox(reg1,reg2)
dnl
dnl  or
dnl
dnl     adcx((reg1),reg2)
dnl     adox((reg1),reg2)
dnl
dnl  where reg1 is any register but rsp,rbp,r12,r13, or
dnl
dnl     adcx(off,(reg1),reg2)
dnl     adox(off,(reg1),reg2)
dnl
dnl  where reg1 is any register but rsp,r12.
dnl
dnl  The exceptions are due to special coding needed for some registers; rsp
dnl  and r12 need an extra byte 0x24 at the end while rbp and r13 lack the
dnl  offset-less form.
dnl
dnl  Other addressing forms are not handled.  Invalid forms are not properly
dnl  detected.  Offsets that don't fit one byte are not handled correctly.
dnl
dnl  adx_helper(reg1,reg2) continues a .byte list already started by the
dnl  caller.  It appends 0x48, plus 1 if reg1 is r8-r15 and plus 4 if reg2
dnl  is r8-r15, followed by the bytes 0x0f,0x38,0xf6.

define(`adx_helper',`dnl
,eval(0x48+regnumh($1)+4*regnumh($2))`'dnl
,0x0f`'dnl
,0x38`'dnl
,0xf6`'dnl
')
474
dnl  Usage: adx(reg1,reg2)
dnl         adx((reg1),reg2)
dnl         adx(off,(reg1),reg2)
dnl
dnl  Common tail of adcx and adox, continuing the .byte list they start.
dnl  Appends the adx_helper bytes and then a ModRM byte built from the low
dnl  three bits of reg1 and reg2: with 0xc0 added for the plain register
dnl  form, nothing added for the (reg1) form, and 0x40 added for the
dnl  off,(reg1) form, which is then followed by off reduced modulo 256.
dnl
dnl  Any other number of operands is reported with m4_error, rather than
dnl  leaving just the prefix byte from adcx or adox in the output.

define(`adx',`dnl
ifelse(`$#',2,`dnl
adx_helper($1,$2)dnl
,eval(0xc0+(7 & regnum($1))+8*(7 & regnum($2))-0xc0*ix($1))`'dnl
',`$#',3,`dnl
adx_helper($2,$3)dnl
,eval(0x40+(7 & regnum($2))+8*(7 & regnum($3)))`'dnl
,eval(($1 + 256) % 256)`'dnl
',`dnl
m4_error(`adx: expected 2 or 3 operands, got $#
')')')
484
dnl  Usage: adcx(reg1,reg2), adcx((reg1),reg2) or adcx(off,(reg1),reg2)
dnl
dnl  Starts a .byte directive with 0x66 and passes all operands on to adx,
dnl  which appends the remaining bytes.

define(`adcx',`dnl
.byte	0x66`'dnl
adx($@)')
488
dnl  Usage: adox(reg1,reg2), adox((reg1),reg2) or adox(off,(reg1),reg2)
dnl
dnl  Starts a .byte directive with 0xf3 and passes all operands on to adx,
dnl  which appends the remaining bytes.

define(`adox',`dnl
.byte	0xf3`'dnl
adx($@)')
492
divert`'dnl