xref: /netbsd-src/external/lgpl3/gmp/dist/mpn/x86/x86-defs.m4 (revision 7788a0781fe6ff2cce37368b4578a7ade0850cb1)
dnl  Send output to diversion -1 while the definitions in this file are read.
1divert(-1)
2
3
4dnl  m4 macros for x86 assembler.
5
6
7dnl  Copyright 1999, 2000, 2001, 2002, 2003, 2007 Free Software Foundation,
8dnl  Inc.
9dnl
10dnl  This file is part of the GNU MP Library.
11dnl
12dnl  The GNU MP Library is free software; you can redistribute it and/or
13dnl  modify it under the terms of the GNU Lesser General Public License as
14dnl  published by the Free Software Foundation; either version 3 of the
15dnl  License, or (at your option) any later version.
16dnl
17dnl  The GNU MP Library is distributed in the hope that it will be useful,
18dnl  but WITHOUT ANY WARRANTY; without even the implied warranty of
19dnl  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20dnl  Lesser General Public License for more details.
21dnl
22dnl  You should have received a copy of the GNU Lesser General Public License
23dnl  along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
24
25
26dnl  Notes:
27dnl
28dnl  m4 isn't perfect for processing BSD style x86 assembler code, the main
29dnl  problems are,
30dnl
31dnl  1. Doing define(foo,123) and then using foo in an addressing mode like
32dnl     foo(%ebx) expands as a macro rather than a constant.  This is worked
33dnl     around by using deflit() from asm-defs.m4, instead of define().
34dnl
35dnl  2. Immediates in macro definitions need a space or `' to stop the $
36dnl     looking like a macro parameter.  For example,
37dnl
38dnl	        define(foo, `mov $ 123, %eax')
39dnl
40dnl     This is only a problem in macro definitions, not in ordinary text,
41dnl     and not in macro parameters like text passed to forloop() or ifdef().
42
43
dnl  Limb size in bytes.  deflit() rather than define() is used so a
dnl  following parenthesized operand is not swallowed as macro arguments.
deflit(`BYTES_PER_MP_LIMB', 4)
45
46
47dnl  Libtool gives -DPIC -DDLL_EXPORT to indicate a cygwin or mingw DLL.  We
48dnl  undefine PIC since we don't need to be position independent in this
49dnl  case and definitely don't want the ELF style _GLOBAL_OFFSET_TABLE_ etc.
50
dnl  Drop PIC whenever DLL_EXPORT has been given.
ifdef(`DLL_EXPORT',
`undefine(`PIC')')
52
53
54dnl  Usage: CPUVEC_FUNCS_LIST
55dnl
56dnl  A list of the functions from gmp-impl.h x86 struct cpuvec_t, in the
57dnl  order they appear in that structure.
58
dnl  The cpuvec_t member names as a comma separated list of quoted words,
dnl  one per line.
define(`CPUVEC_FUNCS_LIST',
``add_n',
`addmul_1',
`copyd',
`copyi',
`divexact_1',
`divexact_by3c',
`divrem_1',
`gcd_1',
`lshift',
`mod_1',
`mod_34lsub1',
`modexact_1c_odd',
`mul_1',
`mul_basecase',
`preinv_divrem_1',
`preinv_mod_1',
`rshift',
`sqr_basecase',
`sub_n',
`submul_1'')
80
81
82dnl  Called: PROLOGUE_cpu(GSYM_PREFIX`'foo)
83dnl
84dnl  In the x86 code we use explicit TEXT and ALIGN() calls in the code,
85dnl  since different alignments are wanted in various circumstances.  So for
86dnl  instance,
87dnl
88dnl                  TEXT
89dnl                  ALIGN(16)
90dnl          PROLOGUE(mpn_add_n)
91dnl          ...
92dnl          EPILOGUE()
93
dnl  Expands to a GLOBL directive, TYPE and COFF_TYPE information and the
dnl  label for the given symbol, followed by at most one profiling hook
dnl  chosen by WANT_PROFILING: call_mcount for prof or gprof, or
dnl  call_instrument(enter) for instrument.
94define(`PROLOGUE_cpu',
95m4_assert_numargs(1)
96m4_assert_defined(`WANT_PROFILING')
97	`GLOBL	$1
98	TYPE($1,`function')
99	COFF_TYPE($1)
100$1:
101ifelse(WANT_PROFILING,`prof',      `	call_mcount')
102ifelse(WANT_PROFILING,`gprof',     `	call_mcount')
103ifelse(WANT_PROFILING,`instrument',`	call_instrument(enter)')
104')
105
106
107dnl  Usage: COFF_TYPE(GSYM_PREFIX`'foo)
108dnl
109dnl  Emit COFF style ".def ... .endef" type information for a function, when
110dnl  supported.  The argument should include any GSYM_PREFIX.
111dnl
112dnl  See autoconf macro GMP_ASM_COFF_TYPE for HAVE_COFF_TYPE.
113
dnl  Gives the .def/.scl/.type/.endef lines for the symbol when
dnl  HAVE_COFF_TYPE is yes, and nothing otherwise.
define(`COFF_TYPE',
m4_assert_numargs(1)
m4_assert_defined(`HAVE_COFF_TYPE')
`ifelse(HAVE_COFF_TYPE,`yes',
`.def	$1
	.scl	2
	.type	32
	.endef')')
122
123
124dnl  Usage: call_mcount
125dnl
126dnl  For `gprof' style profiling, %ebp is setup as a frame pointer.  None of
127dnl  the assembler routines use %ebp this way, so it's done only for the
128dnl  benefit of mcount.  glibc sysdeps/i386/i386-mcount.S shows how mcount
129dnl  gets the current function from (%esp) and the parent from 4(%ebp).
130dnl
131dnl  For `prof' style profiling gcc generates mcount calls without setting
132dnl  up %ebp, and the same is done here.
133
dnl  Implementation notes: when the MCOUNT_*_REG selected by PIC is not
dnl  empty, a 32-bit zero word labelled mcount_data_N is emitted in DATA and
dnl  its address is loaded into that register before the mcount call.  In
dnl  the PIC form %ebx is saved, set up from _GLOBAL_OFFSET_TABLE_ and
dnl  restored around the call.  mcount_counter (N) is incremented at the end
dnl  of every expansion so the labels differ between expansions.
134define(`call_mcount',
135m4_assert_numargs(-1)
136m4_assert_defined(`WANT_PROFILING')
137m4_assert_defined(`MCOUNT_PIC_REG')
138m4_assert_defined(`MCOUNT_NONPIC_REG')
139m4_assert_defined(`MCOUNT_PIC_CALL')
140m4_assert_defined(`MCOUNT_NONPIC_CALL')
141`ifelse(ifdef(`PIC',`MCOUNT_PIC_REG',`MCOUNT_NONPIC_REG'),,,
142`	DATA
143	ALIGN(4)
144L(mcount_data_`'mcount_counter):
145	W32	0
146	TEXT
147')dnl
148ifelse(WANT_PROFILING,`gprof',
149`	pushl	%ebp
150	movl	%esp, %ebp
151')dnl
152ifdef(`PIC',
153`	pushl	%ebx
154	call_movl_eip_to_ebx
155L(mcount_here_`'mcount_counter):
156	addl	$_GLOBAL_OFFSET_TABLE_+[.-L(mcount_here_`'mcount_counter)], %ebx
157ifelse(MCOUNT_PIC_REG,,,
158`	leal	L(mcount_data_`'mcount_counter)@GOTOFF(%ebx), MCOUNT_PIC_REG')
159MCOUNT_PIC_CALL
160	popl	%ebx
161',`dnl non-PIC
162ifelse(MCOUNT_NONPIC_REG,,,
163`	movl	`$'L(mcount_data_`'mcount_counter), MCOUNT_NONPIC_REG
164')dnl
165MCOUNT_NONPIC_CALL
166')dnl
167ifelse(WANT_PROFILING,`gprof',
168`	popl	%ebp
169')
170define(`mcount_counter',incr(mcount_counter))
171')
172
173define(mcount_counter,1)
174
175
176dnl  Usage: call_instrument(enter|exit)
177dnl
178dnl  Call __cyg_profile_func_enter or __cyg_profile_func_exit.
179dnl
180dnl  For PIC, most routines don't require _GLOBAL_OFFSET_TABLE_ themselves
181dnl  so %ebx is just setup for these calls.  It's a bit wasteful to repeat
182dnl  the setup for the exit call having done it earlier for the enter, but
183dnl  there's nowhere very convenient to hold %ebx through the length of a
184dnl  routine, in general.
185dnl
186dnl  For PIC, because instrument_current_function will be within the current
187dnl  object file we can get it just as an offset from %eip, there's no need
188dnl  to use the GOT.
189dnl
190dnl  No attempt is made to maintain the stack alignment gcc generates with
191dnl  -mpreferred-stack-boundary.  This wouldn't be hard, but it seems highly
192dnl  unlikely the instrumenting functions would be doing anything that'd
193dnl  benefit from alignment, in particular they're unlikely to be using
194dnl  doubles or long doubles on the stack.
195dnl
196dnl  The FRAME scheme is used to conveniently account for the register saves
197dnl  before accessing the return address.  Any previous value is saved and
198dnl  restored, since plenty of code keeps a value across a "ret" in the
199dnl  middle of a routine.
200
dnl  Implementation notes: FRAME is pushdef()ed to 0 on entry and popdef()ed
dnl  at the end, so the caller's FRAME value is untouched.  For exit, %eax
dnl  is saved and restored around the call.  Two words are pushed as
dnl  arguments, first the return address read from FRAME bytes above %esp
dnl  and then the address of instrument_current_function, and both are
dnl  removed afterwards with an addl of 8 to %esp.  instrument_count is
dnl  incremented on every expansion so each instrument_here_N label is
dnl  distinct.
201define(call_instrument,
202m4_assert_numargs(1)
203`	pushdef(`FRAME',0)
204ifelse($1,exit,
205`	pushl	%eax	FRAME_pushl()	C return value
206')
207ifdef(`PIC',
208`	pushl	%ebx	FRAME_pushl()
209	call_movl_eip_to_ebx
210L(instrument_here_`'instrument_count):
211	movl	%ebx, %ecx
212	addl	$_GLOBAL_OFFSET_TABLE_+[.-L(instrument_here_`'instrument_count)], %ebx
213	C use addl rather than leal to avoid old gas bugs, see mpn/x86/README
214	addl	$instrument_current_function-L(instrument_here_`'instrument_count), %ecx
215	pushl	m4_empty_if_zero(FRAME)(%esp)	FRAME_pushl()	C return addr
216	pushl	%ecx				FRAME_pushl()	C this function
217	call	GSYM_PREFIX`'__cyg_profile_func_$1@PLT
218	addl	$`'8, %esp
219	popl	%ebx
220',
221`	C non-PIC
222	pushl	m4_empty_if_zero(FRAME)(%esp)	FRAME_pushl()	C return addr
223	pushl	$instrument_current_function	FRAME_pushl()	C this function
224	call	GSYM_PREFIX`'__cyg_profile_func_$1
225	addl	$`'8, %esp
226')
227ifelse($1,exit,
228`	popl	%eax			C return value
229')
230	popdef(`FRAME')
231define(`instrument_count',incr(instrument_count))
232')
233define(instrument_count,1)
234
235
236dnl  Usage: instrument_current_function
237dnl
238dnl  Return the current function name for instrumenting purposes.  This is
239dnl  PROLOGUE_current_function, but it sticks at the first such name seen.
240dnl
241dnl  Sticking to the first name seen ensures that multiple-entrypoint
242dnl  functions like mpn_add_nc and mpn_add_n will make enter and exit calls
243dnl  giving the same function address.
244
dnl  The first expansion records PROLOGUE_current_function in
dnl  instrument_current_function_seen; later expansions give the recorded
dnl  name.
define(`instrument_current_function',
m4_assert_numargs(-1)
`ifdef(`instrument_current_function_seen',
	`instrument_current_function_seen',
	`define(`instrument_current_function_seen',
		PROLOGUE_current_function)dnl
PROLOGUE_current_function')')
251
252
253dnl  Usage: call_movl_eip_to_ebx
254dnl
255dnl  Generate a call to L(movl_eip_to_ebx), and record the need for that
256dnl  routine.
257
dnl  Note that the helper is wanted (movl_eip_to_ebx_needed becomes 1) and
dnl  give the call instruction on a line of its own.
define(`call_movl_eip_to_ebx',
m4_assert_numargs(-1)
`define(`movl_eip_to_ebx_needed',1)dnl
call	L(movl_eip_to_ebx)
')
262
263dnl  Usage: generate_movl_eip_to_ebx
264dnl
265dnl  Emit a L(movl_eip_to_ebx) routine, if needed and not already generated.
266
dnl  Nothing is emitted once movl_eip_to_ebx_done is 1, nor when
dnl  movl_eip_to_ebx_needed has not been set to 1.  Otherwise the helper is
dnl  emitted and movl_eip_to_ebx_done becomes 1.
define(`generate_movl_eip_to_ebx',
m4_assert_numargs(-1)
`ifelse(movl_eip_to_ebx_done,1,,
movl_eip_to_ebx_needed,1,
`L(movl_eip_to_ebx):
	movl	(%esp), %ebx
	ret_internal
define(`movl_eip_to_ebx_done',1)
')')
276
277
278dnl  Usage: ret
279dnl
280dnl  Generate a "ret", but if doing instrumented profiling then call
281dnl  __cyg_profile_func_exit first.
282
dnl  ret_instrument is chosen when WANT_PROFILING is instrument, otherwise
dnl  ret_internal.  Either way generate_movl_eip_to_ebx follows on the next
dnl  line, so a pending helper routine is placed after the return.
define(`ret',
m4_assert_numargs(-1)
m4_assert_defined(`WANT_PROFILING')
`ifelse(WANT_PROFILING,`instrument',`ret_instrument',`ret_internal')
generate_movl_eip_to_ebx
')
291
292
293dnl  Usage: ret_internal
294dnl
295dnl  A plain "ret", without any __cyg_profile_func_exit call.  This can be
296dnl  used for a return which is internal to some function, such as when
297dnl  getting %eip for PIC.
298
dnl  The result is double quoted so that the word ret comes out literally
dnl  and is not expanded again as the ret macro.
define(`ret_internal',
m4_assert_numargs(-1)
``ret'')
302
303
304dnl  Usage: ret_instrument
305dnl
306dnl  Generate call to __cyg_profile_func_exit and then a ret.  If a ret has
307dnl  already been seen from this function then jump to that chunk of code,
308dnl  rather than emitting it again.
309
dnl  Implementation notes: the "seen" flag is a macro whose name is
dnl  ret_instrument_seen_ followed by instrument_current_function.  The
dnl  first use for a function defines the flag as 1 and emits the
dnl  instrument_exit_ label, call_instrument(exit) and a ret_internal;
dnl  later uses emit only a jmp to that label.
310define(ret_instrument,
311m4_assert_numargs(-1)
312`ifelse(m4_unquote(ret_instrument_seen_`'instrument_current_function),1,
313`jmp	L(instrument_exit_`'instrument_current_function)',
314`define(ret_instrument_seen_`'instrument_current_function,1)
315L(instrument_exit_`'instrument_current_function):
316call_instrument(exit)
317	ret_internal')')
318
319
320dnl  Usage: _GLOBAL_OFFSET_TABLE_
321dnl
322dnl  Expand to _GLOBAL_OFFSET_TABLE_ plus any necessary underscore prefix.
323dnl  This lets us write plain _GLOBAL_OFFSET_TABLE_ in SVR4 style, but still
324dnl  work with systems requiring an extra underscore such as OpenBSD.
325dnl
326dnl  deflit is used so "leal _GLOBAL_OFFSET_TABLE_(%eax), %ebx" will come
327dnl  out right, though that form doesn't work properly in gas (see
328dnl  mpn/x86/README).
329
dnl  GOT_GSYM_PREFIX is expanded, the table name itself is kept quoted so it
dnl  is not expanded again.
deflit(`_GLOBAL_OFFSET_TABLE_',
m4_assert_defined(`GOT_GSYM_PREFIX')
`GOT_GSYM_PREFIX`_GLOBAL_OFFSET_TABLE_'')
333
334
335dnl  --------------------------------------------------------------------------
336dnl  Various x86 macros.
337dnl
338
339
340dnl  Usage: ALIGN_OFFSET(bytes,offset)
341dnl
342dnl  Align to `offset' away from a multiple of `bytes'.
343dnl
344dnl  This is useful for testing, for example align to something very strict
345dnl  and see what effect offsets from it have, "ALIGN_OFFSET(256,32)".
346dnl
347dnl  Generally you wouldn't execute across the padding, but it's done with
348dnl  nop's so it'll work.
349
dnl  ALIGN(bytes) followed by one nop line for each value of the forloop
dnl  from 1 to offset.
define(`ALIGN_OFFSET',
m4_assert_numargs(2)
`ALIGN($1)
forloop(`i',1,$2,
`	nop
')')
355
356
357dnl  Usage: defframe(name,offset)
358dnl
359dnl  Make a definition like the following with which to access a parameter
360dnl  or variable on the stack.
361dnl
362dnl         define(name,`FRAME+offset(%esp)')
363dnl
364dnl  Actually m4_empty_if_zero(FRAME+offset) is used, which will save one
365dnl  byte if FRAME+offset is zero, by putting (%esp) rather than 0(%esp).
366dnl  Use define(`defframe_empty_if_zero_disabled',1) if for some reason the
367dnl  zero offset is wanted.
368dnl
369dnl  The new macro also gets a check that when it's used FRAME is actually
370dnl  defined, and that the final %esp offset isn't negative, which would
371dnl  mean an attempt to access something below the current %esp.
372dnl
373dnl  deflit() is used rather than a plain define(), so the new macro won't
374dnl  delete any following parenthesized expression.  name(%edi) will come
375dnl  out say as 16(%esp)(%edi).  This isn't valid assembler and should
376dnl  provoke an error, which is better than silently giving just 16(%esp).
377dnl
378dnl  See README for more on the suggested way to access the stack frame.
379
dnl  Implementation notes: the outer quoted text runs when defframe is
dnl  called and makes a deflit() of `name'.  The inner quoted text becomes
dnl  the body of `name', so FRAME is looked up, and the not-below check
dnl  made, each time `name' is used rather than when defframe is called.
dnl  The offset $2 on the other hand is substituted when defframe is called.
380define(defframe,
381m4_assert_numargs(2)
382`deflit(`$1',
383m4_assert_defined(`FRAME')
384`defframe_check_notbelow(`$1',$2,FRAME)dnl
385defframe_empty_if_zero(FRAME+($2))(%esp)')')
386
dnl  Called: defframe_empty_if_zero(expression)
dnl
dnl  Pass expression to m4_empty_if_zero, or to a plain eval when
dnl  defframe_empty_if_zero_disabled is 1.
define(`defframe_empty_if_zero',
m4_assert_numargs(1)
`ifelse(defframe_empty_if_zero_disabled,1,`eval($1)',`m4_empty_if_zero($1)')')
393
dnl  Called: defframe_check_notbelow(`name',offset,FRAME)
dnl
dnl  Give an m4 error when offset plus FRAME is negative.
define(`defframe_check_notbelow',
m4_assert_numargs(3)
`ifelse(eval(($2)+($3)<0),1,
`m4_error(`$1 at frame offset $2 used when FRAME is only $3 bytes
')')')
400
401
402dnl  Usage: FRAME_pushl()
403dnl         FRAME_popl()
404dnl         FRAME_addl_esp(n)
405dnl         FRAME_subl_esp(n)
406dnl
407dnl  Adjust FRAME appropriately for a pushl or popl, or for an addl or subl
408dnl  %esp of n bytes.
409dnl
410dnl  Using these macros is completely optional.  Sometimes it makes more
411dnl  sense to put explicit deflit(`FRAME',N) forms, especially when there's
412dnl  jumps and different sequences of FRAME values need to be used in
413dnl  different places.
414
dnl  FRAME becomes FRAME plus 4.
define(`FRAME_pushl',
m4_assert_numargs(0)
m4_assert_defined(`FRAME')
`deflit(`FRAME',
	eval(FRAME+4))')
419
dnl  FRAME becomes FRAME minus 4.
define(`FRAME_popl',
m4_assert_numargs(0)
m4_assert_defined(`FRAME')
`deflit(`FRAME',
	eval(FRAME-4))')
424
dnl  FRAME becomes FRAME minus n.
define(`FRAME_addl_esp',
m4_assert_numargs(1)
m4_assert_defined(`FRAME')
`deflit(`FRAME',
	eval(FRAME-($1)))')
429
dnl  FRAME becomes FRAME plus n.
define(`FRAME_subl_esp',
m4_assert_numargs(1)
m4_assert_defined(`FRAME')
`deflit(`FRAME',
	eval(FRAME+($1)))')
434
435
436dnl  Usage: defframe_pushl(name)
437dnl
438dnl  Do a combination FRAME_pushl() and a defframe() to name the stack
439dnl  location just pushed.  This should come after a pushl instruction.
440dnl  Putting it on the same line works and avoids lengthening the code.  For
441dnl  example,
442dnl
443dnl         pushl   %eax     defframe_pushl(VAR_COUNTER)
444dnl
445dnl  Notice the defframe() is done with an unquoted -FRAME thus giving its
446dnl  current value without tracking future changes.
447
dnl  FRAME is advanced first, then name is defined at offset -FRAME using
dnl  the value FRAME has at that moment.
define(`defframe_pushl',
m4_assert_numargs(1)
`FRAME_pushl()defframe(`$1',-FRAME)')
451
452
453dnl  --------------------------------------------------------------------------
454dnl  Assembler instruction macros.
455dnl
456
457
458dnl  Usage: emms_or_femms
459dnl         femms_available_p
460dnl
461dnl  femms_available_p expands to 1 or 0 according to whether the AMD 3DNow
462dnl  femms instruction is available.  emms_or_femms expands to femms if
463dnl  available, or emms if not.
464dnl
465dnl  emms_or_femms is meant for use in the K6 directory where plain K6
466dnl  (without femms) and K6-2 and K6-3 (with a slightly faster femms) are
467dnl  supported together.
468dnl
469dnl  On K7 femms is no longer faster and is just an alias for emms, so plain
470dnl  emms may as well be used.
471
dnl  The test is m4_ifdef_anyof_p over the k62, k63 and athlon
dnl  HAVE_HOST_CPU symbols.
define(`femms_available_p',
m4_assert_numargs(-1)
`m4_ifdef_anyof_p(`HAVE_HOST_CPU_k62', `HAVE_HOST_CPU_k63',
	`HAVE_HOST_CPU_athlon')')
478
dnl  femms when femms_available_p gives 1, emms otherwise.
define(`emms_or_femms',
m4_assert_numargs(-1)
`ifelse(femms_available_p,1,
	`femms',
	`emms')')
482
483
484dnl  Usage: femms
485dnl
486dnl  Gas 2.9.1 which comes with FreeBSD 3.4 doesn't support femms, so the
487dnl  following is a replacement using .byte.
488
dnl  The instruction is given as the two bytes 15,14 with a C comment naming
dnl  it.
define(`femms',
m4_assert_numargs(-1)
`.byte	15,14	C AMD 3DNow femms')
492
493
494dnl  Usage: jadcl0(op)
495dnl
496dnl  Generate a jnc/incl as a substitute for adcl $0,op.  Note this isn't an
497dnl  exact replacement, since it doesn't set the flags like adcl does.
498dnl
499dnl  This finds a use in K6 mpn_addmul_1, mpn_submul_1, mpn_mul_basecase and
500dnl  mpn_sqr_basecase because on K6 an adcl is slow, the branch
501dnl  misprediction penalty is small, and the multiply algorithm used leads
502dnl  to a carry bit on average only 1/4 of the time.
503dnl
504dnl  jadcl0_disabled can be set to 1 to instead generate an ordinary adcl
505dnl  for comparison.  For example,
506dnl
507dnl		define(`jadcl0_disabled',1)
508dnl
509dnl  When using a register operand, eg. "jadcl0(%edx)", the jnc/incl code is
510dnl  the same size as an adcl.  This makes it possible to use the exact same
511dnl  computed jump code when testing the relative speed of the two.
512
dnl  Implementation notes: jadcl0_counter numbers the skip label and is
dnl  incremented only in the jnc/incl form; the adcl form used when
dnl  jadcl0_disabled is 1 leaves the counter alone.
513define(jadcl0,
514m4_assert_numargs(1)
515`ifelse(jadcl0_disabled,1,
516	`adcl	$`'0, $1',
517	`jnc	L(jadcl0_`'jadcl0_counter)
518	incl	$1
519L(jadcl0_`'jadcl0_counter):
520define(`jadcl0_counter',incr(jadcl0_counter))')')
521
522define(jadcl0_counter,1)
523
524
525dnl  Usage: x86_lookup(target, key,value, key,value, ...)
526dnl         x86_lookup_p(target, key,value, key,value, ...)
527dnl
528dnl  Look for `target' among the `key' parameters.
529dnl
530dnl  x86_lookup expands to the corresponding `value', or generates an error
531dnl  if `target' isn't found.
532dnl
533dnl  x86_lookup_p expands to 1 if `target' is found, or 0 if not.
534
dnl  One multi-branch ifelse: fewer than 3 arguments left means the target
dnl  was not found (error), a key equal to the target gives its value, and
dnl  otherwise the search recurses with the leading key,value pair dropped.
define(`x86_lookup',
m4_assert_numargs_range(1,999)
`ifelse(eval($#<3),1,
	`m4_error(`unrecognised part of x86 instruction: $1
')',
`$1',`$2',
	`$3',
	`x86_lookup(`$1',shift(shift(shift($@))))')')
542
dnl  Same search as x86_lookup, but giving 0 when the arguments run out and
dnl  1 when a key equals the target.
define(`x86_lookup_p',
m4_assert_numargs_range(1,999)
`ifelse(eval($#<3),1,
	`0',
`$1',`$2',
	`1',
	`x86_lookup_p(`$1',shift(shift(shift($@))))')')
548
549
550dnl  Usage: x86_opcode_reg32(reg)
551dnl         x86_opcode_reg32_p(reg)
552dnl
553dnl  x86_opcode_reg32 expands to the standard 3 bit encoding for the given
554dnl  32-bit register, eg. `%ebp' turns into 5.
555dnl
556dnl  x86_opcode_reg32_p expands to 1 if reg is a valid 32-bit register, or 0
557dnl  if not.
558
dnl  An x86_lookup of reg in x86_opcode_reg32_list.
define(`x86_opcode_reg32',
m4_assert_numargs(1)
`x86_lookup(`$1',x86_opcode_reg32_list)')
562
dnl  An x86_lookup_p of reg in x86_opcode_reg32_list.
define(`x86_opcode_reg32_p',
m4_assert_onearg()
`x86_lookup_p(`$1',x86_opcode_reg32_list)')
566
dnl  Register,number pairs in the key,value form x86_lookup takes.
define(`x86_opcode_reg32_list',
``%eax',0, `%ecx',1, `%edx',2, `%ebx',3,
`%esp',4, `%ebp',5, `%esi',6, `%edi',7')
576
577
578dnl  Usage: x86_opcode_tttn(cond)
579dnl
580dnl  Expand to the 4-bit "tttn" field value for the given x86 branch
581dnl  condition (like `c', `ae', etc).
582
dnl  An x86_lookup of cond in x86_opcode_tttn_list; an unrecognised
dnl  condition is reported by x86_lookup.  (The list name was previously
dnl  misspelled x86_opcode_ttn_list here, a macro which does not exist, so
dnl  the table was never consulted.)
define(`x86_opcode_tttn',
m4_assert_numargs(1)
`x86_lookup(`$1',x86_opcode_tttn_list)')
586
dnl  Condition,value pairs in the key,value form x86_lookup takes, one pair
dnl  per line.  Conditions sharing a value are listed together.
define(`x86_opcode_tttn_list',
``o',0,
`no',1,
`b',2,
`c',2,
`nae',2,
`nb',3,
`nc',3,
`ae',3,
`e',4,
`z',4,
`ne',5,
`nz',5,
`be',6,
`na',6,
`nbe',7,
`a',7,
`s',8,
`ns',9,
`p',10,
`pe',10,
`npo',10,
`np',11,
`npe',11,
`po',11,
`l',12,
`nge',12,
`nl',13,
`ge',13,
`le',14,
`ng',14,
`nle',15,
`g',15')
604
605
606dnl  Usage: cmovCC(%srcreg,%dstreg)
607dnl
608dnl  Emit a cmov instruction, using a .byte sequence, since various past
609dnl  versions of gas don't know cmov.  For example,
610dnl
611dnl         cmovz(  %eax, %ebx)
612dnl
613dnl  The source operand can only be a plain register.  (m4 code implementing
614dnl  full memory addressing modes exists, believe it or not, but isn't
615dnl  currently needed and isn't included.)
616dnl
617dnl  All the standard conditions are defined.  Attempting to use one without
618dnl  the macro parentheses, such as just "cmovbe %eax, %ebx", will provoke
619dnl  an error.  This protects against writing something old gas wouldn't
620dnl  understand.
621
dnl  Called: define_cmov_many(cond,tttn,cond,tttn,...)
dnl
dnl  Apply define_cmov to each cond,tttn pair in turn, stopping at the first
dnl  empty cond.
define(`define_cmov_many',
`ifelse(`$1',,,
`define_cmov(`$1',`$2')define_cmov_many(shift(shift($@)))')')
626
627dnl  Called: define_cmov(cond,tttn)
dnl  Emit basically define(cmov<cond>,`cmov_internal(<cond>,<tttn>,`$1',`$2')')
dnl  Implementation notes: m4_instruction_wrapper, m4_assert_numargs and
dnl  m4_doublequote are unquoted in the body, so they are expanded when
dnl  define_cmov runs and their results become part of the new cmov macro.
dnl  The parameters are spelled with an empty quote pair after the dollar
dnl  sign, presumably so they are not substituted by define_cmov itself and
dnl  end up as parameters of the new macro.  The tttn value is substituted
dnl  immediately.  The final line defines a cmov macro for every condition
dnl  in x86_opcode_tttn_list.
629define(define_cmov,
630m4_assert_numargs(2)
631`define(`cmov$1',
632m4_instruction_wrapper()
633m4_assert_numargs(2)
634`cmov_internal'(m4_doublequote($`'0),``$2'',dnl
635m4_doublequote($`'1),m4_doublequote($`'2)))')
636
637define_cmov_many(x86_opcode_tttn_list)
638
639dnl  Called: cmov_internal(name,tttn,src,dst)
dnl  Emits one .byte line of three values: 15, then 64 plus tttn, then 192
dnl  plus 8 times the dst register number plus the src register number.
dnl  Both registers go through x86_opcode_reg32, so an unknown register is
dnl  reported there.  A C comment showing name, src and dst follows on the
dnl  same line.
640define(cmov_internal,
641m4_assert_numargs(4)
642`.byte	dnl
64315, dnl
644eval(64+$2), dnl
645eval(192+8*x86_opcode_reg32(`$4')+x86_opcode_reg32(`$3')) dnl
646	C `$1 $3, $4'')
647
648
649dnl  Usage: x86_opcode_regmmx(reg)
650dnl
651dnl  Validate the given mmx register, and return its number, 0 to 7.
652
dnl  An x86_lookup of reg in x86_opcode_regmmx_list.
define(`x86_opcode_regmmx',
m4_assert_numargs(1)
`x86_lookup(`$1',x86_opcode_regmmx_list)')
656
dnl  Register,number pairs in the key,value form x86_lookup takes.
define(`x86_opcode_regmmx_list',
``%mm0',0, `%mm1',1, `%mm2',2, `%mm3',3,
`%mm4',4, `%mm5',5, `%mm6',6, `%mm7',7')
666
667
668dnl  Usage: psadbw(%srcreg,%dstreg)
669dnl
670dnl  Oldish versions of gas don't know psadbw, in particular gas 2.9.1 on
671dnl  FreeBSD 3.3 and 3.4 doesn't, so instead emit .byte sequences.  For
672dnl  example,
673dnl
674dnl         psadbw( %mm1, %mm2)
675dnl
676dnl  Only register->register forms are supported here, which suffices for
677dnl  the current code.
678
dnl  Emits the bytes 0x0f,0xf6 and then 192 plus 8 times the dst register
dnl  number ($2) plus the src register number ($1).  Both registers are
dnl  checked by x86_opcode_regmmx.  A C comment showing the instruction
dnl  follows on the same line.
679define(psadbw,
680m4_instruction_wrapper()
681m4_assert_numargs(2)
682`.byte 0x0f,0xf6,dnl
683eval(192+x86_opcode_regmmx(`$2')*8+x86_opcode_regmmx(`$1')) dnl
684	C `psadbw $1, $2'')
685
686
687dnl  Usage: Zdisp(inst,op,op,op)
688dnl
689dnl  Generate explicit .byte sequences if necessary to force a byte-sized
690dnl  zero displacement on an instruction.  For example,
691dnl
692dnl         Zdisp(  movl,   0,(%esi), %eax)
693dnl
694dnl  expands to
695dnl
696dnl                 .byte   139,70,0  C movl 0(%esi), %eax
697dnl
698dnl  If the displacement given isn't 0, then normal assembler code is
699dnl  generated.  For example,
700dnl
701dnl         Zdisp(  movl,   4,(%esi), %eax)
702dnl
703dnl  expands to
704dnl
705dnl                 movl    4(%esi), %eax
706dnl
707dnl  This means a single Zdisp() form can be used with an expression for the
708dnl  displacement, and .byte will be used only if necessary.  The
709dnl  displacement argument is eval()ed.
710dnl
711dnl  Because there aren't many places a 0(reg) form is wanted, Zdisp is
712dnl  implemented with a table of instructions and encodings.  A new entry is
713dnl  needed for any different operation or registers.  The table is split
714dnl  into separate macros to avoid overflowing BSD m4 macro expansion space.
715
dnl  Zdisp_found is set to 0, the four table macros are each given the
dnl  arguments in turn, and an error is raised if Zdisp_found is still 0
dnl  afterwards.
716define(Zdisp,
717m4_assert_numargs(4)
718`define(`Zdisp_found',0)dnl
719Zdisp_1($@)dnl
720Zdisp_2($@)dnl
721Zdisp_3($@)dnl
722Zdisp_4($@)dnl
723ifelse(Zdisp_found,0,
724`m4_error(`unrecognised instruction in Zdisp: $1 $2 $3 $4
725')')')
726
dnl  Table part 1: adcl, addl, sbbl, subl, movzbl, adc and sbb forms.  Each
dnl  entry gives Zdisp_match a four part pattern with a literal 0 in the
dnl  displacement position, the .byte encoding for that form, and then the
dnl  arguments this table macro was called with.
727define(Zdisp_1,`dnl
728Zdisp_match( adcl, 0,(%edx), %eax,        `0x13,0x42,0x00',           $@)`'dnl
729Zdisp_match( adcl, 0,(%edx), %ebx,        `0x13,0x5a,0x00',           $@)`'dnl
730Zdisp_match( adcl, 0,(%edx), %esi,        `0x13,0x72,0x00',           $@)`'dnl
731Zdisp_match( addl, %ebx, 0,(%edi),        `0x01,0x5f,0x00',           $@)`'dnl
732Zdisp_match( addl, %ecx, 0,(%edi),        `0x01,0x4f,0x00',           $@)`'dnl
733Zdisp_match( addl, %esi, 0,(%edi),        `0x01,0x77,0x00',           $@)`'dnl
734Zdisp_match( sbbl, 0,(%edx), %eax,        `0x1b,0x42,0x00',           $@)`'dnl
735Zdisp_match( sbbl, 0,(%edx), %esi,        `0x1b,0x72,0x00',           $@)`'dnl
736Zdisp_match( subl, %ecx, 0,(%edi),        `0x29,0x4f,0x00',           $@)`'dnl
737Zdisp_match( movzbl, 0,(%eax,%ebp), %eax, `0x0f,0xb6,0x44,0x28,0x00', $@)`'dnl
738Zdisp_match( movzbl, 0,(%ecx,%edi), %edi, `0x0f,0xb6,0x7c,0x39,0x00', $@)`'dnl
739Zdisp_match( adc, 0,(%ebx,%ecx,4), %eax,  `0x13,0x44,0x8b,0x00',      $@)`'dnl
740Zdisp_match( sbb, 0,(%ebx,%ecx,4), %eax,  `0x1b,0x44,0x8b,0x00',      $@)`'dnl
741')
dnl  Table part 2: movl and mov forms, in the same entry layout as the other
dnl  Zdisp table parts (pattern, .byte encoding, then the caller arguments).
742define(Zdisp_2,`dnl
743Zdisp_match( movl, %eax, 0,(%edi),        `0x89,0x47,0x00',           $@)`'dnl
744Zdisp_match( movl, %ebx, 0,(%edi),        `0x89,0x5f,0x00',           $@)`'dnl
745Zdisp_match( movl, %esi, 0,(%edi),        `0x89,0x77,0x00',           $@)`'dnl
746Zdisp_match( movl, 0,(%ebx), %eax,        `0x8b,0x43,0x00',           $@)`'dnl
747Zdisp_match( movl, 0,(%ebx), %esi,        `0x8b,0x73,0x00',           $@)`'dnl
748Zdisp_match( movl, 0,(%edx), %eax,        `0x8b,0x42,0x00',           $@)`'dnl
749Zdisp_match( movl, 0,(%esi), %eax,        `0x8b,0x46,0x00',           $@)`'dnl
750Zdisp_match( movl, 0,(%esi,%ecx,4), %eax, `0x8b,0x44,0x8e,0x00',      $@)`'dnl
751Zdisp_match( mov, 0,(%esi,%ecx,4), %eax,  `0x8b,0x44,0x8e,0x00',      $@)`'dnl
752Zdisp_match( mov, %eax, 0,(%edi,%ecx,4),  `0x89,0x44,0x8f,0x00',      $@)`'dnl
753')
dnl  Table part 3: movq forms, in the same entry layout as the other Zdisp
dnl  table parts (pattern, .byte encoding, then the caller arguments).
754define(Zdisp_3,`dnl
755Zdisp_match( movq, 0,(%eax,%ecx,8), %mm0, `0x0f,0x6f,0x44,0xc8,0x00', $@)`'dnl
756Zdisp_match( movq, 0,(%ebx,%eax,4), %mm0, `0x0f,0x6f,0x44,0x83,0x00', $@)`'dnl
757Zdisp_match( movq, 0,(%ebx,%eax,4), %mm2, `0x0f,0x6f,0x54,0x83,0x00', $@)`'dnl
758Zdisp_match( movq, 0,(%ebx,%ecx,4), %mm0, `0x0f,0x6f,0x44,0x8b,0x00', $@)`'dnl
759Zdisp_match( movq, 0,(%edx), %mm0,        `0x0f,0x6f,0x42,0x00',      $@)`'dnl
760Zdisp_match( movq, 0,(%esi), %mm0,        `0x0f,0x6f,0x46,0x00',      $@)`'dnl
761Zdisp_match( movq, %mm0, 0,(%edi),        `0x0f,0x7f,0x47,0x00',      $@)`'dnl
762Zdisp_match( movq, %mm2, 0,(%ecx,%eax,4), `0x0f,0x7f,0x54,0x81,0x00', $@)`'dnl
763Zdisp_match( movq, %mm2, 0,(%edx,%eax,4), `0x0f,0x7f,0x54,0x82,0x00', $@)`'dnl
764Zdisp_match( movq, %mm0, 0,(%edx,%ecx,8), `0x0f,0x7f,0x44,0xca,0x00', $@)`'dnl
765')
dnl  Table part 4: movd forms, in the same entry layout as the other Zdisp
dnl  table parts (pattern, .byte encoding, then the caller arguments).
766define(Zdisp_4,`dnl
767Zdisp_match( movd, 0,(%eax,%ecx,4), %mm0, `0x0f,0x6e,0x44,0x88,0x00', $@)`'dnl
768Zdisp_match( movd, 0,(%eax,%ecx,8), %mm1, `0x0f,0x6e,0x4c,0xc8,0x00', $@)`'dnl
769Zdisp_match( movd, 0,(%edx,%ecx,8), %mm0, `0x0f,0x6e,0x44,0xca,0x00', $@)`'dnl
770Zdisp_match( movd, %mm0, 0,(%eax,%ecx,4), `0x0f,0x7e,0x44,0x88,0x00', $@)`'dnl
771Zdisp_match( movd, %mm0, 0,(%ecx,%eax,4), `0x0f,0x7e,0x44,0x81,0x00', $@)`'dnl
772Zdisp_match( movd, %mm2, 0,(%ecx,%eax,4), `0x0f,0x7e,0x54,0x81,0x00', $@)`'dnl
773Zdisp_match( movd, %mm0, 0,(%edx,%ecx,4), `0x0f,0x7e,0x44,0x8a,0x00', $@)`'dnl
774')
775
dnl  Called: Zdisp_match(inst,p2,p3,p4, bytes, inst,a2,a3,a4)
dnl
dnl  $1 to $4 are a table pattern, $5 is its .byte encoding and $6 to $9 are
dnl  the arguments given to Zdisp.  Two pattern shapes are handled.
dnl
dnl  When the pattern has a literal 0 as $2 (displacement in the second
dnl  position), the entry matches if the instruction and the remaining two
dnl  parts are string-equal.  When it has a literal 0 as $3 (displacement in
dnl  the third position), the instruction, $2 and $4 must be string-equal.
dnl
dnl  On a match Zdisp_found is set to 1, and the actual displacement is
dnl  eval()ed: the .byte line is emitted if it is 0, otherwise the
dnl  instruction is written out normally with the operands as given.
776define(Zdisp_match,
777m4_assert_numargs(9)
778`ifelse(eval(m4_stringequal_p(`$1',`$6')
779	&& m4_stringequal_p(`$2',0)
780	&& m4_stringequal_p(`$3',`$8')
781	&& m4_stringequal_p(`$4',`$9')),1,
782`define(`Zdisp_found',1)dnl
783ifelse(eval(`$7'),0,
784`	.byte	$5  C `$1 0$3, $4'',
785`	$6	$7$8, $9')',
786
787`ifelse(eval(m4_stringequal_p(`$1',`$6')
788	&& m4_stringequal_p(`$2',`$7')
789	&& m4_stringequal_p(`$3',0)
790	&& m4_stringequal_p(`$4',`$9')),1,
791`define(`Zdisp_found',1)dnl
792ifelse(eval(`$8'),0,
793`	.byte	$5  C `$1 $2, 0$4'',
794`	$6	$7, $8$9')')')')
795
796
797dnl  Usage: shldl(count,src,dst)
798dnl         shrdl(count,src,dst)
799dnl         shldw(count,src,dst)
800dnl         shrdw(count,src,dst)
801dnl
802dnl  Generate a double-shift instruction, possibly omitting a %cl count
803dnl  parameter if that's what the assembler requires, as indicated by
804dnl  WANT_SHLDL_CL in config.m4.  For example,
805dnl
806dnl         shldl(  %cl, %eax, %ebx)
807dnl
808dnl  turns into either
809dnl
810dnl         shldl   %cl, %eax, %ebx
811dnl  or
812dnl         shldl   %eax, %ebx
813dnl
814dnl  Immediate counts are always passed through unchanged.  For example,
815dnl
816dnl         shrdl(  $2, %esi, %edi)
817dnl  becomes
818dnl         shrdl   $2, %esi, %edi
819dnl
820dnl
821dnl  If you forget to use the macro form "shldl( ...)" and instead write
822dnl  just a plain "shldl ...", an error results.  This ensures the necessary
823dnl  variant treatment of %cl isn't accidentally bypassed.
824
dnl  Called: define_shd_instruction(name)
dnl
dnl  Define a three argument macro called name which hands its own name and
dnl  its arguments to shd_instruction.  m4_instruction_wrapper,
dnl  m4_assert_numargs and m4_doublequote are unquoted here and so are
dnl  expanded when define_shd_instruction runs.  The parameters are spelled
dnl  with an empty quote pair after the dollar sign, presumably so they
dnl  become parameters of the new macro rather than of this one.
825define(define_shd_instruction,
826m4_assert_numargs(1)
827`define($1,
828m4_instruction_wrapper()
829m4_assert_numargs(3)
830`shd_instruction'(m4_doublequote($`'0),m4_doublequote($`'1),dnl
831m4_doublequote($`'2),m4_doublequote($`'3)))')
832
833dnl  Effectively: define(shldl,`shd_instruction(`$0',`$1',`$2',`$3')') etc
834define_shd_instruction(shldl)
835define_shd_instruction(shrdl)
836define_shd_instruction(shldw)
837define_shd_instruction(shrdw)
838
839dnl  Called: shd_instruction(op,count,src,dst)
dnl  The count is dropped only when it is exactly the string %cl and
dnl  WANT_SHLDL_CL is 0; in every other case all three operands are
dnl  written.  The pieces of the result are quoted, presumably so the
dnl  instruction name is not expanded again as the wrapper macro.
840define(shd_instruction,
841m4_assert_numargs(4)
842m4_assert_defined(`WANT_SHLDL_CL')
843`ifelse(eval(m4_stringequal_p(`$2',`%cl') && !WANT_SHLDL_CL),1,
844``$1'	`$3', `$4'',
845``$1'	`$2', `$3', `$4'')')
846
847
848dnl  Usage: ASSERT([cond][,instructions])
849dnl
850dnl  If WANT_ASSERT is 1, output the given instructions and expect the given
851dnl  flags condition to then be satisfied.  For example,
852dnl
853dnl         ASSERT(ne, `cmpl %eax, %ebx')
854dnl
855dnl  The instructions can be omitted to just assert a flags condition with
856dnl  no extra calculation.  For example,
857dnl
858dnl         ASSERT(nc)
859dnl
860dnl  When `instructions' is not empty, a pushf/popf is added to preserve the
861dnl  flags, but the instructions themselves must preserve any registers that
862dnl  matter.  FRAME is adjusted for the push and pop, so the instructions
863dnl  given can use defframe() stack variables.
864dnl
865dnl  The condition can be omitted to just output the given instructions when
866dnl  assertion checking is wanted.  In this case the pushf/popf is omitted.
867dnl  For example,
868dnl
869dnl         ASSERT(, `movl %eax, VAR_KEEPVAL')
870
dnl  Implementation notes: nothing at all is produced unless WANT_ASSERT is
dnl  1.  With a condition, a conditional jump on it skips over a ud2, and
dnl  ASSERT_counter is incremented so each ASSERT_ok label is distinct.
dnl  FRAME_pushl and FRAME_popl are applied for the pushf and popf only when
dnl  FRAME is defined.
871define(ASSERT,
872m4_assert_numargs_range(1,2)
873m4_assert_defined(`WANT_ASSERT')
874`ifelse(WANT_ASSERT,1,
875`ifelse(`$1',,
876	`$2',
877	`C ASSERT
878ifelse(`$2',,,`	pushf	ifdef(`FRAME',`FRAME_pushl()')')
879	$2
880	j`$1'	L(ASSERT_ok`'ASSERT_counter)
881	ud2	C assertion failed
882L(ASSERT_ok`'ASSERT_counter):
883ifelse(`$2',,,`	popf	ifdef(`FRAME',`FRAME_popl()')')
884define(`ASSERT_counter',incr(ASSERT_counter))')')')
885
886define(ASSERT_counter,1)
887
888
889dnl  Usage: movl_text_address(label,register)
890dnl
891dnl  Get the address of a text segment label, using either a plain movl or a
892dnl  position-independent calculation, as necessary.  For example,
893dnl
dnl         movl_text_address(L(foo),%eax)
895dnl
896dnl  This macro is only meant for use in ASSERT()s or when testing, since
897dnl  the PIC sequence it generates will want to be done with a ret balancing
898dnl  the call on CPUs with return address branch prediction.
899dnl
900dnl  The addl generated here has a backward reference to the label, and so
901dnl  won't suffer from the two forwards references bug in old gas (described
902dnl  in mpn/x86/README).
903
dnl  Implementation notes: with PIC defined, a call to the very next label
dnl  followed by a popl puts that label address in the register, and an addl
dnl  of the difference between the wanted label and that one completes the
dnl  calculation.  movl_text_address_counter numbers the local label and is
dnl  incremented only in this PIC form.  Without PIC a single movl of the
dnl  label address is used.
904define(movl_text_address,
905m4_assert_numargs(2)
906`ifdef(`PIC',
907	`call	L(movl_text_address_`'movl_text_address_counter)
908L(movl_text_address_`'movl_text_address_counter):
909	popl	$2	C %eip
910	addl	`$'$1-L(movl_text_address_`'movl_text_address_counter), $2
911define(`movl_text_address_counter',incr(movl_text_address_counter))',
912	`movl	`$'$1, $2')')
913
914define(movl_text_address_counter,1)
915
916
dnl  Usage: notl_or_xorl_GMP_NUMB_MASK(reg)
dnl
dnl  Expand to either "notl `reg'" or "xorl $GMP_NUMB_MASK,`reg'" as
dnl  appropriate for nails in use or not.
dnl
dnl  GMP_NAIL_BITS must be defined when this is used.  Without that check an
dnl  undefined GMP_NAIL_BITS would compare unequal to 0 and silently select
dnl  the xorl form.

define(`notl_or_xorl_GMP_NUMB_MASK',
m4_assert_numargs(1)
m4_assert_defined(`GMP_NAIL_BITS')
`ifelse(GMP_NAIL_BITS,0,
`notl	`$1'',
`xorl	$GMP_NUMB_MASK, `$1'')')
927
928
dnl  Usage: LEA(symbol,reg)
930
dnl  Load the address of symbol into reg through the GOT.
dnl
dnl  Each use first redefines EPILOGUE_cpu so that the epilogue emits a
dnl  L(movl_eip_<reg>) helper, which copies the word at (%esp) into reg and
dnl  returns with ret_internal, followed by a SIZE directive.  substr($2,1)
dnl  drops the first character of reg to form the label name.  The dollar
dnl  sign and the 1 in the SIZE line are split across quotes, presumably so
dnl  that they are left as the first parameter of EPILOGUE_cpu rather than
dnl  being substituted by LEA.
dnl
dnl  The code emitted in place is a call to the helper, an addl of
dnl  _GLOBAL_OFFSET_TABLE_ to reg, and a movl of symbol@GOT(reg) into reg.
dnl
dnl  NOTE(review): there is no argument count check and no PIC test here, so
dnl  this sequence is produced whether or not PIC is defined - confirm that
dnl  all callers guard for that.
931define(`LEA',`
932define(`EPILOGUE_cpu',
933`
934L(movl_eip_`'substr($2,1)):
935	movl	(%esp), $2
936	ret_internal
937	SIZE($'`1, .-$'`1)')
938
939        call    L(movl_eip_`'substr($2,1))
940        addl    $_GLOBAL_OFFSET_TABLE_, $2
941        movl    $1@GOT($2), $2
942')
943
944
dnl  Usage: DEF_OBJECT(name[,alignment])
dnl
dnl  Emit RODATA, an ALIGN and then the label name.  The ALIGN argument is 2
dnl  when only one argument is given, otherwise it is the second argument.
945define(`DEF_OBJECT',
946m4_assert_numargs_range(1,2)
947	`RODATA
948	ALIGN(ifelse($#,1,2,$2))
949$1:
950')
951
dnl  Usage: END_OBJECT(name)
dnl
dnl  Emit a SIZE directive for name, giving the distance from the name label
dnl  to the current location.
952define(`END_OBJECT',
953m4_assert_numargs(1)
954`	SIZE(`$1',.-`$1')')
955
dnl  Back to the default diversion now the definitions are finished.
956divert`'dnl
957