xref: /llvm-project/llvm/test/CodeGen/X86/materialize.ll (revision f6038096378e2d9870b1f29fb3a2ce442df88778)
1; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+cmov %s -o - | FileCheck %s --check-prefix=CHECK32
2; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+cmov %s -o - | FileCheck %s --check-prefix=CHECK64
3; RUN: llc -mtriple=x86_64-pc-win32 -mattr=+cmov %s -o - | FileCheck %s --check-prefix=CHECKWIN64
4
5; RUN: llc -mtriple=i686-unknown-linux-gnu -mattr=+cmov %s -o /dev/null \
6; RUN:     -print-after postrapseudos -filter-print-funcs pr26023 2>&1 \
7; RUN:    | FileCheck %s --check-prefix=OPERAND32
8; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+cmov %s -o /dev/null \
9; RUN:     -print-after postrapseudos -filter-print-funcs one64_minsize 2>&1 \
10; RUN:    | FileCheck %s --check-prefix=OPERAND64
11
; Baseline case: returns the constant 1 from a function that carries no
; size-optimization attribute. Both the 32-bit and 64-bit Linux runs expect a
; plain `movl $1, %eax` followed directly by the return.
12define i32 @one32_nooptsize() {
13entry:
14  ret i32 1
15
16; When not optimizing for size, use mov.
17; CHECK32-LABEL: one32_nooptsize:
18; CHECK32:       movl $1, %eax
19; CHECK32-NEXT:  retl
20; CHECK64-LABEL: one32_nooptsize:
21; CHECK64:       movl $1, %eax
22; CHECK64-NEXT:  retq
23}
24
; Returns the constant 1 from an `optsize` function.
; The 32-bit run expects the xor+inc sequence; the 64-bit run still expects a
; plain movl (see the FIXME below).
25define i32 @one32() optsize {
26entry:
27  ret i32 1
28
29; CHECK32-LABEL: one32:
30; CHECK32:       xorl %eax, %eax
31; CHECK32-NEXT:  incl %eax
32; CHECK32-NEXT:  retl
33
34; FIXME: Figure out the best approach in 64-bit mode.
35; CHECK64-LABEL: one32:
36; CHECK64:       movl $1, %eax
37; CHECK64-NEXT:  retq
38}
39
; Same as @one32, but instead of `optsize` the function carries `!prof !14`
; (a function_entry_count of 0, defined at the end of this file).
; The expected output is identical to @one32 on both targets.
40define i32 @one32_pgso() !prof !14 {
41entry:
42  ret i32 1
43
44; CHECK32-LABEL: one32_pgso:
45; CHECK32:       xorl %eax, %eax
46; CHECK32-NEXT:  incl %eax
47; CHECK32-NEXT:  retl
48
49; FIXME: Figure out the best approach in 64-bit mode.
50; CHECK64-LABEL: one32_pgso:
51; CHECK64:       movl $1, %eax
52; CHECK64-NEXT:  retq
53}
54
; Returns the constant 1 from a `minsize` function. Three targets are checked:
;   - i686 Linux:   xor+inc
;   - x86-64 Linux: push/pop through %rax, bracketed by CFA-offset adjustments
;   - Win64:        plain movl
55define i32 @one32_minsize() minsize {
56entry:
57  ret i32 1
58
59; On 32-bit, xor-inc is preferred over push-pop.
60; CHECK32-LABEL: one32_minsize:
61; CHECK32:       xorl %eax, %eax
62; CHECK32-NEXT:  incl %eax
63; CHECK32-NEXT:  retl
64
65; On 64-bit we don't do xor-inc yet, so push-pop it is. Note that we have to
66; pop into a 64-bit register even when we just need 32 bits.
67; CHECK64-LABEL: one32_minsize:
68; CHECK64:       pushq $1
69; CHECK64:       .cfi_adjust_cfa_offset 8
70; CHECK64:       popq %rax
71; CHECK64:       .cfi_adjust_cfa_offset -8
72; CHECK64-NEXT:  retq
73
74; On Win64 we can't adjust the stack unless there's a frame pointer.
75; CHECKWIN64-LABEL: one32_minsize:
76; CHECKWIN64:       movl $1, %eax
77; CHECKWIN64-NEXT:  retq
78}
79
; Regression test (presumably for PR26023, going by the function name).
; A `minsize` function with a 120-byte local array: it does a volatile store to
; the array's last byte, then runs an inline asm that needs the constant 5 in a
; register, then reloads the byte. The point is how the constant 5 is
; materialized around that store:
;   - x86-64 Linux must use movl (push/pop is not allowed here, see below)
;   - i686 Linux may use push/pop
; The i686 run that prints MIR after postrapseudos also verifies the implicit
; $esp operands on the push/pop pair.
80define i32 @pr26023() minsize {
81entry:
82  %x = alloca [120 x i8]
83  call void asm sideeffect "", "imr,~{memory},~{dirflag},~{fpsr},~{flags}"(ptr %x)
84  %arrayidx = getelementptr inbounds [120 x i8], ptr %x, i64 0, i64 119
85  store volatile i8 -2, ptr %arrayidx
86  call void asm sideeffect "", "r,~{dirflag},~{fpsr},~{flags}"(i32 5)
87  %0 = load volatile i8, ptr %arrayidx
88  %conv = sext i8 %0 to i32
89  ret i32 %conv
90
91; The function writes to the redzone, so push/pop cannot be used.
92; CHECK64-LABEL: pr26023:
93; CHECK64:       movl $5, %ecx
94; CHECK64:       retq
95
96; 32-bit X86 doesn't have a redzone.
97; CHECK32-LABEL: pr26023:
98; CHECK32:       pushl $5
99; CHECK32:       popl %ecx
100; CHECK32:       retl
101
102; Check push/pop have implicit def/use of $esp
103; OPERAND32:      PUSH32i 5, implicit-def $esp, implicit $esp
104; OPERAND32-NEXT: CFI_INSTRUCTION adjust_cfa_offset 4
105; OPERAND32-NEXT: renamable $ecx = POP32r implicit-def $esp, implicit $esp
106; OPERAND32-NEXT: CFI_INSTRUCTION adjust_cfa_offset -4
107}
108
109
; Returns the 64-bit constant 1 from a `minsize` function.
;   - x86-64 Linux: push/pop through %rax with CFA-offset adjustments
;   - Win64:        movl $1, %eax
; The x86-64 run that prints MIR after postrapseudos additionally verifies the
; implicit $rsp operands on the push/pop pair.
110define i64 @one64_minsize() minsize {
111entry:
112  ret i64 1
113; On 64-bit we don't do xor-inc yet, so push-pop it is.
114; CHECK64-LABEL: one64_minsize:
115; CHECK64:       pushq $1
116; CHECK64:       .cfi_adjust_cfa_offset 8
117; CHECK64:       popq %rax
118; CHECK64:       .cfi_adjust_cfa_offset -8
119; CHECK64-NEXT:  retq
120
121; On Win64 we can't adjust the stack unless there's a frame pointer.
122; CHECKWIN64-LABEL: one64_minsize:
123; CHECKWIN64:       movl $1, %eax
124; CHECKWIN64-NEXT:  retq
125
126; Check push/pop have implicit def/use of $rsp
127; OPERAND64:      PUSH64i32 1, implicit-def $rsp, implicit $rsp
128; OPERAND64-NEXT: CFI_INSTRUCTION adjust_cfa_offset 8
129; OPERAND64-NEXT: $rax = POP64r implicit-def $rsp, implicit $rsp
130; OPERAND64-NEXT: CFI_INSTRUCTION adjust_cfa_offset -8
131; OPERAND64-NEXT: RET 0, $rax
132}
133
; Returns the constant -1 from an `optsize` function.
; Only the 32-bit run is checked; it expects the xor+dec sequence.
134define i32 @minus_one32() optsize {
135entry:
136  ret i32 -1
137
138; CHECK32-LABEL: minus_one32:
139; CHECK32:       xorl %eax, %eax
140; CHECK32-NEXT:  decl %eax
141; CHECK32-NEXT:  retl
142}
143
; Same as @minus_one32, but tagged with `!prof !14` (function_entry_count 0)
; instead of `optsize`. The 32-bit run expects the same xor+dec sequence.
144define i32 @minus_one32_pgso() !prof !14 {
145entry:
146  ret i32 -1
147
148; CHECK32-LABEL: minus_one32_pgso:
149; CHECK32:       xorl %eax, %eax
150; CHECK32-NEXT:  decl %eax
151; CHECK32-NEXT:  retl
152}
153
; Returns the constant -1 from a `minsize` function.
; Only the 32-bit run is checked; it expects xor+dec rather than push/pop.
154define i32 @minus_one32_minsize() minsize {
155entry:
156  ret i32 -1
157
158; xor-dec is preferred over push-pop.
159; CHECK32-LABEL: minus_one32_minsize:
160; CHECK32:       xorl %eax, %eax
161; CHECK32-NEXT:  decl %eax
162; CHECK32-NEXT:  retl
163}
164
; Returns the 16-bit constant 1 from an `optsize` function.
; The 32-bit run expects the xor+inc sequence on the full 32-bit %eax, followed
; by a "# kill" annotation line before the return.
165define i16 @one16() optsize {
166entry:
167  ret i16 1
168
169; CHECK32-LABEL: one16:
170; CHECK32:       xorl %eax, %eax
171; CHECK32-NEXT:  incl %eax
172; CHECK32-NEXT:  # kill
173; CHECK32-NEXT:  retl
174}
175
; Returns the 16-bit constant -1 from an `optsize` function.
; The 32-bit run expects the xor+dec sequence on the full 32-bit %eax, followed
; by a "# kill" annotation line before the return.
176define i16 @minus_one16() optsize {
177entry:
178  ret i16 -1
179
180; CHECK32-LABEL: minus_one16:
181; CHECK32:       xorl %eax, %eax
182; CHECK32-NEXT:  decl %eax
183; CHECK32-NEXT:  # kill
184; CHECK32-NEXT:  retl
185}
186
; Same as @one16, but tagged with `!prof !14` (function_entry_count 0) instead
; of `optsize`. The 32-bit run expects the same xor+inc / "# kill" output.
187define i16 @one16_pgso() !prof !14 {
188entry:
189  ret i16 1
190
191; CHECK32-LABEL: one16_pgso:
192; CHECK32:       xorl %eax, %eax
193; CHECK32-NEXT:  incl %eax
194; CHECK32-NEXT:  # kill
195; CHECK32-NEXT:  retl
196}
197
; Same as @minus_one16, but tagged with `!prof !14` (function_entry_count 0)
; instead of `optsize`. The 32-bit run expects the same xor+dec / "# kill" output.
198define i16 @minus_one16_pgso() !prof !14 {
199entry:
200  ret i16 -1
201
202; CHECK32-LABEL: minus_one16_pgso:
203; CHECK32:       xorl %eax, %eax
204; CHECK32-NEXT:  decl %eax
205; CHECK32-NEXT:  # kill
206; CHECK32-NEXT:  retl
207}
208
; Returns the constant -5 from a `minsize` function.
; Only the 32-bit run is checked; it expects the value to be materialized with
; a pushl of the immediate followed by a popl into %eax.
209define i32 @minus_five32() minsize {
210entry:
211  ret i32 -5
212
213; CHECK32-LABEL: minus_five32:
214; CHECK32: pushl $-5
215; CHECK32: popl %eax
216; CHECK32: retl
217}
218
; Returns the 64-bit constant -5 from a `minsize` function.
; Only the x86-64 Linux run is checked; it expects pushq of the immediate and
; popq into %rax, each followed by the matching CFA-offset adjustment.
219define i64 @minus_five64() minsize {
220entry:
221  ret i64 -5
222
223; CHECK64-LABEL: minus_five64:
224; CHECK64: pushq $-5
225; CHECK64:       .cfi_adjust_cfa_offset 8
226; CHECK64: popq %rax
227; CHECK64:       .cfi_adjust_cfa_offset -8
228; CHECK64: retq
229}
230
; Rematerialization test for -1 in an `optsize` function.
; -1 is first needed in %ecx for the thiscall, then every general-purpose
; register except %esp is clobbered by inline asm, and finally -1 is returned.
; The 32-bit run expects xor+dec to be emitted twice (once into %ecx before the
; call, once into %eax for the return) with no other reference to %eax between
; the second xor+dec and the return.
; The label check below includes the trailing ':' so that it anchors on this
; function's symbol definition and not on a longer name sharing the same prefix.
231define i32 @rematerialize_minus_one() optsize {
232entry:
233  ; Materialize -1 (thiscall forces it into %ecx).
234  tail call x86_thiscallcc void @f(i32 -1)
235
236  ; Clobber all registers except %esp, leaving nowhere to store the -1 besides
237  ; spilling it to the stack.
238  tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{edi},~{esi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
239
240  ; -1 should be re-materialized here instead of getting spilled above.
241  ret i32 -1
242
243; CHECK32-LABEL: rematerialize_minus_one:
244; CHECK32:       xorl %ecx, %ecx
245; CHECK32-NEXT:  decl %ecx
246; CHECK32:       calll
247; CHECK32:       xorl %eax, %eax
248; CHECK32-NEXT:  decl %eax
249; CHECK32-NOT:   %eax
250; CHECK32:       retl
251}
252
; Rematerialization test for -1 when EFLAGS is live, in an `optsize` function.
; -1 is first needed in %ecx for the thiscall, then every general-purpose
; register except %esp is clobbered by inline asm. Afterwards a compare defines
; EFLAGS and a select needs -1 again right before the cmov that consumes those
; flags. The 32-bit run therefore expects xor+dec before the call, but a plain
; `movl $-1` (and no xorl) between the setne and the cmov.
; The label check below includes the trailing ':' so that it anchors on this
; function's symbol definition and not on a longer name sharing the same prefix.
253define i32 @rematerialize_minus_one_eflags(i32 %x) optsize {
254entry:
255  ; Materialize -1 (thiscall forces it into %ecx).
256  tail call x86_thiscallcc void @f(i32 -1)
257
258  ; Clobber all registers except %esp, leaving nowhere to store the -1 besides
259  ; spilling it to the stack.
260  tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{edi},~{esi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
261
262  ; Define eflags.
263  %a = icmp ne i32 %x, 123
264  %b = zext i1 %a to i32
265  ; Cause -1 to be rematerialized right in front of the cmov, which needs eflags.
266  ; It must therefore not use the xor-dec lowering.
267  %c = select i1 %a, i32 %b, i32 -1
268  ret i32 %c
269
270; CHECK32-LABEL: rematerialize_minus_one_eflags:
271; CHECK32:       xorl %ecx, %ecx
272; CHECK32-NEXT:  decl %ecx
273; CHECK32:       calll
274; CHECK32:       cmpl
275; CHECK32:       setne
276; CHECK32-NOT:   xorl
277; CHECK32:       movl $-1
278; CHECK32:       cmov
279; CHECK32:       retl
280}
281
; Same scenario as @rematerialize_minus_one, but the function is tagged with
; `!prof !14` (function_entry_count 0) instead of `optsize`.
; The 32-bit run expects xor+dec to be emitted twice (once into %ecx before the
; call, once into %eax for the return) with no other reference to %eax between
; the second xor+dec and the return.
; The label check below includes the trailing ':' so that it anchors on this
; function's symbol definition rather than on any line that merely contains
; the name.
282define i32 @rematerialize_minus_one_pgso() !prof !14 {
283entry:
284  ; Materialize -1 (thiscall forces it into %ecx).
285  tail call x86_thiscallcc void @f(i32 -1)
286
287  ; Clobber all registers except %esp, leaving nowhere to store the -1 besides
288  ; spilling it to the stack.
289  tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{edi},~{esi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
290
291  ; -1 should be re-materialized here instead of getting spilled above.
292  ret i32 -1
293
294; CHECK32-LABEL: rematerialize_minus_one_pgso:
295; CHECK32:       xorl %ecx, %ecx
296; CHECK32-NEXT:  decl %ecx
297; CHECK32:       calll
298; CHECK32:       xorl %eax, %eax
299; CHECK32-NEXT:  decl %eax
300; CHECK32-NOT:   %eax
301; CHECK32:       retl
302}
303
; Same scenario as @rematerialize_minus_one_eflags, but the function is tagged
; with `!prof !14` (function_entry_count 0) instead of `optsize`.
; The 32-bit run expects xor+dec before the call, but a plain `movl $-1` (and
; no xorl) between the setne and the cmov, because EFLAGS is live there.
; The label check below includes the trailing ':' so that it anchors on this
; function's symbol definition rather than on any line that merely contains
; the name.
304define i32 @rematerialize_minus_one_eflags_pgso(i32 %x) !prof !14 {
305entry:
306  ; Materialize -1 (thiscall forces it into %ecx).
307  tail call x86_thiscallcc void @f(i32 -1)
308
309  ; Clobber all registers except %esp, leaving nowhere to store the -1 besides
310  ; spilling it to the stack.
311  tail call void asm sideeffect "", "~{eax},~{ebx},~{ecx},~{edx},~{edi},~{esi},~{ebp},~{dirflag},~{fpsr},~{flags}"()
312
313  ; Define eflags.
314  %a = icmp ne i32 %x, 123
315  %b = zext i1 %a to i32
316  ; Cause -1 to be rematerialized right in front of the cmov, which needs eflags.
317  ; It must therefore not use the xor-dec lowering.
318  %c = select i1 %a, i32 %b, i32 -1
319  ret i32 %c
320
321; CHECK32-LABEL: rematerialize_minus_one_eflags_pgso:
322; CHECK32:       xorl %ecx, %ecx
323; CHECK32-NEXT:  decl %ecx
324; CHECK32:       calll
325; CHECK32:       cmpl
326; CHECK32:       setne
327; CHECK32-NOT:   xorl
328; CHECK32:       movl $-1
329; CHECK32:       cmov
330; CHECK32:       retl
331}
332
; External callee used by the rematerialize_* tests. Per the comments at the
; call sites, the thiscall convention is what forces the argument into %ecx.
333declare x86_thiscallcc void @f(i32)
334
; Module-level ProfileSummary, attached through the module flags.
; It exists for the *_pgso tests: those functions carry `!prof !14` (entry
; count 0) and are expected to be lowered like their `optsize` counterparts.
; NOTE(review): presumably the zero entry count makes them cold relative to
; this summary, which triggers profile-guided size optimization -- confirm.
335!llvm.module.flags = !{!0}
336!0 = !{i32 1, !"ProfileSummary", !1}
337!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
338!2 = !{!"ProfileFormat", !"InstrProf"}
339!3 = !{!"TotalCount", i64 10000}
340!4 = !{!"MaxCount", i64 10}
341!5 = !{!"MaxInternalCount", i64 1}
342!6 = !{!"MaxFunctionCount", i64 1000}
343!7 = !{!"NumCounts", i64 3}
344!8 = !{!"NumFunctions", i64 3}
345!9 = !{!"DetailedSummary", !10}
346!10 = !{!11, !12, !13}
347!11 = !{i32 10000, i64 100, i32 1}
348!12 = !{i32 999000, i64 100, i32 1}
349!13 = !{i32 999999, i64 1, i32 2}
; Entry count of 0, referenced by every *_pgso function in this file.
350!14 = !{!"function_entry_count", i64 0}
351