xref: /llvm-project/llvm/test/CodeGen/X86/cmovcmov.ll (revision a2a0089ac3a5781ba74d4d319c87c9e8b46d4eda)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-linux | FileCheck %s --check-prefix=CMOV
3; RUN: llc < %s -mtriple=i686-unknown-linux | FileCheck %s --check-prefix=NOCMOV
4
; Module-level data layout string shared by every function in this file.
; NOTE(review): "m:o" requests Mach-O style name mangling, while both test
; configurations use Linux triples -- presumably harmless here, but confirm
; that it is intentional.
5target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
6
7; Test 2xCMOV patterns exposed after legalization.
8; One way to do that is with (select (fcmp une/oeq)), which gets
9; legalized to setp/setne.
10
; i32 select driven by an ordered-equal (oeq) float compare.
; The x86_64 run expects one ucomiss feeding two conditional moves (cmovne,
; then cmovp) that both move the false operand (%d) over the true operand (%c).
; The i686 run expects the same two conditions as a jne/jp branch pair that
; picks the address of one stack argument, followed by a single load.
11define dso_local i32 @test_select_fcmp_oeq_i32(float %a, float %b, i32 %c, i32 %d) nounwind {
12; CMOV-LABEL: test_select_fcmp_oeq_i32:
13; CMOV:       # %bb.0: # %entry
14; CMOV-NEXT:    movl %edi, %eax
15; CMOV-NEXT:    ucomiss %xmm1, %xmm0
16; CMOV-NEXT:    cmovnel %esi, %eax
17; CMOV-NEXT:    cmovpl %esi, %eax
18; CMOV-NEXT:    retq
19;
20; NOCMOV-LABEL: test_select_fcmp_oeq_i32:
21; NOCMOV:       # %bb.0: # %entry
22; NOCMOV-NEXT:    flds {{[0-9]+}}(%esp)
23; NOCMOV-NEXT:    flds {{[0-9]+}}(%esp)
24; NOCMOV-NEXT:    fucompp
25; NOCMOV-NEXT:    fnstsw %ax
26; NOCMOV-NEXT:    # kill: def $ah killed $ah killed $ax
27; NOCMOV-NEXT:    sahf
28; NOCMOV-NEXT:    leal {{[0-9]+}}(%esp), %eax
29; NOCMOV-NEXT:    jne .LBB0_3
30; NOCMOV-NEXT:  # %bb.1: # %entry
31; NOCMOV-NEXT:    jp .LBB0_3
32; NOCMOV-NEXT:  # %bb.2: # %entry
33; NOCMOV-NEXT:    leal {{[0-9]+}}(%esp), %eax
34; NOCMOV-NEXT:  .LBB0_3: # %entry
35; NOCMOV-NEXT:    movl (%eax), %eax
36; NOCMOV-NEXT:    retl
37entry:
; oeq is true only when neither operand is NaN and the operands are equal,
; which is why the expected code tests two flag conditions.
38  %cmp = fcmp oeq float %a, %b
39  %r = select i1 %cmp, i32 %c, i32 %d
40  ret i32 %r
41}
42
; i64 variant of the ordered-equal select.
; The x86_64 run expects the 64-bit cmovne/cmovp pair after one ucomiss.
; The i686 run expects a jne/jp branch pair that selects a stack address in
; %ecx, after which the i64 result is loaded as two 32-bit halves into
; %eax and %edx.
43define i64 @test_select_fcmp_oeq_i64(float %a, float %b, i64 %c, i64 %d) nounwind {
44; CMOV-LABEL: test_select_fcmp_oeq_i64:
45; CMOV:       # %bb.0: # %entry
46; CMOV-NEXT:    movq %rdi, %rax
47; CMOV-NEXT:    ucomiss %xmm1, %xmm0
48; CMOV-NEXT:    cmovneq %rsi, %rax
49; CMOV-NEXT:    cmovpq %rsi, %rax
50; CMOV-NEXT:    retq
51;
52; NOCMOV-LABEL: test_select_fcmp_oeq_i64:
53; NOCMOV:       # %bb.0: # %entry
54; NOCMOV-NEXT:    flds {{[0-9]+}}(%esp)
55; NOCMOV-NEXT:    flds {{[0-9]+}}(%esp)
56; NOCMOV-NEXT:    fucompp
57; NOCMOV-NEXT:    fnstsw %ax
58; NOCMOV-NEXT:    # kill: def $ah killed $ah killed $ax
59; NOCMOV-NEXT:    sahf
60; NOCMOV-NEXT:    leal {{[0-9]+}}(%esp), %ecx
61; NOCMOV-NEXT:    jne .LBB1_3
62; NOCMOV-NEXT:  # %bb.1: # %entry
63; NOCMOV-NEXT:    jp .LBB1_3
64; NOCMOV-NEXT:  # %bb.2: # %entry
65; NOCMOV-NEXT:    leal {{[0-9]+}}(%esp), %ecx
66; NOCMOV-NEXT:  .LBB1_3: # %entry
67; NOCMOV-NEXT:    movl (%ecx), %eax
68; NOCMOV-NEXT:    movl 4(%ecx), %edx
69; NOCMOV-NEXT:    retl
70entry:
71  %cmp = fcmp oeq float %a, %b
72  %r = select i1 %cmp, i64 %c, i64 %d
73  ret i64 %r
74}
75
; i64 select driven by an unordered-or-not-equal (une) float compare.
; Compared with the oeq variant, the x86_64 run expects the operand roles to
; be swapped: the result starts as %d (%rsi) and the cmovne/cmovp pair moves
; %c (%rdi) into it. The i686 run expects the jne/jp branch pair followed by
; a two-half load of the selected i64.
76define i64 @test_select_fcmp_une_i64(float %a, float %b, i64 %c, i64 %d) nounwind {
77; CMOV-LABEL: test_select_fcmp_une_i64:
78; CMOV:       # %bb.0: # %entry
79; CMOV-NEXT:    movq %rsi, %rax
80; CMOV-NEXT:    ucomiss %xmm1, %xmm0
81; CMOV-NEXT:    cmovneq %rdi, %rax
82; CMOV-NEXT:    cmovpq %rdi, %rax
83; CMOV-NEXT:    retq
84;
85; NOCMOV-LABEL: test_select_fcmp_une_i64:
86; NOCMOV:       # %bb.0: # %entry
87; NOCMOV-NEXT:    flds {{[0-9]+}}(%esp)
88; NOCMOV-NEXT:    flds {{[0-9]+}}(%esp)
89; NOCMOV-NEXT:    fucompp
90; NOCMOV-NEXT:    fnstsw %ax
91; NOCMOV-NEXT:    # kill: def $ah killed $ah killed $ax
92; NOCMOV-NEXT:    sahf
93; NOCMOV-NEXT:    leal {{[0-9]+}}(%esp), %ecx
94; NOCMOV-NEXT:    jne .LBB2_3
95; NOCMOV-NEXT:  # %bb.1: # %entry
96; NOCMOV-NEXT:    jp .LBB2_3
97; NOCMOV-NEXT:  # %bb.2: # %entry
98; NOCMOV-NEXT:    leal {{[0-9]+}}(%esp), %ecx
99; NOCMOV-NEXT:  .LBB2_3: # %entry
100; NOCMOV-NEXT:    movl (%ecx), %eax
101; NOCMOV-NEXT:    movl 4(%ecx), %edx
102; NOCMOV-NEXT:    retl
103entry:
; une is true when either operand is NaN or the operands differ.
104  %cmp = fcmp une float %a, %b
105  %r = select i1 %cmp, i64 %c, i64 %d
106  ret i64 %r
107}
108
; double select driven by an ordered-equal float compare.
; Here even the x86_64 run expects branches rather than conditional moves:
; a jne/jp pair skips a movaps that copies %c (%xmm2) over %d (%xmm3).
; The i686 run expects a jne/jp pair that selects a stack address, followed
; by an fldl of the chosen double.
109define dso_local double @test_select_fcmp_oeq_f64(float %a, float %b, double %c, double %d) nounwind {
110; CMOV-LABEL: test_select_fcmp_oeq_f64:
111; CMOV:       # %bb.0: # %entry
112; CMOV-NEXT:    ucomiss %xmm1, %xmm0
113; CMOV-NEXT:    jne .LBB3_3
114; CMOV-NEXT:  # %bb.1: # %entry
115; CMOV-NEXT:    jp .LBB3_3
116; CMOV-NEXT:  # %bb.2: # %entry
117; CMOV-NEXT:    movaps %xmm2, %xmm3
118; CMOV-NEXT:  .LBB3_3: # %entry
119; CMOV-NEXT:    movaps %xmm3, %xmm0
120; CMOV-NEXT:    retq
121;
122; NOCMOV-LABEL: test_select_fcmp_oeq_f64:
123; NOCMOV:       # %bb.0: # %entry
124; NOCMOV-NEXT:    flds {{[0-9]+}}(%esp)
125; NOCMOV-NEXT:    flds {{[0-9]+}}(%esp)
126; NOCMOV-NEXT:    fucompp
127; NOCMOV-NEXT:    fnstsw %ax
128; NOCMOV-NEXT:    # kill: def $ah killed $ah killed $ax
129; NOCMOV-NEXT:    sahf
130; NOCMOV-NEXT:    leal {{[0-9]+}}(%esp), %eax
131; NOCMOV-NEXT:    jne .LBB3_3
132; NOCMOV-NEXT:  # %bb.1: # %entry
133; NOCMOV-NEXT:    jp .LBB3_3
134; NOCMOV-NEXT:  # %bb.2: # %entry
135; NOCMOV-NEXT:    leal {{[0-9]+}}(%esp), %eax
136; NOCMOV-NEXT:  .LBB3_3: # %entry
137; NOCMOV-NEXT:    fldl (%eax)
138; NOCMOV-NEXT:    retl
139entry:
140  %cmp = fcmp oeq float %a, %b
141  %r = select i1 %cmp, double %c, double %d
142  ret double %r
143}
144
; <4 x i32> select driven by an ordered-equal float compare.
; The x86_64 run expects a single jne/jp branch pair around a movaps of the
; whole vector register. The i686 run expects the vector to be handled as
; four 32-bit elements: four separate jne/jp branch pairs, all following the
; one fucompp/sahf compare, each selecting a stack address. The four loaded
; values are then stored through a pointer read from the stack and the
; function returns with `retl $4`.
145define <4 x i32> @test_select_fcmp_oeq_v4i32(float %a, float %b, <4 x i32> %c, <4 x i32> %d) nounwind {
146; CMOV-LABEL: test_select_fcmp_oeq_v4i32:
147; CMOV:       # %bb.0: # %entry
148; CMOV-NEXT:    ucomiss %xmm1, %xmm0
149; CMOV-NEXT:    jne .LBB4_3
150; CMOV-NEXT:  # %bb.1: # %entry
151; CMOV-NEXT:    jp .LBB4_3
152; CMOV-NEXT:  # %bb.2: # %entry
153; CMOV-NEXT:    movaps %xmm2, %xmm3
154; CMOV-NEXT:  .LBB4_3: # %entry
155; CMOV-NEXT:    movaps %xmm3, %xmm0
156; CMOV-NEXT:    retq
157;
158; NOCMOV-LABEL: test_select_fcmp_oeq_v4i32:
159; NOCMOV:       # %bb.0: # %entry
160; NOCMOV-NEXT:    pushl %edi
161; NOCMOV-NEXT:    pushl %esi
162; NOCMOV-NEXT:    flds {{[0-9]+}}(%esp)
163; NOCMOV-NEXT:    flds {{[0-9]+}}(%esp)
164; NOCMOV-NEXT:    fucompp
165; NOCMOV-NEXT:    fnstsw %ax
166; NOCMOV-NEXT:    # kill: def $ah killed $ah killed $ax
167; NOCMOV-NEXT:    sahf
168; NOCMOV-NEXT:    leal {{[0-9]+}}(%esp), %ecx
169; NOCMOV-NEXT:    jne .LBB4_3
170; NOCMOV-NEXT:  # %bb.1: # %entry
171; NOCMOV-NEXT:    jp .LBB4_3
172; NOCMOV-NEXT:  # %bb.2: # %entry
173; NOCMOV-NEXT:    leal {{[0-9]+}}(%esp), %ecx
174; NOCMOV-NEXT:  .LBB4_3: # %entry
175; NOCMOV-NEXT:    leal {{[0-9]+}}(%esp), %edx
176; NOCMOV-NEXT:    jne .LBB4_6
177; NOCMOV-NEXT:  # %bb.4: # %entry
178; NOCMOV-NEXT:    jp .LBB4_6
179; NOCMOV-NEXT:  # %bb.5: # %entry
180; NOCMOV-NEXT:    leal {{[0-9]+}}(%esp), %edx
181; NOCMOV-NEXT:  .LBB4_6: # %entry
182; NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
183; NOCMOV-NEXT:    leal {{[0-9]+}}(%esp), %esi
184; NOCMOV-NEXT:    jne .LBB4_9
185; NOCMOV-NEXT:  # %bb.7: # %entry
186; NOCMOV-NEXT:    jp .LBB4_9
187; NOCMOV-NEXT:  # %bb.8: # %entry
188; NOCMOV-NEXT:    leal {{[0-9]+}}(%esp), %esi
189; NOCMOV-NEXT:  .LBB4_9: # %entry
190; NOCMOV-NEXT:    movl (%ecx), %ecx
191; NOCMOV-NEXT:    movl (%edx), %edx
192; NOCMOV-NEXT:    movl (%esi), %esi
193; NOCMOV-NEXT:    leal {{[0-9]+}}(%esp), %edi
194; NOCMOV-NEXT:    jne .LBB4_12
195; NOCMOV-NEXT:  # %bb.10: # %entry
196; NOCMOV-NEXT:    jp .LBB4_12
197; NOCMOV-NEXT:  # %bb.11: # %entry
198; NOCMOV-NEXT:    leal {{[0-9]+}}(%esp), %edi
199; NOCMOV-NEXT:  .LBB4_12: # %entry
200; NOCMOV-NEXT:    movl (%edi), %edi
201; NOCMOV-NEXT:    movl %edi, 12(%eax)
202; NOCMOV-NEXT:    movl %esi, 8(%eax)
203; NOCMOV-NEXT:    movl %edx, 4(%eax)
204; NOCMOV-NEXT:    movl %ecx, (%eax)
205; NOCMOV-NEXT:    popl %esi
206; NOCMOV-NEXT:    popl %edi
207; NOCMOV-NEXT:    retl $4
208entry:
209  %cmp = fcmp oeq float %a, %b
210  %r = select i1 %cmp, <4 x i32> %c, <4 x i32> %d
211  ret <4 x i32> %r
212}
213
214; Also make sure we catch the original code-sequence of interest:
215
; Converts the result of an une float compare to a float (0.0 or 1.0).
; The x86_64 run expects a branchless sequence: cmpneqss produces a mask that
; is and-ed with a loaded 1.0 constant. The i686 run expects an x87 sequence
; that pushes both 1.0 and 0.0 (fld1/fldz) and uses jne/jp branches around
; fstp instructions to decide which value is returned.
216define dso_local float @test_zext_fcmp_une(float %a, float %b) nounwind {
217; CMOV-LABEL: test_zext_fcmp_une:
218; CMOV:       # %bb.0: # %entry
219; CMOV-NEXT:    cmpneqss %xmm1, %xmm0
220; CMOV-NEXT:    movss {{.*#+}} xmm1 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
221; CMOV-NEXT:    andps %xmm1, %xmm0
222; CMOV-NEXT:    retq
223;
224; NOCMOV-LABEL: test_zext_fcmp_une:
225; NOCMOV:       # %bb.0: # %entry
226; NOCMOV-NEXT:    flds {{[0-9]+}}(%esp)
227; NOCMOV-NEXT:    flds {{[0-9]+}}(%esp)
228; NOCMOV-NEXT:    fucompp
229; NOCMOV-NEXT:    fnstsw %ax
230; NOCMOV-NEXT:    # kill: def $ah killed $ah killed $ax
231; NOCMOV-NEXT:    sahf
232; NOCMOV-NEXT:    fld1
233; NOCMOV-NEXT:    fldz
234; NOCMOV-NEXT:    jne .LBB5_1
235; NOCMOV-NEXT:  # %bb.2: # %entry
236; NOCMOV-NEXT:    jp .LBB5_5
237; NOCMOV-NEXT:  # %bb.3: # %entry
238; NOCMOV-NEXT:    fstp %st(1)
239; NOCMOV-NEXT:    jmp .LBB5_4
240; NOCMOV-NEXT:  .LBB5_1:
241; NOCMOV-NEXT:    fstp %st(0)
242; NOCMOV-NEXT:  .LBB5_4: # %entry
243; NOCMOV-NEXT:    fldz
244; NOCMOV-NEXT:  .LBB5_5: # %entry
245; NOCMOV-NEXT:    fstp %st(0)
246; NOCMOV-NEXT:    retl
247entry:
248  %cmp = fcmp une float %a, %b
; The i1 is zero-extended first, so the signed conversion below only ever
; sees 0 or 1.
249  %conv1 = zext i1 %cmp to i32
250  %conv2 = sitofp i32 %conv1 to float
251  ret float %conv2
252}
253
; Converts the result of an oeq float compare to a float (0.0 or 1.0).
; The x86_64 run expects a branchless sequence: cmpeqss produces a mask that
; is and-ed with a loaded 1.0 constant. The i686 run expects an x87 sequence
; that pushes both 0.0 and 1.0 (fldz/fld1) and uses jne/jp branches around
; fstp instructions to decide which value is returned.
254define dso_local float @test_zext_fcmp_oeq(float %a, float %b) nounwind {
255; CMOV-LABEL: test_zext_fcmp_oeq:
256; CMOV:       # %bb.0: # %entry
257; CMOV-NEXT:    cmpeqss %xmm1, %xmm0
258; CMOV-NEXT:    movss {{.*#+}} xmm1 = [1.0E+0,0.0E+0,0.0E+0,0.0E+0]
259; CMOV-NEXT:    andps %xmm1, %xmm0
260; CMOV-NEXT:    retq
261;
262; NOCMOV-LABEL: test_zext_fcmp_oeq:
263; NOCMOV:       # %bb.0: # %entry
264; NOCMOV-NEXT:    flds {{[0-9]+}}(%esp)
265; NOCMOV-NEXT:    flds {{[0-9]+}}(%esp)
266; NOCMOV-NEXT:    fucompp
267; NOCMOV-NEXT:    fnstsw %ax
268; NOCMOV-NEXT:    # kill: def $ah killed $ah killed $ax
269; NOCMOV-NEXT:    sahf
270; NOCMOV-NEXT:    fldz
271; NOCMOV-NEXT:    fld1
272; NOCMOV-NEXT:    jne .LBB6_1
273; NOCMOV-NEXT:  # %bb.2: # %entry
274; NOCMOV-NEXT:    jp .LBB6_5
275; NOCMOV-NEXT:  # %bb.3: # %entry
276; NOCMOV-NEXT:    fstp %st(1)
277; NOCMOV-NEXT:    jmp .LBB6_4
278; NOCMOV-NEXT:  .LBB6_1:
279; NOCMOV-NEXT:    fstp %st(0)
280; NOCMOV-NEXT:  .LBB6_4: # %entry
281; NOCMOV-NEXT:    fldz
282; NOCMOV-NEXT:  .LBB6_5: # %entry
283; NOCMOV-NEXT:    fstp %st(0)
284; NOCMOV-NEXT:    retl
285entry:
286  %cmp = fcmp oeq float %a, %b
; The i1 is zero-extended first, so the signed conversion below only ever
; sees 0 or 1.
287  %conv1 = zext i1 %cmp to i32
288  %conv2 = sitofp i32 %conv1 to float
289  ret float %conv2
290}
291
; Attribute group #0 (nounwind).
; NOTE(review): no function in the visible part of this file references #0;
; every define spells out `nounwind` directly. The group looks unused --
; confirm before removing it.
292attributes #0 = { nounwind }
293
; Zero-initialized i8 global; no_cascade_opt writes its result here with a
; volatile store.
294@g8 = dso_local global i8 0
295
296; The following test failed because llvm had a bug where a structure like:
297;
298; %12 = CMOV_GR8 %7, %11 ... (lt)
299; %13 = CMOV_GR8 %12, %11 ... (gt)
300;
301; was lowered incorrectly.
302;
303; The first two cmovs got expanded to:
304; %bb.0:
305;   JCC_1 %bb.9, 12
306; %bb.7:
307;   JCC_1 %bb.9, 15
308; %bb.8:
309; %bb.9:
310;   %12 = phi(%7, %bb.8, %11, %bb.0, %12, %bb.7)
311;   %13 = COPY %12
312; This was invalid because %13 is not the same value as %12, so it cannot be a COPY of %12.
313
; Regression test for the cascaded-cmov miscompile described in the comment
; above. Two i8 selects on different conditions (slt and sgt of the same
; operands) are chained, and the first select's result is also used by a
; third select, so the first two selects must stay distinct values.
; The x86_64 run expects three conditional moves (cmovl, cmovle, cmovne) and a
; byte store to g8. The i686 run expects a branch sequence
; (jge/jle/jg/jne/je) with a byte store to g8 before each of its two returns.
314define dso_local void @no_cascade_opt(i32 %v0, i32 %v1, i32 %v2, i32 %v3) nounwind {
315; CMOV-LABEL: no_cascade_opt:
316; CMOV:       # %bb.0: # %entry
317; CMOV-NEXT:    cmpl %edx, %esi
318; CMOV-NEXT:    movl $20, %eax
319; CMOV-NEXT:    cmovll %eax, %ecx
320; CMOV-NEXT:    cmovlel %ecx, %eax
321; CMOV-NEXT:    testl %edi, %edi
322; CMOV-NEXT:    cmovnel %ecx, %eax
323; CMOV-NEXT:    movb %al, g8(%rip)
324; CMOV-NEXT:    retq
325;
326; NOCMOV-LABEL: no_cascade_opt:
327; NOCMOV:       # %bb.0: # %entry
328; NOCMOV-NEXT:    movl {{[0-9]+}}(%esp), %eax
329; NOCMOV-NEXT:    cmpl {{[0-9]+}}(%esp), %eax
330; NOCMOV-NEXT:    movb $20, %al
331; NOCMOV-NEXT:    movb $20, %cl
332; NOCMOV-NEXT:    jge .LBB7_1
333; NOCMOV-NEXT:  # %bb.2: # %entry
334; NOCMOV-NEXT:    jle .LBB7_3
335; NOCMOV-NEXT:  .LBB7_4: # %entry
336; NOCMOV-NEXT:    cmpl $0, {{[0-9]+}}(%esp)
337; NOCMOV-NEXT:    jne .LBB7_5
338; NOCMOV-NEXT:  .LBB7_6: # %entry
339; NOCMOV-NEXT:    movb %al, g8
340; NOCMOV-NEXT:    retl
341; NOCMOV-NEXT:  .LBB7_1: # %entry
342; NOCMOV-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
343; NOCMOV-NEXT:    jg .LBB7_4
344; NOCMOV-NEXT:  .LBB7_3: # %entry
345; NOCMOV-NEXT:    movl %ecx, %eax
346; NOCMOV-NEXT:    cmpl $0, {{[0-9]+}}(%esp)
347; NOCMOV-NEXT:    je .LBB7_6
348; NOCMOV-NEXT:  .LBB7_5: # %entry
349; NOCMOV-NEXT:    movl %ecx, %eax
350; NOCMOV-NEXT:    movb %al, g8
351; NOCMOV-NEXT:    retl
352entry:
353  %c0 = icmp eq i32 %v0, 0
354  %c1 = icmp slt i32 %v1, %v2
355  %c2 = icmp sgt i32 %v1, %v2
356  %trunc = trunc i32 %v3 to i8
; %sel0 and %sel1 share the constant 20 as their true operand, and %sel0 is
; the false operand of %sel1.
357  %sel0 = select i1 %c1, i8 20, i8 %trunc
358  %sel1 = select i1 %c2, i8 20, i8 %sel0
; %sel0 is used again here, independently of %sel1.
359  %sel2 = select i1 %c0, i8 %sel1, i8 %sel0
; The volatile store keeps the selected byte observable in the output.
360  store volatile i8 %sel2, ptr @g8
361  ret void
362}
363