; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64 -mattr=+cf,+avx512f -verify-machineinstrs | FileCheck %s

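;; These tests check that masked load/store of single-element (<1 x iN>) vectors
;; select the conditional-faulting CFCMOV load/store instructions enabled by
;; -mattr=+cf (Intel APX).
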
define void @basic(i32 %a, ptr %b, ptr %p, ptr %q) {
; CHECK-LABEL: basic:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    testl %edi, %edi
; CHECK-NEXT:    cfcmovel (%rsi), %eax
; CHECK-NEXT:    cfcmovel %eax, (%rdx)
; CHECK-NEXT:    movl $1, %eax
; CHECK-NEXT:    cfcmovneq %rax, (%rdx)
; CHECK-NEXT:    movw $2, %ax
; CHECK-NEXT:    cfcmovnew %ax, (%rcx)
; CHECK-NEXT:    retq
entry:
  %cond = icmp eq i32 %a, 0
  %0 = bitcast i1 %cond to <1 x i1>
  %1 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr %b, i32 4, <1 x i1> %0, <1 x i32> poison)
  call void @llvm.masked.store.v1i32.p0(<1 x i32> %1, ptr %p, i32 4, <1 x i1> %0)
  %2 = xor i1 %cond, true
  %3 = bitcast i1 %2 to <1 x i1>
  call void @llvm.masked.store.v1i64.p0(<1 x i64> <i64 1>, ptr %p, i32 8, <1 x i1> %3)
  call void @llvm.masked.store.v1i16.p0(<1 x i16> <i16 2>, ptr %q, i32 8, <1 x i1> %3)
  ret void
}

define i16 @cload_passthru_zero(i16 %a, ptr %b) {
; CHECK-LABEL: cload_passthru_zero:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    testw %di, %di
; CHECK-NEXT:    cfcmovew (%rsi), %ax
; CHECK-NEXT:    retq
entry:
  %cond = icmp eq i16 %a, 0
  %0 = bitcast i1 %cond to <1 x i1>
  %1 = call <1 x i16> @llvm.masked.load.v1i16.p0(ptr %b, i32 4, <1 x i1> %0, <1 x i16> <i16 0>)
  %2 = bitcast <1 x i16> %1 to i16
  ret i16 %2
}

define i64 @cload_passthru_not_zero(i64 %a, ptr %b) {
; CHECK-LABEL: cload_passthru_not_zero:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    testq %rdi, %rdi
; CHECK-NEXT:    cfcmoveq (%rsi), %rdi, %rax
; CHECK-NEXT:    retq
entry:
  %cond = icmp eq i64 %a, 0
  %0 = bitcast i1 %cond to <1 x i1>
  %va = bitcast i64 %a to <1 x i64>
  %1 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr %b, i32 4, <1 x i1> %0, <1 x i64> %va)
  %2 = bitcast <1 x i64> %1 to i64
  ret i64 %2
}

;; CFCMOV can use the flags produced by SUB directly.
define i64 @reduced_data_dependency(i64 %a, i64 %b, ptr %c) {
; CHECK-LABEL: reduced_data_dependency:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rdi, %rcx
; CHECK-NEXT:    subq %rsi, %rcx
; CHECK-NEXT:    cfcmovnsq (%rdx), %rdi, %rax
; CHECK-NEXT:    addq %rcx, %rax
; CHECK-NEXT:    retq
entry:
  %sub = sub i64 %a, %b
  %cond = icmp sge i64 %sub, 0
  %0 = bitcast i1 %cond to <1 x i1>
  %va = bitcast i64 %a to <1 x i64>
  %1 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr %c, i32 4, <1 x i1> %0, <1 x i64> %va)
  %2 = bitcast <1 x i64> %1 to i64
  %3 = add i64 %2, %sub
  ret i64 %3
}

;; No need to optimize the generated assembly for cond_false/cond_true because
;; such IR should never be emitted by the middle end. The IR is included here
;; only to check that feeding a constant mask to the backend is legal.
define i16 @cond_false(ptr %b) {
; CHECK-LABEL: cond_false:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    negb %al
; CHECK-NEXT:    cfcmovnew (%rdi), %ax
; CHECK-NEXT:    retq
entry:
  %0 = bitcast i1 false to <1 x i1>
  %1 = call <1 x i16> @llvm.masked.load.v1i16.p0(ptr %b, i32 4, <1 x i1> %0, <1 x i16> <i16 0>)
  %2 = bitcast <1 x i16> %1 to i16
  ret i16 %2
}

define i64 @cond_true(ptr %b) {
; CHECK-LABEL: cond_true:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movb $1, %al
; CHECK-NEXT:    negb %al
; CHECK-NEXT:    cfcmovneq (%rdi), %rax
; CHECK-NEXT:    retq
entry:
  %0 = bitcast i1 true to <1 x i1>
  %1 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr %b, i32 4, <1 x i1> %0, <1 x i64> <i64 0>)
  %2 = bitcast <1 x i64> %1 to i64
  ret i64 %2
}

define void @no_crash(ptr %p, <4 x i1> %cond1, <4 x i1> %cond2) {
; CHECK-LABEL: no_crash:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpslld $31, %xmm1, %xmm1
; CHECK-NEXT:    vptestmd %zmm1, %zmm1, %k0
; CHECK-NEXT:    kshiftlw $12, %k0, %k0
; CHECK-NEXT:    kshiftrw $12, %k0, %k1
; CHECK-NEXT:    vpslld $31, %xmm0, %xmm0
; CHECK-NEXT:    vptestmd %zmm0, %zmm0, %k0
; CHECK-NEXT:    kshiftlw $12, %k0, %k0
; CHECK-NEXT:    kshiftrw $12, %k0, %k2
; CHECK-NEXT:    vmovdqu64 (%rdi), %zmm0 {%k2} {z}
; CHECK-NEXT:    vmovdqu64 %zmm0, (%rdi) {%k1}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
entry:
  %0 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr %p, i32 8, <4 x i1> %cond1, <4 x i64> poison)
  call void @llvm.masked.store.v4i64.p0(<4 x i64> %0, ptr %p, i32 8, <4 x i1> %cond2)
  ret void
}