; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64 -mattr=+cf,+avx512f -verify-machineinstrs | FileCheck %s

;; Check that masked load/store with a <1 x i1> mask lowers to
;; conditional-faulting CFCMOV loads and stores.
define void @basic(i32 %a, ptr %b, ptr %p, ptr %q) {
; CHECK-LABEL: basic:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    testl %edi, %edi
; CHECK-NEXT:    cfcmovel (%rsi), %eax
; CHECK-NEXT:    cfcmovel %eax, (%rdx)
; CHECK-NEXT:    movl $1, %eax
; CHECK-NEXT:    cfcmovneq %rax, (%rdx)
; CHECK-NEXT:    movw $2, %ax
; CHECK-NEXT:    cfcmovnew %ax, (%rcx)
; CHECK-NEXT:    retq
entry:
  %cond = icmp eq i32 %a, 0
  %0 = bitcast i1 %cond to <1 x i1>
  %1 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr %b, i32 4, <1 x i1> %0, <1 x i32> poison)
  call void @llvm.masked.store.v1i32.p0(<1 x i32> %1, ptr %p, i32 4, <1 x i1> %0)
  %2 = xor i1 %cond, true
  %3 = bitcast i1 %2 to <1 x i1>
  call void @llvm.masked.store.v1i64.p0(<1 x i64> <i64 1>, ptr %p, i32 8, <1 x i1> %3)
  call void @llvm.masked.store.v1i16.p0(<1 x i16> <i16 2>, ptr %q, i32 8, <1 x i1> %3)
  ret void
}

;; A zero passthru folds into the CFCMOV load.
define i16 @cload_passthru_zero(i16 %a, ptr %b) {
; CHECK-LABEL: cload_passthru_zero:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    testw %di, %di
; CHECK-NEXT:    cfcmovew (%rsi), %ax
; CHECK-NEXT:    retq
entry:
  %cond = icmp eq i16 %a, 0
  %0 = bitcast i1 %cond to <1 x i1>
  %1 = call <1 x i16> @llvm.masked.load.v1i16.p0(ptr %b, i32 4, <1 x i1> %0, <1 x i16> <i16 0>)
  %2 = bitcast <1 x i16> %1 to i16
  ret i16 %2
}

;; A non-zero passthru requires the three-operand CFCMOV form.
define i64 @cload_passthru_not_zero(i64 %a, ptr %b) {
; CHECK-LABEL: cload_passthru_not_zero:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    testq %rdi, %rdi
; CHECK-NEXT:    cfcmoveq (%rsi), %rdi, %rax
; CHECK-NEXT:    retq
entry:
  %cond = icmp eq i64 %a, 0
  %0 = bitcast i1 %cond to <1 x i1>
  %va = bitcast i64 %a to <1 x i64>
  %1 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr %b, i32 4, <1 x i1> %0, <1 x i64> %va)
  %2 = bitcast <1 x i64> %1 to i64
  ret i64 %2
}

;; CFCMOV can use the flags produced by SUB directly.
define i64 @reduced_data_dependency(i64 %a, i64 %b, ptr %c) {
; CHECK-LABEL: reduced_data_dependency:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movq %rdi, %rcx
; CHECK-NEXT:    subq %rsi, %rcx
; CHECK-NEXT:    cfcmovnsq (%rdx), %rdi, %rax
; CHECK-NEXT:    addq %rcx, %rax
; CHECK-NEXT:    retq
entry:
  %sub = sub i64 %a, %b
  %cond = icmp sge i64 %sub, 0
  %0 = bitcast i1 %cond to <1 x i1>
  %va = bitcast i64 %a to <1 x i64>
  %1 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr %c, i32 4, <1 x i1> %0, <1 x i64> %va)
  %2 = bitcast <1 x i64> %1 to i64
  %3 = add i64 %2, %sub
  ret i64 %3
}

;; No need to optimize the generated assembly for cond_false/cond_true because
;; such IR should never be emitted by the middle end. It is included here only
;; to check that feeding a constant mask to the backend is legal.
define i16 @cond_false(ptr %b) {
; CHECK-LABEL: cond_false:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    negb %al
; CHECK-NEXT:    cfcmovnew (%rdi), %ax
; CHECK-NEXT:    retq
entry:
  %0 = bitcast i1 false to <1 x i1>
  %1 = call <1 x i16> @llvm.masked.load.v1i16.p0(ptr %b, i32 4, <1 x i1> %0, <1 x i16> <i16 0>)
  %2 = bitcast <1 x i16> %1 to i16
  ret i16 %2
}

define i64 @cond_true(ptr %b) {
; CHECK-LABEL: cond_true:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    movb $1, %al
; CHECK-NEXT:    negb %al
; CHECK-NEXT:    cfcmovneq (%rdi), %rax
; CHECK-NEXT:    retq
entry:
  %0 = bitcast i1 true to <1 x i1>
  %1 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr %b, i32 4, <1 x i1> %0, <1 x i64> <i64 0>)
  %2 = bitcast <1 x i64> %1 to i64
  ret i64 %2
}

;; Masks wider than <1 x i1> are not lowered to CFCMOV and fall back to
;; AVX-512 masked moves; make sure this does not crash.
define void @no_crash(ptr %p, <4 x i1> %cond1, <4 x i1> %cond2) {
; CHECK-LABEL: no_crash:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpslld $31, %xmm1, %xmm1
; CHECK-NEXT:    vptestmd %zmm1, %zmm1, %k0
; CHECK-NEXT:    kshiftlw $12, %k0, %k0
; CHECK-NEXT:    kshiftrw $12, %k0, %k1
; CHECK-NEXT:    vpslld $31, %xmm0, %xmm0
; CHECK-NEXT:    vptestmd %zmm0, %zmm0, %k0
; CHECK-NEXT:    kshiftlw $12, %k0, %k0
; CHECK-NEXT:    kshiftrw $12, %k0, %k2
; CHECK-NEXT:    vmovdqu64 (%rdi), %zmm0 {%k2} {z}
; CHECK-NEXT:    vmovdqu64 %zmm0, (%rdi) {%k1}
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
entry:
  %0 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr %p, i32 8, <4 x i1> %cond1, <4 x i64> poison)
  call void @llvm.masked.store.v4i64.p0(<4 x i64> %0, ptr %p, i32 8, <4 x i1> %cond2)
  ret void
}