; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-macosx -mattr=+sse2 -verify-machineinstrs | FileCheck %s

; After tail duplication, two copies in an early exit BB can be cancelled out.
; rdar://10640363
;
; @t1 returns %a unchanged when %b is zero. Otherwise it loops, replacing
; (a, b) with (b, a srem b) until the remainder is zero, and returns the last
; divisor.
define i32 @t1(i32 %a, i32 %b) nounwind {
; CHECK-LABEL: t1:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: testl %esi, %esi
; CHECK-NEXT: je LBB0_4
; CHECK-NEXT: ## %bb.1: ## %while.body.preheader
; CHECK-NEXT: movl %esi, %edx
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: LBB0_2: ## %while.body
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT: movl %edx, %ecx
; CHECK-NEXT: cltd
; CHECK-NEXT: idivl %ecx
; CHECK-NEXT: testl %edx, %edx
; CHECK-NEXT: movl %ecx, %eax
; CHECK-NEXT: jne LBB0_2
; CHECK-NEXT: ## %bb.3: ## %while.end
; CHECK-NEXT: movl %ecx, %eax
; CHECK-NEXT: LBB0_4:
; CHECK-NEXT: retq
entry:
  ; A zero %b skips the loop entirely; this is the early-exit path.
  %cmp1 = icmp eq i32 %b, 0
  br i1 %cmp1, label %while.end, label %while.body

while.body: ; preds = %entry, %while.body
  ; %a.addr.03 and %b.addr.02 carry the current dividend and divisor.
  %a.addr.03 = phi i32 [ %b.addr.02, %while.body ], [ %a, %entry ]
  %b.addr.02 = phi i32 [ %rem, %while.body ], [ %b, %entry ]
  %rem = srem i32 %a.addr.03, %b.addr.02
  ; Leave the loop once the remainder reaches zero.
  %cmp = icmp eq i32 %rem, 0
  br i1 %cmp, label %while.end, label %while.body

while.end: ; preds = %while.body, %entry
  ; Result is %a on the early exit, otherwise the divisor of the final srem.
  %a.addr.0.lcssa = phi i32 [ %a, %entry ], [ %b.addr.02, %while.body ]
  ret i32 %a.addr.0.lcssa
}

; Two movdqa (from phi-elimination) in the entry BB cancel out.
; rdar://10428165
;
; Shuffle mask indices 0-7 pick from %T0 and 8-15 from %T1, so result lanes 2
; and 3 take %T0[7] and %T0[2], lane 4 takes %T1[0], and the rest are undef.
; The expected code is three shuffles and a return, with no register-to-register
; vector move.
define <8 x i16> @t2(<8 x i16> %T0, <8 x i16> %T1) nounwind readnone {
; CHECK-LABEL: t2:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
; CHECK-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,1,1,2,4,5,6,7]
; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: retq
entry:
  %tmp8 = shufflevector <8 x i16> %T0, <8 x i16> %T1, <8 x i32> < i32 undef, i32 undef, i32 7, i32 2, i32 8, i32 undef, i32 undef , i32 undef >
  ret <8 x i16> %tmp8
}

; i64 form of the remainder loop: (a, b) becomes (b, a srem b) until the
; remainder is zero, then the last divisor is truncated to i32 and returned.
; When %b is zero, %a is truncated and returned instead.
define i32 @t3(i64 %a, i64 %b) nounwind {
; CHECK-LABEL: t3:
; CHECK: ## %bb.0: ## %entry
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: testq %rsi, %rsi
; CHECK-NEXT: je LBB2_4
; CHECK-NEXT: ## %bb.1: ## %while.body.preheader
; CHECK-NEXT: movq %rsi, %rdx
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: LBB2_2: ## %while.body
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
; CHECK-NEXT: movq %rdx, %rcx
; CHECK-NEXT: cqto
; CHECK-NEXT: idivq %rcx
; CHECK-NEXT: testq %rdx, %rdx
; CHECK-NEXT: movq %rcx, %rax
; CHECK-NEXT: jne LBB2_2
; CHECK-NEXT: ## %bb.3: ## %while.end
; CHECK-NEXT: movl %ecx, %eax
; CHECK-NEXT: LBB2_4:
; CHECK-NEXT: retq
entry:
  ; A zero %b skips the loop entirely.
  %cmp1 = icmp eq i64 %b, 0
  br i1 %cmp1, label %while.end, label %while.body

while.body: ; preds = %entry, %while.body
  ; %a.addr.03 and %b.addr.02 carry the current dividend and divisor.
  %a.addr.03 = phi i64 [ %b.addr.02, %while.body ], [ %a, %entry ]
  %b.addr.02 = phi i64 [ %rem, %while.body ], [ %b, %entry ]
  %rem = srem i64 %a.addr.03, %b.addr.02
  ; Leave the loop once the remainder reaches zero.
  %cmp = icmp eq i64 %rem, 0
  br i1 %cmp, label %while.end, label %while.body

while.end: ; preds = %while.body, %entry
  %a.addr.0.lcssa = phi i64 [ %a, %entry ], [ %b.addr.02, %while.body ]
  ; Only the low 32 bits of the 64-bit result are returned.
  %t = trunc i64 %a.addr.0.lcssa to i32
  ret i32 %t
}

; Check that copy propagation does not kill things like:
; dst = copy src <-- do not kill that.
; ... = op1 undef dst
; ... = op2 dst <-- this is used here.
define <16 x float> @foo(<16 x float> %x) {
; CHECK-LABEL: foo:
; CHECK: ## %bb.0: ## %bb
; CHECK-NEXT: xorps %xmm5, %xmm5
; CHECK-NEXT: cvttps2dq %xmm3, %xmm8
; CHECK-NEXT: movaps %xmm3, %xmm4
; CHECK-NEXT: cmpltps %xmm5, %xmm4
; CHECK-NEXT: movaps {{.*#+}} xmm7 = [13,14,15,16]
; CHECK-NEXT: movaps %xmm4, %xmm6
; CHECK-NEXT: orps %xmm7, %xmm6
; CHECK-NEXT: cvtdq2ps %xmm8, %xmm3
; CHECK-NEXT: andps %xmm7, %xmm3
; CHECK-NEXT: andps %xmm6, %xmm3
; CHECK-NEXT: andnps %xmm4, %xmm6
; CHECK-NEXT: cvttps2dq %xmm2, %xmm4
; CHECK-NEXT: movaps %xmm2, %xmm7
; CHECK-NEXT: cmpltps %xmm5, %xmm7
; CHECK-NEXT: movaps {{.*#+}} xmm8 = [9,10,11,12]
; CHECK-NEXT: movaps %xmm7, %xmm9
; CHECK-NEXT: orps %xmm8, %xmm9
; CHECK-NEXT: cvtdq2ps %xmm4, %xmm2
; CHECK-NEXT: andps %xmm8, %xmm2
; CHECK-NEXT: andps %xmm9, %xmm2
; CHECK-NEXT: andnps %xmm7, %xmm9
; CHECK-NEXT: cvttps2dq %xmm1, %xmm4
; CHECK-NEXT: cmpltps %xmm5, %xmm1
; CHECK-NEXT: movaps {{.*#+}} xmm7 = [5,6,7,8]
; CHECK-NEXT: movaps %xmm1, %xmm8
; CHECK-NEXT: orps %xmm7, %xmm8
; CHECK-NEXT: cvtdq2ps %xmm4, %xmm4
; CHECK-NEXT: andps %xmm7, %xmm4
; CHECK-NEXT: andps %xmm8, %xmm4
; CHECK-NEXT: andnps %xmm1, %xmm8
; CHECK-NEXT: cvttps2dq %xmm0, %xmm1
; CHECK-NEXT: cmpltps %xmm5, %xmm0
; CHECK-NEXT: movaps {{.*#+}} xmm5 = [1,2,3,4]
; CHECK-NEXT: movaps %xmm0, %xmm7
; CHECK-NEXT: orps %xmm5, %xmm7
; CHECK-NEXT: cvtdq2ps %xmm1, %xmm1
; CHECK-NEXT: andps %xmm5, %xmm1
; CHECK-NEXT: andps %xmm7, %xmm1
; CHECK-NEXT: andnps %xmm0, %xmm7
; CHECK-NEXT: movaps {{.*#+}} xmm0 = [1,1,1,1]
; CHECK-NEXT: andps %xmm0, %xmm7
; CHECK-NEXT: orps %xmm7, %xmm1
; CHECK-NEXT: andps %xmm0, %xmm8
; CHECK-NEXT: orps %xmm8, %xmm4
; CHECK-NEXT: andps %xmm0, %xmm9
; CHECK-NEXT: orps %xmm9, %xmm2
; CHECK-NEXT: andps %xmm0, %xmm6
; CHECK-NEXT: orps %xmm6, %xmm3
; CHECK-NEXT: movaps %xmm1, %xmm0
; CHECK-NEXT: movaps %xmm4, %xmm1
; CHECK-NEXT: retq
;
; The expected code above handles the 16-lane value as four 128-bit pieces:
; it reads %x from xmm0-xmm3 and leaves the result in xmm0-xmm3. The last two
; moves put the first two result pieces into xmm0 and xmm1.
bb:
  ; Every constant lane here is >= 0, so this signed "< 0" compare is false in
  ; all lanes.
  %v3 = icmp slt <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, zeroinitializer
  %v14 = zext <16 x i1> %v3 to <16 x i32>
  ; Lane-wise "x < 0.0" mask, used below both sign-extended (%v17) and
  ; zero-extended (%v18).
  %v16 = fcmp olt <16 x float> %x, zeroinitializer
  %v17 = sext <16 x i1> %v16 to <16 x i32>
  %v18 = zext <16 x i1> %v16 to <16 x i32>
  %v19 = xor <16 x i32> %v14, %v18
  %v20 = or <16 x i32> %v17, <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>
  ; Round-trip %x through i32 and back to float.
  %v21 = fptosi <16 x float> %x to <16 x i32>
  %v22 = sitofp <16 x i32> %v21 to <16 x float>
  %v69 = fcmp ogt <16 x float> %v22, zeroinitializer
  %v75 = and <16 x i1> %v69, %v3
  ; From here on the round-tripped value is treated as raw integer bits.
  %v77 = bitcast <16 x float> %v22 to <16 x i32>
  %v79 = sext <16 x i1> %v75 to <16 x i32>
  %v80 = and <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>, %v79
  %v81 = xor <16 x i32> %v77, %v80
  %v82 = and <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>, %v81
  ; xor / and / xor with the same first operand (%v19): takes bits of %v82
  ; where %v20 is set and bits of %v19 elsewhere.
  %v83 = xor <16 x i32> %v19, %v82
  %v84 = and <16 x i32> %v83, %v20
  %v85 = xor <16 x i32> %v19, %v84
  %v86 = bitcast <16 x i32> %v85 to <16 x float>
  ret <16 x float> %v86
}