; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=sse2 | FileCheck %s --check-prefixes=CHECK,SSE
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=avx2 | FileCheck %s --check-prefixes=CHECK,AVX

; Lanes 0 and 1 of one <2 x double> are each compared (olt) against a different
; constant and the two i1 results are and'ed. The sse2 run expects the scalar
; sequence to be left untouched; the avx2 run expects one vector fcmp, a shuffle
; that moves lane 1 into lane 0, a vector 'and', and a single extract of lane 0.

define i1 @fcmp_and_v2f64(<2 x double> %a) {
; SSE-LABEL: @fcmp_and_v2f64(
; SSE-NEXT:    [[E1:%.*]] = extractelement <2 x double> [[A:%.*]], i32 0
; SSE-NEXT:    [[E2:%.*]] = extractelement <2 x double> [[A]], i32 1
; SSE-NEXT:    [[CMP1:%.*]] = fcmp olt double [[E1]], 4.200000e+01
; SSE-NEXT:    [[CMP2:%.*]] = fcmp olt double [[E2]], -8.000000e+00
; SSE-NEXT:    [[R:%.*]] = and i1 [[CMP1]], [[CMP2]]
; SSE-NEXT:    ret i1 [[R]]
;
; AVX-LABEL: @fcmp_and_v2f64(
; AVX-NEXT:    [[TMP1:%.*]] = fcmp olt <2 x double> [[A:%.*]], <double 4.200000e+01, double -8.000000e+00>
; AVX-NEXT:    [[SHIFT:%.*]] = shufflevector <2 x i1> [[TMP1]], <2 x i1> poison, <2 x i32> <i32 1, i32 poison>
; AVX-NEXT:    [[TMP2:%.*]] = and <2 x i1> [[TMP1]], [[SHIFT]]
; AVX-NEXT:    [[R:%.*]] = extractelement <2 x i1> [[TMP2]], i64 0
; AVX-NEXT:    ret i1 [[R]]
;
  %e1 = extractelement <2 x double> %a, i32 0
  %e2 = extractelement <2 x double> %a, i32 1
  %cmp1 = fcmp olt double %e1, 42.0
  %cmp2 = fcmp olt double %e2, -8.0
  %r = and i1 %cmp1, %cmp2
  ret i1 %r
}

; Same shape with 'or' on lanes 0 and 2 of a <4 x double>; the two extracts use
; different index types (i32 and i64). The sse2 run expects no change. The avx2
; run expects a vector fcmp whose unused constant lanes are poison, a shuffle
; that moves lane 2 into lane 0, a vector 'or', and an extract of lane 0.

define i1 @fcmp_or_v4f64(<4 x double> %a) {
; SSE-LABEL: @fcmp_or_v4f64(
; SSE-NEXT:    [[E1:%.*]] = extractelement <4 x double> [[A:%.*]], i32 0
; SSE-NEXT:    [[E2:%.*]] = extractelement <4 x double> [[A]], i64 2
; SSE-NEXT:    [[CMP1:%.*]] = fcmp olt double [[E1]], 4.200000e+01
; SSE-NEXT:    [[CMP2:%.*]] = fcmp olt double [[E2]], -8.000000e+00
; SSE-NEXT:    [[R:%.*]] = or i1 [[CMP1]], [[CMP2]]
; SSE-NEXT:    ret i1 [[R]]
;
; AVX-LABEL: @fcmp_or_v4f64(
; AVX-NEXT:    [[TMP1:%.*]] = fcmp olt <4 x double> [[A:%.*]], <double 4.200000e+01, double poison, double -8.000000e+00, double poison>
; AVX-NEXT:    [[SHIFT:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <4 x i32> <i32 2, i32 poison, i32 poison, i32 poison>
; AVX-NEXT:    [[TMP2:%.*]] = or <4 x i1> [[TMP1]], [[SHIFT]]
; AVX-NEXT:    [[R:%.*]] = extractelement <4 x i1> [[TMP2]], i64 0
; AVX-NEXT:    ret i1 [[R]]
;
  %e1 = extractelement <4 x double> %a, i32 0
  %e2 = extractelement <4 x double> %a, i64 2
  %cmp1 = fcmp olt double %e1, 42.0
  %cmp2 = fcmp olt double %e2, -8.0
  %r = or i1 %cmp1, %cmp2
  ret i1 %r
}

; Integer variant: lanes 3 and 1 of a <4 x i32> compared with sgt and combined
; with xor. Both runs share one set of assertions: a vector icmp with -8 in
; lane 1 and 42 in lane 3, a shuffle that moves lane 3 into lane 1, a vector
; xor with the shuffled value as the first operand, and an extract of lane 1.

define i1 @icmp_xor_v4i32(<4 x i32> %a) {
; CHECK-LABEL: @icmp_xor_v4i32(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> [[A:%.*]], <i32 poison, i32 -8, i32 poison, i32 42>
; CHECK-NEXT:    [[SHIFT:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <4 x i32> <i32 poison, i32 3, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP2:%.*]] = xor <4 x i1> [[SHIFT]], [[TMP1]]
; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x i1> [[TMP2]], i64 1
; CHECK-NEXT:    ret i1 [[R]]
;
  %e1 = extractelement <4 x i32> %a, i32 3
  %e2 = extractelement <4 x i32> %a, i32 1
  %cmp1 = icmp sgt i32 %e1, 42
  %cmp2 = icmp sgt i32 %e2, -8
  %r = xor i1 %cmp1, %cmp2
  ret i1 %r
}

; As above, but the first compare is written 'samesign ugt' while the second is
; plain 'sgt'. The expected output is identical to the previous test: a vector
; 'icmp sgt' that carries no samesign flag.

define i1 @icmp_samesign_xor_v4i32(<4 x i32> %a) {
; CHECK-LABEL: @icmp_samesign_xor_v4i32(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> [[A:%.*]], <i32 poison, i32 -8, i32 poison, i32 42>
; CHECK-NEXT:    [[SHIFT:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <4 x i32> <i32 poison, i32 3, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP2:%.*]] = xor <4 x i1> [[SHIFT]], [[TMP1]]
; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x i1> [[TMP2]], i64 1
; CHECK-NEXT:    ret i1 [[R]]
;
  %e1 = extractelement <4 x i32> %a, i32 3
  %e2 = extractelement <4 x i32> %a, i32 1
  %cmp1 = icmp samesign ugt i32 %e1, 42
  %cmp2 = icmp sgt i32 %e2, -8
  %r = xor i1 %cmp1, %cmp2
  ret i1 %r
}

; add is not canonical (should be xor), but that is ok.
; Lanes 7 and 2 of an <8 x i32> are compared for equality and the i1 results are
; combined with 'add'. Both runs expect a vector icmp eq with -8 in lane 2 and
; 42 in lane 7, a shuffle that moves lane 7 into lane 2, a vector add, and an
; extract of lane 2.

define i1 @icmp_add_v8i32(<8 x i32> %a) {
; CHECK-LABEL: @icmp_add_v8i32(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <8 x i32> [[A:%.*]], <i32 poison, i32 poison, i32 -8, i32 poison, i32 poison, i32 poison, i32 poison, i32 42>
; CHECK-NEXT:    [[SHIFT:%.*]] = shufflevector <8 x i1> [[TMP1]], <8 x i1> poison, <8 x i32> <i32 poison, i32 poison, i32 7, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP2:%.*]] = add <8 x i1> [[SHIFT]], [[TMP1]]
; CHECK-NEXT:    [[R:%.*]] = extractelement <8 x i1> [[TMP2]], i64 2
; CHECK-NEXT:    ret i1 [[R]]
;
  %e1 = extractelement <8 x i32> %a, i32 7
  %e2 = extractelement <8 x i32> %a, i32 2
  %cmp1 = icmp eq i32 %e1, 42
  %cmp2 = icmp eq i32 %e2, -8
  %r = add i1 %cmp1, %cmp2
  ret i1 %r
}

; External function used by the multi-use tests below to give a value an extra
; user. It is declared with no parameters, yet the calls below pass a double,
; an i32 or an i1 argument.
; Review note - presumably this relies on call-site function types not having
; to match the declaration; confirm that the mismatch is intentional.
declare void @use()

; Multi-use version of the two-lane fcmp/and test: the lane-0 extract and the
; final result are each also passed to @use. The sse2 run expects no change.
; The avx2 run expects the lane-0 extract to stay for its call, followed by the
; vector fcmp, shuffle, 'and' and extract sequence.

define i1 @fcmp_and_v2f64_multiuse(<2 x double> %a) {
; SSE-LABEL: @fcmp_and_v2f64_multiuse(
; SSE-NEXT:    [[E1:%.*]] = extractelement <2 x double> [[A:%.*]], i32 0
; SSE-NEXT:    call void @use(double [[E1]])
; SSE-NEXT:    [[E2:%.*]] = extractelement <2 x double> [[A]], i32 1
; SSE-NEXT:    [[CMP1:%.*]] = fcmp olt double [[E1]], 4.200000e+01
; SSE-NEXT:    [[CMP2:%.*]] = fcmp olt double [[E2]], -8.000000e+00
; SSE-NEXT:    [[R:%.*]] = and i1 [[CMP1]], [[CMP2]]
; SSE-NEXT:    call void @use(i1 [[R]])
; SSE-NEXT:    ret i1 [[R]]
;
; AVX-LABEL: @fcmp_and_v2f64_multiuse(
; AVX-NEXT:    [[E1:%.*]] = extractelement <2 x double> [[A:%.*]], i32 0
; AVX-NEXT:    call void @use(double [[E1]])
; AVX-NEXT:    [[TMP1:%.*]] = fcmp olt <2 x double> [[A]], <double 4.200000e+01, double -8.000000e+00>
; AVX-NEXT:    [[SHIFT:%.*]] = shufflevector <2 x i1> [[TMP1]], <2 x i1> poison, <2 x i32> <i32 1, i32 poison>
; AVX-NEXT:    [[TMP2:%.*]] = and <2 x i1> [[TMP1]], [[SHIFT]]
; AVX-NEXT:    [[R:%.*]] = extractelement <2 x i1> [[TMP2]], i64 0
; AVX-NEXT:    call void @use(i1 [[R]])
; AVX-NEXT:    ret i1 [[R]]
;
  %e1 = extractelement <2 x double> %a, i32 0
  call void @use(double %e1)
  %e2 = extractelement <2 x double> %a, i32 1
  %cmp1 = fcmp olt double %e1, 42.0
  %cmp2 = fcmp olt double %e2, -8.0
  %r = and i1 %cmp1, %cmp2
  call void @use(i1 %r)
  ret i1 %r
}

; Multi-use version of the icmp/xor test: the lane-1 extract and the final
; result each have an additional use via @use. Both runs expect the lane-1
; extract to stay for its call and the compares to be combined into the vector
; icmp, shuffle, xor and extract sequence.

define i1 @icmp_xor_v4i32_multiuse(<4 x i32> %a) {
; CHECK-LABEL: @icmp_xor_v4i32_multiuse(
; CHECK-NEXT:    [[E2:%.*]] = extractelement <4 x i32> [[A:%.*]], i32 1
; CHECK-NEXT:    call void @use(i32 [[E2]])
; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> [[A]], <i32 poison, i32 -8, i32 poison, i32 42>
; CHECK-NEXT:    [[SHIFT:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <4 x i32> <i32 poison, i32 3, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP2:%.*]] = xor <4 x i1> [[SHIFT]], [[TMP1]]
; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x i1> [[TMP2]], i64 1
; CHECK-NEXT:    call void @use(i1 [[R]])
; CHECK-NEXT:    ret i1 [[R]]
;
  %e1 = extractelement <4 x i32> %a, i32 3
  %e2 = extractelement <4 x i32> %a, i32 1
  call void @use(i32 %e2)
  %cmp1 = icmp sgt i32 %e1, 42
  %cmp2 = icmp sgt i32 %e2, -8
  %r = xor i1 %cmp1, %cmp2
  call void @use(i1 %r)
  ret i1 %r
}

; Same multi-use pattern, with the second compare written 'samesign ugt' and
; the first plain 'sgt'. The expected output matches the previous test: a
; vector 'icmp sgt' without a samesign flag.

define i1 @icmp_samesign_xor_v4i32_multiuse(<4 x i32> %a) {
; CHECK-LABEL: @icmp_samesign_xor_v4i32_multiuse(
; CHECK-NEXT:    [[E2:%.*]] = extractelement <4 x i32> [[A:%.*]], i32 1
; CHECK-NEXT:    call void @use(i32 [[E2]])
; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> [[A]], <i32 poison, i32 -8, i32 poison, i32 42>
; CHECK-NEXT:    [[SHIFT:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> poison, <4 x i32> <i32 poison, i32 3, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP2:%.*]] = xor <4 x i1> [[SHIFT]], [[TMP1]]
; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x i1> [[TMP2]], i64 1
; CHECK-NEXT:    call void @use(i1 [[R]])
; CHECK-NEXT:    ret i1 [[R]]
;
  %e1 = extractelement <4 x i32> %a, i32 3
  %e2 = extractelement <4 x i32> %a, i32 1
  call void @use(i32 %e2)
  %cmp1 = icmp sgt i32 %e1, 42
  %cmp2 = icmp samesign ugt i32 %e2, -8
  %r = xor i1 %cmp1, %cmp2
  call void @use(i1 %r)
  ret i1 %r
}

; Negative test - this could CSE/simplify.
; Both extracts read lane 2 of the same vector; the expected output is the
; input unchanged.

define i1 @same_extract_index(<4 x i32> %a) {
; CHECK-LABEL: @same_extract_index(
; CHECK-NEXT:    [[E1:%.*]] = extractelement <4 x i32> [[A:%.*]], i32 2
; CHECK-NEXT:    [[E2:%.*]] = extractelement <4 x i32> [[A]], i32 2
; CHECK-NEXT:    [[CMP1:%.*]] = icmp ugt i32 [[E1]], 42
; CHECK-NEXT:    [[CMP2:%.*]] = icmp ugt i32 [[E2]], -8
; CHECK-NEXT:    [[R:%.*]] = and i1 [[CMP1]], [[CMP2]]
; CHECK-NEXT:    ret i1 [[R]]
;
  %e1 = extractelement <4 x i32> %a, i32 2
  %e2 = extractelement <4 x i32> %a, i32 2
  %cmp1 = icmp ugt i32 %e1, 42
  %cmp2 = icmp ugt i32 %e2, -8
  %r = and i1 %cmp1, %cmp2
  ret i1 %r
}

; Negative test - need identical predicates.
; One compare is sgt and the other ugt (no samesign flag on either); the
; expected output is the input unchanged.

define i1 @different_preds(<4 x i32> %a) {
; CHECK-LABEL: @different_preds(
; CHECK-NEXT:    [[E1:%.*]] = extractelement <4 x i32> [[A:%.*]], i32 1
; CHECK-NEXT:    [[E2:%.*]] = extractelement <4 x i32> [[A]], i32 2
; CHECK-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[E1]], 42
; CHECK-NEXT:    [[CMP2:%.*]] = icmp ugt i32 [[E2]], -8
; CHECK-NEXT:    [[R:%.*]] = and i1 [[CMP1]], [[CMP2]]
; CHECK-NEXT:    ret i1 [[R]]
;
  %e1 = extractelement <4 x i32> %a, i32 1
  %e2 = extractelement <4 x i32> %a, i32 2
  %cmp1 = icmp sgt i32 %e1, 42
  %cmp2 = icmp ugt i32 %e2, -8
  %r = and i1 %cmp1, %cmp2
  ret i1 %r
}

; Negative test with integer and fp predicates.
; One operand is an fcmp ogt on a lane of a <4 x float>, the other an
; icmp samesign ugt on a lane of a separate <4 x i32>; the expected output is
; the input unchanged.

define i1 @different_preds_typemismtach(<4 x float> %a, <4 x i32> %b) {
; CHECK-LABEL: @different_preds_typemismtach(
; CHECK-NEXT:    [[E1:%.*]] = extractelement <4 x float> [[A:%.*]], i32 1
; CHECK-NEXT:    [[E2:%.*]] = extractelement <4 x i32> [[B:%.*]], i32 2
; CHECK-NEXT:    [[CMP1:%.*]] = fcmp ogt float [[E1]], 4.200000e+01
; CHECK-NEXT:    [[CMP2:%.*]] = icmp samesign ugt i32 [[E2]], -8
; CHECK-NEXT:    [[R:%.*]] = and i1 [[CMP1]], [[CMP2]]
; CHECK-NEXT:    ret i1 [[R]]
;
  %e1 = extractelement <4 x float> %a, i32 1
  %e2 = extractelement <4 x i32> %b, i32 2
  %cmp1 = fcmp ogt float %e1, 42.0
  %cmp2 = icmp samesign ugt i32 %e2, -8
  %r = and i1 %cmp1, %cmp2
  ret i1 %r
}

; Negative test - need 1 source vector.
; The two extracts read from different vectors (%a and %b) with the same sgt
; predicate; the expected output is the input unchanged.

define i1 @different_source_vec(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: @different_source_vec(
; CHECK-NEXT:    [[E1:%.*]] = extractelement <4 x i32> [[A:%.*]], i32 1
; CHECK-NEXT:    [[E2:%.*]] = extractelement <4 x i32> [[B:%.*]], i32 2
; CHECK-NEXT:    [[CMP1:%.*]] = icmp sgt i32 [[E1]], 42
; CHECK-NEXT:    [[CMP2:%.*]] = icmp sgt i32 [[E2]], -8
; CHECK-NEXT:    [[R:%.*]] = and i1 [[CMP1]], [[CMP2]]
; CHECK-NEXT:    ret i1 [[R]]
;
  %e1 = extractelement <4 x i32> %a, i32 1
  %e2 = extractelement <4 x i32> %b, i32 2
  %cmp1 = icmp sgt i32 %e1, 42
  %cmp2 = icmp sgt i32 %e2, -8
  %r = and i1 %cmp1, %cmp2
  ret i1 %r
}