; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=SSE2 | FileCheck %s --check-prefixes=CHECK,SSE
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=AVX2 | FileCheck %s --check-prefixes=CHECK,AVX

; Tests for the vector-combine pass on scalar compares whose operands are
; extracted vector lanes. Each function is run twice (SSE2 and AVX2); where the
; expected output differs between the two runs, separate check prefixes are used.

; Lane-by-lane equality chain over two <4 x i32> values, one lane per basic
; block. Both runs expect every extract+extract+icmp triple to be rewritten as
; a vector icmp followed by a single extract of the matching i1 lane.
define i1 @cmp_v4i32(<4 x float> %arg, <4 x float> %arg1) {
; CHECK-LABEL: @cmp_v4i32(
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[T:%.*]] = bitcast <4 x float> [[ARG:%.*]] to <4 x i32>
; CHECK-NEXT:    [[T3:%.*]] = bitcast <4 x float> [[ARG1:%.*]] to <4 x i32>
; CHECK-NEXT:    [[TMP0:%.*]] = icmp eq <4 x i32> [[T]], [[T3]]
; CHECK-NEXT:    [[T5:%.*]] = extractelement <4 x i1> [[TMP0]], i32 0
; CHECK-NEXT:    br i1 [[T5]], label [[BB6:%.*]], label [[BB18:%.*]]
; CHECK:       bb6:
; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <4 x i32> [[T]], [[T3]]
; CHECK-NEXT:    [[T9:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1
; CHECK-NEXT:    br i1 [[T9]], label [[BB10:%.*]], label [[BB18]]
; CHECK:       bb10:
; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <4 x i32> [[T]], [[T3]]
; CHECK-NEXT:    [[T13:%.*]] = extractelement <4 x i1> [[TMP2]], i32 2
; CHECK-NEXT:    br i1 [[T13]], label [[BB14:%.*]], label [[BB18]]
; CHECK:       bb14:
; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq <4 x i32> [[T]], [[T3]]
; CHECK-NEXT:    [[T17:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3
; CHECK-NEXT:    br label [[BB18]]
; CHECK:       bb18:
; CHECK-NEXT:    [[T19:%.*]] = phi i1 [ false, [[BB10]] ], [ false, [[BB6]] ], [ false, [[BB:%.*]] ], [ [[T17]], [[BB14]] ]
; CHECK-NEXT:    ret i1 [[T19]]
;
bb:
  %t = bitcast <4 x float> %arg to <4 x i32>
  %t2 = extractelement <4 x i32> %t, i32 0
  %t3 = bitcast <4 x float> %arg1 to <4 x i32>
  %t4 = extractelement <4 x i32> %t3, i32 0
  %t5 = icmp eq i32 %t2, %t4
  br i1 %t5, label %bb6, label %bb18

bb6:
  %t7 = extractelement <4 x i32> %t, i32 1
  %t8 = extractelement <4 x i32> %t3, i32 1
  %t9 = icmp eq i32 %t7, %t8
  br i1 %t9, label %bb10, label %bb18

bb10:
  %t11 = extractelement <4 x i32> %t, i32 2
  %t12 = extractelement <4 x i32> %t3, i32 2
  %t13 = icmp eq i32 %t11, %t12
  br i1 %t13, label %bb14, label %bb18

bb14:
  %t15 = extractelement <4 x i32> %t, i32 3
  %t16 = extractelement <4 x i32> %t3, i32 3
  %t17 = icmp eq i32 %t15, %t16
  br label %bb18

bb18:
  %t19 = phi i1 [ false, %bb10 ], [ false, %bb6 ], [ false, %bb ], [ %t17, %bb14 ]
  ret i1 %t19
}

; Two fcmps on lane 1 of <2 x double> values; %y1 is extracted in the entry
; block and reused in block t. The SSE2 run expects the scalar IR to be left
; as is, while the AVX2 run expects both compares to become vector fcmps with
; lane 1 extracted from the <2 x i1> result.
define i32 @cmp_v2f64(<2 x double> %x, <2 x double> %y, <2 x double> %z) {
; SSE-LABEL: @cmp_v2f64(
; SSE-NEXT:  entry:
; SSE-NEXT:    [[X1:%.*]] = extractelement <2 x double> [[X:%.*]], i32 1
; SSE-NEXT:    [[Y1:%.*]] = extractelement <2 x double> [[Y:%.*]], i32 1
; SSE-NEXT:    [[CMP1:%.*]] = fcmp oeq double [[X1]], [[Y1]]
; SSE-NEXT:    br i1 [[CMP1]], label [[T:%.*]], label [[F:%.*]]
; SSE:       t:
; SSE-NEXT:    [[Z1:%.*]] = extractelement <2 x double> [[Z:%.*]], i32 1
; SSE-NEXT:    [[CMP2:%.*]] = fcmp ogt double [[Y1]], [[Z1]]
; SSE-NEXT:    [[E:%.*]] = select i1 [[CMP2]], i32 42, i32 99
; SSE-NEXT:    ret i32 [[E]]
; SSE:       f:
; SSE-NEXT:    ret i32 0
;
; AVX-LABEL: @cmp_v2f64(
; AVX-NEXT:  entry:
; AVX-NEXT:    [[TMP0:%.*]] = fcmp oeq <2 x double> [[X:%.*]], [[Y:%.*]]
; AVX-NEXT:    [[CMP1:%.*]] = extractelement <2 x i1> [[TMP0]], i32 1
; AVX-NEXT:    br i1 [[CMP1]], label [[T:%.*]], label [[F:%.*]]
; AVX:       t:
; AVX-NEXT:    [[TMP1:%.*]] = fcmp ogt <2 x double> [[Y]], [[Z:%.*]]
; AVX-NEXT:    [[CMP2:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1
; AVX-NEXT:    [[E:%.*]] = select i1 [[CMP2]], i32 42, i32 99
; AVX-NEXT:    ret i32 [[E]]
; AVX:       f:
; AVX-NEXT:    ret i32 0
;
entry:
  %x1 = extractelement <2 x double> %x, i32 1
  %y1 = extractelement <2 x double> %y, i32 1
  %cmp1 = fcmp oeq double %x1, %y1
  br i1 %cmp1, label %t, label %f

t:
  %z1 = extractelement <2 x double> %z, i32 1
  %cmp2 = fcmp ogt double %y1, %z1
  %e = select i1 %cmp2, i32 42, i32 99
  ret i32 %e

f:
  ret i32 0
}

; Compare of mismatched lanes: lane 0 of %x against lane 1 of %y. The AVX2 run
; expects %y to be shuffled so its lane 1 lands in lane 0, followed by a vector
; fcmp and an extract of lane 0. The SSE2 run expects no change.
define i1 @cmp01_v2f64(<2 x double> %x, <2 x double> %y) {
; SSE-LABEL: @cmp01_v2f64(
; SSE-NEXT:    [[X0:%.*]] = extractelement <2 x double> [[X:%.*]], i32 0
; SSE-NEXT:    [[Y1:%.*]] = extractelement <2 x double> [[Y:%.*]], i32 1
; SSE-NEXT:    [[CMP:%.*]] = fcmp oge double [[X0]], [[Y1]]
; SSE-NEXT:    ret i1 [[CMP]]
;
; AVX-LABEL: @cmp01_v2f64(
; AVX-NEXT:    [[SHIFT:%.*]] = shufflevector <2 x double> [[Y:%.*]], <2 x double> poison, <2 x i32> <i32 1, i32 poison>
; AVX-NEXT:    [[TMP1:%.*]] = fcmp oge <2 x double> [[X:%.*]], [[SHIFT]]
; AVX-NEXT:    [[CMP:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
; AVX-NEXT:    ret i1 [[CMP]]
;
  %x0 = extractelement <2 x double> %x, i32 0
  %y1 = extractelement <2 x double> %y, i32 1
  %cmp = fcmp oge double %x0, %y1
  ret i1 %cmp
}

; Mirror image of the previous test: lane 1 of %x against lane 0 of %y. Here
; the AVX2 run expects %x (the first compare operand) to be the shuffled one,
; and the final extract index is spelled as i64 rather than i32.
define i1 @cmp10_v2f64(<2 x double> %x, <2 x double> %y) {
; SSE-LABEL: @cmp10_v2f64(
; SSE-NEXT:    [[X1:%.*]] = extractelement <2 x double> [[X:%.*]], i32 1
; SSE-NEXT:    [[Y0:%.*]] = extractelement <2 x double> [[Y:%.*]], i32 0
; SSE-NEXT:    [[CMP:%.*]] = fcmp ule double [[X1]], [[Y0]]
; SSE-NEXT:    ret i1 [[CMP]]
;
; AVX-LABEL: @cmp10_v2f64(
; AVX-NEXT:    [[SHIFT:%.*]] = shufflevector <2 x double> [[X:%.*]], <2 x double> poison, <2 x i32> <i32 1, i32 poison>
; AVX-NEXT:    [[TMP1:%.*]] = fcmp ule <2 x double> [[SHIFT]], [[Y:%.*]]
; AVX-NEXT:    [[CMP:%.*]] = extractelement <2 x i1> [[TMP1]], i64 0
; AVX-NEXT:    ret i1 [[CMP]]
;
  %x1 = extractelement <2 x double> %x, i32 1
  %y0 = extractelement <2 x double> %y, i32 0
  %cmp = fcmp ule double %x1, %y0
  ret i1 %cmp
}

; Integer compare of mismatched lanes (1 of %x, 2 of %y) in a <4 x i32>. Both
; runs expect %y to be shuffled so lane 2 moves to lane 1, then a vector icmp
; and an extract of lane 1.
define i1 @cmp12_v4i32(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: @cmp12_v4i32(
; CHECK-NEXT:    [[SHIFT:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> poison, <4 x i32> <i32 poison, i32 2, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> [[X:%.*]], [[SHIFT]]
; CHECK-NEXT:    [[CMP:%.*]] = extractelement <4 x i1> [[TMP1]], i32 1
; CHECK-NEXT:    ret i1 [[CMP]]
;
  %x1 = extractelement <4 x i32> %x, i32 1
  %y2 = extractelement <4 x i32> %y, i32 2
  %cmp = icmp sgt i32 %x1, %y2
  ret i1 %cmp
}

; Compare of two lanes of the same vector whose result is inserted into lane 2
; of %b. The AVX2 run expects a lane-shifting shuffle, a vector fcmp, and a
; final shufflevector that takes lane 2 from the compare result (mask index 6)
; and the remaining lanes from %b. The SSE2 run expects no change.
define <4 x i1> @ins_fcmp_ext_ext(<4 x float> %a, <4 x i1> %b) {
; SSE-LABEL: @ins_fcmp_ext_ext(
; SSE-NEXT:    [[A1:%.*]] = extractelement <4 x float> [[A:%.*]], i32 1
; SSE-NEXT:    [[A2:%.*]] = extractelement <4 x float> [[A]], i32 2
; SSE-NEXT:    [[A21:%.*]] = fcmp ugt float [[A2]], [[A1]]
; SSE-NEXT:    [[R:%.*]] = insertelement <4 x i1> [[B:%.*]], i1 [[A21]], i32 2
; SSE-NEXT:    ret <4 x i1> [[R]]
;
; AVX-LABEL: @ins_fcmp_ext_ext(
; AVX-NEXT:    [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 1, i32 poison>
; AVX-NEXT:    [[TMP1:%.*]] = fcmp ugt <4 x float> [[A]], [[SHIFT]]
; AVX-NEXT:    [[R:%.*]] = shufflevector <4 x i1> [[B:%.*]], <4 x i1> [[TMP1]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
; AVX-NEXT:    ret <4 x i1> [[R]]
;
  %a1 = extractelement <4 x float> %a, i32 1
  %a2 = extractelement <4 x float> %a, i32 2
  %a21 = fcmp ugt float %a2, %a1
  %r = insertelement <4 x i1> %b, i1 %a21, i32 3
  ret <4 x i1> %r
}

; Integer variant of the extract/compare/insert pattern (lanes 2 and 3 of %a,
; result inserted into lane 3 of %b). Both runs expect the scalar IR to be
; left untouched.
define <4 x i1> @ins_icmp_ext_ext(<4 x i32> %a, <4 x i1> %b) {
; CHECK-LABEL: @ins_icmp_ext_ext(
; CHECK-NEXT:    [[A3:%.*]] = extractelement <4 x i32> [[A:%.*]], i32 3
; CHECK-NEXT:    [[A2:%.*]] = extractelement <4 x i32> [[A]], i32 2
; CHECK-NEXT:    [[A23:%.*]] = icmp ule i32 [[A2]], [[A3]]
; CHECK-NEXT:    [[R:%.*]] = insertelement <4 x i1> [[B:%.*]], i1 [[A23]], i32 3
; CHECK-NEXT:    ret <4 x i1> [[R]]
;
  %a3 = extractelement <4 x i32> %a, i32 3
  %a2 = extractelement <4 x i32> %a, i32 2
  %a23 = icmp ule i32 %a2, %a3
  %r = insertelement <4 x i1> %b, i1 %a23, i32 3
  ret <4 x i1> %r
}