; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=sse2 | FileCheck %s
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=avx2 | FileCheck %s

declare void @use(<4 x i32>)
declare void @usef(<4 x float>)

; Eliminating an insert is profitable.

define <16 x i1> @ins0_ins0_i8(i8 %x, i8 %y) {
; CHECK-LABEL: @ins0_ins0_i8(
; CHECK-NEXT:    [[R_SCALAR:%.*]] = icmp eq i8 [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT:    [[R:%.*]] = insertelement <16 x i1> undef, i1 [[R_SCALAR]], i64 0
; CHECK-NEXT:    ret <16 x i1> [[R]]
;
  %i0 = insertelement <16 x i8> undef, i8 %x, i32 0
  %i1 = insertelement <16 x i8> undef, i8 %y, i32 0
  %r = icmp eq <16 x i8> %i0, %i1
  ret <16 x i1> %r
}

; Eliminating an insert is still profitable. Mismatched types on the index are ok.

define <8 x i1> @ins5_ins5_i16(i16 %x, i16 %y) {
; CHECK-LABEL: @ins5_ins5_i16(
; CHECK-NEXT:    [[R_SCALAR:%.*]] = icmp sgt i16 [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT:    [[R:%.*]] = insertelement <8 x i1> undef, i1 [[R_SCALAR]], i64 5
; CHECK-NEXT:    ret <8 x i1> [[R]]
;
  %i0 = insertelement <8 x i16> undef, i16 %x, i8 5
  %i1 = insertelement <8 x i16> undef, i16 %y, i32 5
  %r = icmp sgt <8 x i16> %i0, %i1
  ret <8 x i1> %r
}

; The new vector constant is calculated by constant folding.

define <2 x i1> @ins1_ins1_i64(i64 %x, i64 %y) {
; CHECK-LABEL: @ins1_ins1_i64(
; CHECK-NEXT:    [[R_SCALAR:%.*]] = icmp sle i64 [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT:    [[R:%.*]] = insertelement <2 x i1> <i1 true, i1 false>, i1 [[R_SCALAR]], i64 1
; CHECK-NEXT:    ret <2 x i1> [[R]]
;
  %i0 = insertelement <2 x i64> zeroinitializer, i64 %x, i64 1
  %i1 = insertelement <2 x i64> <i64 1, i64 -1>, i64 %y, i32 1
  %r = icmp sle <2 x i64> %i0, %i1
  ret <2 x i1> %r
}

; The inserts are free, but it's still better to scalarize.

define <2 x i1> @ins0_ins0_f64(double %x, double %y) {
; CHECK-LABEL: @ins0_ins0_f64(
; CHECK-NEXT:    [[R_SCALAR:%.*]] = fcmp nnan ninf uge double [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT:    [[R:%.*]] = insertelement <2 x i1> splat (i1 true), i1 [[R_SCALAR]], i64 0
; CHECK-NEXT:    ret <2 x i1> [[R]]
;
  %i0 = insertelement <2 x double> undef, double %x, i32 0
  %i1 = insertelement <2 x double> undef, double %y, i32 0
  %r = fcmp nnan ninf uge <2 x double> %i0, %i1
  ret <2 x i1> %r
}

; Negative test - mismatched indexes (but could fold this).

define <16 x i1> @ins1_ins0_i8(i8 %x, i8 %y) {
; CHECK-LABEL: @ins1_ins0_i8(
; CHECK-NEXT:    [[I0:%.*]] = insertelement <16 x i8> undef, i8 [[X:%.*]], i32 1
; CHECK-NEXT:    [[I1:%.*]] = insertelement <16 x i8> undef, i8 [[Y:%.*]], i32 0
; CHECK-NEXT:    [[R:%.*]] = icmp sle <16 x i8> [[I0]], [[I1]]
; CHECK-NEXT:    ret <16 x i1> [[R]]
;
  %i0 = insertelement <16 x i8> undef, i8 %x, i32 1
  %i1 = insertelement <16 x i8> undef, i8 %y, i32 0
  %r = icmp sle <16 x i8> %i0, %i1
  ret <16 x i1> %r
}

; Base vector does not have to be undef.

define <4 x i1> @ins0_ins0_i32(i32 %x, i32 %y) {
; CHECK-LABEL: @ins0_ins0_i32(
; CHECK-NEXT:    [[R_SCALAR:%.*]] = icmp ne i32 [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT:    [[R:%.*]] = insertelement <4 x i1> undef, i1 [[R_SCALAR]], i64 0
; CHECK-NEXT:    ret <4 x i1> [[R]]
;
  %i0 = insertelement <4 x i32> zeroinitializer, i32 %x, i32 0
  %i1 = insertelement <4 x i32> undef, i32 %y, i32 0
  %r = icmp ne <4 x i32> %i0, %i1
  ret <4 x i1> %r
}

; Extra use is accounted for in cost calculation.

define <4 x i1> @ins0_ins0_i32_use(i32 %x, i32 %y) {
; CHECK-LABEL: @ins0_ins0_i32_use(
; CHECK-NEXT:    [[I0:%.*]] = insertelement <4 x i32> undef, i32 [[X:%.*]], i32 0
; CHECK-NEXT:    call void @use(<4 x i32> [[I0]])
; CHECK-NEXT:    [[R_SCALAR:%.*]] = icmp ugt i32 [[X]], [[Y:%.*]]
; CHECK-NEXT:    [[R:%.*]] = insertelement <4 x i1> undef, i1 [[R_SCALAR]], i64 0
; CHECK-NEXT:    ret <4 x i1> [[R]]
;
  %i0 = insertelement <4 x i32> undef, i32 %x, i32 0
  call void @use(<4 x i32> %i0)
  %i1 = insertelement <4 x i32> undef, i32 %y, i32 0
  %r = icmp ugt <4 x i32> %i0, %i1
  ret <4 x i1> %r
}

; Extra use is accounted for in cost calculation.

define <4 x i1> @ins1_ins1_f32_use(float %x, float %y) {
; CHECK-LABEL: @ins1_ins1_f32_use(
; CHECK-NEXT:    [[I1:%.*]] = insertelement <4 x float> undef, float [[Y:%.*]], i32 1
; CHECK-NEXT:    call void @usef(<4 x float> [[I1]])
; CHECK-NEXT:    [[R_SCALAR:%.*]] = fcmp ogt float [[X:%.*]], [[Y]]
; CHECK-NEXT:    [[R:%.*]] = insertelement <4 x i1> zeroinitializer, i1 [[R_SCALAR]], i64 1
; CHECK-NEXT:    ret <4 x i1> [[R]]
;
  %i0 = insertelement <4 x float> undef, float %x, i32 1
  %i1 = insertelement <4 x float> undef, float %y, i32 1
  call void @usef(<4 x float> %i1)
  %r = fcmp ogt <4 x float> %i0, %i1
  ret <4 x i1> %r
}

; If the scalar cmp is not cheaper than the vector cmp, extra uses can prevent the transform.

define <4 x i1> @ins2_ins2_f32_uses(float %x, float %y) {
; CHECK-LABEL: @ins2_ins2_f32_uses(
; CHECK-NEXT:    [[I0:%.*]] = insertelement <4 x float> undef, float [[X:%.*]], i32 2
; CHECK-NEXT:    call void @usef(<4 x float> [[I0]])
; CHECK-NEXT:    [[I1:%.*]] = insertelement <4 x float> undef, float [[Y:%.*]], i32 2
; CHECK-NEXT:    call void @usef(<4 x float> [[I1]])
; CHECK-NEXT:    [[R:%.*]] = fcmp oeq <4 x float> [[I0]], [[I1]]
; CHECK-NEXT:    ret <4 x i1> [[R]]
;
  %i0 = insertelement <4 x float> undef, float %x, i32 2
  call void @usef(<4 x float> %i0)
  %i1 = insertelement <4 x float> undef, float %y, i32 2
  call void @usef(<4 x float> %i1)
  %r = fcmp oeq <4 x float> %i0, %i1
  ret <4 x i1> %r
}

define <2 x i1> @constant_op1_i64(i64 %x) {
; CHECK-LABEL: @constant_op1_i64(
; CHECK-NEXT:    [[R_SCALAR:%.*]] = icmp ne i64 [[X:%.*]], 42
; CHECK-NEXT:    [[R:%.*]] = insertelement <2 x i1> undef, i1 [[R_SCALAR]], i64 0
; CHECK-NEXT:    ret <2 x i1> [[R]]
;
  %ins = insertelement <2 x i64> undef, i64 %x, i32 0
  %r = icmp ne <2 x i64> %ins, <i64 42, i64 undef>
  ret <2 x i1> %r
}

define <2 x i1> @constant_op1_i64_not_undef_lane(i64 %x) {
; CHECK-LABEL: @constant_op1_i64_not_undef_lane(
; CHECK-NEXT:    [[R_SCALAR:%.*]] = icmp sge i64 [[X:%.*]], 42
; CHECK-NEXT:    [[R:%.*]] = insertelement <2 x i1> splat (i1 true), i1 [[R_SCALAR]], i64 0
; CHECK-NEXT:    ret <2 x i1> [[R]]
;
  %ins = insertelement <2 x i64> undef, i64 %x, i32 0
  %r = icmp sge <2 x i64> %ins, <i64 42, i64 -42>
  ret <2 x i1> %r
}

; negative test - load prevents the transform

define <2 x i1> @constant_op1_i64_load(ptr %p) {
; CHECK-LABEL: @constant_op1_i64_load(
; CHECK-NEXT:    [[LD:%.*]] = load i64, ptr [[P:%.*]], align 8
; CHECK-NEXT:    [[INS:%.*]] = insertelement <2 x i64> undef, i64 [[LD]], i32 0
; CHECK-NEXT:    [[R:%.*]] = icmp eq <2 x i64> [[INS]], <i64 42, i64 -42>
; CHECK-NEXT:    ret <2 x i1> [[R]]
;
  %ld = load i64, ptr %p
  %ins = insertelement <2 x i64> undef, i64 %ld, i32 0
  %r = icmp eq <2 x i64> %ins, <i64 42, i64 -42>
  ret <2 x i1> %r
}

define <4 x i1> @constant_op0_i32(i32 %x) {
; CHECK-LABEL: @constant_op0_i32(
; CHECK-NEXT:    [[R_SCALAR:%.*]] = icmp ult i32 -42, [[X:%.*]]
; CHECK-NEXT:    [[R:%.*]] = insertelement <4 x i1> zeroinitializer, i1 [[R_SCALAR]], i64 1
; CHECK-NEXT:    ret <4 x i1> [[R]]
;
  %ins = insertelement <4 x i32> undef, i32 %x, i32 1
  %r = icmp ult <4 x i32> <i32 undef, i32 -42, i32 undef, i32 undef>, %ins
  ret <4 x i1> %r
}

define <4 x i1> @constant_op0_i32_not_undef_lane(i32 %x) {
; CHECK-LABEL: @constant_op0_i32_not_undef_lane(
; CHECK-NEXT:    [[R_SCALAR:%.*]] = icmp ule i32 42, [[X:%.*]]
; CHECK-NEXT:    [[R:%.*]] = insertelement <4 x i1> splat (i1 true), i1 [[R_SCALAR]], i64 1
; CHECK-NEXT:    ret <4 x i1> [[R]]
;
  %ins = insertelement <4 x i32> undef, i32 %x, i32 1
  %r = icmp ule <4 x i32> <i32 1, i32 42, i32 42, i32 -42>, %ins
  ret <4 x i1> %r
}

define <2 x i1> @constant_op0_f64(double %x) {
; CHECK-LABEL: @constant_op0_f64(
; CHECK-NEXT:    [[R_SCALAR:%.*]] = fcmp fast olt double 4.200000e+01, [[X:%.*]]
; CHECK-NEXT:    [[R:%.*]] = insertelement <2 x i1> zeroinitializer, i1 [[R_SCALAR]], i64 0
; CHECK-NEXT:    ret <2 x i1> [[R]]
;
  %ins = insertelement <2 x double> undef, double %x, i32 0
  %r = fcmp fast olt <2 x double> <double 42.0, double undef>, %ins
  ret <2 x i1> %r
}

define <2 x i1> @constant_op0_f64_not_undef_lane(double %x) {
; CHECK-LABEL: @constant_op0_f64_not_undef_lane(
; CHECK-NEXT:    [[R_SCALAR:%.*]] = fcmp nnan ueq double -4.200000e+01, [[X:%.*]]
; CHECK-NEXT:    [[R:%.*]] = insertelement <2 x i1> splat (i1 true), i1 [[R_SCALAR]], i64 1
; CHECK-NEXT:    ret <2 x i1> [[R]]
;
  %ins = insertelement <2 x double> undef, double %x, i32 1
  %r = fcmp nnan ueq <2 x double> <double 42.0, double -42.0>, %ins
  ret <2 x i1> %r
}

define <2 x i1> @constant_op1_f64(double %x) {
; CHECK-LABEL: @constant_op1_f64(
; CHECK-NEXT:    [[R_SCALAR:%.*]] = fcmp one double [[X:%.*]], 4.200000e+01
; CHECK-NEXT:    [[R:%.*]] = insertelement <2 x i1> zeroinitializer, i1 [[R_SCALAR]], i64 1
; CHECK-NEXT:    ret <2 x i1> [[R]]
;
  %ins = insertelement <2 x double> undef, double %x, i32 1
  %r = fcmp one <2 x double> %ins, <double undef, double 42.0>
  ret <2 x i1> %r
}

define <4 x i1> @constant_op1_f32_not_undef_lane(float %x) {
; CHECK-LABEL: @constant_op1_f32_not_undef_lane(
; CHECK-NEXT:    [[R_SCALAR:%.*]] = fcmp uge float [[X:%.*]], 4.200000e+01
; CHECK-NEXT:    [[R:%.*]] = insertelement <4 x i1> splat (i1 true), i1 [[R_SCALAR]], i64 0
; CHECK-NEXT:    ret <4 x i1> [[R]]
;
  %ins = insertelement <4 x float> undef, float %x, i32 0
  %r = fcmp uge <4 x float> %ins, <float 42.0, float -42.0, float 0.0, float 1.0>
  ret <4 x i1> %r
}

; negative test - select prevents the transform

define <4 x float> @vec_select_use1(<4 x float> %x, <4 x float> %y, i32 %a, i32 %b) {
; CHECK-LABEL: @vec_select_use1(
; CHECK-NEXT:    [[VECA:%.*]] = insertelement <4 x i32> undef, i32 [[A:%.*]], i8 0
; CHECK-NEXT:    [[VECB:%.*]] = insertelement <4 x i32> undef, i32 [[B:%.*]], i8 0
; CHECK-NEXT:    [[COND:%.*]] = icmp eq <4 x i32> [[VECA]], [[VECB]]
; CHECK-NEXT:    [[R:%.*]] = select <4 x i1> [[COND]], <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]]
; CHECK-NEXT:    ret <4 x float> [[R]]
;
  %veca = insertelement <4 x i32> undef, i32 %a, i8 0
  %vecb = insertelement <4 x i32> undef, i32 %b, i8 0
  %cond = icmp eq <4 x i32> %veca, %vecb
  %r = select <4 x i1> %cond, <4 x float> %x, <4 x float> %y
  ret <4 x float> %r
}

; negative test - select prevents the transform

define <4 x float> @vec_select_use2(<4 x float> %x, <4 x float> %y, float %a) {
; CHECK-LABEL: @vec_select_use2(
; CHECK-NEXT:    [[VECA:%.*]] = insertelement <4 x float> undef, float [[A:%.*]], i8 0
; CHECK-NEXT:    [[COND:%.*]] = fcmp oeq <4 x float> [[VECA]], zeroinitializer
; CHECK-NEXT:    [[R:%.*]] = select <4 x i1> [[COND]], <4 x float> [[X:%.*]], <4 x float> [[Y:%.*]]
; CHECK-NEXT:    ret <4 x float> [[R]]
;
  %veca = insertelement <4 x float> undef, float %a, i8 0
  %cond = fcmp oeq <4 x float> %veca, zeroinitializer
  %r = select <4 x i1> %cond, <4 x float> %x, <4 x float> %y
  ret <4 x float> %r
}

define <4 x i1> @vector_of_pointers(ptr %t1) {
; CHECK-LABEL: @vector_of_pointers(
; CHECK-NEXT:    [[T6_SCALAR:%.*]] = icmp ne ptr [[T1:%.*]], null
; CHECK-NEXT:    [[T6:%.*]] = insertelement <4 x i1> undef, i1 [[T6_SCALAR]], i64 0
; CHECK-NEXT:    ret <4 x i1> [[T6]]
;
  %t5 = insertelement <4 x ptr> undef, ptr %t1, i32 0
  %t6 = icmp ne <4 x ptr> %t5, zeroinitializer
  ret <4 x i1> %t6
}