; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=instcombine -S < %s | FileCheck %s

; Each function below feeds instcombine an extractelement whose vector operand
; is a load, a phi, a binop or a compare. The assertions record whether the
; vector operation is replaced by a scalar one or left as written.

; Extract a constant lane from a plain <4 x i32> load. The expected output
; keeps the vector load and only widens the index type to i64.
define i32 @extract_load(ptr %p) {
;
; CHECK-LABEL: @extract_load(
; CHECK-NEXT:    [[X:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 4
; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x i32> [[X]], i64 1
; CHECK-NEXT:    ret i32 [[EXT]]
;
  %x = load <4 x i32>, ptr %p, align 4
  %ext = extractelement <4 x i32> %x, i32 1
  ret i32 %ext
}

; Same shape as @extract_load, with a <4 x double> load and lane 3.
define double @extract_load_fp(ptr %p) {
;
; CHECK-LABEL: @extract_load_fp(
; CHECK-NEXT:    [[X:%.*]] = load <4 x double>, ptr [[P:%.*]], align 32
; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[X]], i64 3
; CHECK-NEXT:    ret double [[EXT]]
;
  %x = load <4 x double>, ptr %p, align 32
  %ext = extractelement <4 x double> %x, i32 3
  ret double %ext
}

; The load is volatile and written without an explicit alignment. The expected
; output keeps it volatile and prints it with align 32.
define double @extract_load_volatile(ptr %p) {
;
; CHECK-LABEL: @extract_load_volatile(
; CHECK-NEXT:    [[X:%.*]] = load volatile <4 x double>, ptr [[P:%.*]], align 32
; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[X]], i64 2
; CHECK-NEXT:    ret double [[EXT]]
;
  %x = load volatile <4 x double>, ptr %p
  %ext = extractelement <4 x double> %x, i32 2
  ret double %ext
}

; The loaded vector is also stored to %p2, so it has a second use besides the
; extract. The expected output keeps the vector load.
define double @extract_load_extra_use(ptr %p, ptr %p2) {
;
; CHECK-LABEL: @extract_load_extra_use(
; CHECK-NEXT:    [[X:%.*]] = load <4 x double>, ptr [[P:%.*]], align 8
; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[X]], i64 0
; CHECK-NEXT:    store <4 x double> [[X]], ptr [[P2:%.*]], align 32
; CHECK-NEXT:    ret double [[EXT]]
;
  %x = load <4 x double>, ptr %p, align 8
  %ext = extractelement <4 x double> %x, i32 0
  store <4 x double> %x, ptr %p2
  ret double %ext
}

; The extract index is the runtime value %y. The expected output is unchanged
; apart from the explicit align 32 printed on the load.
define double @extract_load_variable_index(ptr %p, i32 %y) {
;
; CHECK-LABEL: @extract_load_variable_index(
; CHECK-NEXT:    [[X:%.*]] = load <4 x double>, ptr [[P:%.*]], align 32
; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[X]], i32 [[Y:%.*]]
; CHECK-NEXT:    ret double [[EXT]]
;
  %x = load <4 x double>, ptr %p
  %ext = extractelement <4 x double> %x, i32 %y
  ret double %ext
}

; A loop-carried <4 x float> phi is seeded with a splat of %t0 and multiplied
; by a splat constant on every iteration; the only use of its lanes is the
; extract of lane 1 in for.body. The expected output carries a scalar float phi
; and a scalar fmul instead of the vector ones. %insert1 is never used.
define void @scalarize_phi(ptr %n, ptr %inout) {
;
; CHECK-LABEL: @scalarize_phi(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[T0:%.*]] = load volatile float, ptr [[INOUT:%.*]], align 4
; CHECK-NEXT:    br label [[FOR_COND:%.*]]
; CHECK:       for.cond:
; CHECK-NEXT:    [[TMP0:%.*]] = phi float [ [[T0]], [[ENTRY:%.*]] ], [ [[TMP1:%.*]], [[FOR_BODY:%.*]] ]
; CHECK-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[T1:%.*]] = load i32, ptr [[N:%.*]], align 4
; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp eq i32 [[I_0]], [[T1]]
; CHECK-NEXT:    br i1 [[CMP_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK:       for.body:
; CHECK-NEXT:    store volatile float [[TMP0]], ptr [[INOUT]], align 4
; CHECK-NEXT:    [[TMP1]] = fmul float [[TMP0]], 0x4002A3D700000000
; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_0]], 1
; CHECK-NEXT:    br label [[FOR_COND]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
entry:
  %t0 = load volatile float, ptr %inout, align 4
  %insert = insertelement <4 x float> poison, float %t0, i32 0
  %splat = shufflevector <4 x float> %insert, <4 x float> poison, <4 x i32> zeroinitializer
  %insert1 = insertelement <4 x float> poison, float 3.0, i32 0
  br label %for.cond

for.cond:
  %x.0 = phi <4 x float> [ %splat, %entry ], [ %mul, %for.body ]
  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %t1 = load i32, ptr %n, align 4
  %cmp = icmp ne i32 %i.0, %t1
  br i1 %cmp, label %for.body, label %for.end

for.body:
  %t2 = extractelement <4 x float> %x.0, i32 1
  store volatile float %t2, ptr %inout, align 4
  %mul = fmul <4 x float> %x.0, <float 0x4002A3D700000000, float 0x4002A3D700000000, float 0x4002A3D700000000, float 0x4002A3D700000000>
  %inc = add nsw i32 %i.0, 1
  br label %for.cond

for.end:
  ret void
}

; fadd with a splat constant, constant lane 2. The expected output extracts
; from %x first and then performs a scalar fadd.
define float @extract_element_binop_splat_constant_index(<4 x float> %x) {
;
; CHECK-LABEL: @extract_element_binop_splat_constant_index(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[X:%.*]], i64 2
; CHECK-NEXT:    [[R:%.*]] = fadd float [[TMP1]], 0x4002A3D700000000
; CHECK-NEXT:    ret float [[R]]
;
  %b = fadd <4 x float> %x, <float 0x4002A3D700000000, float 0x4002A3D700000000, float 0x4002A3D700000000, float 0x4002A3D700000000>
  %r = extractelement <4 x float> %b, i32 2
  ret float %r
}

; The constant operand has an undef lane, but the extracted lane 0 holds 42.0.
; The expected output is a scalar fdiv of 42.0 by lane 0 of %x.
define double @extract_element_binop_splat_with_undef_constant_index(<2 x double> %x) {
;
; CHECK-LABEL: @extract_element_binop_splat_with_undef_constant_index(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x double> [[X:%.*]], i64 0
; CHECK-NEXT:    [[R:%.*]] = fdiv double 4.200000e+01, [[TMP1]]
; CHECK-NEXT:    ret double [[R]]
;
  %b = fdiv <2 x double> <double 42.0, double undef>, %x
  %r = extractelement <2 x double> %b, i32 0
  ret double %r
}

; Non-splat constant <42.0, 43.0> with constant lane 1. The expected output
; multiplies lane 1 of %x by 43.0.
define float @extract_element_binop_nonsplat_constant_index(<2 x float> %x) {
;
; CHECK-LABEL: @extract_element_binop_nonsplat_constant_index(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x float> [[X:%.*]], i64 1
; CHECK-NEXT:    [[R:%.*]] = fmul float [[TMP1]], 4.300000e+01
; CHECK-NEXT:    ret float [[R]]
;
  %b = fmul <2 x float> %x, <float 42.0, float 43.0>
  %r = extractelement <2 x float> %b, i32 1
  ret float %r
}

; Splat constant with a variable index. The expected output extracts %x at %y
; and performs a scalar sdiv by 42.
define i8 @extract_element_binop_splat_variable_index(<4 x i8> %x, i32 %y) {
;
; CHECK-LABEL: @extract_element_binop_splat_variable_index(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i8> [[X:%.*]], i32 [[Y:%.*]]
; CHECK-NEXT:    [[R:%.*]] = sdiv i8 [[TMP1]], 42
; CHECK-NEXT:    ret i8 [[R]]
;
  %b = sdiv <4 x i8> %x, <i8 42, i8 42, i8 42, i8 42>
  %r = extractelement <4 x i8> %b, i32 %y
  ret i8 %r
}

; The constant has an undef lane and the index is variable. The expected output
; leaves the vector mul and the extract as written.
define i8 @extract_element_binop_splat_with_undef_variable_index(<4 x i8> %x, i32 %y) {
;
; CHECK-LABEL: @extract_element_binop_splat_with_undef_variable_index(
; CHECK-NEXT:    [[B:%.*]] = mul <4 x i8> [[X:%.*]], <i8 42, i8 42, i8 undef, i8 42>
; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x i8> [[B]], i32 [[Y:%.*]]
; CHECK-NEXT:    ret i8 [[R]]
;
  %b = mul <4 x i8> %x, <i8 42, i8 42, i8 undef, i8 42>
  %r = extractelement <4 x i8> %b, i32 %y
  ret i8 %r
}

; Non-splat constant (with an undef lane) and a variable index. The expected
; output is unchanged from the input.
define i8 @extract_element_binop_nonsplat_variable_index(<4 x i8> %x, i32 %y) {
;
; CHECK-LABEL: @extract_element_binop_nonsplat_variable_index(
; CHECK-NEXT:    [[B:%.*]] = lshr <4 x i8> [[X:%.*]], <i8 4, i8 3, i8 undef, i8 2>
; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x i8> [[B]], i32 [[Y:%.*]]
; CHECK-NEXT:    ret i8 [[R]]
;
  %b = lshr <4 x i8> %x, <i8 4, i8 3, i8 undef, i8 2>
  %r = extractelement <4 x i8> %b, i32 %y
  ret i8 %r
}

; One fadd operand is a vector load whose only use is the fadd. The expected
; output extracts lane 2 from both %x and the loaded vector and adds the
; scalars; the load itself stays a vector load.
define float @extract_element_load(<4 x float> %x, ptr %ptr) {
;
; CHECK-LABEL: @extract_element_load(
; CHECK-NEXT:    [[LOAD:%.*]] = load <4 x float>, ptr [[PTR:%.*]], align 16
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[X:%.*]], i64 2
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[LOAD]], i64 2
; CHECK-NEXT:    [[R:%.*]] = fadd float [[TMP1]], [[TMP2]]
; CHECK-NEXT:    ret float [[R]]
;
  %load = load <4 x float>, ptr %ptr
  %add = fadd <4 x float> %x, %load
  %r = extractelement <4 x float> %add, i32 2
  ret float %r
}

; Same as @extract_element_load except that the loaded vector is also stored to
; %ptr1. The expected output keeps the vector fadd.
define float @extract_element_multi_Use_load(<4 x float> %x, ptr %ptr0, ptr %ptr1) {
;
; CHECK-LABEL: @extract_element_multi_Use_load(
; CHECK-NEXT:    [[LOAD:%.*]] = load <4 x float>, ptr [[PTR0:%.*]], align 16
; CHECK-NEXT:    store <4 x float> [[LOAD]], ptr [[PTR1:%.*]], align 16
; CHECK-NEXT:    [[ADD:%.*]] = fadd <4 x float> [[X:%.*]], [[LOAD]]
; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x float> [[ADD]], i64 2
; CHECK-NEXT:    ret float [[R]]
;
  %load = load <4 x float>, ptr %ptr0
  store <4 x float> %load, ptr %ptr1
  %add = fadd <4 x float> %x, %load
  %r = extractelement <4 x float> %add, i32 2
  ret float %r
}

; fadd with a splat of 1.0 and a variable index. The expected output extracts
; from %x at %y and then performs a scalar fadd.
define float @extract_element_variable_index(<4 x float> %x, i32 %y) {
;
; CHECK-LABEL: @extract_element_variable_index(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[X:%.*]], i32 [[Y:%.*]]
; CHECK-NEXT:    [[R:%.*]] = fadd float [[TMP1]], 1.000000e+00
; CHECK-NEXT:    ret float [[R]]
;
  %add = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
  %r = extractelement <4 x float> %add, i32 %y
  ret float %r
}

; Lane 0 of the fmul's first operand was inserted from the scalar %f. The
; expected output multiplies %f by lane 0 of %B and keeps the nnan flag.
define float @extelt_binop_insertelt(<4 x float> %A, <4 x float> %B, float %f) {
;
; CHECK-LABEL: @extelt_binop_insertelt(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
; CHECK-NEXT:    [[E:%.*]] = fmul nnan float [[F:%.*]], [[TMP1]]
; CHECK-NEXT:    ret float [[E]]
;
  %C = insertelement <4 x float> %A, float %f, i32 0
  %D = fmul nnan <4 x float> %C, %B
  %E = extractelement <4 x float> %D, i32 0
  ret float %E
}

; We recurse to find a scalarizable operand.
; FIXME: We should propagate the IR flags including wrapping flags.

; Two chained vector binops (add, then mul nsw) over a vector whose lane 0 was
; inserted from the scalar %f. The expected output is a scalar add followed by
; a scalar mul that keeps nsw, each taking lane 0 of %B.
define i32 @extelt_binop_binop_insertelt(<4 x i32> %A, <4 x i32> %B, i32 %f) {
;
; CHECK-LABEL: @extelt_binop_binop_insertelt(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i32> [[B:%.*]], i64 0
; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[F:%.*]], [[TMP1]]
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i32> [[B]], i64 0
; CHECK-NEXT:    [[E:%.*]] = mul nsw i32 [[TMP2]], [[TMP3]]
; CHECK-NEXT:    ret i32 [[E]]
;
  %v = insertelement <4 x i32> %A, i32 %f, i32 0
  %C = add <4 x i32> %v, %B
  %D = mul nsw <4 x i32> %C, %B
  %E = extractelement <4 x i32> %D, i32 0
  ret i32 %E
}

; Extract from a constant vector at a variable index. The expected output is
; unchanged from the input.
define float @extract_element_constant_vector_variable_index(i32 %y) {
;
; CHECK-LABEL: @extract_element_constant_vector_variable_index(
; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>, i32 [[Y:%.*]]
; CHECK-NEXT:    ret float [[R]]
;
  %r = extractelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, i32 %y
  ret float %r
}

; A vector icmp against zero feeds an 'and' with %y, and lane 2 of the result
; is extracted. The expected output does the icmp and the 'and' on scalars
; extracted from %x and %y.
define i1 @cheap_to_extract_icmp(<4 x i32> %x, <4 x i1> %y) {
;
; CHECK-LABEL: @cheap_to_extract_icmp(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i32> [[X:%.*]], i64 2
; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i1> [[Y:%.*]], i64 2
; CHECK-NEXT:    [[R:%.*]] = and i1 [[TMP2]], [[TMP3]]
; CHECK-NEXT:    ret i1 [[R]]
;
  %cmp = icmp eq <4 x i32> %x, zeroinitializer
  %and = and <4 x i1> %cmp, %y
  %r = extractelement <4 x i1> %and, i32 2
  ret i1 %r
}

; Floating-point counterpart of @cheap_to_extract_icmp, using fcmp oeq.
define i1 @cheap_to_extract_fcmp(<4 x float> %x, <4 x i1> %y) {
;
; CHECK-LABEL: @cheap_to_extract_fcmp(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[X:%.*]], i64 2
; CHECK-NEXT:    [[TMP2:%.*]] = fcmp oeq float [[TMP1]], 0.000000e+00
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i1> [[Y:%.*]], i64 2
; CHECK-NEXT:    [[R:%.*]] = and i1 [[TMP2]], [[TMP3]]
; CHECK-NEXT:    ret i1 [[R]]
;
  %cmp = fcmp oeq <4 x float> %x, zeroinitializer
  %and = and <4 x i1> %cmp, %y
  %r = extractelement <4 x i1> %and, i32 2
  ret i1 %r
}

; Extract lane 0 of a vector icmp against zeroinitializer. The expected output
; extracts lane 0 of %arg and compares the scalar with 0.
define i1 @extractelt_vector_icmp_constrhs(<2 x i32> %arg) {
;
; CHECK-LABEL: @extractelt_vector_icmp_constrhs(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i32> [[ARG:%.*]], i64 0
; CHECK-NEXT:    [[EXT:%.*]] = icmp eq i32 [[TMP1]], 0
; CHECK-NEXT:    ret i1 [[EXT]]
;
  %cmp = icmp eq <2 x i32> %arg, zeroinitializer
  %ext = extractelement <2 x i1> %cmp, i32 0
  ret i1 %ext
}

; Floating-point counterpart of @extractelt_vector_icmp_constrhs, using
; fcmp oeq.
define i1 @extractelt_vector_fcmp_constrhs(<2 x float> %arg) {
;
; CHECK-LABEL: @extractelt_vector_fcmp_constrhs(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x float> [[ARG:%.*]], i64 0
; CHECK-NEXT:    [[EXT:%.*]] = fcmp oeq float [[TMP1]], 0.000000e+00
; CHECK-NEXT:    ret i1 [[EXT]]
;
  %cmp = fcmp oeq <2 x float> %arg, zeroinitializer
  %ext = extractelement <2 x i1> %cmp, i32 0
  ret i1 %ext
}

; Same as @extractelt_vector_icmp_constrhs but the lane is the runtime value
; %idx. The expected output still compares an extracted scalar.
define i1 @extractelt_vector_icmp_constrhs_dynidx(<2 x i32> %arg, i32 %idx) {
;
; CHECK-LABEL: @extractelt_vector_icmp_constrhs_dynidx(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i32> [[ARG:%.*]], i32 [[IDX:%.*]]
; CHECK-NEXT:    [[EXT:%.*]] = icmp eq i32 [[TMP1]], 0
; CHECK-NEXT:    ret i1 [[EXT]]
;
  %cmp = icmp eq <2 x i32> %arg, zeroinitializer
  %ext = extractelement <2 x i1> %cmp, i32 %idx
  ret i1 %ext
}

; Same as @extractelt_vector_fcmp_constrhs but the lane is the runtime value
; %idx. The expected output still compares an extracted scalar.
define i1 @extractelt_vector_fcmp_constrhs_dynidx(<2 x float> %arg, i32 %idx) {
;
; CHECK-LABEL: @extractelt_vector_fcmp_constrhs_dynidx(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x float> [[ARG:%.*]], i32 [[IDX:%.*]]
; CHECK-NEXT:    [[EXT:%.*]] = fcmp oeq float [[TMP1]], 0.000000e+00
; CHECK-NEXT:    ret i1 [[EXT]]
;
  %cmp = fcmp oeq <2 x float> %arg, zeroinitializer
  %ext = extractelement <2 x i1> %cmp, i32 %idx
  ret i1 %ext
}

; The fcmp's second operand %add is also written by a volatile store, so it has
; a use other than the compare. The expected output keeps the vector fadd and
; the vector fcmp. %idx is not used by the body.
define i1 @extractelt_vector_fcmp_not_cheap_to_scalarize_multi_use(<2 x float> %arg0, <2 x float> %arg1, <2 x float> %arg2, i32 %idx) {
;
; CHECK-LABEL: @extractelt_vector_fcmp_not_cheap_to_scalarize_multi_use(
; CHECK-NEXT:    [[ADD:%.*]] = fadd <2 x float> [[ARG1:%.*]], [[ARG2:%.*]]
; CHECK-NEXT:    store volatile <2 x float> [[ADD]], ptr undef, align 8
; CHECK-NEXT:    [[CMP:%.*]] = fcmp oeq <2 x float> [[ARG0:%.*]], [[ADD]]
; CHECK-NEXT:    [[EXT:%.*]] = extractelement <2 x i1> [[CMP]], i64 0
; CHECK-NEXT:    ret i1 [[EXT]]
;
  %add = fadd <2 x float> %arg1, %arg2
  store volatile <2 x float> %add, ptr undef
  %cmp = fcmp oeq <2 x float> %arg0, %add
  %ext = extractelement <2 x i1> %cmp, i32 0
  ret i1 %ext
}