; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -instcombine -S < %s | FileCheck %s

; Constant-index extract from a plain vector load: the checks expect the
; vector load and the extractelement to be left in place.
define i32 @extract_load(<4 x i32>* %p) {
;
; CHECK-LABEL: @extract_load(
; CHECK-NEXT:    [[X:%.*]] = load <4 x i32>, <4 x i32>* [[P:%.*]], align 4
; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x i32> [[X]], i32 1
; CHECK-NEXT:    ret i32 [[EXT]]
;
  %x = load <4 x i32>, <4 x i32>* %p, align 4
  %ext = extractelement <4 x i32> %x, i32 1
  ret i32 %ext
}

; Same as above with a floating-point element type.
define double @extract_load_fp(<4 x double>* %p) {
;
; CHECK-LABEL: @extract_load_fp(
; CHECK-NEXT:    [[X:%.*]] = load <4 x double>, <4 x double>* [[P:%.*]], align 32
; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[X]], i32 3
; CHECK-NEXT:    ret double [[EXT]]
;
  %x = load <4 x double>, <4 x double>* %p, align 32
  %ext = extractelement <4 x double> %x, i32 3
  ret double %ext
}

; The load is volatile: the checks expect the full volatile vector load to
; remain (the input omits an alignment; the output prints align 32).
define double @extract_load_volatile(<4 x double>* %p) {
;
; CHECK-LABEL: @extract_load_volatile(
; CHECK-NEXT:    [[X:%.*]] = load volatile <4 x double>, <4 x double>* [[P:%.*]], align 32
; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[X]], i32 2
; CHECK-NEXT:    ret double [[EXT]]
;
  %x = load volatile <4 x double>, <4 x double>* %p
  %ext = extractelement <4 x double> %x, i32 2
  ret double %ext
}

; The loaded vector has a second use (it is stored to %p2), so the whole
; vector value is still needed.
define double @extract_load_extra_use(<4 x double>* %p, <4 x double>* %p2) {
;
; CHECK-LABEL: @extract_load_extra_use(
; CHECK-NEXT:    [[X:%.*]] = load <4 x double>, <4 x double>* [[P:%.*]], align 8
; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[X]], i32 0
; CHECK-NEXT:    store <4 x double> [[X]], <4 x double>* [[P2:%.*]], align 32
; CHECK-NEXT:    ret double [[EXT]]
;
  %x = load <4 x double>, <4 x double>* %p, align 8
  %ext = extractelement <4 x double> %x, i32 0
  store <4 x double> %x, <4 x double>* %p2
  ret double %ext
}

; Extract from a loaded vector with a variable (non-constant) index.
define double @extract_load_variable_index(<4 x double>* %p, i32 %y) {
;
; CHECK-LABEL: @extract_load_variable_index(
; CHECK-NEXT:    [[X:%.*]] = load <4 x double>, <4 x double>* [[P:%.*]], align 32
; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[X]], i32 [[Y:%.*]]
; CHECK-NEXT:    ret double [[EXT]]
;
  %x = load <4 x double>, <4 x double>* %p
  %ext = extractelement <4 x double> %x, i32 %y
  ret double %ext
}

; A vector phi fed by a splat and by an fmul with a splat constant, whose only
; other use is a single-lane extract: the checks expect a scalar float phi and
; a scalar fmul, with no vector operations left in the loop.
define void @scalarize_phi(i32 * %n, float * %inout) {
;
; CHECK-LABEL: @scalarize_phi(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[T0:%.*]] = load volatile float, float* [[INOUT:%.*]], align 4
; CHECK-NEXT:    br label [[FOR_COND:%.*]]
; CHECK:       for.cond:
; CHECK-NEXT:    [[TMP0:%.*]] = phi float [ [[T0]], [[ENTRY:%.*]] ], [ [[TMP1:%.*]], [[FOR_BODY:%.*]] ]
; CHECK-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[T1:%.*]] = load i32, i32* [[N:%.*]], align 4
; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp eq i32 [[I_0]], [[T1]]
; CHECK-NEXT:    br i1 [[CMP_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK:       for.body:
; CHECK-NEXT:    store volatile float [[TMP0]], float* [[INOUT]], align 4
; CHECK-NEXT:    [[TMP1]] = fmul float [[TMP0]], 0x4002A3D700000000
; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_0]], 1
; CHECK-NEXT:    br label [[FOR_COND]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
entry:
  %t0 = load volatile float, float * %inout, align 4
  %insert = insertelement <4 x float> undef, float %t0, i32 0
  %splat = shufflevector <4 x float> %insert, <4 x float> undef, <4 x i32> zeroinitializer
  %insert1 = insertelement <4 x float> undef, float 3.0, i32 0
  br label %for.cond

for.cond:
  %x.0 = phi <4 x float> [ %splat, %entry ], [ %mul, %for.body ]
  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %t1 = load i32, i32 * %n, align 4
  %cmp = icmp ne i32 %i.0, %t1
  br i1 %cmp, label %for.body, label %for.end

for.body:
  %t2 = extractelement <4 x float> %x.0, i32 1
  store volatile float %t2, float * %inout, align 4
  %mul = fmul <4 x float> %x.0, <float 0x4002A3D700000000, float 0x4002A3D700000000, float 0x4002A3D700000000, float 0x4002A3D700000000>
  %inc = add nsw i32 %i.0, 1
  br label %for.cond

for.end:
  ret void
}

; Binop with a splat constant operand, constant extract index: the checks
; expect the extract to move to the vector operand and the fadd to be scalar.
define float @extract_element_binop_splat_constant_index(<4 x float> %x) {
;
; CHECK-LABEL: @extract_element_binop_splat_constant_index(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[X:%.*]], i32 2
; CHECK-NEXT:    [[R:%.*]] = fadd float [[TMP1]], 0x4002A3D700000000
; CHECK-NEXT:    ret float [[R]]
;
  %b = fadd <4 x float> %x, <float 0x4002A3D700000000, float 0x4002A3D700000000, float 0x4002A3D700000000, float 0x4002A3D700000000>
  %r = extractelement <4 x float> %b, i32 2
  ret float %r
}

; The constant operand has an undef lane, but the extracted lane (0) is the
; defined one, so the scalar fdiv uses 42.0.
define double @extract_element_binop_splat_with_undef_constant_index(<2 x double> %x) {
;
; CHECK-LABEL: @extract_element_binop_splat_with_undef_constant_index(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x double> [[X:%.*]], i32 0
; CHECK-NEXT:    [[R:%.*]] = fdiv double 4.200000e+01, [[TMP1]]
; CHECK-NEXT:    ret double [[R]]
;
  %b = fdiv <2 x double> <double 42.0, double undef>, %x
  %r = extractelement <2 x double> %b, i32 0
  ret double %r
}

; Non-splat constant operand with a constant index: the scalar fmul uses the
; constant from the extracted lane (lane 1 -> 43.0).
define float @extract_element_binop_nonsplat_constant_index(<2 x float> %x) {
;
; CHECK-LABEL: @extract_element_binop_nonsplat_constant_index(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x float> [[X:%.*]], i32 1
; CHECK-NEXT:    [[R:%.*]] = fmul float [[TMP1]], 4.300000e+01
; CHECK-NEXT:    ret float [[R]]
;
  %b = fmul <2 x float> %x, <float 42.0, float 43.0>
  %r = extractelement <2 x float> %b, i32 1
  ret float %r
}

; Fully-defined splat constant with a variable index: every lane of the
; constant is 42, and the checks expect a scalar sdiv by 42.
define i8 @extract_element_binop_splat_variable_index(<4 x i8> %x, i32 %y) {
;
; CHECK-LABEL: @extract_element_binop_splat_variable_index(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i8> [[X:%.*]], i32 [[Y:%.*]]
; CHECK-NEXT:    [[R:%.*]] = sdiv i8 [[TMP1]], 42
; CHECK-NEXT:    ret i8 [[R]]
;
  %b = sdiv <4 x i8> %x, <i8 42, i8 42, i8 42, i8 42>
  %r = extractelement <4 x i8> %b, i32 %y
  ret i8 %r
}

; Splat constant containing an undef lane with a variable index: the checks
; expect the vector mul and the extract to stay as written.
define i8 @extract_element_binop_splat_with_undef_variable_index(<4 x i8> %x, i32 %y) {
;
; CHECK-LABEL: @extract_element_binop_splat_with_undef_variable_index(
; CHECK-NEXT:    [[B:%.*]] = mul <4 x i8> [[X:%.*]], <i8 42, i8 42, i8 undef, i8 42>
; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x i8> [[B]], i32 [[Y:%.*]]
; CHECK-NEXT:    ret i8 [[R]]
;
  %b = mul <4 x i8> %x, <i8 42, i8 42, i8 undef, i8 42>
  %r = extractelement <4 x i8> %b, i32 %y
  ret i8 %r
}

; Non-splat constant with a variable index: the checks expect the vector lshr
; and the extract to stay as written.
define i8 @extract_element_binop_nonsplat_variable_index(<4 x i8> %x, i32 %y) {
;
; CHECK-LABEL: @extract_element_binop_nonsplat_variable_index(
; CHECK-NEXT:    [[B:%.*]] = lshr <4 x i8> [[X:%.*]], <i8 4, i8 3, i8 undef, i8 2>
; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x i8> [[B]], i32 [[Y:%.*]]
; CHECK-NEXT:    ret i8 [[R]]
;
  %b = lshr <4 x i8> %x, <i8 4, i8 3, i8 undef, i8 2>
  %r = extractelement <4 x i8> %b, i32 %y
  ret i8 %r
}

; Binop of an argument and a single-use load: the checks expect an extract
; from each operand and a scalar fadd; the load itself stays a vector load.
define float @extract_element_load(<4 x float> %x, <4 x float>* %ptr) {
;
; CHECK-LABEL: @extract_element_load(
; CHECK-NEXT:    [[LOAD:%.*]] = load <4 x float>, <4 x float>* [[PTR:%.*]], align 16
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[LOAD]], i32 2
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[X:%.*]], i32 2
; CHECK-NEXT:    [[R:%.*]] = fadd float [[TMP1]], [[TMP2]]
; CHECK-NEXT:    ret float [[R]]
;
  %load = load <4 x float>, <4 x float>* %ptr
  %add = fadd <4 x float> %x, %load
  %r = extractelement <4 x float> %add, i32 2
  ret float %r
}

; Same as above, but the load has an additional use (a store): the checks
; expect the fadd to remain a vector operation.
define float @extract_element_multi_Use_load(<4 x float> %x, <4 x float>* %ptr0, <4 x float>* %ptr1) {
;
; CHECK-LABEL: @extract_element_multi_Use_load(
; CHECK-NEXT:    [[LOAD:%.*]] = load <4 x float>, <4 x float>* [[PTR0:%.*]], align 16
; CHECK-NEXT:    store <4 x float> [[LOAD]], <4 x float>* [[PTR1:%.*]], align 16
; CHECK-NEXT:    [[ADD:%.*]] = fadd <4 x float> [[LOAD]], [[X:%.*]]
; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x float> [[ADD]], i32 2
; CHECK-NEXT:    ret float [[R]]
;
  %load = load <4 x float>, <4 x float>* %ptr0
  store <4 x float> %load, <4 x float>* %ptr1
  %add = fadd <4 x float> %x, %load
  %r = extractelement <4 x float> %add, i32 2
  ret float %r
}

; fadd with a splat constant, extracted at a variable index: the checks expect
; a scalar fadd of the extracted lane and 1.0.
define float @extract_element_variable_index(<4 x float> %x, i32 %y) {
;
; CHECK-LABEL: @extract_element_variable_index(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[X:%.*]], i32 [[Y:%.*]]
; CHECK-NEXT:    [[R:%.*]] = fadd float [[TMP1]], 1.000000e+00
; CHECK-NEXT:    ret float [[R]]
;
  %add = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
  %r = extractelement <4 x float> %add, i32 %y
  ret float %r
}

; One binop operand is an insertelement into the extracted lane: the checks
; expect the inserted scalar %f to be used directly and the nnan flag kept.
define float @extelt_binop_insertelt(<4 x float> %A, <4 x float> %B, float %f) {
;
; CHECK-LABEL: @extelt_binop_insertelt(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
; CHECK-NEXT:    [[E:%.*]] = fmul nnan float [[TMP1]], [[F:%.*]]
; CHECK-NEXT:    ret float [[E]]
;
  %C = insertelement <4 x float> %A, float %f, i32 0
  %D = fmul nnan <4 x float> %C, %B
  %E = extractelement <4 x float> %D, i32 0
  ret float %E
}

; We recurse to find a scalarizable operand.
define i32 @extelt_binop_binop_insertelt(<4 x i32> %A, <4 x i32> %B, i32 %f) {
;
; CHECK-LABEL: @extelt_binop_binop_insertelt(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i32> [[B:%.*]], i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[TMP1]], [[F:%.*]]
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i32> [[B]], i32 0
; CHECK-NEXT:    [[E:%.*]] = mul nsw i32 [[TMP2]], [[TMP3]]
; CHECK-NEXT:    ret i32 [[E]]
;
  %v = insertelement <4 x i32> %A, i32 %f, i32 0
  %C = add <4 x i32> %v, %B
  %D = mul nsw <4 x i32> %C, %B
  %E = extractelement <4 x i32> %D, i32 0
  ret i32 %E
}

; Extract from a non-splat constant vector at a variable index: the checks
; expect the instruction to be left unchanged.
define float @extract_element_constant_vector_variable_index(i32 %y) {
;
; CHECK-LABEL: @extract_element_constant_vector_variable_index(
; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>, i32 [[Y:%.*]]
; CHECK-NEXT:    ret float [[R]]
;
  %r = extractelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, i32 %y
  ret float %r
}

; An icmp against a constant feeding an 'and': the checks expect both the
; compare and the 'and' to become scalar operations on extracted lanes.
define i1 @cheap_to_extract_icmp(<4 x i32> %x, <4 x i1> %y) {
;
; CHECK-LABEL: @cheap_to_extract_icmp(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 2
; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i1> [[Y:%.*]], i32 2
; CHECK-NEXT:    [[R:%.*]] = and i1 [[TMP2]], [[TMP3]]
; CHECK-NEXT:    ret i1 [[R]]
;
  %cmp = icmp eq <4 x i32> %x, zeroinitializer
  %and = and <4 x i1> %cmp, %y
  %r = extractelement <4 x i1> %and, i32 2
  ret i1 %r
}

; Floating-point variant of cheap_to_extract_icmp.
define i1 @cheap_to_extract_fcmp(<4 x float> %x, <4 x i1> %y) {
;
; CHECK-LABEL: @cheap_to_extract_fcmp(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[X:%.*]], i32 2
; CHECK-NEXT:    [[TMP2:%.*]] = fcmp oeq float [[TMP1]], 0.000000e+00
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i1> [[Y:%.*]], i32 2
; CHECK-NEXT:    [[R:%.*]] = and i1 [[TMP2]], [[TMP3]]
; CHECK-NEXT:    ret i1 [[R]]
;
  %cmp = fcmp oeq <4 x float> %x, zeroinitializer
  %and = and <4 x i1> %cmp, %y
  %r = extractelement <4 x i1> %and, i32 2
  ret i1 %r
}

; Extract (constant index) of an icmp with a constant right-hand side: the
; checks expect a scalar icmp on the extracted lane.
define i1 @extractelt_vector_icmp_constrhs(<2 x i32> %arg) {
;
; CHECK-LABEL: @extractelt_vector_icmp_constrhs(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i32> [[ARG:%.*]], i32 0
; CHECK-NEXT:    [[EXT:%.*]] = icmp eq i32 [[TMP1]], 0
; CHECK-NEXT:    ret i1 [[EXT]]
;
  %cmp = icmp eq <2 x i32> %arg, zeroinitializer
  %ext = extractelement <2 x i1> %cmp, i32 0
  ret i1 %ext
}

; Floating-point variant of extractelt_vector_icmp_constrhs.
define i1 @extractelt_vector_fcmp_constrhs(<2 x float> %arg) {
;
; CHECK-LABEL: @extractelt_vector_fcmp_constrhs(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x float> [[ARG:%.*]], i32 0
; CHECK-NEXT:    [[EXT:%.*]] = fcmp oeq float [[TMP1]], 0.000000e+00
; CHECK-NEXT:    ret i1 [[EXT]]
;
  %cmp = fcmp oeq <2 x float> %arg, zeroinitializer
  %ext = extractelement <2 x i1> %cmp, i32 0
  ret i1 %ext
}

; Same icmp pattern with a variable (dynamic) extract index.
define i1 @extractelt_vector_icmp_constrhs_dynidx(<2 x i32> %arg, i32 %idx) {
;
; CHECK-LABEL: @extractelt_vector_icmp_constrhs_dynidx(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i32> [[ARG:%.*]], i32 [[IDX:%.*]]
; CHECK-NEXT:    [[EXT:%.*]] = icmp eq i32 [[TMP1]], 0
; CHECK-NEXT:    ret i1 [[EXT]]
;
  %cmp = icmp eq <2 x i32> %arg, zeroinitializer
  %ext = extractelement <2 x i1> %cmp, i32 %idx
  ret i1 %ext
}

; Same fcmp pattern with a variable (dynamic) extract index.
define i1 @extractelt_vector_fcmp_constrhs_dynidx(<2 x float> %arg, i32 %idx) {
;
; CHECK-LABEL: @extractelt_vector_fcmp_constrhs_dynidx(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x float> [[ARG:%.*]], i32 [[IDX:%.*]]
; CHECK-NEXT:    [[EXT:%.*]] = fcmp oeq float [[TMP1]], 0.000000e+00
; CHECK-NEXT:    ret i1 [[EXT]]
;
  %cmp = fcmp oeq <2 x float> %arg, zeroinitializer
  %ext = extractelement <2 x i1> %cmp, i32 %idx
  ret i1 %ext
}

; The fadd feeding the compare has a second use (a volatile store), and the
; checks expect the vector fadd/fcmp to remain. Note that %idx is declared
; but unused: the extract uses the constant index 0.
define i1 @extractelt_vector_fcmp_not_cheap_to_scalarize_multi_use(<2 x float> %arg0, <2 x float> %arg1, <2 x float> %arg2, i32 %idx) {
;
; CHECK-LABEL: @extractelt_vector_fcmp_not_cheap_to_scalarize_multi_use(
; CHECK-NEXT:    [[ADD:%.*]] = fadd <2 x float> [[ARG1:%.*]], [[ARG2:%.*]]
; CHECK-NEXT:    store volatile <2 x float> [[ADD]], <2 x float>* undef, align 8
; CHECK-NEXT:    [[CMP:%.*]] = fcmp oeq <2 x float> [[ADD]], [[ARG0:%.*]]
; CHECK-NEXT:    [[EXT:%.*]] = extractelement <2 x i1> [[CMP]], i32 0
; CHECK-NEXT:    ret i1 [[EXT]]
;
  %add = fadd <2 x float> %arg1, %arg2
  store volatile <2 x float> %add, <2 x float>* undef
  %cmp = fcmp oeq <2 x float> %arg0, %add
  %ext = extractelement <2 x i1> %cmp, i32 0
  ret i1 %ext
}