; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=instcombine -S < %s | FileCheck %s

; Constant-index extract from a single-use vector load. The expected output
; keeps the <4 x i32> load and only changes the index constant from i32 to i64.
define i32 @extract_load(ptr %p) {
;
; CHECK-LABEL: @extract_load(
; CHECK-NEXT:    [[X:%.*]] = load <4 x i32>, ptr [[P:%.*]], align 4
; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x i32> [[X]], i64 1
; CHECK-NEXT:    ret i32 [[EXT]]
;
  %x = load <4 x i32>, ptr %p, align 4
  %ext = extractelement <4 x i32> %x, i32 1
  ret i32 %ext
}

; Same pattern as @extract_load with a double element type and lane 3.
; The expected output again keeps the vector load.
define double @extract_load_fp(ptr %p) {
;
; CHECK-LABEL: @extract_load_fp(
; CHECK-NEXT:    [[X:%.*]] = load <4 x double>, ptr [[P:%.*]], align 32
; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[X]], i64 3
; CHECK-NEXT:    ret double [[EXT]]
;
  %x = load <4 x double>, ptr %p, align 32
  %ext = extractelement <4 x double> %x, i32 3
  ret double %ext
}

; The vector load is volatile. The expected output keeps the volatile vector
; load; the alignment omitted in the input is printed as align 32.
define double @extract_load_volatile(ptr %p) {
;
; CHECK-LABEL: @extract_load_volatile(
; CHECK-NEXT:    [[X:%.*]] = load volatile <4 x double>, ptr [[P:%.*]], align 32
; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[X]], i64 2
; CHECK-NEXT:    ret double [[EXT]]
;
  %x = load volatile <4 x double>, ptr %p
  %ext = extractelement <4 x double> %x, i32 2
  ret double %ext
}

; The loaded vector has a second use: it is also stored to %p2. The expected
; output keeps the load, the extract and the store.
define double @extract_load_extra_use(ptr %p, ptr %p2) {
;
; CHECK-LABEL: @extract_load_extra_use(
; CHECK-NEXT:    [[X:%.*]] = load <4 x double>, ptr [[P:%.*]], align 8
; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[X]], i64 0
; CHECK-NEXT:    store <4 x double> [[X]], ptr [[P2:%.*]], align 32
; CHECK-NEXT:    ret double [[EXT]]
;
  %x = load <4 x double>, ptr %p, align 8
  %ext = extractelement <4 x double> %x, i32 0
  store <4 x double> %x, ptr %p2
  ret double %ext
}

; The extract index is the function argument %y rather than a constant. The
; expected output keeps the vector load and leaves the i32 index untouched.
define double @extract_load_variable_index(ptr %p, i32 %y) {
;
; CHECK-LABEL: @extract_load_variable_index(
; CHECK-NEXT:    [[X:%.*]] = load <4 x double>, ptr [[P:%.*]], align 32
; CHECK-NEXT:    [[EXT:%.*]] = extractelement <4 x double> [[X]], i32 [[Y:%.*]]
; CHECK-NEXT:    ret double [[EXT]]
;
  %x = load <4 x double>, ptr %p
  %ext = extractelement <4 x double> %x, i32 %y
  ret double %ext
}

; A splat of %t0 is carried around a loop through a vector phi; the loop body
; extracts one lane and multiplies the vector by a splat constant. The expected
; output contains only scalar float operations: a float phi and a float fmul.
; %insert1 is defined in the input but has no users.
define void @scalarize_phi(ptr %n, ptr %inout) {
;
; CHECK-LABEL: @scalarize_phi(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[T0:%.*]] = load volatile float, ptr [[INOUT:%.*]], align 4
; CHECK-NEXT:    br label [[FOR_COND:%.*]]
; CHECK:       for.cond:
; CHECK-NEXT:    [[TMP0:%.*]] = phi float [ [[T0]], [[ENTRY:%.*]] ], [ [[TMP1:%.*]], [[FOR_BODY:%.*]] ]
; CHECK-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[T1:%.*]] = load i32, ptr [[N:%.*]], align 4
; CHECK-NEXT:    [[CMP_NOT:%.*]] = icmp eq i32 [[I_0]], [[T1]]
; CHECK-NEXT:    br i1 [[CMP_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; CHECK:       for.body:
; CHECK-NEXT:    store volatile float [[TMP0]], ptr [[INOUT]], align 4
; CHECK-NEXT:    [[TMP1]] = fmul float [[TMP0]], 0x4002A3D700000000
; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_0]], 1
; CHECK-NEXT:    br label [[FOR_COND]]
; CHECK:       for.end:
; CHECK-NEXT:    ret void
;
entry:
  %t0 = load volatile float, ptr %inout, align 4
  %insert = insertelement <4 x float> undef, float %t0, i32 0
  %splat = shufflevector <4 x float> %insert, <4 x float> undef, <4 x i32> zeroinitializer
  %insert1 = insertelement <4 x float> undef, float 3.0, i32 0
  br label %for.cond

for.cond:
  %x.0 = phi <4 x float> [ %splat, %entry ], [ %mul, %for.body ]
  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %t1 = load i32, ptr %n, align 4
  %cmp = icmp ne i32 %i.0, %t1
  br i1 %cmp, label %for.body, label %for.end

for.body:
  %t2 = extractelement <4 x float> %x.0, i32 1
  store volatile float %t2, ptr %inout, align 4
  %mul = fmul <4 x float> %x.0, <float 0x4002A3D700000000, float 0x4002A3D700000000, float 0x4002A3D700000000, float 0x4002A3D700000000>
  %inc = add nsw i32 %i.0, 1
  br label %for.cond

for.end:
  ret void
}

; fadd of %x with a splat constant, extracted at constant lane 2. The expected
; output extracts lane 2 of %x first and then performs a scalar fadd.
define float @extract_element_binop_splat_constant_index(<4 x float> %x) {
;
; CHECK-LABEL: @extract_element_binop_splat_constant_index(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[X:%.*]], i64 2
; CHECK-NEXT:    [[R:%.*]] = fadd float [[TMP1]], 0x4002A3D700000000
; CHECK-NEXT:    ret float [[R]]
;
  %b = fadd <4 x float> %x, <float 0x4002A3D700000000, float 0x4002A3D700000000, float 0x4002A3D700000000, float 0x4002A3D700000000>
  %r = extractelement <4 x float> %b, i32 2
  ret float %r
}

; The constant fdiv operand has an undef lane, but the extracted lane (0) holds
; 42.0. The expected output is a scalar fdiv of 42.0 by lane 0 of %x.
define double @extract_element_binop_splat_with_undef_constant_index(<2 x double> %x) {
;
; CHECK-LABEL: @extract_element_binop_splat_with_undef_constant_index(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x double> [[X:%.*]], i64 0
; CHECK-NEXT:    [[R:%.*]] = fdiv double 4.200000e+01, [[TMP1]]
; CHECK-NEXT:    ret double [[R]]
;
  %b = fdiv <2 x double> <double 42.0, double undef>, %x
  %r = extractelement <2 x double> %b, i32 0
  ret double %r
}

; fmul by a non-splat constant vector with a constant extract index. The
; expected output is a scalar fmul using the constant from lane 1 (43.0).
define float @extract_element_binop_nonsplat_constant_index(<2 x float> %x) {
;
; CHECK-LABEL: @extract_element_binop_nonsplat_constant_index(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x float> [[X:%.*]], i64 1
; CHECK-NEXT:    [[R:%.*]] = fmul float [[TMP1]], 4.300000e+01
; CHECK-NEXT:    ret float [[R]]
;
  %b = fmul <2 x float> %x, <float 42.0, float 43.0>
  %r = extractelement <2 x float> %b, i32 1
  ret float %r
}

; sdiv of %x by a splat constant, extracted at a variable index. The expected
; output extracts from %x with the variable index, then does a scalar sdiv by 42.
define i8 @extract_element_binop_splat_variable_index(<4 x i8> %x, i32 %y) {
;
; CHECK-LABEL: @extract_element_binop_splat_variable_index(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i8> [[X:%.*]], i32 [[Y:%.*]]
; CHECK-NEXT:    [[R:%.*]] = sdiv i8 [[TMP1]], 42
; CHECK-NEXT:    ret i8 [[R]]
;
  %b = sdiv <4 x i8> %x, <i8 42, i8 42, i8 42, i8 42>
  %r = extractelement <4 x i8> %b, i32 %y
  ret i8 %r
}

; We cannot move the extractelement before the sdiv here, because %z may be
; out of range, making the divisor poison and resulting in immediate UB.
define i8 @extract_element_binop_splat_variable_index_may_trap(<4 x i8> %x, <4 x i8> %y, i32 %z) {
;
; CHECK-LABEL: @extract_element_binop_splat_variable_index_may_trap(
; CHECK-NEXT:    [[B:%.*]] = sdiv <4 x i8> splat (i8 42), [[Y:%.*]]
; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x i8> [[B]], i32 [[Z:%.*]]
; CHECK-NEXT:    ret i8 [[R]]
;
  %b = sdiv <4 x i8> splat (i8 42), %y
  %r = extractelement <4 x i8> %b, i32 %z
  ret i8 %r
}

; Moving the extractelement first is fine here, because the index is known to
; be valid, so we can't introduce additional poison.
define i8 @extract_element_binop_constant_index_may_trap(<4 x i8> %x, <4 x i8> %y, i32 %z) {
;
; CHECK-LABEL: @extract_element_binop_constant_index_may_trap(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i8> [[Y:%.*]], i64 3
; CHECK-NEXT:    [[R:%.*]] = sdiv i8 42, [[TMP1]]
; CHECK-NEXT:    ret i8 [[R]]
;
  %b = sdiv <4 x i8> splat (i8 42), %y
  %r = extractelement <4 x i8> %b, i32 3
  ret i8 %r
}

; mul by a constant vector that has an undef lane, extracted at a variable
; index. The expected output is unchanged: the vector mul is kept.
define i8 @extract_element_binop_splat_with_undef_variable_index(<4 x i8> %x, i32 %y) {
;
; CHECK-LABEL: @extract_element_binop_splat_with_undef_variable_index(
; CHECK-NEXT:    [[B:%.*]] = mul <4 x i8> [[X:%.*]], <i8 42, i8 42, i8 undef, i8 42>
; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x i8> [[B]], i32 [[Y:%.*]]
; CHECK-NEXT:    ret i8 [[R]]
;
  %b = mul <4 x i8> %x, <i8 42, i8 42, i8 undef, i8 42>
  %r = extractelement <4 x i8> %b, i32 %y
  ret i8 %r
}

; lshr by a non-splat constant vector (with an undef lane), extracted at a
; variable index. The expected output is unchanged: the vector lshr is kept.
define i8 @extract_element_binop_nonsplat_variable_index(<4 x i8> %x, i32 %y) {
;
; CHECK-LABEL: @extract_element_binop_nonsplat_variable_index(
; CHECK-NEXT:    [[B:%.*]] = lshr <4 x i8> [[X:%.*]], <i8 4, i8 3, i8 undef, i8 2>
; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x i8> [[B]], i32 [[Y:%.*]]
; CHECK-NEXT:    ret i8 [[R]]
;
  %b = lshr <4 x i8> %x, <i8 4, i8 3, i8 undef, i8 2>
  %r = extractelement <4 x i8> %b, i32 %y
  ret i8 %r
}

; One fadd operand is a vector load whose only use is the fadd. The expected
; output extracts lane 2 from both %x and the loaded vector and adds the
; scalars; the vector load itself remains.
define float @extract_element_load(<4 x float> %x, ptr %ptr) {
;
; CHECK-LABEL: @extract_element_load(
; CHECK-NEXT:    [[LOAD:%.*]] = load <4 x float>, ptr [[PTR:%.*]], align 16
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[X:%.*]], i64 2
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[LOAD]], i64 2
; CHECK-NEXT:    [[R:%.*]] = fadd float [[TMP1]], [[TMP2]]
; CHECK-NEXT:    ret float [[R]]
;
  %load = load <4 x float>, ptr %ptr
  %add = fadd <4 x float> %x, %load
  %r = extractelement <4 x float> %add, i32 2
  ret float %r
}

; Like @extract_element_load, but the loaded vector is also stored to %ptr1.
; The expected output keeps the vector fadd and extracts from its result.
define float @extract_element_multi_Use_load(<4 x float> %x, ptr %ptr0, ptr %ptr1) {
;
; CHECK-LABEL: @extract_element_multi_Use_load(
; CHECK-NEXT:    [[LOAD:%.*]] = load <4 x float>, ptr [[PTR0:%.*]], align 16
; CHECK-NEXT:    store <4 x float> [[LOAD]], ptr [[PTR1:%.*]], align 16
; CHECK-NEXT:    [[ADD:%.*]] = fadd <4 x float> [[X:%.*]], [[LOAD]]
; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x float> [[ADD]], i64 2
; CHECK-NEXT:    ret float [[R]]
;
  %load = load <4 x float>, ptr %ptr0
  store <4 x float> %load, ptr %ptr1
  %add = fadd <4 x float> %x, %load
  %r = extractelement <4 x float> %add, i32 2
  ret float %r
}

; fadd of %x with a splat of 1.0, extracted at a variable index. The expected
; output extracts from %x with the variable index, then does a scalar fadd.
define float @extract_element_variable_index(<4 x float> %x, i32 %y) {
;
; CHECK-LABEL: @extract_element_variable_index(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[X:%.*]], i32 [[Y:%.*]]
; CHECK-NEXT:    [[R:%.*]] = fadd float [[TMP1]], 1.000000e+00
; CHECK-NEXT:    ret float [[R]]
;
  %add = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
  %r = extractelement <4 x float> %add, i32 %y
  ret float %r
}

; Lane 0 of one fmul operand was inserted from the scalar %f, and lane 0 is the
; lane extracted. The expected output multiplies %f by lane 0 of %B and keeps
; the nnan flag on the scalar fmul.
define float @extelt_binop_insertelt(<4 x float> %A, <4 x float> %B, float %f) {
;
; CHECK-LABEL: @extelt_binop_insertelt(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
; CHECK-NEXT:    [[E:%.*]] = fmul nnan float [[F:%.*]], [[TMP1]]
; CHECK-NEXT:    ret float [[E]]
;
  %C = insertelement <4 x float> %A, float %f, i32 0
  %D = fmul nnan <4 x float> %C, %B
  %E = extractelement <4 x float> %D, i32 0
  ret float %E
}

; We recurse to find a scalarizable operand.
; The expected output is a scalar add followed by a scalar mul nsw, with lane 0
; of %B extracted twice.
define i32 @extelt_binop_binop_insertelt(<4 x i32> %A, <4 x i32> %B, i32 %f) {
;
; CHECK-LABEL: @extelt_binop_binop_insertelt(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i32> [[B:%.*]], i64 0
; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[F:%.*]], [[TMP1]]
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i32> [[B]], i64 0
; CHECK-NEXT:    [[E:%.*]] = mul nsw i32 [[TMP2]], [[TMP3]]
; CHECK-NEXT:    ret i32 [[E]]
;
  %v = insertelement <4 x i32> %A, i32 %f, i32 0
  %C = add <4 x i32> %v, %B
  %D = mul nsw <4 x i32> %C, %B
  %E = extractelement <4 x i32> %D, i32 0
  ret i32 %E
}

; Variable-index extract directly from a constant vector. The expected output
; is the same extractelement, unchanged.
define float @extract_element_constant_vector_variable_index(i32 %y) {
;
; CHECK-LABEL: @extract_element_constant_vector_variable_index(
; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x float> <float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00>, i32 [[Y:%.*]]
; CHECK-NEXT:    ret float [[R]]
;
  %r = extractelement <4 x float> <float 1.0, float 2.0, float 3.0, float 4.0>, i32 %y
  ret float %r
}

; Vector icmp against zero, and-ed with %y, extracted at constant lane 2. The
; expected output performs both the icmp and the and on extracted scalars.
define i1 @cheap_to_extract_icmp(<4 x i32> %x, <4 x i1> %y) {
;
; CHECK-LABEL: @cheap_to_extract_icmp(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i32> [[X:%.*]], i64 2
; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i1> [[Y:%.*]], i64 2
; CHECK-NEXT:    [[R:%.*]] = and i1 [[TMP2]], [[TMP3]]
; CHECK-NEXT:    ret i1 [[R]]
;
  %cmp = icmp eq <4 x i32> %x, zeroinitializer
  %and = and <4 x i1> %cmp, %y
  %r = extractelement <4 x i1> %and, i32 2
  ret i1 %r
}

; Floating-point counterpart of @cheap_to_extract_icmp: the expected output
; performs the fcmp oeq and the and on extracted scalars.
define i1 @cheap_to_extract_fcmp(<4 x float> %x, <4 x i1> %y) {
;
; CHECK-LABEL: @cheap_to_extract_fcmp(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[X:%.*]], i64 2
; CHECK-NEXT:    [[TMP2:%.*]] = fcmp oeq float [[TMP1]], 0.000000e+00
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i1> [[Y:%.*]], i64 2
; CHECK-NEXT:    [[R:%.*]] = and i1 [[TMP2]], [[TMP3]]
; CHECK-NEXT:    ret i1 [[R]]
;
  %cmp = fcmp oeq <4 x float> %x, zeroinitializer
  %and = and <4 x i1> %cmp, %y
  %r = extractelement <4 x i1> %and, i32 2
  ret i1 %r
}

; Extract lane 0 of a vector icmp against zeroinitializer. The expected output
; extracts lane 0 of %arg and compares the scalar with 0.
define i1 @extractelt_vector_icmp_constrhs(<2 x i32> %arg) {
;
; CHECK-LABEL: @extractelt_vector_icmp_constrhs(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i32> [[ARG:%.*]], i64 0
; CHECK-NEXT:    [[EXT:%.*]] = icmp eq i32 [[TMP1]], 0
; CHECK-NEXT:    ret i1 [[EXT]]
;
  %cmp = icmp eq <2 x i32> %arg, zeroinitializer
  %ext = extractelement <2 x i1> %cmp, i32 0
  ret i1 %ext
}

; Extract lane 0 of a vector fcmp oeq against zeroinitializer. The expected
; output extracts lane 0 of %arg and compares the scalar with 0.0.
define i1 @extractelt_vector_fcmp_constrhs(<2 x float> %arg) {
;
; CHECK-LABEL: @extractelt_vector_fcmp_constrhs(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x float> [[ARG:%.*]], i64 0
; CHECK-NEXT:    [[EXT:%.*]] = fcmp oeq float [[TMP1]], 0.000000e+00
; CHECK-NEXT:    ret i1 [[EXT]]
;
  %cmp = fcmp oeq <2 x float> %arg, zeroinitializer
  %ext = extractelement <2 x i1> %cmp, i32 0
  ret i1 %ext
}

; Variable-index variant of @extractelt_vector_icmp_constrhs. The expected
; output extracts from %arg with the variable index and compares the scalar.
define i1 @extractelt_vector_icmp_constrhs_dynidx(<2 x i32> %arg, i32 %idx) {
;
; CHECK-LABEL: @extractelt_vector_icmp_constrhs_dynidx(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x i32> [[ARG:%.*]], i32 [[IDX:%.*]]
; CHECK-NEXT:    [[EXT:%.*]] = icmp eq i32 [[TMP1]], 0
; CHECK-NEXT:    ret i1 [[EXT]]
;
  %cmp = icmp eq <2 x i32> %arg, zeroinitializer
  %ext = extractelement <2 x i1> %cmp, i32 %idx
  ret i1 %ext
}

; Variable-index variant of @extractelt_vector_fcmp_constrhs. The expected
; output extracts from %arg with the variable index and compares the scalar.
define i1 @extractelt_vector_fcmp_constrhs_dynidx(<2 x float> %arg, i32 %idx) {
;
; CHECK-LABEL: @extractelt_vector_fcmp_constrhs_dynidx(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x float> [[ARG:%.*]], i32 [[IDX:%.*]]
; CHECK-NEXT:    [[EXT:%.*]] = fcmp oeq float [[TMP1]], 0.000000e+00
; CHECK-NEXT:    ret i1 [[EXT]]
;
  %cmp = fcmp oeq <2 x float> %arg, zeroinitializer
  %ext = extractelement <2 x i1> %cmp, i32 %idx
  ret i1 %ext
}

; The vector fcmp carries the nsz and arcp fast-math flags. The expected output
; has the same flags on the scalar fcmp.
define i1 @extractelt_vector_fcmp_copy_flags(<4 x float> %x) {
; CHECK-LABEL: @extractelt_vector_fcmp_copy_flags(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[X:%.*]], i64 2
; CHECK-NEXT:    [[R:%.*]] = fcmp nsz arcp oeq float [[TMP1]], 0.000000e+00
; CHECK-NEXT:    ret i1 [[R]]
;
  %cmp = fcmp nsz arcp oeq <4 x float> %x, zeroinitializer
  %r = extractelement <4 x i1> %cmp, i32 2
  ret i1 %r
}

; The fcmp operand %add has a second use (a volatile store). The expected
; output keeps the vector fadd and the vector fcmp and extracts from the
; compare result. %idx is not used by the input.
define i1 @extractelt_vector_fcmp_not_cheap_to_scalarize_multi_use(<2 x float> %arg0, <2 x float> %arg1, <2 x float> %arg2, i32 %idx) {
;
; CHECK-LABEL: @extractelt_vector_fcmp_not_cheap_to_scalarize_multi_use(
; CHECK-NEXT:    [[ADD:%.*]] = fadd <2 x float> [[ARG1:%.*]], [[ARG2:%.*]]
; CHECK-NEXT:    store volatile <2 x float> [[ADD]], ptr undef, align 8
; CHECK-NEXT:    [[CMP:%.*]] = fcmp oeq <2 x float> [[ARG0:%.*]], [[ADD]]
; CHECK-NEXT:    [[EXT:%.*]] = extractelement <2 x i1> [[CMP]], i64 0
; CHECK-NEXT:    ret i1 [[EXT]]
;
  %add = fadd <2 x float> %arg1, %arg2
  store volatile <2 x float> %add, ptr undef
  %cmp = fcmp oeq <2 x float> %arg0, %add
  %ext = extractelement <2 x i1> %cmp, i32 0
  ret i1 %ext
}