; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=SSE2 | FileCheck %s --check-prefixes=CHECK,SSE
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=AVX2 | FileCheck %s --check-prefixes=CHECK,AVX

; External functions that tests in this file call on a scalar value to give
; that value an additional use.
declare void @use_i8(i8)
declare void @use_f32(float)

; Eliminating extract is profitable.

define i8 @ext0_ext0_add(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: @ext0_ext0_add(
; CHECK-NEXT:    [[TMP1:%.*]] = add <16 x i8> [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT:    [[R:%.*]] = extractelement <16 x i8> [[TMP1]], i32 0
; CHECK-NEXT:    ret i8 [[R]]
;
  %e0 = extractelement <16 x i8> %x, i32 0
  %e1 = extractelement <16 x i8> %y, i32 0
  %r = add i8 %e0, %e1
  ret i8 %r
}

; Eliminating extract is still profitable. Flags propagate.

define i8 @ext1_ext1_add_flags(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: @ext1_ext1_add_flags(
; CHECK-NEXT:    [[TMP1:%.*]] = add nuw nsw <16 x i8> [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT:    [[R:%.*]] = extractelement <16 x i8> [[TMP1]], i32 1
; CHECK-NEXT:    ret i8 [[R]]
;
  %e0 = extractelement <16 x i8> %x, i32 1
  %e1 = extractelement <16 x i8> %y, i32 1
  %r = add nsw nuw i8 %e0, %e1
  ret i8 %r
}

; Negative test - eliminating extract is profitable, but vector shift is expensive.

define i8 @ext1_ext1_shl(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: @ext1_ext1_shl(
; CHECK-NEXT:    [[E0:%.*]] = extractelement <16 x i8> [[X:%.*]], i32 1
; CHECK-NEXT:    [[E1:%.*]] = extractelement <16 x i8> [[Y:%.*]], i32 1
; CHECK-NEXT:    [[R:%.*]] = shl i8 [[E0]], [[E1]]
; CHECK-NEXT:    ret i8 [[R]]
;
  %e0 = extractelement <16 x i8> %x, i32 1
  %e1 = extractelement <16 x i8> %y, i32 1
  %r = shl i8 %e0, %e1
  ret i8 %r
}

; Negative test - eliminating extract is profitable, but vector multiply is expensive.
define i8 @ext13_ext13_mul(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: @ext13_ext13_mul(
; CHECK-NEXT:    [[E0:%.*]] = extractelement <16 x i8> [[X:%.*]], i32 13
; CHECK-NEXT:    [[E1:%.*]] = extractelement <16 x i8> [[Y:%.*]], i32 13
; CHECK-NEXT:    [[R:%.*]] = mul i8 [[E0]], [[E1]]
; CHECK-NEXT:    ret i8 [[R]]
;
  %e0 = extractelement <16 x i8> %x, i32 13
  %e1 = extractelement <16 x i8> %y, i32 13
  %r = mul i8 %e0, %e1
  ret i8 %r
}

; Negative test - cost is irrelevant because sdiv has potential UB.

define i8 @ext0_ext0_sdiv(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: @ext0_ext0_sdiv(
; CHECK-NEXT:    [[E0:%.*]] = extractelement <16 x i8> [[X:%.*]], i32 0
; CHECK-NEXT:    [[E1:%.*]] = extractelement <16 x i8> [[Y:%.*]], i32 0
; CHECK-NEXT:    [[R:%.*]] = sdiv i8 [[E0]], [[E1]]
; CHECK-NEXT:    ret i8 [[R]]
;
  %e0 = extractelement <16 x i8> %x, i32 0
  %e1 = extractelement <16 x i8> %y, i32 0
  %r = sdiv i8 %e0, %e1
  ret i8 %r
}

; Extracts are free and vector op has same cost as scalar, but we
; speculatively transform to vector to create more optimization
; opportunities.

define double @ext0_ext0_fadd(<2 x double> %x, <2 x double> %y) {
; CHECK-LABEL: @ext0_ext0_fadd(
; CHECK-NEXT:    [[TMP1:%.*]] = fadd <2 x double> [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT:    [[R:%.*]] = extractelement <2 x double> [[TMP1]], i32 0
; CHECK-NEXT:    ret double [[R]]
;
  %e0 = extractelement <2 x double> %x, i32 0
  %e1 = extractelement <2 x double> %y, i32 0
  %r = fadd double %e0, %e1
  ret double %r
}

; Eliminating extract is profitable. Flags propagate.
define double @ext1_ext1_fsub(<2 x double> %x, <2 x double> %y) {
; CHECK-LABEL: @ext1_ext1_fsub(
; CHECK-NEXT:    [[TMP1:%.*]] = fsub fast <2 x double> [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT:    [[R:%.*]] = extractelement <2 x double> [[TMP1]], i32 1
; CHECK-NEXT:    ret double [[R]]
;
  %e0 = extractelement <2 x double> %x, i32 1
  %e1 = extractelement <2 x double> %y, i32 1
  %r = fsub fast double %e0, %e1
  ret double %r
}

; Negative test - type mismatch.

define double @ext1_ext1_fadd_different_types(<2 x double> %x, <4 x double> %y) {
; CHECK-LABEL: @ext1_ext1_fadd_different_types(
; CHECK-NEXT:    [[E0:%.*]] = extractelement <2 x double> [[X:%.*]], i32 1
; CHECK-NEXT:    [[E1:%.*]] = extractelement <4 x double> [[Y:%.*]], i32 1
; CHECK-NEXT:    [[R:%.*]] = fadd fast double [[E0]], [[E1]]
; CHECK-NEXT:    ret double [[R]]
;
  %e0 = extractelement <2 x double> %x, i32 1
  %e1 = extractelement <4 x double> %y, i32 1
  %r = fadd fast double %e0, %e1
  ret double %r
}

; Disguised same vector operand; scalar code is not cheaper (with default
; x86 target), so aggressively form vector binop.

define i32 @ext1_ext1_add_same_vec(<4 x i32> %x) {
; CHECK-LABEL: @ext1_ext1_add_same_vec(
; CHECK-NEXT:    [[TMP1:%.*]] = add <4 x i32> [[X:%.*]], [[X]]
; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x i32> [[TMP1]], i32 1
; CHECK-NEXT:    ret i32 [[R]]
;
  %e0 = extractelement <4 x i32> %x, i32 1
  %e1 = extractelement <4 x i32> %x, i32 1
  %r = add i32 %e0, %e1
  ret i32 %r
}

; Functionally equivalent to above test; should transform as above.
define i32 @ext1_ext1_add_same_vec_cse(<4 x i32> %x) {
; CHECK-LABEL: @ext1_ext1_add_same_vec_cse(
; CHECK-NEXT:    [[TMP1:%.*]] = add <4 x i32> [[X:%.*]], [[X]]
; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x i32> [[TMP1]], i32 1
; CHECK-NEXT:    ret i32 [[R]]
;
  %e0 = extractelement <4 x i32> %x, i32 1
  %r = add i32 %e0, %e0
  ret i32 %r
}

; Don't assert if extract indices have different types.

define i32 @ext1_ext1_add_same_vec_diff_idx_ty(<4 x i32> %x) {
; CHECK-LABEL: @ext1_ext1_add_same_vec_diff_idx_ty(
; CHECK-NEXT:    [[TMP1:%.*]] = add <4 x i32> [[X:%.*]], [[X]]
; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x i32> [[TMP1]], i32 1
; CHECK-NEXT:    ret i32 [[R]]
;
  %e0 = extractelement <4 x i32> %x, i32 1
  %e1 = extractelement <4 x i32> %x, i64 1
  %r = add i32 %e0, %e1
  ret i32 %r
}

; Negative test - same vector operand; scalar code is cheaper than general case
; and vector code would be more expensive still.

define i8 @ext1_ext1_add_same_vec_extra_use0(<16 x i8> %x) {
; CHECK-LABEL: @ext1_ext1_add_same_vec_extra_use0(
; CHECK-NEXT:    [[E0:%.*]] = extractelement <16 x i8> [[X:%.*]], i32 0
; CHECK-NEXT:    call void @use_i8(i8 [[E0]])
; CHECK-NEXT:    [[E1:%.*]] = extractelement <16 x i8> [[X]], i32 0
; CHECK-NEXT:    [[R:%.*]] = add i8 [[E0]], [[E1]]
; CHECK-NEXT:    ret i8 [[R]]
;
  %e0 = extractelement <16 x i8> %x, i32 0
  call void @use_i8(i8 %e0)
  %e1 = extractelement <16 x i8> %x, i32 0
  %r = add i8 %e0, %e1
  ret i8 %r
}

; Negative test - same vector operand; scalar code is cheaper than general case
; and vector code would be more expensive still.
define i8 @ext1_ext1_add_same_vec_extra_use1(<16 x i8> %x) {
; CHECK-LABEL: @ext1_ext1_add_same_vec_extra_use1(
; CHECK-NEXT:    [[E0:%.*]] = extractelement <16 x i8> [[X:%.*]], i32 0
; CHECK-NEXT:    [[E1:%.*]] = extractelement <16 x i8> [[X]], i32 0
; CHECK-NEXT:    call void @use_i8(i8 [[E1]])
; CHECK-NEXT:    [[R:%.*]] = add i8 [[E0]], [[E1]]
; CHECK-NEXT:    ret i8 [[R]]
;
  %e0 = extractelement <16 x i8> %x, i32 0
  %e1 = extractelement <16 x i8> %x, i32 0
  call void @use_i8(i8 %e1)
  %r = add i8 %e0, %e1
  ret i8 %r
}

; Negative test - same vector operand; scalar code is cheaper than general case
; and vector code would be more expensive still.

define i8 @ext1_ext1_add_same_vec_cse_extra_use(<16 x i8> %x) {
; CHECK-LABEL: @ext1_ext1_add_same_vec_cse_extra_use(
; CHECK-NEXT:    [[E:%.*]] = extractelement <16 x i8> [[X:%.*]], i32 0
; CHECK-NEXT:    call void @use_i8(i8 [[E]])
; CHECK-NEXT:    [[R:%.*]] = add i8 [[E]], [[E]]
; CHECK-NEXT:    ret i8 [[R]]
;
  %e = extractelement <16 x i8> %x, i32 0
  call void @use_i8(i8 %e)
  %r = add i8 %e, %e
  ret i8 %r
}

; Vector code costs the same as scalar, so aggressively form vector op.

define i8 @ext1_ext1_add_uses1(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: @ext1_ext1_add_uses1(
; CHECK-NEXT:    [[E0:%.*]] = extractelement <16 x i8> [[X:%.*]], i32 0
; CHECK-NEXT:    call void @use_i8(i8 [[E0]])
; CHECK-NEXT:    [[TMP1:%.*]] = add <16 x i8> [[X]], [[Y:%.*]]
; CHECK-NEXT:    [[R:%.*]] = extractelement <16 x i8> [[TMP1]], i32 0
; CHECK-NEXT:    ret i8 [[R]]
;
  %e0 = extractelement <16 x i8> %x, i32 0
  call void @use_i8(i8 %e0)
  %e1 = extractelement <16 x i8> %y, i32 0
  %r = add i8 %e0, %e1
  ret i8 %r
}

; Vector code costs the same as scalar, so aggressively form vector op.
define i8 @ext1_ext1_add_uses2(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: @ext1_ext1_add_uses2(
; CHECK-NEXT:    [[E1:%.*]] = extractelement <16 x i8> [[Y:%.*]], i32 0
; CHECK-NEXT:    call void @use_i8(i8 [[E1]])
; CHECK-NEXT:    [[TMP1:%.*]] = add <16 x i8> [[X:%.*]], [[Y]]
; CHECK-NEXT:    [[R:%.*]] = extractelement <16 x i8> [[TMP1]], i32 0
; CHECK-NEXT:    ret i8 [[R]]
;
  %e0 = extractelement <16 x i8> %x, i32 0
  %e1 = extractelement <16 x i8> %y, i32 0
  call void @use_i8(i8 %e1)
  %r = add i8 %e0, %e1
  ret i8 %r
}

; Different extract indices (0 and 1): the expected output shuffles lane 1 of
; %y into lane 0 and forms a vector add that keeps the nuw flag.

define i8 @ext0_ext1_add(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: @ext0_ext1_add(
; CHECK-NEXT:    [[SHIFT:%.*]] = shufflevector <16 x i8> [[Y:%.*]], <16 x i8> poison, <16 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP1:%.*]] = add nuw <16 x i8> [[X:%.*]], [[SHIFT]]
; CHECK-NEXT:    [[R:%.*]] = extractelement <16 x i8> [[TMP1]], i32 0
; CHECK-NEXT:    ret i8 [[R]]
;
  %e0 = extractelement <16 x i8> %x, i32 0
  %e1 = extractelement <16 x i8> %y, i32 1
  %r = add nuw i8 %e0, %e1
  ret i8 %r
}

; Different extract indices (5 and 0): the expected output shuffles lane 5 of
; %x into lane 0. Operand order matters for the non-commutative binop.
; NOTE: despite the "_add" suffix in the name, the binop under test is sub nsw.

define i8 @ext5_ext0_add(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: @ext5_ext0_add(
; CHECK-NEXT:    [[SHIFT:%.*]] = shufflevector <16 x i8> [[X:%.*]], <16 x i8> poison, <16 x i32> <i32 5, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP1:%.*]] = sub nsw <16 x i8> [[SHIFT]], [[Y:%.*]]
; CHECK-NEXT:    [[R:%.*]] = extractelement <16 x i8> [[TMP1]], i64 0
; CHECK-NEXT:    ret i8 [[R]]
;
  %e0 = extractelement <16 x i8> %x, i32 5
  %e1 = extractelement <16 x i8> %y, i32 0
  %r = sub nsw i8 %e0, %e1
  ret i8 %r
}

; Different, both non-zero extract indices (1 and 6): the expected output
; shuffles lane 6 of %y into lane 1.
; NOTE: despite the "_add" suffix in the name, the binop under test is and.

define i8 @ext1_ext6_add(<16 x i8> %x, <16 x i8> %y) {
; CHECK-LABEL: @ext1_ext6_add(
; CHECK-NEXT:    [[SHIFT:%.*]] = shufflevector <16 x i8> [[Y:%.*]], <16 x i8> poison, <16 x i32> <i32 poison, i32 6, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP1:%.*]] = and <16 x i8> [[X:%.*]], [[SHIFT]]
; CHECK-NEXT:    [[R:%.*]] = extractelement <16 x i8> [[TMP1]], i32 1
; CHECK-NEXT:    ret i8 [[R]]
;
  %e0 = extractelement <16 x i8> %x, i32 1
  %e1 = extractelement <16 x i8> %y, i32 6
  %r = and i8 %e0, %e1
  ret i8 %r
}

; Both extracts read the same vector, at lanes 1 and 0.

define float @ext1_ext0_fmul(<4 x float> %x) {
; CHECK-LABEL: @ext1_ext0_fmul(
; CHECK-NEXT:    [[SHIFT:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP1:%.*]] = fmul <4 x float> [[SHIFT]], [[X]]
; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x float> [[TMP1]], i64 0
; CHECK-NEXT:    ret float [[R]]
;
  %e0 = extractelement <4 x float> %x, i32 1
  %e1 = extractelement <4 x float> %x, i32 0
  %r = fmul float %e0, %e1
  ret float %r
}

; The lane-0 extract has an extra use. The expected output keeps that extract
; for the call and still forms the vector fmul, preserving nnan.

define float @ext0_ext3_fmul_extra_use1(<4 x float> %x) {
; CHECK-LABEL: @ext0_ext3_fmul_extra_use1(
; CHECK-NEXT:    [[E0:%.*]] = extractelement <4 x float> [[X:%.*]], i32 0
; CHECK-NEXT:    call void @use_f32(float [[E0]])
; CHECK-NEXT:    [[SHIFT:%.*]] = shufflevector <4 x float> [[X]], <4 x float> poison, <4 x i32> <i32 3, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP1:%.*]] = fmul nnan <4 x float> [[X]], [[SHIFT]]
; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
; CHECK-NEXT:    ret float [[R]]
;
  %e0 = extractelement <4 x float> %x, i32 0
  call void @use_f32(float %e0)
  %e1 = extractelement <4 x float> %x, i32 3
  %r = fmul nnan float %e0, %e1
  ret float %r
}

; The lane-3 extract has an extra use. The expected output is unchanged from
; the input (scalar fmul is kept).

define float @ext0_ext3_fmul_extra_use2(<4 x float> %x) {
; CHECK-LABEL: @ext0_ext3_fmul_extra_use2(
; CHECK-NEXT:    [[E0:%.*]] = extractelement <4 x float> [[X:%.*]], i32 0
; CHECK-NEXT:    [[E1:%.*]] = extractelement <4 x float> [[X]], i32 3
; CHECK-NEXT:    call void @use_f32(float [[E1]])
; CHECK-NEXT:    [[R:%.*]] = fmul ninf nsz float [[E0]], [[E1]]
; CHECK-NEXT:    ret float [[R]]
;
  %e0 = extractelement <4 x float> %x, i32 0
  %e1 = extractelement <4 x float> %x, i32 3
  call void @use_f32(float %e1)
  %r = fmul ninf nsz float %e0, %e1
  ret float %r
}

; <8 x float> source, lanes 0 and 4. The expected output is unchanged for the
; SSE2 run and vectorized for the AVX2 run.
; NOTE: despite "fmul" in the name, the binop under test is fadd.

define float @ext0_ext4_fmul_v8f32(<8 x float> %x) {
; SSE-LABEL: @ext0_ext4_fmul_v8f32(
; SSE-NEXT:    [[E0:%.*]] = extractelement <8 x float> [[X:%.*]], i32 0
; SSE-NEXT:    [[E1:%.*]] = extractelement <8 x float> [[X]], i32 4
; SSE-NEXT:    [[R:%.*]] = fadd float [[E0]], [[E1]]
; SSE-NEXT:    ret float [[R]]
;
; AVX-LABEL: @ext0_ext4_fmul_v8f32(
; AVX-NEXT:    [[SHIFT:%.*]] = shufflevector <8 x float> [[X:%.*]], <8 x float> poison, <8 x i32> <i32 4, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
; AVX-NEXT:    [[TMP1:%.*]] = fadd <8 x float> [[X]], [[SHIFT]]
; AVX-NEXT:    [[R:%.*]] = extractelement <8 x float> [[TMP1]], i32 0
; AVX-NEXT:    ret float [[R]]
;
  %e0 = extractelement <8 x float> %x, i32 0
  %e1 = extractelement <8 x float> %x, i32 4
  %r = fadd float %e0, %e1
  ret float %r
}

; <8 x float> source, lanes 7 and 4. The expected output is unchanged for the
; SSE2 run and vectorized for the AVX2 run.
; NOTE: despite "fmul" in the name, the binop under test is fadd.

define float @ext7_ext4_fmul_v8f32(<8 x float> %x) {
; SSE-LABEL: @ext7_ext4_fmul_v8f32(
; SSE-NEXT:    [[E0:%.*]] = extractelement <8 x float> [[X:%.*]], i32 7
; SSE-NEXT:    [[E1:%.*]] = extractelement <8 x float> [[X]], i32 4
; SSE-NEXT:    [[R:%.*]] = fadd float [[E0]], [[E1]]
; SSE-NEXT:    ret float [[R]]
;
; AVX-LABEL: @ext7_ext4_fmul_v8f32(
; AVX-NEXT:    [[SHIFT:%.*]] = shufflevector <8 x float> [[X:%.*]], <8 x float> poison, <8 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 7, i32 poison, i32 poison, i32 poison>
; AVX-NEXT:    [[TMP1:%.*]] = fadd <8 x float> [[SHIFT]], [[X]]
; AVX-NEXT:    [[R:%.*]] = extractelement <8 x float> [[TMP1]], i64 4
; AVX-NEXT:    ret float [[R]]
;
  %e0 = extractelement <8 x float> %x, i32 7
  %e1 = extractelement <8 x float> %x, i32 4
  %r = fadd float %e0, %e1
  ret float %r
}

; <16 x float> source, lanes 0 and 8. The expected output is unchanged for
; both run lines.
; NOTE: despite "fmul" in the name, the binop under test is fadd.

define float @ext0_ext8_fmul_v16f32(<16 x float> %x) {
; CHECK-LABEL: @ext0_ext8_fmul_v16f32(
; CHECK-NEXT:    [[E0:%.*]] = extractelement <16 x float> [[X:%.*]], i32 0
; CHECK-NEXT:    [[E1:%.*]] = extractelement <16 x float> [[X]], i32 8
; CHECK-NEXT:    [[R:%.*]] = fadd float [[E0]], [[E1]]
; CHECK-NEXT:    ret float [[R]]
;
  %e0 = extractelement <16 x float> %x, i32 0
  %e1 = extractelement <16 x float> %x, i32 8
  %r = fadd float %e0, %e1
  ret float %r
}

; <16 x float> source, lanes 14 and 15. The expected output is unchanged for
; the SSE2 run and vectorized for the AVX2 run.
; NOTE: despite "fmul" in the name, the binop under test is fadd.

define float @ext14_ext15_fmul_v16f32(<16 x float> %x) {
; SSE-LABEL: @ext14_ext15_fmul_v16f32(
; SSE-NEXT:    [[E0:%.*]] = extractelement <16 x float> [[X:%.*]], i32 14
; SSE-NEXT:    [[E1:%.*]] = extractelement <16 x float> [[X]], i32 15
; SSE-NEXT:    [[R:%.*]] = fadd float [[E0]], [[E1]]
; SSE-NEXT:    ret float [[R]]
;
; AVX-LABEL: @ext14_ext15_fmul_v16f32(
; AVX-NEXT:    [[SHIFT:%.*]] = shufflevector <16 x float> [[X:%.*]], <16 x float> poison, <16 x i32> <i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 15, i32 poison>
; AVX-NEXT:    [[TMP1:%.*]] = fadd <16 x float> [[X]], [[SHIFT]]
; AVX-NEXT:    [[R:%.*]] = extractelement <16 x float> [[TMP1]], i32 14
; AVX-NEXT:    ret float [[R]]
;
  %e0 = extractelement <16 x float> %x, i32 14
  %e1 = extractelement <16 x float> %x, i32 15
  %r = fadd float %e0, %e1
  ret float %r
}

; The scalar binop result is inserted into another vector. The expected output
; replaces the extract/fadd/insert sequence with a shuffle, a vector fadd and
; a second shuffle that selects lane 3 of the vector fadd.

define <4 x float> @ins_bo_ext_ext(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @ins_bo_ext_ext(
; CHECK-NEXT:    [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 2>
; CHECK-NEXT:    [[TMP1:%.*]] = fadd <4 x float> [[SHIFT]], [[A]]
; CHECK-NEXT:    [[V3:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> [[TMP1]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
; CHECK-NEXT:    ret <4 x float> [[V3]]
;
  %a2 = extractelement <4 x float> %a, i32 2
  %a3 = extractelement <4 x float> %a, i32 3
  %a23 = fadd float %a2, %a3
  %v3 = insertelement <4 x float> %b, float %a23, i32 3
  ret <4 x float> %v3
}

; TODO: This is conservatively left to extract from the lower index value,
;       but it is likely that extracting from index 3 is the better option.

define <4 x float> @ins_bo_ext_ext_uses(<4 x float> %a, <4 x float> %b) {
; SSE-LABEL: @ins_bo_ext_ext_uses(
; SSE-NEXT:    [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
; SSE-NEXT:    [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
; SSE-NEXT:    [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
; SSE-NEXT:    call void @use_f32(float [[A23]])
; SSE-NEXT:    [[V3:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> [[TMP1]], <4 x i32> <i32 0, i32 1, i32 2, i32 6>
; SSE-NEXT:    ret <4 x float> [[V3]]
;
; AVX-LABEL: @ins_bo_ext_ext_uses(
; AVX-NEXT:    [[SHIFT:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
; AVX-NEXT:    [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
; AVX-NEXT:    [[A23:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
; AVX-NEXT:    call void @use_f32(float [[A23]])
; AVX-NEXT:    [[V3:%.*]] = insertelement <4 x float> [[B:%.*]], float [[A23]], i32 3
; AVX-NEXT:    ret <4 x float> [[V3]]
;
  %a2 = extractelement <4 x float> %a, i32 2
  %a3 = extractelement <4 x float> %a, i32 3
  %a23 = fadd float %a2, %a3
  call void @use_f32(float %a23)
  %v3 = insertelement <4 x float> %b, float %a23, i32 3
  ret <4 x float> %v3
}

; Three extract+fadd pairs whose results are inserted into a vector that
; starts as undef. %a0 and %a1 are unused in the input but their extracts
; remain in the expected output.

define <4 x float> @PR34724(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: @PR34724(
; CHECK-NEXT:    [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
; CHECK-NEXT:    [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
; CHECK-NEXT:    [[SHIFT:%.*]] = shufflevector <4 x float> [[A]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 3, i32 poison>
; CHECK-NEXT:    [[TMP1:%.*]] = fadd <4 x float> [[A]], [[SHIFT]]
; CHECK-NEXT:    [[SHIFT1:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP2:%.*]] = fadd <4 x float> [[B]], [[SHIFT1]]
; CHECK-NEXT:    [[SHIFT2:%.*]] = shufflevector <4 x float> [[B]], <4 x float> poison, <4 x i32> <i32 poison, i32 poison, i32 poison, i32 2>
; CHECK-NEXT:    [[TMP3:%.*]] = fadd <4 x float> [[SHIFT2]], [[B]]
; CHECK-NEXT:    [[V1:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> undef, <4 x i32> <i32 4, i32 2, i32 6, i32 7>
; CHECK-NEXT:    [[V2:%.*]] = shufflevector <4 x float> [[V1]], <4 x float> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 4, i32 3>
; CHECK-NEXT:    [[V3:%.*]] = shufflevector <4 x float> [[V2]], <4 x float> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
; CHECK-NEXT:    ret <4 x float> [[V3]]
;
  %a0 = extractelement <4 x float> %a, i32 0
  %a1 = extractelement <4 x float> %a, i32 1
  %a2 = extractelement <4 x float> %a, i32 2
  %a3 = extractelement <4 x float> %a, i32 3

  %b0 = extractelement <4 x float> %b, i32 0
  %b1 = extractelement <4 x float> %b, i32 1
  %b2 = extractelement <4 x float> %b, i32 2
  %b3 = extractelement <4 x float> %b, i32 3

  %a23 = fadd float %a2, %a3
  %b01 = fadd float %b0, %b1
  %b23 = fadd float %b2, %b3

  %v1 = insertelement <4 x float> undef, float %a23, i32 1
  %v2 = insertelement <4 x float> %v1, float %b01, i32 2
  %v3 = insertelement <4 x float> %v2, float %b23, i32 3
  ret <4 x float> %v3
}

; Scalar 'or' chain over all four lanes of %z. The expected output is a chain
; of shuffle + vector 'or' steps followed by a single extract of lane 0.

define i32 @ext_ext_or_reduction_v4i32(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: @ext_ext_or_reduction_v4i32(
; CHECK-NEXT:    [[Z:%.*]] = and <4 x i32> [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT:    [[SHIFT:%.*]] = shufflevector <4 x i32> [[Z]], <4 x i32> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP1:%.*]] = or <4 x i32> [[Z]], [[SHIFT]]
; CHECK-NEXT:    [[SHIFT1:%.*]] = shufflevector <4 x i32> [[Z]], <4 x i32> poison, <4 x i32> <i32 2, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP2:%.*]] = or <4 x i32> [[TMP1]], [[SHIFT1]]
; CHECK-NEXT:    [[SHIFT2:%.*]] = shufflevector <4 x i32> [[Z]], <4 x i32> poison, <4 x i32> <i32 3, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP3:%.*]] = or <4 x i32> [[SHIFT2]], [[TMP2]]
; CHECK-NEXT:    [[Z0123:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
; CHECK-NEXT:    ret i32 [[Z0123]]
;
  %z = and <4 x i32> %x, %y
  %z0 = extractelement <4 x i32> %z, i32 0
  %z1 = extractelement <4 x i32> %z, i32 1
  %z01 = or i32 %z0, %z1
  %z2 = extractelement <4 x i32> %z, i32 2
  %z012 = or i32 %z01, %z2
  %z3 = extractelement <4 x i32> %z, i32 3
  %z0123 = or i32 %z3, %z012
  ret i32 %z0123
}

; Scalar add chain over lanes 0, 1 and 2 of %x (lane 3 is not used).

define i32 @ext_ext_partial_add_reduction_v4i32(<4 x i32> %x) {
; CHECK-LABEL: @ext_ext_partial_add_reduction_v4i32(
; CHECK-NEXT:    [[SHIFT:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP1:%.*]] = add <4 x i32> [[SHIFT]], [[X]]
; CHECK-NEXT:    [[SHIFT1:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> poison, <4 x i32> <i32 2, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP2:%.*]] = add <4 x i32> [[SHIFT1]], [[TMP1]]
; CHECK-NEXT:    [[X210:%.*]] = extractelement <4 x i32> [[TMP2]], i64 0
; CHECK-NEXT:    ret i32 [[X210]]
;
  %x0 = extractelement <4 x i32> %x, i32 0
  %x1 = extractelement <4 x i32> %x, i32 1
  %x10 = add i32 %x1, %x0
  %x2 = extractelement <4 x i32> %x, i32 2
  %x210 = add i32 %x2, %x10
  ret i32 %x210
}

; Scalar add chain over lanes 0, 1 and 2 of %y, followed by one more add with
; lane 2 of a different vector (%x).

define i32 @ext_ext_partial_add_reduction_and_extra_add_v4i32(<4 x i32> %x, <4 x i32> %y) {
; CHECK-LABEL: @ext_ext_partial_add_reduction_and_extra_add_v4i32(
; CHECK-NEXT:    [[SHIFT:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP1:%.*]] = add <4 x i32> [[SHIFT]], [[Y]]
; CHECK-NEXT:    [[SHIFT1:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> poison, <4 x i32> <i32 2, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP2:%.*]] = add <4 x i32> [[SHIFT1]], [[TMP1]]
; CHECK-NEXT:    [[SHIFT2:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <4 x i32> <i32 2, i32 poison, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP3:%.*]] = add <4 x i32> [[SHIFT2]], [[TMP2]]
; CHECK-NEXT:    [[X2Y210:%.*]] = extractelement <4 x i32> [[TMP3]], i64 0
; CHECK-NEXT:    ret i32 [[X2Y210]]
;
  %y0 = extractelement <4 x i32> %y, i32 0
  %y1 = extractelement <4 x i32> %y, i32 1
  %y10 = add i32 %y1, %y0
  %y2 = extractelement <4 x i32> %y, i32 2
  %y210 = add i32 %y2, %y10
  %x2 = extractelement <4 x i32> %x, i32 2
  %x2y210 = add i32 %x2, %y210
  ret i32 %x2y210
}

; One extract reads a constant vector; the expected output is unchanged from
; the input. Presumably a regression test for a crash, per the name.

define i32 @constant_fold_crash(<4 x i32> %x) {
; CHECK-LABEL: @constant_fold_crash(
; CHECK-NEXT:    [[A:%.*]] = extractelement <4 x i32> <i32 16, i32 17, i32 18, i32 19>, i32 1
; CHECK-NEXT:    [[B:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 0
; CHECK-NEXT:    [[C:%.*]] = add i32 [[A]], [[B]]
; CHECK-NEXT:    ret i32 [[C]]
;
  %a = extractelement <4 x i32> <i32 16, i32 17, i32 18, i32 19>, i32 1
  %b = extractelement <4 x i32> %x, i32 0
  %c = add i32 %a, %b
  ret i32 %c
}

; Floating-point variant of the test above, with the constant-vector extract
; as the second binop operand; the expected output is unchanged from the input.

define float @constant_fold_crash_commute(<4 x float> %x) {
; CHECK-LABEL: @constant_fold_crash_commute(
; CHECK-NEXT:    [[A:%.*]] = extractelement <4 x float> <float 1.600000e+01, float 1.700000e+01, float 1.800000e+01, float 1.900000e+01>, i32 3
; CHECK-NEXT:    [[B:%.*]] = extractelement <4 x float> [[X:%.*]], i32 1
; CHECK-NEXT:    [[C:%.*]] = fadd float [[B]], [[A]]
; CHECK-NEXT:    ret float [[C]]
;
  %a = extractelement <4 x float> <float 16.0, float 17.0, float 18.0, float 19.0>, i32 3
  %b = extractelement <4 x float> %x, i32 1
  %c = fadd float %b, %a
  ret float %c
}