; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX2

; Loads a <2 x float> from %in and inserts its two elements into lanes 0 and 1
; of an <8 x float> whose remaining six lanes are 0.0, then stores the result
; to %out with 32-byte alignment. The expected assembly is a single vmovsd load
; followed by a single 256-bit vmovaps store, identical for both run
; configurations.
define void @PR32957(ptr %in, ptr %out) {
; CHECK-LABEL: PR32957:
; CHECK: # %bb.0:
; CHECK-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: vmovaps %ymm0, (%rsi)
; CHECK-NEXT: vzeroupper
; CHECK-NEXT: retq
  %ld = load <2 x float>, ptr %in, align 8
  %ext = extractelement <2 x float> %ld, i64 0
  %ext2 = extractelement <2 x float> %ld, i64 1
  ; Lanes 0 and 1 of the base vector are undef; both are overwritten below.
  %ins = insertelement <8 x float> <float undef, float undef, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0, float 0.0>, float %ext, i64 0
  %ins2 = insertelement <8 x float> %ins, float %ext2, i64 1
  store <8 x float> %ins2, ptr %out, align 32
  ret void
}

; External callee returning an { i8, double } aggregate.
declare { i8, double } @fun()

; Check that this does not fail to combine concat_vectors of a value from
; merge_values through a bitcast.
; Calls @fun, takes the double member of the returned aggregate and bitcasts it
; to <2 x float>. The parameter %cmp is not referenced in the body. The
; expected assembly ends at the call because the successor block is
; unreachable.
define void @d(i1 %cmp) {
; CHECK-LABEL: d:
; CHECK: # %bb.0: # %bar
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: callq fun@PLT
bar:
  %val = call { i8, double } @fun()
  ; Member 1 of the aggregate is the double; it is reinterpreted as two floats.
  %extr = extractvalue { i8, double } %val, 1
  %bc = bitcast double %extr to <2 x float>
  br label %baz

baz:
  ; The extracted lane is never used; the block terminates with unreachable.
  %extr1 = extractelement <2 x float> %bc, i64 0
  unreachable
}

; External 49216-byte array with 32-byte alignment; the function below
; addresses it through constant byte offsets.
@qa_ = external unnamed_addr global [49216 x i8], align 32

; Performs a series of constant stores into @qa_. In the final block, the four
; <2 x float> stores of 1.0 at offsets 48296, 48304, 48312 and 48320 are
; adjacent 8-byte slots, and the expected assembly writes them with one 32-byte
; vmovups of a broadcast register. The remaining <2 x float> store at offset
; 47372 differs per configuration: the AVX2 expectation reuses the low half of
; that broadcast register (vmovlps), while the AVX expectation loads a separate
; 64-bit constant (printed as a double whose bit pattern is two 1.0 floats).
define void @concat_of_broadcast_v2f64_v4f64() {
; AVX1-LABEL: concat_of_broadcast_v2f64_v4f64:
; AVX1: # %bb.0: # %alloca_0
; AVX1-NEXT: movq qa_@GOTPCREL(%rip), %rax
; AVX1-NEXT: movl $1091567616, 30256(%rax) # imm = 0x41100000
; AVX1-NEXT: movabsq $4294967297, %rcx # imm = 0x100000001
; AVX1-NEXT: movq %rcx, 46348(%rax)
; AVX1-NEXT: vbroadcastss {{.*#+}} ymm0 = [1065353216,1065353216,1065353216,1065353216,1065353216,1065353216,1065353216,1065353216]
; AVX1-NEXT: vmovups %ymm0, 48296(%rax)
; AVX1-NEXT: vmovsd {{.*#+}} xmm0 = [7.812501848093234E-3,0.0E+0]
; AVX1-NEXT: vmovsd %xmm0, 47372(%rax)
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: concat_of_broadcast_v2f64_v4f64:
; AVX2: # %bb.0: # %alloca_0
; AVX2-NEXT: movq qa_@GOTPCREL(%rip), %rax
; AVX2-NEXT: movl $1091567616, 30256(%rax) # imm = 0x41100000
; AVX2-NEXT: movabsq $4294967297, %rcx # imm = 0x100000001
; AVX2-NEXT: movq %rcx, 46348(%rax)
; AVX2-NEXT: vbroadcastss {{.*#+}} ymm0 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX2-NEXT: vmovups %ymm0, 48296(%rax)
; AVX2-NEXT: vmovlps %xmm0, 47372(%rax)
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
alloca_0:
  ; Scalar float 9.0 at byte offset 30256 and a <2 x i32> of ones at 46348.
  store float 9.000000e+00, ptr getelementptr inbounds ([49216 x i8], ptr @qa_, i64 0, i64 30256), align 16
  store <2 x i32> <i32 1, i32 1>, ptr getelementptr inbounds ([49216 x i8], ptr @qa_, i64 0, i64 46348), align 4
  br label %loop.4942

loop.4942: ; preds = %loop.4942, %alloca_0
  ; Self-loop with a poison condition; it contains no other instructions.
  br i1 poison, label %loop.4942, label %ifmerge.1298

ifmerge.1298: ; preds = %loop.4942
  ; The loaded value is not used by any later instruction.
  %gepload4638 = load float, ptr getelementptr inbounds ([49216 x i8], ptr @qa_, i64 0, i64 28324), align 4
  ; Four back-to-back 8-byte stores covering offsets 48296 through 48327.
  store <2 x float> <float 1.000000e+00, float 1.000000e+00>, ptr getelementptr inbounds ([49216 x i8], ptr @qa_, i64 0, i64 48296), align 8
  store <2 x float> <float 1.000000e+00, float 1.000000e+00>, ptr getelementptr inbounds ([49216 x i8], ptr @qa_, i64 0, i64 48304), align 16
  store <2 x float> <float 1.000000e+00, float 1.000000e+00>, ptr getelementptr inbounds ([49216 x i8], ptr @qa_, i64 0, i64 48312), align 8
  store <2 x float> <float 1.000000e+00, float 1.000000e+00>, ptr getelementptr inbounds ([49216 x i8], ptr @qa_, i64 0, i64 48320), align 32
  ; Separate store of the same constant at a non-adjacent offset.
  store <2 x float> <float 1.000000e+00, float 1.000000e+00>, ptr getelementptr inbounds ([49216 x i8], ptr @qa_, i64 0, i64 47372), align 4
  ret void
}

; Volatile-loads three <8 x float> vectors and combines them with two
; shufflevector operations. Following the mask indices, the returned
; <4 x float> is <%ld2[6], %ld1[0], %ld0[4], %ld2[3]>.
define <4 x float> @concat_of_broadcast_v4f32_v8f32(ptr %a0, ptr %a1, ptr %a2) {
; AVX1-LABEL: concat_of_broadcast_v4f32_v8f32:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovaps (%rdi), %ymm0
; AVX1-NEXT: vmovaps (%rsi), %ymm1
; AVX1-NEXT: vmovaps (%rdx), %ymm2
; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm1, %ymm1
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7]
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm2[0,1,2,3],ymm0[4],ymm2[5,6],ymm0[7]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm2[3,0],xmm0[0,0]
; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,3],xmm1[2,0]
; AVX1-NEXT: vzeroupper
; AVX1-NEXT: retq
;
; AVX2-LABEL: concat_of_broadcast_v4f32_v8f32:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovaps (%rdi), %ymm0
; AVX2-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5]
; AVX2-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[0,1,2,0]
; AVX2-NEXT: vmovaps {{.*#+}} xmm1 = [6,7,4,3]
; AVX2-NEXT: vblendps {{.*#+}} ymm0 = mem[0,1,2,3],ymm0[4],mem[5,6],ymm0[7]
; AVX2-NEXT: vpermps %ymm0, %ymm1, %ymm0
; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX2-NEXT: vzeroupper
; AVX2-NEXT: retq
  %ld0 = load volatile <8 x float>, ptr %a0
  %ld1 = load volatile <8 x float>, ptr %a1
  %ld2 = load volatile <8 x float>, ptr %a2
  ; Only lanes 4 and 7 are defined: lane 4 = %ld0[4], lane 7 = %ld1[0] (index 8).
  %shuffle = shufflevector <8 x float> %ld0, <8 x float> %ld1, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 4, i32 undef, i32 undef, i32 8>
  ; Indices 6 and 3 select from %ld2; 15 and 12 select lanes 7 and 4 of %shuffle.
  %shuffle1 = shufflevector <8 x float> %ld2, <8 x float> %shuffle, <4 x i32> <i32 6, i32 15, i32 12, i32 3>
  ret <4 x float> %shuffle1
}

; Splats byte 0 of the input across all 16 bytes, takes the low i64 of the
; result, keeps only its top byte via the mask, and broadcasts that i64 to all
; four lanes. Both expectations realize the mask as a 64-bit left shift by 56;
; the AVX2 expectation then uses vpbroadcastq, while the AVX expectation uses
; vpshufd followed by vinsertf128.
define <4 x i64> @broadcast_of_shuffle_v2i64_v4i64(<16 x i8> %vecinit.i) {
; AVX1-LABEL: broadcast_of_shuffle_v2i64_v4i64:
; AVX1: # %bb.0: # %entry
; AVX1-NEXT: vpsllq $56, %xmm0, %xmm0
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: broadcast_of_shuffle_v2i64_v4i64:
; AVX2: # %bb.0: # %entry
; AVX2-NEXT: vpsllq $56, %xmm0, %xmm0
; AVX2-NEXT: vpbroadcastq %xmm0, %ymm0
; AVX2-NEXT: retq
entry:
  ; A zeroinitializer mask replicates element 0 into every lane.
  %vecinit15.i = shufflevector <16 x i8> %vecinit.i, <16 x i8> poison, <16 x i32> zeroinitializer
  %0 = bitcast <16 x i8> %vecinit15.i to <2 x i64>
  %1 = extractelement <2 x i64> %0, i64 0
  %2 = and i64 %1, -72057594037927936 ; 0xFF00 0000 0000 0000
  %3 = insertelement <4 x i64> poison, i64 %2, i64 0
  %4 = shufflevector <4 x i64> %3, <4 x i64> poison, <4 x i32> zeroinitializer
  ret <4 x i64> %4
}