; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --no_x86_scrub_mem_shuffle
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X64

; Splat (broadcast) lowering tests. Both invocations above enable only the
; "avx" feature; one targets i686 and the other x86-64. Functions whose
; expected output is the same on both targets share a single set of
; assertions; the others carry one set per target.

; Splat byte element 5 of a <32 x i8> argument into every lane.
; Expected: a 128-bit vpshufb, then vinsertf128 to fill the upper half of ymm0.
define <32 x i8> @funcA(<32 x i8> %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcA:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <32 x i8> %shuffle
}

; Splat 16-bit element 5 of a <16 x i16> argument into every lane.
; Expected: vpshufhw + vpshufd on the low 128 bits, then vinsertf128.
define <16 x i16> @funcB(<16 x i16> %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcB:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5]
; CHECK-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,2,2]
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <16 x i16> %shuffle
}

; Build a <4 x i64> by inserting the scalar %q into all four lanes.
; Expected on i686: a single vbroadcastsd from the stack slot of %q.
; Expected on x86-64: vmovq from %rdi, vpshufd, then vinsertf128.
define <4 x i64> @funcC(i64 %q) nounwind uwtable readnone ssp {
; X86-LABEL: funcC:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vbroadcastsd {{[0-9]+}}(%esp), %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: funcC:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vmovq %rdi, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; X64-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-NEXT:    retq
entry:
  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
  %vecinit2.i = insertelement <4 x i64> %vecinit.i, i64 %q, i32 1
  %vecinit4.i = insertelement <4 x i64> %vecinit2.i, i64 %q, i32 2
  %vecinit6.i = insertelement <4 x i64> %vecinit4.i, i64 %q, i32 3
  ret <4 x i64> %vecinit6.i
}

; Same construction as funcC, but with a double scalar.
; Expected on i686: a single vbroadcastsd from the stack slot of %q.
; Expected on x86-64: vmovddup on xmm0, then vinsertf128.
define <4 x double> @funcD(double %q) nounwind uwtable readnone ssp {
; X86-LABEL: funcD:
; X86:       # %bb.0: # %entry
; X86-NEXT:    vbroadcastsd {{[0-9]+}}(%esp), %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: funcD:
; X64:       # %bb.0: # %entry
; X64-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; X64-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-NEXT:    retq
entry:
  %vecinit.i = insertelement <4 x double> undef, double %q, i32 0
  %vecinit2.i = insertelement <4 x double> %vecinit.i, double %q, i32 1
  %vecinit4.i = insertelement <4 x double> %vecinit2.i, double %q, i32 2
  %vecinit6.i = insertelement <4 x double> %vecinit4.i, double %q, i32 3
  ret <4 x double> %vecinit6.i
}

; Test that this turns into a broadcast:
;   shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
;
; The IR loads one i32 from element [1][1] of an 18x18 float stack array,
; inserts it into lanes 6 and 7 of an otherwise-undef <8 x i32>, and bitcasts
; the result to <8 x float>. Both targets are expected to emit a single
; vbroadcastss from the stack inside the %load.i1247 block.
define <8 x float> @funcE() nounwind {
; X86-LABEL: funcE:
; X86:       # %bb.0: # %allocas
; X86-NEXT:    xorl %eax, %eax
; X86-NEXT:    testb %al, %al
; X86-NEXT:    # implicit-def: $ymm0
; X86-NEXT:    jne .LBB4_2
; X86-NEXT:  # %bb.1: # %load.i1247
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-32, %esp
; X86-NEXT:    subl $1312, %esp # imm = 0x520
; X86-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %ymm0
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:  .LBB4_2: # %__load_and_broadcast_32.exit1249
; X86-NEXT:    retl
;
; X64-LABEL: funcE:
; X64:       # %bb.0: # %allocas
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    testb %al, %al
; X64-NEXT:    # implicit-def: $ymm0
; X64-NEXT:    jne .LBB4_2
; X64-NEXT:  # %bb.1: # %load.i1247
; X64-NEXT:    pushq %rbp
; X64-NEXT:    movq %rsp, %rbp
; X64-NEXT:    andq $-32, %rsp
; X64-NEXT:    subq $1312, %rsp # imm = 0x520
; X64-NEXT:    vbroadcastss {{[0-9]+}}(%rsp), %ymm0
; X64-NEXT:    movq %rbp, %rsp
; X64-NEXT:    popq %rbp
; X64-NEXT:  .LBB4_2: # %__load_and_broadcast_32.exit1249
; X64-NEXT:    retq
allocas:
  %udx495 = alloca [18 x [18 x float]], align 32
  br label %for_test505.preheader

for_test505.preheader:                            ; preds = %for_test505.preheader, %allocas
  br i1 undef, label %for_exit499, label %for_test505.preheader

for_exit499:                                      ; preds = %for_test505.preheader
  br i1 undef, label %__load_and_broadcast_32.exit1249, label %load.i1247

load.i1247:                                       ; preds = %for_exit499
  %ptr1227 = getelementptr [18 x [18 x float]], ptr %udx495, i64 0, i64 1, i64 1
  %val.i1238 = load i32, ptr %ptr1227, align 4
  %ret6.i1245 = insertelement <8 x i32> undef, i32 %val.i1238, i32 6
  %ret7.i1246 = insertelement <8 x i32> %ret6.i1245, i32 %val.i1238, i32 7
  %phitmp = bitcast <8 x i32> %ret7.i1246 to <8 x float>
  br label %__load_and_broadcast_32.exit1249

__load_and_broadcast_32.exit1249:                 ; preds = %load.i1247, %for_exit499
  %load_broadcast12281250 = phi <8 x float> [ %phitmp, %load.i1247 ], [ undef, %for_exit499 ]
  ret <8 x float> %load_broadcast12281250
}

; Insert the i32 argument into lanes 6 and 7 only (lanes 0-5 stay undef) and
; bitcast to <8 x float>.
; Expected on i686: a single vbroadcastss from the stack slot of %val.
; Expected on x86-64: vmovd from %edi, vpshufd, then vinsertf128.
define <8 x float> @funcF(i32 %val) nounwind {
; X86-LABEL: funcF:
; X86:       # %bb.0:
; X86-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: funcF:
; X64:       # %bb.0:
; X64-NEXT:    vmovd %edi, %xmm0
; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-NEXT:    retq
  %ret6 = insertelement <8 x i32> undef, i32 %val, i32 6
  %ret7 = insertelement <8 x i32> %ret6, i32 %val, i32 7
  %tmp = bitcast <8 x i32> %ret7 to <8 x float>
  ret <8 x float> %tmp
}

; Splat element 0 of a <8 x float> argument into every lane.
; Expected: a 128-bit vshufps, then vinsertf128.
define <8 x float> @funcG(<8 x float> %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcG:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x float> %shuffle
}

; Splat element 5 of a <8 x float> argument into every lane.
; Expected: vperm2f128 first, then a 256-bit vshufps selecting index 1 within
; each 128-bit half.
define <8 x float> @funcH(<8 x float> %a) nounwind uwtable readnone ssp {
; CHECK-LABEL: funcH:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
; CHECK-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[1,1,1,1,5,5,5,5]
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
  ret <8 x float> %shuffle
}

; Load a <2 x double> and splat element 1.
; Expected: the load folds into a vmovddup reading from byte offset 8.
define <2 x double> @splat_load_2f64_11(ptr %ptr) {
; X86-LABEL: splat_load_2f64_11:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vmovddup 8(%eax), %xmm0 # xmm0 = mem[0,0]
; X86-NEXT:    retl
;
; X64-LABEL: splat_load_2f64_11:
; X64:       # %bb.0:
; X64-NEXT:    vmovddup 8(%rdi), %xmm0 # xmm0 = mem[0,0]
; X64-NEXT:    retq
  %x = load <2 x double>, ptr %ptr
  %x1 = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x double> %x1
}

; Load a <4 x double> and splat element 2.
; Expected: the load folds into a vbroadcastsd reading from byte offset 16.
define <4 x double> @splat_load_4f64_2222(ptr %ptr) {
; X86-LABEL: splat_load_4f64_2222:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vbroadcastsd 16(%eax), %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: splat_load_4f64_2222:
; X64:       # %bb.0:
; X64-NEXT:    vbroadcastsd 16(%rdi), %ymm0
; X64-NEXT:    retq
  %x = load <4 x double>, ptr %ptr
  %x1 = shufflevector <4 x double> %x, <4 x double> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
  ret <4 x double> %x1
}

; Load a <4 x float> and splat element 0.
; Expected: the load folds into a 128-bit vbroadcastss from offset 0.
define <4 x float> @splat_load_4f32_0000(ptr %ptr) {
; X86-LABEL: splat_load_4f32_0000:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vbroadcastss (%eax), %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: splat_load_4f32_0000:
; X64:       # %bb.0:
; X64-NEXT:    vbroadcastss (%rdi), %xmm0
; X64-NEXT:    retq
  %x = load <4 x float>, ptr %ptr
  %x1 = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x float> %x1
}

; Load a <8 x float> and splat element 7.
; Expected: the load folds into a 256-bit vbroadcastss from byte offset 28.
define <8 x float> @splat_load_8f32_77777777(ptr %ptr) {
; X86-LABEL: splat_load_8f32_77777777:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vbroadcastss 28(%eax), %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: splat_load_8f32_77777777:
; X64:       # %bb.0:
; X64-NEXT:    vbroadcastss 28(%rdi), %ymm0
; X64-NEXT:    retq
  %x = load <8 x float>, ptr %ptr
  %x1 = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
  ret <8 x float> %x1
}