xref: /llvm-project/llvm/test/CodeGen/X86/avx-splat.ll (revision 69a322fed19b977d15be9500d8653496b73673e9)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --no_x86_scrub_mem_shuffle
2; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X86
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X64
4
; funcA: splat byte element 5 of a <32 x i8> argument across all 32 lanes.
; The expected code is identical for the i686 and x86_64 runs: a vpshufb that
; replicates byte 5 within the low 128 bits, followed by a vinsertf128 that
; copies that 128-bit result into the upper half of ymm0.
5define <32 x i8> @funcA(<32 x i8> %a) nounwind uwtable readnone ssp {
6; CHECK-LABEL: funcA:
7; CHECK:       # %bb.0: # %entry
8; CHECK-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5]
9; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
10; CHECK-NEXT:    ret{{[l|q]}}
11entry:
12  %shuffle = shufflevector <32 x i8> %a, <32 x i8> undef, <32 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
13  ret <32 x i8> %shuffle
14}
15
; funcB: splat 16-bit element 5 of a <16 x i16> argument across all 16 lanes.
; Expected on both triples: vpshufhw replicates word 5 across the four high
; words, vpshufd then broadcasts dword 2 (which now holds two copies of that
; word) across the low 128 bits, and vinsertf128 copies the result into the
; upper half of ymm0.
16define <16 x i16> @funcB(<16 x i16> %a) nounwind uwtable readnone ssp {
17; CHECK-LABEL: funcB:
18; CHECK:       # %bb.0: # %entry
19; CHECK-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5]
20; CHECK-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,2,2]
21; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
22; CHECK-NEXT:    ret{{[l|q]}}
23entry:
24  %shuffle = shufflevector <16 x i16> %a, <16 x i16> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
25  ret <16 x i16> %shuffle
26}
27
; funcC: build a <4 x i64> splat by inserting the scalar argument %q into
; every lane with a chain of insertelement instructions.
; Expected on i686: a single vbroadcastsd straight from the stack slot that
; holds the argument.
; Expected on x86_64: the argument arrives in %rdi, so it is moved into xmm0
; (vmovq), duplicated within the low 128 bits (vpshufd [0,1,0,1]) and copied
; into the upper half with vinsertf128.
28define <4 x i64> @funcC(i64 %q) nounwind uwtable readnone ssp {
29; X86-LABEL: funcC:
30; X86:       # %bb.0: # %entry
31; X86-NEXT:    vbroadcastsd {{[0-9]+}}(%esp), %ymm0
32; X86-NEXT:    retl
33;
34; X64-LABEL: funcC:
35; X64:       # %bb.0: # %entry
36; X64-NEXT:    vmovq %rdi, %xmm0
37; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
38; X64-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
39; X64-NEXT:    retq
40entry:
41  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
42  %vecinit2.i = insertelement <4 x i64> %vecinit.i, i64 %q, i32 1
43  %vecinit4.i = insertelement <4 x i64> %vecinit2.i, i64 %q, i32 2
44  %vecinit6.i = insertelement <4 x i64> %vecinit4.i, i64 %q, i32 3
45  ret <4 x i64> %vecinit6.i
46}
47
; funcD: floating-point counterpart of the i64 scalar splat: insert the double
; argument %q into all four lanes of a <4 x double>.
; Expected on i686: a single vbroadcastsd from the argument's stack slot.
; Expected on x86_64: the argument is already in xmm0, so vmovddup duplicates
; it within the low 128 bits and vinsertf128 copies that into the upper half.
48define <4 x double> @funcD(double %q) nounwind uwtable readnone ssp {
49; X86-LABEL: funcD:
50; X86:       # %bb.0: # %entry
51; X86-NEXT:    vbroadcastsd {{[0-9]+}}(%esp), %ymm0
52; X86-NEXT:    retl
53;
54; X64-LABEL: funcD:
55; X64:       # %bb.0: # %entry
56; X64-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
57; X64-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
58; X64-NEXT:    retq
59entry:
60  %vecinit.i = insertelement <4 x double> undef, double %q, i32 0
61  %vecinit2.i = insertelement <4 x double> %vecinit.i, double %q, i32 1
62  %vecinit4.i = insertelement <4 x double> %vecinit2.i, double %q, i32 2
63  %vecinit6.i = insertelement <4 x double> %vecinit4.i, double %q, i32 3
64  ret <4 x double> %vecinit6.i
65}
66
67; Test that this turns into a broadcast:
68;   shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
69;
; funcE: load one 32-bit value from a stack array and place it in lanes 6 and
; 7 of an otherwise-undef 8-lane vector. On both triples the expected code for
; the load path is a single vbroadcastss from the realigned stack frame.
; Both conditional branches in the IR use undef conditions; this looks like a
; reduced reproducer, so the control flow only exists to keep the load in its
; own block feeding a phi.
70define <8 x float> @funcE() nounwind {
71; X86-LABEL: funcE:
72; X86:       # %bb.0: # %allocas
73; X86-NEXT:    xorl %eax, %eax
74; X86-NEXT:    testb %al, %al
75; X86-NEXT:    # implicit-def: $ymm0
76; X86-NEXT:    jne .LBB4_2
77; X86-NEXT:  # %bb.1: # %load.i1247
78; X86-NEXT:    pushl %ebp
79; X86-NEXT:    movl %esp, %ebp
80; X86-NEXT:    andl $-32, %esp
81; X86-NEXT:    subl $1312, %esp # imm = 0x520
82; X86-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %ymm0
83; X86-NEXT:    movl %ebp, %esp
84; X86-NEXT:    popl %ebp
85; X86-NEXT:  .LBB4_2: # %__load_and_broadcast_32.exit1249
86; X86-NEXT:    retl
87;
88; X64-LABEL: funcE:
89; X64:       # %bb.0: # %allocas
90; X64-NEXT:    xorl %eax, %eax
91; X64-NEXT:    testb %al, %al
92; X64-NEXT:    # implicit-def: $ymm0
93; X64-NEXT:    jne .LBB4_2
94; X64-NEXT:  # %bb.1: # %load.i1247
95; X64-NEXT:    pushq %rbp
96; X64-NEXT:    movq %rsp, %rbp
97; X64-NEXT:    andq $-32, %rsp
98; X64-NEXT:    subq $1312, %rsp # imm = 0x520
99; X64-NEXT:    vbroadcastss {{[0-9]+}}(%rsp), %ymm0
100; X64-NEXT:    movq %rbp, %rsp
101; X64-NEXT:    popq %rbp
102; X64-NEXT:  .LBB4_2: # %__load_and_broadcast_32.exit1249
103; X64-NEXT:    retq
104allocas:
; 18x18 float array on the stack, 32-byte aligned (hence the stack realignment
; in the expected output).
105  %udx495 = alloca [18 x [18 x float]], align 32
106  br label %for_test505.preheader
107
108for_test505.preheader:                            ; preds = %for_test505.preheader, %allocas
109  br i1 undef, label %for_exit499, label %for_test505.preheader
110
111for_exit499:                                      ; preds = %for_test505.preheader
112  br i1 undef, label %__load_and_broadcast_32.exit1249, label %load.i1247
113
114load.i1247:                                       ; preds = %for_exit499
; Address of element [1][1] of the stack array, read back as an i32.
115  %ptr1227 = getelementptr [18 x [18 x float]], ptr %udx495, i64 0, i64 1, i64 1
116  %val.i1238 = load i32, ptr %ptr1227, align 4
; Only lanes 6 and 7 are defined; the remaining lanes stay undef, which leaves
; the backend free to treat the whole vector as a splat of the loaded value.
117  %ret6.i1245 = insertelement <8 x i32> undef, i32 %val.i1238, i32 6
118  %ret7.i1246 = insertelement <8 x i32> %ret6.i1245, i32 %val.i1238, i32 7
119  %phitmp = bitcast <8 x i32> %ret7.i1246 to <8 x float>
120  br label %__load_and_broadcast_32.exit1249
121
122__load_and_broadcast_32.exit1249:                 ; preds = %load.i1247, %for_exit499
; The path that skips the load contributes undef, matching the implicit-def of
; ymm0 in the expected output.
123  %load_broadcast12281250 = phi <8 x float> [ %phitmp, %load.i1247 ], [ undef, %for_exit499 ]
124  ret <8 x float> %load_broadcast12281250
125}
126
; funcF: place the i32 argument in lanes 6 and 7 of an otherwise-undef
; <8 x i32> and return it bitcast to <8 x float>. Because every other lane is
; undef, the expected code treats the vector as a full splat.
; Expected on i686: a single vbroadcastss from the argument's stack slot.
; Expected on x86_64: vmovd from %edi, vpshufd [0,0,0,0] to fill the low
; 128 bits, then vinsertf128 to copy them into the upper half.
127define <8 x float> @funcF(i32 %val) nounwind {
128; X86-LABEL: funcF:
129; X86:       # %bb.0:
130; X86-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %ymm0
131; X86-NEXT:    retl
132;
133; X64-LABEL: funcF:
134; X64:       # %bb.0:
135; X64-NEXT:    vmovd %edi, %xmm0
136; X64-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
137; X64-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
138; X64-NEXT:    retq
139  %ret6 = insertelement <8 x i32> undef, i32 %val, i32 6
140  %ret7 = insertelement <8 x i32> %ret6, i32 %val, i32 7
141  %tmp = bitcast <8 x i32> %ret7 to <8 x float>
142  ret <8 x float> %tmp
143}
144
; funcG: splat lane 0 of a <8 x float> register argument across all 8 lanes.
; The source lane is in the low 128 bits, so the expected code on both triples
; is an in-lane vshufps [0,0,0,0] on xmm0 followed by vinsertf128 to copy the
; result into the upper half of ymm0.
145define <8 x float> @funcG(<8 x float> %a) nounwind uwtable readnone ssp {
146; CHECK-LABEL: funcG:
147; CHECK:       # %bb.0: # %entry
148; CHECK-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
149; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
150; CHECK-NEXT:    ret{{[l|q]}}
151entry:
152  %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
153  ret <8 x float> %shuffle
154}
155
; funcH: splat lane 5 of a <8 x float> register argument across all 8 lanes.
; Lane 5 lives in the upper 128 bits, so the expected code on both triples
; first duplicates the upper half into both halves (vperm2f128 [2,3,2,3]) and
; then broadcasts element 1 within each 128-bit half (vshufps
; [1,1,1,1,5,5,5,5]).
156define <8 x float> @funcH(<8 x float> %a) nounwind uwtable readnone ssp {
157; CHECK-LABEL: funcH:
158; CHECK:       # %bb.0: # %entry
159; CHECK-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
160; CHECK-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[1,1,1,1,5,5,5,5]
161; CHECK-NEXT:    ret{{[l|q]}}
162entry:
163  %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
164  ret <8 x float> %shuffle
165}
166
; splat_load_2f64_11: load a <2 x double> from %ptr and splat element 1.
; The expected code folds the vector load and the shuffle into one vmovddup
; with a memory operand at byte offset 8 (element 1 of 8-byte doubles). The
; i686 run first loads the pointer argument from the stack.
167define <2 x double> @splat_load_2f64_11(ptr %ptr) {
168; X86-LABEL: splat_load_2f64_11:
169; X86:       # %bb.0:
170; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
171; X86-NEXT:    vmovddup 8(%eax), %xmm0 # xmm0 = mem[0,0]
172; X86-NEXT:    retl
173;
174; X64-LABEL: splat_load_2f64_11:
175; X64:       # %bb.0:
176; X64-NEXT:    vmovddup 8(%rdi), %xmm0 # xmm0 = mem[0,0]
177; X64-NEXT:    retq
178  %x = load <2 x double>, ptr %ptr
179  %x1 = shufflevector <2 x double> %x, <2 x double> undef, <2 x i32> <i32 1, i32 1>
180  ret <2 x double> %x1
181}
182
; splat_load_4f64_2222: load a <4 x double> from %ptr and splat element 2.
; The expected code folds the vector load and the shuffle into one
; vbroadcastsd from byte offset 16 (element 2 of 8-byte doubles). The i686 run
; first loads the pointer argument from the stack.
183define <4 x double> @splat_load_4f64_2222(ptr %ptr) {
184; X86-LABEL: splat_load_4f64_2222:
185; X86:       # %bb.0:
186; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
187; X86-NEXT:    vbroadcastsd 16(%eax), %ymm0
188; X86-NEXT:    retl
189;
190; X64-LABEL: splat_load_4f64_2222:
191; X64:       # %bb.0:
192; X64-NEXT:    vbroadcastsd 16(%rdi), %ymm0
193; X64-NEXT:    retq
194  %x = load <4 x double>, ptr %ptr
195  %x1 = shufflevector <4 x double> %x, <4 x double> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2>
196  ret <4 x double> %x1
197}
198
; splat_load_4f32_0000: load a <4 x float> from %ptr and splat element 0.
; The expected code folds the vector load and the shuffle into one 128-bit
; vbroadcastss from the base address (offset 0). The i686 run first loads the
; pointer argument from the stack.
199define <4 x float> @splat_load_4f32_0000(ptr %ptr) {
200; X86-LABEL: splat_load_4f32_0000:
201; X86:       # %bb.0:
202; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
203; X86-NEXT:    vbroadcastss (%eax), %xmm0
204; X86-NEXT:    retl
205;
206; X64-LABEL: splat_load_4f32_0000:
207; X64:       # %bb.0:
208; X64-NEXT:    vbroadcastss (%rdi), %xmm0
209; X64-NEXT:    retq
210  %x = load <4 x float>, ptr %ptr
211  %x1 = shufflevector <4 x float> %x, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
212  ret <4 x float> %x1
213}
214
; splat_load_8f32_77777777: load a <8 x float> from %ptr and splat element 7.
; The expected code folds the vector load and the shuffle into one 256-bit
; vbroadcastss from byte offset 28 (element 7 of 4-byte floats). The i686 run
; first loads the pointer argument from the stack.
215define <8 x float> @splat_load_8f32_77777777(ptr %ptr) {
216; X86-LABEL: splat_load_8f32_77777777:
217; X86:       # %bb.0:
218; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
219; X86-NEXT:    vbroadcastss 28(%eax), %ymm0
220; X86-NEXT:    retl
221;
222; X64-LABEL: splat_load_8f32_77777777:
223; X64:       # %bb.0:
224; X64-NEXT:    vbroadcastss 28(%rdi), %ymm0
225; X64-NEXT:    retq
226  %x = load <8 x float>, ptr %ptr
227  %x1 = shufflevector <8 x float> %x, <8 x float> undef, <8 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>
228  ret <8 x float> %x1
229}
230