; xref: /llvm-project/llvm/test/CodeGen/X86/load-partial.ll (revision 31b7d4333a6c10aa8b7e1a7ca5aa0e281f124ec2)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE,SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX

;
; Partial Vector Loads - PR16739
;
; Three scalar float loads (elements 0-2) inserted into lanes 0-2: the
; dereferenceable(16) argument makes it safe to widen to one 16-byte load.
define <4 x float> @load_float4_float3(ptr nocapture readonly dereferenceable(16)) nofree nosync {
; SSE-LABEL: load_float4_float3:
; SSE:       # %bb.0:
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: load_float4_float3:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovups (%rdi), %xmm0
; AVX-NEXT:    retq
  %p1 = getelementptr inbounds <4 x float>, ptr %0, i64 0, i64 1
  %p2 = getelementptr inbounds <4 x float>, ptr %0, i64 0, i64 2
  %ld0 = load float, ptr %0, align 4
  %ld1 = load float, ptr %p1, align 4
  %ld2 = load float, ptr %p2, align 4
  %r0 = insertelement <4 x float> undef, float %ld0, i32 0
  %r1 = insertelement <4 x float> %r0,   float %ld1, i32 1
  %r2 = insertelement <4 x float> %r1,   float %ld2, i32 2
  ret <4 x float> %r2
}

; As load_float4_float3, but element 2 is also inserted into lane 3
; (result pattern 0,1,2,2), so an extra shuffle is expected.
define <4 x float> @load_float4_float3_0122(ptr nocapture readonly dereferenceable(16)) nofree nosync {
; SSE-LABEL: load_float4_float3_0122:
; SSE:       # %bb.0:
; SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
; SSE-NEXT:    retq
;
; AVX-LABEL: load_float4_float3_0122:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vmovups (%rdi), %xmm1
; AVX-NEXT:    vshufps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0,0]
; AVX-NEXT:    retq
  %p1 = getelementptr inbounds <4 x float>, ptr %0, i64 0, i64 1
  %p2 = getelementptr inbounds <4 x float>, ptr %0, i64 0, i64 2
  %ld0 = load float, ptr %0, align 4
  %ld1 = load float, ptr %p1, align 4
  %ld2 = load float, ptr %p2, align 4
  %r0 = insertelement <4 x float> undef, float %ld0, i32 0
  %r1 = insertelement <4 x float> %r0,   float %ld1, i32 1
  %r2 = insertelement <4 x float> %r1,   float %ld2, i32 2
  %r3 = insertelement <4 x float> %r2,   float %ld2, i32 3
  ret <4 x float> %r3
}

; Same three-element partial load, but inserted into a wider <8 x float>;
; only the low 128 bits are defined, so a single xmm load still suffices.
define <8 x float> @load_float8_float3(ptr nocapture readonly dereferenceable(16)) nofree nosync {
; SSE-LABEL: load_float8_float3:
; SSE:       # %bb.0:
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: load_float8_float3:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovups (%rdi), %xmm0
; AVX-NEXT:    retq
  %p1 = getelementptr inbounds <4 x float>, ptr %0, i64 0, i64 1
  %p2 = getelementptr inbounds <4 x float>, ptr %0, i64 0, i64 2
  %ld0 = load float, ptr %0, align 4
  %ld1 = load float, ptr %p1, align 4
  %ld2 = load float, ptr %p2, align 4
  %r0 = insertelement <8 x float> undef, float %ld0, i32 0
  %r1 = insertelement <8 x float> %r0,   float %ld1, i32 1
  %r2 = insertelement <8 x float> %r1,   float %ld2, i32 2
  ret <8 x float> %r2
}

; <8 x float> variant of the 0,1,2,2 pattern: element 2 duplicated into
; lane 3, remaining upper lanes undefined.
define <8 x float> @load_float8_float3_0122(ptr nocapture readonly dereferenceable(16)) nofree nosync {
; SSE-LABEL: load_float8_float3_0122:
; SSE:       # %bb.0:
; SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
; SSE-NEXT:    retq
;
; AVX-LABEL: load_float8_float3_0122:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vmovups (%rdi), %xmm1
; AVX-NEXT:    vshufps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0,0]
; AVX-NEXT:    retq
  %p1 = getelementptr inbounds <4 x float>, ptr %0, i64 0, i64 1
  %p2 = getelementptr inbounds <4 x float>, ptr %0, i64 0, i64 2
  %ld0 = load float, ptr %0, align 4
  %ld1 = load float, ptr %p1, align 4
  %ld2 = load float, ptr %p2, align 4
  %r0 = insertelement <8 x float> undef, float %ld0, i32 0
  %r1 = insertelement <8 x float> %r0,   float %ld1, i32 1
  %r2 = insertelement <8 x float> %r1,   float %ld2, i32 2
  %r3 = insertelement <8 x float> %r2,   float %ld2, i32 3
  ret <8 x float> %r3
}

; The first two elements arrive as a <2 x float> load plus a scalar for
; element 2; should still combine into one 16-byte load.
define <4 x float> @load_float4_float3_as_float2_float(ptr nocapture readonly dereferenceable(16)) nofree nosync {
; SSE-LABEL: load_float4_float3_as_float2_float:
; SSE:       # %bb.0:
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: load_float4_float3_as_float2_float:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovups (%rdi), %xmm0
; AVX-NEXT:    retq
  %2 = load <2 x float>, ptr %0, align 4
  %3 = extractelement <2 x float> %2, i32 0
  %4 = insertelement <4 x float> undef, float %3, i32 0
  %5 = extractelement <2 x float> %2, i32 1
  %6 = insertelement <4 x float> %4, float %5, i32 1
  %7 = getelementptr inbounds <4 x float>, ptr %0, i64 0, i64 2
  %8 = load float, ptr %7, align 4
  %9 = insertelement <4 x float> %6, float %8, i32 2
  ret <4 x float> %9
}

; <2 x float> + scalar sources with the scalar duplicated into lane 3
; (0,1,2,2 result pattern).
define <4 x float> @load_float4_float3_as_float2_float_0122(ptr nocapture readonly dereferenceable(16)) nofree nosync {
; SSE-LABEL: load_float4_float3_as_float2_float_0122:
; SSE:       # %bb.0:
; SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
; SSE-NEXT:    retq
;
; AVX-LABEL: load_float4_float3_as_float2_float_0122:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
; AVX-NEXT:    retq
  %2 = load <2 x float>, ptr %0, align 4
  %3 = extractelement <2 x float> %2, i32 0
  %4 = insertelement <4 x float> undef, float %3, i32 0
  %5 = extractelement <2 x float> %2, i32 1
  %6 = insertelement <4 x float> %4, float %5, i32 1
  %7 = getelementptr inbounds <4 x float>, ptr %0, i64 0, i64 2
  %8 = load float, ptr %7, align 4
  %9 = insertelement <4 x float> %6, float %8, i32 2
  %10 = insertelement <4 x float> %9, float %8, i32 3
  ret <4 x float> %10
}

; Elements reconstructed from i64 loads via trunc/lshr/bitcast; the 16-byte
; alignment on the first load permits a single aligned vector load.
define <4 x float> @load_float4_float3_trunc(ptr nocapture readonly dereferenceable(16)) {
; SSE-LABEL: load_float4_float3_trunc:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: load_float4_float3_trunc:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps (%rdi), %xmm0
; AVX-NEXT:    retq
  %2 = load i64, ptr %0, align 16
  %3 = getelementptr inbounds <4 x float>, ptr %0, i64 0, i64 2
  %4 = load i64, ptr %3, align 8
  %5 = trunc i64 %2 to i32
  %6 = bitcast i32 %5 to float
  %7 = insertelement <4 x float> undef, float %6, i32 0
  %8 = lshr i64 %2, 32
  %9 = trunc i64 %8 to i32
  %10 = bitcast i32 %9 to float
  %11 = insertelement <4 x float> %7, float %10, i32 1
  %12 = trunc i64 %4 to i32
  %13 = bitcast i32 %12 to float
  %14 = insertelement <4 x float> %11, float %13, i32 2
  ret <4 x float> %14
}

; i64-trunc reconstruction with element 2 duplicated into lane 3
; (0,1,2,2 result pattern).
define <4 x float> @load_float4_float3_trunc_0122(ptr nocapture readonly dereferenceable(16)) nofree nosync {
; SSE-LABEL: load_float4_float3_trunc_0122:
; SSE:       # %bb.0:
; SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0]
; SSE-NEXT:    retq
;
; AVX-LABEL: load_float4_float3_trunc_0122:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vmovaps (%rdi), %xmm1
; AVX-NEXT:    vshufps {{.*#+}} xmm0 = xmm1[0,1],xmm0[0,0]
; AVX-NEXT:    retq
  %2 = load i64, ptr %0, align 16
  %3 = getelementptr inbounds <4 x float>, ptr %0, i64 0, i64 2
  %4 = load i64, ptr %3, align 8
  %5 = trunc i64 %2 to i32
  %6 = bitcast i32 %5 to float
  %7 = insertelement <4 x float> undef, float %6, i32 0
  %8 = lshr i64 %2, 32
  %9 = trunc i64 %8 to i32
  %10 = bitcast i32 %9 to float
  %11 = insertelement <4 x float> %7, float %10, i32 1
  %12 = trunc i64 %4 to i32
  %13 = bitcast i32 %12 to float
  %14 = insertelement <4 x float> %11, float %13, i32 2
  %15 = insertelement <4 x float> %14, float %13, i32 3
  ret <4 x float> %15
}

; All four lanes reconstructed from two i64 loads (lane 3 comes from the
; high half of the second i64), with per-subtarget codegen differences.
define <4 x float> @load_float4_float3_trunc_0123(ptr nocapture readonly dereferenceable(16)) nofree nosync {
; SSE2-LABEL: load_float4_float3_trunc_0123:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movaps (%rdi), %xmm0
; SSE2-NEXT:    movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: load_float4_float3_trunc_0123:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movaps (%rdi), %xmm0
; SSSE3-NEXT:    movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: load_float4_float3_trunc_0123:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movaps (%rdi), %xmm0
; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
; SSE41-NEXT:    retq
;
; AVX-LABEL: load_float4_float3_trunc_0123:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps (%rdi), %xmm0
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
; AVX-NEXT:    retq
  %2 = load i64, ptr %0, align 16
  %3 = getelementptr inbounds <4 x float>, ptr %0, i64 0, i64 2
  %4 = load i64, ptr %3, align 8
  %5 = trunc i64 %2 to i32
  %6 = bitcast i32 %5 to float
  %7 = insertelement <4 x float> undef, float %6, i32 0
  %8 = lshr i64 %2, 32
  %9 = trunc i64 %8 to i32
  %10 = bitcast i32 %9 to float
  %11 = insertelement <4 x float> %7, float %10, i32 1
  %12 = trunc i64 %4 to i32
  %13 = bitcast i32 %12 to float
  %14 = insertelement <4 x float> %11, float %13, i32 2
  %15 = lshr i64 %4, 32
  %16 = trunc i64 %15 to i32
  %17 = bitcast i32 %16 to float
  %18 = insertelement <4 x float> %14, float %17, i32 3
  ret <4 x float> %18
}

; Same as load_float4_float3_trunc_0123 but with align 1 loads, so the
; combined vector load must be unaligned (movups/vmovups).
define <4 x float> @load_float4_float3_trunc_0123_unaligned(ptr nocapture readonly dereferenceable(16)) nofree nosync {
; SSE2-LABEL: load_float4_float3_trunc_0123_unaligned:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movups (%rdi), %xmm0
; SSE2-NEXT:    movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: load_float4_float3_trunc_0123_unaligned:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movups (%rdi), %xmm0
; SSSE3-NEXT:    movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: load_float4_float3_trunc_0123_unaligned:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movups (%rdi), %xmm0
; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
; SSE41-NEXT:    retq
;
; AVX-LABEL: load_float4_float3_trunc_0123_unaligned:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovups (%rdi), %xmm0
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
; AVX-NEXT:    retq
  %2 = load i64, ptr %0, align 1
  %3 = getelementptr inbounds <4 x float>, ptr %0, i64 0, i64 2
  %4 = load i64, ptr %3, align 1
  %5 = trunc i64 %2 to i32
  %6 = bitcast i32 %5 to float
  %7 = insertelement <4 x float> undef, float %6, i32 0
  %8 = lshr i64 %2, 32
  %9 = trunc i64 %8 to i32
  %10 = bitcast i32 %9 to float
  %11 = insertelement <4 x float> %7, float %10, i32 1
  %12 = trunc i64 %4 to i32
  %13 = bitcast i32 %12 to float
  %14 = insertelement <4 x float> %11, float %13, i32 2
  %15 = lshr i64 %4, 32
  %16 = trunc i64 %15 to i32
  %17 = bitcast i32 %16 to float
  %18 = insertelement <4 x float> %14, float %17, i32 3
  ret <4 x float> %18
}

; PR21780
; Two scalar double loads broadcast into lanes (0,0,2,2); AVX should fold
; the whole pattern into a single vmovddup of a 256-bit load.
define <4 x double> @load_double4_0u2u(ptr nocapture readonly dereferenceable(32)) nofree nosync {
; SSE2-LABEL: load_double4_0u2u:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; SSE2-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
; SSE2-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0,0]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: load_double4_0u2u:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
; SSSE3-NEXT:    movddup {{.*#+}} xmm1 = mem[0,0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: load_double4_0u2u:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
; SSE41-NEXT:    movddup {{.*#+}} xmm1 = mem[0,0]
; SSE41-NEXT:    retq
;
; AVX-LABEL: load_double4_0u2u:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovddup {{.*#+}} ymm0 = mem[0,0,2,2]
; AVX-NEXT:    retq
  %2 = load double, ptr %0, align 8
  %3 = insertelement <4 x double> undef, double %2, i32 0
  %4 = getelementptr inbounds double, ptr %0, i64 2
  %5 = load double, ptr %4, align 8
  %6 = insertelement <4 x double> %3, double %5, i32 2
  %7 = shufflevector <4 x double> %6, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  ret <4 x double> %7
}

; Test case identified in rL366501
; Partial load of an illegal <2 x i8> type widened to <4 x i8>, with a
; constant inserted into element 2, returned bitcast to i32.
@h = dso_local local_unnamed_addr global i8 0, align 1
define dso_local i32 @load_partial_illegal_type()  {
; SSE2-LABEL: load_partial_illegal_type:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movzwl h(%rip), %eax
; SSE2-NEXT:    movd %eax, %xmm0
; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    movd %xmm0, %eax
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: load_partial_illegal_type:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movzwl h(%rip), %eax
; SSSE3-NEXT:    movd %eax, %xmm0
; SSSE3-NEXT:    pshufb {{.*#+}} xmm0 = xmm0[0,1],zero,xmm0[3,u,u,u,u,u,u,u,u,u,u,u,u]
; SSSE3-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSSE3-NEXT:    movd %xmm0, %eax
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: load_partial_illegal_type:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movzwl h(%rip), %eax
; SSE41-NEXT:    movd %eax, %xmm0
; SSE41-NEXT:    movl $2, %eax
; SSE41-NEXT:    pinsrb $2, %eax, %xmm0
; SSE41-NEXT:    movd %xmm0, %eax
; SSE41-NEXT:    retq
;
; AVX-LABEL: load_partial_illegal_type:
; AVX:       # %bb.0:
; AVX-NEXT:    movzwl h(%rip), %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    movl $2, %eax
; AVX-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0
; AVX-NEXT:    vmovd %xmm0, %eax
; AVX-NEXT:    retq
  %1 = load <2 x i8>, ptr @h, align 1
  %2 = shufflevector <2 x i8> %1, <2 x i8> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %3 = insertelement <4 x i8> %2, i8 2, i32 2
  %4 = bitcast <4 x i8> %3 to i32
  ret i32 %4
}

; PR43227: unaligned <3 x i32> partial load shuffled into the upper half of
; a mostly-zero <8 x i32>, then stored with align 32.
define dso_local void @PR43227(ptr %explicit_0, ptr %explicit_1) {
; SSE-LABEL: PR43227:
; SSE:       # %bb.0:
; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    movaps %xmm0, 672(%rsi)
; SSE-NEXT:    movaps %xmm1, 688(%rsi)
; SSE-NEXT:    retq
;
; AVX-LABEL: PR43227:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT:    vmovaps %ymm0, 672(%rsi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
  %1 = getelementptr i32, ptr %explicit_0, i64 63
  %2 = load <3 x i32>, ptr %1, align 1
  %3 = shufflevector <3 x i32> %2, <3 x i32> undef, <2 x i32> <i32 1, i32 2>
  %4 = shufflevector <2 x i32> %3, <2 x i32> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %5 = shufflevector <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 undef, i32 0, i32 undef, i32 0>, <8 x i32> %4, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 5, i32 9, i32 7>
  %6 = getelementptr inbounds <8 x i32>, ptr %explicit_1, i64 21
  store <8 x i32> %5, ptr %6, align 32
  ret void
}
