xref: /llvm-project/llvm/test/Transforms/InterleavedAccess/X86/interleaved-accesses-64bits-avx.ll (revision cd6e462d012f289cc4ec12927ca8198f9ed1469e)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -mtriple=x86_64-pc-linux  -mattr=+avx -interleaved-access -S | FileCheck %s
3; RUN: opt < %s -mtriple=x86_64-pc-linux  -mattr=+avx -passes=interleaved-access -S | FileCheck %s
4
5; This file tests the function `llvm::lowerInterleavedLoad/Store`.
6
7define <4 x double> @load_factorf64_4(ptr %ptr) {
8; CHECK-LABEL: @load_factorf64_4(
9; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <4 x double>, ptr [[PTR:%.*]], i32 0
10; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x double>, ptr [[TMP1]], align 16
11; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <4 x double>, ptr [[PTR]], i32 1
12; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x double>, ptr [[TMP3]], align 16
13; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr <4 x double>, ptr [[PTR]], i32 2
14; CHECK-NEXT:    [[TMP6:%.*]] = load <4 x double>, ptr [[TMP5]], align 16
15; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr <4 x double>, ptr [[PTR]], i32 3
16; CHECK-NEXT:    [[TMP8:%.*]] = load <4 x double>, ptr [[TMP7]], align 16
17; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
18; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <4 x double> [[TMP4]], <4 x double> [[TMP8]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
19; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> [[TMP6]], <4 x i32> <i32 2, i32 3, i32 6, i32 7>
20; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <4 x double> [[TMP4]], <4 x double> [[TMP8]], <4 x i32> <i32 2, i32 3, i32 6, i32 7>
21; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <4 x double> [[TMP9]], <4 x double> [[TMP10]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
22; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <4 x double> [[TMP11]], <4 x double> [[TMP12]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
23; CHECK-NEXT:    [[TMP15:%.*]] = shufflevector <4 x double> [[TMP9]], <4 x double> [[TMP10]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
24; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <4 x double> [[TMP11]], <4 x double> [[TMP12]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
25; CHECK-NEXT:    [[ADD1:%.*]] = fadd <4 x double> [[TMP13]], [[TMP15]]
26; CHECK-NEXT:    [[ADD2:%.*]] = fadd <4 x double> [[ADD1]], [[TMP14]]
27; CHECK-NEXT:    [[ADD3:%.*]] = fadd <4 x double> [[ADD2]], [[TMP16]]
28; CHECK-NEXT:    ret <4 x double> [[ADD3]]
29;
30  %wide.vec = load <16 x double>, ptr %ptr, align 16
31  %strided.v0 = shufflevector <16 x double> %wide.vec, <16 x double> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
32  %strided.v1 = shufflevector <16 x double> %wide.vec, <16 x double> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
33  %strided.v2 = shufflevector <16 x double> %wide.vec, <16 x double> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
34  %strided.v3 = shufflevector <16 x double> %wide.vec, <16 x double> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
35  %add1 = fadd <4 x double> %strided.v0, %strided.v1
36  %add2 = fadd <4 x double> %add1, %strided.v2
37  %add3 = fadd <4 x double> %add2, %strided.v3
38  ret <4 x double> %add3
39}
40
41define <4 x i64> @load_factori64_4(ptr %ptr) {
42; CHECK-LABEL: @load_factori64_4(
43; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <4 x i64>, ptr [[PTR:%.*]], i32 0
44; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i64>, ptr [[TMP1]], align 16
45; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <4 x i64>, ptr [[PTR]], i32 1
46; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i64>, ptr [[TMP3]], align 16
47; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr <4 x i64>, ptr [[PTR]], i32 2
48; CHECK-NEXT:    [[TMP6:%.*]] = load <4 x i64>, ptr [[TMP5]], align 16
49; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr <4 x i64>, ptr [[PTR]], i32 3
50; CHECK-NEXT:    [[TMP8:%.*]] = load <4 x i64>, ptr [[TMP7]], align 16
51; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <4 x i64> [[TMP2]], <4 x i64> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
52; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> [[TMP8]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
53; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <4 x i64> [[TMP2]], <4 x i64> [[TMP6]], <4 x i32> <i32 2, i32 3, i32 6, i32 7>
54; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> [[TMP8]], <4 x i32> <i32 2, i32 3, i32 6, i32 7>
55; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> [[TMP10]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
56; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <4 x i64> [[TMP11]], <4 x i64> [[TMP12]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
57; CHECK-NEXT:    [[TMP15:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> [[TMP10]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
58; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <4 x i64> [[TMP11]], <4 x i64> [[TMP12]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
59; CHECK-NEXT:    [[ADD1:%.*]] = add <4 x i64> [[TMP13]], [[TMP15]]
60; CHECK-NEXT:    [[ADD2:%.*]] = add <4 x i64> [[ADD1]], [[TMP14]]
61; CHECK-NEXT:    [[ADD3:%.*]] = add <4 x i64> [[ADD2]], [[TMP16]]
62; CHECK-NEXT:    ret <4 x i64> [[ADD3]]
63;
64  %wide.vec = load <16 x i64>, ptr %ptr, align 16
65  %strided.v0 = shufflevector <16 x i64> %wide.vec, <16 x i64> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
66  %strided.v1 = shufflevector <16 x i64> %wide.vec, <16 x i64> undef, <4 x i32> <i32 1, i32 5, i32 9, i32 13>
67  %strided.v2 = shufflevector <16 x i64> %wide.vec, <16 x i64> undef, <4 x i32> <i32 2, i32 6, i32 10, i32 14>
68  %strided.v3 = shufflevector <16 x i64> %wide.vec, <16 x i64> undef, <4 x i32> <i32 3, i32 7, i32 11, i32 15>
69  %add1 = add <4 x i64> %strided.v0, %strided.v1
70  %add2 = add <4 x i64> %add1, %strided.v2
71  %add3 = add <4 x i64> %add2, %strided.v3
72  ret <4 x i64> %add3
73}
74
75define <4 x double> @load_factorf64_1(ptr %ptr) {
76; CHECK-LABEL: @load_factorf64_1(
77; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr <4 x double>, ptr [[PTR:%.*]], i32 0
78; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x double>, ptr [[TMP1]], align 16
79; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr <4 x double>, ptr [[PTR]], i32 1
80; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x double>, ptr [[TMP3]], align 16
81; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr <4 x double>, ptr [[PTR]], i32 2
82; CHECK-NEXT:    [[TMP6:%.*]] = load <4 x double>, ptr [[TMP5]], align 16
83; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr <4 x double>, ptr [[PTR]], i32 3
84; CHECK-NEXT:    [[TMP8:%.*]] = load <4 x double>, ptr [[TMP7]], align 16
85; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
86; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <4 x double> [[TMP4]], <4 x double> [[TMP8]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
87; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> [[TMP6]], <4 x i32> <i32 2, i32 3, i32 6, i32 7>
88; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <4 x double> [[TMP4]], <4 x double> [[TMP8]], <4 x i32> <i32 2, i32 3, i32 6, i32 7>
89; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <4 x double> [[TMP9]], <4 x double> [[TMP10]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
90; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <4 x double> [[TMP11]], <4 x double> [[TMP12]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
91; CHECK-NEXT:    [[TMP15:%.*]] = shufflevector <4 x double> [[TMP9]], <4 x double> [[TMP10]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
92; CHECK-NEXT:    [[TMP16:%.*]] = shufflevector <4 x double> [[TMP11]], <4 x double> [[TMP12]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
93; CHECK-NEXT:    [[MUL:%.*]] = fmul <4 x double> [[TMP13]], [[TMP13]]
94; CHECK-NEXT:    ret <4 x double> [[MUL]]
95;
96  %wide.vec = load <16 x double>, ptr %ptr, align 16
97  %strided.v0 = shufflevector <16 x double> %wide.vec, <16 x double> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
98  %strided.v3 = shufflevector <16 x double> %wide.vec, <16 x double> undef, <4 x i32> <i32 0, i32 4, i32 8, i32 12>
99  %mul = fmul <4 x double> %strided.v0, %strided.v3
100  ret <4 x double> %mul
101}
102
103define void @store_factorf64_4(ptr %ptr, <4 x double> %v0, <4 x double> %v1, <4 x double> %v2, <4 x double> %v3) {
104; CHECK-LABEL: @store_factorf64_4(
105; CHECK-NEXT:    [[S0:%.*]] = shufflevector <4 x double> [[V0:%.*]], <4 x double> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
106; CHECK-NEXT:    [[S1:%.*]] = shufflevector <4 x double> [[V2:%.*]], <4 x double> [[V3:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
107; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x double> [[S0]], <8 x double> [[S1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
108; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x double> [[S0]], <8 x double> [[S1]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
109; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <8 x double> [[S0]], <8 x double> [[S1]], <4 x i32> <i32 8, i32 9, i32 10, i32 11>
110; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <8 x double> [[S0]], <8 x double> [[S1]], <4 x i32> <i32 12, i32 13, i32 14, i32 15>
111; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
112; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
113; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> [[TMP3]], <4 x i32> <i32 2, i32 3, i32 6, i32 7>
114; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> [[TMP4]], <4 x i32> <i32 2, i32 3, i32 6, i32 7>
115; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <4 x double> [[TMP5]], <4 x double> [[TMP6]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
116; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <4 x double> [[TMP7]], <4 x double> [[TMP8]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
117; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <4 x double> [[TMP5]], <4 x double> [[TMP6]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
118; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <4 x double> [[TMP7]], <4 x double> [[TMP8]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
119; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <4 x double> [[TMP9]], <4 x double> [[TMP11]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
120; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <4 x double> [[TMP10]], <4 x double> [[TMP12]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
121; CHECK-NEXT:    [[TMP15:%.*]] = shufflevector <8 x double> [[TMP13]], <8 x double> [[TMP14]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
122; CHECK-NEXT:    store <16 x double> [[TMP15]], ptr [[PTR:%.*]], align 16
123; CHECK-NEXT:    ret void
124;
125  %s0 = shufflevector <4 x double> %v0, <4 x double> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
126  %s1 = shufflevector <4 x double> %v2, <4 x double> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
127  %interleaved.vec = shufflevector <8 x double> %s0, <8 x double> %s1, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
128  store <16 x double> %interleaved.vec, ptr %ptr, align 16
129  ret void
130}
131
132define void @store_factori64_4(ptr %ptr, <4 x i64> %v0, <4 x i64> %v1, <4 x i64> %v2, <4 x i64> %v3) {
133; CHECK-LABEL: @store_factori64_4(
134; CHECK-NEXT:    [[S0:%.*]] = shufflevector <4 x i64> [[V0:%.*]], <4 x i64> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
135; CHECK-NEXT:    [[S1:%.*]] = shufflevector <4 x i64> [[V2:%.*]], <4 x i64> [[V3:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
136; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i64> [[S0]], <8 x i64> [[S1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
137; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i64> [[S0]], <8 x i64> [[S1]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
138; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i64> [[S0]], <8 x i64> [[S1]], <4 x i32> <i32 8, i32 9, i32 10, i32 11>
139; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <8 x i64> [[S0]], <8 x i64> [[S1]], <4 x i32> <i32 12, i32 13, i32 14, i32 15>
140; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
141; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <4 x i64> [[TMP2]], <4 x i64> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
142; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <4 x i64> [[TMP1]], <4 x i64> [[TMP3]], <4 x i32> <i32 2, i32 3, i32 6, i32 7>
143; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <4 x i64> [[TMP2]], <4 x i64> [[TMP4]], <4 x i32> <i32 2, i32 3, i32 6, i32 7>
144; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <4 x i64> [[TMP5]], <4 x i64> [[TMP6]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
145; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <4 x i64> [[TMP7]], <4 x i64> [[TMP8]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
146; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <4 x i64> [[TMP5]], <4 x i64> [[TMP6]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
147; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <4 x i64> [[TMP7]], <4 x i64> [[TMP8]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
148; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> [[TMP11]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
149; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <4 x i64> [[TMP10]], <4 x i64> [[TMP12]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
150; CHECK-NEXT:    [[TMP15:%.*]] = shufflevector <8 x i64> [[TMP13]], <8 x i64> [[TMP14]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
151; CHECK-NEXT:    store <16 x i64> [[TMP15]], ptr [[PTR:%.*]], align 16
152; CHECK-NEXT:    ret void
153;
154  %s0 = shufflevector <4 x i64> %v0, <4 x i64> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
155  %s1 = shufflevector <4 x i64> %v2, <4 x i64> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
156  %interleaved.vec = shufflevector <8 x i64> %s0, <8 x i64> %s1, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 1, i32 5, i32 9, i32 13, i32 2, i32 6, i32 10, i32 14, i32 3, i32 7, i32 11, i32 15>
157  store <16 x i64> %interleaved.vec, ptr %ptr, align 16
158  ret void
159}
160
161define void @store_factorf64_4_revMask(ptr %ptr, <4 x double> %v0, <4 x double> %v1, <4 x double> %v2, <4 x double> %v3) {
162; CHECK-LABEL: @store_factorf64_4_revMask(
163; CHECK-NEXT:    [[S0:%.*]] = shufflevector <4 x double> [[V0:%.*]], <4 x double> [[V1:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
164; CHECK-NEXT:    [[S1:%.*]] = shufflevector <4 x double> [[V2:%.*]], <4 x double> [[V3:%.*]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
165; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x double> [[S0]], <8 x double> [[S1]], <4 x i32> <i32 12, i32 13, i32 14, i32 15>
166; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x double> [[S0]], <8 x double> [[S1]], <4 x i32> <i32 8, i32 9, i32 10, i32 11>
167; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <8 x double> [[S0]], <8 x double> [[S1]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
168; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <8 x double> [[S0]], <8 x double> [[S1]], <4 x i32> <i32 0, i32 1, i32 2, i32 3>
169; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
170; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
171; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> [[TMP3]], <4 x i32> <i32 2, i32 3, i32 6, i32 7>
172; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> [[TMP4]], <4 x i32> <i32 2, i32 3, i32 6, i32 7>
173; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <4 x double> [[TMP5]], <4 x double> [[TMP6]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
174; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <4 x double> [[TMP7]], <4 x double> [[TMP8]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
175; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <4 x double> [[TMP5]], <4 x double> [[TMP6]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
176; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <4 x double> [[TMP7]], <4 x double> [[TMP8]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
177; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <4 x double> [[TMP9]], <4 x double> [[TMP11]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
178; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <4 x double> [[TMP10]], <4 x double> [[TMP12]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
179; CHECK-NEXT:    [[TMP15:%.*]] = shufflevector <8 x double> [[TMP13]], <8 x double> [[TMP14]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
180; CHECK-NEXT:    store <16 x double> [[TMP15]], ptr [[PTR:%.*]], align 16
181; CHECK-NEXT:    ret void
182;
183  %s0 = shufflevector <4 x double> %v0, <4 x double> %v1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
184  %s1 = shufflevector <4 x double> %v2, <4 x double> %v3, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
185  %interleaved.vec = shufflevector <8 x double> %s0, <8 x double> %s1, <16 x i32> <i32 12, i32 8, i32 4, i32 0, i32 13, i32 9, i32 5, i32 1, i32 14, i32 10, i32 6, i32 2, i32 15, i32 11, i32 7, i32 3>
186  store <16 x double> %interleaved.vec, ptr %ptr, align 16
187  ret void
188}
189
190define void @store_factorf64_4_arbitraryMask(ptr %ptr, <16 x double> %v0, <16 x double> %v1, <16 x double> %v2, <16 x double> %v3) {
191; CHECK-LABEL: @store_factorf64_4_arbitraryMask(
192; CHECK-NEXT:    [[S0:%.*]] = shufflevector <16 x double> [[V0:%.*]], <16 x double> [[V1:%.*]], <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
193; CHECK-NEXT:    [[S1:%.*]] = shufflevector <16 x double> [[V2:%.*]], <16 x double> [[V3:%.*]], <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
194; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x double> [[S0]], <32 x double> [[S1]], <4 x i32> <i32 4, i32 5, i32 6, i32 7>
195; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <32 x double> [[S0]], <32 x double> [[S1]], <4 x i32> <i32 32, i32 33, i32 34, i32 35>
196; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <32 x double> [[S0]], <32 x double> [[S1]], <4 x i32> <i32 16, i32 17, i32 18, i32 19>
197; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <32 x double> [[S0]], <32 x double> [[S1]], <4 x i32> <i32 8, i32 9, i32 10, i32 11>
198; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> [[TMP3]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
199; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
200; CHECK-NEXT:    [[TMP7:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> [[TMP3]], <4 x i32> <i32 2, i32 3, i32 6, i32 7>
201; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <4 x double> [[TMP2]], <4 x double> [[TMP4]], <4 x i32> <i32 2, i32 3, i32 6, i32 7>
202; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <4 x double> [[TMP5]], <4 x double> [[TMP6]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
203; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <4 x double> [[TMP7]], <4 x double> [[TMP8]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
204; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <4 x double> [[TMP5]], <4 x double> [[TMP6]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
205; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <4 x double> [[TMP7]], <4 x double> [[TMP8]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
206; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <4 x double> [[TMP9]], <4 x double> [[TMP11]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
207; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <4 x double> [[TMP10]], <4 x double> [[TMP12]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
208; CHECK-NEXT:    [[TMP15:%.*]] = shufflevector <8 x double> [[TMP13]], <8 x double> [[TMP14]], <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
209; CHECK-NEXT:    store <16 x double> [[TMP15]], ptr [[PTR:%.*]], align 16
210; CHECK-NEXT:    ret void
211;
212  %s0 = shufflevector <16 x double> %v0, <16 x double> %v1, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
213  %s1 = shufflevector <16 x double> %v2, <16 x double> %v3, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
214  %interleaved.vec = shufflevector <32 x double> %s0, <32 x double> %s1, <16 x i32> <i32 4, i32 32, i32 16, i32 8, i32 5, i32 33, i32 17, i32 9, i32 6, i32 34, i32 18, i32 10, i32 7, i32 35, i32 19, i32 11>
215  store <16 x double> %interleaved.vec, ptr %ptr, align 16
216  ret void
217}
218
219; This verifies whether the test passes and does not hit any assertions.
220; Today, X86InterleavedAccess could have handled this case and
221; generate transposed sequence by extending the current implementation
222; which would be creating dummy vectors of undef. But it decided not to
223; optimize these cases where the load-size is less than Factor * NumberOfElements.
224; Because a better sequence can easily be generated by CG.
225
226@a = local_unnamed_addr global <4 x double> zeroinitializer, align 32
227; Function Attrs: norecurse nounwind readonly uwtable
228define <4 x double> @test_unhandled(<4 x double> %b) {
229; CHECK-LABEL: @test_unhandled(
230; CHECK-NEXT:  entry:
231; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x double>, ptr @a, align 32
232; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[TMP0]], <4 x double> undef, <4 x i32> <i32 3, i32 poison, i32 poison, i32 poison>
233; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x double> [[TMP1]], <4 x double> [[B:%.*]], <4 x i32> <i32 0, i32 4, i32 0, i32 0>
234; CHECK-NEXT:    ret <4 x double> [[SHUFFLE]]
235;
236entry:
237  %0 = load <4 x double>, ptr @a, align 32
238  %1 = shufflevector <4 x double> %0, <4 x double> undef, <4 x i32> <i32 3, i32 undef, i32 undef, i32 undef>
239  %shuffle = shufflevector <4 x double> %1, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 0, i32 0>
240  ret <4 x double> %shuffle
241}
242