; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -O3 -mtriple=x86_64-linux-generic -mattr=avx < %s | FileCheck %s

; Bug 45833:
; The SplitVecRes_MSTORE method should split an extended value type
; according to the halving of the enveloping type to avoid all sorts
; of inconsistencies downstream. For example, for an extended value type
; with VL=14 and an enveloping type with VL=16 that is split 8/8, the
; extended type should be split 8/6 and not 7/7. This also accounts for
; hi masked stores that get zero storage size (and are unused).
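;
; To illustrate the split described above, the hypothetical function below
; (a sketch added for exposition; it is not matched by any CHECK lines)
; spells out what an 8/6 split of a VL=14 masked store amounts to: a full
; <8 x float> masked store of the lo half, then a <6 x float> masked store
; of the hi half at element offset 8 (byte offset 32).
define void @mstore_split14_sketch(<8 x float> %lo, <6 x float> %hi, ptr %addr, <8 x i1> %mlo, <6 x i1> %mhi) {
  ; Lo half: covers elements 0-7 of the original <14 x float> value.
  call void @llvm.masked.store.v8f32.p0(<8 x float> %lo, ptr %addr, i32 4, <8 x i1> %mlo)
  ; Hi half: covers the remaining elements 8-13.
  %hiaddr = getelementptr inbounds float, ptr %addr, i64 8
  call void @llvm.masked.store.v6f32.p0(<6 x float> %hi, ptr %hiaddr, i32 4, <6 x i1> %mhi)
  ret void
}
declare void @llvm.masked.store.v8f32.p0(<8 x float>, ptr, i32, <8 x i1>)
declare void @llvm.masked.store.v6f32.p0(<6 x float>, ptr, i32, <6 x i1>)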

define void @mstore_split9(<9 x float> %value, ptr %addr, <9 x i1> %mask) {
; CHECK-LABEL: mstore_split9:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vinsertps {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[2,3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm4 = xmm4[0,1],xmm6[0],xmm4[3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm4 = xmm4[0,1,2],xmm7[0]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[0]
; CHECK-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm0
; CHECK-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT:    vmovd %esi, %xmm2
; CHECK-NEXT:    vpinsrb $1, %edx, %xmm2, %xmm2
; CHECK-NEXT:    vpinsrb $2, %ecx, %xmm2, %xmm2
; CHECK-NEXT:    vpinsrb $3, %r8d, %xmm2, %xmm2
; CHECK-NEXT:    vpinsrb $4, %r9d, %xmm2, %xmm3
; CHECK-NEXT:    vpinsrb $5, {{[0-9]+}}(%rsp), %xmm3, %xmm3
; CHECK-NEXT:    vpinsrb $6, {{[0-9]+}}(%rsp), %xmm3, %xmm3
; CHECK-NEXT:    vpinsrb $7, {{[0-9]+}}(%rsp), %xmm3, %xmm3
; CHECK-NEXT:    vpinsrb $8, {{[0-9]+}}(%rsp), %xmm3, %xmm3
; CHECK-NEXT:    vpshufb {{.*#+}} xmm4 = xmm3[8,u,u,u],zero,xmm3[u,u,u],zero,xmm3[u,u,u],zero,xmm3[u,u,u]
; CHECK-NEXT:    vpslld $31, %xmm4, %xmm4
; CHECK-NEXT:    vmaskmovps %ymm1, %ymm4, 32(%rdi)
; CHECK-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
; CHECK-NEXT:    vpslld $31, %xmm1, %xmm1
; CHECK-NEXT:    vpshufd {{.*#+}} xmm2 = xmm3[1,1,1,1]
; CHECK-NEXT:    vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
; CHECK-NEXT:    vpslld $31, %xmm2, %xmm2
; CHECK-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; CHECK-NEXT:    vmaskmovps %ymm0, %ymm1, (%rdi)
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  call void @llvm.masked.store.v9f32.p0(<9 x float> %value, ptr %addr, i32 4, <9 x i1>%mask)
  ret void
}

define void @mstore_split13(<13 x float> %value, ptr %addr, <13 x i1> %mask) {
; CHECK-LABEL: mstore_split13:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vinsertps {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[2,3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm4 = xmm4[0,1],xmm6[0],xmm4[3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm4 = xmm4[0,1,2],xmm7[0]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[0]
; CHECK-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm0
; CHECK-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],mem[0]
; CHECK-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; CHECK-NEXT:    vmovd %esi, %xmm2
; CHECK-NEXT:    vpinsrb $1, %edx, %xmm2, %xmm2
; CHECK-NEXT:    vpinsrb $2, %ecx, %xmm2, %xmm2
; CHECK-NEXT:    vpinsrb $3, %r8d, %xmm2, %xmm2
; CHECK-NEXT:    vpinsrb $4, %r9d, %xmm2, %xmm3
; CHECK-NEXT:    vpinsrb $5, {{[0-9]+}}(%rsp), %xmm3, %xmm3
; CHECK-NEXT:    vpinsrb $6, {{[0-9]+}}(%rsp), %xmm3, %xmm3
; CHECK-NEXT:    vpinsrb $7, {{[0-9]+}}(%rsp), %xmm3, %xmm3
; CHECK-NEXT:    vpinsrb $8, {{[0-9]+}}(%rsp), %xmm3, %xmm3
; CHECK-NEXT:    vpinsrb $9, {{[0-9]+}}(%rsp), %xmm3, %xmm3
; CHECK-NEXT:    vpinsrb $10, {{[0-9]+}}(%rsp), %xmm3, %xmm3
; CHECK-NEXT:    vpinsrb $11, {{[0-9]+}}(%rsp), %xmm3, %xmm3
; CHECK-NEXT:    vpinsrb $12, {{[0-9]+}}(%rsp), %xmm3, %xmm4
; CHECK-NEXT:    vpunpckhbw {{.*#+}} xmm3 = xmm3[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15]
; CHECK-NEXT:    vpmovzxwd {{.*#+}} xmm3 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero
; CHECK-NEXT:    vpslld $31, %xmm3, %xmm3
; CHECK-NEXT:    vpsrldq {{.*#+}} xmm5 = xmm4[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT:    vpslld $31, %xmm5, %xmm5
; CHECK-NEXT:    vinsertf128 $1, %xmm5, %ymm3, %ymm3
; CHECK-NEXT:    vmaskmovps %ymm1, %ymm3, 32(%rdi)
; CHECK-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
; CHECK-NEXT:    vpslld $31, %xmm1, %xmm1
; CHECK-NEXT:    vpshufd {{.*#+}} xmm2 = xmm4[1,1,1,1]
; CHECK-NEXT:    vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
; CHECK-NEXT:    vpslld $31, %xmm2, %xmm2
; CHECK-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; CHECK-NEXT:    vmaskmovps %ymm0, %ymm1, (%rdi)
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  call void @llvm.masked.store.v13f32.p0(<13 x float> %value, ptr %addr, i32 4, <13 x i1>%mask)
  ret void
}

define void @mstore_split14(<14 x float> %value, ptr %addr, <14 x i1> %mask) {
; CHECK-LABEL: mstore_split14:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vinsertps {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[2,3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm4 = xmm4[0,1],xmm6[0],xmm4[3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm4 = xmm4[0,1,2],xmm7[0]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[0]
; CHECK-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm0
; CHECK-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],mem[0]
; CHECK-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[2,3]
; CHECK-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; CHECK-NEXT:    vmovd %esi, %xmm2
; CHECK-NEXT:    vpinsrb $1, %edx, %xmm2, %xmm2
; CHECK-NEXT:    vpinsrb $2, %ecx, %xmm2, %xmm2
; CHECK-NEXT:    vpinsrb $3, %r8d, %xmm2, %xmm2
; CHECK-NEXT:    vpinsrb $4, %r9d, %xmm2, %xmm3
; CHECK-NEXT:    vpinsrb $5, {{[0-9]+}}(%rsp), %xmm3, %xmm3
; CHECK-NEXT:    vpinsrb $6, {{[0-9]+}}(%rsp), %xmm3, %xmm3
; CHECK-NEXT:    vpinsrb $7, {{[0-9]+}}(%rsp), %xmm3, %xmm3
; CHECK-NEXT:    vpinsrb $8, {{[0-9]+}}(%rsp), %xmm3, %xmm3
; CHECK-NEXT:    vpinsrb $9, {{[0-9]+}}(%rsp), %xmm3, %xmm3
; CHECK-NEXT:    vpinsrb $10, {{[0-9]+}}(%rsp), %xmm3, %xmm3
; CHECK-NEXT:    vpinsrb $11, {{[0-9]+}}(%rsp), %xmm3, %xmm3
; CHECK-NEXT:    vpinsrb $12, {{[0-9]+}}(%rsp), %xmm3, %xmm3
; CHECK-NEXT:    vpinsrb $13, {{[0-9]+}}(%rsp), %xmm3, %xmm3
; CHECK-NEXT:    vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
; CHECK-NEXT:    vpslld $31, %xmm2, %xmm2
; CHECK-NEXT:    vpshufd {{.*#+}} xmm4 = xmm3[1,1,1,1]
; CHECK-NEXT:    vpmovzxbd {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,xmm4[1],zero,zero,zero,xmm4[2],zero,zero,zero,xmm4[3],zero,zero,zero
; CHECK-NEXT:    vpslld $31, %xmm4, %xmm4
; CHECK-NEXT:    vinsertf128 $1, %xmm4, %ymm2, %ymm2
; CHECK-NEXT:    vmaskmovps %ymm0, %ymm2, (%rdi)
; CHECK-NEXT:    vpshufb {{.*#+}} xmm0 = xmm3[8,u,9,u,10,u,11,u,12,u,13,u],zero,xmm3[u],zero,xmm3[u]
; CHECK-NEXT:    vpmovzxwd {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; CHECK-NEXT:    vpslld $31, %xmm2, %xmm2
; CHECK-NEXT:    vpunpckhwd {{.*#+}} xmm0 = xmm0[4,4,5,5,6,6,7,7]
; CHECK-NEXT:    vpslld $31, %xmm0, %xmm0
; CHECK-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
; CHECK-NEXT:    vmaskmovps %ymm1, %ymm0, 32(%rdi)
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  call void @llvm.masked.store.v14f32.p0(<14 x float> %value, ptr %addr, i32 4, <14 x i1>%mask)
  ret void
}

define void @mstore_split17(<17 x float> %value, ptr %addr, <17 x i1> %mask) {
; CHECK-LABEL: mstore_split17:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vinsertps {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[2,3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm4 = xmm4[0,1],xmm6[0],xmm4[3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm4 = xmm4[0,1,2],xmm7[0]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[0]
; CHECK-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm0
; CHECK-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],mem[0]
; CHECK-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[2,3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1],mem[0],xmm2[3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],mem[0]
; CHECK-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; CHECK-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-NEXT:    vmovd %eax, %xmm3
; CHECK-NEXT:    vpslld $31, %xmm3, %xmm3
; CHECK-NEXT:    vmaskmovps %ymm2, %ymm3, 64(%rdi)
; CHECK-NEXT:    vmovd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT:    vpinsrb $2, {{[0-9]+}}(%rsp), %xmm2, %xmm2
; CHECK-NEXT:    vpinsrb $4, {{[0-9]+}}(%rsp), %xmm2, %xmm2
; CHECK-NEXT:    vpinsrb $6, {{[0-9]+}}(%rsp), %xmm2, %xmm2
; CHECK-NEXT:    vpmovzxwd {{.*#+}} xmm3 = xmm2[0],zero,xmm2[1],zero,xmm2[2],zero,xmm2[3],zero
; CHECK-NEXT:    vpslld $31, %xmm3, %xmm3
; CHECK-NEXT:    vpinsrb $8, {{[0-9]+}}(%rsp), %xmm2, %xmm2
; CHECK-NEXT:    vpinsrb $10, {{[0-9]+}}(%rsp), %xmm2, %xmm2
; CHECK-NEXT:    vpinsrb $12, {{[0-9]+}}(%rsp), %xmm2, %xmm2
; CHECK-NEXT:    vpinsrb $14, {{[0-9]+}}(%rsp), %xmm2, %xmm2
; CHECK-NEXT:    vpunpckhwd {{.*#+}} xmm2 = xmm2[4,4,5,5,6,6,7,7]
; CHECK-NEXT:    vpslld $31, %xmm2, %xmm2
; CHECK-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; CHECK-NEXT:    vmaskmovps %ymm1, %ymm2, 32(%rdi)
; CHECK-NEXT:    vmovd %esi, %xmm1
; CHECK-NEXT:    vpinsrb $1, %edx, %xmm1, %xmm1
; CHECK-NEXT:    vpinsrb $2, %ecx, %xmm1, %xmm1
; CHECK-NEXT:    vpinsrb $3, %r8d, %xmm1, %xmm1
; CHECK-NEXT:    vpmovzxbd {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; CHECK-NEXT:    vpslld $31, %xmm2, %xmm2
; CHECK-NEXT:    vpinsrb $4, %r9d, %xmm1, %xmm1
; CHECK-NEXT:    vpinsrb $5, {{[0-9]+}}(%rsp), %xmm1, %xmm1
; CHECK-NEXT:    vpinsrb $6, {{[0-9]+}}(%rsp), %xmm1, %xmm1
; CHECK-NEXT:    vpinsrb $7, {{[0-9]+}}(%rsp), %xmm1, %xmm1
; CHECK-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,1,1]
; CHECK-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; CHECK-NEXT:    vpslld $31, %xmm1, %xmm1
; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
; CHECK-NEXT:    vmaskmovps %ymm0, %ymm1, (%rdi)
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  call void @llvm.masked.store.v17f32.p0(<17 x float> %value, ptr %addr, i32 4, <17 x i1>%mask)
  ret void
}

define void @mstore_split23(<23 x float> %value, ptr %addr, <23 x i1> %mask) {
; CHECK-LABEL: mstore_split23:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vinsertps {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[2,3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm4 = xmm4[0,1],xmm6[0],xmm4[3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm4 = xmm4[0,1,2],xmm7[0]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm2[0],xmm0[3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm3[0]
; CHECK-NEXT:    vinsertf128 $1, %xmm4, %ymm0, %ymm0
; CHECK-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],mem[0]
; CHECK-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[2,3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1],mem[0],xmm2[3]
; CHECK-NEXT:    vinsertf128 $1, %xmm2, %ymm1, %ymm1
; CHECK-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0],mem[0],xmm2[2,3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1],mem[0],xmm2[3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],mem[0]
; CHECK-NEXT:    vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
; CHECK-NEXT:    vinsertps {{.*#+}} xmm3 = xmm3[0],mem[0],xmm3[2,3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm3 = xmm3[0,1],mem[0],xmm3[3]
; CHECK-NEXT:    vinsertps {{.*#+}} xmm3 = xmm3[0,1,2],mem[0]
; CHECK-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
; CHECK-NEXT:    movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-NEXT:    vmovd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; CHECK-NEXT:    vpinsrb $2, {{[0-9]+}}(%rsp), %xmm3, %xmm3
; CHECK-NEXT:    vpinsrb $4, {{[0-9]+}}(%rsp), %xmm3, %xmm3
; CHECK-NEXT:    vpinsrb $6, {{[0-9]+}}(%rsp), %xmm3, %xmm3
; CHECK-NEXT:    vpmovzxwd {{.*#+}} xmm4 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero
; CHECK-NEXT:    vpslld $31, %xmm4, %xmm4
; CHECK-NEXT:    vpinsrb $8, {{[0-9]+}}(%rsp), %xmm3, %xmm3
; CHECK-NEXT:    vpinsrb $10, {{[0-9]+}}(%rsp), %xmm3, %xmm3
; CHECK-NEXT:    vpinsrb $12, {{[0-9]+}}(%rsp), %xmm3, %xmm3
; CHECK-NEXT:    vpinsrb $14, {{[0-9]+}}(%rsp), %xmm3, %xmm3
; CHECK-NEXT:    vpunpckhwd {{.*#+}} xmm3 = xmm3[4,4,5,5,6,6,7,7]
; CHECK-NEXT:    vpslld $31, %xmm3, %xmm3
; CHECK-NEXT:    vinsertf128 $1, %xmm3, %ymm4, %ymm3
; CHECK-NEXT:    vmaskmovps %ymm2, %ymm3, 32(%rdi)
; CHECK-NEXT:    vmovd %eax, %xmm2
; CHECK-NEXT:    vpinsrb $1, {{[0-9]+}}(%rsp), %xmm2, %xmm2
; CHECK-NEXT:    vpinsrb $2, {{[0-9]+}}(%rsp), %xmm2, %xmm2
; CHECK-NEXT:    vpinsrb $3, {{[0-9]+}}(%rsp), %xmm2, %xmm2
; CHECK-NEXT:    vpmovzxbd {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
; CHECK-NEXT:    vpslld $31, %xmm3, %xmm3
; CHECK-NEXT:    vpinsrb $4, {{[0-9]+}}(%rsp), %xmm2, %xmm2
; CHECK-NEXT:    vpinsrb $5, {{[0-9]+}}(%rsp), %xmm2, %xmm2
; CHECK-NEXT:    vpinsrb $6, {{[0-9]+}}(%rsp), %xmm2, %xmm2
; CHECK-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[1,1,1,1]
; CHECK-NEXT:    vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero
; CHECK-NEXT:    vpslld $31, %xmm2, %xmm2
; CHECK-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; CHECK-NEXT:    vmaskmovps %ymm1, %ymm2, 64(%rdi)
; CHECK-NEXT:    vmovd %esi, %xmm1
; CHECK-NEXT:    vpinsrb $1, %edx, %xmm1, %xmm1
; CHECK-NEXT:    vpinsrb $2, %ecx, %xmm1, %xmm1
; CHECK-NEXT:    vpinsrb $3, %r8d, %xmm1, %xmm1
; CHECK-NEXT:    vpmovzxbd {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; CHECK-NEXT:    vpslld $31, %xmm2, %xmm2
; CHECK-NEXT:    vpinsrb $4, %r9d, %xmm1, %xmm1
; CHECK-NEXT:    vpinsrb $5, {{[0-9]+}}(%rsp), %xmm1, %xmm1
; CHECK-NEXT:    vpinsrb $6, {{[0-9]+}}(%rsp), %xmm1, %xmm1
; CHECK-NEXT:    vpinsrb $7, {{[0-9]+}}(%rsp), %xmm1, %xmm1
; CHECK-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[1,1,1,1]
; CHECK-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; CHECK-NEXT:    vpslld $31, %xmm1, %xmm1
; CHECK-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
; CHECK-NEXT:    vmaskmovps %ymm0, %ymm1, (%rdi)
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    retq
  call void @llvm.masked.store.v23f32.p0(<23 x float> %value, ptr %addr, i32 4, <23 x i1>%mask)
  ret void
}

declare void @llvm.masked.store.v9f32.p0(<9 x float>, ptr, i32, <9 x i1>)
declare void @llvm.masked.store.v13f32.p0(<13 x float>, ptr, i32, <13 x i1>)
declare void @llvm.masked.store.v14f32.p0(<14 x float>, ptr, i32, <14 x i1>)
declare void @llvm.masked.store.v17f32.p0(<17 x float>, ptr, i32, <17 x i1>)
declare void @llvm.masked.store.v23f32.p0(<23 x float>, ptr, i32, <23 x i1>)