xref: /llvm-project/llvm/test/CodeGen/X86/avx512-rotate.ll (revision 1bd836fa1087dcd6b07ca043b9155089116f1f51)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=KNL
3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=SKX
4
5declare <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
6declare <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
7declare <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
8declare <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
9
10declare <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
11declare <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
12
13; Tests showing replacement of variable rotates with immediate splat versions.
14
; Rotate-left of <16 x i32> %x0 by splat constants 5, 6 and 7 through the variable-rotate
; intrinsic llvm.x86.avx512.mask.prolv.d.512. The three calls use, in order: passthru %x1
; with mask %x2, a zeroinitializer passthru with mask %x2, and passthru %x1 with an
; all-ones (-1) mask. The assertions expect the immediate form (vprold $imm) with
; merge-masking, zero-masking ({z}) and no masking respectively, on both the knl and skx
; RUN lines; the only difference between the two is kmovw versus kmovd for the mask move.
15define { <16 x i32>, <16 x i32>, <16 x i32> } @test_splat_rol_v16i32(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
16; KNL-LABEL: test_splat_rol_v16i32:
17; KNL:       # %bb.0:
18; KNL-NEXT:    vmovdqa64 %zmm1, %zmm3
19; KNL-NEXT:    kmovw %edi, %k1
20; KNL-NEXT:    vprold $5, %zmm0, %zmm3 {%k1}
21; KNL-NEXT:    vprold $6, %zmm0, %zmm1 {%k1} {z}
22; KNL-NEXT:    vprold $7, %zmm0, %zmm2
23; KNL-NEXT:    vmovdqa64 %zmm3, %zmm0
24; KNL-NEXT:    retq
25;
26; SKX-LABEL: test_splat_rol_v16i32:
27; SKX:       # %bb.0:
28; SKX-NEXT:    vmovdqa64 %zmm1, %zmm3
29; SKX-NEXT:    kmovd %edi, %k1
30; SKX-NEXT:    vprold $5, %zmm0, %zmm3 {%k1}
31; SKX-NEXT:    vprold $6, %zmm0, %zmm1 {%k1} {z}
32; SKX-NEXT:    vprold $7, %zmm0, %zmm2
33; SKX-NEXT:    vmovdqa64 %zmm3, %zmm0
34; SKX-NEXT:    retq
35  %res0 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>, <16 x i32> %x1, i16 %x2)
36  %res1 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6>, <16 x i32> zeroinitializer, i16 %x2)
37  %res2 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>, <16 x i32> %x1, i16 -1)
38  %res3 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } poison, <16 x i32> %res0, 0
39  %res4 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> }  %res3, <16 x i32> %res1, 1
40  %res5 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> }  %res4, <16 x i32> %res2, 2
41  ret { <16 x i32>, <16 x i32>, <16 x i32> } %res5
42}
43
; Rotate-left of <8 x i64> %x0 by splat constants 5, 6 and 7 through the variable-rotate
; intrinsic llvm.x86.avx512.mask.prolv.q.512. The three calls use, in order: passthru %x1
; with mask %x2, a zeroinitializer passthru with mask %x2, and passthru %x1 with an
; all-ones (-1) mask. The assertions expect the immediate form (vprolq $imm) with
; merge-masking, zero-masking ({z}) and no masking respectively, on both RUN lines.
44define { <8 x i64>, <8 x i64>, <8 x i64> } @test_splat_rol_v8i64(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
45; KNL-LABEL: test_splat_rol_v8i64:
46; KNL:       # %bb.0:
47; KNL-NEXT:    vmovdqa64 %zmm1, %zmm3
48; KNL-NEXT:    kmovw %edi, %k1
49; KNL-NEXT:    vprolq $5, %zmm0, %zmm3 {%k1}
50; KNL-NEXT:    vprolq $6, %zmm0, %zmm1 {%k1} {z}
51; KNL-NEXT:    vprolq $7, %zmm0, %zmm2
52; KNL-NEXT:    vmovdqa64 %zmm3, %zmm0
53; KNL-NEXT:    retq
54;
55; SKX-LABEL: test_splat_rol_v8i64:
56; SKX:       # %bb.0:
57; SKX-NEXT:    vmovdqa64 %zmm1, %zmm3
58; SKX-NEXT:    kmovd %edi, %k1
59; SKX-NEXT:    vprolq $5, %zmm0, %zmm3 {%k1}
60; SKX-NEXT:    vprolq $6, %zmm0, %zmm1 {%k1} {z}
61; SKX-NEXT:    vprolq $7, %zmm0, %zmm2
62; SKX-NEXT:    vmovdqa64 %zmm3, %zmm0
63; SKX-NEXT:    retq
64  %res0 = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>, <8 x i64> %x1, i8 %x2)
65  %res1 = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 6, i64 6, i64 6, i64 6, i64 6, i64 6, i64 6, i64 6>, <8 x i64> zeroinitializer, i8 %x2)
66  %res2 = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>, <8 x i64> %x1, i8 -1)
67  %res3 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } poison, <8 x i64> %res0, 0
68  %res4 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> }  %res3, <8 x i64> %res1, 1
69  %res5 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> }  %res4, <8 x i64> %res2, 2
70  ret { <8 x i64>, <8 x i64>, <8 x i64> } %res5
71}
72
; Rotate-right of <16 x i32> %x0 by splat constants 5, 6 and 7 through the variable-rotate
; intrinsic llvm.x86.avx512.mask.prorv.d.512. The three calls use, in order: passthru %x1
; with mask %x2, a zeroinitializer passthru with mask %x2, and passthru %x1 with an
; all-ones (-1) mask. The assertions expect the immediate form (vprord $imm) with
; merge-masking, zero-masking ({z}) and no masking respectively, on both RUN lines.
73define { <16 x i32>, <16 x i32>, <16 x i32> } @test_splat_ror_v16i32(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
74; KNL-LABEL: test_splat_ror_v16i32:
75; KNL:       # %bb.0:
76; KNL-NEXT:    vmovdqa64 %zmm1, %zmm3
77; KNL-NEXT:    kmovw %edi, %k1
78; KNL-NEXT:    vprord $5, %zmm0, %zmm3 {%k1}
79; KNL-NEXT:    vprord $6, %zmm0, %zmm1 {%k1} {z}
80; KNL-NEXT:    vprord $7, %zmm0, %zmm2
81; KNL-NEXT:    vmovdqa64 %zmm3, %zmm0
82; KNL-NEXT:    retq
83;
84; SKX-LABEL: test_splat_ror_v16i32:
85; SKX:       # %bb.0:
86; SKX-NEXT:    vmovdqa64 %zmm1, %zmm3
87; SKX-NEXT:    kmovd %edi, %k1
88; SKX-NEXT:    vprord $5, %zmm0, %zmm3 {%k1}
89; SKX-NEXT:    vprord $6, %zmm0, %zmm1 {%k1} {z}
90; SKX-NEXT:    vprord $7, %zmm0, %zmm2
91; SKX-NEXT:    vmovdqa64 %zmm3, %zmm0
92; SKX-NEXT:    retq
93  %res0 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>, <16 x i32> %x1, i16 %x2)
94  %res1 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6, i32 6>, <16 x i32> zeroinitializer, i16 %x2)
95  %res2 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7, i32 7>, <16 x i32> %x1, i16 -1)
96  %res3 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } poison, <16 x i32> %res0, 0
97  %res4 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> }  %res3, <16 x i32> %res1, 1
98  %res5 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> }  %res4, <16 x i32> %res2, 2
99  ret { <16 x i32>, <16 x i32>, <16 x i32> } %res5
100}
101
; Rotate-right of <8 x i64> %x0 by splat constants 5, 6 and 7 through the variable-rotate
; intrinsic llvm.x86.avx512.mask.prorv.q.512. The three calls use, in order: passthru %x1
; with mask %x2, a zeroinitializer passthru with mask %x2, and passthru %x1 with an
; all-ones (-1) mask. The assertions expect the immediate form (vprorq $imm) with
; merge-masking, zero-masking ({z}) and no masking respectively, on both RUN lines.
102define { <8 x i64>, <8 x i64>, <8 x i64> } @test_splat_ror_v8i64(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
103; KNL-LABEL: test_splat_ror_v8i64:
104; KNL:       # %bb.0:
105; KNL-NEXT:    vmovdqa64 %zmm1, %zmm3
106; KNL-NEXT:    kmovw %edi, %k1
107; KNL-NEXT:    vprorq $5, %zmm0, %zmm3 {%k1}
108; KNL-NEXT:    vprorq $6, %zmm0, %zmm1 {%k1} {z}
109; KNL-NEXT:    vprorq $7, %zmm0, %zmm2
110; KNL-NEXT:    vmovdqa64 %zmm3, %zmm0
111; KNL-NEXT:    retq
112;
113; SKX-LABEL: test_splat_ror_v8i64:
114; SKX:       # %bb.0:
115; SKX-NEXT:    vmovdqa64 %zmm1, %zmm3
116; SKX-NEXT:    kmovd %edi, %k1
117; SKX-NEXT:    vprorq $5, %zmm0, %zmm3 {%k1}
118; SKX-NEXT:    vprorq $6, %zmm0, %zmm1 {%k1} {z}
119; SKX-NEXT:    vprorq $7, %zmm0, %zmm2
120; SKX-NEXT:    vmovdqa64 %zmm3, %zmm0
121; SKX-NEXT:    retq
122  %res0 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>, <8 x i64> %x1, i8 %x2)
123  %res1 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 6, i64 6, i64 6, i64 6, i64 6, i64 6, i64 6, i64 6>, <8 x i64> zeroinitializer, i8 %x2)
124  %res2 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>, <8 x i64> %x1, i8 -1)
125  %res3 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } poison, <8 x i64> %res0, 0
126  %res4 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> }  %res3, <8 x i64> %res1, 1
127  %res5 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> }  %res4, <8 x i64> %res2, 2
128  ret { <8 x i64>, <8 x i64>, <8 x i64> } %res5
129}
130
131; Tests showing replacement of out-of-bounds variable rotates with in-bounds immediate splat versions.
132
; Rotate-left of <16 x i32> %x0 by splat amounts that exceed the 32-bit element width:
; 33, -1 and 65534, passed to llvm.x86.avx512.mask.prolv.d.512 with merge-masking (%x1, %x2),
; zero-masking (zeroinitializer, %x2) and an all-ones mask respectively.
; The assertions expect immediates $1, $31 and $30, i.e. each amount reduced modulo 32
; (33 -> 1, -1 -> 31, 65534 -> 30), on both RUN lines.
133define { <16 x i32>, <16 x i32>, <16 x i32> } @test_splat_bounds_rol_v16i32(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
134; KNL-LABEL: test_splat_bounds_rol_v16i32:
135; KNL:       # %bb.0:
136; KNL-NEXT:    vmovdqa64 %zmm1, %zmm3
137; KNL-NEXT:    kmovw %edi, %k1
138; KNL-NEXT:    vprold $1, %zmm0, %zmm3 {%k1}
139; KNL-NEXT:    vprold $31, %zmm0, %zmm1 {%k1} {z}
140; KNL-NEXT:    vprold $30, %zmm0, %zmm2
141; KNL-NEXT:    vmovdqa64 %zmm3, %zmm0
142; KNL-NEXT:    retq
143;
144; SKX-LABEL: test_splat_bounds_rol_v16i32:
145; SKX:       # %bb.0:
146; SKX-NEXT:    vmovdqa64 %zmm1, %zmm3
147; SKX-NEXT:    kmovd %edi, %k1
148; SKX-NEXT:    vprold $1, %zmm0, %zmm3 {%k1}
149; SKX-NEXT:    vprold $31, %zmm0, %zmm1 {%k1} {z}
150; SKX-NEXT:    vprold $30, %zmm0, %zmm2
151; SKX-NEXT:    vmovdqa64 %zmm3, %zmm0
152; SKX-NEXT:    retq
153  %res0 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33>, <16 x i32> %x1, i16 %x2)
154  %res1 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <16 x i32> zeroinitializer, i16 %x2)
155  %res2 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> <i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534>, <16 x i32> %x1, i16 -1)
156  %res3 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } poison, <16 x i32> %res0, 0
157  %res4 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> }  %res3, <16 x i32> %res1, 1
158  %res5 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> }  %res4, <16 x i32> %res2, 2
159  ret { <16 x i32>, <16 x i32>, <16 x i32> } %res5
160}
161
; Rotate-left of <8 x i64> %x0 by splat amounts that exceed the 64-bit element width:
; 65534, 65 and -1, passed to llvm.x86.avx512.mask.prolv.q.512 with merge-masking (%x1, %x2),
; zero-masking (zeroinitializer, %x2) and an all-ones mask respectively.
; The assertions expect immediates $62, $1 and $63, i.e. each amount reduced modulo 64
; (65534 -> 62, 65 -> 1, -1 -> 63), on both RUN lines.
162define { <8 x i64>, <8 x i64>, <8 x i64> } @test_splat_bounds_rol_v8i64(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
163; KNL-LABEL: test_splat_bounds_rol_v8i64:
164; KNL:       # %bb.0:
165; KNL-NEXT:    vmovdqa64 %zmm1, %zmm3
166; KNL-NEXT:    kmovw %edi, %k1
167; KNL-NEXT:    vprolq $62, %zmm0, %zmm3 {%k1}
168; KNL-NEXT:    vprolq $1, %zmm0, %zmm1 {%k1} {z}
169; KNL-NEXT:    vprolq $63, %zmm0, %zmm2
170; KNL-NEXT:    vmovdqa64 %zmm3, %zmm0
171; KNL-NEXT:    retq
172;
173; SKX-LABEL: test_splat_bounds_rol_v8i64:
174; SKX:       # %bb.0:
175; SKX-NEXT:    vmovdqa64 %zmm1, %zmm3
176; SKX-NEXT:    kmovd %edi, %k1
177; SKX-NEXT:    vprolq $62, %zmm0, %zmm3 {%k1}
178; SKX-NEXT:    vprolq $1, %zmm0, %zmm1 {%k1} {z}
179; SKX-NEXT:    vprolq $63, %zmm0, %zmm2
180; SKX-NEXT:    vmovdqa64 %zmm3, %zmm0
181; SKX-NEXT:    retq
182  %res0 = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534>, <8 x i64> %x1, i8 %x2)
183  %res1 = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 65, i64 65, i64 65, i64 65, i64 65, i64 65, i64 65, i64 65>, <8 x i64> zeroinitializer, i8 %x2)
184  %res2 = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, <8 x i64> %x1, i8 -1)
185  %res3 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } poison, <8 x i64> %res0, 0
186  %res4 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> }  %res3, <8 x i64> %res1, 1
187  %res5 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> }  %res4, <8 x i64> %res2, 2
188  ret { <8 x i64>, <8 x i64>, <8 x i64> } %res5
189}
190
; Rotate-right of <16 x i32> %x0 by splat amounts that exceed the 32-bit element width:
; 33, -1 and 65534, passed to llvm.x86.avx512.mask.prorv.d.512 with merge-masking (%x1, %x2),
; zero-masking (zeroinitializer, %x2) and an all-ones mask respectively.
; The assertions expect immediates $1, $31 and $30, i.e. each amount reduced modulo 32
; (33 -> 1, -1 -> 31, 65534 -> 30), on both RUN lines.
191define { <16 x i32>, <16 x i32>, <16 x i32> } @test_splat_bounds_ror_v16i32(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
192; KNL-LABEL: test_splat_bounds_ror_v16i32:
193; KNL:       # %bb.0:
194; KNL-NEXT:    vmovdqa64 %zmm1, %zmm3
195; KNL-NEXT:    kmovw %edi, %k1
196; KNL-NEXT:    vprord $1, %zmm0, %zmm3 {%k1}
197; KNL-NEXT:    vprord $31, %zmm0, %zmm1 {%k1} {z}
198; KNL-NEXT:    vprord $30, %zmm0, %zmm2
199; KNL-NEXT:    vmovdqa64 %zmm3, %zmm0
200; KNL-NEXT:    retq
201;
202; SKX-LABEL: test_splat_bounds_ror_v16i32:
203; SKX:       # %bb.0:
204; SKX-NEXT:    vmovdqa64 %zmm1, %zmm3
205; SKX-NEXT:    kmovd %edi, %k1
206; SKX-NEXT:    vprord $1, %zmm0, %zmm3 {%k1}
207; SKX-NEXT:    vprord $31, %zmm0, %zmm1 {%k1} {z}
208; SKX-NEXT:    vprord $30, %zmm0, %zmm2
209; SKX-NEXT:    vmovdqa64 %zmm3, %zmm0
210; SKX-NEXT:    retq
211  %res0 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33, i32 33>, <16 x i32> %x1, i16 %x2)
212  %res1 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1>, <16 x i32> zeroinitializer, i16 %x2)
213  %res2 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> <i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534, i32 65534>, <16 x i32> %x1, i16 -1)
214  %res3 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> } poison, <16 x i32> %res0, 0
215  %res4 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> }  %res3, <16 x i32> %res1, 1
216  %res5 = insertvalue { <16 x i32>, <16 x i32>, <16 x i32> }  %res4, <16 x i32> %res2, 2
217  ret { <16 x i32>, <16 x i32>, <16 x i32> } %res5
218}
219
; Rotate-right of <8 x i64> %x0 by splat amounts that exceed the 64-bit element width:
; 65534, 65 and -1, passed to llvm.x86.avx512.mask.prorv.q.512 with merge-masking (%x1, %x2),
; zero-masking (zeroinitializer, %x2) and an all-ones mask respectively.
; The assertions expect immediates $62, $1 and $63, i.e. each amount reduced modulo 64
; (65534 -> 62, 65 -> 1, -1 -> 63), on both RUN lines.
220define { <8 x i64>, <8 x i64>, <8 x i64> } @test_splat_bounds_ror_v8i64(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
221; KNL-LABEL: test_splat_bounds_ror_v8i64:
222; KNL:       # %bb.0:
223; KNL-NEXT:    vmovdqa64 %zmm1, %zmm3
224; KNL-NEXT:    kmovw %edi, %k1
225; KNL-NEXT:    vprorq $62, %zmm0, %zmm3 {%k1}
226; KNL-NEXT:    vprorq $1, %zmm0, %zmm1 {%k1} {z}
227; KNL-NEXT:    vprorq $63, %zmm0, %zmm2
228; KNL-NEXT:    vmovdqa64 %zmm3, %zmm0
229; KNL-NEXT:    retq
230;
231; SKX-LABEL: test_splat_bounds_ror_v8i64:
232; SKX:       # %bb.0:
233; SKX-NEXT:    vmovdqa64 %zmm1, %zmm3
234; SKX-NEXT:    kmovd %edi, %k1
235; SKX-NEXT:    vprorq $62, %zmm0, %zmm3 {%k1}
236; SKX-NEXT:    vprorq $1, %zmm0, %zmm1 {%k1} {z}
237; SKX-NEXT:    vprorq $63, %zmm0, %zmm2
238; SKX-NEXT:    vmovdqa64 %zmm3, %zmm0
239; SKX-NEXT:    retq
240  %res0 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534, i64 65534>, <8 x i64> %x1, i8 %x2)
241  %res1 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 65, i64 65, i64 65, i64 65, i64 65, i64 65, i64 65, i64 65>, <8 x i64> zeroinitializer, i8 %x2)
242  %res2 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> <i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1, i64 -1>, <8 x i64> %x1, i8 -1)
243  %res3 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> } poison, <8 x i64> %res0, 0
244  %res4 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> }  %res3, <8 x i64> %res1, 1
245  %res5 = insertvalue { <8 x i64>, <8 x i64>, <8 x i64> }  %res4, <8 x i64> %res2, 2
246  ret { <8 x i64>, <8 x i64>, <8 x i64> } %res5
247}
248
249; Constant folding
250; We also test with a target shuffle as the rotate input so that the rotate can't be constant
251; folded upon creation; it must wait until the target shuffle has been constant folded in combineX86ShufflesRecursively.
252
; Rotate-left with all-constant operands: a splat of i64 1 rotated by
; <0, 1, 2, 63, 65, 65534, 65535, -1> with an all-ones mask. No shuffle feeds this rotate.
; The assertions expect the call to be folded to a single constant-vector load (vmovaps);
; the expected lanes equal 1 rotated left by each amount modulo 64
; (65 -> 1, 65534 -> 62, 65535 -> 63, -1 -> 63).
253define <8 x i64> @test_fold_rol_v8i64() {
254; CHECK-LABEL: test_fold_rol_v8i64:
255; CHECK:       # %bb.0:
256; CHECK-NEXT:    vmovaps {{.*#+}} zmm0 = [1,2,4,9223372036854775808,2,4611686018427387904,9223372036854775808,9223372036854775808]
257; CHECK-NEXT:    retq
258  %res = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>, <8 x i64> <i64 0, i64 1, i64 2, i64 63, i64 65, i64 65534, i64 65535, i64 -1>, <8 x i64> zeroinitializer, i8 -1)
259  ret <8 x i64> %res
260}
261
; Rotate-left whose input %res0 is produced by llvm.x86.avx512.maskz.vpermt2var.d.512 on
; all-constant operands, rotated by the per-lane amounts <0, 1, ..., 15> with an all-ones mask.
; The parameters %x0 and %x1 are not referenced in the body.
; The current assertions expect a broadcast of the constant 1 followed by a variable rotate
; (vprolvd) with a constant-pool operand, i.e. the rotate itself is not replaced by a
; constant in the expected output.
262define <16 x i32> @test_fold_rol_v16i32(<16 x i32> %x0, <16 x i32> %x1) {
263; CHECK-LABEL: test_fold_rol_v16i32:
264; CHECK:       # %bb.0:
265; CHECK-NEXT:    vpbroadcastd {{.*#+}} zmm0 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
266; CHECK-NEXT:    vprolvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
267; CHECK-NEXT:    retq
268  %res0 = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, <16 x i32> zeroinitializer, i16 -1)
269  %res1 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %res0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, <16 x i32> zeroinitializer, i16 -1)
270  ret <16 x i32> %res1
271}
272
; Rotate-right whose input %res0 is produced by llvm.x86.avx512.maskz.vpermt2var.q.512 on
; constant operands (the first operand's lane 0 is undef), rotated by the per-lane amounts
; <0, 1, ..., 7> with an all-ones mask.
; The current assertions expect a broadcast of the constant 1 followed by a variable rotate
; (vprorvq) with a constant-pool operand, i.e. the rotate itself is not replaced by a
; constant in the expected output.
273define <8 x i64> @test_fold_ror_v8i64() {
274; CHECK-LABEL: test_fold_ror_v8i64:
275; CHECK:       # %bb.0:
276; CHECK-NEXT:    vpbroadcastq {{.*#+}} zmm0 = [1,1,1,1,1,1,1,1]
277; CHECK-NEXT:    vprorvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
278; CHECK-NEXT:    retq
279  %res0 = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> <i64 undef, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>, <8 x i64> <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>, <8 x i64> zeroinitializer, i8 -1)
280  %res1 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %res0, <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, <8 x i64> zeroinitializer, i8 -1)
281  ret <8 x i64> %res1
282}
283
; Rotate-right whose input %res0 is produced by llvm.x86.avx512.maskz.vpermt2var.d.512 on
; all-constant operands, rotated by the per-lane amounts <0, 1, ..., 15> with an all-ones mask.
; The parameters %x0 and %x1 are not referenced in the body.
; The current assertions expect a broadcast of the constant 1 followed by a variable rotate
; (vprorvd) with a constant-pool operand, i.e. the rotate itself is not replaced by a
; constant in the expected output.
284define <16 x i32> @test_fold_ror_v16i32(<16 x i32> %x0, <16 x i32> %x1) {
285; CHECK-LABEL: test_fold_ror_v16i32:
286; CHECK:       # %bb.0:
287; CHECK-NEXT:    vpbroadcastd {{.*#+}} zmm0 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
288; CHECK-NEXT:    vprorvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
289; CHECK-NEXT:    retq
290  %res0 = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, <16 x i32> zeroinitializer, i16 -1)
291  %res1 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %res0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, <16 x i32> zeroinitializer, i16 -1)
292  ret <16 x i32> %res1
293}
294