; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=instcombine -S | FileCheck %s

;
; UNDEF Elts
;

; pmuludq with both source operands undef folds to zero (CHECK: zeroinitializer).
define <2 x i64> @undef_pmuludq_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: @undef_pmuludq_128(
; CHECK-NEXT:    ret <2 x i64> zeroinitializer
;
  %1 = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> undef, <4 x i32> undef)
  ret <2 x i64> %1
}

; 256-bit variant: undef x undef pmuludq folds to zero.
define <4 x i64> @undef_pmuludq_256(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: @undef_pmuludq_256(
; CHECK-NEXT:    ret <4 x i64> zeroinitializer
;
  %1 = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> undef, <8 x i32> undef)
  ret <4 x i64> %1
}

; 512-bit variant: undef x undef pmuludq folds to zero.
define <8 x i64> @undef_pmuludq_512(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: @undef_pmuludq_512(
; CHECK-NEXT:    ret <8 x i64> zeroinitializer
;
  %1 = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> undef, <16 x i32> undef)
  ret <8 x i64> %1
}

; Signed variant: undef x undef pmuldq folds to zero.
define <2 x i64> @undef_pmuldq_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: @undef_pmuldq_128(
; CHECK-NEXT:    ret <2 x i64> zeroinitializer
;
  %1 = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> undef, <4 x i32> undef)
  ret <2 x i64> %1
}

; 256-bit signed variant: undef x undef pmuldq folds to zero.
define <4 x i64> @undef_pmuldq_256(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: @undef_pmuldq_256(
; CHECK-NEXT:    ret <4 x i64> zeroinitializer
;
  %1 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> undef, <8 x i32> undef)
  ret <4 x i64> %1
}

; 512-bit signed variant: undef x undef pmuldq folds to zero.
define <8 x i64> @undef_pmuldq_512(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: @undef_pmuldq_512(
; CHECK-NEXT:    ret <8 x i64> zeroinitializer
;
  %1 = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> undef, <16 x i32> undef)
  ret <8 x i64> %1
}

; undef x zero: the zero operand pins the result to zero.
define <2 x i64> @undef_zero_pmuludq_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: @undef_zero_pmuludq_128(
; CHECK-NEXT:    ret <2 x i64> zeroinitializer
;
  %1 = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> undef, <4 x i32> zeroinitializer)
  ret <2 x i64> %1
}

; zero x undef (operands swapped relative to the 128-bit test) also folds to zero.
define <4 x i64> @undef_zero_pmuludq_256(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: @undef_zero_pmuludq_256(
; CHECK-NEXT:    ret <4 x i64> zeroinitializer
;
  %1 = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> zeroinitializer, <8 x i32> undef)
  ret <4 x i64> %1
}

; 512-bit undef x zero folds to zero.
define <8 x i64> @undef_zero_pmuludq_512(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: @undef_zero_pmuludq_512(
; CHECK-NEXT:    ret <8 x i64> zeroinitializer
;
  %1 = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> undef, <16 x i32> zeroinitializer)
  ret <8 x i64> %1
}

; Signed zero x undef folds to zero.
define <2 x i64> @undef_zero_pmuldq_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: @undef_zero_pmuldq_128(
; CHECK-NEXT:    ret <2 x i64> zeroinitializer
;
  %1 = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> zeroinitializer, <4 x i32> undef)
  ret <2 x i64> %1
}

; Signed undef x zero folds to zero.
define <4 x i64> @undef_zero_pmuldq_256(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: @undef_zero_pmuldq_256(
; CHECK-NEXT:    ret <4 x i64> zeroinitializer
;
  %1 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> undef, <8 x i32> zeroinitializer)
  ret <4 x i64> %1
}

; 512-bit signed zero x undef folds to zero.
define <8 x i64> @undef_zero_pmuldq_512(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: @undef_zero_pmuldq_512(
; CHECK-NEXT:    ret <8 x i64> zeroinitializer
;
  %1 = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> zeroinitializer, <16 x i32> undef)
  ret <8 x i64> %1
}

;
; Constant Folding
;

; Constant folding of the unsigned 32x32->64 multiply of the even (0, 2) lanes.
define <2 x i64> @fold_pmuludq_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: @fold_pmuludq_128(
; CHECK-NEXT:    ret <2 x i64> <i64 9223372030412324865, i64 4294967295>
;
  %1 = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> <i32 2147483647, i32 1, i32 1, i32 3>)
  ret <2 x i64> %1
}

; All-zero constant operands fold to zero.
define <4 x i64> @fold_pmuludq_256(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: @fold_pmuludq_256(
; CHECK-NEXT:    ret <4 x i64> zeroinitializer
;
  %1 = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> zeroinitializer, <8 x i32> zeroinitializer)
  ret <4 x i64> %1
}

; Constant folding with a mix of undef, zero, and boundary-value lanes.
define <8 x i64> @fold_pmuludq_512(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: @fold_pmuludq_512(
; CHECK-NEXT:    ret <8 x i64> <i64 0, i64 0, i64 255, i64 131070, i64 0, i64 -281474976645121, i64 140737488289792, i64 281470681743360>
;
  %1 = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> <i32 0, i32 0, i32 undef, i32 0, i32 1, i32 1, i32 2, i32 2, i32 undef, i32 undef, i32 -1, i32 -1, i32 65536, i32 -1, i32 -65536, i32 undef>, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 1, i32 255, i32 -256, i32 65535, i32 -65536, i32 0, i32 -1, i32 -65535, i32 -65535, i32 2147483647, i32 2147483648, i32 65536, i32 -65535>)
  ret <8 x i64> %1
}

; Signed constant folding; the even lanes are sign-extended before multiplying.
define <2 x i64> @fold_pmuldq_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: @fold_pmuldq_128(
; CHECK-NEXT:    ret <2 x i64> <i64 0, i64 2>
;
  %1 = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> <i32 undef, i32 -1, i32 -1, i32 -1>, <4 x i32> <i32 undef, i32 1, i32 -2, i32 3>)
  ret <2 x i64> %1
}

; 256-bit signed constant folding across boundary values.
define <4 x i64> @fold_pmuldq_256(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: @fold_pmuldq_256(
; CHECK-NEXT:    ret <4 x i64> <i64 0, i64 4294836225, i64 140737488289792, i64 -140737488355328>
;
  %1 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> <i32 undef, i32 1, i32 -65535, i32 128, i32 65536, i32 2147483647, i32 -2147483648, i32 65536>, <8 x i32> <i32 0, i32 -1, i32 -65535, i32 -65535, i32 2147483647, i32 2147483648, i32 65536, i32 -65535>)
  ret <4 x i64> %1
}

; A zero first operand folds the whole 512-bit signed multiply to zero.
define <8 x i64> @fold_pmuldq_512(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: @fold_pmuldq_512(
; CHECK-NEXT:    ret <8 x i64> zeroinitializer
;
  %1 = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> zeroinitializer, <16 x i32> <i32 undef, i32 -1, i32 -3, i32 -1, i32 8, i32 10, i32 -256, i32 65536, i32 undef, i32 1, i32 -65535, i32 128, i32 65536, i32 2147483647, i32 -2147483648, i32 65536>)
  ret <8 x i64> %1
}

;
; PMULUDQ/PMULDQ - only the even elements (0, 2, 4, 6) of the vXi32 inputs are required.
;

; Only even source elements are demanded, so the intrinsic is replaced by a
; zero-extending (and-masked) i64 multiply per the CHECK lines.
define <2 x i64> @test_demanded_elts_pmuludq_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: @test_demanded_elts_pmuludq_128(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[A0:%.*]], <4 x i32> poison, <4 x i32> <i32 0, i32 0, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[A1:%.*]], <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to <2 x i64>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to <2 x i64>
; CHECK-NEXT:    [[TMP5:%.*]] = and <2 x i64> [[TMP3]], <i64 4294967295, i64 poison>
; CHECK-NEXT:    [[TMP6:%.*]] = and <2 x i64> [[TMP4]], <i64 4294967295, i64 poison>
; CHECK-NEXT:    [[TMP7:%.*]] = mul nuw <2 x i64> [[TMP5]], [[TMP6]]
; CHECK-NEXT:    [[TMP8:%.*]] = shufflevector <2 x i64> [[TMP7]], <2 x i64> poison, <2 x i32> zeroinitializer
; CHECK-NEXT:    ret <2 x i64> [[TMP8]]
;
  %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  %2 = shufflevector <4 x i32> %a1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
  %3 = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %1, <4 x i32> %2)
  %4 = shufflevector <2 x i64> %3, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %4
}

179define <4 x i64> @test_demanded_elts_pmuludq_256(<8 x i32> %a0, <8 x i32> %a1) {
180; CHECK-LABEL: @test_demanded_elts_pmuludq_256(
181; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> poison, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
182; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> [[A1:%.*]], <8 x i32> poison, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
183; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i32> [[TMP1]] to <4 x i64>
184; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i32> [[TMP2]] to <4 x i64>
185; CHECK-NEXT:    [[TMP5:%.*]] = and <4 x i64> [[TMP3]], splat (i64 4294967295)
186; CHECK-NEXT:    [[TMP6:%.*]] = and <4 x i64> [[TMP4]], splat (i64 4294967295)
187; CHECK-NEXT:    [[TMP7:%.*]] = mul nuw <4 x i64> [[TMP5]], [[TMP6]]
188; CHECK-NEXT:    ret <4 x i64> [[TMP7]]
189;
190  %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
191  %2 = shufflevector <8 x i32> %a1, <8 x i32> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
192  %3 = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> %1, <8 x i32> %2)
193  ret <4 x i64> %3
194}
195
; 512-bit demanded-elements test: intrinsic lowered to a masked i64 multiply.
define <8 x i64> @test_demanded_elts_pmuludq_512(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: @test_demanded_elts_pmuludq_512(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> [[A0:%.*]], <16 x i32> poison, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i32> [[A1:%.*]], <16 x i32> poison, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to <8 x i64>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to <8 x i64>
; CHECK-NEXT:    [[TMP5:%.*]] = and <8 x i64> [[TMP3]], splat (i64 4294967295)
; CHECK-NEXT:    [[TMP6:%.*]] = and <8 x i64> [[TMP4]], splat (i64 4294967295)
; CHECK-NEXT:    [[TMP7:%.*]] = mul nuw <8 x i64> [[TMP5]], [[TMP6]]
; CHECK-NEXT:    ret <8 x i64> [[TMP7]]
;
  %1 = shufflevector <16 x i32> %a0, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
  %2 = shufflevector <16 x i32> %a1, <16 x i32> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
  %3 = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %1, <16 x i32> %2)
  ret <8 x i64> %3
}

; Signed demanded-elements test: intrinsic lowered to a sign-extending
; (shl/ashr) i64 multiply per the CHECK lines.
define <2 x i64> @test_demanded_elts_pmuldq_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: @test_demanded_elts_pmuldq_128(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[A0:%.*]], <4 x i32> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[A1:%.*]], <4 x i32> poison, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to <2 x i64>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to <2 x i64>
; CHECK-NEXT:    [[TMP5:%.*]] = shl <2 x i64> [[TMP3]], splat (i64 32)
; CHECK-NEXT:    [[TMP6:%.*]] = ashr exact <2 x i64> [[TMP5]], splat (i64 32)
; CHECK-NEXT:    [[TMP7:%.*]] = shl <2 x i64> [[TMP4]], splat (i64 32)
; CHECK-NEXT:    [[TMP8:%.*]] = ashr exact <2 x i64> [[TMP7]], splat (i64 32)
; CHECK-NEXT:    [[TMP9:%.*]] = mul nsw <2 x i64> [[TMP6]], [[TMP8]]
; CHECK-NEXT:    ret <2 x i64> [[TMP9]]
;
  %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  %2 = shufflevector <4 x i32> %a1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
  %3 = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %1, <4 x i32> %2)
  ret <2 x i64> %3
}

; 256-bit signed demanded-elements test with a trailing result shuffle.
define <4 x i64> @test_demanded_elts_pmuldq_256(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: @test_demanded_elts_pmuldq_256(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> poison, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> [[A1:%.*]], <8 x i32> poison, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i32> [[TMP1]] to <4 x i64>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i32> [[TMP2]] to <4 x i64>
; CHECK-NEXT:    [[TMP5:%.*]] = shl <4 x i64> [[TMP3]], splat (i64 32)
; CHECK-NEXT:    [[TMP6:%.*]] = ashr exact <4 x i64> [[TMP5]], splat (i64 32)
; CHECK-NEXT:    [[TMP7:%.*]] = shl <4 x i64> [[TMP4]], splat (i64 32)
; CHECK-NEXT:    [[TMP8:%.*]] = ashr exact <4 x i64> [[TMP7]], splat (i64 32)
; CHECK-NEXT:    [[TMP9:%.*]] = mul nsw <4 x i64> [[TMP6]], [[TMP8]]
; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> poison, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
; CHECK-NEXT:    ret <4 x i64> [[TMP10]]
;
  %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
  %2 = shufflevector <8 x i32> %a1, <8 x i32> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
  %3 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> %1, <8 x i32> %2)
  %4 = shufflevector <4 x i64> %3, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
  ret <4 x i64> %4
}

; 512-bit signed demanded-elements test with a trailing result shuffle.
define <8 x i64> @test_demanded_elts_pmuldq_512(<16 x i32> %a0, <16 x i32> %a1) {
; CHECK-LABEL: @test_demanded_elts_pmuldq_512(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> [[A0:%.*]], <16 x i32> poison, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i32> [[A1:%.*]], <16 x i32> poison, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i32> [[TMP1]] to <8 x i64>
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i32> [[TMP2]] to <8 x i64>
; CHECK-NEXT:    [[TMP5:%.*]] = shl <8 x i64> [[TMP3]], splat (i64 32)
; CHECK-NEXT:    [[TMP6:%.*]] = ashr exact <8 x i64> [[TMP5]], splat (i64 32)
; CHECK-NEXT:    [[TMP7:%.*]] = shl <8 x i64> [[TMP4]], splat (i64 32)
; CHECK-NEXT:    [[TMP8:%.*]] = ashr exact <8 x i64> [[TMP7]], splat (i64 32)
; CHECK-NEXT:    [[TMP9:%.*]] = mul nsw <8 x i64> [[TMP6]], [[TMP8]]
; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <8 x i64> [[TMP9]], <8 x i64> poison, <8 x i32> <i32 0, i32 0, i32 3, i32 3, i32 4, i32 4, i32 7, i32 7>
; CHECK-NEXT:    ret <8 x i64> [[TMP10]]
;
  %1 = shufflevector <16 x i32> %a0, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
  %2 = shufflevector <16 x i32> %a1, <16 x i32> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
  %3 = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %1, <16 x i32> %2)
  %4 = shufflevector <8 x i64> %3, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 3, i32 3, i32 4, i32 4, i32 7, i32 7>
  ret <8 x i64> %4
}

; Declarations of the X86 vector multiply intrinsics exercised above.
declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone
declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone

declare <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32>, <8 x i32>) nounwind readnone
declare <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32>, <8 x i32>) nounwind readnone

declare <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32>, <16 x i32>) nounwind readnone
declare <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32>, <16 x i32>) nounwind readnone