; xref: /llvm-project/llvm/test/Transforms/InstCombine/X86/x86-pmaddwd.ll (revision d893ed78718e25a982dcba9cdba2d78212b79353)
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=instcombine -mtriple=x86_64-unknown-unknown -S | FileCheck %s
3e8c79fbaSSimon Pilgrim
;
; UNDEF Elts
;
7e8c79fbaSSimon Pilgrim
; 128-bit (SSE2) pmaddwd whose second operand is undef: the call is expected
; to fold away to an all-zero result.
define <4 x i32> @undef_pmaddwd_128(<8 x i16> %a0) {
; CHECK-LABEL: @undef_pmaddwd_128(
; CHECK-NEXT:    ret <4 x i32> zeroinitializer
;
  %1 = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> undef)
  ret <4 x i32> %1
}
15e8c79fbaSSimon Pilgrim
; Commuted form of the 128-bit undef case: undef is the first operand and the
; call is still expected to fold to an all-zero result.
define <4 x i32> @undef_pmaddwd_128_commute(<8 x i16> %a0) {
; CHECK-LABEL: @undef_pmaddwd_128_commute(
; CHECK-NEXT:    ret <4 x i32> zeroinitializer
;
  %1 = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> undef, <8 x i16> %a0)
  ret <4 x i32> %1
}
2366caf01dSSimon Pilgrim
; 256-bit (AVX2) pmaddwd whose second operand is undef: the call is expected
; to fold away to an all-zero result.
define <8 x i32> @undef_pmaddwd_256(<16 x i16> %a0) {
; CHECK-LABEL: @undef_pmaddwd_256(
; CHECK-NEXT:    ret <8 x i32> zeroinitializer
;
  %1 = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %a0, <16 x i16> undef)
  ret <8 x i32> %1
}
31e8c79fbaSSimon Pilgrim
; Commuted form of the 256-bit undef case: undef is the first operand and the
; call is still expected to fold to an all-zero result.
define <8 x i32> @undef_pmaddwd_256_commute(<16 x i16> %a0) {
; CHECK-LABEL: @undef_pmaddwd_256_commute(
; CHECK-NEXT:    ret <8 x i32> zeroinitializer
;
  %1 = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> undef, <16 x i16> %a0)
  ret <8 x i32> %1
}
3966caf01dSSimon Pilgrim
; 512-bit (AVX-512) pmaddwd whose second operand is undef: the call is
; expected to fold away to an all-zero result.
define <16 x i32> @undef_pmaddwd_512(<32 x i16> %a0) {
; CHECK-LABEL: @undef_pmaddwd_512(
; CHECK-NEXT:    ret <16 x i32> zeroinitializer
;
  %1 = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> %a0, <32 x i16> undef)
  ret <16 x i32> %1
}
4766caf01dSSimon Pilgrim
; Commuted form of the 512-bit undef case: undef is the first operand and the
; call is still expected to fold to an all-zero result.
define <16 x i32> @undef_pmaddwd_512_commute(<32 x i16> %a0) {
; CHECK-LABEL: @undef_pmaddwd_512_commute(
; CHECK-NEXT:    ret <16 x i32> zeroinitializer
;
  %1 = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> undef, <32 x i16> %a0)
  ret <16 x i32> %1
}
55e8c79fbaSSimon Pilgrim
;
; Zero Elts
;
59e8c79fbaSSimon Pilgrim
; 128-bit (SSE2) pmaddwd whose second operand is all zeros: the call is
; expected to fold away to an all-zero result.
define <4 x i32> @zero_pmaddwd_128(<8 x i16> %a0) {
; CHECK-LABEL: @zero_pmaddwd_128(
; CHECK-NEXT:    ret <4 x i32> zeroinitializer
;
  %1 = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> zeroinitializer)
  ret <4 x i32> %1
}
67e8c79fbaSSimon Pilgrim
; Commuted form of the 128-bit zero case: the all-zero vector is the first
; operand and the call is still expected to fold to an all-zero result.
define <4 x i32> @zero_pmaddwd_128_commute(<8 x i16> %a0) {
; CHECK-LABEL: @zero_pmaddwd_128_commute(
; CHECK-NEXT:    ret <4 x i32> zeroinitializer
;
  %1 = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> zeroinitializer, <8 x i16> %a0)
  ret <4 x i32> %1
}
75e8c79fbaSSimon Pilgrim
; 256-bit (AVX2) pmaddwd whose second operand is all zeros: the call is
; expected to fold away to an all-zero result.
define <8 x i32> @zero_pmaddwd_256(<16 x i16> %a0) {
; CHECK-LABEL: @zero_pmaddwd_256(
; CHECK-NEXT:    ret <8 x i32> zeroinitializer
;
  %1 = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %a0, <16 x i16> zeroinitializer)
  ret <8 x i32> %1
}
83e8c79fbaSSimon Pilgrim
; Commuted form of the 256-bit zero case: the all-zero vector is the first
; operand and the call is still expected to fold to an all-zero result.
define <8 x i32> @zero_pmaddwd_256_commute(<16 x i16> %a0) {
; CHECK-LABEL: @zero_pmaddwd_256_commute(
; CHECK-NEXT:    ret <8 x i32> zeroinitializer
;
  %1 = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> zeroinitializer, <16 x i16> %a0)
  ret <8 x i32> %1
}
91e8c79fbaSSimon Pilgrim
; 512-bit (AVX-512) pmaddwd whose second operand is all zeros: the call is
; expected to fold away to an all-zero result.
define <16 x i32> @zero_pmaddwd_512(<32 x i16> %a0) {
; CHECK-LABEL: @zero_pmaddwd_512(
; CHECK-NEXT:    ret <16 x i32> zeroinitializer
;
  %1 = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> %a0, <32 x i16> zeroinitializer)
  ret <16 x i32> %1
}
99e8c79fbaSSimon Pilgrim
; Commuted form of the 512-bit zero case: the all-zero vector is the first
; operand and the call is still expected to fold to an all-zero result.
define <16 x i32> @zero_pmaddwd_512_commute(<32 x i16> %a0) {
; CHECK-LABEL: @zero_pmaddwd_512_commute(
; CHECK-NEXT:    ret <16 x i32> zeroinitializer
;
  %1 = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> zeroinitializer, <32 x i16> %a0)
  ret <16 x i32> %1
}
107e8c79fbaSSimon Pilgrim
;
; Constant Folding
;
111e8c79fbaSSimon Pilgrim
; Both operands are constants, so the 128-bit call is expected to constant
; fold. Each i32 result is the sum of the two adjacent i16 products, e.g.
; element 0 = (-1 * -5) + (2 * 7) = 19.
define <4 x i32> @fold_pmaddwd_128() {
; CHECK-LABEL: @fold_pmaddwd_128(
; CHECK-NEXT:    ret <4 x i32> <i32 19, i32 -229364, i32 -21, i32 -491429>
;
  %1 = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> <i16 -1, i16 2, i16 3, i16 -4, i16 -5, i16 6, i16 7, i16 -8>, <8 x i16> <i16 -5, i16 7, i16 -32768, i16 32765, i16 -9, i16 -11, i16 -32763, i16 32761>)
  ret <4 x i32> %1
}
119e8c79fbaSSimon Pilgrim
; Both operands are constants, so the 256-bit call is expected to constant
; fold. Each i32 result is the sum of the two adjacent i16 products, e.g.
; element 0 = (0 * -5) + (-1 * 7) = -7.
define <8 x i32> @fold_pmaddwd_256() {
; CHECK-LABEL: @fold_pmaddwd_256(
; CHECK-NEXT:    ret <8 x i32> <i32 -7, i32 32762, i32 91, i32 32750, i32 -239, i32 687938, i32 -451, i32 -32756>
;
  %1 = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> <i16 0, i16 -1, i16 2, i16 3, i16 -4, i16 -5, i16 6, i16 7, i16 -8, i16 9, i16 -10, i16 11, i16 -12, i16 13, i16 -14, i16 -15>, <16 x i16> <i16 -5, i16 7, i16 -32768, i16 32766, i16 -9, i16 -11, i16 -32764, i16 32762, i16 13, i16 -15, i16 -32760, i16 32758, i16 17, i16 -19, i16 -32756, i16 32756>)
  ret <8 x i32> %1
}
127e8c79fbaSSimon Pilgrim
; Both operands are constants, so the 512-bit call is expected to constant
; fold. The upper 16 i16 elements of each operand equal the lower 16 elements
; of the other operand, so the upper eight results repeat the lower eight.
define <16 x i32> @fold_pmaddwd_512() {
; CHECK-LABEL: @fold_pmaddwd_512(
; CHECK-NEXT:    ret <16 x i32> <i32 -7, i32 32762, i32 91, i32 32750, i32 -239, i32 687938, i32 -451, i32 -32756, i32 -7, i32 32762, i32 91, i32 32750, i32 -239, i32 687938, i32 -451, i32 -32756>
;
  %1 = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> <i16 0, i16 -1, i16 2, i16 3, i16 -4, i16 -5, i16 6, i16 7, i16 -8, i16 9, i16 -10, i16 11, i16 -12, i16 13, i16 -14, i16 -15, i16 -5, i16 7, i16 -32768, i16 32766, i16 -9, i16 -11, i16 -32764, i16 32762, i16 13, i16 -15, i16 -32760, i16 32758, i16 17, i16 -19, i16 -32756, i16 32756>, <32 x i16> <i16 -5, i16 7, i16 -32768, i16 32766, i16 -9, i16 -11, i16 -32764, i16 32762, i16 13, i16 -15, i16 -32760, i16 32758, i16 17, i16 -19, i16 -32756, i16 32756, i16 0, i16 -1, i16 2, i16 3, i16 -4, i16 -5, i16 6, i16 7, i16 -8, i16 9, i16 -10, i16 11, i16 -12, i16 13, i16 -14, i16 -15>)
  ret <16 x i32> %1
}
135e8c79fbaSSimon Pilgrim
;
; Demanded Elts
;
139e8c79fbaSSimon Pilgrim
; Only result element 0 is used (the final shuffle splats it), and both
; operand shuffles leave input elements 0 and 1 in place, so the operand
; shuffles are expected to be dropped and the call to take %a0/%a1 directly.
define <4 x i32> @elts_pmaddwd_128(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: @elts_pmaddwd_128(
; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]])
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> zeroinitializer
; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
;
  %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2>
  %2 = shufflevector <8 x i16> %a1, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
  %3 = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %1, <8 x i16> %2)
  %4 = shufflevector <4 x i32> %3, <4 x i32> poison, <4 x i32> zeroinitializer
  ret <4 x i32> %4
}
152e8c79fbaSSimon Pilgrim
; Only result element 0 is used (the final shuffle splats it), and both
; operand shuffles leave input elements 0 and 1 in place, so the operand
; shuffles are expected to be dropped and the call to take %a0/%a1 directly.
define <8 x i32> @elts_pmaddwd_256(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: @elts_pmaddwd_256(
; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> [[A0:%.*]], <16 x i16> [[A1:%.*]])
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> poison, <8 x i32> zeroinitializer
; CHECK-NEXT:    ret <8 x i32> [[TMP2]]
;
  %1 = shufflevector <16 x i16> %a0, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %2 = shufflevector <16 x i16> %a1, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %3 = call <8 x i32> @llvm.x86.avx2.pmadd.wd(<16 x i16> %1, <16 x i16> %2)
  %4 = shufflevector <8 x i32> %3, <8 x i32> poison, <8 x i32> zeroinitializer
  ret <8 x i32> %4
}
165e8c79fbaSSimon Pilgrim
; Only result element 0 is used (the final shuffle splats it), and both
; operand shuffles leave input elements 0 and 1 in place, so the operand
; shuffles are expected to be dropped and the call to take %a0/%a1 directly.
define <16 x i32> @elts_pmaddwd_512(<32 x i16> %a0, <32 x i16> %a1) {
; CHECK-LABEL: @elts_pmaddwd_512(
; CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> [[A0:%.*]], <32 x i16> [[A1:%.*]])
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i32> [[TMP1]], <16 x i32> poison, <16 x i32> zeroinitializer
; CHECK-NEXT:    ret <16 x i32> [[TMP2]]
;
  %1 = shufflevector <32 x i16> %a0, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %2 = shufflevector <32 x i16> %a1, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %3 = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> %1, <32 x i16> %2)
  %4 = shufflevector <16 x i32> %3, <16 x i32> poison, <16 x i32> zeroinitializer
  ret <16 x i32> %4
}
178