xref: /llvm-project/llvm/test/Analysis/ValueTracking/knownbits-x86-hadd-hsub.ll (revision 38fffa630ee80163dc65e759392ad29798905679)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
2; RUN: opt -S -passes=instcombine < %s | FileCheck %s
3
; Known-bits test for the 128-bit horizontal add of i32 lanes (SSSE3 phadd.d).
; Both inputs are masked with 3, so every input lane is in 0..3. phadd sums
; adjacent lane pairs, so each result lane is at most 6 and its bits 3 and up
; are zero. Masking the result with -8 therefore gives 0 in every lane, which
; equals none of the compared constants 3, 4, 5, 6; the expected output is the
; all-false vector.
4define <4 x i1> @hadd_and_eq_v4i32(<4 x i32> %x, <4 x i32> %y) {
5; CHECK-LABEL: define <4 x i1> @hadd_and_eq_v4i32(
6; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]]) {
7; CHECK-NEXT:  entry:
8; CHECK-NEXT:    ret <4 x i1> zeroinitializer
9;
10entry:
11  %and1 = and <4 x i32> %x, <i32 3, i32 3, i32 3, i32 3>
12  %and2 = and <4 x i32> %y, <i32 3, i32 3, i32 3, i32 3>
13  %hadd = tail call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %and1, <4 x i32> %and2)
14  %andr = and <4 x i32> %hadd, <i32 -8, i32 -8, i32 -8, i32 -8>
15  %ret = icmp eq <4 x i32> %andr, <i32 3, i32 4, i32 5, i32 6>
16  ret <4 x i1> %ret
17}
18
; Known-bits test for the 128-bit horizontal add of i16 lanes (SSSE3 phadd.w).
; Inputs are masked with 3, so each pairwise sum is at most 6 and the result
; masked with -8 is 0 in every lane. The compare constants are
; 0, 1, 2, 3, 4, 5, 6, 0, so only the first and last lanes compare equal; the
; expected output is that constant true/false pattern.
19define <8 x i1> @hadd_and_eq_v8i16(<8 x i16> %x, <8 x i16> %y) {
20; CHECK-LABEL: define <8 x i1> @hadd_and_eq_v8i16(
21; CHECK-SAME: <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]]) {
22; CHECK-NEXT:  entry:
23; CHECK-NEXT:    ret <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true>
24;
25entry:
26  %and1 = and <8 x i16> %x, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
27  %and2 = and <8 x i16> %y, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
28  %hadd = tail call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %and1, <8 x i16> %and2)
29  %andr = and <8 x i16> %hadd, <i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8>
30  %ret = icmp eq <8 x i16> %andr, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 0>
31  ret <8 x i1> %ret
32}
33
; Same pattern as the phadd.w test, but using the saturating 128-bit form
; (SSSE3 phadd.sw). Inputs are masked with 3, so pairwise sums stay in 0..6 and
; never reach the saturation bounds; the result masked with -8 is 0 in every
; lane. Against the constants 0, 1, 2, 3, 4, 5, 6, 0 only the first and last
; lanes compare equal, matching the expected constant result.
34define <8 x i1> @hadd_and_eq_v8i16_sat(<8 x i16> %x, <8 x i16> %y) {
35; CHECK-LABEL: define <8 x i1> @hadd_and_eq_v8i16_sat(
36; CHECK-SAME: <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]]) {
37; CHECK-NEXT:  entry:
38; CHECK-NEXT:    ret <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true>
39;
40entry:
41  %and1 = and <8 x i16> %x, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
42  %and2 = and <8 x i16> %y, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
43  %hadd = tail call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %and1, <8 x i16> %and2)
44  %andr = and <8 x i16> %hadd, <i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8>
45  %ret = icmp eq <8 x i16> %andr, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 0>
46  ret <8 x i1> %ret
47}
48
; 256-bit counterpart of the v4i32 test, using the AVX2 phadd.d intrinsic on
; eight i32 lanes. Inputs are masked with 3, so each pairwise sum is at most 6
; and the result masked with -8 is 0 in every lane. None of the compared
; constants (3, 4, 5, 6 repeated twice) is 0, so the expected output is the
; all-false vector.
49define <8 x i1> @hadd_and_eq_v8i32(<8 x i32> %x, <8 x i32> %y) {
50; CHECK-LABEL: define <8 x i1> @hadd_and_eq_v8i32(
51; CHECK-SAME: <8 x i32> [[X:%.*]], <8 x i32> [[Y:%.*]]) {
52; CHECK-NEXT:  entry:
53; CHECK-NEXT:    ret <8 x i1> zeroinitializer
54;
55entry:
56  %and1 = and <8 x i32> %x, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
57  %and2 = and <8 x i32> %y, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
58  %hadd = tail call <8 x i32> @llvm.x86.avx2.phadd.d(<8 x i32> %and1, <8 x i32> %and2)
59  %andr = and <8 x i32> %hadd, <i32 -8, i32 -8, i32 -8, i32 -8, i32 -8, i32 -8, i32 -8, i32 -8>
60  %ret = icmp eq <8 x i32> %andr, <i32 3, i32 4, i32 5, i32 6, i32 3, i32 4, i32 5, i32 6>
61  ret <8 x i1> %ret
62}
63
; 256-bit counterpart of the v8i16 test, using the AVX2 phadd.w intrinsic on
; sixteen i16 lanes. Inputs are masked with 3, so the result masked with -8 is
; 0 in every lane. The compare constants are 0, 1, 2, 3, 4, 5, 6, 0 repeated
; twice, so lanes 0, 7, 8 and 15 compare equal and all others do not, matching
; the expected constant result.
64define <16 x i1> @hadd_and_eq_v16i16(<16 x i16> %x, <16 x i16> %y) {
65; CHECK-LABEL: define <16 x i1> @hadd_and_eq_v16i16(
66; CHECK-SAME: <16 x i16> [[X:%.*]], <16 x i16> [[Y:%.*]]) {
67; CHECK-NEXT:  entry:
68; CHECK-NEXT:    ret <16 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true>
69;
70entry:
71  %and1 = and <16 x i16> %x, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
72  %and2 = and <16 x i16> %y, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
73  %hadd = tail call <16 x i16> @llvm.x86.avx2.phadd.w(<16 x i16> %and1, <16 x i16> %and2)
74  %andr = and <16 x i16> %hadd, <i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8>
75  %ret = icmp eq <16 x i16> %andr, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 0, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 0>
76  ret <16 x i1> %ret
77}
78
; 256-bit saturating horizontal add (AVX2 phadd.sw) on sixteen i16 lanes.
; Inputs are masked with 3, so pairwise sums stay in 0..6, far from the
; saturation bounds, and the result masked with -8 is 0 in every lane. Against
; 0, 1, 2, 3, 4, 5, 6, 0 repeated twice, lanes 0, 7, 8 and 15 compare equal,
; matching the expected constant result.
79define <16 x i1> @hadd_and_eq_v16i16_sat(<16 x i16> %x, <16 x i16> %y) {
80; CHECK-LABEL: define <16 x i1> @hadd_and_eq_v16i16_sat(
81; CHECK-SAME: <16 x i16> [[X:%.*]], <16 x i16> [[Y:%.*]]) {
82; CHECK-NEXT:  entry:
83; CHECK-NEXT:    ret <16 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true>
84;
85entry:
86  %and1 = and <16 x i16> %x, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
87  %and2 = and <16 x i16> %y, <i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3, i16 3>
88  %hadd = tail call <16 x i16> @llvm.x86.avx2.phadd.sw(<16 x i16> %and1, <16 x i16> %and2)
89  %andr = and <16 x i16> %hadd, <i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8, i16 -8>
90  %ret = icmp eq <16 x i16> %andr, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 0, i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 0>
91  ret <16 x i1> %ret
92}
93
; Known-bits test for the 128-bit horizontal subtract of i32 lanes
; (SSSE3 phsub.d). OR-ing both inputs with 65535 forces the low 16 bits of
; every lane to one, so the difference of any two lanes has its low 16 bits
; equal to zero. Truncating to i16 therefore gives 0 in every lane, which
; equals none of the compared constants 3, 4, 5, 6; the expected output is the
; all-false vector.
94define <4 x i1> @hsub_trunc_eq_v4i32(<4 x i32> %x, <4 x i32> %y) {
95; CHECK-LABEL: define <4 x i1> @hsub_trunc_eq_v4i32(
96; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]]) {
97; CHECK-NEXT:  entry:
98; CHECK-NEXT:    ret <4 x i1> zeroinitializer
99;
100entry:
101  %or1 = or <4 x i32> %x, <i32 65535, i32 65535, i32 65535, i32 65535>
102  %or2 = or <4 x i32> %y, <i32 65535, i32 65535, i32 65535, i32 65535>
103  %hsub = tail call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %or1, <4 x i32> %or2)
104  %conv = trunc <4 x i32> %hsub to <4 x i16>
105  %ret = icmp eq <4 x i16> %conv, <i16 3, i16 4, i16 5, i16 6>
106  ret <4 x i1> %ret
107}
108
; Known-bits test for the 128-bit horizontal subtract of i16 lanes
; (SSSE3 phsub.w). OR-ing both inputs with 255 forces the low 8 bits of every
; lane to one, so each pairwise difference has zero low 8 bits and the
; truncation to i8 is 0 in every lane. Against the constants
; 0, 1, 2, 3, 4, 5, 6, 0 only the first and last lanes compare equal, matching
; the expected constant result.
109define <8 x i1> @hsub_trunc_eq_v8i16(<8 x i16> %x, <8 x i16> %y) {
110; CHECK-LABEL: define <8 x i1> @hsub_trunc_eq_v8i16(
111; CHECK-SAME: <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]]) {
112; CHECK-NEXT:  entry:
113; CHECK-NEXT:    ret <8 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true>
114;
115entry:
116  %or1 = or <8 x i16> %x, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
117  %or2 = or <8 x i16> %y, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
118  %hsub = tail call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %or1, <8 x i16> %or2)
119  %conv = trunc <8 x i16> %hsub to <8 x i8>
120  %ret = icmp eq <8 x i8> %conv, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 0>
121  ret <8 x i1> %ret
122}
123
; Known-bits test for the 128-bit saturating horizontal subtract
; (SSSE3 phsub.sw). Note that, despite "eq" in the name, the final predicate
; is a signed less-or-equal compare against 7.
; The or/and pairs constrain the inputs per lane: even lanes get their low two
; bits set and are masked with 7 (so they are 3 or 7), odd lanes are masked
; with 3 (so they are in 0..3). phsub subtracts the odd lane from the even lane
; of each adjacent pair, so every result lies in 0..7 and no saturation occurs.
; The compare is therefore always true, matching the expected all-true splat.
124define <8 x i1> @hsub_and_eq_v8i16_sat(<8 x i16> %x, <8 x i16> %y) {
125; CHECK-LABEL: define <8 x i1> @hsub_and_eq_v8i16_sat(
126; CHECK-SAME: <8 x i16> [[X:%.*]], <8 x i16> [[Y:%.*]]) {
127; CHECK-NEXT:  entry:
128; CHECK-NEXT:    ret <8 x i1> splat (i1 true)
129;
130entry:
131  %or1 = or <8 x i16> %x, <i16 3, i16 0, i16 3, i16 0, i16 3, i16 0, i16 3, i16 0>
132  %or2 = or <8 x i16> %y, <i16 3, i16 0, i16 3, i16 0, i16 3, i16 0, i16 3, i16 0>
133  %and1 = and <8 x i16> %or1, <i16 7, i16 3, i16 7, i16 3, i16 7, i16 3, i16 7, i16 3>
134  %and2 = and <8 x i16> %or2, <i16 7, i16 3, i16 7, i16 3, i16 7, i16 3, i16 7, i16 3>
135  %hsub = tail call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %and1, <8 x i16> %and2)
136  %ret = icmp sle <8 x i16> %hsub, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
137  ret <8 x i1> %ret
138}
139
; 256-bit counterpart of the v4i32 hsub test, using the AVX2 phsub.d intrinsic
; on eight i32 lanes. OR-ing both inputs with 65535 forces the low 16 bits of
; every lane to one, so each pairwise difference has zero low 16 bits and the
; truncation to i16 is 0 in every lane. None of the compared constants
; (3, 4, 5, 6 repeated twice) is 0, so the expected output is the all-false
; vector.
140define <8 x i1> @hsub_trunc_eq_v8i32(<8 x i32> %x, <8 x i32> %y) {
141; CHECK-LABEL: define <8 x i1> @hsub_trunc_eq_v8i32(
142; CHECK-SAME: <8 x i32> [[X:%.*]], <8 x i32> [[Y:%.*]]) {
143; CHECK-NEXT:  entry:
144; CHECK-NEXT:    ret <8 x i1> zeroinitializer
145;
146entry:
147  %or1 = or <8 x i32> %x, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
148  %or2 = or <8 x i32> %y, <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>
149  %hsub = tail call <8 x i32> @llvm.x86.avx2.phsub.d(<8 x i32> %or1, <8 x i32> %or2)
150  %conv = trunc <8 x i32> %hsub to <8 x i16>
151  %ret = icmp eq <8 x i16> %conv, <i16 3, i16 4, i16 5, i16 6, i16 3, i16 4, i16 5, i16 6>
152  ret <8 x i1> %ret
153}
154
; 256-bit counterpart of the v8i16 hsub test, using the AVX2 phsub.w intrinsic
; on sixteen i16 lanes. OR-ing both inputs with 255 forces the low 8 bits of
; every lane to one, so each pairwise difference has zero low 8 bits and the
; truncation to i8 is 0 in every lane. Against 0, 1, 2, 3, 4, 5, 6, 0 repeated
; twice, lanes 0, 7, 8 and 15 compare equal, matching the expected constant
; result.
155define <16 x i1> @hsub_trunc_eq_v16i16(<16 x i16> %x, <16 x i16> %y) {
156; CHECK-LABEL: define <16 x i1> @hsub_trunc_eq_v16i16(
157; CHECK-SAME: <16 x i16> [[X:%.*]], <16 x i16> [[Y:%.*]]) {
158; CHECK-NEXT:  entry:
159; CHECK-NEXT:    ret <16 x i1> <i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true>
160;
161entry:
162  %or1 = or <16 x i16> %x, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
163  %or2 = or <16 x i16> %y, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
164  %hsub = tail call <16 x i16> @llvm.x86.avx2.phsub.w(<16 x i16> %or1, <16 x i16> %or2)
165  %conv = trunc <16 x i16> %hsub to <16 x i8>
166  %ret = icmp eq <16 x i8> %conv, <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 0, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 0>
167  ret <16 x i1> %ret
168}
169
; 256-bit saturating horizontal subtract (AVX2 phsub.sw) on sixteen i16 lanes.
; Despite "eq" in the name, the final predicate is a signed less-or-equal
; compare against 7.
; The or/and pairs force even lanes to 3 or 7 (low two bits set, masked with 7)
; and odd lanes into 0..3 (masked with 3). phsub subtracts the odd lane from
; the even lane of each adjacent pair, so every result lies in 0..7 and no
; saturation occurs. The compare is therefore always true, matching the
; expected all-true splat.
170define <16 x i1> @hsub_and_eq_v16i16_sat(<16 x i16> %x, <16 x i16> %y) {
171; CHECK-LABEL: define <16 x i1> @hsub_and_eq_v16i16_sat(
172; CHECK-SAME: <16 x i16> [[X:%.*]], <16 x i16> [[Y:%.*]]) {
173; CHECK-NEXT:  entry:
174; CHECK-NEXT:    ret <16 x i1> splat (i1 true)
175;
176entry:
177  %or1 = or <16 x i16> %x, <i16 3, i16 0, i16 3, i16 0, i16 3, i16 0, i16 3, i16 0, i16 3, i16 0, i16 3, i16 0, i16 3, i16 0, i16 3, i16 0>
178  %or2 = or <16 x i16> %y, <i16 3, i16 0, i16 3, i16 0, i16 3, i16 0, i16 3, i16 0, i16 3, i16 0, i16 3, i16 0, i16 3, i16 0, i16 3, i16 0>
179  %and1 = and <16 x i16> %or1, <i16 7, i16 3, i16 7, i16 3, i16 7, i16 3, i16 7, i16 3, i16 7, i16 3, i16 7, i16 3, i16 7, i16 3, i16 7, i16 3>
180  %and2 = and <16 x i16> %or2, <i16 7, i16 3, i16 7, i16 3, i16 7, i16 3, i16 7, i16 3, i16 7, i16 3, i16 7, i16 3, i16 7, i16 3, i16 7, i16 3>
181  %hsub = tail call <16 x i16> @llvm.x86.avx2.phsub.sw(<16 x i16> %and1, <16 x i16> %and2)
182  %ret = icmp sle <16 x i16> %hsub, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
183  ret <16 x i1> %ret
184}
185
; Per-lane known-bits test: only result lane 1 of the horizontal add is used.
; The shuffle mask 4, 1, 5, 6 takes lane 1 from the phadd result and every
; other lane from the all-zero second operand. For phadd.d.128, result lane 1
; is the sum of lanes 2 and 3 of the first operand, and exactly those two
; lanes of %x are masked with 3 here (the -1 masks leave the rest
; unconstrained). Lane 1 is thus at most 6 and the other lanes are 0, so no
; lane can equal 8 and the expected output is the all-true splat.
; NOTE(review): "2st" in the name presumably means the second result lane.
186define <4 x i1> @hadd_shuffle_2st_v4i32(<4 x i32> %x, <4 x i32> %y) {
187; CHECK-LABEL: define <4 x i1> @hadd_shuffle_2st_v4i32(
188; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]]) {
189; CHECK-NEXT:  entry:
190; CHECK-NEXT:    ret <4 x i1> splat (i1 true)
191;
192entry:
193  %and1 = and <4 x i32> %x, <i32 -1, i32 -1, i32 3, i32 3>
194  %and2 = and <4 x i32> %y, <i32 -1, i32 -1, i32 -1, i32 -1>
195  %hadd = tail call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %and1, <4 x i32> %and2)
196  %shuf = shufflevector <4 x i32> %hadd, <4x i32> zeroinitializer, <4 x i32> <i32 4, i32 1, i32 5, i32 6>
197  %ret = icmp ne <4 x i32> %shuf, <i32 8, i32 8, i32 8, i32 8>
198  ret <4 x i1> %ret
199}
200
; Per-lane known-bits test: only result lane 3 of the horizontal add is used.
; The shuffle mask 4, 5, 6, 3 takes lane 3 from the phadd result and every
; other lane from the all-zero second operand. For phadd.d.128, result lane 3
; is the sum of lanes 2 and 3 of the second operand, and exactly those two
; lanes of %y are masked with 3 here (the -1 masks leave the rest
; unconstrained). Lane 3 is thus at most 6 and the other lanes are 0, so no
; lane can equal 8 and the expected output is the all-true splat.
201define <4 x i1> @hadd_shuffle_4th_v4i32(<4 x i32> %x, <4 x i32> %y) {
202; CHECK-LABEL: define <4 x i1> @hadd_shuffle_4th_v4i32(
203; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]]) {
204; CHECK-NEXT:  entry:
205; CHECK-NEXT:    ret <4 x i1> splat (i1 true)
206;
207entry:
208  %and1 = and <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
209  %and2 = and <4 x i32> %y, <i32 -1, i32 -1, i32 3, i32 3>
210  %hadd = tail call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %and1, <4 x i32> %and2)
211  %shuf = shufflevector <4 x i32> %hadd, <4x i32> zeroinitializer, <4 x i32> <i32 4, i32 5, i32 6, i32 3>
212  %ret = icmp ne <4 x i32> %shuf, <i32 8, i32 8, i32 8, i32 8>
213  ret <4 x i1> %ret
214}
215
; Negative test for the lane-1 case. The shuffle again uses only result lane 1
; of the phadd (mask 4, 1, 5, 6), which is the sum of lanes 2 and 3 of the
; first operand, but here those two lanes of %x are left unconstrained (-1
; masks); only lanes 0 and 1 of %x and all of %y are masked with 3. Nothing
; bounds the one lane that is actually used, so the compare against 8 cannot
; be folded and the expected output keeps the and / phadd / shufflevector /
; icmp sequence.
216define <4 x i1> @hadd_shuffle_2st_negative_v4i32(<4 x i32> %x, <4 x i32> %y) {
217; CHECK-LABEL: define <4 x i1> @hadd_shuffle_2st_negative_v4i32(
218; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]]) {
219; CHECK-NEXT:  entry:
220; CHECK-NEXT:    [[TMP0:%.*]] = and <4 x i32> [[X]], <i32 3, i32 3, i32 -1, i32 -1>
221; CHECK-NEXT:    [[TMP1:%.*]] = and <4 x i32> [[Y]], splat (i32 3)
222; CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
223; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 0, i32 0, i32 poison>, <4 x i32> <i32 4, i32 1, i32 5, i32 6>
224; CHECK-NEXT:    [[RET:%.*]] = icmp ne <4 x i32> [[TMP3]], splat (i32 8)
225; CHECK-NEXT:    ret <4 x i1> [[RET]]
226;
227entry:
228  %and1 = and <4 x i32> %x, <i32 3, i32 3, i32 -1, i32 -1>
229  %and2 = and <4 x i32> %y, <i32 3, i32 3, i32 3, i32 3>
230  %hadd = tail call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %and1, <4 x i32> %and2)
231  %shuf = shufflevector <4 x i32> %hadd, <4x i32> zeroinitializer, <4 x i32> <i32 4, i32 1, i32 5, i32 6>
232  %ret = icmp ne <4 x i32> %shuf, <i32 8, i32 8, i32 8, i32 8>
233  ret <4 x i1> %ret
234}
235
; Negative test for the lane-3 case. The shuffle again uses only result lane 3
; of the phadd (mask 4, 5, 6, 3), which is the sum of lanes 2 and 3 of the
; second operand, but here those two lanes of %y are left unconstrained (-1
; masks); only lanes 0 and 1 of %y and all of %x are masked with 3. Nothing
; bounds the one lane that is actually used, so the compare against 8 cannot
; be folded and the expected output keeps the and / phadd / shufflevector /
; icmp sequence (with the shuffle operands commuted in the expected form).
236define <4 x i1> @hadd_shuffle_4th_negative_v4i32(<4 x i32> %x, <4 x i32> %y) {
237; CHECK-LABEL: define <4 x i1> @hadd_shuffle_4th_negative_v4i32(
238; CHECK-SAME: <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]]) {
239; CHECK-NEXT:  entry:
240; CHECK-NEXT:    [[TMP0:%.*]] = and <4 x i32> [[X]], splat (i32 3)
241; CHECK-NEXT:    [[TMP1:%.*]] = and <4 x i32> [[Y]], <i32 3, i32 3, i32 -1, i32 -1>
242; CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> [[TMP0]], <4 x i32> [[TMP1]])
243; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i32> <i32 0, i32 0, i32 0, i32 poison>, <4 x i32> [[TMP2]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
244; CHECK-NEXT:    [[RET:%.*]] = icmp ne <4 x i32> [[TMP3]], splat (i32 8)
245; CHECK-NEXT:    ret <4 x i1> [[RET]]
246;
247entry:
248  %and1 = and <4 x i32> %x, <i32 3, i32 3, i32 3, i32 3>
249  %and2 = and <4 x i32> %y, <i32 3, i32 3, i32 -1, i32 -1>
250  %hadd = tail call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %and1, <4 x i32> %and2)
251  %shuf = shufflevector <4 x i32> %hadd, <4x i32> zeroinitializer, <4 x i32> <i32 4, i32 5, i32 6, i32 3>
252  %ret = icmp ne <4 x i32> %shuf, <i32 8, i32 8, i32 8, i32 8>
253  ret <4 x i1> %ret
254}
255