xref: /llvm-project/llvm/test/Transforms/InstCombine/shuffle_select.ll (revision 38fffa630ee80163dc65e759392ad29798905679)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -passes=instcombine -S | FileCheck %s
3
4; Try to eliminate binops and shuffles when the shuffle is a select in disguise:
5; PR37806 - https://bugs.llvm.org/show_bug.cgi?id=37806
6
; Select-shuffle of (add %v, C) and %v: lanes 0 and 2 come from the add,
; lanes 1 and 3 from %v. Expected: one add whose constant is 0 (the add
; identity) in the lanes that were taken from %v.
7define <4 x i32> @add(<4 x i32> %v) {
8; CHECK-LABEL: @add(
9; CHECK-NEXT:    [[S:%.*]] = add <4 x i32> [[V:%.*]], <i32 11, i32 0, i32 13, i32 0>
10; CHECK-NEXT:    ret <4 x i32> [[S]]
11;
12  %b = add <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
13  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
14  ret <4 x i32> %s
15}
16
17; Propagate flags when possible.
18
; Same pattern as @add, but the input add carries nuw/nsw. Expected: the
; folded add keeps both flags (the mask has no undef lanes).
19define <4 x i32> @add_nuw_nsw(<4 x i32> %v) {
20; CHECK-LABEL: @add_nuw_nsw(
21; CHECK-NEXT:    [[S:%.*]] = add nuw nsw <4 x i32> [[V:%.*]], <i32 11, i32 0, i32 13, i32 0>
22; CHECK-NEXT:    ret <4 x i32> [[S]]
23;
24  %b = add nuw nsw <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
25  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
26  ret <4 x i32> %s
27}
28
; Flag-free add with an undef mask element in lane 2. Expected: the shuffle
; is removed and the undef lane shows up as an undef constant element.
29define <4 x i32> @add_undef_mask_elt(<4 x i32> %v) {
30; CHECK-LABEL: @add_undef_mask_elt(
31; CHECK-NEXT:    [[S:%.*]] = add <4 x i32> [[V:%.*]], <i32 11, i32 0, i32 undef, i32 0>
32; CHECK-NEXT:    ret <4 x i32> [[S]]
33;
34  %b = add <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
35  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 5, i32 undef, i32 7>
36  ret <4 x i32> %s
37}
38
39; Poison flags must be dropped or undef must be replaced with safe constant.
40
; add nuw nsw combined with an undef mask element in lane 1. Expected: the
; constant gets an undef element in lane 1 and the nuw/nsw flags are dropped
; from the folded add.
41define <4 x i32> @add_nuw_nsw_undef_mask_elt(<4 x i32> %v) {
42; CHECK-LABEL: @add_nuw_nsw_undef_mask_elt(
43; CHECK-NEXT:    [[S:%.*]] = add <4 x i32> [[V:%.*]], <i32 11, i32 undef, i32 13, i32 0>
44; CHECK-NEXT:    ret <4 x i32> [[S]]
45;
46  %b = add nuw nsw <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
47  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 undef, i32 2, i32 7>
48  ret <4 x i32> %s
49}
50
51; Constant operand 0 (LHS) could work for some non-commutative binops?
52
; sub with the constant as operand 0; only lane 3 of the result comes from
; the sub. Expected: no fold -- the sub and the shuffle both remain, and the
; three constant lanes that are never selected become poison.
53define <4 x i32> @sub(<4 x i32> %v) {
54; CHECK-LABEL: @sub(
55; CHECK-NEXT:    [[B:%.*]] = sub <4 x i32> <i32 poison, i32 poison, i32 poison, i32 14>, [[V:%.*]]
56; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i32> [[V]], <4 x i32> [[B]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
57; CHECK-NEXT:    ret <4 x i32> [[S]]
58;
59  %b = sub <4 x i32> <i32 11, i32 12, i32 13, i32 14>, %v
60  %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
61  ret <4 x i32> %s
62}
63
64; If any element of the shuffle mask operand is undef, that element of the result is undef.
65; The shuffle is eliminated in this transform, but we can replace a constant element with undef.
66; Preserve flags when possible. It's not safe to propagate poison-generating flags with undef constants.
67
; mul nsw nuw selected into lanes 1 and 3, %v into lane 2, lane 0 undef.
; Expected: one mul without flags whose constant is undef in lane 0 and
; 1 (the mul identity) in lane 2.
68define <4 x i32> @mul(<4 x i32> %v) {
69; CHECK-LABEL: @mul(
70; CHECK-NEXT:    [[S:%.*]] = mul <4 x i32> [[V:%.*]], <i32 undef, i32 12, i32 1, i32 14>
71; CHECK-NEXT:    ret <4 x i32> [[S]]
72;
73  %b = mul nsw nuw <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
74  %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 undef, i32 5, i32 2, i32 7>
75  ret <4 x i32> %s
76}
77
; shl by constant selected into lanes 1 and 2; lanes 0 and 3 come from %v.
; Expected: one shl with shift amount 0 in the lanes taken from %v.
78define <4 x i32> @shl(<4 x i32> %v) {
79; CHECK-LABEL: @shl(
80; CHECK-NEXT:    [[S:%.*]] = shl <4 x i32> [[V:%.*]], <i32 0, i32 12, i32 13, i32 0>
81; CHECK-NEXT:    ret <4 x i32> [[S]]
82;
83  %b = shl <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
84  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 4, i32 1, i32 2, i32 7>
85  ret <4 x i32> %s
86}
87
; Same pattern as @shl with an nsw flag on the input. Expected: the folded
; shl keeps nsw.
88define <4 x i32> @shl_nsw(<4 x i32> %v) {
89; CHECK-LABEL: @shl_nsw(
90; CHECK-NEXT:    [[S:%.*]] = shl nsw <4 x i32> [[V:%.*]], <i32 0, i32 12, i32 13, i32 0>
91; CHECK-NEXT:    ret <4 x i32> [[S]]
92;
93  %b = shl nsw <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
94  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 4, i32 1, i32 2, i32 7>
95  ret <4 x i32> %s
96}
97
; shl with an undef mask element in lane 0. Expected: the undef lane gets
; shift amount 0 rather than an undef shift amount.
98define <4 x i32> @shl_undef_mask_elt(<4 x i32> %v) {
99; CHECK-LABEL: @shl_undef_mask_elt(
100; CHECK-NEXT:    [[S:%.*]] = shl <4 x i32> [[V:%.*]], <i32 0, i32 12, i32 13, i32 0>
101; CHECK-NEXT:    ret <4 x i32> [[S]]
102;
103  %b = shl <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
104  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
105  ret <4 x i32> %s
106}
107
; shl nuw where only lane 2 is taken from the shift; lanes 0 and 3 are undef
; in the mask and lane 1 comes from %v. Expected: all three of those lanes
; get shift amount 0 and the nuw flag is kept.
108define <4 x i32> @shl_nuw_undef_mask_elt(<4 x i32> %v) {
109; CHECK-LABEL: @shl_nuw_undef_mask_elt(
110; CHECK-NEXT:    [[S:%.*]] = shl nuw <4 x i32> [[V:%.*]], <i32 0, i32 0, i32 13, i32 0>
111; CHECK-NEXT:    ret <4 x i32> [[S]]
112;
113  %b = shl nuw <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
114  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 undef, i32 5, i32 2, i32 undef>
115  ret <4 x i32> %s
116}
117
; lshr of %v by a constant shift amount (the constant is the second operand
; of the lshr); lanes 0, 1 and 3 come from the shift, lane 2 from %v.
; Expected: one lshr with shift amount 0 in lane 2.
118define <4 x i32> @lshr_constant_op0(<4 x i32> %v) {
119; CHECK-LABEL: @lshr_constant_op0(
120; CHECK-NEXT:    [[S:%.*]] = lshr <4 x i32> [[V:%.*]], <i32 11, i32 12, i32 0, i32 14>
121; CHECK-NEXT:    ret <4 x i32> [[S]]
122;
123  %b = lshr <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
124  %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
125  ret <4 x i32> %s
126}
127
; Same pattern as @lshr_constant_op0 with an exact flag on the input.
; Expected: the folded lshr keeps exact.
128define <4 x i32> @lshr_exact_constant_op0(<4 x i32> %v) {
129; CHECK-LABEL: @lshr_exact_constant_op0(
130; CHECK-NEXT:    [[S:%.*]] = lshr exact <4 x i32> [[V:%.*]], <i32 11, i32 12, i32 0, i32 14>
131; CHECK-NEXT:    ret <4 x i32> [[S]]
132;
133  %b = lshr exact <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
134  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
135  ret <4 x i32> %s
136}
137
; Undef mask element in lane 0; expected result uses shift amount 0 there.
; NOTE(review): despite the name, the body uses shl (not lshr) and is the
; same IR as @shl_undef_mask_elt -- presumably lshr was intended; confirm
; before changing, since the CHECK lines would need regenerating.
138define <4 x i32> @lshr_undef_mask_elt(<4 x i32> %v) {
139; CHECK-LABEL: @lshr_undef_mask_elt(
140; CHECK-NEXT:    [[S:%.*]] = shl <4 x i32> [[V:%.*]], <i32 0, i32 12, i32 13, i32 0>
141; CHECK-NEXT:    ret <4 x i32> [[S]]
142;
143  %b = shl <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
144  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
145  ret <4 x i32> %s
146}
147
; lshr exact where only lane 2 is taken from the shift; lanes 0 and 3 are
; undef in the mask. Expected: those lanes (and lane 1, taken from %v) get
; shift amount 0 and the exact flag is kept.
148define <4 x i32> @lshr_exact_undef_mask_elt(<4 x i32> %v) {
149; CHECK-LABEL: @lshr_exact_undef_mask_elt(
150; CHECK-NEXT:    [[S:%.*]] = lshr exact <4 x i32> [[V:%.*]], <i32 0, i32 0, i32 13, i32 0>
151; CHECK-NEXT:    ret <4 x i32> [[S]]
152;
153  %b = lshr exact  <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
154  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 undef, i32 5, i32 2, i32 undef>
155  ret <4 x i32> %s
156}
157
; lshr exact with the constant as the value being shifted (first operand)
; and %v as the shift amount. Expected: the lshr is left unchanged and the
; binop is not merged into the shuffle; the shuffle only has its operands
; swapped (mask <4,5,2,7> on (%v, %b) becomes <0,1,6,3> on (B, V)).
158define <4 x i32> @lshr_constant_op1(<4 x i32> %v) {
159; CHECK-LABEL: @lshr_constant_op1(
160; CHECK-NEXT:    [[B:%.*]] = lshr exact <4 x i32> <i32 11, i32 12, i32 13, i32 14>, [[V:%.*]]
161; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
162; CHECK-NEXT:    ret <4 x i32> [[S]]
163;
164  %b = lshr exact <4 x i32> <i32 11, i32 12, i32 13, i32 14>, %v
165  %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
166  ret <4 x i32> %s
167}
168
169; Try weird types.
170
; Three-element vector: ashr selected into lanes 1 and 2, %v into lane 0.
; Expected: one ashr with shift amount 0 in lane 0.
171define <3 x i32> @ashr(<3 x i32> %v) {
172; CHECK-LABEL: @ashr(
173; CHECK-NEXT:    [[S:%.*]] = ashr <3 x i32> [[V:%.*]], <i32 0, i32 12, i32 13>
174; CHECK-NEXT:    ret <3 x i32> [[S]]
175;
176  %b = ashr <3 x i32> %v, <i32 11, i32 12, i32 13>
177  %s = shufflevector <3 x i32> %b, <3 x i32> %v, <3 x i32> <i32 3, i32 1, i32 2>
178  ret <3 x i32> %s
179}
180
; <3 x i42> and: lane 0 from %v, lane 1 from the and, lane 2 undef in the
; mask. Expected: one and whose constant is -1 (the and identity) in lane 0
; and undef in lane 2.
181define <3 x i42> @and(<3 x i42> %v) {
182; CHECK-LABEL: @and(
183; CHECK-NEXT:    [[S:%.*]] = and <3 x i42> [[V:%.*]], <i42 -1, i42 12, i42 undef>
184; CHECK-NEXT:    ret <3 x i42> [[S]]
185;
186  %b = and <3 x i42> %v, <i42 11, i42 12, i42 13>
187  %s = shufflevector <3 x i42> %v, <3 x i42> %b, <3 x i32> <i32 0, i32 4, i32 undef>
188  ret <3 x i42> %s
189}
190
191; It doesn't matter if the intermediate op has extra uses.
192
; External function with no body here; tests in this file call it to give
; an intermediate <4 x i32> value an additional use.
193declare void @use_v4i32(<4 x i32>)
194
; The or result %b is also passed to @use_v4i32. Lanes 0 and 1 of the
; shuffle come from %v, lanes 2 and 3 from the or. Expected: the original or
; stays for the call, and the shuffle becomes a second or with 0 in the
; lanes taken from %v.
195define <4 x i32> @or(<4 x i32> %v) {
196; CHECK-LABEL: @or(
197; CHECK-NEXT:    [[B:%.*]] = or <4 x i32> [[V:%.*]], <i32 11, i32 12, i32 13, i32 14>
198; CHECK-NEXT:    [[S:%.*]] = or <4 x i32> [[V]], <i32 0, i32 0, i32 13, i32 14>
199; CHECK-NEXT:    call void @use_v4i32(<4 x i32> [[B]])
200; CHECK-NEXT:    ret <4 x i32> [[S]]
201;
202  %b = or <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
203  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
204  call void @use_v4i32(<4 x i32> %b)
205  ret <4 x i32> %s
206}
207
; Only lane 1 is taken from the xor; the other lanes come from %v.
; Expected: one xor with 0 in every lane except lane 1.
208define <4 x i32> @xor(<4 x i32> %v) {
209; CHECK-LABEL: @xor(
210; CHECK-NEXT:    [[S:%.*]] = xor <4 x i32> [[V:%.*]], <i32 0, i32 12, i32 0, i32 0>
211; CHECK-NEXT:    ret <4 x i32> [[S]]
212;
213  %b = xor <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
214  %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
215  ret <4 x i32> %s
216}
217
; udiv with a constant dividend (operand 0) and %v as divisor.
; Expected: no change -- both the udiv and the shuffle remain as written.
218define <4 x i32> @udiv(<4 x i32> %v) {
219; CHECK-LABEL: @udiv(
220; CHECK-NEXT:    [[B:%.*]] = udiv <4 x i32> <i32 11, i32 12, i32 13, i32 14>, [[V:%.*]]
221; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
222; CHECK-NEXT:    ret <4 x i32> [[S]]
223;
224  %b = udiv <4 x i32> <i32 11, i32 12, i32 13, i32 14>, %v
225  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
226  ret <4 x i32> %s
227}
228
; Same as @udiv with an exact flag. Expected: no change; exact is retained
; on the untouched udiv.
229define <4 x i32> @udiv_exact(<4 x i32> %v) {
230; CHECK-LABEL: @udiv_exact(
231; CHECK-NEXT:    [[B:%.*]] = udiv exact <4 x i32> <i32 11, i32 12, i32 13, i32 14>, [[V:%.*]]
232; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
233; CHECK-NEXT:    ret <4 x i32> [[S]]
234;
235  %b = udiv exact <4 x i32> <i32 11, i32 12, i32 13, i32 14>, %v
236  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
237  ret <4 x i32> %s
238}
239
; Constant-dividend udiv with an undef mask element in lane 1.
; Expected: no fold; the only difference in the output is that the undef
; mask element is printed as poison.
240define <4 x i32> @udiv_undef_mask_elt(<4 x i32> %v) {
241; CHECK-LABEL: @udiv_undef_mask_elt(
242; CHECK-NEXT:    [[B:%.*]] = udiv <4 x i32> <i32 11, i32 12, i32 13, i32 14>, [[V:%.*]]
243; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> <i32 0, i32 poison, i32 2, i32 7>
244; CHECK-NEXT:    ret <4 x i32> [[S]]
245;
246  %b = udiv <4 x i32> <i32 11, i32 12, i32 13, i32 14>, %v
247  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 undef, i32 2, i32 7>
248  ret <4 x i32> %s
249}
250
; Same as @udiv_undef_mask_elt with an exact flag on the udiv.
; Expected: no fold; udiv exact and the shuffle both remain.
251define <4 x i32> @udiv_exact_undef_mask_elt(<4 x i32> %v) {
252; CHECK-LABEL: @udiv_exact_undef_mask_elt(
253; CHECK-NEXT:    [[B:%.*]] = udiv exact <4 x i32> <i32 11, i32 12, i32 13, i32 14>, [[V:%.*]]
254; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> <i32 0, i32 poison, i32 2, i32 7>
255; CHECK-NEXT:    ret <4 x i32> [[S]]
256;
257  %b = udiv exact <4 x i32> <i32 11, i32 12, i32 13, i32 14>, %v
258  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 undef, i32 2, i32 7>
259  ret <4 x i32> %s
260}
261
; sdiv of %v by a constant divisor; lanes 0 and 2 come from the sdiv, lanes
; 1 and 3 from %v. Expected: one sdiv with divisor 1 in the lanes taken
; from %v.
262define <4 x i32> @sdiv(<4 x i32> %v) {
263; CHECK-LABEL: @sdiv(
264; CHECK-NEXT:    [[S:%.*]] = sdiv <4 x i32> [[V:%.*]], <i32 11, i32 1, i32 13, i32 1>
265; CHECK-NEXT:    ret <4 x i32> [[S]]
266;
267  %b = sdiv <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
268  %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
269  ret <4 x i32> %s
270}
271
; Same pattern as @sdiv with an exact flag. Expected: the folded sdiv keeps
; exact.
272define <4 x i32> @sdiv_exact(<4 x i32> %v) {
273; CHECK-LABEL: @sdiv_exact(
274; CHECK-NEXT:    [[S:%.*]] = sdiv exact <4 x i32> [[V:%.*]], <i32 11, i32 1, i32 13, i32 1>
275; CHECK-NEXT:    ret <4 x i32> [[S]]
276;
277  %b = sdiv exact <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
278  %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
279  ret <4 x i32> %s
280}
281
282; Div/rem need special handling if the shuffle has undef elements.
283
; sdiv by constant where only lane 2 is taken from the sdiv; lanes 0 and 3
; are undef in the mask. Expected: the undef lanes get divisor 1 (not an
; undef divisor), as does lane 1, which comes from %v.
284define <4 x i32> @sdiv_undef_mask_elt(<4 x i32> %v) {
285; CHECK-LABEL: @sdiv_undef_mask_elt(
286; CHECK-NEXT:    [[S:%.*]] = sdiv <4 x i32> [[V:%.*]], <i32 1, i32 1, i32 13, i32 1>
287; CHECK-NEXT:    ret <4 x i32> [[S]]
288;
289  %b = sdiv <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
290  %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 undef, i32 1, i32 6, i32 undef>
291  ret <4 x i32> %s
292}
293
; Same as @sdiv_undef_mask_elt with an exact flag. Expected: divisor 1 in
; the undef lanes and the exact flag kept on the folded sdiv.
294define <4 x i32> @sdiv_exact_undef_mask_elt(<4 x i32> %v) {
295; CHECK-LABEL: @sdiv_exact_undef_mask_elt(
296; CHECK-NEXT:    [[S:%.*]] = sdiv exact <4 x i32> [[V:%.*]], <i32 1, i32 1, i32 13, i32 1>
297; CHECK-NEXT:    ret <4 x i32> [[S]]
298;
299  %b = sdiv exact <4 x i32> %v, <i32 11, i32 12, i32 13, i32 14>
300  %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 undef, i32 1, i32 6, i32 undef>
301  ret <4 x i32> %s
302}
303
; urem with a constant dividend (operand 0) and %v as divisor.
; Expected: no change -- the urem and the shuffle both remain as written.
304define <4 x i32> @urem(<4 x i32> %v) {
305; CHECK-LABEL: @urem(
306; CHECK-NEXT:    [[B:%.*]] = urem <4 x i32> <i32 11, i32 12, i32 13, i32 14>, [[V:%.*]]
307; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
308; CHECK-NEXT:    ret <4 x i32> [[S]]
309;
310  %b = urem <4 x i32> <i32 11, i32 12, i32 13, i32 14>, %v
311  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
312  ret <4 x i32> %s
313}
314
; Constant-dividend urem with an undef mask element in lane 3.
; Expected: no fold; the undef mask element is printed as poison.
315define <4 x i32> @urem_undef_mask_elt(<4 x i32> %v) {
316; CHECK-LABEL: @urem_undef_mask_elt(
317; CHECK-NEXT:    [[B:%.*]] = urem <4 x i32> <i32 11, i32 12, i32 13, i32 14>, [[V:%.*]]
318; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i32> [[B]], <4 x i32> [[V]], <4 x i32> <i32 0, i32 1, i32 6, i32 poison>
319; CHECK-NEXT:    ret <4 x i32> [[S]]
320;
321  %b = urem <4 x i32> <i32 11, i32 12, i32 13, i32 14>, %v
322  %s = shufflevector <4 x i32> %b, <4 x i32> %v, <4 x i32> <i32 0, i32 1, i32 6, i32 undef>
323  ret <4 x i32> %s
324}
325
; srem with a constant dividend (operand 0); only lane 2 of the result comes
; from the srem. Expected: no change -- srem and shuffle both remain.
326define <4 x i32> @srem(<4 x i32> %v) {
327; CHECK-LABEL: @srem(
328; CHECK-NEXT:    [[B:%.*]] = srem <4 x i32> <i32 11, i32 12, i32 13, i32 14>, [[V:%.*]]
329; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i32> [[V]], <4 x i32> [[B]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
330; CHECK-NEXT:    ret <4 x i32> [[S]]
331;
332  %b = srem <4 x i32> <i32 11, i32 12, i32 13, i32 14>, %v
333  %s = shufflevector <4 x i32> %v, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
334  ret <4 x i32> %s
335}
336
337; Try FP ops/types.
338
; fadd of %v and a constant, with no nofpclass attribute on %v (compare
; @fadd, whose argument is nofpclass(nan)). Expected: no fold -- the fadd
; and the shuffle remain, and the two constant lanes that are never
; selected become poison.
339define <4 x float> @fadd_maybe_nan(<4 x float> %v) {
340; CHECK-LABEL: @fadd_maybe_nan(
341; CHECK-NEXT:    [[B:%.*]] = fadd <4 x float> [[V:%.*]], <float 4.100000e+01, float 4.200000e+01, float poison, float poison>
342; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x float> [[B]], <4 x float> [[V]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
343; CHECK-NEXT:    ret <4 x float> [[S]]
344;
345  %b = fadd <4 x float> %v, <float 41.0, float 42.0, float 43.0, float 44.0>
346  %s = shufflevector <4 x float> %b, <4 x float> %v, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
347  ret <4 x float> %s
348}
349
; Same IR as @fadd_maybe_nan, but %v is marked nofpclass(nan).
; Expected: one fadd with -0.0 in the lanes (2 and 3) taken from %v.
350define <4 x float> @fadd(<4 x float> nofpclass(nan) %v) {
351; CHECK-LABEL: @fadd(
352; CHECK-NEXT:    [[S:%.*]] = fadd <4 x float> [[V:%.*]], <float 4.100000e+01, float 4.200000e+01, float -0.000000e+00, float -0.000000e+00>
353; CHECK-NEXT:    ret <4 x float> [[S]]
354;
355  %b = fadd <4 x float> %v, <float 41.0, float 42.0, float 43.0, float 44.0>
356  %s = shufflevector <4 x float> %b, <4 x float> %v, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
357  ret <4 x float> %s
358}
359
; fsub with the constant as operand 0; lane 0 is undef in the mask, lane 1
; comes from %v, lanes 2 and 3 from the fsub. Expected: no fold -- the
; unselected constant lanes become poison and the shuffle remains (with the
; undef mask element printed as poison).
360define <4 x double> @fsub(<4 x double> %v) {
361; CHECK-LABEL: @fsub(
362; CHECK-NEXT:    [[B:%.*]] = fsub <4 x double> <double poison, double poison, double 4.300000e+01, double 4.400000e+01>, [[V:%.*]]
363; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x double> [[V]], <4 x double> [[B]], <4 x i32> <i32 poison, i32 1, i32 6, i32 7>
364; CHECK-NEXT:    ret <4 x double> [[S]]
365;
366  %b = fsub <4 x double> <double 41.0, double 42.0, double 43.0, double 44.0>, %v
367  %s = shufflevector <4 x double> %v, <4 x double> %b, <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
368  ret <4 x double> %s
369}
370
371; Propagate any FMF.
372
; fmul nnan ninf on a nofpclass(nan) argument; only lane 0 is taken from the
; fmul. Expected: one fmul that keeps nnan ninf, with 1.0 in the lanes taken
; from %v.
373define <4 x float> @fmul(<4 x float> nofpclass(nan) %v) {
374; CHECK-LABEL: @fmul(
375; CHECK-NEXT:    [[S:%.*]] = fmul nnan ninf <4 x float> [[V:%.*]], <float 4.100000e+01, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
376; CHECK-NEXT:    ret <4 x float> [[S]]
377;
378  %b = fmul nnan ninf <4 x float> %v, <float 41.0, float 42.0, float 43.0, float 44.0>
379  %s = shufflevector <4 x float> %b, <4 x float> %v, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
380  ret <4 x float> %s
381}
382
; fdiv fast with the constant as the dividend (operand 0). Expected: no
; fold -- the fdiv keeps its fast flag, unselected constant lanes become
; poison, and the shuffle remains.
383define <4 x double> @fdiv_constant_op0(<4 x double> %v) {
384; CHECK-LABEL: @fdiv_constant_op0(
385; CHECK-NEXT:    [[B:%.*]] = fdiv fast <4 x double> <double poison, double poison, double 4.300000e+01, double 4.400000e+01>, [[V:%.*]]
386; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x double> [[V]], <4 x double> [[B]], <4 x i32> <i32 poison, i32 1, i32 6, i32 7>
387; CHECK-NEXT:    ret <4 x double> [[S]]
388;
389  %b = fdiv fast <4 x double> <double 41.0, double 42.0, double 43.0, double 44.0>, %v
390  %s = shufflevector <4 x double> %v, <4 x double> %b, <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
391  ret <4 x double> %s
392}
393
; fdiv reassoc with the constant as the divisor (operand 1) on a
; nofpclass(nan) argument. Lane 0 is undef in the mask, lane 1 comes from
; %v, lanes 2 and 3 from the fdiv. Expected: one fdiv reassoc whose divisor
; is undef in lane 0 and 1.0 in lane 1.
394define <4 x double> @fdiv_constant_op1(<4 x double> nofpclass(nan) %v) {
395; CHECK-LABEL: @fdiv_constant_op1(
396; CHECK-NEXT:    [[S:%.*]] = fdiv reassoc <4 x double> [[V:%.*]], <double undef, double 1.000000e+00, double 4.300000e+01, double 4.400000e+01>
397; CHECK-NEXT:    ret <4 x double> [[S]]
398;
399  %b = fdiv reassoc <4 x double> %v, <double 41.0, double 42.0, double 43.0, double 44.0>
400  %s = shufflevector <4 x double> %v, <4 x double> %b, <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
401  ret <4 x double> %s
402}
403
; frem with the constant as operand 0; lanes 0 and 1 come from the frem.
; Expected: no fold -- the unselected constant lanes become poison and the
; shuffle remains.
404define <4 x double> @frem(<4 x double> %v) {
405; CHECK-LABEL: @frem(
406; CHECK-NEXT:    [[B:%.*]] = frem <4 x double> <double 4.100000e+01, double 4.200000e+01, double poison, double poison>, [[V:%.*]]
407; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x double> [[B]], <4 x double> [[V]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
408; CHECK-NEXT:    ret <4 x double> [[S]]
409;
410  %b = frem <4 x double> <double 41.0, double 42.0, double 43.0, double 44.0>, %v
411  %s = shufflevector <4 x double> %b, <4 x double> %v, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
412  ret <4 x double> %s
413}
414
415; Tests where both operands of the shuffle are binops with the same opcode.
416
; Two adds of the same %v0 with different constants, blended by a
; select-shuffle (lanes 0,2 from %t1; lanes 1,3 from %t2). Expected: one add
; whose constant is the same lane-wise blend of the two constants.
417define <4 x i32> @add_add(<4 x i32> %v0) {
418; CHECK-LABEL: @add_add(
419; CHECK-NEXT:    [[T3:%.*]] = add <4 x i32> [[V0:%.*]], <i32 1, i32 6, i32 3, i32 8>
420; CHECK-NEXT:    ret <4 x i32> [[T3]]
421;
422  %t1 = add <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
423  %t2 = add <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
424  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
425  ret <4 x i32> %t3
426}
427
; Same as @add_add with nsw on both adds. Expected: the merged add keeps nsw.
428define <4 x i32> @add_add_nsw(<4 x i32> %v0) {
429; CHECK-LABEL: @add_add_nsw(
430; CHECK-NEXT:    [[T3:%.*]] = add nsw <4 x i32> [[V0:%.*]], <i32 1, i32 6, i32 3, i32 8>
431; CHECK-NEXT:    ret <4 x i32> [[T3]]
432;
433  %t1 = add nsw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
434  %t2 = add nsw <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
435  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
436  ret <4 x i32> %t3
437}
438
; Two flag-free adds blended with an undef mask element in lane 2.
; Expected: one add whose merged constant is undef in lane 2.
439define <4 x i32> @add_add_undef_mask_elt(<4 x i32> %v0) {
440; CHECK-LABEL: @add_add_undef_mask_elt(
441; CHECK-NEXT:    [[T3:%.*]] = add <4 x i32> [[V0:%.*]], <i32 1, i32 6, i32 undef, i32 8>
442; CHECK-NEXT:    ret <4 x i32> [[T3]]
443;
444  %t1 = add <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
445  %t2 = add <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
446  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 undef, i32 7>
447  ret <4 x i32> %t3
448}
449
450; Poison flags must be dropped or undef must be replaced with safe constant.
451
; Two nsw adds blended with an undef mask element in lane 2.
; Expected: the merged constant is undef in lane 2 and nsw is dropped.
452define <4 x i32> @add_add_nsw_undef_mask_elt(<4 x i32> %v0) {
453; CHECK-LABEL: @add_add_nsw_undef_mask_elt(
454; CHECK-NEXT:    [[T3:%.*]] = add <4 x i32> [[V0:%.*]], <i32 1, i32 6, i32 undef, i32 8>
455; CHECK-NEXT:    ret <4 x i32> [[T3]]
456;
457  %t1 = add nsw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
458  %t2 = add nsw <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
459  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 undef, i32 7>
460  ret <4 x i32> %t3
461}
462
463; Constant operand 0 (LHS) also works.
464
; Two subs with constant operand 0 and the same %v0 as operand 1; only lane
; 3 comes from %t2. Expected: one sub whose constant is the lane-wise blend.
465define <4 x i32> @sub_sub(<4 x i32> %v0) {
466; CHECK-LABEL: @sub_sub(
467; CHECK-NEXT:    [[T3:%.*]] = sub <4 x i32> <i32 1, i32 2, i32 3, i32 8>, [[V0:%.*]]
468; CHECK-NEXT:    ret <4 x i32> [[T3]]
469;
470  %t1 = sub <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
471  %t2 = sub <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
472  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
473  ret <4 x i32> %t3
474}
475
; Same as @sub_sub with nuw on both subs. Expected: the merged sub keeps nuw.
476define <4 x i32> @sub_sub_nuw(<4 x i32> %v0) {
477; CHECK-LABEL: @sub_sub_nuw(
478; CHECK-NEXT:    [[T3:%.*]] = sub nuw <4 x i32> <i32 1, i32 2, i32 3, i32 8>, [[V0:%.*]]
479; CHECK-NEXT:    ret <4 x i32> [[T3]]
480;
481  %t1 = sub nuw <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
482  %t2 = sub nuw <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
483  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
484  ret <4 x i32> %t3
485}
486
; Two flag-free constant-LHS subs blended with an undef mask element in
; lane 0. Expected: one sub whose merged constant is undef in lane 0.
487define <4 x i32> @sub_sub_undef_mask_elt(<4 x i32> %v0) {
488; CHECK-LABEL: @sub_sub_undef_mask_elt(
489; CHECK-NEXT:    [[T3:%.*]] = sub <4 x i32> <i32 undef, i32 2, i32 3, i32 8>, [[V0:%.*]]
490; CHECK-NEXT:    ret <4 x i32> [[T3]]
491;
492  %t1 = sub <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
493  %t2 = sub <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
494  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
495  ret <4 x i32> %t3
496}
497
498; Poison flags must be dropped or undef must be replaced with safe constant.
499
; Two nuw subs blended with an undef mask element in lane 0.
; Expected: the merged constant is undef in lane 0 and nuw is dropped.
500define <4 x i32> @sub_sub_nuw_undef_mask_elt(<4 x i32> %v0) {
501; CHECK-LABEL: @sub_sub_nuw_undef_mask_elt(
502; CHECK-NEXT:    [[T3:%.*]] = sub <4 x i32> <i32 undef, i32 2, i32 3, i32 8>, [[V0:%.*]]
503; CHECK-NEXT:    ret <4 x i32> [[T3]]
504;
505  %t1 = sub nuw <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
506  %t2 = sub nuw <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
507  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 5, i32 2, i32 7>
508  ret <4 x i32> %t3
509}
510
511; If any element of the shuffle mask operand is undef, that element of the result is undef.
512; The shuffle is eliminated in this transform, but we can replace a constant element with undef.
513
; Two muls of %v0 blended with an undef mask element in lane 0.
; Expected: one mul whose merged constant is undef in lane 0.
514define <4 x i32> @mul_mul(<4 x i32> %v0) {
515; CHECK-LABEL: @mul_mul(
516; CHECK-NEXT:    [[T3:%.*]] = mul <4 x i32> [[V0:%.*]], <i32 undef, i32 6, i32 3, i32 8>
517; CHECK-NEXT:    ret <4 x i32> [[T3]]
518;
519  %t1 = mul <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
520  %t2 = mul <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
521  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 5, i32 2, i32 7>
522  ret <4 x i32> %t3
523}
524
525; Preserve flags when possible.
526
; Two shls of %v0 by different constants; lanes 0,1 come from %t2 and lanes
; 2,3 from %t1. Expected: one shl with the lane-wise blended shift amounts.
527define <4 x i32> @shl_shl(<4 x i32> %v0) {
528; CHECK-LABEL: @shl_shl(
529; CHECK-NEXT:    [[T3:%.*]] = shl <4 x i32> [[V0:%.*]], <i32 5, i32 6, i32 3, i32 4>
530; CHECK-NEXT:    ret <4 x i32> [[T3]]
531;
532  %t1 = shl <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
533  %t2 = shl <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
534  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
535  ret <4 x i32> %t3
536}
537
; Same as @shl_shl with nuw on both shifts. Expected: the merged shl keeps
; nuw.
538define <4 x i32> @shl_shl_nuw(<4 x i32> %v0) {
539; CHECK-LABEL: @shl_shl_nuw(
540; CHECK-NEXT:    [[T3:%.*]] = shl nuw <4 x i32> [[V0:%.*]], <i32 5, i32 6, i32 3, i32 4>
541; CHECK-NEXT:    ret <4 x i32> [[T3]]
542;
543  %t1 = shl nuw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
544  %t2 = shl nuw <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
545  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
546  ret <4 x i32> %t3
547}
548
549; Shift by undef is poison. Undef must be replaced by safe constant.
550
; Two shls blended with undef mask elements in lanes 0 and 3.
; Expected: one shl whose shift amount is 0 (not undef) in the undef lanes.
551define <4 x i32> @shl_shl_undef_mask_elt(<4 x i32> %v0) {
552; CHECK-LABEL: @shl_shl_undef_mask_elt(
553; CHECK-NEXT:    [[T3:%.*]] = shl <4 x i32> [[V0:%.*]], <i32 0, i32 6, i32 3, i32 0>
554; CHECK-NEXT:    ret <4 x i32> [[T3]]
555;
556  %t1 = shl <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
557  %t2 = shl <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
558  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 5, i32 2, i32 undef>
559  ret <4 x i32> %t3
560}
561
562; Shift by undef is poison. Undef must be replaced by safe constant.
563
; Same as @shl_shl_undef_mask_elt with nuw on both shifts. Expected: shift
; amount 0 in the undef lanes, and the nuw flag is kept.
564define <4 x i32> @shl_shl_nuw_undef_mask_elt(<4 x i32> %v0) {
565; CHECK-LABEL: @shl_shl_nuw_undef_mask_elt(
566; CHECK-NEXT:    [[T3:%.*]] = shl nuw <4 x i32> [[V0:%.*]], <i32 0, i32 6, i32 3, i32 0>
567; CHECK-NEXT:    ret <4 x i32> [[T3]]
568;
569  %t1 = shl nuw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
570  %t2 = shl nuw <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
571  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 5, i32 2, i32 undef>
572  ret <4 x i32> %t3
573}
574
575; Can't propagate the 'exact' flag here: only one of the two lshr ops has it.
576
; Two lshrs of constants by %v0; %t1 is exact, %t2 is not. Lane 2 comes from
; %t1, the rest from %t2. Expected: one lshr of the blended constant with
; no exact flag.
577define <4 x i32> @lshr_lshr(<4 x i32> %v0) {
578; CHECK-LABEL: @lshr_lshr(
579; CHECK-NEXT:    [[T3:%.*]] = lshr <4 x i32> <i32 5, i32 6, i32 3, i32 8>, [[V0:%.*]]
580; CHECK-NEXT:    ret <4 x i32> [[T3]]
581;
582  %t1 = lshr exact <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
583  %t2 = lshr <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
584  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
585  ret <4 x i32> %t3
586}
587
588; Try weird types.
589
; Three-element vectors: lane 0 comes from %t2, lanes 1 and 2 from %t1.
; Expected: one ashr with the lane-wise blended shift amounts.
590define <3 x i32> @ashr_ashr(<3 x i32> %v0) {
591; CHECK-LABEL: @ashr_ashr(
592; CHECK-NEXT:    [[T3:%.*]] = ashr <3 x i32> [[V0:%.*]], <i32 4, i32 2, i32 3>
593; CHECK-NEXT:    ret <3 x i32> [[T3]]
594;
595  %t1 = ashr <3 x i32> %v0, <i32 1, i32 2, i32 3>
596  %t2 = ashr <3 x i32> %v0, <i32 4, i32 5, i32 6>
597  %t3 = shufflevector <3 x i32> %t1, <3 x i32> %t2, <3 x i32> <i32 3, i32 1, i32 2>
598  ret <3 x i32> %t3
599}
600
; <3 x i42> ands: lane 0 from %t1, lane 1 from %t2, lane 2 undef in the
; mask. Expected: one and whose merged constant is undef in lane 2.
601define <3 x i42> @and_and(<3 x i42> %v0) {
602; CHECK-LABEL: @and_and(
603; CHECK-NEXT:    [[T3:%.*]] = and <3 x i42> [[V0:%.*]], <i42 1, i42 5, i42 undef>
604; CHECK-NEXT:    ret <3 x i42> [[T3]]
605;
606  %t1 = and <3 x i42> %v0, <i42 1, i42 2, i42 3>
607  %t2 = and <3 x i42> %v0, <i42 4, i42 5, i42 6>
608  %t3 = shufflevector <3 x i42> %t1, <3 x i42> %t2, <3 x i32> <i32 0, i32 4, i32 undef>
609  ret <3 x i42> %t3
610}
611
612; It doesn't matter if the intermediate ops have extra uses.
613
; %t1 has an extra use (the call); %t2 is only used by the shuffle.
; Expected: %t1 stays for the call, %t2 disappears, and the shuffle becomes
; an or with the lane-wise blended constant.
614define <4 x i32> @or_or(<4 x i32> %v0) {
615; CHECK-LABEL: @or_or(
616; CHECK-NEXT:    [[T1:%.*]] = or <4 x i32> [[V0:%.*]], <i32 1, i32 2, i32 3, i32 4>
617; CHECK-NEXT:    [[T3:%.*]] = or <4 x i32> [[V0]], <i32 5, i32 6, i32 3, i32 4>
618; CHECK-NEXT:    call void @use_v4i32(<4 x i32> [[T1]])
619; CHECK-NEXT:    ret <4 x i32> [[T3]]
620;
621  %t1 = or <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
622  %t2 = or <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
623  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
624  call void @use_v4i32(<4 x i32> %t1)
625  ret <4 x i32> %t3
626}
627
; %t2 has an extra use (the call); %t1 is only used by the shuffle.
; Expected: %t2 stays for the call, %t1 disappears, and the shuffle becomes
; a xor with the lane-wise blended constant (only lane 1 from %t2).
628define <4 x i32> @xor_xor(<4 x i32> %v0) {
629; CHECK-LABEL: @xor_xor(
630; CHECK-NEXT:    [[T2:%.*]] = xor <4 x i32> [[V0:%.*]], <i32 5, i32 6, i32 7, i32 8>
631; CHECK-NEXT:    [[T3:%.*]] = xor <4 x i32> [[V0]], <i32 1, i32 6, i32 3, i32 4>
632; CHECK-NEXT:    call void @use_v4i32(<4 x i32> [[T2]])
633; CHECK-NEXT:    ret <4 x i32> [[T3]]
634;
635  %t1 = xor <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
636  %t2 = xor <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
637  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
638  call void @use_v4i32(<4 x i32> %t2)
639  ret <4 x i32> %t3
640}
641
; Both constant-dividend udivs have an extra use (one call each).
; Expected: both originals stay for the calls, and the shuffle is still
; replaced by a third udiv with the lane-wise blended dividend.
642define <4 x i32> @udiv_udiv(<4 x i32> %v0) {
643; CHECK-LABEL: @udiv_udiv(
644; CHECK-NEXT:    [[T1:%.*]] = udiv <4 x i32> <i32 1, i32 2, i32 3, i32 4>, [[V0:%.*]]
645; CHECK-NEXT:    [[T2:%.*]] = udiv <4 x i32> <i32 5, i32 6, i32 7, i32 8>, [[V0]]
646; CHECK-NEXT:    [[T3:%.*]] = udiv <4 x i32> <i32 1, i32 2, i32 3, i32 8>, [[V0]]
647; CHECK-NEXT:    call void @use_v4i32(<4 x i32> [[T1]])
648; CHECK-NEXT:    call void @use_v4i32(<4 x i32> [[T2]])
649; CHECK-NEXT:    ret <4 x i32> [[T3]]
650;
651  %t1 = udiv <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
652  %t2 = udiv <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
653  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
654  call void @use_v4i32(<4 x i32> %t1)
655  call void @use_v4i32(<4 x i32> %t2)
656  ret <4 x i32> %t3
657}
658
659; Div/rem need special handling if the shuffle has undef elements.
660
; Two sdivs of %v0 by constants; lanes 0,1 from %t1 and lanes 2,3 from %t2,
; with no undef mask elements. Expected: one sdiv with the blended divisor.
661define <4 x i32> @sdiv_sdiv(<4 x i32> %v0) {
662; CHECK-LABEL: @sdiv_sdiv(
663; CHECK-NEXT:    [[T3:%.*]] = sdiv <4 x i32> [[V0:%.*]], <i32 1, i32 2, i32 7, i32 8>
664; CHECK-NEXT:    ret <4 x i32> [[T3]]
665;
666  %t1 = sdiv <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
667  %t2 = sdiv <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
668  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
669  ret <4 x i32> %t3
670}
671
; Same as @sdiv_sdiv with exact on both sdivs. Expected: the merged sdiv
; keeps exact.
672define <4 x i32> @sdiv_sdiv_exact(<4 x i32> %v0) {
673; CHECK-LABEL: @sdiv_sdiv_exact(
674; CHECK-NEXT:    [[T3:%.*]] = sdiv exact <4 x i32> [[V0:%.*]], <i32 1, i32 2, i32 7, i32 8>
675; CHECK-NEXT:    ret <4 x i32> [[T3]]
676;
677  %t1 = sdiv exact <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
678  %t2 = sdiv exact <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
679  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
680  ret <4 x i32> %t3
681}
682
; Two sdivs blended with undef mask elements in lanes 0 and 3.
; Expected: one sdiv whose divisor is 1 (not undef) in the undef lanes.
683define <4 x i32> @sdiv_sdiv_undef_mask_elt(<4 x i32> %v0) {
684; CHECK-LABEL: @sdiv_sdiv_undef_mask_elt(
685; CHECK-NEXT:    [[T3:%.*]] = sdiv <4 x i32> [[V0:%.*]], <i32 1, i32 2, i32 7, i32 1>
686; CHECK-NEXT:    ret <4 x i32> [[T3]]
687;
688  %t1 = sdiv <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
689  %t2 = sdiv <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
690  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 1, i32 6, i32 undef>
691  ret <4 x i32> %t3
692}
693
; Same as @sdiv_sdiv_undef_mask_elt with exact on both sdivs. Expected:
; divisor 1 in the undef lanes, and the exact flag is kept.
694define <4 x i32> @sdiv_sdiv_exact_undef_mask_elt(<4 x i32> %v0) {
695; CHECK-LABEL: @sdiv_sdiv_exact_undef_mask_elt(
696; CHECK-NEXT:    [[T3:%.*]] = sdiv exact <4 x i32> [[V0:%.*]], <i32 1, i32 2, i32 7, i32 1>
697; CHECK-NEXT:    ret <4 x i32> [[T3]]
698;
699  %t1 = sdiv exact <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
700  %t2 = sdiv exact <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
701  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 1, i32 6, i32 undef>
702  ret <4 x i32> %t3
703}
704
; Two urems with constant dividends and the same divisor %v0; lanes 0,1 from
; %t1 and lanes 2,3 from %t2. Expected: one urem with the blended dividend.
705define <4 x i32> @urem_urem(<4 x i32> %v0) {
706; CHECK-LABEL: @urem_urem(
707; CHECK-NEXT:    [[T3:%.*]] = urem <4 x i32> <i32 1, i32 2, i32 7, i32 8>, [[V0:%.*]]
708; CHECK-NEXT:    ret <4 x i32> [[T3]]
709;
710  %t1 = urem <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
711  %t2 = urem <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
712  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
713  ret <4 x i32> %t3
714}
715
716; This is folded by using a safe constant.
717
; Two constant-dividend urems blended with an undef mask element in lane 3.
; Expected: one urem whose dividend is 0 (not undef) in the undef lane.
718define <4 x i32> @urem_urem_undef_mask_elt(<4 x i32> %v0) {
719; CHECK-LABEL: @urem_urem_undef_mask_elt(
720; CHECK-NEXT:    [[T3:%.*]] = urem <4 x i32> <i32 1, i32 2, i32 7, i32 0>, [[V0:%.*]]
721; CHECK-NEXT:    ret <4 x i32> [[T3]]
722;
723  %t1 = urem <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
724  %t2 = urem <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
725  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 undef>
726  ret <4 x i32> %t3
727}
728
; Signed variant: mask <0,1,6,3> takes only lane 2 from the second srem, so the
; blended dividend is <1,2,7,4>.
729define <4 x i32> @srem_srem(<4 x i32> %v0) {
730; CHECK-LABEL: @srem_srem(
731; CHECK-NEXT:    [[T3:%.*]] = srem <4 x i32> <i32 1, i32 2, i32 7, i32 4>, [[V0:%.*]]
732; CHECK-NEXT:    ret <4 x i32> [[T3]]
733;
734  %t1 = srem <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
735  %t2 = srem <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
736  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
737  ret <4 x i32> %t3
738}
739
740; This is folded by using a safe constant.
741
; Mask lane 1 is undef; the expected dividend constant uses 0 in that lane.
742define <4 x i32> @srem_srem_undef_mask_elt(<4 x i32> %v0) {
743; CHECK-LABEL: @srem_srem_undef_mask_elt(
744; CHECK-NEXT:    [[T3:%.*]] = srem <4 x i32> <i32 1, i32 0, i32 7, i32 4>, [[V0:%.*]]
745; CHECK-NEXT:    ret <4 x i32> [[T3]]
746;
747  %t1 = srem <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
748  %t2 = srem <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v0
749  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 undef, i32 6, i32 3>
750  ret <4 x i32> %t3
751}
752
753; Try FP ops/types.
754
; FP variant with a shared variable operand 0: one fadd remains, using lanes 0-1
; of the first constant and lanes 2-3 of the second.
755define <4 x float> @fadd_fadd(<4 x float> %v0) {
756; CHECK-LABEL: @fadd_fadd(
757; CHECK-NEXT:    [[T3:%.*]] = fadd <4 x float> [[V0:%.*]], <float 1.000000e+00, float 2.000000e+00, float 7.000000e+00, float 8.000000e+00>
758; CHECK-NEXT:    ret <4 x float> [[T3]]
759;
760  %t1 = fadd <4 x float> %v0, <float 1.0, float 2.0, float 3.0, float 4.0>
761  %t2 = fadd <4 x float> %v0, <float 5.0, float 6.0, float 7.0, float 8.0>
762  %t3 = shufflevector <4 x float> %t1, <4 x float> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
763  ret <4 x float> %t3
764}
765
; The constant is operand 0 here. Mask lane 0 is undef, and the expected fsub
; constant carries undef in that lane.
766define <4 x double> @fsub_fsub(<4 x double> %v0) {
767; CHECK-LABEL: @fsub_fsub(
768; CHECK-NEXT:    [[T3:%.*]] = fsub <4 x double> <double undef, double 2.000000e+00, double 7.000000e+00, double 8.000000e+00>, [[V0:%.*]]
769; CHECK-NEXT:    ret <4 x double> [[T3]]
770;
771  %t1 = fsub <4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>, %v0
772  %t2 = fsub <4 x double> <double 5.0, double 6.0, double 7.0, double 8.0>, %v0
773  %t3 = shufflevector <4 x double> %t1, <4 x double> %t2, <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
774  ret <4 x double> %t3
775}
776
777; Intersect any FMF.
778
; Both inputs carry 'nnan ninf', so the merged fmul keeps both flags. Only lane 0
; comes from the first fmul (mask <0,5,6,7>).
779define <4 x float> @fmul_fmul(<4 x float> %v0) {
780; CHECK-LABEL: @fmul_fmul(
781; CHECK-NEXT:    [[T3:%.*]] = fmul nnan ninf <4 x float> [[V0:%.*]], <float 1.000000e+00, float 6.000000e+00, float 7.000000e+00, float 8.000000e+00>
782; CHECK-NEXT:    ret <4 x float> [[T3]]
783;
784  %t1 = fmul nnan ninf <4 x float> %v0, <float 1.0, float 2.0, float 3.0, float 4.0>
785  %t2 = fmul nnan ninf <4 x float> %v0, <float 5.0, float 6.0, float 7.0, float 8.0>
786  %t3 = shufflevector <4 x float> %t1, <4 x float> %t2, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
787  ret <4 x float> %t3
788}
789
; The inputs carry 'fast' and 'nnan arcp', and mask lane 0 is undef. The expected
; fdiv keeps only 'arcp' and has undef in lane 0 of its constant.
; NOTE(review): presumably 'nnan' is dropped because of the undef mask lane -
; confirm against the InstCombine implementation.
790define <4 x double> @fdiv_fdiv(<4 x double> %v0) {
791; CHECK-LABEL: @fdiv_fdiv(
792; CHECK-NEXT:    [[T3:%.*]] = fdiv arcp <4 x double> <double undef, double 2.000000e+00, double 7.000000e+00, double 8.000000e+00>, [[V0:%.*]]
793; CHECK-NEXT:    ret <4 x double> [[T3]]
794;
795  %t1 = fdiv fast <4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>, %v0
796  %t2 = fdiv nnan arcp <4 x double> <double 5.0, double 6.0, double 7.0, double 8.0>, %v0
797  %t3 = shufflevector <4 x double> %t1, <4 x double> %t2, <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
798  ret <4 x double> %t3
799}
800
801; Negative test: the variable operand must be in the same position (operand 0 or operand 1) in both binops.
802
; %v0 is operand 1 of the first frem but operand 0 of the second, so the two
; binops are not merged. The expected output only replaces the constant lanes
; that the shuffle never reads with poison.
803define <4 x double> @frem_frem(<4 x double> %v0) {
804; CHECK-LABEL: @frem_frem(
805; CHECK-NEXT:    [[T1:%.*]] = frem <4 x double> <double 1.000000e+00, double 2.000000e+00, double poison, double poison>, [[V0:%.*]]
806; CHECK-NEXT:    [[T2:%.*]] = frem <4 x double> [[V0]], <double poison, double poison, double 7.000000e+00, double 8.000000e+00>
807; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x double> [[T1]], <4 x double> [[T2]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
808; CHECK-NEXT:    ret <4 x double> [[T3]]
809;
810  %t1 = frem <4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>, %v0
811  %t2 = frem <4 x double> %v0, <double 5.0, double 6.0, double 7.0, double 8.0>
812  %t3 = shufflevector <4 x double> %t1, <4 x double> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
813  ret <4 x double> %t3
814}
815
; Two different variables: the expected output selects between %v0 and %v1 with
; the original mask first, then performs one add with the blended constant.
816define <4 x i32> @add_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
817; CHECK-LABEL: @add_2_vars(
818; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
819; CHECK-NEXT:    [[T3:%.*]] = add <4 x i32> [[TMP1]], <i32 1, i32 6, i32 3, i32 8>
820; CHECK-NEXT:    ret <4 x i32> [[T3]]
821;
822  %t1 = add <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
823  %t2 = add <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
824  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
825  ret <4 x i32> %t3
826}
827
828; Constant operand 0 (LHS) also works.
829
; Constants are operand 0 of both subs. Only lane 3 comes from the second sub
; (mask <0,1,2,7>), giving the blended constant <1,2,3,8>.
830define <4 x i32> @sub_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
831; CHECK-LABEL: @sub_2_vars(
832; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
833; CHECK-NEXT:    [[T3:%.*]] = sub <4 x i32> <i32 1, i32 2, i32 3, i32 8>, [[TMP1]]
834; CHECK-NEXT:    ret <4 x i32> [[T3]]
835;
836  %t1 = sub <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
837  %t2 = sub <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
838  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
839  ret <4 x i32> %t3
840}
841
; Both subs are 'nsw' and the mask has no undef lanes; the merged sub keeps 'nsw'.
842define <4 x i32> @sub_2_vars_nsw(<4 x i32> %v0, <4 x i32> %v1) {
843; CHECK-LABEL: @sub_2_vars_nsw(
844; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 2, i32 7>
845; CHECK-NEXT:    [[T3:%.*]] = sub nsw <4 x i32> <i32 1, i32 2, i32 3, i32 8>, [[TMP1]]
846; CHECK-NEXT:    ret <4 x i32> [[T3]]
847;
848  %t1 = sub nsw <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
849  %t2 = sub nsw <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
850  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
851  ret <4 x i32> %t3
852}
853
; Mask lane 0 is undef: the new shuffle gets a poison mask lane and the blended
; constant gets undef in lane 0.
854define <4 x i32> @sub_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
855; CHECK-LABEL: @sub_2_vars_undef_mask_elt(
856; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 poison, i32 1, i32 2, i32 7>
857; CHECK-NEXT:    [[T3:%.*]] = sub <4 x i32> <i32 undef, i32 2, i32 3, i32 8>, [[TMP1]]
858; CHECK-NEXT:    ret <4 x i32> [[T3]]
859;
860  %t1 = sub <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
861  %t2 = sub <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
862  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
863  ret <4 x i32> %t3
864}
865
866; Poison flags must be dropped or undef must be replaced with safe constant.
867
; Inputs are 'sub nsw' but mask lane 0 is undef; the expected output keeps undef
; in the constant and has no 'nsw' on the merged sub.
868define <4 x i32> @sub_2_vars_nsw_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
869; CHECK-LABEL: @sub_2_vars_nsw_undef_mask_elt(
870; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 poison, i32 1, i32 2, i32 7>
871; CHECK-NEXT:    [[T3:%.*]] = sub <4 x i32> <i32 undef, i32 2, i32 3, i32 8>, [[TMP1]]
872; CHECK-NEXT:    ret <4 x i32> [[T3]]
873;
874  %t1 = sub nsw <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
875  %t2 = sub nsw <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
876  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
877  ret <4 x i32> %t3
878}
879
880; If any element of the shuffle mask operand is undef, that element of the result is undef.
881; With 2 variables the shuffle is not eliminated (it is moved ahead of the binop), but the matching constant element can still be replaced with undef.
882
; Baseline 2-variable mul with no undef mask lanes: shuffle the variables, then
; one mul by the blended constant <1,6,3,8>.
883define <4 x i32> @mul_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
884; CHECK-LABEL: @mul_2_vars(
885; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
886; CHECK-NEXT:    [[T3:%.*]] = mul <4 x i32> [[TMP1]], <i32 1, i32 6, i32 3, i32 8>
887; CHECK-NEXT:    ret <4 x i32> [[T3]]
888;
889  %t1 = mul <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
890  %t2 = mul <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
891  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
892  ret <4 x i32> %t3
893}
894
; Both muls are 'nuw' and the mask has no undef lanes; the merged mul keeps 'nuw'.
895define <4 x i32> @mul_2_vars_nuw(<4 x i32> %v0, <4 x i32> %v1) {
896; CHECK-LABEL: @mul_2_vars_nuw(
897; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
898; CHECK-NEXT:    [[T3:%.*]] = mul nuw <4 x i32> [[TMP1]], <i32 1, i32 6, i32 3, i32 8>
899; CHECK-NEXT:    ret <4 x i32> [[T3]]
900;
901  %t1 = mul nuw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
902  %t2 = mul nuw <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
903  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
904  ret <4 x i32> %t3
905}
906
; Mask lane 1 is undef: poison mask lane in the new shuffle, undef in lane 1 of
; the blended constant.
907define <4 x i32> @mul_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
908; CHECK-LABEL: @mul_2_vars_undef_mask_elt(
909; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 poison, i32 2, i32 7>
910; CHECK-NEXT:    [[T3:%.*]] = mul <4 x i32> [[TMP1]], <i32 1, i32 undef, i32 3, i32 8>
911; CHECK-NEXT:    ret <4 x i32> [[T3]]
912;
913  %t1 = mul <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
914  %t2 = mul <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
915  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 undef, i32 2, i32 7>
916  ret <4 x i32> %t3
917}
918
919; Poison flags must be dropped or undef must be replaced with safe constant.
920
; Inputs are 'mul nuw' but mask lane 1 is undef; the expected output keeps undef
; in the constant and has no 'nuw' on the merged mul.
921define <4 x i32> @mul_2_vars_nuw_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
922; CHECK-LABEL: @mul_2_vars_nuw_undef_mask_elt(
923; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 poison, i32 2, i32 7>
924; CHECK-NEXT:    [[T3:%.*]] = mul <4 x i32> [[TMP1]], <i32 1, i32 undef, i32 3, i32 8>
925; CHECK-NEXT:    ret <4 x i32> [[T3]]
926;
927  %t1 = mul nuw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
928  %t2 = mul nuw <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
929  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 undef, i32 2, i32 7>
930  ret <4 x i32> %t3
931}
932
933; Preserve flags when possible.
934
; Constant shift amounts, no flags: shuffle the shifted values, then one shl by
; the blended amounts <1,6,3,4>.
935define <4 x i32> @shl_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
936; CHECK-LABEL: @shl_2_vars(
937; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 3>
938; CHECK-NEXT:    [[T3:%.*]] = shl <4 x i32> [[TMP1]], <i32 1, i32 6, i32 3, i32 4>
939; CHECK-NEXT:    ret <4 x i32> [[T3]]
940;
941  %t1 = shl <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
942  %t2 = shl <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
943  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
944  ret <4 x i32> %t3
945}
946
; Both shls are 'nsw' and the mask has no undef lanes; the merged shl keeps 'nsw'.
947define <4 x i32> @shl_2_vars_nsw(<4 x i32> %v0, <4 x i32> %v1) {
948; CHECK-LABEL: @shl_2_vars_nsw(
949; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 3>
950; CHECK-NEXT:    [[T3:%.*]] = shl nsw <4 x i32> [[TMP1]], <i32 1, i32 6, i32 3, i32 4>
951; CHECK-NEXT:    ret <4 x i32> [[T3]]
952;
953  %t1 = shl nsw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
954  %t2 = shl nsw <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
955  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
956  ret <4 x i32> %t3
957}
958
959; Shift by undef is poison. Undef is replaced by safe constant.
960
; Mask lanes 0 and 3 are undef; the expected shift-amount constant uses 0 in
; those lanes rather than undef, and the new shuffle gets poison mask lanes.
961define <4 x i32> @shl_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
962; CHECK-LABEL: @shl_2_vars_undef_mask_elt(
963; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 poison, i32 5, i32 2, i32 poison>
964; CHECK-NEXT:    [[T3:%.*]] = shl <4 x i32> [[TMP1]], <i32 0, i32 6, i32 3, i32 0>
965; CHECK-NEXT:    ret <4 x i32> [[T3]]
966;
967  %t1 = shl <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
968  %t2 = shl <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
969  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 5, i32 2, i32 undef>
970  ret <4 x i32> %t3
971}
972
973; Shift by undef is poison. Undef is replaced by safe constant.
974
; Inputs are 'shl nsw' with undef mask lanes 0 and 3; the expected output uses
; shift amount 0 in those lanes and still carries 'nsw'.
975define <4 x i32> @shl_2_vars_nsw_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
976; CHECK-LABEL: @shl_2_vars_nsw_undef_mask_elt(
977; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 poison, i32 5, i32 2, i32 poison>
978; CHECK-NEXT:    [[T3:%.*]] = shl nsw <4 x i32> [[TMP1]], <i32 0, i32 6, i32 3, i32 0>
979; CHECK-NEXT:    ret <4 x i32> [[T3]]
980;
981  %t1 = shl nsw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
982  %t2 = shl nsw <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
983  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 5, i32 2, i32 undef>
984  ret <4 x i32> %t3
985}
986
987; Can't propagate the flag here.
988
; Only the second lshr is 'exact', so the merged lshr in the expected output has
; no 'exact'. The shuffle operands come out commuted (%v1 first).
989define <4 x i32> @lshr_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
990; CHECK-LABEL: @lshr_2_vars(
991; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
992; CHECK-NEXT:    [[T3:%.*]] = lshr <4 x i32> <i32 5, i32 6, i32 3, i32 8>, [[TMP1]]
993; CHECK-NEXT:    ret <4 x i32> [[T3]]
994;
995  %t1 = lshr <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
996  %t2 = lshr exact <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
997  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
998  ret <4 x i32> %t3
999}
1000
; Both lshrs are 'exact' and the mask has no undef lanes; the merged lshr keeps
; the flag.
1001define <4 x i32> @lshr_2_vars_exact(<4 x i32> %v0, <4 x i32> %v1) {
1002; CHECK-LABEL: @lshr_2_vars_exact(
1003; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
1004; CHECK-NEXT:    [[T3:%.*]] = lshr exact <4 x i32> <i32 5, i32 6, i32 3, i32 8>, [[TMP1]]
1005; CHECK-NEXT:    ret <4 x i32> [[T3]]
1006;
1007  %t1 = lshr exact <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
1008  %t2 = lshr exact <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
1009  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
1010  ret <4 x i32> %t3
1011}
1012
1013; TODO: This would require a new shuffle mask (replace undef with op0 or op1 lane). Otherwise, we have shift-by-undef.
1014
; The shift amount is the variable operand and mask lane 0 is undef. The
; expected output keeps both lshrs and the shuffle; only the undef mask lane is
; printed as poison.
1015define <4 x i32> @lshr_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
1016; CHECK-LABEL: @lshr_2_vars_undef_mask_elt(
1017; CHECK-NEXT:    [[T1:%.*]] = lshr <4 x i32> <i32 1, i32 2, i32 3, i32 4>, [[V0:%.*]]
1018; CHECK-NEXT:    [[T2:%.*]] = lshr <4 x i32> <i32 5, i32 6, i32 7, i32 8>, [[V1:%.*]]
1019; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 poison, i32 5, i32 2, i32 7>
1020; CHECK-NEXT:    ret <4 x i32> [[T3]]
1021;
1022  %t1 = lshr <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
1023  %t2 = lshr <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
1024  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 5, i32 2, i32 7>
1025  ret <4 x i32> %t3
1026}
1027
1028; TODO: This would require a new shuffle mask (replace undef with op0 or op1 lane). Otherwise, we have shift-by-undef.
1029
; 'exact' variant with a variable shift amount and undef mask lane 0. The
; expected output keeps both 'lshr exact' instructions and the shuffle.
1030define <4 x i32> @lshr_2_vars_exact_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
1031; CHECK-LABEL: @lshr_2_vars_exact_undef_mask_elt(
1032; CHECK-NEXT:    [[T1:%.*]] = lshr exact <4 x i32> <i32 1, i32 2, i32 3, i32 4>, [[V0:%.*]]
1033; CHECK-NEXT:    [[T2:%.*]] = lshr exact <4 x i32> <i32 5, i32 6, i32 7, i32 8>, [[V1:%.*]]
1034; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 poison, i32 5, i32 2, i32 7>
1035; CHECK-NEXT:    ret <4 x i32> [[T3]]
1036;
1037  %t1 = lshr exact <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
1038  %t2 = lshr exact <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
1039  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 5, i32 2, i32 7>
1040  ret <4 x i32> %t3
1041}
1042
1043; Try weird types.
1044
; Odd element count (<3 x i32>): lane 0 comes from the second ashr, so the
; expected shuffle is commuted and the blended shift amounts are <4,2,3>.
1045define <3 x i32> @ashr_2_vars(<3 x i32> %v0, <3 x i32> %v1) {
1046; CHECK-LABEL: @ashr_2_vars(
1047; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <3 x i32> [[V1:%.*]], <3 x i32> [[V0:%.*]], <3 x i32> <i32 0, i32 4, i32 5>
1048; CHECK-NEXT:    [[T3:%.*]] = ashr <3 x i32> [[TMP1]], <i32 4, i32 2, i32 3>
1049; CHECK-NEXT:    ret <3 x i32> [[T3]]
1050;
1051  %t1 = ashr <3 x i32> %v0, <i32 1, i32 2, i32 3>
1052  %t2 = ashr <3 x i32> %v1, <i32 4, i32 5, i32 6>
1053  %t3 = shufflevector <3 x i32> %t1, <3 x i32> %t2, <3 x i32> <i32 3, i32 1, i32 2>
1054  ret <3 x i32> %t3
1055}
1056
; Odd element count and width (<3 x i42>) with undef mask lane 2: poison mask
; lane in the new shuffle, undef in lane 2 of the blended constant.
1057define <3 x i42> @and_2_vars(<3 x i42> %v0, <3 x i42> %v1) {
1058; CHECK-LABEL: @and_2_vars(
1059; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <3 x i42> [[V0:%.*]], <3 x i42> [[V1:%.*]], <3 x i32> <i32 0, i32 4, i32 poison>
1060; CHECK-NEXT:    [[T3:%.*]] = and <3 x i42> [[TMP1]], <i42 1, i42 5, i42 undef>
1061; CHECK-NEXT:    ret <3 x i42> [[T3]]
1062;
1063  %t1 = and <3 x i42> %v0, <i42 1, i42 2, i42 3>
1064  %t2 = and <3 x i42> %v1, <i42 4, i42 5, i42 6>
1065  %t3 = shufflevector <3 x i42> %t1, <3 x i42> %t2, <3 x i32> <i32 0, i32 4, i32 undef>
1066  ret <3 x i42> %t3
1067}
1068
1069; It doesn't matter if only one intermediate op has extra uses.
1070
; %t1 has an extra use (the call) but %t2 does not. The fold still happens:
; %t1 stays alive for the call, and a commuted shuffle feeds one merged 'or'.
1071define <4 x i32> @or_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
1072; CHECK-LABEL: @or_2_vars(
1073; CHECK-NEXT:    [[T1:%.*]] = or <4 x i32> [[V0:%.*]], <i32 1, i32 2, i32 3, i32 4>
1074; CHECK-NEXT:    call void @use_v4i32(<4 x i32> [[T1]])
1075; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
1076; CHECK-NEXT:    [[T3:%.*]] = or <4 x i32> [[TMP1]], <i32 5, i32 6, i32 3, i32 4>
1077; CHECK-NEXT:    ret <4 x i32> [[T3]]
1078;
1079  %t1 = or <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
1080  call void @use_v4i32(<4 x i32> %t1)
1081  %t2 = or <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
1082  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
1083  ret <4 x i32> %t3
1084}
1085
; %t1 has an extra use and mask lane 3 is undef; the fold still happens, with a
; poison mask lane in the new shuffle and undef in lane 3 of the blended constant.
1086define <4 x i32> @or_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
1087; CHECK-LABEL: @or_2_vars_undef_mask_elt(
1088; CHECK-NEXT:    [[T1:%.*]] = or <4 x i32> [[V0:%.*]], <i32 1, i32 2, i32 3, i32 4>
1089; CHECK-NEXT:    call void @use_v4i32(<4 x i32> [[T1]])
1090; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0]], <4 x i32> <i32 0, i32 1, i32 6, i32 poison>
1091; CHECK-NEXT:    [[T3:%.*]] = or <4 x i32> [[TMP1]], <i32 5, i32 6, i32 3, i32 undef>
1092; CHECK-NEXT:    ret <4 x i32> [[T3]]
1093;
1094  %t1 = or <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
1095  call void @use_v4i32(<4 x i32> %t1)
1096  %t2 = or <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
1097  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 undef>
1098  ret <4 x i32> %t3
1099}
1100
1101; But we don't transform if both intermediate values have extra uses.
1102
; Both %t1 and %t2 are also passed to @use_v4i32, so the expected output is the
; input unchanged: two xors, two calls, and the original shuffle.
1103define <4 x i32> @xor_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
1104; CHECK-LABEL: @xor_2_vars(
1105; CHECK-NEXT:    [[T1:%.*]] = xor <4 x i32> [[V0:%.*]], <i32 1, i32 2, i32 3, i32 4>
1106; CHECK-NEXT:    call void @use_v4i32(<4 x i32> [[T1]])
1107; CHECK-NEXT:    [[T2:%.*]] = xor <4 x i32> [[V1:%.*]], <i32 5, i32 6, i32 7, i32 8>
1108; CHECK-NEXT:    call void @use_v4i32(<4 x i32> [[T2]])
1109; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 0, i32 5, i32 2, i32 3>
1110; CHECK-NEXT:    ret <4 x i32> [[T3]]
1111;
1112  %t1 = xor <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
1113  call void @use_v4i32(<4 x i32> %t1)
1114  %t2 = xor <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
1115  call void @use_v4i32(<4 x i32> %t2)
1116  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 2, i32 3>
1117  ret <4 x i32> %t3
1118}
1119
1120; Div/rem need special handling if the shuffle has undef elements.
1121
; Constant dividends, variable divisors, no undef mask lanes: shuffle the
; divisors (operands commuted), then one udiv with the blended dividend <5,2,3,8>.
1122define <4 x i32> @udiv_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
1123; CHECK-LABEL: @udiv_2_vars(
1124; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0:%.*]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
1125; CHECK-NEXT:    [[T3:%.*]] = udiv <4 x i32> <i32 5, i32 2, i32 3, i32 8>, [[TMP1]]
1126; CHECK-NEXT:    ret <4 x i32> [[T3]]
1127;
1128  %t1 = udiv <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
1129  %t2 = udiv <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
1130  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 1, i32 2, i32 7>
1131  ret <4 x i32> %t3
1132}
1133
; Both udivs are 'exact' and the mask has no undef lanes; the merged udiv keeps
; the flag.
1134define <4 x i32> @udiv_2_vars_exact(<4 x i32> %v0, <4 x i32> %v1) {
1135; CHECK-LABEL: @udiv_2_vars_exact(
1136; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0:%.*]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
1137; CHECK-NEXT:    [[T3:%.*]] = udiv exact <4 x i32> <i32 5, i32 2, i32 3, i32 8>, [[TMP1]]
1138; CHECK-NEXT:    ret <4 x i32> [[T3]]
1139;
1140  %t1 = udiv exact <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
1141  %t2 = udiv exact <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
1142  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 1, i32 2, i32 7>
1143  ret <4 x i32> %t3
1144}
1145
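1146; TODO: This would require a new shuffle mask (replace undef with op0 or op1 lane). Otherwise, we have div-by-undef; a safe constant cannot help because the divisor is the variable operand.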
1147
; The divisor is the variable operand and mask lane 0 is undef. The expected
; output keeps both udivs and the shuffle; only the undef mask lane is printed
; as poison.
1148define <4 x i32> @udiv_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
1149; CHECK-LABEL: @udiv_2_vars_undef_mask_elt(
1150; CHECK-NEXT:    [[T1:%.*]] = udiv <4 x i32> <i32 1, i32 2, i32 3, i32 4>, [[V0:%.*]]
1151; CHECK-NEXT:    [[T2:%.*]] = udiv <4 x i32> <i32 5, i32 6, i32 7, i32 8>, [[V1:%.*]]
1152; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 poison, i32 1, i32 2, i32 7>
1153; CHECK-NEXT:    ret <4 x i32> [[T3]]
1154;
1155  %t1 = udiv <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
1156  %t2 = udiv <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
1157  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
1158  ret <4 x i32> %t3
1159}
1160
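1161; TODO: This would require a new shuffle mask (replace undef with op0 or op1 lane). Otherwise, we have div-by-undef; a safe constant cannot help because the divisor is the variable operand.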
1162
; 'exact' variant with a variable divisor and undef mask lane 0. The expected
; output keeps both 'udiv exact' instructions and the shuffle.
1163define <4 x i32> @udiv_2_vars_exact_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
1164; CHECK-LABEL: @udiv_2_vars_exact_undef_mask_elt(
1165; CHECK-NEXT:    [[T1:%.*]] = udiv exact <4 x i32> <i32 1, i32 2, i32 3, i32 4>, [[V0:%.*]]
1166; CHECK-NEXT:    [[T2:%.*]] = udiv exact <4 x i32> <i32 5, i32 6, i32 7, i32 8>, [[V1:%.*]]
1167; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 poison, i32 1, i32 2, i32 7>
1168; CHECK-NEXT:    ret <4 x i32> [[T3]]
1169;
1170  %t1 = udiv exact <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
1171  %t2 = udiv exact <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
1172  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 undef, i32 1, i32 2, i32 7>
1173  ret <4 x i32> %t3
1174}
1175
1176; If the shuffle has no undefs, it's safe to shuffle the variables first.
1177
; Variable dividends, constant divisors, no undef mask lanes: shuffle the
; dividends, then one sdiv by the blended divisor <1,2,7,4>.
1178define <4 x i32> @sdiv_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
1179; CHECK-LABEL: @sdiv_2_vars(
1180; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
1181; CHECK-NEXT:    [[T3:%.*]] = sdiv <4 x i32> [[TMP1]], <i32 1, i32 2, i32 7, i32 4>
1182; CHECK-NEXT:    ret <4 x i32> [[T3]]
1183;
1184  %t1 = sdiv <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
1185  %t2 = sdiv <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
1186  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
1187  ret <4 x i32> %t3
1188}
1189
; Both sdivs are 'exact' and the mask has no undef lanes; the merged sdiv keeps
; the flag.
1190define <4 x i32> @sdiv_2_vars_exact(<4 x i32> %v0, <4 x i32> %v1) {
1191; CHECK-LABEL: @sdiv_2_vars_exact(
1192; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 3>
1193; CHECK-NEXT:    [[T3:%.*]] = sdiv exact <4 x i32> [[TMP1]], <i32 1, i32 2, i32 7, i32 4>
1194; CHECK-NEXT:    ret <4 x i32> [[T3]]
1195;
1196  %t1 = sdiv exact <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
1197  %t2 = sdiv exact <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
1198  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
1199  ret <4 x i32> %t3
1200}
1201
1202; Div by undef is UB. Undef is replaced by safe constant.
1203
; Mask lane 3 is undef; the expected divisor constant uses 1 in that lane and
; the new shuffle gets a poison mask lane.
1204define <4 x i32> @sdiv_2_vars_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
1205; CHECK-LABEL: @sdiv_2_vars_undef_mask_elt(
1206; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 poison>
1207; CHECK-NEXT:    [[T3:%.*]] = sdiv <4 x i32> [[TMP1]], <i32 1, i32 2, i32 7, i32 1>
1208; CHECK-NEXT:    ret <4 x i32> [[T3]]
1209;
1210  %t1 = sdiv <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
1211  %t2 = sdiv <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
1212  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 undef>
1213  ret <4 x i32> %t3
1214}
1215
1216; Div by undef is UB. Undef is replaced by safe constant.
1217
; Inputs are 'sdiv exact' with undef mask lane 3; the expected output uses
; divisor 1 in that lane and still carries 'exact'.
1218define <4 x i32> @sdiv_2_vars_exact_undef_mask_elt(<4 x i32> %v0, <4 x i32> %v1) {
1219; CHECK-LABEL: @sdiv_2_vars_exact_undef_mask_elt(
1220; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 poison>
1221; CHECK-NEXT:    [[T3:%.*]] = sdiv exact <4 x i32> [[TMP1]], <i32 1, i32 2, i32 7, i32 1>
1222; CHECK-NEXT:    ret <4 x i32> [[T3]]
1223;
1224  %t1 = sdiv exact <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
1225  %t2 = sdiv exact <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
1226  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 undef>
1227  ret <4 x i32> %t3
1228}
1229
1230; If the shuffle has no undefs, it's safe to shuffle the variables first.
1231
; Constant dividends, variable divisors, no undef mask lanes: shuffle the
; divisors, then one urem with the blended dividend <1,2,7,8>.
1232define <4 x i32> @urem_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
1233; CHECK-LABEL: @urem_2_vars(
1234; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0:%.*]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
1235; CHECK-NEXT:    [[T3:%.*]] = urem <4 x i32> <i32 1, i32 2, i32 7, i32 8>, [[TMP1]]
1236; CHECK-NEXT:    ret <4 x i32> [[T3]]
1237;
1238  %t1 = urem <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
1239  %t2 = urem <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
1240  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
1241  ret <4 x i32> %t3
1242}
1243
; Despite the name, mask lane 1 is undef here and the divisor is the variable
; operand. The expected output keeps both srems and the shuffle; only the undef
; mask lane is printed as poison.
1244define <4 x i32> @srem_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
1245; CHECK-LABEL: @srem_2_vars(
1246; CHECK-NEXT:    [[T1:%.*]] = srem <4 x i32> <i32 1, i32 2, i32 3, i32 4>, [[V0:%.*]]
1247; CHECK-NEXT:    [[T2:%.*]] = srem <4 x i32> <i32 5, i32 6, i32 7, i32 8>, [[V1:%.*]]
1248; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x i32> [[T1]], <4 x i32> [[T2]], <4 x i32> <i32 0, i32 poison, i32 6, i32 3>
1249; CHECK-NEXT:    ret <4 x i32> [[T3]]
1250;
1251  %t1 = srem <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
1252  %t2 = srem <4 x i32> <i32 5, i32 6, i32 7, i32 8>, %v1
1253  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 undef, i32 6, i32 3>
1254  ret <4 x i32> %t3
1255}
1256
1257; Try FP ops/types.
1258
; FP variant with two variables: shuffle %v0/%v1 with the original mask, then
; one fadd with the blended constant <1,2,7,8>.
1259define <4 x float> @fadd_2_vars(<4 x float> %v0, <4 x float> %v1) {
1260; CHECK-LABEL: @fadd_2_vars(
1261; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[V0:%.*]], <4 x float> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
1262; CHECK-NEXT:    [[T3:%.*]] = fadd <4 x float> [[TMP1]], <float 1.000000e+00, float 2.000000e+00, float 7.000000e+00, float 8.000000e+00>
1263; CHECK-NEXT:    ret <4 x float> [[T3]]
1264;
1265  %t1 = fadd <4 x float> %v0, <float 1.0, float 2.0, float 3.0, float 4.0>
1266  %t2 = fadd <4 x float> %v1, <float 5.0, float 6.0, float 7.0, float 8.0>
1267  %t3 = shufflevector <4 x float> %t1, <4 x float> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
1268  ret <4 x float> %t3
1269}
1270
; Constants are operand 0 and mask lane 0 is undef: poison mask lane in the new
; shuffle, undef in lane 0 of the blended constant.
1271define <4 x double> @fsub_2_vars(<4 x double> %v0, <4 x double> %v1) {
1272; CHECK-LABEL: @fsub_2_vars(
1273; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[V0:%.*]], <4 x double> [[V1:%.*]], <4 x i32> <i32 poison, i32 1, i32 6, i32 7>
1274; CHECK-NEXT:    [[T3:%.*]] = fsub <4 x double> <double undef, double 2.000000e+00, double 7.000000e+00, double 8.000000e+00>, [[TMP1]]
1275; CHECK-NEXT:    ret <4 x double> [[T3]]
1276;
1277  %t1 = fsub <4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>, %v0
1278  %t2 = fsub <4 x double> <double 5.0, double 6.0, double 7.0, double 8.0>, %v1
1279  %t3 = shufflevector <4 x double> %t1, <4 x double> %t2, <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
1280  ret <4 x double> %t3
1281}
1282
1283; Intersect any FMF.
1284
; Both fmuls carry 'reassoc nsz' and the mask has no undef lanes, so the merged
; fmul keeps both flags.
1285define <4 x float> @fmul_2_vars(<4 x float> %v0, <4 x float> %v1) {
1286; CHECK-LABEL: @fmul_2_vars(
1287; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> [[V0:%.*]], <4 x float> [[V1:%.*]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
1288; CHECK-NEXT:    [[T3:%.*]] = fmul reassoc nsz <4 x float> [[TMP1]], <float 1.000000e+00, float 6.000000e+00, float 7.000000e+00, float 8.000000e+00>
1289; CHECK-NEXT:    ret <4 x float> [[T3]]
1290;
1291  %t1 = fmul reassoc nsz <4 x float> %v0, <float 1.0, float 2.0, float 3.0, float 4.0>
1292  %t2 = fmul reassoc nsz <4 x float> %v1, <float 5.0, float 6.0, float 7.0, float 8.0>
1293  %t3 = shufflevector <4 x float> %t1, <4 x float> %t2, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
1294  ret <4 x float> %t3
1295}
1296
; Inputs carry 'nnan ninf' and 'nnan arcp', and mask lane 0 is undef. The
; expected frem has no fast-math flags and undef in lane 0 of its constant.
; NOTE(review): presumably the shared 'nnan' is dropped because of the undef
; mask lane - confirm against the InstCombine implementation.
1297define <4 x double> @frem_2_vars(<4 x double> %v0, <4 x double> %v1) {
1298; CHECK-LABEL: @frem_2_vars(
1299; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[V0:%.*]], <4 x double> [[V1:%.*]], <4 x i32> <i32 poison, i32 1, i32 6, i32 7>
1300; CHECK-NEXT:    [[T3:%.*]] = frem <4 x double> <double undef, double 2.000000e+00, double 7.000000e+00, double 8.000000e+00>, [[TMP1]]
1301; CHECK-NEXT:    ret <4 x double> [[T3]]
1302;
1303  %t1 = frem nnan ninf <4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>, %v0
1304  %t2 = frem nnan arcp <4 x double> <double 5.0, double 6.0, double 7.0, double 8.0>, %v1
1305  %t3 = shufflevector <4 x double> %t1, <4 x double> %t2, <4 x i32> <i32 undef, i32 1, i32 6, i32 7>
1306  ret <4 x double> %t3
1307}
1308
1309; The variable operand must be either the first operand or second operand in both binops.
1310
; The variable is operand 1 of the first fdiv but operand 0 of the second, so
; the binops are not merged. The expected output only replaces the constant
; lanes that the shuffle never reads with poison.
1311define <4 x double> @fdiv_2_vars(<4 x double> %v0, <4 x double> %v1) {
1312; CHECK-LABEL: @fdiv_2_vars(
1313; CHECK-NEXT:    [[T1:%.*]] = fdiv <4 x double> <double 1.000000e+00, double 2.000000e+00, double poison, double poison>, [[V0:%.*]]
1314; CHECK-NEXT:    [[T2:%.*]] = fdiv <4 x double> [[V1:%.*]], <double poison, double poison, double 7.000000e+00, double 8.000000e+00>
1315; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x double> [[T1]], <4 x double> [[T2]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
1316; CHECK-NEXT:    ret <4 x double> [[T3]]
1317;
1318  %t1 = fdiv <4 x double> <double 1.0, double 2.0, double 3.0, double 4.0>, %v0
1319  %t2 = fdiv <4 x double> %v1, <double 5.0, double 6.0, double 7.0, double 8.0>
1320  %t3 = shufflevector <4 x double> %t1, <4 x double> %t2, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
1321  ret <4 x double> %t3
1322}
1323
1324; Shift-left with constant shift amount can be converted to mul to enable the fold.
1325
; Lanes 0-1 come from the shl (mask <4,5,2,3>): shift amounts 5 and 6 appear as
; multipliers 32 and 64, merged with the mul's lanes 2-3. 'nuw' is on both
; inputs and on the expected mul.
1326define <4 x i32> @mul_shl(<4 x i32> %v0) {
1327; CHECK-LABEL: @mul_shl(
1328; CHECK-NEXT:    [[T3:%.*]] = mul nuw <4 x i32> [[V0:%.*]], <i32 32, i32 64, i32 3, i32 4>
1329; CHECK-NEXT:    ret <4 x i32> [[T3]]
1330;
1331  %t1 = mul nuw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
1332  %t2 = shl nuw <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
1333  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
1334  ret <4 x i32> %t3
1335}
1336
1337; Try with shift as operand 0 of the shuffle; 'nsw' is dropped for safety, but that could be improved.
1338
1339define <4 x i32> @shl_mul(<4 x i32> %v0) {
1340; CHECK-LABEL: @shl_mul(
1341; CHECK-NEXT:    [[T3:%.*]] = mul <4 x i32> [[V0:%.*]], <i32 5, i32 undef, i32 8, i32 16>
1342; CHECK-NEXT:    ret <4 x i32> [[T3]]
1343;
; The mask <4, undef, 2, 3> reads lane 0 from the mul (%t2, constant 5), leaves
; lane 1 undefined, and reads lanes 2-3 from the shl (%t1, shift by 3 and 4,
; i.e. multiply by 8 and 16). The expected mul carries no 'nsw' flag and has
; an undef constant in lane 1.
1344  %t1 = shl nsw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
1345  %t2 = mul nsw <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
1346  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 undef, i32 2, i32 3>
1347  ret <4 x i32> %t3
1348}
1349
1350; Demanded elements + simplification can remove the mul alone, but that's not the best case.
1351
1352define <4 x i32> @mul_is_nop_shl(<4 x i32> %v0) {
1353; CHECK-LABEL: @mul_is_nop_shl(
1354; CHECK-NEXT:    [[T3:%.*]] = shl <4 x i32> [[V0:%.*]], <i32 0, i32 6, i32 7, i32 8>
1355; CHECK-NEXT:    ret <4 x i32> [[T3]]
1356;
; The mask <0, 5, 6, 7> reads only lane 0 from the mul (%t1), where the
; multiplier is 1, and lanes 1-3 from the shl (%t2). The expected output is a
; single shl whose lane-0 shift amount is 0 (a no-op for that lane).
1357  %t1 = mul <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
1358  %t2 = shl <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
1359  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
1360  ret <4 x i32> %t3
1361}
1362
1363; Negative test: shift amount (operand 1) must be constant.
1364
1365define <4 x i32> @shl_mul_not_constant_shift_amount(<4 x i32> %v0) {
1366; CHECK-LABEL: @shl_mul_not_constant_shift_amount(
1367; CHECK-NEXT:    [[T1:%.*]] = shl <4 x i32> <i32 1, i32 2, i32 3, i32 4>, [[V0:%.*]]
1368; CHECK-NEXT:    [[T2:%.*]] = mul <4 x i32> [[V0]], <i32 5, i32 6, i32 poison, i32 poison>
1369; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x i32> [[T2]], <4 x i32> [[T1]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
1370; CHECK-NEXT:    ret <4 x i32> [[T3]]
1371;
; Here the shl (%t1) shifts a constant vector by the variable %v0, so it cannot
; be rewritten as a multiply by a constant. The expected output keeps both
; binops; the shuffle is emitted with its operands swapped (mask <0, 1, 6, 7>)
; and the mul constant lanes that are not read become poison.
1372  %t1 = shl <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %v0
1373  %t2 = mul <4 x i32> %v0, <i32 5, i32 6, i32 7, i32 8>
1374  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
1375  ret <4 x i32> %t3
1376}
1377
1378; Try with 2 variable inputs.
1379
1380define <4 x i32> @mul_shl_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
1381; CHECK-LABEL: @mul_shl_2_vars(
1382; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
1383; CHECK-NEXT:    [[T3:%.*]] = mul nuw <4 x i32> [[TMP1]], <i32 32, i32 64, i32 3, i32 4>
1384; CHECK-NEXT:    ret <4 x i32> [[T3]]
1385;
; Same pattern as @mul_shl, but the mul and the shl use different variables.
; The expected output first selects between the variables (%v1 in lanes 0-1,
; %v0 in lanes 2-3) and then applies one 'mul nuw' by <32, 64, 3, 4>.
1386  %t1 = mul nuw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
1387  %t2 = shl nuw <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
1388  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
1389  ret <4 x i32> %t3
1390}
1391
1392define <4 x i32> @shl_mul_2_vars(<4 x i32> %v0, <4 x i32> %v1) {
1393; CHECK-LABEL: @shl_mul_2_vars(
1394; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0:%.*]], <4 x i32> <i32 0, i32 poison, i32 6, i32 7>
1395; CHECK-NEXT:    [[T3:%.*]] = mul <4 x i32> [[TMP1]], <i32 5, i32 undef, i32 8, i32 16>
1396; CHECK-NEXT:    ret <4 x i32> [[T3]]
1397;
; Two-variable version of @shl_mul: the shl is operand 0 of the shuffle and the
; mask has an undef element in lane 1. The expected output selects between the
; variables (lane 1 of the new mask is poison) and applies one flag-less mul
; by <5, undef, 8, 16>.
1398  %t1 = shl nsw <4 x i32> %v0, <i32 1, i32 2, i32 3, i32 4>
1399  %t2 = mul nsw <4 x i32> %v1, <i32 5, i32 6, i32 7, i32 8>
1400  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 undef, i32 2, i32 3>
1401  ret <4 x i32> %t3
1402}
1403
1404; Negate can be converted to mul to enable the fold.
1405
1406define <4 x i32> @mul_neg(<4 x i32> %x) {
1407; CHECK-LABEL: @mul_neg(
1408; CHECK-NEXT:    [[R:%.*]] = mul <4 x i32> [[X:%.*]], <i32 257, i32 -3, i32 -1, i32 -9>
1409; CHECK-NEXT:    ret <4 x i32> [[R]]
1410;
; %n computes '0 - %x' (a negate) in lane 2 only; its other lanes are poison.
; The mask <0, 1, 6, 3> reads lane 2 from %n and the remaining lanes from %m,
; so the expected output is a single mul with -1 as the lane-2 multiplier.
1411  %m = mul <4 x i32> %x, <i32 257, i32 -3, i32 poison, i32 -9>
1412  %n = sub <4 x i32> <i32 poison, i32 poison, i32 0, i32 poison>, %x
1413  %r = shufflevector <4 x i32> %m, <4 x i32> %n, <4 x i32> <i32 0, i32 1, i32 6, i32 3>
1414  ret <4 x i32> %r
1415}
1416
1417define <3 x i79> @neg_mul(<3 x i79> %x) {
1418; CHECK-LABEL: @neg_mul(
1419; CHECK-NEXT:    [[R:%.*]] = mul nsw <3 x i79> [[X:%.*]], <i79 -1, i79 -3, i79 -1>
1420; CHECK-NEXT:    ret <3 x i79> [[R]]
1421;
; Negate as operand 0 of the shuffle, using a <3 x i79> vector type.
; The mask <0, 4, 2> reads lanes 0 and 2 from the negate (%n) and lane 1 from
; the mul (%m). Both inputs are 'nsw' and the expected mul keeps 'nsw'.
1422  %n = sub nsw <3 x i79> <i79 0, i79 poison, i79 0>, %x
1423  %m = mul nsw <3 x i79> %x, <i79 poison, i79 -3, i79 poison>
1424  %r = shufflevector <3 x i79> %n, <3 x i79> %m, <3 x i32> <i32 0, i32 4, i32 2>
1425  ret <3 x i79> %r
1426}
1427
1428define <4 x i32> @mul_neg_2_vars(<4 x i32> %x, <4 x i32> %y) {
1429; CHECK-LABEL: @mul_neg_2_vars(
1430; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> [[Y:%.*]], <4 x i32> <i32 0, i32 5, i32 6, i32 3>
1431; CHECK-NEXT:    [[R:%.*]] = mul <4 x i32> [[TMP1]], <i32 42, i32 -1, i32 -1, i32 6>
1432; CHECK-NEXT:    ret <4 x i32> [[R]]
1433;
; Two-variable mul/negate: the mul uses %x and the negate uses %y.
; The mul is 'nuw' only and the negate is 'nsw' only; the expected mul has no
; wrap flags. The variables are selected with the original mask <0, 5, 6, 3>
; and then multiplied by <42, -1, -1, 6>.
1434  %m = mul nuw <4 x i32> %x, <i32 42, i32 poison, i32 poison, i32 6>
1435  %n = sub nsw <4 x i32> <i32 poison, i32 0, i32 0, i32 poison>, %y
1436  %r = shufflevector <4 x i32> %m, <4 x i32> %n, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
1437  ret <4 x i32> %r
1438}
1439
1440define <4 x i32> @neg_mul_2_vars(<4 x i32> %x, <4 x i32> %y) {
1441; CHECK-LABEL: @neg_mul_2_vars(
1442; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> [[X:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
1443; CHECK-NEXT:    [[R:%.*]] = mul nsw <4 x i32> [[TMP1]], <i32 -1, i32 42, i32 -1, i32 6>
1444; CHECK-NEXT:    ret <4 x i32> [[R]]
1445;
; Two-variable negate/mul with the negate as operand 0 of the shuffle.
; The negate is 'nsw' and the mul is 'nuw nsw'; the expected mul keeps only
; 'nsw', the flag present on both inputs.
1446  %n = sub nsw <4 x i32> <i32 0, i32 poison, i32 0, i32 poison>, %y
1447  %m = mul nuw nsw <4 x i32> %x, <i32 poison, i32 42, i32 poison, i32 6>
1448  %r = shufflevector <4 x i32> %n, <4 x i32> %m, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
1449  ret <4 x i32> %r
1450}
1451
1452; Or with constant can be converted to add to enable the fold.
1453; The 'shl' is here to allow analysis to determine that the 'or' can be transformed to 'add'.
1454; TODO: The 'or' constant is limited to a splat.
1455
1456define <4 x i32> @add_or(<4 x i32> %v) {
1457; CHECK-LABEL: @add_or(
1458; CHECK-NEXT:    [[V0:%.*]] = shl <4 x i32> [[V:%.*]], splat (i32 5)
1459; CHECK-NEXT:    [[T3:%.*]] = add <4 x i32> [[V0]], <i32 31, i32 31, i32 65536, i32 65537>
1460; CHECK-NEXT:    ret <4 x i32> [[T3]]
1461;
; The mask <4, 5, 2, 3> reads lanes 0-1 from the 'or' (%t2) and lanes 2-3 from
; the 'add' (%t1). The low 5 bits of %v0 are zero after the shl, so 'or 31'
; cannot carry and equals 'add 31'. The expected output is the shl followed by
; a single add of <31, 31, 65536, 65537>.
1462  %v0 = shl <4 x i32> %v, <i32 5, i32 5, i32 5, i32 5>                   ; clear the bottom bits
1463  %t1 = add <4 x i32> %v0, <i32 65534, i32 65535, i32 65536, i32 65537>  ; this can't be converted to 'or'
1464  %t2 = or <4 x i32> %v0, <i32 31, i32 31, i32 31, i32 31>               ; set the bottom bits
1465  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
1466  ret <4 x i32> %t3
1467}
1468
1469define <4 x i32> @add_or_disjoint(<4 x i32> %v) {
1470; CHECK-LABEL: @add_or_disjoint(
1471; CHECK-NEXT:    [[T3:%.*]] = add <4 x i32> [[V:%.*]], <i32 31, i32 31, i32 65536, i32 65537>
1472; CHECK-NEXT:    ret <4 x i32> [[T3]]
1473;
; Variant of @add_or without the masking shl: the 'or' carries the 'disjoint'
; flag instead. The expected output is a single add applied directly to %v.
1474  %t1 = add <4 x i32> %v, <i32 65534, i32 65535, i32 65536, i32 65537>
1475  %t2 = or disjoint <4 x i32> %v, <i32 31, i32 31, i32 31, i32 31>
1476  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
1477  ret <4 x i32> %t3
1478}
1479
1480; Try with 'or' as operand 0 of the shuffle.
1481
1482define <4 x i8> @or_add(<4 x i8> %v) {
1483; CHECK-LABEL: @or_add(
1484; CHECK-NEXT:    [[V0:%.*]] = lshr <4 x i8> [[V:%.*]], splat (i8 3)
1485; CHECK-NEXT:    [[T3:%.*]] = add nuw nsw <4 x i8> [[V0]], <i8 1, i8 2, i8 -64, i8 -64>
1486; CHECK-NEXT:    ret <4 x i8> [[T3]]
1487;
; The mask <4, 5, 2, 3> reads lanes 0-1 from the 'add' (%t2) and lanes 2-3 from
; the 'or' (%t1). The lshr by 3 clears the top three bits, so or-ing in 192
; (0b11000000, printed as i8 -64) cannot overlap set bits. The expected output
; is the lshr followed by one 'add nuw nsw' of <1, 2, -64, -64>.
1488  %v0 = lshr <4 x i8> %v, <i8 3, i8 3, i8 3, i8 3>          ; clear the top bits
1489  %t1 = or <4 x i8> %v0, <i8 192, i8 192, i8 192, i8 192>   ; set some top bits
1490  %t2 = add nsw nuw <4 x i8> %v0, <i8 1, i8 2, i8 3, i8 4>  ; this can't be converted to 'or'
1491  %t3 = shufflevector <4 x i8> %t1, <4 x i8> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
1492  ret <4 x i8> %t3
1493}
1494
1495; Negative test: not all 'or' insts can be converted to 'add'.
1496
1497define <4 x i8> @or_add_not_enough_masking(<4 x i8> %v) {
1498; CHECK-LABEL: @or_add_not_enough_masking(
1499; CHECK-NEXT:    [[V0:%.*]] = lshr <4 x i8> [[V:%.*]], splat (i8 1)
1500; CHECK-NEXT:    [[T1:%.*]] = or <4 x i8> [[V0]], <i8 poison, i8 poison, i8 -64, i8 -64>
1501; CHECK-NEXT:    [[T2:%.*]] = add nuw nsw <4 x i8> [[V0]], <i8 1, i8 2, i8 poison, i8 poison>
1502; CHECK-NEXT:    [[T3:%.*]] = shufflevector <4 x i8> [[T2]], <4 x i8> [[T1]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
1503; CHECK-NEXT:    ret <4 x i8> [[T3]]
1504;
; Same shape as @or_add, but the lshr by 1 clears only the top bit while the
; 'or' constant 192 (0b11000000) also sets bit 6, which may already be set in
; %v0. The expected output keeps the 'or', the 'add' and the shuffle; the
; shuffle operands are swapped and unread constant lanes become poison.
1505  %v0 = lshr <4 x i8> %v, <i8 1, i8 1, i8 1, i8 1>          ; clear not enough top bits
1506  %t1 = or <4 x i8> %v0, <i8 192, i8 192, i8 192, i8 192>   ; set some top bits
1507  %t2 = add nsw nuw <4 x i8> %v0, <i8 1, i8 2, i8 3, i8 4>  ; this can't be converted to 'or'
1508  %t3 = shufflevector <4 x i8> %t1, <4 x i8> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
1509  ret <4 x i8> %t3
1510}
1511
1512; Try with 2 variable inputs.
1513
1514define <4 x i32> @add_or_2_vars(<4 x i32> %v, <4 x i32> %v1) {
1515; CHECK-LABEL: @add_or_2_vars(
1516; CHECK-NEXT:    [[V0:%.*]] = shl <4 x i32> [[V:%.*]], splat (i32 5)
1517; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V0]], <4 x i32> [[V1:%.*]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
1518; CHECK-NEXT:    [[T3:%.*]] = add <4 x i32> [[TMP1]], <i32 31, i32 31, i32 65536, i32 65537>
1519; CHECK-NEXT:    ret <4 x i32> [[T3]]
1520;
; Two-variable version of @add_or: the 'add' uses %v1 and the 'or' uses the
; shifted value %v0. The expected output selects %v0 in lanes 0-1 and %v1 in
; lanes 2-3, then applies one add of <31, 31, 65536, 65537>.
1521  %v0 = shl <4 x i32> %v, <i32 5, i32 5, i32 5, i32 5>                   ; clear the bottom bits
1522  %t1 = add <4 x i32> %v1, <i32 65534, i32 65535, i32 65536, i32 65537>  ; this can't be converted to 'or'
1523  %t2 = or <4 x i32> %v0, <i32 31, i32 31, i32 31, i32 31>               ; set the bottom bits
1524  %t3 = shufflevector <4 x i32> %t1, <4 x i32> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
1525  ret <4 x i32> %t3
1526}
1527
1528define <4 x i8> @or_add_2_vars(<4 x i8> %v, <4 x i8> %v1) {
1529; CHECK-LABEL: @or_add_2_vars(
1530; CHECK-NEXT:    [[V0:%.*]] = lshr <4 x i8> [[V:%.*]], splat (i8 3)
1531; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i8> [[V1:%.*]], <4 x i8> [[V0]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
1532; CHECK-NEXT:    [[T3:%.*]] = add nuw nsw <4 x i8> [[TMP1]], <i8 1, i8 2, i8 -64, i8 -64>
1533; CHECK-NEXT:    ret <4 x i8> [[T3]]
1534;
; Two-variable version of @or_add: the 'or' uses the shifted value %v0 and the
; 'add' uses %v1. The expected output selects %v1 in lanes 0-1 and %v0 in
; lanes 2-3, then applies one 'add nuw nsw' of <1, 2, -64, -64>.
1535  %v0 = lshr <4 x i8> %v, <i8 3, i8 3, i8 3, i8 3>          ; clear the top bits
1536  %t1 = or <4 x i8> %v0, <i8 192, i8 192, i8 192, i8 192>   ; set some top bits
1537  %t2 = add nsw nuw <4 x i8> %v1, <i8 1, i8 2, i8 3, i8 4>  ; this can't be converted to 'or'
1538  %t3 = shufflevector <4 x i8> %t1, <4 x i8> %t2, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
1539  ret <4 x i8> %t3
1540}
1541
1542; The undef operand is used to simplify the shuffle mask, but don't assert that too soon.
1543
1544define <4 x i32> @PR41419(<4 x i32> %v) {
1545; CHECK-LABEL: @PR41419(
1546; CHECK-NEXT:    [[S:%.*]] = shufflevector <4 x i32> [[V:%.*]], <4 x i32> <i32 undef, i32 undef, i32 poison, i32 undef>, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
1547; CHECK-NEXT:    ret <4 x i32> [[S]]
1548;
; Select-shuffle of %v with an undef vector: only lane 2 comes from %v.
; The expected output keeps the shuffle and its mask; in the second operand,
; lane 2 (the one lane the mask does not read from it) becomes poison.
1549  %s = shufflevector <4 x i32> %v, <4 x i32> undef, <4 x i32> <i32 4, i32 5, i32 2, i32 7>
1550  ret <4 x i32> %s
1551}
1552
1553; The shuffle masks in the next 4 tests are identical to make it easier
1554; to see that we are choosing the correct elements in the new shuffle.
1555
1556define <5 x i4> @sel_common_op_commute0(<5 x i4> %x, <5 x i4> %y) {
1557; CHECK-LABEL: @sel_common_op_commute0(
1558; CHECK-NEXT:    [[S2:%.*]] = shufflevector <5 x i4> [[X:%.*]], <5 x i4> [[Y:%.*]], <5 x i32> <i32 0, i32 6, i32 2, i32 3, i32 4>
1559; CHECK-NEXT:    ret <5 x i4> [[S2]]
1560;
; %x is operand 0 of both shuffles; %s1 is operand 1 of %s2.
; %s1 = [x0, y1, x2, x3, y4], and %s2 reads lanes 1-2 from %s1 and the rest
; from %x, giving [x0, y1, x2, x3, x4]: one select-shuffle of %x and %y.
1561  %s1 = shufflevector <5 x i4> %x, <5 x i4> %y, <5 x i32> <i32 0, i32 6, i32 2, i32 3, i32 9>
1562  %s2 = shufflevector <5 x i4> %x, <5 x i4> %s1, <5 x i32> <i32 0, i32 6, i32 7, i32 3, i32 4>
1563  ret <5 x i4> %s2
1564}
1565
1566define <5 x i4> @sel_common_op_commute1(<5 x i4> %x, <5 x i4> %y) {
1567; CHECK-LABEL: @sel_common_op_commute1(
1568; CHECK-NEXT:    [[S2:%.*]] = shufflevector <5 x i4> [[X:%.*]], <5 x i4> [[Y:%.*]], <5 x i32> <i32 0, i32 1, i32 7, i32 3, i32 4>
1569; CHECK-NEXT:    ret <5 x i4> [[S2]]
1570;
; %x is operand 1 of %s1 and operand 0 of %s2; %s1 is operand 1 of %s2.
; %s1 = [y0, x1, y2, y3, x4], and %s2 reads lanes 1-2 from %s1 and the rest
; from %x, giving [x0, x1, y2, x3, x4]: one select-shuffle of %x and %y.
1571  %s1 = shufflevector <5 x i4> %y, <5 x i4> %x, <5 x i32> <i32 0, i32 6, i32 2, i32 3, i32 9>
1572  %s2 = shufflevector <5 x i4> %x, <5 x i4> %s1, <5 x i32> <i32 0, i32 6, i32 7, i32 3, i32 4>
1573  ret <5 x i4> %s2
1574}
1575
1576define <5 x i4> @sel_common_op_commute2(<5 x i4> %x, <5 x i4> %y) {
1577; CHECK-LABEL: @sel_common_op_commute2(
1578; CHECK-NEXT:    [[S2:%.*]] = shufflevector <5 x i4> [[X:%.*]], <5 x i4> [[Y:%.*]], <5 x i32> <i32 0, i32 1, i32 2, i32 3, i32 9>
1579; CHECK-NEXT:    ret <5 x i4> [[S2]]
1580;
; %x is operand 0 of %s1 and operand 1 of %s2; %s1 is operand 0 of %s2.
; %s1 = [x0, y1, x2, x3, y4], and %s2 reads lanes 0, 3, 4 from %s1 and lanes
; 1-2 from %x, giving [x0, x1, x2, x3, y4]: one select-shuffle of %x and %y.
1581  %s1 = shufflevector <5 x i4> %x, <5 x i4> %y, <5 x i32> <i32 0, i32 6, i32 2, i32 3, i32 9>
1582  %s2 = shufflevector <5 x i4> %s1, <5 x i4> %x, <5 x i32> <i32 0, i32 6, i32 7, i32 3, i32 4>
1583  ret <5 x i4> %s2
1584}
1585
1586define <5 x i4> @sel_common_op_commute3(<5 x i4> %x, <5 x i4> %y) {
1587; CHECK-LABEL: @sel_common_op_commute3(
1588; CHECK-NEXT:    [[S2:%.*]] = shufflevector <5 x i4> [[Y:%.*]], <5 x i4> [[X:%.*]], <5 x i32> <i32 0, i32 6, i32 7, i32 3, i32 9>
1589; CHECK-NEXT:    ret <5 x i4> [[S2]]
1590;
; %x is operand 1 of both shuffles; %s1 is operand 0 of %s2.
; %s1 = [y0, x1, y2, y3, x4], and %s2 reads lanes 0, 3, 4 from %s1 and lanes
; 1-2 from %x, giving [y0, x1, x2, y3, x4]: one select-shuffle of %y and %x.
1591  %s1 = shufflevector <5 x i4> %y, <5 x i4> %x, <5 x i32> <i32 0, i32 6, i32 2, i32 3, i32 9>
1592  %s2 = shufflevector <5 x i4> %s1, <5 x i4> %x, <5 x i32> <i32 0, i32 6, i32 7, i32 3, i32 4>
1593  ret <5 x i4> %s2
1594}
1595
1596define <5 x i4> @sel_common_op_commute3_poison_mask_elts(<5 x i4> %x, <5 x i4> %y) {
1597; CHECK-LABEL: @sel_common_op_commute3_poison_mask_elts(
1598; CHECK-NEXT:    [[S2:%.*]] = shufflevector <5 x i4> [[Y:%.*]], <5 x i4> [[X:%.*]], <5 x i32> <i32 0, i32 6, i32 poison, i32 poison, i32 9>
1599; CHECK-NEXT:    ret <5 x i4> [[S2]]
1600;
; Same operand layout as @sel_common_op_commute3, with poison mask elements:
; lane 3 of %s1's mask and lane 2 of %s2's mask. %s2 reads lane 3 from %s1
; (poison there), so lanes 2 and 3 are both poison in the expected mask.
1601  %s1 = shufflevector <5 x i4> %y, <5 x i4> %x, <5 x i32> <i32 0, i32 6, i32 2, i32 poison, i32 9>
1602  %s2 = shufflevector <5 x i4> %s1, <5 x i4> %x, <5 x i32> <i32 0, i32 6, i32 poison, i32 3, i32 4>
1603  ret <5 x i4> %s2
1604}
1605
1606; negative test - need shared operand
1607
1608define <5 x i4> @sel_not_common_op_commute3(<5 x i4> %x, <5 x i4> %y, <5 x i4> %z) {
1609; CHECK-LABEL: @sel_not_common_op_commute3(
1610; CHECK-NEXT:    [[S1:%.*]] = shufflevector <5 x i4> [[Y:%.*]], <5 x i4> [[Z:%.*]], <5 x i32> <i32 0, i32 poison, i32 poison, i32 3, i32 9>
1611; CHECK-NEXT:    [[S2:%.*]] = shufflevector <5 x i4> [[S1]], <5 x i4> [[X:%.*]], <5 x i32> <i32 0, i32 6, i32 7, i32 3, i32 4>
1612; CHECK-NEXT:    ret <5 x i4> [[S2]]
1613;
; %s1 shuffles %y and %z while %s2 shuffles %s1 and %x, so the two shuffles
; share no source operand and the expected output keeps both of them.
; Lanes 1-2 of %s1's mask become poison because %s2 takes those lanes from %x.
1614  %s1 = shufflevector <5 x i4> %y, <5 x i4> %z, <5 x i32> <i32 0, i32 6, i32 2, i32 3, i32 9>
1615  %s2 = shufflevector <5 x i4> %s1, <5 x i4> %x, <5 x i32> <i32 0, i32 6, i32 7, i32 3, i32 4>
1616  ret <5 x i4> %s2
1617}
1618
1619; negative test - need "select" shuffle, no lane changes
1620
1621define <5 x i4> @not_sel_common_op(<5 x i4> %x, <5 x i4> %y) {
1622; CHECK-LABEL: @not_sel_common_op(
1623; CHECK-NEXT:    [[S1:%.*]] = shufflevector <5 x i4> [[Y:%.*]], <5 x i4> [[X:%.*]], <5 x i32> <i32 poison, i32 6, i32 poison, i32 3, i32 9>
1624; CHECK-NEXT:    [[S2:%.*]] = shufflevector <5 x i4> [[S1]], <5 x i4> [[X]], <5 x i32> <i32 1, i32 6, i32 7, i32 3, i32 4>
1625; CHECK-NEXT:    ret <5 x i4> [[S2]]
1626;
; Lane 0 of %s2's mask is index 1, which moves an element across lanes, so
; %s2 is not a select-shuffle and the expected output keeps both shuffles.
; %s2 reads only lanes 1, 3 and 4 of %s1, so lanes 0 and 2 of %s1's mask
; become poison.
1627  %s1 = shufflevector <5 x i4> %y, <5 x i4> %x, <5 x i32> <i32 0, i32 6, i32 2, i32 3, i32 9>
1628  %s2 = shufflevector <5 x i4> %s1, <5 x i4> %x, <5 x i32> <i32 1, i32 6, i32 7, i32 3, i32 4>
1629  ret <5 x i4> %s2
1630}
1631
1632; extra use is ok
1633
1634define <4 x i32> @sel_common_op_extra_use(<4 x i32> %x, <4 x i32> %y) {
1635; CHECK-LABEL: @sel_common_op_extra_use(
1636; CHECK-NEXT:    [[S1:%.*]] = shufflevector <4 x i32> [[Y:%.*]], <4 x i32> [[X:%.*]], <4 x i32> <i32 0, i32 5, i32 2, i32 7>
1637; CHECK-NEXT:    call void @use_v4i32(<4 x i32> [[S1]])
1638; CHECK-NEXT:    [[S2:%.*]] = shufflevector <4 x i32> [[Y]], <4 x i32> [[X]], <4 x i32> <i32 0, i32 5, i32 6, i32 7>
1639; CHECK-NEXT:    ret <4 x i32> [[S2]]
1640;
; %s1 is also passed to @use_v4i32, so it must stay. %s1 = [y0, x1, y2, x3];
; %s2 reads lanes 0-1 from %s1 and lanes 2-3 from %x, giving [y0, x1, x2, x3].
; The expected %s2 is rewritten to shuffle %y and %x directly instead of %s1.
1641  %s1 = shufflevector <4 x i32> %y, <4 x i32> %x, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
1642  call void @use_v4i32(<4 x i32> %s1)
1643  %s2 = shufflevector <4 x i32> %s1, <4 x i32> %x, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
1644  ret <4 x i32> %s2
1645}
1646
1647define <4 x float> @identity_mask(<4 x float>%x, <4 x float> %y) {
1648; CHECK-LABEL: @identity_mask(
1649; CHECK-NEXT:    [[S2:%.*]] = shufflevector <4 x float> [[X:%.*]], <4 x float> poison, <4 x i32> <i32 0, i32 poison, i32 2, i32 3>
1650; CHECK-NEXT:    ret <4 x float> [[S2]]
1651;
; %s1 = [x0, y1, undef, undef]; %s2 reads lane 0 from %s1 (x0), leaves lane 1
; undefined, and reads lanes 2-3 from %x. No element of %y reaches the result,
; so the expected output is a single-source shuffle of %x with a poison second
; operand and mask <0, poison, 2, 3>.
1652  %s1 = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> <i32 0, i32 5, i32 undef, i32 undef>
1653  %s2 = shufflevector <4 x float> %s1, <4 x float> %x, <4 x i32> <i32 0, i32 undef, i32 6, i32 7>
1654  ret <4 x float> %s2
1655}
1656