; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=instcombine -S | FileCheck %s

; rdar://5992453
; bswap(a) >> 24 --> a & 255
define i32 @test4(i32 %a) {
; CHECK-LABEL: @test4(
; CHECK-NEXT:    [[T2:%.*]] = and i32 [[A:%.*]], 255
; CHECK-NEXT:    ret i32 [[T2]]
;
  %t2 = call i32 @llvm.bswap.i32( i32 %a )
  %t4 = lshr i32 %t2, 24
  ret i32 %t4
}

; bswap(a) & 255 --> a >> 24
define i32 @test6(i32 %a) {
; CHECK-LABEL: @test6(
; CHECK-NEXT:    [[T2:%.*]] = lshr i32 [[A:%.*]], 24
; CHECK-NEXT:    ret i32 [[T2]]
;
  %t2 = call i32 @llvm.bswap.i32( i32 %a )
  %t4 = and i32 %t2, 255
  ret i32 %t4
}

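; Commute bswap with a logical shift by a byte multiple:
; bswap(lshr(x, C)) --> shl(bswap(x), C) when C is a multiple of 8.
; Worked example with x = 0xAABBCCDD and C = 8:
;   bswap(x >> 8) = bswap(0x00AABBCC) = 0xCCBBAA00 = bswap(x) << 8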
define i32 @lshr8_i32(i32 %x) {
; CHECK-LABEL: @lshr8_i32(
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.bswap.i32(i32 [[X:%.*]])
; CHECK-NEXT:    [[R:%.*]] = shl i32 [[TMP1]], 8
; CHECK-NEXT:    ret i32 [[R]]
;
  %s = lshr i32 %x, 8
  %r = call i32 @llvm.bswap.i32(i32 %s)
  ret i32 %r
}

define <2 x i32> @lshr16_v2i32(<2 x i32> %x) {
; CHECK-LABEL: @lshr16_v2i32(
; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[X:%.*]])
; CHECK-NEXT:    [[R:%.*]] = shl <2 x i32> [[TMP1]], splat (i32 16)
; CHECK-NEXT:    ret <2 x i32> [[R]]
;
  %s = lshr <2 x i32> %x, <i32 16, i32 16>
  %r = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %s)
  ret <2 x i32> %r
}

define i32 @lshr24_i32(i32 %x) {
; CHECK-LABEL: @lshr24_i32(
; CHECK-NEXT:    [[R:%.*]] = and i32 [[X:%.*]], -16777216
; CHECK-NEXT:    ret i32 [[R]]
;
  %s = lshr i32 %x, 24
  %r = call i32 @llvm.bswap.i32(i32 %s)
  ret i32 %r
}

; negative test - the shift amount must be a multiple of 8

define i32 @lshr12_i32(i32 %x) {
; CHECK-LABEL: @lshr12_i32(
; CHECK-NEXT:    [[S:%.*]] = lshr i32 [[X:%.*]], 12
; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.bswap.i32(i32 [[S]])
; CHECK-NEXT:    ret i32 [[R]]
;
  %s = lshr i32 %x, 12
  %r = call i32 @llvm.bswap.i32(i32 %s)
  ret i32 %r
}

; negative test - an extra use of the shift result blocks the fold

define i32 @lshr8_i32_use(i32 %x, ptr %p) {
; CHECK-LABEL: @lshr8_i32_use(
; CHECK-NEXT:    [[S:%.*]] = lshr i32 [[X:%.*]], 12
; CHECK-NEXT:    store i32 [[S]], ptr [[P:%.*]], align 4
; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.bswap.i32(i32 [[S]])
; CHECK-NEXT:    ret i32 [[R]]
;
  %s = lshr i32 %x, 12
  store i32 %s, ptr %p
  %r = call i32 @llvm.bswap.i32(i32 %s)
  ret i32 %r
}

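; The shl sibling commutes the other way:
; bswap(shl(x, C)) --> lshr(bswap(x), C) when C is a multiple of 8.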
define i64 @shl16_i64(i64 %x) {
; CHECK-LABEL: @shl16_i64(
; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.bswap.i64(i64 [[X:%.*]])
; CHECK-NEXT:    [[R:%.*]] = lshr i64 [[TMP1]], 16
; CHECK-NEXT:    ret i64 [[R]]
;
  %s = shl i64 %x, 16
  %r = call i64 @llvm.bswap.i64(i64 %s)
  ret i64 %r
}

; poison vector element propagates

define <2 x i64> @shl16_v2i64(<2 x i64> %x) {
; CHECK-LABEL: @shl16_v2i64(
; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> [[X:%.*]])
; CHECK-NEXT:    [[R:%.*]] = lshr <2 x i64> [[TMP1]], <i64 poison, i64 24>
; CHECK-NEXT:    ret <2 x i64> [[R]]
;
  %s = shl <2 x i64> %x, <i64 poison, i64 24>
  %r = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %s)
  ret <2 x i64> %r
}

define i64 @shl56_i64(i64 %x) {
; CHECK-LABEL: @shl56_i64(
; CHECK-NEXT:    [[R:%.*]] = and i64 [[X:%.*]], 255
; CHECK-NEXT:    ret i64 [[R]]
;
  %s = shl i64 %x, 56
  %r = call i64 @llvm.bswap.i64(i64 %s)
  ret i64 %r
}

; negative test - the shift amount must be a multiple of 8

define i64 @shl42_i64(i64 %x) {
; CHECK-LABEL: @shl42_i64(
; CHECK-NEXT:    [[S:%.*]] = shl i64 [[X:%.*]], 42
; CHECK-NEXT:    [[R:%.*]] = call i64 @llvm.bswap.i64(i64 [[S]])
; CHECK-NEXT:    ret i64 [[R]]
;
  %s = shl i64 %x, 42
  %r = call i64 @llvm.bswap.i64(i64 %s)
  ret i64 %r
}

; negative test - an extra use of the shift result blocks the fold

define i32 @shl8_i32_use(i32 %x, ptr %p) {
; CHECK-LABEL: @shl8_i32_use(
; CHECK-NEXT:    [[S:%.*]] = shl i32 [[X:%.*]], 8
; CHECK-NEXT:    store i32 [[S]], ptr [[P:%.*]], align 4
; CHECK-NEXT:    [[R:%.*]] = call i32 @llvm.bswap.i32(i32 [[S]])
; CHECK-NEXT:    ret i32 [[R]]
;
  %s = shl i32 %x, 8
  store i32 %s, ptr %p
  %r = call i32 @llvm.bswap.i32(i32 %s)
  ret i32 %r
}

; swaps cancel
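; bswap(shl(bswap(x), 16)) --> lshr(bswap(bswap(x)), 16) --> lshr(x, 16)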

define i64 @swap_shl16_i64(i64 %x) {
; CHECK-LABEL: @swap_shl16_i64(
; CHECK-NEXT:    [[R:%.*]] = lshr i64 [[X:%.*]], 16
; CHECK-NEXT:    ret i64 [[R]]
;
  %b = call i64 @llvm.bswap.i64(i64 %x)
  %s = shl i64 %b, 16
  %r = call i64 @llvm.bswap.i64(i64 %s)
  ret i64 %r
}

; canonicalize the shift after the bswap if the shift amount is a multiple of 8 bits
; (including non-uniform vector elements)
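; e.g. masking the shift amount with -8 clears its low 3 bits, so it is
; always a multiple of 8 (and -16 clears 4 bits, giving a multiple of 16)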

define <2 x i32> @variable_lshr_v2i32(<2 x i32> %x, <2 x i32> %n) {
; CHECK-LABEL: @variable_lshr_v2i32(
; CHECK-NEXT:    [[SHAMT:%.*]] = and <2 x i32> [[N:%.*]], <i32 -8, i32 -16>
; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[X:%.*]])
; CHECK-NEXT:    [[R:%.*]] = lshr <2 x i32> [[TMP1]], [[SHAMT]]
; CHECK-NEXT:    ret <2 x i32> [[R]]
;
  %shamt = and <2 x i32> %n, <i32 -8, i32 -16>
  %s = shl <2 x i32> %x, %shamt
  %r = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %s)
  ret <2 x i32> %r
}

; PR55327 - swaps cancel
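; shl-by-3 plus masking with 56 guarantees a byte-multiple shift amount,
; so the shift commutes with the inner bswap and the two swaps cancel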

define i64 @variable_shl_i64(i64 %x, i64 %n) {
; CHECK-LABEL: @variable_shl_i64(
; CHECK-NEXT:    [[N8:%.*]] = shl i64 [[N:%.*]], 3
; CHECK-NEXT:    [[SHAMT:%.*]] = and i64 [[N8]], 56
; CHECK-NEXT:    [[R:%.*]] = lshr i64 [[X:%.*]], [[SHAMT]]
; CHECK-NEXT:    ret i64 [[R]]
;
  %b = tail call i64 @llvm.bswap.i64(i64 %x)
  %n8 = shl i64 %n, 3
  %shamt = and i64 %n8, 56
  %s = shl i64 %b, %shamt
  %r = tail call i64 @llvm.bswap.i64(i64 %s)
  ret i64 %r
}

; negative test - masking with -4 only guarantees a multiple of 4, not 8

define i64 @variable_shl_not_masked_enough_i64(i64 %x, i64 %n) {
; CHECK-LABEL: @variable_shl_not_masked_enough_i64(
; CHECK-NEXT:    [[SHAMT:%.*]] = and i64 [[N:%.*]], -4
; CHECK-NEXT:    [[S:%.*]] = shl i64 [[X:%.*]], [[SHAMT]]
; CHECK-NEXT:    [[R:%.*]] = call i64 @llvm.bswap.i64(i64 [[S]])
; CHECK-NEXT:    ret i64 [[R]]
;
  %shamt = and i64 %n, -4
  %s = shl i64 %x, %shamt
  %r = call i64 @llvm.bswap.i64(i64 %s)
  ret i64 %r
}

; PR5284
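; bswap16(trunc(bswap32(A))) selects the top two bytes of A (in swapped
; order) and swaps them back, so it is just trunc(A >> 16)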
define i16 @test7(i32 %A) {
; CHECK-LABEL: @test7(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr i32 [[A:%.*]], 16
; CHECK-NEXT:    [[D:%.*]] = trunc nuw i32 [[TMP1]] to i16
; CHECK-NEXT:    ret i16 [[D]]
;
  %B = tail call i32 @llvm.bswap.i32(i32 %A) nounwind
  %C = trunc i32 %B to i16
  %D = tail call i16 @llvm.bswap.i16(i16 %C) nounwind
  ret i16 %D
}

define <2 x i16> @test7_vector(<2 x i32> %A) {
; CHECK-LABEL: @test7_vector(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i32> [[A:%.*]], splat (i32 16)
; CHECK-NEXT:    [[D:%.*]] = trunc nuw <2 x i32> [[TMP1]] to <2 x i16>
; CHECK-NEXT:    ret <2 x i16> [[D]]
;
  %B = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %A) nounwind
  %C = trunc <2 x i32> %B to <2 x i16>
  %D = tail call <2 x i16> @llvm.bswap.v2i16(<2 x i16> %C) nounwind
  ret <2 x i16> %D
}

define i16 @test8(i64 %A) {
; CHECK-LABEL: @test8(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr i64 [[A:%.*]], 48
; CHECK-NEXT:    [[D:%.*]] = trunc nuw i64 [[TMP1]] to i16
; CHECK-NEXT:    ret i16 [[D]]
;
  %B = tail call i64 @llvm.bswap.i64(i64 %A) nounwind
  %C = trunc i64 %B to i16
  %D = tail call i16 @llvm.bswap.i16(i16 %C) nounwind
  ret i16 %D
}

define <2 x i16> @test8_vector(<2 x i64> %A) {
; CHECK-LABEL: @test8_vector(
; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i64> [[A:%.*]], splat (i64 48)
; CHECK-NEXT:    [[D:%.*]] = trunc nuw <2 x i64> [[TMP1]] to <2 x i16>
; CHECK-NEXT:    ret <2 x i16> [[D]]
;
  %B = tail call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %A) nounwind
  %C = trunc <2 x i64> %B to <2 x i16>
  %D = tail call <2 x i16> @llvm.bswap.v2i16(<2 x i16> %C) nounwind
  ret <2 x i16> %D
}

; Misc: Fold bswap(undef) to undef.
define i64 @foo() {
; CHECK-LABEL: @foo(
; CHECK-NEXT:    ret i64 undef
;
  %a = call i64 @llvm.bswap.i64(i64 undef)
  ret i64 %a
}

; PR15782
; Fold: OP( BSWAP(x), BSWAP(y) ) -> BSWAP( OP(x, y) )
; Fold: OP( BSWAP(x), CONSTANT ) -> BSWAP( OP(x, BSWAP(CONSTANT) ) )
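; e.g. in @bs_and16i below: 10001 = 0x2711, and bswap16(0x2711) = 0x1127 = 4391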
define i16 @bs_and16i(i16 %a, i16 %b) #0 {
; CHECK-LABEL: @bs_and16i(
; CHECK-NEXT:    [[TMP1:%.*]] = and i16 [[A:%.*]], 4391
; CHECK-NEXT:    [[TMP2:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
; CHECK-NEXT:    ret i16 [[TMP2]]
;
  %1 = tail call i16 @llvm.bswap.i16(i16 %a)
  %2 = and i16 %1, 10001
  ret i16 %2
}

define i16 @bs_and16(i16 %a, i16 %b) #0 {
; CHECK-LABEL: @bs_and16(
; CHECK-NEXT:    [[TMP1:%.*]] = and i16 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[T3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
; CHECK-NEXT:    ret i16 [[T3]]
;
  %t1 = tail call i16 @llvm.bswap.i16(i16 %a)
  %t2 = tail call i16 @llvm.bswap.i16(i16 %b)
  %t3 = and i16 %t1, %t2
  ret i16 %t3
}

define i16 @bs_or16(i16 %a, i16 %b) #0 {
; CHECK-LABEL: @bs_or16(
; CHECK-NEXT:    [[TMP1:%.*]] = or i16 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[T3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
; CHECK-NEXT:    ret i16 [[T3]]
;
  %t1 = tail call i16 @llvm.bswap.i16(i16 %a)
  %t2 = tail call i16 @llvm.bswap.i16(i16 %b)
  %t3 = or i16 %t1, %t2
  ret i16 %t3
}

define i16 @bs_xor16(i16 %a, i16 %b) #0 {
; CHECK-LABEL: @bs_xor16(
; CHECK-NEXT:    [[TMP1:%.*]] = xor i16 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[T3:%.*]] = call i16 @llvm.bswap.i16(i16 [[TMP1]])
; CHECK-NEXT:    ret i16 [[T3]]
;
  %t1 = tail call i16 @llvm.bswap.i16(i16 %a)
  %t2 = tail call i16 @llvm.bswap.i16(i16 %b)
  %t3 = xor i16 %t1, %t2
  ret i16 %t3
}

define i32 @bs_and32i(i32 %a, i32 %b) #0 {
; CHECK-LABEL: @bs_and32i(
; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[A:%.*]], -1585053440
; CHECK-NEXT:    [[T2:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
; CHECK-NEXT:    ret i32 [[T2]]
;
  %t1 = tail call i32 @llvm.bswap.i32(i32 %a)
  %t2 = and i32 %t1, 100001
  ret i32 %t2
}

define i32 @bs_and32(i32 %a, i32 %b) #0 {
; CHECK-LABEL: @bs_and32(
; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[T3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
; CHECK-NEXT:    ret i32 [[T3]]
;
  %t1 = tail call i32 @llvm.bswap.i32(i32 %a)
  %t2 = tail call i32 @llvm.bswap.i32(i32 %b)
  %t3 = and i32 %t1, %t2
  ret i32 %t3
}

define i32 @bs_or32(i32 %a, i32 %b) #0 {
; CHECK-LABEL: @bs_or32(
; CHECK-NEXT:    [[TMP1:%.*]] = or i32 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[T3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
; CHECK-NEXT:    ret i32 [[T3]]
;
  %t1 = tail call i32 @llvm.bswap.i32(i32 %a)
  %t2 = tail call i32 @llvm.bswap.i32(i32 %b)
  %t3 = or i32 %t1, %t2
  ret i32 %t3
}

define i32 @bs_xor32(i32 %a, i32 %b) #0 {
; CHECK-LABEL: @bs_xor32(
; CHECK-NEXT:    [[TMP1:%.*]] = xor i32 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[T3:%.*]] = call i32 @llvm.bswap.i32(i32 [[TMP1]])
; CHECK-NEXT:    ret i32 [[T3]]
;
  %t1 = tail call i32 @llvm.bswap.i32(i32 %a)
  %t2 = tail call i32 @llvm.bswap.i32(i32 %b)
  %t3 = xor i32 %t1, %t2
  ret i32 %t3
}

define i64 @bs_and64i(i64 %a, i64 %b) #0 {
; CHECK-LABEL: @bs_and64i(
; CHECK-NEXT:    [[TMP1:%.*]] = and i64 [[A:%.*]], 129085117527228416
; CHECK-NEXT:    [[T2:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]])
; CHECK-NEXT:    ret i64 [[T2]]
;
  %t1 = tail call i64 @llvm.bswap.i64(i64 %a)
  %t2 = and i64 %t1, 1000000001
  ret i64 %t2
}

define i64 @bs_and64(i64 %a, i64 %b) #0 {
; CHECK-LABEL: @bs_and64(
; CHECK-NEXT:    [[TMP1:%.*]] = and i64 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[T3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]])
; CHECK-NEXT:    ret i64 [[T3]]
;
  %t1 = tail call i64 @llvm.bswap.i64(i64 %a)
  %t2 = tail call i64 @llvm.bswap.i64(i64 %b)
  %t3 = and i64 %t1, %t2
  ret i64 %t3
}

define i64 @bs_or64(i64 %a, i64 %b) #0 {
; CHECK-LABEL: @bs_or64(
; CHECK-NEXT:    [[TMP1:%.*]] = or i64 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[T3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]])
; CHECK-NEXT:    ret i64 [[T3]]
;
  %t1 = tail call i64 @llvm.bswap.i64(i64 %a)
  %t2 = tail call i64 @llvm.bswap.i64(i64 %b)
  %t3 = or i64 %t1, %t2
  ret i64 %t3
}

define i64 @bs_xor64(i64 %a, i64 %b) #0 {
; CHECK-LABEL: @bs_xor64(
; CHECK-NEXT:    [[TMP1:%.*]] = xor i64 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[T3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP1]])
; CHECK-NEXT:    ret i64 [[T3]]
;
  %t1 = tail call i64 @llvm.bswap.i64(i64 %a)
  %t2 = tail call i64 @llvm.bswap.i64(i64 %b)
  %t3 = xor i64 %t1, %t2
  ret i64 %t3
}

define <2 x i32> @bs_and32vec(<2 x i32> %a, <2 x i32> %b) #0 {
; CHECK-LABEL: @bs_and32vec(
; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[T3:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP1]])
; CHECK-NEXT:    ret <2 x i32> [[T3]]
;
  %t1 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %a)
  %t2 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %b)
  %t3 = and <2 x i32> %t1, %t2
  ret <2 x i32> %t3
}

define <2 x i32> @bs_or32vec(<2 x i32> %a, <2 x i32> %b) #0 {
; CHECK-LABEL: @bs_or32vec(
; CHECK-NEXT:    [[TMP1:%.*]] = or <2 x i32> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[T3:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP1]])
; CHECK-NEXT:    ret <2 x i32> [[T3]]
;
  %t1 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %a)
  %t2 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %b)
  %t3 = or <2 x i32> %t1, %t2
  ret <2 x i32> %t3
}

define <2 x i32> @bs_xor32vec(<2 x i32> %a, <2 x i32> %b) #0 {
; CHECK-LABEL: @bs_xor32vec(
; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i32> [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    [[T3:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP1]])
; CHECK-NEXT:    ret <2 x i32> [[T3]]
;
  %t1 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %a)
  %t2 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %b)
  %t3 = xor <2 x i32> %t1, %t2
  ret <2 x i32> %t3
}

define <2 x i32> @bs_and32ivec(<2 x i32> %a, <2 x i32> %b) #0 {
; CHECK-LABEL: @bs_and32ivec(
; CHECK-NEXT:    [[TMP1:%.*]] = and <2 x i32> [[A:%.*]], splat (i32 -1585053440)
; CHECK-NEXT:    [[T2:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP1]])
; CHECK-NEXT:    ret <2 x i32> [[T2]]
;
  %t1 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %a)
  %t2 = and <2 x i32> %t1, <i32 100001, i32 100001>
  ret <2 x i32> %t2
}

define <2 x i32> @bs_or32ivec(<2 x i32> %a, <2 x i32> %b) #0 {
; CHECK-LABEL: @bs_or32ivec(
; CHECK-NEXT:    [[TMP1:%.*]] = or <2 x i32> [[A:%.*]], splat (i32 -1585053440)
; CHECK-NEXT:    [[T2:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP1]])
; CHECK-NEXT:    ret <2 x i32> [[T2]]
;
  %t1 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %a)
  %t2 = or <2 x i32> %t1, <i32 100001, i32 100001>
  ret <2 x i32> %t2
}

define <2 x i32> @bs_xor32ivec(<2 x i32> %a, <2 x i32> %b) #0 {
; CHECK-LABEL: @bs_xor32ivec(
; CHECK-NEXT:    [[TMP1:%.*]] = xor <2 x i32> [[A:%.*]], splat (i32 -1585053440)
; CHECK-NEXT:    [[T2:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP1]])
; CHECK-NEXT:    ret <2 x i32> [[T2]]
;
  %t1 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %a)
  %t2 = xor <2 x i32> %t1, <i32 100001, i32 100001>
  ret <2 x i32> %t2
}

define i64 @bs_and64_multiuse1(i64 %a, i64 %b) #0 {
; CHECK-LABEL: @bs_and64_multiuse1(
; CHECK-NEXT:    [[T1:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[A:%.*]])
; CHECK-NEXT:    [[T2:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[B:%.*]])
; CHECK-NEXT:    [[T3:%.*]] = and i64 [[T1]], [[T2]]
; CHECK-NEXT:    [[T4:%.*]] = mul i64 [[T3]], [[T1]]
; CHECK-NEXT:    [[T5:%.*]] = mul i64 [[T4]], [[T2]]
; CHECK-NEXT:    ret i64 [[T5]]
;
  %t1 = tail call i64 @llvm.bswap.i64(i64 %a)
  %t2 = tail call i64 @llvm.bswap.i64(i64 %b)
  %t3 = and i64 %t1, %t2
  %t4 = mul i64 %t3, %t1 ; to increase use count of the bswaps
  %t5 = mul i64 %t4, %t2 ; to increase use count of the bswaps
  ret i64 %t5
}

define i64 @bs_and64_multiuse2(i64 %a, i64 %b) #0 {
; CHECK-LABEL: @bs_and64_multiuse2(
; CHECK-NEXT:    [[T1:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[A:%.*]])
; CHECK-NEXT:    [[T2:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[B:%.*]])
; CHECK-NEXT:    [[T3:%.*]] = and i64 [[T1]], [[T2]]
; CHECK-NEXT:    [[T4:%.*]] = mul i64 [[T3]], [[T1]]
; CHECK-NEXT:    ret i64 [[T4]]
;
  %t1 = tail call i64 @llvm.bswap.i64(i64 %a)
  %t2 = tail call i64 @llvm.bswap.i64(i64 %b)
  %t3 = and i64 %t1, %t2
  %t4 = mul i64 %t3, %t1 ; to increase use count of the bswaps
  ret i64 %t4
}

define i64 @bs_and64_multiuse3(i64 %a, i64 %b) #0 {
; CHECK-LABEL: @bs_and64_multiuse3(
; CHECK-NEXT:    [[T1:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[A:%.*]])
; CHECK-NEXT:    [[T2:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[B:%.*]])
; CHECK-NEXT:    [[T3:%.*]] = and i64 [[T1]], [[T2]]
; CHECK-NEXT:    [[T4:%.*]] = mul i64 [[T3]], [[T2]]
; CHECK-NEXT:    ret i64 [[T4]]
;
  %t1 = tail call i64 @llvm.bswap.i64(i64 %a)
  %t2 = tail call i64 @llvm.bswap.i64(i64 %b)
  %t3 = and i64 %t1, %t2
  %t4 = mul i64 %t3, %t2 ; to increase use count of the bswaps
  ret i64 %t4
}

define i64 @bs_and64i_multiuse(i64 %a, i64 %b) #0 {
; CHECK-LABEL: @bs_and64i_multiuse(
; CHECK-NEXT:    [[T1:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[A:%.*]])
; CHECK-NEXT:    [[T2:%.*]] = and i64 [[T1]], 1000000001
; CHECK-NEXT:    [[T3:%.*]] = mul i64 [[T2]], [[T1]]
; CHECK-NEXT:    ret i64 [[T3]]
;
  %t1 = tail call i64 @llvm.bswap.i64(i64 %a)
  %t2 = and i64 %t1, 1000000001
  %t3 = mul i64 %t2, %t1 ; to increase use count of the bswap
  ret i64 %t3
}


; Issue#62236
; Fold: BSWAP( OP( BSWAP(x), y ) ) -> OP( x, BSWAP(y) )
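; The bswap migrates to the operand that is not already swapped; when both
; operands are swapped (see @bs_all_operand64), everything cancels.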
define i16 @bs_and_lhs_bs16(i16 %a, i16 %b) #0 {
; CHECK-LABEL: @bs_and_lhs_bs16(
; CHECK-NEXT:    [[TMP1:%.*]] = call i16 @llvm.bswap.i16(i16 [[B:%.*]])
; CHECK-NEXT:    [[TMP2:%.*]] = and i16 [[A:%.*]], [[TMP1]]
; CHECK-NEXT:    ret i16 [[TMP2]]
;
  %1 = tail call i16 @llvm.bswap.i16(i16 %a)
  %2 = and i16 %1, %b
  %3 = tail call i16 @llvm.bswap.i16(i16 %2)
  ret i16 %3
}

define i16 @bs_or_lhs_bs16(i16 %a, i16 %b) #0 {
; CHECK-LABEL: @bs_or_lhs_bs16(
; CHECK-NEXT:    [[TMP1:%.*]] = call i16 @llvm.bswap.i16(i16 [[B:%.*]])
; CHECK-NEXT:    [[TMP2:%.*]] = or i16 [[A:%.*]], [[TMP1]]
; CHECK-NEXT:    ret i16 [[TMP2]]
;
  %1 = tail call i16 @llvm.bswap.i16(i16 %a)
  %2 = or i16 %1, %b
  %3 = tail call i16 @llvm.bswap.i16(i16 %2)
  ret i16 %3
}

define i16 @bs_xor_lhs_bs16(i16 %a, i16 %b) #0 {
; CHECK-LABEL: @bs_xor_lhs_bs16(
; CHECK-NEXT:    [[TMP1:%.*]] = call i16 @llvm.bswap.i16(i16 [[B:%.*]])
; CHECK-NEXT:    [[TMP2:%.*]] = xor i16 [[A:%.*]], [[TMP1]]
; CHECK-NEXT:    ret i16 [[TMP2]]
;
  %1 = tail call i16 @llvm.bswap.i16(i16 %a)
  %2 = xor i16 %1, %b
  %3 = tail call i16 @llvm.bswap.i16(i16 %2)
  ret i16 %3
}

define i16 @bs_and_rhs_bs16(i16 %a, i16 %b) #0 {
; CHECK-LABEL: @bs_and_rhs_bs16(
; CHECK-NEXT:    [[TMP1:%.*]] = call i16 @llvm.bswap.i16(i16 [[A:%.*]])
; CHECK-NEXT:    [[TMP2:%.*]] = and i16 [[TMP1]], [[B:%.*]]
; CHECK-NEXT:    ret i16 [[TMP2]]
;
  %1 = tail call i16 @llvm.bswap.i16(i16 %b)
  %2 = and i16 %a, %1
  %3 = tail call i16 @llvm.bswap.i16(i16 %2)
  ret i16 %3
}

define i16 @bs_or_rhs_bs16(i16 %a, i16 %b) #0 {
; CHECK-LABEL: @bs_or_rhs_bs16(
; CHECK-NEXT:    [[TMP1:%.*]] = call i16 @llvm.bswap.i16(i16 [[A:%.*]])
; CHECK-NEXT:    [[TMP2:%.*]] = or i16 [[TMP1]], [[B:%.*]]
; CHECK-NEXT:    ret i16 [[TMP2]]
;
  %1 = tail call i16 @llvm.bswap.i16(i16 %b)
  %2 = or i16 %a, %1
  %3 = tail call i16 @llvm.bswap.i16(i16 %2)
  ret i16 %3
}

define i16 @bs_xor_rhs_bs16(i16 %a, i16 %b) #0 {
; CHECK-LABEL: @bs_xor_rhs_bs16(
; CHECK-NEXT:    [[TMP1:%.*]] = call i16 @llvm.bswap.i16(i16 [[A:%.*]])
; CHECK-NEXT:    [[TMP2:%.*]] = xor i16 [[TMP1]], [[B:%.*]]
; CHECK-NEXT:    ret i16 [[TMP2]]
;
  %1 = tail call i16 @llvm.bswap.i16(i16 %b)
  %2 = xor i16 %a, %1
  %3 = tail call i16 @llvm.bswap.i16(i16 %2)
  ret i16 %3
}

define i32 @bs_and_rhs_bs32(i32 %a, i32 %b) #0 {
; CHECK-LABEL: @bs_and_rhs_bs32(
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.bswap.i32(i32 [[A:%.*]])
; CHECK-NEXT:    [[TMP2:%.*]] = and i32 [[TMP1]], [[B:%.*]]
; CHECK-NEXT:    ret i32 [[TMP2]]
;
  %1 = tail call i32 @llvm.bswap.i32(i32 %b)
  %2 = and i32 %a, %1
  %3 = tail call i32 @llvm.bswap.i32(i32 %2)
  ret i32 %3
}

define i32 @bs_or_rhs_bs32(i32 %a, i32 %b) #0 {
; CHECK-LABEL: @bs_or_rhs_bs32(
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.bswap.i32(i32 [[A:%.*]])
; CHECK-NEXT:    [[TMP2:%.*]] = or i32 [[TMP1]], [[B:%.*]]
; CHECK-NEXT:    ret i32 [[TMP2]]
;
  %1 = tail call i32 @llvm.bswap.i32(i32 %b)
  %2 = or i32 %a, %1
  %3 = tail call i32 @llvm.bswap.i32(i32 %2)
  ret i32 %3
}

define i32 @bs_xor_rhs_bs32(i32 %a, i32 %b) #0 {
; CHECK-LABEL: @bs_xor_rhs_bs32(
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.bswap.i32(i32 [[A:%.*]])
; CHECK-NEXT:    [[TMP2:%.*]] = xor i32 [[TMP1]], [[B:%.*]]
; CHECK-NEXT:    ret i32 [[TMP2]]
;
  %1 = tail call i32 @llvm.bswap.i32(i32 %b)
  %2 = xor i32 %a, %1
  %3 = tail call i32 @llvm.bswap.i32(i32 %2)
  ret i32 %3
}

define i64 @bs_and_rhs_bs64(i64 %a, i64 %b) #0 {
; CHECK-LABEL: @bs_and_rhs_bs64(
; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.bswap.i64(i64 [[A:%.*]])
; CHECK-NEXT:    [[TMP2:%.*]] = and i64 [[TMP1]], [[B:%.*]]
; CHECK-NEXT:    ret i64 [[TMP2]]
;
  %1 = tail call i64 @llvm.bswap.i64(i64 %b)
  %2 = and i64 %a, %1
  %3 = tail call i64 @llvm.bswap.i64(i64 %2)
  ret i64 %3
}

define i64 @bs_or_rhs_bs64(i64 %a, i64 %b) #0 {
; CHECK-LABEL: @bs_or_rhs_bs64(
; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.bswap.i64(i64 [[A:%.*]])
; CHECK-NEXT:    [[TMP2:%.*]] = or i64 [[TMP1]], [[B:%.*]]
; CHECK-NEXT:    ret i64 [[TMP2]]
;
  %1 = tail call i64 @llvm.bswap.i64(i64 %b)
  %2 = or i64 %a, %1
  %3 = tail call i64 @llvm.bswap.i64(i64 %2)
  ret i64 %3
}

define i64 @bs_xor_rhs_bs64(i64 %a, i64 %b) #0 {
; CHECK-LABEL: @bs_xor_rhs_bs64(
; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.bswap.i64(i64 [[A:%.*]])
; CHECK-NEXT:    [[TMP2:%.*]] = xor i64 [[TMP1]], [[B:%.*]]
; CHECK-NEXT:    ret i64 [[TMP2]]
;
  %1 = tail call i64 @llvm.bswap.i64(i64 %b)
  %2 = xor i64 %a, %1
  %3 = tail call i64 @llvm.bswap.i64(i64 %2)
  ret i64 %3
}

define <2 x i32> @bs_and_rhs_i32vec(<2 x i32> %a, <2 x i32> %b) #0 {
; CHECK-LABEL: @bs_and_rhs_i32vec(
; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[A:%.*]])
; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP1]], [[B:%.*]]
; CHECK-NEXT:    ret <2 x i32> [[TMP2]]
;
  %1 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %b)
  %2 = and <2 x i32> %a, %1
  %3 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %2)
  ret <2 x i32> %3
}

define <2 x i32> @bs_or_rhs_i32vec(<2 x i32> %a, <2 x i32> %b) #0 {
; CHECK-LABEL: @bs_or_rhs_i32vec(
; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[A:%.*]])
; CHECK-NEXT:    [[TMP2:%.*]] = or <2 x i32> [[TMP1]], [[B:%.*]]
; CHECK-NEXT:    ret <2 x i32> [[TMP2]]
;
  %1 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %b)
  %2 = or <2 x i32> %a, %1
  %3 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %2)
  ret <2 x i32> %3
}

define <2 x i32> @bs_xor_rhs_i32vec(<2 x i32> %a, <2 x i32> %b) #0 {
; CHECK-LABEL: @bs_xor_rhs_i32vec(
; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[A:%.*]])
; CHECK-NEXT:    [[TMP2:%.*]] = xor <2 x i32> [[TMP1]], [[B:%.*]]
; CHECK-NEXT:    ret <2 x i32> [[TMP2]]
;
  %1 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %b)
  %2 = xor <2 x i32> %a, %1
  %3 = tail call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %2)
  ret <2 x i32> %3
}

define i64 @bs_and_rhs_bs64_multiuse1(i64 %a, i64 %b) #0 {
; CHECK-LABEL: @bs_and_rhs_bs64_multiuse1(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[B:%.*]])
; CHECK-NEXT:    [[TMP2:%.*]] = and i64 [[A:%.*]], [[TMP1]]
; CHECK-NEXT:    [[TMP3:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[TMP2]])
; CHECK-NEXT:    [[TMP4:%.*]] = mul i64 [[TMP2]], [[TMP3]]
; CHECK-NEXT:    ret i64 [[TMP4]]
;
  %1 = tail call i64 @llvm.bswap.i64(i64 %b)
  %2 = and i64 %a, %1
  %3 = tail call i64 @llvm.bswap.i64(i64 %2)
  %4 = mul i64 %2, %3 ; increase use of the logical op
  ret i64 %4
}

define i64 @bs_and_rhs_bs64_multiuse2(i64 %a, i64 %b) #0 {
; CHECK-LABEL: @bs_and_rhs_bs64_multiuse2(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[B:%.*]])
; CHECK-NEXT:    [[TMP2:%.*]] = and i64 [[A:%.*]], [[TMP1]]
; CHECK-NEXT:    [[TMP3:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[TMP2]])
; CHECK-NEXT:    [[TMP4:%.*]] = mul i64 [[TMP1]], [[TMP3]]
; CHECK-NEXT:    ret i64 [[TMP4]]
;
  %1 = tail call i64 @llvm.bswap.i64(i64 %b)
  %2 = and i64 %a, %1
  %3 = tail call i64 @llvm.bswap.i64(i64 %2)
  %4 = mul i64 %1, %3 ; increase use of the inner bswap
  ret i64 %4
}

define i64 @bs_all_operand64(i64 %a, i64 %b) #0 {
; CHECK-LABEL: @bs_all_operand64(
; CHECK-NEXT:    [[TMP1:%.*]] = and i64 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT:    ret i64 [[TMP1]]
;
  %1 = tail call i64 @llvm.bswap.i64(i64 %a)
  %2 = tail call i64 @llvm.bswap.i64(i64 %b)
  %3 = and i64 %1, %2
  %4 = tail call i64 @llvm.bswap.i64(i64 %3)
  ret i64 %4
}

define i64 @bs_all_operand64_multiuse_both(i64 %a, i64 %b) #0 {
; CHECK-LABEL: @bs_all_operand64_multiuse_both(
; CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[A:%.*]])
; CHECK-NEXT:    [[TMP2:%.*]] = tail call i64 @llvm.bswap.i64(i64 [[B:%.*]])
; CHECK-NEXT:    [[TMP3:%.*]] = and i64 [[A]], [[B]]
; CHECK-NEXT:    call void @use.i64(i64 [[TMP1]])
; CHECK-NEXT:    call void @use.i64(i64 [[TMP2]])
; CHECK-NEXT:    ret i64 [[TMP3]]
;
  %1 = tail call i64 @llvm.bswap.i64(i64 %a)
  %2 = tail call i64 @llvm.bswap.i64(i64 %b)
  %3 = and i64 %1, %2
  %4 = tail call i64 @llvm.bswap.i64(i64 %3)

  call void @use.i64(i64 %1)
  call void @use.i64(i64 %2)
  ret i64 %4
}

@gp = external global [0 x i8]

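; The bswap operand is an opaque constant expression (not a bswap), and the
; 4095 mask keeps 12 low bits live (more than one whole byte), so nothing
; folds here.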
define void @bs_and_constexpr(ptr %out, i64 %a) {
; CHECK-LABEL: @bs_and_constexpr(
; CHECK-NEXT:    [[EXP:%.*]] = and i64 ptrtoint (ptr @gp to i64), 4095
; CHECK-NEXT:    [[RES:%.*]] = call i64 @llvm.bswap.i64(i64 [[EXP]])
; CHECK-NEXT:    store i64 [[RES]], ptr [[OUT:%.*]], align 8
; CHECK-NEXT:    ret void
;
  %gpi = ptrtoint ptr @gp to i64
  %exp = and i64 %gpi, 4095
  %res = call i64 @llvm.bswap.i64(i64 %exp)
  store i64 %res, ptr %out, align 8
  ret void
}


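; Here the inner bswap cancels with the outer one, leaving
; and(gpi, bswap64(4095)); bswap64(0xfff) = 0xff0f000000000000 = -67835469387268096.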
define void @bs_and_bs_constexpr(ptr %out, i64 %a) {
; CHECK-LABEL: @bs_and_bs_constexpr(
; CHECK-NEXT:    [[TMP1:%.*]] = and i64 ptrtoint (ptr @gp to i64), -67835469387268096
; CHECK-NEXT:    store i64 [[TMP1]], ptr [[OUT:%.*]], align 8
; CHECK-NEXT:    ret void
;
  %gpi = ptrtoint ptr @gp to i64
  %bs_gpi = call i64 @llvm.bswap.i64(i64 %gpi)
  %exp = and i64 %bs_gpi, 4095
  %res = call i64 @llvm.bswap.i64(i64 %exp)
  store i64 %res, ptr %out, align 8
  ret void
}


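; If only the topmost byte of the operand can be nonzero, the bswap moves it
; to the bottom byte, so the call reduces to a shift and/or mask (in the
; first test below, the shl-by-56 composes to and(x, 255)).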
define i64 @bs_active_high8(i64 %0) {
; CHECK-LABEL: @bs_active_high8(
; CHECK-NEXT:    [[TMP2:%.*]] = and i64 [[TMP0:%.*]], 255
; CHECK-NEXT:    ret i64 [[TMP2]]
;
  %2 = shl i64 %0, 56
  %3 = call i64 @llvm.bswap.i64(i64 %2)
  ret i64 %3
}

define i32 @bs_active_high7(i32 %0) {
; CHECK-LABEL: @bs_active_high7(
; CHECK-NEXT:    [[TMP2:%.*]] = lshr i32 [[TMP0:%.*]], 24
; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[TMP2]], 254
; CHECK-NEXT:    ret i32 [[TMP3]]
;
  %2 = and i32 %0, -33554432  ; 0xfe000000
  %3 = call i32 @llvm.bswap.i32(i32 %2)
  ret i32 %3
}

define <2 x i64> @bs_active_high4(<2 x i64> %0) {
; CHECK-LABEL: @bs_active_high4(
; CHECK-NEXT:    [[TMP2:%.*]] = shl <2 x i64> [[TMP0:%.*]], splat (i64 4)
; CHECK-NEXT:    [[TMP3:%.*]] = and <2 x i64> [[TMP2]], splat (i64 240)
; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
;
  %2 = shl <2 x i64> %0, <i64 60, i64 60>
  %3 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %2)
  ret <2 x i64> %3
}

define <2 x i64> @bs_active_high_different(<2 x i64> %0) {
; CHECK-LABEL: @bs_active_high_different(
; CHECK-NEXT:    [[TMP2:%.*]] = shl <2 x i64> [[TMP0:%.*]], <i64 56, i64 57>
; CHECK-NEXT:    [[TMP3:%.*]] = lshr exact <2 x i64> [[TMP2]], splat (i64 56)
; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
;
  %2 = shl <2 x i64> %0, <i64 56, i64 57>
  %3 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %2)
  ret <2 x i64> %3
}

; negative test
define <2 x i64> @bs_active_high_different_negative(<2 x i64> %0) {
; CHECK-LABEL: @bs_active_high_different_negative(
; CHECK-NEXT:    [[TMP2:%.*]] = shl <2 x i64> [[TMP0:%.*]], <i64 56, i64 55>
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> [[TMP2]])
; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
;
  %2 = shl <2 x i64> %0, <i64 56, i64 55>  ; second elem has 9 active high bits
  %3 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %2)
  ret <2 x i64> %3
}

; TODO: This should fold to 'and'.
define <2 x i64> @bs_active_high_poison(<2 x i64> %0) {
; CHECK-LABEL: @bs_active_high_poison(
; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> [[TMP0:%.*]])
; CHECK-NEXT:    [[TMP3:%.*]] = lshr <2 x i64> [[TMP2]], <i64 56, i64 poison>
; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
;
  %2 = shl <2 x i64> %0, <i64 56, i64 poison>
  %3 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %2)
  ret <2 x i64> %3
}

define i64 @bs_active_high8_multiuse(i64 %0) {
; CHECK-LABEL: @bs_active_high8_multiuse(
; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[TMP0:%.*]], 56
; CHECK-NEXT:    [[TMP3:%.*]] = and i64 [[TMP0]], 255
; CHECK-NEXT:    [[TMP4:%.*]] = mul i64 [[TMP2]], [[TMP3]]
; CHECK-NEXT:    ret i64 [[TMP4]]
;
  %2 = shl i64 %0, 56
  %3 = call i64 @llvm.bswap.i64(i64 %2)
  %4 = mul i64 %2, %3  ; increase use of shl and bswap
  ret i64 %4
}

define i64 @bs_active_high7_multiuse(i64 %0) {
; CHECK-LABEL: @bs_active_high7_multiuse(
; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[TMP0:%.*]], 57
; CHECK-NEXT:    [[TMP3:%.*]] = lshr exact i64 [[TMP2]], 56
; CHECK-NEXT:    [[TMP4:%.*]] = mul i64 [[TMP2]], [[TMP3]]
; CHECK-NEXT:    ret i64 [[TMP4]]
;
  %2 = shl i64 %0, 57
  %3 = call i64 @llvm.bswap.i64(i64 %2)
  %4 = mul i64 %2, %3  ; increase use of shl and bswap
  ret i64 %4
}

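; A single possibly-nonzero byte just moves to its mirrored position (byte k
; of an N-byte value goes to byte N-1-k), so the bswap reduces to a constant
; shift plus mask.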
define i64 @bs_active_byte_6h(i64 %0) {
; CHECK-LABEL: @bs_active_byte_6h(
; CHECK-NEXT:    [[TMP2:%.*]] = lshr i64 [[TMP0:%.*]], 24
; CHECK-NEXT:    [[TMP3:%.*]] = and i64 [[TMP2]], 16711680
; CHECK-NEXT:    ret i64 [[TMP3]]
;
  %2 = and i64 %0, 280375465082880  ; 0xff00'00000000
  %3 = call i64 @llvm.bswap.i64(i64 %2)
  ret i64 %3
}

define i32 @bs_active_byte_3h(i32 %0) {
; CHECK-LABEL: @bs_active_byte_3h(
; CHECK-NEXT:    [[TMP2:%.*]] = lshr i32 [[TMP0:%.*]], 8
; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[TMP2]], 1536
; CHECK-NEXT:    ret i32 [[TMP3]]
;
  %2 = and i32 %0, 393216  ; 0x0006'0000
  %3 = call i32 @llvm.bswap.i32(i32 %2)
  ret i32 %3
}

define <2 x i32> @bs_active_byte_3h_v2(<2 x i32> %0) {
; CHECK-LABEL: @bs_active_byte_3h_v2(
; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP0:%.*]], <i32 8388608, i32 65536>
; CHECK-NEXT:    [[TMP3:%.*]] = lshr exact <2 x i32> [[TMP2]], splat (i32 8)
; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
;
  %2 = and <2 x i32> %0, <i32 8388608, i32 65536>  ; 0x0080'0000, 0x0001'0000
  %3 = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %2)
  ret <2 x i32> %3
}

; negative test
define i64 @bs_active_byte_78h(i64 %0) {
; CHECK-LABEL: @bs_active_byte_78h(
; CHECK-NEXT:    [[TMP2:%.*]] = and i64 [[TMP0:%.*]], 108086391056891904
; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
; CHECK-NEXT:    ret i64 [[TMP3]]
;
  %2 = and i64 %0, 108086391056891904  ; 0x01800000'00000000
  %3 = call i64 @llvm.bswap.i64(i64 %2)
  ret i64 %3
}


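; Mirror image of the active-high tests: when only low bits or bytes can be
; nonzero, the bswap reduces to a left shift (merged with the surrounding
; masks/shifts below).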
define i16 @bs_active_low1(i16 %0) {
; CHECK-LABEL: @bs_active_low1(
; CHECK-NEXT:    [[TMP2:%.*]] = lshr i16 [[TMP0:%.*]], 7
; CHECK-NEXT:    [[TMP3:%.*]] = and i16 [[TMP2]], 256
; CHECK-NEXT:    ret i16 [[TMP3]]
;
  %2 = lshr i16 %0, 15
  %3 = call i16 @llvm.bswap.i16(i16 %2)
  ret i16 %3
}

define <2 x i32> @bs_active_low8(<2 x i32> %0) {
; CHECK-LABEL: @bs_active_low8(
; CHECK-NEXT:    [[TMP2:%.*]] = shl <2 x i32> [[TMP0:%.*]], splat (i32 24)
; CHECK-NEXT:    ret <2 x i32> [[TMP2]]
;
  %2 = and <2 x i32> %0, <i32 255, i32 255>
  %3 = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %2)
  ret <2 x i32> %3
}

define <2 x i32> @bs_active_low_different(<2 x i32> %0) {
; CHECK-LABEL: @bs_active_low_different(
; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP0:%.*]], <i32 2, i32 128>
; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw <2 x i32> [[TMP2]], splat (i32 24)
; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
;
  %2 = and <2 x i32> %0, <i32 2, i32 128>
  %3 = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %2)
  ret <2 x i32> %3
}

; negative test
define <2 x i32> @bs_active_low_different_negative(<2 x i32> %0) {
; CHECK-LABEL: @bs_active_low_different_negative(
; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP0:%.*]], <i32 256, i32 255>
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP2]])
; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
;
  %2 = and <2 x i32> %0, <i32 256, i32 255>
  %3 = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %2)
  ret <2 x i32> %3
}

; negative test
define <2 x i32> @bs_active_low_undef(<2 x i32> %0) {
; CHECK-LABEL: @bs_active_low_undef(
; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i32> [[TMP0:%.*]], <i32 255, i32 undef>
; CHECK-NEXT:    [[TMP3:%.*]] = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> [[TMP2]])
; CHECK-NEXT:    ret <2 x i32> [[TMP3]]
;
  %2 = and <2 x i32> %0, <i32 255, i32 undef>
  %3 = call <2 x i32> @llvm.bswap.v2i32(<2 x i32> %2)
  ret <2 x i32> %3
}

define i64 @bs_active_low8_multiuse(i64 %0) {
; CHECK-LABEL: @bs_active_low8_multiuse(
; CHECK-NEXT:    [[TMP2:%.*]] = and i64 [[TMP0:%.*]], 255
; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 56
; CHECK-NEXT:    [[TMP4:%.*]] = mul i64 [[TMP2]], [[TMP3]]
; CHECK-NEXT:    ret i64 [[TMP4]]
;
  %2 = and i64 %0, 255
  %3 = call i64 @llvm.bswap.i64(i64 %2)
  %4 = mul i64 %2, %3  ; increase use of the 'and' and the bswap
  ret i64 %4
}

define i64 @bs_active_low7_multiuse(i64 %0) {
; CHECK-LABEL: @bs_active_low7_multiuse(
; CHECK-NEXT:    [[TMP2:%.*]] = and i64 [[TMP0:%.*]], 127
; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 56
; CHECK-NEXT:    [[TMP4:%.*]] = mul i64 [[TMP2]], [[TMP3]]
; CHECK-NEXT:    ret i64 [[TMP4]]
;
  %2 = and i64 %0, 127
  %3 = call i64 @llvm.bswap.i64(i64 %2)
  %4 = mul i64 %2, %3  ; increase use of the 'and' and the bswap
  ret i64 %4
}

define i64 @bs_active_byte_4l(i64 %0) {
; CHECK-LABEL: @bs_active_byte_4l(
; CHECK-NEXT:    [[TMP2:%.*]] = shl i64 [[TMP0:%.*]], 8
; CHECK-NEXT:    [[TMP3:%.*]] = and i64 [[TMP2]], 292057776128
; CHECK-NEXT:    ret i64 [[TMP3]]
;
  %2 = and i64 %0, 1140850688  ; 0x44000000
  %3 = call i64 @llvm.bswap.i64(i64 %2)
  ret i64 %3
}

define i32 @bs_active_byte_2l(i32 %0) {
; CHECK-LABEL: @bs_active_byte_2l(
; CHECK-NEXT:    [[TMP2:%.*]] = shl i32 [[TMP0:%.*]], 8
; CHECK-NEXT:    [[TMP3:%.*]] = and i32 [[TMP2]], 16711680
; CHECK-NEXT:    ret i32 [[TMP3]]
;
  %2 = and i32 %0, 65280  ; 0xff00
  %3 = call i32 @llvm.bswap.i32(i32 %2)
  ret i32 %3
}

define <2 x i64> @bs_active_byte_2l_v2(<2 x i64> %0) {
; CHECK-LABEL: @bs_active_byte_2l_v2(
; CHECK-NEXT:    [[TMP2:%.*]] = and <2 x i64> [[TMP0:%.*]], <i64 256, i64 65280>
; CHECK-NEXT:    [[TMP3:%.*]] = shl nuw nsw <2 x i64> [[TMP2]], splat (i64 40)
; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
;
  %2 = and <2 x i64> %0, <i64 256, i64 65280>  ; 0x0100, 0xff00
  %3 = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> %2)
  ret <2 x i64> %3
}

; negative test
define i64 @bs_active_byte_12l(i64 %0) {
; CHECK-LABEL: @bs_active_byte_12l(
; CHECK-NEXT:    [[TMP2:%.*]] = and i64 [[TMP0:%.*]], 384
; CHECK-NEXT:    [[TMP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[TMP2]])
; CHECK-NEXT:    ret i64 [[TMP3]]
;
  %2 = and i64 %0, 384  ; 0x0180
  %3 = call i64 @llvm.bswap.i64(i64 %2)
  ret i64 %3
}


declare i64 @use.i64(i64)
declare i16 @llvm.bswap.i16(i16)
declare i32 @llvm.bswap.i32(i32)
declare i64 @llvm.bswap.i64(i64)
declare <2 x i16> @llvm.bswap.v2i16(<2 x i16>)
declare <2 x i32> @llvm.bswap.v2i32(<2 x i32>)
declare <2 x i64> @llvm.bswap.v2i64(<2 x i64>)