; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=X64

; These tests check that DAG combines involving @llvm.bswap fold to the
; expected compact instruction sequences.

declare i16 @llvm.bswap.i16(i16) readnone
declare i32 @llvm.bswap.i32(i32) readnone
declare i64 @llvm.bswap.i64(i64) readnone
declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>) readnone

; fold (bswap undef) -> undef
define i32 @test_undef() nounwind {
; X86-LABEL: test_undef:
; X86:       # %bb.0:
; X86-NEXT:    retl
;
; X64-LABEL: test_undef:
; X64:       # %bb.0:
; X64-NEXT:    retq
  %b = call i32 @llvm.bswap.i32(i32 undef)
  ret i32 %b
}

; fold (bswap (bswap x)) -> x
define i32 @test_bswap_bswap(i32 %a0) nounwind {
; X86-LABEL: test_bswap_bswap:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: test_bswap_bswap:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    retq
  %b = call i32 @llvm.bswap.i32(i32 %a0)
  %c = call i32 @llvm.bswap.i32(i32 %b)
  ret i32 %c
}

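; fold (bswap (srl (bswap x), 8)) -> (shl x, 8)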
define i16 @test_bswap_srli_8_bswap_i16(i16 %a) nounwind {
; X86-LABEL: test_bswap_srli_8_bswap_i16:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shll $8, %eax
; X86-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-NEXT:    retl
;
; X64-LABEL: test_bswap_srli_8_bswap_i16:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    shll $8, %eax
; X64-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq
  %1 = call i16 @llvm.bswap.i16(i16 %a)
  %2 = lshr i16 %1, 8
  %3 = call i16 @llvm.bswap.i16(i16 %2)
  ret i16 %3
}

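; fold (bswap (srl (bswap x), 8)) -> (shl x, 8)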
define i32 @test_bswap_srli_8_bswap_i32(i32 %a) nounwind {
; X86-LABEL: test_bswap_srli_8_bswap_i32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shll $8, %eax
; X86-NEXT:    retl
;
; X64-LABEL: test_bswap_srli_8_bswap_i32:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    shll $8, %eax
; X64-NEXT:    retq
  %1 = call i32 @llvm.bswap.i32(i32 %a)
  %2 = lshr i32 %1, 8
  %3 = call i32 @llvm.bswap.i32(i32 %2)
  ret i32 %3
}

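; fold (bswap (srl (bswap x), 16)) -> (shl x, 16)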
define i64 @test_bswap_srli_16_bswap_i64(i64 %a) nounwind {
; X86-LABEL: test_bswap_srli_16_bswap_i64:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    shll $16, %eax
; X86-NEXT:    retl
;
; X64-LABEL: test_bswap_srli_16_bswap_i64:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    shlq $16, %rax
; X64-NEXT:    retq
  %1 = call i64 @llvm.bswap.i64(i64 %a)
  %2 = lshr i64 %1, 16
  %3 = call i64 @llvm.bswap.i64(i64 %2)
  ret i64 %3
}

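; fold (bswap (shl (bswap x), 8)) -> (srl x, 8)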
define i16 @test_bswap_shli_8_bswap_i16(i16 %a) nounwind {
; X86-LABEL: test_bswap_shli_8_bswap_i16:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    # kill: def $ax killed $ax killed $eax
; X86-NEXT:    retl
;
; X64-LABEL: test_bswap_shli_8_bswap_i16:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    movzbl %ah, %eax
; X64-NEXT:    # kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq
  %1 = call i16 @llvm.bswap.i16(i16 %a)
  %2 = shl i16 %1, 8
  %3 = call i16 @llvm.bswap.i16(i16 %2)
  ret i16 %3
}

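; fold (bswap (shl (bswap x), 8)) -> (srl x, 8)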
define i32 @test_bswap_shli_8_bswap_i32(i32 %a) nounwind {
; X86-LABEL: test_bswap_shli_8_bswap_i32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shrl $8, %eax
; X86-NEXT:    retl
;
; X64-LABEL: test_bswap_shli_8_bswap_i32:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    shrl $8, %eax
; X64-NEXT:    retq
  %1 = call i32 @llvm.bswap.i32(i32 %a)
  %2 = shl i32 %1, 8
  %3 = call i32 @llvm.bswap.i32(i32 %2)
  ret i32 %3
}

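; fold (bswap (shl (bswap x), 16)) -> (srl x, 16)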
define i64 @test_bswap_shli_16_bswap_i64(i64 %a) nounwind {
; X86-LABEL: test_bswap_shli_16_bswap_i64:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    retl
;
; X64-LABEL: test_bswap_shli_16_bswap_i64:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    shrq $16, %rax
; X64-NEXT:    retq
  %1 = call i64 @llvm.bswap.i64(i64 %a)
  %2 = shl i64 %1, 16
  %3 = call i64 @llvm.bswap.i64(i64 %2)
  ret i64 %3
}

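; the (or x, 0xFF000000) only feeds bits that the final and masks away after
; the bswap, so it is removed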
define i32 @test_demandedbits_bswap(i32 %a0) nounwind {
; X86-LABEL: test_demandedbits_bswap:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    bswapl %eax
; X86-NEXT:    andl $-65536, %eax # imm = 0xFFFF0000
; X86-NEXT:    retl
;
; X64-LABEL: test_demandedbits_bswap:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    bswapl %eax
; X64-NEXT:    andl $-65536, %eax # imm = 0xFFFF0000
; X64-NEXT:    retq
  %b = or i32 %a0, 4278190080
  %c = call i32 @llvm.bswap.i32(i32 %b)
  %d = and i32 %c, 4294901760
  ret i32 %d
}

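; only one byte of the loaded i64 reaches the store, so the load is narrowed
; to a single byte load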
define void @demand_one_loaded_byte(ptr %xp, ptr %yp) {
; X86-LABEL: demand_one_loaded_byte:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movzbl 4(%ecx), %ecx
; X86-NEXT:    movb %cl, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: demand_one_loaded_byte:
; X64:       # %bb.0:
; X64-NEXT:    movzbl 4(%rdi), %eax
; X64-NEXT:    movb %al, (%rsi)
; X64-NEXT:    retq
  %x = load i64, ptr %xp, align 8
  %x_zzzz7654 = lshr i64 %x, 32
  %x_z7654zzz = shl nuw nsw i64 %x_zzzz7654, 24
  %x_4zzz = trunc i64 %x_z7654zzz to i32
  %y = load i32, ptr %yp, align 4
  %y_321z = and i32 %y, -256
  %x_zzz4 = call i32 @llvm.bswap.i32(i32 %x_4zzz)
  %r = or i32 %x_zzz4, %y_321z
  store i32 %r, ptr %yp, align 4
  ret void
}

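; fold (bswap (shl (zext x), 48)) -> (zext (bswap x))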
define i64 @test_bswap64_shift48_zext(i16 %a0) {
; X86-LABEL: test_bswap64_shift48_zext:
; X86:       # %bb.0:
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    rolw $8, %ax
; X86-NEXT:    movzwl %ax, %eax
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    retl
;
; X64-LABEL: test_bswap64_shift48_zext:
; X64:       # %bb.0:
; X64-NEXT:    rolw $8, %di
; X64-NEXT:    movzwl %di, %eax
; X64-NEXT:    retq
  %z = zext i16 %a0 to i64
  %s = shl i64 %z, 48
  %b = call i64 @llvm.bswap.i64(i64 %s)
  ret i64 %b
}

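; fold (bswap (shl x, 48)) -> (zext (bswap (trunc x to i16)))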
define i64 @test_bswap64_shift48(i64 %a0) {
; X86-LABEL: test_bswap64_shift48:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    rolw $8, %ax
; X86-NEXT:    movzwl %ax, %eax
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    retl
;
; X64-LABEL: test_bswap64_shift48:
; X64:       # %bb.0:
; X64-NEXT:    rolw $8, %di
; X64-NEXT:    movzwl %di, %eax
; X64-NEXT:    retq
  %s = shl i64 %a0, 48
  %b = call i64 @llvm.bswap.i64(i64 %s)
  ret i64 %b
}

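; negative test - shift amount is not a whole number of bytes, so the bswap is
; not folded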
define i32 @test_bswap32_shift17(i32 %a0) {
; X86-LABEL: test_bswap32_shift17:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shll $17, %eax
; X86-NEXT:    bswapl %eax
; X86-NEXT:    retl
;
; X64-LABEL: test_bswap32_shift17:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    shll $17, %eax
; X64-NEXT:    bswapl %eax
; X64-NEXT:    retq
  %s = shl i32 %a0, 17
  %b = call i32 @llvm.bswap.i32(i32 %s)
  ret i32 %b
}

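; fold (bswap (and (bswap x), y)) -> (and x, (bswap y))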
define i32 @bs_and_lhs_bs32(i32 %a, i32 %b) #0 {
; X86-LABEL: bs_and_lhs_bs32:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    bswapl %eax
; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: bs_and_lhs_bs32:
; X64:       # %bb.0:
; X64-NEXT:    movl %esi, %eax
; X64-NEXT:    bswapl %eax
; X64-NEXT:    andl %edi, %eax
; X64-NEXT:    retq
  %1 = tail call i32 @llvm.bswap.i32(i32 %a)
  %2 = and i32 %1, %b
  %3 = tail call i32 @llvm.bswap.i32(i32 %2)
  ret i32 %3
}

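; fold (bswap (or (bswap x), y)) -> (or x, (bswap y))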
define i64 @bs_or_lhs_bs64(i64 %a, i64 %b) #0 {
; X86-LABEL: bs_or_lhs_bs64:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    bswapl %eax
; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    bswapl %edx
; X86-NEXT:    orl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    retl
;
; X64-LABEL: bs_or_lhs_bs64:
; X64:       # %bb.0:
; X64-NEXT:    movq %rsi, %rax
; X64-NEXT:    bswapq %rax
; X64-NEXT:    orq %rdi, %rax
; X64-NEXT:    retq
  %1 = tail call i64 @llvm.bswap.i64(i64 %a)
  %2 = or i64 %1, %b
  %3 = tail call i64 @llvm.bswap.i64(i64 %2)
  ret i64 %3
}

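; fold (bswap (xor x, (bswap y))) -> (xor (bswap x), y)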
define i64 @bs_xor_rhs_bs64(i64 %a, i64 %b) #0 {
; X86-LABEL: bs_xor_rhs_bs64:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    bswapl %eax
; X86-NEXT:    xorl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    bswapl %edx
; X86-NEXT:    xorl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    retl
;
; X64-LABEL: bs_xor_rhs_bs64:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    bswapq %rax
; X64-NEXT:    xorq %rsi, %rax
; X64-NEXT:    retq
  %1 = tail call i64 @llvm.bswap.i64(i64 %b)
  %2 = xor i64 %a, %1
  %3 = tail call i64 @llvm.bswap.i64(i64 %2)
  ret i64 %3
}

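; the fold still applies when all the bswap operands have extra uses:
; (bswap (and (bswap x), (bswap y))) -> (and x, y)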
define i32 @bs_and_all_operand_multiuse(i32 %a, i32 %b) #0 {
; X86-LABEL: bs_and_all_operand_multiuse:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl %eax, %edx
; X86-NEXT:    bswapl %edx
; X86-NEXT:    andl %ecx, %eax
; X86-NEXT:    bswapl %ecx
; X86-NEXT:    imull %edx, %eax
; X86-NEXT:    imull %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: bs_and_all_operand_multiuse:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    bswapl %eax
; X64-NEXT:    andl %esi, %edi
; X64-NEXT:    bswapl %esi
; X64-NEXT:    imull %edi, %eax
; X64-NEXT:    imull %esi, %eax
; X64-NEXT:    retq
  %1 = tail call i32 @llvm.bswap.i32(i32 %a)
  %2 = tail call i32 @llvm.bswap.i32(i32 %b)
  %3 = and i32 %1, %2
  %4 = tail call i32 @llvm.bswap.i32(i32 %3)
  %5 = mul i32 %1, %4 ; increase use of left bswap
  %6 = mul i32 %2, %5 ; increase use of right bswap

  ret i32 %6
}

; negative test - the and result has an extra use, so the fold is not applied
define i32 @bs_and_rhs_bs32_multiuse1(i32 %a, i32 %b) #0 {
; X86-LABEL: bs_and_rhs_bs32_multiuse1:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    bswapl %ecx
; X86-NEXT:    andl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    bswapl %eax
; X86-NEXT:    imull %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: bs_and_rhs_bs32_multiuse1:
; X64:       # %bb.0:
; X64-NEXT:    bswapl %esi
; X64-NEXT:    andl %edi, %esi
; X64-NEXT:    movl %esi, %eax
; X64-NEXT:    bswapl %eax
; X64-NEXT:    imull %esi, %eax
; X64-NEXT:    retq
  %1 = tail call i32 @llvm.bswap.i32(i32 %b)
  %2 = and i32 %1, %a
  %3 = tail call i32 @llvm.bswap.i32(i32 %2)
  %4 = mul i32 %2, %3 ; increase use of logical op
  ret i32 %4
}

; negative test - the inner bswap has an extra use, so the fold is not applied
define i32 @bs_and_rhs_bs32_multiuse2(i32 %a, i32 %b) #0 {
; X86-LABEL: bs_and_rhs_bs32_multiuse2:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    bswapl %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    andl %ecx, %eax
; X86-NEXT:    bswapl %eax
; X86-NEXT:    imull %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: bs_and_rhs_bs32_multiuse2:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    bswapl %esi
; X64-NEXT:    andl %esi, %eax
; X64-NEXT:    bswapl %eax
; X64-NEXT:    imull %esi, %eax
; X64-NEXT:    retq
  %1 = tail call i32 @llvm.bswap.i32(i32 %b)
  %2 = and i32 %1, %a
  %3 = tail call i32 @llvm.bswap.i32(i32 %2)
  %4 = mul i32 %1, %3 ; increase use of inner bswap
  ret i32 %4
}

; negative test - shift amount is not a whole number of bytes, so the bswap is
; not folded
define i64 @test_bswap64_shift17(i64 %a0) {
; X86-LABEL: test_bswap64_shift17:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shldl $17, %edx, %eax
; X86-NEXT:    shll $17, %edx
; X86-NEXT:    bswapl %eax
; X86-NEXT:    bswapl %edx
; X86-NEXT:    retl
;
; X64-LABEL: test_bswap64_shift17:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    shlq $17, %rax
; X64-NEXT:    bswapq %rax
; X64-NEXT:    retq
  %s = shl i64 %a0, 17
  %b = call i64 @llvm.bswap.i64(i64 %s)
  ret i64 %b
}

; negative test - the shifted value has an extra use, so the fold is not
; applied
define i64 @test_bswap64_shift48_multiuse(i64 %a0, ptr %a1) {
; X86-LABEL: test_bswap64_shift48_multiuse:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shll $16, %eax
; X86-NEXT:    movl %eax, 4(%ecx)
; X86-NEXT:    bswapl %eax
; X86-NEXT:    movl %eax, (%ecx)
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    retl
;
; X64-LABEL: test_bswap64_shift48_multiuse:
; X64:       # %bb.0:
; X64-NEXT:    shlq $48, %rdi
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    bswapq %rax
; X64-NEXT:    orq %rax, %rdi
; X64-NEXT:    movq %rdi, (%rsi)
; X64-NEXT:    retq
  %s = shl i64 %a0, 48
  %b = call i64 @llvm.bswap.i64(i64 %s)
  %a = add i64 %s, %b
  store i64 %a, ptr %a1
  ret i64 %b
}