; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx | FileCheck %s --check-prefix=X64

; These tests just check that the plumbing is in place for @llvm.bitreverse. The
; actual output is massive at the moment as llvm.bitreverse is not yet legal.

declare i32 @llvm.bitreverse.i32(i32) readnone
declare i64 @llvm.bitreverse.i64(i64) readnone
declare <4 x i32> @llvm.bitreverse.v4i32(<4 x i32>) readnone
declare i32 @llvm.bswap.i32(i32) readnone

; fold (bitreverse undef) -> undef
define i32 @test_undef() nounwind {
; X86-LABEL: test_undef:
; X86:       # %bb.0:
; X86-NEXT:    retl
;
; X64-LABEL: test_undef:
; X64:       # %bb.0:
; X64-NEXT:    retq
  %b = call i32 @llvm.bitreverse.i32(i32 undef)
  ret i32 %b
}

; fold (bitreverse (bitreverse x)) -> x
define i32 @test_bitreverse_bitreverse(i32 %a0) nounwind {
; X86-LABEL: test_bitreverse_bitreverse:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: test_bitreverse_bitreverse:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    retq
  %b = call i32 @llvm.bitreverse.i32(i32 %a0)
  %c = call i32 @llvm.bitreverse.i32(i32 %b)
  ret i32 %c
}

; fold (bitreverse(srl (bitreverse c), x)) -> (shl c, x)
define i32 @test_bitreverse_srli_bitreverse(i32 %a0) nounwind {
; X86-LABEL: test_bitreverse_srli_bitreverse:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shll $7, %eax
; X86-NEXT:    retl
;
; X64-LABEL: test_bitreverse_srli_bitreverse:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    shll $7, %eax
; X64-NEXT:    retq
  %b = call i32 @llvm.bitreverse.i32(i32 %a0)
  %c = lshr i32 %b, 7
  %d = call i32 @llvm.bitreverse.i32(i32 %c)
  ret i32 %d
}

; i64 variant: the same fold gives (shl %a, 33); on i686 a 64-bit shift by 33
; leaves the low half zero and the high half equal to the low source dword
; shifted left by one (the addl below).
define i64 @test_bitreverse_srli_bitreverse_i64(i64 %a) nounwind {
; X86-LABEL: test_bitreverse_srli_bitreverse_i64:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    addl %edx, %edx
; X86-NEXT:    xorl %eax, %eax
; X86-NEXT:    retl
;
; X64-LABEL: test_bitreverse_srli_bitreverse_i64:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    shlq $33, %rax
; X64-NEXT:    retq
  %1 = call i64 @llvm.bitreverse.i64(i64 %a)
  %2 = lshr i64 %1, 33
  %3 = call i64 @llvm.bitreverse.i64(i64 %2)
  ret i64 %3
}

; fold (bitreverse(shl (bitreverse c), x)) -> (srl c, x)
define i32 @test_bitreverse_shli_bitreverse(i32 %a0) nounwind {
; X86-LABEL: test_bitreverse_shli_bitreverse:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shrl $7, %eax
; X86-NEXT:    retl
;
; X64-LABEL: test_bitreverse_shli_bitreverse:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    shrl $7, %eax
; X64-NEXT:    retq
  %b = call i32 @llvm.bitreverse.i32(i32 %a0)
  %c = shl i32 %b, 7
  %d = call i32 @llvm.bitreverse.i32(i32 %c)
  ret i32 %d
}

; i64 variant: the fold gives (lshr %a, 33); on i686 the high half becomes
; zero and the low half is the high source dword shifted right by one.
define i64 @test_bitreverse_shli_bitreverse_i64(i64 %a) nounwind {
; X86-LABEL: test_bitreverse_shli_bitreverse_i64:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shrl %eax
; X86-NEXT:    xorl %edx, %edx
; X86-NEXT:    retl
;
; X64-LABEL: test_bitreverse_shli_bitreverse_i64:
; X64:       # %bb.0:
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    shrq $33, %rax
; X64-NEXT:    retq
  %1 = call i64 @llvm.bitreverse.i64(i64 %a)
  %2 = shl i64 %1, 33
  %3 = call i64 @llvm.bitreverse.i64(i64 %2)
  ret i64 %3
}
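; demanded bits: the sign bit forced by the 'or' is reversed into bit 0, which
; the trailing 'and' with -2 clears, so the 'or' should be optimized away (no
; sign-mask OR appears in the checked output).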
define <4 x i32> @test_demandedbits_bitreverse(<4 x i32> %a0) nounwind {
; X86-LABEL: test_demandedbits_bitreverse:
; X86:       # %bb.0:
; X86-NEXT:    pxor %xmm1, %xmm1
; X86-NEXT:    movdqa %xmm0, %xmm2
; X86-NEXT:    punpckhbw {{.*#+}} xmm2 = xmm2[8],xmm1[8],xmm2[9],xmm1[9],xmm2[10],xmm1[10],xmm2[11],xmm1[11],xmm2[12],xmm1[12],xmm2[13],xmm1[13],xmm2[14],xmm1[14],xmm2[15],xmm1[15]
; X86-NEXT:    pshuflw {{.*#+}} xmm2 = xmm2[3,2,1,0,4,5,6,7]
; X86-NEXT:    pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,7,6,5,4]
; X86-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X86-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[3,2,1,0,4,5,6,7]
; X86-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,7,6,5,4]
; X86-NEXT:    packuswb %xmm2, %xmm0
; X86-NEXT:    movdqa %xmm0, %xmm1
; X86-NEXT:    psrlw $4, %xmm1
; X86-NEXT:    movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; X86-NEXT:    pand %xmm2, %xmm1
; X86-NEXT:    pand %xmm2, %xmm0
; X86-NEXT:    psllw $4, %xmm0
; X86-NEXT:    por %xmm1, %xmm0
; X86-NEXT:    movdqa %xmm0, %xmm1
; X86-NEXT:    psrlw $2, %xmm1
; X86-NEXT:    movdqa {{.*#+}} xmm2 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; X86-NEXT:    pand %xmm2, %xmm1
; X86-NEXT:    pand %xmm2, %xmm0
; X86-NEXT:    psllw $2, %xmm0
; X86-NEXT:    por %xmm1, %xmm0
; X86-NEXT:    movdqa %xmm0, %xmm1
; X86-NEXT:    psrlw $1, %xmm1
; X86-NEXT:    movdqa {{.*#+}} xmm2 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85]
; X86-NEXT:    pand %xmm2, %xmm1
; X86-NEXT:    pand %xmm2, %xmm0
; X86-NEXT:    paddb %xmm0, %xmm0
; X86-NEXT:    por %xmm1, %xmm0
; X86-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_demandedbits_bitreverse:
; X64:       # %bb.0:
; X64-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
; X64-NEXT:    vbroadcastss {{.*#+}} xmm1 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; X64-NEXT:    vpand %xmm1, %xmm0, %xmm2
; X64-NEXT:    vmovdqa {{.*#+}} xmm3 = [0,128,64,192,32,160,96,224,16,144,80,208,48,176,112,240]
; X64-NEXT:    vpshufb %xmm2, %xmm3, %xmm2
; X64-NEXT:    vpsrlw $4, %xmm0, %xmm0
; X64-NEXT:    vpand %xmm1, %xmm0, %xmm0
; X64-NEXT:    vmovdqa {{.*#+}} xmm1 = [0,8,4,12,2,10,6,14,1,9,5,13,3,11,7,15]
; X64-NEXT:    vpshufb %xmm0, %xmm1, %xmm0
; X64-NEXT:    vpor %xmm0, %xmm2, %xmm0
; X64-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; X64-NEXT:    retq
  %b = or <4 x i32> %a0, <i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648>
  %c = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> %b)
  %d = and <4 x i32> %c, <i32 -2, i32 -2, i32 -2, i32 -2>
  ret <4 x i32> %d
}