; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s --check-prefix=X64

; Verify that we can look through a ZERO_EXTEND/ANY_EXTEND when doing
; ComputeNumSignBits for SHL.
; We use the (sshlsat x, c) -> (shl x, c) fold as verification.
; That fold should happen if c is less than the number of sign bits in x.
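
; A rough sketch of the sign-bit arithmetic behind these tests (informal
; reasoning, not checked mechanically):
;   ashr i8 %x, 5 leaves 6 sign bits, i.e. a value in the range [-4, 3].
;   After zext to i16 and shl by 10, the two low value bits land in bits
;   10-11 and the sign fills bits 12-15, so %nsb4 has 4 sign bits.
;   Hence sshl.sat by 1, 2 or 3 cannot saturate and should fold to shl,
;   while sshl.sat by 4 may saturate and should not.
; The vector tests apply the same reasoning per element: shift amounts
; <10, 12> leave <4, 2> sign bits and <14, 12> leave <1, 2> sign bits.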

define void @computeNumSignBits_shl_zext_1(i8 %x, ptr %p) nounwind {
; X64-LABEL: computeNumSignBits_shl_zext_1:
; X64:       # %bb.0:
; X64-NEXT:    sarb $5, %dil
; X64-NEXT:    movzbl %dil, %eax
; X64-NEXT:    movl %eax, %ecx
; X64-NEXT:    shll $11, %ecx
; X64-NEXT:    movw %cx, (%rsi)
; X64-NEXT:    movl %eax, %ecx
; X64-NEXT:    shll $12, %ecx
; X64-NEXT:    movw %cx, (%rsi)
; X64-NEXT:    shll $13, %eax
; X64-NEXT:    movw %ax, (%rsi)
; X64-NEXT:    retq
  %ashr = ashr i8 %x, 5
  %zext = zext i8 %ashr to i16
  %nsb4 = shl i16 %zext, 10
  ; Expecting (sshlsat x, c) -> (shl x, c) fold.
  %tmp1 = call i16 @llvm.sshl.sat.i16(i16 %nsb4, i16 1)
  store volatile i16 %tmp1, ptr %p
  ; Expecting (sshlsat x, c) -> (shl x, c) fold.
  %tmp2 = call i16 @llvm.sshl.sat.i16(i16 %nsb4, i16 2)
  store volatile i16 %tmp2, ptr %p
  ; Expecting (sshlsat x, c) -> (shl x, c) fold.
  %tmp3 = call i16 @llvm.sshl.sat.i16(i16 %nsb4, i16 3)
  store volatile i16 %tmp3, ptr %p
  ret void
}

define void @computeNumSignBits_shl_zext_2(i8 %x, ptr %p) nounwind {
; X64-LABEL: computeNumSignBits_shl_zext_2:
; X64:       # %bb.0:
; X64-NEXT:    sarb $5, %dil
; X64-NEXT:    movzbl %dil, %eax
; X64-NEXT:    movl %eax, %ecx
; X64-NEXT:    shll $10, %ecx
; X64-NEXT:    xorl %edx, %edx
; X64-NEXT:    testw %cx, %cx
; X64-NEXT:    sets %dl
; X64-NEXT:    addl $32767, %edx # imm = 0x7FFF
; X64-NEXT:    shll $14, %eax
; X64-NEXT:    movswl %ax, %edi
; X64-NEXT:    shrl $4, %edi
; X64-NEXT:    cmpw %di, %cx
; X64-NEXT:    cmovnel %edx, %eax
; X64-NEXT:    movw %ax, (%rsi)
; X64-NEXT:    retq
  %ashr = ashr i8 %x, 5
  %zext = zext i8 %ashr to i16
  %nsb4 = shl i16 %zext, 10
  ; Only 4 sign bits, so not expecting (sshlsat x, c) -> (shl x, c) fold.
  %tmp4 = call i16 @llvm.sshl.sat.i16(i16 %nsb4, i16 4)
  store volatile i16 %tmp4, ptr %p
  ret void
}

define void @computeNumSignBits_shl_zext_vec_1(<2 x i8> %x, ptr %p) nounwind {
; X64-LABEL: computeNumSignBits_shl_zext_vec_1:
; X64:       # %bb.0:
; X64-NEXT:    psrlw $5, %xmm0
; X64-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT:    movdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
; X64-NEXT:    pxor %xmm1, %xmm0
; X64-NEXT:    psubb %xmm1, %xmm0
; X64-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; X64-NEXT:    pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [2048,8192,u,u,u,u,u,u]
; X64-NEXT:    movd %xmm0, (%rdi)
; X64-NEXT:    retq
  %ashr = ashr <2 x i8> %x, <i8 5, i8 5>
  %zext = zext <2 x i8> %ashr to <2 x i16>
  %nsb4_2 = shl <2 x i16> %zext, <i16 10, i16 12>
  ; Expecting (sshlsat x, c) -> (shl x, c) fold.
  %tmp1 = call <2 x i16> @llvm.sshl.sat.v2i16(<2 x i16> %nsb4_2, <2 x i16> <i16 1, i16 1>)
  store volatile <2 x i16> %tmp1, ptr %p
  ret void
}

define void @computeNumSignBits_shl_zext_vec_2(<2 x i8> %x, ptr %p) nounwind {
; X64-LABEL: computeNumSignBits_shl_zext_vec_2:
; X64:       # %bb.0:
; X64-NEXT:    psrlw $5, %xmm0
; X64-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT:    movdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
; X64-NEXT:    pxor %xmm1, %xmm0
; X64-NEXT:    psubb %xmm1, %xmm0
; X64-NEXT:    pxor %xmm1, %xmm1
; X64-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X64-NEXT:    pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [1024,4096,u,u,u,u,u,u]
; X64-NEXT:    movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X64-NEXT:    pand %xmm0, %xmm2
; X64-NEXT:    pcmpgtw %xmm0, %xmm1
; X64-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; X64-NEXT:    por %xmm2, %xmm1
; X64-NEXT:    movdqa %xmm0, %xmm2
; X64-NEXT:    psllw $2, %xmm2
; X64-NEXT:    movdqa %xmm2, %xmm3
; X64-NEXT:    psraw $2, %xmm3
; X64-NEXT:    pcmpeqw %xmm0, %xmm3
; X64-NEXT:    movdqa %xmm3, %xmm0
; X64-NEXT:    pandn %xmm1, %xmm0
; X64-NEXT:    pand %xmm2, %xmm3
; X64-NEXT:    por %xmm0, %xmm3
; X64-NEXT:    movd %xmm3, (%rdi)
; X64-NEXT:    retq
  %ashr = ashr <2 x i8> %x, <i8 5, i8 5>
  %zext = zext <2 x i8> %ashr to <2 x i16>
  %nsb4_2 = shl <2 x i16> %zext, <i16 10, i16 12>
  ; Not expecting (sshlsat x, c) -> (shl x, c) fold, because element 1
  ; only has 2 sign bits.
  %tmp1 = call <2 x i16> @llvm.sshl.sat.v2i16(<2 x i16> %nsb4_2, <2 x i16> <i16 2, i16 2>)
  store volatile <2 x i16> %tmp1, ptr %p
  ret void
}

define void @computeNumSignBits_shl_zext_vec_3(<2 x i8> %x, ptr %p) nounwind {
; X64-LABEL: computeNumSignBits_shl_zext_vec_3:
; X64:       # %bb.0:
; X64-NEXT:    psrlw $5, %xmm0
; X64-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-NEXT:    movdqa {{.*#+}} xmm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4]
; X64-NEXT:    pxor %xmm1, %xmm0
; X64-NEXT:    psubb %xmm1, %xmm0
; X64-NEXT:    pxor %xmm1, %xmm1
; X64-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X64-NEXT:    pmullw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 # [16384,4096,u,u,u,u,u,u]
; X64-NEXT:    movdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768]
; X64-NEXT:    pand %xmm0, %xmm2
; X64-NEXT:    pcmpgtw %xmm0, %xmm1
; X64-NEXT:    pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; X64-NEXT:    por %xmm2, %xmm1
; X64-NEXT:    movdqa %xmm0, %xmm2
; X64-NEXT:    paddw %xmm0, %xmm2
; X64-NEXT:    movdqa %xmm2, %xmm3
; X64-NEXT:    psraw $1, %xmm3
; X64-NEXT:    pcmpeqw %xmm0, %xmm3
; X64-NEXT:    movdqa %xmm3, %xmm0
; X64-NEXT:    pandn %xmm1, %xmm0
; X64-NEXT:    pand %xmm2, %xmm3
; X64-NEXT:    por %xmm0, %xmm3
; X64-NEXT:    movd %xmm3, (%rdi)
; X64-NEXT:    retq
  %ashr = ashr <2 x i8> %x, <i8 5, i8 5>
  %zext = zext <2 x i8> %ashr to <2 x i16>
  %nsb1_2 = shl <2 x i16> %zext, <i16 14, i16 12>
  ; Not expecting (sshlsat x, c) -> (shl x, c) fold, because all sign bits
  ; are shifted out for element 0.
  %tmp1 = call <2 x i16> @llvm.sshl.sat.v2i16(<2 x i16> %nsb1_2, <2 x i16> <i16 1, i16 1>)
  store volatile <2 x i16> %tmp1, ptr %p
  ret void
}