; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+bmi,+bmi2,+sse,+sse2,+avx,+avx2 | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+bmi2,+sse,+sse2,+avx,+avx2 | FileCheck %s --check-prefixes=CHECK,X64

; If we have a shift by sign-extended value, we can replace sign-extension
; with zero-extension.
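;
; A sketch of why this is sound: sext and zext of the i8 amount disagree only
; when the amount is negative, and a negative i8 sign-extends to an i32 with
; the top bit set, i.e. an unsigned shift amount far larger than 31. A shift
; of an i32 by an amount >= 32 produces poison, so every amount with a defined
; result extends identically either way, and e.g. @t0_shl below can be treated
; as if it had been written with a zero-extension:
;
;   %shamt_wide = zext i8 %shamt to i32 ; zext instead of sext
;   %r = shl i32 %x, %shamt_wide
;
; Hence the scalar X86 checks load the amount with movzbl rather than movsbl.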

define i32 @t0_shl(i32 %x, i8 %shamt) nounwind {
; X86-LABEL: t0_shl:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shlxl %eax, {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: t0_shl:
; X64:       # %bb.0:
; X64-NEXT:    shlxl %esi, %edi, %eax
; X64-NEXT:    retq
  %shamt_wide = sext i8 %shamt to i32
  %r = shl i32 %x, %shamt_wide
  ret i32 %r
}
define i32 @t1_lshr(i32 %x, i8 %shamt) nounwind {
; X86-LABEL: t1_lshr:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    shrxl %eax, {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: t1_lshr:
; X64:       # %bb.0:
; X64-NEXT:    shrxl %esi, %edi, %eax
; X64-NEXT:    retq
  %shamt_wide = sext i8 %shamt to i32
  %r = lshr i32 %x, %shamt_wide
  ret i32 %r
}
define i32 @t2_ashr(i32 %x, i8 %shamt) nounwind {
; X86-LABEL: t2_ashr:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    sarxl %eax, {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: t2_ashr:
; X64:       # %bb.0:
; X64-NEXT:    sarxl %esi, %edi, %eax
; X64-NEXT:    retq
  %shamt_wide = sext i8 %shamt to i32
  %r = ashr i32 %x, %shamt_wide
  ret i32 %r
}

define <4 x i32> @t3_vec_shl(<4 x i32> %x, <4 x i8> %shamt) nounwind {
; CHECK-LABEL: t3_vec_shl:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovsxbd %xmm1, %xmm1
; CHECK-NEXT:    vpsllvd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %shamt_wide = sext <4 x i8> %shamt to <4 x i32>
  %r = shl <4 x i32> %x, %shamt_wide
  ret <4 x i32> %r
}
define <4 x i32> @t4_vec_lshr(<4 x i32> %x, <4 x i8> %shamt) nounwind {
; CHECK-LABEL: t4_vec_lshr:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovsxbd %xmm1, %xmm1
; CHECK-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %shamt_wide = sext <4 x i8> %shamt to <4 x i32>
  %r = lshr <4 x i32> %x, %shamt_wide
  ret <4 x i32> %r
}
define <4 x i32> @t5_vec_ashr(<4 x i32> %x, <4 x i8> %shamt) nounwind {
; CHECK-LABEL: t5_vec_ashr:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovsxbd %xmm1, %xmm1
; CHECK-NEXT:    vpsravd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %shamt_wide = sext <4 x i8> %shamt to <4 x i32>
  %r = ashr <4 x i32> %x, %shamt_wide
  ret <4 x i32> %r
}

; This is not valid for funnel shifts: fshl and fshr take their shift amount
; modulo the bit width, so an out-of-range amount is well defined rather than
; poison, and the soundness argument above does not apply.
declare i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
declare i32 @llvm.fshr.i32(i32 %a, i32 %b, i32 %c)
define i32 @n6_fshl(i32 %x, i32 %y, i8 %shamt) nounwind {
; X86-LABEL: n6_fshl:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    shldl %cl, %edx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: n6_fshl:
; X64:       # %bb.0:
; X64-NEXT:    movl %edx, %ecx
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NEXT:    shldl %cl, %esi, %eax
; X64-NEXT:    retq
  %shamt_wide = sext i8 %shamt to i32
  %r = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %shamt_wide)
  ret i32 %r
}
define i32 @n7_fshr(i32 %x, i32 %y, i8 %shamt) nounwind {
; X86-LABEL: n7_fshr:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    shrdl %cl, %edx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: n7_fshr:
; X64:       # %bb.0:
; X64-NEXT:    movl %edx, %ecx
; X64-NEXT:    movl %esi, %eax
; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
; X64-NEXT:    shrdl %cl, %edi, %eax
; X64-NEXT:    retq
  %shamt_wide = sext i8 %shamt to i32
  %r = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %shamt_wide)
  ret i32 %r
}

; Negative test: the sign-extended value has a second use (it is stored), so
; it cannot simply be replaced with a zero-extension; note the movsbl in the
; checks below.
define i32 @n8_extrause(i32 %x, i8 %shamt, ptr %shamt_wide_store) nounwind {
; X86-LABEL: n8_extrause:
; X86:       # %bb.0:
; X86-NEXT:    movsbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl %eax, (%ecx)
; X86-NEXT:    shlxl %eax, {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: n8_extrause:
; X64:       # %bb.0:
; X64-NEXT:    movsbl %sil, %eax
; X64-NEXT:    movl %eax, (%rdx)
; X64-NEXT:    shlxl %eax, %edi, %eax
; X64-NEXT:    retq
  %shamt_wide = sext i8 %shamt to i32
  store i32 %shamt_wide, ptr %shamt_wide_store, align 4
  %r = shl i32 %x, %shamt_wide
  ret i32 %r
}