; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+sse2 | FileCheck %s --check-prefix=X64

; test vector shifts converted to proper SSE2 vector shifts when the shift
; amounts are the same.

; Uniform constant <2 x i64> shift should select the immediate form of psrlq.
define void @shift1a(<2 x i64> %val, ptr %dst) nounwind {
; X86-LABEL: shift1a:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    psrlq $32, %xmm0
; X86-NEXT:    movdqa %xmm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: shift1a:
; X64:       # %bb.0: # %entry
; X64-NEXT:    psrlq $32, %xmm0
; X64-NEXT:    movdqa %xmm0, (%rdi)
; X64-NEXT:    retq
entry:
  %lshr = lshr <2 x i64> %val, < i64 32, i64 32 >
  store <2 x i64> %lshr, ptr %dst
  ret void
}

; Variable amount splatted into both lanes should select the register form
; of psrlq (amount taken from xmm1).
define void @shift1b(<2 x i64> %val, ptr %dst, i64 %amt) nounwind {
; X86-LABEL: shift1b:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movq {{.*#+}} xmm1 = mem[0],zero
; X86-NEXT:    psrlq %xmm1, %xmm0
; X86-NEXT:    movdqa %xmm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: shift1b:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rsi, %xmm1
; X64-NEXT:    psrlq %xmm1, %xmm0
; X64-NEXT:    movdqa %xmm0, (%rdi)
; X64-NEXT:    retq
entry:
  %0 = insertelement <2 x i64> undef, i64 %amt, i32 0
  %1 = insertelement <2 x i64> %0, i64 %amt, i32 1
  %lshr = lshr <2 x i64> %val, %1
  store <2 x i64> %lshr, ptr %dst
  ret void
}

; Uniform constant <4 x i32> shift should select the immediate form of psrld.
define void @shift2a(<4 x i32> %val, ptr %dst) nounwind {
; X86-LABEL: shift2a:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    psrld $17, %xmm0
; X86-NEXT:    movdqa %xmm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: shift2a:
; X64:       # %bb.0: # %entry
; X64-NEXT:    psrld $17, %xmm0
; X64-NEXT:    movdqa %xmm0, (%rdi)
; X64-NEXT:    retq
entry:
  %lshr = lshr <4 x i32> %val, < i32 17, i32 17, i32 17, i32 17 >
  store <4 x i32> %lshr, ptr %dst
  ret void
}

; Variable amount splatted into all four lanes should select the register
; form of psrld.
define void @shift2b(<4 x i32> %val, ptr %dst, i32 %amt) nounwind {
; X86-LABEL: shift2b:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-NEXT:    psrld %xmm1, %xmm0
; X86-NEXT:    movdqa %xmm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: shift2b:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movd %esi, %xmm1
; X64-NEXT:    psrld %xmm1, %xmm0
; X64-NEXT:    movdqa %xmm0, (%rdi)
; X64-NEXT:    retq
entry:
  %0 = insertelement <4 x i32> undef, i32 %amt, i32 0
  %1 = insertelement <4 x i32> %0, i32 %amt, i32 1
  %2 = insertelement <4 x i32> %1, i32 %amt, i32 2
  %3 = insertelement <4 x i32> %2, i32 %amt, i32 3
  %lshr = lshr <4 x i32> %val, %3
  store <4 x i32> %lshr, ptr %dst
  ret void
}


; Uniform constant <8 x i16> shift should select the immediate form of psrlw.
define void @shift3a(<8 x i16> %val, ptr %dst) nounwind {
; X86-LABEL: shift3a:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    psrlw $5, %xmm0
; X86-NEXT:    movdqa %xmm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: shift3a:
; X64:       # %bb.0: # %entry
; X64-NEXT:    psrlw $5, %xmm0
; X64-NEXT:    movdqa %xmm0, (%rdi)
; X64-NEXT:    retq
entry:
  %lshr = lshr <8 x i16> %val, < i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5 >
  store <8 x i16> %lshr, ptr %dst
  ret void
}

; properly zero extend the shift amount
define void @shift3b(<8 x i16> %val, ptr %dst, i16 %amt) nounwind {
; X86-LABEL: shift3b:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movd %ecx, %xmm1
; X86-NEXT:    psrlw %xmm1, %xmm0
; X86-NEXT:    movdqa %xmm0, (%eax)
; X86-NEXT:    retl
;
; X64-LABEL: shift3b:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movzwl %si, %eax
; X64-NEXT:    movd %eax, %xmm1
; X64-NEXT:    psrlw %xmm1, %xmm0
; X64-NEXT:    movdqa %xmm0, (%rdi)
; X64-NEXT:    retq
entry:
  %0 = insertelement <8 x i16> undef, i16 %amt, i32 0
  %1 = insertelement <8 x i16> %0, i16 %amt, i32 1
  %2 = insertelement <8 x i16> %1, i16 %amt, i32 2
  %3 = insertelement <8 x i16> %2, i16 %amt, i32 3
  %4 = insertelement <8 x i16> %3, i16 %amt, i32 4
  %5 = insertelement <8 x i16> %4, i16 %amt, i32 5
  %6 = insertelement <8 x i16> %5, i16 %amt, i32 6
  %7 = insertelement <8 x i16> %6, i16 %amt, i32 7
  %lshr = lshr <8 x i16> %val, %7
  store <8 x i16> %lshr, ptr %dst
  ret void
}