; xref: /llvm-project/llvm/test/CodeGen/X86/pmovext.ll (revision 2f448bf509432c1a19ec46ab8cbc7353c03c6280)
; RUN: llc < %s -mtriple=x86_64-- -mcpu=corei7 | FileCheck %s

; rdar://11897677

; Loads 16 bytes from %src, zero-extends bytes to words through the SSE4.1
; pmovzxbw intrinsic, and writes the result to %dest with the SSE2 unaligned
; store intrinsic. The expected assembly is a pmovzxbw that takes its source
; directly from memory, immediately followed by movdqu and then ret.
; CHECK-LABEL: intrin_pmov:
; CHECK: pmovzxbw  (%{{.*}}), %xmm0
; CHECK-NEXT: movdqu
; CHECK-NEXT: ret
define void @intrin_pmov(ptr noalias %dest, ptr noalias %src) nounwind uwtable ssp {
  ; Aligned 128-bit load of the source vector.
  %1 = load <2 x i64>, ptr %src, align 16
  ; Reinterpret as sixteen bytes, the operand type the intrinsic takes.
  %2 = bitcast <2 x i64> %1 to <16 x i8>
  %3 = tail call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %2) nounwind
  ; Reinterpret the widened words as bytes for the store intrinsic.
  %4 = bitcast <8 x i16> %3 to <16 x i8>
  tail call void @llvm.x86.sse2.storeu.dq(ptr %dest, <16 x i8> %4) nounwind
  ret void
}
17
; Intrinsics used by @intrin_pmov (pmovzxbw is also used by @foo1):
; a <16 x i8> -> <8 x i16> extension and a store of <16 x i8> through a pointer.
declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone
declare void @llvm.x86.sse2.storeu.dq(ptr, <16 x i8>) nounwind

; rdar://15245794

; Takes a <4 x i16> vector passed as a double, widens it to <8 x i16> with the
; upper four lanes left undefined, and feeds it to the SSE4.1 pmovzxwd
; intrinsic. The checks expect a register-to-register pmovzxwd on %xmm0
; immediately followed by ret.
define <4 x i32> @foo0(double %v.coerce) nounwind ssp {
; CHECK-LABEL: foo0:
; CHECK: pmovzxwd %xmm0, %xmm0
; CHECK-NEXT: ret
  ; Reinterpret the 64-bit argument as four 16-bit lanes.
  %tmp = bitcast double %v.coerce to <4 x i16>
  ; Keep lanes 0-3 and pad to eight lanes; the padding lanes are undef.
  %tmp1 = shufflevector <4 x i16> %tmp, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  %tmp2 = tail call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %tmp1) nounwind
  ret <4 x i32> %tmp2
}
32
; Takes an <8 x i8> vector passed as a double, widens it to <16 x i8> with the
; upper eight lanes left undefined, and feeds it to the SSE4.1 pmovzxbw
; intrinsic. The checks expect a register-to-register pmovzxbw on %xmm0
; immediately followed by ret.
define <8 x i16> @foo1(double %v.coerce) nounwind ssp {
; CHECK-LABEL: foo1:
; CHECK: pmovzxbw %xmm0, %xmm0
; CHECK-NEXT: ret
  ; Reinterpret the 64-bit argument as eight 8-bit lanes.
  %tmp = bitcast double %v.coerce to <8 x i8>
  ; Keep lanes 0-7 and pad to sixteen lanes; the padding lanes are undef.
  %tmp1 = shufflevector <8 x i8> %tmp, <8 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %tmp2 = tail call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %tmp1)
  ret <8 x i16> %tmp2
}
42
; Intrinsic used by @foo0: a <8 x i16> -> <4 x i32> extension.
declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone
44