; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver3 | FileCheck %s --check-prefix=X64

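; znver3 has AVX2 gather instructions, but the backend scalarizes this masked
; gather: the lane addresses are computed with vector arithmetic, then each
; element is loaded with vmovd/vpinsrd, avoiding vpgatherdd (hardware gathers
; are slow on Zen 3).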
define <8 x i32> @simple(ptr %base, <8 x i32> %offsets) {
; X64-LABEL: simple:
; X64:       # %bb.0:
; X64-NEXT:    vextracti128 $1, %ymm0, %xmm2
; X64-NEXT:    vpmovsxdq %xmm0, %ymm0
; X64-NEXT:    vmovq %rdi, %xmm1
; X64-NEXT:    vpbroadcastq %xmm1, %ymm1
; X64-NEXT:    vpmovsxdq %xmm2, %ymm2
; X64-NEXT:    vpsllq $2, %ymm0, %ymm0
; X64-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
; X64-NEXT:    vmovq %xmm0, %rax
; X64-NEXT:    vpextrq $1, %xmm0, %rcx
; X64-NEXT:    vextracti128 $1, %ymm0, %xmm0
; X64-NEXT:    vpsllq $2, %ymm2, %ymm2
; X64-NEXT:    vpaddq %ymm2, %ymm1, %ymm2
; X64-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X64-NEXT:    vpextrq $1, %xmm0, %rdx
; X64-NEXT:    vmovq %xmm0, %rsi
; X64-NEXT:    vextracti128 $1, %ymm2, %xmm0
; X64-NEXT:    vmovq %xmm2, %rdi
; X64-NEXT:    vpextrq $1, %xmm2, %r8
; X64-NEXT:    vpinsrd $1, (%rcx), %xmm1, %xmm1
; X64-NEXT:    vmovq %xmm0, %r9
; X64-NEXT:    vpextrq $1, %xmm0, %r10
; X64-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT:    vpinsrd $2, (%rsi), %xmm1, %xmm1
; X64-NEXT:    vpinsrd $1, (%r8), %xmm0, %xmm0
; X64-NEXT:    vpinsrd $3, (%rdx), %xmm1, %xmm1
; X64-NEXT:    vpinsrd $2, (%r9), %xmm0, %xmm0
; X64-NEXT:    vpinsrd $3, (%r10), %xmm0, %xmm0
; X64-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; X64-NEXT:    retq
  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %offsets
  %wide.masked.gather = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
  ret <8 x i32> %wide.masked.gather
}

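; The same scalarized sequence is kept under optsize; the shorter vpgatherdd
; encoding is not chosen even when optimizing for size.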
define <8 x i32> @optsize(ptr %base, <8 x i32> %offsets) optsize {
; X64-LABEL: optsize:
; X64:       # %bb.0:
; X64-NEXT:    vextracti128 $1, %ymm0, %xmm2
; X64-NEXT:    vpmovsxdq %xmm0, %ymm0
; X64-NEXT:    vmovq %rdi, %xmm1
; X64-NEXT:    vpbroadcastq %xmm1, %ymm1
; X64-NEXT:    vpmovsxdq %xmm2, %ymm2
; X64-NEXT:    vpsllq $2, %ymm0, %ymm0
; X64-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
; X64-NEXT:    vmovq %xmm0, %rax
; X64-NEXT:    vpextrq $1, %xmm0, %rcx
; X64-NEXT:    vextracti128 $1, %ymm0, %xmm0
; X64-NEXT:    vpsllq $2, %ymm2, %ymm2
; X64-NEXT:    vpaddq %ymm2, %ymm1, %ymm2
; X64-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X64-NEXT:    vpextrq $1, %xmm0, %rdx
; X64-NEXT:    vmovq %xmm0, %rsi
; X64-NEXT:    vextracti128 $1, %ymm2, %xmm0
; X64-NEXT:    vmovq %xmm2, %rdi
; X64-NEXT:    vpextrq $1, %xmm2, %r8
; X64-NEXT:    vpinsrd $1, (%rcx), %xmm1, %xmm1
; X64-NEXT:    vmovq %xmm0, %r9
; X64-NEXT:    vpextrq $1, %xmm0, %r10
; X64-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT:    vpinsrd $2, (%rsi), %xmm1, %xmm1
; X64-NEXT:    vpinsrd $1, (%r8), %xmm0, %xmm0
; X64-NEXT:    vpinsrd $3, (%rdx), %xmm1, %xmm1
; X64-NEXT:    vpinsrd $2, (%r9), %xmm0, %xmm0
; X64-NEXT:    vpinsrd $3, (%r10), %xmm0, %xmm0
; X64-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; X64-NEXT:    retq
  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %offsets
  %wide.masked.gather = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
  ret <8 x i32> %wide.masked.gather
}

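; Likewise under minsize: codegen matches the default and optsize variants.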
define <8 x i32> @minsize(ptr %base, <8 x i32> %offsets) minsize {
; X64-LABEL: minsize:
; X64:       # %bb.0:
; X64-NEXT:    vextracti128 $1, %ymm0, %xmm2
; X64-NEXT:    vpmovsxdq %xmm0, %ymm0
; X64-NEXT:    vmovq %rdi, %xmm1
; X64-NEXT:    vpbroadcastq %xmm1, %ymm1
; X64-NEXT:    vpmovsxdq %xmm2, %ymm2
; X64-NEXT:    vpsllq $2, %ymm0, %ymm0
; X64-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
; X64-NEXT:    vmovq %xmm0, %rax
; X64-NEXT:    vpextrq $1, %xmm0, %rcx
; X64-NEXT:    vextracti128 $1, %ymm0, %xmm0
; X64-NEXT:    vpsllq $2, %ymm2, %ymm2
; X64-NEXT:    vpaddq %ymm2, %ymm1, %ymm2
; X64-NEXT:    vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X64-NEXT:    vpextrq $1, %xmm0, %rdx
; X64-NEXT:    vmovq %xmm0, %rsi
; X64-NEXT:    vextracti128 $1, %ymm2, %xmm0
; X64-NEXT:    vmovq %xmm2, %rdi
; X64-NEXT:    vpextrq $1, %xmm2, %r8
; X64-NEXT:    vpinsrd $1, (%rcx), %xmm1, %xmm1
; X64-NEXT:    vmovq %xmm0, %r9
; X64-NEXT:    vpextrq $1, %xmm0, %r10
; X64-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X64-NEXT:    vpinsrd $2, (%rsi), %xmm1, %xmm1
; X64-NEXT:    vpinsrd $1, (%r8), %xmm0, %xmm0
; X64-NEXT:    vpinsrd $3, (%rdx), %xmm1, %xmm1
; X64-NEXT:    vpinsrd $2, (%r9), %xmm0, %xmm0
; X64-NEXT:    vpinsrd $3, (%r10), %xmm0, %xmm0
; X64-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; X64-NEXT:    retq
  %ptrs = getelementptr inbounds i32, ptr %base, <8 x i32> %offsets
  %wide.masked.gather = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %ptrs, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
  ret <8 x i32> %wide.masked.gather
}

declare <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr>, i32 immarg, <8 x i1>, <8 x i32>)