; xref: /llvm-project/llvm/test/CodeGen/AArch64/sve-masked-gather.ll (revision 1ee315ae7964c8433b772e0b5d667834994ba753)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s

; Zero-extended i8 gather: expected to fold the zext into an unsigned ld1b.
define <vscale x 2 x i64> @masked_gather_nxv2i8(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1b { z0.d }, p0/z, [z0.d]
; CHECK-NEXT:    ret
  %vals = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x ptr> %ptrs, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x i8> undef)
  %vals.zext = zext <vscale x 2 x i8> %vals to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %vals.zext
}

; Zero-extended i16 gather: expected to fold the zext into an unsigned ld1h.
define <vscale x 2 x i64> @masked_gather_nxv2i16(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [z0.d]
; CHECK-NEXT:    ret
  %vals = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x ptr> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x i16> undef)
  %vals.zext = zext <vscale x 2 x i16> %vals to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %vals.zext
}

; Zero-extended i32 gather: expected to fold the zext into an unsigned ld1w.
define <vscale x 2 x i64> @masked_gather_nxv2i32(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z0.d }, p0/z, [z0.d]
; CHECK-NEXT:    ret
  %vals = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x ptr> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x i32> undef)
  %vals.zext = zext <vscale x 2 x i32> %vals to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %vals.zext
}

; Full-width i64 gather: no extension needed, plain ld1d.
define <vscale x 2 x i64> @masked_gather_nxv2i64(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [z0.d]
; CHECK-NEXT:    ret
  %vals = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64(<vscale x 2 x ptr> %ptrs, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x i64> undef)
  ret <vscale x 2 x i64> %vals
}

; f16 gather: half elements in 64-bit containers load via ld1h.
define <vscale x 2 x half> @masked_gather_nxv2f16(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [z0.d]
; CHECK-NEXT:    ret
  %vals = call <vscale x 2 x half> @llvm.masked.gather.nxv2f16(<vscale x 2 x ptr> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x half> undef)
  ret <vscale x 2 x half> %vals
}

; bf16 gather (requires +bf16 via attribute #0): loads via ld1h like f16.
define <vscale x 2 x bfloat> @masked_gather_nxv2bf16(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %mask) #0 {
; CHECK-LABEL: masked_gather_nxv2bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [z0.d]
; CHECK-NEXT:    ret
  %vals = call <vscale x 2 x bfloat> @llvm.masked.gather.nxv2bf16(<vscale x 2 x ptr> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x bfloat> undef)
  ret <vscale x 2 x bfloat> %vals
}

; f32 gather: float elements in 64-bit containers load via ld1w.
define <vscale x 2 x float> @masked_gather_nxv2f32(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z0.d }, p0/z, [z0.d]
; CHECK-NEXT:    ret
  %vals = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32(<vscale x 2 x ptr> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x float> undef)
  ret <vscale x 2 x float> %vals
}

; f64 gather: full-width elements, plain ld1d.
define <vscale x 2 x double> @masked_gather_nxv2f64(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [z0.d]
; CHECK-NEXT:    ret
  %vals = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64(<vscale x 2 x ptr> %ptrs, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x double> undef)
  ret <vscale x 2 x double> %vals
}

; Sign-extended i8 gather: expected to fold the sext into a signed ld1sb.
define <vscale x 2 x i64> @masked_sgather_nxv2i8(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_sgather_nxv2i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sb { z0.d }, p0/z, [z0.d]
; CHECK-NEXT:    ret
  %vals = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x ptr> %ptrs, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x i8> undef)
  %vals.sext = sext <vscale x 2 x i8> %vals to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %vals.sext
}

; Sign-extended i16 gather: expected to fold the sext into a signed ld1sh.
define <vscale x 2 x i64> @masked_sgather_nxv2i16(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_sgather_nxv2i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sh { z0.d }, p0/z, [z0.d]
; CHECK-NEXT:    ret
  %vals = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x ptr> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x i16> undef)
  %vals.sext = sext <vscale x 2 x i16> %vals to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %vals.sext
}

; Sign-extended i32 gather: expected to fold the sext into a signed ld1sw.
define <vscale x 2 x i64> @masked_sgather_nxv2i32(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_sgather_nxv2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sw { z0.d }, p0/z, [z0.d]
; CHECK-NEXT:    ret
  %vals = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x ptr> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x i32> undef)
  %vals.sext = sext <vscale x 2 x i32> %vals to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %vals.sext
}

; Non-trivial passthru: the gather only writes active lanes, so a sel is
; emitted afterwards to merge the passthru into the inactive lanes.
define <vscale x 2 x i64> @masked_gather_passthru(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %mask, <vscale x 2 x i32> %passthru) {
; CHECK-LABEL: masked_gather_passthru:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sw { z0.d }, p0/z, [z0.d]
; CHECK-NEXT:    sel z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    ret
  %vals = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x ptr> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x i32> %passthru)
  %vals.sext = sext <vscale x 2 x i32> %vals to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %vals.sext
}

; Zero passthru: no merging select is needed, since the predicated load
; already zeroes inactive lanes (p0/z zeroing form).
define <vscale x 2 x i64> @masked_gather_passthru_0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_passthru_0:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sw { z0.d }, p0/z, [z0.d]
; CHECK-NEXT:    ret
  %vals = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x ptr> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x i32> zeroinitializer)
  %vals.sext = sext <vscale x 2 x i32> %vals to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %vals.sext
}

; 24-byte stride (not a power of two): cannot use a shifted-offset addressing
; mode, so the offsets are scaled with an explicit mul before the gather.
%i64_x3 = type { i64, i64, i64}
define <vscale x 2 x i64> @masked_gather_non_power_of_two_based_scaling(ptr %base, <vscale x 2 x i64> %offsets, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_non_power_of_two_based_scaling:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mul z0.d, z0.d, #24
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, z0.d]
; CHECK-NEXT:    ret
  %ptrs = getelementptr inbounds %i64_x3, ptr %base, <vscale x 2 x i64> %offsets
  %vals = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64(<vscale x 2 x ptr> %ptrs, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x i64> undef)
  ret <vscale x 2 x i64> %vals
}

; 32-byte stride: power of two but larger than the ld1d element shift (#3),
; so the scaling is done with an explicit lsl #5 before the gather.
%i64_x4 = type { i64, i64, i64, i64}
define <vscale x 2 x i64> @masked_gather_non_element_type_based_scaling(ptr %base, <vscale x 2 x i64> %offsets, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_non_element_type_based_scaling:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsl z0.d, z0.d, #5
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, z0.d]
; CHECK-NEXT:    ret
  %ptrs = getelementptr inbounds %i64_x4, ptr %base, <vscale x 2 x i64> %offsets
  %vals = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64(<vscale x 2 x ptr> %ptrs, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x i64> undef)
  ret <vscale x 2 x i64> %vals
}

declare <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x ptr>, i32, <vscale x 2 x i1>, <vscale x 2 x i8>)
declare <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x ptr>, i32, <vscale x 2 x i1>, <vscale x 2 x i16>)
declare <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x ptr>, i32, <vscale x 2 x i1>, <vscale x 2 x i32>)
declare <vscale x 2 x i64> @llvm.masked.gather.nxv2i64(<vscale x 2 x ptr>, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
declare <vscale x 2 x half> @llvm.masked.gather.nxv2f16(<vscale x 2 x ptr>, i32, <vscale x 2 x i1>, <vscale x 2 x half>)
declare <vscale x 2 x bfloat> @llvm.masked.gather.nxv2bf16(<vscale x 2 x ptr>, i32, <vscale x 2 x i1>, <vscale x 2 x bfloat>)
declare <vscale x 2 x float> @llvm.masked.gather.nxv2f32(<vscale x 2 x ptr>, i32, <vscale x 2 x i1>, <vscale x 2 x float>)
declare <vscale x 2 x double> @llvm.masked.gather.nxv2f64(<vscale x 2 x ptr>, i32, <vscale x 2 x i1>, <vscale x 2 x double>)
attributes #0 = { "target-features"="+sve,+bf16" }
