; RUN: opt -S -aarch64-sve-intrinsic-opts < %s | FileCheck %s

target triple = "aarch64-unknown-linux-gnu"

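; The aarch64-sve-intrinsic-opts pass folds a fixed-length load that is
; inserted at index 0 into an undef scalable vector and then bitcast to a
; predicate into a single <vscale x 16 x i1> load. A <vscale x 2 x i8>
; occupies vscale * 2 bytes, so the fixed vector fills the register exactly
; only when vscale is a known constant (vscale_min == vscale_max) and the
; fixed element count equals 2 * vscale; the negative tests below cover the
; cases where this does not hold. With vscale_range(1,1) (attribute #0) the
; exact-fit type is <2 x i8>.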
define <vscale x 16 x i1> @pred_load_v2i8(ptr %addr) #0 {
; CHECK-LABEL: @pred_load_v2i8(
; CHECK-NEXT:    [[TMP2:%.*]] = load <vscale x 16 x i1>, ptr %addr
; CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
  %load = load <2 x i8>, ptr %addr, align 4
  %insert = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v2i8(<vscale x 2 x i8> undef, <2 x i8> %load, i64 0)
  %ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
  ret <vscale x 16 x i1> %ret
}

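; With vscale_range(2,2) (attribute #1) the register holds 2 * 2 = 4 i8
; elements, so a <4 x i8> payload is an exact fit.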
define <vscale x 16 x i1> @pred_load_v4i8(ptr %addr) #1 {
; CHECK-LABEL: @pred_load_v4i8(
; CHECK-NEXT:    [[TMP2:%.*]] = load <vscale x 16 x i1>, ptr %addr
; CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
  %load = load <4 x i8>, ptr %addr, align 4
  %insert = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8> undef, <4 x i8> %load, i64 0)
  %ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
  ret <vscale x 16 x i1> %ret
}

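; With vscale_range(4,4) (attribute #2) the register holds 2 * 4 = 8 i8
; elements, so an <8 x i8> payload is an exact fit.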
define <vscale x 16 x i1> @pred_load_v8i8(ptr %addr) #2 {
; CHECK-LABEL: @pred_load_v8i8(
; CHECK-NEXT:    [[TMP2:%.*]] = load <vscale x 16 x i1>, ptr %addr
; CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
  %load = load <8 x i8>, ptr %addr, align 4
  %insert = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8> undef, <8 x i8> %load, i64 0)
  %ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
  ret <vscale x 16 x i1> %ret
}

; Ensure the replacement predicate load is inserted at the original load,
; not at the vector.insert in a later block.
define <vscale x 16 x i1> @pred_load_insertion_point(ptr %addr) #0 {
; CHECK-LABEL: @pred_load_insertion_point(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP2:%.*]] = load <vscale x 16 x i1>, ptr %addr
; CHECK-NEXT:    br label %bb1
; CHECK:       bb1:
; CHECK-NEXT:    ret <vscale x 16 x i1> [[TMP2]]
entry:
  %load = load <2 x i8>, ptr %addr, align 4
  br label %bb1

bb1:
  %insert = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v2i8(<vscale x 2 x i8> undef, <2 x i8> %load, i64 0)
  %ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
  ret <vscale x 16 x i1> %ret
}

; Check that a vscale that is too small prevents the fold: with
; vscale_range(1,1) the register holds only 2 i8 elements, so <4 x i8>
; does not fit.
define <vscale x 16 x i1> @pred_load_neg1(ptr %addr) #0 {
; CHECK-LABEL: @pred_load_neg1(
; CHECK:         call <vscale x 2 x i8> @llvm.vector.insert
  %load = load <4 x i8>, ptr %addr, align 4
  %insert = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8> undef, <4 x i8> %load, i64 0)
  %ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
  ret <vscale x 16 x i1> %ret
}

; Check that a vscale that is too large prevents the fold: with
; vscale_range(4,4) the register holds 8 i8 elements, so <4 x i8> fills
; only half of it.
define <vscale x 16 x i1> @pred_load_neg2(ptr %addr) #2 {
; CHECK-LABEL: @pred_load_neg2(
; CHECK:         call <vscale x 2 x i8> @llvm.vector.insert
  %load = load <4 x i8>, ptr %addr, align 4
  %insert = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8> undef, <4 x i8> %load, i64 0)
  %ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
  ret <vscale x 16 x i1> %ret
}

; Check that a non-zero insertion index prevents the fold.
define <vscale x 16 x i1> @pred_load_neg3(ptr %addr) #1 {
; CHECK-LABEL: @pred_load_neg3(
; CHECK:         call <vscale x 2 x i8> @llvm.vector.insert
  %load = load <4 x i8>, ptr %addr, align 4
  %insert = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8> undef, <4 x i8> %load, i64 4)
  %ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
  ret <vscale x 16 x i1> %ret
}

; Check that an unknown register size (vscale_range with differing min and
; max) prevents the fold.
define <vscale x 16 x i1> @pred_load_neg4(ptr %addr) #3 {
; CHECK-LABEL: @pred_load_neg4(
; CHECK:         call <vscale x 2 x i8> @llvm.vector.insert
  %load = load <4 x i8>, ptr %addr, align 4
  %insert = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8> undef, <4 x i8> %load, i64 0)
  %ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
  ret <vscale x 16 x i1> %ret
}

; Check that insertion into a non-undef vector prevents the fold; the pass
; only matches inserts into undef.
define <vscale x 16 x i1> @pred_load_neg5(ptr %addr, <vscale x 2 x i8> %passthru) #1 {
; CHECK-LABEL: @pred_load_neg5(
; CHECK:         call <vscale x 2 x i8> @llvm.vector.insert
  %load = load <4 x i8>, ptr %addr, align 4
  %insert = tail call <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8> %passthru, <4 x i8> %load, i64 0)
  %ret = bitcast <vscale x 2 x i8> %insert to <vscale x 16 x i1>
  ret <vscale x 16 x i1> %ret
}

declare <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v2i8(<vscale x 2 x i8>, <2 x i8>, i64)
declare <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v4i8(<vscale x 2 x i8>, <4 x i8>, i64)
declare <vscale x 2 x i8> @llvm.vector.insert.nxv2i8.v8i8(<vscale x 2 x i8>, <8 x i8>, i64)

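; vscale_range(min,max) makes the size of <vscale x 2 x i8> a compile-time
; constant only when min == max: #0 -> 2 bytes, #1 -> 4 bytes, #2 -> 8 bytes,
; while #3 only bounds it between 4 and 8 bytes.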
attributes #0 = { "target-features"="+sve" vscale_range(1,1) }
attributes #1 = { "target-features"="+sve" vscale_range(2,2) }
attributes #2 = { "target-features"="+sve" vscale_range(4,4) }
attributes #3 = { "target-features"="+sve" vscale_range(2,4) }