; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple aarch64-none-linux-gnu -mattr=+sve2p1 -stop-after=finalize-isel | FileCheck %s --check-prefix=CHECK

target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
target triple = "aarch64-none-linux-gnu"

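; Check that the "=w" output stays in the full ZPR class while the 'y' input
; constraint is restricted to z0-z7 (register class ZPR_3b).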
define <vscale x 16 x i8> @test_svadd_i8(<vscale x 16 x i8> %Zn, <vscale x 16 x i8> %Zm) {
  ; CHECK-LABEL: name: test_svadd_i8
  ; CHECK: bb.0 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $z0, $z1
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:zpr = COPY $z1
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:zpr = COPY $z0
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:zpr = COPY [[COPY1]]
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:zpr_3b = COPY [[COPY]]
  ; CHECK-NEXT:   INLINEASM &"add $0.b, $1.b, $2.b", 0 /* attdialect */, {{[0-9]+}} /* regdef:ZPR */, def %2, {{[0-9]+}} /* reguse:ZPR */, [[COPY2]], {{[0-9]+}} /* reguse:ZPR_3b */, [[COPY3]]
  ; CHECK-NEXT:   $z0 = COPY %2
  ; CHECK-NEXT:   RET_ReallyLR implicit $z0
  %1 = tail call <vscale x 16 x i8> asm "add $0.b, $1.b, $2.b", "=w,w,y"(<vscale x 16 x i8> %Zn, <vscale x 16 x i8> %Zm)
  ret <vscale x 16 x i8> %1
}

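; Check that the 'x' input constraint is restricted to z0-z15 (ZPR_4b).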
define <vscale x 2 x i64> @test_svsub_i64(<vscale x 2 x i64> %Zn, <vscale x 2 x i64> %Zm) {
  ; CHECK-LABEL: name: test_svsub_i64
  ; CHECK: bb.0 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $z0, $z1
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:zpr = COPY $z1
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:zpr = COPY $z0
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:zpr = COPY [[COPY1]]
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:zpr_4b = COPY [[COPY]]
  ; CHECK-NEXT:   INLINEASM &"sub $0.d, $1.d, $2.d", 0 /* attdialect */, {{[0-9]+}} /* regdef:ZPR */, def %2, {{[0-9]+}} /* reguse:ZPR */, [[COPY2]], {{[0-9]+}} /* reguse:ZPR_4b */, [[COPY3]]
  ; CHECK-NEXT:   $z0 = COPY %2
  ; CHECK-NEXT:   RET_ReallyLR implicit $z0
  %1 = tail call <vscale x 2 x i64> asm "sub $0.d, $1.d, $2.d", "=w,w,x"(<vscale x 2 x i64> %Zn, <vscale x 2 x i64> %Zm)
  ret <vscale x 2 x i64> %1
}

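; Same 'y' (z0-z7) constraint as above, here with half-precision elements.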
define <vscale x 8 x half> @test_svfmul_f16(<vscale x 8 x half> %Zn, <vscale x 8 x half> %Zm) {
  ; CHECK-LABEL: name: test_svfmul_f16
  ; CHECK: bb.0 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $z0, $z1
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:zpr = COPY $z1
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:zpr = COPY $z0
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:zpr = COPY [[COPY1]]
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:zpr_3b = COPY [[COPY]]
  ; CHECK-NEXT:   INLINEASM &"fmul $0.h, $1.h, $2.h", 0 /* attdialect */, {{[0-9]+}} /* regdef:ZPR */, def %2, {{[0-9]+}} /* reguse:ZPR */, [[COPY2]], {{[0-9]+}} /* reguse:ZPR_3b */, [[COPY3]]
  ; CHECK-NEXT:   $z0 = COPY %2
  ; CHECK-NEXT:   RET_ReallyLR implicit $z0
  %1 = tail call <vscale x 8 x half> asm "fmul $0.h, $1.h, $2.h", "=w,w,y"(<vscale x 8 x half> %Zn, <vscale x 8 x half> %Zm)
  ret <vscale x 8 x half> %1
}

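; Same 'x' (z0-z15) constraint as above, here with single-precision elements.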
define <vscale x 4 x float> @test_svfmul_f(<vscale x 4 x float> %Zn, <vscale x 4 x float> %Zm) {
  ; CHECK-LABEL: name: test_svfmul_f
  ; CHECK: bb.0 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $z0, $z1
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:zpr = COPY $z1
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:zpr = COPY $z0
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:zpr = COPY [[COPY1]]
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:zpr_4b = COPY [[COPY]]
  ; CHECK-NEXT:   INLINEASM &"fmul $0.s, $1.s, $2.s", 0 /* attdialect */, {{[0-9]+}} /* regdef:ZPR */, def %2, {{[0-9]+}} /* reguse:ZPR */, [[COPY2]], {{[0-9]+}} /* reguse:ZPR_4b */, [[COPY3]]
  ; CHECK-NEXT:   $z0 = COPY %2
  ; CHECK-NEXT:   RET_ReallyLR implicit $z0
  %1 = tail call <vscale x 4 x float> asm "fmul $0.s, $1.s, $2.s", "=w,w,x"(<vscale x 4 x float> %Zn, <vscale x 4 x float> %Zm)
  ret <vscale x 4 x float> %1
}

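; Check that the 'Upl' predicate constraint is restricted to p0-p7 (PPR_3b).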
define <vscale x 8 x half> @test_svfadd_f16(<vscale x 16 x i1> %Pg, <vscale x 8 x half> %Zn, <vscale x 8 x half> %Zm) {
  ; CHECK-LABEL: name: test_svfadd_f16
  ; CHECK: bb.0 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $p0, $z0, $z1
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:zpr = COPY $z1
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:zpr = COPY $z0
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:ppr = COPY $p0
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:ppr_3b = COPY [[COPY2]]
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:zpr = COPY [[COPY1]]
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:zpr = COPY [[COPY]]
  ; CHECK-NEXT:   INLINEASM &"fadd $0.h, $1/m, $2.h, $3.h", 0 /* attdialect */, {{[0-9]+}} /* regdef:ZPR */, def %3, {{[0-9]+}} /* reguse:PPR_3b */, [[COPY3]], {{[0-9]+}} /* reguse:ZPR */, [[COPY4]], {{[0-9]+}} /* reguse:ZPR */, [[COPY5]]
  ; CHECK-NEXT:   $z0 = COPY %3
  ; CHECK-NEXT:   RET_ReallyLR implicit $z0
  %1 = tail call <vscale x 8 x half> asm "fadd $0.h, $1/m, $2.h, $3.h", "=w,@3Upl,w,w"(<vscale x 16 x i1> %Pg, <vscale x 8 x half> %Zn, <vscale x 8 x half> %Zm)
  ret <vscale x 8 x half> %1
}

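; Check that 'Upa' accepts any predicate register (PPR) and that the "0"
; constraint ties the vector input to the output operand (tied-def).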
define <vscale x 4 x i32> @test_incp(<vscale x 16 x i1> %Pg, <vscale x 4 x i32> %Zn) {
  ; CHECK-LABEL: name: test_incp
  ; CHECK: bb.0 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $p0, $z0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:zpr = COPY $z0
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:ppr = COPY $p0
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:ppr = COPY [[COPY1]]
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:zpr = COPY [[COPY]]
  ; CHECK-NEXT:   INLINEASM &"incp $0.s, $1", 0 /* attdialect */, {{[0-9]+}} /* regdef:ZPR */, def %2, {{[0-9]+}} /* reguse:PPR */, [[COPY2]], {{[0-9]+}} /* reguse tiedto:$0 */, [[COPY3]](tied-def 3)
  ; CHECK-NEXT:   $z0 = COPY %2
  ; CHECK-NEXT:   RET_ReallyLR implicit $z0
  %1 = tail call <vscale x 4 x i32> asm "incp $0.s, $1", "=w,@3Upa,0"(<vscale x 16 x i1> %Pg, <vscale x 4 x i32> %Zn)
  ret <vscale x 4 x i32> %1
}

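; Check that the 'Uph' predicate constraint is restricted to p8-p15
; (PPR_p8to15).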
define <vscale x 8 x half> @test_svfadd_f16_Uph_constraint(<vscale x 16 x i1> %Pg, <vscale x 8 x half> %Zn, <vscale x 8 x half> %Zm) {
  ; CHECK-LABEL: name: test_svfadd_f16_Uph_constraint
  ; CHECK: bb.0 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $p0, $z0, $z1
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:zpr = COPY $z1
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:zpr = COPY $z0
  ; CHECK-NEXT:   [[COPY2:%[0-9]+]]:ppr = COPY $p0
  ; CHECK-NEXT:   [[COPY3:%[0-9]+]]:ppr_p8to15 = COPY [[COPY2]]
  ; CHECK-NEXT:   [[COPY4:%[0-9]+]]:zpr = COPY [[COPY1]]
  ; CHECK-NEXT:   [[COPY5:%[0-9]+]]:zpr = COPY [[COPY]]
  ; CHECK-NEXT:   INLINEASM &"fadd $0.h, $1/m, $2.h, $3.h", 0 /* attdialect */, {{[0-9]+}} /* regdef:ZPR */, def %3, {{[0-9]+}} /* reguse:PPR_p8to15 */, [[COPY3]], {{[0-9]+}} /* reguse:ZPR */, [[COPY4]], {{[0-9]+}} /* reguse:ZPR */, [[COPY5]]
  ; CHECK-NEXT:   $z0 = COPY %3
  ; CHECK-NEXT:   RET_ReallyLR implicit $z0
  %1 = tail call <vscale x 8 x half> asm "fadd $0.h, $1/m, $2.h, $3.h", "=w,@3Uph,w,w"(<vscale x 16 x i1> %Pg, <vscale x 8 x half> %Zn, <vscale x 8 x half> %Zm)
  ret <vscale x 8 x half> %1
}

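; Check that an explicit {p0} register constraint is honoured: the predicate
; is copied into $p0 before the inline asm.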
define void @explicit_p0(ptr %p) {
  ; CHECK-LABEL: name: explicit_p0
  ; CHECK: bb.0 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $x0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gpr64 = COPY $x0
  ; CHECK-NEXT:   [[PTRUE_B:%[0-9]+]]:ppr = PTRUE_B 31, implicit $vg
  ; CHECK-NEXT:   $p0 = COPY [[PTRUE_B]]
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:gpr64common = COPY [[COPY]]
  ; CHECK-NEXT:   INLINEASM &"ld4w { z0.s, z1.s, z2.s, z3.s }, $1/z, [$0]", 1 /* sideeffect attdialect */, {{[0-9]+}} /* regdef:GPR64common */, def %1, 9 /* reguse */, $p0, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3)
  ; CHECK-NEXT:   RET_ReallyLR
  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.b8(i32 31)
  %2 = tail call i64 asm sideeffect "ld4w { z0.s, z1.s, z2.s, z3.s }, $1/z, [$0]", "=r,{p0},0"(<vscale x 16 x i1> %1, ptr %p)
  ret void
}

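; p8 cannot encode the governing predicate of ld4w (only p0-p7 can), but the
; explicit {p8} constraint is still passed through as written up to
; finalize-isel.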
define void @explicit_p8_invalid(ptr %p) {
  ; CHECK-LABEL: name: explicit_p8_invalid
  ; CHECK: bb.0 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $x0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gpr64 = COPY $x0
  ; CHECK-NEXT:   [[PTRUE_B:%[0-9]+]]:ppr = PTRUE_B 31, implicit $vg
  ; CHECK-NEXT:   $p8 = COPY [[PTRUE_B]]
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:gpr64common = COPY [[COPY]]
  ; CHECK-NEXT:   INLINEASM &"ld4w { z0.s, z1.s, z2.s, z3.s }, $1/z, [$0]", 1 /* sideeffect attdialect */, {{[0-9]+}} /* regdef:GPR64common */, def %1, 9 /* reguse */, $p8, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3)
  ; CHECK-NEXT:   RET_ReallyLR
  %1 = tail call <vscale x 16 x i1> @llvm.aarch64.sve.ptrue.b8(i32 31)
  %2 = tail call i64 asm sideeffect "ld4w { z0.s, z1.s, z2.s, z3.s }, $1/z, [$0]", "=r,{p8},0"(<vscale x 16 x i1> %1, ptr %p)
  ret void
}

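; Check that an explicit {pn8} predicate-as-counter register constraint is
; honoured for an SVE2.1 strided load.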
define void @explicit_pn8(ptr %p) {
  ; CHECK-LABEL: name: explicit_pn8
  ; CHECK: bb.0 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $x0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gpr64 = COPY $x0
  ; CHECK-NEXT:   [[PTRUE_C_B:%[0-9]+]]:pnr_p8to15 = PTRUE_C_B implicit $vg
  ; CHECK-NEXT:   $pn8 = COPY [[PTRUE_C_B]]
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:gpr64common = COPY [[COPY]]
  ; CHECK-NEXT:   INLINEASM &"ld1w { z0.s, z4.s, z8.s, z12.s }, $1/z, [$0]", 1 /* sideeffect attdialect */, {{[0-9]+}} /* regdef:GPR64common */, def %1, 9 /* reguse */, $pn8, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3)
  ; CHECK-NEXT:   RET_ReallyLR
  %1 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8()
  %2 = tail call i64 asm sideeffect "ld1w { z0.s, z4.s, z8.s, z12.s }, $1/z, [$0]", "=r,{pn8},0"(target("aarch64.svcount") %1, ptr %p)
  ret void
}

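; pn0 is outside the pn8-pn15 range encodable by this strided ld1w, but the
; explicit {pn0} constraint is still passed through as written up to
; finalize-isel.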
define void @explicit_pn0_invalid(ptr %p) {
  ; CHECK-LABEL: name: explicit_pn0_invalid
  ; CHECK: bb.0 (%ir-block.0):
  ; CHECK-NEXT:   liveins: $x0
  ; CHECK-NEXT: {{  $}}
  ; CHECK-NEXT:   [[COPY:%[0-9]+]]:gpr64 = COPY $x0
  ; CHECK-NEXT:   [[PTRUE_C_B:%[0-9]+]]:pnr_p8to15 = PTRUE_C_B implicit $vg
  ; CHECK-NEXT:   $pn0 = COPY [[PTRUE_C_B]]
  ; CHECK-NEXT:   [[COPY1:%[0-9]+]]:gpr64common = COPY [[COPY]]
  ; CHECK-NEXT:   INLINEASM &"ld1w { z0.s, z4.s, z8.s, z12.s }, $1/z, [$0]", 1 /* sideeffect attdialect */, {{[0-9]+}} /* regdef:GPR64common */, def %1, 9 /* reguse */, $pn0, 2147483657 /* reguse tiedto:$0 */, [[COPY1]](tied-def 3)
  ; CHECK-NEXT:   RET_ReallyLR
  %1 = tail call target("aarch64.svcount") @llvm.aarch64.sve.ptrue.c8()
  %2 = tail call i64 asm sideeffect "ld1w { z0.s, z4.s, z8.s, z12.s }, $1/z, [$0]", "=r,{pn0},0"(target("aarch64.svcount") %1, ptr %p)
  ret void
}