; RUN: llc -mtriple=arm64-apple-ios -o - %s | FileCheck %s

; This test checks that copyPhysReg can handle the memmove-like situation that
; arises when copying register tuples: when the source and destination tuples
; overlap, an early copy can clobber a value still needed by a later one, so
; the sub-register copies must be emitted in the right order.

; We use dummy inline asm to force LLVM to generate a COPY between the
; registers we want by clobbering all the others.
define void @test_D1D2_from_D0D1(ptr %addr) #0 {
; CHECK-LABEL: test_D1D2_from_D0D1:
; CHECK: mov.8b v2, v1
; CHECK: mov.8b v1, v0
entry:
  %vec = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0(ptr %addr)
  %vec0 = extractvalue { <8 x i8>, <8 x i8> } %vec, 0
  %vec1 = extractvalue { <8 x i8>, <8 x i8> } %vec, 1
  tail call void asm sideeffect "", "~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
  tail call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> %vec0, <8 x i8> %vec1, ptr %addr)

  tail call void asm sideeffect "", "~{v0},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
  tail call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> %vec0, <8 x i8> %vec1, ptr %addr)
  ret void
}

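; The reverse direction: copying {d1, d2} down into {d0, d1} also overlaps
; (in d1), so the copy must proceed from the low register up (v0 takes v1
; before v1 is overwritten with v2).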
define void @test_D0D1_from_D1D2(ptr %addr) #0 {
; CHECK-LABEL: test_D0D1_from_D1D2:
; CHECK: mov.8b v0, v1
; CHECK: mov.8b v1, v2
entry:
  %vec = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0(ptr %addr)
  %vec0 = extractvalue { <8 x i8>, <8 x i8> } %vec, 0
  %vec1 = extractvalue { <8 x i8>, <8 x i8> } %vec, 1
  tail call void asm sideeffect "", "~{v0},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
  tail call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> %vec0, <8 x i8> %vec1, ptr %addr)

  tail call void asm sideeffect "", "~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
  tail call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> %vec0, <8 x i8> %vec1, ptr %addr)
  ret void
}

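; Here the first clobber list leaves only v0 and v31 free, so the loaded pair
; lands in the wrap-around tuple {d31, d0}. Copying it to {d0, d1} overlaps
; in d0, so v1 must take v0 before v0 takes v31.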
define void @test_D0D1_from_D31D0(ptr %addr) #0 {
; CHECK-LABEL: test_D0D1_from_D31D0:
; CHECK: mov.8b v1, v0
; CHECK: mov.8b v0, v31
entry:
  %vec = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0(ptr %addr)
  %vec0 = extractvalue { <8 x i8>, <8 x i8> } %vec, 0
  %vec1 = extractvalue { <8 x i8>, <8 x i8> } %vec, 1
  tail call void asm sideeffect "", "~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30}"()
  tail call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> %vec0, <8 x i8> %vec1, ptr %addr)

  tail call void asm sideeffect "", "~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
  tail call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> %vec0, <8 x i8> %vec1, ptr %addr)
  ret void
}

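; The opposite case: {d0, d1} is copied into the wrap-around tuple {d31, d0},
; overlapping in d0, so v31 must take v0 before v0 takes v1.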
define void @test_D31D0_from_D0D1(ptr %addr) #0 {
; CHECK-LABEL: test_D31D0_from_D0D1:
; CHECK: mov.8b v31, v0
; CHECK: mov.8b v0, v1
entry:
  %vec = tail call { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0(ptr %addr)
  %vec0 = extractvalue { <8 x i8>, <8 x i8> } %vec, 0
  %vec1 = extractvalue { <8 x i8>, <8 x i8> } %vec, 1
  tail call void asm sideeffect "", "~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
  tail call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> %vec0, <8 x i8> %vec1, ptr %addr)

  tail call void asm sideeffect "", "~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30}"()
  tail call void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8> %vec0, <8 x i8> %vec1, ptr %addr)
  ret void
}

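; A three-element tuple from ld3/st3: copying {d0, d1, d2} up into
; {d2, d3, d4} overlaps in d2, so the copies must run from the top down.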
define void @test_D2D3D4_from_D0D1D2(ptr %addr) #0 {
; CHECK-LABEL: test_D2D3D4_from_D0D1D2:
; CHECK: mov.8b v4, v2
; CHECK: mov.8b v3, v1
; CHECK: mov.8b v2, v0
entry:
  %vec = tail call { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0(ptr %addr)
  %vec0 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vec, 0
  %vec1 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vec, 1
  %vec2 = extractvalue { <8 x i8>, <8 x i8>, <8 x i8> } %vec, 2

  tail call void asm sideeffect "", "~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
  tail call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> %vec0, <8 x i8> %vec1, <8 x i8> %vec2, ptr %addr)

  tail call void asm sideeffect "", "~{v0},~{v1},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
  tail call void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8> %vec0, <8 x i8> %vec1, <8 x i8> %vec2, ptr %addr)
  ret void
}

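; The same check with 128-bit Q registers: copying {q1, q2, q3} down into
; {q0, q1, q2} overlaps in q1 and q2, so the copies must run from the
; bottom up.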
define void @test_Q0Q1Q2_from_Q1Q2Q3(ptr %addr) #0 {
; CHECK-LABEL: test_Q0Q1Q2_from_Q1Q2Q3:
; CHECK: mov.16b v0, v1
; CHECK: mov.16b v1, v2
; CHECK: mov.16b v2, v3
entry:
  %vec = tail call { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0(ptr %addr)
  %vec0 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vec, 0
  %vec1 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vec, 1
  %vec2 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8> } %vec, 2
  tail call void asm sideeffect "", "~{v0},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
  tail call void @llvm.aarch64.neon.st3.v16i8.p0(<16 x i8> %vec0, <16 x i8> %vec1, <16 x i8> %vec2, ptr %addr)

  tail call void asm sideeffect "", "~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
  tail call void @llvm.aarch64.neon.st3.v16i8.p0(<16 x i8> %vec0, <16 x i8> %vec1, <16 x i8> %vec2, ptr %addr)
  ret void
}

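; A four-element Q tuple that wraps around the end of the register file:
; {q30, q31, q0, q1} is copied into {q1, q2, q3, q4}, overlapping in q1, so
; the copies must run from the top down.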
define void @test_Q1Q2Q3Q4_from_Q30Q31Q0Q1(ptr %addr) #0 {
; CHECK-LABEL: test_Q1Q2Q3Q4_from_Q30Q31Q0Q1:
; CHECK: mov.16b v4, v1
; CHECK: mov.16b v3, v0
; CHECK: mov.16b v2, v31
; CHECK: mov.16b v1, v30
entry:
  %vec = tail call { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0(ptr %addr)
  %vec0 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vec, 0
  %vec1 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vec, 1
  %vec2 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vec, 2
  %vec3 = extractvalue { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } %vec, 3

  tail call void asm sideeffect "", "~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29}"()
  tail call void @llvm.aarch64.neon.st4.v16i8.p0(<16 x i8> %vec0, <16 x i8> %vec1, <16 x i8> %vec2, <16 x i8> %vec3, ptr %addr)

  tail call void asm sideeffect "", "~{v0},~{v5},~{v6},~{v7},~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15},~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23},~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"()
  tail call void @llvm.aarch64.neon.st4.v16i8.p0(<16 x i8> %vec0, <16 x i8> %vec1, <16 x i8> %vec2, <16 x i8> %vec3, ptr %addr)
  ret void
}

declare { <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld2.v8i8.p0(ptr)
declare { <8 x i8>, <8 x i8>, <8 x i8> } @llvm.aarch64.neon.ld3.v8i8.p0(ptr)
declare { <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld3.v16i8.p0(ptr)
declare { <16 x i8>, <16 x i8>, <16 x i8>, <16 x i8> } @llvm.aarch64.neon.ld4.v16i8.p0(ptr)

declare void @llvm.aarch64.neon.st2.v8i8.p0(<8 x i8>, <8 x i8>, ptr)
declare void @llvm.aarch64.neon.st3.v8i8.p0(<8 x i8>, <8 x i8>, <8 x i8>, ptr)
declare void @llvm.aarch64.neon.st3.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>, ptr)
declare void @llvm.aarch64.neon.st4.v16i8.p0(<16 x i8>, <16 x i8>, <16 x i8>, <16 x i8>, ptr)