; xref: /llvm-project/llvm/test/CodeGen/AMDGPU/GlobalISel/merge-buffer-stores.ll (revision 63fe80fb18c9660e678d24c184021841cb02d82f)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn -verify-machineinstrs -o - %s | FileCheck %s

; test1: four dword stores through the <4 x i32> buffer-descriptor form of
; llvm.amdgcn.raw.buffer.store. Each store's offset operand is the address of
; an i32 element of %arg3, indexed by %bs1 | {0,1,2,3}. %bs1 is %arg1 masked
; with 0x3ffffffc (1073741820), so its two low bits are zero and the
; `or disjoint` terms pick four consecutive dwords. The autogenerated
; assertions expect the four stores to be emitted as one buffer_store_dwordx4
; holding 11, 22, 33, 44. The unnamed i32 parameter is not used in the body.
define amdgpu_cs void @test1(i32 %arg1, <4 x i32> inreg %arg2, i32, ptr addrspace(6) inreg %arg3) {
; CHECK-LABEL: test1:
; CHECK:       ; %bb.0: ; %.entry
; CHECK-NEXT:    v_and_b32_e32 v3, 0x3ffffffc, v0
; CHECK-NEXT:    v_mov_b32_e32 v0, 11
; CHECK-NEXT:    v_mov_b32_e32 v1, 22
; CHECK-NEXT:    v_mov_b32_e32 v2, 33
; CHECK-NEXT:    v_lshlrev_b32_e32 v3, 2, v3
; CHECK-NEXT:    v_add_i32_e32 v4, vcc, s4, v3
; CHECK-NEXT:    v_mov_b32_e32 v3, 44
; CHECK-NEXT:    buffer_store_dwordx4 v[0:3], v4, s[0:3], 0 offen
; CHECK-NEXT:    s_endpgm
.entry:
  ; Element 0: base index with bits 0-1 cleared by the mask.
  %bs1 = and i32 %arg1, 1073741820
  %ep1 = getelementptr i32, ptr addrspace(6) %arg3, i32 %bs1
  %ad1 = ptrtoint ptr addrspace(6) %ep1 to i32
  call void @llvm.amdgcn.raw.buffer.store.i32(i32 11, <4 x i32> %arg2, i32 %ad1, i32 0, i32 0)

  ; Element 1.
  %bs2 = or disjoint i32 %bs1, 1
  %ep2 = getelementptr i32, ptr addrspace(6) %arg3, i32 %bs2
  %ad2 = ptrtoint ptr addrspace(6) %ep2 to i32
  call void @llvm.amdgcn.raw.buffer.store.i32(i32 22, <4 x i32> %arg2, i32 %ad2, i32 0, i32 0)

  ; Element 2.
  %bs3 = or disjoint i32 %bs1, 2
  %ep3 = getelementptr i32, ptr addrspace(6) %arg3, i32 %bs3
  %ad3 = ptrtoint ptr addrspace(6) %ep3 to i32
  call void @llvm.amdgcn.raw.buffer.store.i32(i32 33, <4 x i32> %arg2, i32 %ad3, i32 0, i32 0)

  ; Element 3.
  %bs4 = or disjoint i32 %bs1, 3
  %ep4 = getelementptr i32, ptr addrspace(6) %arg3, i32 %bs4
  %ad4 = ptrtoint ptr addrspace(6) %ep4 to i32
  call void @llvm.amdgcn.raw.buffer.store.i32(i32 44, <4 x i32> %arg2, i32 %ad4, i32 0, i32 0)

  ret void
}

; test1_ptr: same store pattern as @test1, but the buffer descriptor is passed
; as `ptr addrspace(8)` and the stores go through
; llvm.amdgcn.raw.ptr.buffer.store. Offsets are the addresses of four
; consecutive i32 elements of %arg3 (index %bs1 | {0,1,2,3}, where %bs1 is
; %arg1 masked with 0x3ffffffc). The autogenerated assertions expect a single
; buffer_store_dwordx4 holding 11, 22, 33, 44. The unnamed i32 parameter is
; not used in the body.
define amdgpu_cs void @test1_ptr(i32 %arg1, ptr addrspace(8) inreg %arg2, i32, ptr addrspace(6) inreg %arg3) {
; CHECK-LABEL: test1_ptr:
; CHECK:       ; %bb.0: ; %.entry
; CHECK-NEXT:    v_and_b32_e32 v3, 0x3ffffffc, v0
; CHECK-NEXT:    v_mov_b32_e32 v0, 11
; CHECK-NEXT:    v_mov_b32_e32 v1, 22
; CHECK-NEXT:    v_mov_b32_e32 v2, 33
; CHECK-NEXT:    v_lshlrev_b32_e32 v3, 2, v3
; CHECK-NEXT:    v_add_i32_e32 v4, vcc, s4, v3
; CHECK-NEXT:    v_mov_b32_e32 v3, 44
; CHECK-NEXT:    buffer_store_dwordx4 v[0:3], v4, s[0:3], 0 offen
; CHECK-NEXT:    s_endpgm
.entry:
  ; Element 0: base index with bits 0-1 cleared by the mask.
  %bs1 = and i32 %arg1, 1073741820
  %ep1 = getelementptr i32, ptr addrspace(6) %arg3, i32 %bs1
  %ad1 = ptrtoint ptr addrspace(6) %ep1 to i32
  call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 11, ptr addrspace(8) %arg2, i32 %ad1, i32 0, i32 0)

  ; Element 1.
  %bs2 = or disjoint i32 %bs1, 1
  %ep2 = getelementptr i32, ptr addrspace(6) %arg3, i32 %bs2
  %ad2 = ptrtoint ptr addrspace(6) %ep2 to i32
  call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 22, ptr addrspace(8) %arg2, i32 %ad2, i32 0, i32 0)

  ; Element 2.
  %bs3 = or disjoint i32 %bs1, 2
  %ep3 = getelementptr i32, ptr addrspace(6) %arg3, i32 %bs3
  %ad3 = ptrtoint ptr addrspace(6) %ep3 to i32
  call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 33, ptr addrspace(8) %arg2, i32 %ad3, i32 0, i32 0)

  ; Element 3.
  %bs4 = or disjoint i32 %bs1, 3
  %ep4 = getelementptr i32, ptr addrspace(6) %arg3, i32 %bs4
  %ad4 = ptrtoint ptr addrspace(6) %ep4 to i32
  call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 44, ptr addrspace(8) %arg2, i32 %ad4, i32 0, i32 0)

  ret void
}

; test2: four dword stores through the <4 x i32> buffer-descriptor form of
; llvm.amdgcn.raw.buffer.store, with offsets computed from a null
; addrspace(6) base: each offset is the address of element %bs1 | {0,1,2,3}
; of a <{ [64 x i32] }> located at null. %bs1 is %arg1 masked with 0x3ffffffc
; (1073741820), so the `or disjoint` terms pick four consecutive dwords. The
; autogenerated assertions expect only the mask and a shift (no base add)
; followed by a single buffer_store_dwordx4 holding 11, 22, 33, 44.
define amdgpu_cs void @test2(i32 %arg1, <4 x i32> inreg %arg2) {
; CHECK-LABEL: test2:
; CHECK:       ; %bb.0: ; %.entry
; CHECK-NEXT:    v_and_b32_e32 v3, 0x3ffffffc, v0
; CHECK-NEXT:    v_mov_b32_e32 v0, 11
; CHECK-NEXT:    v_mov_b32_e32 v1, 22
; CHECK-NEXT:    v_mov_b32_e32 v2, 33
; CHECK-NEXT:    v_lshlrev_b32_e32 v4, 2, v3
; CHECK-NEXT:    v_mov_b32_e32 v3, 44
; CHECK-NEXT:    buffer_store_dwordx4 v[0:3], v4, s[0:3], 0 offen
; CHECK-NEXT:    s_endpgm
.entry:
  ; Element 0: base index with bits 0-1 cleared by the mask.
  %bs1 = and i32 %arg1, 1073741820
  %ep1 = getelementptr <{ [64 x i32] }>, ptr addrspace(6) null, i32 0, i32 0, i32 %bs1
  %ad1 = ptrtoint ptr addrspace(6) %ep1 to i32
  call void @llvm.amdgcn.raw.buffer.store.i32(i32 11, <4 x i32> %arg2, i32 %ad1, i32 0, i32 0)

  ; Element 1.
  %bs2 = or disjoint i32 %bs1, 1
  %ep2 = getelementptr <{ [64 x i32] }>, ptr addrspace(6) null, i32 0, i32 0, i32 %bs2
  %ad2 = ptrtoint ptr addrspace(6) %ep2 to i32
  call void @llvm.amdgcn.raw.buffer.store.i32(i32 22, <4 x i32> %arg2, i32 %ad2, i32 0, i32 0)

  ; Element 2.
  %bs3 = or disjoint i32 %bs1, 2
  %ep3 = getelementptr <{ [64 x i32] }>, ptr addrspace(6) null, i32 0, i32 0, i32 %bs3
  %ad3 = ptrtoint ptr addrspace(6) %ep3 to i32
  call void @llvm.amdgcn.raw.buffer.store.i32(i32 33, <4 x i32> %arg2, i32 %ad3, i32 0, i32 0)

  ; Element 3.
  %bs4 = or disjoint i32 %bs1, 3
  %ep4 = getelementptr <{ [64 x i32] }>, ptr addrspace(6) null, i32 0, i32 0, i32 %bs4
  %ad4 = ptrtoint ptr addrspace(6) %ep4 to i32
  call void @llvm.amdgcn.raw.buffer.store.i32(i32 44, <4 x i32> %arg2, i32 %ad4, i32 0, i32 0)

  ret void
}

; test2_ptr: same store pattern as @test2, but the buffer descriptor is passed
; as `ptr addrspace(8)` and the stores go through
; llvm.amdgcn.raw.ptr.buffer.store. Offsets are the addresses of elements
; %bs1 | {0,1,2,3} of a <{ [64 x i32] }> located at a null addrspace(6) base,
; where %bs1 is %arg1 masked with 0x3ffffffc (1073741820). The autogenerated
; assertions expect only the mask and a shift (no base add) followed by a
; single buffer_store_dwordx4 holding 11, 22, 33, 44.
define amdgpu_cs void @test2_ptr(i32 %arg1, ptr addrspace(8) inreg %arg2) {
; CHECK-LABEL: test2_ptr:
; CHECK:       ; %bb.0: ; %.entry
; CHECK-NEXT:    v_and_b32_e32 v3, 0x3ffffffc, v0
; CHECK-NEXT:    v_mov_b32_e32 v0, 11
; CHECK-NEXT:    v_mov_b32_e32 v1, 22
; CHECK-NEXT:    v_mov_b32_e32 v2, 33
; CHECK-NEXT:    v_lshlrev_b32_e32 v4, 2, v3
; CHECK-NEXT:    v_mov_b32_e32 v3, 44
; CHECK-NEXT:    buffer_store_dwordx4 v[0:3], v4, s[0:3], 0 offen
; CHECK-NEXT:    s_endpgm
.entry:
  ; Element 0: base index with bits 0-1 cleared by the mask.
  %bs1 = and i32 %arg1, 1073741820
  %ep1 = getelementptr <{ [64 x i32] }>, ptr addrspace(6) null, i32 0, i32 0, i32 %bs1
  %ad1 = ptrtoint ptr addrspace(6) %ep1 to i32
  call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 11, ptr addrspace(8) %arg2, i32 %ad1, i32 0, i32 0)

  ; Element 1.
  %bs2 = or disjoint i32 %bs1, 1
  %ep2 = getelementptr <{ [64 x i32] }>, ptr addrspace(6) null, i32 0, i32 0, i32 %bs2
  %ad2 = ptrtoint ptr addrspace(6) %ep2 to i32
  call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 22, ptr addrspace(8) %arg2, i32 %ad2, i32 0, i32 0)

  ; Element 2.
  %bs3 = or disjoint i32 %bs1, 2
  %ep3 = getelementptr <{ [64 x i32] }>, ptr addrspace(6) null, i32 0, i32 0, i32 %bs3
  %ad3 = ptrtoint ptr addrspace(6) %ep3 to i32
  call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 33, ptr addrspace(8) %arg2, i32 %ad3, i32 0, i32 0)

  ; Element 3.
  %bs4 = or disjoint i32 %bs1, 3
  %ep4 = getelementptr <{ [64 x i32] }>, ptr addrspace(6) null, i32 0, i32 0, i32 %bs4
  %ad4 = ptrtoint ptr addrspace(6) %ep4 to i32
  call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 44, ptr addrspace(8) %arg2, i32 %ad4, i32 0, i32 0)

  ret void
}

; Intrinsic declarations used by the tests in this file: the i32 raw buffer
; store taking the descriptor as <4 x i32>, and the variant taking it as
; ptr addrspace(8). In both, the last i32 operand is marked immarg and the
; callers in this file always pass 0 for the final two operands.
declare void @llvm.amdgcn.raw.buffer.store.i32(i32, <4 x i32>, i32, i32, i32 immarg)
declare void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32, ptr addrspace(8), i32, i32, i32 immarg)