xref: /llvm-project/llvm/test/CodeGen/AMDGPU/ds-vectorization-alignment.ll (revision bdf2fbba9cee60b4b260ff17e4f44c475c11e715)
1; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --enable-var-scope --check-prefix=GCN %s
2
3; Check that the vectorizer does not create slow misaligned loads or stores
4
5; GCN-LABEL: {{^}}ds1align1:
6; GCN-COUNT-2: ds_read_u8
7; GCN-COUNT-2: ds_write_b8
8define amdgpu_kernel void @ds1align1(ptr addrspace(3) %in, ptr addrspace(3) %out) { ; copy two adjacent i8 values from %in to %out; every access is align 1
9  %lo = load i8, ptr addrspace(3) %in, align 1
10  %in.next = getelementptr i8, ptr addrspace(3) %in, i32 1 ; %in + 1 byte
11  %hi = load i8, ptr addrspace(3) %in.next, align 1
12  store i8 %lo, ptr addrspace(3) %out, align 1
13  %out.next = getelementptr i8, ptr addrspace(3) %out, i32 1 ; %out + 1 byte
14  store i8 %hi, ptr addrspace(3) %out.next, align 1
15  ret void
16}
17
18; GCN-LABEL: {{^}}ds2align2:
19; GCN-COUNT-2: ds_read_u16
20; GCN-COUNT-2: ds_write_b16
21define amdgpu_kernel void @ds2align2(ptr addrspace(3) %in, ptr addrspace(3) %out) { ; copy two adjacent i16 values from %in to %out; every access is align 2
22  %lo = load i16, ptr addrspace(3) %in, align 2
23  %in.next = getelementptr i16, ptr addrspace(3) %in, i32 1 ; %in + 2 bytes
24  %hi = load i16, ptr addrspace(3) %in.next, align 2
25  store i16 %lo, ptr addrspace(3) %out, align 2
26  %out.next = getelementptr i16, ptr addrspace(3) %out, i32 1 ; %out + 2 bytes
27  store i16 %hi, ptr addrspace(3) %out.next, align 2
28  ret void
29}
30
31; GCN-LABEL: {{^}}ds4align4:
32; GCN: ds_read2_b32
33; GCN: ds_write2_b32
34define amdgpu_kernel void @ds4align4(ptr addrspace(3) %in, ptr addrspace(3) %out) { ; copy two adjacent i32 values from %in to %out; every access is align 4
35  %lo = load i32, ptr addrspace(3) %in, align 4
36  %in.next = getelementptr i32, ptr addrspace(3) %in, i32 1 ; %in + 4 bytes
37  %hi = load i32, ptr addrspace(3) %in.next, align 4
38  store i32 %lo, ptr addrspace(3) %out, align 4
39  %out.next = getelementptr i32, ptr addrspace(3) %out, i32 1 ; %out + 4 bytes
40  store i32 %hi, ptr addrspace(3) %out.next, align 4
41  ret void
42}
43
44; GCN-LABEL: {{^}}ds8align8:
45; GCN: ds_read2_b64
46; GCN: ds_write2_b64
47define amdgpu_kernel void @ds8align8(ptr addrspace(3) %in, ptr addrspace(3) %out) { ; copy two adjacent i64 values from %in to %out; every access is align 8
48  %lo = load i64, ptr addrspace(3) %in, align 8
49  %in.next = getelementptr i64, ptr addrspace(3) %in, i64 1 ; %in + 8 bytes
50  %hi = load i64, ptr addrspace(3) %in.next, align 8
51  store i64 %lo, ptr addrspace(3) %out, align 8
52  %out.next = getelementptr i64, ptr addrspace(3) %out, i64 1 ; %out + 8 bytes
53  store i64 %hi, ptr addrspace(3) %out.next, align 8
54  ret void
55}
56
57; GCN-LABEL: {{^}}ds1align2:
58; GCN: ds_read_u16
59; GCN: ds_write_b16
60define amdgpu_kernel void @ds1align2(ptr addrspace(3) %in, ptr addrspace(3) %out) { ; copy two adjacent i8 values from %in to %out; every access is marked align 2
61  %lo = load i8, ptr addrspace(3) %in, align 2
62  %in.next = getelementptr i8, ptr addrspace(3) %in, i32 1 ; %in + 1 byte
63  %hi = load i8, ptr addrspace(3) %in.next, align 2 ; NOTE(review): address is %in + 1 yet still marked align 2 - confirm this is intentional test input
64  store i8 %lo, ptr addrspace(3) %out, align 2
65  %out.next = getelementptr i8, ptr addrspace(3) %out, i32 1 ; %out + 1 byte
66  store i8 %hi, ptr addrspace(3) %out.next, align 2 ; NOTE(review): same over-stated alignment as the second load
67  ret void
68}
69
70; GCN-LABEL: {{^}}ds2align4:
71; GCN: ds_read_b32
72; GCN: ds_write_b32
73define amdgpu_kernel void @ds2align4(ptr addrspace(3) %in, ptr addrspace(3) %out) { ; copy two adjacent i16 values from %in to %out; every access is marked align 4
74  %lo = load i16, ptr addrspace(3) %in, align 4
75  %in.next = getelementptr i16, ptr addrspace(3) %in, i32 1 ; %in + 2 bytes
76  %hi = load i16, ptr addrspace(3) %in.next, align 4 ; NOTE(review): address is %in + 2 yet still marked align 4 - confirm this is intentional test input
77  store i16 %lo, ptr addrspace(3) %out, align 4
78  %out.next = getelementptr i16, ptr addrspace(3) %out, i32 1 ; %out + 2 bytes
79  store i16 %hi, ptr addrspace(3) %out.next, align 4 ; NOTE(review): same over-stated alignment as the second load
80  ret void
81}
82
83; GCN-LABEL: {{^}}ds4align8:
84; GCN: ds_read_b64
85; GCN: ds_write_b64
86define amdgpu_kernel void @ds4align8(ptr addrspace(3) %in, ptr addrspace(3) %out) { ; copy two adjacent i32 values from %in to %out; every access is marked align 8
87  %lo = load i32, ptr addrspace(3) %in, align 8
88  %in.next = getelementptr i32, ptr addrspace(3) %in, i32 1 ; %in + 4 bytes
89  %hi = load i32, ptr addrspace(3) %in.next, align 8 ; NOTE(review): address is %in + 4 yet still marked align 8 - confirm this is intentional test input
90  store i32 %lo, ptr addrspace(3) %out, align 8
91  %out.next = getelementptr i32, ptr addrspace(3) %out, i32 1 ; %out + 4 bytes
92  store i32 %hi, ptr addrspace(3) %out.next, align 8 ; NOTE(review): same over-stated alignment as the second load
93  ret void
94}
95
96; GCN-LABEL: {{^}}ds8align16:
97; GCN: ds_read_b128
98; GCN: ds_write_b128
99define amdgpu_kernel void @ds8align16(ptr addrspace(3) %in, ptr addrspace(3) %out) { ; copy two adjacent i64 values from %in to %out; every access is marked align 16
100  %lo = load i64, ptr addrspace(3) %in, align 16
101  %in.next = getelementptr i64, ptr addrspace(3) %in, i64 1 ; %in + 8 bytes
102  %hi = load i64, ptr addrspace(3) %in.next, align 16 ; NOTE(review): address is %in + 8 yet still marked align 16 - confirm this is intentional test input
103  store i64 %lo, ptr addrspace(3) %out, align 16
104  %out.next = getelementptr i64, ptr addrspace(3) %out, i64 1 ; %out + 8 bytes
105  store i64 %hi, ptr addrspace(3) %out.next, align 16 ; NOTE(review): same over-stated alignment as the second load
106  ret void
107}
108