xref: /llvm-project/llvm/test/CodeGen/AMDGPU/ds-vectorization-alignment.ll (revision bdf2fbba9cee60b4b260ff17e4f44c475c11e715)
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --enable-var-scope --check-prefix=GCN %s

; Check that the vectorizer does not create slow misaligned loads

; ds1align1 - two adjacent i8 loads from %in and two adjacent i8 stores to
; %out, every access marked align 1. The checks look for two ds_read_u8
; followed by two ds_write_b8, i.e. the accesses stay byte-sized.
; GCN-LABEL: {{^}}ds1align1:
; GCN-COUNT-2: ds_read_u8
; GCN-COUNT-2: ds_write_b8
define amdgpu_kernel void @ds1align1(ptr addrspace(3) %in, ptr addrspace(3) %out) {
  %val1 = load i8, ptr addrspace(3) %in, align 1
  %gep1 = getelementptr i8, ptr addrspace(3) %in, i32 1
  %val2 = load i8, ptr addrspace(3) %gep1, align 1
  store i8 %val1, ptr addrspace(3) %out, align 1
  %gep2 = getelementptr i8, ptr addrspace(3) %out, i32 1
  store i8 %val2, ptr addrspace(3) %gep2, align 1
  ret void
}

; ds2align2 - two adjacent i16 loads from %in and two adjacent i16 stores to
; %out, every access marked align 2. The checks look for two ds_read_u16
; followed by two ds_write_b16, i.e. the accesses stay 16 bits wide.
; GCN-LABEL: {{^}}ds2align2:
; GCN-COUNT-2: ds_read_u16
; GCN-COUNT-2: ds_write_b16
define amdgpu_kernel void @ds2align2(ptr addrspace(3) %in, ptr addrspace(3) %out) {
  %val1 = load i16, ptr addrspace(3) %in, align 2
  %gep1 = getelementptr i16, ptr addrspace(3) %in, i32 1
  %val2 = load i16, ptr addrspace(3) %gep1, align 2
  store i16 %val1, ptr addrspace(3) %out, align 2
  %gep2 = getelementptr i16, ptr addrspace(3) %out, i32 1
  store i16 %val2, ptr addrspace(3) %gep2, align 2
  ret void
}

; ds4align4 - two adjacent i32 loads from %in and two adjacent i32 stores to
; %out, every access marked align 4. The checks look for a ds_read2_b32
; followed by a ds_write2_b32.
; GCN-LABEL: {{^}}ds4align4:
; GCN: ds_read2_b32
; GCN: ds_write2_b32
define amdgpu_kernel void @ds4align4(ptr addrspace(3) %in, ptr addrspace(3) %out) {
  %val1 = load i32, ptr addrspace(3) %in, align 4
  %gep1 = getelementptr i32, ptr addrspace(3) %in, i32 1
  %val2 = load i32, ptr addrspace(3) %gep1, align 4
  store i32 %val1, ptr addrspace(3) %out, align 4
  %gep2 = getelementptr i32, ptr addrspace(3) %out, i32 1
  store i32 %val2, ptr addrspace(3) %gep2, align 4
  ret void
}

; ds8align8 - two adjacent i64 loads from %in and two adjacent i64 stores to
; %out, every access marked align 8. The checks look for a ds_read2_b64
; followed by a ds_write2_b64.
; GCN-LABEL: {{^}}ds8align8:
; GCN: ds_read2_b64
; GCN: ds_write2_b64
define amdgpu_kernel void @ds8align8(ptr addrspace(3) %in, ptr addrspace(3) %out) {
  %val1 = load i64, ptr addrspace(3) %in, align 8
  %gep1 = getelementptr i64, ptr addrspace(3) %in, i64 1
  %val2 = load i64, ptr addrspace(3) %gep1, align 8
  store i64 %val1, ptr addrspace(3) %out, align 8
  %gep2 = getelementptr i64, ptr addrspace(3) %out, i64 1
  store i64 %val2, ptr addrspace(3) %gep2, align 8
  ret void
}

; ds1align2 - two adjacent i8 loads from %in and two adjacent i8 stores to
; %out, every access marked align 2 (twice the element size). The checks look
; for a 16-bit ds_read_u16 followed by a 16-bit ds_write_b16.
; NOTE(review): %gep1/%gep2 are 1 byte past pointers whose first access claims
; 2-byte alignment, so the second access of each pair cannot also be 2-byte
; aligned. Presumably deliberate for this test - confirm before changing.
; GCN-LABEL: {{^}}ds1align2:
; GCN: ds_read_u16
; GCN: ds_write_b16
define amdgpu_kernel void @ds1align2(ptr addrspace(3) %in, ptr addrspace(3) %out) {
  %val1 = load i8, ptr addrspace(3) %in, align 2
  %gep1 = getelementptr i8, ptr addrspace(3) %in, i32 1
  %val2 = load i8, ptr addrspace(3) %gep1, align 2
  store i8 %val1, ptr addrspace(3) %out, align 2
  %gep2 = getelementptr i8, ptr addrspace(3) %out, i32 1
  store i8 %val2, ptr addrspace(3) %gep2, align 2
  ret void
}

; ds2align4 - two adjacent i16 loads from %in and two adjacent i16 stores to
; %out, every access marked align 4 (twice the element size). The checks look
; for a 32-bit ds_read_b32 followed by a 32-bit ds_write_b32.
; NOTE(review): %gep1/%gep2 are 2 bytes past pointers whose first access claims
; 4-byte alignment, so the second access of each pair cannot also be 4-byte
; aligned. Presumably deliberate for this test - confirm before changing.
; GCN-LABEL: {{^}}ds2align4:
; GCN: ds_read_b32
; GCN: ds_write_b32
define amdgpu_kernel void @ds2align4(ptr addrspace(3) %in, ptr addrspace(3) %out) {
  %val1 = load i16, ptr addrspace(3) %in, align 4
  %gep1 = getelementptr i16, ptr addrspace(3) %in, i32 1
  %val2 = load i16, ptr addrspace(3) %gep1, align 4
  store i16 %val1, ptr addrspace(3) %out, align 4
  %gep2 = getelementptr i16, ptr addrspace(3) %out, i32 1
  store i16 %val2, ptr addrspace(3) %gep2, align 4
  ret void
}

; ds4align8 - two adjacent i32 loads from %in and two adjacent i32 stores to
; %out, every access marked align 8 (twice the element size). The checks look
; for a 64-bit ds_read_b64 followed by a 64-bit ds_write_b64.
; NOTE(review): %gep1/%gep2 are 4 bytes past pointers whose first access claims
; 8-byte alignment, so the second access of each pair cannot also be 8-byte
; aligned. Presumably deliberate for this test - confirm before changing.
; GCN-LABEL: {{^}}ds4align8:
; GCN: ds_read_b64
; GCN: ds_write_b64
define amdgpu_kernel void @ds4align8(ptr addrspace(3) %in, ptr addrspace(3) %out) {
  %val1 = load i32, ptr addrspace(3) %in, align 8
  %gep1 = getelementptr i32, ptr addrspace(3) %in, i32 1
  %val2 = load i32, ptr addrspace(3) %gep1, align 8
  store i32 %val1, ptr addrspace(3) %out, align 8
  %gep2 = getelementptr i32, ptr addrspace(3) %out, i32 1
  store i32 %val2, ptr addrspace(3) %gep2, align 8
  ret void
}

; ds8align16 - two adjacent i64 loads from %in and two adjacent i64 stores to
; %out, every access marked align 16 (twice the element size). The checks look
; for a 128-bit ds_read_b128 followed by a 128-bit ds_write_b128.
; NOTE(review): %gep1/%gep2 are 8 bytes past pointers whose first access claims
; 16-byte alignment, so the second access of each pair cannot also be 16-byte
; aligned. Presumably deliberate for this test - confirm before changing.
; GCN-LABEL: {{^}}ds8align16:
; GCN: ds_read_b128
; GCN: ds_write_b128
define amdgpu_kernel void @ds8align16(ptr addrspace(3) %in, ptr addrspace(3) %out) {
  %val1 = load i64, ptr addrspace(3) %in, align 16
  %gep1 = getelementptr i64, ptr addrspace(3) %in, i64 1
  %val2 = load i64, ptr addrspace(3) %gep1, align 16
  store i64 %val1, ptr addrspace(3) %out, align 16
  %gep2 = getelementptr i64, ptr addrspace(3) %out, i64 1
  store i64 %val2, ptr addrspace(3) %gep2, align 16
  ret void
}