; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck --enable-var-scope --check-prefix=GCN %s

; Check that the vectorizer does not create slow misaligned loads.
;
; Every kernel below copies two adjacent elements from %in to %out, both of
; which are addrspace(3) pointers. Kernels are named ds<N>align<A>, where N is
; the element size in bytes and A is the alignment annotated on each access.

; ---------------------------------------------------------------------------
; Alignment equal to the element size.
; ---------------------------------------------------------------------------

; Two i8 elements, align 1: the checks expect two byte reads and two byte
; writes.
; GCN-LABEL: {{^}}ds1align1:
; GCN-COUNT-2: ds_read_u8
; GCN-COUNT-2: ds_write_b8
define amdgpu_kernel void @ds1align1(ptr addrspace(3) %in, ptr addrspace(3) %out) {
  %elt0 = load i8, ptr addrspace(3) %in, align 1
  %in.elt1 = getelementptr i8, ptr addrspace(3) %in, i32 1
  %elt1 = load i8, ptr addrspace(3) %in.elt1, align 1
  store i8 %elt0, ptr addrspace(3) %out, align 1
  %out.elt1 = getelementptr i8, ptr addrspace(3) %out, i32 1
  store i8 %elt1, ptr addrspace(3) %out.elt1, align 1
  ret void
}

; Two i16 elements, align 2: the checks expect two 16-bit reads and two 16-bit
; writes.
; GCN-LABEL: {{^}}ds2align2:
; GCN-COUNT-2: ds_read_u16
; GCN-COUNT-2: ds_write_b16
define amdgpu_kernel void @ds2align2(ptr addrspace(3) %in, ptr addrspace(3) %out) {
  %elt0 = load i16, ptr addrspace(3) %in, align 2
  %in.elt1 = getelementptr i16, ptr addrspace(3) %in, i32 1
  %elt1 = load i16, ptr addrspace(3) %in.elt1, align 2
  store i16 %elt0, ptr addrspace(3) %out, align 2
  %out.elt1 = getelementptr i16, ptr addrspace(3) %out, i32 1
  store i16 %elt1, ptr addrspace(3) %out.elt1, align 2
  ret void
}

; Two i32 elements, align 4: the checks expect the read2/write2 forms on
; 32-bit elements.
; GCN-LABEL: {{^}}ds4align4:
; GCN: ds_read2_b32
; GCN: ds_write2_b32
define amdgpu_kernel void @ds4align4(ptr addrspace(3) %in, ptr addrspace(3) %out) {
  %elt0 = load i32, ptr addrspace(3) %in, align 4
  %in.elt1 = getelementptr i32, ptr addrspace(3) %in, i32 1
  %elt1 = load i32, ptr addrspace(3) %in.elt1, align 4
  store i32 %elt0, ptr addrspace(3) %out, align 4
  %out.elt1 = getelementptr i32, ptr addrspace(3) %out, i32 1
  store i32 %elt1, ptr addrspace(3) %out.elt1, align 4
  ret void
}

; Two i64 elements, align 8: the checks expect the read2/write2 forms on
; 64-bit elements.
; GCN-LABEL: {{^}}ds8align8:
; GCN: ds_read2_b64
; GCN: ds_write2_b64
define amdgpu_kernel void @ds8align8(ptr addrspace(3) %in, ptr addrspace(3) %out) {
  %elt0 = load i64, ptr addrspace(3) %in, align 8
  %in.elt1 = getelementptr i64, ptr addrspace(3) %in, i64 1
  %elt1 = load i64, ptr addrspace(3) %in.elt1, align 8
  store i64 %elt0, ptr addrspace(3) %out, align 8
  %out.elt1 = getelementptr i64, ptr addrspace(3) %out, i64 1
  store i64 %elt1, ptr addrspace(3) %out.elt1, align 8
  ret void
}

; ---------------------------------------------------------------------------
; Alignment equal to twice the element size.
;
; NOTE(review): in these kernels the second access is one element past the
; base pointer yet carries the same alignment annotation as the first.
; Presumably this is intentional so that the pair is eligible for merging into
; one double-width access; confirm before "correcting" the annotations.
; ---------------------------------------------------------------------------

; Two i8 elements, align 2: the checks expect a 16-bit read and a 16-bit write.
; GCN-LABEL: {{^}}ds1align2:
; GCN: ds_read_u16
; GCN: ds_write_b16
define amdgpu_kernel void @ds1align2(ptr addrspace(3) %in, ptr addrspace(3) %out) {
  %elt0 = load i8, ptr addrspace(3) %in, align 2
  %in.elt1 = getelementptr i8, ptr addrspace(3) %in, i32 1
  %elt1 = load i8, ptr addrspace(3) %in.elt1, align 2
  store i8 %elt0, ptr addrspace(3) %out, align 2
  %out.elt1 = getelementptr i8, ptr addrspace(3) %out, i32 1
  store i8 %elt1, ptr addrspace(3) %out.elt1, align 2
  ret void
}

; Two i16 elements, align 4: the checks expect a 32-bit read and a 32-bit
; write.
; GCN-LABEL: {{^}}ds2align4:
; GCN: ds_read_b32
; GCN: ds_write_b32
define amdgpu_kernel void @ds2align4(ptr addrspace(3) %in, ptr addrspace(3) %out) {
  %elt0 = load i16, ptr addrspace(3) %in, align 4
  %in.elt1 = getelementptr i16, ptr addrspace(3) %in, i32 1
  %elt1 = load i16, ptr addrspace(3) %in.elt1, align 4
  store i16 %elt0, ptr addrspace(3) %out, align 4
  %out.elt1 = getelementptr i16, ptr addrspace(3) %out, i32 1
  store i16 %elt1, ptr addrspace(3) %out.elt1, align 4
  ret void
}

; Two i32 elements, align 8: the checks expect a 64-bit read and a 64-bit
; write.
; GCN-LABEL: {{^}}ds4align8:
; GCN: ds_read_b64
; GCN: ds_write_b64
define amdgpu_kernel void @ds4align8(ptr addrspace(3) %in, ptr addrspace(3) %out) {
  %elt0 = load i32, ptr addrspace(3) %in, align 8
  %in.elt1 = getelementptr i32, ptr addrspace(3) %in, i32 1
  %elt1 = load i32, ptr addrspace(3) %in.elt1, align 8
  store i32 %elt0, ptr addrspace(3) %out, align 8
  %out.elt1 = getelementptr i32, ptr addrspace(3) %out, i32 1
  store i32 %elt1, ptr addrspace(3) %out.elt1, align 8
  ret void
}

; Two i64 elements, align 16: the checks expect a 128-bit read and a 128-bit
; write.
; GCN-LABEL: {{^}}ds8align16:
; GCN: ds_read_b128
; GCN: ds_write_b128
define amdgpu_kernel void @ds8align16(ptr addrspace(3) %in, ptr addrspace(3) %out) {
  %elt0 = load i64, ptr addrspace(3) %in, align 16
  %in.elt1 = getelementptr i64, ptr addrspace(3) %in, i64 1
  %elt1 = load i64, ptr addrspace(3) %in.elt1, align 16
  store i64 %elt0, ptr addrspace(3) %out, align 16
  %out.elt1 = getelementptr i64, ptr addrspace(3) %out, i64 1
  store i64 %elt1, ptr addrspace(3) %out.elt1, align 16
  ret void
}