; RUN: opt -S -mtriple=amdgcn-- -data-layout=A5 -passes='amdgpu-promote-alloca,sroa,instcombine' < %s | FileCheck -check-prefix=OPT %s

; Each kernel below fills a private (addrspace 5) [3 x i32] array with the
; constants 0, 1, 2, reads one element at a runtime index, and writes the
; result to %out. The kernels differ only in which accesses are atomic.

; Baseline: every access is a plain load/store. The expected output indexes a
; constant <3 x i32> vector with %index instead of going through memory.

; OPT-LABEL: @vector_alloca_not_atomic(
;
; OPT: extractelement <3 x i32> <i32 0, i32 1, i32 2>, i64 %index
define amdgpu_kernel void @vector_alloca_not_atomic(ptr addrspace(1) %out, i64 %index) {
entry:
  %stack = alloca [3 x i32], addrspace(5)
  %elt1 = getelementptr [3 x i32], ptr addrspace(5) %stack, i32 0, i32 1
  %elt2 = getelementptr [3 x i32], ptr addrspace(5) %stack, i32 0, i32 2
  store i32 0, ptr addrspace(5) %stack
  store i32 1, ptr addrspace(5) %elt1
  store i32 2, ptr addrspace(5) %elt2
  %dyn.ptr = getelementptr [3 x i32], ptr addrspace(5) %stack, i64 0, i64 %index
  %val = load i32, ptr addrspace(5) %dyn.ptr
  store i32 %val, ptr addrspace(1) %out
  ret void
}

; Atomic (acquire) load of the dynamically indexed element. The expected
; output still contains the alloca, the three plain stores, and the atomic
; load, i.e. the array is left in memory.

; OPT-LABEL: @vector_alloca_atomic_read(
;
; OPT: alloca [3 x i32]
; OPT: store i32 0, ptr addrspace(5)
; OPT: store i32 1, ptr addrspace(5)
; OPT: store i32 2, ptr addrspace(5)
; OPT: load atomic i32, ptr addrspace(5)
define amdgpu_kernel void @vector_alloca_atomic_read(ptr addrspace(1) %out, i64 %index) {
entry:
  %stack = alloca [3 x i32], addrspace(5)
  %elt1 = getelementptr [3 x i32], ptr addrspace(5) %stack, i32 0, i32 1
  %elt2 = getelementptr [3 x i32], ptr addrspace(5) %stack, i32 0, i32 2
  store i32 0, ptr addrspace(5) %stack
  store i32 1, ptr addrspace(5) %elt1
  store i32 2, ptr addrspace(5) %elt2
  %dyn.ptr = getelementptr [3 x i32], ptr addrspace(5) %stack, i64 0, i64 %index
  %val = load atomic i32, ptr addrspace(5) %dyn.ptr acquire, align 4
  store i32 %val, ptr addrspace(1) %out
  ret void
}

; Atomic (release) stores of the three elements followed by a plain load. The
; expected output still contains the alloca, the three atomic stores, and the
; load, i.e. the array is left in memory.

; OPT-LABEL: @vector_alloca_atomic_write(
;
; OPT: alloca [3 x i32]
; OPT: store atomic i32 0, ptr addrspace(5)
; OPT: store atomic i32 1, ptr addrspace(5)
; OPT: store atomic i32 2, ptr addrspace(5)
; OPT: load i32, ptr addrspace(5)
define amdgpu_kernel void @vector_alloca_atomic_write(ptr addrspace(1) %out, i64 %index) {
entry:
  %stack = alloca [3 x i32], addrspace(5)
  %elt1 = getelementptr [3 x i32], ptr addrspace(5) %stack, i32 0, i32 1
  %elt2 = getelementptr [3 x i32], ptr addrspace(5) %stack, i32 0, i32 2
  store atomic i32 0, ptr addrspace(5) %stack release, align 4
  store atomic i32 1, ptr addrspace(5) %elt1 release, align 4
  store atomic i32 2, ptr addrspace(5) %elt2 release, align 4
  %dyn.ptr = getelementptr [3 x i32], ptr addrspace(5) %stack, i64 0, i64 %index
  %val = load i32, ptr addrspace(5) %dyn.ptr
  store i32 %val, ptr addrspace(1) %out
  ret void
}