; Exercises the size limits applied when amdgpu-promote-alloca turns a private
; (addrspace 5) array alloca into a vector. Every function has the same shape:
; store a zero to the start of the array, load one element at a dynamic index,
; and write it to the output pointer. Only the array element type, the element
; count, the calling convention and the "amdgpu-flat-work-group-size"
; attribute differ between functions.
;
; Two configurations are checked:
;   * prefix OPT     - default pass settings;
;   * prefix LIMIT32 - -amdgpu-promote-alloca-to-vector-limit=32, under which
;                      none of the arrays below is expected to become a vector.
;
; Both prefixes are anchored with a per-function label directive so that the
; positive and negative checks are scoped to a single function body and cannot
; be satisfied by text that belongs to a neighbouring function (for example
; the substring "alloca" that appears in every function name).

; RUN: opt -S -mtriple=amdgcn-- -passes='amdgpu-promote-alloca,sroa,instcombine' < %s | FileCheck -check-prefix=OPT %s
; RUN: opt -S -mtriple=amdgcn-- -passes='amdgpu-promote-alloca,sroa,instcombine' -amdgpu-promote-alloca-to-vector-limit=32 < %s | FileCheck -check-prefix=LIMIT32 %s

target datalayout = "A5"

; 8 x i64 in a kernel with work-group size up to 1024: the default run expects
; the alloca to be gone and an <8 x i64> value to appear.
; OPT-LABEL: @alloca_8xi64_max1024(
; OPT-NOT: alloca
; OPT: <8 x i64>
; LIMIT32-LABEL: @alloca_8xi64_max1024(
; LIMIT32: alloca
; LIMIT32-NOT: <8 x i64>
define amdgpu_kernel void @alloca_8xi64_max1024(ptr addrspace(1) %out, i32 %index) #0 {
entry:
  %tmp = alloca [8 x i64], addrspace(5)
  store i64 0, ptr addrspace(5) %tmp
  %tmp1 = getelementptr [8 x i64], ptr addrspace(5) %tmp, i32 0, i32 %index
  %tmp2 = load i64, ptr addrspace(5) %tmp1
  store i64 %tmp2, ptr addrspace(1) %out
  ret void
}

; 9 x i64 with the same work-group size: one element more than the previous
; case; the default run expects the array alloca to remain.
; OPT-LABEL: @alloca_9xi64_max1024(
; OPT: alloca [9 x i64]
; OPT-NOT: <9 x i64>
; LIMIT32-LABEL: @alloca_9xi64_max1024(
; LIMIT32: alloca
; LIMIT32-NOT: <9 x i64>
define amdgpu_kernel void @alloca_9xi64_max1024(ptr addrspace(1) %out, i32 %index) #0 {
entry:
  %tmp = alloca [9 x i64], addrspace(5)
  store i64 0, ptr addrspace(5) %tmp
  %tmp1 = getelementptr [9 x i64], ptr addrspace(5) %tmp, i32 0, i32 %index
  %tmp2 = load i64, ptr addrspace(5) %tmp1
  store i64 %tmp2, ptr addrspace(1) %out
  ret void
}

; 16 x i64 in a kernel with work-group size up to 512: expected to be promoted
; by the default run.
; OPT-LABEL: @alloca_16xi64_max512(
; OPT-NOT: alloca
; OPT: <16 x i64>
; LIMIT32-LABEL: @alloca_16xi64_max512(
; LIMIT32: alloca
; LIMIT32-NOT: <16 x i64>
define amdgpu_kernel void @alloca_16xi64_max512(ptr addrspace(1) %out, i32 %index) #1 {
entry:
  %tmp = alloca [16 x i64], addrspace(5)
  store i64 0, ptr addrspace(5) %tmp
  %tmp1 = getelementptr [16 x i64], ptr addrspace(5) %tmp, i32 0, i32 %index
  %tmp2 = load i64, ptr addrspace(5) %tmp1
  store i64 %tmp2, ptr addrspace(1) %out
  ret void
}

; 17 x i64 with work-group size up to 512: one element more than the previous
; case; expected to stay an alloca.
; OPT-LABEL: @alloca_17xi64_max512(
; OPT: alloca [17 x i64]
; OPT-NOT: <17 x i64>
; LIMIT32-LABEL: @alloca_17xi64_max512(
; LIMIT32: alloca
; LIMIT32-NOT: <17 x i64>
define amdgpu_kernel void @alloca_17xi64_max512(ptr addrspace(1) %out, i32 %index) #1 {
entry:
  %tmp = alloca [17 x i64], addrspace(5)
  store i64 0, ptr addrspace(5) %tmp
  %tmp1 = getelementptr [17 x i64], ptr addrspace(5) %tmp, i32 0, i32 %index
  %tmp2 = load i64, ptr addrspace(5) %tmp1
  store i64 %tmp2, ptr addrspace(1) %out
  ret void
}

; 9 x i128 with work-group size up to 512: expected to stay an alloca.
; OPT-LABEL: @alloca_9xi128_max512(
; OPT: alloca [9 x i128]
; OPT-NOT: <9 x i128>
; LIMIT32-LABEL: @alloca_9xi128_max512(
; LIMIT32: alloca
; LIMIT32-NOT: <9 x i128>
define amdgpu_kernel void @alloca_9xi128_max512(ptr addrspace(1) %out, i32 %index) #1 {
entry:
  %tmp = alloca [9 x i128], addrspace(5)
  store i128 0, ptr addrspace(5) %tmp
  %tmp1 = getelementptr [9 x i128], ptr addrspace(5) %tmp, i32 0, i32 %index
  %tmp2 = load i128, ptr addrspace(5) %tmp1
  store i128 %tmp2, ptr addrspace(1) %out
  ret void
}

; The same 9 x i128 array with work-group size up to 256: expected to be
; promoted by the default run.
; OPT-LABEL: @alloca_9xi128_max256(
; OPT-NOT: alloca
; OPT: <9 x i128>
; LIMIT32-LABEL: @alloca_9xi128_max256(
; LIMIT32: alloca
; LIMIT32-NOT: <9 x i128>
define amdgpu_kernel void @alloca_9xi128_max256(ptr addrspace(1) %out, i32 %index) #2 {
entry:
  %tmp = alloca [9 x i128], addrspace(5)
  store i128 0, ptr addrspace(5) %tmp
  %tmp1 = getelementptr [9 x i128], ptr addrspace(5) %tmp, i32 0, i32 %index
  %tmp2 = load i128, ptr addrspace(5) %tmp1
  store i128 %tmp2, ptr addrspace(1) %out
  ret void
}

; 16 x i128 with work-group size up to 256: expected to be promoted by the
; default run.
; OPT-LABEL: @alloca_16xi128_max256(
; OPT-NOT: alloca
; OPT: <16 x i128>
; LIMIT32-LABEL: @alloca_16xi128_max256(
; LIMIT32: alloca
; LIMIT32-NOT: <16 x i128>
define amdgpu_kernel void @alloca_16xi128_max256(ptr addrspace(1) %out, i32 %index) #2 {
entry:
  %tmp = alloca [16 x i128], addrspace(5)
  store i128 0, ptr addrspace(5) %tmp
  %tmp1 = getelementptr [16 x i128], ptr addrspace(5) %tmp, i32 0, i32 %index
  %tmp2 = load i128, ptr addrspace(5) %tmp1
  store i128 %tmp2, ptr addrspace(1) %out
  ret void
}

; 9 x i256 with work-group size up to 256: expected to stay an alloca.
; OPT-LABEL: @alloca_9xi256_max256(
; OPT: alloca [9 x i256]
; OPT-NOT: <9 x i256>
; LIMIT32-LABEL: @alloca_9xi256_max256(
; LIMIT32: alloca
; LIMIT32-NOT: <9 x i256>
define amdgpu_kernel void @alloca_9xi256_max256(ptr addrspace(1) %out, i32 %index) #2 {
entry:
  %tmp = alloca [9 x i256], addrspace(5)
  store i256 0, ptr addrspace(5) %tmp
  %tmp1 = getelementptr [9 x i256], ptr addrspace(5) %tmp, i32 0, i32 %index
  %tmp2 = load i256, ptr addrspace(5) %tmp1
  store i256 %tmp2, ptr addrspace(1) %out
  ret void
}

; 9 x i64 in a kernel with work-group size up to 256: expected to be promoted
; by the default run (compare with the max1024 variant above, which is not).
; OPT-LABEL: @alloca_9xi64_max256(
; OPT-NOT: alloca
; OPT: <9 x i64>
; LIMIT32-LABEL: @alloca_9xi64_max256(
; LIMIT32: alloca
; LIMIT32-NOT: <9 x i64>
define amdgpu_kernel void @alloca_9xi64_max256(ptr addrspace(1) %out, i32 %index) #2 {
entry:
  %tmp = alloca [9 x i64], addrspace(5)
  store i64 0, ptr addrspace(5) %tmp
  %tmp1 = getelementptr [9 x i64], ptr addrspace(5) %tmp, i32 0, i32 %index
  %tmp2 = load i64, ptr addrspace(5) %tmp1
  store i64 %tmp2, ptr addrspace(1) %out
  ret void
}

; The same body as the previous kernel, but defined as an ordinary function
; (no amdgpu_kernel calling convention): the default run expects the alloca to
; remain.
; OPT-LABEL: @func_alloca_9xi64_max256(
; OPT: alloca
; OPT-NOT: <9 x i64>
; LIMIT32-LABEL: @func_alloca_9xi64_max256(
; LIMIT32: alloca
; LIMIT32-NOT: <9 x i64>
define void @func_alloca_9xi64_max256(ptr addrspace(1) %out, i32 %index) #2 {
entry:
  %tmp = alloca [9 x i64], addrspace(5)
  store i64 0, ptr addrspace(5) %tmp
  %tmp1 = getelementptr [9 x i64], ptr addrspace(5) %tmp, i32 0, i32 %index
  %tmp2 = load i64, ptr addrspace(5) %tmp1
  store i64 %tmp2, ptr addrspace(1) %out
  ret void
}

; An ordinary function again, this time carrying the alwaysinline attribute:
; the default run expects the alloca to be promoted.
; OPT-LABEL: @alwaysinlined_func_alloca_9xi64_max256(
; OPT-NOT: alloca
; OPT: <9 x i64>
; LIMIT32-LABEL: @alwaysinlined_func_alloca_9xi64_max256(
; LIMIT32: alloca
; LIMIT32-NOT: <9 x i64>
define void @alwaysinlined_func_alloca_9xi64_max256(ptr addrspace(1) %out, i32 %index) #3 {
entry:
  %tmp = alloca [9 x i64], addrspace(5)
  store i64 0, ptr addrspace(5) %tmp
  %tmp1 = getelementptr [9 x i64], ptr addrspace(5) %tmp, i32 0, i32 %index
  %tmp2 = load i64, ptr addrspace(5) %tmp1
  store i64 %tmp2, ptr addrspace(1) %out
  ret void
}

; Attribute groups: #0-#2 select the maximum flat work-group size used by the
; functions above; #3 is the 256 variant combined with alwaysinline.
attributes #0 = { "amdgpu-flat-work-group-size"="1,1024" }
attributes #1 = { "amdgpu-flat-work-group-size"="1,512" }
attributes #2 = { "amdgpu-flat-work-group-size"="1,256" }
attributes #3 = { alwaysinline "amdgpu-flat-work-group-size"="1,256" }