; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -passes=sroa,amdgpu-promote-alloca < %s | FileCheck %s

; Make sure that array allocas loaded and stored as multi-element aggregates are handled correctly.
; Strictly the promote-alloca pass shouldn't have to deal with this case as it is non-canonical, but
; the pass should handle it gracefully if it is.
; The checks look for lines that previously caused issues in PromoteAlloca (non-canonical). Opt
; should now leave these unchanged.

%Block = type { [1 x float], i32 }
%gl_PerVertex = type { <4 x float>, float, [1 x float], [1 x float] }
%struct = type { i32, i32 }

@block = external addrspace(1) global %Block
@pv = external addrspace(1) global %gl_PerVertex

; A [1 x float] alloca is written with a whole-aggregate store and read back through a
; dynamically indexed GEP. The assertions expect the array alloca to remain.
define amdgpu_vs void @promote_1d_aggr() #0 {
; CHECK-LABEL: @promote_1d_aggr(
; CHECK-NEXT:    [[F1:%.*]] = alloca [1 x float], align 4, addrspace(5)
; CHECK-NEXT:    [[FOO:%.*]] = getelementptr [[BLOCK:%.*]], ptr addrspace(1) @block, i32 0, i32 1
; CHECK-NEXT:    [[FOO1:%.*]] = load i32, ptr addrspace(1) [[FOO]], align 4
; CHECK-NEXT:    [[FOO3:%.*]] = load [1 x float], ptr addrspace(1) @block, align 4
; CHECK-NEXT:    [[FOO3_FCA_0_EXTRACT:%.*]] = extractvalue [1 x float] [[FOO3]], 0
; CHECK-NEXT:    [[FOO3_FCA_0_GEP:%.*]] = getelementptr inbounds [1 x float], ptr addrspace(5) [[F1]], i32 0, i32 0
; CHECK-NEXT:    store float [[FOO3_FCA_0_EXTRACT]], ptr addrspace(5) [[FOO3_FCA_0_GEP]], align 4
; CHECK-NEXT:    [[FOO5:%.*]] = getelementptr [1 x float], ptr addrspace(5) [[F1]], i32 0, i32 [[FOO1]]
; CHECK-NEXT:    [[FOO6:%.*]] = load float, ptr addrspace(5) [[FOO5]], align 4
; CHECK-NEXT:    [[FOO9:%.*]] = insertelement <4 x float> undef, float [[FOO6]], i32 0
; CHECK-NEXT:    [[FOO10:%.*]] = insertelement <4 x float> [[FOO9]], float [[FOO6]], i32 1
; CHECK-NEXT:    [[FOO11:%.*]] = insertelement <4 x float> [[FOO10]], float [[FOO6]], i32 2
; CHECK-NEXT:    [[FOO12:%.*]] = insertelement <4 x float> [[FOO11]], float [[FOO6]], i32 3
; CHECK-NEXT:    store <4 x float> [[FOO12]], ptr addrspace(1) @pv, align 16
; CHECK-NEXT:    ret void
;
  %i = alloca i32, addrspace(5)
  %f1 = alloca [1 x float], addrspace(5)
  %foo = getelementptr %Block, ptr addrspace(1) @block, i32 0, i32 1
  %foo1 = load i32, ptr addrspace(1) %foo
  store i32 %foo1, ptr addrspace(5) %i
  %foo3 = load [1 x float], ptr addrspace(1) @block
  store [1 x float] %foo3, ptr addrspace(5) %f1
  %foo4 = load i32, ptr addrspace(5) %i
  %foo5 = getelementptr [1 x float], ptr addrspace(5) %f1, i32 0, i32 %foo4
  %foo6 = load float, ptr addrspace(5) %foo5
  %foo7 = alloca <4 x float>, addrspace(5)
  %foo8 = load <4 x float>, ptr addrspace(5) %foo7
  %foo9 = insertelement <4 x float> %foo8, float %foo6, i32 0
  %foo10 = insertelement <4 x float> %foo9, float %foo6, i32 1
  %foo11 = insertelement <4 x float> %foo10, float %foo6, i32 2
  %foo12 = insertelement <4 x float> %foo11, float %foo6, i32 3
  store <4 x float> %foo12, ptr addrspace(1) @pv
  ret void
}

%Block2 = type { i32, [2 x float] }
@block2 = external addrspace(1) global %Block2

; A [2 x float] alloca is written element by element and then loaded as a whole aggregate.
; The assertions expect no alloca in the output.
define amdgpu_vs void @promote_store_aggr() #0 {
; CHECK-LABEL: @promote_store_aggr(
; CHECK-NEXT:    [[FOO1:%.*]] = load i32, ptr addrspace(1) @block2, align 4
; CHECK-NEXT:    [[FOO3:%.*]] = sitofp i32 [[FOO1]] to float
; CHECK-NEXT:    [[FOO6_FCA_0_INSERT:%.*]] = insertvalue [2 x float] poison, float [[FOO3]], 0
; CHECK-NEXT:    [[FOO6_FCA_1_INSERT:%.*]] = insertvalue [2 x float] [[FOO6_FCA_0_INSERT]], float 2.000000e+00, 1
; CHECK-NEXT:    [[FOO7:%.*]] = getelementptr [[BLOCK2:%.*]], ptr addrspace(1) @block2, i32 0, i32 1
; CHECK-NEXT:    store [2 x float] [[FOO6_FCA_1_INSERT]], ptr addrspace(1) [[FOO7]], align 4
; CHECK-NEXT:    store <4 x float> splat (float 1.000000e+00), ptr addrspace(1) @pv, align 16
; CHECK-NEXT:    ret void
;
  %i = alloca i32, addrspace(5)
  %f1 = alloca [2 x float], addrspace(5)
  %foo1 = load i32, ptr addrspace(1) @block2
  store i32 %foo1, ptr addrspace(5) %i
  %foo2 = load i32, ptr addrspace(5) %i
  %foo3 = sitofp i32 %foo2 to float
  store float %foo3, ptr addrspace(5) %f1
  %foo5 = getelementptr [2 x float], ptr addrspace(5) %f1, i32 0, i32 1
  store float 2.000000e+00, ptr addrspace(5) %foo5
  %foo6 = load [2 x float], ptr addrspace(5) %f1
  %foo7 = getelementptr %Block2, ptr addrspace(1) @block2, i32 0, i32 1
  store [2 x float] %foo6, ptr addrspace(1) %foo7
  store <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, ptr addrspace(1) @pv
  ret void
}

%Block3 = type { [2 x float], i32 }
@block3 = external addrspace(1) global %Block3

; A [2 x float] alloca is written with a whole-aggregate store and read back through a
; dynamically indexed GEP. The assertions expect a <2 x float> value indexed with extractelement.
define amdgpu_vs void @promote_load_from_store_aggr() #0 {
; CHECK-LABEL: @promote_load_from_store_aggr(
; CHECK-NEXT:    [[FOO:%.*]] = getelementptr [[BLOCK3:%.*]], ptr addrspace(1) @block3, i32 0, i32 1
; CHECK-NEXT:    [[FOO1:%.*]] = load i32, ptr addrspace(1) [[FOO]], align 4
; CHECK-NEXT:    [[FOO3:%.*]] = load [2 x float], ptr addrspace(1) @block3, align 4
; CHECK-NEXT:    [[FOO3_FCA_0_EXTRACT:%.*]] = extractvalue [2 x float] [[FOO3]], 0
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x float> undef, float [[FOO3_FCA_0_EXTRACT]], i32 0
; CHECK-NEXT:    [[FOO3_FCA_1_EXTRACT:%.*]] = extractvalue [2 x float] [[FOO3]], 1
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x float> [[TMP1]], float [[FOO3_FCA_1_EXTRACT]], i32 1
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 [[FOO1]]
; CHECK-NEXT:    [[FOO9:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 0
; CHECK-NEXT:    [[FOO10:%.*]] = insertelement <4 x float> [[FOO9]], float [[TMP3]], i32 1
; CHECK-NEXT:    [[FOO11:%.*]] = insertelement <4 x float> [[FOO10]], float [[TMP3]], i32 2
; CHECK-NEXT:    [[FOO12:%.*]] = insertelement <4 x float> [[FOO11]], float [[TMP3]], i32 3
; CHECK-NEXT:    store <4 x float> [[FOO12]], ptr addrspace(1) @pv, align 16
; CHECK-NEXT:    ret void
;
  %i = alloca i32, addrspace(5)
  %f1 = alloca [2 x float], addrspace(5)
  %foo = getelementptr %Block3, ptr addrspace(1) @block3, i32 0, i32 1
  %foo1 = load i32, ptr addrspace(1) %foo
  store i32 %foo1, ptr addrspace(5) %i
  %foo3 = load [2 x float], ptr addrspace(1) @block3
  store [2 x float] %foo3, ptr addrspace(5) %f1
  %foo4 = load i32, ptr addrspace(5) %i
  %foo5 = getelementptr [2 x float], ptr addrspace(5) %f1, i32 0, i32 %foo4
  %foo6 = load float, ptr addrspace(5) %foo5
  %foo7 = alloca <4 x float>, addrspace(5)
  %foo8 = load <4 x float>, ptr addrspace(5) %foo7
  %foo9 = insertelement <4 x float> %foo8, float %foo6, i32 0
  %foo10 = insertelement <4 x float> %foo9, float %foo6, i32 1
  %foo11 = insertelement <4 x float> %foo10, float %foo6, i32 2
  %foo12 = insertelement <4 x float> %foo11, float %foo6, i32 3
  store <4 x float> %foo12, ptr addrspace(1) @pv
  ret void
}

; memmove of 16 bytes from element 1 to element 0 of the same [5 x float] alloca
; (source and destination ranges overlap).
define amdgpu_vs void @promote_memmove_aggr() #0 {
; CHECK-LABEL: @promote_memmove_aggr(
; CHECK-NEXT:    store float 1.000000e+00, ptr addrspace(1) @pv, align 4
; CHECK-NEXT:    ret void
;
  %f1 = alloca [5 x float], addrspace(5)
  store [5 x float] zeroinitializer, ptr addrspace(5) %f1
  %foo1 = getelementptr [5 x float], ptr addrspace(5) %f1, i32 0, i32 1
  store float 1.0, ptr addrspace(5) %foo1
  %foo2 = getelementptr [5 x float], ptr addrspace(5) %f1, i32 0, i32 3
  store float 2.0, ptr addrspace(5) %foo2
  call void @llvm.memmove.p5.p5.i32(ptr addrspace(5) align 4 %f1, ptr addrspace(5) align 4 %foo1, i32 16, i1 false)
  %foo3 = load float, ptr addrspace(5) %f1
  store float %foo3, ptr addrspace(1) @pv
  ret void
}

; memcpy of 8 bytes from element 3 to element 0 of the same [5 x float] alloca, after a
; store through a dynamically indexed GEP.
define amdgpu_vs void @promote_memcpy_aggr() #0 {
; CHECK-LABEL: @promote_memcpy_aggr(
; CHECK-NEXT:    [[FOO3:%.*]] = getelementptr [[BLOCK3:%.*]], ptr addrspace(1) @block3, i32 0, i32 0
; CHECK-NEXT:    [[FOO4:%.*]] = load i32, ptr addrspace(1) [[FOO3]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <5 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 2.000000e+00, float 0.000000e+00>, float 3.000000e+00, i32 [[FOO4]]
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <5 x float> [[TMP1]], <5 x float> poison, <5 x i32> <i32 3, i32 4, i32 2, i32 3, i32 4>
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <5 x float> [[TMP2]], i32 0
; CHECK-NEXT:    store float [[TMP3]], ptr addrspace(1) @pv, align 4
; CHECK-NEXT:    ret void
;
  %f1 = alloca [5 x float], addrspace(5)
  store [5 x float] zeroinitializer, ptr addrspace(5) %f1

  %foo2 = getelementptr [5 x float], ptr addrspace(5) %f1, i32 0, i32 3
  store float 2.0, ptr addrspace(5) %foo2

  %foo3 = getelementptr %Block3, ptr addrspace(1) @block3, i32 0, i32 0
  %foo4 = load i32, ptr addrspace(1) %foo3
  %foo5 = getelementptr [5 x float], ptr addrspace(5) %f1, i32 0, i32 %foo4
  store float 3.0, ptr addrspace(5) %foo5

  call void @llvm.memcpy.p5.p5.i32(ptr addrspace(5) align 4 %f1, ptr addrspace(5) align 4 %foo2, i32 8, i1 false)
  %foo6 = load float, ptr addrspace(5) %f1
  store float %foo6, ptr addrspace(1) @pv
  ret void
}

; memcpy of the full 20 bytes where source and destination are the same alloca pointer.
define amdgpu_vs void @promote_memcpy_identity_aggr() #0 {
; CHECK-LABEL: @promote_memcpy_identity_aggr(
; CHECK-NEXT:    store float 0.000000e+00, ptr addrspace(1) @pv, align 4
; CHECK-NEXT:    ret void
;
  %f1 = alloca [5 x float], addrspace(5)
  store [5 x float] zeroinitializer, ptr addrspace(5) %f1
  %foo1 = getelementptr [5 x float], ptr addrspace(5) %f1, i32 0, i32 1
  store float 1.0, ptr addrspace(5) %foo1
  %foo2 = getelementptr [5 x float], ptr addrspace(5) %f1, i32 0, i32 3
  store float 2.0, ptr addrspace(5) %foo2
  call void @llvm.memcpy.p5.p5.i32(ptr addrspace(5) align 4 %f1, ptr addrspace(5) align 4 %f1, i32 20, i1 false)
  %foo3 = load float, ptr addrspace(5) %f1
  store float %foo3, ptr addrspace(1) @pv
  ret void
}

; TODO: promote alloca even if there is a memcpy between different allocas
define amdgpu_vs void @promote_memcpy_two_aggrs() #0 {
; CHECK-LABEL: @promote_memcpy_two_aggrs(
; CHECK-NEXT:    [[F1:%.*]] = alloca [5 x float], align 4, addrspace(5)
; CHECK-NEXT:    [[F2:%.*]] = alloca [5 x float], align 4, addrspace(5)
; CHECK-NEXT:    [[DOTFCA_0_GEP1:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 0
; CHECK-NEXT:    store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_0_GEP1]], align 4
; CHECK-NEXT:    [[DOTFCA_1_GEP2:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 1
; CHECK-NEXT:    store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_1_GEP2]], align 4
; CHECK-NEXT:    [[DOTFCA_2_GEP3:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 2
; CHECK-NEXT:    store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_2_GEP3]], align 4
; CHECK-NEXT:    [[DOTFCA_3_GEP4:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 3
; CHECK-NEXT:    store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_3_GEP4]], align 4
; CHECK-NEXT:    [[DOTFCA_4_GEP5:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 4
; CHECK-NEXT:    store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_4_GEP5]], align 4
; CHECK-NEXT:    [[DOTFCA_0_GEP:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F2]], i32 0, i32 0
; CHECK-NEXT:    store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_0_GEP]], align 4
; CHECK-NEXT:    [[DOTFCA_1_GEP:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F2]], i32 0, i32 1
; CHECK-NEXT:    store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_1_GEP]], align 4
; CHECK-NEXT:    [[DOTFCA_2_GEP:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F2]], i32 0, i32 2
; CHECK-NEXT:    store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_2_GEP]], align 4
; CHECK-NEXT:    [[DOTFCA_3_GEP:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F2]], i32 0, i32 3
; CHECK-NEXT:    store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_3_GEP]], align 4
; CHECK-NEXT:    [[DOTFCA_4_GEP:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F2]], i32 0, i32 4
; CHECK-NEXT:    store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_4_GEP]], align 4
; CHECK-NEXT:    [[FOO3:%.*]] = getelementptr [[BLOCK3:%.*]], ptr addrspace(1) @block3, i32 0, i32 0
; CHECK-NEXT:    [[FOO4:%.*]] = load i32, ptr addrspace(1) [[FOO3]], align 4
; CHECK-NEXT:    [[FOO5:%.*]] = getelementptr [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 [[FOO4]]
; CHECK-NEXT:    store float 3.000000e+00, ptr addrspace(5) [[FOO5]], align 4
; CHECK-NEXT:    call void @llvm.memcpy.p5.p5.i32(ptr addrspace(5) align 4 [[F2]], ptr addrspace(5) align 4 [[F1]], i32 8, i1 false)
; CHECK-NEXT:    [[FOO6:%.*]] = getelementptr [5 x float], ptr addrspace(5) [[F2]], i32 0, i32 [[FOO4]]
; CHECK-NEXT:    [[FOO7:%.*]] = load float, ptr addrspace(5) [[FOO6]], align 4
; CHECK-NEXT:    store float [[FOO7]], ptr addrspace(1) @pv, align 4
; CHECK-NEXT:    ret void
;
  %f1 = alloca [5 x float], addrspace(5)
  %f2 = alloca [5 x float], addrspace(5)

  store [5 x float] zeroinitializer, ptr addrspace(5) %f1
  store [5 x float] zeroinitializer, ptr addrspace(5) %f2

  %foo3 = getelementptr %Block3, ptr addrspace(1) @block3, i32 0, i32 0
  %foo4 = load i32, ptr addrspace(1) %foo3
  %foo5 = getelementptr [5 x float], ptr addrspace(5) %f1, i32 0, i32 %foo4
  store float 3.0, ptr addrspace(5) %foo5

  call void @llvm.memcpy.p5.p5.i32(ptr addrspace(5) align 4 %f2, ptr addrspace(5) align 4 %f1, i32 8, i1 false)

  %foo6 = getelementptr [5 x float], ptr addrspace(5) %f2, i32 0, i32 %foo4
  %foo7 = load float, ptr addrspace(5) %foo6
  store float %foo7, ptr addrspace(1) @pv
  ret void
}

; TODO: promote alloca even if there is a memcpy between the alloca and another memory space.
define amdgpu_vs void @promote_memcpy_p1p5_aggr(ptr addrspace(1) inreg %src) #0 {
; CHECK-LABEL: @promote_memcpy_p1p5_aggr(
; CHECK-NEXT:    [[F1:%.*]] = alloca [5 x float], align 4, addrspace(5)
; CHECK-NEXT:    [[DOTFCA_0_GEP:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 0
; CHECK-NEXT:    store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_0_GEP]], align 4
; CHECK-NEXT:    [[DOTFCA_1_GEP:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 1
; CHECK-NEXT:    store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_1_GEP]], align 4
; CHECK-NEXT:    [[DOTFCA_2_GEP:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 2
; CHECK-NEXT:    store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_2_GEP]], align 4
; CHECK-NEXT:    [[DOTFCA_3_GEP:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 3
; CHECK-NEXT:    store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_3_GEP]], align 4
; CHECK-NEXT:    [[DOTFCA_4_GEP:%.*]] = getelementptr inbounds [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 4
; CHECK-NEXT:    store float 0.000000e+00, ptr addrspace(5) [[DOTFCA_4_GEP]], align 4
; CHECK-NEXT:    [[FOO3:%.*]] = getelementptr [[BLOCK3:%.*]], ptr addrspace(1) @block3, i32 0, i32 0
; CHECK-NEXT:    [[FOO4:%.*]] = load i32, ptr addrspace(1) [[FOO3]], align 4
; CHECK-NEXT:    [[FOO5:%.*]] = getelementptr [5 x float], ptr addrspace(5) [[F1]], i32 0, i32 [[FOO4]]
; CHECK-NEXT:    store float 3.000000e+00, ptr addrspace(5) [[FOO5]], align 4
; CHECK-NEXT:    call void @llvm.memcpy.p1.p5.i32(ptr addrspace(1) align 4 @pv, ptr addrspace(5) align 4 [[F1]], i32 8, i1 false)
; CHECK-NEXT:    ret void
;
  %f1 = alloca [5 x float], addrspace(5)
  store [5 x float] zeroinitializer, ptr addrspace(5) %f1

  %foo3 = getelementptr %Block3, ptr addrspace(1) @block3, i32 0, i32 0
  %foo4 = load i32, ptr addrspace(1) %foo3
  %foo5 = getelementptr [5 x float], ptr addrspace(5) %f1, i32 0, i32 %foo4
  store float 3.0, ptr addrspace(5) %foo5

  call void @llvm.memcpy.p1.p5.i32(ptr addrspace(1) align 4 @pv, ptr addrspace(5) align 4 %f1, i32 8, i1 false)
  ret void
}

; llvm.memcpy.inline of 8 bytes from element 3 to element 0 of the same [5 x float] alloca.
define amdgpu_vs void @promote_memcpy_inline_aggr() #0 {
; CHECK-LABEL: @promote_memcpy_inline_aggr(
; CHECK-NEXT:    [[FOO3:%.*]] = getelementptr [[BLOCK3:%.*]], ptr addrspace(1) @block3, i32 0, i32 0
; CHECK-NEXT:    [[FOO4:%.*]] = load i32, ptr addrspace(1) [[FOO3]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <5 x float> zeroinitializer, float 3.000000e+00, i32 [[FOO4]]
; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <5 x float> [[TMP1]], <5 x float> poison, <5 x i32> <i32 3, i32 4, i32 2, i32 3, i32 4>
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <5 x float> [[TMP2]], i32 0
; CHECK-NEXT:    store float [[TMP3]], ptr addrspace(1) @pv, align 4
; CHECK-NEXT:    ret void
;
  %f1 = alloca [5 x float], addrspace(5)
  store [5 x float] zeroinitializer, ptr addrspace(5) %f1

  %foo2 = getelementptr [5 x float], ptr addrspace(5) %f1, i32 0, i32 3
  %foo3 = getelementptr %Block3, ptr addrspace(1) @block3, i32 0, i32 0
  %foo4 = load i32, ptr addrspace(1) %foo3
  %foo5 = getelementptr [5 x float], ptr addrspace(5) %f1, i32 0, i32 %foo4
  store float 3.0, ptr addrspace(5) %foo5

  call void @llvm.memcpy.inline.p5.p5.i32(ptr addrspace(5) align 4 %f1, ptr addrspace(5) align 4 %foo2, i32 8, i1 false)
  %foo6 = load float, ptr addrspace(5) %f1
  store float %foo6, ptr addrspace(1) @pv
  ret void
}

declare void @llvm.memcpy.p5.p5.i32(ptr addrspace(5) nocapture writeonly, ptr addrspace(5) nocapture readonly, i32, i1 immarg)
declare void @llvm.memcpy.p1.p5.i32(ptr addrspace(1) nocapture writeonly, ptr addrspace(5) nocapture readonly, i32, i1 immarg)
declare void @llvm.memcpy.inline.p5.p5.i32(ptr addrspace(5) nocapture writeonly, ptr addrspace(5) nocapture readonly, i32, i1 immarg)
declare void @llvm.memmove.p5.p5.i32(ptr addrspace(5) nocapture writeonly, ptr addrspace(5) nocapture readonly, i32, i1 immarg)

@tmp_g = external addrspace(1) global { [4 x double], <2 x double>, <3 x double>, <4 x double> }
@frag_color = external addrspace(1) global <4 x float>

; A [2 x double] alloca is initialized by storing an insertvalue-built aggregate and is then
; accessed with scalar double loads and stores.
define amdgpu_ps void @promote_double_aggr() #0 {
; CHECK-LABEL: @promote_double_aggr(
; CHECK-NEXT:    [[FOO:%.*]] = getelementptr { [4 x double], <2 x double>, <3 x double>, <4 x double> }, ptr addrspace(1) @tmp_g, i32 0, i32 0, i32 0
; CHECK-NEXT:    [[FOO1:%.*]] = load double, ptr addrspace(1) [[FOO]], align 8
; CHECK-NEXT:    [[FOO2:%.*]] = getelementptr { [4 x double], <2 x double>, <3 x double>, <4 x double> }, ptr addrspace(1) @tmp_g, i32 0, i32 0, i32 1
; CHECK-NEXT:    [[FOO3:%.*]] = load double, ptr addrspace(1) [[FOO2]], align 8
; CHECK-NEXT:    [[FOO4:%.*]] = insertvalue [2 x double] undef, double [[FOO1]], 0
; CHECK-NEXT:    [[FOO5:%.*]] = insertvalue [2 x double] [[FOO4]], double [[FOO3]], 1
; CHECK-NEXT:    [[FOO5_FCA_0_EXTRACT:%.*]] = extractvalue [2 x double] [[FOO5]], 0
; CHECK-NEXT:    [[FOO5_FCA_1_EXTRACT:%.*]] = extractvalue [2 x double] [[FOO5]], 1
; CHECK-NEXT:    [[FOO10:%.*]] = fadd double [[FOO5_FCA_1_EXTRACT]], [[FOO5_FCA_1_EXTRACT]]
; CHECK-NEXT:    [[FOO16:%.*]] = fadd double [[FOO10]], [[FOO5_FCA_1_EXTRACT]]
; CHECK-NEXT:    [[FOO17:%.*]] = fptrunc double [[FOO16]] to float
; CHECK-NEXT:    [[FOO18:%.*]] = insertelement <4 x float> undef, float [[FOO17]], i32 0
; CHECK-NEXT:    [[FOO19:%.*]] = insertelement <4 x float> [[FOO18]], float [[FOO17]], i32 1
; CHECK-NEXT:    [[FOO20:%.*]] = insertelement <4 x float> [[FOO19]], float [[FOO17]], i32 2
; CHECK-NEXT:    [[FOO21:%.*]] = insertelement <4 x float> [[FOO20]], float [[FOO17]], i32 3
; CHECK-NEXT:    store <4 x float> [[FOO21]], ptr addrspace(1) @frag_color, align 16
; CHECK-NEXT:    ret void
;
  %s = alloca [2 x double], addrspace(5)
  %foo = getelementptr { [4 x double], <2 x double>, <3 x double>, <4 x double> }, ptr addrspace(1) @tmp_g, i32 0, i32 0, i32 0
  %foo1 = load double, ptr addrspace(1) %foo
  %foo2 = getelementptr { [4 x double], <2 x double>, <3 x double>, <4 x double> }, ptr addrspace(1) @tmp_g, i32 0, i32 0, i32 1
  %foo3 = load double, ptr addrspace(1) %foo2
  %foo4 = insertvalue [2 x double] undef, double %foo1, 0
  %foo5 = insertvalue [2 x double] %foo4, double %foo3, 1
  store [2 x double] %foo5, ptr addrspace(5) %s
  %foo6 = getelementptr [2 x double], ptr addrspace(5) %s, i32 0, i32 1
  %foo7 = load double, ptr addrspace(5) %foo6
  %foo8 = getelementptr [2 x double], ptr addrspace(5) %s, i32 0, i32 1
  %foo9 = load double, ptr addrspace(5) %foo8
  %foo10 = fadd double %foo7, %foo9
  store double %foo10, ptr addrspace(5) %s
  %foo13 = load double, ptr addrspace(5) %s
  %foo14 = getelementptr [2 x double], ptr addrspace(5) %s, i32 0, i32 1
  %foo15 = load double, ptr addrspace(5) %foo14
  %foo16 = fadd double %foo13, %foo15
  %foo17 = fptrunc double %foo16 to float
  %foo18 = insertelement <4 x float> undef, float %foo17, i32 0
  %foo19 = insertelement <4 x float> %foo18, float %foo17, i32 1
  %foo20 = insertelement <4 x float> %foo19, float %foo17, i32 2
  %foo21 = insertelement <4 x float> %foo20, float %foo17, i32 3
  store <4 x float> %foo21, ptr addrspace(1) @frag_color
  ret void
}

; Don't crash on a type that isn't a valid vector element.
define amdgpu_kernel void @alloca_struct() #0 {
; CHECK-LABEL: @alloca_struct(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    ret void
;
entry:
  %alloca = alloca [2 x %struct], align 4, addrspace(5)
  ret void
}