; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes='sroa<preserve-cfg>' -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-PRESERVE-CFG
; RUN: opt -passes='sroa<modify-cfg>' -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-MODIFY-CFG

; Each test allocates an aggregate (or vector) of halves, zero-initializes it
; (vector store or memset), overwrites it with a wider-typed store
; (<4 x float>, [4 x i32], ...), and then reads individual 2-byte elements in
; a separate block. The CHECK lines verify which allocas SROA can fold away
; entirely (loads become extractelement/bitcast of the stored value) and which
; slices survive as allocas when no common vector type can be found.

%"struct.a" = type { <8 x half> }
%"struct.b" = type { %"struct.a" }
%"struct.c" = type { %"struct.a", i32, i8 }
%"struct.d" = type { [4 x i32], %"struct.a" }
%"struct.e" = type { [2 x <8 x half>], i32, i32 }
%"struct.f" = type { [2 x <8 x i16>], i32, i32 }
%"array.a" = type [2 x <8 x half>]
%"array.b" = type [2 x %"struct.a"]

; Zero-init via a <8 x half> store, then a full <4 x float> overwrite. The
; alloca of %"struct.b" is eliminated; each half load in %bb becomes an
; extractelement from the stored value bitcast to <8 x i16>, then to half.
define amdgpu_kernel void @test_zeroinit() #0 {
; CHECK-LABEL: @test_zeroinit(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[DATA:%.*]] = load <4 x float>, ptr undef, align 16
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[DATA]] to <8 x i16>
; CHECK-NEXT:    br label [[BB:%.*]]
; CHECK:       bb:
; CHECK-NEXT:    [[B_BLOCKWISE_COPY_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <8 x i16> [[TMP0]], i32 0
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16 [[B_BLOCKWISE_COPY_SROA_0_0_VEC_EXTRACT]] to half
; CHECK-NEXT:    [[B_BLOCKWISE_COPY_SROA_0_2_VEC_EXTRACT:%.*]] = extractelement <8 x i16> [[TMP0]], i32 1
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 [[B_BLOCKWISE_COPY_SROA_0_2_VEC_EXTRACT]] to half
; CHECK-NEXT:    [[B_BLOCKWISE_COPY_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <8 x i16> [[TMP0]], i32 2
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i16 [[B_BLOCKWISE_COPY_SROA_0_4_VEC_EXTRACT]] to half
; CHECK-NEXT:    ret void
;
entry:
  %b_blockwise_copy = alloca %"struct.b", align 16
  store <8 x half> zeroinitializer, ptr %b_blockwise_copy, align 16
  %data = load <4 x float>, ptr undef
  store <4 x float> %data, ptr %b_blockwise_copy, align 16
  br label %bb

bb:
  %load1 = load half, ptr %b_blockwise_copy, align 16
  %ptr2 = getelementptr inbounds i8, ptr %b_blockwise_copy, i64 2
  %load2 = load half, ptr %ptr2, align 16
  %ptr3 = getelementptr inbounds i8, ptr %b_blockwise_copy, i64 4
  %load3 = load half, ptr %ptr3, align 16
  ret void
}

; Same as @test_zeroinit but the zero-init is a 16-byte memset instead of a
; vector store; the result after SROA is identical.
define amdgpu_kernel void @test_memset() #0 {
; CHECK-LABEL: @test_memset(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[DATA:%.*]] = load <4 x float>, ptr undef, align 16
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[DATA]] to <8 x i16>
; CHECK-NEXT:    br label [[BB:%.*]]
; CHECK:       bb:
; CHECK-NEXT:    [[B_BLOCKWISE_COPY_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <8 x i16> [[TMP0]], i32 0
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16 [[B_BLOCKWISE_COPY_SROA_0_0_VEC_EXTRACT]] to half
; CHECK-NEXT:    [[B_BLOCKWISE_COPY_SROA_0_2_VEC_EXTRACT:%.*]] = extractelement <8 x i16> [[TMP0]], i32 1
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 [[B_BLOCKWISE_COPY_SROA_0_2_VEC_EXTRACT]] to half
; CHECK-NEXT:    [[B_BLOCKWISE_COPY_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <8 x i16> [[TMP0]], i32 2
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i16 [[B_BLOCKWISE_COPY_SROA_0_4_VEC_EXTRACT]] to half
; CHECK-NEXT:    ret void
;
entry:
  %b_blockwise_copy = alloca %"struct.b", align 16
  call void @llvm.memset.p0.i64(ptr align 16 %b_blockwise_copy, i8 0, i64 16, i1 false)
  %data = load <4 x float>, ptr undef
  store <4 x float> %data, ptr %b_blockwise_copy, align 16
  br label %bb

bb:
  %load1 = load half, ptr %b_blockwise_copy, align 16
  %ptr2 = getelementptr inbounds i8, ptr %b_blockwise_copy, i64 2
  %load2 = load half, ptr %ptr2, align 16
  %ptr3 = getelementptr inbounds i8, ptr %b_blockwise_copy, i64 4
  %load3 = load half, ptr %ptr3, align 16
  ret void
}

; The alloca is the bare <8 x half> vector type (no wrapping struct); promotion
; proceeds exactly as in the struct cases above.
define amdgpu_kernel void @vector_type_alloca() #0 {
; CHECK-LABEL: @vector_type_alloca(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[DATA:%.*]] = load <4 x float>, ptr undef, align 16
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[DATA]] to <8 x i16>
; CHECK-NEXT:    br label [[BB:%.*]]
; CHECK:       bb:
; CHECK-NEXT:    [[B_BLOCKWISE_COPY_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <8 x i16> [[TMP0]], i32 0
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16 [[B_BLOCKWISE_COPY_SROA_0_0_VEC_EXTRACT]] to half
; CHECK-NEXT:    [[B_BLOCKWISE_COPY_SROA_0_2_VEC_EXTRACT:%.*]] = extractelement <8 x i16> [[TMP0]], i32 1
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 [[B_BLOCKWISE_COPY_SROA_0_2_VEC_EXTRACT]] to half
; CHECK-NEXT:    [[B_BLOCKWISE_COPY_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <8 x i16> [[TMP0]], i32 2
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i16 [[B_BLOCKWISE_COPY_SROA_0_4_VEC_EXTRACT]] to half
; CHECK-NEXT:    ret void
;
entry:
  %b_blockwise_copy = alloca <8 x half>, align 16
  store <8 x half> zeroinitializer, ptr %b_blockwise_copy, align 16
  %data = load <4 x float>, ptr undef
  store <4 x float> %data, ptr %b_blockwise_copy, align 16
  br label %bb

bb:
  %load1 = load half, ptr %b_blockwise_copy, align 16
  %ptr2 = getelementptr inbounds i8, ptr %b_blockwise_copy, i64 2
  %load2 = load half, ptr %ptr2, align 16
  %ptr3 = getelementptr inbounds i8, ptr %b_blockwise_copy, i64 4
  %load3 = load half, ptr %ptr3, align 16
  ret void
}

; %"struct.c" mixes the half-vector with trailing i32/i8 members; only the
; vector member is accessed, so the unused tail is dropped and the loads are
; promoted as before.
define amdgpu_kernel void @test_struct_contain_multiple_types1() #0 {
; CHECK-LABEL: @test_struct_contain_multiple_types1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[DATA:%.*]] = load <4 x float>, ptr undef, align 16
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[DATA]] to <8 x i16>
; CHECK-NEXT:    br label [[BB:%.*]]
; CHECK:       bb:
; CHECK-NEXT:    [[B_BLOCKWISE_COPY_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <8 x i16> [[TMP0]], i32 0
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16 [[B_BLOCKWISE_COPY_SROA_0_0_VEC_EXTRACT]] to half
; CHECK-NEXT:    [[B_BLOCKWISE_COPY_SROA_0_2_VEC_EXTRACT:%.*]] = extractelement <8 x i16> [[TMP0]], i32 1
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 [[B_BLOCKWISE_COPY_SROA_0_2_VEC_EXTRACT]] to half
; CHECK-NEXT:    [[B_BLOCKWISE_COPY_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <8 x i16> [[TMP0]], i32 2
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i16 [[B_BLOCKWISE_COPY_SROA_0_4_VEC_EXTRACT]] to half
; CHECK-NEXT:    ret void
;
entry:
  %b_blockwise_copy = alloca %"struct.c", align 16
  store <8 x half> zeroinitializer, ptr %b_blockwise_copy, align 16
  %data = load <4 x float>, ptr undef
  store <4 x float> %data, ptr %b_blockwise_copy, align 16
  br label %bb

bb:
  %load1 = load half, ptr %b_blockwise_copy, align 16
  %ptr2 = getelementptr inbounds i8, ptr %b_blockwise_copy, i64 2
  %load2 = load half, ptr %ptr2, align 16
  %ptr3 = getelementptr inbounds i8, ptr %b_blockwise_copy, i64 4
  %load3 = load half, ptr %ptr3, align 16
  ret void
}

; %"struct.d" puts a [4 x i32] array in front of the half-vector. The array
; slice is promoted via FCA extractvalues; the half loads at offsets 16/18/20
; are rewritten to extracts from the <4 x float> store to the second slice.
define amdgpu_kernel void @test_struct_contain_multiple_types2() #0 {
; CHECK-LABEL: @test_struct_contain_multiple_types2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[DATA1:%.*]] = load [4 x i32], ptr undef, align 4
; CHECK-NEXT:    [[DATA1_FCA_0_EXTRACT:%.*]] = extractvalue [4 x i32] [[DATA1]], 0
; CHECK-NEXT:    [[DATA1_FCA_1_EXTRACT:%.*]] = extractvalue [4 x i32] [[DATA1]], 1
; CHECK-NEXT:    [[DATA1_FCA_2_EXTRACT:%.*]] = extractvalue [4 x i32] [[DATA1]], 2
; CHECK-NEXT:    [[DATA1_FCA_3_EXTRACT:%.*]] = extractvalue [4 x i32] [[DATA1]], 3
; CHECK-NEXT:    [[DATA2:%.*]] = load <4 x float>, ptr undef, align 16
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[DATA2]] to <8 x i16>
; CHECK-NEXT:    br label [[BB:%.*]]
; CHECK:       bb:
; CHECK-NEXT:    [[B_BLOCKWISE_COPY_SROA_5_16_VEC_EXTRACT:%.*]] = extractelement <8 x i16> [[TMP0]], i32 0
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16 [[B_BLOCKWISE_COPY_SROA_5_16_VEC_EXTRACT]] to half
; CHECK-NEXT:    [[B_BLOCKWISE_COPY_SROA_5_18_VEC_EXTRACT:%.*]] = extractelement <8 x i16> [[TMP0]], i32 1
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 [[B_BLOCKWISE_COPY_SROA_5_18_VEC_EXTRACT]] to half
; CHECK-NEXT:    [[B_BLOCKWISE_COPY_SROA_5_20_VEC_EXTRACT:%.*]] = extractelement <8 x i16> [[TMP0]], i32 2
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i16 [[B_BLOCKWISE_COPY_SROA_5_20_VEC_EXTRACT]] to half
; CHECK-NEXT:    ret void
;
entry:
  %b_blockwise_copy = alloca %"struct.d", align 16
  call void @llvm.memset.p0.i32(ptr align 16 %b_blockwise_copy, i8 0, i32 16, i1 false)
  %data1 = load [4 x i32], ptr undef
  store [4 x i32] %data1, ptr %b_blockwise_copy, align 16
  %data2_gep = getelementptr inbounds i8, ptr %b_blockwise_copy, i64 16
  store <8 x half> zeroinitializer, ptr %data2_gep, align 16
  %data2 = load <4 x float>, ptr undef
  store <4 x float> %data2, ptr %data2_gep, align 16
  br label %bb

bb:
  %ptr1 = getelementptr inbounds i8, ptr %b_blockwise_copy, i64 16
  %load1 = load half, ptr %ptr1, align 16
  %ptr2 = getelementptr inbounds i8, ptr %b_blockwise_copy, i64 18
  %load2 = load half, ptr %ptr2, align 16
  %ptr3 = getelementptr inbounds i8, ptr %b_blockwise_copy, i64 20
  %load3 = load half, ptr %ptr3, align 16
  ret void
}

; %"struct.e" contains [2 x <8 x half>]; each array element is initialized and
; overwritten independently, and one half is read from each, so both slices are
; promoted to extracts from their respective <4 x float> stores.
define amdgpu_kernel void @test_struct_array_vector() #0 {
; CHECK-LABEL: @test_struct_array_vector(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[DATA0:%.*]] = load <4 x float>, ptr undef, align 16
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[DATA0]] to <8 x i16>
; CHECK-NEXT:    [[DATA1:%.*]] = load <4 x float>, ptr undef, align 16
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x float> [[DATA1]] to <8 x i16>
; CHECK-NEXT:    br label [[BB:%.*]]
; CHECK:       bb:
; CHECK-NEXT:    [[B_BLOCKWISE_COPY_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <8 x i16> [[TMP0]], i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 [[B_BLOCKWISE_COPY_SROA_0_0_VEC_EXTRACT]] to half
; CHECK-NEXT:    [[B_BLOCKWISE_COPY_SROA_3_16_VEC_EXTRACT:%.*]] = extractelement <8 x i16> [[TMP1]], i32 0
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i16 [[B_BLOCKWISE_COPY_SROA_3_16_VEC_EXTRACT]] to half
; CHECK-NEXT:    ret void
;
entry:
  %b_blockwise_copy = alloca %"struct.e", align 16
  store <8 x half> zeroinitializer, ptr %b_blockwise_copy, align 16
  %0 = getelementptr inbounds i8, ptr %b_blockwise_copy, i64 16
  store <8 x half> zeroinitializer, ptr %0, align 16
  %data0 = load <4 x float>, ptr undef
  store <4 x float> %data0, ptr %b_blockwise_copy, align 16
  %data1 = load <4 x float>, ptr undef
  store <4 x float> %data1, ptr %0, align 16
  br label %bb

bb:
  %load1 = load half, ptr %b_blockwise_copy, align 16
  %ptr2 = getelementptr inbounds i8, ptr %b_blockwise_copy, i64 16
  %load2 = load half, ptr %ptr2, align 16
  ret void
}

; i16 variant of the array-of-vectors case: the element loads are already i16,
; so promotion needs extractelement only — no bitcast to half.
define amdgpu_kernel void @test_struct_array_vector_i16() #0 {
; CHECK-LABEL: @test_struct_array_vector_i16(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[DATA:%.*]] = load <4 x i32>, ptr undef, align 16
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[DATA]] to <8 x i16>
; CHECK-NEXT:    [[DATA2:%.*]] = load <4 x i32>, ptr undef, align 16
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[DATA2]] to <8 x i16>
; CHECK-NEXT:    br label [[BB:%.*]]
; CHECK:       bb:
; CHECK-NEXT:    [[B_BLOCKWISE_COPY_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <8 x i16> [[TMP0]], i32 0
; CHECK-NEXT:    [[B_BLOCKWISE_COPY_SROA_0_2_VEC_EXTRACT:%.*]] = extractelement <8 x i16> [[TMP0]], i32 1
; CHECK-NEXT:    [[B_BLOCKWISE_COPY_SROA_4_16_VEC_EXTRACT:%.*]] = extractelement <8 x i16> [[TMP1]], i32 0
; CHECK-NEXT:    ret void
;
entry:
  %b_blockwise_copy = alloca %"struct.f", align 16
  call void @llvm.memset.p0.i32(ptr align 16 %b_blockwise_copy, i8 0, i32 32, i1 false)
  %data = load <4 x i32>, ptr undef
  store <4 x i32> %data, ptr %b_blockwise_copy, align 16
  %data2 = load <4 x i32>, ptr undef
  %data2_gep = getelementptr inbounds i8, ptr %b_blockwise_copy, i64 16
  store <4 x i32> %data2, ptr %data2_gep, align 16
  br label %bb

bb:
  %load1 = load i16, ptr %b_blockwise_copy, align 16
  %ptr2 = getelementptr inbounds i8, ptr %b_blockwise_copy, i64 2
  %load2 = load i16, ptr %ptr2, align 16
  %ptr3 = getelementptr inbounds i8, ptr %b_blockwise_copy, i64 16
  %load3 = load i16, ptr %ptr3, align 16
  ret void
}

; [8 x half] written via a [4 x float] FCA store: the float-sized slices that
; are reloaded as halves cannot be fully promoted, so SROA keeps two float
; allocas (for bytes 0-3 and 4-7) and rewrites the loads against them.
define amdgpu_kernel void @test_half_array() #0 {
; CHECK-LABEL: @test_half_array(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[B_BLOCKWISE_COPY_SROA_0:%.*]] = alloca float, align 16
; CHECK-NEXT:    [[B_BLOCKWISE_COPY_SROA_4:%.*]] = alloca float, align 4
; CHECK-NEXT:    call void @llvm.memset.p0.i32(ptr align 16 [[B_BLOCKWISE_COPY_SROA_0]], i8 0, i32 4, i1 false)
; CHECK-NEXT:    call void @llvm.memset.p0.i32(ptr align 4 [[B_BLOCKWISE_COPY_SROA_4]], i8 0, i32 4, i1 false)
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float undef to i32
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast float undef to i32
; CHECK-NEXT:    [[DATA:%.*]] = load [4 x float], ptr undef, align 4
; CHECK-NEXT:    [[DATA_FCA_0_EXTRACT:%.*]] = extractvalue [4 x float] [[DATA]], 0
; CHECK-NEXT:    store float [[DATA_FCA_0_EXTRACT]], ptr [[B_BLOCKWISE_COPY_SROA_0]], align 16
; CHECK-NEXT:    [[DATA_FCA_1_EXTRACT:%.*]] = extractvalue [4 x float] [[DATA]], 1
; CHECK-NEXT:    store float [[DATA_FCA_1_EXTRACT]], ptr [[B_BLOCKWISE_COPY_SROA_4]], align 4
; CHECK-NEXT:    [[DATA_FCA_2_EXTRACT:%.*]] = extractvalue [4 x float] [[DATA]], 2
; CHECK-NEXT:    [[DATA_FCA_3_EXTRACT:%.*]] = extractvalue [4 x float] [[DATA]], 3
; CHECK-NEXT:    br label [[BB:%.*]]
; CHECK:       bb:
; CHECK-NEXT:    [[B_BLOCKWISE_COPY_SROA_0_0_B_BLOCKWISE_COPY_SROA_0_0_LOAD1:%.*]] = load half, ptr [[B_BLOCKWISE_COPY_SROA_0]], align 16
; CHECK-NEXT:    [[B_BLOCKWISE_COPY_SROA_0_2_PTR2_SROA_IDX1:%.*]] = getelementptr inbounds i8, ptr [[B_BLOCKWISE_COPY_SROA_0]], i64 2
; CHECK-NEXT:    [[B_BLOCKWISE_COPY_SROA_0_2_B_BLOCKWISE_COPY_SROA_0_2_LOAD2:%.*]] = load half, ptr [[B_BLOCKWISE_COPY_SROA_0_2_PTR2_SROA_IDX1]], align 2
; CHECK-NEXT:    [[B_BLOCKWISE_COPY_SROA_4_0_B_BLOCKWISE_COPY_SROA_4_4_LOAD3:%.*]] = load half, ptr [[B_BLOCKWISE_COPY_SROA_4]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %b_blockwise_copy = alloca [8 x half], align 16
  call void @llvm.memset.p0.i32(ptr align 16 %b_blockwise_copy, i8 0, i32 16, i1 false)
  %data = load [4 x float], ptr undef
  store [4 x float] %data, ptr %b_blockwise_copy, align 16
  br label %bb

bb:
  %load1 = load half, ptr %b_blockwise_copy, align 16
  %ptr2 = getelementptr inbounds i8, ptr %b_blockwise_copy, i64 2
  %load2 = load half, ptr %ptr2, align 16
  %ptr3 = getelementptr inbounds i8, ptr %b_blockwise_copy, i64 4
  %load3 = load half, ptr %ptr3, align 16
  ret void
}

; %"array.a" (32 bytes) is memset in full but only the first 16 bytes are
; overwritten and read: the first vector's loads are promoted, while the
; untouched second vector survives as an <8 x half> alloca plus its memset.
define amdgpu_kernel void @test_array_vector() #0 {
; CHECK-LABEL: @test_array_vector(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[B_BLOCKWISE_COPY_SROA_5:%.*]] = alloca <8 x half>, align 16
; CHECK-NEXT:    call void @llvm.memset.p0.i32(ptr align 16 [[B_BLOCKWISE_COPY_SROA_5]], i8 0, i32 16, i1 false)
; CHECK-NEXT:    [[DATA:%.*]] = load <4 x float>, ptr undef, align 16
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[DATA]] to <8 x i16>
; CHECK-NEXT:    br label [[BB:%.*]]
; CHECK:       bb:
; CHECK-NEXT:    [[B_BLOCKWISE_COPY_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <8 x i16> [[TMP0]], i32 0
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16 [[B_BLOCKWISE_COPY_SROA_0_0_VEC_EXTRACT]] to half
; CHECK-NEXT:    [[B_BLOCKWISE_COPY_SROA_0_2_VEC_EXTRACT:%.*]] = extractelement <8 x i16> [[TMP0]], i32 1
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 [[B_BLOCKWISE_COPY_SROA_0_2_VEC_EXTRACT]] to half
; CHECK-NEXT:    [[B_BLOCKWISE_COPY_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <8 x i16> [[TMP0]], i32 2
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i16 [[B_BLOCKWISE_COPY_SROA_0_4_VEC_EXTRACT]] to half
; CHECK-NEXT:    ret void
;
entry:
  %b_blockwise_copy = alloca %"array.a", align 16
  call void @llvm.memset.p0.i32(ptr align 16 %b_blockwise_copy, i8 0, i32 32, i1 false)
  %data = load <4 x float>, ptr undef
  store <4 x float> %data, ptr %b_blockwise_copy, align 16
  br label %bb

bb:
  %load1 = load half, ptr %b_blockwise_copy, align 16
  %ptr2 = getelementptr inbounds i8, ptr %b_blockwise_copy, i64 2
  %load2 = load half, ptr %ptr2, align 16
  %ptr3 = getelementptr inbounds i8, ptr %b_blockwise_copy, i64 4
  %load3 = load half, ptr %ptr3, align 16
  ret void
}

; Same as @test_array_vector but with the vectors wrapped in structs
; ([2 x %"struct.a"]); SROA produces the same result.
define amdgpu_kernel void @test_array_vector2() #0 {
; CHECK-LABEL: @test_array_vector2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[B_BLOCKWISE_COPY_SROA_5:%.*]] = alloca <8 x half>, align 16
; CHECK-NEXT:    call void @llvm.memset.p0.i32(ptr align 16 [[B_BLOCKWISE_COPY_SROA_5]], i8 0, i32 16, i1 false)
; CHECK-NEXT:    [[DATA:%.*]] = load <4 x float>, ptr undef, align 16
; CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[DATA]] to <8 x i16>
; CHECK-NEXT:    br label [[BB:%.*]]
; CHECK:       bb:
; CHECK-NEXT:    [[B_BLOCKWISE_COPY_SROA_0_0_VEC_EXTRACT:%.*]] = extractelement <8 x i16> [[TMP0]], i32 0
; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16 [[B_BLOCKWISE_COPY_SROA_0_0_VEC_EXTRACT]] to half
; CHECK-NEXT:    [[B_BLOCKWISE_COPY_SROA_0_2_VEC_EXTRACT:%.*]] = extractelement <8 x i16> [[TMP0]], i32 1
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 [[B_BLOCKWISE_COPY_SROA_0_2_VEC_EXTRACT]] to half
; CHECK-NEXT:    [[B_BLOCKWISE_COPY_SROA_0_4_VEC_EXTRACT:%.*]] = extractelement <8 x i16> [[TMP0]], i32 2
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i16 [[B_BLOCKWISE_COPY_SROA_0_4_VEC_EXTRACT]] to half
; CHECK-NEXT:    ret void
;
entry:
  %b_blockwise_copy = alloca %"array.b", align 16
  call void @llvm.memset.p0.i32(ptr align 16 %b_blockwise_copy, i8 0, i32 32, i1 false)
  %data = load <4 x float>, ptr undef
  store <4 x float> %data, ptr %b_blockwise_copy, align 16
  br label %bb

bb:
  %load1 = load half, ptr %b_blockwise_copy, align 16
  %ptr2 = getelementptr inbounds i8, ptr %b_blockwise_copy, i64 2
  %load2 = load half, ptr %ptr2, align 16
  %ptr3 = getelementptr inbounds i8, ptr %b_blockwise_copy, i64 4
  %load3 = load half, ptr %ptr3, align 16
  ret void
}

; The first half of the array is written with four scalar float stores rather
; than one vector store, so no common vector type is found for those slices:
; they remain as four float allocas (plus the untouched <8 x half> tail), and
; the half loads are rewritten against those allocas instead of being promoted.
define amdgpu_kernel void @test_array_vector_no_vector_common_type() #0 {
; CHECK-LABEL: @test_array_vector_no_vector_common_type(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[B_BLOCKWISE_COPY_SROA_0:%.*]] = alloca float, align 16
; CHECK-NEXT:    [[B_BLOCKWISE_COPY_SROA_4:%.*]] = alloca float, align 4
; CHECK-NEXT:    [[B_BLOCKWISE_COPY_SROA_7:%.*]] = alloca float, align 8
; CHECK-NEXT:    [[B_BLOCKWISE_COPY_SROA_10:%.*]] = alloca float, align 4
; CHECK-NEXT:    [[B_BLOCKWISE_COPY_SROA_13:%.*]] = alloca <8 x half>, align 16
; CHECK-NEXT:    call void @llvm.memset.p0.i32(ptr align 16 [[B_BLOCKWISE_COPY_SROA_0]], i8 0, i32 4, i1 false)
; CHECK-NEXT:    call void @llvm.memset.p0.i32(ptr align 4 [[B_BLOCKWISE_COPY_SROA_4]], i8 0, i32 4, i1 false)
; CHECK-NEXT:    call void @llvm.memset.p0.i32(ptr align 8 [[B_BLOCKWISE_COPY_SROA_7]], i8 0, i32 4, i1 false)
; CHECK-NEXT:    call void @llvm.memset.p0.i32(ptr align 4 [[B_BLOCKWISE_COPY_SROA_10]], i8 0, i32 4, i1 false)
; CHECK-NEXT:    call void @llvm.memset.p0.i32(ptr align 16 [[B_BLOCKWISE_COPY_SROA_13]], i8 0, i32 16, i1 false)
; CHECK-NEXT:    [[DATA1:%.*]] = load float, ptr undef, align 4
; CHECK-NEXT:    [[DATA2:%.*]] = load float, ptr undef, align 4
; CHECK-NEXT:    [[DATA3:%.*]] = load float, ptr undef, align 4
; CHECK-NEXT:    [[DATA4:%.*]] = load float, ptr undef, align 4
; CHECK-NEXT:    store float [[DATA1]], ptr [[B_BLOCKWISE_COPY_SROA_0]], align 16
; CHECK-NEXT:    store float [[DATA2]], ptr [[B_BLOCKWISE_COPY_SROA_4]], align 4
; CHECK-NEXT:    store float [[DATA3]], ptr [[B_BLOCKWISE_COPY_SROA_7]], align 8
; CHECK-NEXT:    store float [[DATA4]], ptr [[B_BLOCKWISE_COPY_SROA_10]], align 4
; CHECK-NEXT:    br label [[BB:%.*]]
; CHECK:       bb:
; CHECK-NEXT:    [[B_BLOCKWISE_COPY_SROA_0_0_B_BLOCKWISE_COPY_SROA_0_0_LOAD1:%.*]] = load half, ptr [[B_BLOCKWISE_COPY_SROA_0]], align 16
; CHECK-NEXT:    [[B_BLOCKWISE_COPY_SROA_0_2_PTR2_SROA_IDX1:%.*]] = getelementptr inbounds i8, ptr [[B_BLOCKWISE_COPY_SROA_0]], i64 2
; CHECK-NEXT:    [[B_BLOCKWISE_COPY_SROA_0_2_B_BLOCKWISE_COPY_SROA_0_2_LOAD2:%.*]] = load half, ptr [[B_BLOCKWISE_COPY_SROA_0_2_PTR2_SROA_IDX1]], align 2
; CHECK-NEXT:    [[B_BLOCKWISE_COPY_SROA_4_0_B_BLOCKWISE_COPY_SROA_4_4_LOAD3:%.*]] = load half, ptr [[B_BLOCKWISE_COPY_SROA_4]], align 4
; CHECK-NEXT:    [[B_BLOCKWISE_COPY_SROA_4_2_PTR4_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[B_BLOCKWISE_COPY_SROA_4]], i64 2
; CHECK-NEXT:    [[B_BLOCKWISE_COPY_SROA_4_2_B_BLOCKWISE_COPY_SROA_4_6_LOAD4:%.*]] = load half, ptr [[B_BLOCKWISE_COPY_SROA_4_2_PTR4_SROA_IDX]], align 2
; CHECK-NEXT:    [[B_BLOCKWISE_COPY_SROA_7_0_B_BLOCKWISE_COPY_SROA_7_8_LOAD5:%.*]] = load half, ptr [[B_BLOCKWISE_COPY_SROA_7]], align 8
; CHECK-NEXT:    [[B_BLOCKWISE_COPY_SROA_7_2_PTR6_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[B_BLOCKWISE_COPY_SROA_7]], i64 2
; CHECK-NEXT:    [[B_BLOCKWISE_COPY_SROA_7_2_B_BLOCKWISE_COPY_SROA_7_10_LOAD6:%.*]] = load half, ptr [[B_BLOCKWISE_COPY_SROA_7_2_PTR6_SROA_IDX]], align 2
; CHECK-NEXT:    [[B_BLOCKWISE_COPY_SROA_10_0_B_BLOCKWISE_COPY_SROA_10_12_LOAD7:%.*]] = load half, ptr [[B_BLOCKWISE_COPY_SROA_10]], align 4
; CHECK-NEXT:    [[B_BLOCKWISE_COPY_SROA_10_2_PTR8_SROA_IDX:%.*]] = getelementptr inbounds i8, ptr [[B_BLOCKWISE_COPY_SROA_10]], i64 2
; CHECK-NEXT:    [[B_BLOCKWISE_COPY_SROA_10_2_B_BLOCKWISE_COPY_SROA_10_14_LOAD8:%.*]] = load half, ptr [[B_BLOCKWISE_COPY_SROA_10_2_PTR8_SROA_IDX]], align 2
; CHECK-NEXT:    ret void
;
entry:
  %b_blockwise_copy = alloca %"array.a", align 16
  call void @llvm.memset.p0.i32(ptr align 16 %b_blockwise_copy, i8 0, i32 32, i1 false)
  %data1 = load float, ptr undef
  %data2 = load float, ptr undef
  %data3 = load float, ptr undef
  %data4 = load float, ptr undef
  store float %data1, ptr %b_blockwise_copy, align 16
  %data_ptr1 = getelementptr inbounds i8, ptr %b_blockwise_copy, i64 4
  store float %data2, ptr %data_ptr1, align 16
  %data_ptr2 = getelementptr inbounds i8, ptr %b_blockwise_copy, i64 8
  store float %data3, ptr %data_ptr2, align 16
  %data_ptr3 = getelementptr inbounds i8, ptr %b_blockwise_copy, i64 12
  store float %data4, ptr %data_ptr3, align 16
  br label %bb

bb:
  %load1 = load half, ptr %b_blockwise_copy, align 16
  %ptr2 = getelementptr inbounds i8, ptr %b_blockwise_copy, i64 2
  %load2 = load half, ptr %ptr2, align 16
  %ptr3 = getelementptr inbounds i8, ptr %b_blockwise_copy, i64 4
  %load3 = load half, ptr %ptr3, align 16
  %ptr4 = getelementptr inbounds i8, ptr %b_blockwise_copy, i64 6
  %load4 = load half, ptr %ptr4, align 16
  %ptr5 = getelementptr inbounds i8, ptr %b_blockwise_copy, i64 8
  %load5 = load half, ptr %ptr5, align 16
  %ptr6 = getelementptr inbounds i8, ptr %b_blockwise_copy, i64 10
  %load6 = load half, ptr %ptr6, align 16
  %ptr7 = getelementptr inbounds i8, ptr %b_blockwise_copy, i64 12
  %load7 = load half, ptr %ptr7, align 16
  %ptr8 = getelementptr inbounds i8, ptr %b_blockwise_copy, i64 14
  %load8 = load half, ptr %ptr8, align 16
  ret void
}

declare void @llvm.memset.p0.i64(ptr nocapture writeonly, i8, i64, i1) nounwind
declare void @llvm.memset.p0.i32(ptr nocapture writeonly, i8, i32, i1) nounwind
attributes #0 = { nounwind readonly }

;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CHECK-MODIFY-CFG: {{.*}}
; CHECK-PRESERVE-CFG: {{.*}}