; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -passes=load-store-vectorizer --mcpu=hawaii -mattr=-unaligned-access-mode,+max-private-element-size-16 < %s | FileCheck -check-prefixes=CHECK,ALIGNED %s
; RUN: opt -S -passes=load-store-vectorizer --mcpu=hawaii -mattr=+unaligned-access-mode,+unaligned-scratch-access,+max-private-element-size-16 < %s | FileCheck -check-prefixes=CHECK,UNALIGNED %s
; RUN: opt -S -passes='function(load-store-vectorizer)' --mcpu=hawaii -mattr=-unaligned-access-mode,+max-private-element-size-16 < %s | FileCheck -check-prefixes=CHECK,ALIGNED %s
; RUN: opt -S -passes='function(load-store-vectorizer)' --mcpu=hawaii -mattr=+unaligned-access-mode,+unaligned-scratch-access,+max-private-element-size-16 < %s | FileCheck -check-prefixes=CHECK,UNALIGNED %s

; These tests cover how the load-store-vectorizer adjusts (or refuses to
; adjust) the alignment of private (addrspace(5)) allocas when merging
; adjacent scalar accesses. The ALIGNED runs disable unaligned scratch
; access; the UNALIGNED runs enable it.

target triple = "amdgcn--"
target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"

; Align-1 alloca with an unknown (runtime) offset: the ALIGNED run keeps the
; two scalar i8 loads; the UNALIGNED run merges them into one <2 x i8> load.
define amdgpu_kernel void @load_unknown_offset_align1_i8(ptr addrspace(1) noalias %out, i32 %offset) #0 {
; ALIGNED-LABEL: @load_unknown_offset_align1_i8(
; ALIGNED-NEXT:    [[ALLOCA:%.*]] = alloca [128 x i8], align 1, addrspace(5)
; ALIGNED-NEXT:    [[PTR0:%.*]] = getelementptr inbounds [128 x i8], ptr addrspace(5) [[ALLOCA]], i32 0, i32 [[OFFSET:%.*]]
; ALIGNED-NEXT:    [[VAL0:%.*]] = load i8, ptr addrspace(5) [[PTR0]], align 1
; ALIGNED-NEXT:    [[PTR1:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[PTR0]], i32 1
; ALIGNED-NEXT:    [[VAL1:%.*]] = load i8, ptr addrspace(5) [[PTR1]], align 1
; ALIGNED-NEXT:    [[ADD:%.*]] = add i8 [[VAL0]], [[VAL1]]
; ALIGNED-NEXT:    store i8 [[ADD]], ptr addrspace(1) [[OUT:%.*]], align 1
; ALIGNED-NEXT:    ret void
;
; UNALIGNED-LABEL: @load_unknown_offset_align1_i8(
; UNALIGNED-NEXT:    [[ALLOCA:%.*]] = alloca [128 x i8], align 1, addrspace(5)
; UNALIGNED-NEXT:    [[PTR0:%.*]] = getelementptr inbounds [128 x i8], ptr addrspace(5) [[ALLOCA]], i32 0, i32 [[OFFSET:%.*]]
; UNALIGNED-NEXT:    [[TMP1:%.*]] = load <2 x i8>, ptr addrspace(5) [[PTR0]], align 1
; UNALIGNED-NEXT:    [[VAL01:%.*]] = extractelement <2 x i8> [[TMP1]], i32 0
; UNALIGNED-NEXT:    [[VAL12:%.*]] = extractelement <2 x i8> [[TMP1]], i32 1
; UNALIGNED-NEXT:    [[ADD:%.*]] = add i8 [[VAL01]], [[VAL12]]
; UNALIGNED-NEXT:    store i8 [[ADD]], ptr addrspace(1) [[OUT:%.*]], align 1
; UNALIGNED-NEXT:    ret void
;
  %alloca = alloca [128 x i8], align 1, addrspace(5)
  %ptr0 = getelementptr inbounds [128 x i8], ptr addrspace(5) %alloca, i32 0, i32 %offset
  %val0 = load i8, ptr addrspace(5) %ptr0, align 1
  %ptr1 = getelementptr inbounds i8, ptr addrspace(5) %ptr0, i32 1
  %val1 = load i8, ptr addrspace(5) %ptr1, align 1
  %add = add i8 %val0, %val1
  store i8 %add, ptr addrspace(1) %out
  ret void
}

; Same pattern with i16 elements; only the UNALIGNED run vectorizes the loads.
define amdgpu_kernel void @load_unknown_offset_align1_i16(ptr addrspace(1) noalias %out, i32 %offset) #0 {
; ALIGNED-LABEL: @load_unknown_offset_align1_i16(
; ALIGNED-NEXT:    [[ALLOCA:%.*]] = alloca [128 x i16], align 1, addrspace(5)
; ALIGNED-NEXT:    [[PTR0:%.*]] = getelementptr inbounds [128 x i16], ptr addrspace(5) [[ALLOCA]], i32 0, i32 [[OFFSET:%.*]]
; ALIGNED-NEXT:    [[VAL0:%.*]] = load i16, ptr addrspace(5) [[PTR0]], align 1
; ALIGNED-NEXT:    [[PTR1:%.*]] = getelementptr inbounds i16, ptr addrspace(5) [[PTR0]], i32 1
; ALIGNED-NEXT:    [[VAL1:%.*]] = load i16, ptr addrspace(5) [[PTR1]], align 1
; ALIGNED-NEXT:    [[ADD:%.*]] = add i16 [[VAL0]], [[VAL1]]
; ALIGNED-NEXT:    store i16 [[ADD]], ptr addrspace(1) [[OUT:%.*]], align 2
; ALIGNED-NEXT:    ret void
;
; UNALIGNED-LABEL: @load_unknown_offset_align1_i16(
; UNALIGNED-NEXT:    [[ALLOCA:%.*]] = alloca [128 x i16], align 1, addrspace(5)
; UNALIGNED-NEXT:    [[PTR0:%.*]] = getelementptr inbounds [128 x i16], ptr addrspace(5) [[ALLOCA]], i32 0, i32 [[OFFSET:%.*]]
; UNALIGNED-NEXT:    [[TMP1:%.*]] = load <2 x i16>, ptr addrspace(5) [[PTR0]], align 1
; UNALIGNED-NEXT:    [[VAL01:%.*]] = extractelement <2 x i16> [[TMP1]], i32 0
; UNALIGNED-NEXT:    [[VAL12:%.*]] = extractelement <2 x i16> [[TMP1]], i32 1
; UNALIGNED-NEXT:    [[ADD:%.*]] = add i16 [[VAL01]], [[VAL12]]
; UNALIGNED-NEXT:    store i16 [[ADD]], ptr addrspace(1) [[OUT:%.*]], align 2
; UNALIGNED-NEXT:    ret void
;
  %alloca = alloca [128 x i16], align 1, addrspace(5)
  %ptr0 = getelementptr inbounds [128 x i16], ptr addrspace(5) %alloca, i32 0, i32 %offset
  %val0 = load i16, ptr addrspace(5) %ptr0, align 1
  %ptr1 = getelementptr inbounds i16, ptr addrspace(5) %ptr0, i32 1
  %val1 = load i16, ptr addrspace(5) %ptr1, align 1
  %add = add i16 %val0, %val1
  store i16 %add, ptr addrspace(1) %out
  ret void
}

; FIXME: Although the offset is unknown here, we know it is a multiple
; of the element size, so should still be align 4
define amdgpu_kernel void @load_unknown_offset_align1_i32(ptr addrspace(1) noalias %out, i32 %offset) #0 {
; ALIGNED-LABEL: @load_unknown_offset_align1_i32(
; ALIGNED-NEXT:    [[ALLOCA:%.*]] = alloca [128 x i32], align 1, addrspace(5)
; ALIGNED-NEXT:    [[PTR0:%.*]] = getelementptr inbounds [128 x i32], ptr addrspace(5) [[ALLOCA]], i32 0, i32 [[OFFSET:%.*]]
; ALIGNED-NEXT:    [[VAL0:%.*]] = load i32, ptr addrspace(5) [[PTR0]], align 1
; ALIGNED-NEXT:    [[PTR1:%.*]] = getelementptr inbounds i32, ptr addrspace(5) [[PTR0]], i32 1
; ALIGNED-NEXT:    [[VAL1:%.*]] = load i32, ptr addrspace(5) [[PTR1]], align 1
; ALIGNED-NEXT:    [[ADD:%.*]] = add i32 [[VAL0]], [[VAL1]]
; ALIGNED-NEXT:    store i32 [[ADD]], ptr addrspace(1) [[OUT:%.*]], align 4
; ALIGNED-NEXT:    ret void
;
; UNALIGNED-LABEL: @load_unknown_offset_align1_i32(
; UNALIGNED-NEXT:    [[ALLOCA:%.*]] = alloca [128 x i32], align 1, addrspace(5)
; UNALIGNED-NEXT:    [[PTR0:%.*]] = getelementptr inbounds [128 x i32], ptr addrspace(5) [[ALLOCA]], i32 0, i32 [[OFFSET:%.*]]
; UNALIGNED-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(5) [[PTR0]], align 1
; UNALIGNED-NEXT:    [[VAL01:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0
; UNALIGNED-NEXT:    [[VAL12:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
; UNALIGNED-NEXT:    [[ADD:%.*]] = add i32 [[VAL01]], [[VAL12]]
; UNALIGNED-NEXT:    store i32 [[ADD]], ptr addrspace(1) [[OUT:%.*]], align 4
; UNALIGNED-NEXT:    ret void
;
  %alloca = alloca [128 x i32], align 1, addrspace(5)
  %ptr0 = getelementptr inbounds [128 x i32], ptr addrspace(5) %alloca, i32 0, i32 %offset
  %val0 = load i32, ptr addrspace(5) %ptr0, align 1
  %ptr1 = getelementptr inbounds i32, ptr addrspace(5) %ptr0, i32 1
  %val1 = load i32, ptr addrspace(5) %ptr1, align 1
  %add = add i32 %val0, %val1
  store i32 %add, ptr addrspace(1) %out
  ret void
}

; Make sure alloca alignment isn't decreased
define amdgpu_kernel void @load_alloca16_unknown_offset_align1_i32(ptr addrspace(1) noalias %out, i32 %offset) #0 {
; ALIGNED-LABEL: @load_alloca16_unknown_offset_align1_i32(
; ALIGNED-NEXT:    [[ALLOCA:%.*]] = alloca [128 x i32], align 16, addrspace(5)
; ALIGNED-NEXT:    [[PTR0:%.*]] = getelementptr inbounds [128 x i32], ptr addrspace(5) [[ALLOCA]], i32 0, i32 [[OFFSET:%.*]]
; ALIGNED-NEXT:    [[VAL0:%.*]] = load i32, ptr addrspace(5) [[PTR0]], align 1
; ALIGNED-NEXT:    [[PTR1:%.*]] = getelementptr inbounds i32, ptr addrspace(5) [[PTR0]], i32 1
; ALIGNED-NEXT:    [[VAL1:%.*]] = load i32, ptr addrspace(5) [[PTR1]], align 1
; ALIGNED-NEXT:    [[ADD:%.*]] = add i32 [[VAL0]], [[VAL1]]
; ALIGNED-NEXT:    store i32 [[ADD]], ptr addrspace(1) [[OUT:%.*]], align 4
; ALIGNED-NEXT:    ret void
;
; UNALIGNED-LABEL: @load_alloca16_unknown_offset_align1_i32(
; UNALIGNED-NEXT:    [[ALLOCA:%.*]] = alloca [128 x i32], align 16, addrspace(5)
; UNALIGNED-NEXT:    [[PTR0:%.*]] = getelementptr inbounds [128 x i32], ptr addrspace(5) [[ALLOCA]], i32 0, i32 [[OFFSET:%.*]]
; UNALIGNED-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr addrspace(5) [[PTR0]], align 4
; UNALIGNED-NEXT:    [[VAL01:%.*]] = extractelement <2 x i32> [[TMP1]], i32 0
; UNALIGNED-NEXT:    [[VAL12:%.*]] = extractelement <2 x i32> [[TMP1]], i32 1
; UNALIGNED-NEXT:    [[ADD:%.*]] = add i32 [[VAL01]], [[VAL12]]
; UNALIGNED-NEXT:    store i32 [[ADD]], ptr addrspace(1) [[OUT:%.*]], align 4
; UNALIGNED-NEXT:    ret void
;
  %alloca = alloca [128 x i32], align 16, addrspace(5)
  %ptr0 = getelementptr inbounds [128 x i32], ptr addrspace(5) %alloca, i32 0, i32 %offset
  %val0 = load i32, ptr addrspace(5) %ptr0, align 1
  %ptr1 = getelementptr inbounds i32, ptr addrspace(5) %ptr0, i32 1
  %val1 = load i32, ptr addrspace(5) %ptr1, align 1
  %add = add i32 %val0, %val1
  store i32 %add, ptr addrspace(1) %out
  ret void
}

; Store variant: adjacent i8 stores at an unknown offset in an align-1 alloca
; are only merged into a <2 x i8> store in the UNALIGNED run.
define amdgpu_kernel void @store_unknown_offset_align1_i8(ptr addrspace(1) noalias %out, i32 %offset) #0 {
; ALIGNED-LABEL: @store_unknown_offset_align1_i8(
; ALIGNED-NEXT:    [[ALLOCA:%.*]] = alloca [128 x i8], align 1, addrspace(5)
; ALIGNED-NEXT:    [[PTR0:%.*]] = getelementptr inbounds [128 x i8], ptr addrspace(5) [[ALLOCA]], i32 0, i32 [[OFFSET:%.*]]
; ALIGNED-NEXT:    store i8 9, ptr addrspace(5) [[PTR0]], align 1
; ALIGNED-NEXT:    [[PTR1:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[PTR0]], i32 1
; ALIGNED-NEXT:    store i8 10, ptr addrspace(5) [[PTR1]], align 1
; ALIGNED-NEXT:    ret void
;
; UNALIGNED-LABEL: @store_unknown_offset_align1_i8(
; UNALIGNED-NEXT:    [[ALLOCA:%.*]] = alloca [128 x i8], align 1, addrspace(5)
; UNALIGNED-NEXT:    [[PTR0:%.*]] = getelementptr inbounds [128 x i8], ptr addrspace(5) [[ALLOCA]], i32 0, i32 [[OFFSET:%.*]]
; UNALIGNED-NEXT:    store <2 x i8> <i8 9, i8 10>, ptr addrspace(5) [[PTR0]], align 1
; UNALIGNED-NEXT:    ret void
;
  %alloca = alloca [128 x i8], align 1, addrspace(5)
  %ptr0 = getelementptr inbounds [128 x i8], ptr addrspace(5) %alloca, i32 0, i32 %offset
  store i8 9, ptr addrspace(5) %ptr0, align 1
  %ptr1 = getelementptr inbounds i8, ptr addrspace(5) %ptr0, i32 1
  store i8 10, ptr addrspace(5) %ptr1, align 1
  ret void
}

; Same store pattern with i16 elements.
define amdgpu_kernel void @store_unknown_offset_align1_i16(ptr addrspace(1) noalias %out, i32 %offset) #0 {
; ALIGNED-LABEL: @store_unknown_offset_align1_i16(
; ALIGNED-NEXT:    [[ALLOCA:%.*]] = alloca [128 x i16], align 1, addrspace(5)
; ALIGNED-NEXT:    [[PTR0:%.*]] = getelementptr inbounds [128 x i16], ptr addrspace(5) [[ALLOCA]], i32 0, i32 [[OFFSET:%.*]]
; ALIGNED-NEXT:    store i16 9, ptr addrspace(5) [[PTR0]], align 1
; ALIGNED-NEXT:    [[PTR1:%.*]] = getelementptr inbounds i16, ptr addrspace(5) [[PTR0]], i32 1
; ALIGNED-NEXT:    store i16 10, ptr addrspace(5) [[PTR1]], align 1
; ALIGNED-NEXT:    ret void
;
; UNALIGNED-LABEL: @store_unknown_offset_align1_i16(
; UNALIGNED-NEXT:    [[ALLOCA:%.*]] = alloca [128 x i16], align 1, addrspace(5)
; UNALIGNED-NEXT:    [[PTR0:%.*]] = getelementptr inbounds [128 x i16], ptr addrspace(5) [[ALLOCA]], i32 0, i32 [[OFFSET:%.*]]
; UNALIGNED-NEXT:    store <2 x i16> <i16 9, i16 10>, ptr addrspace(5) [[PTR0]], align 1
; UNALIGNED-NEXT:    ret void
;
  %alloca = alloca [128 x i16], align 1, addrspace(5)
  %ptr0 = getelementptr inbounds [128 x i16], ptr addrspace(5) %alloca, i32 0, i32 %offset
  store i16 9, ptr addrspace(5) %ptr0, align 1
  %ptr1 = getelementptr inbounds i16, ptr addrspace(5) %ptr0, i32 1
  store i16 10, ptr addrspace(5) %ptr1, align 1
  ret void
}

; FIXME: Although the offset is unknown here, we know it is a multiple
; of the element size, so it still should be align 4.

define amdgpu_kernel void @store_unknown_offset_align1_i32(ptr addrspace(1) noalias %out, i32 %offset) #0 {
; ALIGNED-LABEL: @store_unknown_offset_align1_i32(
; ALIGNED-NEXT:    [[ALLOCA:%.*]] = alloca [128 x i32], align 1, addrspace(5)
; ALIGNED-NEXT:    [[PTR0:%.*]] = getelementptr inbounds [128 x i32], ptr addrspace(5) [[ALLOCA]], i32 0, i32 [[OFFSET:%.*]]
; ALIGNED-NEXT:    store i32 9, ptr addrspace(5) [[PTR0]], align 1
; ALIGNED-NEXT:    [[PTR1:%.*]] = getelementptr inbounds i32, ptr addrspace(5) [[PTR0]], i32 1
; ALIGNED-NEXT:    store i32 10, ptr addrspace(5) [[PTR1]], align 1
; ALIGNED-NEXT:    ret void
;
; UNALIGNED-LABEL: @store_unknown_offset_align1_i32(
; UNALIGNED-NEXT:    [[ALLOCA:%.*]] = alloca [128 x i32], align 1, addrspace(5)
; UNALIGNED-NEXT:    [[PTR0:%.*]] = getelementptr inbounds [128 x i32], ptr addrspace(5) [[ALLOCA]], i32 0, i32 [[OFFSET:%.*]]
; UNALIGNED-NEXT:    store <2 x i32> <i32 9, i32 10>, ptr addrspace(5) [[PTR0]], align 1
; UNALIGNED-NEXT:    ret void
;
  %alloca = alloca [128 x i32], align 1, addrspace(5)
  %ptr0 = getelementptr inbounds [128 x i32], ptr addrspace(5) %alloca, i32 0, i32 %offset
  store i32 9, ptr addrspace(5) %ptr0, align 1
  %ptr1 = getelementptr inbounds i32, ptr addrspace(5) %ptr0, i32 1
  store i32 10, ptr addrspace(5) %ptr1, align 1
  ret void
}

; Known offsets from the alloca base: both runs merge the four i32 stores into
; one <4 x i32> store and the alloca alignment is raised from 1 to 4.
define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v4i32() {
; CHECK-LABEL: @merge_private_store_4_vector_elts_loads_v4i32(
; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca [8 x i32], align 4, addrspace(5)
; CHECK-NEXT:    store <4 x i32> <i32 9, i32 1, i32 23, i32 19>, ptr addrspace(5) [[ALLOCA]], align 4
; CHECK-NEXT:    ret void
;
  %alloca = alloca [8 x i32], align 1, addrspace(5)
  %out.gep.1 = getelementptr i32, ptr addrspace(5) %alloca, i32 1
  %out.gep.2 = getelementptr i32, ptr addrspace(5) %alloca, i32 2
  %out.gep.3 = getelementptr i32, ptr addrspace(5) %alloca, i32 3

  store i32 9, ptr addrspace(5) %alloca, align 1
  store i32 1, ptr addrspace(5) %out.gep.1, align 1
  store i32 23, ptr addrspace(5) %out.gep.2, align 1
  store i32 19, ptr addrspace(5) %out.gep.3, align 1
  ret void
}

; Same as above with i8 elements; merged into a <4 x i8> store, align raised to 4.
define amdgpu_kernel void @merge_private_store_4_vector_elts_loads_v4i8() {
; CHECK-LABEL: @merge_private_store_4_vector_elts_loads_v4i8(
; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca [8 x i8], align 4, addrspace(5)
; CHECK-NEXT:    store <4 x i8> <i8 9, i8 1, i8 23, i8 19>, ptr addrspace(5) [[ALLOCA]], align 4
; CHECK-NEXT:    ret void
;
  %alloca = alloca [8 x i8], align 1, addrspace(5)
  %out.gep.1 = getelementptr i8, ptr addrspace(5) %alloca, i8 1
  %out.gep.2 = getelementptr i8, ptr addrspace(5) %alloca, i8 2
  %out.gep.3 = getelementptr i8, ptr addrspace(5) %alloca, i8 3

  store i8 9, ptr addrspace(5) %alloca, align 1
  store i8 1, ptr addrspace(5) %out.gep.1, align 1
  store i8 23, ptr addrspace(5) %out.gep.2, align 1
  store i8 19, ptr addrspace(5) %out.gep.3, align 1
  ret void
}

; Load counterpart: four adjacent i32 loads become one <4 x i32> load in both runs.
define amdgpu_kernel void @merge_private_load_4_vector_elts_loads_v4i32() {
; CHECK-LABEL: @merge_private_load_4_vector_elts_loads_v4i32(
; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca [8 x i32], align 4, addrspace(5)
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr addrspace(5) [[ALLOCA]], align 4
; CHECK-NEXT:    [[LOAD01:%.*]] = extractelement <4 x i32> [[TMP1]], i32 0
; CHECK-NEXT:    [[LOAD12:%.*]] = extractelement <4 x i32> [[TMP1]], i32 1
; CHECK-NEXT:    [[LOAD23:%.*]] = extractelement <4 x i32> [[TMP1]], i32 2
; CHECK-NEXT:    [[LOAD34:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
; CHECK-NEXT:    ret void
;
  %alloca = alloca [8 x i32], align 1, addrspace(5)
  %out.gep.1 = getelementptr i32, ptr addrspace(5) %alloca, i32 1
  %out.gep.2 = getelementptr i32, ptr addrspace(5) %alloca, i32 2
  %out.gep.3 = getelementptr i32, ptr addrspace(5) %alloca, i32 3

  %load0 = load i32, ptr addrspace(5) %alloca, align 1
  %load1 = load i32, ptr addrspace(5) %out.gep.1, align 1
  %load2 = load i32, ptr addrspace(5) %out.gep.2, align 1
  %load3 = load i32, ptr addrspace(5) %out.gep.3, align 1
  ret void
}

; Same as above with i8 elements.
define amdgpu_kernel void @merge_private_load_4_vector_elts_loads_v4i8() {
; CHECK-LABEL: @merge_private_load_4_vector_elts_loads_v4i8(
; CHECK-NEXT:    [[ALLOCA:%.*]] = alloca [8 x i8], align 4, addrspace(5)
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr addrspace(5) [[ALLOCA]], align 4
; CHECK-NEXT:    [[LOAD01:%.*]] = extractelement <4 x i8> [[TMP1]], i32 0
; CHECK-NEXT:    [[LOAD12:%.*]] = extractelement <4 x i8> [[TMP1]], i32 1
; CHECK-NEXT:    [[LOAD23:%.*]] = extractelement <4 x i8> [[TMP1]], i32 2
; CHECK-NEXT:    [[LOAD34:%.*]] = extractelement <4 x i8> [[TMP1]], i32 3
; CHECK-NEXT:    ret void
;
  %alloca = alloca [8 x i8], align 1, addrspace(5)
  %out.gep.1 = getelementptr i8, ptr addrspace(5) %alloca, i8 1
  %out.gep.2 = getelementptr i8, ptr addrspace(5) %alloca, i8 2
  %out.gep.3 = getelementptr i8, ptr addrspace(5) %alloca, i8 3

  %load0 = load i8, ptr addrspace(5) %alloca, align 1
  %load1 = load i8, ptr addrspace(5) %out.gep.1, align 1
  %load2 = load i8, ptr addrspace(5) %out.gep.2, align 1
  %load3 = load i8, ptr addrspace(5) %out.gep.3, align 1
  ret void
}

; Make sure we don't think the alignment will increase if the base address isn't an alloca
define void @private_store_2xi16_align2_not_alloca(ptr addrspace(5) %p, ptr addrspace(5) %r) #0 {
; ALIGNED-LABEL: @private_store_2xi16_align2_not_alloca(
; ALIGNED-NEXT:    [[GEP_R:%.*]] = getelementptr i16, ptr addrspace(5) [[R:%.*]], i32 1
; ALIGNED-NEXT:    store i16 1, ptr addrspace(5) [[R]], align 2
; ALIGNED-NEXT:    store i16 2, ptr addrspace(5) [[GEP_R]], align 2
; ALIGNED-NEXT:    ret void
;
; UNALIGNED-LABEL: @private_store_2xi16_align2_not_alloca(
; UNALIGNED-NEXT:    store <2 x i16> <i16 1, i16 2>, ptr addrspace(5) [[R:%.*]], align 2
; UNALIGNED-NEXT:    ret void
;
  %gep.r = getelementptr i16, ptr addrspace(5) %r, i32 1
  store i16 1, ptr addrspace(5) %r, align 2
  store i16 2, ptr addrspace(5) %gep.r, align 2
  ret void
}

; As above but with align 1 on the stores.
define void @private_store_2xi16_align1_not_alloca(ptr addrspace(5) %p, ptr addrspace(5) %r) #0 {
; ALIGNED-LABEL: @private_store_2xi16_align1_not_alloca(
; ALIGNED-NEXT:    [[GEP_R:%.*]] = getelementptr i16, ptr addrspace(5) [[R:%.*]], i32 1
; ALIGNED-NEXT:    store i16 1, ptr addrspace(5) [[R]], align 1
; ALIGNED-NEXT:    store i16 2, ptr addrspace(5) [[GEP_R]], align 1
; ALIGNED-NEXT:    ret void
;
; UNALIGNED-LABEL: @private_store_2xi16_align1_not_alloca(
; UNALIGNED-NEXT:    store <2 x i16> <i16 1, i16 2>, ptr addrspace(5) [[R:%.*]], align 1
; UNALIGNED-NEXT:    ret void
;
  %gep.r = getelementptr i16, ptr addrspace(5) %r, i32 1
  store i16 1, ptr addrspace(5) %r, align 1
  store i16 2, ptr addrspace(5) %gep.r, align 1
  ret void
}

; Load version of the non-alloca base test, align 2.
define i32 @private_load_2xi16_align2_not_alloca(ptr addrspace(5) %p) #0 {
; ALIGNED-LABEL: @private_load_2xi16_align2_not_alloca(
; ALIGNED-NEXT:    [[GEP_P:%.*]] = getelementptr i16, ptr addrspace(5) [[P:%.*]], i64 1
; ALIGNED-NEXT:    [[P_0:%.*]] = load i16, ptr addrspace(5) [[P]], align 2
; ALIGNED-NEXT:    [[P_1:%.*]] = load i16, ptr addrspace(5) [[GEP_P]], align 2
; ALIGNED-NEXT:    [[ZEXT_0:%.*]] = zext i16 [[P_0]] to i32
; ALIGNED-NEXT:    [[ZEXT_1:%.*]] = zext i16 [[P_1]] to i32
; ALIGNED-NEXT:    [[SHL_1:%.*]] = shl i32 [[ZEXT_1]], 16
; ALIGNED-NEXT:    [[OR:%.*]] = or i32 [[ZEXT_0]], [[SHL_1]]
; ALIGNED-NEXT:    ret i32 [[OR]]
;
; UNALIGNED-LABEL: @private_load_2xi16_align2_not_alloca(
; UNALIGNED-NEXT:    [[TMP1:%.*]] = load <2 x i16>, ptr addrspace(5) [[P:%.*]], align 2
; UNALIGNED-NEXT:    [[P_01:%.*]] = extractelement <2 x i16> [[TMP1]], i32 0
; UNALIGNED-NEXT:    [[P_12:%.*]] = extractelement <2 x i16> [[TMP1]], i32 1
; UNALIGNED-NEXT:    [[ZEXT_0:%.*]] = zext i16 [[P_01]] to i32
; UNALIGNED-NEXT:    [[ZEXT_1:%.*]] = zext i16 [[P_12]] to i32
; UNALIGNED-NEXT:    [[SHL_1:%.*]] = shl i32 [[ZEXT_1]], 16
; UNALIGNED-NEXT:    [[OR:%.*]] = or i32 [[ZEXT_0]], [[SHL_1]]
; UNALIGNED-NEXT:    ret i32 [[OR]]
;
  %gep.p = getelementptr i16, ptr addrspace(5) %p, i64 1
  %p.0 = load i16, ptr addrspace(5) %p, align 2
  %p.1 = load i16, ptr addrspace(5) %gep.p, align 2
  %zext.0 = zext i16 %p.0 to i32
  %zext.1 = zext i16 %p.1 to i32
  %shl.1 = shl i32 %zext.1, 16
  %or = or i32 %zext.0, %shl.1
  ret i32 %or
}

; As above but with align 1 on the loads.
define i32 @private_load_2xi16_align1_not_alloca(ptr addrspace(5) %p) #0 {
; ALIGNED-LABEL: @private_load_2xi16_align1_not_alloca(
; ALIGNED-NEXT:    [[GEP_P:%.*]] = getelementptr i16, ptr addrspace(5) [[P:%.*]], i64 1
; ALIGNED-NEXT:    [[P_0:%.*]] = load i16, ptr addrspace(5) [[P]], align 1
; ALIGNED-NEXT:    [[P_1:%.*]] = load i16, ptr addrspace(5) [[GEP_P]], align 1
; ALIGNED-NEXT:    [[ZEXT_0:%.*]] = zext i16 [[P_0]] to i32
; ALIGNED-NEXT:    [[ZEXT_1:%.*]] = zext i16 [[P_1]] to i32
; ALIGNED-NEXT:    [[SHL_1:%.*]] = shl i32 [[ZEXT_1]], 16
; ALIGNED-NEXT:    [[OR:%.*]] = or i32 [[ZEXT_0]], [[SHL_1]]
; ALIGNED-NEXT:    ret i32 [[OR]]
;
; UNALIGNED-LABEL: @private_load_2xi16_align1_not_alloca(
; UNALIGNED-NEXT:    [[TMP1:%.*]] = load <2 x i16>, ptr addrspace(5) [[P:%.*]], align 1
; UNALIGNED-NEXT:    [[P_01:%.*]] = extractelement <2 x i16> [[TMP1]], i32 0
; UNALIGNED-NEXT:    [[P_12:%.*]] = extractelement <2 x i16> [[TMP1]], i32 1
; UNALIGNED-NEXT:    [[ZEXT_0:%.*]] = zext i16 [[P_01]] to i32
; UNALIGNED-NEXT:    [[ZEXT_1:%.*]] = zext i16 [[P_12]] to i32
; UNALIGNED-NEXT:    [[SHL_1:%.*]] = shl i32 [[ZEXT_1]], 16
; UNALIGNED-NEXT:    [[OR:%.*]] = or i32 [[ZEXT_0]], [[SHL_1]]
; UNALIGNED-NEXT:    ret i32 [[OR]]
;
  %gep.p = getelementptr i16, ptr addrspace(5) %p, i64 1
  %p.0 = load i16, ptr addrspace(5) %p, align 1
  %p.1 = load i16, ptr addrspace(5) %gep.p, align 1
  %zext.0 = zext i16 %p.0 to i32
  %zext.1 = zext i16 %p.1 to i32
  %shl.1 = shl i32 %zext.1, 16
  %or = or i32 %zext.0, %shl.1
  ret i32 %or
}

; Align-16 alloca, unknown i8 offset: the merged <2 x i8> load in the
; UNALIGNED run stays at align 1 (the offset gives no extra alignment).
define void @load_alloca16_unknown_offset_align1_i8(ptr addrspace(1) noalias %out, i32 %offset) #0 {
; ALIGNED-LABEL: @load_alloca16_unknown_offset_align1_i8(
; ALIGNED-NEXT:    [[ALLOCA:%.*]] = alloca [128 x i8], align 16, addrspace(5)
; ALIGNED-NEXT:    [[PTR0:%.*]] = getelementptr inbounds [128 x i8], ptr addrspace(5) [[ALLOCA]], i32 0, i32 [[OFFSET:%.*]]
; ALIGNED-NEXT:    [[VAL0:%.*]] = load i8, ptr addrspace(5) [[PTR0]], align 1
; ALIGNED-NEXT:    [[PTR1:%.*]] = getelementptr inbounds i8, ptr addrspace(5) [[PTR0]], i32 1
; ALIGNED-NEXT:    [[VAL1:%.*]] = load i8, ptr addrspace(5) [[PTR1]], align 1
; ALIGNED-NEXT:    [[ADD:%.*]] = add i8 [[VAL0]], [[VAL1]]
; ALIGNED-NEXT:    store i8 [[ADD]], ptr addrspace(1) [[OUT:%.*]], align 1
; ALIGNED-NEXT:    ret void
;
; UNALIGNED-LABEL: @load_alloca16_unknown_offset_align1_i8(
; UNALIGNED-NEXT:    [[ALLOCA:%.*]] = alloca [128 x i8], align 16, addrspace(5)
; UNALIGNED-NEXT:    [[PTR0:%.*]] = getelementptr inbounds [128 x i8], ptr addrspace(5) [[ALLOCA]], i32 0, i32 [[OFFSET:%.*]]
; UNALIGNED-NEXT:    [[TMP1:%.*]] = load <2 x i8>, ptr addrspace(5) [[PTR0]], align 1
; UNALIGNED-NEXT:    [[VAL01:%.*]] = extractelement <2 x i8> [[TMP1]], i32 0
; UNALIGNED-NEXT:    [[VAL12:%.*]] = extractelement <2 x i8> [[TMP1]], i32 1
; UNALIGNED-NEXT:    [[ADD:%.*]] = add i8 [[VAL01]], [[VAL12]]
; UNALIGNED-NEXT:    store i8 [[ADD]], ptr addrspace(1) [[OUT:%.*]], align 1
; UNALIGNED-NEXT:    ret void
;
  %alloca = alloca [128 x i8], align 16, addrspace(5)
  %ptr0 = getelementptr inbounds [128 x i8], ptr addrspace(5) %alloca, i32 0, i32 %offset
  %val0 = load i8, ptr addrspace(5) %ptr0, align 1
  %ptr1 = getelementptr inbounds i8, ptr addrspace(5) %ptr0, i32 1
  %val1 = load i8, ptr addrspace(5) %ptr1, align 1
  %add = add i8 %val0, %val1
  store i8 %add, ptr addrspace(1) %out
  ret void
}

; Align-16 alloca, unknown i32 offset: the merged <2 x i32> store in the
; UNALIGNED run gets align 4.
define void @store_alloca16_unknown_offset_align1_i32(ptr addrspace(1) noalias %out, i32 %offset) #0 {
; ALIGNED-LABEL: @store_alloca16_unknown_offset_align1_i32(
; ALIGNED-NEXT:    [[ALLOCA:%.*]] = alloca [128 x i32], align 16, addrspace(5)
; ALIGNED-NEXT:    [[PTR0:%.*]] = getelementptr inbounds [128 x i32], ptr addrspace(5) [[ALLOCA]], i32 0, i32 [[OFFSET:%.*]]
; ALIGNED-NEXT:    store i32 9, ptr addrspace(5) [[PTR0]], align 1
; ALIGNED-NEXT:    [[PTR1:%.*]] = getelementptr inbounds i32, ptr addrspace(5) [[PTR0]], i32 1
; ALIGNED-NEXT:    store i32 10, ptr addrspace(5) [[PTR1]], align 1
; ALIGNED-NEXT:    ret void
;
; UNALIGNED-LABEL: @store_alloca16_unknown_offset_align1_i32(
; UNALIGNED-NEXT:    [[ALLOCA:%.*]] = alloca [128 x i32], align 16, addrspace(5)
; UNALIGNED-NEXT:    [[PTR0:%.*]] = getelementptr inbounds [128 x i32], ptr addrspace(5) [[ALLOCA]], i32 0, i32 [[OFFSET:%.*]]
; UNALIGNED-NEXT:    store <2 x i32> <i32 9, i32 10>, ptr addrspace(5) [[PTR0]], align 4
; UNALIGNED-NEXT:    ret void
;
  %alloca = alloca [128 x i32], align 16, addrspace(5)
  %ptr0 = getelementptr inbounds [128 x i32], ptr addrspace(5) %alloca, i32 0, i32 %offset
  store i32 9, ptr addrspace(5) %ptr0, align 1
  %ptr1 = getelementptr inbounds i32, ptr addrspace(5) %ptr0, i32 1
  store i32 10, ptr addrspace(5) %ptr1, align 1
  ret void
}

attributes #0 = { nounwind }