; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals
; RUN: opt -mtriple=amdgcn-amd-amdhsa -S -o - -passes=amdgpu-lower-kernel-arguments %s | FileCheck -check-prefixes=GCN,HSA %s
; RUN: opt -mtriple=amdgcn-- -S -o - -passes=amdgpu-lower-kernel-arguments %s | FileCheck -check-prefixes=GCN,MESA %s

target datalayout = "A5"

declare void @llvm.fake.use(...)

define amdgpu_kernel void @kern_noargs() {
; GCN-LABEL: @kern_noargs(
; GCN-NEXT:    ret void
;
  ret void
}

define amdgpu_kernel void @kern_i8(i8 %arg) #0 {
; HSA-LABEL: @kern_i8(
; HSA-NEXT:    [[KERN_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_I8_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1:![0-9]+]]
; HSA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
; HSA-NEXT:    store i8 [[TMP2]], ptr addrspace(1) undef, align 1
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_i8(
; MESA-NEXT:    [[KERN_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_I8_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1:![0-9]+]]
; MESA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
; MESA-NEXT:    store i8 [[TMP2]], ptr addrspace(1) undef, align 1
; MESA-NEXT:    ret void
;
  store i8 %arg, ptr addrspace(1) undef, align 1
  ret void
}

define amdgpu_kernel void @kern_i16(i16 %arg) #0 {
; HSA-LABEL: @kern_i16(
; HSA-NEXT:    [[KERN_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_I16_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
; HSA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
; HSA-NEXT:    store i16 [[TMP2]], ptr addrspace(1) undef, align 1
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_i16(
; MESA-NEXT:    [[KERN_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_I16_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
; MESA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
; MESA-NEXT:    store i16 [[TMP2]], ptr addrspace(1) undef, align 1
; MESA-NEXT:    ret void
;
  store i16 %arg, ptr addrspace(1) undef, align 1
  ret void
}

define amdgpu_kernel void @kern_f16(half %arg) #0 {
; HSA-LABEL: @kern_f16(
; HSA-NEXT:    [[KERN_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_F16_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
; HSA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
; HSA-NEXT:    [[ARG_LOAD:%.*]] = bitcast i16 [[TMP2]] to half
; HSA-NEXT:    store half [[ARG_LOAD]], ptr addrspace(1) undef, align 1
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_f16(
; MESA-NEXT:    [[KERN_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_F16_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
; MESA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
; MESA-NEXT:    [[ARG_LOAD:%.*]] = bitcast i16 [[TMP2]] to half
; MESA-NEXT:    store half [[ARG_LOAD]], ptr addrspace(1) undef, align 1
; MESA-NEXT:    ret void
;
  store half %arg, ptr addrspace(1) undef, align 1
  ret void
}

define amdgpu_kernel void @kern_zeroext_i8(i8 zeroext %arg) #0 {
; HSA-LABEL: @kern_zeroext_i8(
; HSA-NEXT:    [[KERN_ZEROEXT_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_ZEROEXT_I8_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
; HSA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
; HSA-NEXT:    store i8 [[TMP2]], ptr addrspace(1) undef, align 1
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_zeroext_i8(
; MESA-NEXT:    [[KERN_ZEROEXT_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_ZEROEXT_I8_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
; MESA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
; MESA-NEXT:    store i8 [[TMP2]], ptr addrspace(1) undef, align 1
; MESA-NEXT:    ret void
;
  store i8 %arg, ptr addrspace(1) undef, align 1
  ret void
}

define amdgpu_kernel void @kern_zeroext_i16(i16 zeroext %arg) #0 {
; HSA-LABEL: @kern_zeroext_i16(
; HSA-NEXT:    [[KERN_ZEROEXT_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_ZEROEXT_I16_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
; HSA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
; HSA-NEXT:    store i16 [[TMP2]], ptr addrspace(1) undef, align 1
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_zeroext_i16(
; MESA-NEXT:    [[KERN_ZEROEXT_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_ZEROEXT_I16_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
; MESA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
; MESA-NEXT:    store i16 [[TMP2]], ptr addrspace(1) undef, align 1
; MESA-NEXT:    ret void
;
  store i16 %arg, ptr addrspace(1) undef, align 1
  ret void
}

define amdgpu_kernel void @kern_signext_i8(i8 signext %arg) #0 {
; HSA-LABEL: @kern_signext_i8(
; HSA-NEXT:    [[KERN_SIGNEXT_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_SIGNEXT_I8_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
; HSA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
; HSA-NEXT:    store i8 [[TMP2]], ptr addrspace(1) undef, align 1
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_signext_i8(
; MESA-NEXT:    [[KERN_SIGNEXT_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_SIGNEXT_I8_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
; MESA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
; MESA-NEXT:    store i8 [[TMP2]], ptr addrspace(1) undef, align 1
; MESA-NEXT:    ret void
;
  store i8 %arg, ptr addrspace(1) undef, align 1
  ret void
}

define amdgpu_kernel void @kern_signext_i16(i16 signext %arg) #0 {
; HSA-LABEL: @kern_signext_i16(
; HSA-NEXT:    [[KERN_SIGNEXT_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_SIGNEXT_I16_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
; HSA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
; HSA-NEXT:    store i16 [[TMP2]], ptr addrspace(1) undef, align 1
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_signext_i16(
; MESA-NEXT:    [[KERN_SIGNEXT_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_SIGNEXT_I16_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
; MESA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
; MESA-NEXT:    store i16 [[TMP2]], ptr addrspace(1) undef, align 1
; MESA-NEXT:    ret void
;
  store i16 %arg, ptr addrspace(1) undef, align 1
  ret void
}

define amdgpu_kernel void @kern_i8_i8(i8 %arg0, i8 %arg1) {
; HSA-LABEL: @kern_i8_i8(
; HSA-NEXT:    [[KERN_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_I8_I8_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
; HSA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
; HSA-NEXT:    [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_I8_I8_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
; HSA-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
; HSA-NEXT:    [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
; HSA-NEXT:    store volatile i8 [[TMP2]], ptr addrspace(1) undef, align 1
; HSA-NEXT:    store volatile i8 [[TMP5]], ptr addrspace(1) undef, align 1
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_i8_i8(
; MESA-NEXT:    [[KERN_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_I8_I8_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
; MESA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
; MESA-NEXT:    [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_I8_I8_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
; MESA-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
; MESA-NEXT:    [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
; MESA-NEXT:    store volatile i8 [[TMP2]], ptr addrspace(1) undef, align 1
; MESA-NEXT:    store volatile i8 [[TMP5]], ptr addrspace(1) undef, align 1
; MESA-NEXT:    ret void
;
  store volatile i8 %arg0, ptr addrspace(1) undef, align 1
  store volatile i8 %arg1, ptr addrspace(1) undef, align 1
  ret void
}

define amdgpu_kernel void @kern_v3i8(<3 x i8> %arg) {
; HSA-LABEL: @kern_v3i8(
; HSA-NEXT:    [[KERN_V3I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_V3I8_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
; HSA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i24
; HSA-NEXT:    [[ARG_LOAD:%.*]] = bitcast i24 [[TMP2]] to <3 x i8>
; HSA-NEXT:    store <3 x i8> [[ARG_LOAD]], ptr addrspace(1) undef, align 4
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_v3i8(
; MESA-NEXT:    [[KERN_V3I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT:    [[ARG_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_V3I8_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
; MESA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i24
; MESA-NEXT:    [[ARG_LOAD:%.*]] = bitcast i24 [[TMP2]] to <3 x i8>
; MESA-NEXT:    store <3 x i8> [[ARG_LOAD]], ptr addrspace(1) undef, align 4
; MESA-NEXT:    ret void
;
  store <3 x i8> %arg, ptr addrspace(1) undef, align 4
  ret void
}

define amdgpu_kernel void @kern_i24(i24 %arg0) {
; HSA-LABEL: @kern_i24(
; HSA-NEXT:    [[KERN_I24_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_I24_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
; HSA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i24
; HSA-NEXT:    store i24 [[TMP2]], ptr addrspace(1) undef, align 4
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_i24(
; MESA-NEXT:    [[KERN_I24_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_I24_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
; MESA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i24
; MESA-NEXT:    store i24 [[TMP2]], ptr addrspace(1) undef, align 4
; MESA-NEXT:    ret void
;
  store i24 %arg0, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @kern_i32(i32 %arg0) {
; HSA-LABEL: @kern_i32(
; HSA-NEXT:    [[KERN_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_I32_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[ARG0_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
; HSA-NEXT:    store i32 [[ARG0_LOAD]], ptr addrspace(1) undef, align 4
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_i32(
; MESA-NEXT:    [[KERN_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT:    [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_I32_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[ARG0_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
; MESA-NEXT:    store i32 [[ARG0_LOAD]], ptr addrspace(1) undef, align 4
; MESA-NEXT:    ret void
;
  store i32 %arg0, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @kern_range_noundef_i32(i32 noundef range(i32 0, 8) %arg0) {
; HSA-LABEL: @kern_range_noundef_i32(
; HSA-NEXT:    [[KERN_RANGE_NOUNDEF_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_RANGE_NOUNDEF_I32_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[ARG0_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !range [[RNG2:![0-9]+]], !invariant.load [[META1]], !noundef [[META1]]
; HSA-NEXT:    call void (...) @llvm.fake.use(i32 [[ARG0_LOAD]])
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_range_noundef_i32(
; MESA-NEXT:    [[KERN_RANGE_NOUNDEF_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT:    [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_RANGE_NOUNDEF_I32_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[ARG0_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !range [[RNG2:![0-9]+]], !invariant.load [[META1]], !noundef [[META1]]
; MESA-NEXT:    call void (...) @llvm.fake.use(i32 [[ARG0_LOAD]])
; MESA-NEXT:    ret void
;
  call void (...) @llvm.fake.use(i32 %arg0)
  ret void
}

define amdgpu_kernel void @kern_f32(float %arg0) {
; HSA-LABEL: @kern_f32(
; HSA-NEXT:    [[KERN_F32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_F32_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[ARG0_LOAD:%.*]] = load float, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
; HSA-NEXT:    store float [[ARG0_LOAD]], ptr addrspace(1) undef, align 4
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_f32(
; MESA-NEXT:    [[KERN_F32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT:    [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_F32_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[ARG0_LOAD:%.*]] = load float, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
; MESA-NEXT:    store float [[ARG0_LOAD]], ptr addrspace(1) undef, align 4
; MESA-NEXT:    ret void
;
  store float %arg0, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @kern_v3i32(<3 x i32> %arg0) {
; HSA-LABEL: @kern_v3i32(
; HSA-NEXT:    [[KERN_V3I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_V3I32_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
; HSA-NEXT:    [[ARG0_LOAD:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
; HSA-NEXT:    store <3 x i32> [[ARG0_LOAD]], ptr addrspace(1) undef, align 4
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_v3i32(
; MESA-NEXT:    [[KERN_V3I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT:    [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_V3I32_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
; MESA-NEXT:    [[ARG0_LOAD:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
; MESA-NEXT:    store <3 x i32> [[ARG0_LOAD]], ptr addrspace(1) undef, align 4
; MESA-NEXT:    ret void
;
  store <3 x i32> %arg0, ptr addrspace(1) undef, align 4
  ret void
}

define amdgpu_kernel void @kern_v8i32(<8 x i32> %arg) #0 {
; HSA-LABEL: @kern_v8i32(
; HSA-NEXT:    [[KERN_V8I32_KERNARG_SEGMENT:%.*]] = call nonnull align 32 dereferenceable(288) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_V8I32_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[ARG_LOAD:%.*]] = load <8 x i32>, ptr addrspace(4) [[ARG_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
; HSA-NEXT:    store <8 x i32> [[ARG_LOAD]], ptr addrspace(1) undef, align 32
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_v8i32(
; MESA-NEXT:    [[KERN_V8I32_KERNARG_SEGMENT:%.*]] = call nonnull align 32 dereferenceable(288) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT:    [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_V8I32_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[ARG_LOAD:%.*]] = load <8 x i32>, ptr addrspace(4) [[ARG_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
; MESA-NEXT:    store <8 x i32> [[ARG_LOAD]], ptr addrspace(1) undef, align 32
; MESA-NEXT:    ret void
;
  store <8 x i32> %arg, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @kern_v8i64(<8 x i64> %arg) #0 {
; HSA-LABEL: @kern_v8i64(
; HSA-NEXT:    [[KERN_V8I64_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(320) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_V8I64_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[ARG_LOAD:%.*]] = load <8 x i64>, ptr addrspace(4) [[ARG_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
; HSA-NEXT:    store <8 x i64> [[ARG_LOAD]], ptr addrspace(1) undef, align 64
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_v8i64(
; MESA-NEXT:    [[KERN_V8I64_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(320) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT:    [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_V8I64_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[ARG_LOAD:%.*]] = load <8 x i64>, ptr addrspace(4) [[ARG_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
; MESA-NEXT:    store <8 x i64> [[ARG_LOAD]], ptr addrspace(1) undef, align 64
; MESA-NEXT:    ret void
;
  store <8 x i64> %arg, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @kern_v16i64(<16 x i64> %arg) #0 {
; HSA-LABEL: @kern_v16i64(
; HSA-NEXT:    [[KERN_V16I64_KERNARG_SEGMENT:%.*]] = call nonnull align 128 dereferenceable(384) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_V16I64_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[ARG_LOAD:%.*]] = load <16 x i64>, ptr addrspace(4) [[ARG_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
; HSA-NEXT:    store <16 x i64> [[ARG_LOAD]], ptr addrspace(1) undef, align 128
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_v16i64(
; MESA-NEXT:    [[KERN_V16I64_KERNARG_SEGMENT:%.*]] = call nonnull align 128 dereferenceable(384) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT:    [[ARG_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_V16I64_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[ARG_LOAD:%.*]] = load <16 x i64>, ptr addrspace(4) [[ARG_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
; MESA-NEXT:    store <16 x i64> [[ARG_LOAD]], ptr addrspace(1) undef, align 128
; MESA-NEXT:    ret void
;
  store <16 x i64> %arg, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @kern_i32_v3i32(i32 %arg0, <3 x i32> %arg1) {
; HSA-LABEL: @kern_i32_v3i32(
; HSA-NEXT:    [[KERN_I32_V3I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(288) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_I32_V3I32_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[ARG0_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
; HSA-NEXT:    [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_I32_V3I32_KERNARG_SEGMENT]], i64 16
; HSA-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr addrspace(4) [[ARG1_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
; HSA-NEXT:    [[ARG1_LOAD:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
; HSA-NEXT:    store i32 [[ARG0_LOAD]], ptr addrspace(1) undef, align 4
; HSA-NEXT:    store <3 x i32> [[ARG1_LOAD]], ptr addrspace(1) undef, align 4
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_i32_v3i32(
; MESA-NEXT:    [[KERN_I32_V3I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(288) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT:    [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_I32_V3I32_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[ARG0_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
; MESA-NEXT:    [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_I32_V3I32_KERNARG_SEGMENT]], i64 52
; MESA-NEXT:    [[TMP1:%.*]] = load <4 x i32>, ptr addrspace(4) [[ARG1_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
; MESA-NEXT:    [[ARG1_LOAD:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <3 x i32> <i32 0, i32 1, i32 2>
; MESA-NEXT:    store i32 [[ARG0_LOAD]], ptr addrspace(1) undef, align 4
; MESA-NEXT:    store <3 x i32> [[ARG1_LOAD]], ptr addrspace(1) undef, align 4
; MESA-NEXT:    ret void
;
  store i32 %arg0, ptr addrspace(1) undef
  store <3 x i32> %arg1, ptr addrspace(1) undef, align 4
  ret void
}

%struct.a = type { i32, i8, [4 x i8] }
%struct.b.packed = type { i8, i32, [3 x i16], <2 x double> }

define amdgpu_kernel void @kern_struct_a(%struct.a %arg0) {
; HSA-LABEL: @kern_struct_a(
; HSA-NEXT:    [[KERN_STRUCT_A_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_STRUCT_A_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[ARG0_LOAD:%.*]] = load [[STRUCT_A:%.*]], ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
; HSA-NEXT:    store [[STRUCT_A]] [[ARG0_LOAD]], ptr addrspace(1) undef, align 4
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_struct_a(
; MESA-NEXT:    [[KERN_STRUCT_A_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(268) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT:    [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_STRUCT_A_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[ARG0_LOAD:%.*]] = load [[STRUCT_A:%.*]], ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
; MESA-NEXT:    store [[STRUCT_A]] [[ARG0_LOAD]], ptr addrspace(1) undef, align 4
; MESA-NEXT:    ret void
;
  store %struct.a %arg0, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @kern_struct_b_packed(%struct.b.packed %arg0) #0 {
; HSA-LABEL: @kern_struct_b_packed(
; HSA-NEXT:    [[KERN_STRUCT_B_PACKED_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(288) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_STRUCT_B_PACKED_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[ARG0_LOAD:%.*]] = load [[STRUCT_B_PACKED:%.*]], ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
; HSA-NEXT:    store [[STRUCT_B_PACKED]] [[ARG0_LOAD]], ptr addrspace(1) undef, align 16
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_struct_b_packed(
; MESA-NEXT:    [[KERN_STRUCT_B_PACKED_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(288) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT:    [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_STRUCT_B_PACKED_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[ARG0_LOAD:%.*]] = load [[STRUCT_B_PACKED:%.*]], ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
; MESA-NEXT:    store [[STRUCT_B_PACKED]] [[ARG0_LOAD]], ptr addrspace(1) undef, align 16
; MESA-NEXT:    ret void
;
  store %struct.b.packed %arg0, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @kern_implicit_arg_num_bytes(i32 %arg0) #1 {
; HSA-LABEL: @kern_implicit_arg_num_bytes(
; HSA-NEXT:    [[KERN_IMPLICIT_ARG_NUM_BYTES_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(48) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_IMPLICIT_ARG_NUM_BYTES_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[ARG0_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
; HSA-NEXT:    store i32 [[ARG0_LOAD]], ptr addrspace(1) undef, align 4
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_implicit_arg_num_bytes(
; MESA-NEXT:    [[KERN_IMPLICIT_ARG_NUM_BYTES_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(44) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT:    [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_IMPLICIT_ARG_NUM_BYTES_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[ARG0_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
; MESA-NEXT:    store i32 [[ARG0_LOAD]], ptr addrspace(1) undef, align 4
; MESA-NEXT:    ret void
;
  store i32 %arg0, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @kernel_implicitarg_no_struct_align(<16 x i32>, i32 %arg1) #1 {
; HSA-LABEL: @kernel_implicitarg_no_struct_align(
; HSA-NEXT:    [[KERNEL_IMPLICITARG_NO_STRUCT_ALIGN_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(112) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERNEL_IMPLICITARG_NO_STRUCT_ALIGN_KERNARG_SEGMENT]], i64 64
; HSA-NEXT:    [[ARG1_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
; HSA-NEXT:    store i32 [[ARG1_LOAD]], ptr addrspace(1) undef, align 4
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kernel_implicitarg_no_struct_align(
; MESA-NEXT:    [[KERNEL_IMPLICITARG_NO_STRUCT_ALIGN_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(108) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT:    [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERNEL_IMPLICITARG_NO_STRUCT_ALIGN_KERNARG_SEGMENT]], i64 100
; MESA-NEXT:    [[ARG1_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
; MESA-NEXT:    store i32 [[ARG1_LOAD]], ptr addrspace(1) undef, align 4
; MESA-NEXT:    ret void
;
  store i32 %arg1, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @kern_lds_ptr(ptr addrspace(3) %lds) #0 {
; HSA-LABEL: @kern_lds_ptr(
; HSA-NEXT:    [[KERN_LDS_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[LDS_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_LDS_PTR_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[LDS_LOAD:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[LDS_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
; HSA-NEXT:    store i32 0, ptr addrspace(3) [[LDS_LOAD]], align 4
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_lds_ptr(
; MESA-NEXT:    [[KERN_LDS_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT:    [[LDS_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_LDS_PTR_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[LDS_LOAD:%.*]] = load ptr addrspace(3), ptr addrspace(4) [[LDS_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
; MESA-NEXT:    store i32 0, ptr addrspace(3) [[LDS_LOAD]], align 4
; MESA-NEXT:    ret void
;
  store i32 0, ptr addrspace(3) %lds, align 4
  ret void
}

define amdgpu_kernel void @kern_lds_ptr_si(ptr addrspace(3) %lds) #2 {
; GCN-LABEL: @kern_lds_ptr_si(
; GCN-NEXT:    [[KERN_LDS_PTR_SI_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; GCN-NEXT:    store i32 0, ptr addrspace(3) [[LDS:%.*]], align 4
; GCN-NEXT:    ret void
;
  store i32 0, ptr addrspace(3) %lds, align 4
  ret void
}

define amdgpu_kernel void @kern_realign_i8_i8(i8 %arg0, i8 %arg1) #0 {
; HSA-LABEL: @kern_realign_i8_i8(
; HSA-NEXT:    [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
; HSA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
; HSA-NEXT:    [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
; HSA-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
; HSA-NEXT:    [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
; HSA-NEXT:    store volatile i8 [[TMP2]], ptr addrspace(1) undef, align 1
; HSA-NEXT:    store volatile i8 [[TMP5]], ptr addrspace(1) undef, align 1
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_realign_i8_i8(
; MESA-NEXT:    [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
; MESA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
; MESA-NEXT:    [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_KERNARG_SEGMENT]], i64 36
; MESA-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
; MESA-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
; MESA-NEXT:    [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
; MESA-NEXT:    store volatile i8 [[TMP2]], ptr addrspace(1) undef, align 1
; MESA-NEXT:    store volatile i8 [[TMP5]], ptr addrspace(1) undef, align 1
; MESA-NEXT:    ret void
;
  store volatile i8 %arg0, ptr addrspace(1) undef
  store volatile i8 %arg1, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @kern_realign_i8_i8_i8(i8 %arg0, i8 %arg1, i8 %arg2) #0 {
; HSA-LABEL: @kern_realign_i8_i8_i8(
; HSA-NEXT:    [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
; HSA-NEXT:    [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
; HSA-NEXT:    [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
; HSA-NEXT:    [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
; HSA-NEXT:    [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
; HSA-NEXT:    [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT]], i64 0
; HSA-NEXT:    [[TMP6:%.*]] = load i32, ptr addrspace(4) [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
; HSA-NEXT:    [[TMP7:%.*]] = lshr i32 [[TMP6]], 16
; HSA-NEXT:    [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8
; HSA-NEXT:    store volatile i8 [[TMP2]], ptr addrspace(1) undef, align 1
; HSA-NEXT:    store volatile i8 [[TMP5]], ptr addrspace(1) undef, align 1
; HSA-NEXT:    store volatile i8 [[TMP8]], ptr addrspace(1) undef, align 1
; HSA-NEXT:    ret void
;
; MESA-LABEL: @kern_realign_i8_i8_i8(
; MESA-NEXT:    [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT:    [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT]],
i64 36 566; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]] 567; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 568; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT]], i64 36 569; MESA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]] 570; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8 571; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8 572; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_KERNARG_SEGMENT]], i64 36 573; MESA-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]] 574; MESA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16 575; MESA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8 576; MESA-NEXT: store volatile i8 [[TMP2]], ptr addrspace(1) undef, align 1 577; MESA-NEXT: store volatile i8 [[TMP5]], ptr addrspace(1) undef, align 1 578; MESA-NEXT: store volatile i8 [[TMP8]], ptr addrspace(1) undef, align 1 579; MESA-NEXT: ret void 580; 581 store volatile i8 %arg0, ptr addrspace(1) undef 582 store volatile i8 %arg1, ptr addrspace(1) undef 583 store volatile i8 %arg2, ptr addrspace(1) undef 584 ret void 585} 586 587define amdgpu_kernel void @kern_realign_i8_i8_i8_i8(i8 %arg0, i8 %arg1, i8 %arg2, i8 %arg3) #0 { 588; HSA-LABEL: @kern_realign_i8_i8_i8_i8( 589; HSA-NEXT: [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() 590; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0 591; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]] 592; 
HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 593; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0 594; HSA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]] 595; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8 596; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8 597; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0 598; HSA-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]] 599; HSA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16 600; HSA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8 601; HSA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0 602; HSA-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(4) [[ARG3_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]] 603; HSA-NEXT: [[TMP10:%.*]] = lshr i32 [[TMP9]], 24 604; HSA-NEXT: [[TMP11:%.*]] = trunc i32 [[TMP10]] to i8 605; HSA-NEXT: store volatile i8 [[TMP2]], ptr addrspace(1) undef, align 1 606; HSA-NEXT: store volatile i8 [[TMP5]], ptr addrspace(1) undef, align 1 607; HSA-NEXT: store volatile i8 [[TMP8]], ptr addrspace(1) undef, align 1 608; HSA-NEXT: store volatile i8 [[TMP11]], ptr addrspace(1) undef, align 1 609; HSA-NEXT: ret void 610; 611; MESA-LABEL: @kern_realign_i8_i8_i8_i8( 612; MESA-NEXT: [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() 613; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36 614; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], 
align 4, !invariant.load [[META1]] 615; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 616; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36 617; MESA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]] 618; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8 619; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8 620; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36 621; MESA-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]] 622; MESA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16 623; MESA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8 624; MESA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36 625; MESA-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(4) [[ARG3_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]] 626; MESA-NEXT: [[TMP10:%.*]] = lshr i32 [[TMP9]], 24 627; MESA-NEXT: [[TMP11:%.*]] = trunc i32 [[TMP10]] to i8 628; MESA-NEXT: store volatile i8 [[TMP2]], ptr addrspace(1) undef, align 1 629; MESA-NEXT: store volatile i8 [[TMP5]], ptr addrspace(1) undef, align 1 630; MESA-NEXT: store volatile i8 [[TMP8]], ptr addrspace(1) undef, align 1 631; MESA-NEXT: store volatile i8 [[TMP11]], ptr addrspace(1) undef, align 1 632; MESA-NEXT: ret void 633; 634 store volatile i8 %arg0, ptr addrspace(1) undef 635 store volatile i8 %arg1, ptr addrspace(1) undef 636 store volatile i8 %arg2, ptr addrspace(1) undef 637 store volatile i8 %arg3, ptr addrspace(1) undef 638 ret void 639} 640 641define amdgpu_kernel void @kern_realign_i8_v3i8(i8 %arg0, <3 x i8> %arg1) #0 { 642; HSA-LABEL: @kern_realign_i8_v3i8( 643; HSA-NEXT: 
[[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() 644; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT]], i64 0 645; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]] 646; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 647; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT]], i64 4 648; HSA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]] 649; HSA-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i24 650; HSA-NEXT: [[ARG1_LOAD:%.*]] = bitcast i24 [[TMP4]] to <3 x i8> 651; HSA-NEXT: store volatile i8 [[TMP2]], ptr addrspace(1) undef, align 1 652; HSA-NEXT: store volatile <3 x i8> [[ARG1_LOAD]], ptr addrspace(1) undef, align 4 653; HSA-NEXT: ret void 654; 655; MESA-LABEL: @kern_realign_i8_v3i8( 656; MESA-NEXT: [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() 657; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT]], i64 36 658; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]] 659; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 660; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_V3I8_KERNARG_SEGMENT]], i64 40 661; MESA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 8, !invariant.load [[META1]] 662; MESA-NEXT: [[TMP4:%.*]] = trunc i32 [[TMP3]] to i24 663; MESA-NEXT: [[ARG1_LOAD:%.*]] = bitcast i24 [[TMP4]] to <3 x 
i8> 664; MESA-NEXT: store volatile i8 [[TMP2]], ptr addrspace(1) undef, align 1 665; MESA-NEXT: store volatile <3 x i8> [[ARG1_LOAD]], ptr addrspace(1) undef, align 4 666; MESA-NEXT: ret void 667; 668 store volatile i8 %arg0, ptr addrspace(1) undef 669 store volatile <3 x i8> %arg1, ptr addrspace(1) undef 670 ret void 671} 672 673define amdgpu_kernel void @kern_realign_i8_i16(i8 %arg0, i16 %arg1) #0 { 674; HSA-LABEL: @kern_realign_i8_i16( 675; HSA-NEXT: [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() 676; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT]], i64 0 677; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]] 678; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 679; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT]], i64 0 680; HSA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]] 681; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 16 682; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16 683; HSA-NEXT: store volatile i8 [[TMP2]], ptr addrspace(1) undef, align 1 684; HSA-NEXT: store volatile i16 [[TMP5]], ptr addrspace(1) undef, align 2 685; HSA-NEXT: ret void 686; 687; MESA-LABEL: @kern_realign_i8_i16( 688; MESA-NEXT: [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() 689; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT]], i64 36 690; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]] 691; MESA-NEXT: [[TMP2:%.*]] = 
trunc i32 [[TMP1]] to i8 692; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I16_KERNARG_SEGMENT]], i64 36 693; MESA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]] 694; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 16 695; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16 696; MESA-NEXT: store volatile i8 [[TMP2]], ptr addrspace(1) undef, align 1 697; MESA-NEXT: store volatile i16 [[TMP5]], ptr addrspace(1) undef, align 2 698; MESA-NEXT: ret void 699; 700 store volatile i8 %arg0, ptr addrspace(1) undef 701 store volatile i16 %arg1, ptr addrspace(1) undef 702 ret void 703} 704 705define amdgpu_kernel void @kern_realign_i1_i1(i1 %arg0, i1 %arg1) #0 { 706; HSA-LABEL: @kern_realign_i1_i1( 707; HSA-NEXT: [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() 708; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT]], i64 0 709; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]] 710; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1 711; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT]], i64 0 712; HSA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]] 713; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8 714; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i1 715; HSA-NEXT: store volatile i1 [[TMP2]], ptr addrspace(1) undef, align 1 716; HSA-NEXT: store volatile i1 [[TMP5]], ptr addrspace(1) undef, align 1 717; HSA-NEXT: ret void 718; 719; MESA-LABEL: @kern_realign_i1_i1( 720; MESA-NEXT: [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 
dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() 721; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT]], i64 36 722; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]] 723; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1 724; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_KERNARG_SEGMENT]], i64 36 725; MESA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]] 726; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8 727; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i1 728; MESA-NEXT: store volatile i1 [[TMP2]], ptr addrspace(1) undef, align 1 729; MESA-NEXT: store volatile i1 [[TMP5]], ptr addrspace(1) undef, align 1 730; MESA-NEXT: ret void 731; 732 store volatile i1 %arg0, ptr addrspace(1) undef 733 store volatile i1 %arg1, ptr addrspace(1) undef 734 ret void 735} 736 737define amdgpu_kernel void @kern_realign_i1_i1_i1(i1 %arg0, i1 %arg1, i1 %arg2) #0 { 738; HSA-LABEL: @kern_realign_i1_i1_i1( 739; HSA-NEXT: [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() 740; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT]], i64 0 741; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]] 742; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1 743; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT]], i64 0 744; HSA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]] 745; 
HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8 746; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i1 747; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT]], i64 0 748; HSA-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]] 749; HSA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16 750; HSA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i1 751; HSA-NEXT: store volatile i1 [[TMP2]], ptr addrspace(1) undef, align 1 752; HSA-NEXT: store volatile i1 [[TMP5]], ptr addrspace(1) undef, align 1 753; HSA-NEXT: store volatile i1 [[TMP8]], ptr addrspace(1) undef, align 1 754; HSA-NEXT: ret void 755; 756; MESA-LABEL: @kern_realign_i1_i1_i1( 757; MESA-NEXT: [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() 758; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT]], i64 36 759; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]] 760; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1 761; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT]], i64 36 762; MESA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]] 763; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8 764; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i1 765; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_I1_KERNARG_SEGMENT]], i64 36 766; MESA-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]] 767; MESA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16 
768; MESA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i1 769; MESA-NEXT: store volatile i1 [[TMP2]], ptr addrspace(1) undef, align 1 770; MESA-NEXT: store volatile i1 [[TMP5]], ptr addrspace(1) undef, align 1 771; MESA-NEXT: store volatile i1 [[TMP8]], ptr addrspace(1) undef, align 1 772; MESA-NEXT: ret void 773; 774 store volatile i1 %arg0, ptr addrspace(1) undef 775 store volatile i1 %arg1, ptr addrspace(1) undef 776 store volatile i1 %arg2, ptr addrspace(1) undef 777 ret void 778} 779 780define amdgpu_kernel void @kern_realign_i1_i1_i1_i1(i1 %arg0, i1 %arg1, i1 %arg2, i1 %arg3) #0 { 781; HSA-LABEL: @kern_realign_i1_i1_i1_i1( 782; HSA-NEXT: [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() 783; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 0 784; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]] 785; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1 786; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 0 787; HSA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]] 788; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8 789; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i1 790; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 0 791; HSA-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]] 792; HSA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16 793; HSA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i1 794; HSA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr 
addrspace(4) [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 0 795; HSA-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(4) [[ARG3_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]] 796; HSA-NEXT: [[TMP10:%.*]] = lshr i32 [[TMP9]], 24 797; HSA-NEXT: [[TMP11:%.*]] = trunc i32 [[TMP10]] to i1 798; HSA-NEXT: store volatile i1 [[TMP2]], ptr addrspace(1) undef, align 1 799; HSA-NEXT: store volatile i1 [[TMP5]], ptr addrspace(1) undef, align 1 800; HSA-NEXT: store volatile i1 [[TMP8]], ptr addrspace(1) undef, align 1 801; HSA-NEXT: store volatile i1 [[TMP11]], ptr addrspace(1) undef, align 1 802; HSA-NEXT: ret void 803; 804; MESA-LABEL: @kern_realign_i1_i1_i1_i1( 805; MESA-NEXT: [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() 806; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 36 807; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]] 808; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1 809; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 36 810; MESA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]] 811; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8 812; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i1 813; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 36 814; MESA-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]] 815; MESA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16 816; MESA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i1 817; MESA-NEXT: 
[[ARG3_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I1_I1_I1_KERNARG_SEGMENT]], i64 36 818; MESA-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(4) [[ARG3_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]] 819; MESA-NEXT: [[TMP10:%.*]] = lshr i32 [[TMP9]], 24 820; MESA-NEXT: [[TMP11:%.*]] = trunc i32 [[TMP10]] to i1 821; MESA-NEXT: store volatile i1 [[TMP2]], ptr addrspace(1) undef, align 1 822; MESA-NEXT: store volatile i1 [[TMP5]], ptr addrspace(1) undef, align 1 823; MESA-NEXT: store volatile i1 [[TMP8]], ptr addrspace(1) undef, align 1 824; MESA-NEXT: store volatile i1 [[TMP11]], ptr addrspace(1) undef, align 1 825; MESA-NEXT: ret void 826; 827 store volatile i1 %arg0, ptr addrspace(1) undef 828 store volatile i1 %arg1, ptr addrspace(1) undef 829 store volatile i1 %arg2, ptr addrspace(1) undef 830 store volatile i1 %arg3, ptr addrspace(1) undef 831 ret void 832} 833 834define amdgpu_kernel void @kern_realign_i1_v3i1(i1 %arg0, <3 x i1> %arg1) #0 { 835; HSA-LABEL: @kern_realign_i1_v3i1( 836; HSA-NEXT: [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() 837; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT]], i64 0 838; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]] 839; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1 840; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT]], i64 0 841; HSA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]] 842; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8 843; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i3 844; HSA-NEXT: [[ARG1_LOAD:%.*]] = bitcast i3 [[TMP5]] to <3 x 
i1> 845; HSA-NEXT: store volatile i1 [[TMP2]], ptr addrspace(1) undef, align 1 846; HSA-NEXT: store volatile <3 x i1> [[ARG1_LOAD]], ptr addrspace(1) undef, align 1 847; HSA-NEXT: ret void 848; 849; MESA-LABEL: @kern_realign_i1_v3i1( 850; MESA-NEXT: [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() 851; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT]], i64 36 852; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]] 853; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1 854; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_V3I1_KERNARG_SEGMENT]], i64 36 855; MESA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]] 856; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8 857; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i3 858; MESA-NEXT: [[ARG1_LOAD:%.*]] = bitcast i3 [[TMP5]] to <3 x i1> 859; MESA-NEXT: store volatile i1 [[TMP2]], ptr addrspace(1) undef, align 1 860; MESA-NEXT: store volatile <3 x i1> [[ARG1_LOAD]], ptr addrspace(1) undef, align 1 861; MESA-NEXT: ret void 862; 863 store volatile i1 %arg0, ptr addrspace(1) undef 864 store volatile <3 x i1> %arg1, ptr addrspace(1) undef 865 ret void 866} 867 868define amdgpu_kernel void @kern_realign_i1_i16(i1 %arg0, i16 %arg1) #0 { 869; HSA-LABEL: @kern_realign_i1_i16( 870; HSA-NEXT: [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() 871; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT]], i64 0 872; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) 
[[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]] 873; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1 874; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT]], i64 0 875; HSA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]] 876; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 16 877; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16 878; HSA-NEXT: store volatile i1 [[TMP2]], ptr addrspace(1) undef, align 1 879; HSA-NEXT: store volatile i16 [[TMP5]], ptr addrspace(1) undef, align 2 880; HSA-NEXT: ret void 881; 882; MESA-LABEL: @kern_realign_i1_i16( 883; MESA-NEXT: [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() 884; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT]], i64 36 885; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]] 886; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i1 887; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I1_I16_KERNARG_SEGMENT]], i64 36 888; MESA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]] 889; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 16 890; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16 891; MESA-NEXT: store volatile i1 [[TMP2]], ptr addrspace(1) undef, align 1 892; MESA-NEXT: store volatile i16 [[TMP5]], ptr addrspace(1) undef, align 2 893; MESA-NEXT: ret void 894; 895 store volatile i1 %arg0, ptr addrspace(1) undef 896 store volatile i16 %arg1, ptr addrspace(1) undef 897 ret void 898} 899 900define amdgpu_kernel void @kern_realign_i8_i8_i8_i8_i8_i8_i8_i8(i8 %arg0, i8 
%arg1, i8 %arg2, i8 %arg3, i8 %arg4, i8 %arg5, i8 %arg6, i8 %arg7) #0 { 901; HSA-LABEL: @kern_realign_i8_i8_i8_i8_i8_i8_i8_i8( 902; HSA-NEXT: [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() 903; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0 904; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]] 905; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8 906; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0 907; HSA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]] 908; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8 909; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8 910; HSA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0 911; HSA-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]] 912; HSA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16 913; HSA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8 914; HSA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 0 915; HSA-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(4) [[ARG3_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]] 916; HSA-NEXT: [[TMP10:%.*]] = lshr i32 [[TMP9]], 24 917; HSA-NEXT: [[TMP11:%.*]] = trunc i32 [[TMP10]] to i8 918; HSA-NEXT: [[ARG5_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) 
[[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 4
; HSA-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(4) [[ARG5_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
; HSA-NEXT: [[TMP13:%.*]] = lshr i32 [[TMP12]], 8
; HSA-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i8
; HSA-NEXT: [[ARG6_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 4
; HSA-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(4) [[ARG6_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
; HSA-NEXT: [[TMP16:%.*]] = lshr i32 [[TMP15]], 16
; HSA-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i8
; HSA-NEXT: [[ARG7_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 4
; HSA-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(4) [[ARG7_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
; HSA-NEXT: [[TMP19:%.*]] = lshr i32 [[TMP18]], 24
; HSA-NEXT: [[TMP20:%.*]] = trunc i32 [[TMP19]] to i8
; HSA-NEXT: store volatile i8 [[TMP2]], ptr addrspace(1) undef, align 1
; HSA-NEXT: store volatile i8 [[TMP5]], ptr addrspace(1) undef, align 1
; HSA-NEXT: store volatile i8 [[TMP8]], ptr addrspace(1) undef, align 1
; HSA-NEXT: store volatile i8 [[TMP11]], ptr addrspace(1) undef, align 1
; HSA-NEXT: store volatile i8 [[TMP14]], ptr addrspace(1) undef, align 1
; HSA-NEXT: store volatile i8 [[TMP17]], ptr addrspace(1) undef, align 1
; HSA-NEXT: store volatile i8 [[TMP20]], ptr addrspace(1) undef, align 1
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_realign_i8_i8_i8_i8_i8_i8_i8_i8(
; MESA-NEXT: [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i8
; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 8
; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i8
; MESA-NEXT: [[ARG2_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[TMP6:%.*]] = load i32, ptr addrspace(4) [[ARG2_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
; MESA-NEXT: [[TMP7:%.*]] = lshr i32 [[TMP6]], 16
; MESA-NEXT: [[TMP8:%.*]] = trunc i32 [[TMP7]] to i8
; MESA-NEXT: [[ARG3_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[TMP9:%.*]] = load i32, ptr addrspace(4) [[ARG3_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
; MESA-NEXT: [[TMP10:%.*]] = lshr i32 [[TMP9]], 24
; MESA-NEXT: [[TMP11:%.*]] = trunc i32 [[TMP10]] to i8
; MESA-NEXT: [[ARG5_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 40
; MESA-NEXT: [[TMP12:%.*]] = load i32, ptr addrspace(4) [[ARG5_KERNARG_OFFSET_ALIGN_DOWN]], align 8, !invariant.load [[META1]]
; MESA-NEXT: [[TMP13:%.*]] = lshr i32 [[TMP12]], 8
; MESA-NEXT: [[TMP14:%.*]] = trunc i32 [[TMP13]] to i8
; MESA-NEXT: [[ARG6_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 40
; MESA-NEXT: [[TMP15:%.*]] = load i32, ptr addrspace(4) [[ARG6_KERNARG_OFFSET_ALIGN_DOWN]], align 8, !invariant.load [[META1]]
; MESA-NEXT: [[TMP16:%.*]] = lshr i32 [[TMP15]], 16
; MESA-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i8
; MESA-NEXT: [[ARG7_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_I8_I8_I8_I8_I8_I8_I8_I8_KERNARG_SEGMENT]], i64 40
; MESA-NEXT: [[TMP18:%.*]] = load i32, ptr addrspace(4) [[ARG7_KERNARG_OFFSET_ALIGN_DOWN]], align 8, !invariant.load [[META1]]
; MESA-NEXT: [[TMP19:%.*]] = lshr i32 [[TMP18]], 24
; MESA-NEXT: [[TMP20:%.*]] = trunc i32 [[TMP19]] to i8
; MESA-NEXT: store volatile i8 [[TMP2]], ptr addrspace(1) undef, align 1
; MESA-NEXT: store volatile i8 [[TMP5]], ptr addrspace(1) undef, align 1
; MESA-NEXT: store volatile i8 [[TMP8]], ptr addrspace(1) undef, align 1
; MESA-NEXT: store volatile i8 [[TMP11]], ptr addrspace(1) undef, align 1
; MESA-NEXT: store volatile i8 [[TMP14]], ptr addrspace(1) undef, align 1
; MESA-NEXT: store volatile i8 [[TMP17]], ptr addrspace(1) undef, align 1
; MESA-NEXT: store volatile i8 [[TMP20]], ptr addrspace(1) undef, align 1
; MESA-NEXT: ret void
;
  store volatile i8 %arg0, ptr addrspace(1) undef
  store volatile i8 %arg1, ptr addrspace(1) undef
  store volatile i8 %arg2, ptr addrspace(1) undef
  store volatile i8 %arg3, ptr addrspace(1) undef
  store volatile i8 %arg5, ptr addrspace(1) undef
  store volatile i8 %arg6, ptr addrspace(1) undef
  store volatile i8 %arg7, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @kern_realign_f16_f16(half %arg0, half %arg1) #0 {
; HSA-LABEL: @kern_realign_f16_f16(
; HSA-NEXT: [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
; HSA-NEXT: [[ARG0_LOAD:%.*]] = bitcast i16 [[TMP2]] to half
; HSA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]]
; HSA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 16
; HSA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
; HSA-NEXT: [[ARG1_LOAD:%.*]] = bitcast i16 [[TMP5]] to half
; HSA-NEXT: store volatile half [[ARG0_LOAD]], ptr addrspace(1) undef, align 2
; HSA-NEXT: store volatile half [[ARG1_LOAD]], ptr addrspace(1) undef, align 2
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_realign_f16_f16(
; MESA-NEXT: [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
; MESA-NEXT: [[ARG0_LOAD:%.*]] = bitcast i16 [[TMP2]] to half
; MESA-NEXT: [[ARG1_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_REALIGN_F16_F16_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]]
; MESA-NEXT: [[TMP4:%.*]] = lshr i32 [[TMP3]], 16
; MESA-NEXT: [[TMP5:%.*]] = trunc i32 [[TMP4]] to i16
; MESA-NEXT: [[ARG1_LOAD:%.*]] = bitcast i16 [[TMP5]] to half
; MESA-NEXT: store volatile half [[ARG0_LOAD]], ptr addrspace(1) undef, align 2
; MESA-NEXT: store volatile half [[ARG1_LOAD]], ptr addrspace(1) undef, align 2
; MESA-NEXT: ret void
;
  store volatile half %arg0, ptr addrspace(1) undef
  store volatile half %arg1, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @kern_global_ptr(ptr addrspace(1) %ptr) #0 {
; HSA-LABEL: @kern_global_ptr(
; HSA-NEXT: [[KERN_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_GLOBAL_PTR_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
; HSA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) undef, align 8
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_global_ptr(
; MESA-NEXT: [[KERN_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_GLOBAL_PTR_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
; MESA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) undef, align 8
; MESA-NEXT: ret void
;
  store volatile ptr addrspace(1) %ptr, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @kern_global_ptr_dereferencable(ptr addrspace(1) dereferenceable(42) %ptr) #0 {
; HSA-LABEL: @kern_global_ptr_dereferencable(
; HSA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !dereferenceable [[META3:![0-9]+]]
; HSA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) undef, align 8
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_global_ptr_dereferencable(
; MESA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_GLOBAL_PTR_DEREFERENCABLE_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !dereferenceable [[META3:![0-9]+]]
; MESA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) undef, align 8
; MESA-NEXT: ret void
;
  store volatile ptr addrspace(1) %ptr, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @kern_global_ptr_dereferencable_or_null(ptr addrspace(1) dereferenceable_or_null(128) %ptr) #0 {
; HSA-LABEL: @kern_global_ptr_dereferencable_or_null(
; HSA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !dereferenceable_or_null [[META4:![0-9]+]]
; HSA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) undef, align 8
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_global_ptr_dereferencable_or_null(
; MESA-NEXT: [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_GLOBAL_PTR_DEREFERENCABLE_OR_NULL_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !dereferenceable_or_null [[META4:![0-9]+]]
; MESA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) undef, align 8
; MESA-NEXT: ret void
;
  store volatile ptr addrspace(1) %ptr, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @kern_nonnull_global_ptr(ptr addrspace(1) nonnull %ptr) #0 {
; HSA-LABEL: @kern_nonnull_global_ptr(
; HSA-NEXT: [[KERN_NONNULL_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_NONNULL_GLOBAL_PTR_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !nonnull [[META1]]
; HSA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) undef, align 8
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_nonnull_global_ptr(
; MESA-NEXT: [[KERN_NONNULL_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_NONNULL_GLOBAL_PTR_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !nonnull [[META1]]
; MESA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) undef, align 8
; MESA-NEXT: ret void
;
  store volatile ptr addrspace(1) %ptr, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @kern_align32_global_ptr(ptr addrspace(1) align 1024 %ptr) #0 {
; HSA-LABEL: @kern_align32_global_ptr(
; HSA-NEXT: [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !align [[META5:![0-9]+]]
; HSA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) undef, align 8
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_align32_global_ptr(
; MESA-NEXT: [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_ALIGN32_GLOBAL_PTR_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !align [[META5:![0-9]+]]
; MESA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) undef, align 8
; MESA-NEXT: ret void
;
  store volatile ptr addrspace(1) %ptr, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @kern_noalias_global_ptr(ptr addrspace(1) noalias %ptr) #0 {
; GCN-LABEL: @kern_noalias_global_ptr(
; GCN-NEXT: [[KERN_NOALIAS_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; GCN-NEXT: store volatile ptr addrspace(1) [[PTR:%.*]], ptr addrspace(1) undef, align 8
; GCN-NEXT: ret void
;
  store volatile ptr addrspace(1) %ptr, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @kern_noalias_global_ptr_x2(ptr addrspace(1) noalias %ptr0, ptr addrspace(1) noalias %ptr1) #0 {
; GCN-LABEL: @kern_noalias_global_ptr_x2(
; GCN-NEXT: [[KERN_NOALIAS_GLOBAL_PTR_X2_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; GCN-NEXT: store volatile ptr addrspace(1) [[PTR0:%.*]], ptr addrspace(1) undef, align 8
; GCN-NEXT: store volatile ptr addrspace(1) [[PTR1:%.*]], ptr addrspace(1) undef, align 8
; GCN-NEXT: ret void
;
  store volatile ptr addrspace(1) %ptr0, ptr addrspace(1) undef
  store volatile ptr addrspace(1) %ptr1, ptr addrspace(1) undef
  ret void
}

define amdgpu_kernel void @kern_noundef_global_ptr(ptr addrspace(1) noundef %ptr) #0 {
; HSA-LABEL: @kern_noundef_global_ptr(
; HSA-NEXT: [[KERN_NOUNDEF_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_NOUNDEF_GLOBAL_PTR_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !noundef [[META1]]
; HSA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) null, align 8
; HSA-NEXT: ret void
;
; MESA-LABEL: @kern_noundef_global_ptr(
; MESA-NEXT: [[KERN_NOUNDEF_GLOBAL_PTR_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[PTR_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[KERN_NOUNDEF_GLOBAL_PTR_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[PTR_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[PTR_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !noundef [[META1]]
; MESA-NEXT: store volatile ptr addrspace(1) [[PTR_LOAD]], ptr addrspace(1) null, align 8
; MESA-NEXT: ret void
;
  store volatile ptr addrspace(1) %ptr, ptr addrspace(1) null
  ret void
}

define amdgpu_kernel void @struct_i8_i8_arg({i8, i8} %in) #0 {
; HSA-LABEL: @struct_i8_i8_arg(
; HSA-NEXT: entry:
; HSA-NEXT: [[STRUCT_I8_I8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[STRUCT_I8_I8_ARG_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[IN_LOAD:%.*]] = load { i8, i8 }, ptr addrspace(4) [[IN_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
; HSA-NEXT: [[ELT0:%.*]] = extractvalue { i8, i8 } [[IN_LOAD]], 0
; HSA-NEXT: [[ELT1:%.*]] = extractvalue { i8, i8 } [[IN_LOAD]], 1
; HSA-NEXT: store volatile i8 [[ELT0]], ptr addrspace(1) null, align 4
; HSA-NEXT: store volatile i8 [[ELT1]], ptr addrspace(1) null, align 4
; HSA-NEXT: ret void
;
; MESA-LABEL: @struct_i8_i8_arg(
; MESA-NEXT: entry:
; MESA-NEXT: [[STRUCT_I8_I8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[STRUCT_I8_I8_ARG_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[IN_LOAD:%.*]] = load { i8, i8 }, ptr addrspace(4) [[IN_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
; MESA-NEXT: [[ELT0:%.*]] = extractvalue { i8, i8 } [[IN_LOAD]], 0
; MESA-NEXT: [[ELT1:%.*]] = extractvalue { i8, i8 } [[IN_LOAD]], 1
; MESA-NEXT: store volatile i8 [[ELT0]], ptr addrspace(1) null, align 4
; MESA-NEXT: store volatile i8 [[ELT1]], ptr addrspace(1) null, align 4
; MESA-NEXT: ret void
;
entry:
  %elt0 = extractvalue {i8, i8} %in, 0
  %elt1 = extractvalue {i8, i8} %in, 1
  store volatile i8 %elt0, ptr addrspace(1) null, align 4
  store volatile i8 %elt1, ptr addrspace(1) null, align 4
  ret void
}

define amdgpu_kernel void @struct_i8_i16_arg({i8, i16} %in) #0 {
; HSA-LABEL: @struct_i8_i16_arg(
; HSA-NEXT: entry:
; HSA-NEXT: [[STRUCT_I8_I16_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[STRUCT_I8_I16_ARG_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[IN_LOAD:%.*]] = load { i8, i16 }, ptr addrspace(4) [[IN_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
; HSA-NEXT: [[ELT0:%.*]] = extractvalue { i8, i16 } [[IN_LOAD]], 0
; HSA-NEXT: [[ELT1:%.*]] = extractvalue { i8, i16 } [[IN_LOAD]], 1
; HSA-NEXT: store volatile i8 [[ELT0]], ptr addrspace(1) null, align 4
; HSA-NEXT: store volatile i16 [[ELT1]], ptr addrspace(1) null, align 4
; HSA-NEXT: ret void
;
; MESA-LABEL: @struct_i8_i16_arg(
; MESA-NEXT: entry:
; MESA-NEXT: [[STRUCT_I8_I16_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[STRUCT_I8_I16_ARG_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[IN_LOAD:%.*]] = load { i8, i16 }, ptr addrspace(4) [[IN_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
; MESA-NEXT: [[ELT0:%.*]] = extractvalue { i8, i16 } [[IN_LOAD]], 0
; MESA-NEXT: [[ELT1:%.*]] = extractvalue { i8, i16 } [[IN_LOAD]], 1
; MESA-NEXT: store volatile i8 [[ELT0]], ptr addrspace(1) null, align 4
; MESA-NEXT: store volatile i16 [[ELT1]], ptr addrspace(1) null, align 4
; MESA-NEXT: ret void
;
entry:
  %elt0 = extractvalue {i8, i16} %in, 0
  %elt1 = extractvalue {i8, i16} %in, 1
  store volatile i8 %elt0, ptr addrspace(1) null, align 4
  store volatile i16 %elt1, ptr addrspace(1) null, align 4
  ret void
}

define amdgpu_kernel void @array_2xi8_arg([2 x i8] %in) #0 {
; HSA-LABEL: @array_2xi8_arg(
; HSA-NEXT: entry:
; HSA-NEXT: [[ARRAY_2XI8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[ARRAY_2XI8_ARG_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[IN_LOAD:%.*]] = load [2 x i8], ptr addrspace(4) [[IN_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
; HSA-NEXT: [[ELT0:%.*]] = extractvalue [2 x i8] [[IN_LOAD]], 0
; HSA-NEXT: [[ELT1:%.*]] = extractvalue [2 x i8] [[IN_LOAD]], 1
; HSA-NEXT: store volatile i8 [[ELT0]], ptr addrspace(1) null, align 4
; HSA-NEXT: store volatile i8 [[ELT1]], ptr addrspace(1) null, align 4
; HSA-NEXT: ret void
;
; MESA-LABEL: @array_2xi8_arg(
; MESA-NEXT: entry:
; MESA-NEXT: [[ARRAY_2XI8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[ARRAY_2XI8_ARG_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[IN_LOAD:%.*]] = load [2 x i8], ptr addrspace(4) [[IN_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
; MESA-NEXT: [[ELT0:%.*]] = extractvalue [2 x i8] [[IN_LOAD]], 0
; MESA-NEXT: [[ELT1:%.*]] = extractvalue [2 x i8] [[IN_LOAD]], 1
; MESA-NEXT: store volatile i8 [[ELT0]], ptr addrspace(1) null, align 4
; MESA-NEXT: store volatile i8 [[ELT1]], ptr addrspace(1) null, align 4
; MESA-NEXT: ret void
;
entry:
  %elt0 = extractvalue [2 x i8] %in, 0
  %elt1 = extractvalue [2 x i8] %in, 1
  store volatile i8 %elt0, ptr addrspace(1) null, align 4
  store volatile i8 %elt1, ptr addrspace(1) null, align 4
  ret void
}

define amdgpu_kernel void @array_2xi1_arg([2 x i1] %in) #0 {
; HSA-LABEL: @array_2xi1_arg(
; HSA-NEXT: entry:
; HSA-NEXT: [[ARRAY_2XI1_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[ARRAY_2XI1_ARG_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[IN_LOAD:%.*]] = load [2 x i1], ptr addrspace(4) [[IN_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
; HSA-NEXT: [[ELT0:%.*]] = extractvalue [2 x i1] [[IN_LOAD]], 0
; HSA-NEXT: [[ELT1:%.*]] = extractvalue [2 x i1] [[IN_LOAD]], 1
; HSA-NEXT: store volatile i1 [[ELT0]], ptr addrspace(1) null, align 4
; HSA-NEXT: store volatile i1 [[ELT1]], ptr addrspace(1) null, align 4
; HSA-NEXT: ret void
;
; MESA-LABEL: @array_2xi1_arg(
; MESA-NEXT: entry:
; MESA-NEXT: [[ARRAY_2XI1_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[IN_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[ARRAY_2XI1_ARG_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[IN_LOAD:%.*]] = load [2 x i1], ptr addrspace(4) [[IN_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
; MESA-NEXT: [[ELT0:%.*]] = extractvalue [2 x i1] [[IN_LOAD]], 0
; MESA-NEXT: [[ELT1:%.*]] = extractvalue [2 x i1] [[IN_LOAD]], 1
; MESA-NEXT: store volatile i1 [[ELT0]], ptr addrspace(1) null, align 4
; MESA-NEXT: store volatile i1 [[ELT1]], ptr addrspace(1) null, align 4
; MESA-NEXT: ret void
;
entry:
  %elt0 = extractvalue [2 x i1] %in, 0
  %elt1 = extractvalue [2 x i1] %in, 1
  store volatile i1 %elt0, ptr addrspace(1) null, align 4
  store volatile i1 %elt1, ptr addrspace(1) null, align 4
  ret void
}

define amdgpu_kernel void @only_empty_struct({} %empty) #0 {
; GCN-LABEL: @only_empty_struct(
; GCN-NEXT: [[ONLY_EMPTY_STRUCT_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(256) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; GCN-NEXT: ret void
;
  ret void
}

define amdgpu_kernel void @empty_struct_with_other({} %empty, i32 %arg1) #0 {
; HSA-LABEL: @empty_struct_with_other(
; HSA-NEXT: [[EMPTY_STRUCT_WITH_OTHER_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[EMPTY_STRUCT_WITH_OTHER_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[ARG1_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
; HSA-NEXT: store i32 [[ARG1_LOAD]], ptr addrspace(1) undef, align 4
; HSA-NEXT: ret void
;
; MESA-LABEL: @empty_struct_with_other(
; MESA-NEXT: [[EMPTY_STRUCT_WITH_OTHER_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG1_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[EMPTY_STRUCT_WITH_OTHER_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[ARG1_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG1_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
; MESA-NEXT: store i32 [[ARG1_LOAD]], ptr addrspace(1) undef, align 4
; MESA-NEXT: ret void
;
  store i32 %arg1, ptr addrspace(1) undef
  ret void
}

; Should insert code after the allocas
define amdgpu_kernel void @static_alloca_kern_i32(i32 %arg0) {
; HSA-LABEL: @static_alloca_kern_i32(
; HSA-NEXT: [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5)
; HSA-NEXT: [[STATIC_ALLOCA_KERN_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[STATIC_ALLOCA_KERN_I32_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[ARG0_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
; HSA-NEXT: store volatile i32 [[ARG0_LOAD]], ptr addrspace(5) [[ALLOCA]], align 4
; HSA-NEXT: ret void
;
; MESA-LABEL: @static_alloca_kern_i32(
; MESA-NEXT: [[ALLOCA:%.*]] = alloca i32, align 4, addrspace(5)
; MESA-NEXT: [[STATIC_ALLOCA_KERN_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[STATIC_ALLOCA_KERN_I32_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[ARG0_LOAD:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
; MESA-NEXT: store volatile i32 [[ARG0_LOAD]], ptr addrspace(5) [[ALLOCA]], align 4
; MESA-NEXT: ret void
;
  %alloca = alloca i32, addrspace(5)
  store volatile i32 %arg0, ptr addrspace(5) %alloca
  ret void
}

; Make sure we don't break the IR if an alloca depends on the
; kernargs.
define amdgpu_kernel void @dyn_alloca_kernarg_i32(i32 %n) {
; HSA-LABEL: @dyn_alloca_kernarg_i32(
; HSA-NEXT: [[ALLOCA0:%.*]] = alloca i32, align 4, addrspace(5)
; HSA-NEXT: [[DYN_ALLOCA_KERNARG_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[N_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[DYN_ALLOCA_KERNARG_I32_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[N_LOAD:%.*]] = load i32, ptr addrspace(4) [[N_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
; HSA-NEXT: [[ALLOCA1:%.*]] = alloca i32, i32 [[N_LOAD]], align 4, addrspace(5)
; HSA-NEXT: store volatile i32 0, ptr addrspace(5) [[ALLOCA0]], align 4
; HSA-NEXT: store volatile i32 1, ptr addrspace(5) [[ALLOCA1]], align 4
; HSA-NEXT: ret void
;
; MESA-LABEL: @dyn_alloca_kernarg_i32(
; MESA-NEXT: [[ALLOCA0:%.*]] = alloca i32, align 4, addrspace(5)
; MESA-NEXT: [[DYN_ALLOCA_KERNARG_I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[N_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[DYN_ALLOCA_KERNARG_I32_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[N_LOAD:%.*]] = load i32, ptr addrspace(4) [[N_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
; MESA-NEXT: [[ALLOCA1:%.*]] = alloca i32, i32 [[N_LOAD]], align 4, addrspace(5)
; MESA-NEXT: store volatile i32 0, ptr addrspace(5) [[ALLOCA0]], align 4
; MESA-NEXT: store volatile i32 1, ptr addrspace(5) [[ALLOCA1]], align 4
; MESA-NEXT: ret void
;
  %alloca0 = alloca i32, addrspace(5)
  %alloca1 = alloca i32, i32 %n, addrspace(5)
  store volatile i32 0, ptr addrspace(5) %alloca0
  store volatile i32 1, ptr addrspace(5) %alloca1
  ret void
}

; Byref pointers should only be treated as offsets from kernarg
define amdgpu_kernel void @byref_constant_i8_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(i8) %in.byref) {
; HSA-LABEL: @byref_constant_i8_arg(
; HSA-NEXT: [[BYREF_CONSTANT_I8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_I8_ARG_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
; HSA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_I8_ARG_KERNARG_SEGMENT]], i64 8
; HSA-NEXT: [[IN:%.*]] = load i8, ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]], align 1
; HSA-NEXT: [[EXT:%.*]] = zext i8 [[IN]] to i32
; HSA-NEXT: store i32 [[EXT]], ptr addrspace(1) [[OUT_LOAD]], align 4
; HSA-NEXT: ret void
;
; MESA-LABEL: @byref_constant_i8_arg(
; MESA-NEXT: [[BYREF_CONSTANT_I8_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(268) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_I8_ARG_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
; MESA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_I8_ARG_KERNARG_SEGMENT]], i64 44
; MESA-NEXT: [[IN:%.*]] = load i8, ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]], align 1
; MESA-NEXT: [[EXT:%.*]] = zext i8 [[IN]] to i32
; MESA-NEXT: store i32 [[EXT]], ptr addrspace(1) [[OUT_LOAD]], align 4
; MESA-NEXT: ret void
;
  %in = load i8, ptr addrspace(4) %in.byref
  %ext = zext i8 %in to i32
  store i32 %ext, ptr addrspace(1) %out, align 4
  ret void
}

define amdgpu_kernel void @byref_constant_i16_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(i16) %in.byref) {
; HSA-LABEL: @byref_constant_i16_arg(
; HSA-NEXT: [[BYREF_CONSTANT_I16_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_I16_ARG_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META1]]
; HSA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_I16_ARG_KERNARG_SEGMENT]], i64 8
; HSA-NEXT: [[IN:%.*]] = load i16, ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]], align 2
; HSA-NEXT: [[EXT:%.*]] = zext i16 [[IN]] to i32
; HSA-NEXT: store i32 [[EXT]], ptr addrspace(1) [[OUT_LOAD]], align 4
; HSA-NEXT: ret void
;
; MESA-LABEL: @byref_constant_i16_arg(
; MESA-NEXT: [[BYREF_CONSTANT_I16_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(268) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_I16_ARG_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 4, !invariant.load [[META1]]
; MESA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_I16_ARG_KERNARG_SEGMENT]], i64 44
; MESA-NEXT: [[IN:%.*]] = load i16, ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]], align 2
; MESA-NEXT: [[EXT:%.*]] = zext i16 [[IN]] to i32
; MESA-NEXT: store i32 [[EXT]], ptr addrspace(1) [[OUT_LOAD]], align 4
; MESA-NEXT: ret void
;
  %in = load i16, ptr addrspace(4) %in.byref
  %ext = zext i16 %in to i32
1414 store i32 %ext, ptr addrspace(1) %out, align 4 1415 ret void 1416} 1417 1418define amdgpu_kernel void @byref_constant_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(i32) %in.byref, i32 %after.offset) { 1419; HSA-LABEL: @byref_constant_i32_arg( 1420; HSA-NEXT: [[BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() 1421; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 0 1422; HSA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META1]] 1423; HSA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 8 1424; HSA-NEXT: [[AFTER_OFFSET_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 12 1425; HSA-NEXT: [[AFTER_OFFSET_LOAD:%.*]] = load i32, ptr addrspace(4) [[AFTER_OFFSET_KERNARG_OFFSET]], align 4, !invariant.load [[META1]] 1426; HSA-NEXT: [[IN:%.*]] = load i32, ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]], align 4 1427; HSA-NEXT: store volatile i32 [[IN]], ptr addrspace(1) [[OUT_LOAD]], align 4 1428; HSA-NEXT: store volatile i32 [[AFTER_OFFSET_LOAD]], ptr addrspace(1) [[OUT_LOAD]], align 4 1429; HSA-NEXT: ret void 1430; 1431; MESA-LABEL: @byref_constant_i32_arg( 1432; MESA-NEXT: [[BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() 1433; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 36 1434; MESA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 4, !invariant.load [[META1]] 1435; MESA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr 
inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 44 1436; MESA-NEXT: [[AFTER_OFFSET_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 48 1437; MESA-NEXT: [[AFTER_OFFSET_LOAD:%.*]] = load i32, ptr addrspace(4) [[AFTER_OFFSET_KERNARG_OFFSET]], align 16, !invariant.load [[META1]] 1438; MESA-NEXT: [[IN:%.*]] = load i32, ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]], align 4 1439; MESA-NEXT: store volatile i32 [[IN]], ptr addrspace(1) [[OUT_LOAD]], align 4 1440; MESA-NEXT: store volatile i32 [[AFTER_OFFSET_LOAD]], ptr addrspace(1) [[OUT_LOAD]], align 4 1441; MESA-NEXT: ret void 1442; 1443 %in = load i32, ptr addrspace(4) %in.byref 1444 store volatile i32 %in, ptr addrspace(1) %out, align 4 1445 store volatile i32 %after.offset, ptr addrspace(1) %out, align 4 1446 ret void 1447} 1448 1449define amdgpu_kernel void @byref_constant_v4i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(<4 x i32>) %in.byref, i32 %after.offset) { 1450; HSA-LABEL: @byref_constant_v4i32_arg( 1451; HSA-NEXT: [[BYREF_CONSTANT_V4I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(296) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() 1452; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_V4I32_ARG_KERNARG_SEGMENT]], i64 0 1453; HSA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META1]] 1454; HSA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_V4I32_ARG_KERNARG_SEGMENT]], i64 16 1455; HSA-NEXT: [[AFTER_OFFSET_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_V4I32_ARG_KERNARG_SEGMENT]], i64 32 1456; HSA-NEXT: [[AFTER_OFFSET_LOAD:%.*]] = load i32, ptr addrspace(4) [[AFTER_OFFSET_KERNARG_OFFSET]], align 16, !invariant.load [[META1]] 1457; HSA-NEXT: [[IN:%.*]] = 
load <4 x i32>, ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]], align 16 1458; HSA-NEXT: store volatile <4 x i32> [[IN]], ptr addrspace(1) [[OUT_LOAD]], align 4 1459; HSA-NEXT: store volatile i32 [[AFTER_OFFSET_LOAD]], ptr addrspace(1) [[OUT_LOAD]], align 4 1460; HSA-NEXT: ret void 1461; 1462; MESA-LABEL: @byref_constant_v4i32_arg( 1463; MESA-NEXT: [[BYREF_CONSTANT_V4I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(292) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() 1464; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_V4I32_ARG_KERNARG_SEGMENT]], i64 36 1465; MESA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 4, !invariant.load [[META1]] 1466; MESA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_V4I32_ARG_KERNARG_SEGMENT]], i64 52 1467; MESA-NEXT: [[AFTER_OFFSET_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_V4I32_ARG_KERNARG_SEGMENT]], i64 68 1468; MESA-NEXT: [[AFTER_OFFSET_LOAD:%.*]] = load i32, ptr addrspace(4) [[AFTER_OFFSET_KERNARG_OFFSET]], align 4, !invariant.load [[META1]] 1469; MESA-NEXT: [[IN:%.*]] = load <4 x i32>, ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]], align 16 1470; MESA-NEXT: store volatile <4 x i32> [[IN]], ptr addrspace(1) [[OUT_LOAD]], align 4 1471; MESA-NEXT: store volatile i32 [[AFTER_OFFSET_LOAD]], ptr addrspace(1) [[OUT_LOAD]], align 4 1472; MESA-NEXT: ret void 1473; 1474 %in = load <4 x i32>, ptr addrspace(4) %in.byref 1475 store volatile <4 x i32> %in, ptr addrspace(1) %out, align 4 1476 store volatile i32 %after.offset, ptr addrspace(1) %out, align 4 1477 ret void 1478} 1479 1480define amdgpu_kernel void @byref_align_constant_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(i32) align(256) %in.byref, i32 %after.offset) { 1481; HSA-LABEL: @byref_align_constant_i32_arg( 1482; HSA-NEXT: 
[[BYREF_ALIGN_CONSTANT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 256 dereferenceable(520) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() 1483; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_ALIGN_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 0 1484; HSA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META1]] 1485; HSA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_ALIGN_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 256 1486; HSA-NEXT: [[AFTER_OFFSET_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_ALIGN_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 260 1487; HSA-NEXT: [[AFTER_OFFSET_LOAD:%.*]] = load i32, ptr addrspace(4) [[AFTER_OFFSET_KERNARG_OFFSET]], align 4, !invariant.load [[META1]] 1488; HSA-NEXT: [[IN:%.*]] = load i32, ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]], align 4 1489; HSA-NEXT: store volatile i32 [[IN]], ptr addrspace(1) [[OUT_LOAD]], align 4 1490; HSA-NEXT: store volatile i32 [[AFTER_OFFSET_LOAD]], ptr addrspace(1) [[OUT_LOAD]], align 4 1491; HSA-NEXT: ret void 1492; 1493; MESA-LABEL: @byref_align_constant_i32_arg( 1494; MESA-NEXT: [[BYREF_ALIGN_CONSTANT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 256 dereferenceable(520) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() 1495; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_ALIGN_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 36 1496; MESA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 4, !invariant.load [[META1]] 1497; MESA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_ALIGN_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 292 1498; MESA-NEXT: [[AFTER_OFFSET_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_ALIGN_CONSTANT_I32_ARG_KERNARG_SEGMENT]], 
i64 296 1499; MESA-NEXT: [[AFTER_OFFSET_LOAD:%.*]] = load i32, ptr addrspace(4) [[AFTER_OFFSET_KERNARG_OFFSET]], align 8, !invariant.load [[META1]] 1500; MESA-NEXT: [[IN:%.*]] = load i32, ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]], align 4 1501; MESA-NEXT: store volatile i32 [[IN]], ptr addrspace(1) [[OUT_LOAD]], align 4 1502; MESA-NEXT: store volatile i32 [[AFTER_OFFSET_LOAD]], ptr addrspace(1) [[OUT_LOAD]], align 4 1503; MESA-NEXT: ret void 1504; 1505 %in = load i32, ptr addrspace(4) %in.byref 1506 store volatile i32 %in, ptr addrspace(1) %out, align 4 1507 store volatile i32 %after.offset, ptr addrspace(1) %out, align 4 1508 ret void 1509} 1510 1511define amdgpu_kernel void @byref_natural_align_constant_v16i32_arg(ptr addrspace(1) nocapture %out, i8, ptr addrspace(4) byref(<16 x i32>) %in.byref, i32 %after.offset) { 1512; HSA-LABEL: @byref_natural_align_constant_v16i32_arg( 1513; HSA-NEXT: [[BYREF_NATURAL_ALIGN_CONSTANT_V16I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(392) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() 1514; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_NATURAL_ALIGN_CONSTANT_V16I32_ARG_KERNARG_SEGMENT]], i64 0 1515; HSA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META1]] 1516; HSA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_NATURAL_ALIGN_CONSTANT_V16I32_ARG_KERNARG_SEGMENT]], i64 64 1517; HSA-NEXT: [[AFTER_OFFSET_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_NATURAL_ALIGN_CONSTANT_V16I32_ARG_KERNARG_SEGMENT]], i64 128 1518; HSA-NEXT: [[AFTER_OFFSET_LOAD:%.*]] = load i32, ptr addrspace(4) [[AFTER_OFFSET_KERNARG_OFFSET]], align 16, !invariant.load [[META1]] 1519; HSA-NEXT: [[IN:%.*]] = load <16 x i32>, ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]], align 64 1520; HSA-NEXT: store volatile <16 x i32> [[IN]], 
ptr addrspace(1) [[OUT_LOAD]], align 4 1521; HSA-NEXT: store volatile i32 [[AFTER_OFFSET_LOAD]], ptr addrspace(1) [[OUT_LOAD]], align 4 1522; HSA-NEXT: ret void 1523; 1524; MESA-LABEL: @byref_natural_align_constant_v16i32_arg( 1525; MESA-NEXT: [[BYREF_NATURAL_ALIGN_CONSTANT_V16I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 64 dereferenceable(388) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() 1526; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_NATURAL_ALIGN_CONSTANT_V16I32_ARG_KERNARG_SEGMENT]], i64 36 1527; MESA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 4, !invariant.load [[META1]] 1528; MESA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_NATURAL_ALIGN_CONSTANT_V16I32_ARG_KERNARG_SEGMENT]], i64 100 1529; MESA-NEXT: [[AFTER_OFFSET_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_NATURAL_ALIGN_CONSTANT_V16I32_ARG_KERNARG_SEGMENT]], i64 164 1530; MESA-NEXT: [[AFTER_OFFSET_LOAD:%.*]] = load i32, ptr addrspace(4) [[AFTER_OFFSET_KERNARG_OFFSET]], align 4, !invariant.load [[META1]] 1531; MESA-NEXT: [[IN:%.*]] = load <16 x i32>, ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]], align 64 1532; MESA-NEXT: store volatile <16 x i32> [[IN]], ptr addrspace(1) [[OUT_LOAD]], align 4 1533; MESA-NEXT: store volatile i32 [[AFTER_OFFSET_LOAD]], ptr addrspace(1) [[OUT_LOAD]], align 4 1534; MESA-NEXT: ret void 1535; 1536 %in = load <16 x i32>, ptr addrspace(4) %in.byref 1537 store volatile <16 x i32> %in, ptr addrspace(1) %out, align 4 1538 store volatile i32 %after.offset, ptr addrspace(1) %out, align 4 1539 ret void 1540} 1541 1542; Also accept byref kernel arguments with other global address spaces. 
1543define amdgpu_kernel void @byref_global_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(1) byref(i32) %in.byref) { 1544; HSA-LABEL: @byref_global_i32_arg( 1545; HSA-NEXT: [[BYREF_GLOBAL_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() 1546; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_GLOBAL_I32_ARG_KERNARG_SEGMENT]], i64 0 1547; HSA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META1]] 1548; HSA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_GLOBAL_I32_ARG_KERNARG_SEGMENT]], i64 8 1549; HSA-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]] to ptr addrspace(1) 1550; HSA-NEXT: [[IN:%.*]] = load i32, ptr addrspace(1) [[TMP1]], align 4 1551; HSA-NEXT: store i32 [[IN]], ptr addrspace(1) [[OUT_LOAD]], align 4 1552; HSA-NEXT: ret void 1553; 1554; MESA-LABEL: @byref_global_i32_arg( 1555; MESA-NEXT: [[BYREF_GLOBAL_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(268) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() 1556; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_GLOBAL_I32_ARG_KERNARG_SEGMENT]], i64 36 1557; MESA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 4, !invariant.load [[META1]] 1558; MESA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_GLOBAL_I32_ARG_KERNARG_SEGMENT]], i64 44 1559; MESA-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]] to ptr addrspace(1) 1560; MESA-NEXT: [[IN:%.*]] = load i32, ptr addrspace(1) [[TMP1]], align 4 1561; MESA-NEXT: store i32 [[IN]], ptr addrspace(1) [[OUT_LOAD]], align 4 1562; MESA-NEXT: ret void 1563; 1564 %in = load i32, ptr addrspace(1) 
%in.byref 1565 store i32 %in, ptr addrspace(1) %out, align 4 1566 ret void 1567} 1568 1569define amdgpu_kernel void @byref_flat_i32_arg(ptr addrspace(1) nocapture %out, ptr byref(i32) %in.byref) { 1570; HSA-LABEL: @byref_flat_i32_arg( 1571; HSA-NEXT: [[BYREF_FLAT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() 1572; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_FLAT_I32_ARG_KERNARG_SEGMENT]], i64 0 1573; HSA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META1]] 1574; HSA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_FLAT_I32_ARG_KERNARG_SEGMENT]], i64 8 1575; HSA-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]] to ptr 1576; HSA-NEXT: [[IN:%.*]] = load i32, ptr [[TMP1]], align 4 1577; HSA-NEXT: store i32 [[IN]], ptr addrspace(1) [[OUT_LOAD]], align 4 1578; HSA-NEXT: ret void 1579; 1580; MESA-LABEL: @byref_flat_i32_arg( 1581; MESA-NEXT: [[BYREF_FLAT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(268) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() 1582; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_FLAT_I32_ARG_KERNARG_SEGMENT]], i64 36 1583; MESA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 4, !invariant.load [[META1]] 1584; MESA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_FLAT_I32_ARG_KERNARG_SEGMENT]], i64 44 1585; MESA-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]] to ptr 1586; MESA-NEXT: [[IN:%.*]] = load i32, ptr [[TMP1]], align 4 1587; MESA-NEXT: store i32 [[IN]], ptr addrspace(1) [[OUT_LOAD]], align 4 1588; MESA-NEXT: ret void 1589; 1590 %in = load i32, ptr %in.byref 
1591 store i32 %in, ptr addrspace(1) %out, align 4 1592 ret void 1593} 1594 1595define amdgpu_kernel void @byref_constant_32bit_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(6) byref(i32) %in.byref) { 1596; HSA-LABEL: @byref_constant_32bit_i32_arg( 1597; HSA-NEXT: [[BYREF_CONSTANT_32BIT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() 1598; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_32BIT_I32_ARG_KERNARG_SEGMENT]], i64 0 1599; HSA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META1]] 1600; HSA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_32BIT_I32_ARG_KERNARG_SEGMENT]], i64 8 1601; HSA-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]] to ptr addrspace(6) 1602; HSA-NEXT: [[IN:%.*]] = load i32, ptr addrspace(6) [[TMP1]], align 4 1603; HSA-NEXT: store i32 [[IN]], ptr addrspace(1) [[OUT_LOAD]], align 4 1604; HSA-NEXT: ret void 1605; 1606; MESA-LABEL: @byref_constant_32bit_i32_arg( 1607; MESA-NEXT: [[BYREF_CONSTANT_32BIT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(268) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() 1608; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_32BIT_I32_ARG_KERNARG_SEGMENT]], i64 36 1609; MESA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 4, !invariant.load [[META1]] 1610; MESA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_32BIT_I32_ARG_KERNARG_SEGMENT]], i64 44 1611; MESA-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]] to ptr addrspace(6) 1612; MESA-NEXT: [[IN:%.*]] = load i32, ptr addrspace(6) [[TMP1]], align 
4 1613; MESA-NEXT: store i32 [[IN]], ptr addrspace(1) [[OUT_LOAD]], align 4 1614; MESA-NEXT: ret void 1615; 1616 %in = load i32, ptr addrspace(6) %in.byref 1617 store i32 %in, ptr addrspace(1) %out, align 4 1618 ret void 1619} 1620 1621define amdgpu_kernel void @byref_unknown_as_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(999) byref(i32) %in.byref) { 1622; HSA-LABEL: @byref_unknown_as_i32_arg( 1623; HSA-NEXT: [[BYREF_UNKNOWN_AS_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() 1624; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_UNKNOWN_AS_I32_ARG_KERNARG_SEGMENT]], i64 0 1625; HSA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META1]] 1626; HSA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_UNKNOWN_AS_I32_ARG_KERNARG_SEGMENT]], i64 8 1627; HSA-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]] to ptr addrspace(999) 1628; HSA-NEXT: [[IN:%.*]] = load i32, ptr addrspace(999) [[TMP1]], align 4 1629; HSA-NEXT: store i32 [[IN]], ptr addrspace(1) [[OUT_LOAD]], align 4 1630; HSA-NEXT: ret void 1631; 1632; MESA-LABEL: @byref_unknown_as_i32_arg( 1633; MESA-NEXT: [[BYREF_UNKNOWN_AS_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(268) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() 1634; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_UNKNOWN_AS_I32_ARG_KERNARG_SEGMENT]], i64 36 1635; MESA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 4, !invariant.load [[META1]] 1636; MESA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_UNKNOWN_AS_I32_ARG_KERNARG_SEGMENT]], i64 44 1637; MESA-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) 
[[IN_BYREF_BYVAL_KERNARG_OFFSET]] to ptr addrspace(999) 1638; MESA-NEXT: [[IN:%.*]] = load i32, ptr addrspace(999) [[TMP1]], align 4 1639; MESA-NEXT: store i32 [[IN]], ptr addrspace(1) [[OUT_LOAD]], align 4 1640; MESA-NEXT: ret void 1641; 1642 %in = load i32, ptr addrspace(999) %in.byref 1643 store i32 %in, ptr addrspace(1) %out, align 4 1644 ret void 1645} 1646 1647; Invalid, but should not crash. 1648define amdgpu_kernel void @byref_local_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(3) byref(i32) %in.byref) { 1649; HSA-LABEL: @byref_local_i32_arg( 1650; HSA-NEXT: [[BYREF_LOCAL_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() 1651; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_LOCAL_I32_ARG_KERNARG_SEGMENT]], i64 0 1652; HSA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META1]] 1653; HSA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_LOCAL_I32_ARG_KERNARG_SEGMENT]], i64 8 1654; HSA-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]] to ptr addrspace(3) 1655; HSA-NEXT: [[IN:%.*]] = load i32, ptr addrspace(3) [[TMP1]], align 4 1656; HSA-NEXT: store i32 [[IN]], ptr addrspace(1) [[OUT_LOAD]], align 4 1657; HSA-NEXT: ret void 1658; 1659; MESA-LABEL: @byref_local_i32_arg( 1660; MESA-NEXT: [[BYREF_LOCAL_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(268) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() 1661; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_LOCAL_I32_ARG_KERNARG_SEGMENT]], i64 36 1662; MESA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 4, !invariant.load [[META1]] 1663; MESA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr 
addrspace(4) [[BYREF_LOCAL_I32_ARG_KERNARG_SEGMENT]], i64 44 1664; MESA-NEXT: [[TMP1:%.*]] = addrspacecast ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]] to ptr addrspace(3) 1665; MESA-NEXT: [[IN:%.*]] = load i32, ptr addrspace(3) [[TMP1]], align 4 1666; MESA-NEXT: store i32 [[IN]], ptr addrspace(1) [[OUT_LOAD]], align 4 1667; MESA-NEXT: ret void 1668; 1669 %in = load i32, ptr addrspace(3) %in.byref 1670 store i32 %in, ptr addrspace(1) %out, align 4 1671 ret void 1672} 1673 1674define amdgpu_kernel void @multi_byref_constant_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(i32) %in0.byref, ptr addrspace(4) byref(i32) %in1.byref, i32 %after.offset) { 1675; HSA-LABEL: @multi_byref_constant_i32_arg( 1676; HSA-NEXT: [[MULTI_BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(280) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() 1677; HSA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[MULTI_BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 0 1678; HSA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 16, !invariant.load [[META1]] 1679; HSA-NEXT: [[IN0_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[MULTI_BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 8 1680; HSA-NEXT: [[IN1_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[MULTI_BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 12 1681; HSA-NEXT: [[AFTER_OFFSET_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[MULTI_BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 16 1682; HSA-NEXT: [[AFTER_OFFSET_LOAD:%.*]] = load i32, ptr addrspace(4) [[AFTER_OFFSET_KERNARG_OFFSET]], align 16, !invariant.load [[META1]] 1683; HSA-NEXT: [[IN0:%.*]] = load i32, ptr addrspace(4) [[IN0_BYREF_BYVAL_KERNARG_OFFSET]], align 4 1684; HSA-NEXT: [[IN1:%.*]] = load i32, ptr addrspace(4) [[IN1_BYREF_BYVAL_KERNARG_OFFSET]], align 4 1685; HSA-NEXT: 
store volatile i32 [[IN0]], ptr addrspace(1) [[OUT_LOAD]], align 4 1686; HSA-NEXT: store volatile i32 [[IN1]], ptr addrspace(1) [[OUT_LOAD]], align 4 1687; HSA-NEXT: store volatile i32 [[AFTER_OFFSET_LOAD]], ptr addrspace(1) [[OUT_LOAD]], align 4 1688; HSA-NEXT: ret void 1689; 1690; MESA-LABEL: @multi_byref_constant_i32_arg( 1691; MESA-NEXT: [[MULTI_BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(276) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() 1692; MESA-NEXT: [[OUT_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[MULTI_BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 36 1693; MESA-NEXT: [[OUT_LOAD:%.*]] = load ptr addrspace(1), ptr addrspace(4) [[OUT_KERNARG_OFFSET]], align 4, !invariant.load [[META1]] 1694; MESA-NEXT: [[IN0_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[MULTI_BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 44 1695; MESA-NEXT: [[IN1_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[MULTI_BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 48 1696; MESA-NEXT: [[AFTER_OFFSET_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[MULTI_BYREF_CONSTANT_I32_ARG_KERNARG_SEGMENT]], i64 52 1697; MESA-NEXT: [[AFTER_OFFSET_LOAD:%.*]] = load i32, ptr addrspace(4) [[AFTER_OFFSET_KERNARG_OFFSET]], align 4, !invariant.load [[META1]] 1698; MESA-NEXT: [[IN0:%.*]] = load i32, ptr addrspace(4) [[IN0_BYREF_BYVAL_KERNARG_OFFSET]], align 4 1699; MESA-NEXT: [[IN1:%.*]] = load i32, ptr addrspace(4) [[IN1_BYREF_BYVAL_KERNARG_OFFSET]], align 4 1700; MESA-NEXT: store volatile i32 [[IN0]], ptr addrspace(1) [[OUT_LOAD]], align 4 1701; MESA-NEXT: store volatile i32 [[IN1]], ptr addrspace(1) [[OUT_LOAD]], align 4 1702; MESA-NEXT: store volatile i32 [[AFTER_OFFSET_LOAD]], ptr addrspace(1) [[OUT_LOAD]], align 4 1703; MESA-NEXT: ret void 1704; 1705 %in0 = load i32, ptr addrspace(4) %in0.byref 1706 %in1 = load i32, ptr addrspace(4) %in1.byref 1707 
store volatile i32 %in0, ptr addrspace(1) %out, align 4 1708 store volatile i32 %in1, ptr addrspace(1) %out, align 4 1709 store volatile i32 %after.offset, ptr addrspace(1) %out, align 4 1710 ret void 1711} 1712 1713define amdgpu_kernel void @byref_constant_i32_arg_offset0(ptr addrspace(4) byref(i32) %in.byref) { 1714; HSA-LABEL: @byref_constant_i32_arg_offset0( 1715; HSA-NEXT: [[BYREF_CONSTANT_I32_ARG_OFFSET0_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() 1716; HSA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_I32_ARG_OFFSET0_KERNARG_SEGMENT]], i64 0 1717; HSA-NEXT: [[IN:%.*]] = load i32, ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]], align 4 1718; HSA-NEXT: store i32 [[IN]], ptr addrspace(1) undef, align 4 1719; HSA-NEXT: ret void 1720; 1721; MESA-LABEL: @byref_constant_i32_arg_offset0( 1722; MESA-NEXT: [[BYREF_CONSTANT_I32_ARG_OFFSET0_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() 1723; MESA-NEXT: [[IN_BYREF_BYVAL_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[BYREF_CONSTANT_I32_ARG_OFFSET0_KERNARG_SEGMENT]], i64 36 1724; MESA-NEXT: [[IN:%.*]] = load i32, ptr addrspace(4) [[IN_BYREF_BYVAL_KERNARG_OFFSET]], align 4 1725; MESA-NEXT: store i32 [[IN]], ptr addrspace(1) undef, align 4 1726; MESA-NEXT: ret void 1727; 1728 %in = load i32, ptr addrspace(4) %in.byref 1729 store i32 %in, ptr addrspace(1) undef, align 4 1730 ret void 1731} 1732 1733define amdgpu_kernel void @noundef_f32(float noundef %arg0) { 1734; HSA-LABEL: @noundef_f32( 1735; HSA-NEXT: [[NOUNDEF_F32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr() 1736; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[NOUNDEF_F32_KERNARG_SEGMENT]], i64 0 1737; HSA-NEXT: [[ARG0_LOAD:%.*]] 
= load float, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !noundef [[META1]]
; HSA-NEXT: call void (...) @llvm.fake.use(float [[ARG0_LOAD]])
; HSA-NEXT: ret void
;
; MESA-LABEL: @noundef_f32(
; MESA-NEXT: [[NOUNDEF_F32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[NOUNDEF_F32_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[ARG0_LOAD:%.*]] = load float, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !noundef [[META1]]
; MESA-NEXT: call void (...) @llvm.fake.use(float [[ARG0_LOAD]])
; MESA-NEXT: ret void
;
  call void (...) @llvm.fake.use(float %arg0)
  ret void
}

; noundef half kernarg: per the checks, lowered as an aligned-down i32 load
; (carrying !invariant.load and !noundef), then trunc to i16 and bitcast to half.
define amdgpu_kernel void @noundef_f16(half noundef %arg0) {
; HSA-LABEL: @noundef_f16(
; HSA-NEXT: [[NOUNDEF_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[NOUNDEF_F16_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 16, !invariant.load [[META1]], !noundef [[META1]]
; HSA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
; HSA-NEXT: [[ARG0_LOAD:%.*]] = bitcast i16 [[TMP2]] to half
; HSA-NEXT: call void (...) @llvm.fake.use(half [[ARG0_LOAD]])
; HSA-NEXT: ret void
;
; MESA-LABEL: @noundef_f16(
; MESA-NEXT: [[NOUNDEF_F16_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(260) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG0_KERNARG_OFFSET_ALIGN_DOWN:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[NOUNDEF_F16_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[TMP1:%.*]] = load i32, ptr addrspace(4) [[ARG0_KERNARG_OFFSET_ALIGN_DOWN]], align 4, !invariant.load [[META1]], !noundef [[META1]]
; MESA-NEXT: [[TMP2:%.*]] = trunc i32 [[TMP1]] to i16
; MESA-NEXT: [[ARG0_LOAD:%.*]] = bitcast i16 [[TMP2]] to half
; MESA-NEXT: call void (...) @llvm.fake.use(half [[ARG0_LOAD]])
; MESA-NEXT: ret void
;
  call void (...) @llvm.fake.use(half %arg0)
  ret void
}

; noundef <2 x i32> kernarg: dword-aligned, so it is loaded directly (no
; align-down/trunc sequence); the load keeps the !noundef annotation.
define amdgpu_kernel void @noundef_v2i32(<2 x i32> noundef %arg0) {
; HSA-LABEL: @noundef_v2i32(
; HSA-NEXT: [[NOUNDEF_V2I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[NOUNDEF_V2I32_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[ARG0_LOAD:%.*]] = load <2 x i32>, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !noundef [[META1]]
; HSA-NEXT: call void (...) @llvm.fake.use(<2 x i32> [[ARG0_LOAD]])
; HSA-NEXT: ret void
;
; MESA-LABEL: @noundef_v2i32(
; MESA-NEXT: [[NOUNDEF_V2I32_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[NOUNDEF_V2I32_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[ARG0_LOAD:%.*]] = load <2 x i32>, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !noundef [[META1]]
; MESA-NEXT: call void (...) @llvm.fake.use(<2 x i32> [[ARG0_LOAD]])
; MESA-NEXT: ret void
;
  call void (...) @llvm.fake.use(<2 x i32> %arg0)
  ret void
}

; noundef ptr (addrspace 0) kernarg: loaded directly as ptr with !noundef.
define amdgpu_kernel void @noundef_p0(ptr noundef %arg0) {
; HSA-LABEL: @noundef_p0(
; HSA-NEXT: [[NOUNDEF_P0_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[NOUNDEF_P0_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[ARG0_LOAD:%.*]] = load ptr, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !noundef [[META1]]
; HSA-NEXT: call void (...) @llvm.fake.use(ptr [[ARG0_LOAD]])
; HSA-NEXT: ret void
;
; MESA-LABEL: @noundef_p0(
; MESA-NEXT: [[NOUNDEF_P0_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(264) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[NOUNDEF_P0_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[ARG0_LOAD:%.*]] = load ptr, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !noundef [[META1]]
; MESA-NEXT: call void (...) @llvm.fake.use(ptr [[ARG0_LOAD]])
; MESA-NEXT: ret void
;
  call void (...) @llvm.fake.use(ptr %arg0)
  ret void
}

; noundef <2 x ptr> kernarg: 16-byte argument; note the larger
; dereferenceable(272) on the kernarg segment compared to the 8-byte cases.
define amdgpu_kernel void @noundef_v2p0(<2 x ptr> noundef %arg0) {
; HSA-LABEL: @noundef_v2p0(
; HSA-NEXT: [[NOUNDEF_V2P0_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; HSA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[NOUNDEF_V2P0_KERNARG_SEGMENT]], i64 0
; HSA-NEXT: [[ARG0_LOAD:%.*]] = load <2 x ptr>, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 16, !invariant.load [[META1]], !noundef [[META1]]
; HSA-NEXT: call void (...) @llvm.fake.use(<2 x ptr> [[ARG0_LOAD]])
; HSA-NEXT: ret void
;
; MESA-LABEL: @noundef_v2p0(
; MESA-NEXT: [[NOUNDEF_V2P0_KERNARG_SEGMENT:%.*]] = call nonnull align 16 dereferenceable(272) ptr addrspace(4) @llvm.amdgcn.kernarg.segment.ptr()
; MESA-NEXT: [[ARG0_KERNARG_OFFSET:%.*]] = getelementptr inbounds i8, ptr addrspace(4) [[NOUNDEF_V2P0_KERNARG_SEGMENT]], i64 36
; MESA-NEXT: [[ARG0_LOAD:%.*]] = load <2 x ptr>, ptr addrspace(4) [[ARG0_KERNARG_OFFSET]], align 4, !invariant.load [[META1]], !noundef [[META1]]
; MESA-NEXT: call void (...) @llvm.fake.use(<2 x ptr> [[ARG0_LOAD]])
; MESA-NEXT: ret void
;
  call void (...) @llvm.fake.use(<2 x ptr> %arg0)
  ret void
}

; Attribute groups referenced by kernel definitions in this file.
attributes #0 = { nounwind "target-cpu"="kaveri" }
attributes #1 = { nounwind "target-cpu"="kaveri" "amdgpu-implicitarg-num-bytes"="40" }
attributes #2 = { nounwind "target-cpu"="tahiti" }


!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}

; Autogenerated global checks (UTC_ARGS --check-globals): attribute groups and
; metadata emitted by the pass. Regenerate with utils/update_test_checks.py
; rather than editing by hand.
;.
; HSA: attributes #[[ATTR0:[0-9]+]] = { nounwind }
; HSA: attributes #[[ATTR1:[0-9]+]] = { nounwind "target-cpu"="kaveri" }
; HSA: attributes #[[ATTR2:[0-9]+]] = { nounwind "amdgpu-implicitarg-num-bytes"="40" "target-cpu"="kaveri" }
; HSA: attributes #[[ATTR3:[0-9]+]] = { nounwind "target-cpu"="tahiti" }
; HSA: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
;.
; MESA: attributes #[[ATTR0:[0-9]+]] = { nounwind }
; MESA: attributes #[[ATTR1:[0-9]+]] = { nounwind "target-cpu"="kaveri" }
; MESA: attributes #[[ATTR2:[0-9]+]] = { nounwind "amdgpu-implicitarg-num-bytes"="40" "target-cpu"="kaveri" }
; MESA: attributes #[[ATTR3:[0-9]+]] = { nounwind "target-cpu"="tahiti" }
; MESA: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
;.
; HSA: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
; HSA: [[META1]] = !{}
; HSA: [[RNG2]] = !{i32 0, i32 8}
; HSA: [[META3]] = !{i64 42}
; HSA: [[META4]] = !{i64 128}
; HSA: [[META5]] = !{i64 1024}
;.
; MESA: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
; MESA: [[META1]] = !{}
; MESA: [[RNG2]] = !{i32 0, i32 8}
; MESA: [[META3]] = !{i64 42}
; MESA: [[META4]] = !{i64 128}
; MESA: [[META5]] = !{i64 1024}
;.