; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
; RUN: opt -S -mtriple=amdgcn--amdhsa -passes=amdgpu-lower-module-lds < %s --amdgpu-lower-module-lds-strategy=table | FileCheck -check-prefix=OPT %s
; RUN: llc -mtriple=amdgcn--amdhsa -verify-machineinstrs < %s --amdgpu-lower-module-lds-strategy=table | FileCheck -check-prefix=GCN %s

; Opt checks from utils/update_test_checks.py, llc checks from utils/update_llc_test_checks.py, both modified.

; Define four variables and four non-kernel functions which access exactly one variable each
@v0 = addrspace(3) global float poison
@v1 = addrspace(3) global i16 poison, align 16
@v2 = addrspace(3) global i64 poison
@v3 = addrspace(3) global i8 poison
@unused = addrspace(3) global i16 poison

; OPT: %llvm.amdgcn.kernel.kernel_no_table.lds.t = type { i64 }
; OPT: %llvm.amdgcn.kernel.k01.lds.t = type { i16, [2 x i8], float }
; OPT: %llvm.amdgcn.kernel.k23.lds.t = type { i64, i8 }
; OPT: %llvm.amdgcn.kernel.k123.lds.t = type { i16, i8, [5 x i8], i64 }


; Salient parts of the IR lookup table check:
; It has (top level) size 3 as there are 3 kernels that call functions which use lds
; The next level down has type [4 x i32] as there are 4 variables accessed by functions which use lds
; The kernel naming pattern and the structs being named after the functions help verify placement of poison
; The remainder are constant expressions into the variable instances checked above
; Kernel IDs (row order below, matching the s_mov_b32 s15, N in the GCN checks): k01 = 0, k123 = 1, k23 = 2

; OPT{LITERAL}: @llvm.amdgcn.lds.offset.table = internal addrspace(4) constant [3 x [4 x i32]] [[4 x i32] [i32 ptrtoint (ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k01.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k01.lds, i32 0, i32 2) to i32), i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.kernel.k01.lds to i32), i32 poison, i32 poison], [4 x i32] [i32 poison, i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds to i32), i32 ptrtoint (ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k123.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 3) to i32), i32 ptrtoint (ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k123.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1) to i32)], [4 x i32] [i32 poison, i32 poison, i32 ptrtoint (ptr addrspace(3) @llvm.amdgcn.kernel.k23.lds to i32), i32 ptrtoint (ptr addrspace(3) getelementptr inbounds (%llvm.amdgcn.kernel.k23.lds.t, ptr addrspace(3) @llvm.amdgcn.kernel.k23.lds, i32 0, i32 1) to i32)]]


define void @f0() {
; OPT-LABEL: define void @f0() {
; OPT-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
; OPT-NEXT: [[V02:%.*]] = getelementptr inbounds [3 x [4 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[V02]], align 4
; OPT-NEXT: [[V03:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
; OPT-NEXT: [[LD:%.*]] = load float, ptr addrspace(3) [[V03]], align 4
; OPT-NEXT: [[MUL:%.*]] = fmul float [[LD]], 2.000000e+00
; OPT-NEXT: [[V0:%.*]] = getelementptr inbounds [3 x [4 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 0
; OPT-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[V0]], align 4
; OPT-NEXT: [[V01:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(3)
; OPT-NEXT: store float [[MUL]], ptr addrspace(3) [[V01]], align 4
; OPT-NEXT: ret void
;
; GCN-LABEL: f0:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_mov_b32 s4, s15
; GCN-NEXT: s_ashr_i32 s5, s15, 31
; GCN-NEXT: s_getpc_b64 s[6:7]
; GCN-NEXT: s_add_u32 s6, s6, llvm.amdgcn.lds.offset.table@rel32@lo+4
; GCN-NEXT: s_addc_u32 s7, s7, llvm.amdgcn.lds.offset.table@rel32@hi+12
; GCN-NEXT: s_lshl_b64 s[4:5], s[4:5], 4
; GCN-NEXT: s_add_u32 s4, s4, s6
; GCN-NEXT: s_addc_u32 s5, s5, s7
; GCN-NEXT: s_load_dword s4, s[4:5], 0x0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v0, s4
; GCN-NEXT: s_mov_b32 m0, -1
; GCN-NEXT: ds_read_b32 v1, v0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_add_f32_e32 v1, v1, v1
; GCN-NEXT: ds_write_b32 v0, v1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
  %ld = load float, ptr addrspace(3) @v0
  %mul = fmul float %ld, 2.
  store float %mul, ptr addrspace(3) @v0
  ret void
}

define void @f1() {
; OPT-LABEL: define void @f1() {
; OPT-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
; OPT-NEXT: [[V12:%.*]] = getelementptr inbounds [3 x [4 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 1
; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[V12]], align 4
; OPT-NEXT: [[V13:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
; OPT-NEXT: [[LD:%.*]] = load i16, ptr addrspace(3) [[V13]], align 2
; OPT-NEXT: [[MUL:%.*]] = mul i16 [[LD]], 3
; OPT-NEXT: [[V1:%.*]] = getelementptr inbounds [3 x [4 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 1
; OPT-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[V1]], align 4
; OPT-NEXT: [[V11:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(3)
; OPT-NEXT: store i16 [[MUL]], ptr addrspace(3) [[V11]], align 2
; OPT-NEXT: ret void
;
; GCN-LABEL: f1:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_mov_b32 s4, s15
; GCN-NEXT: s_ashr_i32 s5, s15, 31
; GCN-NEXT: s_getpc_b64 s[6:7]
; GCN-NEXT: s_add_u32 s6, s6, llvm.amdgcn.lds.offset.table@rel32@lo+8
; GCN-NEXT: s_addc_u32 s7, s7, llvm.amdgcn.lds.offset.table@rel32@hi+16
; GCN-NEXT: s_lshl_b64 s[4:5], s[4:5], 4
; GCN-NEXT: s_add_u32 s4, s4, s6
; GCN-NEXT: s_addc_u32 s5, s5, s7
; GCN-NEXT: s_load_dword s4, s[4:5], 0x0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v0, s4
; GCN-NEXT: s_mov_b32 m0, -1
; GCN-NEXT: ds_read_u16 v1, v0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mul_lo_u32 v1, v1, 3
; GCN-NEXT: ds_write_b16 v0, v1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
  %ld = load i16, ptr addrspace(3) @v1
  %mul = mul i16 %ld, 3
  store i16 %mul, ptr addrspace(3) @v1
  ret void
}

define void @f2() {
; OPT-LABEL: define void @f2() {
; OPT-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
; OPT-NEXT: [[V22:%.*]] = getelementptr inbounds [3 x [4 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 2
; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[V22]], align 4
; OPT-NEXT: [[V23:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
; OPT-NEXT: [[LD:%.*]] = load i64, ptr addrspace(3) [[V23]], align 8
; OPT-NEXT: [[MUL:%.*]] = mul i64 [[LD]], 4
; OPT-NEXT: [[V2:%.*]] = getelementptr inbounds [3 x [4 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 2
; OPT-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[V2]], align 4
; OPT-NEXT: [[V21:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(3)
; OPT-NEXT: store i64 [[MUL]], ptr addrspace(3) [[V21]], align 8
; OPT-NEXT: ret void
;
; GCN-LABEL: f2:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_mov_b32 s4, s15
; GCN-NEXT: s_ashr_i32 s5, s15, 31
; GCN-NEXT: s_getpc_b64 s[6:7]
; GCN-NEXT: s_add_u32 s6, s6, llvm.amdgcn.lds.offset.table@rel32@lo+12
; GCN-NEXT: s_addc_u32 s7, s7, llvm.amdgcn.lds.offset.table@rel32@hi+20
; GCN-NEXT: s_lshl_b64 s[4:5], s[4:5], 4
; GCN-NEXT: s_add_u32 s4, s4, s6
; GCN-NEXT: s_addc_u32 s5, s5, s7
; GCN-NEXT: s_load_dword s4, s[4:5], 0x0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v2, s4
; GCN-NEXT: s_mov_b32 m0, -1
; GCN-NEXT: ds_read_b64 v[0:1], v2
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_lshl_b64 v[0:1], v[0:1], 2
; GCN-NEXT: ds_write_b64 v2, v[0:1]
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
  %ld = load i64, ptr addrspace(3) @v2
  %mul = mul i64 %ld, 4
  store i64 %mul, ptr addrspace(3) @v2
  ret void
}

define void @f3() {
; OPT-LABEL: define void @f3() {
; OPT-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
; OPT-NEXT: [[V32:%.*]] = getelementptr inbounds [3 x [4 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 3
; OPT-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(4) [[V32]], align 4
; OPT-NEXT: [[V33:%.*]] = inttoptr i32 [[TMP2]] to ptr addrspace(3)
; OPT-NEXT: [[LD:%.*]] = load i8, ptr addrspace(3) [[V33]], align 1
; OPT-NEXT: [[MUL:%.*]] = mul i8 [[LD]], 5
; OPT-NEXT: [[V3:%.*]] = getelementptr inbounds [3 x [4 x i32]], ptr addrspace(4) @llvm.amdgcn.lds.offset.table, i32 0, i32 [[TMP1]], i32 3
; OPT-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(4) [[V3]], align 4
; OPT-NEXT: [[V31:%.*]] = inttoptr i32 [[TMP3]] to ptr addrspace(3)
; OPT-NEXT: store i8 [[MUL]], ptr addrspace(3) [[V31]], align 1
; OPT-NEXT: ret void
;
; GCN-LABEL: f3:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: s_mov_b32 s4, s15
; GCN-NEXT: s_ashr_i32 s5, s15, 31
; GCN-NEXT: s_getpc_b64 s[6:7]
; GCN-NEXT: s_add_u32 s6, s6, llvm.amdgcn.lds.offset.table@rel32@lo+16
; GCN-NEXT: s_addc_u32 s7, s7, llvm.amdgcn.lds.offset.table@rel32@hi+24
; GCN-NEXT: s_lshl_b64 s[4:5], s[4:5], 4
; GCN-NEXT: s_add_u32 s4, s4, s6
; GCN-NEXT: s_addc_u32 s5, s5, s7
; GCN-NEXT: s_load_dword s4, s[4:5], 0x0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v0, s4
; GCN-NEXT: s_mov_b32 m0, -1
; GCN-NEXT: ds_read_u8 v1, v0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mul_lo_u32 v1, v1, 5
; GCN-NEXT: ds_write_b8 v0, v1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_setpc_b64 s[30:31]
  %ld = load i8, ptr addrspace(3) @v3
  %mul = mul i8 %ld, 5
  store i8 %mul, ptr addrspace(3) @v3
  ret void
}

; Doesn't access any LDS variable via a function, so it won't be in the lookup table
define amdgpu_kernel void @kernel_no_table() {
; OPT-LABEL: define amdgpu_kernel void @kernel_no_table(
; OPT-SAME: ) #[[ATTR0:[0-9]+]] {
; OPT-NEXT: [[LD:%.*]] = load i64, ptr addrspace(3) @llvm.amdgcn.kernel.kernel_no_table.lds, align 8
; OPT-NEXT: [[MUL:%.*]] = mul i64 [[LD]], 8
; OPT-NEXT: store i64 [[MUL]], ptr addrspace(3) @llvm.amdgcn.kernel.kernel_no_table.lds, align 8
; OPT-NEXT: ret void
;
; GCN-LABEL: kernel_no_table:
; GCN: ; %bb.0:
; GCN-NEXT: v_mov_b32_e32 v2, 0
; GCN-NEXT: s_mov_b32 m0, -1
; GCN-NEXT: ds_read_b64 v[0:1], v2
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_lshl_b64 v[0:1], v[0:1], 3
; GCN-NEXT: ds_write_b64 v2, v[0:1]
; GCN-NEXT: s_endpgm
  %ld = load i64, ptr addrspace(3) @v2
  %mul = mul i64 %ld, 8
  store i64 %mul, ptr addrspace(3) @v2
  ret void
}

; Access two variables, will allocate those two
define amdgpu_kernel void @k01() {
; OPT-LABEL: define amdgpu_kernel void @k01(
; OPT-SAME: ) #[[ATTR0]] !llvm.amdgcn.lds.kernel.id [[META2:![0-9]+]] {
; OPT-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k01.lds) ], !alias.scope [[META3:![0-9]+]], !noalias [[META6:![0-9]+]]
; OPT-NEXT: call void @f0()
; OPT-NEXT: call void @f1()
; OPT-NEXT: ret void
;
; GCN-LABEL: k01:
; GCN: ; %bb.0:
; GCN-NEXT: s_mov_b32 s32, 0
; GCN-NEXT: s_mov_b32 flat_scratch_lo, s13
; GCN-NEXT: s_add_i32 s12, s12, s17
; GCN-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
; GCN-NEXT: s_add_u32 s0, s0, s17
; GCN-NEXT: s_addc_u32 s1, s1, 0
; GCN-NEXT: s_mov_b32 s20, s16
; GCN-NEXT: s_mov_b32 s13, s15
; GCN-NEXT: s_mov_b32 s12, s14
; GCN-NEXT: s_mov_b64 s[16:17], s[6:7]
; GCN-NEXT: s_mov_b64 s[18:19], s[4:5]
; GCN-NEXT: s_getpc_b64 s[4:5]
; GCN-NEXT: s_add_u32 s4, s4, f0@gotpcrel32@lo+4
; GCN-NEXT: s_addc_u32 s5, s5, f0@gotpcrel32@hi+12
; GCN-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GCN-NEXT: s_load_dwordx2 s[22:23], s[4:5], 0x0
; GCN-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GCN-NEXT: v_or_b32_e32 v0, v0, v1
; GCN-NEXT: v_or_b32_e32 v31, v0, v2
; GCN-NEXT: s_mov_b32 s15, 0
; GCN-NEXT: s_mov_b64 s[4:5], s[18:19]
; GCN-NEXT: s_mov_b32 s14, s20
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_swappc_b64 s[30:31], s[22:23]
; GCN-NEXT: s_getpc_b64 s[4:5]
; GCN-NEXT: s_add_u32 s4, s4, f1@gotpcrel32@lo+4
; GCN-NEXT: s_addc_u32 s5, s5, f1@gotpcrel32@hi+12
; GCN-NEXT: s_load_dwordx2 s[22:23], s[4:5], 0x0
; GCN-NEXT: s_mov_b64 s[4:5], s[18:19]
; GCN-NEXT: s_mov_b64 s[6:7], s[16:17]
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_swappc_b64 s[30:31], s[22:23]
; GCN-NEXT: s_endpgm



  call void @f0()
  call void @f1()
  ret void
}

define amdgpu_kernel void @k23() {
; OPT-LABEL: define amdgpu_kernel void @k23(
; OPT-SAME: ) #[[ATTR1:[0-9]+]] !llvm.amdgcn.lds.kernel.id [[META8:![0-9]+]] {
; OPT-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k23.lds) ], !alias.scope [[META9:![0-9]+]], !noalias [[META12:![0-9]+]]
; OPT-NEXT: call void @f2()
; OPT-NEXT: call void @f3()
; OPT-NEXT: ret void
;
; GCN-LABEL: k23:
; GCN: ; %bb.0:
; GCN-NEXT: s_mov_b32 s32, 0
; GCN-NEXT: s_mov_b32 flat_scratch_lo, s13
; GCN-NEXT: s_add_i32 s12, s12, s17
; GCN-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
; GCN-NEXT: s_add_u32 s0, s0, s17
; GCN-NEXT: s_addc_u32 s1, s1, 0
; GCN-NEXT: s_mov_b32 s20, s16
; GCN-NEXT: s_mov_b32 s13, s15
; GCN-NEXT: s_mov_b32 s12, s14
; GCN-NEXT: s_mov_b64 s[16:17], s[6:7]
; GCN-NEXT: s_mov_b64 s[18:19], s[4:5]
; GCN-NEXT: s_getpc_b64 s[4:5]
; GCN-NEXT: s_add_u32 s4, s4, f2@gotpcrel32@lo+4
; GCN-NEXT: s_addc_u32 s5, s5, f2@gotpcrel32@hi+12
; GCN-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GCN-NEXT: s_load_dwordx2 s[22:23], s[4:5], 0x0
; GCN-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GCN-NEXT: v_or_b32_e32 v0, v0, v1
; GCN-NEXT: v_or_b32_e32 v31, v0, v2
; GCN-NEXT: s_mov_b32 s15, 2
; GCN-NEXT: s_mov_b64 s[4:5], s[18:19]
; GCN-NEXT: s_mov_b32 s14, s20
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_swappc_b64 s[30:31], s[22:23]
; GCN-NEXT: s_getpc_b64 s[4:5]
; GCN-NEXT: s_add_u32 s4, s4, f3@gotpcrel32@lo+4
; GCN-NEXT: s_addc_u32 s5, s5, f3@gotpcrel32@hi+12
; GCN-NEXT: s_load_dwordx2 s[22:23], s[4:5], 0x0
; GCN-NEXT: s_mov_b64 s[4:5], s[18:19]
; GCN-NEXT: s_mov_b64 s[6:7], s[16:17]
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_swappc_b64 s[30:31], s[22:23]
; GCN-NEXT: s_endpgm


  call void @f2()
  call void @f3()
  ret void
}

; Access and allocate three variables
define amdgpu_kernel void @k123() {
; OPT-LABEL: define amdgpu_kernel void @k123(
; OPT-SAME: ) #[[ATTR1]] !llvm.amdgcn.lds.kernel.id [[META14:![0-9]+]] {
; OPT-NEXT: call void @llvm.donothing() [ "ExplicitUse"(ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds) ], !alias.scope [[META15:![0-9]+]], !noalias [[META18:![0-9]+]]
; OPT-NEXT: call void @f1()
; OPT-NEXT: [[LD:%.*]] = load i8, ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_K123_LDS_T:%.*]], ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1), align 2, !alias.scope [[META21:![0-9]+]], !noalias [[META22:![0-9]+]]
; OPT-NEXT: [[MUL:%.*]] = mul i8 [[LD]], 8
; OPT-NEXT: store i8 [[MUL]], ptr addrspace(3) getelementptr inbounds ([[LLVM_AMDGCN_KERNEL_K123_LDS_T]], ptr addrspace(3) @llvm.amdgcn.kernel.k123.lds, i32 0, i32 1), align 2, !alias.scope [[META21]], !noalias [[META22]]
; OPT-NEXT: call void @f2()
; OPT-NEXT: ret void
;
; GCN-LABEL: k123:
; GCN: ; %bb.0:
; GCN-NEXT: s_mov_b32 s32, 0
; GCN-NEXT: s_mov_b32 flat_scratch_lo, s13
; GCN-NEXT: s_add_i32 s12, s12, s17
; GCN-NEXT: s_lshr_b32 flat_scratch_hi, s12, 8
; GCN-NEXT: s_add_u32 s0, s0, s17
; GCN-NEXT: s_addc_u32 s1, s1, 0
; GCN-NEXT: s_mov_b32 s20, s16
; GCN-NEXT: s_mov_b32 s13, s15
; GCN-NEXT: s_mov_b32 s12, s14
; GCN-NEXT: s_mov_b64 s[16:17], s[6:7]
; GCN-NEXT: s_mov_b64 s[18:19], s[4:5]
; GCN-NEXT: s_getpc_b64 s[4:5]
; GCN-NEXT: s_add_u32 s4, s4, f1@gotpcrel32@lo+4
; GCN-NEXT: s_addc_u32 s5, s5, f1@gotpcrel32@hi+12
; GCN-NEXT: v_lshlrev_b32_e32 v2, 20, v2
; GCN-NEXT: s_load_dwordx2 s[22:23], s[4:5], 0x0
; GCN-NEXT: v_lshlrev_b32_e32 v1, 10, v1
; GCN-NEXT: v_or_b32_e32 v0, v0, v1
; GCN-NEXT: v_or_b32_e32 v31, v0, v2
; GCN-NEXT: s_mov_b32 s15, 1
; GCN-NEXT: s_mov_b64 s[4:5], s[18:19]
; GCN-NEXT: s_mov_b32 s14, s20
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_swappc_b64 s[30:31], s[22:23]
; GCN-NEXT: v_mov_b32_e32 v0, 0
; GCN-NEXT: s_mov_b32 m0, -1
; GCN-NEXT: ds_read_u8 v1, v0 offset:2
; GCN-NEXT: s_getpc_b64 s[4:5]
; GCN-NEXT: s_add_u32 s4, s4, f2@gotpcrel32@lo+4
; GCN-NEXT: s_addc_u32 s5, s5, f2@gotpcrel32@hi+12
; GCN-NEXT: s_load_dwordx2 s[22:23], s[4:5], 0x0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_lshlrev_b32_e32 v1, 3, v1
; GCN-NEXT: ds_write_b8 v0, v1 offset:2
; GCN-NEXT: s_mov_b64 s[4:5], s[18:19]
; GCN-NEXT: s_mov_b64 s[6:7], s[16:17]
; GCN-NEXT: s_swappc_b64 s[30:31], s[22:23]
; GCN-NEXT: s_endpgm


  call void @f1()
  %ld = load i8, ptr addrspace(3) @v3
  %mul = mul i8 %ld, 8
  store i8 %mul, ptr addrspace(3) @v3
  call void @f2()
  ret void
}



; OPT: attributes #0 = { "amdgpu-lds-size"="8" }
; OPT: attributes #1 = { "amdgpu-lds-size"="16" }

!0 = !{i64 0, i64 1}
!1 = !{i32 0}
!2 = !{i32 2}
!3 = !{i32 1}


; Table size is number-kernels (3) * number-variables (4) * sizeof(uint32_t) = 48 bytes
; GCN: .type llvm.amdgcn.lds.offset.table,@object
; GCN-NEXT: .section .data.rel.ro,"aw"
; GCN-NEXT: .p2align 4, 0x0
; GCN-NEXT: llvm.amdgcn.lds.offset.table:
; GCN-NEXT: .long 0+4
; GCN-NEXT: .long 0
; GCN-NEXT: .zero 4
; GCN-NEXT: .zero 4
; GCN-NEXT: .zero 4
; GCN-NEXT: .long 0
; GCN-NEXT: .long 0+8
; GCN-NEXT: .long 0+2
; GCN-NEXT: .zero 4
; GCN-NEXT: .zero 4
; GCN-NEXT: .long 0
; GCN-NEXT: .long 0+8
; GCN-NEXT: .size llvm.amdgcn.lds.offset.table, 48

!llvm.module.flags = !{!4}
!4 = !{i32 1, !"amdhsa_code_object_version", i32 500}