; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: opt -S -passes='require<profile-summary>,function(codegenprepare)' -mtriple=amdgcn-unknown-unknown -mcpu=bonaire < %s | FileCheck --check-prefixes=OPT,OPT-GFX7 %s
; RUN: opt -S -passes='require<profile-summary>,function(codegenprepare)' -mtriple=amdgcn-unknown-unknown -mcpu=tonga < %s | FileCheck --check-prefixes=OPT,OPT-GFX8 %s
; RUN: opt -S -passes='require<profile-summary>,function(codegenprepare)' -mtriple=amdgcn-unknown-unknown -mcpu=gfx900 < %s | FileCheck --check-prefixes=OPT,OPT-GFX9 %s
; RUN: opt -S -passes='require<profile-summary>,function(codegenprepare)' -mtriple=amdgcn-unknown-unknown -mcpu=gfx1030 < %s | FileCheck --check-prefixes=OPT,OPT-GFX10 %s

; RUN: llc -mtriple=amdgcn -mcpu=bonaire < %s | FileCheck --check-prefix=GFX7 %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck --check-prefix=GFX8 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck --check-prefix=GFX9 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck --check-prefix=GFX10 %s

; Address-computation sinking tests for flat pointers.
;
; Every function below builds its addresses (%out.gep, %in.gep) in %entry but
; only dereferences the input address inside the conditional %if block. The
; opt runs check, per -mcpu, whether codegenprepare rebuilds that address next
; to the load as an i8 getelementptr (captured as SUNKADDR in the checks) or
; leaves the original GEP in %entry. The llc runs check the resulting ISA,
; i.e. whether the constant ends up in the memory instruction's offset field
; or in a separate 64-bit add.

; Flat i32 load at a constant byte offset of 28 (i32 index 7), guarded by
; %cond. Per the checks, the gfx900 and gfx1030 opt runs sink the address into
; %if and the matching llc runs fold it as offset 28 on flat_load_dword, while
; the bonaire and tonga opt runs keep the GEP in %entry and the ISA performs an
; explicit 64-bit add before the load. The store address (i32 index 999999) is
; never sunk; it is used in %endif, outside the conditional block.
define void @test_sinkable_flat_small_offset_i32(ptr %out, ptr %in, i32 %cond) {
; OPT-GFX7-LABEL: @test_sinkable_flat_small_offset_i32(
; OPT-GFX7-NEXT:  entry:
; OPT-GFX7-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
; OPT-GFX7-NEXT:    [[IN_GEP:%.*]] = getelementptr i32, ptr [[IN:%.*]], i64 7
; OPT-GFX7-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
; OPT-GFX7-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
; OPT-GFX7:       if:
; OPT-GFX7-NEXT:    [[LOAD:%.*]] = load i32, ptr [[IN_GEP]], align 4
; OPT-GFX7-NEXT:    br label [[ENDIF]]
; OPT-GFX7:       endif:
; OPT-GFX7-NEXT:    [[X:%.*]] = phi i32 [ [[LOAD]], [[IF]] ], [ 0, [[ENTRY:%.*]] ]
; OPT-GFX7-NEXT:    store i32 [[X]], ptr [[OUT_GEP]], align 4
; OPT-GFX7-NEXT:    br label [[DONE:%.*]]
; OPT-GFX7:       done:
; OPT-GFX7-NEXT:    ret void
;
; OPT-GFX8-LABEL: @test_sinkable_flat_small_offset_i32(
; OPT-GFX8-NEXT:  entry:
; OPT-GFX8-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
; OPT-GFX8-NEXT:    [[IN_GEP:%.*]] = getelementptr i32, ptr [[IN:%.*]], i64 7
; OPT-GFX8-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
; OPT-GFX8-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
; OPT-GFX8:       if:
; OPT-GFX8-NEXT:    [[LOAD:%.*]] = load i32, ptr [[IN_GEP]], align 4
; OPT-GFX8-NEXT:    br label [[ENDIF]]
; OPT-GFX8:       endif:
; OPT-GFX8-NEXT:    [[X:%.*]] = phi i32 [ [[LOAD]], [[IF]] ], [ 0, [[ENTRY:%.*]] ]
; OPT-GFX8-NEXT:    store i32 [[X]], ptr [[OUT_GEP]], align 4
; OPT-GFX8-NEXT:    br label [[DONE:%.*]]
; OPT-GFX8:       done:
; OPT-GFX8-NEXT:    ret void
;
; OPT-GFX9-LABEL: @test_sinkable_flat_small_offset_i32(
; OPT-GFX9-NEXT:  entry:
; OPT-GFX9-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
; OPT-GFX9-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
; OPT-GFX9-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
; OPT-GFX9:       if:
; OPT-GFX9-NEXT:    [[SUNKADDR:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 28
; OPT-GFX9-NEXT:    [[LOAD:%.*]] = load i32, ptr [[SUNKADDR]], align 4
; OPT-GFX9-NEXT:    br label [[ENDIF]]
; OPT-GFX9:       endif:
; OPT-GFX9-NEXT:    [[X:%.*]] = phi i32 [ [[LOAD]], [[IF]] ], [ 0, [[ENTRY:%.*]] ]
; OPT-GFX9-NEXT:    store i32 [[X]], ptr [[OUT_GEP]], align 4
; OPT-GFX9-NEXT:    ret void
;
; OPT-GFX10-LABEL: @test_sinkable_flat_small_offset_i32(
; OPT-GFX10-NEXT:  entry:
; OPT-GFX10-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
; OPT-GFX10-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
; OPT-GFX10-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
; OPT-GFX10:       if:
; OPT-GFX10-NEXT:    [[SUNKADDR:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 28
; OPT-GFX10-NEXT:    [[LOAD:%.*]] = load i32, ptr [[SUNKADDR]], align 4
; OPT-GFX10-NEXT:    br label [[ENDIF]]
; OPT-GFX10:       endif:
; OPT-GFX10-NEXT:    [[X:%.*]] = phi i32 [ [[LOAD]], [[IF]] ], [ 0, [[ENTRY:%.*]] ]
; OPT-GFX10-NEXT:    store i32 [[X]], ptr [[OUT_GEP]], align 4
; OPT-GFX10-NEXT:    ret void
;
; GFX7-LABEL: test_sinkable_flat_small_offset_i32:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
; GFX7-NEXT:    v_mov_b32_e32 v4, 0
; GFX7-NEXT:    s_and_saveexec_b64 s[4:5], vcc
; GFX7-NEXT:    s_cbranch_execz .LBB0_2
; GFX7-NEXT:  ; %bb.1: ; %if
; GFX7-NEXT:    v_add_i32_e32 v2, vcc, 28, v2
; GFX7-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
; GFX7-NEXT:    flat_load_dword v4, v[2:3]
; GFX7-NEXT:  .LBB0_2: ; %endif
; GFX7-NEXT:    s_or_b64 exec, exec, s[4:5]
; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 0x3d08fc, v0
; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT:    flat_store_dword v[0:1], v4
; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: test_sinkable_flat_small_offset_i32:
; GFX8:       ; %bb.0: ; %entry
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
; GFX8-NEXT:    v_mov_b32_e32 v4, 0
; GFX8-NEXT:    s_and_saveexec_b64 s[4:5], vcc
; GFX8-NEXT:    s_cbranch_execz .LBB0_2
; GFX8-NEXT:  ; %bb.1: ; %if
; GFX8-NEXT:    v_add_u32_e32 v2, vcc, 28, v2
; GFX8-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
; GFX8-NEXT:    flat_load_dword v4, v[2:3]
; GFX8-NEXT:  .LBB0_2: ; %endif
; GFX8-NEXT:    s_or_b64 exec, exec, s[4:5]
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 0x3d08fc, v0
; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_store_dword v[0:1], v4
; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: test_sinkable_flat_small_offset_i32:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
; GFX9-NEXT:    v_mov_b32_e32 v4, 0
; GFX9-NEXT:    s_and_saveexec_b64 s[4:5], vcc
; GFX9-NEXT:    s_cbranch_execz .LBB0_2
; GFX9-NEXT:  ; %bb.1: ; %if
; GFX9-NEXT:    flat_load_dword v4, v[2:3] offset:28
; GFX9-NEXT:  .LBB0_2: ; %endif
; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x3d0000, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT:    flat_store_dword v[0:1], v4 offset:2300
; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_sinkable_flat_small_offset_i32:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v4
; GFX10-NEXT:    v_mov_b32_e32 v4, 0
; GFX10-NEXT:    s_and_saveexec_b32 s4, vcc_lo
; GFX10-NEXT:    s_cbranch_execz .LBB0_2
; GFX10-NEXT:  ; %bb.1: ; %if
; GFX10-NEXT:    flat_load_dword v4, v[2:3] offset:28
; GFX10-NEXT:  .LBB0_2: ; %endif
; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s4
; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x3d0800, v0
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT:    flat_store_dword v[0:1], v4 offset:252
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
entry:
  %out.gep = getelementptr i32, ptr %out, i64 999999
  %in.gep = getelementptr i32, ptr %in, i64 7
  %cmp0 = icmp eq i32 %cond, 0
  br i1 %cmp0, label %endif, label %if

if:
  %load = load i32, ptr %in.gep
  br label %endif

endif:
  %x = phi i32 [ %load, %if ], [ 0, %entry ]
  store i32 %x, ptr %out.gep
  br label %done

done:
  ret void
}

; Same shape as the test above, but %in.gep is addrspacecast from a flat
; pointer to addrspace(1) in %entry and the load goes through the cast. Per the
; checks, the bonaire, gfx900 and gfx1030 opt runs sink both the cast and the
; 28-byte offset into %if (ISA folds offset 28 into buffer_load_dword or
; global_load_dword). The tonga opt run sinks only the addrspacecast and keeps
; the GEP in %entry, and its ISA uses flat_load_dword after an explicit add.
define void @test_sink_noop_addrspacecast_flat_to_global_i32(ptr %out, ptr %in, i32 %cond) {
; OPT-GFX7-LABEL: @test_sink_noop_addrspacecast_flat_to_global_i32(
; OPT-GFX7-NEXT:  entry:
; OPT-GFX7-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
; OPT-GFX7-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
; OPT-GFX7-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
; OPT-GFX7:       if:
; OPT-GFX7-NEXT:    [[TMP0:%.*]] = addrspacecast ptr [[IN:%.*]] to ptr addrspace(1)
; OPT-GFX7-NEXT:    [[SUNKADDR:%.*]] = getelementptr i8, ptr addrspace(1) [[TMP0]], i64 28
; OPT-GFX7-NEXT:    [[LOAD:%.*]] = load i32, ptr addrspace(1) [[SUNKADDR]], align 4
; OPT-GFX7-NEXT:    br label [[ENDIF]]
; OPT-GFX7:       endif:
; OPT-GFX7-NEXT:    [[X:%.*]] = phi i32 [ [[LOAD]], [[IF]] ], [ 0, [[ENTRY:%.*]] ]
; OPT-GFX7-NEXT:    store i32 [[X]], ptr [[OUT_GEP]], align 4
; OPT-GFX7-NEXT:    ret void
;
; OPT-GFX8-LABEL: @test_sink_noop_addrspacecast_flat_to_global_i32(
; OPT-GFX8-NEXT:  entry:
; OPT-GFX8-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
; OPT-GFX8-NEXT:    [[IN_GEP:%.*]] = getelementptr i32, ptr [[IN:%.*]], i64 7
; OPT-GFX8-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
; OPT-GFX8-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
; OPT-GFX8:       if:
; OPT-GFX8-NEXT:    [[TMP0:%.*]] = addrspacecast ptr [[IN_GEP]] to ptr addrspace(1)
; OPT-GFX8-NEXT:    [[LOAD:%.*]] = load i32, ptr addrspace(1) [[TMP0]], align 4
; OPT-GFX8-NEXT:    br label [[ENDIF]]
; OPT-GFX8:       endif:
; OPT-GFX8-NEXT:    [[X:%.*]] = phi i32 [ [[LOAD]], [[IF]] ], [ 0, [[ENTRY:%.*]] ]
; OPT-GFX8-NEXT:    store i32 [[X]], ptr [[OUT_GEP]], align 4
; OPT-GFX8-NEXT:    ret void
;
; OPT-GFX9-LABEL: @test_sink_noop_addrspacecast_flat_to_global_i32(
; OPT-GFX9-NEXT:  entry:
; OPT-GFX9-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
; OPT-GFX9-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
; OPT-GFX9-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
; OPT-GFX9:       if:
; OPT-GFX9-NEXT:    [[TMP0:%.*]] = addrspacecast ptr [[IN:%.*]] to ptr addrspace(1)
; OPT-GFX9-NEXT:    [[SUNKADDR:%.*]] = getelementptr i8, ptr addrspace(1) [[TMP0]], i64 28
; OPT-GFX9-NEXT:    [[LOAD:%.*]] = load i32, ptr addrspace(1) [[SUNKADDR]], align 4
; OPT-GFX9-NEXT:    br label [[ENDIF]]
; OPT-GFX9:       endif:
; OPT-GFX9-NEXT:    [[X:%.*]] = phi i32 [ [[LOAD]], [[IF]] ], [ 0, [[ENTRY:%.*]] ]
; OPT-GFX9-NEXT:    store i32 [[X]], ptr [[OUT_GEP]], align 4
; OPT-GFX9-NEXT:    ret void
;
; OPT-GFX10-LABEL: @test_sink_noop_addrspacecast_flat_to_global_i32(
; OPT-GFX10-NEXT:  entry:
; OPT-GFX10-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
; OPT-GFX10-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
; OPT-GFX10-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
; OPT-GFX10:       if:
; OPT-GFX10-NEXT:    [[TMP0:%.*]] = addrspacecast ptr [[IN:%.*]] to ptr addrspace(1)
; OPT-GFX10-NEXT:    [[SUNKADDR:%.*]] = getelementptr i8, ptr addrspace(1) [[TMP0]], i64 28
; OPT-GFX10-NEXT:    [[LOAD:%.*]] = load i32, ptr addrspace(1) [[SUNKADDR]], align 4
; OPT-GFX10-NEXT:    br label [[ENDIF]]
; OPT-GFX10:       endif:
; OPT-GFX10-NEXT:    [[X:%.*]] = phi i32 [ [[LOAD]], [[IF]] ], [ 0, [[ENTRY:%.*]] ]
; OPT-GFX10-NEXT:    store i32 [[X]], ptr [[OUT_GEP]], align 4
; OPT-GFX10-NEXT:    ret void
;
; GFX7-LABEL: test_sink_noop_addrspacecast_flat_to_global_i32:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
; GFX7-NEXT:    v_mov_b32_e32 v4, 0
; GFX7-NEXT:    s_and_saveexec_b64 s[8:9], vcc
; GFX7-NEXT:    s_cbranch_execz .LBB1_2
; GFX7-NEXT:  ; %bb.1: ; %if
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b32 s4, s6
; GFX7-NEXT:    s_mov_b32 s5, s6
; GFX7-NEXT:    buffer_load_dword v4, v[2:3], s[4:7], 0 addr64 offset:28
; GFX7-NEXT:  .LBB1_2: ; %endif
; GFX7-NEXT:    s_or_b64 exec, exec, s[8:9]
; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 0x3d08fc, v0
; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    flat_store_dword v[0:1], v4
; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: test_sink_noop_addrspacecast_flat_to_global_i32:
; GFX8:       ; %bb.0: ; %entry
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
; GFX8-NEXT:    v_mov_b32_e32 v4, 0
; GFX8-NEXT:    s_and_saveexec_b64 s[4:5], vcc
; GFX8-NEXT:    s_cbranch_execz .LBB1_2
; GFX8-NEXT:  ; %bb.1: ; %if
; GFX8-NEXT:    v_add_u32_e32 v2, vcc, 28, v2
; GFX8-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
; GFX8-NEXT:    flat_load_dword v4, v[2:3]
; GFX8-NEXT:  .LBB1_2: ; %endif
; GFX8-NEXT:    s_or_b64 exec, exec, s[4:5]
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 0x3d08fc, v0
; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    flat_store_dword v[0:1], v4
; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: test_sink_noop_addrspacecast_flat_to_global_i32:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
; GFX9-NEXT:    v_mov_b32_e32 v4, 0
; GFX9-NEXT:    s_and_saveexec_b64 s[4:5], vcc
; GFX9-NEXT:    s_cbranch_execz .LBB1_2
; GFX9-NEXT:  ; %bb.1: ; %if
; GFX9-NEXT:    global_load_dword v4, v[2:3], off offset:28
; GFX9-NEXT:  .LBB1_2: ; %endif
; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x3d0000, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    flat_store_dword v[0:1], v4 offset:2300
; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_sink_noop_addrspacecast_flat_to_global_i32:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v4
; GFX10-NEXT:    v_mov_b32_e32 v4, 0
; GFX10-NEXT:    s_and_saveexec_b32 s4, vcc_lo
; GFX10-NEXT:    s_cbranch_execz .LBB1_2
; GFX10-NEXT:  ; %bb.1: ; %if
; GFX10-NEXT:    global_load_dword v4, v[2:3], off offset:28
; GFX10-NEXT:  .LBB1_2: ; %endif
; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s4
; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x3d0800, v0
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    flat_store_dword v[0:1], v4 offset:252
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
entry:
  %out.gep = getelementptr i32, ptr %out, i64 999999
  %in.gep = getelementptr i32, ptr %in, i64 7
  %cast = addrspacecast ptr %in.gep to ptr addrspace(1)
  %cmp0 = icmp eq i32 %cond, 0
  br i1 %cmp0, label %endif, label %if

if:
  %load = load i32, ptr addrspace(1) %cast
  br label %endif

endif:
  %x = phi i32 [ %load, %if ], [ 0, %entry ]
  store i32 %x, ptr %out.gep
  br label %done

done:
  ret void
}

; Variant of the previous test with the cast targeting addrspace(4) instead of
; addrspace(1). All four opt runs share one set of expectations here: the
; addrspacecast and the 28-byte i8 GEP are both rebuilt inside %if. The ISA
; checks still differ per -mcpu (buffer_load_dword on bonaire, add plus
; flat_load_dword on tonga, global_load_dword on gfx900 and gfx1030).
define void @test_sink_noop_addrspacecast_flat_to_constant_i32(ptr %out, ptr %in, i32 %cond) {
; OPT-LABEL: @test_sink_noop_addrspacecast_flat_to_constant_i32(
; OPT-NEXT:  entry:
; OPT-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
; OPT-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
; OPT-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
; OPT:       if:
; OPT-NEXT:    [[TMP0:%.*]] = addrspacecast ptr [[IN:%.*]] to ptr addrspace(4)
; OPT-NEXT:    [[SUNKADDR:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP0]], i64 28
; OPT-NEXT:    [[LOAD:%.*]] = load i32, ptr addrspace(4) [[SUNKADDR]], align 4
; OPT-NEXT:    br label [[ENDIF]]
; OPT:       endif:
; OPT-NEXT:    [[X:%.*]] = phi i32 [ [[LOAD]], [[IF]] ], [ 0, [[ENTRY:%.*]] ]
; OPT-NEXT:    store i32 [[X]], ptr [[OUT_GEP]], align 4
; OPT-NEXT:    ret void
;
; GFX7-LABEL: test_sink_noop_addrspacecast_flat_to_constant_i32:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_mov_b32 s6, 0
; GFX7-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
; GFX7-NEXT:    v_mov_b32_e32 v4, 0
; GFX7-NEXT:    s_and_saveexec_b64 s[8:9], vcc
; GFX7-NEXT:    s_cbranch_execz .LBB2_2
; GFX7-NEXT:  ; %bb.1: ; %if
; GFX7-NEXT:    s_mov_b32 s7, 0xf000
; GFX7-NEXT:    s_mov_b32 s4, s6
; GFX7-NEXT:    s_mov_b32 s5, s6
; GFX7-NEXT:    buffer_load_dword v4, v[2:3], s[4:7], 0 addr64 offset:28
; GFX7-NEXT:  .LBB2_2: ; %endif
; GFX7-NEXT:    s_or_b64 exec, exec, s[8:9]
; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 0x3d08fc, v0
; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX7-NEXT:    s_waitcnt vmcnt(0)
; GFX7-NEXT:    flat_store_dword v[0:1], v4
; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: test_sink_noop_addrspacecast_flat_to_constant_i32:
; GFX8:       ; %bb.0: ; %entry
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
; GFX8-NEXT:    v_mov_b32_e32 v4, 0
; GFX8-NEXT:    s_and_saveexec_b64 s[4:5], vcc
; GFX8-NEXT:    s_cbranch_execz .LBB2_2
; GFX8-NEXT:  ; %bb.1: ; %if
; GFX8-NEXT:    v_add_u32_e32 v2, vcc, 28, v2
; GFX8-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
; GFX8-NEXT:    flat_load_dword v4, v[2:3]
; GFX8-NEXT:  .LBB2_2: ; %endif
; GFX8-NEXT:    s_or_b64 exec, exec, s[4:5]
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 0x3d08fc, v0
; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT:    s_waitcnt vmcnt(0)
; GFX8-NEXT:    flat_store_dword v[0:1], v4
; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: test_sink_noop_addrspacecast_flat_to_constant_i32:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
; GFX9-NEXT:    v_mov_b32_e32 v4, 0
; GFX9-NEXT:    s_and_saveexec_b64 s[4:5], vcc
; GFX9-NEXT:    s_cbranch_execz .LBB2_2
; GFX9-NEXT:  ; %bb.1: ; %if
; GFX9-NEXT:    global_load_dword v4, v[2:3], off offset:28
; GFX9-NEXT:  .LBB2_2: ; %endif
; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x3d0000, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    flat_store_dword v[0:1], v4 offset:2300
; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_sink_noop_addrspacecast_flat_to_constant_i32:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v4
; GFX10-NEXT:    v_mov_b32_e32 v4, 0
; GFX10-NEXT:    s_and_saveexec_b32 s4, vcc_lo
; GFX10-NEXT:    s_cbranch_execz .LBB2_2
; GFX10-NEXT:  ; %bb.1: ; %if
; GFX10-NEXT:    global_load_dword v4, v[2:3], off offset:28
; GFX10-NEXT:  .LBB2_2: ; %endif
; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s4
; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x3d0800, v0
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    flat_store_dword v[0:1], v4 offset:252
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
entry:
  %out.gep = getelementptr i32, ptr %out, i64 999999
  %in.gep = getelementptr i32, ptr %in, i64 7
  %cast = addrspacecast ptr %in.gep to ptr addrspace(4)
  %cmp0 = icmp eq i32 %cond, 0
  br i1 %cmp0, label %endif, label %if

if:
  %load = load i32, ptr addrspace(4) %cast
  br label %endif

endif:
  %x = phi i32 [ %load, %if ], [ 0, %entry ]
  store i32 %x, ptr %out.gep
  br label %done

done:
  ret void
}

; Sign-extending i8 flat load at byte offset 4095; the branch condition comes
; from llvm.amdgcn.mbcnt.lo rather than from an argument. Per the checks, only
; the gfx900 opt run sinks the address (ISA folds offset 4095 into
; flat_load_sbyte). The bonaire, tonga and gfx1030 opt runs keep the GEP in
; %entry; the gfx1030 ISA splits the constant into an add of 0x800 plus an
; instruction offset of 2047.
define void @test_sink_flat_small_max_flat_offset(ptr %out, ptr %in) #1 {
; OPT-GFX7-LABEL: @test_sink_flat_small_max_flat_offset(
; OPT-GFX7-NEXT:  entry:
; OPT-GFX7-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i32 1024
; OPT-GFX7-NEXT:    [[IN_GEP:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 4095
; OPT-GFX7-NEXT:    [[TID:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #[[ATTR3:[0-9]+]]
; OPT-GFX7-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[TID]], 0
; OPT-GFX7-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
; OPT-GFX7:       if:
; OPT-GFX7-NEXT:    [[LOAD:%.*]] = load i8, ptr [[IN_GEP]], align 1
; OPT-GFX7-NEXT:    [[CAST:%.*]] = sext i8 [[LOAD]] to i32
; OPT-GFX7-NEXT:    br label [[ENDIF]]
; OPT-GFX7:       endif:
; OPT-GFX7-NEXT:    [[X:%.*]] = phi i32 [ [[CAST]], [[IF]] ], [ 0, [[ENTRY:%.*]] ]
; OPT-GFX7-NEXT:    store i32 [[X]], ptr [[OUT_GEP]], align 4
; OPT-GFX7-NEXT:    br label [[DONE:%.*]]
; OPT-GFX7:       done:
; OPT-GFX7-NEXT:    ret void
;
; OPT-GFX8-LABEL: @test_sink_flat_small_max_flat_offset(
; OPT-GFX8-NEXT:  entry:
; OPT-GFX8-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i32 1024
; OPT-GFX8-NEXT:    [[IN_GEP:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 4095
; OPT-GFX8-NEXT:    [[TID:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #[[ATTR3:[0-9]+]]
; OPT-GFX8-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[TID]], 0
; OPT-GFX8-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
; OPT-GFX8:       if:
; OPT-GFX8-NEXT:    [[LOAD:%.*]] = load i8, ptr [[IN_GEP]], align 1
; OPT-GFX8-NEXT:    [[CAST:%.*]] = sext i8 [[LOAD]] to i32
; OPT-GFX8-NEXT:    br label [[ENDIF]]
; OPT-GFX8:       endif:
; OPT-GFX8-NEXT:    [[X:%.*]] = phi i32 [ [[CAST]], [[IF]] ], [ 0, [[ENTRY:%.*]] ]
; OPT-GFX8-NEXT:    store i32 [[X]], ptr [[OUT_GEP]], align 4
; OPT-GFX8-NEXT:    br label [[DONE:%.*]]
; OPT-GFX8:       done:
; OPT-GFX8-NEXT:    ret void
;
; OPT-GFX9-LABEL: @test_sink_flat_small_max_flat_offset(
; OPT-GFX9-NEXT:  entry:
; OPT-GFX9-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i32 1024
; OPT-GFX9-NEXT:    [[TID:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #[[ATTR3:[0-9]+]]
; OPT-GFX9-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[TID]], 0
; OPT-GFX9-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
; OPT-GFX9:       if:
; OPT-GFX9-NEXT:    [[SUNKADDR:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 4095
; OPT-GFX9-NEXT:    [[LOAD:%.*]] = load i8, ptr [[SUNKADDR]], align 1
; OPT-GFX9-NEXT:    [[CAST:%.*]] = sext i8 [[LOAD]] to i32
; OPT-GFX9-NEXT:    br label [[ENDIF]]
; OPT-GFX9:       endif:
; OPT-GFX9-NEXT:    [[X:%.*]] = phi i32 [ [[CAST]], [[IF]] ], [ 0, [[ENTRY:%.*]] ]
; OPT-GFX9-NEXT:    store i32 [[X]], ptr [[OUT_GEP]], align 4
; OPT-GFX9-NEXT:    ret void
;
; OPT-GFX10-LABEL: @test_sink_flat_small_max_flat_offset(
; OPT-GFX10-NEXT:  entry:
; OPT-GFX10-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i32 1024
; OPT-GFX10-NEXT:    [[IN_GEP:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 4095
; OPT-GFX10-NEXT:    [[TID:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #[[ATTR3:[0-9]+]]
; OPT-GFX10-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[TID]], 0
; OPT-GFX10-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
; OPT-GFX10:       if:
; OPT-GFX10-NEXT:    [[LOAD:%.*]] = load i8, ptr [[IN_GEP]], align 1
; OPT-GFX10-NEXT:    [[CAST:%.*]] = sext i8 [[LOAD]] to i32
; OPT-GFX10-NEXT:    br label [[ENDIF]]
; OPT-GFX10:       endif:
; OPT-GFX10-NEXT:    [[X:%.*]] = phi i32 [ [[CAST]], [[IF]] ], [ 0, [[ENTRY:%.*]] ]
; OPT-GFX10-NEXT:    store i32 [[X]], ptr [[OUT_GEP]], align 4
; OPT-GFX10-NEXT:    br label [[DONE:%.*]]
; OPT-GFX10:       done:
; OPT-GFX10-NEXT:    ret void
;
; GFX7-LABEL: test_sink_flat_small_max_flat_offset:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_mbcnt_lo_u32_b32_e64 v5, -1, 0
; GFX7-NEXT:    v_mov_b32_e32 v4, 0
; GFX7-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
; GFX7-NEXT:    s_and_saveexec_b64 s[4:5], vcc
; GFX7-NEXT:    s_cbranch_execz .LBB3_2
; GFX7-NEXT:  ; %bb.1: ; %if
; GFX7-NEXT:    v_add_i32_e32 v2, vcc, 0xfff, v2
; GFX7-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
; GFX7-NEXT:    flat_load_sbyte v4, v[2:3]
; GFX7-NEXT:  .LBB3_2: ; %endif
; GFX7-NEXT:    s_or_b64 exec, exec, s[4:5]
; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 0x1000, v0
; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT:    flat_store_dword v[0:1], v4
; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: test_sink_flat_small_max_flat_offset:
; GFX8:       ; %bb.0: ; %entry
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_mbcnt_lo_u32_b32 v5, -1, 0
; GFX8-NEXT:    v_mov_b32_e32 v4, 0
; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
; GFX8-NEXT:    s_and_saveexec_b64 s[4:5], vcc
; GFX8-NEXT:    s_cbranch_execz .LBB3_2
; GFX8-NEXT:  ; %bb.1: ; %if
; GFX8-NEXT:    v_add_u32_e32 v2, vcc, 0xfff, v2
; GFX8-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
; GFX8-NEXT:    flat_load_sbyte v4, v[2:3]
; GFX8-NEXT:  .LBB3_2: ; %endif
; GFX8-NEXT:    s_or_b64 exec, exec, s[4:5]
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 0x1000, v0
; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_store_dword v[0:1], v4
; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: test_sink_flat_small_max_flat_offset:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mbcnt_lo_u32_b32 v5, -1, 0
; GFX9-NEXT:    v_mov_b32_e32 v4, 0
; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
; GFX9-NEXT:    s_and_saveexec_b64 s[4:5], vcc
; GFX9-NEXT:    s_cbranch_execz .LBB3_2
; GFX9-NEXT:  ; %bb.1: ; %if
; GFX9-NEXT:    flat_load_sbyte v4, v[2:3] offset:4095
; GFX9-NEXT:  .LBB3_2: ; %endif
; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT:    flat_store_dword v[0:1], v4
; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_sink_flat_small_max_flat_offset:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_mbcnt_lo_u32_b32 v4, -1, 0
; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v4
; GFX10-NEXT:    v_mov_b32_e32 v4, 0
; GFX10-NEXT:    s_and_saveexec_b32 s4, vcc_lo
; GFX10-NEXT:    s_cbranch_execz .LBB3_2
; GFX10-NEXT:  ; %bb.1: ; %if
; GFX10-NEXT:    v_add_co_u32 v2, vcc_lo, 0x800, v2
; GFX10-NEXT:    v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo
; GFX10-NEXT:    flat_load_sbyte v4, v[2:3] offset:2047
; GFX10-NEXT:  .LBB3_2: ; %endif
; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s4
; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT:    flat_store_dword v[0:1], v4
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
entry:
  %out.gep = getelementptr i32, ptr %out, i32 1024
  %in.gep = getelementptr i8, ptr %in, i64 4095
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %cmp0 = icmp eq i32 %tid, 0
  br i1 %cmp0, label %endif, label %if

if:
  %load = load i8, ptr %in.gep
  %cast = sext i8 %load to i32
  br label %endif

endif:
  %x = phi i32 [ %cast, %if ], [ 0, %entry ]
  store i32 %x, ptr %out.gep
  br label %done

done:
  ret void
}

; Same as the previous test with the load offset raised by one byte to 4096.
; All four opt runs share one set of expectations: the GEP stays in %entry and
; no sunk address is created. Every ISA variant adds 0x1000 to the pointer
; explicitly and issues flat_load_sbyte without an instruction offset.
define void @test_sink_flat_small_max_plus_1_flat_offset(ptr %out, ptr %in) #1 {
; OPT-LABEL: @test_sink_flat_small_max_plus_1_flat_offset(
; OPT-NEXT:  entry:
; OPT-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 99999
; OPT-NEXT:    [[IN_GEP:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 4096
; OPT-NEXT:    [[TID:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #[[ATTR3:[0-9]+]]
; OPT-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[TID]], 0
; OPT-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
; OPT:       if:
; OPT-NEXT:    [[LOAD:%.*]] = load i8, ptr [[IN_GEP]], align 1
; OPT-NEXT:    [[CAST:%.*]] = sext i8 [[LOAD]] to i32
; OPT-NEXT:    br label [[ENDIF]]
; OPT:       endif:
; OPT-NEXT:    [[X:%.*]] = phi i32 [ [[CAST]], [[IF]] ], [ 0, [[ENTRY:%.*]] ]
; OPT-NEXT:    store i32 [[X]], ptr [[OUT_GEP]], align 4
; OPT-NEXT:    br label [[DONE:%.*]]
; OPT:       done:
; OPT-NEXT:    ret void
;
; GFX7-LABEL: test_sink_flat_small_max_plus_1_flat_offset:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_mbcnt_lo_u32_b32_e64 v5, -1, 0
; GFX7-NEXT:    v_mov_b32_e32 v4, 0
; GFX7-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
; GFX7-NEXT:    s_and_saveexec_b64 s[4:5], vcc
; GFX7-NEXT:    s_cbranch_execz .LBB4_2
; GFX7-NEXT:  ; %bb.1: ; %if
; GFX7-NEXT:    v_add_i32_e32 v2, vcc, 0x1000, v2
; GFX7-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
; GFX7-NEXT:    flat_load_sbyte v4, v[2:3]
; GFX7-NEXT:  .LBB4_2: ; %endif
; GFX7-NEXT:    s_or_b64 exec, exec, s[4:5]
; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 0x61a7c, v0
; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT:    flat_store_dword v[0:1], v4
; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: test_sink_flat_small_max_plus_1_flat_offset:
; GFX8:       ; %bb.0: ; %entry
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_mbcnt_lo_u32_b32 v5, -1, 0
; GFX8-NEXT:    v_mov_b32_e32 v4, 0
; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
; GFX8-NEXT:    s_and_saveexec_b64 s[4:5], vcc
; GFX8-NEXT:    s_cbranch_execz .LBB4_2
; GFX8-NEXT:  ; %bb.1: ; %if
; GFX8-NEXT:    v_add_u32_e32 v2, vcc, 0x1000, v2
; GFX8-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
; GFX8-NEXT:    flat_load_sbyte v4, v[2:3]
; GFX8-NEXT:  .LBB4_2: ; %endif
; GFX8-NEXT:    s_or_b64 exec, exec, s[4:5]
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 0x61a7c, v0
; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_store_dword v[0:1], v4
; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: test_sink_flat_small_max_plus_1_flat_offset:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mbcnt_lo_u32_b32 v5, -1, 0
; GFX9-NEXT:    v_mov_b32_e32 v4, 0
; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v5
; GFX9-NEXT:    s_and_saveexec_b64 s[4:5], vcc
; GFX9-NEXT:    s_cbranch_execz .LBB4_2
; GFX9-NEXT:  ; %bb.1: ; %if
; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, 0x1000, v2
; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
; GFX9-NEXT:    flat_load_sbyte v4, v[2:3]
; GFX9-NEXT:  .LBB4_2: ; %endif
; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x61000, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT:    flat_store_dword v[0:1], v4 offset:2684
; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_sink_flat_small_max_plus_1_flat_offset:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_mbcnt_lo_u32_b32 v4, -1, 0
; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v4
; GFX10-NEXT:    v_mov_b32_e32 v4, 0
; GFX10-NEXT:    s_and_saveexec_b32 s4, vcc_lo
; GFX10-NEXT:    s_cbranch_execz .LBB4_2
; GFX10-NEXT:  ; %bb.1: ; %if
; GFX10-NEXT:    v_add_co_u32 v2, vcc_lo, 0x1000, v2
; GFX10-NEXT:    v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo
; GFX10-NEXT:    flat_load_sbyte v4, v[2:3]
; GFX10-NEXT:  .LBB4_2: ; %endif
; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s4
; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x61800, v0
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT:    flat_store_dword v[0:1], v4 offset:636
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
entry:
  %out.gep = getelementptr i32, ptr %out, i64 99999
  %in.gep = getelementptr i8, ptr %in, i64 4096
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %cmp0 = icmp eq i32 %tid, 0
  br i1 %cmp0, label %endif, label %if

if:
  %load = load i8, ptr %in.gep
  %cast = sext i8 %load to i32
  br label %endif

endif:
  %x = phi i32 [ %cast, %if ], [ 0, %entry ]
  store i32 %x, ptr %out.gep
  br label %done

done:
  ret void
}

; The load offset is the runtime value %reg instead of a constant. All four
; opt runs share one set of expectations: the GEP stays in %entry unchanged.
; Every ISA variant adds the 64-bit register pair to the pointer and issues
; flat_load_sbyte without an instruction offset.
define void @test_sinkable_flat_reg_offset(ptr %out, ptr %in, i64 %reg) #1 {
; OPT-LABEL: @test_sinkable_flat_reg_offset(
; OPT-NEXT:  entry:
; OPT-NEXT:    [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i32 1024
; OPT-NEXT:    [[IN_GEP:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 [[REG:%.*]]
; OPT-NEXT:    [[TID:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #[[ATTR3]]
; OPT-NEXT:    [[CMP0:%.*]] = icmp eq i32 [[TID]], 0
; OPT-NEXT:    br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
; OPT:       if:
; OPT-NEXT:    [[LOAD:%.*]] = load i8, ptr [[IN_GEP]], align 1
; OPT-NEXT:    [[CAST:%.*]] = sext i8 [[LOAD]] to i32
; OPT-NEXT:    br label [[ENDIF]]
; OPT:       endif:
; OPT-NEXT:    [[X:%.*]] = phi i32 [ [[CAST]], [[IF]] ], [ 0, [[ENTRY:%.*]] ]
; OPT-NEXT:    store i32 [[X]], ptr [[OUT_GEP]], align 4
; OPT-NEXT:    br label [[DONE:%.*]]
; OPT:       done:
; OPT-NEXT:    ret void
;
; GFX7-LABEL: test_sinkable_flat_reg_offset:
; GFX7:       ; %bb.0: ; %entry
; GFX7-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT:    v_mbcnt_lo_u32_b32_e64 v7, -1, 0
; GFX7-NEXT:    v_mov_b32_e32 v6, 0
; GFX7-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v7
; GFX7-NEXT:    s_and_saveexec_b64 s[4:5], vcc
; GFX7-NEXT:    s_cbranch_execz .LBB5_2
; GFX7-NEXT:  ; %bb.1: ; %if
; GFX7-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
; GFX7-NEXT:    v_addc_u32_e32 v3, vcc, v3, v5, vcc
; GFX7-NEXT:    flat_load_sbyte v6, v[2:3]
; GFX7-NEXT:  .LBB5_2: ; %endif
; GFX7-NEXT:    s_or_b64 exec, exec, s[4:5]
; GFX7-NEXT:    v_add_i32_e32 v0, vcc, 0x1000, v0
; GFX7-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT:    flat_store_dword v[0:1], v6
; GFX7-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX7-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: test_sinkable_flat_reg_offset:
; GFX8:       ; %bb.0: ; %entry
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_mbcnt_lo_u32_b32 v7, -1, 0
; GFX8-NEXT:    v_mov_b32_e32 v6, 0
; GFX8-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v7
; GFX8-NEXT:    s_and_saveexec_b64 s[4:5], vcc
; GFX8-NEXT:    s_cbranch_execz .LBB5_2
; GFX8-NEXT:  ; %bb.1: ; %if
; GFX8-NEXT:    v_add_u32_e32 v2, vcc, v2, v4
; GFX8-NEXT:    v_addc_u32_e32 v3, vcc, v3, v5, vcc
; GFX8-NEXT:    flat_load_sbyte v6, v[2:3]
; GFX8-NEXT:  .LBB5_2: ; %endif
; GFX8-NEXT:    s_or_b64 exec, exec, s[4:5]
; GFX8-NEXT:    v_add_u32_e32 v0, vcc, 0x1000, v0
; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX8-NEXT:    flat_store_dword v[0:1], v6
; GFX8-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: test_sinkable_flat_reg_offset:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mbcnt_lo_u32_b32 v7, -1, 0
; GFX9-NEXT:    v_mov_b32_e32 v6, 0
; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v7
; GFX9-NEXT:    s_and_saveexec_b64 s[4:5], vcc
; GFX9-NEXT:    s_cbranch_execz .LBB5_2
; GFX9-NEXT:  ; %bb.1: ; %if
; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v2, v4
; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, v3, v5, vcc
; GFX9-NEXT:    flat_load_sbyte v6, v[2:3]
; GFX9-NEXT:  .LBB5_2: ; %endif
; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0x1000, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT:    flat_store_dword v[0:1], v6
; GFX9-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_sinkable_flat_reg_offset:
; GFX10:       ; %bb.0: ; %entry
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_mbcnt_lo_u32_b32 v6, -1, 0
; GFX10-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v6
; GFX10-NEXT:    v_mov_b32_e32 v6, 0
; GFX10-NEXT:    s_and_saveexec_b32 s4, vcc_lo
; GFX10-NEXT:    s_cbranch_execz .LBB5_2
; GFX10-NEXT:  ; %bb.1: ; %if
; GFX10-NEXT:    v_add_co_u32 v2, vcc_lo, v2, v4
; GFX10-NEXT:    v_add_co_ci_u32_e32 v3, vcc_lo, v3, v5, vcc_lo
; GFX10-NEXT:    flat_load_sbyte v6, v[2:3]
; GFX10-NEXT:  .LBB5_2: ; %endif
; GFX10-NEXT:    s_or_b32 exec_lo, exec_lo, s4
; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, 0x1000, v0
; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
; GFX10-NEXT:    s_waitcnt vmcnt(0) lgkmcnt(0)
; GFX10-NEXT:    flat_store_dword v[0:1], v6
; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10-NEXT:    s_setpc_b64 s[30:31]
entry:
  %out.gep = getelementptr i32, ptr %out, i32 1024
  %in.gep = getelementptr i8, ptr %in, i64 %reg
  %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
  %cmp0 = icmp eq i32 %tid, 0
  br i1 %cmp0, label %endif, label %if

if:
  %load = load i8, ptr %in.gep
  %cast = sext i8 %load to i32
  br label %endif

endif:
  %x = phi i32 [ %cast, %if ], [ 0, %entry ]
  store i32 %x, ptr %out.gep
  br label %done

done:
  ret void
}

; Intrinsic used by the last three tests to produce the branch condition.
declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0

; #0 is attached to the mbcnt.lo declaration and to each of its call sites;
; #1 is attached to the three functions that call it.
; NOTE(review): #2 is not referenced by any function, declaration or call site
; in this file; presumably a leftover. Confirm before removing it.
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }
attributes #2 = { nounwind argmemonly }