; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=amdgcn -mcpu=gfx600 < %s | FileCheck %s -check-prefixes=GFX6789,GFX678,GFX689,GFX67,GFX6
; RUN: llc -mtriple=amdgcn -mcpu=gfx700 < %s | FileCheck %s -check-prefixes=GFX6789,GFX678,GFX67,GFX7
; RUN: llc -mtriple=amdgcn -mcpu=gfx801 < %s | FileCheck %s -check-prefixes=GFX6789,GFX678,GFX689,GFX89
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck %s -check-prefixes=GFX6789,GFX689,GFX89,GFX9
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck %s -check-prefixes=GFX12

; Every test below has the same shape: the entry block forms a constant-offset
; i8 GEP from an inreg pointer, and the loop body performs a volatile i32 load
; through that GEP while counting %val down to zero. The expected output records,
; per target, whether the constant shows up as the scalar load's immediate
; offset or as a separate add on the base pointer ahead of the loop.

; Offset 400 (0x190). No target is expected to emit an add before the loop:
; the GFX67 checks show the load immediate as 0x64 (100, i.e. 400/4), while the
; GFX89 and GFX12 checks show 0x190.
define amdgpu_cs void @test_sink_smem_offset_400(ptr addrspace(4) inreg %ptr, i32 inreg %val) {
; GFX67-LABEL: test_sink_smem_offset_400:
; GFX67: ; %bb.0: ; %entry
; GFX67-NEXT: .LBB0_1: ; %loop
; GFX67-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX67-NEXT: s_waitcnt lgkmcnt(0)
; GFX67-NEXT: s_load_dword s3, s[0:1], 0x64
; GFX67-NEXT: s_add_i32 s2, s2, -1
; GFX67-NEXT: s_cmp_lg_u32 s2, 0
; GFX67-NEXT: s_cbranch_scc1 .LBB0_1
; GFX67-NEXT: ; %bb.2: ; %end
; GFX67-NEXT: s_endpgm
;
; GFX89-LABEL: test_sink_smem_offset_400:
; GFX89: ; %bb.0: ; %entry
; GFX89-NEXT: .LBB0_1: ; %loop
; GFX89-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX89-NEXT: s_waitcnt lgkmcnt(0)
; GFX89-NEXT: s_load_dword s3, s[0:1], 0x190
; GFX89-NEXT: s_add_i32 s2, s2, -1
; GFX89-NEXT: s_cmp_lg_u32 s2, 0
; GFX89-NEXT: s_cbranch_scc1 .LBB0_1
; GFX89-NEXT: ; %bb.2: ; %end
; GFX89-NEXT: s_endpgm
;
; GFX12-LABEL: test_sink_smem_offset_400:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: .LBB0_1: ; %loop
; GFX12-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_load_b32 s3, s[0:1], 0x190
; GFX12-NEXT: s_add_co_i32 s2, s2, -1
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-NEXT: s_cmp_lg_u32 s2, 0
; GFX12-NEXT: s_cbranch_scc1 .LBB0_1
; GFX12-NEXT: ; %bb.2: ; %end
; GFX12-NEXT: s_endpgm
entry:
  ; The address is formed once, outside the loop that uses it.
  %gep = getelementptr i8, ptr addrspace(4) %ptr, i64 400
  br label %loop

loop:
  ; Down-counter seeded from %val; the loop exits when it reaches zero.
  %count = phi i32 [ %dec, %loop ], [ %val, %entry ]
  %dec = sub i32 %count, 1
  ; %load has no users; presumably `volatile` is what keeps the access alive.
  %load = load volatile i32, ptr addrspace(4) %gep
  %cond = icmp eq i32 %dec, 0
  br i1 %cond, label %end, label %loop

end:
  ret void
}

; Offset 4000 (0xfa0). The GFX6 checks expect the constant to be added to
; s[0:1] before the loop with the load at offset 0x0. The GFX7 checks expect an
; immediate of 0x3e8 (1000, i.e. 4000/4); GFX89 and GFX12 expect an immediate
; of 0xfa0.
define amdgpu_cs void @test_sink_smem_offset_4000(ptr addrspace(4) inreg %ptr, i32 inreg %val) {
; GFX6-LABEL: test_sink_smem_offset_4000:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_add_u32 s0, s0, 0xfa0
; GFX6-NEXT: s_addc_u32 s1, s1, 0
; GFX6-NEXT: .LBB1_1: ; %loop
; GFX6-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_load_dword s3, s[0:1], 0x0
; GFX6-NEXT: s_add_i32 s2, s2, -1
; GFX6-NEXT: s_cmp_lg_u32 s2, 0
; GFX6-NEXT: s_cbranch_scc1 .LBB1_1
; GFX6-NEXT: ; %bb.2: ; %end
; GFX6-NEXT: s_endpgm
;
; GFX7-LABEL: test_sink_smem_offset_4000:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: .LBB1_1: ; %loop
; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_load_dword s3, s[0:1], 0x3e8
; GFX7-NEXT: s_add_i32 s2, s2, -1
; GFX7-NEXT: s_cmp_lg_u32 s2, 0
; GFX7-NEXT: s_cbranch_scc1 .LBB1_1
; GFX7-NEXT: ; %bb.2: ; %end
; GFX7-NEXT: s_endpgm
;
; GFX89-LABEL: test_sink_smem_offset_4000:
; GFX89: ; %bb.0: ; %entry
; GFX89-NEXT: .LBB1_1: ; %loop
; GFX89-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX89-NEXT: s_waitcnt lgkmcnt(0)
; GFX89-NEXT: s_load_dword s3, s[0:1], 0xfa0
; GFX89-NEXT: s_add_i32 s2, s2, -1
; GFX89-NEXT: s_cmp_lg_u32 s2, 0
; GFX89-NEXT: s_cbranch_scc1 .LBB1_1
; GFX89-NEXT: ; %bb.2: ; %end
; GFX89-NEXT: s_endpgm
;
; GFX12-LABEL: test_sink_smem_offset_4000:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: .LBB1_1: ; %loop
; GFX12-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_load_b32 s3, s[0:1], 0xfa0
; GFX12-NEXT: s_add_co_i32 s2, s2, -1
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-NEXT: s_cmp_lg_u32 s2, 0
; GFX12-NEXT: s_cbranch_scc1 .LBB1_1
; GFX12-NEXT: ; %bb.2: ; %end
; GFX12-NEXT: s_endpgm
entry:
  ; The address is formed once, outside the loop that uses it.
  %gep = getelementptr i8, ptr addrspace(4) %ptr, i64 4000
  br label %loop

loop:
  ; Down-counter seeded from %val; the loop exits when it reaches zero.
  %count = phi i32 [ %dec, %loop ], [ %val, %entry ]
  %dec = sub i32 %count, 1
  ; %load has no users; presumably `volatile` is what keeps the access alive.
  %load = load volatile i32, ptr addrspace(4) %gep
  %cond = icmp eq i32 %dec, 0
  br i1 %cond, label %end, label %loop

end:
  ret void
}

; Offset 4000000 (0x3d0900). The GFX689 checks expect an add on s[0:1] before
; the loop and a load at offset 0x0. The GFX7 checks expect an immediate of
; 0xf4240 (1000000, i.e. 4000000/4); the GFX12 checks expect an immediate of
; 0x3d0900.
define amdgpu_cs void @test_sink_smem_offset_4000000(ptr addrspace(4) inreg %ptr, i32 inreg %val) {
; GFX689-LABEL: test_sink_smem_offset_4000000:
; GFX689: ; %bb.0: ; %entry
; GFX689-NEXT: s_add_u32 s0, s0, 0x3d0900
; GFX689-NEXT: s_addc_u32 s1, s1, 0
; GFX689-NEXT: .LBB2_1: ; %loop
; GFX689-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX689-NEXT: s_waitcnt lgkmcnt(0)
; GFX689-NEXT: s_load_dword s3, s[0:1], 0x0
; GFX689-NEXT: s_add_i32 s2, s2, -1
; GFX689-NEXT: s_cmp_lg_u32 s2, 0
; GFX689-NEXT: s_cbranch_scc1 .LBB2_1
; GFX689-NEXT: ; %bb.2: ; %end
; GFX689-NEXT: s_endpgm
;
; GFX7-LABEL: test_sink_smem_offset_4000000:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: .LBB2_1: ; %loop
; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_load_dword s3, s[0:1], 0xf4240
; GFX7-NEXT: s_add_i32 s2, s2, -1
; GFX7-NEXT: s_cmp_lg_u32 s2, 0
; GFX7-NEXT: s_cbranch_scc1 .LBB2_1
; GFX7-NEXT: ; %bb.2: ; %end
; GFX7-NEXT: s_endpgm
;
; GFX12-LABEL: test_sink_smem_offset_4000000:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: .LBB2_1: ; %loop
; GFX12-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_load_b32 s3, s[0:1], 0x3d0900
; GFX12-NEXT: s_add_co_i32 s2, s2, -1
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-NEXT: s_cmp_lg_u32 s2, 0
; GFX12-NEXT: s_cbranch_scc1 .LBB2_1
; GFX12-NEXT: ; %bb.2: ; %end
; GFX12-NEXT: s_endpgm
entry:
  ; The address is formed once, outside the loop that uses it.
  %gep = getelementptr i8, ptr addrspace(4) %ptr, i64 4000000
  br label %loop

loop:
  ; Down-counter seeded from %val; the loop exits when it reaches zero.
  %count = phi i32 [ %dec, %loop ], [ %val, %entry ]
  %dec = sub i32 %count, 1
  ; %load has no users; presumably `volatile` is what keeps the access alive.
  %load = load volatile i32, ptr addrspace(4) %gep
  %cond = icmp eq i32 %dec, 0
  br i1 %cond, label %end, label %loop

end:
  ret void
}

; Offset 40000000 (0x2625a00). The GFX689 checks expect s_add_u32/s_addc_u32 on
; s[0:1] before the loop, and the GFX12 checks expect a single s_add_nc_u64;
; both then load at offset 0x0. Only the GFX7 checks expect the constant as the
; load immediate: 0x989680 (10000000, i.e. 40000000/4).
define amdgpu_cs void @test_sink_smem_offset_40000000(ptr addrspace(4) inreg %ptr, i32 inreg %val) {
; GFX689-LABEL: test_sink_smem_offset_40000000:
; GFX689: ; %bb.0: ; %entry
; GFX689-NEXT: s_add_u32 s0, s0, 0x2625a00
; GFX689-NEXT: s_addc_u32 s1, s1, 0
; GFX689-NEXT: .LBB3_1: ; %loop
; GFX689-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX689-NEXT: s_waitcnt lgkmcnt(0)
; GFX689-NEXT: s_load_dword s3, s[0:1], 0x0
; GFX689-NEXT: s_add_i32 s2, s2, -1
; GFX689-NEXT: s_cmp_lg_u32 s2, 0
; GFX689-NEXT: s_cbranch_scc1 .LBB3_1
; GFX689-NEXT: ; %bb.2: ; %end
; GFX689-NEXT: s_endpgm
;
; GFX7-LABEL: test_sink_smem_offset_40000000:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: .LBB3_1: ; %loop
; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_load_dword s3, s[0:1], 0x989680
; GFX7-NEXT: s_add_i32 s2, s2, -1
; GFX7-NEXT: s_cmp_lg_u32 s2, 0
; GFX7-NEXT: s_cbranch_scc1 .LBB3_1
; GFX7-NEXT: ; %bb.2: ; %end
; GFX7-NEXT: s_endpgm
;
; GFX12-LABEL: test_sink_smem_offset_40000000:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_add_nc_u64 s[0:1], s[0:1], 0x2625a00
; GFX12-NEXT: .LBB3_1: ; %loop
; GFX12-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_load_b32 s3, s[0:1], 0x0
; GFX12-NEXT: s_add_co_i32 s2, s2, -1
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-NEXT: s_cmp_lg_u32 s2, 0
; GFX12-NEXT: s_cbranch_scc1 .LBB3_1
; GFX12-NEXT: ; %bb.2: ; %end
; GFX12-NEXT: s_endpgm
entry:
  ; The address is formed once, outside the loop that uses it.
  %gep = getelementptr i8, ptr addrspace(4) %ptr, i64 40000000
  br label %loop

loop:
  ; Down-counter seeded from %val; the loop exits when it reaches zero.
  %count = phi i32 [ %dec, %loop ], [ %val, %entry ]
  %dec = sub i32 %count, 1
  ; %load has no users; presumably `volatile` is what keeps the access alive.
  %load = load volatile i32, ptr addrspace(4) %gep
  %cond = icmp eq i32 %dec, 0
  br i1 %cond, label %end, label %loop

end:
  ret void
}

; Offset 40000000000 (0x9_502f9000, wider than 32 bits). Every checked target
; is expected to adjust the base before the loop and load at offset 0x0. The
; GFX6789 checks add 0x502f9000 to s0 and 9 (with carry) to s1; the GFX12
; checks first place the two halves in s[4:5] and then use s_add_nc_u64.
define amdgpu_cs void @test_sink_smem_offset_40000000000(ptr addrspace(4) inreg %ptr, i32 inreg %val) {
; GFX6789-LABEL: test_sink_smem_offset_40000000000:
; GFX6789: ; %bb.0: ; %entry
; GFX6789-NEXT: s_add_u32 s0, s0, 0x502f9000
; GFX6789-NEXT: s_addc_u32 s1, s1, 9
; GFX6789-NEXT: .LBB4_1: ; %loop
; GFX6789-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX6789-NEXT: s_waitcnt lgkmcnt(0)
; GFX6789-NEXT: s_load_dword s3, s[0:1], 0x0
; GFX6789-NEXT: s_add_i32 s2, s2, -1
; GFX6789-NEXT: s_cmp_lg_u32 s2, 0
; GFX6789-NEXT: s_cbranch_scc1 .LBB4_1
; GFX6789-NEXT: ; %bb.2: ; %end
; GFX6789-NEXT: s_endpgm
;
; GFX12-LABEL: test_sink_smem_offset_40000000000:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_mov_b32 s4, 0x502f9000
; GFX12-NEXT: s_mov_b32 s5, 9
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[4:5]
; GFX12-NEXT: .LBB4_1: ; %loop
; GFX12-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_load_b32 s3, s[0:1], 0x0
; GFX12-NEXT: s_add_co_i32 s2, s2, -1
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-NEXT: s_cmp_lg_u32 s2, 0
; GFX12-NEXT: s_cbranch_scc1 .LBB4_1
; GFX12-NEXT: ; %bb.2: ; %end
; GFX12-NEXT: s_endpgm
entry:
  ; The address is formed once, outside the loop that uses it.
  %gep = getelementptr i8, ptr addrspace(4) %ptr, i64 40000000000
  br label %loop

loop:
  ; Down-counter seeded from %val; the loop exits when it reaches zero.
  %count = phi i32 [ %dec, %loop ], [ %val, %entry ]
  %dec = sub i32 %count, 1
  ; %load has no users; presumably `volatile` is what keeps the access alive.
  %load = load volatile i32, ptr addrspace(4) %gep
  %cond = icmp eq i32 %dec, 0
  br i1 %cond, label %end, label %loop

end:
  ret void
}

; Offset -400 on a 64-bit addrspace(4) pointer. No checked target is expected
; to put the negative constant in the load's immediate field: each adjusts
; s[0:1] before the loop and loads at offset 0x0. The GFX6789 checks add
; 0xfffffe70 to s0 and -1 (with carry) to s1; the GFX12 checks build the
; constant in s[4:5] (s_movk_i32 0xfe70, s_mov_b32 -1) and use s_add_nc_u64.
define amdgpu_cs void @test_sink_smem_offset_neg400(ptr addrspace(4) inreg %ptr, i32 inreg %val) {
; GFX6789-LABEL: test_sink_smem_offset_neg400:
; GFX6789: ; %bb.0: ; %entry
; GFX6789-NEXT: s_add_u32 s0, s0, 0xfffffe70
; GFX6789-NEXT: s_addc_u32 s1, s1, -1
; GFX6789-NEXT: .LBB5_1: ; %loop
; GFX6789-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX6789-NEXT: s_waitcnt lgkmcnt(0)
; GFX6789-NEXT: s_load_dword s3, s[0:1], 0x0
; GFX6789-NEXT: s_add_i32 s2, s2, -1
; GFX6789-NEXT: s_cmp_lg_u32 s2, 0
; GFX6789-NEXT: s_cbranch_scc1 .LBB5_1
; GFX6789-NEXT: ; %bb.2: ; %end
; GFX6789-NEXT: s_endpgm
;
; GFX12-LABEL: test_sink_smem_offset_neg400:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_movk_i32 s4, 0xfe70
; GFX12-NEXT: s_mov_b32 s5, -1
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[4:5]
; GFX12-NEXT: .LBB5_1: ; %loop
; GFX12-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_load_b32 s3, s[0:1], 0x0
; GFX12-NEXT: s_add_co_i32 s2, s2, -1
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-NEXT: s_cmp_lg_u32 s2, 0
; GFX12-NEXT: s_cbranch_scc1 .LBB5_1
; GFX12-NEXT: ; %bb.2: ; %end
; GFX12-NEXT: s_endpgm
entry:
  ; The address is formed once, outside the loop that uses it.
  %gep = getelementptr i8, ptr addrspace(4) %ptr, i64 -400
  br label %loop

loop:
  ; Down-counter seeded from %val; the loop exits when it reaches zero.
  %count = phi i32 [ %dec, %loop ], [ %val, %entry ]
  %dec = sub i32 %count, 1
  ; %load has no users; presumably `volatile` is what keeps the access alive.
  %load = load volatile i32, ptr addrspace(4) %gep
  %cond = icmp eq i32 %dec, 0
  br i1 %cond, label %end, label %loop

end:
  ret void
}

; Same for address space 6, constant 32-bit.
; Offset -400 applied to an addrspace(6) pointer, which arrives in a single
; SGPR (s0) with the loop count in s1. Both check groups expect one 32-bit add
; of 0xfffffe70 into s2 and s3 set to 0 ahead of the loop; the load then reads
; through s[2:3] at offset 0x0 and overwrites s0.
define amdgpu_cs void @test_sink_smem_offset_neg400_32bit(ptr addrspace(6) inreg %ptr, i32 inreg %val) {
; GFX6789-LABEL: test_sink_smem_offset_neg400_32bit:
; GFX6789: ; %bb.0: ; %entry
; GFX6789-NEXT: s_add_i32 s2, s0, 0xfffffe70
; GFX6789-NEXT: s_mov_b32 s3, 0
; GFX6789-NEXT: .LBB6_1: ; %loop
; GFX6789-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX6789-NEXT: s_waitcnt lgkmcnt(0)
; GFX6789-NEXT: s_load_dword s0, s[2:3], 0x0
; GFX6789-NEXT: s_add_i32 s1, s1, -1
; GFX6789-NEXT: s_cmp_lg_u32 s1, 0
; GFX6789-NEXT: s_cbranch_scc1 .LBB6_1
; GFX6789-NEXT: ; %bb.2: ; %end
; GFX6789-NEXT: s_endpgm
;
; GFX12-LABEL: test_sink_smem_offset_neg400_32bit:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_add_co_i32 s2, s0, 0xfffffe70
; GFX12-NEXT: s_mov_b32 s3, 0
; GFX12-NEXT: .LBB6_1: ; %loop
; GFX12-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: s_load_b32 s0, s[2:3], 0x0
; GFX12-NEXT: s_add_co_i32 s1, s1, -1
; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-NEXT: s_cmp_lg_u32 s1, 0
; GFX12-NEXT: s_cbranch_scc1 .LBB6_1
; GFX12-NEXT: ; %bb.2: ; %end
; GFX12-NEXT: s_endpgm
entry:
  ; The address is formed once, outside the loop that uses it. The GEP index
  ; is still written as i64 even though the pointer is in addrspace(6).
  %gep = getelementptr i8, ptr addrspace(6) %ptr, i64 -400
  br label %loop

loop:
  ; Down-counter seeded from %val; the loop exits when it reaches zero.
  %count = phi i32 [ %dec, %loop ], [ %val, %entry ]
  %dec = sub i32 %count, 1
  ; %load has no users; presumably `volatile` is what keeps the access alive.
  %load = load volatile i32, ptr addrspace(6) %gep
  %cond = icmp eq i32 %dec, 0
  br i1 %cond, label %end, label %loop

end:
  ret void
}

;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX678: {{.*}}
; GFX9: {{.*}}