; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -check-prefix=MUBUF %s
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1010 -mattr=+enable-flat-scratch -verify-machineinstrs | FileCheck -check-prefix=FLATSCR %s
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs | FileCheck -check-prefix=MUBUF11 %s
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -mattr=+enable-flat-scratch -verify-machineinstrs | FileCheck -check-prefix=FLATSCR11 %s

; During instruction selection, we use an immediate constant zero for soffset in
; MUBUF stack accesses and let eliminateFrameIndex fix up this field to use
; the correct frame register whenever required.
;
; The kernel below calls svm_eval_nodes with the addresses of two stack objects
; and then, when the call returns non-zero, reads two elements of the second
; object. The expected output for the two gfx1100 runs is currently identical.
; The check lines are generated; regenerate them with the script named in the
; NOTE line rather than editing them by hand.
define amdgpu_kernel void @kernel_background_evaluate(ptr addrspace(5) %kg, ptr addrspace(1) %input, ptr addrspace(1) %output, i32 %i) {
; MUBUF-LABEL: kernel_background_evaluate:
; MUBUF:       ; %bb.0: ; %entry
; MUBUF-NEXT:    s_load_dword s0, s[4:5], 0x24
; MUBUF-NEXT:    s_mov_b32 s36, SCRATCH_RSRC_DWORD0
; MUBUF-NEXT:    s_mov_b32 s37, SCRATCH_RSRC_DWORD1
; MUBUF-NEXT:    s_mov_b32 s38, -1
; MUBUF-NEXT:    s_mov_b32 s39, 0x31c16000
; MUBUF-NEXT:    s_add_u32 s36, s36, s11
; MUBUF-NEXT:    s_addc_u32 s37, s37, 0
; MUBUF-NEXT:    v_mov_b32_e32 v1, 0x2000
; MUBUF-NEXT:    v_mov_b32_e32 v2, 0x4000
; MUBUF-NEXT:    v_mov_b32_e32 v3, 0
; MUBUF-NEXT:    v_mov_b32_e32 v4, 0x400000
; MUBUF-NEXT:    s_mov_b32 s32, 0xc0000
; MUBUF-NEXT:    s_getpc_b64 s[4:5]
; MUBUF-NEXT:    s_add_u32 s4, s4, svm_eval_nodes@rel32@lo+4
; MUBUF-NEXT:    s_addc_u32 s5, s5, svm_eval_nodes@rel32@hi+12
; MUBUF-NEXT:    s_waitcnt lgkmcnt(0)
; MUBUF-NEXT:    v_mov_b32_e32 v0, s0
; MUBUF-NEXT:    s_mov_b64 s[0:1], s[36:37]
; MUBUF-NEXT:    s_mov_b64 s[2:3], s[38:39]
; MUBUF-NEXT:    s_swappc_b64 s[30:31], s[4:5]
; MUBUF-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
; MUBUF-NEXT:    s_and_saveexec_b32 s0, vcc_lo
; MUBUF-NEXT:    s_cbranch_execz .LBB0_2
; MUBUF-NEXT:  ; %bb.1: ; %if.then4.i
; MUBUF-NEXT:    v_mov_b32_e32 v0, 0x4004
; MUBUF-NEXT:    s_mov_b32 s0, 0x41c64e6d
; MUBUF-NEXT:    s_clause 0x1
; MUBUF-NEXT:    buffer_load_dword v1, v0, s[36:39], 0 offen
; MUBUF-NEXT:    buffer_load_dword v2, v0, s[36:39], 0 offen offset:4
; MUBUF-NEXT:    s_waitcnt vmcnt(0)
; MUBUF-NEXT:    v_add_nc_u32_e32 v0, v2, v1
; MUBUF-NEXT:    v_mad_u64_u32 v[0:1], s0, v0, s0, 0x3039
; MUBUF-NEXT:    buffer_store_dword v0, v0, s[36:39], 0 offen
; MUBUF-NEXT:  .LBB0_2: ; %shader_eval_surface.exit
; MUBUF-NEXT:    s_endpgm
;
; FLATSCR-LABEL: kernel_background_evaluate:
; FLATSCR:       ; %bb.0: ; %entry
; FLATSCR-NEXT:    s_add_u32 s8, s8, s13
; FLATSCR-NEXT:    s_movk_i32 s32, 0x6000
; FLATSCR-NEXT:    s_addc_u32 s9, s9, 0
; FLATSCR-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_LO), s8
; FLATSCR-NEXT:    s_setreg_b32 hwreg(HW_REG_FLAT_SCR_HI), s9
; FLATSCR-NEXT:    s_load_dword s2, s[4:5], 0x24
; FLATSCR-NEXT:    v_mov_b32_e32 v1, 0x2000
; FLATSCR-NEXT:    v_mov_b32_e32 v2, 0x4000
; FLATSCR-NEXT:    v_mov_b32_e32 v3, 0
; FLATSCR-NEXT:    v_mov_b32_e32 v4, 0x400000
; FLATSCR-NEXT:    s_getpc_b64 s[0:1]
; FLATSCR-NEXT:    s_add_u32 s0, s0, svm_eval_nodes@rel32@lo+4
; FLATSCR-NEXT:    s_addc_u32 s1, s1, svm_eval_nodes@rel32@hi+12
; FLATSCR-NEXT:    s_waitcnt lgkmcnt(0)
; FLATSCR-NEXT:    v_mov_b32_e32 v0, s2
; FLATSCR-NEXT:    s_swappc_b64 s[30:31], s[0:1]
; FLATSCR-NEXT:    v_cmp_ne_u32_e32 vcc_lo, 0, v0
; FLATSCR-NEXT:    s_and_saveexec_b32 s0, vcc_lo
; FLATSCR-NEXT:    s_cbranch_execz .LBB0_2
; FLATSCR-NEXT:  ; %bb.1: ; %if.then4.i
; FLATSCR-NEXT:    s_movk_i32 s0, 0x4000
; FLATSCR-NEXT:    scratch_load_dwordx2 v[0:1], off, s0 offset:4
; FLATSCR-NEXT:    s_waitcnt_depctr 0xffe3
; FLATSCR-NEXT:    s_mov_b32 s0, 0x41c64e6d
; FLATSCR-NEXT:    s_waitcnt vmcnt(0)
; FLATSCR-NEXT:    v_add_nc_u32_e32 v0, v1, v0
; FLATSCR-NEXT:    v_mad_u64_u32 v[0:1], s0, v0, s0, 0x3039
; FLATSCR-NEXT:    scratch_store_dword off, v0, s0
; FLATSCR-NEXT:  .LBB0_2: ; %shader_eval_surface.exit
; FLATSCR-NEXT:    s_endpgm
;
; MUBUF11-LABEL: kernel_background_evaluate:
; MUBUF11:       ; %bb.0: ; %entry
; MUBUF11-NEXT:    s_load_b32 s2, s[4:5], 0x24
; MUBUF11-NEXT:    v_mov_b32_e32 v1, 0x2000
; MUBUF11-NEXT:    v_dual_mov_b32 v2, 0x4000 :: v_dual_mov_b32 v3, 0
; MUBUF11-NEXT:    v_mov_b32_e32 v4, 0x400000
; MUBUF11-NEXT:    s_movk_i32 s32, 0x6000
; MUBUF11-NEXT:    s_getpc_b64 s[0:1]
; MUBUF11-NEXT:    s_add_u32 s0, s0, svm_eval_nodes@rel32@lo+4
; MUBUF11-NEXT:    s_addc_u32 s1, s1, svm_eval_nodes@rel32@hi+12
; MUBUF11-NEXT:    s_waitcnt lgkmcnt(0)
; MUBUF11-NEXT:    v_mov_b32_e32 v0, s2
; MUBUF11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
; MUBUF11-NEXT:    s_mov_b32 s0, exec_lo
; MUBUF11-NEXT:    v_cmpx_ne_u32_e32 0, v0
; MUBUF11-NEXT:    s_cbranch_execz .LBB0_2
; MUBUF11-NEXT:  ; %bb.1: ; %if.then4.i
; MUBUF11-NEXT:    s_movk_i32 s0, 0x4000
; MUBUF11-NEXT:    scratch_load_b64 v[0:1], off, s0 offset:4
; MUBUF11-NEXT:    s_mov_b32 s0, 0x41c64e6d
; MUBUF11-NEXT:    s_waitcnt vmcnt(0)
; MUBUF11-NEXT:    v_add_nc_u32_e32 v2, v1, v0
; MUBUF11-NEXT:    v_mad_u64_u32 v[0:1], null, v2, s0, 0x3039
; MUBUF11-NEXT:    scratch_store_b32 off, v0, s0
; MUBUF11-NEXT:  .LBB0_2: ; %shader_eval_surface.exit
; MUBUF11-NEXT:    s_endpgm
;
; FLATSCR11-LABEL: kernel_background_evaluate:
; FLATSCR11:       ; %bb.0: ; %entry
; FLATSCR11-NEXT:    s_load_b32 s2, s[4:5], 0x24
; FLATSCR11-NEXT:    v_mov_b32_e32 v1, 0x2000
; FLATSCR11-NEXT:    v_dual_mov_b32 v2, 0x4000 :: v_dual_mov_b32 v3, 0
; FLATSCR11-NEXT:    v_mov_b32_e32 v4, 0x400000
; FLATSCR11-NEXT:    s_movk_i32 s32, 0x6000
; FLATSCR11-NEXT:    s_getpc_b64 s[0:1]
; FLATSCR11-NEXT:    s_add_u32 s0, s0, svm_eval_nodes@rel32@lo+4
; FLATSCR11-NEXT:    s_addc_u32 s1, s1, svm_eval_nodes@rel32@hi+12
; FLATSCR11-NEXT:    s_waitcnt lgkmcnt(0)
; FLATSCR11-NEXT:    v_mov_b32_e32 v0, s2
; FLATSCR11-NEXT:    s_swappc_b64 s[30:31], s[0:1]
; FLATSCR11-NEXT:    s_mov_b32 s0, exec_lo
; FLATSCR11-NEXT:    v_cmpx_ne_u32_e32 0, v0
; FLATSCR11-NEXT:    s_cbranch_execz .LBB0_2
; FLATSCR11-NEXT:  ; %bb.1: ; %if.then4.i
; FLATSCR11-NEXT:    s_movk_i32 s0, 0x4000
; FLATSCR11-NEXT:    scratch_load_b64 v[0:1], off, s0 offset:4
; FLATSCR11-NEXT:    s_mov_b32 s0, 0x41c64e6d
; FLATSCR11-NEXT:    s_waitcnt vmcnt(0)
; FLATSCR11-NEXT:    v_add_nc_u32_e32 v2, v1, v0
; FLATSCR11-NEXT:    v_mad_u64_u32 v[0:1], null, v2, s0, 0x3039
; FLATSCR11-NEXT:    scratch_store_b32 off, v0, s0
; FLATSCR11-NEXT:  .LBB0_2: ; %shader_eval_surface.exit
; FLATSCR11-NEXT:    s_endpgm
entry:
  ; Two addrspace(5) allocas: %sd requests 8192-byte alignment, %state only 16.
  %sd = alloca < 1339 x i32>, align 8192, addrspace(5)
  %state = alloca <4 x i32>, align 16, addrspace(5)
  ; Both alloca addresses are passed to the external callee, along with %kg.
  %rslt = call i32 @svm_eval_nodes(ptr addrspace(5) %kg, ptr addrspace(5) %sd, ptr addrspace(5) %state, i32 0, i32 4194304)
  ; Skip the stack reads entirely when the callee returns zero.
  %cmp = icmp eq i32 %rslt, 0
  br i1 %cmp, label %shader_eval_surface.exit, label %if.then4.i

if.then4.i:                                       ; preds = %entry
  ; Load elements 1 and 2 of %state; these are the frame-index accesses whose
  ; addressing the check lines above pin down.
  %rng_hash.i.i = getelementptr inbounds < 4 x i32>, ptr addrspace(5) %state, i32 0, i32 1
  %tmp0 = load i32, ptr addrspace(5) %rng_hash.i.i, align 4
  %rng_offset.i.i = getelementptr inbounds <4 x i32>, ptr addrspace(5) %state, i32 0, i32 2
  %tmp1 = load i32, ptr addrspace(5) %rng_offset.i.i, align 4
  ; Compute (tmp1 + tmp0 + 0) * 1103515245 + 12345.
  %add.i.i = add i32 %tmp1, %tmp0
  %add1.i.i = add i32 %add.i.i, 0
  %mul.i.i.i.i = mul i32 %add1.i.i, 1103515245
  %add.i.i.i.i = add i32 %mul.i.i.i.i, 12345
  ; The destination pointer is undef; the store only serves to use the value
  ; computed from the two loads.
  store i32 %add.i.i.i.i, ptr addrspace(5) undef, align 16
  br label %shader_eval_surface.exit

shader_eval_surface.exit:                         ; preds = %if.then4.i, %entry
  ret void
}

; External callee; only its declaration is visible in this file.
declare hidden i32 @svm_eval_nodes(ptr addrspace(5), ptr addrspace(5), ptr addrspace(5), i32, i32) local_unnamed_addr #0

attributes #0 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }