; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX11,GISEL11 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX11,DAGISEL11 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10,GISEL10 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10,DAGISEL10 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX11_W64,GISEL11_W64 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1100 -mattr=+wavefrontsize64 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX11_W64,DAGISEL11_W64 %s
; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1030 -mattr=+wavefrontsize64 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10_W64,GISEL10_W64 %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx1030 -mattr=+wavefrontsize64 -verify-machineinstrs < %s | FileCheck --check-prefixes=GFX10_W64,DAGISEL10_W64 %s

; Codegen checks for llvm.amdgcn.set.inactive.chain.arg inside amdgpu_cs_chain
; functions. Each function is compiled for gfx1100 and gfx1030, with and without
; +wavefrontsize64, and with both GlobalISel and SelectionDAG.

; Simplest case: one i32 produced by set.inactive.chain.arg is consumed by
; strict.wwm and stored through %out. The expected code copies v10 with every
; lane enabled and then uses v_cndmask with the saved exec mask to pick v11.
define amdgpu_cs_chain void @set_inactive_chain_arg(ptr addrspace(1) %out, i32 %inactive, i32 %active) {
; GFX11-LABEL: set_inactive_chain_arg:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_or_saveexec_b32 s0, -1
; GFX11-NEXT: v_mov_b32_e32 v0, v10
; GFX11-NEXT: s_mov_b32 exec_lo, s0
; GFX11-NEXT: s_or_saveexec_b32 s0, -1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v11, s0
; GFX11-NEXT: s_mov_b32 exec_lo, s0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_mov_b32_e32 v1, v0
; GFX11-NEXT: global_store_b32 v[8:9], v1, off
; GFX11-NEXT: s_endpgm
;
; GFX10-LABEL: set_inactive_chain_arg:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-NEXT: v_mov_b32_e32 v0, v10
; GFX10-NEXT: s_mov_b32 exec_lo, s0
; GFX10-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v11, s0
; GFX10-NEXT: s_mov_b32 exec_lo, s0
; GFX10-NEXT: v_mov_b32_e32 v1, v0
; GFX10-NEXT: global_store_dword v[8:9], v1, off
; GFX10-NEXT: s_endpgm
;
; GFX11_W64-LABEL: set_inactive_chain_arg:
; GFX11_W64: ; %bb.0:
; GFX11_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11_W64-NEXT: s_or_saveexec_b64 s[0:1], -1
; GFX11_W64-NEXT: v_mov_b32_e32 v0, v10
; GFX11_W64-NEXT: s_mov_b64 exec, s[0:1]
; GFX11_W64-NEXT: s_or_saveexec_b64 s[0:1], -1
; GFX11_W64-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX11_W64-NEXT: v_cndmask_b32_e64 v0, v0, v11, s[0:1]
; GFX11_W64-NEXT: s_mov_b64 exec, s[0:1]
; GFX11_W64-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11_W64-NEXT: v_mov_b32_e32 v1, v0
; GFX11_W64-NEXT: global_store_b32 v[8:9], v1, off
; GFX11_W64-NEXT: s_endpgm
;
; GFX10_W64-LABEL: set_inactive_chain_arg:
; GFX10_W64: ; %bb.0:
; GFX10_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10_W64-NEXT: s_or_saveexec_b64 s[0:1], -1
; GFX10_W64-NEXT: v_mov_b32_e32 v0, v10
; GFX10_W64-NEXT: s_mov_b64 exec, s[0:1]
; GFX10_W64-NEXT: s_or_saveexec_b64 s[0:1], -1
; GFX10_W64-NEXT: v_cndmask_b32_e64 v0, v0, v11, s[0:1]
; GFX10_W64-NEXT: s_mov_b64 exec, s[0:1]
; GFX10_W64-NEXT: v_mov_b32_e32 v1, v0
; GFX10_W64-NEXT: global_store_dword v[8:9], v1, off
; GFX10_W64-NEXT: s_endpgm
  %merged = call i32 @llvm.amdgcn.set.inactive.chain.arg.i32(i32 %active, i32 %inactive) #0
  %all_lanes = call i32 @llvm.amdgcn.strict.wwm.i32(i32 %merged)
  store i32 %all_lanes, ptr addrspace(1) %out
  ret void
}

; Same pattern with i64 operands. The expected code handles the value as two
; 32-bit halves, each with its own v_cndmask.
define amdgpu_cs_chain void @set_inactive_chain_arg_64(ptr addrspace(1) %out, i64 %inactive, i64 %active) {
; GISEL11-LABEL: set_inactive_chain_arg_64:
; GISEL11: ; %bb.0:
; GISEL11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL11-NEXT: s_or_saveexec_b32 s0, -1
; GISEL11-NEXT: v_dual_mov_b32 v0, v10 :: v_dual_mov_b32 v1, v11
; GISEL11-NEXT: s_mov_b32 exec_lo, s0
; GISEL11-NEXT: s_or_saveexec_b32 s0, -1
; GISEL11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GISEL11-NEXT: v_cndmask_b32_e64 v0, v0, v12, s0
; GISEL11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GISEL11-NEXT: v_cndmask_b32_e64 v1, v1, v13, s0
; GISEL11-NEXT: s_mov_b32 exec_lo, s0
; GISEL11-NEXT: v_mov_b32_e32 v2, v0
; GISEL11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GISEL11-NEXT: v_mov_b32_e32 v3, v1
; GISEL11-NEXT: global_store_b64 v[8:9], v[2:3], off
; GISEL11-NEXT: s_endpgm
;
; DAGISEL11-LABEL: set_inactive_chain_arg_64:
; DAGISEL11: ; %bb.0:
; DAGISEL11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL11-NEXT: s_or_saveexec_b32 s0, -1
; DAGISEL11-NEXT: v_dual_mov_b32 v0, v11 :: v_dual_mov_b32 v1, v10
; DAGISEL11-NEXT: s_mov_b32 exec_lo, s0
; DAGISEL11-NEXT: s_or_saveexec_b32 s0, -1
; DAGISEL11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; DAGISEL11-NEXT: v_cndmask_b32_e64 v2, v0, v13, s0
; DAGISEL11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; DAGISEL11-NEXT: v_cndmask_b32_e64 v1, v1, v12, s0
; DAGISEL11-NEXT: s_mov_b32 exec_lo, s0
; DAGISEL11-NEXT: v_mov_b32_e32 v3, v1
; DAGISEL11-NEXT: s_delay_alu instid0(VALU_DEP_3)
; DAGISEL11-NEXT: v_mov_b32_e32 v4, v2
; DAGISEL11-NEXT: global_store_b64 v[8:9], v[3:4], off
; DAGISEL11-NEXT: s_endpgm
;
; GISEL10-LABEL: set_inactive_chain_arg_64:
; GISEL10: ; %bb.0:
; GISEL10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL10-NEXT: s_or_saveexec_b32 s0, -1
; GISEL10-NEXT: v_mov_b32_e32 v0, v10
; GISEL10-NEXT: v_mov_b32_e32 v1, v11
; GISEL10-NEXT: s_mov_b32 exec_lo, s0
; GISEL10-NEXT: s_or_saveexec_b32 s0, -1
; GISEL10-NEXT: v_cndmask_b32_e64 v0, v0, v12, s0
; GISEL10-NEXT: v_cndmask_b32_e64 v1, v1, v13, s0
; GISEL10-NEXT: s_mov_b32 exec_lo, s0
; GISEL10-NEXT: v_mov_b32_e32 v2, v0
; GISEL10-NEXT: v_mov_b32_e32 v3, v1
; GISEL10-NEXT: global_store_dwordx2 v[8:9], v[2:3], off
; GISEL10-NEXT: s_endpgm
;
; DAGISEL10-LABEL: set_inactive_chain_arg_64:
; DAGISEL10: ; %bb.0:
; DAGISEL10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL10-NEXT: s_or_saveexec_b32 s0, -1
; DAGISEL10-NEXT: v_mov_b32_e32 v0, v11
; DAGISEL10-NEXT: v_mov_b32_e32 v1, v10
; DAGISEL10-NEXT: s_mov_b32 exec_lo, s0
; DAGISEL10-NEXT: s_or_saveexec_b32 s0, -1
; DAGISEL10-NEXT: v_cndmask_b32_e64 v2, v0, v13, s0
; DAGISEL10-NEXT: v_cndmask_b32_e64 v1, v1, v12, s0
; DAGISEL10-NEXT: s_mov_b32 exec_lo, s0
; DAGISEL10-NEXT: v_mov_b32_e32 v3, v1
; DAGISEL10-NEXT: v_mov_b32_e32 v4, v2
; DAGISEL10-NEXT: global_store_dwordx2 v[8:9], v[3:4], off
; DAGISEL10-NEXT: s_endpgm
;
; GISEL11_W64-LABEL: set_inactive_chain_arg_64:
; GISEL11_W64: ; %bb.0:
; GISEL11_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL11_W64-NEXT: s_or_saveexec_b64 s[0:1], -1
; GISEL11_W64-NEXT: v_mov_b32_e32 v0, v10
; GISEL11_W64-NEXT: v_mov_b32_e32 v1, v11
; GISEL11_W64-NEXT: s_mov_b64 exec, s[0:1]
; GISEL11_W64-NEXT: s_or_saveexec_b64 s[0:1], -1
; GISEL11_W64-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
; GISEL11_W64-NEXT: v_cndmask_b32_e64 v0, v0, v12, s[0:1]
; GISEL11_W64-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
; GISEL11_W64-NEXT: v_cndmask_b32_e64 v1, v1, v13, s[0:1]
; GISEL11_W64-NEXT: s_mov_b64 exec, s[0:1]
; GISEL11_W64-NEXT: v_mov_b32_e32 v2, v0
; GISEL11_W64-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GISEL11_W64-NEXT: v_mov_b32_e32 v3, v1
; GISEL11_W64-NEXT: global_store_b64 v[8:9], v[2:3], off
; GISEL11_W64-NEXT: s_endpgm
;
; DAGISEL11_W64-LABEL: set_inactive_chain_arg_64:
; DAGISEL11_W64: ; %bb.0:
; DAGISEL11_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL11_W64-NEXT: s_or_saveexec_b64 s[0:1], -1
; DAGISEL11_W64-NEXT: v_mov_b32_e32 v0, v11
; DAGISEL11_W64-NEXT: v_mov_b32_e32 v1, v10
; DAGISEL11_W64-NEXT: s_mov_b64 exec, s[0:1]
; DAGISEL11_W64-NEXT: s_or_saveexec_b64 s[0:1], -1
; DAGISEL11_W64-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
; DAGISEL11_W64-NEXT: v_cndmask_b32_e64 v2, v0, v13, s[0:1]
; DAGISEL11_W64-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; DAGISEL11_W64-NEXT: v_cndmask_b32_e64 v1, v1, v12, s[0:1]
; DAGISEL11_W64-NEXT: s_mov_b64 exec, s[0:1]
; DAGISEL11_W64-NEXT: v_mov_b32_e32 v3, v1
; DAGISEL11_W64-NEXT: s_delay_alu instid0(VALU_DEP_3)
; DAGISEL11_W64-NEXT: v_mov_b32_e32 v4, v2
; DAGISEL11_W64-NEXT: global_store_b64 v[8:9], v[3:4], off
; DAGISEL11_W64-NEXT: s_endpgm
;
; GISEL10_W64-LABEL: set_inactive_chain_arg_64:
; GISEL10_W64: ; %bb.0:
; GISEL10_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL10_W64-NEXT: s_or_saveexec_b64 s[0:1], -1
; GISEL10_W64-NEXT: v_mov_b32_e32 v0, v10
; GISEL10_W64-NEXT: v_mov_b32_e32 v1, v11
; GISEL10_W64-NEXT: s_mov_b64 exec, s[0:1]
; GISEL10_W64-NEXT: s_or_saveexec_b64 s[0:1], -1
; GISEL10_W64-NEXT: v_cndmask_b32_e64 v0, v0, v12, s[0:1]
; GISEL10_W64-NEXT: v_cndmask_b32_e64 v1, v1, v13, s[0:1]
; GISEL10_W64-NEXT: s_mov_b64 exec, s[0:1]
; GISEL10_W64-NEXT: v_mov_b32_e32 v2, v0
; GISEL10_W64-NEXT: v_mov_b32_e32 v3, v1
; GISEL10_W64-NEXT: global_store_dwordx2 v[8:9], v[2:3], off
; GISEL10_W64-NEXT: s_endpgm
;
; DAGISEL10_W64-LABEL: set_inactive_chain_arg_64:
; DAGISEL10_W64: ; %bb.0:
; DAGISEL10_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL10_W64-NEXT: s_or_saveexec_b64 s[0:1], -1
; DAGISEL10_W64-NEXT: v_mov_b32_e32 v0, v11
; DAGISEL10_W64-NEXT: v_mov_b32_e32 v1, v10
; DAGISEL10_W64-NEXT: s_mov_b64 exec, s[0:1]
; DAGISEL10_W64-NEXT: s_or_saveexec_b64 s[0:1], -1
; DAGISEL10_W64-NEXT: v_cndmask_b32_e64 v2, v0, v13, s[0:1]
; DAGISEL10_W64-NEXT: v_cndmask_b32_e64 v1, v1, v12, s[0:1]
; DAGISEL10_W64-NEXT: s_mov_b64 exec, s[0:1]
; DAGISEL10_W64-NEXT: v_mov_b32_e32 v3, v1
; DAGISEL10_W64-NEXT: v_mov_b32_e32 v4, v2
; DAGISEL10_W64-NEXT: global_store_dwordx2 v[8:9], v[3:4], off
; DAGISEL10_W64-NEXT: s_endpgm
  %merged = call i64 @llvm.amdgcn.set.inactive.chain.arg.i64(i64 %active, i64 %inactive) #0
  %all_lanes = call i64 @llvm.amdgcn.strict.wwm.i64(i64 %merged)
  store i64 %all_lanes, ptr addrspace(1) %out
  ret void
}

; The set.inactive.chain.arg result is routed through update.dpp (control 353,
; printed as row_xmask:1 in the expected code) before it reaches strict.wwm.
define amdgpu_cs_chain void @set_inactive_chain_arg_dpp(ptr addrspace(1) %out, i32 %inactive, i32 %active) {
; GFX11-LABEL: set_inactive_chain_arg_dpp:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: s_or_saveexec_b32 s0, -1
; GFX11-NEXT: v_mov_b32_e32 v0, v10
; GFX11-NEXT: s_mov_b32 exec_lo, s0
; GFX11-NEXT: s_or_saveexec_b32 s0, -1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v11, s0
; GFX11-NEXT: v_mov_b32_e32 v1, 0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_mov_b32_dpp v1, v0 row_xmask:1 row_mask:0xf bank_mask:0xf
; GFX11-NEXT: s_mov_b32 exec_lo, s0
; GFX11-NEXT: v_mov_b32_e32 v2, v1
; GFX11-NEXT: global_store_b32 v[8:9], v2, off
; GFX11-NEXT: s_endpgm
;
; GFX10-LABEL: set_inactive_chain_arg_dpp:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-NEXT: v_mov_b32_e32 v0, v10
; GFX10-NEXT: s_mov_b32 exec_lo, s0
; GFX10-NEXT: s_or_saveexec_b32 s0, -1
; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v11, s0
; GFX10-NEXT: v_mov_b32_e32 v1, 0
; GFX10-NEXT: v_mov_b32_dpp v1, v0 row_xmask:1 row_mask:0xf bank_mask:0xf
; GFX10-NEXT: s_mov_b32 exec_lo, s0
; GFX10-NEXT: v_mov_b32_e32 v2, v1
; GFX10-NEXT: global_store_dword v[8:9], v2, off
; GFX10-NEXT: s_endpgm
;
; GFX11_W64-LABEL: set_inactive_chain_arg_dpp:
; GFX11_W64: ; %bb.0:
; GFX11_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11_W64-NEXT: s_or_saveexec_b64 s[0:1], -1
; GFX11_W64-NEXT: v_mov_b32_e32 v0, v10
; GFX11_W64-NEXT: s_mov_b64 exec, s[0:1]
; GFX11_W64-NEXT: s_or_saveexec_b64 s[0:1], -1
; GFX11_W64-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX11_W64-NEXT: v_cndmask_b32_e64 v0, v0, v11, s[0:1]
; GFX11_W64-NEXT: v_mov_b32_e32 v1, 0
; GFX11_W64-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11_W64-NEXT: v_mov_b32_dpp v1, v0 row_xmask:1 row_mask:0xf bank_mask:0xf
; GFX11_W64-NEXT: s_mov_b64 exec, s[0:1]
; GFX11_W64-NEXT: v_mov_b32_e32 v2, v1
; GFX11_W64-NEXT: global_store_b32 v[8:9], v2, off
; GFX11_W64-NEXT: s_endpgm
;
; GFX10_W64-LABEL: set_inactive_chain_arg_dpp:
; GFX10_W64: ; %bb.0:
; GFX10_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10_W64-NEXT: s_or_saveexec_b64 s[0:1], -1
; GFX10_W64-NEXT: v_mov_b32_e32 v0, v10
; GFX10_W64-NEXT: s_mov_b64 exec, s[0:1]
; GFX10_W64-NEXT: s_or_saveexec_b64 s[0:1], -1
; GFX10_W64-NEXT: v_cndmask_b32_e64 v0, v0, v11, s[0:1]
; GFX10_W64-NEXT: v_mov_b32_e32 v1, 0
; GFX10_W64-NEXT: v_mov_b32_dpp v1, v0 row_xmask:1 row_mask:0xf bank_mask:0xf
; GFX10_W64-NEXT: s_mov_b64 exec, s[0:1]
; GFX10_W64-NEXT: v_mov_b32_e32 v2, v1
; GFX10_W64-NEXT: global_store_dword v[8:9], v2, off
; GFX10_W64-NEXT: s_endpgm
  %merged = call i32 @llvm.amdgcn.set.inactive.chain.arg.i32(i32 %active, i32 %inactive) #0
  %shuffled = call i32 @llvm.amdgcn.update.dpp.i32(i32 0, i32 %merged, i32 353, i32 15, i32 15, i1 false)
  %all_lanes = call i32 @llvm.amdgcn.strict.wwm.i32(i32 %shuffled)
  store i32 %all_lanes, ptr addrspace(1) %out
  ret void
}

; Make sure that if we need to use the register for %inactive for something else (in this case a call),
; we save its inactive lanes for later use in set.inactive.chain.arg.
define amdgpu_cs_chain void @set_inactive_chain_arg_call(ptr addrspace(1) %out, i32 %inactive, i32 %active) {
; GISEL11-LABEL: set_inactive_chain_arg_call:
; GISEL11: ; %bb.0:
; GISEL11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL11-NEXT: s_mov_b32 s32, 0
; GISEL11-NEXT: v_dual_mov_b32 v41, v8 :: v_dual_mov_b32 v42, v9
; GISEL11-NEXT: s_or_saveexec_b32 s0, -1
; GISEL11-NEXT: v_mov_b32_e32 v40, v10
; GISEL11-NEXT: s_mov_b32 exec_lo, s0
; GISEL11-NEXT: s_getpc_b64 s[0:1]
; GISEL11-NEXT: s_add_u32 s0, s0, gfx_callee@gotpcrel32@lo+4
; GISEL11-NEXT: s_addc_u32 s1, s1, gfx_callee@gotpcrel32@hi+12
; GISEL11-NEXT: v_dual_mov_b32 v43, v11 :: v_dual_mov_b32 v0, 0
; GISEL11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GISEL11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0
; GISEL11-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_mov_b32 v4, 0
; GISEL11-NEXT: v_dual_mov_b32 v5, 0 :: v_dual_mov_b32 v6, 0
; GISEL11-NEXT: v_dual_mov_b32 v7, 0 :: v_dual_mov_b32 v8, 0
; GISEL11-NEXT: v_dual_mov_b32 v9, 0 :: v_dual_mov_b32 v10, 0
; GISEL11-NEXT: v_mov_b32_e32 v11, 0
; GISEL11-NEXT: s_waitcnt lgkmcnt(0)
; GISEL11-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GISEL11-NEXT: s_or_saveexec_b32 s0, -1
; GISEL11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GISEL11-NEXT: v_cndmask_b32_e64 v12, v40, v43, s0
; GISEL11-NEXT: s_mov_b32 exec_lo, s0
; GISEL11-NEXT: v_mov_b32_e32 v0, v12
; GISEL11-NEXT: global_store_b32 v[41:42], v0, off
; GISEL11-NEXT: s_endpgm
;
; DAGISEL11-LABEL: set_inactive_chain_arg_call:
; DAGISEL11: ; %bb.0:
; DAGISEL11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL11-NEXT: s_mov_b32 s32, 0
; DAGISEL11-NEXT: v_mov_b32_e32 v43, v11
; DAGISEL11-NEXT: s_or_saveexec_b32 s0, -1
; DAGISEL11-NEXT: v_mov_b32_e32 v40, v10
; DAGISEL11-NEXT: s_mov_b32 exec_lo, s0
; DAGISEL11-NEXT: s_getpc_b64 s[0:1]
; DAGISEL11-NEXT: s_add_u32 s0, s0, gfx_callee@gotpcrel32@lo+4
; DAGISEL11-NEXT: s_addc_u32 s1, s1, gfx_callee@gotpcrel32@hi+12
; DAGISEL11-NEXT: v_dual_mov_b32 v42, v9 :: v_dual_mov_b32 v41, v8
; DAGISEL11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; DAGISEL11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0
; DAGISEL11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v3, 0
; DAGISEL11-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v5, 0
; DAGISEL11-NEXT: v_dual_mov_b32 v6, 0 :: v_dual_mov_b32 v7, 0
; DAGISEL11-NEXT: v_dual_mov_b32 v8, 0 :: v_dual_mov_b32 v9, 0
; DAGISEL11-NEXT: v_dual_mov_b32 v10, 0 :: v_dual_mov_b32 v11, 0
; DAGISEL11-NEXT: s_waitcnt lgkmcnt(0)
; DAGISEL11-NEXT: s_swappc_b64 s[30:31], s[0:1]
; DAGISEL11-NEXT: s_or_saveexec_b32 s0, -1
; DAGISEL11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; DAGISEL11-NEXT: v_cndmask_b32_e64 v12, v40, v43, s0
; DAGISEL11-NEXT: s_mov_b32 exec_lo, s0
; DAGISEL11-NEXT: v_mov_b32_e32 v0, v12
; DAGISEL11-NEXT: global_store_b32 v[41:42], v0, off
; DAGISEL11-NEXT: s_endpgm
;
; GISEL10-LABEL: set_inactive_chain_arg_call:
; GISEL10: ; %bb.0:
; GISEL10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL10-NEXT: s_mov_b32 s32, 0
; GISEL10-NEXT: v_mov_b32_e32 v41, v8
; GISEL10-NEXT: v_mov_b32_e32 v42, v9
; GISEL10-NEXT: s_or_saveexec_b32 s0, -1
; GISEL10-NEXT: v_mov_b32_e32 v40, v10
; GISEL10-NEXT: s_mov_b32 exec_lo, s0
; GISEL10-NEXT: s_getpc_b64 s[0:1]
; GISEL10-NEXT: s_add_u32 s0, s0, gfx_callee@gotpcrel32@lo+4
; GISEL10-NEXT: s_addc_u32 s1, s1, gfx_callee@gotpcrel32@hi+12
; GISEL10-NEXT: v_mov_b32_e32 v43, v11
; GISEL10-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
; GISEL10-NEXT: v_mov_b32_e32 v0, 0
; GISEL10-NEXT: v_mov_b32_e32 v1, 0
; GISEL10-NEXT: v_mov_b32_e32 v2, 0
; GISEL10-NEXT: v_mov_b32_e32 v3, 0
; GISEL10-NEXT: v_mov_b32_e32 v4, 0
; GISEL10-NEXT: v_mov_b32_e32 v5, 0
; GISEL10-NEXT: v_mov_b32_e32 v6, 0
; GISEL10-NEXT: v_mov_b32_e32 v7, 0
; GISEL10-NEXT: v_mov_b32_e32 v8, 0
; GISEL10-NEXT: v_mov_b32_e32 v9, 0
; GISEL10-NEXT: v_mov_b32_e32 v10, 0
; GISEL10-NEXT: v_mov_b32_e32 v11, 0
; GISEL10-NEXT: s_mov_b64 s[0:1], s[48:49]
; GISEL10-NEXT: s_mov_b64 s[2:3], s[50:51]
; GISEL10-NEXT: s_waitcnt lgkmcnt(0)
; GISEL10-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GISEL10-NEXT: s_or_saveexec_b32 s0, -1
; GISEL10-NEXT: v_cndmask_b32_e64 v12, v40, v43, s0
; GISEL10-NEXT: s_mov_b32 exec_lo, s0
; GISEL10-NEXT: v_mov_b32_e32 v0, v12
; GISEL10-NEXT: global_store_dword v[41:42], v0, off
; GISEL10-NEXT: s_endpgm
;
; DAGISEL10-LABEL: set_inactive_chain_arg_call:
; DAGISEL10: ; %bb.0:
; DAGISEL10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL10-NEXT: s_mov_b32 s32, 0
; DAGISEL10-NEXT: v_mov_b32_e32 v43, v11
; DAGISEL10-NEXT: s_or_saveexec_b32 s0, -1
; DAGISEL10-NEXT: v_mov_b32_e32 v40, v10
; DAGISEL10-NEXT: s_mov_b32 exec_lo, s0
; DAGISEL10-NEXT: s_getpc_b64 s[0:1]
; DAGISEL10-NEXT: s_add_u32 s0, s0, gfx_callee@gotpcrel32@lo+4
; DAGISEL10-NEXT: s_addc_u32 s1, s1, gfx_callee@gotpcrel32@hi+12
; DAGISEL10-NEXT: v_mov_b32_e32 v42, v9
; DAGISEL10-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
; DAGISEL10-NEXT: v_mov_b32_e32 v41, v8
; DAGISEL10-NEXT: v_mov_b32_e32 v0, 0
; DAGISEL10-NEXT: v_mov_b32_e32 v1, 0
; DAGISEL10-NEXT: v_mov_b32_e32 v2, 0
; DAGISEL10-NEXT: v_mov_b32_e32 v3, 0
; DAGISEL10-NEXT: v_mov_b32_e32 v4, 0
; DAGISEL10-NEXT: v_mov_b32_e32 v5, 0
; DAGISEL10-NEXT: v_mov_b32_e32 v6, 0
; DAGISEL10-NEXT: v_mov_b32_e32 v7, 0
; DAGISEL10-NEXT: v_mov_b32_e32 v8, 0
; DAGISEL10-NEXT: v_mov_b32_e32 v9, 0
; DAGISEL10-NEXT: v_mov_b32_e32 v10, 0
; DAGISEL10-NEXT: v_mov_b32_e32 v11, 0
; DAGISEL10-NEXT: s_mov_b64 s[0:1], s[48:49]
; DAGISEL10-NEXT: s_mov_b64 s[2:3], s[50:51]
; DAGISEL10-NEXT: s_waitcnt lgkmcnt(0)
; DAGISEL10-NEXT: s_swappc_b64 s[30:31], s[4:5]
; DAGISEL10-NEXT: s_or_saveexec_b32 s0, -1
; DAGISEL10-NEXT: v_cndmask_b32_e64 v12, v40, v43, s0
; DAGISEL10-NEXT: s_mov_b32 exec_lo, s0
; DAGISEL10-NEXT: v_mov_b32_e32 v0, v12
; DAGISEL10-NEXT: global_store_dword v[41:42], v0, off
; DAGISEL10-NEXT: s_endpgm
;
; GISEL11_W64-LABEL: set_inactive_chain_arg_call:
; GISEL11_W64: ; %bb.0:
; GISEL11_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL11_W64-NEXT: s_mov_b32 s32, 0
; GISEL11_W64-NEXT: v_mov_b32_e32 v41, v8
; GISEL11_W64-NEXT: v_mov_b32_e32 v42, v9
; GISEL11_W64-NEXT: s_or_saveexec_b64 s[0:1], -1
; GISEL11_W64-NEXT: v_mov_b32_e32 v40, v10
; GISEL11_W64-NEXT: s_mov_b64 exec, s[0:1]
; GISEL11_W64-NEXT: s_getpc_b64 s[0:1]
; GISEL11_W64-NEXT: s_add_u32 s0, s0, gfx_callee@gotpcrel32@lo+4
; GISEL11_W64-NEXT: s_addc_u32 s1, s1, gfx_callee@gotpcrel32@hi+12
; GISEL11_W64-NEXT: v_mov_b32_e32 v43, v11
; GISEL11_W64-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; GISEL11_W64-NEXT: v_mov_b32_e32 v0, 0
; GISEL11_W64-NEXT: v_mov_b32_e32 v1, 0
; GISEL11_W64-NEXT: v_mov_b32_e32 v2, 0
; GISEL11_W64-NEXT: v_mov_b32_e32 v3, 0
; GISEL11_W64-NEXT: v_mov_b32_e32 v4, 0
; GISEL11_W64-NEXT: v_mov_b32_e32 v5, 0
; GISEL11_W64-NEXT: v_mov_b32_e32 v6, 0
; GISEL11_W64-NEXT: v_mov_b32_e32 v7, 0
; GISEL11_W64-NEXT: v_mov_b32_e32 v8, 0
; GISEL11_W64-NEXT: v_mov_b32_e32 v9, 0
; GISEL11_W64-NEXT: v_mov_b32_e32 v10, 0
; GISEL11_W64-NEXT: v_mov_b32_e32 v11, 0
; GISEL11_W64-NEXT: s_waitcnt lgkmcnt(0)
; GISEL11_W64-NEXT: s_swappc_b64 s[30:31], s[0:1]
; GISEL11_W64-NEXT: s_or_saveexec_b64 s[0:1], -1
; GISEL11_W64-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GISEL11_W64-NEXT: v_cndmask_b32_e64 v12, v40, v43, s[0:1]
; GISEL11_W64-NEXT: s_mov_b64 exec, s[0:1]
; GISEL11_W64-NEXT: v_mov_b32_e32 v0, v12
; GISEL11_W64-NEXT: global_store_b32 v[41:42], v0, off
; GISEL11_W64-NEXT: s_endpgm
;
; DAGISEL11_W64-LABEL: set_inactive_chain_arg_call:
; DAGISEL11_W64: ; %bb.0:
; DAGISEL11_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL11_W64-NEXT: s_mov_b32 s32, 0
; DAGISEL11_W64-NEXT: v_mov_b32_e32 v43, v11
; DAGISEL11_W64-NEXT: s_or_saveexec_b64 s[0:1], -1
; DAGISEL11_W64-NEXT: v_mov_b32_e32 v40, v10
; DAGISEL11_W64-NEXT: s_mov_b64 exec, s[0:1]
; DAGISEL11_W64-NEXT: s_getpc_b64 s[0:1]
; DAGISEL11_W64-NEXT: s_add_u32 s0, s0, gfx_callee@gotpcrel32@lo+4
; DAGISEL11_W64-NEXT: s_addc_u32 s1, s1, gfx_callee@gotpcrel32@hi+12
; DAGISEL11_W64-NEXT: v_mov_b32_e32 v42, v9
; DAGISEL11_W64-NEXT: s_load_b64 s[0:1], s[0:1], 0x0
; DAGISEL11_W64-NEXT: v_mov_b32_e32 v41, v8
; DAGISEL11_W64-NEXT: v_mov_b32_e32 v0, 0
; DAGISEL11_W64-NEXT: v_mov_b32_e32 v1, 0
; DAGISEL11_W64-NEXT: v_mov_b32_e32 v2, 0
; DAGISEL11_W64-NEXT: v_mov_b32_e32 v3, 0
; DAGISEL11_W64-NEXT: v_mov_b32_e32 v4, 0
; DAGISEL11_W64-NEXT: v_mov_b32_e32 v5, 0
; DAGISEL11_W64-NEXT: v_mov_b32_e32 v6, 0
; DAGISEL11_W64-NEXT: v_mov_b32_e32 v7, 0
; DAGISEL11_W64-NEXT: v_mov_b32_e32 v8, 0
; DAGISEL11_W64-NEXT: v_mov_b32_e32 v9, 0
; DAGISEL11_W64-NEXT: v_mov_b32_e32 v10, 0
; DAGISEL11_W64-NEXT: v_mov_b32_e32 v11, 0
; DAGISEL11_W64-NEXT: s_waitcnt lgkmcnt(0)
; DAGISEL11_W64-NEXT: s_swappc_b64 s[30:31], s[0:1]
; DAGISEL11_W64-NEXT: s_or_saveexec_b64 s[0:1], -1
; DAGISEL11_W64-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; DAGISEL11_W64-NEXT: v_cndmask_b32_e64 v12, v40, v43, s[0:1]
; DAGISEL11_W64-NEXT: s_mov_b64 exec, s[0:1]
; DAGISEL11_W64-NEXT: v_mov_b32_e32 v0, v12
; DAGISEL11_W64-NEXT: global_store_b32 v[41:42], v0, off
; DAGISEL11_W64-NEXT: s_endpgm
;
; GISEL10_W64-LABEL: set_inactive_chain_arg_call:
; GISEL10_W64: ; %bb.0:
; GISEL10_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL10_W64-NEXT: s_mov_b32 s32, 0
; GISEL10_W64-NEXT: v_mov_b32_e32 v41, v8
; GISEL10_W64-NEXT: v_mov_b32_e32 v42, v9
; GISEL10_W64-NEXT: s_or_saveexec_b64 s[0:1], -1
; GISEL10_W64-NEXT: v_mov_b32_e32 v40, v10
; GISEL10_W64-NEXT: s_mov_b64 exec, s[0:1]
; GISEL10_W64-NEXT: s_getpc_b64 s[0:1]
; GISEL10_W64-NEXT: s_add_u32 s0, s0, gfx_callee@gotpcrel32@lo+4
; GISEL10_W64-NEXT: s_addc_u32 s1, s1, gfx_callee@gotpcrel32@hi+12
; GISEL10_W64-NEXT: v_mov_b32_e32 v43, v11
; GISEL10_W64-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
; GISEL10_W64-NEXT: v_mov_b32_e32 v0, 0
; GISEL10_W64-NEXT: v_mov_b32_e32 v1, 0
; GISEL10_W64-NEXT: v_mov_b32_e32 v2, 0
; GISEL10_W64-NEXT: v_mov_b32_e32 v3, 0
; GISEL10_W64-NEXT: v_mov_b32_e32 v4, 0
; GISEL10_W64-NEXT: v_mov_b32_e32 v5, 0
; GISEL10_W64-NEXT: v_mov_b32_e32 v6, 0
; GISEL10_W64-NEXT: v_mov_b32_e32 v7, 0
; GISEL10_W64-NEXT: v_mov_b32_e32 v8, 0
; GISEL10_W64-NEXT: v_mov_b32_e32 v9, 0
; GISEL10_W64-NEXT: v_mov_b32_e32 v10, 0
; GISEL10_W64-NEXT: v_mov_b32_e32 v11, 0
; GISEL10_W64-NEXT: s_mov_b64 s[0:1], s[48:49]
; GISEL10_W64-NEXT: s_mov_b64 s[2:3], s[50:51]
; GISEL10_W64-NEXT: s_waitcnt lgkmcnt(0)
; GISEL10_W64-NEXT: s_swappc_b64 s[30:31], s[4:5]
; GISEL10_W64-NEXT: s_or_saveexec_b64 s[0:1], -1
; GISEL10_W64-NEXT: v_cndmask_b32_e64 v12, v40, v43, s[0:1]
; GISEL10_W64-NEXT: s_mov_b64 exec, s[0:1]
; GISEL10_W64-NEXT: v_mov_b32_e32 v0, v12
; GISEL10_W64-NEXT: global_store_dword v[41:42], v0, off
; GISEL10_W64-NEXT: s_endpgm
;
; DAGISEL10_W64-LABEL: set_inactive_chain_arg_call:
; DAGISEL10_W64: ; %bb.0:
; DAGISEL10_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL10_W64-NEXT: s_mov_b32 s32, 0
; DAGISEL10_W64-NEXT: v_mov_b32_e32 v43, v11
; DAGISEL10_W64-NEXT: s_or_saveexec_b64 s[0:1], -1
; DAGISEL10_W64-NEXT: v_mov_b32_e32 v40, v10
; DAGISEL10_W64-NEXT: s_mov_b64 exec, s[0:1]
; DAGISEL10_W64-NEXT: s_getpc_b64 s[0:1]
; DAGISEL10_W64-NEXT: s_add_u32 s0, s0, gfx_callee@gotpcrel32@lo+4
; DAGISEL10_W64-NEXT: s_addc_u32 s1, s1, gfx_callee@gotpcrel32@hi+12
; DAGISEL10_W64-NEXT: v_mov_b32_e32 v42, v9
; DAGISEL10_W64-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0
; DAGISEL10_W64-NEXT: v_mov_b32_e32 v41, v8
; DAGISEL10_W64-NEXT: v_mov_b32_e32 v0, 0
; DAGISEL10_W64-NEXT: v_mov_b32_e32 v1, 0
; DAGISEL10_W64-NEXT: v_mov_b32_e32 v2, 0
; DAGISEL10_W64-NEXT: v_mov_b32_e32 v3, 0
; DAGISEL10_W64-NEXT: v_mov_b32_e32 v4, 0
; DAGISEL10_W64-NEXT: v_mov_b32_e32 v5, 0
; DAGISEL10_W64-NEXT: v_mov_b32_e32 v6, 0
; DAGISEL10_W64-NEXT: v_mov_b32_e32 v7, 0
; DAGISEL10_W64-NEXT: v_mov_b32_e32 v8, 0
; DAGISEL10_W64-NEXT: v_mov_b32_e32 v9, 0
; DAGISEL10_W64-NEXT: v_mov_b32_e32 v10, 0
; DAGISEL10_W64-NEXT: v_mov_b32_e32 v11, 0
; DAGISEL10_W64-NEXT: s_mov_b64 s[0:1], s[48:49]
; DAGISEL10_W64-NEXT: s_mov_b64 s[2:3], s[50:51]
; DAGISEL10_W64-NEXT: s_waitcnt lgkmcnt(0)
; DAGISEL10_W64-NEXT: s_swappc_b64 s[30:31], s[4:5]
; DAGISEL10_W64-NEXT: s_or_saveexec_b64 s[0:1], -1
; DAGISEL10_W64-NEXT: v_cndmask_b32_e64 v12, v40, v43, s[0:1]
; DAGISEL10_W64-NEXT: s_mov_b64 exec, s[0:1]
; DAGISEL10_W64-NEXT: v_mov_b32_e32 v0, v12
; DAGISEL10_W64-NEXT: global_store_dword v[41:42], v0, off
; DAGISEL10_W64-NEXT: s_endpgm
  call amdgpu_gfx void @gfx_callee(<12 x i32> zeroinitializer)
  %merged = call i32 @llvm.amdgcn.set.inactive.chain.arg.i32(i32 %active, i32 %inactive) #0
  %all_lanes = call i32 @llvm.amdgcn.strict.wwm.i32(i32 %merged)
  store i32 %all_lanes, ptr addrspace(1) %out
  ret void
}

; When lowering function arguments, SelectionDAG will put the COPY for the last argument first.
576; This used to trigger a bug in si-wqm where the first COPY in the entry block was always skipped 577; before entering a strict mode, meaning that we'd only copy the active lanes of the last VGPR 578; argument, so we'd end up using arbitrary values for the inactive lanes. 579define amdgpu_cs_chain void @set_inactive_chain_arg_last_vgpr(ptr addrspace(1) %out, i32 %active, i32 %inactive) { 580; GISEL11-LABEL: set_inactive_chain_arg_last_vgpr: 581; GISEL11: ; %bb.0: 582; GISEL11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 583; GISEL11-NEXT: s_mov_b32 s32, 0 584; GISEL11-NEXT: v_dual_mov_b32 v41, v8 :: v_dual_mov_b32 v42, v9 585; GISEL11-NEXT: v_mov_b32_e32 v43, v10 586; GISEL11-NEXT: s_or_saveexec_b32 s0, -1 587; GISEL11-NEXT: v_mov_b32_e32 v40, v11 588; GISEL11-NEXT: s_mov_b32 exec_lo, s0 589; GISEL11-NEXT: s_getpc_b64 s[0:1] 590; GISEL11-NEXT: s_add_u32 s0, s0, gfx_callee@gotpcrel32@lo+4 591; GISEL11-NEXT: s_addc_u32 s1, s1, gfx_callee@gotpcrel32@hi+12 592; GISEL11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0 593; GISEL11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 594; GISEL11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v3, 0 595; GISEL11-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v5, 0 596; GISEL11-NEXT: v_dual_mov_b32 v6, 0 :: v_dual_mov_b32 v7, 0 597; GISEL11-NEXT: v_dual_mov_b32 v8, 0 :: v_dual_mov_b32 v9, 0 598; GISEL11-NEXT: v_dual_mov_b32 v10, 0 :: v_dual_mov_b32 v11, 0 599; GISEL11-NEXT: s_waitcnt lgkmcnt(0) 600; GISEL11-NEXT: s_swappc_b64 s[30:31], s[0:1] 601; GISEL11-NEXT: s_or_saveexec_b32 s0, -1 602; GISEL11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) 603; GISEL11-NEXT: v_cndmask_b32_e64 v12, v40, v43, s0 604; GISEL11-NEXT: s_mov_b32 exec_lo, s0 605; GISEL11-NEXT: v_mov_b32_e32 v0, v12 606; GISEL11-NEXT: global_store_b32 v[41:42], v0, off 607; GISEL11-NEXT: s_endpgm 608; 609; DAGISEL11-LABEL: set_inactive_chain_arg_last_vgpr: 610; DAGISEL11: ; %bb.0: 611; DAGISEL11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
612; DAGISEL11-NEXT: s_mov_b32 s32, 0 613; DAGISEL11-NEXT: s_or_saveexec_b32 s0, -1 614; DAGISEL11-NEXT: v_mov_b32_e32 v40, v11 615; DAGISEL11-NEXT: s_mov_b32 exec_lo, s0 616; DAGISEL11-NEXT: s_getpc_b64 s[0:1] 617; DAGISEL11-NEXT: s_add_u32 s0, s0, gfx_callee@gotpcrel32@lo+4 618; DAGISEL11-NEXT: s_addc_u32 s1, s1, gfx_callee@gotpcrel32@hi+12 619; DAGISEL11-NEXT: v_dual_mov_b32 v43, v10 :: v_dual_mov_b32 v42, v9 620; DAGISEL11-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 621; DAGISEL11-NEXT: v_dual_mov_b32 v41, v8 :: v_dual_mov_b32 v0, 0 622; DAGISEL11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0 623; DAGISEL11-NEXT: v_dual_mov_b32 v3, 0 :: v_dual_mov_b32 v4, 0 624; DAGISEL11-NEXT: v_dual_mov_b32 v5, 0 :: v_dual_mov_b32 v6, 0 625; DAGISEL11-NEXT: v_dual_mov_b32 v7, 0 :: v_dual_mov_b32 v8, 0 626; DAGISEL11-NEXT: v_dual_mov_b32 v9, 0 :: v_dual_mov_b32 v10, 0 627; DAGISEL11-NEXT: v_mov_b32_e32 v11, 0 628; DAGISEL11-NEXT: s_waitcnt lgkmcnt(0) 629; DAGISEL11-NEXT: s_swappc_b64 s[30:31], s[0:1] 630; DAGISEL11-NEXT: s_or_saveexec_b32 s0, -1 631; DAGISEL11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) 632; DAGISEL11-NEXT: v_cndmask_b32_e64 v12, v40, v43, s0 633; DAGISEL11-NEXT: s_mov_b32 exec_lo, s0 634; DAGISEL11-NEXT: v_mov_b32_e32 v0, v12 635; DAGISEL11-NEXT: global_store_b32 v[41:42], v0, off 636; DAGISEL11-NEXT: s_endpgm 637; 638; GISEL10-LABEL: set_inactive_chain_arg_last_vgpr: 639; GISEL10: ; %bb.0: 640; GISEL10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 641; GISEL10-NEXT: s_mov_b32 s32, 0 642; GISEL10-NEXT: v_mov_b32_e32 v41, v8 643; GISEL10-NEXT: v_mov_b32_e32 v42, v9 644; GISEL10-NEXT: v_mov_b32_e32 v43, v10 645; GISEL10-NEXT: s_or_saveexec_b32 s0, -1 646; GISEL10-NEXT: v_mov_b32_e32 v40, v11 647; GISEL10-NEXT: s_mov_b32 exec_lo, s0 648; GISEL10-NEXT: s_getpc_b64 s[0:1] 649; GISEL10-NEXT: s_add_u32 s0, s0, gfx_callee@gotpcrel32@lo+4 650; GISEL10-NEXT: s_addc_u32 s1, s1, gfx_callee@gotpcrel32@hi+12 651; GISEL10-NEXT: 
v_mov_b32_e32 v0, 0 652; GISEL10-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 653; GISEL10-NEXT: v_mov_b32_e32 v1, 0 654; GISEL10-NEXT: v_mov_b32_e32 v2, 0 655; GISEL10-NEXT: v_mov_b32_e32 v3, 0 656; GISEL10-NEXT: v_mov_b32_e32 v4, 0 657; GISEL10-NEXT: v_mov_b32_e32 v5, 0 658; GISEL10-NEXT: v_mov_b32_e32 v6, 0 659; GISEL10-NEXT: v_mov_b32_e32 v7, 0 660; GISEL10-NEXT: v_mov_b32_e32 v8, 0 661; GISEL10-NEXT: v_mov_b32_e32 v9, 0 662; GISEL10-NEXT: v_mov_b32_e32 v10, 0 663; GISEL10-NEXT: v_mov_b32_e32 v11, 0 664; GISEL10-NEXT: s_mov_b64 s[0:1], s[48:49] 665; GISEL10-NEXT: s_mov_b64 s[2:3], s[50:51] 666; GISEL10-NEXT: s_waitcnt lgkmcnt(0) 667; GISEL10-NEXT: s_swappc_b64 s[30:31], s[4:5] 668; GISEL10-NEXT: s_or_saveexec_b32 s0, -1 669; GISEL10-NEXT: v_cndmask_b32_e64 v12, v40, v43, s0 670; GISEL10-NEXT: s_mov_b32 exec_lo, s0 671; GISEL10-NEXT: v_mov_b32_e32 v0, v12 672; GISEL10-NEXT: global_store_dword v[41:42], v0, off 673; GISEL10-NEXT: s_endpgm 674; 675; DAGISEL10-LABEL: set_inactive_chain_arg_last_vgpr: 676; DAGISEL10: ; %bb.0: 677; DAGISEL10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 678; DAGISEL10-NEXT: s_mov_b32 s32, 0 679; DAGISEL10-NEXT: s_or_saveexec_b32 s0, -1 680; DAGISEL10-NEXT: v_mov_b32_e32 v40, v11 681; DAGISEL10-NEXT: s_mov_b32 exec_lo, s0 682; DAGISEL10-NEXT: s_getpc_b64 s[0:1] 683; DAGISEL10-NEXT: s_add_u32 s0, s0, gfx_callee@gotpcrel32@lo+4 684; DAGISEL10-NEXT: s_addc_u32 s1, s1, gfx_callee@gotpcrel32@hi+12 685; DAGISEL10-NEXT: v_mov_b32_e32 v43, v10 686; DAGISEL10-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 687; DAGISEL10-NEXT: v_mov_b32_e32 v42, v9 688; DAGISEL10-NEXT: v_mov_b32_e32 v41, v8 689; DAGISEL10-NEXT: v_mov_b32_e32 v0, 0 690; DAGISEL10-NEXT: v_mov_b32_e32 v1, 0 691; DAGISEL10-NEXT: v_mov_b32_e32 v2, 0 692; DAGISEL10-NEXT: v_mov_b32_e32 v3, 0 693; DAGISEL10-NEXT: v_mov_b32_e32 v4, 0 694; DAGISEL10-NEXT: v_mov_b32_e32 v5, 0 695; DAGISEL10-NEXT: v_mov_b32_e32 v6, 0 696; DAGISEL10-NEXT: v_mov_b32_e32 v7, 0 697; DAGISEL10-NEXT: v_mov_b32_e32 v8, 0 
698; DAGISEL10-NEXT: v_mov_b32_e32 v9, 0 699; DAGISEL10-NEXT: v_mov_b32_e32 v10, 0 700; DAGISEL10-NEXT: v_mov_b32_e32 v11, 0 701; DAGISEL10-NEXT: s_mov_b64 s[0:1], s[48:49] 702; DAGISEL10-NEXT: s_mov_b64 s[2:3], s[50:51] 703; DAGISEL10-NEXT: s_waitcnt lgkmcnt(0) 704; DAGISEL10-NEXT: s_swappc_b64 s[30:31], s[4:5] 705; DAGISEL10-NEXT: s_or_saveexec_b32 s0, -1 706; DAGISEL10-NEXT: v_cndmask_b32_e64 v12, v40, v43, s0 707; DAGISEL10-NEXT: s_mov_b32 exec_lo, s0 708; DAGISEL10-NEXT: v_mov_b32_e32 v0, v12 709; DAGISEL10-NEXT: global_store_dword v[41:42], v0, off 710; DAGISEL10-NEXT: s_endpgm 711; 712; GISEL11_W64-LABEL: set_inactive_chain_arg_last_vgpr: 713; GISEL11_W64: ; %bb.0: 714; GISEL11_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 715; GISEL11_W64-NEXT: s_mov_b32 s32, 0 716; GISEL11_W64-NEXT: v_mov_b32_e32 v41, v8 717; GISEL11_W64-NEXT: v_mov_b32_e32 v42, v9 718; GISEL11_W64-NEXT: v_mov_b32_e32 v43, v10 719; GISEL11_W64-NEXT: s_or_saveexec_b64 s[0:1], -1 720; GISEL11_W64-NEXT: v_mov_b32_e32 v40, v11 721; GISEL11_W64-NEXT: s_mov_b64 exec, s[0:1] 722; GISEL11_W64-NEXT: s_getpc_b64 s[0:1] 723; GISEL11_W64-NEXT: s_add_u32 s0, s0, gfx_callee@gotpcrel32@lo+4 724; GISEL11_W64-NEXT: s_addc_u32 s1, s1, gfx_callee@gotpcrel32@hi+12 725; GISEL11_W64-NEXT: v_mov_b32_e32 v0, 0 726; GISEL11_W64-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 727; GISEL11_W64-NEXT: v_mov_b32_e32 v1, 0 728; GISEL11_W64-NEXT: v_mov_b32_e32 v2, 0 729; GISEL11_W64-NEXT: v_mov_b32_e32 v3, 0 730; GISEL11_W64-NEXT: v_mov_b32_e32 v4, 0 731; GISEL11_W64-NEXT: v_mov_b32_e32 v5, 0 732; GISEL11_W64-NEXT: v_mov_b32_e32 v6, 0 733; GISEL11_W64-NEXT: v_mov_b32_e32 v7, 0 734; GISEL11_W64-NEXT: v_mov_b32_e32 v8, 0 735; GISEL11_W64-NEXT: v_mov_b32_e32 v9, 0 736; GISEL11_W64-NEXT: v_mov_b32_e32 v10, 0 737; GISEL11_W64-NEXT: v_mov_b32_e32 v11, 0 738; GISEL11_W64-NEXT: s_waitcnt lgkmcnt(0) 739; GISEL11_W64-NEXT: s_swappc_b64 s[30:31], s[0:1] 740; GISEL11_W64-NEXT: s_or_saveexec_b64 s[0:1], -1 741; GISEL11_W64-NEXT: s_delay_alu 
instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) 742; GISEL11_W64-NEXT: v_cndmask_b32_e64 v12, v40, v43, s[0:1] 743; GISEL11_W64-NEXT: s_mov_b64 exec, s[0:1] 744; GISEL11_W64-NEXT: v_mov_b32_e32 v0, v12 745; GISEL11_W64-NEXT: global_store_b32 v[41:42], v0, off 746; GISEL11_W64-NEXT: s_endpgm 747; 748; DAGISEL11_W64-LABEL: set_inactive_chain_arg_last_vgpr: 749; DAGISEL11_W64: ; %bb.0: 750; DAGISEL11_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 751; DAGISEL11_W64-NEXT: s_mov_b32 s32, 0 752; DAGISEL11_W64-NEXT: s_or_saveexec_b64 s[0:1], -1 753; DAGISEL11_W64-NEXT: v_mov_b32_e32 v40, v11 754; DAGISEL11_W64-NEXT: s_mov_b64 exec, s[0:1] 755; DAGISEL11_W64-NEXT: s_getpc_b64 s[0:1] 756; DAGISEL11_W64-NEXT: s_add_u32 s0, s0, gfx_callee@gotpcrel32@lo+4 757; DAGISEL11_W64-NEXT: s_addc_u32 s1, s1, gfx_callee@gotpcrel32@hi+12 758; DAGISEL11_W64-NEXT: v_mov_b32_e32 v43, v10 759; DAGISEL11_W64-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 760; DAGISEL11_W64-NEXT: v_mov_b32_e32 v42, v9 761; DAGISEL11_W64-NEXT: v_mov_b32_e32 v41, v8 762; DAGISEL11_W64-NEXT: v_mov_b32_e32 v0, 0 763; DAGISEL11_W64-NEXT: v_mov_b32_e32 v1, 0 764; DAGISEL11_W64-NEXT: v_mov_b32_e32 v2, 0 765; DAGISEL11_W64-NEXT: v_mov_b32_e32 v3, 0 766; DAGISEL11_W64-NEXT: v_mov_b32_e32 v4, 0 767; DAGISEL11_W64-NEXT: v_mov_b32_e32 v5, 0 768; DAGISEL11_W64-NEXT: v_mov_b32_e32 v6, 0 769; DAGISEL11_W64-NEXT: v_mov_b32_e32 v7, 0 770; DAGISEL11_W64-NEXT: v_mov_b32_e32 v8, 0 771; DAGISEL11_W64-NEXT: v_mov_b32_e32 v9, 0 772; DAGISEL11_W64-NEXT: v_mov_b32_e32 v10, 0 773; DAGISEL11_W64-NEXT: v_mov_b32_e32 v11, 0 774; DAGISEL11_W64-NEXT: s_waitcnt lgkmcnt(0) 775; DAGISEL11_W64-NEXT: s_swappc_b64 s[30:31], s[0:1] 776; DAGISEL11_W64-NEXT: s_or_saveexec_b64 s[0:1], -1 777; DAGISEL11_W64-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) 778; DAGISEL11_W64-NEXT: v_cndmask_b32_e64 v12, v40, v43, s[0:1] 779; DAGISEL11_W64-NEXT: s_mov_b64 exec, s[0:1] 780; DAGISEL11_W64-NEXT: v_mov_b32_e32 v0, v12 
781; DAGISEL11_W64-NEXT: global_store_b32 v[41:42], v0, off 782; DAGISEL11_W64-NEXT: s_endpgm 783; 784; GISEL10_W64-LABEL: set_inactive_chain_arg_last_vgpr: 785; GISEL10_W64: ; %bb.0: 786; GISEL10_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 787; GISEL10_W64-NEXT: s_mov_b32 s32, 0 788; GISEL10_W64-NEXT: v_mov_b32_e32 v41, v8 789; GISEL10_W64-NEXT: v_mov_b32_e32 v42, v9 790; GISEL10_W64-NEXT: v_mov_b32_e32 v43, v10 791; GISEL10_W64-NEXT: s_or_saveexec_b64 s[0:1], -1 792; GISEL10_W64-NEXT: v_mov_b32_e32 v40, v11 793; GISEL10_W64-NEXT: s_mov_b64 exec, s[0:1] 794; GISEL10_W64-NEXT: s_getpc_b64 s[0:1] 795; GISEL10_W64-NEXT: s_add_u32 s0, s0, gfx_callee@gotpcrel32@lo+4 796; GISEL10_W64-NEXT: s_addc_u32 s1, s1, gfx_callee@gotpcrel32@hi+12 797; GISEL10_W64-NEXT: v_mov_b32_e32 v0, 0 798; GISEL10_W64-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 799; GISEL10_W64-NEXT: v_mov_b32_e32 v1, 0 800; GISEL10_W64-NEXT: v_mov_b32_e32 v2, 0 801; GISEL10_W64-NEXT: v_mov_b32_e32 v3, 0 802; GISEL10_W64-NEXT: v_mov_b32_e32 v4, 0 803; GISEL10_W64-NEXT: v_mov_b32_e32 v5, 0 804; GISEL10_W64-NEXT: v_mov_b32_e32 v6, 0 805; GISEL10_W64-NEXT: v_mov_b32_e32 v7, 0 806; GISEL10_W64-NEXT: v_mov_b32_e32 v8, 0 807; GISEL10_W64-NEXT: v_mov_b32_e32 v9, 0 808; GISEL10_W64-NEXT: v_mov_b32_e32 v10, 0 809; GISEL10_W64-NEXT: v_mov_b32_e32 v11, 0 810; GISEL10_W64-NEXT: s_mov_b64 s[0:1], s[48:49] 811; GISEL10_W64-NEXT: s_mov_b64 s[2:3], s[50:51] 812; GISEL10_W64-NEXT: s_waitcnt lgkmcnt(0) 813; GISEL10_W64-NEXT: s_swappc_b64 s[30:31], s[4:5] 814; GISEL10_W64-NEXT: s_or_saveexec_b64 s[0:1], -1 815; GISEL10_W64-NEXT: v_cndmask_b32_e64 v12, v40, v43, s[0:1] 816; GISEL10_W64-NEXT: s_mov_b64 exec, s[0:1] 817; GISEL10_W64-NEXT: v_mov_b32_e32 v0, v12 818; GISEL10_W64-NEXT: global_store_dword v[41:42], v0, off 819; GISEL10_W64-NEXT: s_endpgm 820; 821; DAGISEL10_W64-LABEL: set_inactive_chain_arg_last_vgpr: 822; DAGISEL10_W64: ; %bb.0: 823; DAGISEL10_W64-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 824; 
DAGISEL10_W64-NEXT: s_mov_b32 s32, 0 825; DAGISEL10_W64-NEXT: s_or_saveexec_b64 s[0:1], -1 826; DAGISEL10_W64-NEXT: v_mov_b32_e32 v40, v11 827; DAGISEL10_W64-NEXT: s_mov_b64 exec, s[0:1] 828; DAGISEL10_W64-NEXT: s_getpc_b64 s[0:1] 829; DAGISEL10_W64-NEXT: s_add_u32 s0, s0, gfx_callee@gotpcrel32@lo+4 830; DAGISEL10_W64-NEXT: s_addc_u32 s1, s1, gfx_callee@gotpcrel32@hi+12 831; DAGISEL10_W64-NEXT: v_mov_b32_e32 v43, v10 832; DAGISEL10_W64-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x0 833; DAGISEL10_W64-NEXT: v_mov_b32_e32 v42, v9 834; DAGISEL10_W64-NEXT: v_mov_b32_e32 v41, v8 835; DAGISEL10_W64-NEXT: v_mov_b32_e32 v0, 0 836; DAGISEL10_W64-NEXT: v_mov_b32_e32 v1, 0 837; DAGISEL10_W64-NEXT: v_mov_b32_e32 v2, 0 838; DAGISEL10_W64-NEXT: v_mov_b32_e32 v3, 0 839; DAGISEL10_W64-NEXT: v_mov_b32_e32 v4, 0 840; DAGISEL10_W64-NEXT: v_mov_b32_e32 v5, 0 841; DAGISEL10_W64-NEXT: v_mov_b32_e32 v6, 0 842; DAGISEL10_W64-NEXT: v_mov_b32_e32 v7, 0 843; DAGISEL10_W64-NEXT: v_mov_b32_e32 v8, 0 844; DAGISEL10_W64-NEXT: v_mov_b32_e32 v9, 0 845; DAGISEL10_W64-NEXT: v_mov_b32_e32 v10, 0 846; DAGISEL10_W64-NEXT: v_mov_b32_e32 v11, 0 847; DAGISEL10_W64-NEXT: s_mov_b64 s[0:1], s[48:49] 848; DAGISEL10_W64-NEXT: s_mov_b64 s[2:3], s[50:51] 849; DAGISEL10_W64-NEXT: s_waitcnt lgkmcnt(0) 850; DAGISEL10_W64-NEXT: s_swappc_b64 s[30:31], s[4:5] 851; DAGISEL10_W64-NEXT: s_or_saveexec_b64 s[0:1], -1 852; DAGISEL10_W64-NEXT: v_cndmask_b32_e64 v12, v40, v43, s[0:1] 853; DAGISEL10_W64-NEXT: s_mov_b64 exec, s[0:1] 854; DAGISEL10_W64-NEXT: v_mov_b32_e32 v0, v12 855; DAGISEL10_W64-NEXT: global_store_dword v[41:42], v0, off 856; DAGISEL10_W64-NEXT: s_endpgm 857 call amdgpu_gfx void @gfx_callee(<12 x i32> zeroinitializer) 858 %tmp = call i32 @llvm.amdgcn.set.inactive.chain.arg.i32(i32 %active, i32 %inactive) #0 859 %wwm = call i32 @llvm.amdgcn.strict.wwm.i32(i32 %tmp) 860 store i32 %wwm, ptr addrspace(1) %out 861 ret void 862} 863 864declare i32 @llvm.amdgcn.set.inactive.chain.arg.i32(i32, i32) #0 865declare i64 
@llvm.amdgcn.set.inactive.chain.arg.i64(i64, i64) #0
; Remaining external declarations: the DPP and strict-WWM intrinsics, and the
; amdgpu_gfx callee that takes a <12 x i32> argument.
declare i32 @llvm.amdgcn.update.dpp.i32(i32, i32, i32 immarg, i32 immarg, i32 immarg, i1 immarg)
declare i32 @llvm.amdgcn.strict.wwm.i32(i32)
declare i64 @llvm.amdgcn.strict.wwm.i64(i64)
declare amdgpu_gfx void @gfx_callee(<12 x i32>)

; Attribute group #0 is the one attached to the set.inactive.chain.arg
; intrinsic declarations and their call sites.
attributes #0 = { convergent readnone willreturn nocallback nofree}