1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 2; RUN: llc -global-isel=1 -mtriple=amdgcn--amdpal -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GISEL-GFX11 %s 3; RUN: llc -global-isel=1 -mtriple=amdgcn--amdpal -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefix=GISEL-GFX10 %s 4; RUN: llc -global-isel=0 -mtriple=amdgcn--amdpal -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=DAGISEL-GFX11 %s 5; RUN: llc -global-isel=0 -mtriple=amdgcn--amdpal -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck -check-prefix=DAGISEL-GFX10 %s 6 7declare amdgpu_gfx void @use(...) 8 9define amdgpu_cs_chain void @amdgpu_cs_chain_no_stack({ptr, i32, <4 x i32>} inreg %a, {ptr, i32, <4 x i32>} %b) { 10; GISEL-GFX11-LABEL: amdgpu_cs_chain_no_stack: 11; GISEL-GFX11: ; %bb.0: 12; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13; GISEL-GFX11-NEXT: s_endpgm 14; 15; GISEL-GFX10-LABEL: amdgpu_cs_chain_no_stack: 16; GISEL-GFX10: ; %bb.0: 17; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18; GISEL-GFX10-NEXT: s_endpgm 19; 20; DAGISEL-GFX11-LABEL: amdgpu_cs_chain_no_stack: 21; DAGISEL-GFX11: ; %bb.0: 22; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 23; DAGISEL-GFX11-NEXT: s_endpgm 24; 25; DAGISEL-GFX10-LABEL: amdgpu_cs_chain_no_stack: 26; DAGISEL-GFX10: ; %bb.0: 27; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 28; DAGISEL-GFX10-NEXT: s_endpgm 29 ret void 30} 31 32define amdgpu_cs_chain void @amdgpu_cs_chain_simple_call(<4 x i32> inreg %sgpr, <4 x i32> %vgpr) { 33; GISEL-GFX11-LABEL: amdgpu_cs_chain_simple_call: 34; GISEL-GFX11: ; %bb.0: 35; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 36; GISEL-GFX11-NEXT: v_dual_mov_b32 v4, v8 :: v_dual_mov_b32 v5, v9 37; GISEL-GFX11-NEXT: v_dual_mov_b32 v6, v10 :: v_dual_mov_b32 v7, v11 38; GISEL-GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 39; GISEL-GFX11-NEXT: v_dual_mov_b32 
v2, s2 :: v_dual_mov_b32 v3, s3 40; GISEL-GFX11-NEXT: s_mov_b32 s4, use@abs32@lo 41; GISEL-GFX11-NEXT: s_mov_b32 s5, use@abs32@hi 42; GISEL-GFX11-NEXT: s_mov_b32 s32, 0 43; GISEL-GFX11-NEXT: s_swappc_b64 s[30:31], s[4:5] 44; GISEL-GFX11-NEXT: s_endpgm 45; 46; GISEL-GFX10-LABEL: amdgpu_cs_chain_simple_call: 47; GISEL-GFX10: ; %bb.0: 48; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 49; GISEL-GFX10-NEXT: v_mov_b32_e32 v4, v8 50; GISEL-GFX10-NEXT: v_mov_b32_e32 v5, v9 51; GISEL-GFX10-NEXT: v_mov_b32_e32 v6, v10 52; GISEL-GFX10-NEXT: v_mov_b32_e32 v7, v11 53; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s0 54; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, s1 55; GISEL-GFX10-NEXT: v_mov_b32_e32 v2, s2 56; GISEL-GFX10-NEXT: v_mov_b32_e32 v3, s3 57; GISEL-GFX10-NEXT: s_mov_b64 s[0:1], s[48:49] 58; GISEL-GFX10-NEXT: s_mov_b32 s4, use@abs32@lo 59; GISEL-GFX10-NEXT: s_mov_b32 s5, use@abs32@hi 60; GISEL-GFX10-NEXT: s_mov_b64 s[2:3], s[50:51] 61; GISEL-GFX10-NEXT: s_mov_b32 s32, 0 62; GISEL-GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] 63; GISEL-GFX10-NEXT: s_endpgm 64; 65; DAGISEL-GFX11-LABEL: amdgpu_cs_chain_simple_call: 66; DAGISEL-GFX11: ; %bb.0: 67; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 68; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v7, v11 :: v_dual_mov_b32 v6, v10 69; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v5, v9 :: v_dual_mov_b32 v4, v8 70; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 71; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 72; DAGISEL-GFX11-NEXT: s_mov_b32 s5, use@abs32@hi 73; DAGISEL-GFX11-NEXT: s_mov_b32 s4, use@abs32@lo 74; DAGISEL-GFX11-NEXT: s_mov_b32 s32, 0 75; DAGISEL-GFX11-NEXT: s_swappc_b64 s[30:31], s[4:5] 76; DAGISEL-GFX11-NEXT: s_endpgm 77; 78; DAGISEL-GFX10-LABEL: amdgpu_cs_chain_simple_call: 79; DAGISEL-GFX10: ; %bb.0: 80; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 81; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v7, v11 82; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v6, v10 83; DAGISEL-GFX10-NEXT: 
v_mov_b32_e32 v5, v9 84; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v4, v8 85; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v0, s0 86; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v1, s1 87; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v2, s2 88; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v3, s3 89; DAGISEL-GFX10-NEXT: s_mov_b64 s[0:1], s[48:49] 90; DAGISEL-GFX10-NEXT: s_mov_b32 s5, use@abs32@hi 91; DAGISEL-GFX10-NEXT: s_mov_b32 s4, use@abs32@lo 92; DAGISEL-GFX10-NEXT: s_mov_b64 s[2:3], s[50:51] 93; DAGISEL-GFX10-NEXT: s_mov_b32 s32, 0 94; DAGISEL-GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] 95; DAGISEL-GFX10-NEXT: s_endpgm 96 call amdgpu_gfx void @use(<4 x i32> %sgpr, <4 x i32> %vgpr) 97 ret void 98} 99 100define amdgpu_cs_chain void @amdgpu_cs_chain_spill(<24 x i32> inreg %sgprs, <24 x i32> %vgprs) { 101; GISEL-GFX11-LABEL: amdgpu_cs_chain_spill: 102; GISEL-GFX11: ; %bb.0: 103; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 104; GISEL-GFX11-NEXT: s_mov_b32 s32, 0 105; GISEL-GFX11-NEXT: v_dual_mov_b32 v32, v8 :: v_dual_mov_b32 v33, v9 106; GISEL-GFX11-NEXT: s_add_u32 s24, s32, 4 107; GISEL-GFX11-NEXT: scratch_store_b32 off, v16, s32 108; GISEL-GFX11-NEXT: scratch_store_b32 off, v17, s24 109; GISEL-GFX11-NEXT: s_add_u32 s24, s32, 8 110; GISEL-GFX11-NEXT: s_add_u32 s25, s32, 12 111; GISEL-GFX11-NEXT: scratch_store_b32 off, v18, s24 112; GISEL-GFX11-NEXT: scratch_store_b32 off, v19, s25 113; GISEL-GFX11-NEXT: s_add_u32 s24, s32, 16 114; GISEL-GFX11-NEXT: s_add_u32 s25, s32, 20 115; GISEL-GFX11-NEXT: scratch_store_b32 off, v20, s24 116; GISEL-GFX11-NEXT: scratch_store_b32 off, v21, s25 117; GISEL-GFX11-NEXT: s_add_u32 s24, s32, 24 118; GISEL-GFX11-NEXT: s_add_u32 s25, s32, 28 119; GISEL-GFX11-NEXT: scratch_store_b32 off, v22, s24 120; GISEL-GFX11-NEXT: scratch_store_b32 off, v23, s25 121; GISEL-GFX11-NEXT: s_add_u32 s24, s32, 32 122; GISEL-GFX11-NEXT: s_add_u32 s25, s32, 36 123; GISEL-GFX11-NEXT: scratch_store_b32 off, v24, s24 124; GISEL-GFX11-NEXT: scratch_store_b32 off, v25, s25 125; GISEL-GFX11-NEXT: 
s_add_u32 s24, s32, 40 126; GISEL-GFX11-NEXT: v_dual_mov_b32 v34, v10 :: v_dual_mov_b32 v35, v11 127; GISEL-GFX11-NEXT: v_dual_mov_b32 v36, v12 :: v_dual_mov_b32 v37, v13 128; GISEL-GFX11-NEXT: v_dual_mov_b32 v38, v14 :: v_dual_mov_b32 v39, v15 129; GISEL-GFX11-NEXT: s_add_u32 s25, s32, 44 130; GISEL-GFX11-NEXT: scratch_store_b32 off, v26, s24 131; GISEL-GFX11-NEXT: scratch_store_b32 off, v27, s25 132; GISEL-GFX11-NEXT: s_add_u32 s24, s32, 48 133; GISEL-GFX11-NEXT: s_add_u32 s25, s32, 52 134; GISEL-GFX11-NEXT: scratch_store_b32 off, v28, s24 135; GISEL-GFX11-NEXT: scratch_store_b32 off, v29, s25 136; GISEL-GFX11-NEXT: s_add_u32 s24, s32, 56 137; GISEL-GFX11-NEXT: s_add_u32 s25, s32, 60 138; GISEL-GFX11-NEXT: scratch_store_b32 off, v30, s24 139; GISEL-GFX11-NEXT: scratch_store_b32 off, v31, s25 140; GISEL-GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 141; GISEL-GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 142; GISEL-GFX11-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5 143; GISEL-GFX11-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7 144; GISEL-GFX11-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9 145; GISEL-GFX11-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11 146; GISEL-GFX11-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13 147; GISEL-GFX11-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15 148; GISEL-GFX11-NEXT: v_dual_mov_b32 v16, s16 :: v_dual_mov_b32 v17, s17 149; GISEL-GFX11-NEXT: v_dual_mov_b32 v18, s18 :: v_dual_mov_b32 v19, s19 150; GISEL-GFX11-NEXT: v_dual_mov_b32 v20, s20 :: v_dual_mov_b32 v21, s21 151; GISEL-GFX11-NEXT: v_dual_mov_b32 v22, s22 :: v_dual_mov_b32 v23, s23 152; GISEL-GFX11-NEXT: v_dual_mov_b32 v24, v32 :: v_dual_mov_b32 v25, v33 153; GISEL-GFX11-NEXT: v_dual_mov_b32 v26, v34 :: v_dual_mov_b32 v27, v35 154; GISEL-GFX11-NEXT: v_dual_mov_b32 v28, v36 :: v_dual_mov_b32 v29, v37 155; GISEL-GFX11-NEXT: v_dual_mov_b32 v30, v38 :: v_dual_mov_b32 v31, v39 156; GISEL-GFX11-NEXT: 
s_mov_b32 s24, use@abs32@lo 157; GISEL-GFX11-NEXT: s_mov_b32 s25, use@abs32@hi 158; GISEL-GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 159; GISEL-GFX11-NEXT: s_swappc_b64 s[30:31], s[24:25] 160; GISEL-GFX11-NEXT: s_endpgm 161; 162; GISEL-GFX10-LABEL: amdgpu_cs_chain_spill: 163; GISEL-GFX10: ; %bb.0: 164; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 165; GISEL-GFX10-NEXT: v_mov_b32_e32 v32, v8 166; GISEL-GFX10-NEXT: v_mov_b32_e32 v33, v9 167; GISEL-GFX10-NEXT: v_mov_b32_e32 v34, v10 168; GISEL-GFX10-NEXT: v_mov_b32_e32 v35, v11 169; GISEL-GFX10-NEXT: v_mov_b32_e32 v36, v12 170; GISEL-GFX10-NEXT: v_mov_b32_e32 v37, v13 171; GISEL-GFX10-NEXT: v_mov_b32_e32 v38, v14 172; GISEL-GFX10-NEXT: v_mov_b32_e32 v39, v15 173; GISEL-GFX10-NEXT: s_mov_b32 s32, 0 174; GISEL-GFX10-NEXT: buffer_store_dword v16, off, s[48:51], s32 175; GISEL-GFX10-NEXT: buffer_store_dword v17, off, s[48:51], s32 offset:4 176; GISEL-GFX10-NEXT: buffer_store_dword v18, off, s[48:51], s32 offset:8 177; GISEL-GFX10-NEXT: buffer_store_dword v19, off, s[48:51], s32 offset:12 178; GISEL-GFX10-NEXT: buffer_store_dword v20, off, s[48:51], s32 offset:16 179; GISEL-GFX10-NEXT: buffer_store_dword v21, off, s[48:51], s32 offset:20 180; GISEL-GFX10-NEXT: buffer_store_dword v22, off, s[48:51], s32 offset:24 181; GISEL-GFX10-NEXT: buffer_store_dword v23, off, s[48:51], s32 offset:28 182; GISEL-GFX10-NEXT: buffer_store_dword v24, off, s[48:51], s32 offset:32 183; GISEL-GFX10-NEXT: buffer_store_dword v25, off, s[48:51], s32 offset:36 184; GISEL-GFX10-NEXT: buffer_store_dword v26, off, s[48:51], s32 offset:40 185; GISEL-GFX10-NEXT: buffer_store_dword v27, off, s[48:51], s32 offset:44 186; GISEL-GFX10-NEXT: buffer_store_dword v28, off, s[48:51], s32 offset:48 187; GISEL-GFX10-NEXT: buffer_store_dword v29, off, s[48:51], s32 offset:52 188; GISEL-GFX10-NEXT: buffer_store_dword v30, off, s[48:51], s32 offset:56 189; GISEL-GFX10-NEXT: buffer_store_dword v31, off, s[48:51], s32 offset:60 190; GISEL-GFX10-NEXT: 
v_mov_b32_e32 v0, s0 191; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, s1 192; GISEL-GFX10-NEXT: v_mov_b32_e32 v2, s2 193; GISEL-GFX10-NEXT: v_mov_b32_e32 v3, s3 194; GISEL-GFX10-NEXT: v_mov_b32_e32 v4, s4 195; GISEL-GFX10-NEXT: v_mov_b32_e32 v5, s5 196; GISEL-GFX10-NEXT: v_mov_b32_e32 v6, s6 197; GISEL-GFX10-NEXT: v_mov_b32_e32 v7, s7 198; GISEL-GFX10-NEXT: v_mov_b32_e32 v8, s8 199; GISEL-GFX10-NEXT: v_mov_b32_e32 v9, s9 200; GISEL-GFX10-NEXT: v_mov_b32_e32 v10, s10 201; GISEL-GFX10-NEXT: v_mov_b32_e32 v11, s11 202; GISEL-GFX10-NEXT: v_mov_b32_e32 v12, s12 203; GISEL-GFX10-NEXT: v_mov_b32_e32 v13, s13 204; GISEL-GFX10-NEXT: v_mov_b32_e32 v14, s14 205; GISEL-GFX10-NEXT: v_mov_b32_e32 v15, s15 206; GISEL-GFX10-NEXT: v_mov_b32_e32 v16, s16 207; GISEL-GFX10-NEXT: v_mov_b32_e32 v17, s17 208; GISEL-GFX10-NEXT: v_mov_b32_e32 v18, s18 209; GISEL-GFX10-NEXT: v_mov_b32_e32 v19, s19 210; GISEL-GFX10-NEXT: v_mov_b32_e32 v20, s20 211; GISEL-GFX10-NEXT: v_mov_b32_e32 v21, s21 212; GISEL-GFX10-NEXT: v_mov_b32_e32 v22, s22 213; GISEL-GFX10-NEXT: v_mov_b32_e32 v23, s23 214; GISEL-GFX10-NEXT: v_mov_b32_e32 v24, v32 215; GISEL-GFX10-NEXT: v_mov_b32_e32 v25, v33 216; GISEL-GFX10-NEXT: v_mov_b32_e32 v26, v34 217; GISEL-GFX10-NEXT: v_mov_b32_e32 v27, v35 218; GISEL-GFX10-NEXT: v_mov_b32_e32 v28, v36 219; GISEL-GFX10-NEXT: v_mov_b32_e32 v29, v37 220; GISEL-GFX10-NEXT: v_mov_b32_e32 v30, v38 221; GISEL-GFX10-NEXT: v_mov_b32_e32 v31, v39 222; GISEL-GFX10-NEXT: s_mov_b64 s[0:1], s[48:49] 223; GISEL-GFX10-NEXT: s_mov_b32 s24, use@abs32@lo 224; GISEL-GFX10-NEXT: s_mov_b32 s25, use@abs32@hi 225; GISEL-GFX10-NEXT: s_mov_b64 s[2:3], s[50:51] 226; GISEL-GFX10-NEXT: s_swappc_b64 s[30:31], s[24:25] 227; GISEL-GFX10-NEXT: s_endpgm 228; 229; DAGISEL-GFX11-LABEL: amdgpu_cs_chain_spill: 230; DAGISEL-GFX11: ; %bb.0: 231; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 232; DAGISEL-GFX11-NEXT: s_mov_b32 s32, 0 233; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v32, v15 :: v_dual_mov_b32 v33, v14 234; 
DAGISEL-GFX11-NEXT: s_add_i32 s24, s32, 60 235; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v16, s32 236; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v31, s24 237; DAGISEL-GFX11-NEXT: s_add_i32 s24, s32, 56 238; DAGISEL-GFX11-NEXT: s_add_i32 s25, s32, 52 239; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v30, s24 240; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v29, s25 241; DAGISEL-GFX11-NEXT: s_add_i32 s24, s32, 48 242; DAGISEL-GFX11-NEXT: s_add_i32 s25, s32, 44 243; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v28, s24 244; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v27, s25 245; DAGISEL-GFX11-NEXT: s_add_i32 s24, s32, 40 246; DAGISEL-GFX11-NEXT: s_add_i32 s25, s32, 36 247; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v26, s24 248; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v25, s25 249; DAGISEL-GFX11-NEXT: s_add_i32 s24, s32, 32 250; DAGISEL-GFX11-NEXT: s_add_i32 s25, s32, 28 251; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v24, s24 252; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v23, s25 253; DAGISEL-GFX11-NEXT: s_add_i32 s24, s32, 24 254; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v34, v13 :: v_dual_mov_b32 v35, v12 255; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v36, v11 :: v_dual_mov_b32 v37, v10 256; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v38, v9 :: v_dual_mov_b32 v39, v8 257; DAGISEL-GFX11-NEXT: s_add_i32 s25, s32, 20 258; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v22, s24 259; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v21, s25 260; DAGISEL-GFX11-NEXT: s_add_i32 s24, s32, 16 261; DAGISEL-GFX11-NEXT: s_add_i32 s25, s32, 12 262; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v20, s24 263; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v19, s25 264; DAGISEL-GFX11-NEXT: s_add_i32 s24, s32, 8 265; DAGISEL-GFX11-NEXT: s_add_i32 s25, s32, 4 266; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v18, s24 267; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v17, s25 268; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1 269; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s3 270; 
DAGISEL-GFX11-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s5 271; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v6, s6 :: v_dual_mov_b32 v7, s7 272; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v8, s8 :: v_dual_mov_b32 v9, s9 273; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v10, s10 :: v_dual_mov_b32 v11, s11 274; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v12, s12 :: v_dual_mov_b32 v13, s13 275; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v14, s14 :: v_dual_mov_b32 v15, s15 276; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v16, s16 :: v_dual_mov_b32 v17, s17 277; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v18, s18 :: v_dual_mov_b32 v19, s19 278; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v20, s20 :: v_dual_mov_b32 v21, s21 279; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v22, s22 :: v_dual_mov_b32 v23, s23 280; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v24, v39 :: v_dual_mov_b32 v25, v38 281; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v26, v37 :: v_dual_mov_b32 v27, v36 282; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v28, v35 :: v_dual_mov_b32 v29, v34 283; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v30, v33 :: v_dual_mov_b32 v31, v32 284; DAGISEL-GFX11-NEXT: s_mov_b32 s25, use@abs32@hi 285; DAGISEL-GFX11-NEXT: s_mov_b32 s24, use@abs32@lo 286; DAGISEL-GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 287; DAGISEL-GFX11-NEXT: s_swappc_b64 s[30:31], s[24:25] 288; DAGISEL-GFX11-NEXT: s_endpgm 289; 290; DAGISEL-GFX10-LABEL: amdgpu_cs_chain_spill: 291; DAGISEL-GFX10: ; %bb.0: 292; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 293; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v32, v15 294; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v33, v14 295; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v34, v13 296; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v35, v12 297; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v36, v11 298; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v37, v10 299; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v38, v9 300; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v39, v8 301; DAGISEL-GFX10-NEXT: s_mov_b32 s32, 0 302; DAGISEL-GFX10-NEXT: buffer_store_dword v16, off, s[48:51], s32 303; DAGISEL-GFX10-NEXT: buffer_store_dword v17, off, 
s[48:51], s32 offset:4 304; DAGISEL-GFX10-NEXT: buffer_store_dword v18, off, s[48:51], s32 offset:8 305; DAGISEL-GFX10-NEXT: buffer_store_dword v19, off, s[48:51], s32 offset:12 306; DAGISEL-GFX10-NEXT: buffer_store_dword v20, off, s[48:51], s32 offset:16 307; DAGISEL-GFX10-NEXT: buffer_store_dword v21, off, s[48:51], s32 offset:20 308; DAGISEL-GFX10-NEXT: buffer_store_dword v22, off, s[48:51], s32 offset:24 309; DAGISEL-GFX10-NEXT: buffer_store_dword v23, off, s[48:51], s32 offset:28 310; DAGISEL-GFX10-NEXT: buffer_store_dword v24, off, s[48:51], s32 offset:32 311; DAGISEL-GFX10-NEXT: buffer_store_dword v25, off, s[48:51], s32 offset:36 312; DAGISEL-GFX10-NEXT: buffer_store_dword v26, off, s[48:51], s32 offset:40 313; DAGISEL-GFX10-NEXT: buffer_store_dword v27, off, s[48:51], s32 offset:44 314; DAGISEL-GFX10-NEXT: buffer_store_dword v28, off, s[48:51], s32 offset:48 315; DAGISEL-GFX10-NEXT: buffer_store_dword v29, off, s[48:51], s32 offset:52 316; DAGISEL-GFX10-NEXT: buffer_store_dword v30, off, s[48:51], s32 offset:56 317; DAGISEL-GFX10-NEXT: buffer_store_dword v31, off, s[48:51], s32 offset:60 318; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v0, s0 319; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v1, s1 320; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v2, s2 321; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v3, s3 322; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v4, s4 323; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v5, s5 324; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v6, s6 325; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v7, s7 326; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v8, s8 327; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v9, s9 328; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v10, s10 329; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v11, s11 330; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v12, s12 331; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v13, s13 332; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v14, s14 333; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v15, s15 334; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v16, s16 335; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v17, s17 336; DAGISEL-GFX10-NEXT: v_mov_b32_e32 
v18, s18 337; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v19, s19 338; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v20, s20 339; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v21, s21 340; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v22, s22 341; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v23, s23 342; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v24, v39 343; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v25, v38 344; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v26, v37 345; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v27, v36 346; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v28, v35 347; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v29, v34 348; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v30, v33 349; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v31, v32 350; DAGISEL-GFX10-NEXT: s_mov_b64 s[0:1], s[48:49] 351; DAGISEL-GFX10-NEXT: s_mov_b32 s25, use@abs32@hi 352; DAGISEL-GFX10-NEXT: s_mov_b32 s24, use@abs32@lo 353; DAGISEL-GFX10-NEXT: s_mov_b64 s[2:3], s[50:51] 354; DAGISEL-GFX10-NEXT: s_swappc_b64 s[30:31], s[24:25] 355; DAGISEL-GFX10-NEXT: s_endpgm 356 call amdgpu_gfx void @use(<24 x i32> %sgprs, <24 x i32> %vgprs) 357 ret void 358} 359 360define amdgpu_cs_chain void @alloca_and_call() { 361; GISEL-GFX11-LABEL: alloca_and_call: 362; GISEL-GFX11: ; %bb.0: ; %.entry 363; GISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 364; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, 42 365; GISEL-GFX11-NEXT: s_mov_b32 s0, use@abs32@lo 366; GISEL-GFX11-NEXT: s_mov_b32 s1, use@abs32@hi 367; GISEL-GFX11-NEXT: s_mov_b32 s32, 16 368; GISEL-GFX11-NEXT: scratch_store_b32 off, v0, off 369; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, 0 370; GISEL-GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 371; GISEL-GFX11-NEXT: s_endpgm 372; 373; GISEL-GFX10-LABEL: alloca_and_call: 374; GISEL-GFX10: ; %bb.0: ; %.entry 375; GISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 376; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, 42 377; GISEL-GFX10-NEXT: s_mov_b64 s[0:1], s[48:49] 378; GISEL-GFX10-NEXT: s_mov_b32 s4, use@abs32@lo 379; GISEL-GFX10-NEXT: s_mov_b32 s5, use@abs32@hi 380; GISEL-GFX10-NEXT: s_mov_b64 s[2:3], s[50:51] 381; GISEL-GFX10-NEXT: 
buffer_store_dword v0, off, s[48:51], 0 382; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, 0 383; GISEL-GFX10-NEXT: s_movk_i32 s32, 0x200 384; GISEL-GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] 385; GISEL-GFX10-NEXT: s_endpgm 386; 387; DAGISEL-GFX11-LABEL: alloca_and_call: 388; DAGISEL-GFX11: ; %bb.0: ; %.entry 389; DAGISEL-GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 390; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v0, 42 391; DAGISEL-GFX11-NEXT: s_mov_b32 s1, use@abs32@hi 392; DAGISEL-GFX11-NEXT: s_mov_b32 s0, use@abs32@lo 393; DAGISEL-GFX11-NEXT: s_mov_b32 s32, 16 394; DAGISEL-GFX11-NEXT: scratch_store_b32 off, v0, off 395; DAGISEL-GFX11-NEXT: v_mov_b32_e32 v0, 0 396; DAGISEL-GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] 397; DAGISEL-GFX11-NEXT: s_endpgm 398; 399; DAGISEL-GFX10-LABEL: alloca_and_call: 400; DAGISEL-GFX10: ; %bb.0: ; %.entry 401; DAGISEL-GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 402; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v0, 42 403; DAGISEL-GFX10-NEXT: s_mov_b64 s[0:1], s[48:49] 404; DAGISEL-GFX10-NEXT: s_mov_b32 s5, use@abs32@hi 405; DAGISEL-GFX10-NEXT: s_mov_b32 s4, use@abs32@lo 406; DAGISEL-GFX10-NEXT: s_mov_b64 s[2:3], s[50:51] 407; DAGISEL-GFX10-NEXT: buffer_store_dword v0, off, s[48:51], 0 408; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v0, 0 409; DAGISEL-GFX10-NEXT: s_movk_i32 s32, 0x200 410; DAGISEL-GFX10-NEXT: s_swappc_b64 s[30:31], s[4:5] 411; DAGISEL-GFX10-NEXT: s_endpgm 412.entry: 413 %v = alloca [3 x i32], addrspace(5) 414 store i32 42, ptr addrspace(5) %v 415 call amdgpu_gfx void @use(ptr addrspace(5) %v) 416 ret void 417} 418 419define amdgpu_cs void @cs_to_chain(<3 x i32> inreg %a, <3 x i32> %b) { 420; GISEL-GFX11-LABEL: cs_to_chain: 421; GISEL-GFX11: ; %bb.0: 422; GISEL-GFX11-NEXT: v_dual_mov_b32 v3, v0 :: v_dual_mov_b32 v10, v2 423; GISEL-GFX11-NEXT: s_mov_b32 s3, s0 424; GISEL-GFX11-NEXT: ;;#ASMSTART 425; GISEL-GFX11-NEXT: s_nop 426; GISEL-GFX11-NEXT: ;;#ASMEND 427; GISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee@abs32@lo 428; GISEL-GFX11-NEXT: 
s_delay_alu instid0(VALU_DEP_1) 429; GISEL-GFX11-NEXT: v_dual_mov_b32 v8, v3 :: v_dual_mov_b32 v9, v1 430; GISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee@abs32@hi 431; GISEL-GFX11-NEXT: s_mov_b32 s0, s3 432; GISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1 433; GISEL-GFX11-NEXT: s_setpc_b64 s[4:5] 434; 435; GISEL-GFX10-LABEL: cs_to_chain: 436; GISEL-GFX10: ; %bb.0: 437; GISEL-GFX10-NEXT: s_getpc_b64 s[100:101] 438; GISEL-GFX10-NEXT: s_mov_b32 s100, s0 439; GISEL-GFX10-NEXT: v_mov_b32_e32 v3, v0 440; GISEL-GFX10-NEXT: s_load_dwordx4 s[100:103], s[100:101], 0x10 441; GISEL-GFX10-NEXT: v_mov_b32_e32 v9, v1 442; GISEL-GFX10-NEXT: v_mov_b32_e32 v10, v2 443; GISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee@abs32@lo 444; GISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee@abs32@hi 445; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) 446; GISEL-GFX10-NEXT: s_bitset0_b32 s103, 21 447; GISEL-GFX10-NEXT: s_add_u32 s100, s100, s3 448; GISEL-GFX10-NEXT: s_mov_b32 s3, s0 449; GISEL-GFX10-NEXT: ;;#ASMSTART 450; GISEL-GFX10-NEXT: s_nop 451; GISEL-GFX10-NEXT: ;;#ASMEND 452; GISEL-GFX10-NEXT: s_addc_u32 s101, s101, 0 453; GISEL-GFX10-NEXT: v_mov_b32_e32 v8, v3 454; GISEL-GFX10-NEXT: s_mov_b64 s[48:49], s[100:101] 455; GISEL-GFX10-NEXT: s_mov_b32 s0, s3 456; GISEL-GFX10-NEXT: s_mov_b64 s[50:51], s[102:103] 457; GISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1 458; GISEL-GFX10-NEXT: s_setpc_b64 s[4:5] 459; 460; DAGISEL-GFX11-LABEL: cs_to_chain: 461; DAGISEL-GFX11: ; %bb.0: 462; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v3, v0 :: v_dual_mov_b32 v10, v2 463; DAGISEL-GFX11-NEXT: s_mov_b32 s3, s0 464; DAGISEL-GFX11-NEXT: ;;#ASMSTART 465; DAGISEL-GFX11-NEXT: s_nop 466; DAGISEL-GFX11-NEXT: ;;#ASMEND 467; DAGISEL-GFX11-NEXT: s_mov_b32 s5, chain_callee@abs32@hi 468; DAGISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 469; DAGISEL-GFX11-NEXT: v_dual_mov_b32 v8, v3 :: v_dual_mov_b32 v9, v1 470; DAGISEL-GFX11-NEXT: s_mov_b32 s4, chain_callee@abs32@lo 471; DAGISEL-GFX11-NEXT: s_mov_b32 s0, s3 472; DAGISEL-GFX11-NEXT: s_mov_b32 exec_lo, -1 
473; DAGISEL-GFX11-NEXT: s_setpc_b64 s[4:5] 474; 475; DAGISEL-GFX10-LABEL: cs_to_chain: 476; DAGISEL-GFX10: ; %bb.0: 477; DAGISEL-GFX10-NEXT: s_getpc_b64 s[100:101] 478; DAGISEL-GFX10-NEXT: s_mov_b32 s100, s0 479; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v3, v0 480; DAGISEL-GFX10-NEXT: s_load_dwordx4 s[100:103], s[100:101], 0x10 481; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v9, v1 482; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v10, v2 483; DAGISEL-GFX10-NEXT: s_mov_b32 s5, chain_callee@abs32@hi 484; DAGISEL-GFX10-NEXT: s_mov_b32 s4, chain_callee@abs32@lo 485; DAGISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) 486; DAGISEL-GFX10-NEXT: s_bitset0_b32 s103, 21 487; DAGISEL-GFX10-NEXT: s_add_u32 s100, s100, s3 488; DAGISEL-GFX10-NEXT: s_mov_b32 s3, s0 489; DAGISEL-GFX10-NEXT: ;;#ASMSTART 490; DAGISEL-GFX10-NEXT: s_nop 491; DAGISEL-GFX10-NEXT: ;;#ASMEND 492; DAGISEL-GFX10-NEXT: s_addc_u32 s101, s101, 0 493; DAGISEL-GFX10-NEXT: v_mov_b32_e32 v8, v3 494; DAGISEL-GFX10-NEXT: s_mov_b64 s[48:49], s[100:101] 495; DAGISEL-GFX10-NEXT: s_mov_b64 s[50:51], s[102:103] 496; DAGISEL-GFX10-NEXT: s_mov_b32 s0, s3 497; DAGISEL-GFX10-NEXT: s_mov_b32 exec_lo, -1 498; DAGISEL-GFX10-NEXT: s_setpc_b64 s[4:5] 499 call void asm "s_nop", "~{v0},~{v8},~{v16},~{s0}"() 500 call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr @chain_callee, i32 -1, <3 x i32> inreg %a, <3 x i32> %b, i32 0) 501 unreachable 502} 503 504; Chain call whose inreg (SGPR) arguments come from a non-inreg (VGPR) input, so we cannot prove they are uniform; they are expected to be moved to SGPRs with v_readfirstlane_b32. 
; Chain call from an amdgpu_cs entry point where %a is an ordinary (non-inreg)
; argument that is nevertheless passed `inreg` to the callee. The checks expect
; each element of %a to be read into s0-s2 with v_readfirstlane_b32, %b to be
; moved to v8-v10, and exec_lo to be set to -1 right before the s_setpc_b64.
; On the gfx1030 runs the checks also build a four-dword value in s[100:103]
; and copy it to s[48:51] before the jump (presumably the scratch resource
; descriptor - confirm against the calling-convention docs).
define amdgpu_cs void @cs_to_chain_nonuniform(<3 x i32> %a, <3 x i32> %b) {
; GISEL-GFX11-LABEL: cs_to_chain_nonuniform:
; GISEL-GFX11:       ; %bb.0:
; GISEL-GFX11-NEXT:    v_readfirstlane_b32 s0, v0
; GISEL-GFX11-NEXT:    v_readfirstlane_b32 s1, v1
; GISEL-GFX11-NEXT:    v_readfirstlane_b32 s2, v2
; GISEL-GFX11-NEXT:    v_dual_mov_b32 v8, v3 :: v_dual_mov_b32 v9, v4
; GISEL-GFX11-NEXT:    v_mov_b32_e32 v10, v5
; GISEL-GFX11-NEXT:    s_mov_b32 s4, chain_callee@abs32@lo
; GISEL-GFX11-NEXT:    s_mov_b32 s5, chain_callee@abs32@hi
; GISEL-GFX11-NEXT:    s_mov_b32 exec_lo, -1
; GISEL-GFX11-NEXT:    s_setpc_b64 s[4:5]
;
; GISEL-GFX10-LABEL: cs_to_chain_nonuniform:
; GISEL-GFX10:       ; %bb.0:
; GISEL-GFX10-NEXT:    s_getpc_b64 s[100:101]
; GISEL-GFX10-NEXT:    s_mov_b32 s100, s0
; GISEL-GFX10-NEXT:    v_readfirstlane_b32 s1, v1
; GISEL-GFX10-NEXT:    s_load_dwordx4 s[100:103], s[100:101], 0x10
; GISEL-GFX10-NEXT:    v_readfirstlane_b32 s2, v2
; GISEL-GFX10-NEXT:    v_mov_b32_e32 v8, v3
; GISEL-GFX10-NEXT:    v_mov_b32_e32 v9, v4
; GISEL-GFX10-NEXT:    v_mov_b32_e32 v10, v5
; GISEL-GFX10-NEXT:    s_mov_b32 s4, chain_callee@abs32@lo
; GISEL-GFX10-NEXT:    s_mov_b32 s5, chain_callee@abs32@hi
; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GISEL-GFX10-NEXT:    s_bitset0_b32 s103, 21
; GISEL-GFX10-NEXT:    s_add_u32 s100, s100, s0
; GISEL-GFX10-NEXT:    s_addc_u32 s101, s101, 0
; GISEL-GFX10-NEXT:    v_readfirstlane_b32 s0, v0
; GISEL-GFX10-NEXT:    s_mov_b64 s[48:49], s[100:101]
; GISEL-GFX10-NEXT:    s_mov_b64 s[50:51], s[102:103]
; GISEL-GFX10-NEXT:    s_mov_b32 exec_lo, -1
; GISEL-GFX10-NEXT:    s_setpc_b64 s[4:5]
;
; DAGISEL-GFX11-LABEL: cs_to_chain_nonuniform:
; DAGISEL-GFX11:       ; %bb.0:
; DAGISEL-GFX11-NEXT:    v_readfirstlane_b32 s0, v0
; DAGISEL-GFX11-NEXT:    v_readfirstlane_b32 s1, v1
; DAGISEL-GFX11-NEXT:    v_readfirstlane_b32 s2, v2
; DAGISEL-GFX11-NEXT:    v_dual_mov_b32 v8, v3 :: v_dual_mov_b32 v9, v4
; DAGISEL-GFX11-NEXT:    v_mov_b32_e32 v10, v5
; DAGISEL-GFX11-NEXT:    s_mov_b32 s5, chain_callee@abs32@hi
; DAGISEL-GFX11-NEXT:    s_mov_b32 s4, chain_callee@abs32@lo
; DAGISEL-GFX11-NEXT:    s_mov_b32 exec_lo, -1
; DAGISEL-GFX11-NEXT:    s_setpc_b64 s[4:5]
;
; DAGISEL-GFX10-LABEL: cs_to_chain_nonuniform:
; DAGISEL-GFX10:       ; %bb.0:
; DAGISEL-GFX10-NEXT:    s_getpc_b64 s[100:101]
; DAGISEL-GFX10-NEXT:    s_mov_b32 s100, s0
; DAGISEL-GFX10-NEXT:    v_readfirstlane_b32 s1, v1
; DAGISEL-GFX10-NEXT:    s_load_dwordx4 s[100:103], s[100:101], 0x10
; DAGISEL-GFX10-NEXT:    v_readfirstlane_b32 s2, v2
; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v8, v3
; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v9, v4
; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v10, v5
; DAGISEL-GFX10-NEXT:    s_mov_b32 s5, chain_callee@abs32@hi
; DAGISEL-GFX10-NEXT:    s_mov_b32 s4, chain_callee@abs32@lo
; DAGISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; DAGISEL-GFX10-NEXT:    s_bitset0_b32 s103, 21
; DAGISEL-GFX10-NEXT:    s_add_u32 s100, s100, s0
; DAGISEL-GFX10-NEXT:    s_addc_u32 s101, s101, 0
; DAGISEL-GFX10-NEXT:    v_readfirstlane_b32 s0, v0
; DAGISEL-GFX10-NEXT:    s_mov_b64 s[48:49], s[100:101]
; DAGISEL-GFX10-NEXT:    s_mov_b64 s[50:51], s[102:103]
; DAGISEL-GFX10-NEXT:    s_mov_b32 exec_lo, -1
; DAGISEL-GFX10-NEXT:    s_setpc_b64 s[4:5]
  call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr @chain_callee, i32 -1, <3 x i32> inreg %a, <3 x i32> %b, i32 0)
  unreachable
}

; Chain call from one amdgpu_cs_chain function to another with the same
; argument types. The inline asm clobbers v0, v8, v16 and s0, so the checks
; expect the incoming v8 and s0 to be copied aside (v1 / s3) before the asm and
; copied back before the jump.
define amdgpu_cs_chain void @chain_to_chain(<3 x i32> inreg %a, <3 x i32> %b) {
; GISEL-GFX11-LABEL: chain_to_chain:
; GISEL-GFX11:       ; %bb.0:
; GISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, v8
; GISEL-GFX11-NEXT:    s_mov_b32 s3, s0
; GISEL-GFX11-NEXT:    ;;#ASMSTART
; GISEL-GFX11-NEXT:    s_nop
; GISEL-GFX11-NEXT:    ;;#ASMEND
; GISEL-GFX11-NEXT:    s_mov_b32 s4, chain_callee@abs32@lo
; GISEL-GFX11-NEXT:    s_mov_b32 s5, chain_callee@abs32@hi
; GISEL-GFX11-NEXT:    v_mov_b32_e32 v8, v1
; GISEL-GFX11-NEXT:    s_mov_b32 s0, s3
; GISEL-GFX11-NEXT:    s_mov_b32 exec_lo, -1
; GISEL-GFX11-NEXT:    s_setpc_b64 s[4:5]
;
; GISEL-GFX10-LABEL: chain_to_chain:
; GISEL-GFX10:       ; %bb.0:
; GISEL-GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, v8
; GISEL-GFX10-NEXT:    s_mov_b32 s3, s0
; GISEL-GFX10-NEXT:    ;;#ASMSTART
; GISEL-GFX10-NEXT:    s_nop
; GISEL-GFX10-NEXT:    ;;#ASMEND
; GISEL-GFX10-NEXT:    s_mov_b32 s4, chain_callee@abs32@lo
; GISEL-GFX10-NEXT:    s_mov_b32 s5, chain_callee@abs32@hi
; GISEL-GFX10-NEXT:    v_mov_b32_e32 v8, v1
; GISEL-GFX10-NEXT:    s_mov_b32 s0, s3
; GISEL-GFX10-NEXT:    s_mov_b32 exec_lo, -1
; GISEL-GFX10-NEXT:    s_setpc_b64 s[4:5]
;
; DAGISEL-GFX11-LABEL: chain_to_chain:
; DAGISEL-GFX11:       ; %bb.0:
; DAGISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX11-NEXT:    v_mov_b32_e32 v1, v8
; DAGISEL-GFX11-NEXT:    s_mov_b32 s3, s0
; DAGISEL-GFX11-NEXT:    ;;#ASMSTART
; DAGISEL-GFX11-NEXT:    s_nop
; DAGISEL-GFX11-NEXT:    ;;#ASMEND
; DAGISEL-GFX11-NEXT:    s_mov_b32 s5, chain_callee@abs32@hi
; DAGISEL-GFX11-NEXT:    s_mov_b32 s4, chain_callee@abs32@lo
; DAGISEL-GFX11-NEXT:    v_mov_b32_e32 v8, v1
; DAGISEL-GFX11-NEXT:    s_mov_b32 s0, s3
; DAGISEL-GFX11-NEXT:    s_mov_b32 exec_lo, -1
; DAGISEL-GFX11-NEXT:    s_setpc_b64 s[4:5]
;
; DAGISEL-GFX10-LABEL: chain_to_chain:
; DAGISEL-GFX10:       ; %bb.0:
; DAGISEL-GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v1, v8
; DAGISEL-GFX10-NEXT:    s_mov_b32 s3, s0
; DAGISEL-GFX10-NEXT:    ;;#ASMSTART
; DAGISEL-GFX10-NEXT:    s_nop
; DAGISEL-GFX10-NEXT:    ;;#ASMEND
; DAGISEL-GFX10-NEXT:    s_mov_b32 s5, chain_callee@abs32@hi
; DAGISEL-GFX10-NEXT:    s_mov_b32 s4, chain_callee@abs32@lo
; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v8, v1
; DAGISEL-GFX10-NEXT:    s_mov_b32 s0, s3
; DAGISEL-GFX10-NEXT:    s_mov_b32 exec_lo, -1
; DAGISEL-GFX10-NEXT:    s_setpc_b64 s[4:5]
  call void asm "s_nop", "~{v0},~{v8},~{v16},~{s0}"()
  call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr @chain_callee, i32 -1, <3 x i32> inreg %a, <3 x i32> %b, i32 0)
  unreachable
}

; Same as chain_to_chain, but element 0 of the VGPR argument is replaced by the
; result of llvm.amdgcn.set.inactive(3, 4) passed through llvm.amdgcn.wwm. The
; checks expect an s_or_saveexec_b32 / v_cndmask_b32 / exec restore sequence
; before the inline asm, with the selected value moved into v8 before the jump.
define amdgpu_cs_chain void @chain_to_chain_wwm(<3 x i32> inreg %a, <3 x i32> %b) {
; GISEL-GFX11-LABEL: chain_to_chain_wwm:
; GISEL-GFX11:       ; %bb.0:
; GISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX11-NEXT:    s_mov_b32 s3, s0
; GISEL-GFX11-NEXT:    s_or_saveexec_b32 s0, -1
; GISEL-GFX11-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GISEL-GFX11-NEXT:    v_cndmask_b32_e64 v1, 4, 3, s0
; GISEL-GFX11-NEXT:    s_mov_b32 exec_lo, s0
; GISEL-GFX11-NEXT:    v_mov_b32_e32 v2, v1
; GISEL-GFX11-NEXT:    ;;#ASMSTART
; GISEL-GFX11-NEXT:    s_nop
; GISEL-GFX11-NEXT:    ;;#ASMEND
; GISEL-GFX11-NEXT:    s_mov_b32 s4, chain_callee@abs32@lo
; GISEL-GFX11-NEXT:    s_mov_b32 s5, chain_callee@abs32@hi
; GISEL-GFX11-NEXT:    s_mov_b32 s0, s3
; GISEL-GFX11-NEXT:    v_mov_b32_e32 v8, v2
; GISEL-GFX11-NEXT:    s_mov_b32 exec_lo, -1
; GISEL-GFX11-NEXT:    s_setpc_b64 s[4:5]
;
; GISEL-GFX10-LABEL: chain_to_chain_wwm:
; GISEL-GFX10:       ; %bb.0:
; GISEL-GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX10-NEXT:    s_mov_b32 s3, s0
; GISEL-GFX10-NEXT:    s_or_saveexec_b32 s0, -1
; GISEL-GFX10-NEXT:    v_cndmask_b32_e64 v1, 4, 3, s0
; GISEL-GFX10-NEXT:    s_mov_b32 exec_lo, s0
; GISEL-GFX10-NEXT:    v_mov_b32_e32 v2, v1
; GISEL-GFX10-NEXT:    ;;#ASMSTART
; GISEL-GFX10-NEXT:    s_nop
; GISEL-GFX10-NEXT:    ;;#ASMEND
; GISEL-GFX10-NEXT:    s_mov_b32 s4, chain_callee@abs32@lo
; GISEL-GFX10-NEXT:    s_mov_b32 s5, chain_callee@abs32@hi
; GISEL-GFX10-NEXT:    s_mov_b32 s0, s3
; GISEL-GFX10-NEXT:    v_mov_b32_e32 v8, v2
; GISEL-GFX10-NEXT:    s_mov_b32 exec_lo, -1
; GISEL-GFX10-NEXT:    s_setpc_b64 s[4:5]
;
; DAGISEL-GFX11-LABEL: chain_to_chain_wwm:
; DAGISEL-GFX11:       ; %bb.0:
; DAGISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX11-NEXT:    s_or_saveexec_b32 s4, -1
; DAGISEL-GFX11-NEXT:    s_mov_b32 s3, s0
; DAGISEL-GFX11-NEXT:    v_cndmask_b32_e64 v1, 4, 3, s4
; DAGISEL-GFX11-NEXT:    s_mov_b32 exec_lo, s4
; DAGISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; DAGISEL-GFX11-NEXT:    v_mov_b32_e32 v2, v1
; DAGISEL-GFX11-NEXT:    ;;#ASMSTART
; DAGISEL-GFX11-NEXT:    s_nop
; DAGISEL-GFX11-NEXT:    ;;#ASMEND
; DAGISEL-GFX11-NEXT:    s_mov_b32 s5, chain_callee@abs32@hi
; DAGISEL-GFX11-NEXT:    s_mov_b32 s4, chain_callee@abs32@lo
; DAGISEL-GFX11-NEXT:    s_mov_b32 s0, s3
; DAGISEL-GFX11-NEXT:    v_mov_b32_e32 v8, v2
; DAGISEL-GFX11-NEXT:    s_mov_b32 exec_lo, -1
; DAGISEL-GFX11-NEXT:    s_setpc_b64 s[4:5]
;
; DAGISEL-GFX10-LABEL: chain_to_chain_wwm:
; DAGISEL-GFX10:       ; %bb.0:
; DAGISEL-GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX10-NEXT:    s_or_saveexec_b32 s4, -1
; DAGISEL-GFX10-NEXT:    s_mov_b32 s3, s0
; DAGISEL-GFX10-NEXT:    v_cndmask_b32_e64 v1, 4, 3, s4
; DAGISEL-GFX10-NEXT:    s_mov_b32 exec_lo, s4
; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v2, v1
; DAGISEL-GFX10-NEXT:    ;;#ASMSTART
; DAGISEL-GFX10-NEXT:    s_nop
; DAGISEL-GFX10-NEXT:    ;;#ASMEND
; DAGISEL-GFX10-NEXT:    s_mov_b32 s5, chain_callee@abs32@hi
; DAGISEL-GFX10-NEXT:    s_mov_b32 s4, chain_callee@abs32@lo
; DAGISEL-GFX10-NEXT:    s_mov_b32 s0, s3
; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v8, v2
; DAGISEL-GFX10-NEXT:    s_mov_b32 exec_lo, -1
; DAGISEL-GFX10-NEXT:    s_setpc_b64 s[4:5]
  %i = call i32 @llvm.amdgcn.set.inactive(i32 3, i32 4)
  call void asm "s_nop", "~{v0},~{v8},~{v16},~{s0}"()
  %w = call i32 @llvm.amdgcn.wwm(i32 %i)
  %c = insertelement <3 x i32> %b, i32 %w, i32 0
  call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr @chain_callee, i32 -1, <3 x i32> inreg %a, <3 x i32> %c, i32 0)
  unreachable
}

; Variant of chain_to_chain in which the inline asm additionally clobbers all
; of v0-v7. The checks expect the saved copy of v8 to live in v11 rather than
; in one of the clobbered low registers.
define amdgpu_cs_chain void @chain_to_chain_use_all_v0_v7(<3 x i32> inreg %a, <3 x i32> %b) {
; GISEL-GFX11-LABEL: chain_to_chain_use_all_v0_v7:
; GISEL-GFX11:       ; %bb.0:
; GISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX11-NEXT:    v_mov_b32_e32 v11, v8
; GISEL-GFX11-NEXT:    s_mov_b32 s3, s0
; GISEL-GFX11-NEXT:    ;;#ASMSTART
; GISEL-GFX11-NEXT:    s_nop
; GISEL-GFX11-NEXT:    ;;#ASMEND
; GISEL-GFX11-NEXT:    s_mov_b32 s4, chain_callee@abs32@lo
; GISEL-GFX11-NEXT:    s_mov_b32 s5, chain_callee@abs32@hi
; GISEL-GFX11-NEXT:    v_mov_b32_e32 v8, v11
; GISEL-GFX11-NEXT:    s_mov_b32 s0, s3
; GISEL-GFX11-NEXT:    s_mov_b32 exec_lo, -1
; GISEL-GFX11-NEXT:    s_setpc_b64 s[4:5]
;
; GISEL-GFX10-LABEL: chain_to_chain_use_all_v0_v7:
; GISEL-GFX10:       ; %bb.0:
; GISEL-GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX10-NEXT:    v_mov_b32_e32 v11, v8
; GISEL-GFX10-NEXT:    s_mov_b32 s3, s0
; GISEL-GFX10-NEXT:    ;;#ASMSTART
; GISEL-GFX10-NEXT:    s_nop
; GISEL-GFX10-NEXT:    ;;#ASMEND
; GISEL-GFX10-NEXT:    s_mov_b32 s4, chain_callee@abs32@lo
; GISEL-GFX10-NEXT:    s_mov_b32 s5, chain_callee@abs32@hi
; GISEL-GFX10-NEXT:    v_mov_b32_e32 v8, v11
; GISEL-GFX10-NEXT:    s_mov_b32 s0, s3
; GISEL-GFX10-NEXT:    s_mov_b32 exec_lo, -1
; GISEL-GFX10-NEXT:    s_setpc_b64 s[4:5]
;
; DAGISEL-GFX11-LABEL: chain_to_chain_use_all_v0_v7:
; DAGISEL-GFX11:       ; %bb.0:
; DAGISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX11-NEXT:    v_mov_b32_e32 v11, v8
; DAGISEL-GFX11-NEXT:    s_mov_b32 s3, s0
; DAGISEL-GFX11-NEXT:    ;;#ASMSTART
; DAGISEL-GFX11-NEXT:    s_nop
; DAGISEL-GFX11-NEXT:    ;;#ASMEND
; DAGISEL-GFX11-NEXT:    s_mov_b32 s5, chain_callee@abs32@hi
; DAGISEL-GFX11-NEXT:    s_mov_b32 s4, chain_callee@abs32@lo
; DAGISEL-GFX11-NEXT:    v_mov_b32_e32 v8, v11
; DAGISEL-GFX11-NEXT:    s_mov_b32 s0, s3
; DAGISEL-GFX11-NEXT:    s_mov_b32 exec_lo, -1
; DAGISEL-GFX11-NEXT:    s_setpc_b64 s[4:5]
;
; DAGISEL-GFX10-LABEL: chain_to_chain_use_all_v0_v7:
; DAGISEL-GFX10:       ; %bb.0:
; DAGISEL-GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v11, v8
; DAGISEL-GFX10-NEXT:    s_mov_b32 s3, s0
; DAGISEL-GFX10-NEXT:    ;;#ASMSTART
; DAGISEL-GFX10-NEXT:    s_nop
; DAGISEL-GFX10-NEXT:    ;;#ASMEND
; DAGISEL-GFX10-NEXT:    s_mov_b32 s5, chain_callee@abs32@hi
; DAGISEL-GFX10-NEXT:    s_mov_b32 s4, chain_callee@abs32@lo
; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v8, v11
; DAGISEL-GFX10-NEXT:    s_mov_b32 s0, s3
; DAGISEL-GFX10-NEXT:    s_mov_b32 exec_lo, -1
; DAGISEL-GFX10-NEXT:    s_setpc_b64 s[4:5]
  call void asm "s_nop", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7},~{v8},~{v16},~{s0}"()
  call void(ptr, i32, <3 x i32>, <3 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v3i32(ptr @chain_callee, i32 -1, <3 x i32> inreg %a, <3 x i32> %b, i32 0)
  unreachable
}

; Chain call that forwards fewer values than the caller received: only
; elements 0 and 1 of each <3 x i32> argument are passed on to the two-element
; callee @chain_callee_2.
define amdgpu_cs_chain void @chain_to_chain_fewer_args(<3 x i32> inreg %a, <3 x i32> %b) {
; GISEL-GFX11-LABEL: chain_to_chain_fewer_args:
; GISEL-GFX11:       ; %bb.0:
; GISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, v8
; GISEL-GFX11-NEXT:    s_mov_b32 s2, s0
; GISEL-GFX11-NEXT:    ;;#ASMSTART
; GISEL-GFX11-NEXT:    s_nop
; GISEL-GFX11-NEXT:    ;;#ASMEND
; GISEL-GFX11-NEXT:    s_mov_b32 s4, chain_callee_2@abs32@lo
; GISEL-GFX11-NEXT:    s_mov_b32 s5, chain_callee_2@abs32@hi
; GISEL-GFX11-NEXT:    v_mov_b32_e32 v8, v1
; GISEL-GFX11-NEXT:    s_mov_b32 s0, s2
; GISEL-GFX11-NEXT:    s_mov_b32 exec_lo, -1
; GISEL-GFX11-NEXT:    s_setpc_b64 s[4:5]
;
; GISEL-GFX10-LABEL: chain_to_chain_fewer_args:
; GISEL-GFX10:       ; %bb.0:
; GISEL-GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, v8
; GISEL-GFX10-NEXT:    s_mov_b32 s2, s0
; GISEL-GFX10-NEXT:    ;;#ASMSTART
; GISEL-GFX10-NEXT:    s_nop
; GISEL-GFX10-NEXT:    ;;#ASMEND
; GISEL-GFX10-NEXT:    s_mov_b32 s4, chain_callee_2@abs32@lo
; GISEL-GFX10-NEXT:    s_mov_b32 s5, chain_callee_2@abs32@hi
; GISEL-GFX10-NEXT:    v_mov_b32_e32 v8, v1
; GISEL-GFX10-NEXT:    s_mov_b32 s0, s2
; GISEL-GFX10-NEXT:    s_mov_b32 exec_lo, -1
; GISEL-GFX10-NEXT:    s_setpc_b64 s[4:5]
;
; DAGISEL-GFX11-LABEL: chain_to_chain_fewer_args:
; DAGISEL-GFX11:       ; %bb.0:
; DAGISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX11-NEXT:    v_mov_b32_e32 v1, v8
; DAGISEL-GFX11-NEXT:    s_mov_b32 s2, s0
; DAGISEL-GFX11-NEXT:    ;;#ASMSTART
; DAGISEL-GFX11-NEXT:    s_nop
; DAGISEL-GFX11-NEXT:    ;;#ASMEND
; DAGISEL-GFX11-NEXT:    s_mov_b32 s5, chain_callee_2@abs32@hi
; DAGISEL-GFX11-NEXT:    s_mov_b32 s4, chain_callee_2@abs32@lo
; DAGISEL-GFX11-NEXT:    v_mov_b32_e32 v8, v1
; DAGISEL-GFX11-NEXT:    s_mov_b32 s0, s2
; DAGISEL-GFX11-NEXT:    s_mov_b32 exec_lo, -1
; DAGISEL-GFX11-NEXT:    s_setpc_b64 s[4:5]
;
; DAGISEL-GFX10-LABEL: chain_to_chain_fewer_args:
; DAGISEL-GFX10:       ; %bb.0:
; DAGISEL-GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v1, v8
; DAGISEL-GFX10-NEXT:    s_mov_b32 s2, s0
; DAGISEL-GFX10-NEXT:    ;;#ASMSTART
; DAGISEL-GFX10-NEXT:    s_nop
; DAGISEL-GFX10-NEXT:    ;;#ASMEND
; DAGISEL-GFX10-NEXT:    s_mov_b32 s5, chain_callee_2@abs32@hi
; DAGISEL-GFX10-NEXT:    s_mov_b32 s4, chain_callee_2@abs32@lo
; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v8, v1
; DAGISEL-GFX10-NEXT:    s_mov_b32 s0, s2
; DAGISEL-GFX10-NEXT:    s_mov_b32 exec_lo, -1
; DAGISEL-GFX10-NEXT:    s_setpc_b64 s[4:5]
  %s = shufflevector <3 x i32> %a, <3 x i32> zeroinitializer, <2 x i32> <i32 0, i32 1>
  %v = shufflevector <3 x i32> %b, <3 x i32> zeroinitializer, <2 x i32> <i32 0, i32 1>
  call void asm "s_nop", "~{v0},~{v8},~{v16},~{s0}"()
  call void(ptr, i32, <2 x i32>, <2 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v2i32(ptr @chain_callee_2, i32 -1, <2 x i32> inreg %s, <2 x i32> %v, i32 0)
  unreachable
}

; Chain call that forwards more values than the caller received: each
; <3 x i32> argument is widened to <4 x i32>, where shuffle mask index 3 picks
; element 0 of the zeroinitializer operand, so the extra lane is 0 (the checks
; expect s3 and v11 to be set to 0). The callee is the four-element
; @chain_callee_4 so that the callee's declared parameters match what is passed.
; NOTE(review): the symbol names in the assertions below were updated by hand to
; follow the callee; rerun utils/update_llc_test_checks.py to confirm.
define amdgpu_cs_chain void @chain_to_chain_more_args(<3 x i32> inreg %a, <3 x i32> %b) {
; GISEL-GFX11-LABEL: chain_to_chain_more_args:
; GISEL-GFX11:       ; %bb.0:
; GISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, v8
; GISEL-GFX11-NEXT:    s_mov_b32 s3, s0
; GISEL-GFX11-NEXT:    ;;#ASMSTART
; GISEL-GFX11-NEXT:    s_nop
; GISEL-GFX11-NEXT:    ;;#ASMEND
; GISEL-GFX11-NEXT:    s_mov_b32 s4, chain_callee_4@abs32@lo
; GISEL-GFX11-NEXT:    s_mov_b32 s5, chain_callee_4@abs32@hi
; GISEL-GFX11-NEXT:    v_dual_mov_b32 v8, v1 :: v_dual_mov_b32 v11, 0
; GISEL-GFX11-NEXT:    s_mov_b32 s0, s3
; GISEL-GFX11-NEXT:    s_mov_b32 s3, 0
; GISEL-GFX11-NEXT:    s_mov_b32 exec_lo, -1
; GISEL-GFX11-NEXT:    s_setpc_b64 s[4:5]
;
; GISEL-GFX10-LABEL: chain_to_chain_more_args:
; GISEL-GFX10:       ; %bb.0:
; GISEL-GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, v8
; GISEL-GFX10-NEXT:    s_mov_b32 s3, s0
; GISEL-GFX10-NEXT:    ;;#ASMSTART
; GISEL-GFX10-NEXT:    s_nop
; GISEL-GFX10-NEXT:    ;;#ASMEND
; GISEL-GFX10-NEXT:    v_mov_b32_e32 v11, 0
; GISEL-GFX10-NEXT:    s_mov_b32 s4, chain_callee_4@abs32@lo
; GISEL-GFX10-NEXT:    v_mov_b32_e32 v8, v1
; GISEL-GFX10-NEXT:    s_mov_b32 s5, chain_callee_4@abs32@hi
; GISEL-GFX10-NEXT:    s_mov_b32 s0, s3
; GISEL-GFX10-NEXT:    s_mov_b32 s3, 0
; GISEL-GFX10-NEXT:    s_mov_b32 exec_lo, -1
; GISEL-GFX10-NEXT:    s_setpc_b64 s[4:5]
;
; DAGISEL-GFX11-LABEL: chain_to_chain_more_args:
; DAGISEL-GFX11:       ; %bb.0:
; DAGISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX11-NEXT:    v_mov_b32_e32 v1, v8
; DAGISEL-GFX11-NEXT:    s_mov_b32 s3, s0
; DAGISEL-GFX11-NEXT:    ;;#ASMSTART
; DAGISEL-GFX11-NEXT:    s_nop
; DAGISEL-GFX11-NEXT:    ;;#ASMEND
; DAGISEL-GFX11-NEXT:    s_mov_b32 s5, chain_callee_4@abs32@hi
; DAGISEL-GFX11-NEXT:    s_mov_b32 s4, chain_callee_4@abs32@lo
; DAGISEL-GFX11-NEXT:    v_dual_mov_b32 v8, v1 :: v_dual_mov_b32 v11, 0
; DAGISEL-GFX11-NEXT:    s_mov_b32 s0, s3
; DAGISEL-GFX11-NEXT:    s_mov_b32 s3, 0
; DAGISEL-GFX11-NEXT:    s_mov_b32 exec_lo, -1
; DAGISEL-GFX11-NEXT:    s_setpc_b64 s[4:5]
;
; DAGISEL-GFX10-LABEL: chain_to_chain_more_args:
; DAGISEL-GFX10:       ; %bb.0:
; DAGISEL-GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v1, v8
; DAGISEL-GFX10-NEXT:    s_mov_b32 s3, s0
; DAGISEL-GFX10-NEXT:    ;;#ASMSTART
; DAGISEL-GFX10-NEXT:    s_nop
; DAGISEL-GFX10-NEXT:    ;;#ASMEND
; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v11, 0
; DAGISEL-GFX10-NEXT:    s_mov_b32 s5, chain_callee_4@abs32@hi
; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v8, v1
; DAGISEL-GFX10-NEXT:    s_mov_b32 s4, chain_callee_4@abs32@lo
; DAGISEL-GFX10-NEXT:    s_mov_b32 s0, s3
; DAGISEL-GFX10-NEXT:    s_mov_b32 s3, 0
; DAGISEL-GFX10-NEXT:    s_mov_b32 exec_lo, -1
; DAGISEL-GFX10-NEXT:    s_setpc_b64 s[4:5]
  %s = shufflevector <3 x i32> %a, <3 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %v = shufflevector <3 x i32> %b, <3 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  call void asm "s_nop", "~{v0},~{v8},~{v16},~{s0}"()
  call void(ptr, i32, <4 x i32>, <4 x i32>, i32, ...) @llvm.amdgcn.cs.chain.v4i32(ptr @chain_callee_4, i32 -1, <4 x i32> inreg %s, <4 x i32> %v, i32 0)
  unreachable
}

; Volatile store into a 32-byte-aligned private (addrspace(5)) alloca from an
; amdgpu_cs_chain function. As the function name indicates, the stack must not
; be realigned: the expected address is just %idx (in v8) shifted left by 4,
; and the expected code contains no stack-pointer adjustment.
define amdgpu_cs_chain void @amdgpu_cs_chain_dont_realign_stack(i32 %idx) {
; GISEL-GFX11-LABEL: amdgpu_cs_chain_dont_realign_stack:
; GISEL-GFX11:       ; %bb.0:
; GISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX11-NEXT:    s_mov_b32 s3, 4
; GISEL-GFX11-NEXT:    s_mov_b32 s2, 3
; GISEL-GFX11-NEXT:    s_mov_b32 s1, 2
; GISEL-GFX11-NEXT:    s_mov_b32 s0, 1
; GISEL-GFX11-NEXT:    v_lshlrev_b32_e32 v0, 4, v8
; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT:    v_mov_b32_e32 v4, v0
; GISEL-GFX11-NEXT:    v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v3, s3
; GISEL-GFX11-NEXT:    v_dual_mov_b32 v1, s1 :: v_dual_mov_b32 v2, s2
; GISEL-GFX11-NEXT:    scratch_store_b128 v4, v[0:3], off dlc
; GISEL-GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; GISEL-GFX11-NEXT:    s_endpgm
;
; GISEL-GFX10-LABEL: amdgpu_cs_chain_dont_realign_stack:
; GISEL-GFX10:       ; %bb.0:
; GISEL-GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-GFX10-NEXT:    v_lshlrev_b32_e32 v0, 4, v8
; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 1
; GISEL-GFX10-NEXT:    v_mov_b32_e32 v2, 2
; GISEL-GFX10-NEXT:    v_mov_b32_e32 v3, 3
; GISEL-GFX10-NEXT:    v_mov_b32_e32 v4, 4
; GISEL-GFX10-NEXT:    buffer_store_dword v1, v0, s[48:51], 0 offen
; GISEL-GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GISEL-GFX10-NEXT:    buffer_store_dword v2, v0, s[48:51], 0 offen offset:4
; GISEL-GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GISEL-GFX10-NEXT:    buffer_store_dword v3, v0, s[48:51], 0 offen offset:8
; GISEL-GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GISEL-GFX10-NEXT:    buffer_store_dword v4, v0, s[48:51], 0 offen offset:12
; GISEL-GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; GISEL-GFX10-NEXT:    s_endpgm
;
; DAGISEL-GFX11-LABEL: amdgpu_cs_chain_dont_realign_stack:
; DAGISEL-GFX11:       ; %bb.0:
; DAGISEL-GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX11-NEXT:    v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, 2
; DAGISEL-GFX11-NEXT:    v_dual_mov_b32 v2, 3 :: v_dual_mov_b32 v3, 4
; DAGISEL-GFX11-NEXT:    v_lshl_add_u32 v4, v8, 4, 0
; DAGISEL-GFX11-NEXT:    scratch_store_b128 v4, v[0:3], off dlc
; DAGISEL-GFX11-NEXT:    s_waitcnt_vscnt null, 0x0
; DAGISEL-GFX11-NEXT:    s_endpgm
;
; DAGISEL-GFX10-LABEL: amdgpu_cs_chain_dont_realign_stack:
; DAGISEL-GFX10:       ; %bb.0:
; DAGISEL-GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v0, 4
; DAGISEL-GFX10-NEXT:    v_lshl_add_u32 v1, v8, 4, 0
; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v2, 3
; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v3, 2
; DAGISEL-GFX10-NEXT:    v_mov_b32_e32 v4, 1
; DAGISEL-GFX10-NEXT:    buffer_store_dword v0, v1, s[48:51], 0 offen offset:12
; DAGISEL-GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; DAGISEL-GFX10-NEXT:    buffer_store_dword v2, v1, s[48:51], 0 offen offset:8
; DAGISEL-GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; DAGISEL-GFX10-NEXT:    buffer_store_dword v3, v1, s[48:51], 0 offen offset:4
; DAGISEL-GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; DAGISEL-GFX10-NEXT:    buffer_store_dword v4, v1, s[48:51], 0 offen
; DAGISEL-GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
; DAGISEL-GFX10-NEXT:    s_endpgm
  %alloca.align32 = alloca [8 x <4 x i32>], align 32, addrspace(5)
  %gep0 = getelementptr inbounds [8 x <4 x i32>], ptr addrspace(5) %alloca.align32, i32 0, i32 %idx
  store volatile <4 x i32> <i32 1, i32 2, i32 3, i32 4>, ptr addrspace(5) %gep0, align 32
  ret void
}

declare void @llvm.amdgcn.cs.chain.v2i32(ptr, i32, <2 x i32>, <2 x i32>, i32, ...)
declare void @llvm.amdgcn.cs.chain.v3i32(ptr, i32, <3 x i32>, <3 x i32>, i32, ...)
declare void @llvm.amdgcn.cs.chain.v4i32(ptr, i32, <4 x i32>, <4 x i32>, i32, ...)
declare amdgpu_cs_chain void @chain_callee_2(<2 x i32> inreg, <2 x i32>)
declare amdgpu_cs_chain void @chain_callee(<3 x i32> inreg, <3 x i32>)
declare amdgpu_cs_chain void @chain_callee_4(<4 x i32> inreg, <4 x i32>)
declare i32 @llvm.amdgcn.set.inactive(i32, i32)
declare i32 @llvm.amdgcn.wwm(i32)