; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel=1 -O2 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck --check-prefix=GISEL12 %s
; RUN: llc -global-isel=0 -O2 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck --check-prefix=DAGISEL12 %s
; RUN: llc -global-isel=1 -O2 -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck --check-prefix=GISEL10 %s
; RUN: llc -global-isel=0 -O2 -mtriple=amdgcn -mcpu=gfx1030 -verify-machineinstrs < %s | FileCheck --check-prefix=DAGISEL10 %s

; The expected assembly is checked four times per function: for gfx1200 and
; gfx1030, each with GlobalISel (-global-isel=1) and with SelectionDAG
; (-global-isel=0).

; Simplest case: the only AMDGPU intrinsics used are init.whole.wave and
; cs.chain. %shader adds 42 to %x and 5 to element 0 of %vgpr; %tail merges the
; two paths, adds 32 to the merged x, stores it in element 3 and chains to
; %callee.
define amdgpu_cs_chain void @basic(<3 x i32> inreg %sgpr, ptr inreg %callee, i32 inreg %exec, { i32, ptr addrspace(5), i32, i32 } %vgpr, i32 %x, i32 %y) {
; GISEL12-LABEL: basic:
; GISEL12: ; %bb.0: ; %entry
; GISEL12-NEXT: s_wait_loadcnt_dscnt 0x0
; GISEL12-NEXT: s_wait_expcnt 0x0
; GISEL12-NEXT: s_wait_samplecnt 0x0
; GISEL12-NEXT: s_wait_bvhcnt 0x0
; GISEL12-NEXT: s_wait_kmcnt 0x0
; GISEL12-NEXT: s_or_saveexec_b32 s8, -1
; GISEL12-NEXT: s_mov_b32 s6, s3
; GISEL12-NEXT: s_mov_b32 s7, s4
; GISEL12-NEXT: s_wait_alu 0xfffe
; GISEL12-NEXT: s_and_saveexec_b32 s3, s8
; GISEL12-NEXT: ; %bb.1: ; %shader
; GISEL12-NEXT: v_add_nc_u32_e32 v12, 42, v12
; GISEL12-NEXT: v_add_nc_u32_e32 v8, 5, v8
; GISEL12-NEXT: ; %bb.2: ; %tail
; GISEL12-NEXT: s_wait_alu 0xfffe
; GISEL12-NEXT: s_or_b32 exec_lo, exec_lo, s3
; GISEL12-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GISEL12-NEXT: v_add_nc_u32_e32 v11, 32, v12
; GISEL12-NEXT: s_mov_b32 exec_lo, s5
; GISEL12-NEXT: s_wait_alu 0xfffe
; GISEL12-NEXT: s_setpc_b64 s[6:7]
;
; DAGISEL12-LABEL: basic:
; DAGISEL12: ; %bb.0: ; %entry
; DAGISEL12-NEXT: s_wait_loadcnt_dscnt 0x0
; DAGISEL12-NEXT: s_wait_expcnt 0x0
; DAGISEL12-NEXT: s_wait_samplecnt 0x0
; DAGISEL12-NEXT: s_wait_bvhcnt 0x0
; DAGISEL12-NEXT: s_wait_kmcnt 0x0
; DAGISEL12-NEXT: s_or_saveexec_b32 s8, -1
; DAGISEL12-NEXT: s_mov_b32 s7, s4
; DAGISEL12-NEXT: s_mov_b32 s6, s3
; DAGISEL12-NEXT: s_wait_alu 0xfffe
; DAGISEL12-NEXT: s_and_saveexec_b32 s3, s8
; DAGISEL12-NEXT: ; %bb.1: ; %shader
; DAGISEL12-NEXT: v_add_nc_u32_e32 v12, 42, v12
; DAGISEL12-NEXT: v_add_nc_u32_e32 v8, 5, v8
; DAGISEL12-NEXT: ; %bb.2: ; %tail
; DAGISEL12-NEXT: s_wait_alu 0xfffe
; DAGISEL12-NEXT: s_or_b32 exec_lo, exec_lo, s3
; DAGISEL12-NEXT: s_delay_alu instid0(VALU_DEP_2)
; DAGISEL12-NEXT: v_add_nc_u32_e32 v11, 32, v12
; DAGISEL12-NEXT: s_mov_b32 exec_lo, s5
; DAGISEL12-NEXT: s_wait_alu 0xfffe
; DAGISEL12-NEXT: s_setpc_b64 s[6:7]
;
; GISEL10-LABEL: basic:
; GISEL10: ; %bb.0: ; %entry
; GISEL10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL10-NEXT: s_or_saveexec_b32 s8, -1
; GISEL10-NEXT: s_mov_b32 s6, s3
; GISEL10-NEXT: s_mov_b32 s7, s4
; GISEL10-NEXT: s_and_saveexec_b32 s3, s8
; GISEL10-NEXT: ; %bb.1: ; %shader
; GISEL10-NEXT: v_add_nc_u32_e32 v12, 42, v12
; GISEL10-NEXT: v_add_nc_u32_e32 v8, 5, v8
; GISEL10-NEXT: ; %bb.2: ; %tail
; GISEL10-NEXT: s_or_b32 exec_lo, exec_lo, s3
; GISEL10-NEXT: v_add_nc_u32_e32 v11, 32, v12
; GISEL10-NEXT: s_mov_b32 exec_lo, s5
; GISEL10-NEXT: s_setpc_b64 s[6:7]
;
; DAGISEL10-LABEL: basic:
; DAGISEL10: ; %bb.0: ; %entry
; DAGISEL10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL10-NEXT: s_or_saveexec_b32 s8, -1
; DAGISEL10-NEXT: s_mov_b32 s7, s4
; DAGISEL10-NEXT: s_mov_b32 s6, s3
; DAGISEL10-NEXT: s_and_saveexec_b32 s3, s8
; DAGISEL10-NEXT: ; %bb.1: ; %shader
; DAGISEL10-NEXT: v_add_nc_u32_e32 v12, 42, v12
; DAGISEL10-NEXT: v_add_nc_u32_e32 v8, 5, v8
; DAGISEL10-NEXT: ; %bb.2: ; %tail
; DAGISEL10-NEXT: s_or_b32 exec_lo, exec_lo, s3
; DAGISEL10-NEXT: v_add_nc_u32_e32 v11, 32, v12
; DAGISEL10-NEXT: s_mov_b32 exec_lo, s5
; DAGISEL10-NEXT: s_setpc_b64 s[6:7]
entry:
  ; The i1 result selects between running %shader and skipping straight to %tail.
  %entry_exec = call i1 @llvm.amdgcn.init.whole.wave()
  br i1 %entry_exec, label %shader, label %tail

shader:
  %newx = add i32 %x, 42
  %oldval = extractvalue { i32, ptr addrspace(5), i32, i32 } %vgpr, 0
  %newval = add i32 %oldval, 5
  %newvgpr = insertvalue { i32, ptr addrspace(5), i32, i32 } %vgpr, i32 %newval, 0

  br label %tail

tail:
  ; On the %entry edge the incoming %x and %vgpr are forwarded unmodified.
  %full.x = phi i32 [%x, %entry], [%newx, %shader]
  %full.vgpr = phi { i32, ptr addrspace(5), i32, i32 } [%vgpr, %entry], [%newvgpr, %shader]
  %modified.x = add i32 %full.x, 32
  %vgpr.args = insertvalue { i32, ptr addrspace(5), i32, i32 } %full.vgpr, i32 %modified.x, 3
  ; Chain to %callee with %exec, passing %sgpr and the updated struct; the last
  ; operand (flags) is 0.
  call void(ptr, i32, <3 x i32>, { i32, ptr addrspace(5), i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain(ptr %callee, i32 %exec, <3 x i32> inreg %sgpr, { i32, ptr addrspace(5), i32, i32 } %vgpr.args, i32 0)
  unreachable
}

; %shader mixes an ordinary add with a set.inactive / ballot / strict.wwm
; sequence. The two results are merged with scalar phis in %tail (falling back
; to %x and %y on the %entry edge) and inserted into elements 2 and 3 of %vgpr.
define amdgpu_cs_chain void @wwm_in_shader(<3 x i32> inreg %sgpr, ptr inreg %callee, i32 inreg %exec, { i32, ptr addrspace(5), i32, i32 } %vgpr, i32 %x, i32 %y) {
; GISEL12-LABEL: wwm_in_shader:
; GISEL12: ; %bb.0: ; %entry
; GISEL12-NEXT: s_wait_loadcnt_dscnt 0x0
; GISEL12-NEXT: s_wait_expcnt 0x0
; GISEL12-NEXT: s_wait_samplecnt 0x0
; GISEL12-NEXT: s_wait_bvhcnt 0x0
; GISEL12-NEXT: s_wait_kmcnt 0x0
; GISEL12-NEXT: s_or_saveexec_b32 s8, -1
; GISEL12-NEXT: v_dual_mov_b32 v10, v12 :: v_dual_mov_b32 v11, v13
; GISEL12-NEXT: s_mov_b32 s6, s3
; GISEL12-NEXT: s_mov_b32 s7, s4
; GISEL12-NEXT: s_wait_alu 0xfffe
; GISEL12-NEXT: s_and_saveexec_b32 s3, s8
; GISEL12-NEXT: ; %bb.1: ; %shader
; GISEL12-NEXT: s_or_saveexec_b32 s4, -1
; GISEL12-NEXT: s_wait_alu 0xfffe
; GISEL12-NEXT: v_cndmask_b32_e64 v0, 0x47, v10, s4
; GISEL12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GISEL12-NEXT: v_cmp_ne_u32_e64 s8, 0, v0
; GISEL12-NEXT: v_mov_b32_e32 v0, s8
; GISEL12-NEXT: s_mov_b32 exec_lo, s4
; GISEL12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL12-NEXT: v_dual_mov_b32 v11, v0 :: v_dual_add_nc_u32 v10, 42, v10
; GISEL12-NEXT: ; %bb.2: ; %tail
; GISEL12-NEXT: s_or_b32 exec_lo, exec_lo, s3
; GISEL12-NEXT: s_mov_b32 exec_lo, s5
; GISEL12-NEXT: s_wait_alu 0xfffe
; GISEL12-NEXT: s_setpc_b64 s[6:7]
;
; DAGISEL12-LABEL: wwm_in_shader:
; DAGISEL12: ; %bb.0: ; %entry
; DAGISEL12-NEXT: s_wait_loadcnt_dscnt 0x0
; DAGISEL12-NEXT: s_wait_expcnt 0x0
; DAGISEL12-NEXT: s_wait_samplecnt 0x0
; DAGISEL12-NEXT: s_wait_bvhcnt 0x0
; DAGISEL12-NEXT: s_wait_kmcnt 0x0
; DAGISEL12-NEXT: s_or_saveexec_b32 s8, -1
; DAGISEL12-NEXT: v_dual_mov_b32 v11, v13 :: v_dual_mov_b32 v10, v12
; DAGISEL12-NEXT: s_mov_b32 s7, s4
; DAGISEL12-NEXT: s_mov_b32 s6, s3
; DAGISEL12-NEXT: s_wait_alu 0xfffe
; DAGISEL12-NEXT: s_and_saveexec_b32 s3, s8
; DAGISEL12-NEXT: ; %bb.1: ; %shader
; DAGISEL12-NEXT: s_or_saveexec_b32 s4, -1
; DAGISEL12-NEXT: s_wait_alu 0xfffe
; DAGISEL12-NEXT: v_cndmask_b32_e64 v0, 0x47, v10, s4
; DAGISEL12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; DAGISEL12-NEXT: v_cmp_ne_u32_e64 s8, 0, v0
; DAGISEL12-NEXT: s_mov_b32 exec_lo, s4
; DAGISEL12-NEXT: v_dual_mov_b32 v11, s8 :: v_dual_add_nc_u32 v10, 42, v10
; DAGISEL12-NEXT: ; %bb.2: ; %tail
; DAGISEL12-NEXT: s_or_b32 exec_lo, exec_lo, s3
; DAGISEL12-NEXT: s_mov_b32 exec_lo, s5
; DAGISEL12-NEXT: s_wait_alu 0xfffe
; DAGISEL12-NEXT: s_setpc_b64 s[6:7]
;
; GISEL10-LABEL: wwm_in_shader:
; GISEL10: ; %bb.0: ; %entry
; GISEL10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL10-NEXT: s_or_saveexec_b32 s8, -1
; GISEL10-NEXT: v_mov_b32_e32 v10, v12
; GISEL10-NEXT: v_mov_b32_e32 v11, v13
; GISEL10-NEXT: s_mov_b32 s6, s3
; GISEL10-NEXT: s_mov_b32 s7, s4
; GISEL10-NEXT: s_and_saveexec_b32 s3, s8
; GISEL10-NEXT: ; %bb.1: ; %shader
; GISEL10-NEXT: s_or_saveexec_b32 s4, -1
; GISEL10-NEXT: v_cndmask_b32_e64 v0, 0x47, v10, s4
; GISEL10-NEXT: v_cmp_ne_u32_e64 s8, 0, v0
; GISEL10-NEXT: v_mov_b32_e32 v0, s8
; GISEL10-NEXT: s_mov_b32 exec_lo, s4
; GISEL10-NEXT: v_add_nc_u32_e32 v10, 42, v10
; GISEL10-NEXT: v_mov_b32_e32 v11, v0
; GISEL10-NEXT: ; %bb.2: ; %tail
; GISEL10-NEXT: s_or_b32 exec_lo, exec_lo, s3
; GISEL10-NEXT: s_mov_b32 exec_lo, s5
; GISEL10-NEXT: s_setpc_b64 s[6:7]
;
; DAGISEL10-LABEL: wwm_in_shader:
; DAGISEL10: ; %bb.0: ; %entry
; DAGISEL10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL10-NEXT: s_or_saveexec_b32 s8, -1
; DAGISEL10-NEXT: v_mov_b32_e32 v11, v13
; DAGISEL10-NEXT: v_mov_b32_e32 v10, v12
; DAGISEL10-NEXT: s_mov_b32 s7, s4
; DAGISEL10-NEXT: s_mov_b32 s6, s3
; DAGISEL10-NEXT: s_and_saveexec_b32 s3, s8
; DAGISEL10-NEXT: ; %bb.1: ; %shader
; DAGISEL10-NEXT: s_or_saveexec_b32 s4, -1
; DAGISEL10-NEXT: v_cndmask_b32_e64 v0, 0x47, v10, s4
; DAGISEL10-NEXT: v_cmp_ne_u32_e64 s8, 0, v0
; DAGISEL10-NEXT: s_mov_b32 exec_lo, s4
; DAGISEL10-NEXT: v_add_nc_u32_e32 v10, 42, v10
; DAGISEL10-NEXT: v_mov_b32_e32 v11, s8
; DAGISEL10-NEXT: ; %bb.2: ; %tail
; DAGISEL10-NEXT: s_or_b32 exec_lo, exec_lo, s3
; DAGISEL10-NEXT: s_mov_b32 exec_lo, s5
; DAGISEL10-NEXT: s_setpc_b64 s[6:7]
entry:
  ; The i1 result selects between running %shader and skipping straight to %tail.
  %entry_exec = call i1 @llvm.amdgcn.init.whole.wave()
  br i1 %entry_exec, label %shader, label %tail

shader:
  %nonwwm = add i32 %x, 42

  ; The set.inactive constant 71 appears as 0x47 in the expected v_cndmask.
  %full.vgpr = call i32 @llvm.amdgcn.set.inactive.i32(i32 %x, i32 71)
  %non.zero = icmp ne i32 %full.vgpr, 0
  %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %non.zero)
  %wwm = call i32 @llvm.amdgcn.strict.wwm.i32(i32 %ballot)

  br label %tail

tail:
  ; Scalar phis: the %entry edge supplies %x and %y, the %shader edge supplies
  ; the add result and the strict.wwm result.
  %full.nonwwm = phi i32 [%x, %entry], [%nonwwm, %shader]
  %full.wwm = phi i32 [%y, %entry], [%wwm, %shader]
  %vgpr.1 = insertvalue { i32, ptr addrspace(5), i32, i32} %vgpr, i32 %full.nonwwm, 2
  %vgpr.2 = insertvalue { i32, ptr addrspace(5), i32, i32} %vgpr.1, i32 %full.wwm, 3
  ; Chain to %callee with %exec, passing %sgpr and the updated struct; the last
  ; operand (flags) is 0.
  call void(ptr, i32, <3 x i32>, { i32, ptr addrspace(5), i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain(ptr %callee, i32 %exec, <3 x i32> inreg %sgpr, { i32, ptr addrspace(5), i32, i32 } %vgpr.2, i32 0)
  unreachable
}

; Same %shader computation as @wwm_in_shader, but the insertvalues into
; elements 2 and 3 happen inside %shader and %tail merges the two paths with a
; single phi of the whole struct (the %entry edge forwards %vgpr unmodified).
define amdgpu_cs_chain void @phi_whole_struct(<3 x i32> inreg %sgpr, ptr inreg %callee, i32 inreg %exec, { i32, ptr addrspace(5), i32, i32 } %vgpr, i32 %x, i32 %y) {
; GISEL12-LABEL: phi_whole_struct:
; GISEL12: ; %bb.0: ; %entry
; GISEL12-NEXT: s_wait_loadcnt_dscnt 0x0
; GISEL12-NEXT: s_wait_expcnt 0x0
; GISEL12-NEXT: s_wait_samplecnt 0x0
; GISEL12-NEXT: s_wait_bvhcnt 0x0
; GISEL12-NEXT: s_wait_kmcnt 0x0
; GISEL12-NEXT: s_or_saveexec_b32 s8, -1
; GISEL12-NEXT: s_mov_b32 s6, s3
; GISEL12-NEXT: s_mov_b32 s7, s4
; GISEL12-NEXT: s_wait_alu 0xfffe
; GISEL12-NEXT: s_and_saveexec_b32 s3, s8
; GISEL12-NEXT: ; %bb.1: ; %shader
; GISEL12-NEXT: s_or_saveexec_b32 s4, -1
; GISEL12-NEXT: s_wait_alu 0xfffe
; GISEL12-NEXT: v_cndmask_b32_e64 v0, 0x47, v12, s4
; GISEL12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GISEL12-NEXT: v_cmp_ne_u32_e64 s8, 0, v0
; GISEL12-NEXT: v_mov_b32_e32 v0, s8
; GISEL12-NEXT: s_mov_b32 exec_lo, s4
; GISEL12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL12-NEXT: v_dual_mov_b32 v11, v0 :: v_dual_add_nc_u32 v10, 42, v12
; GISEL12-NEXT: ; %bb.2: ; %tail
; GISEL12-NEXT: s_or_b32 exec_lo, exec_lo, s3
; GISEL12-NEXT: s_mov_b32 exec_lo, s5
; GISEL12-NEXT: s_wait_alu 0xfffe
; GISEL12-NEXT: s_setpc_b64 s[6:7]
;
; DAGISEL12-LABEL: phi_whole_struct:
; DAGISEL12: ; %bb.0: ; %entry
; DAGISEL12-NEXT: s_wait_loadcnt_dscnt 0x0
; DAGISEL12-NEXT: s_wait_expcnt 0x0
; DAGISEL12-NEXT: s_wait_samplecnt 0x0
; DAGISEL12-NEXT: s_wait_bvhcnt 0x0
; DAGISEL12-NEXT: s_wait_kmcnt 0x0
; DAGISEL12-NEXT: s_or_saveexec_b32 s8, -1
; DAGISEL12-NEXT: s_mov_b32 s7, s4
; DAGISEL12-NEXT: s_mov_b32 s6, s3
; DAGISEL12-NEXT: s_wait_alu 0xfffe
; DAGISEL12-NEXT: s_and_saveexec_b32 s3, s8
; DAGISEL12-NEXT: ; %bb.1: ; %shader
; DAGISEL12-NEXT: s_or_saveexec_b32 s4, -1
; DAGISEL12-NEXT: s_wait_alu 0xfffe
; DAGISEL12-NEXT: v_cndmask_b32_e64 v0, 0x47, v12, s4
; DAGISEL12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; DAGISEL12-NEXT: v_cmp_ne_u32_e64 s8, 0, v0
; DAGISEL12-NEXT: s_mov_b32 exec_lo, s4
; DAGISEL12-NEXT: v_dual_mov_b32 v11, s8 :: v_dual_add_nc_u32 v10, 42, v12
; DAGISEL12-NEXT: ; %bb.2: ; %tail
; DAGISEL12-NEXT: s_or_b32 exec_lo, exec_lo, s3
; DAGISEL12-NEXT: s_mov_b32 exec_lo, s5
; DAGISEL12-NEXT: s_wait_alu 0xfffe
; DAGISEL12-NEXT: s_setpc_b64 s[6:7]
;
; GISEL10-LABEL: phi_whole_struct:
; GISEL10: ; %bb.0: ; %entry
; GISEL10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL10-NEXT: s_or_saveexec_b32 s8, -1
; GISEL10-NEXT: s_mov_b32 s6, s3
; GISEL10-NEXT: s_mov_b32 s7, s4
; GISEL10-NEXT: s_and_saveexec_b32 s3, s8
; GISEL10-NEXT: ; %bb.1: ; %shader
; GISEL10-NEXT: s_or_saveexec_b32 s4, -1
; GISEL10-NEXT: v_cndmask_b32_e64 v0, 0x47, v12, s4
; GISEL10-NEXT: v_cmp_ne_u32_e64 s8, 0, v0
; GISEL10-NEXT: v_mov_b32_e32 v0, s8
; GISEL10-NEXT: s_mov_b32 exec_lo, s4
; GISEL10-NEXT: v_add_nc_u32_e32 v10, 42, v12
; GISEL10-NEXT: v_mov_b32_e32 v11, v0
; GISEL10-NEXT: ; %bb.2: ; %tail
; GISEL10-NEXT: s_or_b32 exec_lo, exec_lo, s3
; GISEL10-NEXT: s_mov_b32 exec_lo, s5
; GISEL10-NEXT: s_setpc_b64 s[6:7]
;
; DAGISEL10-LABEL: phi_whole_struct:
; DAGISEL10: ; %bb.0: ; %entry
; DAGISEL10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL10-NEXT: s_or_saveexec_b32 s8, -1
; DAGISEL10-NEXT: s_mov_b32 s7, s4
; DAGISEL10-NEXT: s_mov_b32 s6, s3
; DAGISEL10-NEXT: s_and_saveexec_b32 s3, s8
; DAGISEL10-NEXT: ; %bb.1: ; %shader
; DAGISEL10-NEXT: s_or_saveexec_b32 s4, -1
; DAGISEL10-NEXT: v_cndmask_b32_e64 v0, 0x47, v12, s4
; DAGISEL10-NEXT: v_cmp_ne_u32_e64 s8, 0, v0
; DAGISEL10-NEXT: s_mov_b32 exec_lo, s4
; DAGISEL10-NEXT: v_add_nc_u32_e32 v10, 42, v12
; DAGISEL10-NEXT: v_mov_b32_e32 v11, s8
; DAGISEL10-NEXT: ; %bb.2: ; %tail
; DAGISEL10-NEXT: s_or_b32 exec_lo, exec_lo, s3
; DAGISEL10-NEXT: s_mov_b32 exec_lo, s5
; DAGISEL10-NEXT: s_setpc_b64 s[6:7]
entry:
  ; The i1 result selects between running %shader and skipping straight to %tail.
  %entry_exec = call i1 @llvm.amdgcn.init.whole.wave()
  br i1 %entry_exec, label %shader, label %tail

shader:
  %nonwwm = add i32 %x, 42
  %vgpr.1 = insertvalue { i32, ptr addrspace(5), i32, i32} %vgpr, i32 %nonwwm, 2

  ; The set.inactive constant 71 appears as 0x47 in the expected v_cndmask.
  %full.vgpr = call i32 @llvm.amdgcn.set.inactive.i32(i32 %x, i32 71)
  %non.zero = icmp ne i32 %full.vgpr, 0
  %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %non.zero)
  %wwm = call i32 @llvm.amdgcn.strict.wwm.i32(i32 %ballot)
  %vgpr.2 = insertvalue { i32, ptr addrspace(5), i32, i32} %vgpr.1, i32 %wwm, 3

  br label %tail

tail:
  ; Single struct-typed phi instead of one phi per modified element.
  %vgpr.args = phi { i32, ptr addrspace(5), i32, i32} [%vgpr, %entry], [%vgpr.2, %shader]
  ; Chain to %callee with %exec, passing %sgpr and the merged struct; the last
  ; operand (flags) is 0.
  call void(ptr, i32, <3 x i32>, { i32, ptr addrspace(5), i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain(ptr %callee, i32 %exec, <3 x i32> inreg %sgpr, { i32, ptr addrspace(5), i32, i32 } %vgpr.args, i32 0)
  unreachable
}

; Introduce more complex control flow - %shader contains a simple loop, and %tail contains an if.
define amdgpu_cs_chain void @control_flow(<3 x i32> inreg %sgpr, ptr inreg %callee, i32 inreg %exec, { i32, ptr addrspace(5), i32, i32 } %vgpr, i32 %x, i32 %y) {
; GISEL12-LABEL: control_flow:
; GISEL12: ; %bb.0: ; %entry
; GISEL12-NEXT: s_wait_loadcnt_dscnt 0x0
; GISEL12-NEXT: s_wait_expcnt 0x0
; GISEL12-NEXT: s_wait_samplecnt 0x0
; GISEL12-NEXT: s_wait_bvhcnt 0x0
; GISEL12-NEXT: s_wait_kmcnt 0x0
; GISEL12-NEXT: s_or_saveexec_b32 s8, -1
; GISEL12-NEXT: s_mov_b32 s6, s3
; GISEL12-NEXT: s_mov_b32 s7, s4
; GISEL12-NEXT: s_wait_alu 0xfffe
; GISEL12-NEXT: s_and_saveexec_b32 s3, s8
; GISEL12-NEXT: s_cbranch_execz .LBB3_4
; GISEL12-NEXT: ; %bb.1: ; %shader.preheader
; GISEL12-NEXT: v_add_nc_u32_e32 v1, -1, v12
; GISEL12-NEXT: s_mov_b32 s4, 0
; GISEL12-NEXT: .LBB3_2: ; %shader
; GISEL12-NEXT: ; =>This Inner Loop Header: Depth=1
; GISEL12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GISEL12-NEXT: v_add_nc_u32_e32 v1, 1, v1
; GISEL12-NEXT: s_or_saveexec_b32 s8, -1
; GISEL12-NEXT: s_wait_alu 0xfffe
; GISEL12-NEXT: v_cndmask_b32_e64 v0, 0x47, v1, s8
; GISEL12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GISEL12-NEXT: v_cmp_ne_u32_e64 s9, 0, v0
; GISEL12-NEXT: v_mov_b32_e32 v0, s9
; GISEL12-NEXT: s_mov_b32 exec_lo, s8
; GISEL12-NEXT: v_cmp_eq_u32_e32 vcc_lo, v13, v1
; GISEL12-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GISEL12-NEXT: v_mov_b32_e32 v11, v0
; GISEL12-NEXT: s_or_b32 s4, vcc_lo, s4
; GISEL12-NEXT: s_wait_alu 0xfffe
; GISEL12-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4
; GISEL12-NEXT: s_cbranch_execnz .LBB3_2
; GISEL12-NEXT: ; %bb.3: ; %tail.loopexit
; GISEL12-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GISEL12-NEXT: v_add_nc_u32_e32 v10, 42, v1
; GISEL12-NEXT: .LBB3_4: ; %Flow1
; GISEL12-NEXT: s_wait_alu 0xfffe
; GISEL12-NEXT: s_or_b32 exec_lo, exec_lo, s3
; GISEL12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; GISEL12-NEXT: s_mov_b32 s3, exec_lo
; GISEL12-NEXT: ; implicit-def: $vgpr8
; GISEL12-NEXT: v_cmpx_lt_i32_e64 v12, v13
; GISEL12-NEXT: s_wait_alu 0xfffe
; GISEL12-NEXT: s_xor_b32 s3, exec_lo, s3
; GISEL12-NEXT: ; %bb.5: ; %tail.else
; GISEL12-NEXT: s_or_saveexec_b32 s4, -1
; GISEL12-NEXT: v_mov_b32_e32 v0, 15
; GISEL12-NEXT: s_wait_alu 0xfffe
; GISEL12-NEXT: s_mov_b32 exec_lo, s4
; GISEL12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL12-NEXT: v_mov_b32_e32 v8, v0
; GISEL12-NEXT: ; %bb.6: ; %Flow
; GISEL12-NEXT: s_and_not1_saveexec_b32 s3, s3
; GISEL12-NEXT: ; %bb.7: ; %tail.then
; GISEL12-NEXT: s_mov_b32 s4, 44
; GISEL12-NEXT: s_wait_alu 0xfffe
; GISEL12-NEXT: v_mov_b32_e32 v8, s4
; GISEL12-NEXT: ; %bb.8: ; %tail.end
; GISEL12-NEXT: s_or_b32 exec_lo, exec_lo, s3
; GISEL12-NEXT: s_mov_b32 exec_lo, s5
; GISEL12-NEXT: s_wait_alu 0xfffe
; GISEL12-NEXT: s_setpc_b64 s[6:7]
;
; DAGISEL12-LABEL: control_flow:
; DAGISEL12: ; %bb.0: ; %entry
; DAGISEL12-NEXT: s_wait_loadcnt_dscnt 0x0
; DAGISEL12-NEXT: s_wait_expcnt 0x0
; DAGISEL12-NEXT: s_wait_samplecnt 0x0
; DAGISEL12-NEXT: s_wait_bvhcnt 0x0
; DAGISEL12-NEXT: s_wait_kmcnt 0x0
; DAGISEL12-NEXT: s_or_saveexec_b32 s8, -1
; DAGISEL12-NEXT: s_mov_b32 s7, s4
; DAGISEL12-NEXT: s_mov_b32 s6, s3
; DAGISEL12-NEXT: s_wait_alu 0xfffe
; DAGISEL12-NEXT: s_and_saveexec_b32 s3, s8
; DAGISEL12-NEXT: s_cbranch_execz .LBB3_4
; DAGISEL12-NEXT: ; %bb.1: ; %shader.preheader
; DAGISEL12-NEXT: v_add_nc_u32_e32 v1, -1, v12
; DAGISEL12-NEXT: s_mov_b32 s4, 0
; DAGISEL12-NEXT: .LBB3_2: ; %shader
; DAGISEL12-NEXT: ; =>This Inner Loop Header: Depth=1
; DAGISEL12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; DAGISEL12-NEXT: v_add_nc_u32_e32 v1, 1, v1
; DAGISEL12-NEXT: s_or_saveexec_b32 s8, -1
; DAGISEL12-NEXT: s_wait_alu 0xfffe
; DAGISEL12-NEXT: v_cndmask_b32_e64 v0, 0x47, v1, s8
; DAGISEL12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; DAGISEL12-NEXT: v_cmp_ne_u32_e64 s9, 0, v0
; DAGISEL12-NEXT: s_mov_b32 exec_lo, s8
; DAGISEL12-NEXT: v_cmp_eq_u32_e32 vcc_lo, v13, v1
; DAGISEL12-NEXT: v_mov_b32_e32 v11, s9
; DAGISEL12-NEXT: s_or_b32 s4, vcc_lo, s4
; DAGISEL12-NEXT: s_wait_alu 0xfffe
; DAGISEL12-NEXT: s_and_not1_b32 exec_lo, exec_lo, s4
; DAGISEL12-NEXT: s_cbranch_execnz .LBB3_2
; DAGISEL12-NEXT: ; %bb.3: ; %tail.loopexit
; DAGISEL12-NEXT: s_or_b32 exec_lo, exec_lo, s4
; DAGISEL12-NEXT: v_add_nc_u32_e32 v10, 42, v1
; DAGISEL12-NEXT: .LBB3_4: ; %Flow1
; DAGISEL12-NEXT: s_wait_alu 0xfffe
; DAGISEL12-NEXT: s_or_b32 exec_lo, exec_lo, s3
; DAGISEL12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
; DAGISEL12-NEXT: s_mov_b32 s3, exec_lo
; DAGISEL12-NEXT: ; implicit-def: $vgpr8
; DAGISEL12-NEXT: v_cmpx_lt_i32_e64 v12, v13
; DAGISEL12-NEXT: s_wait_alu 0xfffe
; DAGISEL12-NEXT: s_xor_b32 s3, exec_lo, s3
; DAGISEL12-NEXT: ; %bb.5: ; %tail.else
; DAGISEL12-NEXT: s_mov_b32 s4, 15
; DAGISEL12-NEXT: s_wait_alu 0xfffe
; DAGISEL12-NEXT: v_mov_b32_e32 v8, s4
; DAGISEL12-NEXT: ; %bb.6: ; %Flow
; DAGISEL12-NEXT: s_and_not1_saveexec_b32 s3, s3
; DAGISEL12-NEXT: ; %bb.7: ; %tail.then
; DAGISEL12-NEXT: v_mov_b32_e32 v8, 44
; DAGISEL12-NEXT: ; %bb.8: ; %tail.end
; DAGISEL12-NEXT: s_wait_alu 0xfffe
; DAGISEL12-NEXT: s_or_b32 exec_lo, exec_lo, s3
; DAGISEL12-NEXT: s_mov_b32 exec_lo, s5
; DAGISEL12-NEXT: s_wait_alu 0xfffe
; DAGISEL12-NEXT: s_setpc_b64 s[6:7]
;
; GISEL10-LABEL: control_flow:
; GISEL10: ; %bb.0: ; %entry
; GISEL10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL10-NEXT: s_or_saveexec_b32 s8, -1
; GISEL10-NEXT: s_mov_b32 s6, s3
; GISEL10-NEXT: s_mov_b32 s7, s4
; GISEL10-NEXT: s_and_saveexec_b32 s3, s8
; GISEL10-NEXT: s_cbranch_execz .LBB3_4
; GISEL10-NEXT: ; %bb.1: ; %shader.preheader
; GISEL10-NEXT: v_add_nc_u32_e32 v1, -1, v12
; GISEL10-NEXT: s_mov_b32 s4, 0
; GISEL10-NEXT: .LBB3_2: ; %shader
; GISEL10-NEXT: ; =>This Inner Loop Header: Depth=1
; GISEL10-NEXT: v_add_nc_u32_e32 v1, 1, v1
; GISEL10-NEXT: s_or_saveexec_b32 s8, -1
; GISEL10-NEXT: v_cndmask_b32_e64 v0, 0x47, v1, s8
; GISEL10-NEXT: v_cmp_ne_u32_e64 s9, 0, v0
; GISEL10-NEXT: v_mov_b32_e32 v0, s9
; GISEL10-NEXT: s_mov_b32 exec_lo, s8
; GISEL10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v13, v1
; GISEL10-NEXT: v_mov_b32_e32 v11, v0
; GISEL10-NEXT: s_or_b32 s4, vcc_lo, s4
; GISEL10-NEXT: s_andn2_b32 exec_lo, exec_lo, s4
; GISEL10-NEXT: s_cbranch_execnz .LBB3_2
; GISEL10-NEXT: ; %bb.3: ; %tail.loopexit
; GISEL10-NEXT: s_or_b32 exec_lo, exec_lo, s4
; GISEL10-NEXT: v_add_nc_u32_e32 v10, 42, v1
; GISEL10-NEXT: .LBB3_4: ; %Flow1
; GISEL10-NEXT: s_or_b32 exec_lo, exec_lo, s3
; GISEL10-NEXT: s_mov_b32 s3, exec_lo
; GISEL10-NEXT: ; implicit-def: $vgpr8
; GISEL10-NEXT: v_cmpx_lt_i32_e64 v12, v13
; GISEL10-NEXT: s_xor_b32 s3, exec_lo, s3
; GISEL10-NEXT: ; %bb.5: ; %tail.else
; GISEL10-NEXT: s_or_saveexec_b32 s4, -1
; GISEL10-NEXT: v_mov_b32_e32 v0, 15
; GISEL10-NEXT: s_mov_b32 exec_lo, s4
; GISEL10-NEXT: v_mov_b32_e32 v8, v0
; GISEL10-NEXT: ; %bb.6: ; %Flow
; GISEL10-NEXT: s_andn2_saveexec_b32 s3, s3
; GISEL10-NEXT: ; %bb.7: ; %tail.then
; GISEL10-NEXT: s_mov_b32 s4, 44
; GISEL10-NEXT: v_mov_b32_e32 v8, s4
; GISEL10-NEXT: ; %bb.8: ; %tail.end
; GISEL10-NEXT: s_or_b32 exec_lo, exec_lo, s3
; GISEL10-NEXT: s_mov_b32 exec_lo, s5
; GISEL10-NEXT: s_setpc_b64 s[6:7]
;
; DAGISEL10-LABEL: control_flow:
; DAGISEL10: ; %bb.0: ; %entry
; DAGISEL10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL10-NEXT: s_or_saveexec_b32 s8, -1
; DAGISEL10-NEXT: s_mov_b32 s7, s4
; DAGISEL10-NEXT: s_mov_b32 s6, s3
; DAGISEL10-NEXT: s_and_saveexec_b32 s3, s8
; DAGISEL10-NEXT: s_cbranch_execz .LBB3_4
; DAGISEL10-NEXT: ; %bb.1: ; %shader.preheader
; DAGISEL10-NEXT: v_add_nc_u32_e32 v1, -1, v12
; DAGISEL10-NEXT: s_mov_b32 s4, 0
; DAGISEL10-NEXT: .LBB3_2: ; %shader
; DAGISEL10-NEXT: ; =>This Inner Loop Header: Depth=1
; DAGISEL10-NEXT: v_add_nc_u32_e32 v1, 1, v1
; DAGISEL10-NEXT: s_or_saveexec_b32 s8, -1
; DAGISEL10-NEXT: v_cndmask_b32_e64 v0, 0x47, v1, s8
; DAGISEL10-NEXT: v_cmp_ne_u32_e64 s9, 0, v0
; DAGISEL10-NEXT: s_mov_b32 exec_lo, s8
; DAGISEL10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v13, v1
; DAGISEL10-NEXT: v_mov_b32_e32 v11, s9
; DAGISEL10-NEXT: s_or_b32 s4, vcc_lo, s4
; DAGISEL10-NEXT: s_andn2_b32 exec_lo, exec_lo, s4
; DAGISEL10-NEXT: s_cbranch_execnz .LBB3_2
; DAGISEL10-NEXT: ; %bb.3: ; %tail.loopexit
; DAGISEL10-NEXT: s_or_b32 exec_lo, exec_lo, s4
; DAGISEL10-NEXT: v_add_nc_u32_e32 v10, 42, v1
; DAGISEL10-NEXT: .LBB3_4: ; %Flow1
; DAGISEL10-NEXT: s_or_b32 exec_lo, exec_lo, s3
; DAGISEL10-NEXT: s_mov_b32 s3, exec_lo
; DAGISEL10-NEXT: ; implicit-def: $vgpr8
; DAGISEL10-NEXT: v_cmpx_lt_i32_e64 v12, v13
; DAGISEL10-NEXT: s_xor_b32 s3, exec_lo, s3
; DAGISEL10-NEXT: ; %bb.5: ; %tail.else
; DAGISEL10-NEXT: s_mov_b32 s4, 15
; DAGISEL10-NEXT: v_mov_b32_e32 v8, s4
; DAGISEL10-NEXT: ; %bb.6: ; %Flow
; DAGISEL10-NEXT: s_andn2_saveexec_b32 s3, s3
; DAGISEL10-NEXT: ; %bb.7: ; %tail.then
; DAGISEL10-NEXT: v_mov_b32_e32 v8, 44
; DAGISEL10-NEXT: ; %bb.8: ; %tail.end
; DAGISEL10-NEXT: s_or_b32 exec_lo, exec_lo, s3
; DAGISEL10-NEXT: s_mov_b32 exec_lo, s5
; DAGISEL10-NEXT: s_setpc_b64 s[6:7]
entry:
  ; The i1 result selects between entering the %shader loop and skipping to %tail.
  %entry_exec = call i1 @llvm.amdgcn.init.whole.wave()
  br i1 %entry_exec, label %shader, label %tail

shader:
  ; Self-looping block: %i starts at %x and is incremented by 1 per iteration.
  %i = phi i32 [%x, %entry], [%i.inc, %shader]

  %nonwwm = add i32 %i, 42
  %vgpr.1 = insertvalue { i32, ptr addrspace(5), i32, i32} %vgpr, i32 %nonwwm, 2

  ; The set.inactive constant 71 appears as 0x47 in the expected v_cndmask.
  %full.vgpr = call i32 @llvm.amdgcn.set.inactive.i32(i32 %i, i32 71)
  %non.zero = icmp ne i32 %full.vgpr, 0
  %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %non.zero)
  %wwm = call i32 @llvm.amdgcn.strict.wwm.i32(i32 %ballot)
  %vgpr.2 = insertvalue { i32, ptr addrspace(5), i32, i32} %vgpr.1, i32 %wwm, 3

  ; The exit test compares the pre-increment %i with %y, so the loop is left
  ; after the iteration in which %i equals %y.
  %i.inc = add i32 %i, 1
  %loop.cond = icmp ne i32 %i, %y
  br i1 %loop.cond, label %shader, label %tail

tail:
  ; Whole-struct phi: unmodified %vgpr from %entry, or the value built by the
  ; last loop iteration.
  %vgpr.tail = phi { i32, ptr addrspace(5), i32, i32} [%vgpr, %entry], [%vgpr.2, %shader]

  %if.cond = icmp sge i32 %x, %y
  br i1 %if.cond, label %tail.then, label %tail.else

tail.then:
  ; x >= y: element 0 becomes the plain constant 44.
  %vgpr.then = insertvalue { i32, ptr addrspace(5), i32, i32} %vgpr.tail, i32 44, 0
  br label %tail.end

tail.else:
  ; x < y: element 0 becomes the constant 15 passed through strict.wwm.
  %wwm.tail = call i32 @llvm.amdgcn.strict.wwm.i32(i32 15)
  %vgpr.else = insertvalue { i32, ptr addrspace(5), i32, i32} %vgpr.tail, i32 %wwm.tail, 0
  br label %tail.end

tail.end:
  %vgpr.args = phi { i32, ptr addrspace(5), i32, i32 } [%vgpr.then, %tail.then], [%vgpr.else, %tail.else]
  ; Chain to %callee with %exec, passing %sgpr and the merged struct; the last
  ; operand (flags) is 0.
  call void(ptr, i32, <3 x i32>, { i32, ptr addrspace(5), i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain(ptr %callee, i32 %exec, <3 x i32> inreg %sgpr, { i32, ptr addrspace(5), i32, i32 } %vgpr.args, i32 0)
  unreachable
}

; Try with v0-v7 occupied - this will force us to use higher registers for temporaries. Make sure we don't preserve them.
define amdgpu_cs_chain void @use_v0_7(<3 x i32> inreg %sgpr, ptr inreg %callee, i32 inreg %exec, { i32, ptr addrspace(5), i32, i32 } %vgpr, i32 %x, i32 %y) {
; GISEL12-LABEL: use_v0_7:
; GISEL12: ; %bb.0: ; %entry
; GISEL12-NEXT: s_wait_loadcnt_dscnt 0x0
; GISEL12-NEXT: s_wait_expcnt 0x0
; GISEL12-NEXT: s_wait_samplecnt 0x0
; GISEL12-NEXT: s_wait_bvhcnt 0x0
; GISEL12-NEXT: s_wait_kmcnt 0x0
; GISEL12-NEXT: s_or_saveexec_b32 s8, -1
; GISEL12-NEXT: s_mov_b32 s6, s3
; GISEL12-NEXT: s_mov_b32 s7, s4
; GISEL12-NEXT: s_wait_alu 0xfffe
; GISEL12-NEXT: s_and_saveexec_b32 s3, s8
; GISEL12-NEXT: s_cbranch_execz .LBB4_2
; GISEL12-NEXT: ; %bb.1: ; %shader
; GISEL12-NEXT: s_or_saveexec_b32 s4, -1
; GISEL12-NEXT: s_wait_alu 0xfffe
; GISEL12-NEXT: v_cndmask_b32_e64 v13, 0x47, v12, s4
; GISEL12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GISEL12-NEXT: v_cmp_ne_u32_e64 s8, 0, v13
; GISEL12-NEXT: v_mov_b32_e32 v13, s8
; GISEL12-NEXT: s_mov_b32 exec_lo, s4
; GISEL12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GISEL12-NEXT: v_dual_mov_b32 v11, v13 :: v_dual_add_nc_u32 v10, 42, v12
; GISEL12-NEXT: ;;#ASMSTART
; GISEL12-NEXT: ; use v0-7
; GISEL12-NEXT: ;;#ASMEND
; GISEL12-NEXT: .LBB4_2: ; %tail
; GISEL12-NEXT: s_wait_alu 0xfffe
; GISEL12-NEXT: s_or_b32 exec_lo, exec_lo, s3
; GISEL12-NEXT: s_mov_b32 exec_lo, s5
; GISEL12-NEXT: s_wait_alu 0xfffe
; GISEL12-NEXT: s_setpc_b64 s[6:7]
;
; DAGISEL12-LABEL: use_v0_7:
; DAGISEL12: ; %bb.0: ; %entry
; DAGISEL12-NEXT: s_wait_loadcnt_dscnt 0x0
; DAGISEL12-NEXT: s_wait_expcnt 0x0
; DAGISEL12-NEXT: s_wait_samplecnt 0x0
; DAGISEL12-NEXT: s_wait_bvhcnt 0x0
; DAGISEL12-NEXT: s_wait_kmcnt 0x0
; DAGISEL12-NEXT: s_or_saveexec_b32 s8, -1
; DAGISEL12-NEXT: s_mov_b32 s7, s4
; DAGISEL12-NEXT: s_mov_b32 s6, s3
; DAGISEL12-NEXT: s_wait_alu 0xfffe
; DAGISEL12-NEXT: s_and_saveexec_b32 s3, s8
; DAGISEL12-NEXT: s_cbranch_execz .LBB4_2
; DAGISEL12-NEXT: ; %bb.1: ; %shader
; DAGISEL12-NEXT: s_or_saveexec_b32 s4, -1
; DAGISEL12-NEXT: s_wait_alu 0xfffe
; DAGISEL12-NEXT: v_cndmask_b32_e64 v13, 0x47, v12, s4
; DAGISEL12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; DAGISEL12-NEXT: v_cmp_ne_u32_e64 s8, 0, v13
; DAGISEL12-NEXT: s_mov_b32 exec_lo, s4
; DAGISEL12-NEXT: v_dual_mov_b32 v11, s8 :: v_dual_add_nc_u32 v10, 42, v12
; DAGISEL12-NEXT: ;;#ASMSTART
; DAGISEL12-NEXT: ; use v0-7
; DAGISEL12-NEXT: ;;#ASMEND
; DAGISEL12-NEXT: .LBB4_2: ; %tail
; DAGISEL12-NEXT: s_wait_alu 0xfffe
; DAGISEL12-NEXT: s_or_b32 exec_lo, exec_lo, s3
; DAGISEL12-NEXT: s_mov_b32 exec_lo, s5
; DAGISEL12-NEXT: s_wait_alu 0xfffe
; DAGISEL12-NEXT: s_setpc_b64 s[6:7]
;
; GISEL10-LABEL: use_v0_7:
; GISEL10: ; %bb.0: ; %entry
; GISEL10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL10-NEXT: s_or_saveexec_b32 s8, -1
; GISEL10-NEXT: s_mov_b32 s6, s3
; GISEL10-NEXT: s_mov_b32 s7, s4
; GISEL10-NEXT: s_and_saveexec_b32 s3, s8
; GISEL10-NEXT: s_cbranch_execz .LBB4_2
; GISEL10-NEXT: ; %bb.1: ; %shader
; GISEL10-NEXT: s_or_saveexec_b32 s4, -1
; GISEL10-NEXT: v_cndmask_b32_e64 v13, 0x47, v12, s4
; GISEL10-NEXT: v_cmp_ne_u32_e64 s8, 0, v13
; GISEL10-NEXT: v_mov_b32_e32 v13, s8
; GISEL10-NEXT: s_mov_b32 exec_lo, s4
; GISEL10-NEXT: v_add_nc_u32_e32 v10, 42, v12
; GISEL10-NEXT: v_mov_b32_e32 v11, v13
; GISEL10-NEXT: ;;#ASMSTART
; GISEL10-NEXT: ; use v0-7
; GISEL10-NEXT: ;;#ASMEND
; GISEL10-NEXT: .LBB4_2: ; %tail
; GISEL10-NEXT: s_or_b32 exec_lo, exec_lo, s3
; GISEL10-NEXT: s_mov_b32 exec_lo, s5
; GISEL10-NEXT: s_setpc_b64 s[6:7]
;
; DAGISEL10-LABEL: use_v0_7:
; DAGISEL10: ; %bb.0: ; %entry
; DAGISEL10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL10-NEXT: s_or_saveexec_b32 s8, -1
; DAGISEL10-NEXT: s_mov_b32 s7, s4
; DAGISEL10-NEXT: s_mov_b32 s6, s3
; DAGISEL10-NEXT: s_and_saveexec_b32 s3, s8
; DAGISEL10-NEXT: s_cbranch_execz .LBB4_2
; DAGISEL10-NEXT: ; %bb.1: ; %shader
; DAGISEL10-NEXT: s_or_saveexec_b32 s4, -1
; DAGISEL10-NEXT: v_cndmask_b32_e64 v13, 0x47, v12, s4
; DAGISEL10-NEXT: v_cmp_ne_u32_e64 s8, 0, v13
; DAGISEL10-NEXT: s_mov_b32 exec_lo, s4
; DAGISEL10-NEXT: v_add_nc_u32_e32 v10, 42, v12
; DAGISEL10-NEXT: v_mov_b32_e32 v11, s8
; DAGISEL10-NEXT: ;;#ASMSTART
; DAGISEL10-NEXT: ; use v0-7
; DAGISEL10-NEXT: ;;#ASMEND
; DAGISEL10-NEXT: .LBB4_2: ; %tail
; DAGISEL10-NEXT: s_or_b32 exec_lo, exec_lo, s3
; DAGISEL10-NEXT: s_mov_b32 exec_lo, s5
; DAGISEL10-NEXT: s_setpc_b64 s[6:7]
entry:
  ; The i1 result selects between running %shader and skipping straight to %tail.
  %entry_exec = call i1 @llvm.amdgcn.init.whole.wave()
  br i1 %entry_exec, label %shader, label %tail

shader:
  ; Inline asm that emits only a comment but lists v0-v7 as clobbered. In the
  ; expected assembly the set.inactive temporary is v13 rather than v0, and no
  ; save/restore code for it is expected.
  call void asm sideeffect "; use v0-7", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}"()

  %nonwwm = add i32 %x, 42
  %vgpr.1 = insertvalue { i32, ptr addrspace(5), i32, i32} %vgpr, i32 %nonwwm, 2

  ; The set.inactive constant 71 appears as 0x47 in the expected v_cndmask.
  %full.vgpr = call i32 @llvm.amdgcn.set.inactive.i32(i32 %x, i32 71)
  %non.zero = icmp ne i32 %full.vgpr, 0
  %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %non.zero)
  %wwm = call i32 @llvm.amdgcn.strict.wwm.i32(i32 %ballot)
  %vgpr.2 = insertvalue { i32, ptr addrspace(5), i32, i32} %vgpr.1, i32 %wwm, 3

  br label %tail

tail:
  ; Whole-struct phi: unmodified %vgpr from %entry, or the value built in %shader.
  %vgpr.args = phi { i32, ptr addrspace(5), i32, i32} [%vgpr, %entry], [%vgpr.2, %shader]
  ; Chain to %callee with %exec, passing %sgpr and the merged struct; the last
  ; operand (flags) is 0.
  call void(ptr, i32, <3 x i32>, { i32, ptr addrspace(5), i32, i32 }, i32, ...) @llvm.amdgcn.cs.chain(ptr %callee, i32 %exec, <3 x i32> inreg %sgpr, { i32, ptr addrspace(5), i32, i32 } %vgpr.args, i32 0)
  unreachable
}


; Check that the inactive lanes of v8:15 are correctly preserved even across a
; WWM call that reads and writes them.
; FIXME: The GlobalISel path hits a pre-existing issue, so the inactive lanes do get overwritten.
; Structure of @wwm_write_to_arg_reg (the IR body follows the expected-output lines):
;   entry  - branches on the i1 returned by llvm.amdgcn.init.whole.wave to %shader or %tail.
;   shader - passes %vgpr to the amdgpu_gfx function @write_v0_v15 and wraps the
;            returned <16 x i32> in llvm.amdgcn.strict.wwm.v16i32.
;   tail   - merges %vgpr (from %entry) and %vgpr.wwm (from %shader) with a phi and
;            forwards the result to llvm.amdgcn.cs.chain with a trailing i32 0 argument.
; In the expected output, the SelectionDAG runs first copy the call results v0-v15
; into v40-v55 while exec is still the value set by s_or_saveexec (-1), and only move
; them into v24-v39 after exec has been restored. The GlobalISel runs copy v0-v15
; straight into v24-v39 before exec is restored, which is the overwrite of inactive
; lanes that the FIXME refers to.
; The expected-output lines are autogenerated by utils/update_llc_test_checks.py;
; regenerate them with that script instead of editing them by hand.
743define amdgpu_cs_chain void @wwm_write_to_arg_reg(<3 x i32> inreg %sgpr, ptr inreg %callee, i32 inreg %exec, <16 x i32> %vgpr, i32 %x, i32 %y) { 744; GISEL12-LABEL: wwm_write_to_arg_reg: 745; GISEL12: ; %bb.0: ; %entry 746; GISEL12-NEXT: s_wait_loadcnt_dscnt 0x0 747; GISEL12-NEXT: s_wait_expcnt 0x0 748; GISEL12-NEXT: s_wait_samplecnt 0x0 749; GISEL12-NEXT: s_wait_bvhcnt 0x0 750; GISEL12-NEXT: s_wait_kmcnt 0x0 751; GISEL12-NEXT: s_mov_b32 s32, 0 752; GISEL12-NEXT: s_or_saveexec_b32 s9, -1 753; GISEL12-NEXT: s_or_saveexec_b32 s12, -1 754; GISEL12-NEXT: s_mov_b32 s6, s0 755; GISEL12-NEXT: s_mov_b32 s7, s1 756; GISEL12-NEXT: s_mov_b32 s8, s2 757; GISEL12-NEXT: s_mov_b32 s10, s3 758; GISEL12-NEXT: s_mov_b32 s11, s4 759; GISEL12-NEXT: v_dual_mov_b32 v24, v8 :: v_dual_mov_b32 v25, v9 760; GISEL12-NEXT: v_dual_mov_b32 v26, v10 :: v_dual_mov_b32 v27, v11 761; GISEL12-NEXT: v_dual_mov_b32 v28, v12 :: v_dual_mov_b32 v29, v13 762; GISEL12-NEXT: v_dual_mov_b32 v30, v14 :: v_dual_mov_b32 v31, v15 763; GISEL12-NEXT: v_dual_mov_b32 v32, v16 :: v_dual_mov_b32 v33, v17 764; GISEL12-NEXT: v_dual_mov_b32 v34, v18 :: v_dual_mov_b32 v35, v19 765; GISEL12-NEXT: v_dual_mov_b32 v36, v20 :: v_dual_mov_b32 v37, v21 766; GISEL12-NEXT: v_dual_mov_b32 v38, v22 :: v_dual_mov_b32 v39, v23 767; GISEL12-NEXT: s_wait_alu 0xfffe 768; GISEL12-NEXT: s_mov_b32 exec_lo, s12 769; GISEL12-NEXT: s_and_saveexec_b32 s4, s9 770; GISEL12-NEXT: s_cbranch_execz .LBB5_2 771; GISEL12-NEXT: ; %bb.1: ; %shader 772; GISEL12-NEXT: s_or_saveexec_b32 s9, -1 773; GISEL12-NEXT: s_getpc_b64 s[0:1] 774; GISEL12-NEXT: s_wait_alu 0xfffe 775; GISEL12-NEXT: s_sext_i32_i16 s1, s1 776; GISEL12-NEXT: s_add_co_u32 s0, s0, write_v0_v15@gotpcrel32@lo+12 777; GISEL12-NEXT: s_wait_alu 0xfffe 778; GISEL12-NEXT: s_add_co_ci_u32 s1, s1, write_v0_v15@gotpcrel32@hi+24 779; GISEL12-NEXT: v_dual_mov_b32 v0, v24 :: v_dual_mov_b32 v1, v25 780; GISEL12-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 781; GISEL12-NEXT: v_dual_mov_b32 v2, v26 :: 
v_dual_mov_b32 v3, v27 782; GISEL12-NEXT: v_dual_mov_b32 v4, v28 :: v_dual_mov_b32 v5, v29 783; GISEL12-NEXT: v_dual_mov_b32 v6, v30 :: v_dual_mov_b32 v7, v31 784; GISEL12-NEXT: v_dual_mov_b32 v8, v32 :: v_dual_mov_b32 v9, v33 785; GISEL12-NEXT: v_dual_mov_b32 v10, v34 :: v_dual_mov_b32 v11, v35 786; GISEL12-NEXT: v_dual_mov_b32 v12, v36 :: v_dual_mov_b32 v13, v37 787; GISEL12-NEXT: v_dual_mov_b32 v14, v38 :: v_dual_mov_b32 v15, v39 788; GISEL12-NEXT: s_wait_kmcnt 0x0 789; GISEL12-NEXT: s_wait_alu 0xfffe 790; GISEL12-NEXT: s_swappc_b64 s[30:31], s[0:1] 791; GISEL12-NEXT: v_dual_mov_b32 v24, v0 :: v_dual_mov_b32 v25, v1 792; GISEL12-NEXT: v_dual_mov_b32 v26, v2 :: v_dual_mov_b32 v27, v3 793; GISEL12-NEXT: v_dual_mov_b32 v28, v4 :: v_dual_mov_b32 v29, v5 794; GISEL12-NEXT: v_dual_mov_b32 v30, v6 :: v_dual_mov_b32 v31, v7 795; GISEL12-NEXT: v_dual_mov_b32 v32, v8 :: v_dual_mov_b32 v33, v9 796; GISEL12-NEXT: v_dual_mov_b32 v34, v10 :: v_dual_mov_b32 v35, v11 797; GISEL12-NEXT: v_dual_mov_b32 v36, v12 :: v_dual_mov_b32 v37, v13 798; GISEL12-NEXT: v_dual_mov_b32 v38, v14 :: v_dual_mov_b32 v39, v15 799; GISEL12-NEXT: s_mov_b32 exec_lo, s9 800; GISEL12-NEXT: ; kill: def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 killed $exec 801; GISEL12-NEXT: .LBB5_2: ; %tail 802; GISEL12-NEXT: s_wait_alu 0xfffe 803; GISEL12-NEXT: s_or_b32 exec_lo, exec_lo, s4 804; GISEL12-NEXT: v_dual_mov_b32 v8, v24 :: v_dual_mov_b32 v9, v25 805; GISEL12-NEXT: v_dual_mov_b32 v10, v26 :: v_dual_mov_b32 v11, v27 806; GISEL12-NEXT: v_dual_mov_b32 v12, v28 :: v_dual_mov_b32 v13, v29 807; GISEL12-NEXT: v_dual_mov_b32 v14, v30 :: v_dual_mov_b32 v15, v31 808; GISEL12-NEXT: v_dual_mov_b32 v16, v32 :: v_dual_mov_b32 v17, v33 809; GISEL12-NEXT: v_dual_mov_b32 v18, v34 :: v_dual_mov_b32 v19, v35 810; GISEL12-NEXT: 
v_dual_mov_b32 v20, v36 :: v_dual_mov_b32 v21, v37 811; GISEL12-NEXT: v_dual_mov_b32 v22, v38 :: v_dual_mov_b32 v23, v39 812; GISEL12-NEXT: s_mov_b32 s0, s6 813; GISEL12-NEXT: s_mov_b32 s1, s7 814; GISEL12-NEXT: s_mov_b32 s2, s8 815; GISEL12-NEXT: s_mov_b32 exec_lo, s5 816; GISEL12-NEXT: s_wait_alu 0xfffe 817; GISEL12-NEXT: s_setpc_b64 s[10:11] 818; 819; DAGISEL12-LABEL: wwm_write_to_arg_reg: 820; DAGISEL12: ; %bb.0: ; %entry 821; DAGISEL12-NEXT: s_wait_loadcnt_dscnt 0x0 822; DAGISEL12-NEXT: s_wait_expcnt 0x0 823; DAGISEL12-NEXT: s_wait_samplecnt 0x0 824; DAGISEL12-NEXT: s_wait_bvhcnt 0x0 825; DAGISEL12-NEXT: s_wait_kmcnt 0x0 826; DAGISEL12-NEXT: s_mov_b32 s32, 0 827; DAGISEL12-NEXT: s_or_saveexec_b32 s11, -1 828; DAGISEL12-NEXT: s_or_saveexec_b32 s6, -1 829; DAGISEL12-NEXT: v_dual_mov_b32 v39, v23 :: v_dual_mov_b32 v38, v22 830; DAGISEL12-NEXT: v_dual_mov_b32 v37, v21 :: v_dual_mov_b32 v36, v20 831; DAGISEL12-NEXT: v_dual_mov_b32 v35, v19 :: v_dual_mov_b32 v34, v18 832; DAGISEL12-NEXT: v_dual_mov_b32 v33, v17 :: v_dual_mov_b32 v32, v16 833; DAGISEL12-NEXT: v_dual_mov_b32 v31, v15 :: v_dual_mov_b32 v30, v14 834; DAGISEL12-NEXT: v_dual_mov_b32 v29, v13 :: v_dual_mov_b32 v28, v12 835; DAGISEL12-NEXT: v_dual_mov_b32 v27, v11 :: v_dual_mov_b32 v26, v10 836; DAGISEL12-NEXT: v_dual_mov_b32 v25, v9 :: v_dual_mov_b32 v24, v8 837; DAGISEL12-NEXT: s_wait_alu 0xfffe 838; DAGISEL12-NEXT: s_mov_b32 exec_lo, s6 839; DAGISEL12-NEXT: s_mov_b32 s9, s4 840; DAGISEL12-NEXT: s_mov_b32 s8, s3 841; DAGISEL12-NEXT: s_mov_b32 s4, s2 842; DAGISEL12-NEXT: s_mov_b32 s6, s1 843; DAGISEL12-NEXT: s_mov_b32 s7, s0 844; DAGISEL12-NEXT: s_and_saveexec_b32 s10, s11 845; DAGISEL12-NEXT: s_cbranch_execz .LBB5_2 846; DAGISEL12-NEXT: ; %bb.1: ; %shader 847; DAGISEL12-NEXT: s_or_saveexec_b32 s11, -1 848; DAGISEL12-NEXT: s_getpc_b64 s[0:1] 849; DAGISEL12-NEXT: s_wait_alu 0xfffe 850; DAGISEL12-NEXT: s_sext_i32_i16 s1, s1 851; DAGISEL12-NEXT: s_add_co_u32 s0, s0, write_v0_v15@gotpcrel32@lo+12 852; 
DAGISEL12-NEXT: s_wait_alu 0xfffe 853; DAGISEL12-NEXT: s_add_co_ci_u32 s1, s1, write_v0_v15@gotpcrel32@hi+24 854; DAGISEL12-NEXT: v_dual_mov_b32 v0, v24 :: v_dual_mov_b32 v1, v25 855; DAGISEL12-NEXT: s_load_b64 s[0:1], s[0:1], 0x0 856; DAGISEL12-NEXT: v_dual_mov_b32 v2, v26 :: v_dual_mov_b32 v3, v27 857; DAGISEL12-NEXT: v_dual_mov_b32 v4, v28 :: v_dual_mov_b32 v5, v29 858; DAGISEL12-NEXT: v_dual_mov_b32 v6, v30 :: v_dual_mov_b32 v7, v31 859; DAGISEL12-NEXT: v_dual_mov_b32 v8, v32 :: v_dual_mov_b32 v9, v33 860; DAGISEL12-NEXT: v_dual_mov_b32 v10, v34 :: v_dual_mov_b32 v11, v35 861; DAGISEL12-NEXT: v_dual_mov_b32 v12, v36 :: v_dual_mov_b32 v13, v37 862; DAGISEL12-NEXT: v_dual_mov_b32 v14, v38 :: v_dual_mov_b32 v15, v39 863; DAGISEL12-NEXT: s_wait_kmcnt 0x0 864; DAGISEL12-NEXT: s_wait_alu 0xfffe 865; DAGISEL12-NEXT: s_swappc_b64 s[30:31], s[0:1] 866; DAGISEL12-NEXT: v_dual_mov_b32 v40, v0 :: v_dual_mov_b32 v41, v1 867; DAGISEL12-NEXT: v_dual_mov_b32 v42, v2 :: v_dual_mov_b32 v43, v3 868; DAGISEL12-NEXT: v_dual_mov_b32 v44, v4 :: v_dual_mov_b32 v45, v5 869; DAGISEL12-NEXT: v_dual_mov_b32 v46, v6 :: v_dual_mov_b32 v47, v7 870; DAGISEL12-NEXT: v_dual_mov_b32 v48, v8 :: v_dual_mov_b32 v49, v9 871; DAGISEL12-NEXT: v_dual_mov_b32 v50, v10 :: v_dual_mov_b32 v51, v11 872; DAGISEL12-NEXT: v_dual_mov_b32 v52, v12 :: v_dual_mov_b32 v53, v13 873; DAGISEL12-NEXT: v_dual_mov_b32 v54, v14 :: v_dual_mov_b32 v55, v15 874; DAGISEL12-NEXT: s_mov_b32 exec_lo, s11 875; DAGISEL12-NEXT: v_dual_mov_b32 v24, v40 :: v_dual_mov_b32 v25, v41 876; DAGISEL12-NEXT: v_dual_mov_b32 v26, v42 :: v_dual_mov_b32 v27, v43 877; DAGISEL12-NEXT: v_dual_mov_b32 v28, v44 :: v_dual_mov_b32 v29, v45 878; DAGISEL12-NEXT: v_dual_mov_b32 v30, v46 :: v_dual_mov_b32 v31, v47 879; DAGISEL12-NEXT: v_dual_mov_b32 v32, v48 :: v_dual_mov_b32 v33, v49 880; DAGISEL12-NEXT: v_dual_mov_b32 v34, v50 :: v_dual_mov_b32 v35, v51 881; DAGISEL12-NEXT: v_dual_mov_b32 v36, v52 :: v_dual_mov_b32 v37, v53 882; DAGISEL12-NEXT: 
v_dual_mov_b32 v38, v54 :: v_dual_mov_b32 v39, v55 883; DAGISEL12-NEXT: .LBB5_2: ; %tail 884; DAGISEL12-NEXT: s_wait_alu 0xfffe 885; DAGISEL12-NEXT: s_or_b32 exec_lo, exec_lo, s10 886; DAGISEL12-NEXT: v_dual_mov_b32 v8, v24 :: v_dual_mov_b32 v9, v25 887; DAGISEL12-NEXT: v_dual_mov_b32 v10, v26 :: v_dual_mov_b32 v11, v27 888; DAGISEL12-NEXT: v_dual_mov_b32 v12, v28 :: v_dual_mov_b32 v13, v29 889; DAGISEL12-NEXT: v_dual_mov_b32 v14, v30 :: v_dual_mov_b32 v15, v31 890; DAGISEL12-NEXT: v_dual_mov_b32 v16, v32 :: v_dual_mov_b32 v17, v33 891; DAGISEL12-NEXT: v_dual_mov_b32 v18, v34 :: v_dual_mov_b32 v19, v35 892; DAGISEL12-NEXT: v_dual_mov_b32 v20, v36 :: v_dual_mov_b32 v21, v37 893; DAGISEL12-NEXT: v_dual_mov_b32 v22, v38 :: v_dual_mov_b32 v23, v39 894; DAGISEL12-NEXT: s_mov_b32 s0, s7 895; DAGISEL12-NEXT: s_mov_b32 s1, s6 896; DAGISEL12-NEXT: s_mov_b32 s2, s4 897; DAGISEL12-NEXT: s_mov_b32 exec_lo, s5 898; DAGISEL12-NEXT: s_wait_alu 0xfffe 899; DAGISEL12-NEXT: s_setpc_b64 s[8:9] 900; 901; GISEL10-LABEL: wwm_write_to_arg_reg: 902; GISEL10: ; %bb.0: ; %entry 903; GISEL10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 904; GISEL10-NEXT: s_mov_b32 s32, 0 905; GISEL10-NEXT: s_or_saveexec_b32 s9, -1 906; GISEL10-NEXT: s_or_saveexec_b32 s12, -1 907; GISEL10-NEXT: s_mov_b32 s6, s0 908; GISEL10-NEXT: s_mov_b32 s7, s1 909; GISEL10-NEXT: s_mov_b32 s8, s2 910; GISEL10-NEXT: s_mov_b32 s10, s3 911; GISEL10-NEXT: s_mov_b32 s11, s4 912; GISEL10-NEXT: v_mov_b32_e32 v24, v8 913; GISEL10-NEXT: v_mov_b32_e32 v25, v9 914; GISEL10-NEXT: v_mov_b32_e32 v26, v10 915; GISEL10-NEXT: v_mov_b32_e32 v27, v11 916; GISEL10-NEXT: v_mov_b32_e32 v28, v12 917; GISEL10-NEXT: v_mov_b32_e32 v29, v13 918; GISEL10-NEXT: v_mov_b32_e32 v30, v14 919; GISEL10-NEXT: v_mov_b32_e32 v31, v15 920; GISEL10-NEXT: v_mov_b32_e32 v32, v16 921; GISEL10-NEXT: v_mov_b32_e32 v33, v17 922; GISEL10-NEXT: v_mov_b32_e32 v34, v18 923; GISEL10-NEXT: v_mov_b32_e32 v35, v19 924; GISEL10-NEXT: v_mov_b32_e32 v36, v20 925; GISEL10-NEXT: 
v_mov_b32_e32 v37, v21 926; GISEL10-NEXT: v_mov_b32_e32 v38, v22 927; GISEL10-NEXT: v_mov_b32_e32 v39, v23 928; GISEL10-NEXT: s_mov_b32 exec_lo, s12 929; GISEL10-NEXT: s_and_saveexec_b32 s4, s9 930; GISEL10-NEXT: s_cbranch_execz .LBB5_2 931; GISEL10-NEXT: ; %bb.1: ; %shader 932; GISEL10-NEXT: s_or_saveexec_b32 s9, -1 933; GISEL10-NEXT: s_getpc_b64 s[0:1] 934; GISEL10-NEXT: s_add_u32 s0, s0, write_v0_v15@gotpcrel32@lo+4 935; GISEL10-NEXT: s_addc_u32 s1, s1, write_v0_v15@gotpcrel32@hi+12 936; GISEL10-NEXT: v_mov_b32_e32 v0, v24 937; GISEL10-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x0 938; GISEL10-NEXT: v_mov_b32_e32 v1, v25 939; GISEL10-NEXT: v_mov_b32_e32 v2, v26 940; GISEL10-NEXT: v_mov_b32_e32 v3, v27 941; GISEL10-NEXT: v_mov_b32_e32 v4, v28 942; GISEL10-NEXT: v_mov_b32_e32 v5, v29 943; GISEL10-NEXT: v_mov_b32_e32 v6, v30 944; GISEL10-NEXT: v_mov_b32_e32 v7, v31 945; GISEL10-NEXT: v_mov_b32_e32 v8, v32 946; GISEL10-NEXT: v_mov_b32_e32 v9, v33 947; GISEL10-NEXT: v_mov_b32_e32 v10, v34 948; GISEL10-NEXT: v_mov_b32_e32 v11, v35 949; GISEL10-NEXT: v_mov_b32_e32 v12, v36 950; GISEL10-NEXT: v_mov_b32_e32 v13, v37 951; GISEL10-NEXT: v_mov_b32_e32 v14, v38 952; GISEL10-NEXT: v_mov_b32_e32 v15, v39 953; GISEL10-NEXT: s_mov_b64 s[0:1], s[48:49] 954; GISEL10-NEXT: s_mov_b64 s[2:3], s[50:51] 955; GISEL10-NEXT: s_waitcnt lgkmcnt(0) 956; GISEL10-NEXT: s_swappc_b64 s[30:31], s[12:13] 957; GISEL10-NEXT: v_mov_b32_e32 v24, v0 958; GISEL10-NEXT: v_mov_b32_e32 v25, v1 959; GISEL10-NEXT: v_mov_b32_e32 v26, v2 960; GISEL10-NEXT: v_mov_b32_e32 v27, v3 961; GISEL10-NEXT: v_mov_b32_e32 v28, v4 962; GISEL10-NEXT: v_mov_b32_e32 v29, v5 963; GISEL10-NEXT: v_mov_b32_e32 v30, v6 964; GISEL10-NEXT: v_mov_b32_e32 v31, v7 965; GISEL10-NEXT: v_mov_b32_e32 v32, v8 966; GISEL10-NEXT: v_mov_b32_e32 v33, v9 967; GISEL10-NEXT: v_mov_b32_e32 v34, v10 968; GISEL10-NEXT: v_mov_b32_e32 v35, v11 969; GISEL10-NEXT: v_mov_b32_e32 v36, v12 970; GISEL10-NEXT: v_mov_b32_e32 v37, v13 971; GISEL10-NEXT: 
v_mov_b32_e32 v38, v14 972; GISEL10-NEXT: v_mov_b32_e32 v39, v15 973; GISEL10-NEXT: s_mov_b32 exec_lo, s9 974; GISEL10-NEXT: ; kill: def $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 killed $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39 killed $exec 975; GISEL10-NEXT: .LBB5_2: ; %tail 976; GISEL10-NEXT: s_or_b32 exec_lo, exec_lo, s4 977; GISEL10-NEXT: v_mov_b32_e32 v8, v24 978; GISEL10-NEXT: v_mov_b32_e32 v9, v25 979; GISEL10-NEXT: v_mov_b32_e32 v10, v26 980; GISEL10-NEXT: v_mov_b32_e32 v11, v27 981; GISEL10-NEXT: v_mov_b32_e32 v12, v28 982; GISEL10-NEXT: v_mov_b32_e32 v13, v29 983; GISEL10-NEXT: v_mov_b32_e32 v14, v30 984; GISEL10-NEXT: v_mov_b32_e32 v15, v31 985; GISEL10-NEXT: v_mov_b32_e32 v16, v32 986; GISEL10-NEXT: v_mov_b32_e32 v17, v33 987; GISEL10-NEXT: v_mov_b32_e32 v18, v34 988; GISEL10-NEXT: v_mov_b32_e32 v19, v35 989; GISEL10-NEXT: v_mov_b32_e32 v20, v36 990; GISEL10-NEXT: v_mov_b32_e32 v21, v37 991; GISEL10-NEXT: v_mov_b32_e32 v22, v38 992; GISEL10-NEXT: v_mov_b32_e32 v23, v39 993; GISEL10-NEXT: s_mov_b32 s0, s6 994; GISEL10-NEXT: s_mov_b32 s1, s7 995; GISEL10-NEXT: s_mov_b32 s2, s8 996; GISEL10-NEXT: s_mov_b32 exec_lo, s5 997; GISEL10-NEXT: s_setpc_b64 s[10:11] 998; 999; DAGISEL10-LABEL: wwm_write_to_arg_reg: 1000; DAGISEL10: ; %bb.0: ; %entry 1001; DAGISEL10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1002; DAGISEL10-NEXT: s_mov_b32 s32, 0 1003; DAGISEL10-NEXT: s_or_saveexec_b32 s11, -1 1004; DAGISEL10-NEXT: s_or_saveexec_b32 s6, -1 1005; DAGISEL10-NEXT: v_mov_b32_e32 v39, v23 1006; DAGISEL10-NEXT: v_mov_b32_e32 v38, v22 1007; DAGISEL10-NEXT: v_mov_b32_e32 v37, v21 1008; DAGISEL10-NEXT: v_mov_b32_e32 v36, v20 1009; DAGISEL10-NEXT: v_mov_b32_e32 v35, v19 1010; DAGISEL10-NEXT: v_mov_b32_e32 v34, v18 1011; DAGISEL10-NEXT: v_mov_b32_e32 v33, v17 1012; DAGISEL10-NEXT: v_mov_b32_e32 v32, v16 1013; DAGISEL10-NEXT: 
v_mov_b32_e32 v31, v15 1014; DAGISEL10-NEXT: v_mov_b32_e32 v30, v14 1015; DAGISEL10-NEXT: v_mov_b32_e32 v29, v13 1016; DAGISEL10-NEXT: v_mov_b32_e32 v28, v12 1017; DAGISEL10-NEXT: v_mov_b32_e32 v27, v11 1018; DAGISEL10-NEXT: v_mov_b32_e32 v26, v10 1019; DAGISEL10-NEXT: v_mov_b32_e32 v25, v9 1020; DAGISEL10-NEXT: v_mov_b32_e32 v24, v8 1021; DAGISEL10-NEXT: s_mov_b32 exec_lo, s6 1022; DAGISEL10-NEXT: s_mov_b32 s9, s4 1023; DAGISEL10-NEXT: s_mov_b32 s8, s3 1024; DAGISEL10-NEXT: s_mov_b32 s4, s2 1025; DAGISEL10-NEXT: s_mov_b32 s6, s1 1026; DAGISEL10-NEXT: s_mov_b32 s7, s0 1027; DAGISEL10-NEXT: s_and_saveexec_b32 s10, s11 1028; DAGISEL10-NEXT: s_cbranch_execz .LBB5_2 1029; DAGISEL10-NEXT: ; %bb.1: ; %shader 1030; DAGISEL10-NEXT: s_or_saveexec_b32 s11, -1 1031; DAGISEL10-NEXT: s_getpc_b64 s[0:1] 1032; DAGISEL10-NEXT: s_add_u32 s0, s0, write_v0_v15@gotpcrel32@lo+4 1033; DAGISEL10-NEXT: s_addc_u32 s1, s1, write_v0_v15@gotpcrel32@hi+12 1034; DAGISEL10-NEXT: v_mov_b32_e32 v0, v24 1035; DAGISEL10-NEXT: s_load_dwordx2 s[12:13], s[0:1], 0x0 1036; DAGISEL10-NEXT: v_mov_b32_e32 v1, v25 1037; DAGISEL10-NEXT: v_mov_b32_e32 v2, v26 1038; DAGISEL10-NEXT: v_mov_b32_e32 v3, v27 1039; DAGISEL10-NEXT: v_mov_b32_e32 v4, v28 1040; DAGISEL10-NEXT: v_mov_b32_e32 v5, v29 1041; DAGISEL10-NEXT: v_mov_b32_e32 v6, v30 1042; DAGISEL10-NEXT: v_mov_b32_e32 v7, v31 1043; DAGISEL10-NEXT: v_mov_b32_e32 v8, v32 1044; DAGISEL10-NEXT: v_mov_b32_e32 v9, v33 1045; DAGISEL10-NEXT: v_mov_b32_e32 v10, v34 1046; DAGISEL10-NEXT: v_mov_b32_e32 v11, v35 1047; DAGISEL10-NEXT: v_mov_b32_e32 v12, v36 1048; DAGISEL10-NEXT: v_mov_b32_e32 v13, v37 1049; DAGISEL10-NEXT: v_mov_b32_e32 v14, v38 1050; DAGISEL10-NEXT: v_mov_b32_e32 v15, v39 1051; DAGISEL10-NEXT: s_mov_b64 s[0:1], s[48:49] 1052; DAGISEL10-NEXT: s_mov_b64 s[2:3], s[50:51] 1053; DAGISEL10-NEXT: s_waitcnt lgkmcnt(0) 1054; DAGISEL10-NEXT: s_swappc_b64 s[30:31], s[12:13] 1055; DAGISEL10-NEXT: v_mov_b32_e32 v40, v0 1056; DAGISEL10-NEXT: v_mov_b32_e32 v41, v1 1057; 
DAGISEL10-NEXT: v_mov_b32_e32 v42, v2 1058; DAGISEL10-NEXT: v_mov_b32_e32 v43, v3 1059; DAGISEL10-NEXT: v_mov_b32_e32 v44, v4 1060; DAGISEL10-NEXT: v_mov_b32_e32 v45, v5 1061; DAGISEL10-NEXT: v_mov_b32_e32 v46, v6 1062; DAGISEL10-NEXT: v_mov_b32_e32 v47, v7 1063; DAGISEL10-NEXT: v_mov_b32_e32 v48, v8 1064; DAGISEL10-NEXT: v_mov_b32_e32 v49, v9 1065; DAGISEL10-NEXT: v_mov_b32_e32 v50, v10 1066; DAGISEL10-NEXT: v_mov_b32_e32 v51, v11 1067; DAGISEL10-NEXT: v_mov_b32_e32 v52, v12 1068; DAGISEL10-NEXT: v_mov_b32_e32 v53, v13 1069; DAGISEL10-NEXT: v_mov_b32_e32 v54, v14 1070; DAGISEL10-NEXT: v_mov_b32_e32 v55, v15 1071; DAGISEL10-NEXT: s_mov_b32 exec_lo, s11 1072; DAGISEL10-NEXT: v_mov_b32_e32 v24, v40 1073; DAGISEL10-NEXT: v_mov_b32_e32 v25, v41 1074; DAGISEL10-NEXT: v_mov_b32_e32 v26, v42 1075; DAGISEL10-NEXT: v_mov_b32_e32 v27, v43 1076; DAGISEL10-NEXT: v_mov_b32_e32 v28, v44 1077; DAGISEL10-NEXT: v_mov_b32_e32 v29, v45 1078; DAGISEL10-NEXT: v_mov_b32_e32 v30, v46 1079; DAGISEL10-NEXT: v_mov_b32_e32 v31, v47 1080; DAGISEL10-NEXT: v_mov_b32_e32 v32, v48 1081; DAGISEL10-NEXT: v_mov_b32_e32 v33, v49 1082; DAGISEL10-NEXT: v_mov_b32_e32 v34, v50 1083; DAGISEL10-NEXT: v_mov_b32_e32 v35, v51 1084; DAGISEL10-NEXT: v_mov_b32_e32 v36, v52 1085; DAGISEL10-NEXT: v_mov_b32_e32 v37, v53 1086; DAGISEL10-NEXT: v_mov_b32_e32 v38, v54 1087; DAGISEL10-NEXT: v_mov_b32_e32 v39, v55 1088; DAGISEL10-NEXT: .LBB5_2: ; %tail 1089; DAGISEL10-NEXT: s_or_b32 exec_lo, exec_lo, s10 1090; DAGISEL10-NEXT: v_mov_b32_e32 v8, v24 1091; DAGISEL10-NEXT: v_mov_b32_e32 v9, v25 1092; DAGISEL10-NEXT: v_mov_b32_e32 v10, v26 1093; DAGISEL10-NEXT: v_mov_b32_e32 v11, v27 1094; DAGISEL10-NEXT: v_mov_b32_e32 v12, v28 1095; DAGISEL10-NEXT: v_mov_b32_e32 v13, v29 1096; DAGISEL10-NEXT: v_mov_b32_e32 v14, v30 1097; DAGISEL10-NEXT: v_mov_b32_e32 v15, v31 1098; DAGISEL10-NEXT: v_mov_b32_e32 v16, v32 1099; DAGISEL10-NEXT: v_mov_b32_e32 v17, v33 1100; DAGISEL10-NEXT: v_mov_b32_e32 v18, v34 1101; DAGISEL10-NEXT: 
v_mov_b32_e32 v19, v35 1102; DAGISEL10-NEXT: v_mov_b32_e32 v20, v36 1103; DAGISEL10-NEXT: v_mov_b32_e32 v21, v37 1104; DAGISEL10-NEXT: v_mov_b32_e32 v22, v38 1105; DAGISEL10-NEXT: v_mov_b32_e32 v23, v39 1106; DAGISEL10-NEXT: s_mov_b32 s0, s7 1107; DAGISEL10-NEXT: s_mov_b32 s1, s6 1108; DAGISEL10-NEXT: s_mov_b32 s2, s4 1109; DAGISEL10-NEXT: s_mov_b32 exec_lo, s5 1110; DAGISEL10-NEXT: s_setpc_b64 s[8:9] 1111entry: 1112 %entry_exec = call i1 @llvm.amdgcn.init.whole.wave() 1113 br i1 %entry_exec, label %shader, label %tail 1114 1115shader: 1116 %v0.15 = call amdgpu_gfx <16 x i32> @write_v0_v15(<16 x i32> %vgpr) 1117 %vgpr.wwm = call <16 x i32> @llvm.amdgcn.strict.wwm.v16i32(<16 x i32> %v0.15) 1118 1119 br label %tail 1120 1121tail: 1122 %vgpr.args = phi <16 x i32> [%vgpr, %entry], [%vgpr.wwm, %shader] 1123 call void(ptr, i32, <3 x i32>, <16 x i32>, i32, ...) @llvm.amdgcn.cs.chain(ptr %callee, i32 %exec, <3 x i32> inreg %sgpr, <16 x i32> %vgpr.args, i32 0) 1124 unreachable 1125} 1126 1127declare amdgpu_gfx <16 x i32> @write_v0_v15(<16 x i32>) 1128