; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc -mtriple=amdgcn -mcpu=tonga -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX8DAGISEL %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX8GISEL %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9DAGISEL %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9GISEL %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -global-isel=0 -mattr=+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10DAGISEL,GFX1064DAGISEL %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -global-isel=1 -mattr=+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10GISEL,GFX1064GISEL %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10DAGISEL,GFX1032DAGISEL %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10GISEL,GFX1032GISEL %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11DAGISEL,GFX1164DAGISEL %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11GISEL,GFX1164GISEL %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11DAGISEL,GFX1132DAGISEL %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11GISEL,GFX1132GISEL %s

; Codegen coverage for llvm.amdgcn.wave.reduce.umax.i32. The kernels that
; follow feed the intrinsic a kernel argument, a literal, poison and the
; workitem id, and store the result through %out. The llc invocations above
; cover tonga, gfx900, gfx1010 and gfx1100 with -global-isel=0 and
; -global-isel=1; gfx1010 and gfx1100 are additionally run with
; +wavefrontsize64.
;
; The assertion comments are tool-generated. After changing any IR here,
; regenerate them with utils/update_llc_test_checks.py instead of editing
; them by hand.

declare i32 @llvm.amdgcn.wave.reduce.umax.i32(i32, i32 immarg)
declare i32 @llvm.amdgcn.workitem.id.x()

; The reduced operand is the kernel argument %in. On every target the expected
; code loads the argument and stores it as-is; no per-lane loop is expected.
define amdgpu_kernel void @uniform_value(ptr addrspace(1) %out, i32 %in) {
; GFX8DAGISEL-LABEL: uniform_value:
; GFX8DAGISEL: ; %bb.0: ; %entry
; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX8DAGISEL-NEXT: s_load_dword s2, s[4:5], 0x2c
; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s2
; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v2
; GFX8DAGISEL-NEXT: s_endpgm
;
; GFX8GISEL-LABEL: uniform_value:
; GFX8GISEL: ; %bb.0: ; %entry
; GFX8GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c
; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s2
; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0
; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1
; GFX8GISEL-NEXT: flat_store_dword v[0:1], v2
; GFX8GISEL-NEXT: s_endpgm
;
; GFX9DAGISEL-LABEL: uniform_value:
; GFX9DAGISEL: ; %bb.0: ; %entry
; GFX9DAGISEL-NEXT: s_load_dword s2, s[4:5], 0x2c
; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s2
; GFX9DAGISEL-NEXT: global_store_dword v0, v1, s[0:1]
; GFX9DAGISEL-NEXT: s_endpgm
;
; GFX9GISEL-LABEL: uniform_value:
; GFX9GISEL: ; %bb.0: ; %entry
; GFX9GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c
; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9GISEL-NEXT: s_endpgm
;
; GFX10DAGISEL-LABEL: uniform_value:
; GFX10DAGISEL: ; %bb.0: ; %entry
; GFX10DAGISEL-NEXT: s_clause 0x1
; GFX10DAGISEL-NEXT: s_load_dword s2, s[4:5], 0x2c
; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10DAGISEL-NEXT: v_mov_b32_e32 v1, s2
; GFX10DAGISEL-NEXT: global_store_dword v0, v1, s[0:1]
; GFX10DAGISEL-NEXT: s_endpgm
;
; GFX10GISEL-LABEL: uniform_value:
; GFX10GISEL: ; %bb.0: ; %entry
; GFX10GISEL-NEXT: s_clause 0x1
; GFX10GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c
; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX10GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10GISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX10GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX10GISEL-NEXT: s_endpgm
;
; GFX1164DAGISEL-LABEL: uniform_value:
; GFX1164DAGISEL: ; %bb.0: ; %entry
; GFX1164DAGISEL-NEXT: s_clause 0x1
; GFX1164DAGISEL-NEXT: s_load_b32 s2, s[4:5], 0x2c
; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s2
; GFX1164DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1164DAGISEL-NEXT: s_endpgm
;
; GFX1164GISEL-LABEL: uniform_value:
; GFX1164GISEL: ; %bb.0: ; %entry
; GFX1164GISEL-NEXT: s_clause 0x1
; GFX1164GISEL-NEXT: s_load_b32 s2, s[4:5], 0x2c
; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1164GISEL-NEXT: s_endpgm
;
; GFX1132DAGISEL-LABEL: uniform_value:
; GFX1132DAGISEL: ; %bb.0: ; %entry
; GFX1132DAGISEL-NEXT: s_clause 0x1
; GFX1132DAGISEL-NEXT: s_load_b32 s2, s[4:5], 0x2c
; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; GFX1132DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1132DAGISEL-NEXT: s_endpgm
;
; GFX1132GISEL-LABEL: uniform_value:
; GFX1132GISEL: ; %bb.0: ; %entry
; GFX1132GISEL-NEXT: s_clause 0x1
; GFX1132GISEL-NEXT: s_load_b32 s2, s[4:5], 0x2c
; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132GISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2
; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1132GISEL-NEXT: s_endpgm
entry:
  %wave_max = call i32 @llvm.amdgcn.wave.reduce.umax.i32(i32 %in, i32 1)
  store i32 %wave_max, ptr addrspace(1) %out
  ret void
}

; The reduced operand is the literal 123. On every target the expected code
; materializes 0x7b in a VGPR and stores it directly.
define amdgpu_kernel void @const_value(ptr addrspace(1) %out) {
; GFX8DAGISEL-LABEL: const_value:
; GFX8DAGISEL: ; %bb.0: ; %entry
; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, 0x7b
; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1
; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v2
; GFX8DAGISEL-NEXT: s_endpgm
;
; GFX8GISEL-LABEL: const_value:
; GFX8GISEL: ; %bb.0: ; %entry
; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX8GISEL-NEXT: v_mov_b32_e32 v2, 0x7b
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0
; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1
; GFX8GISEL-NEXT: flat_store_dword v[0:1], v2
; GFX8GISEL-NEXT: s_endpgm
;
; GFX9DAGISEL-LABEL: const_value:
; GFX9DAGISEL: ; %bb.0: ; %entry
; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b
; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9DAGISEL-NEXT: global_store_dword v0, v1, s[0:1]
; GFX9DAGISEL-NEXT: s_endpgm
;
; GFX9GISEL-LABEL: const_value:
; GFX9GISEL: ; %bb.0: ; %entry
; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9GISEL-NEXT: s_endpgm
;
; GFX10DAGISEL-LABEL: const_value:
; GFX10DAGISEL: ; %bb.0: ; %entry
; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX10DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b
; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10DAGISEL-NEXT: global_store_dword v0, v1, s[0:1]
; GFX10DAGISEL-NEXT: s_endpgm
;
; GFX10GISEL-LABEL: const_value:
; GFX10GISEL: ; %bb.0: ; %entry
; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
; GFX10GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX10GISEL-NEXT: s_endpgm
;
; GFX1164DAGISEL-LABEL: const_value:
; GFX1164DAGISEL: ; %bb.0: ; %entry
; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b
; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1164DAGISEL-NEXT: s_endpgm
;
; GFX1164GISEL-LABEL: const_value:
; GFX1164GISEL: ; %bb.0: ; %entry
; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, 0x7b
; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1164GISEL-NEXT: s_endpgm
;
; GFX1132DAGISEL-LABEL: const_value:
; GFX1132DAGISEL: ; %bb.0: ; %entry
; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7b
; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1]
; GFX1132DAGISEL-NEXT: s_endpgm
;
; GFX1132GISEL-LABEL: const_value:
; GFX1132GISEL: ; %bb.0: ; %entry
; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, 0x7b :: v_dual_mov_b32 v1, 0
; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1132GISEL-NEXT: s_endpgm
entry:
  %wave_max = call i32 @llvm.amdgcn.wave.reduce.umax.i32(i32 123, i32 1)
  store i32 %wave_max, ptr addrspace(1) %out
  ret void
}

; The reduced operand is poison. The expected code on every target is a lone
; s_endpgm: neither a reduction nor a store is expected in the output.
define amdgpu_kernel void @poison_value(ptr addrspace(1) %out, i32 %in) {
; GFX8DAGISEL-LABEL: poison_value:
; GFX8DAGISEL: ; %bb.0: ; %entry
; GFX8DAGISEL-NEXT: s_endpgm
;
; GFX8GISEL-LABEL: poison_value:
; GFX8GISEL: ; %bb.0: ; %entry
; GFX8GISEL-NEXT: s_endpgm
;
; GFX9DAGISEL-LABEL: poison_value:
; GFX9DAGISEL: ; %bb.0: ; %entry
; GFX9DAGISEL-NEXT: s_endpgm
;
; GFX9GISEL-LABEL: poison_value:
; GFX9GISEL: ; %bb.0: ; %entry
; GFX9GISEL-NEXT: s_endpgm
;
; GFX10DAGISEL-LABEL: poison_value:
; GFX10DAGISEL: ; %bb.0: ; %entry
; GFX10DAGISEL-NEXT: s_endpgm
;
; GFX10GISEL-LABEL: poison_value:
; GFX10GISEL: ; %bb.0: ; %entry
; GFX10GISEL-NEXT: s_endpgm
;
; GFX11DAGISEL-LABEL: poison_value:
; GFX11DAGISEL: ; %bb.0: ; %entry
; GFX11DAGISEL-NEXT: s_endpgm
;
; GFX11GISEL-LABEL: poison_value:
; GFX11GISEL: ; %bb.0: ; %entry
; GFX11GISEL-NEXT: s_endpgm
entry:
  %wave_max = call i32 @llvm.amdgcn.wave.reduce.umax.i32(i32 poison, i32 1)
  store i32 %wave_max, ptr addrspace(1) %out
  ret void
}

; The reduced operand is the workitem id. The expected code is a scalar loop
; over a copy of exec: find the lowest set bit (s_ff1 on GFX8-GFX10, s_ctz on
; GFX11), v_readlane the operand for that lane, clear the bit with s_bitset0,
; and fold the value into an SGPR accumulator with s_max_u32. The accumulator
; is initialized to 0. The gfx1010/gfx1100 runs without +wavefrontsize64 use
; exec_lo and the _b32 forms; all other runs use the 64-bit exec pair.
define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out, i32 %in) {
; GFX8DAGISEL-LABEL: divergent_value:
; GFX8DAGISEL: ; %bb.0: ; %entry
; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX8DAGISEL-NEXT: s_mov_b32 s4, 0
; GFX8DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
; GFX8DAGISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX8DAGISEL-NEXT: s_max_u32 s4, s4, s6
; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
; GFX8DAGISEL-NEXT: ; %bb.2:
; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1
; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v2
; GFX8DAGISEL-NEXT: s_endpgm
;
; GFX8GISEL-LABEL: divergent_value:
; GFX8GISEL: ; %bb.0: ; %entry
; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX8GISEL-NEXT: s_mov_b32 s4, 0
; GFX8GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
; GFX8GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
; GFX8GISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX8GISEL-NEXT: s_max_u32 s4, s4, s6
; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB3_1
; GFX8GISEL-NEXT: ; %bb.2:
; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0
; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s4
; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1
; GFX8GISEL-NEXT: flat_store_dword v[0:1], v2
; GFX8GISEL-NEXT: s_endpgm
;
; GFX9DAGISEL-LABEL: divergent_value:
; GFX9DAGISEL: ; %bb.0: ; %entry
; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX9DAGISEL-NEXT: s_mov_b32 s4, 0
; GFX9DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
; GFX9DAGISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX9DAGISEL-NEXT: s_max_u32 s4, s4, s6
; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
; GFX9DAGISEL-NEXT: ; %bb.2:
; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9DAGISEL-NEXT: s_endpgm
;
; GFX9GISEL-LABEL: divergent_value:
; GFX9GISEL: ; %bb.0: ; %entry
; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX9GISEL-NEXT: s_mov_b32 s4, 0
; GFX9GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
; GFX9GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
; GFX9GISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX9GISEL-NEXT: s_max_u32 s4, s4, s6
; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB3_1
; GFX9GISEL-NEXT: ; %bb.2:
; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX9GISEL-NEXT: s_endpgm
;
; GFX1064DAGISEL-LABEL: divergent_value:
; GFX1064DAGISEL: ; %bb.0: ; %entry
; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1064DAGISEL-NEXT: s_mov_b32 s4, 0
; GFX1064DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
; GFX1064DAGISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX1064DAGISEL-NEXT: s_max_u32 s4, s4, s6
; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
; GFX1064DAGISEL-NEXT: ; %bb.2:
; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX1064DAGISEL-NEXT: s_endpgm
;
; GFX1064GISEL-LABEL: divergent_value:
; GFX1064GISEL: ; %bb.0: ; %entry
; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1064GISEL-NEXT: s_mov_b32 s4, 0
; GFX1064GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
; GFX1064GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3]
; GFX1064GISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX1064GISEL-NEXT: s_max_u32 s4, s4, s6
; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB3_1
; GFX1064GISEL-NEXT: ; %bb.2:
; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1064GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX1064GISEL-NEXT: s_endpgm
;
; GFX1032DAGISEL-LABEL: divergent_value:
; GFX1032DAGISEL: ; %bb.0: ; %entry
; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1032DAGISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1032DAGISEL-NEXT: s_mov_b32 s2, 0
; GFX1032DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s4, s3
; GFX1032DAGISEL-NEXT: v_readlane_b32 s5, v0, s4
; GFX1032DAGISEL-NEXT: s_bitset0_b32 s3, s4
; GFX1032DAGISEL-NEXT: s_max_u32 s2, s2, s5
; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s3, 0
; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
; GFX1032DAGISEL-NEXT: ; %bb.2:
; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032DAGISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX1032DAGISEL-NEXT: s_endpgm
;
; GFX1032GISEL-LABEL: divergent_value:
; GFX1032GISEL: ; %bb.0: ; %entry
; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX1032GISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1032GISEL-NEXT: s_mov_b32 s2, 0
; GFX1032GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
; GFX1032GISEL-NEXT: s_ff1_i32_b32 s4, s3
; GFX1032GISEL-NEXT: v_readlane_b32 s5, v0, s4
; GFX1032GISEL-NEXT: s_bitset0_b32 s3, s4
; GFX1032GISEL-NEXT: s_max_u32 s2, s2, s5
; GFX1032GISEL-NEXT: s_cmp_lg_u32 s3, 0
; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB3_1
; GFX1032GISEL-NEXT: ; %bb.2:
; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1032GISEL-NEXT: global_store_dword v1, v0, s[0:1]
; GFX1032GISEL-NEXT: s_endpgm
;
; GFX1164DAGISEL-LABEL: divergent_value:
; GFX1164DAGISEL: ; %bb.0: ; %entry
; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164DAGISEL-NEXT: s_mov_b32 s4, 0
; GFX1164DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s5, s[2:3]
; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX1164DAGISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164DAGISEL-NEXT: s_max_u32 s4, s4, s6
; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
; GFX1164DAGISEL-NEXT: ; %bb.2:
; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1164DAGISEL-NEXT: s_endpgm
;
; GFX1164GISEL-LABEL: divergent_value:
; GFX1164GISEL: ; %bb.0: ; %entry
; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1164GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec
; GFX1164GISEL-NEXT: s_mov_b32 s4, 0
; GFX1164GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
; GFX1164GISEL-NEXT: s_ctz_i32_b64 s5, s[2:3]
; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX1164GISEL-NEXT: v_readlane_b32 s6, v0, s5
; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s5
; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1164GISEL-NEXT: s_max_u32 s4, s4, s6
; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0
; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB3_1
; GFX1164GISEL-NEXT: ; %bb.2:
; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s4
; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0
; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1164GISEL-NEXT: s_endpgm
;
; GFX1132DAGISEL-LABEL: divergent_value:
; GFX1132DAGISEL: ; %bb.0: ; %entry
; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX1132DAGISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1132DAGISEL-NEXT: s_mov_b32 s2, 0
; GFX1132DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s4, s3
; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX1132DAGISEL-NEXT: v_readlane_b32 s5, v0, s4
; GFX1132DAGISEL-NEXT: s_bitset0_b32 s3, s4
; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132DAGISEL-NEXT: s_max_u32 s2, s2, s5
; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s3, 0
; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1
; GFX1132DAGISEL-NEXT: ; %bb.2:
; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1132DAGISEL-NEXT: s_endpgm
;
; GFX1132GISEL-LABEL: divergent_value:
; GFX1132GISEL: ; %bb.0: ; %entry
; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX1132GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GFX1132GISEL-NEXT: s_mov_b32 s3, exec_lo
; GFX1132GISEL-NEXT: s_mov_b32 s2, 0
; GFX1132GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1
; GFX1132GISEL-NEXT: s_ctz_i32_b32 s4, s3
; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
; GFX1132GISEL-NEXT: v_readlane_b32 s5, v0, s4
; GFX1132GISEL-NEXT: s_bitset0_b32 s3, s4
; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1132GISEL-NEXT: s_max_u32 s2, s2, s5
; GFX1132GISEL-NEXT: s_cmp_lg_u32 s3, 0
; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB3_1
; GFX1132GISEL-NEXT: ; %bb.2:
; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0
; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[0:1]
; GFX1132GISEL-NEXT: s_endpgm
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %wave_max = call i32 @llvm.amdgcn.wave.reduce.umax.i32(i32 %tid, i32 1)
  store i32 %wave_max, ptr addrspace(1) %out
  ret void
}
510 511define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { 512; GFX8DAGISEL-LABEL: divergent_cfg: 513; GFX8DAGISEL: ; %bb.0: ; %entry 514; GFX8DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc, 15, v0 515; GFX8DAGISEL-NEXT: ; implicit-def: $sgpr2 516; GFX8DAGISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc 517; GFX8DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] 518; GFX8DAGISEL-NEXT: ; %bb.1: ; %else 519; GFX8DAGISEL-NEXT: s_load_dword s2, s[4:5], 0x2c 520; GFX8DAGISEL-NEXT: ; implicit-def: $vgpr0 521; GFX8DAGISEL-NEXT: ; %bb.2: ; %Flow 522; GFX8DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1] 523; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) 524; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s2 525; GFX8DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] 526; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB4_6 527; GFX8DAGISEL-NEXT: ; %bb.3: ; %if 528; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec 529; GFX8DAGISEL-NEXT: s_mov_b32 s6, 0 530; GFX8DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 531; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] 532; GFX8DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 533; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 534; GFX8DAGISEL-NEXT: s_max_u32 s6, s6, s8 535; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 536; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 537; GFX8DAGISEL-NEXT: ; %bb.5: 538; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s6 539; 
GFX8DAGISEL-NEXT: .LBB4_6: ; %endif 540; GFX8DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] 541; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 542; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) 543; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s1 544; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s0 545; GFX8DAGISEL-NEXT: flat_store_dword v[2:3], v1 546; GFX8DAGISEL-NEXT: s_endpgm 547; 548; GFX8GISEL-LABEL: divergent_cfg: 549; GFX8GISEL: ; %bb.0: ; %entry 550; GFX8GISEL-NEXT: v_cmp_le_u32_e32 vcc, 16, v0 551; GFX8GISEL-NEXT: ; implicit-def: $sgpr6 552; GFX8GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc 553; GFX8GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] 554; GFX8GISEL-NEXT: s_cbranch_execz .LBB4_2 555; GFX8GISEL-NEXT: ; %bb.1: ; %else 556; GFX8GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c 557; GFX8GISEL-NEXT: ; implicit-def: $vgpr0 558; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) 559; GFX8GISEL-NEXT: s_mov_b32 s6, s2 560; GFX8GISEL-NEXT: .LBB4_2: ; %Flow 561; GFX8GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] 562; GFX8GISEL-NEXT: s_cbranch_execz .LBB4_5 563; GFX8GISEL-NEXT: ; %bb.3: ; %if 564; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec 565; GFX8GISEL-NEXT: s_mov_b32 s6, 0 566; GFX8GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 567; GFX8GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] 568; GFX8GISEL-NEXT: v_readlane_b32 s8, v0, s7 569; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s7 570; GFX8GISEL-NEXT: s_max_u32 s6, s6, s8 571; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 572; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB4_4 573; GFX8GISEL-NEXT: .LBB4_5: ; %endif 574; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[0:1] 575; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 576; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s6 577; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) 578; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 579; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1 580; GFX8GISEL-NEXT: flat_store_dword v[0:1], v2 581; GFX8GISEL-NEXT: s_endpgm 582; 583; GFX9DAGISEL-LABEL: divergent_cfg: 584; GFX9DAGISEL: ; %bb.0: ; %entry 585; GFX9DAGISEL-NEXT: v_cmp_lt_u32_e32 
vcc, 15, v0 586; GFX9DAGISEL-NEXT: ; implicit-def: $sgpr2 587; GFX9DAGISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc 588; GFX9DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] 589; GFX9DAGISEL-NEXT: ; %bb.1: ; %else 590; GFX9DAGISEL-NEXT: s_load_dword s2, s[4:5], 0x2c 591; GFX9DAGISEL-NEXT: ; implicit-def: $vgpr0 592; GFX9DAGISEL-NEXT: ; %bb.2: ; %Flow 593; GFX9DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1] 594; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) 595; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s2 596; GFX9DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] 597; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB4_6 598; GFX9DAGISEL-NEXT: ; %bb.3: ; %if 599; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec 600; GFX9DAGISEL-NEXT: s_mov_b32 s6, 0 601; GFX9DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 602; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] 603; GFX9DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 604; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 605; GFX9DAGISEL-NEXT: s_max_u32 s6, s6, s8 606; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 607; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 608; GFX9DAGISEL-NEXT: ; %bb.5: 609; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s6 610; GFX9DAGISEL-NEXT: .LBB4_6: ; %endif 611; GFX9DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] 612; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 613; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 614; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) 615; GFX9DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] 616; GFX9DAGISEL-NEXT: s_endpgm 617; 618; GFX9GISEL-LABEL: divergent_cfg: 619; GFX9GISEL: ; %bb.0: ; %entry 620; GFX9GISEL-NEXT: v_cmp_le_u32_e32 vcc, 16, v0 621; GFX9GISEL-NEXT: ; implicit-def: $sgpr6 622; GFX9GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc 623; GFX9GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] 624; GFX9GISEL-NEXT: s_cbranch_execz .LBB4_2 625; GFX9GISEL-NEXT: ; %bb.1: ; %else 626; GFX9GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c 627; GFX9GISEL-NEXT: ; implicit-def: $vgpr0 628; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) 629; GFX9GISEL-NEXT: s_mov_b32 s6, s2 630; 
GFX9GISEL-NEXT: .LBB4_2: ; %Flow 631; GFX9GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] 632; GFX9GISEL-NEXT: s_cbranch_execz .LBB4_5 633; GFX9GISEL-NEXT: ; %bb.3: ; %if 634; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec 635; GFX9GISEL-NEXT: s_mov_b32 s6, 0 636; GFX9GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 637; GFX9GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] 638; GFX9GISEL-NEXT: v_readlane_b32 s8, v0, s7 639; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s7 640; GFX9GISEL-NEXT: s_max_u32 s6, s6, s8 641; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 642; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB4_4 643; GFX9GISEL-NEXT: .LBB4_5: ; %endif 644; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[0:1] 645; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 646; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s6 647; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0 648; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) 649; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1] 650; GFX9GISEL-NEXT: s_endpgm 651; 652; GFX1064DAGISEL-LABEL: divergent_cfg: 653; GFX1064DAGISEL: ; %bb.0: ; %entry 654; GFX1064DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc, 15, v0 655; GFX1064DAGISEL-NEXT: ; implicit-def: $sgpr2 656; GFX1064DAGISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc 657; GFX1064DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] 658; GFX1064DAGISEL-NEXT: ; %bb.1: ; %else 659; GFX1064DAGISEL-NEXT: s_load_dword s2, s[4:5], 0x2c 660; GFX1064DAGISEL-NEXT: ; implicit-def: $vgpr0 661; GFX1064DAGISEL-NEXT: ; %bb.2: ; %Flow 662; GFX1064DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1] 663; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) 664; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s2 665; GFX1064DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] 666; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB4_6 667; GFX1064DAGISEL-NEXT: ; %bb.3: ; %if 668; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec 669; GFX1064DAGISEL-NEXT: s_mov_b32 s6, 0 670; GFX1064DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 671; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] 672; GFX1064DAGISEL-NEXT: v_readlane_b32 s8, v0, 
s7 673; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 674; GFX1064DAGISEL-NEXT: s_max_u32 s6, s6, s8 675; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 676; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 677; GFX1064DAGISEL-NEXT: ; %bb.5: 678; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s6 679; GFX1064DAGISEL-NEXT: .LBB4_6: ; %endif 680; GFX1064DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] 681; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 682; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, 0 683; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) 684; GFX1064DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] 685; GFX1064DAGISEL-NEXT: s_endpgm 686; 687; GFX1064GISEL-LABEL: divergent_cfg: 688; GFX1064GISEL: ; %bb.0: ; %entry 689; GFX1064GISEL-NEXT: v_cmp_le_u32_e32 vcc, 16, v0 690; GFX1064GISEL-NEXT: ; implicit-def: $sgpr6 691; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc 692; GFX1064GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] 693; GFX1064GISEL-NEXT: s_cbranch_execz .LBB4_2 694; GFX1064GISEL-NEXT: ; %bb.1: ; %else 695; GFX1064GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c 696; GFX1064GISEL-NEXT: ; implicit-def: $vgpr0 697; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) 698; GFX1064GISEL-NEXT: s_mov_b32 s6, s2 699; GFX1064GISEL-NEXT: .LBB4_2: ; %Flow 700; GFX1064GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] 701; GFX1064GISEL-NEXT: s_cbranch_execz .LBB4_5 702; GFX1064GISEL-NEXT: ; %bb.3: ; %if 703; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec 704; GFX1064GISEL-NEXT: s_mov_b32 s6, 0 705; GFX1064GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 706; GFX1064GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] 707; GFX1064GISEL-NEXT: v_readlane_b32 s8, v0, s7 708; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s7 709; GFX1064GISEL-NEXT: s_max_u32 s6, s6, s8 710; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 711; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB4_4 712; GFX1064GISEL-NEXT: .LBB4_5: ; %endif 713; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[0:1] 714; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 715; 
GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s6 716; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0 717; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) 718; GFX1064GISEL-NEXT: global_store_dword v1, v0, s[0:1] 719; GFX1064GISEL-NEXT: s_endpgm 720; 721; GFX1032DAGISEL-LABEL: divergent_cfg: 722; GFX1032DAGISEL: ; %bb.0: ; %entry 723; GFX1032DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc_lo, 15, v0 724; GFX1032DAGISEL-NEXT: ; implicit-def: $sgpr1 725; GFX1032DAGISEL-NEXT: s_and_saveexec_b32 s0, vcc_lo 726; GFX1032DAGISEL-NEXT: s_xor_b32 s0, exec_lo, s0 727; GFX1032DAGISEL-NEXT: ; %bb.1: ; %else 728; GFX1032DAGISEL-NEXT: s_load_dword s1, s[4:5], 0x2c 729; GFX1032DAGISEL-NEXT: ; implicit-def: $vgpr0 730; GFX1032DAGISEL-NEXT: ; %bb.2: ; %Flow 731; GFX1032DAGISEL-NEXT: s_or_saveexec_b32 s0, s0 732; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) 733; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1 734; GFX1032DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0 735; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB4_6 736; GFX1032DAGISEL-NEXT: ; %bb.3: ; %if 737; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo 738; GFX1032DAGISEL-NEXT: s_mov_b32 s1, 0 739; GFX1032DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 740; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s3, s2 741; GFX1032DAGISEL-NEXT: v_readlane_b32 s6, v0, s3 742; GFX1032DAGISEL-NEXT: s_bitset0_b32 s2, s3 743; GFX1032DAGISEL-NEXT: s_max_u32 s1, s1, s6 744; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 745; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 746; GFX1032DAGISEL-NEXT: ; %bb.5: 747; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1 748; GFX1032DAGISEL-NEXT: .LBB4_6: ; %endif 749; GFX1032DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 750; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 751; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, 0 752; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) 753; GFX1032DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] 754; GFX1032DAGISEL-NEXT: s_endpgm 755; 756; GFX1032GISEL-LABEL: divergent_cfg: 757; GFX1032GISEL: ; %bb.0: ; %entry 758; GFX1032GISEL-NEXT: 
v_cmp_le_u32_e32 vcc_lo, 16, v0 759; GFX1032GISEL-NEXT: ; implicit-def: $sgpr0 760; GFX1032GISEL-NEXT: s_and_saveexec_b32 s1, vcc_lo 761; GFX1032GISEL-NEXT: s_xor_b32 s1, exec_lo, s1 762; GFX1032GISEL-NEXT: s_cbranch_execz .LBB4_2 763; GFX1032GISEL-NEXT: ; %bb.1: ; %else 764; GFX1032GISEL-NEXT: s_load_dword s0, s[4:5], 0x2c 765; GFX1032GISEL-NEXT: ; implicit-def: $vgpr0 766; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) 767; GFX1032GISEL-NEXT: s_mov_b32 s0, s0 768; GFX1032GISEL-NEXT: .LBB4_2: ; %Flow 769; GFX1032GISEL-NEXT: s_andn2_saveexec_b32 s1, s1 770; GFX1032GISEL-NEXT: s_cbranch_execz .LBB4_5 771; GFX1032GISEL-NEXT: ; %bb.3: ; %if 772; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo 773; GFX1032GISEL-NEXT: s_mov_b32 s0, 0 774; GFX1032GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 775; GFX1032GISEL-NEXT: s_ff1_i32_b32 s3, s2 776; GFX1032GISEL-NEXT: v_readlane_b32 s6, v0, s3 777; GFX1032GISEL-NEXT: s_bitset0_b32 s2, s3 778; GFX1032GISEL-NEXT: s_max_u32 s0, s0, s6 779; GFX1032GISEL-NEXT: s_cmp_lg_u32 s2, 0 780; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB4_4 781; GFX1032GISEL-NEXT: .LBB4_5: ; %endif 782; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1 783; GFX1032GISEL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 784; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s0 785; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0 786; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) 787; GFX1032GISEL-NEXT: global_store_dword v1, v0, s[2:3] 788; GFX1032GISEL-NEXT: s_endpgm 789; 790; GFX1164DAGISEL-LABEL: divergent_cfg: 791; GFX1164DAGISEL: ; %bb.0: ; %entry 792; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 793; GFX1164DAGISEL-NEXT: s_mov_b64 s[0:1], exec 794; GFX1164DAGISEL-NEXT: ; implicit-def: $sgpr2 795; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 796; GFX1164DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0 797; GFX1164DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] 798; GFX1164DAGISEL-NEXT: ; %bb.1: ; %else 799; GFX1164DAGISEL-NEXT: s_load_b32 s2, s[4:5], 0x2c 800; GFX1164DAGISEL-NEXT: ; implicit-def: $vgpr0 
801; GFX1164DAGISEL-NEXT: ; %bb.2: ; %Flow 802; GFX1164DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1] 803; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) 804; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s2 805; GFX1164DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] 806; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB4_6 807; GFX1164DAGISEL-NEXT: ; %bb.3: ; %if 808; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec 809; GFX1164DAGISEL-NEXT: s_mov_b32 s6, 0 810; GFX1164DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 811; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s7, s[2:3] 812; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) 813; GFX1164DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 814; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 815; GFX1164DAGISEL-NEXT: s_max_u32 s6, s6, s8 816; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 817; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 818; GFX1164DAGISEL-NEXT: ; %bb.5: 819; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s6 820; GFX1164DAGISEL-NEXT: .LBB4_6: ; %endif 821; GFX1164DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] 822; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 823; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0 824; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) 825; GFX1164DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] 826; GFX1164DAGISEL-NEXT: s_endpgm 827; 828; GFX1164GISEL-LABEL: divergent_cfg: 829; GFX1164GISEL: ; %bb.0: ; %entry 830; GFX1164GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 831; GFX1164GISEL-NEXT: s_mov_b64 s[0:1], exec 832; GFX1164GISEL-NEXT: ; implicit-def: $sgpr6 833; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 834; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 835; GFX1164GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] 836; GFX1164GISEL-NEXT: s_cbranch_execz .LBB4_2 837; GFX1164GISEL-NEXT: ; %bb.1: ; %else 838; GFX1164GISEL-NEXT: s_load_b32 s2, s[4:5], 0x2c 839; GFX1164GISEL-NEXT: ; implicit-def: $vgpr0 840; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) 841; GFX1164GISEL-NEXT: s_mov_b32 s6, s2 842; 
GFX1164GISEL-NEXT: .LBB4_2: ; %Flow 843; GFX1164GISEL-NEXT: s_and_not1_saveexec_b64 s[0:1], s[0:1] 844; GFX1164GISEL-NEXT: s_cbranch_execz .LBB4_5 845; GFX1164GISEL-NEXT: ; %bb.3: ; %if 846; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec 847; GFX1164GISEL-NEXT: s_mov_b32 s6, 0 848; GFX1164GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 849; GFX1164GISEL-NEXT: s_ctz_i32_b64 s7, s[2:3] 850; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) 851; GFX1164GISEL-NEXT: v_readlane_b32 s8, v0, s7 852; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s7 853; GFX1164GISEL-NEXT: s_max_u32 s6, s6, s8 854; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 855; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB4_4 856; GFX1164GISEL-NEXT: .LBB4_5: ; %endif 857; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[0:1] 858; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 859; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s6 860; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 861; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) 862; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1] 863; GFX1164GISEL-NEXT: s_endpgm 864; 865; GFX1132DAGISEL-LABEL: divergent_cfg: 866; GFX1132DAGISEL: ; %bb.0: ; %entry 867; GFX1132DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 868; GFX1132DAGISEL-NEXT: s_mov_b32 s0, exec_lo 869; GFX1132DAGISEL-NEXT: ; implicit-def: $sgpr1 870; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 871; GFX1132DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0 872; GFX1132DAGISEL-NEXT: s_xor_b32 s0, exec_lo, s0 873; GFX1132DAGISEL-NEXT: ; %bb.1: ; %else 874; GFX1132DAGISEL-NEXT: s_load_b32 s1, s[4:5], 0x2c 875; GFX1132DAGISEL-NEXT: ; implicit-def: $vgpr0 876; GFX1132DAGISEL-NEXT: ; %bb.2: ; %Flow 877; GFX1132DAGISEL-NEXT: s_or_saveexec_b32 s0, s0 878; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) 879; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1 880; GFX1132DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0 881; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB4_6 882; GFX1132DAGISEL-NEXT: ; %bb.3: ; %if 883; 
GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo 884; GFX1132DAGISEL-NEXT: s_mov_b32 s1, 0 885; GFX1132DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 886; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2 887; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) 888; GFX1132DAGISEL-NEXT: v_readlane_b32 s6, v0, s3 889; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3 890; GFX1132DAGISEL-NEXT: s_max_u32 s1, s1, s6 891; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 892; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 893; GFX1132DAGISEL-NEXT: ; %bb.5: 894; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1 895; GFX1132DAGISEL-NEXT: .LBB4_6: ; %endif 896; GFX1132DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 897; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 898; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, 0 899; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) 900; GFX1132DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] 901; GFX1132DAGISEL-NEXT: s_endpgm 902; 903; GFX1132GISEL-LABEL: divergent_cfg: 904; GFX1132GISEL: ; %bb.0: ; %entry 905; GFX1132GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 906; GFX1132GISEL-NEXT: s_mov_b32 s1, exec_lo 907; GFX1132GISEL-NEXT: ; implicit-def: $sgpr0 908; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 909; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 910; GFX1132GISEL-NEXT: s_xor_b32 s1, exec_lo, s1 911; GFX1132GISEL-NEXT: s_cbranch_execz .LBB4_2 912; GFX1132GISEL-NEXT: ; %bb.1: ; %else 913; GFX1132GISEL-NEXT: s_load_b32 s0, s[4:5], 0x2c 914; GFX1132GISEL-NEXT: ; implicit-def: $vgpr0 915; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) 916; GFX1132GISEL-NEXT: s_mov_b32 s0, s0 917; GFX1132GISEL-NEXT: .LBB4_2: ; %Flow 918; GFX1132GISEL-NEXT: s_and_not1_saveexec_b32 s1, s1 919; GFX1132GISEL-NEXT: s_cbranch_execz .LBB4_5 920; GFX1132GISEL-NEXT: ; %bb.3: ; %if 921; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo 922; GFX1132GISEL-NEXT: s_mov_b32 s0, 0 923; GFX1132GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 924; GFX1132GISEL-NEXT: 
s_ctz_i32_b32 s3, s2 925; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) 926; GFX1132GISEL-NEXT: v_readlane_b32 s6, v0, s3 927; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3 928; GFX1132GISEL-NEXT: s_max_u32 s0, s0, s6 929; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 0 930; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB4_4 931; GFX1132GISEL-NEXT: .LBB4_5: ; %endif 932; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1 933; GFX1132GISEL-NEXT: s_load_b64 s[2:3], s[4:5], 0x24 934; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0 935; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) 936; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[2:3] 937; GFX1132GISEL-NEXT: s_endpgm 938entry: 939 %tid = call i32 @llvm.amdgcn.workitem.id.x() 940 %d_cmp = icmp ult i32 %tid, 16 941 br i1 %d_cmp, label %if, label %else 942 943if: 944 %reducedValTid = call i32 @llvm.amdgcn.wave.reduce.umax.i32(i32 %tid, i32 1) 945 br label %endif 946 947else: 948 %reducedValIn = call i32 @llvm.amdgcn.wave.reduce.umax.i32(i32 %in, i32 1) 949 br label %endif 950 951endif: 952 %combine = phi i32 [%reducedValTid, %if], [%reducedValIn, %else] 953 store i32 %combine, ptr addrspace(1) %out 954 ret void 955} 956