1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3 2; RUN: llc -mtriple=amdgcn -mcpu=tonga -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX8DAGISEL %s 3; RUN: llc -mtriple=amdgcn -mcpu=tonga -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX8GISEL %s 4; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9DAGISEL %s 5; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9GISEL %s 6; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -global-isel=0 -mattr=+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10DAGISEL,GFX1064DAGISEL %s 7; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -global-isel=1 -mattr=+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10GISEL,GFX1064GISEL %s 8; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10DAGISEL,GFX1032DAGISEL %s 9; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10GISEL,GFX1032GISEL %s 10; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11DAGISEL,GFX1164DAGISEL %s 11; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=+wavefrontsize64 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11GISEL,GFX1164GISEL %s 12; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11DAGISEL,GFX1132DAGISEL %s 13; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11GISEL,GFX1132GISEL %s 14 15 16declare i32 @llvm.amdgcn.wave.reduce.umin.i32(i32, i32 immarg) 17declare i32 @llvm.amdgcn.workitem.id.x() 18 19define amdgpu_kernel void @uniform_value(ptr addrspace(1) %out, i32 %in) { 20; GFX8DAGISEL-LABEL: uniform_value: 21; GFX8DAGISEL: ; %bb.0: ; %entry 22; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 23; GFX8DAGISEL-NEXT: s_load_dword s2, s[4:5], 0x2c 24; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) 25; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 26; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 27; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s2 28; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v2 29; GFX8DAGISEL-NEXT: s_endpgm 30; 31; GFX8GISEL-LABEL: uniform_value: 32; GFX8GISEL: ; %bb.0: ; %entry 33; GFX8GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c 34; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 35; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) 36; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s2 37; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 38; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1 39; GFX8GISEL-NEXT: flat_store_dword v[0:1], v2 40; GFX8GISEL-NEXT: s_endpgm 41; 42; GFX9DAGISEL-LABEL: uniform_value: 43; GFX9DAGISEL: ; %bb.0: ; %entry 44; GFX9DAGISEL-NEXT: s_load_dword s2, s[4:5], 0x2c 45; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 46; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 47; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) 48; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s2 49; GFX9DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] 50; GFX9DAGISEL-NEXT: s_endpgm 51; 52; GFX9GISEL-LABEL: uniform_value: 53; GFX9GISEL: ; %bb.0: ; %entry 54; GFX9GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c 55; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 56; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0 57; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) 58; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s2 59; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1] 60; GFX9GISEL-NEXT: s_endpgm 61; 62; GFX10DAGISEL-LABEL: uniform_value: 63; GFX10DAGISEL: ; %bb.0: ; %entry 64; GFX10DAGISEL-NEXT: s_clause 0x1 65; GFX10DAGISEL-NEXT: s_load_dword s2, s[4:5], 0x2c 66; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 67; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0 68; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0) 69; GFX10DAGISEL-NEXT: v_mov_b32_e32 v1, s2 70; GFX10DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] 71; GFX10DAGISEL-NEXT: s_endpgm 72; 73; GFX10GISEL-LABEL: uniform_value: 74; GFX10GISEL: ; %bb.0: ; %entry 75; GFX10GISEL-NEXT: s_clause 0x1 76; GFX10GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c 77; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 78; GFX10GISEL-NEXT: v_mov_b32_e32 v1, 0 79; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0) 80; GFX10GISEL-NEXT: v_mov_b32_e32 v0, s2 81; GFX10GISEL-NEXT: global_store_dword v1, v0, s[0:1] 82; GFX10GISEL-NEXT: s_endpgm 83; 84; GFX1164DAGISEL-LABEL: uniform_value: 85; GFX1164DAGISEL: ; %bb.0: ; %entry 86; GFX1164DAGISEL-NEXT: s_clause 0x1 87; GFX1164DAGISEL-NEXT: s_load_b32 s2, s[4:5], 0x2c 88; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 89; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0 90; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) 91; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s2 92; GFX1164DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] 93; GFX1164DAGISEL-NEXT: s_endpgm 94; 95; GFX1164GISEL-LABEL: uniform_value: 96; GFX1164GISEL: ; %bb.0: ; %entry 97; GFX1164GISEL-NEXT: s_clause 0x1 98; GFX1164GISEL-NEXT: s_load_b32 s2, s[4:5], 0x2c 99; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 100; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 101; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) 102; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s2 103; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1] 104; GFX1164GISEL-NEXT: s_endpgm 105; 106; GFX1132DAGISEL-LABEL: uniform_value: 107; GFX1132DAGISEL: ; %bb.0: ; %entry 108; GFX1132DAGISEL-NEXT: s_clause 0x1 109; GFX1132DAGISEL-NEXT: s_load_b32 s2, s[4:5], 0x2c 110; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 111; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) 112; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 113; GFX1132DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] 114; GFX1132DAGISEL-NEXT: s_endpgm 115; 116; GFX1132GISEL-LABEL: uniform_value: 117; GFX1132GISEL: ; %bb.0: ; %entry 118; GFX1132GISEL-NEXT: s_clause 0x1 119; GFX1132GISEL-NEXT: s_load_b32 s2, s[4:5], 0x2c 120; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 121; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) 122; GFX1132GISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v0, s2 123; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[0:1] 124; GFX1132GISEL-NEXT: s_endpgm 125entry: 126 %result = call i32 @llvm.amdgcn.wave.reduce.umin.i32(i32 %in, i32 1) 127 store i32 %result, ptr addrspace(1) %out 128 ret void 129} 130 131define amdgpu_kernel void @const_value(ptr addrspace(1) %out) { 132; GFX8DAGISEL-LABEL: const_value: 133; GFX8DAGISEL: ; %bb.0: ; %entry 134; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 135; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, 0x7b 136; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) 137; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 138; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 139; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v2 140; GFX8DAGISEL-NEXT: s_endpgm 141; 142; GFX8GISEL-LABEL: const_value: 143; GFX8GISEL: ; %bb.0: ; %entry 144; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 145; GFX8GISEL-NEXT: v_mov_b32_e32 v2, 0x7b 146; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) 147; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 148; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1 149; GFX8GISEL-NEXT: flat_store_dword v[0:1], v2 150; GFX8GISEL-NEXT: s_endpgm 151; 152; GFX9DAGISEL-LABEL: const_value: 153; GFX9DAGISEL: ; %bb.0: ; %entry 154; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 155; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 156; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b 157; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) 158; GFX9DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] 159; GFX9DAGISEL-NEXT: s_endpgm 160; 161; GFX9GISEL-LABEL: const_value: 162; GFX9GISEL: ; %bb.0: ; %entry 163; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 164; GFX9GISEL-NEXT: v_mov_b32_e32 v0, 0x7b 165; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0 166; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) 167; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1] 168; GFX9GISEL-NEXT: s_endpgm 169; 170; GFX10DAGISEL-LABEL: const_value: 171; GFX10DAGISEL: ; %bb.0: ; %entry 172; GFX10DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 173; GFX10DAGISEL-NEXT: v_mov_b32_e32 v0, 0 174; GFX10DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b 175; GFX10DAGISEL-NEXT: s_waitcnt lgkmcnt(0) 176; GFX10DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] 177; GFX10DAGISEL-NEXT: s_endpgm 178; 179; GFX10GISEL-LABEL: const_value: 180; GFX10GISEL: ; %bb.0: ; %entry 181; GFX10GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 182; GFX10GISEL-NEXT: v_mov_b32_e32 v0, 0x7b 183; GFX10GISEL-NEXT: v_mov_b32_e32 v1, 0 184; GFX10GISEL-NEXT: s_waitcnt lgkmcnt(0) 185; GFX10GISEL-NEXT: global_store_dword v1, v0, s[0:1] 186; GFX10GISEL-NEXT: s_endpgm 187; 188; GFX1164DAGISEL-LABEL: const_value: 189; GFX1164DAGISEL: ; %bb.0: ; %entry 190; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 191; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0 192; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0x7b 193; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) 194; GFX1164DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] 195; GFX1164DAGISEL-NEXT: s_endpgm 196; 197; GFX1164GISEL-LABEL: const_value: 198; GFX1164GISEL: ; %bb.0: ; %entry 199; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 200; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, 0x7b 201; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 202; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) 203; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1] 204; GFX1164GISEL-NEXT: s_endpgm 205; 206; GFX1132DAGISEL-LABEL: const_value: 207; GFX1132DAGISEL: ; %bb.0: ; %entry 208; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 209; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, 0x7b 210; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) 211; GFX1132DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] 212; GFX1132DAGISEL-NEXT: s_endpgm 213; 214; GFX1132GISEL-LABEL: const_value: 215; GFX1132GISEL: ; %bb.0: ; %entry 216; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 217; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, 0x7b :: v_dual_mov_b32 v1, 0 218; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) 219; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[0:1] 220; GFX1132GISEL-NEXT: s_endpgm 221entry: 222 %result = call i32 @llvm.amdgcn.wave.reduce.umin.i32(i32 123, i32 1) 223 store i32 %result, ptr addrspace(1) %out 224 ret void 225} 226 227define amdgpu_kernel void @poison_value(ptr addrspace(1) %out) { 228; GFX8DAGISEL-LABEL: poison_value: 229; GFX8DAGISEL: ; %bb.0: ; %entry 230; GFX8DAGISEL-NEXT: s_endpgm 231; 232; GFX8GISEL-LABEL: poison_value: 233; GFX8GISEL: ; %bb.0: ; %entry 234; GFX8GISEL-NEXT: s_endpgm 235; 236; GFX9DAGISEL-LABEL: poison_value: 237; GFX9DAGISEL: ; %bb.0: ; %entry 238; GFX9DAGISEL-NEXT: s_endpgm 239; 240; GFX9GISEL-LABEL: poison_value: 241; GFX9GISEL: ; %bb.0: ; %entry 242; GFX9GISEL-NEXT: s_endpgm 243; 244; GFX10DAGISEL-LABEL: poison_value: 245; GFX10DAGISEL: ; %bb.0: ; %entry 246; GFX10DAGISEL-NEXT: s_endpgm 247; 248; GFX10GISEL-LABEL: poison_value: 249; GFX10GISEL: ; %bb.0: ; %entry 250; GFX10GISEL-NEXT: s_endpgm 251; 252; GFX11DAGISEL-LABEL: poison_value: 253; GFX11DAGISEL: ; %bb.0: ; %entry 254; GFX11DAGISEL-NEXT: s_endpgm 255; 256; GFX11GISEL-LABEL: poison_value: 257; GFX11GISEL: ; %bb.0: ; %entry 258; GFX11GISEL-NEXT: s_endpgm 259entry: 260 %result = call i32 @llvm.amdgcn.wave.reduce.umin.i32(i32 poison, i32 1) 261 store i32 %result, ptr addrspace(1) %out 262 ret void 263} 264 265define amdgpu_kernel void @divergent_value(ptr addrspace(1) %out) { 266; GFX8DAGISEL-LABEL: divergent_value: 267; GFX8DAGISEL: ; %bb.0: ; %entry 268; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 269; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec 270; GFX8DAGISEL-NEXT: s_mov_b32 s4, -1 271; GFX8DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 272; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] 273; GFX8DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 274; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 275; GFX8DAGISEL-NEXT: s_min_u32 s4, s4, s6 276; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 277; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 278; GFX8DAGISEL-NEXT: ; %bb.2: 279; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) 280; GFX8DAGISEL-NEXT: v_mov_b32_e32 v0, s0 281; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s1 282; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s4 283; GFX8DAGISEL-NEXT: flat_store_dword v[0:1], v2 284; GFX8DAGISEL-NEXT: s_endpgm 285; 286; GFX8GISEL-LABEL: divergent_value: 287; GFX8GISEL: ; %bb.0: ; %entry 288; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 289; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec 290; GFX8GISEL-NEXT: s_mov_b32 s4, -1 291; GFX8GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 292; GFX8GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] 293; GFX8GISEL-NEXT: v_readlane_b32 s6, v0, s5 294; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s5 295; GFX8GISEL-NEXT: s_min_u32 s4, s4, s6 296; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 297; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB3_1 298; GFX8GISEL-NEXT: ; %bb.2: 299; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) 300; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 301; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s4 302; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1 303; GFX8GISEL-NEXT: flat_store_dword v[0:1], v2 304; GFX8GISEL-NEXT: s_endpgm 305; 306; GFX9DAGISEL-LABEL: divergent_value: 307; GFX9DAGISEL: ; %bb.0: ; %entry 308; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 309; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, 0 310; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec 311; GFX9DAGISEL-NEXT: s_mov_b32 s4, -1 312; GFX9DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 313; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] 314; GFX9DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 315; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 316; GFX9DAGISEL-NEXT: s_min_u32 s4, s4, s6 317; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 318; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 319; GFX9DAGISEL-NEXT: ; %bb.2: 320; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, s4 321; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) 322; GFX9DAGISEL-NEXT: global_store_dword v1, v0, s[0:1] 323; GFX9DAGISEL-NEXT: s_endpgm 324; 325; GFX9GISEL-LABEL: divergent_value: 326; GFX9GISEL: ; %bb.0: ; %entry 327; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 328; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec 329; GFX9GISEL-NEXT: s_mov_b32 s4, -1 330; GFX9GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 331; GFX9GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] 332; GFX9GISEL-NEXT: v_readlane_b32 s6, v0, s5 333; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s5 334; GFX9GISEL-NEXT: s_min_u32 s4, s4, s6 335; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 336; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB3_1 337; GFX9GISEL-NEXT: ; %bb.2: 338; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s4 339; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0 340; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) 341; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1] 342; GFX9GISEL-NEXT: s_endpgm 343; 344; GFX1064DAGISEL-LABEL: divergent_value: 345; GFX1064DAGISEL: ; %bb.0: ; %entry 346; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 347; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, 0 348; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec 349; GFX1064DAGISEL-NEXT: s_mov_b32 s4, -1 350; GFX1064DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 351; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] 352; GFX1064DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 353; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 354; GFX1064DAGISEL-NEXT: s_min_u32 s4, s4, s6 355; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 356; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 357; GFX1064DAGISEL-NEXT: ; %bb.2: 358; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, s4 359; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) 360; GFX1064DAGISEL-NEXT: global_store_dword v1, v0, s[0:1] 361; GFX1064DAGISEL-NEXT: s_endpgm 362; 363; GFX1064GISEL-LABEL: divergent_value: 364; GFX1064GISEL: ; %bb.0: ; %entry 365; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 366; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec 367; GFX1064GISEL-NEXT: s_mov_b32 s4, -1 368; GFX1064GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 369; GFX1064GISEL-NEXT: s_ff1_i32_b64 s5, s[2:3] 370; GFX1064GISEL-NEXT: v_readlane_b32 s6, v0, s5 371; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s5 372; GFX1064GISEL-NEXT: s_min_u32 s4, s4, s6 373; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 374; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB3_1 375; GFX1064GISEL-NEXT: ; %bb.2: 376; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s4 377; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0 378; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) 379; GFX1064GISEL-NEXT: global_store_dword v1, v0, s[0:1] 380; GFX1064GISEL-NEXT: s_endpgm 381; 382; GFX1032DAGISEL-LABEL: divergent_value: 383; GFX1032DAGISEL: ; %bb.0: ; %entry 384; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 385; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, 0 386; GFX1032DAGISEL-NEXT: s_mov_b32 s3, exec_lo 387; GFX1032DAGISEL-NEXT: s_mov_b32 s2, -1 388; GFX1032DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 389; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s4, s3 390; GFX1032DAGISEL-NEXT: v_readlane_b32 s5, v0, s4 391; GFX1032DAGISEL-NEXT: s_bitset0_b32 s3, s4 392; GFX1032DAGISEL-NEXT: s_min_u32 s2, s2, s5 393; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s3, 0 394; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 395; GFX1032DAGISEL-NEXT: ; %bb.2: 396; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, s2 397; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) 398; GFX1032DAGISEL-NEXT: global_store_dword v1, v0, s[0:1] 399; GFX1032DAGISEL-NEXT: s_endpgm 400; 401; GFX1032GISEL-LABEL: divergent_value: 402; GFX1032GISEL: ; %bb.0: ; %entry 403; GFX1032GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 404; GFX1032GISEL-NEXT: s_mov_b32 s3, exec_lo 405; GFX1032GISEL-NEXT: s_mov_b32 s2, -1 406; GFX1032GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 407; GFX1032GISEL-NEXT: s_ff1_i32_b32 s4, s3 408; GFX1032GISEL-NEXT: v_readlane_b32 s5, v0, s4 409; GFX1032GISEL-NEXT: s_bitset0_b32 s3, s4 410; GFX1032GISEL-NEXT: s_min_u32 s2, s2, s5 411; GFX1032GISEL-NEXT: s_cmp_lg_u32 s3, 0 412; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB3_1 413; GFX1032GISEL-NEXT: ; %bb.2: 414; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s2 415; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0 416; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) 417; GFX1032GISEL-NEXT: global_store_dword v1, v0, s[0:1] 418; GFX1032GISEL-NEXT: s_endpgm 419; 420; GFX1164DAGISEL-LABEL: divergent_value: 421; GFX1164DAGISEL: ; %bb.0: ; %entry 422; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 423; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, 0 424; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 425; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec 426; GFX1164DAGISEL-NEXT: s_mov_b32 s4, -1 427; GFX1164DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 428; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s5, s[2:3] 429; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) 430; GFX1164DAGISEL-NEXT: v_readlane_b32 s6, v0, s5 431; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s5 432; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 433; GFX1164DAGISEL-NEXT: s_min_u32 s4, s4, s6 434; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 435; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 436; GFX1164DAGISEL-NEXT: ; %bb.2: 437; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, s4 438; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) 439; GFX1164DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1] 440; GFX1164DAGISEL-NEXT: s_endpgm 441; 442; GFX1164GISEL-LABEL: divergent_value: 443; GFX1164GISEL: ; %bb.0: ; %entry 444; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 445; GFX1164GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 446; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec 447; GFX1164GISEL-NEXT: s_mov_b32 s4, -1 448; GFX1164GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 449; GFX1164GISEL-NEXT: s_ctz_i32_b64 s5, s[2:3] 450; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) 451; GFX1164GISEL-NEXT: v_readlane_b32 s6, v0, s5 452; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s5 453; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 454; GFX1164GISEL-NEXT: s_min_u32 s4, s4, s6 455; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 456; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB3_1 457; GFX1164GISEL-NEXT: ; %bb.2: 458; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s4 459; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 460; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) 461; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1] 462; GFX1164GISEL-NEXT: s_endpgm 463; 464; GFX1132DAGISEL-LABEL: divergent_value: 465; GFX1132DAGISEL: ; %bb.0: ; %entry 466; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 467; GFX1132DAGISEL-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_and_b32 v0, 0x3ff, v0 468; GFX1132DAGISEL-NEXT: s_mov_b32 s3, exec_lo 469; GFX1132DAGISEL-NEXT: s_mov_b32 s2, -1 470; GFX1132DAGISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 471; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s4, s3 472; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) 473; GFX1132DAGISEL-NEXT: v_readlane_b32 s5, v0, s4 474; GFX1132DAGISEL-NEXT: s_bitset0_b32 s3, s4 475; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 476; GFX1132DAGISEL-NEXT: s_min_u32 s2, s2, s5 477; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s3, 0 478; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB3_1 479; GFX1132DAGISEL-NEXT: ; %bb.2: 480; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, s2 481; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) 482; GFX1132DAGISEL-NEXT: global_store_b32 v1, v0, s[0:1] 483; GFX1132DAGISEL-NEXT: s_endpgm 484; 485; GFX1132GISEL-LABEL: divergent_value: 486; GFX1132GISEL: ; %bb.0: ; %entry 487; GFX1132GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 488; GFX1132GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 489; GFX1132GISEL-NEXT: s_mov_b32 s3, exec_lo 490; GFX1132GISEL-NEXT: s_mov_b32 s2, -1 491; GFX1132GISEL-NEXT: .LBB3_1: ; =>This Inner Loop Header: Depth=1 492; GFX1132GISEL-NEXT: s_ctz_i32_b32 s4, s3 493; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) 494; GFX1132GISEL-NEXT: v_readlane_b32 s5, v0, s4 495; GFX1132GISEL-NEXT: s_bitset0_b32 s3, s4 496; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 497; GFX1132GISEL-NEXT: s_min_u32 s2, s2, s5 498; GFX1132GISEL-NEXT: s_cmp_lg_u32 s3, 0 499; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB3_1 500; GFX1132GISEL-NEXT: ; %bb.2: 501; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, 0 502; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) 503; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[0:1] 504; GFX1132GISEL-NEXT: s_endpgm 505entry: 506 %id.x = call i32 @llvm.amdgcn.workitem.id.x() 507 %result = call i32 @llvm.amdgcn.wave.reduce.umin.i32(i32 %id.x, i32 1) 508 store i32 %result, ptr addrspace(1) %out 509 ret void 510} 511 512define amdgpu_kernel void @divergent_cfg(ptr addrspace(1) %out, i32 %in) { 513; GFX8DAGISEL-LABEL: divergent_cfg: 514; GFX8DAGISEL: ; %bb.0: ; %entry 515; GFX8DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc, 15, v0 516; GFX8DAGISEL-NEXT: ; implicit-def: $sgpr2 517; GFX8DAGISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc 518; GFX8DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] 519; GFX8DAGISEL-NEXT: ; %bb.1: ; %else 520; GFX8DAGISEL-NEXT: s_load_dword s2, s[4:5], 0x2c 521; GFX8DAGISEL-NEXT: ; implicit-def: $vgpr0 522; GFX8DAGISEL-NEXT: ; %bb.2: ; %Flow 523; GFX8DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1] 524; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) 525; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s2 526; GFX8DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] 527; GFX8DAGISEL-NEXT: s_cbranch_execz .LBB4_6 528; GFX8DAGISEL-NEXT: ; %bb.3: ; %if 529; GFX8DAGISEL-NEXT: s_mov_b64 s[2:3], exec 530; GFX8DAGISEL-NEXT: s_mov_b32 s6, -1 531; GFX8DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 532; GFX8DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] 533; GFX8DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 534; GFX8DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 535; GFX8DAGISEL-NEXT: s_min_u32 s6, s6, s8 536; GFX8DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 537; GFX8DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 538; GFX8DAGISEL-NEXT: ; %bb.5: 539; GFX8DAGISEL-NEXT: v_mov_b32_e32 v1, s6 540; GFX8DAGISEL-NEXT: .LBB4_6: ; %endif 541; GFX8DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] 542; GFX8DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 543; GFX8DAGISEL-NEXT: s_waitcnt lgkmcnt(0) 544; GFX8DAGISEL-NEXT: v_mov_b32_e32 v3, s1 545; GFX8DAGISEL-NEXT: v_mov_b32_e32 v2, s0 546; GFX8DAGISEL-NEXT: flat_store_dword v[2:3], v1 547; GFX8DAGISEL-NEXT: s_endpgm 548; 549; GFX8GISEL-LABEL: divergent_cfg: 550; GFX8GISEL: ; %bb.0: ; %entry 551; GFX8GISEL-NEXT: v_cmp_le_u32_e32 vcc, 16, v0 552; GFX8GISEL-NEXT: ; implicit-def: $sgpr6 553; GFX8GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc 554; GFX8GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] 555; GFX8GISEL-NEXT: s_cbranch_execz .LBB4_2 556; GFX8GISEL-NEXT: ; %bb.1: ; %else 557; GFX8GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c 558; GFX8GISEL-NEXT: ; implicit-def: $vgpr0 559; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) 560; GFX8GISEL-NEXT: s_mov_b32 s6, s2 561; GFX8GISEL-NEXT: .LBB4_2: ; %Flow 562; GFX8GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] 563; GFX8GISEL-NEXT: s_cbranch_execz .LBB4_5 564; GFX8GISEL-NEXT: ; %bb.3: ; %if 565; GFX8GISEL-NEXT: s_mov_b64 s[2:3], exec 566; GFX8GISEL-NEXT: s_mov_b32 s6, -1 567; GFX8GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 568; GFX8GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] 569; GFX8GISEL-NEXT: v_readlane_b32 s8, v0, s7 570; GFX8GISEL-NEXT: s_bitset0_b64 s[2:3], s7 571; GFX8GISEL-NEXT: s_min_u32 s6, s6, s8 572; GFX8GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 573; GFX8GISEL-NEXT: s_cbranch_scc1 .LBB4_4 574; GFX8GISEL-NEXT: .LBB4_5: ; %endif 575; GFX8GISEL-NEXT: s_or_b64 exec, exec, s[0:1] 576; GFX8GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 577; GFX8GISEL-NEXT: v_mov_b32_e32 v2, s6 578; GFX8GISEL-NEXT: s_waitcnt lgkmcnt(0) 579; GFX8GISEL-NEXT: v_mov_b32_e32 v0, s0 580; GFX8GISEL-NEXT: v_mov_b32_e32 v1, s1 581; GFX8GISEL-NEXT: flat_store_dword v[0:1], v2 582; GFX8GISEL-NEXT: s_endpgm 583; 584; GFX9DAGISEL-LABEL: divergent_cfg: 585; GFX9DAGISEL: ; %bb.0: ; %entry 586; GFX9DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc, 15, v0 587; GFX9DAGISEL-NEXT: ; implicit-def: $sgpr2 588; GFX9DAGISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc 589; GFX9DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] 590; GFX9DAGISEL-NEXT: ; %bb.1: ; %else 591; GFX9DAGISEL-NEXT: s_load_dword s2, s[4:5], 0x2c 592; GFX9DAGISEL-NEXT: ; implicit-def: $vgpr0 593; GFX9DAGISEL-NEXT: ; %bb.2: ; %Flow 594; GFX9DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1] 595; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) 596; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s2 597; GFX9DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] 598; GFX9DAGISEL-NEXT: s_cbranch_execz .LBB4_6 599; GFX9DAGISEL-NEXT: ; %bb.3: ; %if 600; GFX9DAGISEL-NEXT: s_mov_b64 s[2:3], exec 601; GFX9DAGISEL-NEXT: s_mov_b32 s6, -1 602; GFX9DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 603; GFX9DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] 604; GFX9DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 605; GFX9DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 606; GFX9DAGISEL-NEXT: s_min_u32 s6, s6, s8 607; GFX9DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 608; GFX9DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 609; GFX9DAGISEL-NEXT: ; %bb.5: 610; GFX9DAGISEL-NEXT: v_mov_b32_e32 v1, s6 611; GFX9DAGISEL-NEXT: .LBB4_6: ; %endif 612; GFX9DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] 613; GFX9DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 614; GFX9DAGISEL-NEXT: v_mov_b32_e32 v0, 0 615; GFX9DAGISEL-NEXT: s_waitcnt lgkmcnt(0) 616; GFX9DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] 617; GFX9DAGISEL-NEXT: s_endpgm 618; 619; GFX9GISEL-LABEL: divergent_cfg: 620; GFX9GISEL: ; %bb.0: ; %entry 621; GFX9GISEL-NEXT: v_cmp_le_u32_e32 vcc, 16, v0 622; GFX9GISEL-NEXT: ; implicit-def: $sgpr6 623; GFX9GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc 624; GFX9GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] 625; GFX9GISEL-NEXT: s_cbranch_execz .LBB4_2 626; GFX9GISEL-NEXT: ; %bb.1: ; %else 627; GFX9GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c 628; GFX9GISEL-NEXT: ; implicit-def: $vgpr0 629; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) 630; GFX9GISEL-NEXT: s_mov_b32 s6, s2 631; GFX9GISEL-NEXT: .LBB4_2: ; %Flow 632; GFX9GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] 633; GFX9GISEL-NEXT: s_cbranch_execz .LBB4_5 634; GFX9GISEL-NEXT: ; %bb.3: ; %if 635; GFX9GISEL-NEXT: s_mov_b64 s[2:3], exec 636; GFX9GISEL-NEXT: s_mov_b32 s6, -1 637; GFX9GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 638; GFX9GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] 639; GFX9GISEL-NEXT: v_readlane_b32 s8, v0, s7 640; GFX9GISEL-NEXT: s_bitset0_b64 s[2:3], s7 641; GFX9GISEL-NEXT: s_min_u32 s6, s6, s8 642; GFX9GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 643; GFX9GISEL-NEXT: s_cbranch_scc1 .LBB4_4 644; GFX9GISEL-NEXT: .LBB4_5: ; %endif 645; GFX9GISEL-NEXT: s_or_b64 exec, exec, s[0:1] 646; GFX9GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 647; GFX9GISEL-NEXT: v_mov_b32_e32 v0, s6 648; GFX9GISEL-NEXT: v_mov_b32_e32 v1, 0 649; GFX9GISEL-NEXT: s_waitcnt lgkmcnt(0) 650; GFX9GISEL-NEXT: global_store_dword v1, v0, s[0:1] 651; GFX9GISEL-NEXT: s_endpgm 652; 653; GFX1064DAGISEL-LABEL: divergent_cfg: 654; GFX1064DAGISEL: ; %bb.0: ; %entry 655; GFX1064DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc, 15, v0 656; GFX1064DAGISEL-NEXT: ; implicit-def: $sgpr2 657; GFX1064DAGISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc 658; GFX1064DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] 659; GFX1064DAGISEL-NEXT: ; %bb.1: ; %else 660; GFX1064DAGISEL-NEXT: s_load_dword s2, s[4:5], 0x2c 661; GFX1064DAGISEL-NEXT: ; implicit-def: $vgpr0 662; GFX1064DAGISEL-NEXT: ; %bb.2: ; %Flow 663; GFX1064DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1] 664; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) 665; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s2 666; GFX1064DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] 667; GFX1064DAGISEL-NEXT: s_cbranch_execz .LBB4_6 668; GFX1064DAGISEL-NEXT: ; %bb.3: ; %if 669; GFX1064DAGISEL-NEXT: s_mov_b64 s[2:3], exec 670; GFX1064DAGISEL-NEXT: s_mov_b32 s6, -1 671; GFX1064DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 672; GFX1064DAGISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] 673; GFX1064DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 674; GFX1064DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 675; GFX1064DAGISEL-NEXT: s_min_u32 s6, s6, s8 676; GFX1064DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 677; GFX1064DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 678; GFX1064DAGISEL-NEXT: ; %bb.5: 679; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v1, s6 680; GFX1064DAGISEL-NEXT: .LBB4_6: ; %endif 681; GFX1064DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] 682; GFX1064DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 683; GFX1064DAGISEL-NEXT: v_mov_b32_e32 v0, 0 684; GFX1064DAGISEL-NEXT: s_waitcnt lgkmcnt(0) 685; GFX1064DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] 686; GFX1064DAGISEL-NEXT: s_endpgm 687; 688; GFX1064GISEL-LABEL: divergent_cfg: 689; GFX1064GISEL: ; %bb.0: ; %entry 690; GFX1064GISEL-NEXT: v_cmp_le_u32_e32 vcc, 16, v0 691; GFX1064GISEL-NEXT: ; implicit-def: $sgpr6 692; GFX1064GISEL-NEXT: s_and_saveexec_b64 s[0:1], vcc 693; GFX1064GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] 694; GFX1064GISEL-NEXT: s_cbranch_execz .LBB4_2 695; GFX1064GISEL-NEXT: ; %bb.1: ; %else 696; GFX1064GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c 697; GFX1064GISEL-NEXT: ; implicit-def: $vgpr0 698; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) 699; GFX1064GISEL-NEXT: s_mov_b32 s6, s2 700; GFX1064GISEL-NEXT: .LBB4_2: ; %Flow 701; GFX1064GISEL-NEXT: s_andn2_saveexec_b64 s[0:1], s[0:1] 702; GFX1064GISEL-NEXT: s_cbranch_execz .LBB4_5 703; GFX1064GISEL-NEXT: ; %bb.3: ; %if 704; GFX1064GISEL-NEXT: s_mov_b64 s[2:3], exec 705; GFX1064GISEL-NEXT: s_mov_b32 s6, -1 706; GFX1064GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 707; GFX1064GISEL-NEXT: s_ff1_i32_b64 s7, s[2:3] 708; GFX1064GISEL-NEXT: v_readlane_b32 s8, v0, s7 709; GFX1064GISEL-NEXT: s_bitset0_b64 s[2:3], s7 710; GFX1064GISEL-NEXT: s_min_u32 s6, s6, s8 711; GFX1064GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 712; GFX1064GISEL-NEXT: s_cbranch_scc1 .LBB4_4 713; GFX1064GISEL-NEXT: .LBB4_5: ; %endif 714; GFX1064GISEL-NEXT: s_or_b64 exec, exec, s[0:1] 715; GFX1064GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 716; GFX1064GISEL-NEXT: v_mov_b32_e32 v0, s6 717; GFX1064GISEL-NEXT: v_mov_b32_e32 v1, 0 718; GFX1064GISEL-NEXT: s_waitcnt lgkmcnt(0) 719; GFX1064GISEL-NEXT: global_store_dword v1, v0, s[0:1] 720; GFX1064GISEL-NEXT: s_endpgm 721; 722; GFX1032DAGISEL-LABEL: divergent_cfg: 723; GFX1032DAGISEL: ; %bb.0: ; %entry 724; GFX1032DAGISEL-NEXT: v_cmp_lt_u32_e32 vcc_lo, 15, v0 725; GFX1032DAGISEL-NEXT: ; implicit-def: $sgpr1 726; GFX1032DAGISEL-NEXT: s_and_saveexec_b32 s0, vcc_lo 727; GFX1032DAGISEL-NEXT: s_xor_b32 s0, exec_lo, s0 728; GFX1032DAGISEL-NEXT: ; %bb.1: ; %else 729; GFX1032DAGISEL-NEXT: s_load_dword s1, s[4:5], 0x2c 730; GFX1032DAGISEL-NEXT: ; implicit-def: $vgpr0 731; GFX1032DAGISEL-NEXT: ; %bb.2: ; %Flow 732; GFX1032DAGISEL-NEXT: s_or_saveexec_b32 s0, s0 733; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) 734; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1 735; GFX1032DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0 736; GFX1032DAGISEL-NEXT: s_cbranch_execz .LBB4_6 737; GFX1032DAGISEL-NEXT: ; %bb.3: ; %if 738; GFX1032DAGISEL-NEXT: s_mov_b32 s2, exec_lo 739; GFX1032DAGISEL-NEXT: s_mov_b32 s1, -1 740; GFX1032DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 741; GFX1032DAGISEL-NEXT: s_ff1_i32_b32 s3, s2 742; GFX1032DAGISEL-NEXT: v_readlane_b32 s6, v0, s3 743; GFX1032DAGISEL-NEXT: s_bitset0_b32 s2, s3 744; GFX1032DAGISEL-NEXT: s_min_u32 s1, s1, s6 745; GFX1032DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 746; GFX1032DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 747; GFX1032DAGISEL-NEXT: ; %bb.5: 748; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v1, s1 749; GFX1032DAGISEL-NEXT: .LBB4_6: ; %endif 750; GFX1032DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 751; GFX1032DAGISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 752; GFX1032DAGISEL-NEXT: v_mov_b32_e32 v0, 0 753; GFX1032DAGISEL-NEXT: s_waitcnt lgkmcnt(0) 754; GFX1032DAGISEL-NEXT: global_store_dword v0, v1, s[0:1] 755; GFX1032DAGISEL-NEXT: s_endpgm 756; 757; GFX1032GISEL-LABEL: divergent_cfg: 758; GFX1032GISEL: ; %bb.0: ; %entry 759; GFX1032GISEL-NEXT: v_cmp_le_u32_e32 vcc_lo, 16, v0 760; GFX1032GISEL-NEXT: ; implicit-def: $sgpr0 761; GFX1032GISEL-NEXT: s_and_saveexec_b32 s1, vcc_lo 762; GFX1032GISEL-NEXT: s_xor_b32 s1, exec_lo, s1 763; GFX1032GISEL-NEXT: s_cbranch_execz .LBB4_2 764; GFX1032GISEL-NEXT: ; %bb.1: ; %else 765; GFX1032GISEL-NEXT: s_load_dword s0, s[4:5], 0x2c 766; GFX1032GISEL-NEXT: ; implicit-def: $vgpr0 767; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) 768; GFX1032GISEL-NEXT: s_mov_b32 s0, s0 769; GFX1032GISEL-NEXT: .LBB4_2: ; %Flow 770; GFX1032GISEL-NEXT: s_andn2_saveexec_b32 s1, s1 771; GFX1032GISEL-NEXT: s_cbranch_execz .LBB4_5 772; GFX1032GISEL-NEXT: ; %bb.3: ; %if 773; GFX1032GISEL-NEXT: s_mov_b32 s2, exec_lo 774; GFX1032GISEL-NEXT: s_mov_b32 s0, -1 775; GFX1032GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 776; GFX1032GISEL-NEXT: s_ff1_i32_b32 s3, s2 777; GFX1032GISEL-NEXT: v_readlane_b32 s6, v0, s3 778; GFX1032GISEL-NEXT: s_bitset0_b32 s2, s3 779; GFX1032GISEL-NEXT: s_min_u32 s0, s0, s6 780; GFX1032GISEL-NEXT: s_cmp_lg_u32 s2, 0 781; GFX1032GISEL-NEXT: s_cbranch_scc1 .LBB4_4 782; GFX1032GISEL-NEXT: .LBB4_5: ; %endif 783; GFX1032GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1 784; GFX1032GISEL-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x24 785; GFX1032GISEL-NEXT: v_mov_b32_e32 v0, s0 786; GFX1032GISEL-NEXT: v_mov_b32_e32 v1, 0 787; GFX1032GISEL-NEXT: s_waitcnt lgkmcnt(0) 788; GFX1032GISEL-NEXT: global_store_dword v1, v0, s[2:3] 789; GFX1032GISEL-NEXT: s_endpgm 790; 791; GFX1164DAGISEL-LABEL: divergent_cfg: 792; GFX1164DAGISEL: ; %bb.0: ; %entry 793; GFX1164DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 794; GFX1164DAGISEL-NEXT: s_mov_b64 s[0:1], exec 795; GFX1164DAGISEL-NEXT: ; implicit-def: $sgpr2 796; GFX1164DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 797; GFX1164DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0 798; GFX1164DAGISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] 799; GFX1164DAGISEL-NEXT: ; %bb.1: ; %else 800; GFX1164DAGISEL-NEXT: s_load_b32 s2, s[4:5], 0x2c 801; GFX1164DAGISEL-NEXT: ; implicit-def: $vgpr0 802; GFX1164DAGISEL-NEXT: ; %bb.2: ; %Flow 803; GFX1164DAGISEL-NEXT: s_or_saveexec_b64 s[0:1], s[0:1] 804; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) 805; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s2 806; GFX1164DAGISEL-NEXT: s_xor_b64 exec, exec, s[0:1] 807; GFX1164DAGISEL-NEXT: s_cbranch_execz .LBB4_6 808; GFX1164DAGISEL-NEXT: ; %bb.3: ; %if 809; GFX1164DAGISEL-NEXT: s_mov_b64 s[2:3], exec 810; GFX1164DAGISEL-NEXT: s_mov_b32 s6, -1 811; GFX1164DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 812; GFX1164DAGISEL-NEXT: s_ctz_i32_b64 s7, s[2:3] 813; GFX1164DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) 814; GFX1164DAGISEL-NEXT: v_readlane_b32 s8, v0, s7 815; GFX1164DAGISEL-NEXT: s_bitset0_b64 s[2:3], s7 816; GFX1164DAGISEL-NEXT: s_min_u32 s6, s6, s8 817; GFX1164DAGISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 818; GFX1164DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 819; GFX1164DAGISEL-NEXT: ; %bb.5: 820; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v1, s6 821; GFX1164DAGISEL-NEXT: .LBB4_6: ; %endif 822; GFX1164DAGISEL-NEXT: s_or_b64 exec, exec, s[0:1] 823; GFX1164DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 824; GFX1164DAGISEL-NEXT: v_mov_b32_e32 v0, 0 825; GFX1164DAGISEL-NEXT: s_waitcnt lgkmcnt(0) 826; GFX1164DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] 827; GFX1164DAGISEL-NEXT: s_endpgm 828; 829; GFX1164GISEL-LABEL: divergent_cfg: 830; GFX1164GISEL: ; %bb.0: ; %entry 831; GFX1164GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 832; GFX1164GISEL-NEXT: s_mov_b64 s[0:1], exec 833; GFX1164GISEL-NEXT: ; implicit-def: $sgpr6 834; GFX1164GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 835; GFX1164GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 836; GFX1164GISEL-NEXT: s_xor_b64 s[0:1], exec, s[0:1] 837; GFX1164GISEL-NEXT: s_cbranch_execz .LBB4_2 838; GFX1164GISEL-NEXT: ; %bb.1: ; %else 839; GFX1164GISEL-NEXT: s_load_b32 s2, s[4:5], 0x2c 840; GFX1164GISEL-NEXT: ; implicit-def: $vgpr0 841; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) 842; GFX1164GISEL-NEXT: s_mov_b32 s6, s2 843; GFX1164GISEL-NEXT: .LBB4_2: ; %Flow 844; GFX1164GISEL-NEXT: s_and_not1_saveexec_b64 s[0:1], s[0:1] 845; GFX1164GISEL-NEXT: s_cbranch_execz .LBB4_5 846; GFX1164GISEL-NEXT: ; %bb.3: ; %if 847; GFX1164GISEL-NEXT: s_mov_b64 s[2:3], exec 848; GFX1164GISEL-NEXT: s_mov_b32 s6, -1 849; GFX1164GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 850; GFX1164GISEL-NEXT: s_ctz_i32_b64 s7, s[2:3] 851; GFX1164GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) 852; GFX1164GISEL-NEXT: v_readlane_b32 s8, v0, s7 853; GFX1164GISEL-NEXT: s_bitset0_b64 s[2:3], s7 854; GFX1164GISEL-NEXT: s_min_u32 s6, s6, s8 855; GFX1164GISEL-NEXT: s_cmp_lg_u64 s[2:3], 0 856; GFX1164GISEL-NEXT: s_cbranch_scc1 .LBB4_4 857; GFX1164GISEL-NEXT: .LBB4_5: ; %endif 858; GFX1164GISEL-NEXT: s_or_b64 exec, exec, s[0:1] 859; GFX1164GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 860; GFX1164GISEL-NEXT: v_mov_b32_e32 v0, s6 861; GFX1164GISEL-NEXT: v_mov_b32_e32 v1, 0 862; GFX1164GISEL-NEXT: s_waitcnt lgkmcnt(0) 863; GFX1164GISEL-NEXT: global_store_b32 v1, v0, s[0:1] 864; GFX1164GISEL-NEXT: s_endpgm 865; 866; GFX1132DAGISEL-LABEL: divergent_cfg: 867; GFX1132DAGISEL: ; %bb.0: ; %entry 868; GFX1132DAGISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 869; GFX1132DAGISEL-NEXT: s_mov_b32 s0, exec_lo 870; GFX1132DAGISEL-NEXT: ; implicit-def: $sgpr1 871; GFX1132DAGISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 872; GFX1132DAGISEL-NEXT: v_cmpx_lt_u32_e32 15, v0 873; GFX1132DAGISEL-NEXT: s_xor_b32 s0, exec_lo, s0 874; GFX1132DAGISEL-NEXT: ; %bb.1: ; %else 875; GFX1132DAGISEL-NEXT: s_load_b32 s1, s[4:5], 0x2c 876; GFX1132DAGISEL-NEXT: ; implicit-def: $vgpr0 877; GFX1132DAGISEL-NEXT: ; %bb.2: ; %Flow 878; GFX1132DAGISEL-NEXT: s_or_saveexec_b32 s0, s0 879; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) 880; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1 881; GFX1132DAGISEL-NEXT: s_xor_b32 exec_lo, exec_lo, s0 882; GFX1132DAGISEL-NEXT: s_cbranch_execz .LBB4_6 883; GFX1132DAGISEL-NEXT: ; %bb.3: ; %if 884; GFX1132DAGISEL-NEXT: s_mov_b32 s2, exec_lo 885; GFX1132DAGISEL-NEXT: s_mov_b32 s1, -1 886; GFX1132DAGISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 887; GFX1132DAGISEL-NEXT: s_ctz_i32_b32 s3, s2 888; GFX1132DAGISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) 889; GFX1132DAGISEL-NEXT: v_readlane_b32 s6, v0, s3 890; GFX1132DAGISEL-NEXT: s_bitset0_b32 s2, s3 891; GFX1132DAGISEL-NEXT: s_min_u32 s1, s1, s6 892; GFX1132DAGISEL-NEXT: s_cmp_lg_u32 s2, 0 893; GFX1132DAGISEL-NEXT: s_cbranch_scc1 .LBB4_4 894; GFX1132DAGISEL-NEXT: ; %bb.5: 895; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v1, s1 896; GFX1132DAGISEL-NEXT: .LBB4_6: ; %endif 897; GFX1132DAGISEL-NEXT: s_or_b32 exec_lo, exec_lo, s0 898; GFX1132DAGISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 899; GFX1132DAGISEL-NEXT: v_mov_b32_e32 v0, 0 900; GFX1132DAGISEL-NEXT: s_waitcnt lgkmcnt(0) 901; GFX1132DAGISEL-NEXT: global_store_b32 v0, v1, s[0:1] 902; GFX1132DAGISEL-NEXT: s_endpgm 903; 904; GFX1132GISEL-LABEL: divergent_cfg: 905; GFX1132GISEL: ; %bb.0: ; %entry 906; GFX1132GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 907; GFX1132GISEL-NEXT: s_mov_b32 s1, exec_lo 908; GFX1132GISEL-NEXT: ; implicit-def: $sgpr0 909; GFX1132GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 910; GFX1132GISEL-NEXT: v_cmpx_le_u32_e32 16, v0 911; GFX1132GISEL-NEXT: s_xor_b32 s1, exec_lo, s1 912; GFX1132GISEL-NEXT: s_cbranch_execz .LBB4_2 913; GFX1132GISEL-NEXT: ; %bb.1: ; %else 914; GFX1132GISEL-NEXT: s_load_b32 s0, s[4:5], 0x2c 915; GFX1132GISEL-NEXT: ; implicit-def: $vgpr0 916; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) 917; GFX1132GISEL-NEXT: s_mov_b32 s0, s0 918; GFX1132GISEL-NEXT: .LBB4_2: ; %Flow 919; GFX1132GISEL-NEXT: s_and_not1_saveexec_b32 s1, s1 920; GFX1132GISEL-NEXT: s_cbranch_execz .LBB4_5 921; GFX1132GISEL-NEXT: ; %bb.3: ; %if 922; GFX1132GISEL-NEXT: s_mov_b32 s2, exec_lo 923; GFX1132GISEL-NEXT: s_mov_b32 s0, -1 924; GFX1132GISEL-NEXT: .LBB4_4: ; =>This Inner Loop Header: Depth=1 925; GFX1132GISEL-NEXT: s_ctz_i32_b32 s3, s2 926; GFX1132GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) 927; GFX1132GISEL-NEXT: v_readlane_b32 s6, v0, s3 928; GFX1132GISEL-NEXT: s_bitset0_b32 s2, s3 929; GFX1132GISEL-NEXT: s_min_u32 s0, s0, s6 930; GFX1132GISEL-NEXT: s_cmp_lg_u32 s2, 0 931; GFX1132GISEL-NEXT: s_cbranch_scc1 .LBB4_4 932; GFX1132GISEL-NEXT: .LBB4_5: ; %endif 933; GFX1132GISEL-NEXT: s_or_b32 exec_lo, exec_lo, s1 934; GFX1132GISEL-NEXT: s_load_b64 s[2:3], s[4:5], 0x24 935; GFX1132GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, 0 936; GFX1132GISEL-NEXT: s_waitcnt lgkmcnt(0) 937; GFX1132GISEL-NEXT: global_store_b32 v1, v0, s[2:3] 938; GFX1132GISEL-NEXT: s_endpgm 939entry: 940 %tid = call i32 @llvm.amdgcn.workitem.id.x() 941 %d_cmp = icmp ult i32 %tid, 16 942 br i1 %d_cmp, label %if, label %else 943 944if: 945 %reducedValTid = call i32 @llvm.amdgcn.wave.reduce.umin.i32(i32 %tid, i32 1) 946 br label %endif 947 948else: 949 %reducedValIn = call i32 @llvm.amdgcn.wave.reduce.umin.i32(i32 %in, i32 1) 950 br label %endif 951 952endif: 953 %combine = phi i32 [%reducedValTid, %if], [%reducedValIn, %else] 954 store i32 %combine, ptr addrspace(1) %out 955 ret void 956} 957