; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,WAVE64 %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1031 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,WAVE32 %s

; End to end tests for scalar vs. vector boolean legalization strategies.

; Uniform (SGPR) i1 condition feeding a divergent select: the trunc'd bit is
; masked with s_and_b32 and materialized into vcc/vcc_lo for v_cndmask.
define amdgpu_ps float @select_vgpr_sgpr_trunc_cond(i32 inreg %a, i32 %b, i32 %c) {
; WAVE64-LABEL: select_vgpr_sgpr_trunc_cond:
; WAVE64:       ; %bb.0:
; WAVE64-NEXT:    s_and_b32 s0, 1, s0
; WAVE64-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s0
; WAVE64-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
; WAVE64-NEXT:    ; return to shader part epilog
;
; WAVE32-LABEL: select_vgpr_sgpr_trunc_cond:
; WAVE32:       ; %bb.0:
; WAVE32-NEXT:    s_and_b32 s0, 1, s0
; WAVE32-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s0
; WAVE32-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; WAVE32-NEXT:    ; return to shader part epilog
  %cc = trunc i32 %a to i1
  %r = select i1 %cc, i32 %b, i32 %c
  %r.f = bitcast i32 %r to float
  ret float %r.f
}

; Same as above but the condition is an AND of two uniform i1 values: the
; boolean logic stays scalar (s_and_b32) and only the final condition is
; copied into vcc/vcc_lo.
define amdgpu_ps float @select_vgpr_sgpr_trunc_and_cond(i32 inreg %a.0, i32 inreg %a.1, i32 %b, i32 %c) {
; WAVE64-LABEL: select_vgpr_sgpr_trunc_and_cond:
; WAVE64:       ; %bb.0:
; WAVE64-NEXT:    s_and_b32 s0, s0, s1
; WAVE64-NEXT:    s_and_b32 s0, 1, s0
; WAVE64-NEXT:    v_cmp_ne_u32_e64 vcc, 0, s0
; WAVE64-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc
; WAVE64-NEXT:    ; return to shader part epilog
;
; WAVE32-LABEL: select_vgpr_sgpr_trunc_and_cond:
; WAVE32:       ; %bb.0:
; WAVE32-NEXT:    s_and_b32 s0, s0, s1
; WAVE32-NEXT:    s_and_b32 s0, 1, s0
; WAVE32-NEXT:    v_cmp_ne_u32_e64 vcc_lo, 0, s0
; WAVE32-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; WAVE32-NEXT:    ; return to shader part epilog
  %cc.0 = trunc i32 %a.0 to i1
  %cc.1 = trunc i32 %a.1 to i1
  %and = and i1 %cc.0, %cc.1
  %r = select i1 %and, i32 %b, i32 %c
  %r.f = bitcast i32 %r to float
  ret float %r.f
}

; Fully uniform select: everything stays on the scalar unit
; (s_cmp_lg_u32 + s_cselect_b32), identical for wave32 and wave64.
define amdgpu_ps i32 @select_sgpr_trunc_and_cond(i32 inreg %a.0, i32 inreg %a.1, i32 inreg %b, i32 inreg %c) {
; GCN-LABEL: select_sgpr_trunc_and_cond:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_and_b32 s0, s0, s1
; GCN-NEXT:    s_and_b32 s0, s0, 1
; GCN-NEXT:    s_cmp_lg_u32 s0, 0
; GCN-NEXT:    s_cselect_b32 s0, s2, s3
; GCN-NEXT:    ; return to shader part epilog
  %cc.0 = trunc i32 %a.0 to i1
  %cc.1 = trunc i32 %a.1 to i1
  %and = and i1 %cc.0, %cc.1
  %r = select i1 %and, i32 %b, i32 %c
  ret i32 %r
}

; Uniform trunc'd i1 used as a branch condition: lowered to a scalar
; compare-and-branch (s_cmp_lg_u32 + s_cbranch_scc1), no exec-mask divergence.
define amdgpu_kernel void @sgpr_trunc_brcond(i32 %cond) {
; WAVE64-LABEL: sgpr_trunc_brcond:
; WAVE64:       ; %bb.0: ; %entry
; WAVE64-NEXT:    s_load_dword s0, s[4:5], 0x24
; WAVE64-NEXT:    s_waitcnt lgkmcnt(0)
; WAVE64-NEXT:    s_xor_b32 s0, s0, 1
; WAVE64-NEXT:    s_and_b32 s0, s0, 1
; WAVE64-NEXT:    s_cmp_lg_u32 s0, 0
; WAVE64-NEXT:    s_cbranch_scc1 .LBB3_2
; WAVE64-NEXT:  ; %bb.1: ; %bb0
; WAVE64-NEXT:    v_mov_b32_e32 v0, 0
; WAVE64-NEXT:    global_store_dword v[0:1], v0, off
; WAVE64-NEXT:    s_waitcnt vmcnt(0)
; WAVE64-NEXT:  .LBB3_2: ; %bb1
; WAVE64-NEXT:    v_mov_b32_e32 v0, 1
; WAVE64-NEXT:    global_store_dword v[0:1], v0, off
; WAVE64-NEXT:    s_waitcnt vmcnt(0)
;
; WAVE32-LABEL: sgpr_trunc_brcond:
; WAVE32:       ; %bb.0: ; %entry
; WAVE32-NEXT:    s_load_dword s0, s[4:5], 0x24
; WAVE32-NEXT:    s_waitcnt lgkmcnt(0)
; WAVE32-NEXT:    s_xor_b32 s0, s0, 1
; WAVE32-NEXT:    s_and_b32 s0, s0, 1
; WAVE32-NEXT:    s_cmp_lg_u32 s0, 0
; WAVE32-NEXT:    s_cbranch_scc1 .LBB3_2
; WAVE32-NEXT:  ; %bb.1: ; %bb0
; WAVE32-NEXT:    v_mov_b32_e32 v0, 0
; WAVE32-NEXT:    global_store_dword v[0:1], v0, off
; WAVE32-NEXT:    s_waitcnt_vscnt null, 0x0
; WAVE32-NEXT:  .LBB3_2: ; %bb1
; WAVE32-NEXT:    v_mov_b32_e32 v0, 1
; WAVE32-NEXT:    global_store_dword v[0:1], v0, off
; WAVE32-NEXT:    s_waitcnt_vscnt null, 0x0
entry:
  %trunc = trunc i32 %cond to i1
  br i1 %trunc, label %bb0, label %bb1

bb0:
  store volatile i32 0, ptr addrspace(1) undef
  unreachable

bb1:
  store volatile i32 1, ptr addrspace(1) undef
  unreachable
}

; AND of two uniform i1 conditions driving a branch: the combine stays on
; scalar registers and feeds a single scalar compare-and-branch.
define amdgpu_kernel void @brcond_sgpr_trunc_and(i32 %cond0, i32 %cond1) {
; WAVE64-LABEL: brcond_sgpr_trunc_and:
; WAVE64:       ; %bb.0: ; %entry
; WAVE64-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; WAVE64-NEXT:    s_waitcnt lgkmcnt(0)
; WAVE64-NEXT:    s_and_b32 s0, s0, s1
; WAVE64-NEXT:    s_xor_b32 s0, s0, 1
; WAVE64-NEXT:    s_and_b32 s0, s0, 1
; WAVE64-NEXT:    s_cmp_lg_u32 s0, 0
; WAVE64-NEXT:    s_cbranch_scc1 .LBB4_2
; WAVE64-NEXT:  ; %bb.1: ; %bb0
; WAVE64-NEXT:    v_mov_b32_e32 v0, 0
; WAVE64-NEXT:    global_store_dword v[0:1], v0, off
; WAVE64-NEXT:    s_waitcnt vmcnt(0)
; WAVE64-NEXT:  .LBB4_2: ; %bb1
; WAVE64-NEXT:    v_mov_b32_e32 v0, 1
; WAVE64-NEXT:    global_store_dword v[0:1], v0, off
; WAVE64-NEXT:    s_waitcnt vmcnt(0)
;
; WAVE32-LABEL: brcond_sgpr_trunc_and:
; WAVE32:       ; %bb.0: ; %entry
; WAVE32-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; WAVE32-NEXT:    s_waitcnt lgkmcnt(0)
; WAVE32-NEXT:    s_and_b32 s0, s0, s1
; WAVE32-NEXT:    s_xor_b32 s0, s0, 1
; WAVE32-NEXT:    s_and_b32 s0, s0, 1
; WAVE32-NEXT:    s_cmp_lg_u32 s0, 0
; WAVE32-NEXT:    s_cbranch_scc1 .LBB4_2
; WAVE32-NEXT:  ; %bb.1: ; %bb0
; WAVE32-NEXT:    v_mov_b32_e32 v0, 0
; WAVE32-NEXT:    global_store_dword v[0:1], v0, off
; WAVE32-NEXT:    s_waitcnt_vscnt null, 0x0
; WAVE32-NEXT:  .LBB4_2: ; %bb1
; WAVE32-NEXT:    v_mov_b32_e32 v0, 1
; WAVE32-NEXT:    global_store_dword v[0:1], v0, off
; WAVE32-NEXT:    s_waitcnt_vscnt null, 0x0
entry:
  %trunc0 = trunc i32 %cond0 to i1
  %trunc1 = trunc i32 %cond1 to i1
  %and = and i1 %trunc0, %trunc1
  br i1 %and, label %bb0, label %bb1

bb0:
  store volatile i32 0, ptr addrspace(1) undef
  unreachable

bb1:
  store volatile i32 1, ptr addrspace(1) undef
  unreachable
}