; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

define amdgpu_kernel void @negated_cond(ptr addrspace(1) %arg1) {
; GCN-LABEL: negated_cond:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x9
; GCN-NEXT:    s_mov_b32 s7, 0xf000
; GCN-NEXT:    s_mov_b32 s10, -1
; GCN-NEXT:    s_mov_b32 s6, 0
; GCN-NEXT:    s_mov_b32 s11, s7
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_mov_b32 s8, s4
; GCN-NEXT:    s_mov_b32 s9, s5
; GCN-NEXT:    v_mov_b32_e32 v0, 0
; GCN-NEXT:    s_branch .LBB0_2
; GCN-NEXT:  .LBB0_1: ; %loop.exit.guard
; GCN-NEXT:    ; in Loop: Header=BB0_2 Depth=1
; GCN-NEXT:    s_and_b64 vcc, exec, s[14:15]
; GCN-NEXT:    s_cbranch_vccnz .LBB0_9
; GCN-NEXT:  .LBB0_2: ; %bb1
; GCN-NEXT:    ; =>This Loop Header: Depth=1
; GCN-NEXT:    ; Child Loop BB0_4 Depth 2
; GCN-NEXT:    buffer_load_dword v1, off, s[8:11], 0
; GCN-NEXT:    s_waitcnt vmcnt(0)
; GCN-NEXT:    v_cmp_ne_u32_e64 s[2:3], 0, v1
; GCN-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v1
; GCN-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
; GCN-NEXT:    v_cmp_ne_u32_e64 s[0:1], 1, v1
; GCN-NEXT:    s_mov_b32 s12, s6
; GCN-NEXT:    s_branch .LBB0_4
; GCN-NEXT:  .LBB0_3: ; %Flow1
; GCN-NEXT:    ; in Loop: Header=BB0_4 Depth=2
; GCN-NEXT:    s_andn2_b64 vcc, exec, s[16:17]
; GCN-NEXT:    s_cbranch_vccz .LBB0_1
; GCN-NEXT:  .LBB0_4: ; %bb2
; GCN-NEXT:    ; Parent Loop BB0_2 Depth=1
; GCN-NEXT:    ; => This Inner Loop Header: Depth=2
; GCN-NEXT:    s_and_b64 vcc, exec, s[0:1]
; GCN-NEXT:    s_lshl_b32 s12, s12, 5
; GCN-NEXT:    s_cbranch_vccz .LBB0_6
; GCN-NEXT:  ; %bb.5: ; in Loop: Header=BB0_4 Depth=2
; GCN-NEXT:    s_mov_b64 s[14:15], s[2:3]
; GCN-NEXT:    s_branch .LBB0_7
; GCN-NEXT:  .LBB0_6: ; %bb3
; GCN-NEXT:    ; in Loop: Header=BB0_4 Depth=2
; GCN-NEXT:    s_add_i32 s12, s12, 1
; GCN-NEXT:    s_mov_b64 s[14:15], -1
; GCN-NEXT:  .LBB0_7: ; %Flow
; GCN-NEXT:    ; in Loop: Header=BB0_4 Depth=2
; GCN-NEXT:    s_andn2_b64 vcc, exec, s[14:15]
; GCN-NEXT:    s_mov_b64 s[16:17], -1
; GCN-NEXT:    s_cbranch_vccnz .LBB0_3
; GCN-NEXT:  ; %bb.8: ; %bb4
; GCN-NEXT:    ; in Loop: Header=BB0_4 Depth=2
; GCN-NEXT:    s_ashr_i32 s13, s12, 31
; GCN-NEXT:    s_lshl_b64 s[16:17], s[12:13], 2
; GCN-NEXT:    s_mov_b64 s[14:15], 0
; GCN-NEXT:    v_mov_b32_e32 v1, s16
; GCN-NEXT:    v_mov_b32_e32 v2, s17
; GCN-NEXT:    buffer_store_dword v0, v[1:2], s[4:7], 0 addr64
; GCN-NEXT:    s_cmp_eq_u32 s12, 32
; GCN-NEXT:    s_cselect_b64 s[16:17], -1, 0
; GCN-NEXT:    s_branch .LBB0_3
; GCN-NEXT:  .LBB0_9: ; %DummyReturnBlock
; GCN-NEXT:    s_endpgm
bb:
  br label %bb1

bb1:
  %tmp1 = load i32, ptr addrspace(1) %arg1
  %tmp2 = icmp eq i32 %tmp1, 0
  br label %bb2

bb2:
  %tmp3 = phi i32 [ 0, %bb1 ], [ %tmp6, %bb4 ]
  %tmp4 = shl i32 %tmp3, 5
  br i1 %tmp2, label %bb3, label %bb4

bb3:
  %tmp5 = add i32 %tmp4, 1
  br label %bb4

bb4:
  %tmp6 = phi i32 [ %tmp5, %bb3 ], [ %tmp4, %bb2 ]
  %gep = getelementptr inbounds i32, ptr addrspace(1) %arg1, i32 %tmp6
  store i32 0, ptr addrspace(1) %gep
  %tmp7 = icmp eq i32 %tmp6, 32
  br i1 %tmp7, label %bb1, label %bb2
}

define amdgpu_kernel void @negated_cond_dominated_blocks(ptr addrspace(1) %arg1) {
; GCN-LABEL: negated_cond_dominated_blocks:
; GCN:       ; %bb.0: ; %bb
; GCN-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x9
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_load_dword s0, s[4:5], 0x0
; GCN-NEXT:    s_mov_b32 s6, 0
; GCN-NEXT:    s_mov_b32 s7, 0xf000
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_cmp_lg_u32 s0, 0
; GCN-NEXT:    s_cselect_b64 s[0:1], -1, 0
; GCN-NEXT:    s_and_b64 s[0:1], exec, s[0:1]
; GCN-NEXT:    v_mov_b32_e32 v0, 0
; GCN-NEXT:    s_mov_b32 s3, s6
; GCN-NEXT:    s_branch .LBB1_2
; GCN-NEXT:  .LBB1_1: ; %bb7
; GCN-NEXT:    ; in Loop: Header=BB1_2 Depth=1
; GCN-NEXT:    s_ashr_i32 s3, s2, 31
; GCN-NEXT:    s_lshl_b64 s[8:9], s[2:3], 2
; GCN-NEXT:    v_mov_b32_e32 v1, s8
; GCN-NEXT:    v_mov_b32_e32 v2, s9
; GCN-NEXT:    s_cmp_eq_u32 s2, 32
; GCN-NEXT:    buffer_store_dword v0, v[1:2], s[4:7], 0 addr64
; GCN-NEXT:    s_mov_b32 s3, s2
; GCN-NEXT:    s_cbranch_scc1 .LBB1_6
; GCN-NEXT:  .LBB1_2: ; %bb4
; GCN-NEXT:    ; =>This Inner Loop Header: Depth=1
; GCN-NEXT:    s_mov_b64 vcc, s[0:1]
; GCN-NEXT:    s_cbranch_vccz .LBB1_4
; GCN-NEXT:  ; %bb.3: ; %bb6
; GCN-NEXT:    ; in Loop: Header=BB1_2 Depth=1
; GCN-NEXT:    s_add_i32 s2, s3, 1
; GCN-NEXT:    s_mov_b64 vcc, exec
; GCN-NEXT:    s_cbranch_execnz .LBB1_1
; GCN-NEXT:    s_branch .LBB1_5
; GCN-NEXT:  .LBB1_4: ; in Loop: Header=BB1_2 Depth=1
; GCN-NEXT:    ; implicit-def: $sgpr2
; GCN-NEXT:    s_mov_b64 vcc, 0
; GCN-NEXT:  .LBB1_5: ; %bb5
; GCN-NEXT:    ; in Loop: Header=BB1_2 Depth=1
; GCN-NEXT:    s_lshl_b32 s2, s3, 5
; GCN-NEXT:    s_or_b32 s2, s2, 1
; GCN-NEXT:    s_branch .LBB1_1
; GCN-NEXT:  .LBB1_6: ; %bb3
; GCN-NEXT:    s_endpgm
bb:
  br label %bb2

bb2:
  %tmp1 = load i32, ptr addrspace(1) %arg1
  %tmp2 = icmp eq i32 %tmp1, 0
  br label %bb4

bb3:
  ret void

bb4:
  %tmp3 = phi i32 [ 0, %bb2 ], [ %tmp7, %bb7 ]
  %tmp4 = shl i32 %tmp3, 5
  br i1 %tmp2, label %bb5, label %bb6

bb5:
  %tmp5 = add i32 %tmp4, 1
  br label %bb7

bb6:
  %tmp6 = add i32 %tmp3, 1
  br label %bb7

bb7:
  %tmp7 = phi i32 [ %tmp5, %bb5 ], [ %tmp6, %bb6 ]
  %gep = getelementptr inbounds i32, ptr addrspace(1) %arg1, i32 %tmp7
  store i32 0, ptr addrspace(1) %gep
  %tmp8 = icmp eq i32 %tmp7, 32
  br i1 %tmp8, label %bb3, label %bb4
}