; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs -stop-after=si-pre-emit-peephole -o - %s | FileCheck -check-prefix=GCN %s
; If the block containing the SI_RETURN_TO_EPILOG is not the last block, insert an empty block at the end and
; insert an unconditional jump there.

; Single-block function: the returning block is already the last block, so the
; expected MIR keeps SI_RETURN_TO_EPILOG in bb.0 and adds no extra block.
define amdgpu_ps float @simple_test_return_to_epilog(float %a) #0 {
  ; GCN-LABEL: name: simple_test_return_to_epilog
  ; GCN: bb.0.entry:
  ; GCN-NEXT:   liveins: $vgpr0
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT:   SI_RETURN_TO_EPILOG killed $vgpr0
entry:
  ret float %a
}

; The returning block (%if) is followed by a block that ends in unreachable
; (%else). The expected MIR has bb.1.if branch to an empty trailing bb.3.
define amdgpu_ps float @test_return_to_epilog_into_end_block(i32 inreg %a, float %b) #0 {
  ; GCN-LABEL: name: test_return_to_epilog_into_end_block
  ; GCN: bb.0.entry:
  ; GCN-NEXT:   successors: %bb.1(0x80000000), %bb.2(0x00000000)
  ; GCN-NEXT:   liveins: $sgpr2, $vgpr0
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT:   S_CMP_LT_I32 killed renamable $sgpr2, 1, implicit-def $scc
  ; GCN-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit killed $scc
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT: bb.1.if:
  ; GCN-NEXT:   successors: %bb.3(0x80000000)
  ; GCN-NEXT:   liveins: $vgpr0
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT:   S_BRANCH %bb.3
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT: bb.2.else:
  ; GCN-NEXT:   successors:
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT:   renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
  ; GCN-NEXT:   GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
  ; GCN-NEXT:   S_WAITCNT 3952
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT: bb.3:
entry:
  %cc = icmp sgt i32 %a, 0
  br i1 %cc, label %if, label %else
if:                                               ; preds = %entry
  ret float %b
else:                                             ; preds = %entry
  store volatile i32 0, ptr addrspace(1) undef
  unreachable
}

; Two returning blocks (%if and %else.if), neither of which is last. The
; expected MIR has both branch to the same empty trailing bb.5.
define amdgpu_ps float @test_unify_return_to_epilog_into_end_block(i32 inreg %a, i32 inreg %b, float %c, float %d) #0 {
  ; GCN-LABEL: name: test_unify_return_to_epilog_into_end_block
  ; GCN: bb.0.entry:
  ; GCN-NEXT:   successors: %bb.1(0x50000000), %bb.2(0x30000000)
  ; GCN-NEXT:   liveins: $sgpr2, $sgpr3, $vgpr0, $vgpr1
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT:   S_CMP_LT_I32 killed renamable $sgpr2, 1, implicit-def $scc
  ; GCN-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit killed $scc
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT: bb.1.if:
  ; GCN-NEXT:   successors: %bb.5(0x80000000)
  ; GCN-NEXT:   liveins: $vgpr0
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT:   S_BRANCH %bb.5
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT: bb.2.else.if.cond:
  ; GCN-NEXT:   successors: %bb.3(0x80000000), %bb.4(0x00000000)
  ; GCN-NEXT:   liveins: $sgpr3, $vgpr1
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT:   S_CMP_LT_I32 killed renamable $sgpr3, 1, implicit-def $scc
  ; GCN-NEXT:   S_CBRANCH_SCC1 %bb.4, implicit killed $scc
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT: bb.3.else.if:
  ; GCN-NEXT:   successors: %bb.5(0x80000000)
  ; GCN-NEXT:   liveins: $vgpr1
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT:   $vgpr0 = V_MOV_B32_e32 killed $vgpr1, implicit $exec, implicit $exec
  ; GCN-NEXT:   S_BRANCH %bb.5
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT: bb.4.else:
  ; GCN-NEXT:   successors:
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT:   renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec
  ; GCN-NEXT:   GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1)
  ; GCN-NEXT:   S_WAITCNT 3952
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT: bb.5:
entry:
  %cc = icmp sgt i32 %a, 0
  br i1 %cc, label %if, label %else.if.cond
if:                                               ; preds = %entry
  ret float %c
else.if.cond:                                     ; preds = %entry
  %cc1 = icmp sgt i32 %b, 0
  br i1 %cc1, label %else.if, label %else
else.if:                                          ; preds = %else.if.cond
  ret float %d
else:                                             ; preds = %else.if.cond
  store volatile i32 0, ptr addrspace(1) undef
  unreachable
}

; Return combined with llvm.amdgcn.kill and a self-looping block. The expected
; MIR routes the returning paths (bb.2.end, bb.7.kill0) to an empty trailing
; bb.9, placed after the early-exit block bb.8 (EXP_DONE + S_ENDPGM).
; The first IR block has no label, so it is %0 (printed as %ir-block.0 in the
; checks); %kill1 is not targeted by any branch.
define amdgpu_ps { <4 x float> } @test_return_to_epilog_with_optimized_kill(float %val) #0 {
  ; GCN-LABEL: name: test_return_to_epilog_with_optimized_kill
  ; GCN: bb.0 (%ir-block.0):
  ; GCN-NEXT:   successors: %bb.3(0x40000000), %bb.1(0x40000000)
  ; GCN-NEXT:   liveins: $vgpr0
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT:   renamable $vgpr1 = nofpexcept V_RCP_F32_e32 $vgpr0, implicit $mode, implicit $exec
  ; GCN-NEXT:   $sgpr0_sgpr1 = S_MOV_B64 $exec
  ; GCN-NEXT:   nofpexcept V_CMP_NGT_F32_e32 0, killed $vgpr1, implicit-def $vcc, implicit $mode, implicit $exec
  ; GCN-NEXT:   $sgpr2_sgpr3 = S_AND_SAVEEXEC_B64 killed $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
  ; GCN-NEXT:   renamable $sgpr2_sgpr3 = S_XOR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def dead $scc
  ; GCN-NEXT:   S_CBRANCH_EXECNZ %bb.3, implicit $exec
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT: bb.1.Flow1:
  ; GCN-NEXT:   successors: %bb.6(0x40000000), %bb.2(0x40000000)
  ; GCN-NEXT:   liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT:   $sgpr2_sgpr3 = S_ANDN2_SAVEEXEC_B64 killed $sgpr2_sgpr3, implicit-def $exec, implicit-def $scc, implicit $exec
  ; GCN-NEXT:   S_CBRANCH_EXECNZ %bb.6, implicit $exec
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT: bb.2.end:
  ; GCN-NEXT:   successors: %bb.9(0x80000000)
  ; GCN-NEXT:   liveins: $sgpr2_sgpr3
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT:   $exec = S_OR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def $scc
  ; GCN-NEXT:   S_BRANCH %bb.9
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT: bb.3.flow.preheader:
  ; GCN-NEXT:   successors: %bb.4(0x80000000)
  ; GCN-NEXT:   liveins: $vgpr0, $sgpr0_sgpr1, $sgpr2_sgpr3
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT:   nofpexcept V_CMP_NGT_F32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $mode, implicit $exec
  ; GCN-NEXT:   renamable $sgpr4_sgpr5 = S_MOV_B64 0
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT: bb.4.flow:
  ; GCN-NEXT:   successors: %bb.5(0x04000000), %bb.4(0x7c000000)
  ; GCN-NEXT:   liveins: $vcc, $sgpr0_sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT:   renamable $sgpr6_sgpr7 = S_AND_B64 $exec, renamable $vcc, implicit-def $scc
  ; GCN-NEXT:   renamable $sgpr4_sgpr5 = S_OR_B64 killed renamable $sgpr6_sgpr7, killed renamable $sgpr4_sgpr5, implicit-def $scc
  ; GCN-NEXT:   $exec = S_ANDN2_B64 $exec, renamable $sgpr4_sgpr5, implicit-def $scc
  ; GCN-NEXT:   S_CBRANCH_EXECNZ %bb.4, implicit $exec
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT: bb.5.Flow:
  ; GCN-NEXT:   successors: %bb.6(0x40000000), %bb.2(0x40000000)
  ; GCN-NEXT:   liveins: $sgpr0_sgpr1, $sgpr2_sgpr3, $sgpr4_sgpr5
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT:   $exec = S_OR_B64 $exec, killed renamable $sgpr4_sgpr5, implicit-def $scc
  ; GCN-NEXT:   $sgpr2_sgpr3 = S_ANDN2_SAVEEXEC_B64 killed $sgpr2_sgpr3, implicit-def $exec, implicit-def $scc, implicit $exec
  ; GCN-NEXT:   S_CBRANCH_EXECZ %bb.2, implicit $exec
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT: bb.6.kill0:
  ; GCN-NEXT:   successors: %bb.7(0x40000000), %bb.8(0x40000000)
  ; GCN-NEXT:   liveins: $sgpr0_sgpr1, $sgpr2_sgpr3
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT:   dead renamable $sgpr0_sgpr1 = S_ANDN2_B64 killed renamable $sgpr0_sgpr1, $exec, implicit-def $scc
  ; GCN-NEXT:   S_CBRANCH_SCC0 %bb.8, implicit $scc
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT: bb.7.kill0:
  ; GCN-NEXT:   successors: %bb.9(0x80000000)
  ; GCN-NEXT:   liveins: $sgpr2_sgpr3, $scc
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT:   $exec = S_MOV_B64 0
  ; GCN-NEXT:   $exec = S_OR_B64 $exec, killed renamable $sgpr2_sgpr3, implicit-def $scc
  ; GCN-NEXT:   S_BRANCH %bb.9
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT: bb.8:
  ; GCN-NEXT:   $exec = S_MOV_B64 0
  ; GCN-NEXT:   EXP_DONE 9, undef $vgpr0, undef $vgpr0, undef $vgpr0, undef $vgpr0, 1, 0, 0, implicit $exec
  ; GCN-NEXT:   S_ENDPGM 0
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT: bb.9:
  %.i0 = fdiv reassoc nnan nsz arcp contract afn float 1.000000e+00, %val
  %cmp0 = fcmp olt float %.i0, 0.000000e+00
  br i1 %cmp0, label %kill0, label %flow

kill0:                                            ; preds = %0
  call void @llvm.amdgcn.kill(i1 false)
  br label %end

flow:                                             ; preds = %flow, %0
  %cmp1 = fcmp olt float %val, 0.000000e+00
  br i1 %cmp1, label %flow, label %end

kill1:                                            ; No predecessors!
  call void @llvm.amdgcn.kill(i1 false)
  br label %end

end:                                              ; preds = %kill1, %flow, %kill0
  ret { <4 x float> } undef
}

declare void @llvm.amdgcn.kill(i1) #0

attributes #0 = { nounwind }