; Runs llc for amdgcn with -amdgpu-set-wave-priority=true and pipes the
; generated assembly to FileCheck. Every function below pins where s_setprio
; may (or may not) show up in the output.
; RUN: llc -mtriple=amdgcn -amdgpu-set-wave-priority=true -o - %s | FileCheck %s

; A shader with no buffer load: no s_setprio may appear before the epilog
; marker.
; CHECK-LABEL: no_setprio:
; CHECK-NOT: s_setprio
; CHECK: ; return to shader part epilog
define amdgpu_ps <2 x float> @no_setprio(<2 x float> %a, <2 x float> %b) "amdgpu-wave-priority-threshold"="1" {
  %sum = fadd <2 x float> %a, %b
  ret <2 x float> %sum
}

; A single block containing one buffer load: s_setprio 3 comes ahead of the
; load and s_setprio 0 is the very next instruction after it.
; CHECK-LABEL: vmem_in_exit_block:
; CHECK: s_setprio 3
; CHECK: buffer_load_dwordx2
; CHECK-NEXT: s_setprio 0
; CHECK: ; return to shader part epilog
define amdgpu_ps <2 x float> @vmem_in_exit_block(ptr addrspace(8) inreg %p, <2 x float> %x) "amdgpu-wave-priority-threshold"="2" {
  %loaded = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) %p, i32 0, i32 0, i32 0, i32 0)
  %sum = fadd <2 x float> %loaded, %x
  ret <2 x float> %sum
}

; Two-way branch where only %b performs a load: s_setprio 3 precedes the
; branch, s_setprio 0 directly follows the load in %b, and s_setprio 0 is the
; first instruction of %a.
; CHECK-LABEL: branch:
; CHECK: s_setprio 3
; CHECK: s_cbranch_scc0 [[A:.*]]
; CHECK: {{.*}}: ; %b
; CHECK: buffer_load_dwordx2
; CHECK-NEXT: s_setprio 0
; CHECK: s_branch [[EXIT:.*]]
; CHECK: [[A]]: ; %a
; CHECK-NEXT: s_setprio 0
; CHECK: s_branch [[EXIT]]
; CHECK-NEXT: [[EXIT]]:
define amdgpu_ps <2 x float> @branch(ptr addrspace(8) inreg %p, i32 inreg %i, <2 x float> %x) "amdgpu-wave-priority-threshold"="2" {
  %is_zero = icmp eq i32 %i, 0
  br i1 %is_zero, label %a, label %b

a:
  ; Load-free path: returns the all-zero vector.
  ret <2 x float> zeroinitializer

b:
  %loaded = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) %p, i32 0, i32 0, i32 0, i32 0)
  %sum = fadd <2 x float> %loaded, %x
  ret <2 x float> %sum
}

; Loads in the entry block and in %a: s_setprio 3 precedes the first load,
; s_setprio 0 directly follows the load in %a and opens %c, and %b must not
; contain any s_setprio before its branch to the exit.
; CHECK-LABEL: setprio_follows_setprio:
; CHECK: s_setprio 3
; CHECK: buffer_load_dwordx2
; CHECK: s_cbranch_scc1 [[C:.*]]
; CHECK: {{.*}}: ; %a
; CHECK: buffer_load_dwordx2
; CHECK-NEXT: s_setprio 0
; CHECK: s_cbranch_vccnz [[C]]
; CHECK: {{.*}}: ; %b
; CHECK-NOT: s_setprio
; CHECK: s_branch [[EXIT:.*]]
; CHECK: [[C]]: ; %c
; CHECK-NEXT: s_setprio 0
; CHECK: s_branch [[EXIT]]
; CHECK: [[EXIT]]:
define amdgpu_ps <2 x float> @setprio_follows_setprio(ptr addrspace(8) inreg %p, i32 inreg %i) "amdgpu-wave-priority-threshold"="3" {
entry:
  ; First load; its result is only consumed in %c.
  %v1 = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) %p, i32 0, i32 0, i32 0, i32 0)
  %cond1 = icmp ne i32 %i, 0
  br i1 %cond1, label %a, label %c

a:
  ; Second load; the branch condition depends on the loaded lanes.
  %v2 = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) %p, i32 0, i32 0, i32 1, i32 0)
  %v20 = extractelement <2 x float> %v2, i32 0
  %v21 = extractelement <2 x float> %v2, i32 1
  %cond2 = fcmp ult float %v20, %v21
  br i1 %cond2, label %b, label %c

b:
  ret <2 x float> %v2

c:
  %v4 = fadd <2 x float> %v1, %v1
  ret <2 x float> %v4
}

; A loop whose body contains the load: s_setprio 3 is expected in %entry, no
; s_setprio anywhere inside %loop, and s_setprio 0 as the first instruction of
; %exit.
; CHECK-LABEL: loop:
; CHECK: {{.*}}: ; %entry
; CHECK: s_setprio 3
; CHECK-NOT: s_setprio
; CHECK: [[LOOP:.*]]: ; %loop
; CHECK-NOT: s_setprio
; CHECK: buffer_load_dwordx2
; CHECK-NOT: s_setprio
; CHECK: s_cbranch_scc1 [[LOOP]]
; CHECK-NEXT: {{.*}}: ; %exit
; CHECK-NEXT: s_setprio 0
define amdgpu_ps <2 x float> @loop(ptr addrspace(8) inreg %p) "amdgpu-wave-priority-threshold"="2" {
entry:
  br label %loop

loop:
  %i = phi i32 [0, %entry], [%i2, %loop]
  %sum = phi <2 x float> [<float 0.0, float 0.0>, %entry], [%sum2, %loop]

  %i2 = add i32 %i, 1

  %v = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) %p, i32 %i, i32 0, i32 0, i32 0)
  %sum2 = fadd <2 x float> %sum, %v

  %cond = icmp ult i32 %i2, 5
  br i1 %cond, label %loop, label %exit

exit:
  ret <2 x float> %sum2
}

; A load in %entry followed by either a load-free loop or a second load:
; s_setprio 0 is expected in %loop.preheader (ahead of the loop) and directly
; after the load in %another_load; %loop and %exit must stay free of s_setprio.
; The %exit label line is matched explicitly so that the negative assertion
; after it is anchored to the %exit block.
; CHECK-LABEL: edge_split:
; CHECK: s_setprio 3
; CHECK: buffer_load_dwordx2
; CHECK-NOT: s_setprio
; CHECK: s_cbranch_scc1 [[ANOTHER_LOAD:.*]]
; CHECK: {{.*}}: ; %loop.preheader
; CHECK-NEXT: s_setprio 0
; CHECK: [[LOOP:.*]]: ; %loop
; CHECK-NOT: s_setprio
; CHECK: s_cbranch_scc1 [[LOOP]]
; CHECK: {{.*}}: ; %exit
; CHECK-NOT: s_setprio
; CHECK: s_branch [[RET:.*]]
; CHECK: [[ANOTHER_LOAD]]: ; %another_load
; CHECK: buffer_load_dwordx2
; CHECK-NEXT: s_setprio 0
; CHECK: s_branch [[RET]]
; CHECK: [[RET]]:
define amdgpu_ps <2 x float> @edge_split(ptr addrspace(8) inreg %p, i32 inreg %x) "amdgpu-wave-priority-threshold"="2" {
entry:
  %v = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) %p, i32 0, i32 0, i32 0, i32 0)
  %cond = icmp ne i32 %x, 0
  br i1 %cond, label %loop, label %another_load

loop:
  %i = phi i32 [0, %entry], [%i2, %loop]
  %mul = phi <2 x float> [%v, %entry], [%mul2, %loop]

  %i2 = add i32 %i, 1
  %mul2 = fmul <2 x float> %mul, %v

  %cond2 = icmp ult i32 %i2, 5
  br i1 %cond2, label %loop, label %exit

exit:
  ret <2 x float> %mul2

another_load:
  %v2 = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) %p, i32 0, i32 0, i32 1, i32 0)
  %sum = fadd <2 x float> %v, %v2
  ret <2 x float> %sum
}

; Threshold attribute set to 4: s_setprio 3 / s_setprio 0 bracket the first
; load, four v_add_f32_e32 follow, and the load in %b is the first instruction
; of its block (so no s_setprio directly precedes it).
; CHECK-LABEL: valu_insts_threshold:
; CHECK: s_setprio 3
; CHECK: buffer_load_dwordx2
; CHECK-NEXT: s_setprio 0
; CHECK-COUNT-4: v_add_f32_e32
; CHECK: s_cbranch_scc0 [[A:.*]]
; CHECK: {{.*}}: ; %b
; CHECK-NEXT: buffer_load_dwordx2
; CHECK: s_branch [[END:.*]]
; CHECK: [[A]]: ; %a
; CHECK: s_branch [[END]]
; CHECK: [[END]]:
define amdgpu_ps <2 x float> @valu_insts_threshold(ptr addrspace(8) inreg %p, i32 inreg %i) "amdgpu-wave-priority-threshold"="4" {
  %v = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) %p, i32 0, i32 0, i32 0, i32 0)
  %add = fadd <2 x float> %v, %v
  %add2 = fadd <2 x float> %add, %add

  %cond = icmp eq i32 %i, 0
  br i1 %cond, label %a, label %b

a:
  ret <2 x float> %add2

b:
  %v2 = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) %p, i32 0, i32 1, i32 0, i32 0)
  %sub = fsub <2 x float> %add2, %v2
  ret <2 x float> %sub
}

; Same body as valu_insts_threshold, but with the threshold attribute raised
; to 5: no s_setprio may appear anywhere in the function.
; CHECK-LABEL: valu_insts_threshold2:
; CHECK-NOT: s_setprio
; CHECK: ; -- End function
define amdgpu_ps <2 x float> @valu_insts_threshold2(ptr addrspace(8) inreg %p, i32 inreg %i) "amdgpu-wave-priority-threshold"="5" {
  %v = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) %p, i32 0, i32 0, i32 0, i32 0)
  %add = fadd <2 x float> %v, %v
  %add2 = fadd <2 x float> %add, %add

  %cond = icmp eq i32 %i, 0
  br i1 %cond, label %a, label %b

a:
  ret <2 x float> %add2

b:
  %v2 = call <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8) %p, i32 0, i32 1, i32 0, i32 0)
  %sub = fsub <2 x float> %add2, %v2
  ret <2 x float> %sub
}

; Buffer-load intrinsic used by every load in this file.
declare <2 x float> @llvm.amdgcn.struct.ptr.buffer.load.v2f32(ptr addrspace(8), i32, i32, i32, i32) nounwind