; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -stop-after=irtranslator %s -o - | FileCheck %s

; Exercises the IRTranslator output for amdgpu_ps functions: which registers
; are live-in when a pixel-shader input is disabled, and how return values are
; copied into $vgprN / $sgprN ahead of SI_RETURN_TO_EPILOG.
; The assertion lines inside each function body are generated; after changing
; any IR, regenerate them with the script named on the first line instead of
; editing them by hand.

; Check that we correctly skip over disabled inputs.
; Attribute #1 sets "InitialPSInputAddr"="0x00002". The expected output has
; only $sgpr2 and $vgpr0 as live-ins: %arg0 is read from $sgpr2, %psinput1 is
; read from $vgpr0, and a G_IMPLICIT_DEF is emitted that no later instruction
; uses (presumably the placeholder for the disabled %psinput0 -- confirm
; against the argument lowering).
define amdgpu_ps void @disabled_input(float inreg %arg0, float %psinput0, float %psinput1) #1 {
  ; CHECK-LABEL: name: disabled_input
  ; CHECK: bb.1.main_body:
  ; CHECK-NEXT: liveins: $sgpr2, $vgpr0
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 0, 15, [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY1]](s32), 0, 0
  ; CHECK-NEXT: S_ENDPGM 0
main_body:
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg0, float %arg0, float %arg0, float %psinput1, i1 false, i1 false) #0
  ret void
}

; Same as @disabled_input, but %psinput0 is a { float, float } struct. The
; live-ins are unchanged ($sgpr2, $vgpr0); the expected output contains one
; G_IMPLICIT_DEF plus a COPY of it, neither of which feeds the export
; (presumably one placeholder per struct element -- confirm).
define amdgpu_ps void @disabled_input_struct(float inreg %arg0, { float, float } %psinput0, float %psinput1) #1 {
  ; CHECK-LABEL: name: disabled_input_struct
  ; CHECK: bb.1.main_body:
  ; CHECK-NEXT: liveins: $sgpr2, $vgpr0
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr2
  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[DEF]](s32)
  ; CHECK-NEXT: G_INTRINSIC_W_SIDE_EFFECTS intrinsic(@llvm.amdgcn.exp), 0, 15, [[COPY]](s32), [[COPY]](s32), [[COPY]](s32), [[COPY1]](s32), 0, 0
  ; CHECK-NEXT: S_ENDPGM 0
main_body:
  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %arg0, float %arg0, float %arg0, float %psinput1, i1 false, i1 false) #0
  ret void
}

; A float return value is copied straight into $vgpr0 (no readfirstlane) and
; passed to SI_RETURN_TO_EPILOG as an implicit use. The bitcast from i32 leaves
; no instruction of its own in the expected output.
define amdgpu_ps float @vgpr_return(i32 %vgpr) {
  ; CHECK-LABEL: name: vgpr_return
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT: liveins: $vgpr0
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; CHECK-NEXT: $vgpr0 = COPY [[COPY]](s32)
  ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
  %cast = bitcast i32 %vgpr to float
  ret float %cast
}

; An i32 return value arriving in $vgpr0 goes through
; llvm.amdgcn.readfirstlane and is then copied into $sgpr0.
define amdgpu_ps i32 @sgpr_return_i32(i32 %vgpr) {
  ; CHECK-LABEL: name: sgpr_return_i32
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT: liveins: $vgpr0
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY]](s32)
  ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
  ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
  ret i32 %vgpr
}

; An i64 return value is split with G_UNMERGE_VALUES into two s32 halves; each
; half gets its own readfirstlane and lands in $sgpr0 / $sgpr1.
define amdgpu_ps i64 @sgpr_return_i64(i64 %vgpr) {
  ; CHECK-LABEL: name: sgpr_return_i64
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; CHECK-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
  ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s64)
  ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
  ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
  ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
  ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32)
  ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
  ret i64 %vgpr
}

; A <2 x i32> return value follows the same pattern as the i64 case: the vector
; is unmerged into its two s32 elements, each read with readfirstlane and
; copied into $sgpr0 / $sgpr1.
define amdgpu_ps <2 x i32> @sgpr_return_v2i32(<2 x i32> %vgpr) {
  ; CHECK-LABEL: name: sgpr_return_v2i32
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32)
  ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s32>)
  ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
  ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
  ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
  ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32)
  ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
  ret <2 x i32> %vgpr
}

; A { i32, i32 } return value: each field is read with readfirstlane directly
; from its argument copy and placed in $sgpr0 / $sgpr1. The unused
; G_IMPLICIT_DEF in the expected output presumably comes from the undef
; aggregate that starts the insertvalue chain -- confirm.
; NOTE(review): LLVM discourages new uses of undef in tests in favor of
; poison; if the operand is ever switched, regenerate the assertions.
define amdgpu_ps { i32, i32 } @sgpr_struct_return_i32_i32(i32 %vgpr0, i32 %vgpr1) {
  ; CHECK-LABEL: name: sgpr_struct_return_i32_i32
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; CHECK-NEXT: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
  ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY]](s32)
  ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
  ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[COPY1]](s32)
  ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32)
  ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
  %insertvalue0 = insertvalue { i32, i32 } undef, i32 %vgpr0, 0
  %value = insertvalue { i32, i32 } %insertvalue0, i32 %vgpr1, 1
  ret { i32, i32 } %value
}

; An addrspace(3) pointer return value (p3, one register) is converted with
; G_PTRTOINT to s32 before the readfirstlane, then copied into $sgpr0.
define amdgpu_ps ptr addrspace(3) @sgpr_return_p3i8(ptr addrspace(3) %vgpr) {
  ; CHECK-LABEL: name: sgpr_return_p3i8
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT: liveins: $vgpr0
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(p3) = COPY $vgpr0
  ; CHECK-NEXT: [[PTRTOINT:%[0-9]+]]:_(s32) = G_PTRTOINT [[COPY]](p3)
  ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[PTRTOINT]](s32)
  ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
  ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
  ret ptr addrspace(3) %vgpr
}

; An addrspace(1) pointer return value (p1, two registers) is unmerged
; directly into two s32 halves -- no G_PTRTOINT appears -- and each half is
; read with readfirstlane into $sgpr0 / $sgpr1.
define amdgpu_ps ptr addrspace(1) @sgpr_return_p1i8(ptr addrspace(1) %vgpr) {
  ; CHECK-LABEL: name: sgpr_return_p1i8
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
  ; CHECK-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
  ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](p1)
  ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV]](s32)
  ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
  ; CHECK-NEXT: [[INTRINSIC_CONVERGENT1:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[UV1]](s32)
  ; CHECK-NEXT: $sgpr1 = COPY [[INTRINSIC_CONVERGENT1]](s32)
  ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0, implicit $sgpr1
  ret ptr addrspace(1) %vgpr
}

; A <2 x i16> return value arrives packed in $vgpr0; it is bitcast to s32,
; read with a single readfirstlane, and copied into $sgpr0.
define amdgpu_ps <2 x i16> @sgpr_return_v2i16(<2 x i16> %vgpr) {
  ; CHECK-LABEL: name: sgpr_return_v2i16
  ; CHECK: bb.1 (%ir-block.0):
  ; CHECK-NEXT: liveins: $vgpr0
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
  ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>)
  ; CHECK-NEXT: [[INTRINSIC_CONVERGENT:%[0-9]+]]:_(s32) = G_INTRINSIC_CONVERGENT intrinsic(@llvm.amdgcn.readfirstlane), [[BITCAST]](s32)
  ; CHECK-NEXT: $sgpr0 = COPY [[INTRINSIC_CONVERGENT]](s32)
  ; CHECK-NEXT: SI_RETURN_TO_EPILOG implicit $sgpr0
  ret <2 x i16> %vgpr
}

; Export intrinsic called by the two disabled-input tests above.
declare void @llvm.amdgcn.exp.f32(i32 immarg, i32 immarg, float, float, float, float, i1 immarg, i1 immarg) #0

; #0 is attached to the export intrinsic and its call sites.
attributes #0 = { nounwind }
; #1 is attached to the two disabled-input functions. 0x00002 has only bit 1
; set, presumably enabling just the second PS input -- confirm against the
; AMDGPU backend's handling of InitialPSInputAddr.
attributes #1 = { "InitialPSInputAddr"="0x00002" }