; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs -stop-after=amdgpu-isel < %s | FileCheck %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck %s

; Exercises selection of @llvm.amdgcn.make.buffer.rsrc on gfx900. Both llc
; invocations above stop right after amdgpu-isel (the second one adds
; -enable-new-pm) and share the same set of expected MIR assertions.
; The assertion lines inside each function are generated output; regenerate
; them with the script named on the first line rather than editing by hand.

; Constant arguments: stride 0, and the two i32 operands 1234 and 5678.
; Expected MIR: $sgpr0 is passed through, $sgpr1 is masked with 65535, and the
; two constants are materialized with S_MOV_B32 into $sgpr2 and $sgpr3.
define amdgpu_ps ptr addrspace(8) @basic_raw_buffer(ptr inreg %p) {
  ; CHECK-LABEL: name: basic_raw_buffer
  ; CHECK: bb.0 (%ir-block.0):
  ; CHECK-NEXT: liveins: $sgpr0, $sgpr1
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr1
  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0
  ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
  ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], killed [[S_MOV_B32_]], implicit-def dead $scc
  ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 1234
  ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 5678
  ; CHECK-NEXT: $sgpr0 = COPY [[COPY1]]
  ; CHECK-NEXT: $sgpr1 = COPY [[S_AND_B32_]]
  ; CHECK-NEXT: $sgpr2 = COPY [[S_MOV_B32_1]]
  ; CHECK-NEXT: $sgpr3 = COPY [[S_MOV_B32_2]]
  ; CHECK-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1, $sgpr2, $sgpr3
  %rsrc = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p0(ptr %p, i16 0, i32 1234, i32 5678)
  ret ptr addrspace(8) %rsrc
}

; Uses the addrspace(1) pointer variant (.p1) with all-zero stride/i32 operands
; and feeds the result straight into a raw buffer load.
; Expected MIR: the four words are assembled with REG_SEQUENCE into an sgpr_128
; (the single zero S_MOV_B32 is reused for sub2, sub3 and the load's scalar
; operand), and the constant 4 passed to the load shows up as an immediate on
; BUFFER_LOAD_DWORD_OFFSET.
define amdgpu_ps float @read_raw_buffer(ptr addrspace(1) inreg %p) {
  ; CHECK-LABEL: name: read_raw_buffer
  ; CHECK: bb.0 (%ir-block.0):
  ; CHECK-NEXT: liveins: $sgpr0, $sgpr1
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr1
  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0
  ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
  ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], killed [[S_MOV_B32_]], implicit-def dead $scc
  ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY1]], %subreg.sub0, killed [[S_AND_B32_]], %subreg.sub1, [[S_MOV_B32_1]], %subreg.sub2, [[S_MOV_B32_1]], %subreg.sub3
  ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET killed [[REG_SEQUENCE]], [[S_MOV_B32_1]], 4, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8)
  ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
  ; CHECK-NEXT: SI_RETURN_TO_EPILOG $vgpr0
  %rsrc = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p1(ptr addrspace(1) %p, i16 0, i32 0, i32 0)
  %loaded = call float @llvm.amdgcn.raw.ptr.buffer.load(ptr addrspace(8) %rsrc, i32 4, i32 0, i32 0)
  ret float %loaded
}

; Same as basic_raw_buffer but with a constant non-zero stride of 4.
; Expected MIR: after the 65535 mask, the second word is ORed with the
; constant 262144 (= 4 << 16).
define amdgpu_ps ptr addrspace(8) @basic_struct_buffer(ptr inreg %p) {
  ; CHECK-LABEL: name: basic_struct_buffer
  ; CHECK: bb.0 (%ir-block.0):
  ; CHECK-NEXT: liveins: $sgpr0, $sgpr1
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr1
  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr0
  ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
  ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY]], killed [[S_MOV_B32_]], implicit-def dead $scc
  ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 262144
  ; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 killed [[S_AND_B32_]], killed [[S_MOV_B32_1]], implicit-def dead $scc
  ; CHECK-NEXT: [[S_MOV_B32_2:%[0-9]+]]:sreg_32 = S_MOV_B32 1234
  ; CHECK-NEXT: [[S_MOV_B32_3:%[0-9]+]]:sreg_32 = S_MOV_B32 5678
  ; CHECK-NEXT: $sgpr0 = COPY [[COPY1]]
  ; CHECK-NEXT: $sgpr1 = COPY [[S_OR_B32_]]
  ; CHECK-NEXT: $sgpr2 = COPY [[S_MOV_B32_2]]
  ; CHECK-NEXT: $sgpr3 = COPY [[S_MOV_B32_3]]
  ; CHECK-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1, $sgpr2, $sgpr3
  %rsrc = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p0(ptr %p, i16 4, i32 1234, i32 5678)
  ret ptr addrspace(8) %rsrc
}

; Constant stride of 4, but %numVals and %flags are inreg function arguments.
; Expected MIR: the second word is built exactly as in basic_struct_buffer,
; while the incoming $sgpr2/$sgpr3 values are copied through unchanged.
define amdgpu_ps ptr addrspace(8) @variable_top_half(ptr inreg %p, i32 inreg %numVals, i32 inreg %flags) {
  ; CHECK-LABEL: name: variable_top_half
  ; CHECK: bb.0 (%ir-block.0):
  ; CHECK-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr3
  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr2
  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr1
  ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr0
  ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
  ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], killed [[S_MOV_B32_]], implicit-def dead $scc
  ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 262144
  ; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 killed [[S_AND_B32_]], killed [[S_MOV_B32_1]], implicit-def dead $scc
  ; CHECK-NEXT: $sgpr0 = COPY [[COPY3]]
  ; CHECK-NEXT: $sgpr1 = COPY [[S_OR_B32_]]
  ; CHECK-NEXT: $sgpr2 = COPY [[COPY1]]
  ; CHECK-NEXT: $sgpr3 = COPY [[COPY]]
  ; CHECK-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1, $sgpr2, $sgpr3
  %rsrc = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p0(ptr %p, i16 4, i32 %numVals, i32 %flags)
  ret ptr addrspace(8) %rsrc
}

; Every operand is a (uniform, inreg) function argument.
; Expected MIR: the stride register ($sgpr2) is shifted left by 16 with
; S_LSHL_B32 and ORed into the 65535-masked $sgpr1; %numVals and %flags are
; copied through to $sgpr2/$sgpr3 of the result.
define amdgpu_ps ptr addrspace(8) @general_case(ptr inreg %p, i16 inreg %stride, i32 inreg %numVals, i32 inreg %flags) {
  ; CHECK-LABEL: name: general_case
  ; CHECK: bb.0 (%ir-block.0):
  ; CHECK-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
  ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
  ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
  ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
  ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY3]], killed [[S_MOV_B32_]], implicit-def dead $scc
  ; CHECK-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY2]], 16, implicit-def dead $scc
  ; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 killed [[S_AND_B32_]], killed [[S_LSHL_B32_]], implicit-def dead $scc
  ; CHECK-NEXT: $sgpr0 = COPY [[COPY4]]
  ; CHECK-NEXT: $sgpr1 = COPY [[S_OR_B32_]]
  ; CHECK-NEXT: $sgpr2 = COPY [[COPY1]]
  ; CHECK-NEXT: $sgpr3 = COPY [[COPY]]
  ; CHECK-NEXT: SI_RETURN_TO_EPILOG $sgpr0, $sgpr1, $sgpr2, $sgpr3
  %rsrc = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p0(ptr %p, i16 %stride, i32 %numVals, i32 %flags)
  ret ptr addrspace(8) %rsrc
}

; Same resource construction as general_case, consumed by a struct buffer load
; whose remaining operands are all zero.
; Expected MIR: the four words are combined with REG_SEQUENCE into an sgpr_128
; and used by BUFFER_LOAD_DWORD_IDXEN; the zero constant is copied into a VGPR
; for the load's first operand and also used directly as its scalar operand.
define amdgpu_ps float @general_case_load(ptr inreg %p, i16 inreg %stride, i32 inreg %numVals, i32 inreg %flags) {
  ; CHECK-LABEL: name: general_case_load
  ; CHECK: bb.0 (%ir-block.0):
  ; CHECK-NEXT: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: [[COPY:%[0-9]+]]:sgpr_32 = COPY $sgpr4
  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:sgpr_32 = COPY $sgpr3
  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:sgpr_32 = COPY $sgpr2
  ; CHECK-NEXT: [[COPY3:%[0-9]+]]:sgpr_32 = COPY $sgpr1
  ; CHECK-NEXT: [[COPY4:%[0-9]+]]:sgpr_32 = COPY $sgpr0
  ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
  ; CHECK-NEXT: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY3]], killed [[S_MOV_B32_]], implicit-def dead $scc
  ; CHECK-NEXT: [[S_LSHL_B32_:%[0-9]+]]:sreg_32 = S_LSHL_B32 [[COPY2]], 16, implicit-def dead $scc
  ; CHECK-NEXT: [[S_OR_B32_:%[0-9]+]]:sreg_32 = S_OR_B32 killed [[S_AND_B32_]], killed [[S_LSHL_B32_]], implicit-def dead $scc
  ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, killed [[S_OR_B32_]], %subreg.sub1, [[COPY1]], %subreg.sub2, [[COPY]], %subreg.sub3
  ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
  ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN [[COPY5]], killed [[REG_SEQUENCE]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8)
  ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_IDXEN]]
  ; CHECK-NEXT: SI_RETURN_TO_EPILOG $vgpr0
  %rsrc = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p0(ptr %p, i16 %stride, i32 %numVals, i32 %flags)
  %value = call float @llvm.amdgcn.struct.ptr.buffer.load(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
  ret float %value
}

; None of the components are uniform due to the lack of an inreg
; Expected MIR: all arguments arrive in VGPRs, the stride shift uses
; V_LSHLREV_B32_e64, and the mask+OR is a single V_AND_OR_B32_e64.
; NOTE(review): at this stop point the assertions show the VGPR values fed
; directly into an sgpr_128 REG_SEQUENCE with no loop visible; presumably the
; waterfall loop named in the function is introduced by a later pass - confirm.
define amdgpu_ps float @general_case_load_with_waterfall(ptr %p, i16 %stride, i32 %numVals, i32 %flags) {
  ; CHECK-LABEL: name: general_case_load_with_waterfall
  ; CHECK: bb.0 (%ir-block.0):
  ; CHECK-NEXT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4
  ; CHECK-NEXT: {{ $}}
  ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr4
  ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr3
  ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
  ; CHECK-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr1
  ; CHECK-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
  ; CHECK-NEXT: [[V_LSHLREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHLREV_B32_e64 16, [[COPY2]], implicit $exec
  ; CHECK-NEXT: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 65535
  ; CHECK-NEXT: [[V_AND_OR_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_OR_B32_e64 [[COPY3]], killed [[S_MOV_B32_]], killed [[V_LSHLREV_B32_e64_]], implicit $exec
  ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:sgpr_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, killed [[V_AND_OR_B32_e64_]], %subreg.sub1, [[COPY1]], %subreg.sub2, [[COPY]], %subreg.sub3
  ; CHECK-NEXT: [[S_MOV_B32_1:%[0-9]+]]:sreg_32 = S_MOV_B32 0
  ; CHECK-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
  ; CHECK-NEXT: [[BUFFER_LOAD_DWORD_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN [[COPY5]], killed [[REG_SEQUENCE]], [[S_MOV_B32_1]], 0, 0, 0, implicit $exec :: (dereferenceable load (s32) from %ir.rsrc, align 1, addrspace 8)
  ; CHECK-NEXT: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_IDXEN]]
  ; CHECK-NEXT: SI_RETURN_TO_EPILOG $vgpr0
  %rsrc = call ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p0(ptr %p, i16 %stride, i32 %numVals, i32 %flags)
  %value = call float @llvm.amdgcn.struct.ptr.buffer.load(ptr addrspace(8) %rsrc, i32 0, i32 0, i32 0, i32 0)
  ret float %value
}

; Declarations of the intrinsics called above: the resource constructor for
; flat (.p0) and addrspace(1) (.p1) source pointers, and the raw/struct pointer
; buffer loads returning float.
declare ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p0(ptr nocapture readnone, i16, i32, i32)
declare ptr addrspace(8) @llvm.amdgcn.make.buffer.rsrc.p1(ptr addrspace(1) nocapture readnone, i16, i32, i32)
declare float @llvm.amdgcn.raw.ptr.buffer.load(ptr addrspace(8) nocapture readonly, i32, i32, i32 immarg)
declare float @llvm.amdgcn.struct.ptr.buffer.load(ptr addrspace(8) nocapture readonly, i32, i32, i32, i32 immarg)