1; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py 2; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -amdgpu-ir-lower-kernel-arguments=0 -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck -check-prefix=HSA-VI %s 3; RUN: llc -global-isel -mtriple=amdgcn-- -mcpu=fiji -amdgpu-ir-lower-kernel-arguments=0 -stop-after=irtranslator -verify-machineinstrs %s -o - | FileCheck -check-prefix=LEGACY-MESA-VI %s 4 5define amdgpu_kernel void @i8_arg(ptr addrspace(1) nocapture %out, i8 %in) nounwind { 6 ; HSA-VI-LABEL: name: i8_arg 7 ; HSA-VI: bb.1 (%ir-block.0): 8 ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 9 ; HSA-VI-NEXT: {{ $}} 10 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 11 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 12 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 13 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) 14 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 15 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 16 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 8, addrspace 4) 17 ; HSA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8) 18 ; HSA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) 19 ; HSA-VI-NEXT: S_ENDPGM 0 20 ; 21 ; LEGACY-MESA-VI-LABEL: name: i8_arg 22 ; LEGACY-MESA-VI: bb.1 (%ir-block.0): 23 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 24 ; LEGACY-MESA-VI-NEXT: {{ $}} 25 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 26 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 27 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 28 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) 29 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 30 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 31 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 4, addrspace 4) 32 ; LEGACY-MESA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8) 33 ; LEGACY-MESA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) 34 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 35 %ext = zext i8 %in to i32 36 store i32 %ext, ptr addrspace(1) %out, align 4 37 ret void 38} 39 40define amdgpu_kernel void @i8_zext_arg(ptr addrspace(1) nocapture %out, i8 zeroext %in) nounwind { 41 ; HSA-VI-LABEL: name: i8_zext_arg 42 ; HSA-VI: bb.1 (%ir-block.0): 43 ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 44 ; HSA-VI-NEXT: {{ $}} 45 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 46 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 47 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 48 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) 49 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 50 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 51 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 8, addrspace 4) 52 ; HSA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8) 53 ; HSA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) 54 ; HSA-VI-NEXT: S_ENDPGM 0 55 ; 56 ; LEGACY-MESA-VI-LABEL: name: i8_zext_arg 57 ; LEGACY-MESA-VI: bb.1 (%ir-block.0): 58 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 59 ; LEGACY-MESA-VI-NEXT: {{ $}} 60 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 61 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 62 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 63 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) 64 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 65 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 66 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 4, addrspace 4) 67 ; LEGACY-MESA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8) 68 ; LEGACY-MESA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) 69 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 70 %ext = zext i8 %in to i32 71 store i32 %ext, ptr addrspace(1) %out, align 4 72 ret void 73} 74 75define amdgpu_kernel void @i8_sext_arg(ptr addrspace(1) nocapture %out, i8 signext %in) nounwind { 76 ; HSA-VI-LABEL: name: i8_sext_arg 77 ; HSA-VI: bb.1 (%ir-block.0): 78 ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 79 ; HSA-VI-NEXT: {{ $}} 80 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 81 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 82 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 83 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) 84 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 85 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 86 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 8, addrspace 4) 87 ; HSA-VI-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s8) 88 ; HSA-VI-NEXT: G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) 89 ; HSA-VI-NEXT: S_ENDPGM 0 90 ; 91 ; LEGACY-MESA-VI-LABEL: name: i8_sext_arg 92 ; LEGACY-MESA-VI: bb.1 (%ir-block.0): 93 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 94 ; LEGACY-MESA-VI-NEXT: {{ $}} 95 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 96 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 97 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 98 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) 99 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 100 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 101 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8), align 4, addrspace 4) 102 ; LEGACY-MESA-VI-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s8) 103 ; LEGACY-MESA-VI-NEXT: G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) 104 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 105 %ext = sext i8 %in to i32 106 store i32 %ext, ptr addrspace(1) %out, align 4 107 ret void 108} 109 110define amdgpu_kernel void @i16_arg(ptr addrspace(1) nocapture %out, i16 %in) nounwind { 111 ; HSA-VI-LABEL: name: i16_arg 112 ; HSA-VI: bb.1 (%ir-block.0): 113 ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 114 ; HSA-VI-NEXT: {{ $}} 115 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 116 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 117 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 118 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) 119 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 120 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 121 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16), align 8, addrspace 4) 122 ; HSA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16) 123 ; HSA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) 124 ; HSA-VI-NEXT: S_ENDPGM 0 125 ; 126 ; LEGACY-MESA-VI-LABEL: name: i16_arg 127 ; LEGACY-MESA-VI: bb.1 (%ir-block.0): 128 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 129 ; LEGACY-MESA-VI-NEXT: {{ $}} 130 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 131 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 132 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 133 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) 134 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 135 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 136 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16), align 4, addrspace 4) 137 ; LEGACY-MESA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16) 138 ; LEGACY-MESA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) 139 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 140 %ext = zext i16 %in to i32 141 store i32 %ext, ptr addrspace(1) %out, align 4 142 ret void 143} 144 145define amdgpu_kernel void @i16_zext_arg(ptr addrspace(1) nocapture %out, i16 zeroext %in) nounwind { 146 ; HSA-VI-LABEL: name: i16_zext_arg 147 ; HSA-VI: bb.1 (%ir-block.0): 148 ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 149 ; HSA-VI-NEXT: {{ $}} 150 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 151 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 152 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 153 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) 154 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 155 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 156 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16), align 8, addrspace 4) 157 ; HSA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16) 158 ; HSA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) 159 ; HSA-VI-NEXT: S_ENDPGM 0 160 ; 161 ; LEGACY-MESA-VI-LABEL: name: i16_zext_arg 162 ; LEGACY-MESA-VI: bb.1 (%ir-block.0): 163 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 164 ; LEGACY-MESA-VI-NEXT: {{ $}} 165 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 166 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 167 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 168 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) 169 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 170 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 171 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16), align 4, addrspace 4) 172 ; LEGACY-MESA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16) 173 ; LEGACY-MESA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) 174 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 175 %ext = zext i16 %in to i32 176 store i32 %ext, ptr addrspace(1) %out, align 4 177 ret void 178} 179 180define amdgpu_kernel void @i16_sext_arg(ptr addrspace(1) nocapture %out, i16 signext %in) nounwind { 181 ; HSA-VI-LABEL: name: i16_sext_arg 182 ; HSA-VI: bb.1 (%ir-block.0): 183 ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 184 ; HSA-VI-NEXT: {{ $}} 185 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 186 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 187 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 188 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) 189 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 190 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 191 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16), align 8, addrspace 4) 192 ; HSA-VI-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s16) 193 ; HSA-VI-NEXT: G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) 194 ; HSA-VI-NEXT: S_ENDPGM 0 195 ; 196 ; LEGACY-MESA-VI-LABEL: name: i16_sext_arg 197 ; LEGACY-MESA-VI: bb.1 (%ir-block.0): 198 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 199 ; LEGACY-MESA-VI-NEXT: {{ $}} 200 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 201 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 202 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 203 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) 204 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 205 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 206 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16), align 4, addrspace 4) 207 ; LEGACY-MESA-VI-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s16) 208 ; LEGACY-MESA-VI-NEXT: G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) 209 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 210 %ext = sext i16 %in to i32 211 store i32 %ext, ptr addrspace(1) %out, align 4 212 ret void 213} 214 215define amdgpu_kernel void @i32_arg(ptr addrspace(1) nocapture %out, i32 %in) nounwind { 216 ; HSA-VI-LABEL: name: i32_arg 217 ; HSA-VI: bb.1.entry: 218 ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 219 ; HSA-VI-NEXT: {{ $}} 220 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 221 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 222 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 223 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) 224 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 225 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 226 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32), align 8, addrspace 4) 227 ; HSA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) 228 ; HSA-VI-NEXT: S_ENDPGM 0 229 ; 230 ; LEGACY-MESA-VI-LABEL: name: i32_arg 231 ; LEGACY-MESA-VI: bb.1.entry: 232 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 233 ; LEGACY-MESA-VI-NEXT: {{ $}} 234 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 235 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 236 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 237 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) 238 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 239 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 240 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32), addrspace 4) 241 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) 242 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 243entry: 244 store i32 %in, ptr addrspace(1) %out, align 4 245 ret void 246} 247 248define amdgpu_kernel void @f32_arg(ptr addrspace(1) nocapture %out, float %in) nounwind { 249 ; HSA-VI-LABEL: name: f32_arg 250 ; HSA-VI: bb.1.entry: 251 ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 252 ; HSA-VI-NEXT: {{ $}} 253 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 254 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 255 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 256 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) 257 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 258 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 259 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32), align 8, addrspace 4) 260 ; HSA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) 261 ; HSA-VI-NEXT: S_ENDPGM 0 262 ; 263 ; LEGACY-MESA-VI-LABEL: name: f32_arg 264 ; LEGACY-MESA-VI: bb.1.entry: 265 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 266 ; LEGACY-MESA-VI-NEXT: {{ $}} 267 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 268 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 269 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 270 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) 271 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 272 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 273 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32), addrspace 4) 274 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) 275 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 276entry: 277 store float %in, ptr addrspace(1) %out, align 4 278 ret void 279} 280 281define amdgpu_kernel void @v2i8_arg(ptr addrspace(1) %out, <2 x i8> %in) { 282 ; HSA-VI-LABEL: name: v2i8_arg 283 ; HSA-VI: bb.1.entry: 284 ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 285 ; HSA-VI-NEXT: {{ $}} 286 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 287 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 288 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 289 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) 290 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 291 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 292 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s8>), align 8, addrspace 4) 293 ; HSA-VI-NEXT: G_STORE [[LOAD1]](<2 x s8>), [[LOAD]](p1) :: (store (<2 x s8>) into %ir.out, addrspace 1) 294 ; HSA-VI-NEXT: S_ENDPGM 0 295 ; 296 ; LEGACY-MESA-VI-LABEL: name: v2i8_arg 297 ; LEGACY-MESA-VI: bb.1.entry: 298 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 299 ; LEGACY-MESA-VI-NEXT: {{ $}} 300 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 301 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 302 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 303 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) 304 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 305 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 306 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s8>), align 4, addrspace 4) 307 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<2 x s8>), [[LOAD]](p1) :: (store (<2 x s8>) into %ir.out, addrspace 1) 308 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 309entry: 310 store <2 x i8> %in, ptr addrspace(1) %out 311 ret void 312} 313 314define amdgpu_kernel void @v2i16_arg(ptr addrspace(1) %out, <2 x i16> %in) { 315 ; HSA-VI-LABEL: name: v2i16_arg 316 ; HSA-VI: bb.1.entry: 317 ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 318 ; HSA-VI-NEXT: {{ $}} 319 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 320 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 321 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 322 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) 323 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 324 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 325 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s16>), align 8, addrspace 4) 326 ; HSA-VI-NEXT: G_STORE [[LOAD1]](<2 x s16>), [[LOAD]](p1) :: (store (<2 x s16>) into %ir.out, addrspace 1) 327 ; HSA-VI-NEXT: S_ENDPGM 0 328 ; 329 ; LEGACY-MESA-VI-LABEL: name: v2i16_arg 330 ; LEGACY-MESA-VI: bb.1.entry: 331 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 332 ; LEGACY-MESA-VI-NEXT: {{ $}} 333 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 334 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 335 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 336 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) 337 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 338 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 339 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s16>), addrspace 4) 340 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<2 x s16>), [[LOAD]](p1) :: (store (<2 x s16>) into %ir.out, addrspace 1) 341 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 342entry: 343 store <2 x i16> %in, ptr addrspace(1) %out 344 ret void 345} 346 347define amdgpu_kernel void @v2i32_arg(ptr addrspace(1) nocapture %out, <2 x i32> %in) nounwind { 348 ; HSA-VI-LABEL: name: v2i32_arg 349 ; HSA-VI: bb.1.entry: 350 ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 351 ; HSA-VI-NEXT: {{ $}} 352 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 353 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 354 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 355 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) 356 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 357 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 358 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), addrspace 4) 359 ; HSA-VI-NEXT: G_STORE [[LOAD1]](<2 x s32>), [[LOAD]](p1) :: (store (<2 x s32>) into %ir.out, align 4, addrspace 1) 360 ; HSA-VI-NEXT: S_ENDPGM 0 361 ; 362 ; LEGACY-MESA-VI-LABEL: name: v2i32_arg 363 ; LEGACY-MESA-VI: bb.1.entry: 364 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 365 ; LEGACY-MESA-VI-NEXT: {{ $}} 366 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 367 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 368 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 369 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) 370 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 371 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 372 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), align 4, addrspace 4) 373 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<2 x s32>), [[LOAD]](p1) :: (store (<2 x s32>) into %ir.out, align 4, addrspace 1) 374 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 375entry: 376 store <2 x i32> %in, ptr addrspace(1) %out, align 4 377 ret void 378} 379 380define amdgpu_kernel void @v2f32_arg(ptr addrspace(1) nocapture %out, <2 x float> %in) nounwind { 381 ; HSA-VI-LABEL: name: v2f32_arg 382 ; HSA-VI: bb.1.entry: 383 ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 384 ; HSA-VI-NEXT: {{ $}} 385 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 386 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 387 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 388 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) 389 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 390 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 391 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), addrspace 4) 392 ; HSA-VI-NEXT: G_STORE [[LOAD1]](<2 x s32>), [[LOAD]](p1) :: (store (<2 x s32>) into %ir.out, align 4, addrspace 1) 393 ; HSA-VI-NEXT: S_ENDPGM 0 394 ; 395 ; LEGACY-MESA-VI-LABEL: name: v2f32_arg 396 ; LEGACY-MESA-VI: bb.1.entry: 397 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 398 ; LEGACY-MESA-VI-NEXT: {{ $}} 399 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 400 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 401 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 402 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) 403 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 404 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 405 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), align 4, addrspace 4) 406 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<2 x s32>), [[LOAD]](p1) :: (store (<2 x s32>) into %ir.out, align 4, addrspace 1) 407 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 408entry: 409 store <2 x float> %in, ptr addrspace(1) %out, align 4 410 ret void 411} 412 413define amdgpu_kernel void @v3i8_arg(ptr addrspace(1) nocapture %out, <3 x i8> %in) nounwind { 414 ; HSA-VI-LABEL: name: v3i8_arg 415 ; HSA-VI: bb.1.entry: 416 ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 417 ; HSA-VI-NEXT: {{ $}} 418 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 419 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 420 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 421 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) 422 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 423 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 424 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s8>), align 8, addrspace 4) 425 ; HSA-VI-NEXT: G_STORE [[LOAD1]](<3 x s8>), [[LOAD]](p1) :: (store (<3 x s8>) into %ir.out, align 4, addrspace 1) 426 ; HSA-VI-NEXT: S_ENDPGM 0 427 ; 428 ; LEGACY-MESA-VI-LABEL: name: v3i8_arg 429 ; LEGACY-MESA-VI: bb.1.entry: 430 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 431 ; LEGACY-MESA-VI-NEXT: {{ $}} 432 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 433 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 434 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 435 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) 436 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 437 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 438 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s8>), align 4, addrspace 4) 439 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<3 x s8>), [[LOAD]](p1) :: (store (<3 x s8>) into %ir.out, align 4, addrspace 1) 440 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 441entry: 442 store <3 x i8> %in, ptr addrspace(1) %out, align 4 443 ret void 444} 445 446define amdgpu_kernel void @v3i16_arg(ptr addrspace(1) nocapture %out, <3 x i16> %in) nounwind { 447 ; HSA-VI-LABEL: name: v3i16_arg 448 ; HSA-VI: bb.1.entry: 449 ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 450 ; HSA-VI-NEXT: {{ $}} 451 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 452 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 453 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 454 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) 455 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 456 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 457 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s16>), align 8, addrspace 4) 458 ; HSA-VI-NEXT: G_STORE [[LOAD1]](<3 x s16>), [[LOAD]](p1) :: (store (<3 x s16>) into %ir.out, align 4, addrspace 1) 459 ; HSA-VI-NEXT: S_ENDPGM 0 460 ; 461 ; LEGACY-MESA-VI-LABEL: name: v3i16_arg 462 ; LEGACY-MESA-VI: bb.1.entry: 463 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 464 ; LEGACY-MESA-VI-NEXT: {{ $}} 465 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 466 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 467 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 468 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) 469 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 470 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 471 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s16>), align 4, addrspace 4) 472 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<3 x s16>), [[LOAD]](p1) :: (store (<3 x s16>) into %ir.out, align 4, addrspace 1) 473 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 474entry: 475 store <3 x i16> %in, ptr addrspace(1) %out, align 4 476 ret void 477} 478 479define amdgpu_kernel void @v3i32_arg(ptr addrspace(1) nocapture %out, <3 x i32> %in) nounwind { 480 ; HSA-VI-LABEL: name: v3i32_arg 481 ; HSA-VI: bb.1.entry: 482 ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 483 ; HSA-VI-NEXT: {{ $}} 484 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 485 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 486 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 487 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) 488 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 489 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 490 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s32>), align 16, addrspace 4) 491 ; HSA-VI-NEXT: G_STORE [[LOAD1]](<3 x s32>), [[LOAD]](p1) :: (store (<3 x s32>) into %ir.out, align 4, addrspace 1) 492 ; HSA-VI-NEXT: S_ENDPGM 0 493 ; 494 ; LEGACY-MESA-VI-LABEL: name: v3i32_arg 495 ; LEGACY-MESA-VI: bb.1.entry: 496 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 497 ; LEGACY-MESA-VI-NEXT: {{ $}} 498 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 499 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 500 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 501 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) 502 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52 503 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 504 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s32>), align 4, addrspace 4) 505 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<3 x s32>), [[LOAD]](p1) :: (store (<3 x s32>) into %ir.out, align 4, addrspace 1) 506 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 507entry: 508 store <3 x i32> %in, ptr addrspace(1) %out, align 4 509 ret void 510} 511 512define amdgpu_kernel void @v3f32_arg(ptr addrspace(1) nocapture %out, <3 x float> %in) nounwind { 513 ; HSA-VI-LABEL: name: v3f32_arg 514 ; HSA-VI: bb.1.entry: 515 ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 516 ; HSA-VI-NEXT: {{ $}} 517 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 518 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 519 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 520 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) 521 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 522 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 523 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s32>), align 16, addrspace 4) 524 ; HSA-VI-NEXT: G_STORE [[LOAD1]](<3 x s32>), [[LOAD]](p1) :: (store (<3 x s32>) into %ir.out, align 4, addrspace 1) 525 ; HSA-VI-NEXT: S_ENDPGM 0 526 ; 527 ; LEGACY-MESA-VI-LABEL: name: v3f32_arg 528 ; LEGACY-MESA-VI: bb.1.entry: 529 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 530 ; LEGACY-MESA-VI-NEXT: {{ $}} 531 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 532 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 533 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 534 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) 535 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52 536 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 537 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<3 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<3 x s32>), align 4, addrspace 4) 538 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<3 x s32>), [[LOAD]](p1) :: (store (<3 x s32>) into %ir.out, align 4, addrspace 1) 539 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 540entry: 541 store <3 x float> %in, ptr addrspace(1) %out, align 4 542 ret void 543} 544 545define amdgpu_kernel void @v4i8_arg(ptr addrspace(1) %out, <4 x i8> %in) { 546 ; HSA-VI-LABEL: name: v4i8_arg 547 ; HSA-VI: bb.1.entry: 548 ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 549 ; HSA-VI-NEXT: {{ $}} 550 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 551 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 552 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 553 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) 554 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 555 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 556 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s8>), align 8, addrspace 4) 557 ; HSA-VI-NEXT: G_STORE [[LOAD1]](<4 x s8>), [[LOAD]](p1) :: (store (<4 x s8>) into %ir.out, addrspace 1) 558 ; HSA-VI-NEXT: S_ENDPGM 0 559 ; 560 ; LEGACY-MESA-VI-LABEL: name: v4i8_arg 561 ; LEGACY-MESA-VI: bb.1.entry: 562 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 563 ; LEGACY-MESA-VI-NEXT: {{ $}} 564 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 565 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 566 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 567 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) 568 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 569 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 570 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s8>), addrspace 4) 571 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<4 x s8>), [[LOAD]](p1) :: (store (<4 x s8>) into %ir.out, addrspace 1) 572 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 573entry: 574 store <4 x i8> %in, ptr addrspace(1) %out 575 ret void 576} 577 578define amdgpu_kernel void @v4i16_arg(ptr addrspace(1) %out, <4 x i16> %in) { 579 ; HSA-VI-LABEL: name: v4i16_arg 580 ; HSA-VI: bb.1.entry: 581 ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 582 ; HSA-VI-NEXT: {{ $}} 583 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 584 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 585 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 586 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) 587 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 588 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 589 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s16>), addrspace 4) 590 ; HSA-VI-NEXT: G_STORE [[LOAD1]](<4 x s16>), [[LOAD]](p1) :: (store (<4 x s16>) into %ir.out, addrspace 1) 591 ; HSA-VI-NEXT: S_ENDPGM 0 592 ; 593 ; LEGACY-MESA-VI-LABEL: name: v4i16_arg 594 ; LEGACY-MESA-VI: bb.1.entry: 595 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 596 ; LEGACY-MESA-VI-NEXT: {{ $}} 597 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 598 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 599 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 600 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) 601 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 602 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 603 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s16>), align 4, addrspace 4) 604 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<4 x s16>), [[LOAD]](p1) :: (store (<4 x s16>) into %ir.out, addrspace 1) 605 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 606entry: 607 store <4 x i16> %in, ptr addrspace(1) %out 608 ret void 609} 610 611define amdgpu_kernel void @v4i32_arg(ptr addrspace(1) nocapture %out, <4 x i32> %in) nounwind { 612 ; HSA-VI-LABEL: name: v4i32_arg 613 ; HSA-VI: bb.1.entry: 614 ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 615 ; HSA-VI-NEXT: {{ $}} 616 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 617 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 618 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 619 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) 620 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 621 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 622 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s32>), addrspace 4) 623 ; HSA-VI-NEXT: G_STORE [[LOAD1]](<4 x s32>), [[LOAD]](p1) :: (store (<4 x s32>) into %ir.out, align 4, addrspace 1) 624 ; HSA-VI-NEXT: S_ENDPGM 0 625 ; 626 ; LEGACY-MESA-VI-LABEL: name: v4i32_arg 627 ; LEGACY-MESA-VI: bb.1.entry: 628 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 629 ; LEGACY-MESA-VI-NEXT: {{ $}} 630 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 631 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 632 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 633 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) 634 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52 635 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 636 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s32>), align 4, addrspace 4) 637 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<4 x s32>), [[LOAD]](p1) :: (store (<4 x s32>) into %ir.out, align 4, addrspace 1) 638 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 639entry: 640 store <4 x i32> %in, ptr addrspace(1) %out, align 4 641 ret void 642} 643 644define amdgpu_kernel void @v4f32_arg(ptr addrspace(1) nocapture %out, <4 x float> %in) nounwind { 645 ; HSA-VI-LABEL: name: v4f32_arg 646 ; HSA-VI: bb.1.entry: 647 ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 648 ; HSA-VI-NEXT: {{ $}} 649 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 650 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 651 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 652 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) 653 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 654 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 655 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s32>), addrspace 4) 656 ; HSA-VI-NEXT: G_STORE [[LOAD1]](<4 x s32>), [[LOAD]](p1) :: (store (<4 x s32>) into %ir.out, align 4, addrspace 1) 657 ; HSA-VI-NEXT: S_ENDPGM 0 658 ; 659 ; LEGACY-MESA-VI-LABEL: name: v4f32_arg 660 ; LEGACY-MESA-VI: bb.1.entry: 661 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 662 ; LEGACY-MESA-VI-NEXT: {{ $}} 663 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 664 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 665 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 666 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) 667 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52 668 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 669 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s32>), align 4, addrspace 4) 670 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<4 x s32>), [[LOAD]](p1) :: (store (<4 x s32>) into %ir.out, align 4, addrspace 1) 671 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 672entry: 673 store <4 x float> %in, ptr addrspace(1) %out, align 4 674 ret void 675} 676 677define amdgpu_kernel void @v8i8_arg(ptr addrspace(1) %out, <8 x i8> %in) { 678 ; HSA-VI-LABEL: name: v8i8_arg 679 ; HSA-VI: bb.1.entry: 680 ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 681 ; HSA-VI-NEXT: {{ $}} 682 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 683 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 684 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 685 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) 686 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 687 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 688 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s8>), addrspace 4) 689 ; HSA-VI-NEXT: G_STORE [[LOAD1]](<8 x s8>), [[LOAD]](p1) :: (store (<8 x s8>) into %ir.out, addrspace 1) 690 ; HSA-VI-NEXT: S_ENDPGM 0 691 ; 692 ; LEGACY-MESA-VI-LABEL: name: v8i8_arg 693 ; LEGACY-MESA-VI: bb.1.entry: 694 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 695 ; LEGACY-MESA-VI-NEXT: {{ $}} 696 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 697 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 698 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 699 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) 700 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 701 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 702 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s8>), align 4, addrspace 4) 703 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<8 x s8>), [[LOAD]](p1) :: (store (<8 x s8>) into %ir.out, addrspace 1) 704 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 705entry: 706 store <8 x i8> %in, ptr addrspace(1) %out 707 ret void 708} 709 710define amdgpu_kernel void @v8i16_arg(ptr addrspace(1) %out, <8 x i16> %in) { 711 ; HSA-VI-LABEL: name: v8i16_arg 712 ; HSA-VI: bb.1.entry: 713 ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 714 ; HSA-VI-NEXT: {{ $}} 715 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 716 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 717 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 718 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) 719 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 720 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 721 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s16>), addrspace 4) 722 ; HSA-VI-NEXT: G_STORE [[LOAD1]](<8 x s16>), [[LOAD]](p1) :: (store (<8 x s16>) into %ir.out, addrspace 1) 723 ; HSA-VI-NEXT: S_ENDPGM 0 724 ; 725 ; LEGACY-MESA-VI-LABEL: name: v8i16_arg 726 ; LEGACY-MESA-VI: bb.1.entry: 727 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 728 ; LEGACY-MESA-VI-NEXT: {{ $}} 729 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 730 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 731 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 732 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) 733 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52 734 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 735 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s16>), align 4, addrspace 4) 736 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<8 x s16>), [[LOAD]](p1) :: (store (<8 x s16>) into %ir.out, addrspace 1) 737 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 738entry: 739 store <8 x i16> %in, ptr addrspace(1) %out 740 ret void 741} 742 743define amdgpu_kernel void @v8i32_arg(ptr addrspace(1) nocapture %out, <8 x i32> %in) nounwind { 744 ; HSA-VI-LABEL: name: v8i32_arg 745 ; HSA-VI: bb.1.entry: 746 ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 747 ; HSA-VI-NEXT: {{ $}} 748 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 749 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 750 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 751 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) 752 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 753 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 754 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s32>), align 16, addrspace 4) 755 ; HSA-VI-NEXT: G_STORE [[LOAD1]](<8 x s32>), [[LOAD]](p1) :: (store (<8 x s32>) into %ir.out, align 4, addrspace 1) 756 ; HSA-VI-NEXT: S_ENDPGM 0 757 ; 758 ; LEGACY-MESA-VI-LABEL: name: v8i32_arg 759 ; LEGACY-MESA-VI: bb.1.entry: 760 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 761 ; LEGACY-MESA-VI-NEXT: {{ $}} 762 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 763 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 764 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 765 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) 766 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 68 767 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 768 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s32>), align 4, addrspace 4) 769 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<8 x s32>), [[LOAD]](p1) :: (store (<8 x s32>) into %ir.out, align 4, addrspace 1) 770 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 771entry: 772 store <8 x i32> %in, ptr addrspace(1) %out, align 4 773 ret void 774} 775 776define amdgpu_kernel void @v8f32_arg(ptr addrspace(1) nocapture %out, <8 x float> %in) nounwind { 777 ; HSA-VI-LABEL: name: v8f32_arg 778 ; HSA-VI: bb.1.entry: 779 ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 780 ; HSA-VI-NEXT: {{ $}} 781 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 782 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 783 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 784 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) 785 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 786 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 787 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s32>), align 16, addrspace 4) 788 ; HSA-VI-NEXT: G_STORE [[LOAD1]](<8 x s32>), [[LOAD]](p1) :: (store (<8 x s32>) into %ir.out, align 4, addrspace 1) 789 ; HSA-VI-NEXT: S_ENDPGM 0 790 ; 791 ; LEGACY-MESA-VI-LABEL: name: v8f32_arg 792 ; LEGACY-MESA-VI: bb.1.entry: 793 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 794 ; LEGACY-MESA-VI-NEXT: {{ $}} 795 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 796 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 797 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 798 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) 799 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 68 800 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 801 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<8 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<8 x s32>), align 4, addrspace 4) 802 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<8 x s32>), [[LOAD]](p1) :: (store (<8 x s32>) into %ir.out, align 4, addrspace 1) 803 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 804entry: 805 store <8 x float> %in, ptr addrspace(1) %out, align 4 806 ret void 807} 808 809define amdgpu_kernel void @v16i8_arg(ptr addrspace(1) %out, <16 x i8> %in) { 810 ; HSA-VI-LABEL: name: v16i8_arg 811 ; HSA-VI: bb.1.entry: 812 ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 813 ; HSA-VI-NEXT: {{ $}} 814 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 815 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 816 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 817 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) 818 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 819 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 820 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s8>), addrspace 4) 821 ; HSA-VI-NEXT: G_STORE [[LOAD1]](<16 x s8>), [[LOAD]](p1) :: (store (<16 x s8>) into %ir.out, addrspace 1) 822 ; HSA-VI-NEXT: S_ENDPGM 0 823 ; 824 ; LEGACY-MESA-VI-LABEL: name: v16i8_arg 825 ; LEGACY-MESA-VI: bb.1.entry: 826 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 827 ; LEGACY-MESA-VI-NEXT: {{ $}} 828 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 829 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 830 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 831 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) 832 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52 833 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 834 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s8>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s8>), align 4, addrspace 4) 835 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<16 x s8>), [[LOAD]](p1) :: (store (<16 x s8>) into %ir.out, addrspace 1) 836 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 837entry: 838 store <16 x i8> %in, ptr addrspace(1) %out 839 ret void 840} 841 842define amdgpu_kernel void @v16i16_arg(ptr addrspace(1) %out, <16 x i16> %in) { 843 ; HSA-VI-LABEL: name: v16i16_arg 844 ; HSA-VI: bb.1.entry: 845 ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 846 ; HSA-VI-NEXT: {{ $}} 847 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 848 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 849 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 850 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) 851 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 852 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 853 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s16>), align 16, addrspace 4) 854 ; HSA-VI-NEXT: G_STORE [[LOAD1]](<16 x s16>), [[LOAD]](p1) :: (store (<16 x s16>) into %ir.out, addrspace 1) 855 ; HSA-VI-NEXT: S_ENDPGM 0 856 ; 857 ; LEGACY-MESA-VI-LABEL: name: v16i16_arg 858 ; LEGACY-MESA-VI: bb.1.entry: 859 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 860 ; LEGACY-MESA-VI-NEXT: {{ $}} 861 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 862 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 863 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 864 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) 865 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 68 866 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 867 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s16>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s16>), align 4, addrspace 4) 868 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<16 x s16>), [[LOAD]](p1) :: (store (<16 x s16>) into %ir.out, addrspace 1) 869 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 870entry: 871 store <16 x i16> %in, ptr addrspace(1) %out 872 ret void 873} 874 875define amdgpu_kernel void @v16i32_arg(ptr addrspace(1) nocapture %out, <16 x i32> %in) nounwind { 876 ; HSA-VI-LABEL: name: v16i32_arg 877 ; HSA-VI: bb.1.entry: 878 ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 879 ; HSA-VI-NEXT: {{ $}} 880 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 881 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 882 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 883 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) 884 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 885 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 886 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s32>), align 16, addrspace 4) 887 ; HSA-VI-NEXT: G_STORE [[LOAD1]](<16 x s32>), [[LOAD]](p1) :: (store (<16 x s32>) into %ir.out, align 4, addrspace 1) 888 ; HSA-VI-NEXT: S_ENDPGM 0 889 ; 890 ; LEGACY-MESA-VI-LABEL: name: v16i32_arg 891 ; LEGACY-MESA-VI: bb.1.entry: 892 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 893 ; LEGACY-MESA-VI-NEXT: {{ $}} 894 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 895 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 896 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 897 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) 898 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 100 899 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 900 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s32>), align 4, addrspace 4) 901 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<16 x s32>), [[LOAD]](p1) :: (store (<16 x s32>) into %ir.out, align 4, addrspace 1) 902 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 903entry: 904 store <16 x i32> %in, ptr addrspace(1) %out, align 4 905 ret void 906} 907 908define amdgpu_kernel void @v16f32_arg(ptr addrspace(1) nocapture %out, <16 x float> %in) nounwind { 909 ; HSA-VI-LABEL: name: v16f32_arg 910 ; HSA-VI: bb.1.entry: 911 ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 912 ; HSA-VI-NEXT: {{ $}} 913 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 914 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 915 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 916 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) 917 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 918 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 919 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s32>), align 16, addrspace 4) 920 ; HSA-VI-NEXT: G_STORE [[LOAD1]](<16 x s32>), [[LOAD]](p1) :: (store (<16 x s32>) into %ir.out, align 4, addrspace 1) 921 ; HSA-VI-NEXT: S_ENDPGM 0 922 ; 923 ; LEGACY-MESA-VI-LABEL: name: v16f32_arg 924 ; LEGACY-MESA-VI: bb.1.entry: 925 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 926 ; LEGACY-MESA-VI-NEXT: {{ $}} 927 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 928 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 929 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 930 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) 931 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 100 932 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 933 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s32>), align 4, addrspace 4) 934 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<16 x s32>), [[LOAD]](p1) :: (store (<16 x s32>) into %ir.out, align 4, addrspace 1) 935 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 936entry: 937 store <16 x float> %in, ptr addrspace(1) %out, align 4 938 ret void 939} 940 941define amdgpu_kernel void @kernel_arg_i64(ptr addrspace(1) %out, i64 %a) nounwind { 942 ; HSA-VI-LABEL: name: kernel_arg_i64 943 ; HSA-VI: bb.1 (%ir-block.0): 944 ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 945 ; HSA-VI-NEXT: {{ $}} 946 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 947 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 948 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 949 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) 950 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 951 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 952 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), addrspace 4) 953 ; HSA-VI-NEXT: G_STORE [[LOAD1]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1) 954 ; HSA-VI-NEXT: S_ENDPGM 0 955 ; 956 ; LEGACY-MESA-VI-LABEL: name: kernel_arg_i64 957 ; LEGACY-MESA-VI: bb.1 (%ir-block.0): 958 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 959 ; LEGACY-MESA-VI-NEXT: {{ $}} 960 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 961 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 962 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 963 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) 964 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 965 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 966 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) 967 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1) 968 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 969 store i64 %a, ptr addrspace(1) %out, align 8 970 ret void 971} 972 973define amdgpu_kernel void @f64_kernel_arg(ptr addrspace(1) %out, double %in) { 974 ; HSA-VI-LABEL: name: f64_kernel_arg 975 ; HSA-VI: bb.1.entry: 976 ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 977 ; HSA-VI-NEXT: {{ $}} 978 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 979 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 980 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 981 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) 982 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 983 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 984 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), addrspace 4) 985 ; HSA-VI-NEXT: G_STORE [[LOAD1]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1) 986 ; HSA-VI-NEXT: S_ENDPGM 0 987 ; 988 ; LEGACY-MESA-VI-LABEL: name: f64_kernel_arg 989 ; LEGACY-MESA-VI: bb.1.entry: 990 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 991 ; LEGACY-MESA-VI-NEXT: {{ $}} 992 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 993 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 994 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 995 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) 996 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 997 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 998 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) 999 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1) 1000 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 1001entry: 1002 store double %in, ptr addrspace(1) %out 1003 ret void 1004} 1005 1006define amdgpu_kernel void @i1_arg(ptr addrspace(1) %out, i1 %x) nounwind { 1007 ; HSA-VI-LABEL: name: i1_arg 1008 ; HSA-VI: bb.1 (%ir-block.0): 1009 ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 1010 ; HSA-VI-NEXT: {{ $}} 1011 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 1012 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 1013 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1014 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) 1015 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 1016 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1017 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 8, addrspace 4) 1018 ; HSA-VI-NEXT: G_STORE [[LOAD1]](s1), [[LOAD]](p1) :: (store (s1) into %ir.out, addrspace 1) 1019 ; HSA-VI-NEXT: S_ENDPGM 0 1020 ; 1021 ; LEGACY-MESA-VI-LABEL: name: i1_arg 1022 ; LEGACY-MESA-VI: bb.1 (%ir-block.0): 1023 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 1024 ; LEGACY-MESA-VI-NEXT: {{ $}} 1025 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 1026 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 1027 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1028 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) 1029 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 1030 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1031 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 4, addrspace 4) 1032 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s1), [[LOAD]](p1) :: (store (s1) into %ir.out, addrspace 1) 1033 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 1034 store i1 %x, ptr addrspace(1) %out, align 1 1035 ret void 1036} 1037 1038define amdgpu_kernel void @i1_arg_zext_i32(ptr addrspace(1) %out, i1 %x) nounwind { 1039 ; HSA-VI-LABEL: name: i1_arg_zext_i32 1040 ; HSA-VI: bb.1 (%ir-block.0): 1041 ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 1042 ; HSA-VI-NEXT: {{ $}} 1043 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 1044 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 1045 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1046 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) 1047 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 1048 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1049 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 8, addrspace 4) 1050 ; HSA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s1) 1051 ; HSA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) 1052 ; HSA-VI-NEXT: S_ENDPGM 0 1053 ; 1054 ; LEGACY-MESA-VI-LABEL: name: i1_arg_zext_i32 1055 ; LEGACY-MESA-VI: bb.1 (%ir-block.0): 1056 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 1057 ; LEGACY-MESA-VI-NEXT: {{ $}} 1058 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 1059 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 1060 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1061 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) 1062 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 1063 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1064 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 4, addrspace 4) 1065 ; LEGACY-MESA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s1) 1066 ; LEGACY-MESA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) 1067 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 1068 %ext = zext i1 %x to i32 1069 store i32 %ext, ptr addrspace(1) %out, align 4 1070 ret void 1071} 1072 1073define amdgpu_kernel void @i1_arg_zext_i64(ptr addrspace(1) %out, i1 %x) nounwind { 1074 ; HSA-VI-LABEL: name: i1_arg_zext_i64 1075 ; HSA-VI: bb.1 (%ir-block.0): 1076 ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 1077 ; HSA-VI-NEXT: {{ $}} 1078 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 1079 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 1080 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1081 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) 1082 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 1083 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1084 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 8, addrspace 4) 1085 ; HSA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD1]](s1) 1086 ; HSA-VI-NEXT: G_STORE [[ZEXT]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1) 1087 ; HSA-VI-NEXT: S_ENDPGM 0 1088 ; 1089 ; LEGACY-MESA-VI-LABEL: name: i1_arg_zext_i64 1090 ; LEGACY-MESA-VI: bb.1 (%ir-block.0): 1091 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 1092 ; LEGACY-MESA-VI-NEXT: {{ $}} 1093 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 1094 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 1095 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1096 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) 1097 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 1098 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1099 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 4, addrspace 4) 1100 ; LEGACY-MESA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s64) = G_ZEXT [[LOAD1]](s1) 1101 ; LEGACY-MESA-VI-NEXT: G_STORE [[ZEXT]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1) 1102 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 1103 %ext = zext i1 %x to i64 1104 store i64 %ext, ptr addrspace(1) %out, align 8 1105 ret void 1106} 1107 1108define amdgpu_kernel void @i1_arg_sext_i32(ptr addrspace(1) %out, i1 %x) nounwind { 1109 ; HSA-VI-LABEL: name: i1_arg_sext_i32 1110 ; HSA-VI: bb.1 (%ir-block.0): 1111 ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 1112 ; HSA-VI-NEXT: {{ $}} 1113 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 1114 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 1115 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1116 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) 1117 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 1118 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1119 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 8, addrspace 4) 1120 ; HSA-VI-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s1) 1121 ; HSA-VI-NEXT: G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) 1122 ; HSA-VI-NEXT: S_ENDPGM 0 1123 ; 1124 ; LEGACY-MESA-VI-LABEL: name: i1_arg_sext_i32 1125 ; LEGACY-MESA-VI: bb.1 (%ir-block.0): 1126 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 1127 ; LEGACY-MESA-VI-NEXT: {{ $}} 1128 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 1129 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 1130 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1131 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) 1132 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 1133 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1134 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 4, addrspace 4) 1135 ; LEGACY-MESA-VI-NEXT: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[LOAD1]](s1) 1136 ; LEGACY-MESA-VI-NEXT: G_STORE [[SEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) 1137 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 1138 %ext = sext i1 %x to i32 1139 store i32 %ext, ptr addrspace(1) %out, align 4 1140 ret void 1141} 1142 1143define amdgpu_kernel void @i1_arg_sext_i64(ptr addrspace(1) %out, i1 %x) nounwind { 1144 ; HSA-VI-LABEL: name: i1_arg_sext_i64 1145 ; HSA-VI: bb.1 (%ir-block.0): 1146 ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 1147 ; HSA-VI-NEXT: {{ $}} 1148 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 1149 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 1150 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1151 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) 1152 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 1153 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1154 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 8, addrspace 4) 1155 ; HSA-VI-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD1]](s1) 1156 ; HSA-VI-NEXT: G_STORE [[SEXT]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1) 1157 ; HSA-VI-NEXT: S_ENDPGM 0 1158 ; 1159 ; LEGACY-MESA-VI-LABEL: name: i1_arg_sext_i64 1160 ; LEGACY-MESA-VI: bb.1 (%ir-block.0): 1161 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 1162 ; LEGACY-MESA-VI-NEXT: {{ $}} 1163 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 1164 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 1165 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1166 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) 1167 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 1168 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1169 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s1), align 4, addrspace 4) 1170 ; LEGACY-MESA-VI-NEXT: [[SEXT:%[0-9]+]]:_(s64) = G_SEXT [[LOAD1]](s1) 1171 ; LEGACY-MESA-VI-NEXT: G_STORE [[SEXT]](s64), [[LOAD]](p1) :: (store (s64) into %ir.out, addrspace 1) 1172 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 1173 %ext = sext i1 %x to i64 1174 store i64 %ext, ptr addrspace(1) %out, align 8 1175 ret void 1176} 1177 1178; 0-sized arguments do not add a slot to the argument register set, so 1179; waste an index in the virtual register array. 1180define amdgpu_kernel void @empty_struct_arg({} %arg0, i32 %arg1) nounwind { 1181 ; HSA-VI-LABEL: name: empty_struct_arg 1182 ; HSA-VI: bb.1 (%ir-block.0): 1183 ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 1184 ; HSA-VI-NEXT: {{ $}} 1185 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 1186 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 1187 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1188 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4) 1189 ; HSA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF 1190 ; HSA-VI-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) 1191 ; HSA-VI-NEXT: S_ENDPGM 0 1192 ; 1193 ; LEGACY-MESA-VI-LABEL: name: empty_struct_arg 1194 ; LEGACY-MESA-VI: bb.1 (%ir-block.0): 1195 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 1196 ; LEGACY-MESA-VI-NEXT: {{ $}} 1197 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 1198 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 1199 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1200 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4) 1201 ; LEGACY-MESA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF 1202 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) 1203 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 1204 store i32 %arg1, ptr addrspace(1) undef 1205 ret void 1206} 1207 1208define amdgpu_kernel void @empty_array_arg([0 x i8] %arg0, i32 %arg1) nounwind { 1209 ; HSA-VI-LABEL: name: empty_array_arg 1210 ; HSA-VI: bb.1 (%ir-block.0): 1211 ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 1212 ; HSA-VI-NEXT: {{ $}} 1213 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 1214 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 1215 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1216 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4) 1217 ; HSA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF 1218 ; HSA-VI-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) 1219 ; HSA-VI-NEXT: S_ENDPGM 0 1220 ; 1221 ; LEGACY-MESA-VI-LABEL: name: empty_array_arg 1222 ; LEGACY-MESA-VI: bb.1 (%ir-block.0): 1223 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 1224 ; LEGACY-MESA-VI-NEXT: {{ $}} 1225 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 1226 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 1227 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1228 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4) 1229 ; LEGACY-MESA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF 1230 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) 1231 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 1232 store i32 %arg1, ptr addrspace(1) undef 1233 ret void 1234} 1235 1236; The correct load offsets for these: 1237; load 4 from 0, 1238; load 8 from 8 1239; load 4 from 24 1240; load 8 from 32 1241 1242; With the SelectionDAG argument lowering, the alignments for the 1243; struct members is not properly considered, making these wrong. 1244define amdgpu_kernel void @struct_argument_alignment({i32, i64} %arg0, i8 %pad, {i32, i64} %arg1) { 1245 ; HSA-VI-LABEL: name: struct_argument_alignment 1246 ; HSA-VI: bb.1 (%ir-block.0): 1247 ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 1248 ; HSA-VI-NEXT: {{ $}} 1249 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 1250 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 1251 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1252 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4) 1253 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 1254 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1255 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), addrspace 4) 1256 ; HSA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 1257 ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) 1258 ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s8), align 16, addrspace 4) 1259 ; HSA-VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 1260 ; HSA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) 1261 ; HSA-VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s32), align 8, addrspace 4) 1262 ; HSA-VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 1263 ; HSA-VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) 1264 ; HSA-VI-NEXT: [[LOAD4:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD4]](p4) :: (dereferenceable invariant load (s64), addrspace 4) 1265 ; HSA-VI-NEXT: [[C5:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 1266 ; HSA-VI-NEXT: G_STORE [[LOAD]](s32), [[C5]](p1) :: (volatile store (s32) into `ptr addrspace(1) null`, addrspace 1) 1267 ; HSA-VI-NEXT: G_STORE [[LOAD1]](s64), [[C5]](p1) :: (volatile store (s64) into `ptr addrspace(1) null`, addrspace 1) 1268 ; HSA-VI-NEXT: G_STORE [[LOAD2]](s8), [[C5]](p1) :: (volatile store (s8) into `ptr addrspace(1) null`, addrspace 1) 1269 ; HSA-VI-NEXT: G_STORE [[LOAD3]](s32), [[C5]](p1) :: (volatile store (s32) into `ptr addrspace(1) null`, addrspace 1) 1270 ; HSA-VI-NEXT: G_STORE [[LOAD4]](s64), [[C5]](p1) :: (volatile store (s64) into `ptr addrspace(1) null`, addrspace 1) 1271 ; HSA-VI-NEXT: S_ENDPGM 0 1272 ; 1273 ; LEGACY-MESA-VI-LABEL: name: struct_argument_alignment 1274 ; LEGACY-MESA-VI: bb.1 (%ir-block.0): 1275 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 1276 ; LEGACY-MESA-VI-NEXT: {{ $}} 1277 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 1278 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 1279 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1280 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4) 1281 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 1282 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1283 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) 1284 ; LEGACY-MESA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 52 1285 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) 1286 ; LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s8), align 4, addrspace 4) 1287 ; LEGACY-MESA-VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 60 1288 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) 1289 ; LEGACY-MESA-VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s32), addrspace 4) 1290 ; LEGACY-MESA-VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 68 1291 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) 1292 ; LEGACY-MESA-VI-NEXT: [[LOAD4:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD4]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) 1293 ; LEGACY-MESA-VI-NEXT: [[C5:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 1294 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](s32), [[C5]](p1) :: (volatile store (s32) into `ptr addrspace(1) null`, addrspace 1) 1295 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s64), [[C5]](p1) :: (volatile store (s64) into `ptr addrspace(1) null`, addrspace 1) 1296 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](s8), [[C5]](p1) :: (volatile store (s8) into `ptr addrspace(1) null`, addrspace 1) 1297 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD3]](s32), [[C5]](p1) :: (volatile store (s32) into `ptr addrspace(1) null`, addrspace 1) 1298 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD4]](s64), [[C5]](p1) :: (volatile store (s64) into `ptr addrspace(1) null`, addrspace 1) 1299 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 1300 %val0 = extractvalue {i32, i64} %arg0, 0 1301 %val1 = extractvalue {i32, i64} %arg0, 1 1302 %val2 = extractvalue {i32, i64} %arg1, 0 1303 %val3 = extractvalue {i32, i64} %arg1, 1 1304 store volatile i32 %val0, ptr addrspace(1) null 1305 store volatile i64 %val1, ptr addrspace(1) null 1306 store volatile i8 %pad, ptr addrspace(1) null 1307 store volatile i32 %val2, ptr addrspace(1) null 1308 store volatile i64 %val3, ptr addrspace(1) null 1309 ret void 1310} 1311 1312define amdgpu_kernel void @pointer_in_struct_argument({ptr addrspace(3), ptr addrspace(1)} %arg0, i8 %pad, {ptr addrspace(3), ptr addrspace(1234)} %arg1) { 1313 ; HSA-VI-LABEL: name: pointer_in_struct_argument 1314 ; HSA-VI: bb.1 (%ir-block.0): 1315 ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 1316 ; HSA-VI-NEXT: {{ $}} 1317 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 1318 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 1319 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1320 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4) 1321 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 1322 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1323 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), addrspace 4) 1324 ; HSA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 1325 ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) 1326 ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s8), align 16, addrspace 4) 1327 ; HSA-VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 24 1328 ; HSA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) 1329 ; HSA-VI-NEXT: [[LOAD3:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s32), align 8, addrspace 4) 1330 ; HSA-VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 1331 ; HSA-VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) 1332 ; HSA-VI-NEXT: [[LOAD4:%[0-9]+]]:_(p1234) = G_LOAD [[PTR_ADD4]](p4) :: (dereferenceable invariant load (s64), addrspace 4) 1333 ; HSA-VI-NEXT: [[C5:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 1334 ; HSA-VI-NEXT: G_STORE [[LOAD]](p3), [[C5]](p1) :: (volatile store (p3) into `ptr addrspace(1) null`, addrspace 1) 1335 ; HSA-VI-NEXT: G_STORE [[LOAD1]](p1), [[C5]](p1) :: (volatile store (p1) into `ptr addrspace(1) null`, addrspace 1) 1336 ; HSA-VI-NEXT: G_STORE [[LOAD2]](s8), [[C5]](p1) :: (volatile store (s8) into `ptr addrspace(1) null`, addrspace 1) 1337 ; HSA-VI-NEXT: G_STORE [[LOAD3]](p3), [[C5]](p1) :: (volatile store (p3) into `ptr addrspace(1) null`, addrspace 1) 1338 ; HSA-VI-NEXT: G_STORE [[LOAD4]](p1234), [[C5]](p1) :: (volatile store (p1234) into `ptr addrspace(1) null`, addrspace 1) 1339 ; HSA-VI-NEXT: S_ENDPGM 0 1340 ; 1341 ; LEGACY-MESA-VI-LABEL: name: pointer_in_struct_argument 1342 ; LEGACY-MESA-VI: bb.1 (%ir-block.0): 1343 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 1344 ; LEGACY-MESA-VI-NEXT: {{ $}} 1345 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 1346 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 1347 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1348 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4) 1349 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 1350 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1351 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) 1352 ; LEGACY-MESA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 52 1353 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) 1354 ; LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s8), align 4, addrspace 4) 1355 ; LEGACY-MESA-VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 60 1356 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) 1357 ; LEGACY-MESA-VI-NEXT: [[LOAD3:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s32), addrspace 4) 1358 ; LEGACY-MESA-VI-NEXT: [[C4:%[0-9]+]]:_(s64) = G_CONSTANT i64 68 1359 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD4:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C4]](s64) 1360 ; LEGACY-MESA-VI-NEXT: [[LOAD4:%[0-9]+]]:_(p1234) = G_LOAD [[PTR_ADD4]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) 1361 ; LEGACY-MESA-VI-NEXT: [[C5:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 1362 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](p3), [[C5]](p1) :: (volatile store (p3) into `ptr addrspace(1) null`, addrspace 1) 1363 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](p1), [[C5]](p1) :: (volatile store (p1) into `ptr addrspace(1) null`, addrspace 1) 1364 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](s8), [[C5]](p1) :: (volatile store (s8) into `ptr addrspace(1) null`, addrspace 1) 1365 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD3]](p3), [[C5]](p1) :: (volatile store (p3) into `ptr addrspace(1) null`, addrspace 1) 1366 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD4]](p1234), [[C5]](p1) :: (volatile store (p1234) into `ptr addrspace(1) null`, addrspace 1) 1367 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 1368 %val0 = extractvalue {ptr addrspace(3), ptr addrspace(1)} %arg0, 0 1369 %val1 = extractvalue {ptr addrspace(3), ptr addrspace(1)} %arg0, 1 1370 %val2 = extractvalue {ptr addrspace(3), ptr addrspace(1234)} %arg1, 0 1371 %val3 = extractvalue {ptr addrspace(3), ptr addrspace(1234)} %arg1, 1 1372 store volatile ptr addrspace(3) %val0, ptr addrspace(1) null 1373 store volatile ptr addrspace(1) %val1, ptr addrspace(1) null 1374 store volatile i8 %pad, ptr addrspace(1) null 1375 store volatile ptr addrspace(3) %val2, ptr addrspace(1) null 1376 store volatile ptr addrspace(1234) %val3, ptr addrspace(1) null 1377 ret void 1378} 1379 1380; No padding between i8 and next struct, but round up at end to 4 byte 1381; multiple. 1382define amdgpu_kernel void @packed_struct_argument_alignment(<{i32, i64}> %arg0, i8, <{i32, i64}> %arg1) { 1383 ; HSA-VI-LABEL: name: packed_struct_argument_alignment 1384 ; HSA-VI: bb.1 (%ir-block.1): 1385 ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 1386 ; HSA-VI-NEXT: {{ $}} 1387 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 1388 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 1389 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1390 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4) 1391 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 1392 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1393 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) 1394 ; HSA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 13 1395 ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) 1396 ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 1, addrspace 4) 1397 ; HSA-VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 17 1398 ; HSA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) 1399 ; HSA-VI-NEXT: [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s64), align 1, addrspace 4) 1400 ; HSA-VI-NEXT: [[C4:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 1401 ; HSA-VI-NEXT: G_STORE [[LOAD]](s32), [[C4]](p1) :: (volatile store (s32) into `ptr addrspace(1) null`, addrspace 1) 1402 ; HSA-VI-NEXT: G_STORE [[LOAD1]](s64), [[C4]](p1) :: (volatile store (s64) into `ptr addrspace(1) null`, addrspace 1) 1403 ; HSA-VI-NEXT: G_STORE [[LOAD2]](s32), [[C4]](p1) :: (volatile store (s32) into `ptr addrspace(1) null`, addrspace 1) 1404 ; HSA-VI-NEXT: G_STORE [[LOAD3]](s64), [[C4]](p1) :: (volatile store (s64) into `ptr addrspace(1) null`, addrspace 1) 1405 ; HSA-VI-NEXT: S_ENDPGM 0 1406 ; 1407 ; LEGACY-MESA-VI-LABEL: name: packed_struct_argument_alignment 1408 ; LEGACY-MESA-VI: bb.1 (%ir-block.1): 1409 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 1410 ; LEGACY-MESA-VI-NEXT: {{ $}} 1411 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 1412 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 1413 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1414 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32), addrspace 4) 1415 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 40 1416 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1417 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s64), align 4, addrspace 4) 1418 ; LEGACY-MESA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 49 1419 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) 1420 ; LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 1, addrspace 4) 1421 ; LEGACY-MESA-VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 53 1422 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) 1423 ; LEGACY-MESA-VI-NEXT: [[LOAD3:%[0-9]+]]:_(s64) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s64), align 1, addrspace 4) 1424 ; LEGACY-MESA-VI-NEXT: [[C4:%[0-9]+]]:_(p1) = G_CONSTANT i64 0 1425 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](s32), [[C4]](p1) :: (volatile store (s32) into `ptr addrspace(1) null`, addrspace 1) 1426 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s64), [[C4]](p1) :: (volatile store (s64) into `ptr addrspace(1) null`, addrspace 1) 1427 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](s32), [[C4]](p1) :: (volatile store (s32) into `ptr addrspace(1) null`, addrspace 1) 1428 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD3]](s64), [[C4]](p1) :: (volatile store (s64) into `ptr addrspace(1) null`, addrspace 1) 1429 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 1430 %val0 = extractvalue <{i32, i64}> %arg0, 0 1431 %val1 = extractvalue <{i32, i64}> %arg0, 1 1432 %val2 = extractvalue <{i32, i64}> %arg1, 0 1433 %val3 = extractvalue <{i32, i64}> %arg1, 1 1434 store volatile i32 %val0, ptr addrspace(1) null 1435 store volatile i64 %val1, ptr addrspace(1) null 1436 store volatile i32 %val2, ptr addrspace(1) null 1437 store volatile i64 %val3, ptr addrspace(1) null 1438 ret void 1439} 1440 1441define amdgpu_kernel void @unused_i32_arg(ptr addrspace(1) nocapture %out, i32 %unused, i32 %in) nounwind { 1442 ; HSA-VI-LABEL: name: unused_i32_arg 1443 ; HSA-VI: bb.1.entry: 1444 ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 1445 ; HSA-VI-NEXT: {{ $}} 1446 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 1447 ; HSA-VI-NEXT: S_ENDPGM 0 1448 ; 1449 ; LEGACY-MESA-VI-LABEL: name: unused_i32_arg 1450 ; LEGACY-MESA-VI: bb.1.entry: 1451 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 1452 ; LEGACY-MESA-VI-NEXT: {{ $}} 1453 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 1454 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 1455entry: 1456 ret void 1457} 1458 1459; Byref pointers should only be treated as offsets from kernarg 1460define amdgpu_kernel void @byref_constant_i8_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(i8) %in.byref) { 1461 ; HSA-VI-LABEL: name: byref_constant_i8_arg 1462 ; HSA-VI: bb.1 (%ir-block.0): 1463 ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 1464 ; HSA-VI-NEXT: {{ $}} 1465 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 1466 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 1467 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1468 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) 1469 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 1470 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1471 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8) from %ir.in.byref, addrspace 4) 1472 ; HSA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8) 1473 ; HSA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) 1474 ; HSA-VI-NEXT: S_ENDPGM 0 1475 ; 1476 ; LEGACY-MESA-VI-LABEL: name: byref_constant_i8_arg 1477 ; LEGACY-MESA-VI: bb.1 (%ir-block.0): 1478 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 1479 ; LEGACY-MESA-VI-NEXT: {{ $}} 1480 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 1481 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 1482 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1483 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) 1484 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 1485 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1486 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s8) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s8) from %ir.in.byref, addrspace 4) 1487 ; LEGACY-MESA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s8) 1488 ; LEGACY-MESA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) 1489 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 1490 %in = load i8, ptr addrspace(4) %in.byref 1491 %ext = zext i8 %in to i32 1492 store i32 %ext, ptr addrspace(1) %out, align 4 1493 ret void 1494} 1495 1496define amdgpu_kernel void @byref_constant_i16_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(i16) align 2 %in.byref) { 1497 ; HSA-VI-LABEL: name: byref_constant_i16_arg 1498 ; HSA-VI: bb.1 (%ir-block.0): 1499 ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 1500 ; HSA-VI-NEXT: {{ $}} 1501 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 1502 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 1503 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1504 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) 1505 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 1506 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1507 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16) from %ir.in.byref, addrspace 4) 1508 ; HSA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16) 1509 ; HSA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) 1510 ; HSA-VI-NEXT: S_ENDPGM 0 1511 ; 1512 ; LEGACY-MESA-VI-LABEL: name: byref_constant_i16_arg 1513 ; LEGACY-MESA-VI: bb.1 (%ir-block.0): 1514 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 1515 ; LEGACY-MESA-VI-NEXT: {{ $}} 1516 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 1517 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 1518 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1519 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) 1520 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 1521 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1522 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s16) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s16) from %ir.in.byref, addrspace 4) 1523 ; LEGACY-MESA-VI-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[LOAD1]](s16) 1524 ; LEGACY-MESA-VI-NEXT: G_STORE [[ZEXT]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) 1525 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 1526 %in = load i16, ptr addrspace(4) %in.byref 1527 %ext = zext i16 %in to i32 1528 store i32 %ext, ptr addrspace(1) %out, align 4 1529 ret void 1530} 1531 1532define amdgpu_kernel void @byref_constant_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(i32) align 4 %in.byref, i32 %after.offset) { 1533 ; HSA-VI-LABEL: name: byref_constant_i32_arg 1534 ; HSA-VI: bb.1 (%ir-block.0): 1535 ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 1536 ; HSA-VI-NEXT: {{ $}} 1537 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 1538 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 1539 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1540 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) 1541 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 1542 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1543 ; HSA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 1544 ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) 1545 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), addrspace 4) 1546 ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32) from %ir.in.byref, addrspace 4) 1547 ; HSA-VI-NEXT: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) 1548 ; HSA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) 1549 ; HSA-VI-NEXT: S_ENDPGM 0 1550 ; 1551 ; LEGACY-MESA-VI-LABEL: name: byref_constant_i32_arg 1552 ; LEGACY-MESA-VI: bb.1 (%ir-block.0): 1553 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 1554 ; LEGACY-MESA-VI-NEXT: {{ $}} 1555 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 1556 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 1557 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1558 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) 1559 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 1560 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1561 ; LEGACY-MESA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 1562 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) 1563 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4) 1564 ; LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32) from %ir.in.byref, addrspace 4) 1565 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) 1566 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) 1567 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 1568 %in = load i32, ptr addrspace(4) %in.byref 1569 store volatile i32 %in, ptr addrspace(1) %out, align 4 1570 store volatile i32 %after.offset, ptr addrspace(1) %out, align 4 1571 ret void 1572} 1573 1574define amdgpu_kernel void @byref_constant_v4i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(<4 x i32>) align(16) %in.byref, i32 %after.offset) { 1575 ; HSA-VI-LABEL: name: byref_constant_v4i32_arg 1576 ; HSA-VI: bb.1 (%ir-block.0): 1577 ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 1578 ; HSA-VI-NEXT: {{ $}} 1579 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 1580 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 1581 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1582 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) 1583 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 1584 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1585 ; HSA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 32 1586 ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) 1587 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4) 1588 ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s32>) from %ir.in.byref, addrspace 4) 1589 ; HSA-VI-NEXT: G_STORE [[LOAD2]](<4 x s32>), [[LOAD]](p1) :: (volatile store (<4 x s32>) into %ir.out, align 4, addrspace 1) 1590 ; HSA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) 1591 ; HSA-VI-NEXT: S_ENDPGM 0 1592 ; 1593 ; LEGACY-MESA-VI-LABEL: name: byref_constant_v4i32_arg 1594 ; LEGACY-MESA-VI: bb.1 (%ir-block.0): 1595 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 1596 ; LEGACY-MESA-VI-NEXT: {{ $}} 1597 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 1598 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 1599 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1600 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) 1601 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52 1602 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1603 ; LEGACY-MESA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 68 1604 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) 1605 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), addrspace 4) 1606 ; LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(<4 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<4 x s32>) from %ir.in.byref, addrspace 4) 1607 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](<4 x s32>), [[LOAD]](p1) :: (volatile store (<4 x s32>) into %ir.out, align 4, addrspace 1) 1608 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) 1609 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 1610 %in = load <4 x i32>, ptr addrspace(4) %in.byref 1611 store volatile <4 x i32> %in, ptr addrspace(1) %out, align 4 1612 store volatile i32 %after.offset, ptr addrspace(1) %out, align 4 1613 ret void 1614} 1615 1616define amdgpu_kernel void @byref_align_constant_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(i32) align(256) %in.byref, i32 %after.offset) { 1617 ; HSA-VI-LABEL: name: byref_align_constant_i32_arg 1618 ; HSA-VI: bb.1 (%ir-block.0): 1619 ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 1620 ; HSA-VI-NEXT: {{ $}} 1621 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 1622 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 1623 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1624 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) 1625 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 256 1626 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1627 ; HSA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 260 1628 ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) 1629 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), addrspace 4) 1630 ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32) from %ir.in.byref, addrspace 4) 1631 ; HSA-VI-NEXT: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) 1632 ; HSA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) 1633 ; HSA-VI-NEXT: S_ENDPGM 0 1634 ; 1635 ; LEGACY-MESA-VI-LABEL: name: byref_align_constant_i32_arg 1636 ; LEGACY-MESA-VI: bb.1 (%ir-block.0): 1637 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 1638 ; LEGACY-MESA-VI-NEXT: {{ $}} 1639 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 1640 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 1641 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1642 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) 1643 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 292 1644 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1645 ; LEGACY-MESA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 296 1646 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) 1647 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 8, addrspace 4) 1648 ; LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32) from %ir.in.byref, addrspace 4) 1649 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) 1650 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) 1651 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 1652 %in = load i32, ptr addrspace(4) %in.byref 1653 store volatile i32 %in, ptr addrspace(1) %out, align 4 1654 store volatile i32 %after.offset, ptr addrspace(1) %out, align 4 1655 ret void 1656} 1657 1658define amdgpu_kernel void @byref_natural_align_constant_v16i32_arg(ptr addrspace(1) nocapture %out, i8, ptr addrspace(4) byref(<16 x i32>) align(64) %in.byref, i32 %after.offset) { 1659 ; HSA-VI-LABEL: name: byref_natural_align_constant_v16i32_arg 1660 ; HSA-VI: bb.1 (%ir-block.1): 1661 ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 1662 ; HSA-VI-NEXT: {{ $}} 1663 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 1664 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 1665 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1666 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) 1667 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 64 1668 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1669 ; HSA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 128 1670 ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) 1671 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4) 1672 ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s32>) from %ir.in.byref, addrspace 4) 1673 ; HSA-VI-NEXT: G_STORE [[LOAD2]](<16 x s32>), [[LOAD]](p1) :: (volatile store (<16 x s32>) into %ir.out, align 4, addrspace 1) 1674 ; HSA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) 1675 ; HSA-VI-NEXT: S_ENDPGM 0 1676 ; 1677 ; LEGACY-MESA-VI-LABEL: name: byref_natural_align_constant_v16i32_arg 1678 ; LEGACY-MESA-VI: bb.1 (%ir-block.1): 1679 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 1680 ; LEGACY-MESA-VI-NEXT: {{ $}} 1681 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 1682 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 1683 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1684 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) 1685 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 100 1686 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1687 ; LEGACY-MESA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 164 1688 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) 1689 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32), addrspace 4) 1690 ; LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(<16 x s32>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<16 x s32>) from %ir.in.byref, addrspace 4) 1691 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](<16 x s32>), [[LOAD]](p1) :: (volatile store (<16 x s32>) into %ir.out, align 4, addrspace 1) 1692 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) 1693 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 1694 %in = load <16 x i32>, ptr addrspace(4) %in.byref 1695 store volatile <16 x i32> %in, ptr addrspace(1) %out, align 4 1696 store volatile i32 %after.offset, ptr addrspace(1) %out, align 4 1697 ret void 1698} 1699 1700; Also accept byref kernel arguments with other global address spaces. 1701define amdgpu_kernel void @byref_global_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(1) byref(i32) align(4) %in.byref) { 1702 ; HSA-VI-LABEL: name: byref_global_i32_arg 1703 ; HSA-VI: bb.1 (%ir-block.0): 1704 ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 1705 ; HSA-VI-NEXT: {{ $}} 1706 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 1707 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 1708 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1709 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) 1710 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 1711 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1712 ; HSA-VI-NEXT: [[ADDRSPACE_CAST:%[0-9]+]]:_(p1) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) 1713 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p1) :: (dereferenceable "amdgpu-noclobber" load (s32) from %ir.in.byref, addrspace 1) 1714 ; HSA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) 1715 ; HSA-VI-NEXT: S_ENDPGM 0 1716 ; 1717 ; LEGACY-MESA-VI-LABEL: name: byref_global_i32_arg 1718 ; LEGACY-MESA-VI: bb.1 (%ir-block.0): 1719 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 1720 ; LEGACY-MESA-VI-NEXT: {{ $}} 1721 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 1722 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 1723 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1724 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) 1725 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 1726 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1727 ; LEGACY-MESA-VI-NEXT: [[ADDRSPACE_CAST:%[0-9]+]]:_(p1) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) 1728 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p1) :: (dereferenceable "amdgpu-noclobber" load (s32) from %ir.in.byref, addrspace 1) 1729 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) 1730 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 1731 %in = load i32, ptr addrspace(1) %in.byref 1732 store i32 %in, ptr addrspace(1) %out, align 4 1733 ret void 1734} 1735 1736define amdgpu_kernel void @byref_flat_i32_arg(ptr addrspace(1) nocapture %out, ptr byref(i32) align(4) %in.byref) { 1737 ; HSA-VI-LABEL: name: byref_flat_i32_arg 1738 ; HSA-VI: bb.1 (%ir-block.0): 1739 ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 1740 ; HSA-VI-NEXT: {{ $}} 1741 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 1742 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 1743 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1744 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) 1745 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 1746 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1747 ; HSA-VI-NEXT: [[ADDRSPACE_CAST:%[0-9]+]]:_(p0) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) 1748 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p0) :: (dereferenceable load (s32) from %ir.in.byref) 1749 ; HSA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) 1750 ; HSA-VI-NEXT: S_ENDPGM 0 1751 ; 1752 ; LEGACY-MESA-VI-LABEL: name: byref_flat_i32_arg 1753 ; LEGACY-MESA-VI: bb.1 (%ir-block.0): 1754 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 1755 ; LEGACY-MESA-VI-NEXT: {{ $}} 1756 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 1757 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 1758 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1759 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) 1760 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 1761 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1762 ; LEGACY-MESA-VI-NEXT: [[ADDRSPACE_CAST:%[0-9]+]]:_(p0) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) 1763 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p0) :: (dereferenceable load (s32) from %ir.in.byref) 1764 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) 1765 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 1766 %in = load i32, ptr %in.byref 1767 store i32 %in, ptr addrspace(1) %out, align 4 1768 ret void 1769} 1770 1771define amdgpu_kernel void @byref_constant_32bit_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(6) byref(i32) align(4) %in.byref) { 1772 ; HSA-VI-LABEL: name: byref_constant_32bit_i32_arg 1773 ; HSA-VI: bb.1 (%ir-block.0): 1774 ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 1775 ; HSA-VI-NEXT: {{ $}} 1776 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 1777 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 1778 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1779 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) 1780 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 1781 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1782 ; HSA-VI-NEXT: [[ADDRSPACE_CAST:%[0-9]+]]:_(p6) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) 1783 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p6) :: (dereferenceable invariant load (s32) from %ir.in.byref, addrspace 6) 1784 ; HSA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) 1785 ; HSA-VI-NEXT: S_ENDPGM 0 1786 ; 1787 ; LEGACY-MESA-VI-LABEL: name: byref_constant_32bit_i32_arg 1788 ; LEGACY-MESA-VI: bb.1 (%ir-block.0): 1789 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 1790 ; LEGACY-MESA-VI-NEXT: {{ $}} 1791 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 1792 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 1793 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1794 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) 1795 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 1796 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1797 ; LEGACY-MESA-VI-NEXT: [[ADDRSPACE_CAST:%[0-9]+]]:_(p6) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) 1798 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p6) :: (dereferenceable invariant load (s32) from %ir.in.byref, addrspace 6) 1799 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) 1800 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 1801 %in = load i32, ptr addrspace(6) %in.byref 1802 store i32 %in, ptr addrspace(1) %out, align 4 1803 ret void 1804} 1805 1806define amdgpu_kernel void @byref_unknown_as_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(999) byref(i32) align(4) %in.byref) { 1807 ; HSA-VI-LABEL: name: byref_unknown_as_i32_arg 1808 ; HSA-VI: bb.1 (%ir-block.0): 1809 ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 1810 ; HSA-VI-NEXT: {{ $}} 1811 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 1812 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 1813 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1814 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) 1815 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 1816 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1817 ; HSA-VI-NEXT: [[ADDRSPACE_CAST:%[0-9]+]]:_(p999) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) 1818 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p999) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 999) 1819 ; HSA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) 1820 ; HSA-VI-NEXT: S_ENDPGM 0 1821 ; 1822 ; LEGACY-MESA-VI-LABEL: name: byref_unknown_as_i32_arg 1823 ; LEGACY-MESA-VI: bb.1 (%ir-block.0): 1824 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 1825 ; LEGACY-MESA-VI-NEXT: {{ $}} 1826 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 1827 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 1828 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1829 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) 1830 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 1831 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1832 ; LEGACY-MESA-VI-NEXT: [[ADDRSPACE_CAST:%[0-9]+]]:_(p999) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) 1833 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p999) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 999) 1834 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) 1835 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 1836 %in = load i32, ptr addrspace(999) %in.byref 1837 store i32 %in, ptr addrspace(1) %out, align 4 1838 ret void 1839} 1840 1841; Invalid, but should not crash. 1842define amdgpu_kernel void @byref_local_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(3) byref(i32) align(4) %in.byref) { 1843 ; HSA-VI-LABEL: name: byref_local_i32_arg 1844 ; HSA-VI: bb.1 (%ir-block.0): 1845 ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 1846 ; HSA-VI-NEXT: {{ $}} 1847 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 1848 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 1849 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1850 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) 1851 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 1852 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1853 ; HSA-VI-NEXT: [[ADDRSPACE_CAST:%[0-9]+]]:_(p3) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) 1854 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p3) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 3) 1855 ; HSA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) 1856 ; HSA-VI-NEXT: S_ENDPGM 0 1857 ; 1858 ; LEGACY-MESA-VI-LABEL: name: byref_local_i32_arg 1859 ; LEGACY-MESA-VI: bb.1 (%ir-block.0): 1860 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 1861 ; LEGACY-MESA-VI-NEXT: {{ $}} 1862 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 1863 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 1864 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1865 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) 1866 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 1867 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1868 ; LEGACY-MESA-VI-NEXT: [[ADDRSPACE_CAST:%[0-9]+]]:_(p3) = G_ADDRSPACE_CAST [[PTR_ADD1]](p4) 1869 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[ADDRSPACE_CAST]](p3) :: (dereferenceable load (s32) from %ir.in.byref, addrspace 3) 1870 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (store (s32) into %ir.out, addrspace 1) 1871 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 1872 %in = load i32, ptr addrspace(3) %in.byref 1873 store i32 %in, ptr addrspace(1) %out, align 4 1874 ret void 1875} 1876 1877define amdgpu_kernel void @multi_byref_constant_i32_arg(ptr addrspace(1) nocapture %out, ptr addrspace(4) byref(i32) align(4) %in0.byref, ptr addrspace(4) byref(i32) align(4) %in1.byref, i32 %after.offset) { 1878 ; HSA-VI-LABEL: name: multi_byref_constant_i32_arg 1879 ; HSA-VI: bb.1 (%ir-block.0): 1880 ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 1881 ; HSA-VI-NEXT: {{ $}} 1882 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 1883 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 1884 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1885 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 16, addrspace 4) 1886 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 8 1887 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1888 ; HSA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 12 1889 ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) 1890 ; HSA-VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 1891 ; HSA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) 1892 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s32), align 16, addrspace 4) 1893 ; HSA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32) from %ir.in0.byref, addrspace 4) 1894 ; HSA-VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32) from %ir.in1.byref, addrspace 4) 1895 ; HSA-VI-NEXT: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) 1896 ; HSA-VI-NEXT: G_STORE [[LOAD3]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) 1897 ; HSA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) 1898 ; HSA-VI-NEXT: S_ENDPGM 0 1899 ; 1900 ; LEGACY-MESA-VI-LABEL: name: multi_byref_constant_i32_arg 1901 ; LEGACY-MESA-VI: bb.1 (%ir-block.0): 1902 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 1903 ; LEGACY-MESA-VI-NEXT: {{ $}} 1904 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 1905 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 1906 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1907 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p1) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p1), align 4, addrspace 4) 1908 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 44 1909 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 1910 ; LEGACY-MESA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 48 1911 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C2]](s64) 1912 ; LEGACY-MESA-VI-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 52 1913 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD3:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C3]](s64) 1914 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD3]](p4) :: (dereferenceable invariant load (s32), addrspace 4) 1915 ; LEGACY-MESA-VI-NEXT: [[LOAD2:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (s32) from %ir.in0.byref, addrspace 4) 1916 ; LEGACY-MESA-VI-NEXT: [[LOAD3:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD2]](p4) :: (dereferenceable invariant load (s32) from %ir.in1.byref, addrspace 4) 1917 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD2]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) 1918 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD3]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) 1919 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](s32), [[LOAD]](p1) :: (volatile store (s32) into %ir.out, addrspace 1) 1920 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 1921 %in0 = load i32, ptr addrspace(4) %in0.byref 1922 %in1 = load i32, ptr addrspace(4) %in1.byref 1923 store volatile i32 %in0, ptr addrspace(1) %out, align 4 1924 store volatile i32 %in1, ptr addrspace(1) %out, align 4 1925 store volatile i32 %after.offset, ptr addrspace(1) %out, align 4 1926 ret void 1927} 1928 1929define amdgpu_kernel void @byref_constant_i32_arg_offset0(ptr addrspace(4) byref(i32) align(4) %in.byref) { 1930 ; HSA-VI-LABEL: name: byref_constant_i32_arg_offset0 1931 ; HSA-VI: bb.1 (%ir-block.0): 1932 ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 1933 ; HSA-VI-NEXT: {{ $}} 1934 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 1935 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 1936 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1937 ; HSA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF 1938 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32) from %ir.in.byref, addrspace 4) 1939 ; HSA-VI-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) 1940 ; HSA-VI-NEXT: S_ENDPGM 0 1941 ; 1942 ; LEGACY-MESA-VI-LABEL: name: byref_constant_i32_arg_offset0 1943 ; LEGACY-MESA-VI: bb.1 (%ir-block.0): 1944 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 1945 ; LEGACY-MESA-VI-NEXT: {{ $}} 1946 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 1947 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 1948 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1949 ; LEGACY-MESA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF 1950 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (s32) from %ir.in.byref, addrspace 4) 1951 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](s32), [[DEF]](p1) :: (store (s32) into `ptr addrspace(1) undef`, addrspace 1) 1952 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 1953 %in = load i32, ptr addrspace(4) %in.byref 1954 store i32 %in, ptr addrspace(1) undef, align 4 1955 ret void 1956} 1957 1958define amdgpu_kernel void @p3i8_arg(ptr addrspace(3) %arg) nounwind { 1959 ; HSA-VI-LABEL: name: p3i8_arg 1960 ; HSA-VI: bb.1 (%ir-block.0): 1961 ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 1962 ; HSA-VI-NEXT: {{ $}} 1963 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 1964 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 1965 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1966 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p3), align 16, addrspace 4) 1967 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 9 1968 ; HSA-VI-NEXT: G_STORE [[C1]](s8), [[LOAD]](p3) :: (store (s8) into %ir.arg, align 4, addrspace 3) 1969 ; HSA-VI-NEXT: S_ENDPGM 0 1970 ; 1971 ; LEGACY-MESA-VI-LABEL: name: p3i8_arg 1972 ; LEGACY-MESA-VI: bb.1 (%ir-block.0): 1973 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 1974 ; LEGACY-MESA-VI-NEXT: {{ $}} 1975 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 1976 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 1977 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 1978 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(p3) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (p3), addrspace 4) 1979 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s8) = G_CONSTANT i8 9 1980 ; LEGACY-MESA-VI-NEXT: G_STORE [[C1]](s8), [[LOAD]](p3) :: (store (s8) into %ir.arg, align 4, addrspace 3) 1981 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 1982 store i8 9, ptr addrspace(3) %arg, align 4 1983 ret void 1984} 1985 1986define amdgpu_kernel void @p1i8_arg(ptr addrspace(1) %arg) nounwind { 1987 ; HSA-VI-LABEL: name: p1i8_arg 1988 ; HSA-VI: bb.1 (%ir-block.0): 1989 ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 1990 ; HSA-VI-NEXT: {{ $}} 1991 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 1992 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 9 1993 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(p3) = G_CONSTANT i32 0 1994 ; HSA-VI-NEXT: G_STORE [[C]](s8), [[C1]](p3) :: (store (s8) into `ptr addrspace(3) null`, addrspace 3) 1995 ; HSA-VI-NEXT: S_ENDPGM 0 1996 ; 1997 ; LEGACY-MESA-VI-LABEL: name: p1i8_arg 1998 ; LEGACY-MESA-VI: bb.1 (%ir-block.0): 1999 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 2000 ; LEGACY-MESA-VI-NEXT: {{ $}} 2001 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 2002 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s8) = G_CONSTANT i8 9 2003 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(p3) = G_CONSTANT i32 0 2004 ; LEGACY-MESA-VI-NEXT: G_STORE [[C]](s8), [[C1]](p3) :: (store (s8) into `ptr addrspace(3) null`, addrspace 3) 2005 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 2006 store i8 9, ptr addrspace(3) null 2007 ret void 2008} 2009 2010define amdgpu_kernel void @v2p1i8_arg(<2 x ptr addrspace(1)> %arg) nounwind { 2011 ; HSA-VI-LABEL: name: v2p1i8_arg 2012 ; HSA-VI: bb.1 (%ir-block.0): 2013 ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 2014 ; HSA-VI-NEXT: {{ $}} 2015 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 2016 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 2017 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 2018 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x p1>), addrspace 4) 2019 ; HSA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF 2020 ; HSA-VI-NEXT: G_STORE [[LOAD]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) undef`, addrspace 1) 2021 ; HSA-VI-NEXT: S_ENDPGM 0 2022 ; 2023 ; LEGACY-MESA-VI-LABEL: name: v2p1i8_arg 2024 ; LEGACY-MESA-VI: bb.1 (%ir-block.0): 2025 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 2026 ; LEGACY-MESA-VI-NEXT: {{ $}} 2027 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 2028 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 2029 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 2030 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x p1>), align 4, addrspace 4) 2031 ; LEGACY-MESA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF 2032 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) undef`, addrspace 1) 2033 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 2034 store <2 x ptr addrspace(1)> %arg, ptr addrspace(1) undef 2035 ret void 2036} 2037 2038define amdgpu_kernel void @v2p3i8_arg(<2 x ptr addrspace(3)> %arg) nounwind { 2039 ; HSA-VI-LABEL: name: v2p3i8_arg 2040 ; HSA-VI: bb.1 (%ir-block.0): 2041 ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 2042 ; HSA-VI-NEXT: {{ $}} 2043 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 2044 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 2045 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 2046 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x p3>), align 16, addrspace 4) 2047 ; HSA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF 2048 ; HSA-VI-NEXT: G_STORE [[LOAD]](<2 x p3>), [[DEF]](p1) :: (store (<2 x p3>) into `ptr addrspace(1) undef`, addrspace 1) 2049 ; HSA-VI-NEXT: S_ENDPGM 0 2050 ; 2051 ; LEGACY-MESA-VI-LABEL: name: v2p3i8_arg 2052 ; LEGACY-MESA-VI: bb.1 (%ir-block.0): 2053 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 2054 ; LEGACY-MESA-VI-NEXT: {{ $}} 2055 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 2056 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 2057 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 2058 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x p3>), align 4, addrspace 4) 2059 ; LEGACY-MESA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF 2060 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](<2 x p3>), [[DEF]](p1) :: (store (<2 x p3>) into `ptr addrspace(1) undef`, addrspace 1) 2061 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 2062 store <2 x ptr addrspace(3)> %arg, ptr addrspace(1) undef 2063 ret void 2064} 2065 2066define amdgpu_kernel void @v2p1i8_in_struct_arg({ <2 x ptr addrspace(1)>, <2 x ptr addrspace(3)> } %arg) nounwind { 2067 ; HSA-VI-LABEL: name: v2p1i8_in_struct_arg 2068 ; HSA-VI: bb.1 (%ir-block.0): 2069 ; HSA-VI-NEXT: liveins: $sgpr8_sgpr9 2070 ; HSA-VI-NEXT: {{ $}} 2071 ; HSA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr8_sgpr9 2072 ; HSA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 2073 ; HSA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 2074 ; HSA-VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x s64>), addrspace 4) 2075 ; HSA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 2076 ; HSA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 2077 ; HSA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), align 16, addrspace 4) 2078 ; HSA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF 2079 ; HSA-VI-NEXT: G_STORE [[LOAD]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) undef`, addrspace 1) 2080 ; HSA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 2081 ; HSA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) 2082 ; HSA-VI-NEXT: G_STORE [[LOAD1]](<2 x p3>), [[PTR_ADD2]](p1) :: (store (<2 x p3>) into `ptr addrspace(1) undef` + 16, align 16, addrspace 1) 2083 ; HSA-VI-NEXT: S_ENDPGM 0 2084 ; 2085 ; LEGACY-MESA-VI-LABEL: name: v2p1i8_in_struct_arg 2086 ; LEGACY-MESA-VI: bb.1 (%ir-block.0): 2087 ; LEGACY-MESA-VI-NEXT: liveins: $sgpr4_sgpr5 2088 ; LEGACY-MESA-VI-NEXT: {{ $}} 2089 ; LEGACY-MESA-VI-NEXT: [[COPY:%[0-9]+]]:_(p4) = COPY $sgpr4_sgpr5 2090 ; LEGACY-MESA-VI-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 36 2091 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C]](s64) 2092 ; LEGACY-MESA-VI-NEXT: [[LOAD:%[0-9]+]]:_(<2 x p1>) = G_LOAD [[PTR_ADD]](p4) :: (dereferenceable invariant load (<2 x s64>), align 4, addrspace 4) 2093 ; LEGACY-MESA-VI-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 52 2094 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD1:%[0-9]+]]:_(p4) = G_PTR_ADD [[COPY]], [[C1]](s64) 2095 ; LEGACY-MESA-VI-NEXT: [[LOAD1:%[0-9]+]]:_(<2 x p3>) = G_LOAD [[PTR_ADD1]](p4) :: (dereferenceable invariant load (<2 x s32>), align 4, addrspace 4) 2096 ; LEGACY-MESA-VI-NEXT: [[DEF:%[0-9]+]]:_(p1) = G_IMPLICIT_DEF 2097 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD]](<2 x p1>), [[DEF]](p1) :: (store (<2 x p1>) into `ptr addrspace(1) undef`, addrspace 1) 2098 ; LEGACY-MESA-VI-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 16 2099 ; LEGACY-MESA-VI-NEXT: [[PTR_ADD2:%[0-9]+]]:_(p1) = G_PTR_ADD [[DEF]], [[C2]](s64) 2100 ; LEGACY-MESA-VI-NEXT: G_STORE [[LOAD1]](<2 x p3>), [[PTR_ADD2]](p1) :: (store (<2 x p3>) into `ptr addrspace(1) undef` + 16, align 16, addrspace 1) 2101 ; LEGACY-MESA-VI-NEXT: S_ENDPGM 0 2102 store { <2 x ptr addrspace(1)>, <2 x ptr addrspace(3)> } %arg, ptr addrspace(1) undef 2103 ret void 2104} 2105