; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX12,GFX12-SDAG %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX11 %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX12,GFX12-GISEL %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX11 %s

; Lowering of the llvm.prefetch intrinsic on AMDGPU, for both SelectionDAG and
; GlobalISel. The intrinsic operands are (address, rw, locality, cache type).
; Every call in this file passes rw = 0 and locality = 0; only the address and
; the cache type vary:
;   cache type 1 - data prefetch, expected to select s_prefetch_data on gfx1200
;   cache type 0 - instruction prefetch, expected to select s_prefetch_inst on
;                  gfx1200
; On gfx1100 the expected output never contains a prefetch instruction.

; Scalar data prefetch

; Uniform (inreg) constant-address-space pointer, no offset.
define amdgpu_ps void @prefetch_data_sgpr(ptr addrspace(4) inreg %ptr) {
; GFX12-LABEL: prefetch_data_sgpr:
; GFX12:       ; %bb.0: ; %entry
; GFX12-NEXT:    s_prefetch_data s[0:1], 0x0, null, 0
; GFX12-NEXT:    s_endpgm
;
; GFX11-LABEL: prefetch_data_sgpr:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_endpgm
entry:
  tail call void @llvm.prefetch.p4(ptr addrspace(4) %ptr, i32 0, i32 0, i32 1)
  ret void
}

; 128 floats = 512 bytes (0x200); the constant offset is expected to be folded
; into the immediate field of the prefetch.
define amdgpu_ps void @prefetch_data_sgpr_offset(ptr addrspace(4) inreg %ptr) {
; GFX12-LABEL: prefetch_data_sgpr_offset:
; GFX12:       ; %bb.0: ; %entry
; GFX12-NEXT:    s_prefetch_data s[0:1], 0x200, null, 0
; GFX12-NEXT:    s_endpgm
;
; GFX11-LABEL: prefetch_data_sgpr_offset:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_endpgm
entry:
  %gep = getelementptr float, ptr addrspace(4) %ptr, i32 128
  tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 1)
  ret void
}

; Check large offsets

; 8388607 = 0x7fffff is the largest offset the expected output still folds into
; the immediate field.
define amdgpu_ps void @prefetch_data_sgpr_max_offset(ptr addrspace(4) inreg %ptr) {
; GFX12-LABEL: prefetch_data_sgpr_max_offset:
; GFX12:       ; %bb.0: ; %entry
; GFX12-NEXT:    s_prefetch_data s[0:1], 0x7fffff, null, 0
; GFX12-NEXT:    s_endpgm
;
; GFX11-LABEL: prefetch_data_sgpr_max_offset:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_endpgm
entry:
  %gep = getelementptr i8, ptr addrspace(4) %ptr, i32 8388607
  tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 1)
  ret void
}

; -8388608 = -0x800000: the negative offset is not folded; the expected output
; adds it to the base address with a 64-bit scalar add and prefetches at
; offset 0.
define amdgpu_ps void @prefetch_data_sgpr_min_offset(ptr addrspace(4) inreg %ptr) {
; GFX12-SDAG-LABEL: prefetch_data_sgpr_min_offset:
; GFX12-SDAG:       ; %bb.0: ; %entry
; GFX12-SDAG-NEXT:    s_mov_b32 s2, 0xff800000
; GFX12-SDAG-NEXT:    s_mov_b32 s3, -1
; GFX12-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-SDAG-NEXT:    s_add_nc_u64 s[0:1], s[0:1], s[2:3]
; GFX12-SDAG-NEXT:    s_prefetch_data s[0:1], 0x0, null, 0
; GFX12-SDAG-NEXT:    s_endpgm
;
; GFX11-LABEL: prefetch_data_sgpr_min_offset:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_endpgm
;
; GFX12-GISEL-LABEL: prefetch_data_sgpr_min_offset:
; GFX12-GISEL:       ; %bb.0: ; %entry
; GFX12-GISEL-NEXT:    s_add_co_u32 s0, s0, 0xff800000
; GFX12-GISEL-NEXT:    s_add_co_ci_u32 s1, s1, -1
; GFX12-GISEL-NEXT:    s_prefetch_data s[0:1], 0x0, null, 0
; GFX12-GISEL-NEXT:    s_endpgm
entry:
  %gep = getelementptr i8, ptr addrspace(4) %ptr, i32 -8388608
  tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 1)
  ret void
}

; 8388608 = 0x800000 is one past the largest folded offset; the expected output
; adds it to the base address explicitly and prefetches at offset 0.
define amdgpu_ps void @prefetch_data_sgpr_too_large_offset(ptr addrspace(4) inreg %ptr) {
; GFX12-SDAG-LABEL: prefetch_data_sgpr_too_large_offset:
; GFX12-SDAG:       ; %bb.0: ; %entry
; GFX12-SDAG-NEXT:    s_add_nc_u64 s[0:1], s[0:1], 0x800000
; GFX12-SDAG-NEXT:    s_prefetch_data s[0:1], 0x0, null, 0
; GFX12-SDAG-NEXT:    s_endpgm
;
; GFX11-LABEL: prefetch_data_sgpr_too_large_offset:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_endpgm
;
; GFX12-GISEL-LABEL: prefetch_data_sgpr_too_large_offset:
; GFX12-GISEL:       ; %bb.0: ; %entry
; GFX12-GISEL-NEXT:    s_add_co_u32 s0, s0, 0x800000
; GFX12-GISEL-NEXT:    s_add_co_ci_u32 s1, s1, 0
; GFX12-GISEL-NEXT:    s_prefetch_data s[0:1], 0x0, null, 0
; GFX12-GISEL-NEXT:    s_endpgm
entry:
  %gep = getelementptr i8, ptr addrspace(4) %ptr, i32 8388608
  tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 1)
  ret void
}

; Check divergent address

; The pointer is not inreg here, so it arrives in VGPRs; no prefetch is
; expected on any target.
define amdgpu_ps void @prefetch_data_vgpr(ptr addrspace(1) %ptr) {
; GCN-LABEL: prefetch_data_vgpr:
; GCN:       ; %bb.0: ; %entry
; GCN-NEXT:    s_endpgm
entry:
  tail call void @llvm.prefetch.p1(ptr addrspace(1) %ptr, i32 0, i32 0, i32 1)
  ret void
}

; Check LDS and Scratch, we cannot prefetch them

define amdgpu_ps void @prefetch_data_lds(ptr addrspace(3) inreg %ptr) {
; GCN-LABEL: prefetch_data_lds:
; GCN:       ; %bb.0: ; %entry
; GCN-NEXT:    s_endpgm
entry:
  tail call void @llvm.prefetch.p3(ptr addrspace(3) %ptr, i32 0, i32 0, i32 1)
  ret void
}

define amdgpu_ps void @prefetch_data_scratch(ptr addrspace(5) inreg %ptr) {
; GCN-LABEL: prefetch_data_scratch:
; GCN:       ; %bb.0: ; %entry
; GCN-NEXT:    s_endpgm
entry:
  tail call void @llvm.prefetch.p5(ptr addrspace(5) %ptr, i32 0, i32 0, i32 1)
  ret void
}

; Check supported address spaces

; Flat pointer (address space 0).
define amdgpu_ps void @prefetch_data_sgpr_flat(ptr inreg %ptr) {
; GFX12-LABEL: prefetch_data_sgpr_flat:
; GFX12:       ; %bb.0: ; %entry
; GFX12-NEXT:    s_prefetch_data s[0:1], 0x0, null, 0
; GFX12-NEXT:    s_endpgm
;
; GFX11-LABEL: prefetch_data_sgpr_flat:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_endpgm
entry:
  tail call void @llvm.prefetch.p0(ptr %ptr, i32 0, i32 0, i32 1)
  ret void
}

; Global pointer (address space 1), uniform this time.
define amdgpu_ps void @prefetch_data_sgpr_global(ptr addrspace(1) inreg %ptr) {
; GFX12-LABEL: prefetch_data_sgpr_global:
; GFX12:       ; %bb.0: ; %entry
; GFX12-NEXT:    s_prefetch_data s[0:1], 0x0, null, 0
; GFX12-NEXT:    s_endpgm
;
; GFX11-LABEL: prefetch_data_sgpr_global:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_endpgm
entry:
  tail call void @llvm.prefetch.p1(ptr addrspace(1) %ptr, i32 0, i32 0, i32 1)
  ret void
}

; 32-bit constant pointer (address space 6); the expected output zeroes the
; high half (s1) to form the 64-bit base address.
define amdgpu_ps void @prefetch_data_sgpr_constant_32bit(ptr addrspace(6) inreg %ptr) {
; GFX12-LABEL: prefetch_data_sgpr_constant_32bit:
; GFX12:       ; %bb.0: ; %entry
; GFX12-NEXT:    s_mov_b32 s1, 0
; GFX12-NEXT:    s_prefetch_data s[0:1], 0x0, null, 0
; GFX12-NEXT:    s_endpgm
;
; GFX11-LABEL: prefetch_data_sgpr_constant_32bit:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_endpgm
entry:
  tail call void @llvm.prefetch.p6(ptr addrspace(6) %ptr, i32 0, i32 0, i32 1)
  ret void
}

; I$ prefetch

; Same shapes as the data tests above, with cache type 0 (instruction cache).
define amdgpu_ps void @prefetch_inst_sgpr(ptr addrspace(4) inreg %ptr) {
; GFX12-LABEL: prefetch_inst_sgpr:
; GFX12:       ; %bb.0: ; %entry
; GFX12-NEXT:    s_prefetch_inst s[0:1], 0x0, null, 0
; GFX12-NEXT:    s_endpgm
;
; GFX11-LABEL: prefetch_inst_sgpr:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_endpgm
entry:
  tail call void @llvm.prefetch.p4(ptr addrspace(4) %ptr, i32 0, i32 0, i32 0)
  ret void
}

; 128 bytes = 0x80, folded into the immediate field.
define amdgpu_ps void @prefetch_inst_sgpr_offset(ptr addrspace(4) inreg %ptr) {
; GFX12-LABEL: prefetch_inst_sgpr_offset:
; GFX12:       ; %bb.0: ; %entry
; GFX12-NEXT:    s_prefetch_inst s[0:1], 0x80, null, 0
; GFX12-NEXT:    s_endpgm
;
; GFX11-LABEL: prefetch_inst_sgpr_offset:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_endpgm
entry:
  %gep = getelementptr i8, ptr addrspace(4) %ptr, i32 128
  tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 0)
  ret void
}

; Check large offsets

; Largest folded offset (0x7fffff).
define amdgpu_ps void @prefetch_inst_sgpr_max_offset(ptr addrspace(4) inreg %ptr) {
; GFX12-LABEL: prefetch_inst_sgpr_max_offset:
; GFX12:       ; %bb.0: ; %entry
; GFX12-NEXT:    s_prefetch_inst s[0:1], 0x7fffff, null, 0
; GFX12-NEXT:    s_endpgm
;
; GFX11-LABEL: prefetch_inst_sgpr_max_offset:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_endpgm
entry:
  %gep = getelementptr i8, ptr addrspace(4) %ptr, i32 8388607
  tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 0)
  ret void
}

; Negative offset (-0x800000): expected to be added to the base address
; explicitly rather than folded.
define amdgpu_ps void @prefetch_inst_sgpr_min_offset(ptr addrspace(4) inreg %ptr) {
; GFX12-SDAG-LABEL: prefetch_inst_sgpr_min_offset:
; GFX12-SDAG:       ; %bb.0: ; %entry
; GFX12-SDAG-NEXT:    s_mov_b32 s2, 0xff800000
; GFX12-SDAG-NEXT:    s_mov_b32 s3, -1
; GFX12-SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX12-SDAG-NEXT:    s_add_nc_u64 s[0:1], s[0:1], s[2:3]
; GFX12-SDAG-NEXT:    s_prefetch_inst s[0:1], 0x0, null, 0
; GFX12-SDAG-NEXT:    s_endpgm
;
; GFX11-LABEL: prefetch_inst_sgpr_min_offset:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_endpgm
;
; GFX12-GISEL-LABEL: prefetch_inst_sgpr_min_offset:
; GFX12-GISEL:       ; %bb.0: ; %entry
; GFX12-GISEL-NEXT:    s_add_co_u32 s0, s0, 0xff800000
; GFX12-GISEL-NEXT:    s_add_co_ci_u32 s1, s1, -1
; GFX12-GISEL-NEXT:    s_prefetch_inst s[0:1], 0x0, null, 0
; GFX12-GISEL-NEXT:    s_endpgm
entry:
  %gep = getelementptr i8, ptr addrspace(4) %ptr, i32 -8388608
  tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 0)
  ret void
}

; One past the largest folded offset (0x800000): expected to be added to the
; base address explicitly.
define amdgpu_ps void @prefetch_inst_sgpr_too_large_offset(ptr addrspace(4) inreg %ptr) {
; GFX12-SDAG-LABEL: prefetch_inst_sgpr_too_large_offset:
; GFX12-SDAG:       ; %bb.0: ; %entry
; GFX12-SDAG-NEXT:    s_add_nc_u64 s[0:1], s[0:1], 0x800000
; GFX12-SDAG-NEXT:    s_prefetch_inst s[0:1], 0x0, null, 0
; GFX12-SDAG-NEXT:    s_endpgm
;
; GFX11-LABEL: prefetch_inst_sgpr_too_large_offset:
; GFX11:       ; %bb.0: ; %entry
; GFX11-NEXT:    s_endpgm
;
; GFX12-GISEL-LABEL: prefetch_inst_sgpr_too_large_offset:
; GFX12-GISEL:       ; %bb.0: ; %entry
; GFX12-GISEL-NEXT:    s_add_co_u32 s0, s0, 0x800000
; GFX12-GISEL-NEXT:    s_add_co_ci_u32 s1, s1, 0
; GFX12-GISEL-NEXT:    s_prefetch_inst s[0:1], 0x0, null, 0
; GFX12-GISEL-NEXT:    s_endpgm
entry:
  %gep = getelementptr i8, ptr addrspace(4) %ptr, i32 8388608
  tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 0)
  ret void
}

; One overload per pointer address space exercised above.
declare void @llvm.prefetch.p0(ptr nocapture readonly, i32, i32, i32)
declare void @llvm.prefetch.p1(ptr addrspace(1) nocapture readonly, i32, i32, i32)
declare void @llvm.prefetch.p3(ptr addrspace(3) nocapture readonly, i32, i32, i32)
declare void @llvm.prefetch.p4(ptr addrspace(4) nocapture readonly, i32, i32, i32)
declare void @llvm.prefetch.p5(ptr addrspace(5) nocapture readonly, i32, i32, i32)
declare void @llvm.prefetch.p6(ptr addrspace(6) nocapture readonly, i32, i32, i32)