; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GFX6 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12 %s

; This file exercises GlobalISel code generation for the 3D image load
; intrinsic on four subtargets (tahiti, gfx1010, gfx1100, gfx1200). The
; assertion lines inside each function are machine-generated; regenerate them
; with utils/update_llc_test_checks.py instead of editing them by hand.

; Plain 3D load: the first immarg is 15 and both trailing immargs are 0. The
; expected assembly is a single image_load with dmask 0xf writing v[0:3], after
; the resource descriptor has been copied from s[2:9] down to s[0:7].
define amdgpu_ps <4 x float> @load_3d_v4f32_xyzw(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %r) {
; GFX6-LABEL: load_3d_v4f32_xyzw:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_load v[0:3], v[0:2], s[0:7] dmask:0xf unorm
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: load_3d_v4f32_xyzw:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
; GFX10PLUS-NEXT:    image_load v[0:3], v[0:2], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: load_3d_v4f32_xyzw:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    image_load v[0:3], [v0, v1, v2], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
  %v = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; Struct-returning variant with the second-to-last immarg set to 1; the
; expected image_load carries the "tfe" modifier and writes five registers
; v[0:4]. Field 0 of the result is returned and field 1 is stored to %out.
; The expected code zeroes v[0:4] before the load.
; NOTE(review): the immarg is presumably the texfailctrl operand with bit 0
; selecting TFE - confirm against the AMDGPU intrinsic definitions.
define amdgpu_ps <4 x float> @load_3d_v4f32_xyzw_tfe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %r) {
; GFX6-LABEL: load_3d_v4f32_xyzw_tfe:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_mov_b32_e32 v5, v0
; GFX6-NEXT:    v_mov_b32_e32 v0, 0
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    v_mov_b32_e32 v6, v1
; GFX6-NEXT:    v_mov_b32_e32 v7, v2
; GFX6-NEXT:    v_mov_b32_e32 v1, v0
; GFX6-NEXT:    v_mov_b32_e32 v2, v0
; GFX6-NEXT:    v_mov_b32_e32 v3, v0
; GFX6-NEXT:    v_mov_b32_e32 v4, v0
; GFX6-NEXT:    image_load v[0:4], v[5:7], s[0:7] dmask:0xf unorm tfe
; GFX6-NEXT:    s_mov_b32 s8, s10
; GFX6-NEXT:    s_mov_b32 s9, s11
; GFX6-NEXT:    s_mov_b32 s10, -1
; GFX6-NEXT:    s_mov_b32 s11, 0xf000
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    buffer_store_dword v4, off, s[8:11], 0
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_3d_v4f32_xyzw_tfe:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mov_b32_e32 v8, 0
; GFX10-NEXT:    v_mov_b32_e32 v5, v0
; GFX10-NEXT:    v_mov_b32_e32 v6, v1
; GFX10-NEXT:    v_mov_b32_e32 v7, v2
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    v_mov_b32_e32 v9, v8
; GFX10-NEXT:    v_mov_b32_e32 v10, v8
; GFX10-NEXT:    v_mov_b32_e32 v11, v8
; GFX10-NEXT:    v_mov_b32_e32 v12, v8
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    v_mov_b32_e32 v0, v8
; GFX10-NEXT:    v_mov_b32_e32 v1, v9
; GFX10-NEXT:    v_mov_b32_e32 v2, v10
; GFX10-NEXT:    v_mov_b32_e32 v3, v11
; GFX10-NEXT:    v_mov_b32_e32 v4, v12
; GFX10-NEXT:    image_load v[0:4], v[5:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm tfe
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_dword v8, v4, s[10:11]
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: load_3d_v4f32_xyzw_tfe:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_dual_mov_b32 v7, v2 :: v_dual_mov_b32 v8, 0
; GFX11-NEXT:    v_dual_mov_b32 v5, v0 :: v_dual_mov_b32 v6, v1
; GFX11-NEXT:    s_mov_b32 s0, s2
; GFX11-NEXT:    s_mov_b32 s1, s3
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT:    v_mov_b32_e32 v9, v8
; GFX11-NEXT:    v_mov_b32_e32 v10, v8
; GFX11-NEXT:    v_mov_b32_e32 v11, v8
; GFX11-NEXT:    v_mov_b32_e32 v12, v8
; GFX11-NEXT:    s_mov_b32 s2, s4
; GFX11-NEXT:    s_mov_b32 s3, s5
; GFX11-NEXT:    s_mov_b32 s4, s6
; GFX11-NEXT:    s_mov_b32 s5, s7
; GFX11-NEXT:    s_mov_b32 s6, s8
; GFX11-NEXT:    s_mov_b32 s7, s9
; GFX11-NEXT:    v_dual_mov_b32 v0, v8 :: v_dual_mov_b32 v1, v9
; GFX11-NEXT:    v_dual_mov_b32 v2, v10 :: v_dual_mov_b32 v3, v11
; GFX11-NEXT:    v_mov_b32_e32 v4, v12
; GFX11-NEXT:    image_load v[0:4], v[5:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm tfe
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b32 v8, v4, s[10:11]
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: load_3d_v4f32_xyzw_tfe:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    v_dual_mov_b32 v7, v2 :: v_dual_mov_b32 v8, 0
; GFX12-NEXT:    v_dual_mov_b32 v5, v0 :: v_dual_mov_b32 v6, v1
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2)
; GFX12-NEXT:    v_dual_mov_b32 v9, v8 :: v_dual_mov_b32 v10, v8
; GFX12-NEXT:    v_dual_mov_b32 v11, v8 :: v_dual_mov_b32 v12, v8
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    v_dual_mov_b32 v0, v8 :: v_dual_mov_b32 v1, v9
; GFX12-NEXT:    v_dual_mov_b32 v2, v10 :: v_dual_mov_b32 v3, v11
; GFX12-NEXT:    v_mov_b32_e32 v4, v12
; GFX12-NEXT:    image_load v[0:4], [v5, v6, v7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D tfe
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    global_store_b32 v8, v4, s[10:11]
; GFX12-NEXT:    ; return to shader part epilog
  %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.3d.sl_v4f32i32s.i32(i32 15, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 1, i32 0)
  %v.vec = extractvalue { <4 x float>, i32 } %v, 0
  %v.err = extractvalue { <4 x float>, i32 } %v, 1
  store i32 %v.err, ptr addrspace(1) %out, align 4
  ret <4 x float> %v.vec
}

; Same shape as the previous test, but the second-to-last immarg is 3. The
; expected image_load carries "tfe lwe" for the tahiti, gfx1010 and gfx1100
; runs, while the gfx1200 expectations list only "tfe". Field 0 of the result
; is returned and field 1 is stored to %out.
; NOTE(review): presumably bit 1 of that immarg selects LWE and gfx1200 has no
; lwe modifier - confirm against the AMDGPU backend documentation.
define amdgpu_ps <4 x float> @load_3d_v4f32_xyzw_tfe_lwe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %r) {
; GFX6-LABEL: load_3d_v4f32_xyzw_tfe_lwe:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_mov_b32_e32 v5, v0
; GFX6-NEXT:    v_mov_b32_e32 v0, 0
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    v_mov_b32_e32 v6, v1
; GFX6-NEXT:    v_mov_b32_e32 v7, v2
; GFX6-NEXT:    v_mov_b32_e32 v1, v0
; GFX6-NEXT:    v_mov_b32_e32 v2, v0
; GFX6-NEXT:    v_mov_b32_e32 v3, v0
; GFX6-NEXT:    v_mov_b32_e32 v4, v0
; GFX6-NEXT:    image_load v[0:4], v[5:7], s[0:7] dmask:0xf unorm tfe lwe
; GFX6-NEXT:    s_mov_b32 s8, s10
; GFX6-NEXT:    s_mov_b32 s9, s11
; GFX6-NEXT:    s_mov_b32 s10, -1
; GFX6-NEXT:    s_mov_b32 s11, 0xf000
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    buffer_store_dword v4, off, s[8:11], 0
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_3d_v4f32_xyzw_tfe_lwe:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mov_b32_e32 v8, 0
; GFX10-NEXT:    v_mov_b32_e32 v5, v0
; GFX10-NEXT:    v_mov_b32_e32 v6, v1
; GFX10-NEXT:    v_mov_b32_e32 v7, v2
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    v_mov_b32_e32 v9, v8
; GFX10-NEXT:    v_mov_b32_e32 v10, v8
; GFX10-NEXT:    v_mov_b32_e32 v11, v8
; GFX10-NEXT:    v_mov_b32_e32 v12, v8
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    v_mov_b32_e32 v0, v8
; GFX10-NEXT:    v_mov_b32_e32 v1, v9
; GFX10-NEXT:    v_mov_b32_e32 v2, v10
; GFX10-NEXT:    v_mov_b32_e32 v3, v11
; GFX10-NEXT:    v_mov_b32_e32 v4, v12
; GFX10-NEXT:    image_load v[0:4], v[5:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm tfe lwe
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_dword v8, v4, s[10:11]
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: load_3d_v4f32_xyzw_tfe_lwe:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_dual_mov_b32 v7, v2 :: v_dual_mov_b32 v8, 0
; GFX11-NEXT:    v_dual_mov_b32 v5, v0 :: v_dual_mov_b32 v6, v1
; GFX11-NEXT:    s_mov_b32 s0, s2
; GFX11-NEXT:    s_mov_b32 s1, s3
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT:    v_mov_b32_e32 v9, v8
; GFX11-NEXT:    v_mov_b32_e32 v10, v8
; GFX11-NEXT:    v_mov_b32_e32 v11, v8
; GFX11-NEXT:    v_mov_b32_e32 v12, v8
; GFX11-NEXT:    s_mov_b32 s2, s4
; GFX11-NEXT:    s_mov_b32 s3, s5
; GFX11-NEXT:    s_mov_b32 s4, s6
; GFX11-NEXT:    s_mov_b32 s5, s7
; GFX11-NEXT:    s_mov_b32 s6, s8
; GFX11-NEXT:    s_mov_b32 s7, s9
; GFX11-NEXT:    v_dual_mov_b32 v0, v8 :: v_dual_mov_b32 v1, v9
; GFX11-NEXT:    v_dual_mov_b32 v2, v10 :: v_dual_mov_b32 v3, v11
; GFX11-NEXT:    v_mov_b32_e32 v4, v12
; GFX11-NEXT:    image_load v[0:4], v[5:7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D unorm tfe lwe
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b32 v8, v4, s[10:11]
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: load_3d_v4f32_xyzw_tfe_lwe:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    v_dual_mov_b32 v7, v2 :: v_dual_mov_b32 v8, 0
; GFX12-NEXT:    v_dual_mov_b32 v5, v0 :: v_dual_mov_b32 v6, v1
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2)
; GFX12-NEXT:    v_dual_mov_b32 v9, v8 :: v_dual_mov_b32 v10, v8
; GFX12-NEXT:    v_dual_mov_b32 v11, v8 :: v_dual_mov_b32 v12, v8
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    v_dual_mov_b32 v0, v8 :: v_dual_mov_b32 v1, v9
; GFX12-NEXT:    v_dual_mov_b32 v2, v10 :: v_dual_mov_b32 v3, v11
; GFX12-NEXT:    v_mov_b32_e32 v4, v12
; GFX12-NEXT:    image_load v[0:4], [v5, v6, v7], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_3D tfe
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    global_store_b32 v8, v4, s[10:11]
; GFX12-NEXT:    ; return to shader part epilog
  %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.3d.sl_v4f32i32s.i32(i32 15, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 3, i32 0)
  %v.vec = extractvalue { <4 x float>, i32 } %v, 0
  %v.err = extractvalue { <4 x float>, i32 } %v, 1
  store i32 %v.err, ptr addrspace(1) %out, align 4
  ret <4 x float> %v.vec
}

; Intrinsic declarations used above: one overload returning only the
; <4 x float> data, and one returning a { <4 x float>, i32 } struct.
declare <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32(i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare { <4 x float>, i32 } @llvm.amdgcn.image.load.3d.sl_v4f32i32s.i32(i32 immarg, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0

; Attribute group shared by both intrinsic declarations.
attributes #0 = { nounwind readonly }