; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GFX6 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12 %s

; Exercises the llvm.amdgcn.image.load.2d intrinsic through GlobalISel on four
; subtargets (tahiti, gfx1010, gfx1100, gfx1200). Every test passes a dmask
; operand of 15; the tests differ only in the second-to-last i32 operand of the
; intrinsic call (0, 1 or 3) and in whether the extra i32 result is stored.
; The check lines below are generated output; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.

; Second-to-last operand is 0: the intrinsic returns a plain <4 x float> and the
; expected image_load writes v[0:3] with neither a tfe nor an lwe modifier.
define amdgpu_ps <4 x float> @load_2d_v4f32_xyzw(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
; GFX6-LABEL: load_2d_v4f32_xyzw:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: load_2d_v4f32_xyzw:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_mov_b32 s0, s2
; GFX10PLUS-NEXT:    s_mov_b32 s1, s3
; GFX10PLUS-NEXT:    s_mov_b32 s2, s4
; GFX10PLUS-NEXT:    s_mov_b32 s3, s5
; GFX10PLUS-NEXT:    s_mov_b32 s4, s6
; GFX10PLUS-NEXT:    s_mov_b32 s5, s7
; GFX10PLUS-NEXT:    s_mov_b32 s6, s8
; GFX10PLUS-NEXT:    s_mov_b32 s7, s9
; GFX10PLUS-NEXT:    image_load v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: load_2d_v4f32_xyzw:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    image_load v[0:3], [v0, v1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
  %v = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; Second-to-last operand is 1: the intrinsic returns { <4 x float>, i32 }, the
; i32 member is stored to %out, and the expected image_load writes five
; registers (v[0:4], zero-initialised beforehand) and carries the tfe modifier.
define amdgpu_ps <4 x float> @load_2d_v4f32_xyzw_tfe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t) {
; GFX6-LABEL: load_2d_v4f32_xyzw_tfe:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_mov_b32_e32 v5, v0
; GFX6-NEXT:    v_mov_b32_e32 v0, 0
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    v_mov_b32_e32 v6, v1
; GFX6-NEXT:    v_mov_b32_e32 v1, v0
; GFX6-NEXT:    v_mov_b32_e32 v2, v0
; GFX6-NEXT:    v_mov_b32_e32 v3, v0
; GFX6-NEXT:    v_mov_b32_e32 v4, v0
; GFX6-NEXT:    image_load v[0:4], v[5:6], s[0:7] dmask:0xf unorm tfe
; GFX6-NEXT:    s_mov_b32 s8, s10
; GFX6-NEXT:    s_mov_b32 s9, s11
; GFX6-NEXT:    s_mov_b32 s10, -1
; GFX6-NEXT:    s_mov_b32 s11, 0xf000
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    buffer_store_dword v4, off, s[8:11], 0
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_2d_v4f32_xyzw_tfe:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mov_b32_e32 v7, 0
; GFX10-NEXT:    v_mov_b32_e32 v5, v0
; GFX10-NEXT:    v_mov_b32_e32 v6, v1
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    v_mov_b32_e32 v8, v7
; GFX10-NEXT:    v_mov_b32_e32 v9, v7
; GFX10-NEXT:    v_mov_b32_e32 v10, v7
; GFX10-NEXT:    v_mov_b32_e32 v11, v7
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    v_mov_b32_e32 v0, v7
; GFX10-NEXT:    v_mov_b32_e32 v1, v8
; GFX10-NEXT:    v_mov_b32_e32 v2, v9
; GFX10-NEXT:    v_mov_b32_e32 v3, v10
; GFX10-NEXT:    v_mov_b32_e32 v4, v11
; GFX10-NEXT:    image_load v[0:4], v[5:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm tfe
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_dword v7, v4, s[10:11]
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: load_2d_v4f32_xyzw_tfe:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mov_b32_e32 v7, 0
; GFX11-NEXT:    v_dual_mov_b32 v5, v0 :: v_dual_mov_b32 v6, v1
; GFX11-NEXT:    s_mov_b32 s0, s2
; GFX11-NEXT:    s_mov_b32 s1, s3
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT:    v_mov_b32_e32 v8, v7
; GFX11-NEXT:    v_mov_b32_e32 v9, v7
; GFX11-NEXT:    v_mov_b32_e32 v10, v7
; GFX11-NEXT:    v_mov_b32_e32 v11, v7
; GFX11-NEXT:    s_mov_b32 s2, s4
; GFX11-NEXT:    s_mov_b32 s3, s5
; GFX11-NEXT:    s_mov_b32 s4, s6
; GFX11-NEXT:    s_mov_b32 s5, s7
; GFX11-NEXT:    s_mov_b32 s6, s8
; GFX11-NEXT:    s_mov_b32 s7, s9
; GFX11-NEXT:    v_dual_mov_b32 v0, v7 :: v_dual_mov_b32 v1, v8
; GFX11-NEXT:    v_dual_mov_b32 v2, v9 :: v_dual_mov_b32 v3, v10
; GFX11-NEXT:    v_mov_b32_e32 v4, v11
; GFX11-NEXT:    image_load v[0:4], v[5:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm tfe
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b32 v7, v4, s[10:11]
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: load_2d_v4f32_xyzw_tfe:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    v_mov_b32_e32 v7, 0
; GFX12-NEXT:    v_dual_mov_b32 v5, v0 :: v_dual_mov_b32 v6, v1
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2)
; GFX12-NEXT:    v_dual_mov_b32 v8, v7 :: v_dual_mov_b32 v9, v7
; GFX12-NEXT:    v_dual_mov_b32 v10, v7 :: v_dual_mov_b32 v11, v7
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    v_dual_mov_b32 v0, v7 :: v_dual_mov_b32 v1, v8
; GFX12-NEXT:    v_dual_mov_b32 v2, v9 :: v_dual_mov_b32 v3, v10
; GFX12-NEXT:    v_mov_b32_e32 v4, v11
; GFX12-NEXT:    image_load v[0:4], [v5, v6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D tfe
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    global_store_b32 v7, v4, s[10:11]
; GFX12-NEXT:    ; return to shader part epilog
  %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 1, i32 0)
  %v.vec = extractvalue { <4 x float>, i32 } %v, 0
  %v.err = extractvalue { <4 x float>, i32 } %v, 1
  store i32 %v.err, ptr addrspace(1) %out, align 4
  ret <4 x float> %v.vec
}

; Second-to-last operand is 3: same shape as the previous test. The expected
; image_load carries both the tfe and lwe modifiers for tahiti, gfx1010 and
; gfx1100, while the gfx1200 expectation shows only tfe.
define amdgpu_ps <4 x float> @load_2d_v4f32_xyzw_tfe_lwe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t) {
; GFX6-LABEL: load_2d_v4f32_xyzw_tfe_lwe:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_mov_b32_e32 v5, v0
; GFX6-NEXT:    v_mov_b32_e32 v0, 0
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    v_mov_b32_e32 v6, v1
; GFX6-NEXT:    v_mov_b32_e32 v1, v0
; GFX6-NEXT:    v_mov_b32_e32 v2, v0
; GFX6-NEXT:    v_mov_b32_e32 v3, v0
; GFX6-NEXT:    v_mov_b32_e32 v4, v0
; GFX6-NEXT:    image_load v[0:4], v[5:6], s[0:7] dmask:0xf unorm tfe lwe
; GFX6-NEXT:    s_mov_b32 s8, s10
; GFX6-NEXT:    s_mov_b32 s9, s11
; GFX6-NEXT:    s_mov_b32 s10, -1
; GFX6-NEXT:    s_mov_b32 s11, 0xf000
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    buffer_store_dword v4, off, s[8:11], 0
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: load_2d_v4f32_xyzw_tfe_lwe:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_mov_b32_e32 v7, 0
; GFX10-NEXT:    v_mov_b32_e32 v5, v0
; GFX10-NEXT:    v_mov_b32_e32 v6, v1
; GFX10-NEXT:    s_mov_b32 s0, s2
; GFX10-NEXT:    s_mov_b32 s1, s3
; GFX10-NEXT:    v_mov_b32_e32 v8, v7
; GFX10-NEXT:    v_mov_b32_e32 v9, v7
; GFX10-NEXT:    v_mov_b32_e32 v10, v7
; GFX10-NEXT:    v_mov_b32_e32 v11, v7
; GFX10-NEXT:    s_mov_b32 s2, s4
; GFX10-NEXT:    s_mov_b32 s3, s5
; GFX10-NEXT:    s_mov_b32 s4, s6
; GFX10-NEXT:    s_mov_b32 s5, s7
; GFX10-NEXT:    s_mov_b32 s6, s8
; GFX10-NEXT:    s_mov_b32 s7, s9
; GFX10-NEXT:    v_mov_b32_e32 v0, v7
; GFX10-NEXT:    v_mov_b32_e32 v1, v8
; GFX10-NEXT:    v_mov_b32_e32 v2, v9
; GFX10-NEXT:    v_mov_b32_e32 v3, v10
; GFX10-NEXT:    v_mov_b32_e32 v4, v11
; GFX10-NEXT:    image_load v[0:4], v[5:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm tfe lwe
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    global_store_dword v7, v4, s[10:11]
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: load_2d_v4f32_xyzw_tfe_lwe:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    v_mov_b32_e32 v7, 0
; GFX11-NEXT:    v_dual_mov_b32 v5, v0 :: v_dual_mov_b32 v6, v1
; GFX11-NEXT:    s_mov_b32 s0, s2
; GFX11-NEXT:    s_mov_b32 s1, s3
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT:    v_mov_b32_e32 v8, v7
; GFX11-NEXT:    v_mov_b32_e32 v9, v7
; GFX11-NEXT:    v_mov_b32_e32 v10, v7
; GFX11-NEXT:    v_mov_b32_e32 v11, v7
; GFX11-NEXT:    s_mov_b32 s2, s4
; GFX11-NEXT:    s_mov_b32 s3, s5
; GFX11-NEXT:    s_mov_b32 s4, s6
; GFX11-NEXT:    s_mov_b32 s5, s7
; GFX11-NEXT:    s_mov_b32 s6, s8
; GFX11-NEXT:    s_mov_b32 s7, s9
; GFX11-NEXT:    v_dual_mov_b32 v0, v7 :: v_dual_mov_b32 v1, v8
; GFX11-NEXT:    v_dual_mov_b32 v2, v9 :: v_dual_mov_b32 v3, v10
; GFX11-NEXT:    v_mov_b32_e32 v4, v11
; GFX11-NEXT:    image_load v[0:4], v[5:6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D unorm tfe lwe
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    global_store_b32 v7, v4, s[10:11]
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: load_2d_v4f32_xyzw_tfe_lwe:
; GFX12:       ; %bb.0:
; GFX12-NEXT:    v_mov_b32_e32 v7, 0
; GFX12-NEXT:    v_dual_mov_b32 v5, v0 :: v_dual_mov_b32 v6, v1
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_2)
; GFX12-NEXT:    v_dual_mov_b32 v8, v7 :: v_dual_mov_b32 v9, v7
; GFX12-NEXT:    v_dual_mov_b32 v10, v7 :: v_dual_mov_b32 v11, v7
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    v_dual_mov_b32 v0, v7 :: v_dual_mov_b32 v1, v8
; GFX12-NEXT:    v_dual_mov_b32 v2, v9 :: v_dual_mov_b32 v3, v10
; GFX12-NEXT:    v_mov_b32_e32 v4, v11
; GFX12-NEXT:    image_load v[0:4], [v5, v6], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D tfe
; GFX12-NEXT:    s_wait_loadcnt 0x0
; GFX12-NEXT:    global_store_b32 v7, v4, s[10:11]
; GFX12-NEXT:    ; return to shader part epilog
  %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 3, i32 0)
  %v.vec = extractvalue { <4 x float>, i32 } %v, 0
  %v.err = extractvalue { <4 x float>, i32 } %v, 1
  store i32 %v.err, ptr addrspace(1) %out, align 4
  ret <4 x float> %v.vec
}

; Intrinsic declarations for the two return shapes used above: the plain vector
; form and the { vector, i32 } struct form.
declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare { <4 x float>, i32 } @llvm.amdgcn.image.load.2d.sl_v4f32i32s.i32(i32 immarg, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0

attributes #0 = { nounwind readonly }