; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=GFX6 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12 %s

; GlobalISel code generation checks for the llvm.amdgcn.image.load.2darraymsaa
; intrinsic. Every function below is compiled by llc with -global-isel for
; four -mcpu values (tahiti, gfx1010, gfx1100 and gfx1200) and the emitted
; assembly is matched against the check lines that follow each define.
; The check lines are generated by utils/update_llc_test_checks.py; rerun
; that script after a codegen change instead of editing them by hand.

; Plain load. Calls the v4f32 overload with a first immediate of 15 and both
; trailing immediates set to 0, then returns the <4 x float> result directly.
; Every expectation consists of eight s_mov_b32 copies (s2..s9 into s0..s7)
; followed by one image_load with dmask 0xf and a wait on the load counter.
; The gfx1010/gfx1100 runs share one expectation block; the gfx1200 run
; expects the address operands as a bracketed register list.
define amdgpu_ps <4 x float> @load_2darraymsaa_v4f32_xyzw(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
; GFX6-LABEL: load_2darraymsaa_v4f32_xyzw:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_mov_b32 s0, s2
; GFX6-NEXT: s_mov_b32 s1, s3
; GFX6-NEXT: s_mov_b32 s2, s4
; GFX6-NEXT: s_mov_b32 s3, s5
; GFX6-NEXT: s_mov_b32 s4, s6
; GFX6-NEXT: s_mov_b32 s5, s7
; GFX6-NEXT: s_mov_b32 s6, s8
; GFX6-NEXT: s_mov_b32 s7, s9
; GFX6-NEXT: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm da
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: load_2darraymsaa_v4f32_xyzw:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_mov_b32 s0, s2
; GFX10PLUS-NEXT: s_mov_b32 s1, s3
; GFX10PLUS-NEXT: s_mov_b32 s2, s4
; GFX10PLUS-NEXT: s_mov_b32 s3, s5
; GFX10PLUS-NEXT: s_mov_b32 s4, s6
; GFX10PLUS-NEXT: s_mov_b32 s5, s7
; GFX10PLUS-NEXT: s_mov_b32 s6, s8
; GFX10PLUS-NEXT: s_mov_b32 s7, s9
; GFX10PLUS-NEXT: image_load v[0:3], v[0:3], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: load_2darraymsaa_v4f32_xyzw:
; GFX12: ; %bb.0:
; GFX12-NEXT: s_mov_b32 s0, s2
; GFX12-NEXT: s_mov_b32 s1, s3
; GFX12-NEXT: s_mov_b32 s2, s4
; GFX12-NEXT: s_mov_b32 s3, s5
; GFX12-NEXT: s_mov_b32 s4, s6
; GFX12-NEXT: s_mov_b32 s5, s7
; GFX12-NEXT: s_mov_b32 s6, s8
; GFX12-NEXT: s_mov_b32 s7, s9
; GFX12-NEXT: image_load v[0:3], [v0, v1, v2, v3], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
  %v = call <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; Struct-returning load with the second-to-last immediate set to 1. The i32
; member of the { <4 x float>, i32 } result is stored to %out and the vector
; member is returned. Every expectation zero-fills v0..v4 before the load,
; issues an image_load that writes five registers (v[0:4]) with the tfe
; modifier, and then stores v4 to the address passed in %out.
; Review note - the value 1 presumably sets a "tfe" bit in a tfe/lwe control
; mask; confirm against the intrinsic definition.
define amdgpu_ps <4 x float> @load_2darraymsaa_v4f32_xyzw_tfe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
; GFX6-LABEL: load_2darraymsaa_v4f32_xyzw_tfe:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_mov_b32_e32 v5, v0
; GFX6-NEXT: v_mov_b32_e32 v0, 0
; GFX6-NEXT: s_mov_b32 s0, s2
; GFX6-NEXT: s_mov_b32 s1, s3
; GFX6-NEXT: s_mov_b32 s2, s4
; GFX6-NEXT: s_mov_b32 s3, s5
; GFX6-NEXT: s_mov_b32 s4, s6
; GFX6-NEXT: s_mov_b32 s5, s7
; GFX6-NEXT: s_mov_b32 s6, s8
; GFX6-NEXT: s_mov_b32 s7, s9
; GFX6-NEXT: v_mov_b32_e32 v6, v1
; GFX6-NEXT: v_mov_b32_e32 v7, v2
; GFX6-NEXT: v_mov_b32_e32 v8, v3
; GFX6-NEXT: v_mov_b32_e32 v1, v0
; GFX6-NEXT: v_mov_b32_e32 v2, v0
; GFX6-NEXT: v_mov_b32_e32 v3, v0
; GFX6-NEXT: v_mov_b32_e32 v4, v0
; GFX6-NEXT: image_load v[0:4], v[5:8], s[0:7] dmask:0xf unorm tfe da
; GFX6-NEXT: s_mov_b32 s8, s10
; GFX6-NEXT: s_mov_b32 s9, s11
; GFX6-NEXT: s_mov_b32 s10, -1
; GFX6-NEXT: s_mov_b32 s11, 0xf000
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_store_dword v4, off, s[8:11], 0
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: load_2darraymsaa_v4f32_xyzw_tfe:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_mov_b32_e32 v9, 0
; GFX10-NEXT: v_mov_b32_e32 v5, v0
; GFX10-NEXT: v_mov_b32_e32 v6, v1
; GFX10-NEXT: v_mov_b32_e32 v7, v2
; GFX10-NEXT: v_mov_b32_e32 v8, v3
; GFX10-NEXT: v_mov_b32_e32 v10, v9
; GFX10-NEXT: v_mov_b32_e32 v11, v9
; GFX10-NEXT: v_mov_b32_e32 v12, v9
; GFX10-NEXT: v_mov_b32_e32 v13, v9
; GFX10-NEXT: s_mov_b32 s0, s2
; GFX10-NEXT: s_mov_b32 s1, s3
; GFX10-NEXT: s_mov_b32 s2, s4
; GFX10-NEXT: s_mov_b32 s3, s5
; GFX10-NEXT: s_mov_b32 s4, s6
; GFX10-NEXT: s_mov_b32 s5, s7
; GFX10-NEXT: s_mov_b32 s6, s8
; GFX10-NEXT: s_mov_b32 s7, s9
; GFX10-NEXT: v_mov_b32_e32 v0, v9
; GFX10-NEXT: v_mov_b32_e32 v1, v10
; GFX10-NEXT: v_mov_b32_e32 v2, v11
; GFX10-NEXT: v_mov_b32_e32 v3, v12
; GFX10-NEXT: v_mov_b32_e32 v4, v13
; GFX10-NEXT: image_load v[0:4], v[5:8], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm tfe
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_dword v9, v4, s[10:11]
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: load_2darraymsaa_v4f32_xyzw_tfe:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_mov_b32_e32 v9, 0
; GFX11-NEXT: v_dual_mov_b32 v5, v0 :: v_dual_mov_b32 v6, v1
; GFX11-NEXT: v_dual_mov_b32 v7, v2 :: v_dual_mov_b32 v8, v3
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX11-NEXT: v_mov_b32_e32 v10, v9
; GFX11-NEXT: v_mov_b32_e32 v11, v9
; GFX11-NEXT: v_mov_b32_e32 v12, v9
; GFX11-NEXT: v_mov_b32_e32 v13, v9
; GFX11-NEXT: s_mov_b32 s0, s2
; GFX11-NEXT: s_mov_b32 s1, s3
; GFX11-NEXT: s_mov_b32 s2, s4
; GFX11-NEXT: s_mov_b32 s3, s5
; GFX11-NEXT: s_mov_b32 s4, s6
; GFX11-NEXT: s_mov_b32 s5, s7
; GFX11-NEXT: s_mov_b32 s6, s8
; GFX11-NEXT: s_mov_b32 s7, s9
; GFX11-NEXT: v_dual_mov_b32 v0, v9 :: v_dual_mov_b32 v1, v10
; GFX11-NEXT: v_dual_mov_b32 v2, v11 :: v_dual_mov_b32 v3, v12
; GFX11-NEXT: v_mov_b32_e32 v4, v13
; GFX11-NEXT: image_load v[0:4], v[5:8], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm tfe
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b32 v9, v4, s[10:11]
; GFX11-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: load_2darraymsaa_v4f32_xyzw_tfe:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_mov_b32_e32 v9, 0
; GFX12-NEXT: v_dual_mov_b32 v5, v0 :: v_dual_mov_b32 v6, v1
; GFX12-NEXT: v_dual_mov_b32 v7, v2 :: v_dual_mov_b32 v8, v3
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX12-NEXT: v_dual_mov_b32 v10, v9 :: v_dual_mov_b32 v11, v9
; GFX12-NEXT: v_dual_mov_b32 v12, v9 :: v_dual_mov_b32 v13, v9
; GFX12-NEXT: s_mov_b32 s0, s2
; GFX12-NEXT: s_mov_b32 s1, s3
; GFX12-NEXT: s_mov_b32 s2, s4
; GFX12-NEXT: s_mov_b32 s3, s5
; GFX12-NEXT: s_mov_b32 s4, s6
; GFX12-NEXT: s_mov_b32 s5, s7
; GFX12-NEXT: s_mov_b32 s6, s8
; GFX12-NEXT: s_mov_b32 s7, s9
; GFX12-NEXT: v_dual_mov_b32 v0, v9 :: v_dual_mov_b32 v1, v10
; GFX12-NEXT: v_dual_mov_b32 v2, v11 :: v_dual_mov_b32 v3, v12
; GFX12-NEXT: v_mov_b32_e32 v4, v13
; GFX12-NEXT: image_load v[0:4], [v5, v6, v7, v8], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY tfe
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_store_b32 v9, v4, s[10:11]
; GFX12-NEXT: ; return to shader part epilog
  %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 1, i32 0)
  %v.vec = extractvalue { <4 x float>, i32 } %v, 0
  %v.err = extractvalue { <4 x float>, i32 } %v, 1
  store i32 %v.err, ptr addrspace(1) %out, align 4
  ret <4 x float> %v.vec
}

; Same IR as the _tfe test except that the second-to-last immediate is 3
; instead of 1. The tahiti, gfx1010 and gfx1100 expectations carry the lwe
; modifier next to tfe on the image_load; the gfx1200 expectation shows only
; tfe and is otherwise the same instruction sequence as in the _tfe test.
; Review note - the value 3 presumably sets both a "tfe" and an "lwe" bit;
; confirm against the intrinsic definition.
define amdgpu_ps <4 x float> @load_2darraymsaa_v4f32_xyzw_tfe_lwe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
; GFX6-LABEL: load_2darraymsaa_v4f32_xyzw_tfe_lwe:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_mov_b32_e32 v5, v0
; GFX6-NEXT: v_mov_b32_e32 v0, 0
; GFX6-NEXT: s_mov_b32 s0, s2
; GFX6-NEXT: s_mov_b32 s1, s3
; GFX6-NEXT: s_mov_b32 s2, s4
; GFX6-NEXT: s_mov_b32 s3, s5
; GFX6-NEXT: s_mov_b32 s4, s6
; GFX6-NEXT: s_mov_b32 s5, s7
; GFX6-NEXT: s_mov_b32 s6, s8
; GFX6-NEXT: s_mov_b32 s7, s9
; GFX6-NEXT: v_mov_b32_e32 v6, v1
; GFX6-NEXT: v_mov_b32_e32 v7, v2
; GFX6-NEXT: v_mov_b32_e32 v8, v3
; GFX6-NEXT: v_mov_b32_e32 v1, v0
; GFX6-NEXT: v_mov_b32_e32 v2, v0
; GFX6-NEXT: v_mov_b32_e32 v3, v0
; GFX6-NEXT: v_mov_b32_e32 v4, v0
; GFX6-NEXT: image_load v[0:4], v[5:8], s[0:7] dmask:0xf unorm tfe lwe da
; GFX6-NEXT: s_mov_b32 s8, s10
; GFX6-NEXT: s_mov_b32 s9, s11
; GFX6-NEXT: s_mov_b32 s10, -1
; GFX6-NEXT: s_mov_b32 s11, 0xf000
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: buffer_store_dword v4, off, s[8:11], 0
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: load_2darraymsaa_v4f32_xyzw_tfe_lwe:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_mov_b32_e32 v9, 0
; GFX10-NEXT: v_mov_b32_e32 v5, v0
; GFX10-NEXT: v_mov_b32_e32 v6, v1
; GFX10-NEXT: v_mov_b32_e32 v7, v2
; GFX10-NEXT: v_mov_b32_e32 v8, v3
; GFX10-NEXT: v_mov_b32_e32 v10, v9
; GFX10-NEXT: v_mov_b32_e32 v11, v9
; GFX10-NEXT: v_mov_b32_e32 v12, v9
; GFX10-NEXT: v_mov_b32_e32 v13, v9
; GFX10-NEXT: s_mov_b32 s0, s2
; GFX10-NEXT: s_mov_b32 s1, s3
; GFX10-NEXT: s_mov_b32 s2, s4
; GFX10-NEXT: s_mov_b32 s3, s5
; GFX10-NEXT: s_mov_b32 s4, s6
; GFX10-NEXT: s_mov_b32 s5, s7
; GFX10-NEXT: s_mov_b32 s6, s8
; GFX10-NEXT: s_mov_b32 s7, s9
; GFX10-NEXT: v_mov_b32_e32 v0, v9
; GFX10-NEXT: v_mov_b32_e32 v1, v10
; GFX10-NEXT: v_mov_b32_e32 v2, v11
; GFX10-NEXT: v_mov_b32_e32 v3, v12
; GFX10-NEXT: v_mov_b32_e32 v4, v13
; GFX10-NEXT: image_load v[0:4], v[5:8], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm tfe lwe
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_dword v9, v4, s[10:11]
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: load_2darraymsaa_v4f32_xyzw_tfe_lwe:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_mov_b32_e32 v9, 0
; GFX11-NEXT: v_dual_mov_b32 v5, v0 :: v_dual_mov_b32 v6, v1
; GFX11-NEXT: v_dual_mov_b32 v7, v2 :: v_dual_mov_b32 v8, v3
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX11-NEXT: v_mov_b32_e32 v10, v9
; GFX11-NEXT: v_mov_b32_e32 v11, v9
; GFX11-NEXT: v_mov_b32_e32 v12, v9
; GFX11-NEXT: v_mov_b32_e32 v13, v9
; GFX11-NEXT: s_mov_b32 s0, s2
; GFX11-NEXT: s_mov_b32 s1, s3
; GFX11-NEXT: s_mov_b32 s2, s4
; GFX11-NEXT: s_mov_b32 s3, s5
; GFX11-NEXT: s_mov_b32 s4, s6
; GFX11-NEXT: s_mov_b32 s5, s7
; GFX11-NEXT: s_mov_b32 s6, s8
; GFX11-NEXT: s_mov_b32 s7, s9
; GFX11-NEXT: v_dual_mov_b32 v0, v9 :: v_dual_mov_b32 v1, v10
; GFX11-NEXT: v_dual_mov_b32 v2, v11 :: v_dual_mov_b32 v3, v12
; GFX11-NEXT: v_mov_b32_e32 v4, v13
; GFX11-NEXT: image_load v[0:4], v[5:8], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm tfe lwe
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b32 v9, v4, s[10:11]
; GFX11-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: load_2darraymsaa_v4f32_xyzw_tfe_lwe:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_mov_b32_e32 v9, 0
; GFX12-NEXT: v_dual_mov_b32 v5, v0 :: v_dual_mov_b32 v6, v1
; GFX12-NEXT: v_dual_mov_b32 v7, v2 :: v_dual_mov_b32 v8, v3
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX12-NEXT: v_dual_mov_b32 v10, v9 :: v_dual_mov_b32 v11, v9
; GFX12-NEXT: v_dual_mov_b32 v12, v9 :: v_dual_mov_b32 v13, v9
; GFX12-NEXT: s_mov_b32 s0, s2
; GFX12-NEXT: s_mov_b32 s1, s3
; GFX12-NEXT: s_mov_b32 s2, s4
; GFX12-NEXT: s_mov_b32 s3, s5
; GFX12-NEXT: s_mov_b32 s4, s6
; GFX12-NEXT: s_mov_b32 s5, s7
; GFX12-NEXT: s_mov_b32 s6, s8
; GFX12-NEXT: s_mov_b32 s7, s9
; GFX12-NEXT: v_dual_mov_b32 v0, v9 :: v_dual_mov_b32 v1, v10
; GFX12-NEXT: v_dual_mov_b32 v2, v11 :: v_dual_mov_b32 v3, v12
; GFX12-NEXT: v_mov_b32_e32 v4, v13
; GFX12-NEXT: image_load v[0:4], [v5, v6, v7, v8], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY tfe
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_store_b32 v9, v4, s[10:11]
; GFX12-NEXT: ; return to shader part epilog
  %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 3, i32 0)
  %v.vec = extractvalue { <4 x float>, i32 } %v, 0
  %v.err = extractvalue { <4 x float>, i32 } %v, 1
  store i32 %v.err, ptr addrspace(1) %out, align 4
  ret <4 x float> %v.vec
}

; Declarations of the two intrinsic overloads called above: one returning a
; bare <4 x float>, one returning a { <4 x float>, i32 } struct. Both use
; attribute group #0.
declare <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i32(i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0
declare { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i32(i32 immarg, i32, i32, i32, i32, <8 x i32>, i32 immarg, i32 immarg) #0

attributes #0 = { nounwind readonly }