; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12 %s

; Code generation test. Each llc invocation above compiles this file with
; -global-isel for the amdgcn-mesa-mesa3d triple on a different -mcpu value
; (gfx900, gfx1010, gfx1100, gfx1200) and pipes the assembly into FileCheck.
; The gfx1010 and gfx1100 invocations both enable the GFX10PLUS prefix, which
; is used below where their expected output is the same; the separate GFX10
; and GFX11 prefixes are used where it is not.
;
; Every function calls a 2darraymsaa image load intrinsic whose four
; coordinate operands (%s, %t, %slice, %fragid) are i16 values.
;
; The check lines are produced by the script named on the first line; rerun
; that script instead of editing them by hand.

; Plain load: the first intrinsic operand is 15, which appears as dmask:0xf in
; the expected image_load, and both trailing i32 immediates are 0.
; The expected code packs v0/v1 and v2/v3 pairwise into two 32-bit registers
; (low half masked with 0xffff, high half merged in with a shift by 16) and
; image_load carries the a16 modifier. Presumably v0..v3 hold the four i16
; coordinates in argument order - confirm against the calling convention.
; s[2:9] is copied down to s[0:7], which image_load uses as its resource
; operand (presumably the inreg %rsrc argument).
; Per-target differences visible in the expectations: gfx900 prints the da
; modifier where the newer targets print dim:SQ_RSRC_IMG_2D_MSAA_ARRAY, and
; gfx1200 prints the address as a register list, omits unorm and waits with
; s_wait_loadcnt.
define amdgpu_ps <4 x float> @load_2darraymsaa_v4f32_xyzw(<8 x i32> inreg %rsrc, i16 %s, i16 %t, i16 %slice, i16 %fragid) {
; GFX9-LABEL: load_2darraymsaa_v4f32_xyzw:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX9-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GFX9-NEXT: v_and_b32_e32 v1, 0xffff, v2
; GFX9-NEXT: s_mov_b32 s0, s2
; GFX9-NEXT: s_mov_b32 s1, s3
; GFX9-NEXT: s_mov_b32 s2, s4
; GFX9-NEXT: s_mov_b32 s3, s5
; GFX9-NEXT: s_mov_b32 s4, s6
; GFX9-NEXT: s_mov_b32 s5, s7
; GFX9-NEXT: s_mov_b32 s6, s8
; GFX9-NEXT: s_mov_b32 s7, s9
; GFX9-NEXT: v_lshl_or_b32 v1, v3, 16, v1
; GFX9-NEXT: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm a16 da
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: load_2darraymsaa_v4f32_xyzw:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10PLUS-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX10PLUS-NEXT: s_mov_b32 s0, s2
; GFX10PLUS-NEXT: s_mov_b32 s1, s3
; GFX10PLUS-NEXT: s_mov_b32 s2, s4
; GFX10PLUS-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GFX10PLUS-NEXT: v_lshl_or_b32 v1, v3, 16, v2
; GFX10PLUS-NEXT: s_mov_b32 s3, s5
; GFX10PLUS-NEXT: s_mov_b32 s4, s6
; GFX10PLUS-NEXT: s_mov_b32 s5, s7
; GFX10PLUS-NEXT: s_mov_b32 s6, s8
; GFX10PLUS-NEXT: s_mov_b32 s7, s9
; GFX10PLUS-NEXT: image_load v[0:3], v[0:1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm a16
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0)
; GFX10PLUS-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: load_2darraymsaa_v4f32_xyzw:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX12-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX12-NEXT: s_mov_b32 s0, s2
; GFX12-NEXT: s_mov_b32 s1, s3
; GFX12-NEXT: s_mov_b32 s2, s4
; GFX12-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GFX12-NEXT: v_lshl_or_b32 v1, v3, 16, v2
; GFX12-NEXT: s_mov_b32 s3, s5
; GFX12-NEXT: s_mov_b32 s4, s6
; GFX12-NEXT: s_mov_b32 s5, s7
; GFX12-NEXT: s_mov_b32 s6, s8
; GFX12-NEXT: s_mov_b32 s7, s9
; GFX12-NEXT: image_load v[0:3], [v0, v1], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY a16
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: ; return to shader part epilog
  %v = call <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i16(i32 15, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; Same load with the second-to-last immediate set to 1, using the intrinsic
; overload that returns { <4 x float>, i32 }. The i32 member is stored to %out
; and the vector member is returned.
; In the expectations image_load gains the tfe modifier and a fifth result
; register (v[0:4]). v[0:4] are filled with copies of a zeroed register before
; the load, and v4 is the value written by the global store that follows it.
; The packed coordinates live in v[10:11] here, leaving v[0:4] free for the
; result.
define amdgpu_ps <4 x float> @load_2darraymsaa_v4f32_xyzw_tfe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i16 %s, i16 %t, i16 %slice, i16 %fragid) {
; GFX9-LABEL: load_2darraymsaa_v4f32_xyzw_tfe:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX9-NEXT: v_lshl_or_b32 v10, v1, 16, v0
; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v2
; GFX9-NEXT: v_mov_b32_e32 v5, 0
; GFX9-NEXT: v_lshl_or_b32 v11, v3, 16, v0
; GFX9-NEXT: v_mov_b32_e32 v6, v5
; GFX9-NEXT: v_mov_b32_e32 v7, v5
; GFX9-NEXT: v_mov_b32_e32 v8, v5
; GFX9-NEXT: v_mov_b32_e32 v9, v5
; GFX9-NEXT: v_mov_b32_e32 v0, v5
; GFX9-NEXT: s_mov_b32 s0, s2
; GFX9-NEXT: s_mov_b32 s1, s3
; GFX9-NEXT: s_mov_b32 s2, s4
; GFX9-NEXT: s_mov_b32 s3, s5
; GFX9-NEXT: s_mov_b32 s4, s6
; GFX9-NEXT: s_mov_b32 s5, s7
; GFX9-NEXT: s_mov_b32 s6, s8
; GFX9-NEXT: s_mov_b32 s7, s9
; GFX9-NEXT: v_mov_b32_e32 v1, v6
; GFX9-NEXT: v_mov_b32_e32 v2, v7
; GFX9-NEXT: v_mov_b32_e32 v3, v8
; GFX9-NEXT: v_mov_b32_e32 v4, v9
; GFX9-NEXT: image_load v[0:4], v[10:11], s[0:7] dmask:0xf unorm a16 tfe da
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dword v5, v4, s[10:11]
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: load_2darraymsaa_v4f32_xyzw_tfe:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_mov_b32_e32 v5, 0
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX10-NEXT: s_mov_b32 s0, s2
; GFX10-NEXT: s_mov_b32 s1, s3
; GFX10-NEXT: v_mov_b32_e32 v6, v5
; GFX10-NEXT: v_mov_b32_e32 v7, v5
; GFX10-NEXT: v_mov_b32_e32 v8, v5
; GFX10-NEXT: v_mov_b32_e32 v9, v5
; GFX10-NEXT: v_lshl_or_b32 v10, v1, 16, v0
; GFX10-NEXT: v_lshl_or_b32 v11, v3, 16, v2
; GFX10-NEXT: s_mov_b32 s2, s4
; GFX10-NEXT: s_mov_b32 s3, s5
; GFX10-NEXT: s_mov_b32 s4, s6
; GFX10-NEXT: s_mov_b32 s5, s7
; GFX10-NEXT: s_mov_b32 s6, s8
; GFX10-NEXT: s_mov_b32 s7, s9
; GFX10-NEXT: v_mov_b32_e32 v0, v5
; GFX10-NEXT: v_mov_b32_e32 v1, v6
; GFX10-NEXT: v_mov_b32_e32 v2, v7
; GFX10-NEXT: v_mov_b32_e32 v3, v8
; GFX10-NEXT: v_mov_b32_e32 v4, v9
; GFX10-NEXT: image_load v[0:4], v[10:11], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm a16 tfe
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_dword v5, v4, s[10:11]
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: load_2darraymsaa_v4f32_xyzw_tfe:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_mov_b32_e32 v5, 0
; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX11-NEXT: s_mov_b32 s0, s2
; GFX11-NEXT: s_mov_b32 s1, s3
; GFX11-NEXT: v_mov_b32_e32 v6, v5
; GFX11-NEXT: v_mov_b32_e32 v7, v5
; GFX11-NEXT: v_mov_b32_e32 v8, v5
; GFX11-NEXT: v_mov_b32_e32 v9, v5
; GFX11-NEXT: v_lshl_or_b32 v10, v1, 16, v0
; GFX11-NEXT: v_lshl_or_b32 v11, v3, 16, v2
; GFX11-NEXT: s_mov_b32 s2, s4
; GFX11-NEXT: s_mov_b32 s3, s5
; GFX11-NEXT: s_mov_b32 s4, s6
; GFX11-NEXT: s_mov_b32 s5, s7
; GFX11-NEXT: s_mov_b32 s6, s8
; GFX11-NEXT: s_mov_b32 s7, s9
; GFX11-NEXT: v_mov_b32_e32 v0, v5
; GFX11-NEXT: v_mov_b32_e32 v1, v6
; GFX11-NEXT: v_mov_b32_e32 v2, v7
; GFX11-NEXT: v_mov_b32_e32 v3, v8
; GFX11-NEXT: v_mov_b32_e32 v4, v9
; GFX11-NEXT: image_load v[0:4], v[10:11], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm a16 tfe
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b32 v5, v4, s[10:11]
; GFX11-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: load_2darraymsaa_v4f32_xyzw_tfe:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_mov_b32_e32 v5, 0
; GFX12-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX12-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX12-NEXT: s_mov_b32 s0, s2
; GFX12-NEXT: s_mov_b32 s1, s3
; GFX12-NEXT: v_mov_b32_e32 v6, v5
; GFX12-NEXT: v_mov_b32_e32 v7, v5
; GFX12-NEXT: v_mov_b32_e32 v8, v5
; GFX12-NEXT: v_mov_b32_e32 v9, v5
; GFX12-NEXT: v_lshl_or_b32 v10, v1, 16, v0
; GFX12-NEXT: v_lshl_or_b32 v11, v3, 16, v2
; GFX12-NEXT: s_mov_b32 s2, s4
; GFX12-NEXT: s_mov_b32 s3, s5
; GFX12-NEXT: s_mov_b32 s4, s6
; GFX12-NEXT: s_mov_b32 s5, s7
; GFX12-NEXT: s_mov_b32 s6, s8
; GFX12-NEXT: s_mov_b32 s7, s9
; GFX12-NEXT: v_mov_b32_e32 v0, v5
; GFX12-NEXT: v_mov_b32_e32 v1, v6
; GFX12-NEXT: v_mov_b32_e32 v2, v7
; GFX12-NEXT: v_mov_b32_e32 v3, v8
; GFX12-NEXT: v_mov_b32_e32 v4, v9
; GFX12-NEXT: image_load v[0:4], [v10, v11], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY a16 tfe
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_store_b32 v5, v4, s[10:11]
; GFX12-NEXT: ; return to shader part epilog
  %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i16(i32 15, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 1, i32 0)
  %v.vec = extractvalue { <4 x float>, i32 } %v, 0
  %v.err = extractvalue { <4 x float>, i32 } %v, 1
  store i32 %v.err, ptr addrspace(1) %out, align 4
  ret <4 x float> %v.vec
}

; Same as the tfe test above, except the second-to-last immediate is 3.
; The gfx900, gfx1010 and gfx1100 expectations print lwe next to tfe on
; image_load; the gfx1200 expectation still prints only tfe.
; NOTE(review): presumably gfx1200 has no lwe modifier to print - confirm
; against the target's instruction definitions before treating this as
; intentional.
define amdgpu_ps <4 x float> @load_2darraymsaa_v4f32_xyzw_tfe_lwe(<8 x i32> inreg %rsrc, ptr addrspace(1) inreg %out, i16 %s, i16 %t, i16 %slice, i16 %fragid) {
; GFX9-LABEL: load_2darraymsaa_v4f32_xyzw_tfe_lwe:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX9-NEXT: v_lshl_or_b32 v10, v1, 16, v0
; GFX9-NEXT: v_and_b32_e32 v0, 0xffff, v2
; GFX9-NEXT: v_mov_b32_e32 v5, 0
; GFX9-NEXT: v_lshl_or_b32 v11, v3, 16, v0
; GFX9-NEXT: v_mov_b32_e32 v6, v5
; GFX9-NEXT: v_mov_b32_e32 v7, v5
; GFX9-NEXT: v_mov_b32_e32 v8, v5
; GFX9-NEXT: v_mov_b32_e32 v9, v5
; GFX9-NEXT: v_mov_b32_e32 v0, v5
; GFX9-NEXT: s_mov_b32 s0, s2
; GFX9-NEXT: s_mov_b32 s1, s3
; GFX9-NEXT: s_mov_b32 s2, s4
; GFX9-NEXT: s_mov_b32 s3, s5
; GFX9-NEXT: s_mov_b32 s4, s6
; GFX9-NEXT: s_mov_b32 s5, s7
; GFX9-NEXT: s_mov_b32 s6, s8
; GFX9-NEXT: s_mov_b32 s7, s9
; GFX9-NEXT: v_mov_b32_e32 v1, v6
; GFX9-NEXT: v_mov_b32_e32 v2, v7
; GFX9-NEXT: v_mov_b32_e32 v3, v8
; GFX9-NEXT: v_mov_b32_e32 v4, v9
; GFX9-NEXT: image_load v[0:4], v[10:11], s[0:7] dmask:0xf unorm a16 tfe lwe da
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: global_store_dword v5, v4, s[10:11]
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: load_2darraymsaa_v4f32_xyzw_tfe_lwe:
; GFX10: ; %bb.0:
; GFX10-NEXT: v_mov_b32_e32 v5, 0
; GFX10-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX10-NEXT: s_mov_b32 s0, s2
; GFX10-NEXT: s_mov_b32 s1, s3
; GFX10-NEXT: v_mov_b32_e32 v6, v5
; GFX10-NEXT: v_mov_b32_e32 v7, v5
; GFX10-NEXT: v_mov_b32_e32 v8, v5
; GFX10-NEXT: v_mov_b32_e32 v9, v5
; GFX10-NEXT: v_lshl_or_b32 v10, v1, 16, v0
; GFX10-NEXT: v_lshl_or_b32 v11, v3, 16, v2
; GFX10-NEXT: s_mov_b32 s2, s4
; GFX10-NEXT: s_mov_b32 s3, s5
; GFX10-NEXT: s_mov_b32 s4, s6
; GFX10-NEXT: s_mov_b32 s5, s7
; GFX10-NEXT: s_mov_b32 s6, s8
; GFX10-NEXT: s_mov_b32 s7, s9
; GFX10-NEXT: v_mov_b32_e32 v0, v5
; GFX10-NEXT: v_mov_b32_e32 v1, v6
; GFX10-NEXT: v_mov_b32_e32 v2, v7
; GFX10-NEXT: v_mov_b32_e32 v3, v8
; GFX10-NEXT: v_mov_b32_e32 v4, v9
; GFX10-NEXT: image_load v[0:4], v[10:11], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm a16 tfe lwe
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: global_store_dword v5, v4, s[10:11]
; GFX10-NEXT: ; return to shader part epilog
;
; GFX11-LABEL: load_2darraymsaa_v4f32_xyzw_tfe_lwe:
; GFX11: ; %bb.0:
; GFX11-NEXT: v_mov_b32_e32 v5, 0
; GFX11-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX11-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX11-NEXT: s_mov_b32 s0, s2
; GFX11-NEXT: s_mov_b32 s1, s3
; GFX11-NEXT: v_mov_b32_e32 v6, v5
; GFX11-NEXT: v_mov_b32_e32 v7, v5
; GFX11-NEXT: v_mov_b32_e32 v8, v5
; GFX11-NEXT: v_mov_b32_e32 v9, v5
; GFX11-NEXT: v_lshl_or_b32 v10, v1, 16, v0
; GFX11-NEXT: v_lshl_or_b32 v11, v3, 16, v2
; GFX11-NEXT: s_mov_b32 s2, s4
; GFX11-NEXT: s_mov_b32 s3, s5
; GFX11-NEXT: s_mov_b32 s4, s6
; GFX11-NEXT: s_mov_b32 s5, s7
; GFX11-NEXT: s_mov_b32 s6, s8
; GFX11-NEXT: s_mov_b32 s7, s9
; GFX11-NEXT: v_mov_b32_e32 v0, v5
; GFX11-NEXT: v_mov_b32_e32 v1, v6
; GFX11-NEXT: v_mov_b32_e32 v2, v7
; GFX11-NEXT: v_mov_b32_e32 v3, v8
; GFX11-NEXT: v_mov_b32_e32 v4, v9
; GFX11-NEXT: image_load v[0:4], v[10:11], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY unorm a16 tfe lwe
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: global_store_b32 v5, v4, s[10:11]
; GFX11-NEXT: ; return to shader part epilog
;
; GFX12-LABEL: load_2darraymsaa_v4f32_xyzw_tfe_lwe:
; GFX12: ; %bb.0:
; GFX12-NEXT: v_mov_b32_e32 v5, 0
; GFX12-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX12-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX12-NEXT: s_mov_b32 s0, s2
; GFX12-NEXT: s_mov_b32 s1, s3
; GFX12-NEXT: v_mov_b32_e32 v6, v5
; GFX12-NEXT: v_mov_b32_e32 v7, v5
; GFX12-NEXT: v_mov_b32_e32 v8, v5
; GFX12-NEXT: v_mov_b32_e32 v9, v5
; GFX12-NEXT: v_lshl_or_b32 v10, v1, 16, v0
; GFX12-NEXT: v_lshl_or_b32 v11, v3, 16, v2
; GFX12-NEXT: s_mov_b32 s2, s4
; GFX12-NEXT: s_mov_b32 s3, s5
; GFX12-NEXT: s_mov_b32 s4, s6
; GFX12-NEXT: s_mov_b32 s5, s7
; GFX12-NEXT: s_mov_b32 s6, s8
; GFX12-NEXT: s_mov_b32 s7, s9
; GFX12-NEXT: v_mov_b32_e32 v0, v5
; GFX12-NEXT: v_mov_b32_e32 v1, v6
; GFX12-NEXT: v_mov_b32_e32 v2, v7
; GFX12-NEXT: v_mov_b32_e32 v3, v8
; GFX12-NEXT: v_mov_b32_e32 v4, v9
; GFX12-NEXT: image_load v[0:4], [v10, v11], s[0:7] dmask:0xf dim:SQ_RSRC_IMG_2D_MSAA_ARRAY a16 tfe
; GFX12-NEXT: s_wait_loadcnt 0x0
; GFX12-NEXT: global_store_b32 v5, v4, s[10:11]
; GFX12-NEXT: ; return to shader part epilog
  %v = call { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i16(i32 15, i16 %s, i16 %t, i16 %slice, i16 %fragid, <8 x i32> %rsrc, i32 3, i32 0)
  %v.vec = extractvalue { <4 x float>, i32 } %v, 0
  %v.err = extractvalue { <4 x float>, i32 } %v, 1
  store i32 %v.err, ptr addrspace(1) %out, align 4
  ret <4 x float> %v.vec
}

; The two intrinsic overloads called above: one returns only the <4 x float>
; data, the other returns the data together with an i32. Both take the same
; operand list, and the operands marked immarg are passed as constants at
; every call site in this file.
declare <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i16(i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0
declare { <4 x float>, i32 } @llvm.amdgcn.image.load.2darraymsaa.sl_v4f32i32s.i16(i32 immarg, i16, i16, i16, i16, <8 x i32>, i32 immarg, i32 immarg) #0

attributes #0 = { nounwind readonly }