; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11 %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -amdgpu-enable-delay-alu=0 -amdgpu-enable-vopd=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12 %s

; GlobalISel selection of the image.sample.d / c.d / d.cl / c.d.cl / c.d.o
; intrinsics when the derivative operands are half and the coordinates are
; float. Each function is compiled for gfx1010, gfx1100 and gfx1200 and is
; expected to select an image_sample_*_g16 instruction.
;
; In the assertions below, the 1D cases need no extra instructions, while the
; 2D/3D cases first combine pairs of half derivatives into one VGPR with
; v_and_b32 (mask 0xffff) and v_lshl_or_b32 (shift 16).
; The gfx1200 assertions wait with s_wait_samplecnt; the older targets use
; s_waitcnt vmcnt(0).
;
; Intrinsic name suffixes in this file list the overloaded types in the order
; return type, derivative type, coordinate type (e.g. v4f32.f16.f32).

; sample.d, 1D: two half derivatives and one float coordinate.
define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) {
; GFX10-LABEL: sample_d_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: sample_d_1d:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: sample_d_1d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    image_sample_d_g16 v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; sample.d, 2D: four half derivatives and two float coordinates.
define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
; GFX10-LABEL: sample_d_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff, v2
; GFX10-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
; GFX10-NEXT:    v_lshl_or_b32 v1, v3, 16, v2
; GFX10-NEXT:    image_sample_d_g16 v[0:3], [v0, v1, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: sample_d_2d:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX11-NEXT:    v_and_b32_e32 v2, 0xffff, v2
; GFX11-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
; GFX11-NEXT:    v_lshl_or_b32 v1, v3, 16, v2
; GFX11-NEXT:    image_sample_d_g16 v[0:3], [v0, v1, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: sample_d_2d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX12-NEXT:    v_and_b32_e32 v2, 0xffff, v2
; GFX12-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
; GFX12-NEXT:    v_lshl_or_b32 v1, v3, 16, v2
; GFX12-NEXT:    image_sample_d_g16 v[0:3], [v0, v1, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; sample.d, 3D: six half derivatives and three float coordinates.
define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r) {
; GFX10-LABEL: sample_d_3d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v9, v3
; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT:    v_mov_b32_e32 v3, v2
; GFX10-NEXT:    v_and_b32_e32 v9, 0xffff, v9
; GFX10-NEXT:    v_lshl_or_b32 v2, v1, 16, v0
; GFX10-NEXT:    v_lshl_or_b32 v4, v4, 16, v9
; GFX10-NEXT:    image_sample_d_g16 v[0:3], v[2:8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: sample_d_3d:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX11-NEXT:    v_and_b32_e32 v3, 0xffff, v3
; GFX11-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
; GFX11-NEXT:    v_lshl_or_b32 v1, v4, 16, v3
; GFX11-NEXT:    image_sample_d_g16 v[0:3], [v0, v2, v1, v5, v[6:8]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: sample_d_3d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX12-NEXT:    v_and_b32_e32 v3, 0xffff, v3
; GFX12-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
; GFX12-NEXT:    v_lshl_or_b32 v1, v4, 16, v3
; GFX12-NEXT:    image_sample_d_g16 v[0:3], [v0, v2, v1, v[5:8]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; sample.c.d, 1D: float %zcompare precedes the half derivatives.
define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) {
; GFX10-LABEL: sample_c_d_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    image_sample_c_d_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: sample_c_d_1d:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    image_sample_c_d_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: sample_c_d_1d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    image_sample_c_d_g16 v[0:3], [v0, v1, v2, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; sample.c.d, 2D: float %zcompare, four half derivatives, two float coordinates.
define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
; GFX10-LABEL: sample_c_d_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX10-NEXT:    v_and_b32_e32 v3, 0xffff, v3
; GFX10-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
; GFX10-NEXT:    v_lshl_or_b32 v2, v4, 16, v3
; GFX10-NEXT:    image_sample_c_d_g16 v[0:3], [v0, v1, v2, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: sample_c_d_2d:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX11-NEXT:    v_and_b32_e32 v3, 0xffff, v3
; GFX11-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
; GFX11-NEXT:    v_lshl_or_b32 v2, v4, 16, v3
; GFX11-NEXT:    image_sample_c_d_g16 v[0:3], [v0, v1, v2, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: sample_c_d_2d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX12-NEXT:    v_and_b32_e32 v3, 0xffff, v3
; GFX12-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
; GFX12-NEXT:    v_lshl_or_b32 v2, v4, 16, v3
; GFX12-NEXT:    image_sample_c_d_g16 v[0:3], [v0, v1, v2, v[5:6]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; sample.d.cl, 1D: trailing float %clamp after the coordinate.
define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s, float %clamp) {
; GFX10-LABEL: sample_d_cl_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    image_sample_d_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: sample_d_cl_1d:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    image_sample_d_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: sample_d_cl_1d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    image_sample_d_cl_g16 v[0:3], [v0, v1, v2, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; sample.d.cl, 2D: four half derivatives, two float coordinates, float %clamp.
define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
; GFX10-LABEL: sample_d_cl_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX10-NEXT:    v_and_b32_e32 v2, 0xffff, v2
; GFX10-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
; GFX10-NEXT:    v_lshl_or_b32 v1, v3, 16, v2
; GFX10-NEXT:    image_sample_d_cl_g16 v[0:3], [v0, v1, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: sample_d_cl_2d:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX11-NEXT:    v_and_b32_e32 v2, 0xffff, v2
; GFX11-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
; GFX11-NEXT:    v_lshl_or_b32 v1, v3, 16, v2
; GFX11-NEXT:    image_sample_d_cl_g16 v[0:3], [v0, v1, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: sample_d_cl_2d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX12-NEXT:    v_and_b32_e32 v2, 0xffff, v2
; GFX12-NEXT:    v_lshl_or_b32 v0, v1, 16, v0
; GFX12-NEXT:    v_lshl_or_b32 v1, v3, 16, v2
; GFX12-NEXT:    image_sample_d_cl_g16 v[0:3], [v0, v1, v4, v[5:6]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; sample.c.d.cl, 1D: float %zcompare first, float %clamp last.
define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp) {
; GFX10-LABEL: sample_c_d_cl_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    image_sample_c_d_cl_g16 v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: sample_c_d_cl_1d:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    image_sample_c_d_cl_g16 v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: sample_c_d_cl_1d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    image_sample_c_d_cl_g16 v[0:3], [v0, v1, v2, v[3:4]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; sample.c.d.cl, 2D: float %zcompare, four half derivatives, two float
; coordinates, float %clamp.
define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
; GFX10-LABEL: sample_c_d_cl_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v8, v2
; GFX10-NEXT:    v_mov_b32_e32 v2, v0
; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v1
; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v3
; GFX10-NEXT:    v_lshl_or_b32 v3, v8, 16, v0
; GFX10-NEXT:    v_lshl_or_b32 v4, v4, 16, v1
; GFX10-NEXT:    image_sample_c_d_cl_g16 v[0:3], v[2:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: sample_c_d_cl_2d:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX11-NEXT:    v_and_b32_e32 v3, 0xffff, v3
; GFX11-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
; GFX11-NEXT:    v_lshl_or_b32 v2, v4, 16, v3
; GFX11-NEXT:    image_sample_c_d_cl_g16 v[0:3], [v0, v1, v2, v5, v[6:7]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: sample_c_d_cl_2d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX12-NEXT:    v_and_b32_e32 v3, 0xffff, v3
; GFX12-NEXT:    v_lshl_or_b32 v1, v2, 16, v1
; GFX12-NEXT:    v_lshl_or_b32 v2, v4, 16, v3
; GFX12-NEXT:    image_sample_c_d_cl_g16 v[0:3], [v0, v1, v2, v[5:7]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; sample.c.d.o, 2D array, scalar float result (dmask 4): i32 %offset and
; float %zcompare precede the half derivatives. The intrinsic suffix is
; f32.f16.f32 (float return, half derivatives, float coordinates), matching
; the ordering used by every other intrinsic in this file.
define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {
; GFX10-LABEL: sample_c_d_o_2darray_V1:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v9, v2
; GFX10-NEXT:    v_mov_b32_e32 v10, v3
; GFX10-NEXT:    v_mov_b32_e32 v2, v0
; GFX10-NEXT:    v_mov_b32_e32 v3, v1
; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v4
; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v9
; GFX10-NEXT:    v_lshl_or_b32 v5, v5, 16, v1
; GFX10-NEXT:    v_lshl_or_b32 v4, v10, 16, v0
; GFX10-NEXT:    image_sample_c_d_o_g16 v0, v[2:8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: sample_c_d_o_2darray_V1:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_and_b32_e32 v2, 0xffff, v2
; GFX11-NEXT:    v_and_b32_e32 v4, 0xffff, v4
; GFX11-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
; GFX11-NEXT:    v_lshl_or_b32 v3, v5, 16, v4
; GFX11-NEXT:    image_sample_c_d_o_g16 v0, [v0, v1, v2, v3, v[6:8]], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: sample_c_d_o_2darray_V1:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    v_and_b32_e32 v2, 0xffff, v2
; GFX12-NEXT:    v_and_b32_e32 v4, 0xffff, v4
; GFX12-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
; GFX12-NEXT:    v_lshl_or_b32 v5, v5, 16, v4
; GFX12-NEXT:    image_sample_c_d_o_g16 v0, [v0, v1, v2, v[5:8]], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f32(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret float %v
}

; sample.c.d.o, 2D array, two-component result (dmask 6).
define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) {
; GFX10-LABEL: sample_c_d_o_2darray_V2:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v9, v2
; GFX10-NEXT:    v_mov_b32_e32 v10, v3
; GFX10-NEXT:    v_mov_b32_e32 v2, v0
; GFX10-NEXT:    v_mov_b32_e32 v3, v1
; GFX10-NEXT:    v_and_b32_e32 v1, 0xffff, v4
; GFX10-NEXT:    v_and_b32_e32 v0, 0xffff, v9
; GFX10-NEXT:    v_lshl_or_b32 v5, v5, 16, v1
; GFX10-NEXT:    v_lshl_or_b32 v4, v10, 16, v0
; GFX10-NEXT:    image_sample_c_d_o_g16 v[0:1], v[2:8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: sample_c_d_o_2darray_V2:
; GFX11:       ; %bb.0: ; %main_body
; GFX11-NEXT:    v_and_b32_e32 v2, 0xffff, v2
; GFX11-NEXT:    v_and_b32_e32 v4, 0xffff, v4
; GFX11-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
; GFX11-NEXT:    v_lshl_or_b32 v3, v5, 16, v4
; GFX11-NEXT:    image_sample_c_d_o_g16 v[0:1], [v0, v1, v2, v3, v[6:8]], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY
; GFX11-NEXT:    s_waitcnt vmcnt(0)
; GFX11-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: sample_c_d_o_2darray_V2:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    v_and_b32_e32 v2, 0xffff, v2
; GFX12-NEXT:    v_and_b32_e32 v4, 0xffff, v4
; GFX12-NEXT:    v_lshl_or_b32 v2, v3, 16, v2
; GFX12-NEXT:    v_lshl_or_b32 v5, v5, 16, v4
; GFX12-NEXT:    image_sample_c_d_o_g16 v[0:1], [v0, v1, v2, v[5:8]], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <2 x float> %v
}

; Intrinsic declarations used by the functions above.
declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32, half, half, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; Only attribute group #1 is referenced in this file.
attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }
attributes #2 = { nounwind readnone }