1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -mtriple=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=VERDE %s 3; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX6789 %s 4; RUN: llc -mtriple=amdgcn -mcpu=gfx9-generic --amdhsa-code-object-version=6 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX6789 %s 5; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s 6; RUN: llc -mtriple=amdgcn -mcpu=gfx10-1-generic --amdhsa-code-object-version=6 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s 7; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s 8; RUN: llc -mtriple=amdgcn -mcpu=gfx11-generic --amdhsa-code-object-version=6 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s 9; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -amdgpu-enable-delay-alu=0 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12 %s 10 11define amdgpu_ps <4 x float> @sample_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { 12; VERDE-LABEL: sample_1d: 13; VERDE: ; %bb.0: ; %main_body 14; VERDE-NEXT: s_mov_b64 s[12:13], exec 15; VERDE-NEXT: s_wqm_b64 exec, exec 16; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 17; VERDE-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf 18; VERDE-NEXT: s_waitcnt vmcnt(0) 19; VERDE-NEXT: ; return to shader part epilog 20; 21; GFX6789-LABEL: sample_1d: 22; GFX6789: ; %bb.0: ; %main_body 23; GFX6789-NEXT: s_mov_b64 s[12:13], exec 24; GFX6789-NEXT: s_wqm_b64 exec, exec 25; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 26; GFX6789-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf 27; GFX6789-NEXT: s_waitcnt vmcnt(0) 28; GFX6789-NEXT: ; return to shader part epilog 29; 30; GFX10PLUS-LABEL: sample_1d: 31; GFX10PLUS: ; %bb.0: ; %main_body 32; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 33; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 34; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 35; GFX10PLUS-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 36; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 37; GFX10PLUS-NEXT: ; return to shader part epilog 38; 39; GFX12-LABEL: sample_1d: 40; GFX12: ; %bb.0: ; %main_body 41; GFX12-NEXT: s_mov_b32 s12, exec_lo 42; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo 43; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12 44; GFX12-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 45; GFX12-NEXT: s_wait_samplecnt 0x0 46; GFX12-NEXT: ; return to shader part epilog 47main_body: 48 %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 49 ret <4 x float> %v 50} 51 52define amdgpu_ps <4 x float> @sample_1d_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, ptr addrspace(1) inreg %out, float %s) { 53; VERDE-LABEL: sample_1d_tfe: 54; VERDE: ; %bb.0: ; %main_body 55; VERDE-NEXT: s_mov_b64 s[14:15], exec 56; VERDE-NEXT: s_wqm_b64 exec, exec 57; VERDE-NEXT: v_mov_b32_e32 v5, v0 58; VERDE-NEXT: v_mov_b32_e32 v0, 0 59; VERDE-NEXT: v_mov_b32_e32 v1, v0 60; VERDE-NEXT: v_mov_b32_e32 v2, v0 61; VERDE-NEXT: v_mov_b32_e32 v3, v0 62; VERDE-NEXT: v_mov_b32_e32 v4, v0 63; VERDE-NEXT: s_and_b64 exec, exec, s[14:15] 64; VERDE-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf tfe 65; VERDE-NEXT: s_mov_b32 s15, 0xf000 66; VERDE-NEXT: s_mov_b32 s14, -1 67; VERDE-NEXT: s_waitcnt vmcnt(0) 68; VERDE-NEXT: buffer_store_dword v4, off, s[12:15], 0 69; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) 70; VERDE-NEXT: ; return to shader part epilog 71; 72; GFX6789-LABEL: sample_1d_tfe: 73; GFX6789: ; %bb.0: ; %main_body 74; GFX6789-NEXT: s_mov_b64 s[14:15], exec 75; GFX6789-NEXT: s_wqm_b64 exec, exec 76; GFX6789-NEXT: v_mov_b32_e32 v6, 0 77; GFX6789-NEXT: v_mov_b32_e32 v5, v0 78; GFX6789-NEXT: v_mov_b32_e32 v7, v6 79; GFX6789-NEXT: v_mov_b32_e32 v8, v6 80; GFX6789-NEXT: v_mov_b32_e32 v9, v6 81; GFX6789-NEXT: v_mov_b32_e32 v10, v6 82; GFX6789-NEXT: v_mov_b32_e32 v0, v6 83; GFX6789-NEXT: v_mov_b32_e32 v1, v7 84; GFX6789-NEXT: v_mov_b32_e32 v2, v8 85; GFX6789-NEXT: v_mov_b32_e32 v3, v9 86; GFX6789-NEXT: v_mov_b32_e32 v4, v10 87; GFX6789-NEXT: s_and_b64 exec, exec, s[14:15] 88; GFX6789-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf tfe 89; GFX6789-NEXT: s_waitcnt vmcnt(0) 90; GFX6789-NEXT: global_store_dword v6, v4, s[12:13] 91; GFX6789-NEXT: s_waitcnt vmcnt(0) 92; GFX6789-NEXT: ; return to shader part epilog 93; 94; GFX10-LABEL: sample_1d_tfe: 95; GFX10: ; %bb.0: ; %main_body 96; GFX10-NEXT: s_mov_b32 s14, exec_lo 97; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo 98; GFX10-NEXT: v_mov_b32_e32 v6, 0 99; GFX10-NEXT: v_mov_b32_e32 v5, v0 100; GFX10-NEXT: v_mov_b32_e32 v7, v6 101; GFX10-NEXT: v_mov_b32_e32 v8, v6 102; GFX10-NEXT: v_mov_b32_e32 v9, v6 103; GFX10-NEXT: v_mov_b32_e32 v10, v6 104; GFX10-NEXT: v_mov_b32_e32 v0, v6 105; GFX10-NEXT: v_mov_b32_e32 v1, v7 106; GFX10-NEXT: v_mov_b32_e32 v2, v8 107; GFX10-NEXT: v_mov_b32_e32 v3, v9 108; GFX10-NEXT: v_mov_b32_e32 v4, v10 109; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s14 110; GFX10-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D tfe 111; GFX10-NEXT: s_waitcnt vmcnt(0) 112; GFX10-NEXT: global_store_dword v6, v4, s[12:13] 113; GFX10-NEXT: ; return to shader part epilog 114; 115; GFX11-LABEL: sample_1d_tfe: 116; GFX11: ; %bb.0: ; %main_body 117; GFX11-NEXT: s_mov_b32 s14, exec_lo 118; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo 119; GFX11-NEXT: v_dual_mov_b32 v5, v0 :: v_dual_mov_b32 v6, 0 120; GFX11-NEXT: v_mov_b32_e32 v7, v6 121; GFX11-NEXT: v_mov_b32_e32 v8, v6 122; GFX11-NEXT: v_mov_b32_e32 v9, v6 123; GFX11-NEXT: v_mov_b32_e32 v10, v6 124; GFX11-NEXT: v_dual_mov_b32 v0, v6 :: v_dual_mov_b32 v1, v7 125; GFX11-NEXT: v_dual_mov_b32 v2, v8 :: v_dual_mov_b32 v3, v9 126; GFX11-NEXT: v_mov_b32_e32 v4, v10 127; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s14 128; GFX11-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D tfe 129; GFX11-NEXT: s_waitcnt vmcnt(0) 130; GFX11-NEXT: global_store_b32 v6, v4, s[12:13] 131; GFX11-NEXT: ; return to shader part epilog 132; 133; GFX12-LABEL: sample_1d_tfe: 134; GFX12: ; %bb.0: ; %main_body 135; GFX12-NEXT: s_mov_b32 s14, exec_lo 136; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo 137; GFX12-NEXT: v_dual_mov_b32 v5, v0 :: v_dual_mov_b32 v6, 0 138; GFX12-NEXT: v_dual_mov_b32 v7, v6 :: v_dual_mov_b32 v8, v6 139; GFX12-NEXT: v_dual_mov_b32 v9, v6 :: v_dual_mov_b32 v10, v6 140; GFX12-NEXT: v_dual_mov_b32 v0, v6 :: v_dual_mov_b32 v1, v7 141; GFX12-NEXT: v_dual_mov_b32 v2, v8 :: v_dual_mov_b32 v3, v9 142; GFX12-NEXT: v_mov_b32_e32 v4, v10 143; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s14 144; GFX12-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D tfe 145; GFX12-NEXT: s_wait_samplecnt 0x0 146; GFX12-NEXT: global_store_b32 v6, v4, s[12:13] 147; GFX12-NEXT: ; return to shader part epilog 148main_body: 149 %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0) 150 %v.vec = extractvalue {<4 x float>, i32} %v, 0 151 %v.err = extractvalue {<4 x float>, i32} %v, 1 152 store i32 %v.err, ptr addrspace(1) %out, align 4 153 ret <4 x float> %v.vec 154} 155 156define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, ptr addrspace(1) inreg %out, float %s) { 157; VERDE-LABEL: sample_1d_tfe_adjust_writemask_1: 158; VERDE: ; %bb.0: ; %main_body 159; VERDE-NEXT: s_mov_b64 s[12:13], exec 160; VERDE-NEXT: s_wqm_b64 exec, exec 161; VERDE-NEXT: v_mov_b32_e32 v2, v0 162; VERDE-NEXT: v_mov_b32_e32 v0, 0 163; VERDE-NEXT: v_mov_b32_e32 v1, v0 164; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 165; VERDE-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x1 tfe 166; VERDE-NEXT: s_waitcnt vmcnt(0) 167; VERDE-NEXT: ; return to shader part epilog 168; 169; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_1: 170; GFX6789: ; %bb.0: ; %main_body 171; GFX6789-NEXT: s_mov_b64 s[12:13], exec 172; GFX6789-NEXT: s_wqm_b64 exec, exec 173; GFX6789-NEXT: v_mov_b32_e32 v2, v0 174; GFX6789-NEXT: v_mov_b32_e32 v0, 0 175; GFX6789-NEXT: v_mov_b32_e32 v1, v0 176; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 177; GFX6789-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x1 tfe 178; GFX6789-NEXT: s_waitcnt vmcnt(0) 179; GFX6789-NEXT: ; return to shader part epilog 180; 181; GFX10PLUS-LABEL: sample_1d_tfe_adjust_writemask_1: 182; GFX10PLUS: ; %bb.0: ; %main_body 183; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 184; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 185; GFX10PLUS-NEXT: v_mov_b32_e32 v2, v0 186; GFX10PLUS-NEXT: v_mov_b32_e32 v0, 0 187; GFX10PLUS-NEXT: v_mov_b32_e32 v1, v0 188; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 189; GFX10PLUS-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe 190; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 191; GFX10PLUS-NEXT: ; return to shader part epilog 192; 193; GFX12-LABEL: sample_1d_tfe_adjust_writemask_1: 194; GFX12: ; %bb.0: ; %main_body 195; GFX12-NEXT: s_mov_b32 s12, exec_lo 196; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo 197; GFX12-NEXT: v_mov_b32_e32 v2, v0 198; GFX12-NEXT: v_mov_b32_e32 v0, 0 199; GFX12-NEXT: v_mov_b32_e32 v1, v0 200; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12 201; GFX12-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D tfe 202; GFX12-NEXT: s_wait_samplecnt 0x0 203; GFX12-NEXT: ; return to shader part epilog 204main_body: 205 %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0) 206 %res.vec = extractvalue {<4 x float>,i32} %v, 0 207 %res.f = extractelement <4 x float> %res.vec, i32 0 208 %res.err = extractvalue {<4 x float>,i32} %v, 1 209 %res.errf = bitcast i32 %res.err to float 210 %res.tmp = insertelement <2 x float> undef, float %res.f, i32 0 211 %res = insertelement <2 x float> %res.tmp, float %res.errf, i32 1 212 ret <2 x float> %res 213} 214 215define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { 216; VERDE-LABEL: sample_1d_tfe_adjust_writemask_2: 217; VERDE: ; %bb.0: ; %main_body 218; VERDE-NEXT: s_mov_b64 s[12:13], exec 219; VERDE-NEXT: s_wqm_b64 exec, exec 220; VERDE-NEXT: v_mov_b32_e32 v2, v0 221; VERDE-NEXT: v_mov_b32_e32 v0, 0 222; VERDE-NEXT: v_mov_b32_e32 v1, v0 223; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 224; VERDE-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x2 tfe 225; VERDE-NEXT: s_waitcnt vmcnt(0) 226; VERDE-NEXT: ; return to shader part epilog 227; 228; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_2: 229; GFX6789: ; %bb.0: ; %main_body 230; GFX6789-NEXT: s_mov_b64 s[12:13], exec 231; GFX6789-NEXT: s_wqm_b64 exec, exec 232; GFX6789-NEXT: v_mov_b32_e32 v2, v0 233; GFX6789-NEXT: v_mov_b32_e32 v0, 0 234; GFX6789-NEXT: v_mov_b32_e32 v1, v0 235; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 236; GFX6789-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x2 tfe 237; GFX6789-NEXT: s_waitcnt vmcnt(0) 238; GFX6789-NEXT: ; return to shader part epilog 239; 240; GFX10PLUS-LABEL: sample_1d_tfe_adjust_writemask_2: 241; GFX10PLUS: ; %bb.0: ; %main_body 242; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 243; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 244; GFX10PLUS-NEXT: v_mov_b32_e32 v2, v0 245; GFX10PLUS-NEXT: v_mov_b32_e32 v0, 0 246; GFX10PLUS-NEXT: v_mov_b32_e32 v1, v0 247; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 248; GFX10PLUS-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x2 dim:SQ_RSRC_IMG_1D tfe 249; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 250; GFX10PLUS-NEXT: ; return to shader part epilog 251; 252; GFX12-LABEL: sample_1d_tfe_adjust_writemask_2: 253; GFX12: ; %bb.0: ; %main_body 254; GFX12-NEXT: s_mov_b32 s12, exec_lo 255; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo 256; GFX12-NEXT: v_mov_b32_e32 v2, v0 257; GFX12-NEXT: v_mov_b32_e32 v0, 0 258; GFX12-NEXT: v_mov_b32_e32 v1, v0 259; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12 260; GFX12-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x2 dim:SQ_RSRC_IMG_1D tfe 261; GFX12-NEXT: s_wait_samplecnt 0x0 262; GFX12-NEXT: ; return to shader part epilog 263main_body: 264 %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0) 265 %res.vec = extractvalue {<4 x float>,i32} %v, 0 266 %res.f = extractelement <4 x float> %res.vec, i32 1 267 %res.err = extractvalue {<4 x float>,i32} %v, 1 268 %res.errf = bitcast i32 %res.err to float 269 %res.tmp = insertelement <2 x float> undef, float %res.f, i32 0 270 %res = insertelement <2 x float> %res.tmp, float %res.errf, i32 1 271 ret <2 x float> %res 272} 273 274define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_3(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { 275; VERDE-LABEL: sample_1d_tfe_adjust_writemask_3: 276; VERDE: ; %bb.0: ; %main_body 277; VERDE-NEXT: s_mov_b64 s[12:13], exec 278; VERDE-NEXT: s_wqm_b64 exec, exec 279; VERDE-NEXT: v_mov_b32_e32 v2, v0 280; VERDE-NEXT: v_mov_b32_e32 v0, 0 281; VERDE-NEXT: v_mov_b32_e32 v1, v0 282; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 283; VERDE-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x4 tfe 284; VERDE-NEXT: s_waitcnt vmcnt(0) 285; VERDE-NEXT: ; return to shader part epilog 286; 287; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_3: 288; GFX6789: ; %bb.0: ; %main_body 289; GFX6789-NEXT: s_mov_b64 s[12:13], exec 290; GFX6789-NEXT: s_wqm_b64 exec, exec 291; GFX6789-NEXT: v_mov_b32_e32 v2, v0 292; GFX6789-NEXT: v_mov_b32_e32 v0, 0 293; GFX6789-NEXT: v_mov_b32_e32 v1, v0 294; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 295; GFX6789-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x4 tfe 296; GFX6789-NEXT: s_waitcnt vmcnt(0) 297; GFX6789-NEXT: ; return to shader part epilog 298; 299; GFX10PLUS-LABEL: sample_1d_tfe_adjust_writemask_3: 300; GFX10PLUS: ; %bb.0: ; %main_body 301; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 302; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 303; GFX10PLUS-NEXT: v_mov_b32_e32 v2, v0 304; GFX10PLUS-NEXT: v_mov_b32_e32 v0, 0 305; GFX10PLUS-NEXT: v_mov_b32_e32 v1, v0 306; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 307; GFX10PLUS-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_1D tfe 308; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 309; GFX10PLUS-NEXT: ; return to shader part epilog 310; 311; GFX12-LABEL: sample_1d_tfe_adjust_writemask_3: 312; GFX12: ; %bb.0: ; %main_body 313; GFX12-NEXT: s_mov_b32 s12, exec_lo 314; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo 315; GFX12-NEXT: v_mov_b32_e32 v2, v0 316; GFX12-NEXT: v_mov_b32_e32 v0, 0 317; GFX12-NEXT: v_mov_b32_e32 v1, v0 318; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12 319; GFX12-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_1D tfe 320; GFX12-NEXT: s_wait_samplecnt 0x0 321; GFX12-NEXT: ; return to shader part epilog 322main_body: 323 %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0) 324 %res.vec = extractvalue {<4 x float>,i32} %v, 0 325 %res.f = extractelement <4 x float> %res.vec, i32 2 326 %res.err = extractvalue {<4 x float>,i32} %v, 1 327 %res.errf = bitcast i32 %res.err to float 328 %res.tmp = insertelement <2 x float> undef, float %res.f, i32 0 329 %res = insertelement <2 x float> %res.tmp, float %res.errf, i32 1 330 ret <2 x float> %res 331} 332 333define amdgpu_ps <2 x float> @sample_1d_tfe_adjust_writemask_4(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { 334; VERDE-LABEL: sample_1d_tfe_adjust_writemask_4: 335; VERDE: ; %bb.0: ; %main_body 336; VERDE-NEXT: s_mov_b64 s[12:13], exec 337; VERDE-NEXT: s_wqm_b64 exec, exec 338; VERDE-NEXT: v_mov_b32_e32 v2, v0 339; VERDE-NEXT: v_mov_b32_e32 v0, 0 340; VERDE-NEXT: v_mov_b32_e32 v1, v0 341; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 342; VERDE-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x8 tfe 343; VERDE-NEXT: s_waitcnt vmcnt(0) 344; VERDE-NEXT: ; return to shader part epilog 345; 346; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_4: 347; GFX6789: ; %bb.0: ; %main_body 348; GFX6789-NEXT: s_mov_b64 s[12:13], exec 349; GFX6789-NEXT: s_wqm_b64 exec, exec 350; GFX6789-NEXT: v_mov_b32_e32 v2, v0 351; GFX6789-NEXT: v_mov_b32_e32 v0, 0 352; GFX6789-NEXT: v_mov_b32_e32 v1, v0 353; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 354; GFX6789-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x8 tfe 355; GFX6789-NEXT: s_waitcnt vmcnt(0) 356; GFX6789-NEXT: ; return to shader part epilog 357; 358; GFX10PLUS-LABEL: sample_1d_tfe_adjust_writemask_4: 359; GFX10PLUS: ; %bb.0: ; %main_body 360; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 361; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 362; GFX10PLUS-NEXT: v_mov_b32_e32 v2, v0 363; GFX10PLUS-NEXT: v_mov_b32_e32 v0, 0 364; GFX10PLUS-NEXT: v_mov_b32_e32 v1, v0 365; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 366; GFX10PLUS-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x8 dim:SQ_RSRC_IMG_1D tfe 367; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 368; GFX10PLUS-NEXT: ; return to shader part epilog 369; 370; GFX12-LABEL: sample_1d_tfe_adjust_writemask_4: 371; GFX12: ; %bb.0: ; %main_body 372; GFX12-NEXT: s_mov_b32 s12, exec_lo 373; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo 374; GFX12-NEXT: v_mov_b32_e32 v2, v0 375; GFX12-NEXT: v_mov_b32_e32 v0, 0 376; GFX12-NEXT: v_mov_b32_e32 v1, v0 377; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12 378; GFX12-NEXT: image_sample v[0:1], v2, s[0:7], s[8:11] dmask:0x8 dim:SQ_RSRC_IMG_1D tfe 379; GFX12-NEXT: s_wait_samplecnt 0x0 380; GFX12-NEXT: ; return to shader part epilog 381main_body: 382 %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0) 383 %res.vec = extractvalue {<4 x float>,i32} %v, 0 384 %res.f = extractelement <4 x float> %res.vec, i32 3 385 %res.err = extractvalue {<4 x float>,i32} %v, 1 386 %res.errf = bitcast i32 %res.err to float 387 %res.tmp = insertelement <2 x float> undef, float %res.f, i32 0 388 %res = insertelement <2 x float> %res.tmp, float %res.errf, i32 1 389 ret <2 x float> %res 390} 391 392define amdgpu_ps <4 x float> @sample_1d_tfe_adjust_writemask_12(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { 393; VERDE-LABEL: sample_1d_tfe_adjust_writemask_12: 394; VERDE: ; %bb.0: ; %main_body 395; VERDE-NEXT: s_mov_b64 s[12:13], exec 396; VERDE-NEXT: s_wqm_b64 exec, exec 397; VERDE-NEXT: v_mov_b32_e32 v3, v0 398; VERDE-NEXT: v_mov_b32_e32 v0, 0 399; VERDE-NEXT: v_mov_b32_e32 v1, v0 400; VERDE-NEXT: v_mov_b32_e32 v2, v0 401; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 402; VERDE-NEXT: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0x3 tfe 403; VERDE-NEXT: s_waitcnt vmcnt(0) 404; VERDE-NEXT: ; return to shader part epilog 405; 406; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_12: 407; GFX6789: ; %bb.0: ; %main_body 408; GFX6789-NEXT: s_mov_b64 s[12:13], exec 409; GFX6789-NEXT: s_wqm_b64 exec, exec 410; GFX6789-NEXT: v_mov_b32_e32 v3, v0 411; GFX6789-NEXT: v_mov_b32_e32 v0, 0 412; GFX6789-NEXT: v_mov_b32_e32 v1, v0 413; GFX6789-NEXT: v_mov_b32_e32 v2, v0 414; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 415; GFX6789-NEXT: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0x3 tfe 416; GFX6789-NEXT: s_waitcnt vmcnt(0) 417; GFX6789-NEXT: ; return to shader part epilog 418; 419; GFX10-LABEL: sample_1d_tfe_adjust_writemask_12: 420; GFX10: ; %bb.0: ; %main_body 421; GFX10-NEXT: s_mov_b32 s12, exec_lo 422; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo 423; GFX10-NEXT: v_mov_b32_e32 v3, v0 424; GFX10-NEXT: v_mov_b32_e32 v0, 0 425; GFX10-NEXT: v_mov_b32_e32 v1, v0 426; GFX10-NEXT: v_mov_b32_e32 v2, v0 427; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 428; GFX10-NEXT: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe 429; GFX10-NEXT: s_waitcnt vmcnt(0) 430; GFX10-NEXT: ; return to shader part epilog 431; 432; GFX11-LABEL: sample_1d_tfe_adjust_writemask_12: 433; GFX11: ; %bb.0: ; %main_body 434; GFX11-NEXT: s_mov_b32 s12, exec_lo 435; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo 436; GFX11-NEXT: v_dual_mov_b32 v3, v0 :: v_dual_mov_b32 v0, 0 437; GFX11-NEXT: v_mov_b32_e32 v1, v0 438; GFX11-NEXT: v_mov_b32_e32 v2, v0 439; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s12 440; GFX11-NEXT: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe 441; GFX11-NEXT: s_waitcnt vmcnt(0) 442; GFX11-NEXT: ; return to shader part epilog 443; 444; GFX12-LABEL: sample_1d_tfe_adjust_writemask_12: 445; GFX12: ; %bb.0: ; %main_body 446; GFX12-NEXT: s_mov_b32 s12, exec_lo 447; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo 448; GFX12-NEXT: v_dual_mov_b32 v3, v0 :: v_dual_mov_b32 v0, 0 449; GFX12-NEXT: v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v2, v0 450; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12 451; GFX12-NEXT: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0x3 dim:SQ_RSRC_IMG_1D tfe 452; GFX12-NEXT: s_wait_samplecnt 0x0 453; GFX12-NEXT: ; return to shader part epilog 454main_body: 455 %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0) 456 %res.vec = extractvalue {<4 x float>,i32} %v, 0 457 %res.f1 = extractelement <4 x float> %res.vec, i32 0 458 %res.f2 = extractelement <4 x float> %res.vec, i32 1 459 %res.err = extractvalue {<4 x float>,i32} %v, 1 460 %res.errf = bitcast i32 %res.err to float 461 %res.tmp1 = insertelement <4 x float> undef, float %res.f1, i32 0 462 %res.tmp2 = insertelement <4 x float> %res.tmp1, float %res.f2, i32 1 463 %res = insertelement <4 x float> %res.tmp2, float %res.errf, i32 2 464 ret <4 x float> %res 465} 466 467define amdgpu_ps <4 x float> @sample_1d_tfe_adjust_writemask_24(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { 468; VERDE-LABEL: sample_1d_tfe_adjust_writemask_24: 469; VERDE: ; %bb.0: ; %main_body 470; VERDE-NEXT: s_mov_b64 s[12:13], exec 471; VERDE-NEXT: s_wqm_b64 exec, exec 472; VERDE-NEXT: v_mov_b32_e32 v3, v0 473; VERDE-NEXT: v_mov_b32_e32 v0, 0 474; VERDE-NEXT: v_mov_b32_e32 v1, v0 475; VERDE-NEXT: v_mov_b32_e32 v2, v0 476; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 477; VERDE-NEXT: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0xa tfe 478; VERDE-NEXT: s_waitcnt vmcnt(0) 479; VERDE-NEXT: ; return to shader part epilog 480; 481; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_24: 482; GFX6789: ; %bb.0: ; %main_body 483; GFX6789-NEXT: s_mov_b64 s[12:13], exec 484; GFX6789-NEXT: s_wqm_b64 exec, exec 485; GFX6789-NEXT: v_mov_b32_e32 v3, v0 486; GFX6789-NEXT: v_mov_b32_e32 v0, 0 487; GFX6789-NEXT: v_mov_b32_e32 v1, v0 488; GFX6789-NEXT: v_mov_b32_e32 v2, v0 489; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 490; GFX6789-NEXT: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0xa tfe 491; GFX6789-NEXT: s_waitcnt vmcnt(0) 492; GFX6789-NEXT: ; return to shader part epilog 493; 494; GFX10-LABEL: sample_1d_tfe_adjust_writemask_24: 495; GFX10: ; %bb.0: ; %main_body 496; GFX10-NEXT: s_mov_b32 s12, exec_lo 497; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo 498; GFX10-NEXT: v_mov_b32_e32 v3, v0 499; GFX10-NEXT: v_mov_b32_e32 v0, 0 500; GFX10-NEXT: v_mov_b32_e32 v1, v0 501; GFX10-NEXT: v_mov_b32_e32 v2, v0 502; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s12 503; GFX10-NEXT: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0xa dim:SQ_RSRC_IMG_1D tfe 504; GFX10-NEXT: s_waitcnt vmcnt(0) 505; GFX10-NEXT: ; return to shader part epilog 506; 507; GFX11-LABEL: sample_1d_tfe_adjust_writemask_24: 508; GFX11: ; %bb.0: ; %main_body 509; GFX11-NEXT: s_mov_b32 s12, exec_lo 510; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo 511; GFX11-NEXT: v_dual_mov_b32 v3, v0 :: v_dual_mov_b32 v0, 0 512; GFX11-NEXT: v_mov_b32_e32 v1, v0 513; GFX11-NEXT: v_mov_b32_e32 v2, v0 514; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s12 515; GFX11-NEXT: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0xa dim:SQ_RSRC_IMG_1D tfe 516; GFX11-NEXT: s_waitcnt vmcnt(0) 517; GFX11-NEXT: ; return to shader part epilog 518; 519; GFX12-LABEL: sample_1d_tfe_adjust_writemask_24: 520; GFX12: ; %bb.0: ; %main_body 521; GFX12-NEXT: s_mov_b32 s12, exec_lo 522; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo 523; GFX12-NEXT: v_dual_mov_b32 v3, v0 :: v_dual_mov_b32 v0, 0 524; GFX12-NEXT: v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v2, v0 525; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12 526; GFX12-NEXT: image_sample v[0:2], v3, s[0:7], s[8:11] dmask:0xa dim:SQ_RSRC_IMG_1D tfe 527; GFX12-NEXT: s_wait_samplecnt 0x0 528; GFX12-NEXT: ; return to shader part epilog 529main_body: 530 %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0) 531 %res.vec = extractvalue {<4 x float>,i32} %v, 0 532 %res.f1 = extractelement <4 x float> %res.vec, i32 1 533 %res.f2 = extractelement <4 x float> %res.vec, i32 3 534 %res.err = extractvalue {<4 x float>,i32} %v, 1 535 %res.errf = bitcast i32 %res.err to float 536 %res.tmp1 = insertelement <4 x float> undef, float %res.f1, i32 0 537 %res.tmp2 = insertelement <4 x float> %res.tmp1, float %res.f2, i32 1 538 %res = insertelement <4 x float> %res.tmp2, float %res.errf, i32 2 539 ret <4 x float> %res 540} 541 542define amdgpu_ps <4 x float> @sample_1d_tfe_adjust_writemask_134(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { 543; VERDE-LABEL: sample_1d_tfe_adjust_writemask_134: 544; VERDE: ; %bb.0: ; %main_body 545; VERDE-NEXT: s_mov_b64 s[12:13], exec 546; VERDE-NEXT: s_wqm_b64 exec, exec 547; VERDE-NEXT: v_mov_b32_e32 v4, v0 548; VERDE-NEXT: v_mov_b32_e32 v0, 0 549; VERDE-NEXT: v_mov_b32_e32 v1, v0 550; VERDE-NEXT: v_mov_b32_e32 v2, v0 551; VERDE-NEXT: v_mov_b32_e32 v3, v0 552; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 553; VERDE-NEXT: image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xd tfe 554; VERDE-NEXT: s_waitcnt vmcnt(0) 555; VERDE-NEXT: ; return to shader part epilog 556; 557; GFX6789-LABEL: sample_1d_tfe_adjust_writemask_134: 558; GFX6789: ; %bb.0: ; %main_body 559; GFX6789-NEXT: s_mov_b64 s[12:13], exec 560; GFX6789-NEXT: s_wqm_b64 exec, exec 561; GFX6789-NEXT: v_mov_b32_e32 v4, v0 562; GFX6789-NEXT: v_mov_b32_e32 v0, 0 563; GFX6789-NEXT: v_mov_b32_e32 v1, v0 564; GFX6789-NEXT: v_mov_b32_e32 v2, v0 565; GFX6789-NEXT: v_mov_b32_e32 v3, v0 566; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 567; GFX6789-NEXT: image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xd tfe 568; GFX6789-NEXT: s_waitcnt vmcnt(0) 569; GFX6789-NEXT: ; return to shader part epilog 570; 571; GFX10PLUS-LABEL: sample_1d_tfe_adjust_writemask_134: 572; GFX10PLUS: ; %bb.0: ; %main_body 573; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 574; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 575; GFX10PLUS-NEXT: v_mov_b32_e32 v4, v0 576; GFX10PLUS-NEXT: v_mov_b32_e32 v0, 0 577; GFX10PLUS-NEXT: v_mov_b32_e32 v1, v0 578; GFX10PLUS-NEXT: v_mov_b32_e32 v2, v0 579; GFX10PLUS-NEXT: v_mov_b32_e32 v3, v0 580; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 581; GFX10PLUS-NEXT: image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xd dim:SQ_RSRC_IMG_1D tfe 582; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 583; GFX10PLUS-NEXT: ; return to shader part epilog 584; 585; GFX12-LABEL: sample_1d_tfe_adjust_writemask_134: 586; GFX12: ; %bb.0: ; %main_body 587; GFX12-NEXT: s_mov_b32 s12, exec_lo 588; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo 589; GFX12-NEXT: v_mov_b32_e32 v4, v0 590; GFX12-NEXT: v_mov_b32_e32 v0, 0 591; GFX12-NEXT: v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v2, v0 592; GFX12-NEXT: v_mov_b32_e32 v3, v0 593; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12 594; GFX12-NEXT: image_sample v[0:3], v4, s[0:7], s[8:11] dmask:0xd dim:SQ_RSRC_IMG_1D tfe 595; GFX12-NEXT: s_wait_samplecnt 0x0 596; GFX12-NEXT: ; return to shader part epilog 597main_body: 598 %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0) 599 %res.vec = extractvalue {<4 x float>,i32} %v, 0 600 %res.f1 = extractelement <4 x float> %res.vec, i32 0 601 %res.f2 = extractelement <4 x float> %res.vec, i32 2 602 %res.f3 = extractelement <4 x float> %res.vec, i32 3 603 %res.err = extractvalue {<4 x float>,i32} %v, 1 604 %res.errf = bitcast i32 %res.err to float 605 %res.tmp1 = insertelement <4 x float> undef, float %res.f1, i32 0 606 %res.tmp2 = insertelement <4 x float> %res.tmp1, float %res.f2, i32 1 607 %res.tmp3 = insertelement <4 x float> %res.tmp2, float %res.f3, i32 2 608 %res = insertelement <4 x float> %res.tmp3, float %res.errf, i32 3 609 ret <4 x float> %res 610} 611 612define amdgpu_ps <4 x float> @sample_1d_lwe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, ptr addrspace(1) inreg %out, float %s) { 613; VERDE-LABEL: sample_1d_lwe: 614; VERDE: ; %bb.0: ; %main_body 615; VERDE-NEXT: s_mov_b64 s[14:15], exec 616; VERDE-NEXT: s_wqm_b64 exec, exec 617; VERDE-NEXT: v_mov_b32_e32 v5, v0 618; VERDE-NEXT: v_mov_b32_e32 v0, 0 619; VERDE-NEXT: v_mov_b32_e32 v1, v0 620; VERDE-NEXT: v_mov_b32_e32 v2, v0 621; VERDE-NEXT: v_mov_b32_e32 v3, v0 622; VERDE-NEXT: v_mov_b32_e32 v4, v0 623; VERDE-NEXT: s_and_b64 exec, exec, s[14:15] 624; VERDE-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf lwe 625; VERDE-NEXT: s_mov_b32 s15, 0xf000 626; VERDE-NEXT: s_mov_b32 s14, -1 627; VERDE-NEXT: s_waitcnt vmcnt(0) 628; VERDE-NEXT: buffer_store_dword v4, off, s[12:15], 0 629; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) 630; VERDE-NEXT: ; return to shader part epilog 631; 632; GFX6789-LABEL: sample_1d_lwe: 633; GFX6789: ; %bb.0: ; %main_body 634; GFX6789-NEXT: s_mov_b64 s[14:15], exec 635; GFX6789-NEXT: s_wqm_b64 exec, exec 636; GFX6789-NEXT: v_mov_b32_e32 v6, 0 637; GFX6789-NEXT: v_mov_b32_e32 v5, v0 638; GFX6789-NEXT: v_mov_b32_e32 v7, v6 639; GFX6789-NEXT: v_mov_b32_e32 v8, v6 640; GFX6789-NEXT: v_mov_b32_e32 v9, v6 641; GFX6789-NEXT: v_mov_b32_e32 v10, v6 642; GFX6789-NEXT: v_mov_b32_e32 v0, v6 643; GFX6789-NEXT: v_mov_b32_e32 v1, v7 644; GFX6789-NEXT: v_mov_b32_e32 v2, v8 645; GFX6789-NEXT: v_mov_b32_e32 v3, v9 646; GFX6789-NEXT: v_mov_b32_e32 v4, v10 647; GFX6789-NEXT: s_and_b64 exec, exec, s[14:15] 648; GFX6789-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf lwe 649; GFX6789-NEXT: s_waitcnt vmcnt(0) 650; GFX6789-NEXT: global_store_dword v6, v4, s[12:13] 651; GFX6789-NEXT: s_waitcnt vmcnt(0) 652; GFX6789-NEXT: ; return to shader part epilog 653; 654; GFX10-LABEL: sample_1d_lwe: 655; GFX10: ; %bb.0: ; %main_body 656; GFX10-NEXT: s_mov_b32 s14, exec_lo 657; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo 658; GFX10-NEXT: v_mov_b32_e32 v6, 0 659; GFX10-NEXT: v_mov_b32_e32 v5, v0 660; GFX10-NEXT: v_mov_b32_e32 v7, v6 661; GFX10-NEXT: v_mov_b32_e32 v8, v6 662; GFX10-NEXT: v_mov_b32_e32 v9, v6 663; GFX10-NEXT: v_mov_b32_e32 v10, v6 664; GFX10-NEXT: v_mov_b32_e32 v0, v6 665; GFX10-NEXT: v_mov_b32_e32 v1, v7 666; GFX10-NEXT: v_mov_b32_e32 v2, v8 667; GFX10-NEXT: v_mov_b32_e32 v3, v9 668; GFX10-NEXT: v_mov_b32_e32 v4, v10 669; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s14 670; GFX10-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D lwe 671; GFX10-NEXT: s_waitcnt vmcnt(0) 672; GFX10-NEXT: global_store_dword v6, v4, s[12:13] 673; GFX10-NEXT: ; return to shader part epilog 674; 675; GFX11-LABEL: sample_1d_lwe: 676; GFX11: ; %bb.0: ; %main_body 677; GFX11-NEXT: s_mov_b32 s14, exec_lo 678; GFX11-NEXT: s_wqm_b32 exec_lo, exec_lo 679; GFX11-NEXT: v_dual_mov_b32 v5, v0 :: v_dual_mov_b32 v6, 0 680; GFX11-NEXT: v_mov_b32_e32 v7, v6 681; GFX11-NEXT: v_mov_b32_e32 v8, v6 682; GFX11-NEXT: v_mov_b32_e32 v9, v6 683; GFX11-NEXT: v_mov_b32_e32 v10, v6 684; GFX11-NEXT: v_dual_mov_b32 v0, v6 :: v_dual_mov_b32 v1, v7 685; GFX11-NEXT: v_dual_mov_b32 v2, v8 :: v_dual_mov_b32 v3, v9 686; GFX11-NEXT: v_mov_b32_e32 v4, v10 687; GFX11-NEXT: s_and_b32 exec_lo, exec_lo, s14 688; GFX11-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D lwe 689; GFX11-NEXT: s_waitcnt vmcnt(0) 690; GFX11-NEXT: global_store_b32 v6, v4, s[12:13] 691; GFX11-NEXT: ; return to shader part epilog 692; 693; GFX12-LABEL: sample_1d_lwe: 694; GFX12: ; %bb.0: ; %main_body 695; GFX12-NEXT: s_mov_b32 s14, exec_lo 696; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo 697; GFX12-NEXT: v_dual_mov_b32 v5, v0 :: v_dual_mov_b32 v6, 0 698; GFX12-NEXT: v_dual_mov_b32 v7, v6 :: v_dual_mov_b32 v8, v6 699; GFX12-NEXT: v_dual_mov_b32 v9, v6 :: v_dual_mov_b32 v10, v6 700; GFX12-NEXT: v_dual_mov_b32 v0, v6 :: v_dual_mov_b32 v1, v7 701; GFX12-NEXT: v_dual_mov_b32 v2, v8 :: v_dual_mov_b32 v3, v9 702; GFX12-NEXT: v_mov_b32_e32 v4, v10 703; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s14 704; GFX12-NEXT: image_sample v[0:4], v5, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D lwe 705; GFX12-NEXT: s_wait_samplecnt 0x0 706; GFX12-NEXT: global_store_b32 v6, v4, s[12:13] 707; GFX12-NEXT: ; return to shader part epilog 708main_body: 709 %v = call {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 2, i32 0) 710 %v.vec = extractvalue {<4 x float>, i32} %v, 0 711 %v.err = extractvalue {<4 x float>, i32} %v, 1 712 store i32 %v.err, ptr addrspace(1) %out, align 4 713 ret <4 x float> %v.vec 714} 715 716define amdgpu_ps <4 x float> @sample_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) { 717; VERDE-LABEL: sample_2d: 718; VERDE: ; %bb.0: ; %main_body 719; VERDE-NEXT: s_mov_b64 s[12:13], exec 720; VERDE-NEXT: s_wqm_b64 exec, exec 721; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 722; VERDE-NEXT: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf 723; VERDE-NEXT: s_waitcnt vmcnt(0) 724; VERDE-NEXT: ; return to shader part epilog 725; 726; GFX6789-LABEL: sample_2d: 727; GFX6789: ; %bb.0: ; %main_body 728; GFX6789-NEXT: s_mov_b64 s[12:13], exec 729; GFX6789-NEXT: s_wqm_b64 exec, exec 730; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 731; GFX6789-NEXT: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf 732; GFX6789-NEXT: s_waitcnt vmcnt(0) 733; GFX6789-NEXT: ; return to shader part epilog 734; 735; GFX10PLUS-LABEL: sample_2d: 736; GFX10PLUS: ; %bb.0: ; %main_body 737; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 738; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 739; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 740; GFX10PLUS-NEXT: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 741; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 742; GFX10PLUS-NEXT: ; return to shader part epilog 743; 744; GFX12-LABEL: sample_2d: 745; GFX12: ; %bb.0: ; %main_body 746; GFX12-NEXT: s_mov_b32 s12, exec_lo 747; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo 748; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12 749; GFX12-NEXT: image_sample v[0:3], [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 750; GFX12-NEXT: s_wait_samplecnt 0x0 751; GFX12-NEXT: ; return to shader part epilog 752main_body: 753 %v = call <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32 15, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 754 ret <4 x float> %v 755} 756 757define amdgpu_ps <4 x float> @sample_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %r) { 758; VERDE-LABEL: sample_3d: 759; VERDE: ; %bb.0: ; %main_body 760; VERDE-NEXT: s_mov_b64 s[12:13], exec 761; VERDE-NEXT: s_wqm_b64 exec, exec 762; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 763; VERDE-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf 764; VERDE-NEXT: s_waitcnt vmcnt(0) 765; VERDE-NEXT: ; return to shader part epilog 766; 767; GFX6789-LABEL: sample_3d: 768; GFX6789: ; %bb.0: ; %main_body 769; GFX6789-NEXT: s_mov_b64 s[12:13], exec 770; GFX6789-NEXT: s_wqm_b64 exec, exec 771; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 772; GFX6789-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf 773; GFX6789-NEXT: s_waitcnt vmcnt(0) 774; GFX6789-NEXT: ; return to shader part epilog 775; 776; GFX10PLUS-LABEL: sample_3d: 777; GFX10PLUS: ; %bb.0: ; %main_body 778; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 779; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 780; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 781; GFX10PLUS-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D 782; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 783; GFX10PLUS-NEXT: ; return to shader part epilog 784; 785; GFX12-LABEL: sample_3d: 786; GFX12: ; %bb.0: ; %main_body 787; GFX12-NEXT: s_mov_b32 s12, exec_lo 788; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo 789; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12 790; GFX12-NEXT: image_sample v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D 791; GFX12-NEXT: s_wait_samplecnt 0x0 792; GFX12-NEXT: ; return to shader part epilog 793main_body: 794 %v = call <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32 15, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 795 ret <4 x float> %v 796} 797 798define amdgpu_ps <4 x float> @sample_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %face) { 799; VERDE-LABEL: sample_cube: 800; VERDE: ; %bb.0: ; %main_body 801; VERDE-NEXT: s_mov_b64 s[12:13], exec 802; VERDE-NEXT: s_wqm_b64 exec, exec 803; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 804; VERDE-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf da 805; VERDE-NEXT: s_waitcnt vmcnt(0) 806; VERDE-NEXT: ; return to shader part epilog 807; 808; GFX6789-LABEL: sample_cube: 809; GFX6789: ; %bb.0: ; %main_body 810; GFX6789-NEXT: s_mov_b64 s[12:13], exec 811; GFX6789-NEXT: s_wqm_b64 exec, exec 812; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 813; GFX6789-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf da 814; GFX6789-NEXT: s_waitcnt vmcnt(0) 815; GFX6789-NEXT: ; return to shader part epilog 816; 817; GFX10PLUS-LABEL: sample_cube: 818; GFX10PLUS: ; %bb.0: ; %main_body 819; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 820; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 821; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 822; GFX10PLUS-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_CUBE 823; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 824; GFX10PLUS-NEXT: ; return to shader part epilog 825; 826; GFX12-LABEL: sample_cube: 827; GFX12: ; %bb.0: ; %main_body 828; GFX12-NEXT: s_mov_b32 s12, exec_lo 829; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo 830; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12 831; GFX12-NEXT: image_sample v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_CUBE 832; GFX12-NEXT: s_wait_samplecnt 0x0 833; GFX12-NEXT: ; return to shader part epilog 834main_body: 835 %v = call <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32 15, float %s, float %t, float %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 836 ret <4 x float> %v 837} 838 839define amdgpu_ps <4 x float> @sample_1darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %slice) { 840; VERDE-LABEL: sample_1darray: 841; VERDE: ; %bb.0: ; %main_body 842; VERDE-NEXT: s_mov_b64 s[12:13], exec 843; VERDE-NEXT: s_wqm_b64 exec, exec 844; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 845; VERDE-NEXT: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf da 846; VERDE-NEXT: s_waitcnt vmcnt(0) 847; VERDE-NEXT: ; return to shader part epilog 848; 849; GFX6789-LABEL: sample_1darray: 850; GFX6789: ; %bb.0: ; %main_body 851; GFX6789-NEXT: s_mov_b64 s[12:13], exec 852; GFX6789-NEXT: s_wqm_b64 exec, exec 853; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 854; GFX6789-NEXT: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf da 855; GFX6789-NEXT: s_waitcnt vmcnt(0) 856; GFX6789-NEXT: ; return to shader part epilog 857; 858; GFX10PLUS-LABEL: sample_1darray: 859; GFX10PLUS: ; %bb.0: ; %main_body 860; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 861; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 862; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 863; GFX10PLUS-NEXT: image_sample v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY 864; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 865; GFX10PLUS-NEXT: ; return to shader part epilog 866; 867; GFX12-LABEL: sample_1darray: 868; GFX12: ; %bb.0: ; %main_body 869; GFX12-NEXT: s_mov_b32 s12, exec_lo 870; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo 871; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12 872; GFX12-NEXT: image_sample v[0:3], [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D_ARRAY 873; GFX12-NEXT: s_wait_samplecnt 0x0 874; GFX12-NEXT: ; return to shader part epilog 875main_body: 876 %v = call <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32 15, float %s, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 877 ret <4 x float> %v 878} 879 880define amdgpu_ps <4 x float> @sample_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %slice) { 881; VERDE-LABEL: sample_2darray: 882; VERDE: ; %bb.0: ; %main_body 883; VERDE-NEXT: s_mov_b64 s[12:13], exec 884; VERDE-NEXT: s_wqm_b64 exec, exec 885; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 886; VERDE-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf da 887; VERDE-NEXT: s_waitcnt vmcnt(0) 888; VERDE-NEXT: ; return to shader part epilog 889; 890; GFX6789-LABEL: sample_2darray: 891; GFX6789: ; %bb.0: ; %main_body 892; GFX6789-NEXT: s_mov_b64 s[12:13], exec 893; GFX6789-NEXT: s_wqm_b64 exec, exec 894; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 895; GFX6789-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf da 896; GFX6789-NEXT: s_waitcnt vmcnt(0) 897; GFX6789-NEXT: ; return to shader part epilog 898; 899; GFX10PLUS-LABEL: sample_2darray: 900; GFX10PLUS: ; %bb.0: ; %main_body 901; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 902; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 903; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 904; GFX10PLUS-NEXT: image_sample v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY 905; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 906; GFX10PLUS-NEXT: ; return to shader part epilog 907; 908; GFX12-LABEL: sample_2darray: 909; GFX12: ; %bb.0: ; %main_body 910; GFX12-NEXT: s_mov_b32 s12, exec_lo 911; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo 912; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12 913; GFX12-NEXT: image_sample v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D_ARRAY 914; GFX12-NEXT: s_wait_samplecnt 0x0 915; GFX12-NEXT: ; return to shader part epilog 916main_body: 917 %v = call <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32 15, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 918 ret <4 x float> %v 919} 920 921define amdgpu_ps <4 x float> @sample_c_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) { 922; VERDE-LABEL: sample_c_1d: 923; VERDE: ; %bb.0: ; %main_body 924; VERDE-NEXT: s_mov_b64 s[12:13], exec 925; VERDE-NEXT: s_wqm_b64 exec, exec 926; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 927; VERDE-NEXT: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf 928; VERDE-NEXT: s_waitcnt vmcnt(0) 929; VERDE-NEXT: ; return to shader part epilog 930; 931; GFX6789-LABEL: sample_c_1d: 932; GFX6789: ; %bb.0: ; %main_body 933; GFX6789-NEXT: s_mov_b64 s[12:13], exec 934; GFX6789-NEXT: s_wqm_b64 exec, exec 935; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 936; GFX6789-NEXT: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf 937; GFX6789-NEXT: s_waitcnt vmcnt(0) 938; GFX6789-NEXT: ; return to shader part epilog 939; 940; GFX10PLUS-LABEL: sample_c_1d: 941; GFX10PLUS: ; %bb.0: ; %main_body 942; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 943; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 944; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 945; GFX10PLUS-NEXT: image_sample_c v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 946; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 947; GFX10PLUS-NEXT: ; return to shader part epilog 948; 949; GFX12-LABEL: sample_c_1d: 950; GFX12: ; %bb.0: ; %main_body 951; GFX12-NEXT: s_mov_b32 s12, exec_lo 952; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo 953; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12 954; GFX12-NEXT: image_sample_c v[0:3], [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 955; GFX12-NEXT: s_wait_samplecnt 0x0 956; GFX12-NEXT: ; return to shader part epilog 957main_body: 958 %v = call <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32 15, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 959 ret <4 x float> %v 960} 961 962define amdgpu_ps <4 x float> @sample_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) { 963; VERDE-LABEL: sample_c_2d: 964; VERDE: ; %bb.0: ; %main_body 965; VERDE-NEXT: s_mov_b64 s[12:13], exec 966; VERDE-NEXT: s_wqm_b64 exec, exec 967; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 968; VERDE-NEXT: image_sample_c v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf 969; VERDE-NEXT: s_waitcnt vmcnt(0) 970; VERDE-NEXT: ; return to shader part epilog 971; 972; GFX6789-LABEL: sample_c_2d: 973; GFX6789: ; %bb.0: ; %main_body 974; GFX6789-NEXT: s_mov_b64 s[12:13], exec 975; GFX6789-NEXT: s_wqm_b64 exec, exec 976; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 977; GFX6789-NEXT: image_sample_c v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf 978; GFX6789-NEXT: s_waitcnt vmcnt(0) 979; GFX6789-NEXT: ; return to shader part epilog 980; 981; GFX10PLUS-LABEL: sample_c_2d: 982; GFX10PLUS: ; %bb.0: ; %main_body 983; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 984; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 985; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 986; GFX10PLUS-NEXT: image_sample_c v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 987; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 988; GFX10PLUS-NEXT: ; return to shader part epilog 989; 990; GFX12-LABEL: sample_c_2d: 991; GFX12: ; %bb.0: ; %main_body 992; GFX12-NEXT: s_mov_b32 s12, exec_lo 993; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo 994; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12 995; GFX12-NEXT: image_sample_c v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 996; GFX12-NEXT: s_wait_samplecnt 0x0 997; GFX12-NEXT: ; return to shader part epilog 998main_body: 999 %v = call <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1000 ret <4 x float> %v 1001} 1002 1003define amdgpu_ps <4 x float> @sample_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %clamp) { 1004; VERDE-LABEL: sample_cl_1d: 1005; VERDE: ; %bb.0: ; %main_body 1006; VERDE-NEXT: s_mov_b64 s[12:13], exec 1007; VERDE-NEXT: s_wqm_b64 exec, exec 1008; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 1009; VERDE-NEXT: image_sample_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf 1010; VERDE-NEXT: s_waitcnt vmcnt(0) 1011; VERDE-NEXT: ; return to shader part epilog 1012; 1013; GFX6789-LABEL: sample_cl_1d: 1014; GFX6789: ; %bb.0: ; %main_body 1015; GFX6789-NEXT: s_mov_b64 s[12:13], exec 1016; GFX6789-NEXT: s_wqm_b64 exec, exec 1017; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 1018; GFX6789-NEXT: image_sample_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf 1019; GFX6789-NEXT: s_waitcnt vmcnt(0) 1020; GFX6789-NEXT: ; return to shader part epilog 1021; 1022; GFX10PLUS-LABEL: sample_cl_1d: 1023; GFX10PLUS: ; %bb.0: ; %main_body 1024; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 1025; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 1026; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 1027; GFX10PLUS-NEXT: image_sample_cl v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 1028; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1029; GFX10PLUS-NEXT: ; return to shader part epilog 1030; 1031; GFX12-LABEL: sample_cl_1d: 1032; GFX12: ; %bb.0: ; %main_body 1033; GFX12-NEXT: s_mov_b32 s12, exec_lo 1034; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo 1035; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12 1036; GFX12-NEXT: image_sample_cl v[0:3], [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 1037; GFX12-NEXT: s_wait_samplecnt 0x0 1038; GFX12-NEXT: ; return to shader part epilog 1039main_body: 1040 %v = call <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f32(i32 15, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1041 ret <4 x float> %v 1042} 1043 1044define amdgpu_ps <4 x float> @sample_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %clamp) { 1045; VERDE-LABEL: sample_cl_2d: 1046; VERDE: ; %bb.0: ; %main_body 1047; VERDE-NEXT: s_mov_b64 s[12:13], exec 1048; VERDE-NEXT: s_wqm_b64 exec, exec 1049; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 1050; VERDE-NEXT: image_sample_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf 1051; VERDE-NEXT: s_waitcnt vmcnt(0) 1052; VERDE-NEXT: ; return to shader part epilog 1053; 1054; GFX6789-LABEL: sample_cl_2d: 1055; GFX6789: ; %bb.0: ; %main_body 1056; GFX6789-NEXT: s_mov_b64 s[12:13], exec 1057; GFX6789-NEXT: s_wqm_b64 exec, exec 1058; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 1059; GFX6789-NEXT: image_sample_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf 1060; GFX6789-NEXT: s_waitcnt vmcnt(0) 1061; GFX6789-NEXT: ; return to shader part epilog 1062; 1063; GFX10PLUS-LABEL: sample_cl_2d: 1064; GFX10PLUS: ; %bb.0: ; %main_body 1065; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 1066; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 1067; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 1068; GFX10PLUS-NEXT: image_sample_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 1069; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1070; GFX10PLUS-NEXT: ; return to shader part epilog 1071; 1072; GFX12-LABEL: sample_cl_2d: 1073; GFX12: ; %bb.0: ; %main_body 1074; GFX12-NEXT: s_mov_b32 s12, exec_lo 1075; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo 1076; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12 1077; GFX12-NEXT: image_sample_cl v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 1078; GFX12-NEXT: s_wait_samplecnt 0x0 1079; GFX12-NEXT: ; return to shader part epilog 1080main_body: 1081 %v = call <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f32(i32 15, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1082 ret <4 x float> %v 1083} 1084 1085define amdgpu_ps <4 x float> @sample_c_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %clamp) { 1086; VERDE-LABEL: sample_c_cl_1d: 1087; VERDE: ; %bb.0: ; %main_body 1088; VERDE-NEXT: s_mov_b64 s[12:13], exec 1089; VERDE-NEXT: s_wqm_b64 exec, exec 1090; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 1091; VERDE-NEXT: image_sample_c_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf 1092; VERDE-NEXT: s_waitcnt vmcnt(0) 1093; VERDE-NEXT: ; return to shader part epilog 1094; 1095; GFX6789-LABEL: sample_c_cl_1d: 1096; GFX6789: ; %bb.0: ; %main_body 1097; GFX6789-NEXT: s_mov_b64 s[12:13], exec 1098; GFX6789-NEXT: s_wqm_b64 exec, exec 1099; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 1100; GFX6789-NEXT: image_sample_c_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf 1101; GFX6789-NEXT: s_waitcnt vmcnt(0) 1102; GFX6789-NEXT: ; return to shader part epilog 1103; 1104; GFX10PLUS-LABEL: sample_c_cl_1d: 1105; GFX10PLUS: ; %bb.0: ; %main_body 1106; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 1107; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 1108; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 1109; GFX10PLUS-NEXT: image_sample_c_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 1110; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1111; GFX10PLUS-NEXT: ; return to shader part epilog 1112; 1113; GFX12-LABEL: sample_c_cl_1d: 1114; GFX12: ; %bb.0: ; %main_body 1115; GFX12-NEXT: s_mov_b32 s12, exec_lo 1116; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo 1117; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12 1118; GFX12-NEXT: image_sample_c_cl v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 1119; GFX12-NEXT: s_wait_samplecnt 0x0 1120; GFX12-NEXT: ; return to shader part epilog 1121main_body: 1122 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32(i32 15, float %zcompare, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1123 ret <4 x float> %v 1124} 1125 1126define amdgpu_ps <4 x float> @sample_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %clamp) { 1127; VERDE-LABEL: sample_c_cl_2d: 1128; VERDE: ; %bb.0: ; %main_body 1129; VERDE-NEXT: s_mov_b64 s[12:13], exec 1130; VERDE-NEXT: s_wqm_b64 exec, exec 1131; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 1132; VERDE-NEXT: image_sample_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf 1133; VERDE-NEXT: s_waitcnt vmcnt(0) 1134; VERDE-NEXT: ; return to shader part epilog 1135; 1136; GFX6789-LABEL: sample_c_cl_2d: 1137; GFX6789: ; %bb.0: ; %main_body 1138; GFX6789-NEXT: s_mov_b64 s[12:13], exec 1139; GFX6789-NEXT: s_wqm_b64 exec, exec 1140; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 1141; GFX6789-NEXT: image_sample_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf 1142; GFX6789-NEXT: s_waitcnt vmcnt(0) 1143; GFX6789-NEXT: ; return to shader part epilog 1144; 1145; GFX10PLUS-LABEL: sample_c_cl_2d: 1146; GFX10PLUS: ; %bb.0: ; %main_body 1147; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 1148; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 1149; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 1150; GFX10PLUS-NEXT: image_sample_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 1151; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1152; GFX10PLUS-NEXT: ; return to shader part epilog 1153; 1154; GFX12-LABEL: sample_c_cl_2d: 1155; GFX12: ; %bb.0: ; %main_body 1156; GFX12-NEXT: s_mov_b32 s12, exec_lo 1157; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo 1158; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12 1159; GFX12-NEXT: image_sample_c_cl v[0:3], [v0, v1, v2, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 1160; GFX12-NEXT: s_wait_samplecnt 0x0 1161; GFX12-NEXT: ; return to shader part epilog 1162main_body: 1163 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1164 ret <4 x float> %v 1165} 1166 1167define amdgpu_ps <4 x float> @sample_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s) { 1168; VERDE-LABEL: sample_b_1d: 1169; VERDE: ; %bb.0: ; %main_body 1170; VERDE-NEXT: s_mov_b64 s[12:13], exec 1171; VERDE-NEXT: s_wqm_b64 exec, exec 1172; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 1173; VERDE-NEXT: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf 1174; VERDE-NEXT: s_waitcnt vmcnt(0) 1175; VERDE-NEXT: ; return to shader part epilog 1176; 1177; GFX6789-LABEL: sample_b_1d: 1178; GFX6789: ; %bb.0: ; %main_body 1179; GFX6789-NEXT: s_mov_b64 s[12:13], exec 1180; GFX6789-NEXT: s_wqm_b64 exec, exec 1181; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 1182; GFX6789-NEXT: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf 1183; GFX6789-NEXT: s_waitcnt vmcnt(0) 1184; GFX6789-NEXT: ; return to shader part epilog 1185; 1186; GFX10PLUS-LABEL: sample_b_1d: 1187; GFX10PLUS: ; %bb.0: ; %main_body 1188; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 1189; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 1190; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 1191; GFX10PLUS-NEXT: image_sample_b v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 1192; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1193; GFX10PLUS-NEXT: ; return to shader part epilog 1194; 1195; GFX12-LABEL: sample_b_1d: 1196; GFX12: ; %bb.0: ; %main_body 1197; GFX12-NEXT: s_mov_b32 s12, exec_lo 1198; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo 1199; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12 1200; GFX12-NEXT: image_sample_b v[0:3], [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 1201; GFX12-NEXT: s_wait_samplecnt 0x0 1202; GFX12-NEXT: ; return to shader part epilog 1203main_body: 1204 %v = call <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32 15, float %bias, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1205 ret <4 x float> %v 1206} 1207 1208define amdgpu_ps <4 x float> @sample_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t) { 1209; VERDE-LABEL: sample_b_2d: 1210; VERDE: ; %bb.0: ; %main_body 1211; VERDE-NEXT: s_mov_b64 s[12:13], exec 1212; VERDE-NEXT: s_wqm_b64 exec, exec 1213; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 1214; VERDE-NEXT: image_sample_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf 1215; VERDE-NEXT: s_waitcnt vmcnt(0) 1216; VERDE-NEXT: ; return to shader part epilog 1217; 1218; GFX6789-LABEL: sample_b_2d: 1219; GFX6789: ; %bb.0: ; %main_body 1220; GFX6789-NEXT: s_mov_b64 s[12:13], exec 1221; GFX6789-NEXT: s_wqm_b64 exec, exec 1222; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 1223; GFX6789-NEXT: image_sample_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf 1224; GFX6789-NEXT: s_waitcnt vmcnt(0) 1225; GFX6789-NEXT: ; return to shader part epilog 1226; 1227; GFX10PLUS-LABEL: sample_b_2d: 1228; GFX10PLUS: ; %bb.0: ; %main_body 1229; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 1230; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 1231; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 1232; GFX10PLUS-NEXT: image_sample_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 1233; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1234; GFX10PLUS-NEXT: ; return to shader part epilog 1235; 1236; GFX12-LABEL: sample_b_2d: 1237; GFX12: ; %bb.0: ; %main_body 1238; GFX12-NEXT: s_mov_b32 s12, exec_lo 1239; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo 1240; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12 1241; GFX12-NEXT: image_sample_b v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 1242; GFX12-NEXT: s_wait_samplecnt 0x0 1243; GFX12-NEXT: ; return to shader part epilog 1244main_body: 1245 %v = call <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f32(i32 15, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1246 ret <4 x float> %v 1247} 1248 1249define amdgpu_ps <4 x float> @sample_c_b_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s) { 1250; VERDE-LABEL: sample_c_b_1d: 1251; VERDE: ; %bb.0: ; %main_body 1252; VERDE-NEXT: s_mov_b64 s[12:13], exec 1253; VERDE-NEXT: s_wqm_b64 exec, exec 1254; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 1255; VERDE-NEXT: image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf 1256; VERDE-NEXT: s_waitcnt vmcnt(0) 1257; VERDE-NEXT: ; return to shader part epilog 1258; 1259; GFX6789-LABEL: sample_c_b_1d: 1260; GFX6789: ; %bb.0: ; %main_body 1261; GFX6789-NEXT: s_mov_b64 s[12:13], exec 1262; GFX6789-NEXT: s_wqm_b64 exec, exec 1263; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 1264; GFX6789-NEXT: image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf 1265; GFX6789-NEXT: s_waitcnt vmcnt(0) 1266; GFX6789-NEXT: ; return to shader part epilog 1267; 1268; GFX10PLUS-LABEL: sample_c_b_1d: 1269; GFX10PLUS: ; %bb.0: ; %main_body 1270; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 1271; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 1272; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 1273; GFX10PLUS-NEXT: image_sample_c_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 1274; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1275; GFX10PLUS-NEXT: ; return to shader part epilog 1276; 1277; GFX12-LABEL: sample_c_b_1d: 1278; GFX12: ; %bb.0: ; %main_body 1279; GFX12-NEXT: s_mov_b32 s12, exec_lo 1280; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo 1281; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12 1282; GFX12-NEXT: image_sample_c_b v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 1283; GFX12-NEXT: s_wait_samplecnt 0x0 1284; GFX12-NEXT: ; return to shader part epilog 1285main_body: 1286 %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1287 ret <4 x float> %v 1288} 1289 1290define amdgpu_ps <4 x float> @sample_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t) { 1291; VERDE-LABEL: sample_c_b_2d: 1292; VERDE: ; %bb.0: ; %main_body 1293; VERDE-NEXT: s_mov_b64 s[12:13], exec 1294; VERDE-NEXT: s_wqm_b64 exec, exec 1295; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 1296; VERDE-NEXT: image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf 1297; VERDE-NEXT: s_waitcnt vmcnt(0) 1298; VERDE-NEXT: ; return to shader part epilog 1299; 1300; GFX6789-LABEL: sample_c_b_2d: 1301; GFX6789: ; %bb.0: ; %main_body 1302; GFX6789-NEXT: s_mov_b64 s[12:13], exec 1303; GFX6789-NEXT: s_wqm_b64 exec, exec 1304; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 1305; GFX6789-NEXT: image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf 1306; GFX6789-NEXT: s_waitcnt vmcnt(0) 1307; GFX6789-NEXT: ; return to shader part epilog 1308; 1309; GFX10PLUS-LABEL: sample_c_b_2d: 1310; GFX10PLUS: ; %bb.0: ; %main_body 1311; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 1312; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 1313; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 1314; GFX10PLUS-NEXT: image_sample_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 1315; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1316; GFX10PLUS-NEXT: ; return to shader part epilog 1317; 1318; GFX12-LABEL: sample_c_b_2d: 1319; GFX12: ; %bb.0: ; %main_body 1320; GFX12-NEXT: s_mov_b32 s12, exec_lo 1321; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo 1322; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12 1323; GFX12-NEXT: image_sample_c_b v[0:3], [v0, v1, v2, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 1324; GFX12-NEXT: s_wait_samplecnt 0x0 1325; GFX12-NEXT: ; return to shader part epilog 1326main_body: 1327 %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1328 ret <4 x float> %v 1329} 1330 1331define amdgpu_ps <4 x float> @sample_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %clamp) { 1332; VERDE-LABEL: sample_b_cl_1d: 1333; VERDE: ; %bb.0: ; %main_body 1334; VERDE-NEXT: s_mov_b64 s[12:13], exec 1335; VERDE-NEXT: s_wqm_b64 exec, exec 1336; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 1337; VERDE-NEXT: image_sample_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf 1338; VERDE-NEXT: s_waitcnt vmcnt(0) 1339; VERDE-NEXT: ; return to shader part epilog 1340; 1341; GFX6789-LABEL: sample_b_cl_1d: 1342; GFX6789: ; %bb.0: ; %main_body 1343; GFX6789-NEXT: s_mov_b64 s[12:13], exec 1344; GFX6789-NEXT: s_wqm_b64 exec, exec 1345; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 1346; GFX6789-NEXT: image_sample_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf 1347; GFX6789-NEXT: s_waitcnt vmcnt(0) 1348; GFX6789-NEXT: ; return to shader part epilog 1349; 1350; GFX10PLUS-LABEL: sample_b_cl_1d: 1351; GFX10PLUS: ; %bb.0: ; %main_body 1352; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 1353; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 1354; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 1355; GFX10PLUS-NEXT: image_sample_b_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 1356; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1357; GFX10PLUS-NEXT: ; return to shader part epilog 1358; 1359; GFX12-LABEL: sample_b_cl_1d: 1360; GFX12: ; %bb.0: ; %main_body 1361; GFX12-NEXT: s_mov_b32 s12, exec_lo 1362; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo 1363; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12 1364; GFX12-NEXT: image_sample_b_cl v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 1365; GFX12-NEXT: s_wait_samplecnt 0x0 1366; GFX12-NEXT: ; return to shader part epilog 1367main_body: 1368 %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32 15, float %bias, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1369 ret <4 x float> %v 1370} 1371 1372define amdgpu_ps <4 x float> @sample_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t, float %clamp) { 1373; VERDE-LABEL: sample_b_cl_2d: 1374; VERDE: ; %bb.0: ; %main_body 1375; VERDE-NEXT: s_mov_b64 s[12:13], exec 1376; VERDE-NEXT: s_wqm_b64 exec, exec 1377; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 1378; VERDE-NEXT: image_sample_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf 1379; VERDE-NEXT: s_waitcnt vmcnt(0) 1380; VERDE-NEXT: ; return to shader part epilog 1381; 1382; GFX6789-LABEL: sample_b_cl_2d: 1383; GFX6789: ; %bb.0: ; %main_body 1384; GFX6789-NEXT: s_mov_b64 s[12:13], exec 1385; GFX6789-NEXT: s_wqm_b64 exec, exec 1386; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 1387; GFX6789-NEXT: image_sample_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf 1388; GFX6789-NEXT: s_waitcnt vmcnt(0) 1389; GFX6789-NEXT: ; return to shader part epilog 1390; 1391; GFX10PLUS-LABEL: sample_b_cl_2d: 1392; GFX10PLUS: ; %bb.0: ; %main_body 1393; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 1394; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 1395; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 1396; GFX10PLUS-NEXT: image_sample_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 1397; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1398; GFX10PLUS-NEXT: ; return to shader part epilog 1399; 1400; GFX12-LABEL: sample_b_cl_2d: 1401; GFX12: ; %bb.0: ; %main_body 1402; GFX12-NEXT: s_mov_b32 s12, exec_lo 1403; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo 1404; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12 1405; GFX12-NEXT: image_sample_b_cl v[0:3], [v0, v1, v2, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 1406; GFX12-NEXT: s_wait_samplecnt 0x0 1407; GFX12-NEXT: ; return to shader part epilog 1408main_body: 1409 %v = call <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32(i32 15, float %bias, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1410 ret <4 x float> %v 1411} 1412 1413define amdgpu_ps <4 x float> @sample_c_b_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %clamp) { 1414; VERDE-LABEL: sample_c_b_cl_1d: 1415; VERDE: ; %bb.0: ; %main_body 1416; VERDE-NEXT: s_mov_b64 s[12:13], exec 1417; VERDE-NEXT: s_wqm_b64 exec, exec 1418; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 1419; VERDE-NEXT: image_sample_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf 1420; VERDE-NEXT: s_waitcnt vmcnt(0) 1421; VERDE-NEXT: ; return to shader part epilog 1422; 1423; GFX6789-LABEL: sample_c_b_cl_1d: 1424; GFX6789: ; %bb.0: ; %main_body 1425; GFX6789-NEXT: s_mov_b64 s[12:13], exec 1426; GFX6789-NEXT: s_wqm_b64 exec, exec 1427; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 1428; GFX6789-NEXT: image_sample_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf 1429; GFX6789-NEXT: s_waitcnt vmcnt(0) 1430; GFX6789-NEXT: ; return to shader part epilog 1431; 1432; GFX10PLUS-LABEL: sample_c_b_cl_1d: 1433; GFX10PLUS: ; %bb.0: ; %main_body 1434; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 1435; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 1436; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 1437; GFX10PLUS-NEXT: image_sample_c_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 1438; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1439; GFX10PLUS-NEXT: ; return to shader part epilog 1440; 1441; GFX12-LABEL: sample_c_b_cl_1d: 1442; GFX12: ; %bb.0: ; %main_body 1443; GFX12-NEXT: s_mov_b32 s12, exec_lo 1444; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo 1445; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12 1446; GFX12-NEXT: image_sample_c_b_cl v[0:3], [v0, v1, v2, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 1447; GFX12-NEXT: s_wait_samplecnt 0x0 1448; GFX12-NEXT: ; return to shader part epilog 1449main_body: 1450 %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1451 ret <4 x float> %v 1452} 1453 1454define amdgpu_ps <4 x float> @sample_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t, float %clamp) { 1455; VERDE-LABEL: sample_c_b_cl_2d: 1456; VERDE: ; %bb.0: ; %main_body 1457; VERDE-NEXT: s_mov_b64 s[12:13], exec 1458; VERDE-NEXT: s_wqm_b64 exec, exec 1459; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 1460; VERDE-NEXT: image_sample_c_b_cl v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf 1461; VERDE-NEXT: s_waitcnt vmcnt(0) 1462; VERDE-NEXT: ; return to shader part epilog 1463; 1464; GFX6789-LABEL: sample_c_b_cl_2d: 1465; GFX6789: ; %bb.0: ; %main_body 1466; GFX6789-NEXT: s_mov_b64 s[12:13], exec 1467; GFX6789-NEXT: s_wqm_b64 exec, exec 1468; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 1469; GFX6789-NEXT: image_sample_c_b_cl v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf 1470; GFX6789-NEXT: s_waitcnt vmcnt(0) 1471; GFX6789-NEXT: ; return to shader part epilog 1472; 1473; GFX10PLUS-LABEL: sample_c_b_cl_2d: 1474; GFX10PLUS: ; %bb.0: ; %main_body 1475; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 1476; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 1477; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 1478; GFX10PLUS-NEXT: image_sample_c_b_cl v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 1479; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1480; GFX10PLUS-NEXT: ; return to shader part epilog 1481; 1482; GFX12-LABEL: sample_c_b_cl_2d: 1483; GFX12: ; %bb.0: ; %main_body 1484; GFX12-NEXT: s_mov_b32 s12, exec_lo 1485; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo 1486; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12 1487; GFX12-NEXT: image_sample_c_b_cl v[0:3], [v0, v1, v2, v[3:4]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 1488; GFX12-NEXT: s_wait_samplecnt 0x0 1489; GFX12-NEXT: ; return to shader part epilog 1490main_body: 1491 %v = call <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f32(i32 15, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1492 ret <4 x float> %v 1493} 1494 1495define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s) { 1496; VERDE-LABEL: sample_d_1d: 1497; VERDE: ; %bb.0: ; %main_body 1498; VERDE-NEXT: image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf 1499; VERDE-NEXT: s_waitcnt vmcnt(0) 1500; VERDE-NEXT: ; return to shader part epilog 1501; 1502; GFX6789-LABEL: sample_d_1d: 1503; GFX6789: ; %bb.0: ; %main_body 1504; GFX6789-NEXT: image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf 1505; GFX6789-NEXT: s_waitcnt vmcnt(0) 1506; GFX6789-NEXT: ; return to shader part epilog 1507; 1508; GFX10PLUS-LABEL: sample_d_1d: 1509; GFX10PLUS: ; %bb.0: ; %main_body 1510; GFX10PLUS-NEXT: image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 1511; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1512; GFX10PLUS-NEXT: ; return to shader part epilog 1513; 1514; GFX12-LABEL: sample_d_1d: 1515; GFX12: ; %bb.0: ; %main_body 1516; GFX12-NEXT: image_sample_d v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 1517; GFX12-NEXT: s_wait_samplecnt 0x0 1518; GFX12-NEXT: ; return to shader part epilog 1519main_body: 1520 %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1521 ret <4 x float> %v 1522} 1523 1524define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) { 1525; VERDE-LABEL: sample_d_2d: 1526; VERDE: ; %bb.0: ; %main_body 1527; VERDE-NEXT: image_sample_d v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf 1528; VERDE-NEXT: s_waitcnt vmcnt(0) 1529; VERDE-NEXT: ; return to shader part epilog 1530; 1531; GFX6789-LABEL: sample_d_2d: 1532; GFX6789: ; %bb.0: ; %main_body 1533; GFX6789-NEXT: image_sample_d v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf 1534; GFX6789-NEXT: s_waitcnt vmcnt(0) 1535; GFX6789-NEXT: ; return to shader part epilog 1536; 1537; GFX10PLUS-LABEL: sample_d_2d: 1538; GFX10PLUS: ; %bb.0: ; %main_body 1539; GFX10PLUS-NEXT: image_sample_d v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 1540; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1541; GFX10PLUS-NEXT: ; return to shader part epilog 1542; 1543; GFX12-LABEL: sample_d_2d: 1544; GFX12: ; %bb.0: ; %main_body 1545; GFX12-NEXT: image_sample_d v[0:3], [v0, v1, v2, v[3:5]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 1546; GFX12-NEXT: s_wait_samplecnt 0x0 1547; GFX12-NEXT: ; return to shader part epilog 1548main_body: 1549 %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1550 ret <4 x float> %v 1551} 1552 1553define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s) { 1554; VERDE-LABEL: sample_c_d_1d: 1555; VERDE: ; %bb.0: ; %main_body 1556; VERDE-NEXT: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf 1557; VERDE-NEXT: s_waitcnt vmcnt(0) 1558; VERDE-NEXT: ; return to shader part epilog 1559; 1560; GFX6789-LABEL: sample_c_d_1d: 1561; GFX6789: ; %bb.0: ; %main_body 1562; GFX6789-NEXT: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf 1563; GFX6789-NEXT: s_waitcnt vmcnt(0) 1564; GFX6789-NEXT: ; return to shader part epilog 1565; 1566; GFX10PLUS-LABEL: sample_c_d_1d: 1567; GFX10PLUS: ; %bb.0: ; %main_body 1568; GFX10PLUS-NEXT: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 1569; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1570; GFX10PLUS-NEXT: ; return to shader part epilog 1571; 1572; GFX12-LABEL: sample_c_d_1d: 1573; GFX12: ; %bb.0: ; %main_body 1574; GFX12-NEXT: image_sample_c_d v[0:3], [v0, v1, v2, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 1575; GFX12-NEXT: s_wait_samplecnt 0x0 1576; GFX12-NEXT: ; return to shader part epilog 1577main_body: 1578 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1579 ret <4 x float> %v 1580} 1581 1582define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t) { 1583; VERDE-LABEL: sample_c_d_2d: 1584; VERDE: ; %bb.0: ; %main_body 1585; VERDE-NEXT: image_sample_c_d v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf 1586; VERDE-NEXT: s_waitcnt vmcnt(0) 1587; VERDE-NEXT: ; return to shader part epilog 1588; 1589; GFX6789-LABEL: sample_c_d_2d: 1590; GFX6789: ; %bb.0: ; %main_body 1591; GFX6789-NEXT: image_sample_c_d v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf 1592; GFX6789-NEXT: s_waitcnt vmcnt(0) 1593; GFX6789-NEXT: ; return to shader part epilog 1594; 1595; GFX10PLUS-LABEL: sample_c_d_2d: 1596; GFX10PLUS: ; %bb.0: ; %main_body 1597; GFX10PLUS-NEXT: image_sample_c_d v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 1598; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1599; GFX10PLUS-NEXT: ; return to shader part epilog 1600; 1601; GFX12-LABEL: sample_c_d_2d: 1602; GFX12: ; %bb.0: ; %main_body 1603; GFX12-NEXT: image_sample_c_d v[0:3], [v0, v1, v2, v[3:6]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 1604; GFX12-NEXT: s_wait_samplecnt 0x0 1605; GFX12-NEXT: ; return to shader part epilog 1606main_body: 1607 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1608 ret <4 x float> %v 1609} 1610 1611define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, float %s, float %clamp) { 1612; VERDE-LABEL: sample_d_cl_1d: 1613; VERDE: ; %bb.0: ; %main_body 1614; VERDE-NEXT: image_sample_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf 1615; VERDE-NEXT: s_waitcnt vmcnt(0) 1616; VERDE-NEXT: ; return to shader part epilog 1617; 1618; GFX6789-LABEL: sample_d_cl_1d: 1619; GFX6789: ; %bb.0: ; %main_body 1620; GFX6789-NEXT: image_sample_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf 1621; GFX6789-NEXT: s_waitcnt vmcnt(0) 1622; GFX6789-NEXT: ; return to shader part epilog 1623; 1624; GFX10PLUS-LABEL: sample_d_cl_1d: 1625; GFX10PLUS: ; %bb.0: ; %main_body 1626; GFX10PLUS-NEXT: image_sample_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 1627; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1628; GFX10PLUS-NEXT: ; return to shader part epilog 1629; 1630; GFX12-LABEL: sample_d_cl_1d: 1631; GFX12: ; %bb.0: ; %main_body 1632; GFX12-NEXT: image_sample_d_cl v[0:3], [v0, v1, v2, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 1633; GFX12-NEXT: s_wait_samplecnt 0x0 1634; GFX12-NEXT: ; return to shader part epilog 1635main_body: 1636 %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32 15, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1637 ret <4 x float> %v 1638} 1639 1640define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) { 1641; VERDE-LABEL: sample_d_cl_2d: 1642; VERDE: ; %bb.0: ; %main_body 1643; VERDE-NEXT: image_sample_d_cl v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf 1644; VERDE-NEXT: s_waitcnt vmcnt(0) 1645; VERDE-NEXT: ; return to shader part epilog 1646; 1647; GFX6789-LABEL: sample_d_cl_2d: 1648; GFX6789: ; %bb.0: ; %main_body 1649; GFX6789-NEXT: image_sample_d_cl v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf 1650; GFX6789-NEXT: s_waitcnt vmcnt(0) 1651; GFX6789-NEXT: ; return to shader part epilog 1652; 1653; GFX10PLUS-LABEL: sample_d_cl_2d: 1654; GFX10PLUS: ; %bb.0: ; %main_body 1655; GFX10PLUS-NEXT: image_sample_d_cl v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 1656; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1657; GFX10PLUS-NEXT: ; return to shader part epilog 1658; 1659; GFX12-LABEL: sample_d_cl_2d: 1660; GFX12: ; %bb.0: ; %main_body 1661; GFX12-NEXT: image_sample_d_cl v[0:3], [v0, v1, v2, v[3:6]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 1662; GFX12-NEXT: s_wait_samplecnt 0x0 1663; GFX12-NEXT: ; return to shader part epilog 1664main_body: 1665 %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f32(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1666 ret <4 x float> %v 1667} 1668 1669define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp) { 1670; VERDE-LABEL: sample_c_d_cl_1d: 1671; VERDE: ; %bb.0: ; %main_body 1672; VERDE-NEXT: image_sample_c_d_cl v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf 1673; VERDE-NEXT: s_waitcnt vmcnt(0) 1674; VERDE-NEXT: ; return to shader part epilog 1675; 1676; GFX6789-LABEL: sample_c_d_cl_1d: 1677; GFX6789: ; %bb.0: ; %main_body 1678; GFX6789-NEXT: image_sample_c_d_cl v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf 1679; GFX6789-NEXT: s_waitcnt vmcnt(0) 1680; GFX6789-NEXT: ; return to shader part epilog 1681; 1682; GFX10PLUS-LABEL: sample_c_d_cl_1d: 1683; GFX10PLUS: ; %bb.0: ; %main_body 1684; GFX10PLUS-NEXT: image_sample_c_d_cl v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 1685; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1686; GFX10PLUS-NEXT: ; return to shader part epilog 1687; 1688; GFX12-LABEL: sample_c_d_cl_1d: 1689; GFX12: ; %bb.0: ; %main_body 1690; GFX12-NEXT: image_sample_c_d_cl v[0:3], [v0, v1, v2, v[3:4]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 1691; GFX12-NEXT: s_wait_samplecnt 0x0 1692; GFX12-NEXT: ; return to shader part epilog 1693main_body: 1694 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1695 ret <4 x float> %v 1696} 1697 1698define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp) { 1699; VERDE-LABEL: sample_c_d_cl_2d: 1700; VERDE: ; %bb.0: ; %main_body 1701; VERDE-NEXT: image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf 1702; VERDE-NEXT: s_waitcnt vmcnt(0) 1703; VERDE-NEXT: ; return to shader part epilog 1704; 1705; GFX6789-LABEL: sample_c_d_cl_2d: 1706; GFX6789: ; %bb.0: ; %main_body 1707; GFX6789-NEXT: image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf 1708; GFX6789-NEXT: s_waitcnt vmcnt(0) 1709; GFX6789-NEXT: ; return to shader part epilog 1710; 1711; GFX10PLUS-LABEL: sample_c_d_cl_2d: 1712; GFX10PLUS: ; %bb.0: ; %main_body 1713; GFX10PLUS-NEXT: image_sample_c_d_cl v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 1714; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1715; GFX10PLUS-NEXT: ; return to shader part epilog 1716; 1717; GFX12-LABEL: sample_c_d_cl_2d: 1718; GFX12: ; %bb.0: ; %main_body 1719; GFX12-NEXT: image_sample_c_d_cl v[0:3], [v0, v1, v2, v[3:7]], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 1720; GFX12-NEXT: s_wait_samplecnt 0x0 1721; GFX12-NEXT: ; return to shader part epilog 1722main_body: 1723 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f32(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1724 ret <4 x float> %v 1725} 1726 1727define amdgpu_ps <4 x float> @sample_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %lod) { 1728; VERDE-LABEL: sample_l_1d: 1729; VERDE: ; %bb.0: ; %main_body 1730; VERDE-NEXT: image_sample_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf 1731; VERDE-NEXT: s_waitcnt vmcnt(0) 1732; VERDE-NEXT: ; return to shader part epilog 1733; 1734; GFX6789-LABEL: sample_l_1d: 1735; GFX6789: ; %bb.0: ; %main_body 1736; GFX6789-NEXT: image_sample_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf 1737; GFX6789-NEXT: s_waitcnt vmcnt(0) 1738; GFX6789-NEXT: ; return to shader part epilog 1739; 1740; GFX10PLUS-LABEL: sample_l_1d: 1741; GFX10PLUS: ; %bb.0: ; %main_body 1742; GFX10PLUS-NEXT: image_sample_l v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 1743; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1744; GFX10PLUS-NEXT: ; return to shader part epilog 1745; 1746; GFX12-LABEL: sample_l_1d: 1747; GFX12: ; %bb.0: ; %main_body 1748; GFX12-NEXT: image_sample_l v[0:3], [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 1749; GFX12-NEXT: s_wait_samplecnt 0x0 1750; GFX12-NEXT: ; return to shader part epilog 1751main_body: 1752 %v = call <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32 15, float %s, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1753 ret <4 x float> %v 1754} 1755 1756define amdgpu_ps <4 x float> @sample_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) { 1757; VERDE-LABEL: sample_l_2d: 1758; VERDE: ; %bb.0: ; %main_body 1759; VERDE-NEXT: image_sample_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf 1760; VERDE-NEXT: s_waitcnt vmcnt(0) 1761; VERDE-NEXT: ; return to shader part epilog 1762; 1763; GFX6789-LABEL: sample_l_2d: 1764; GFX6789: ; %bb.0: ; %main_body 1765; GFX6789-NEXT: image_sample_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf 1766; GFX6789-NEXT: s_waitcnt vmcnt(0) 1767; GFX6789-NEXT: ; return to shader part epilog 1768; 1769; GFX10PLUS-LABEL: sample_l_2d: 1770; GFX10PLUS: ; %bb.0: ; %main_body 1771; GFX10PLUS-NEXT: image_sample_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 1772; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1773; GFX10PLUS-NEXT: ; return to shader part epilog 1774; 1775; GFX12-LABEL: sample_l_2d: 1776; GFX12: ; %bb.0: ; %main_body 1777; GFX12-NEXT: image_sample_l v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 1778; GFX12-NEXT: s_wait_samplecnt 0x0 1779; GFX12-NEXT: ; return to shader part epilog 1780main_body: 1781 %v = call <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32 15, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1782 ret <4 x float> %v 1783} 1784 1785define amdgpu_ps <4 x float> @sample_c_l_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %lod) { 1786; VERDE-LABEL: sample_c_l_1d: 1787; VERDE: ; %bb.0: ; %main_body 1788; VERDE-NEXT: image_sample_c_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf 1789; VERDE-NEXT: s_waitcnt vmcnt(0) 1790; VERDE-NEXT: ; return to shader part epilog 1791; 1792; GFX6789-LABEL: sample_c_l_1d: 1793; GFX6789: ; %bb.0: ; %main_body 1794; GFX6789-NEXT: image_sample_c_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf 1795; GFX6789-NEXT: s_waitcnt vmcnt(0) 1796; GFX6789-NEXT: ; return to shader part epilog 1797; 1798; GFX10PLUS-LABEL: sample_c_l_1d: 1799; GFX10PLUS: ; %bb.0: ; %main_body 1800; GFX10PLUS-NEXT: image_sample_c_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 1801; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1802; GFX10PLUS-NEXT: ; return to shader part epilog 1803; 1804; GFX12-LABEL: sample_c_l_1d: 1805; GFX12: ; %bb.0: ; %main_body 1806; GFX12-NEXT: image_sample_c_l v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 1807; GFX12-NEXT: s_wait_samplecnt 0x0 1808; GFX12-NEXT: ; return to shader part epilog 1809main_body: 1810 %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32 15, float %zcompare, float %s, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1811 ret <4 x float> %v 1812} 1813 1814define amdgpu_ps <4 x float> @sample_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) { 1815; VERDE-LABEL: sample_c_l_2d: 1816; VERDE: ; %bb.0: ; %main_body 1817; VERDE-NEXT: image_sample_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf 1818; VERDE-NEXT: s_waitcnt vmcnt(0) 1819; VERDE-NEXT: ; return to shader part epilog 1820; 1821; GFX6789-LABEL: sample_c_l_2d: 1822; GFX6789: ; %bb.0: ; %main_body 1823; GFX6789-NEXT: image_sample_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf 1824; GFX6789-NEXT: s_waitcnt vmcnt(0) 1825; GFX6789-NEXT: ; return to shader part epilog 1826; 1827; GFX10PLUS-LABEL: sample_c_l_2d: 1828; GFX10PLUS: ; %bb.0: ; %main_body 1829; GFX10PLUS-NEXT: image_sample_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 1830; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1831; GFX10PLUS-NEXT: ; return to shader part epilog 1832; 1833; GFX12-LABEL: sample_c_l_2d: 1834; GFX12: ; %bb.0: ; %main_body 1835; GFX12-NEXT: image_sample_c_l v[0:3], [v0, v1, v2, v3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 1836; GFX12-NEXT: s_wait_samplecnt 0x0 1837; GFX12-NEXT: ; return to shader part epilog 1838main_body: 1839 %v = call <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1840 ret <4 x float> %v 1841} 1842 1843define amdgpu_ps <4 x float> @sample_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { 1844; VERDE-LABEL: sample_lz_1d: 1845; VERDE: ; %bb.0: ; %main_body 1846; VERDE-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf 1847; VERDE-NEXT: s_waitcnt vmcnt(0) 1848; VERDE-NEXT: ; return to shader part epilog 1849; 1850; GFX6789-LABEL: sample_lz_1d: 1851; GFX6789: ; %bb.0: ; %main_body 1852; GFX6789-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf 1853; GFX6789-NEXT: s_waitcnt vmcnt(0) 1854; GFX6789-NEXT: ; return to shader part epilog 1855; 1856; GFX10PLUS-LABEL: sample_lz_1d: 1857; GFX10PLUS: ; %bb.0: ; %main_body 1858; GFX10PLUS-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 1859; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1860; GFX10PLUS-NEXT: ; return to shader part epilog 1861; 1862; GFX12-LABEL: sample_lz_1d: 1863; GFX12: ; %bb.0: ; %main_body 1864; GFX12-NEXT: image_sample_lz v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 1865; GFX12-NEXT: s_wait_samplecnt 0x0 1866; GFX12-NEXT: ; return to shader part epilog 1867main_body: 1868 %v = call <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1869 ret <4 x float> %v 1870} 1871 1872define amdgpu_ps <4 x float> @sample_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) { 1873; VERDE-LABEL: sample_lz_2d: 1874; VERDE: ; %bb.0: ; %main_body 1875; VERDE-NEXT: image_sample_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf 1876; VERDE-NEXT: s_waitcnt vmcnt(0) 1877; VERDE-NEXT: ; return to shader part epilog 1878; 1879; GFX6789-LABEL: sample_lz_2d: 1880; GFX6789: ; %bb.0: ; %main_body 1881; GFX6789-NEXT: image_sample_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf 1882; GFX6789-NEXT: s_waitcnt vmcnt(0) 1883; GFX6789-NEXT: ; return to shader part epilog 1884; 1885; GFX10PLUS-LABEL: sample_lz_2d: 1886; GFX10PLUS: ; %bb.0: ; %main_body 1887; GFX10PLUS-NEXT: image_sample_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 1888; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1889; GFX10PLUS-NEXT: ; return to shader part epilog 1890; 1891; GFX12-LABEL: sample_lz_2d: 1892; GFX12: ; %bb.0: ; %main_body 1893; GFX12-NEXT: image_sample_lz v[0:3], [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 1894; GFX12-NEXT: s_wait_samplecnt 0x0 1895; GFX12-NEXT: ; return to shader part epilog 1896main_body: 1897 %v = call <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32 15, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1898 ret <4 x float> %v 1899} 1900 1901define amdgpu_ps <4 x float> @sample_c_lz_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s) { 1902; VERDE-LABEL: sample_c_lz_1d: 1903; VERDE: ; %bb.0: ; %main_body 1904; VERDE-NEXT: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf 1905; VERDE-NEXT: s_waitcnt vmcnt(0) 1906; VERDE-NEXT: ; return to shader part epilog 1907; 1908; GFX6789-LABEL: sample_c_lz_1d: 1909; GFX6789: ; %bb.0: ; %main_body 1910; GFX6789-NEXT: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf 1911; GFX6789-NEXT: s_waitcnt vmcnt(0) 1912; GFX6789-NEXT: ; return to shader part epilog 1913; 1914; GFX10PLUS-LABEL: sample_c_lz_1d: 1915; GFX10PLUS: ; %bb.0: ; %main_body 1916; GFX10PLUS-NEXT: image_sample_c_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 1917; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1918; GFX10PLUS-NEXT: ; return to shader part epilog 1919; 1920; GFX12-LABEL: sample_c_lz_1d: 1921; GFX12: ; %bb.0: ; %main_body 1922; GFX12-NEXT: image_sample_c_lz v[0:3], [v0, v1], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 1923; GFX12-NEXT: s_wait_samplecnt 0x0 1924; GFX12-NEXT: ; return to shader part epilog 1925main_body: 1926 %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32 15, float %zcompare, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1927 ret <4 x float> %v 1928} 1929 1930define amdgpu_ps <4 x float> @sample_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) { 1931; VERDE-LABEL: sample_c_lz_2d: 1932; VERDE: ; %bb.0: ; %main_body 1933; VERDE-NEXT: image_sample_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf 1934; VERDE-NEXT: s_waitcnt vmcnt(0) 1935; VERDE-NEXT: ; return to shader part epilog 1936; 1937; GFX6789-LABEL: sample_c_lz_2d: 1938; GFX6789: ; %bb.0: ; %main_body 1939; GFX6789-NEXT: image_sample_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf 1940; GFX6789-NEXT: s_waitcnt vmcnt(0) 1941; GFX6789-NEXT: ; return to shader part epilog 1942; 1943; GFX10PLUS-LABEL: sample_c_lz_2d: 1944; GFX10PLUS: ; %bb.0: ; %main_body 1945; GFX10PLUS-NEXT: image_sample_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 1946; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1947; GFX10PLUS-NEXT: ; return to shader part epilog 1948; 1949; GFX12-LABEL: sample_c_lz_2d: 1950; GFX12: ; %bb.0: ; %main_body 1951; GFX12-NEXT: image_sample_c_lz v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 1952; GFX12-NEXT: s_wait_samplecnt 0x0 1953; GFX12-NEXT: ; return to shader part epilog 1954main_body: 1955 %v = call <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f32(i32 15, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1956 ret <4 x float> %v 1957} 1958 1959define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice) { 1960; VERDE-LABEL: sample_c_d_o_2darray_V1: 1961; VERDE: ; %bb.0: ; %main_body 1962; VERDE-NEXT: image_sample_c_d_o v0, v[0:8], s[0:7], s[8:11] dmask:0x4 da 1963; VERDE-NEXT: s_waitcnt vmcnt(0) 1964; VERDE-NEXT: ; return to shader part epilog 1965; 1966; GFX6789-LABEL: sample_c_d_o_2darray_V1: 1967; GFX6789: ; %bb.0: ; %main_body 1968; GFX6789-NEXT: image_sample_c_d_o v0, v[0:8], s[0:7], s[8:11] dmask:0x4 da 1969; GFX6789-NEXT: s_waitcnt vmcnt(0) 1970; GFX6789-NEXT: ; return to shader part epilog 1971; 1972; GFX10PLUS-LABEL: sample_c_d_o_2darray_V1: 1973; GFX10PLUS: ; %bb.0: ; %main_body 1974; GFX10PLUS-NEXT: image_sample_c_d_o v0, v[0:8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY 1975; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 1976; GFX10PLUS-NEXT: ; return to shader part epilog 1977; 1978; GFX12-LABEL: sample_c_d_o_2darray_V1: 1979; GFX12: ; %bb.0: ; %main_body 1980; GFX12-NEXT: image_sample_c_d_o v0, [v0, v1, v2, v[3:8]], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY 1981; GFX12-NEXT: s_wait_samplecnt 0x0 1982; GFX12-NEXT: ; return to shader part epilog 1983main_body: 1984 %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f32(i32 4, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 1985 ret float %v 1986} 1987 1988define amdgpu_ps float @sample_c_d_o_2darray_V1_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, ptr addrspace(1) inreg %out) { 1989; VERDE-LABEL: sample_c_d_o_2darray_V1_tfe: 1990; VERDE: ; %bb.0: ; %main_body 1991; VERDE-NEXT: v_mov_b32_e32 v9, 0 1992; VERDE-NEXT: v_mov_b32_e32 v10, v9 1993; VERDE-NEXT: image_sample_c_d_o v[9:10], v[0:8], s[0:7], s[8:11] dmask:0x4 tfe da 1994; VERDE-NEXT: s_mov_b32 s15, 0xf000 1995; VERDE-NEXT: s_mov_b32 s14, -1 1996; VERDE-NEXT: s_waitcnt vmcnt(0) 1997; VERDE-NEXT: v_mov_b32_e32 v0, v9 1998; VERDE-NEXT: buffer_store_dword v10, off, s[12:15], 0 1999; VERDE-NEXT: s_waitcnt vmcnt(0) expcnt(0) 2000; VERDE-NEXT: ; return to shader part epilog 2001; 2002; GFX6789-LABEL: sample_c_d_o_2darray_V1_tfe: 2003; GFX6789: ; %bb.0: ; %main_body 2004; GFX6789-NEXT: v_mov_b32_e32 v11, 0 2005; GFX6789-NEXT: v_mov_b32_e32 v12, v11 2006; GFX6789-NEXT: v_mov_b32_e32 v9, v11 2007; GFX6789-NEXT: v_mov_b32_e32 v10, v12 2008; GFX6789-NEXT: image_sample_c_d_o v[9:10], v[0:8], s[0:7], s[8:11] dmask:0x4 tfe da 2009; GFX6789-NEXT: s_waitcnt vmcnt(0) 2010; GFX6789-NEXT: v_mov_b32_e32 v0, v9 2011; GFX6789-NEXT: global_store_dword v11, v10, s[12:13] 2012; GFX6789-NEXT: s_waitcnt vmcnt(0) 2013; GFX6789-NEXT: ; return to shader part epilog 2014; 2015; GFX10-LABEL: sample_c_d_o_2darray_V1_tfe: 2016; GFX10: ; %bb.0: ; %main_body 2017; GFX10-NEXT: v_mov_b32_e32 v11, 0 2018; GFX10-NEXT: v_mov_b32_e32 v12, v11 2019; GFX10-NEXT: v_mov_b32_e32 v9, v11 2020; GFX10-NEXT: v_mov_b32_e32 v10, v12 2021; GFX10-NEXT: image_sample_c_d_o v[9:10], v[0:8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY tfe 2022; GFX10-NEXT: s_waitcnt vmcnt(0) 2023; GFX10-NEXT: v_mov_b32_e32 v0, v9 2024; GFX10-NEXT: global_store_dword v11, v10, s[12:13] 2025; GFX10-NEXT: ; return to shader part epilog 2026; 2027; GFX11-LABEL: sample_c_d_o_2darray_V1_tfe: 2028; GFX11: ; %bb.0: ; %main_body 2029; GFX11-NEXT: v_mov_b32_e32 v11, 0 2030; GFX11-NEXT: v_dual_mov_b32 v9, v1 :: v_dual_mov_b32 v10, v0 2031; GFX11-NEXT: v_mov_b32_e32 v12, v11 2032; GFX11-NEXT: v_dual_mov_b32 v0, v11 :: v_dual_mov_b32 v1, v12 2033; GFX11-NEXT: image_sample_c_d_o v[0:1], [v10, v9, v2, v3, v[4:8]], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY tfe 2034; GFX11-NEXT: s_waitcnt vmcnt(0) 2035; GFX11-NEXT: global_store_b32 v11, v1, s[12:13] 2036; GFX11-NEXT: ; return to shader part epilog 2037; 2038; GFX12-LABEL: sample_c_d_o_2darray_V1_tfe: 2039; GFX12: ; %bb.0: ; %main_body 2040; GFX12-NEXT: v_mov_b32_e32 v11, 0 2041; GFX12-NEXT: v_dual_mov_b32 v9, v1 :: v_dual_mov_b32 v10, v0 2042; GFX12-NEXT: v_mov_b32_e32 v12, v11 2043; GFX12-NEXT: v_dual_mov_b32 v0, v11 :: v_dual_mov_b32 v1, v12 2044; GFX12-NEXT: image_sample_c_d_o v[0:1], [v10, v9, v2, v[3:8]], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY tfe 2045; GFX12-NEXT: s_wait_samplecnt 0x0 2046; GFX12-NEXT: global_store_b32 v11, v1, s[12:13] 2047; GFX12-NEXT: ; return to shader part epilog 2048main_body: 2049 %v = call {float,i32} @llvm.amdgcn.image.sample.c.d.o.2darray.f32i32.f32.f32(i32 4, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0) 2050 %v.vec = extractvalue {float, i32} %v, 0 2051 %v.err = extractvalue {float, i32} %v, 1 2052 store i32 %v.err, ptr addrspace(1) %out, align 4 2053 ret float %v.vec 2054} 2055 2056define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice) { 2057; VERDE-LABEL: sample_c_d_o_2darray_V2: 2058; VERDE: ; %bb.0: ; %main_body 2059; VERDE-NEXT: image_sample_c_d_o v[0:1], v[0:8], s[0:7], s[8:11] dmask:0x6 da 2060; VERDE-NEXT: s_waitcnt vmcnt(0) 2061; VERDE-NEXT: ; return to shader part epilog 2062; 2063; GFX6789-LABEL: sample_c_d_o_2darray_V2: 2064; GFX6789: ; %bb.0: ; %main_body 2065; GFX6789-NEXT: image_sample_c_d_o v[0:1], v[0:8], s[0:7], s[8:11] dmask:0x6 da 2066; GFX6789-NEXT: s_waitcnt vmcnt(0) 2067; GFX6789-NEXT: ; return to shader part epilog 2068; 2069; GFX10PLUS-LABEL: sample_c_d_o_2darray_V2: 2070; GFX10PLUS: ; %bb.0: ; %main_body 2071; GFX10PLUS-NEXT: image_sample_c_d_o v[0:1], v[0:8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY 2072; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 2073; GFX10PLUS-NEXT: ; return to shader part epilog 2074; 2075; GFX12-LABEL: sample_c_d_o_2darray_V2: 2076; GFX12: ; %bb.0: ; %main_body 2077; GFX12-NEXT: image_sample_c_d_o v[0:1], [v0, v1, v2, v[3:8]], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY 2078; GFX12-NEXT: s_wait_samplecnt 0x0 2079; GFX12-NEXT: ; return to shader part epilog 2080main_body: 2081 %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32(i32 6, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 2082 ret <2 x float> %v 2083} 2084 2085define amdgpu_ps <4 x float> @sample_c_d_o_2darray_V2_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice) { 2086; VERDE-LABEL: sample_c_d_o_2darray_V2_tfe: 2087; VERDE: ; %bb.0: ; %main_body 2088; VERDE-NEXT: v_mov_b32_e32 v9, 0 2089; VERDE-NEXT: v_mov_b32_e32 v10, v9 2090; VERDE-NEXT: v_mov_b32_e32 v11, v9 2091; VERDE-NEXT: image_sample_c_d_o v[9:11], v[0:8], s[0:7], s[8:11] dmask:0x6 tfe da 2092; VERDE-NEXT: s_waitcnt vmcnt(0) 2093; VERDE-NEXT: v_mov_b32_e32 v0, v9 2094; VERDE-NEXT: v_mov_b32_e32 v1, v10 2095; VERDE-NEXT: v_mov_b32_e32 v2, v11 2096; VERDE-NEXT: ; return to shader part epilog 2097; 2098; GFX6789-LABEL: sample_c_d_o_2darray_V2_tfe: 2099; GFX6789: ; %bb.0: ; %main_body 2100; GFX6789-NEXT: v_mov_b32_e32 v9, 0 2101; GFX6789-NEXT: v_mov_b32_e32 v10, v9 2102; GFX6789-NEXT: v_mov_b32_e32 v11, v9 2103; GFX6789-NEXT: image_sample_c_d_o v[9:11], v[0:8], s[0:7], s[8:11] dmask:0x6 tfe da 2104; GFX6789-NEXT: s_waitcnt vmcnt(0) 2105; GFX6789-NEXT: v_mov_b32_e32 v0, v9 2106; GFX6789-NEXT: v_mov_b32_e32 v1, v10 2107; GFX6789-NEXT: v_mov_b32_e32 v2, v11 2108; GFX6789-NEXT: ; return to shader part epilog 2109; 2110; GFX10-LABEL: sample_c_d_o_2darray_V2_tfe: 2111; GFX10: ; %bb.0: ; %main_body 2112; GFX10-NEXT: v_mov_b32_e32 v9, 0 2113; GFX10-NEXT: v_mov_b32_e32 v10, v9 2114; GFX10-NEXT: v_mov_b32_e32 v11, v9 2115; GFX10-NEXT: image_sample_c_d_o v[9:11], v[0:8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY tfe 2116; GFX10-NEXT: s_waitcnt vmcnt(0) 2117; GFX10-NEXT: v_mov_b32_e32 v0, v9 2118; GFX10-NEXT: v_mov_b32_e32 v1, v10 2119; GFX10-NEXT: v_mov_b32_e32 v2, v11 2120; GFX10-NEXT: ; return to shader part epilog 2121; 2122; GFX11-LABEL: sample_c_d_o_2darray_V2_tfe: 2123; GFX11: ; %bb.0: ; %main_body 2124; GFX11-NEXT: v_dual_mov_b32 v11, v0 :: v_dual_mov_b32 v0, 0 2125; GFX11-NEXT: v_dual_mov_b32 v9, v2 :: v_dual_mov_b32 v10, v1 2126; GFX11-NEXT: v_mov_b32_e32 v1, v0 2127; GFX11-NEXT: v_mov_b32_e32 v2, v0 2128; GFX11-NEXT: image_sample_c_d_o v[0:2], [v11, v10, v9, v3, v[4:8]], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY tfe 2129; GFX11-NEXT: s_waitcnt vmcnt(0) 2130; GFX11-NEXT: ; return to shader part epilog 2131; 2132; GFX12-LABEL: sample_c_d_o_2darray_V2_tfe: 2133; GFX12: ; %bb.0: ; %main_body 2134; GFX12-NEXT: v_dual_mov_b32 v11, v0 :: v_dual_mov_b32 v0, 0 2135; GFX12-NEXT: v_dual_mov_b32 v9, v2 :: v_dual_mov_b32 v10, v1 2136; GFX12-NEXT: v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v2, v0 2137; GFX12-NEXT: image_sample_c_d_o v[0:2], [v11, v10, v9, v[3:8]], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY tfe 2138; GFX12-NEXT: s_wait_samplecnt 0x0 2139; GFX12-NEXT: ; return to shader part epilog 2140main_body: 2141 %v = call {<2 x float>, i32} @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32i32.f32.f32(i32 6, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 1, i32 0) 2142 %v.vec = extractvalue {<2 x float>, i32} %v, 0 2143 %v.f1 = extractelement <2 x float> %v.vec, i32 0 2144 %v.f2 = extractelement <2 x float> %v.vec, i32 1 2145 %v.err = extractvalue {<2 x float>, i32} %v, 1 2146 %v.errf = bitcast i32 %v.err to float 2147 %res.0 = insertelement <4 x float> undef, float %v.f1, i32 0 2148 %res.1 = insertelement <4 x float> %res.0, float %v.f2, i32 1 2149 %res.2 = insertelement <4 x float> %res.1, float %v.errf, i32 2 2150 ret <4 x float> %res.2 2151} 2152 2153define amdgpu_ps <4 x float> @sample_1d_unorm(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { 2154; VERDE-LABEL: sample_1d_unorm: 2155; VERDE: ; %bb.0: ; %main_body 2156; VERDE-NEXT: s_mov_b64 s[12:13], exec 2157; VERDE-NEXT: s_wqm_b64 exec, exec 2158; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 2159; VERDE-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf unorm 2160; VERDE-NEXT: s_waitcnt vmcnt(0) 2161; VERDE-NEXT: ; return to shader part epilog 2162; 2163; GFX6789-LABEL: sample_1d_unorm: 2164; GFX6789: ; %bb.0: ; %main_body 2165; GFX6789-NEXT: s_mov_b64 s[12:13], exec 2166; GFX6789-NEXT: s_wqm_b64 exec, exec 2167; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 2168; GFX6789-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf unorm 2169; GFX6789-NEXT: s_waitcnt vmcnt(0) 2170; GFX6789-NEXT: ; return to shader part epilog 2171; 2172; GFX10PLUS-LABEL: sample_1d_unorm: 2173; GFX10PLUS: ; %bb.0: ; %main_body 2174; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 2175; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 2176; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 2177; GFX10PLUS-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D unorm 2178; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 2179; GFX10PLUS-NEXT: ; return to shader part epilog 2180; 2181; GFX12-LABEL: sample_1d_unorm: 2182; GFX12: ; %bb.0: ; %main_body 2183; GFX12-NEXT: s_mov_b32 s12, exec_lo 2184; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo 2185; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12 2186; GFX12-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D unorm 2187; GFX12-NEXT: s_wait_samplecnt 0x0 2188; GFX12-NEXT: ; return to shader part epilog 2189main_body: 2190 %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 1, i32 0, i32 0) 2191 ret <4 x float> %v 2192} 2193 2194define amdgpu_ps <4 x float> @sample_1d_glc(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { 2195; VERDE-LABEL: sample_1d_glc: 2196; VERDE: ; %bb.0: ; %main_body 2197; VERDE-NEXT: s_mov_b64 s[12:13], exec 2198; VERDE-NEXT: s_wqm_b64 exec, exec 2199; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 2200; VERDE-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf glc 2201; VERDE-NEXT: s_waitcnt vmcnt(0) 2202; VERDE-NEXT: ; return to shader part epilog 2203; 2204; GFX6789-LABEL: sample_1d_glc: 2205; GFX6789: ; %bb.0: ; %main_body 2206; GFX6789-NEXT: s_mov_b64 s[12:13], exec 2207; GFX6789-NEXT: s_wqm_b64 exec, exec 2208; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 2209; GFX6789-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf glc 2210; GFX6789-NEXT: s_waitcnt vmcnt(0) 2211; GFX6789-NEXT: ; return to shader part epilog 2212; 2213; GFX10PLUS-LABEL: sample_1d_glc: 2214; GFX10PLUS: ; %bb.0: ; %main_body 2215; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 2216; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 2217; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 2218; GFX10PLUS-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D glc 2219; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 2220; GFX10PLUS-NEXT: ; return to shader part epilog 2221; 2222; GFX12-LABEL: sample_1d_glc: 2223; GFX12: ; %bb.0: ; %main_body 2224; GFX12-NEXT: s_mov_b32 s12, exec_lo 2225; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo 2226; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12 2227; GFX12-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D th:TH_LOAD_NT 2228; GFX12-NEXT: s_wait_samplecnt 0x0 2229; GFX12-NEXT: ; return to shader part epilog 2230main_body: 2231 %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 1) 2232 ret <4 x float> %v 2233} 2234 2235define amdgpu_ps <4 x float> @sample_1d_slc(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { 2236; VERDE-LABEL: sample_1d_slc: 2237; VERDE: ; %bb.0: ; %main_body 2238; VERDE-NEXT: s_mov_b64 s[12:13], exec 2239; VERDE-NEXT: s_wqm_b64 exec, exec 2240; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 2241; VERDE-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf slc 2242; VERDE-NEXT: s_waitcnt vmcnt(0) 2243; VERDE-NEXT: ; return to shader part epilog 2244; 2245; GFX6789-LABEL: sample_1d_slc: 2246; GFX6789: ; %bb.0: ; %main_body 2247; GFX6789-NEXT: s_mov_b64 s[12:13], exec 2248; GFX6789-NEXT: s_wqm_b64 exec, exec 2249; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 2250; GFX6789-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf slc 2251; GFX6789-NEXT: s_waitcnt vmcnt(0) 2252; GFX6789-NEXT: ; return to shader part epilog 2253; 2254; GFX10PLUS-LABEL: sample_1d_slc: 2255; GFX10PLUS: ; %bb.0: ; %main_body 2256; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 2257; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 2258; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 2259; GFX10PLUS-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D slc 2260; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 2261; GFX10PLUS-NEXT: ; return to shader part epilog 2262; 2263; GFX12-LABEL: sample_1d_slc: 2264; GFX12: ; %bb.0: ; %main_body 2265; GFX12-NEXT: s_mov_b32 s12, exec_lo 2266; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo 2267; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12 2268; GFX12-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D th:TH_LOAD_HT 2269; GFX12-NEXT: s_wait_samplecnt 0x0 2270; GFX12-NEXT: ; return to shader part epilog 2271main_body: 2272 %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 2) 2273 ret <4 x float> %v 2274} 2275 2276define amdgpu_ps <4 x float> @sample_1d_glc_slc(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { 2277; VERDE-LABEL: sample_1d_glc_slc: 2278; VERDE: ; %bb.0: ; %main_body 2279; VERDE-NEXT: s_mov_b64 s[12:13], exec 2280; VERDE-NEXT: s_wqm_b64 exec, exec 2281; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 2282; VERDE-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf glc slc 2283; VERDE-NEXT: s_waitcnt vmcnt(0) 2284; VERDE-NEXT: ; return to shader part epilog 2285; 2286; GFX6789-LABEL: sample_1d_glc_slc: 2287; GFX6789: ; %bb.0: ; %main_body 2288; GFX6789-NEXT: s_mov_b64 s[12:13], exec 2289; GFX6789-NEXT: s_wqm_b64 exec, exec 2290; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 2291; GFX6789-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf glc slc 2292; GFX6789-NEXT: s_waitcnt vmcnt(0) 2293; GFX6789-NEXT: ; return to shader part epilog 2294; 2295; GFX10PLUS-LABEL: sample_1d_glc_slc: 2296; GFX10PLUS: ; %bb.0: ; %main_body 2297; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 2298; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 2299; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 2300; GFX10PLUS-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D glc slc 2301; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 2302; GFX10PLUS-NEXT: ; return to shader part epilog 2303; 2304; GFX12-LABEL: sample_1d_glc_slc: 2305; GFX12: ; %bb.0: ; %main_body 2306; GFX12-NEXT: s_mov_b32 s12, exec_lo 2307; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo 2308; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12 2309; GFX12-NEXT: image_sample v[0:3], v0, s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D th:TH_LOAD_LU 2310; GFX12-NEXT: s_wait_samplecnt 0x0 2311; GFX12-NEXT: ; return to shader part epilog 2312main_body: 2313 %v = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 3) 2314 ret <4 x float> %v 2315} 2316 2317define amdgpu_ps float @adjust_writemask_sample_0(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { 2318; VERDE-LABEL: adjust_writemask_sample_0: 2319; VERDE: ; %bb.0: ; %main_body 2320; VERDE-NEXT: s_mov_b64 s[12:13], exec 2321; VERDE-NEXT: s_wqm_b64 exec, exec 2322; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 2323; VERDE-NEXT: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1 2324; VERDE-NEXT: s_waitcnt vmcnt(0) 2325; VERDE-NEXT: ; return to shader part epilog 2326; 2327; GFX6789-LABEL: adjust_writemask_sample_0: 2328; GFX6789: ; %bb.0: ; %main_body 2329; GFX6789-NEXT: s_mov_b64 s[12:13], exec 2330; GFX6789-NEXT: s_wqm_b64 exec, exec 2331; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 2332; GFX6789-NEXT: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1 2333; GFX6789-NEXT: s_waitcnt vmcnt(0) 2334; GFX6789-NEXT: ; return to shader part epilog 2335; 2336; GFX10PLUS-LABEL: adjust_writemask_sample_0: 2337; GFX10PLUS: ; %bb.0: ; %main_body 2338; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 2339; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 2340; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 2341; GFX10PLUS-NEXT: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D 2342; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 2343; GFX10PLUS-NEXT: ; return to shader part epilog 2344; 2345; GFX12-LABEL: adjust_writemask_sample_0: 2346; GFX12: ; %bb.0: ; %main_body 2347; GFX12-NEXT: s_mov_b32 s12, exec_lo 2348; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo 2349; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12 2350; GFX12-NEXT: image_sample v0, v0, s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_1D 2351; GFX12-NEXT: s_wait_samplecnt 0x0 2352; GFX12-NEXT: ; return to shader part epilog 2353main_body: 2354 %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 2355 %elt0 = extractelement <4 x float> %r, i32 0 2356 ret float %elt0 2357} 2358 2359define amdgpu_ps <2 x float> @adjust_writemask_sample_01(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { 2360; VERDE-LABEL: adjust_writemask_sample_01: 2361; VERDE: ; %bb.0: ; %main_body 2362; VERDE-NEXT: s_mov_b64 s[12:13], exec 2363; VERDE-NEXT: s_wqm_b64 exec, exec 2364; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 2365; VERDE-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x3 2366; VERDE-NEXT: s_waitcnt vmcnt(0) 2367; VERDE-NEXT: ; return to shader part epilog 2368; 2369; GFX6789-LABEL: adjust_writemask_sample_01: 2370; GFX6789: ; %bb.0: ; %main_body 2371; GFX6789-NEXT: s_mov_b64 s[12:13], exec 2372; GFX6789-NEXT: s_wqm_b64 exec, exec 2373; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 2374; GFX6789-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x3 2375; GFX6789-NEXT: s_waitcnt vmcnt(0) 2376; GFX6789-NEXT: ; return to shader part epilog 2377; 2378; GFX10PLUS-LABEL: adjust_writemask_sample_01: 2379; GFX10PLUS: ; %bb.0: ; %main_body 2380; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 2381; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 2382; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 2383; GFX10PLUS-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x3 dim:SQ_RSRC_IMG_1D 2384; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 2385; GFX10PLUS-NEXT: ; return to shader part epilog 2386; 2387; GFX12-LABEL: adjust_writemask_sample_01: 2388; GFX12: ; %bb.0: ; %main_body 2389; GFX12-NEXT: s_mov_b32 s12, exec_lo 2390; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo 2391; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12 2392; GFX12-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x3 dim:SQ_RSRC_IMG_1D 2393; GFX12-NEXT: s_wait_samplecnt 0x0 2394; GFX12-NEXT: ; return to shader part epilog 2395main_body: 2396 %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 2397 %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 0, i32 1> 2398 ret <2 x float> %out 2399} 2400 2401define amdgpu_ps <3 x float> @adjust_writemask_sample_012(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { 2402; VERDE-LABEL: adjust_writemask_sample_012: 2403; VERDE: ; %bb.0: ; %main_body 2404; VERDE-NEXT: s_mov_b64 s[12:13], exec 2405; VERDE-NEXT: s_wqm_b64 exec, exec 2406; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 2407; VERDE-NEXT: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0x7 2408; VERDE-NEXT: s_waitcnt vmcnt(0) 2409; VERDE-NEXT: ; return to shader part epilog 2410; 2411; GFX6789-LABEL: adjust_writemask_sample_012: 2412; GFX6789: ; %bb.0: ; %main_body 2413; GFX6789-NEXT: s_mov_b64 s[12:13], exec 2414; GFX6789-NEXT: s_wqm_b64 exec, exec 2415; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 2416; GFX6789-NEXT: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0x7 2417; GFX6789-NEXT: s_waitcnt vmcnt(0) 2418; GFX6789-NEXT: ; return to shader part epilog 2419; 2420; GFX10PLUS-LABEL: adjust_writemask_sample_012: 2421; GFX10PLUS: ; %bb.0: ; %main_body 2422; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 2423; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 2424; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 2425; GFX10PLUS-NEXT: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0x7 dim:SQ_RSRC_IMG_1D 2426; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 2427; GFX10PLUS-NEXT: ; return to shader part epilog 2428; 2429; GFX12-LABEL: adjust_writemask_sample_012: 2430; GFX12: ; %bb.0: ; %main_body 2431; GFX12-NEXT: s_mov_b32 s12, exec_lo 2432; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo 2433; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12 2434; GFX12-NEXT: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0x7 dim:SQ_RSRC_IMG_1D 2435; GFX12-NEXT: s_wait_samplecnt 0x0 2436; GFX12-NEXT: ; return to shader part epilog 2437main_body: 2438 %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 2439 %out = shufflevector <4 x float> %r, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2> 2440 ret <3 x float> %out 2441} 2442 2443define amdgpu_ps <2 x float> @adjust_writemask_sample_12(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { 2444; VERDE-LABEL: adjust_writemask_sample_12: 2445; VERDE: ; %bb.0: ; %main_body 2446; VERDE-NEXT: s_mov_b64 s[12:13], exec 2447; VERDE-NEXT: s_wqm_b64 exec, exec 2448; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 2449; VERDE-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6 2450; VERDE-NEXT: s_waitcnt vmcnt(0) 2451; VERDE-NEXT: ; return to shader part epilog 2452; 2453; GFX6789-LABEL: adjust_writemask_sample_12: 2454; GFX6789: ; %bb.0: ; %main_body 2455; GFX6789-NEXT: s_mov_b64 s[12:13], exec 2456; GFX6789-NEXT: s_wqm_b64 exec, exec 2457; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 2458; GFX6789-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6 2459; GFX6789-NEXT: s_waitcnt vmcnt(0) 2460; GFX6789-NEXT: ; return to shader part epilog 2461; 2462; GFX10PLUS-LABEL: adjust_writemask_sample_12: 2463; GFX10PLUS: ; %bb.0: ; %main_body 2464; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 2465; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 2466; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 2467; GFX10PLUS-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_1D 2468; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 2469; GFX10PLUS-NEXT: ; return to shader part epilog 2470; 2471; GFX12-LABEL: adjust_writemask_sample_12: 2472; GFX12: ; %bb.0: ; %main_body 2473; GFX12-NEXT: s_mov_b32 s12, exec_lo 2474; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo 2475; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12 2476; GFX12-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_1D 2477; GFX12-NEXT: s_wait_samplecnt 0x0 2478; GFX12-NEXT: ; return to shader part epilog 2479main_body: 2480 %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 2481 %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 1, i32 2> 2482 ret <2 x float> %out 2483} 2484 2485define amdgpu_ps <2 x float> @adjust_writemask_sample_03(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { 2486; VERDE-LABEL: adjust_writemask_sample_03: 2487; VERDE: ; %bb.0: ; %main_body 2488; VERDE-NEXT: s_mov_b64 s[12:13], exec 2489; VERDE-NEXT: s_wqm_b64 exec, exec 2490; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 2491; VERDE-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x9 2492; VERDE-NEXT: s_waitcnt vmcnt(0) 2493; VERDE-NEXT: ; return to shader part epilog 2494; 2495; GFX6789-LABEL: adjust_writemask_sample_03: 2496; GFX6789: ; %bb.0: ; %main_body 2497; GFX6789-NEXT: s_mov_b64 s[12:13], exec 2498; GFX6789-NEXT: s_wqm_b64 exec, exec 2499; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 2500; GFX6789-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x9 2501; GFX6789-NEXT: s_waitcnt vmcnt(0) 2502; GFX6789-NEXT: ; return to shader part epilog 2503; 2504; GFX10PLUS-LABEL: adjust_writemask_sample_03: 2505; GFX10PLUS: ; %bb.0: ; %main_body 2506; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 2507; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 2508; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 2509; GFX10PLUS-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x9 dim:SQ_RSRC_IMG_1D 2510; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 2511; GFX10PLUS-NEXT: ; return to shader part epilog 2512; 2513; GFX12-LABEL: adjust_writemask_sample_03: 2514; GFX12: ; %bb.0: ; %main_body 2515; GFX12-NEXT: s_mov_b32 s12, exec_lo 2516; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo 2517; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12 2518; GFX12-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x9 dim:SQ_RSRC_IMG_1D 2519; GFX12-NEXT: s_wait_samplecnt 0x0 2520; GFX12-NEXT: ; return to shader part epilog 2521main_body: 2522 %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 2523 %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 0, i32 3> 2524 ret <2 x float> %out 2525} 2526 2527define amdgpu_ps <2 x float> @adjust_writemask_sample_13(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { 2528; VERDE-LABEL: adjust_writemask_sample_13: 2529; VERDE: ; %bb.0: ; %main_body 2530; VERDE-NEXT: s_mov_b64 s[12:13], exec 2531; VERDE-NEXT: s_wqm_b64 exec, exec 2532; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 2533; VERDE-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa 2534; VERDE-NEXT: s_waitcnt vmcnt(0) 2535; VERDE-NEXT: ; return to shader part epilog 2536; 2537; GFX6789-LABEL: adjust_writemask_sample_13: 2538; GFX6789: ; %bb.0: ; %main_body 2539; GFX6789-NEXT: s_mov_b64 s[12:13], exec 2540; GFX6789-NEXT: s_wqm_b64 exec, exec 2541; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 2542; GFX6789-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa 2543; GFX6789-NEXT: s_waitcnt vmcnt(0) 2544; GFX6789-NEXT: ; return to shader part epilog 2545; 2546; GFX10PLUS-LABEL: adjust_writemask_sample_13: 2547; GFX10PLUS: ; %bb.0: ; %main_body 2548; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 2549; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 2550; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 2551; GFX10PLUS-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa dim:SQ_RSRC_IMG_1D 2552; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 2553; GFX10PLUS-NEXT: ; return to shader part epilog 2554; 2555; GFX12-LABEL: adjust_writemask_sample_13: 2556; GFX12: ; %bb.0: ; %main_body 2557; GFX12-NEXT: s_mov_b32 s12, exec_lo 2558; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo 2559; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12 2560; GFX12-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa dim:SQ_RSRC_IMG_1D 2561; GFX12-NEXT: s_wait_samplecnt 0x0 2562; GFX12-NEXT: ; return to shader part epilog 2563main_body: 2564 %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 2565 %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 1, i32 3> 2566 ret <2 x float> %out 2567} 2568 2569define amdgpu_ps <3 x float> @adjust_writemask_sample_123(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { 2570; VERDE-LABEL: adjust_writemask_sample_123: 2571; VERDE: ; %bb.0: ; %main_body 2572; VERDE-NEXT: s_mov_b64 s[12:13], exec 2573; VERDE-NEXT: s_wqm_b64 exec, exec 2574; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 2575; VERDE-NEXT: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0xe 2576; VERDE-NEXT: s_waitcnt vmcnt(0) 2577; VERDE-NEXT: ; return to shader part epilog 2578; 2579; GFX6789-LABEL: adjust_writemask_sample_123: 2580; GFX6789: ; %bb.0: ; %main_body 2581; GFX6789-NEXT: s_mov_b64 s[12:13], exec 2582; GFX6789-NEXT: s_wqm_b64 exec, exec 2583; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 2584; GFX6789-NEXT: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0xe 2585; GFX6789-NEXT: s_waitcnt vmcnt(0) 2586; GFX6789-NEXT: ; return to shader part epilog 2587; 2588; GFX10PLUS-LABEL: adjust_writemask_sample_123: 2589; GFX10PLUS: ; %bb.0: ; %main_body 2590; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 2591; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 2592; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 2593; GFX10PLUS-NEXT: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0xe dim:SQ_RSRC_IMG_1D 2594; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 2595; GFX10PLUS-NEXT: ; return to shader part epilog 2596; 2597; GFX12-LABEL: adjust_writemask_sample_123: 2598; GFX12: ; %bb.0: ; %main_body 2599; GFX12-NEXT: s_mov_b32 s12, exec_lo 2600; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo 2601; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12 2602; GFX12-NEXT: image_sample v[0:2], v0, s[0:7], s[8:11] dmask:0xe dim:SQ_RSRC_IMG_1D 2603; GFX12-NEXT: s_wait_samplecnt 0x0 2604; GFX12-NEXT: ; return to shader part epilog 2605main_body: 2606 %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 15, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 2607 %out = shufflevector <4 x float> %r, <4 x float> undef, <3 x i32> <i32 1, i32 2, i32 3> 2608 ret <3 x float> %out 2609} 2610 2611define amdgpu_ps <4 x float> @adjust_writemask_sample_none_enabled(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { 2612; VERDE-LABEL: adjust_writemask_sample_none_enabled: 2613; VERDE: ; %bb.0: ; %main_body 2614; VERDE-NEXT: ; return to shader part epilog 2615; 2616; GFX6789-LABEL: adjust_writemask_sample_none_enabled: 2617; GFX6789: ; %bb.0: ; %main_body 2618; GFX6789-NEXT: ; return to shader part epilog 2619; 2620; GFX10PLUS-LABEL: adjust_writemask_sample_none_enabled: 2621; GFX10PLUS: ; %bb.0: ; %main_body 2622; GFX10PLUS-NEXT: ; return to shader part epilog 2623; 2624; GFX12-LABEL: adjust_writemask_sample_none_enabled: 2625; GFX12: ; %bb.0: ; %main_body 2626; GFX12-NEXT: ; return to shader part epilog 2627main_body: 2628 %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 0, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 2629 ret <4 x float> %r 2630} 2631 2632define amdgpu_ps <2 x float> @adjust_writemask_sample_123_to_12(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { 2633; VERDE-LABEL: adjust_writemask_sample_123_to_12: 2634; VERDE: ; %bb.0: ; %main_body 2635; VERDE-NEXT: s_mov_b64 s[12:13], exec 2636; VERDE-NEXT: s_wqm_b64 exec, exec 2637; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 2638; VERDE-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6 2639; VERDE-NEXT: s_waitcnt vmcnt(0) 2640; VERDE-NEXT: ; return to shader part epilog 2641; 2642; GFX6789-LABEL: adjust_writemask_sample_123_to_12: 2643; GFX6789: ; %bb.0: ; %main_body 2644; GFX6789-NEXT: s_mov_b64 s[12:13], exec 2645; GFX6789-NEXT: s_wqm_b64 exec, exec 2646; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 2647; GFX6789-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6 2648; GFX6789-NEXT: s_waitcnt vmcnt(0) 2649; GFX6789-NEXT: ; return to shader part epilog 2650; 2651; GFX10PLUS-LABEL: adjust_writemask_sample_123_to_12: 2652; GFX10PLUS: ; %bb.0: ; %main_body 2653; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 2654; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 2655; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 2656; GFX10PLUS-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_1D 2657; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 2658; GFX10PLUS-NEXT: ; return to shader part epilog 2659; 2660; GFX12-LABEL: adjust_writemask_sample_123_to_12: 2661; GFX12: ; %bb.0: ; %main_body 2662; GFX12-NEXT: s_mov_b32 s12, exec_lo 2663; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo 2664; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12 2665; GFX12-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_1D 2666; GFX12-NEXT: s_wait_samplecnt 0x0 2667; GFX12-NEXT: ; return to shader part epilog 2668main_body: 2669 %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 14, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 2670 %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 0, i32 1> 2671 ret <2 x float> %out 2672} 2673 2674define amdgpu_ps <2 x float> @adjust_writemask_sample_013_to_13(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s) { 2675; VERDE-LABEL: adjust_writemask_sample_013_to_13: 2676; VERDE: ; %bb.0: ; %main_body 2677; VERDE-NEXT: s_mov_b64 s[12:13], exec 2678; VERDE-NEXT: s_wqm_b64 exec, exec 2679; VERDE-NEXT: s_and_b64 exec, exec, s[12:13] 2680; VERDE-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa 2681; VERDE-NEXT: s_waitcnt vmcnt(0) 2682; VERDE-NEXT: ; return to shader part epilog 2683; 2684; GFX6789-LABEL: adjust_writemask_sample_013_to_13: 2685; GFX6789: ; %bb.0: ; %main_body 2686; GFX6789-NEXT: s_mov_b64 s[12:13], exec 2687; GFX6789-NEXT: s_wqm_b64 exec, exec 2688; GFX6789-NEXT: s_and_b64 exec, exec, s[12:13] 2689; GFX6789-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa 2690; GFX6789-NEXT: s_waitcnt vmcnt(0) 2691; GFX6789-NEXT: ; return to shader part epilog 2692; 2693; GFX10PLUS-LABEL: adjust_writemask_sample_013_to_13: 2694; GFX10PLUS: ; %bb.0: ; %main_body 2695; GFX10PLUS-NEXT: s_mov_b32 s12, exec_lo 2696; GFX10PLUS-NEXT: s_wqm_b32 exec_lo, exec_lo 2697; GFX10PLUS-NEXT: s_and_b32 exec_lo, exec_lo, s12 2698; GFX10PLUS-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa dim:SQ_RSRC_IMG_1D 2699; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) 2700; GFX10PLUS-NEXT: ; return to shader part epilog 2701; 2702; GFX12-LABEL: adjust_writemask_sample_013_to_13: 2703; GFX12: ; %bb.0: ; %main_body 2704; GFX12-NEXT: s_mov_b32 s12, exec_lo 2705; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo 2706; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s12 2707; GFX12-NEXT: image_sample v[0:1], v0, s[0:7], s[8:11] dmask:0xa dim:SQ_RSRC_IMG_1D 2708; GFX12-NEXT: s_wait_samplecnt 0x0 2709; GFX12-NEXT: ; return to shader part epilog 2710main_body: 2711 %r = call <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32 11, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 2712 %out = shufflevector <4 x float> %r, <4 x float> undef, <2 x i32> <i32 1, i32 2> 2713 ret <2 x float> %out 2714} 2715 2716declare <4 x float> @llvm.amdgcn.image.sample.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 2717declare {<4 x float>,i32} @llvm.amdgcn.image.sample.1d.v4f32i32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 2718declare <4 x float> @llvm.amdgcn.image.sample.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 2719declare <4 x float> @llvm.amdgcn.image.sample.3d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 2720declare <4 x float> @llvm.amdgcn.image.sample.cube.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 2721declare <4 x float> @llvm.amdgcn.image.sample.1darray.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 2722declare <4 x float> @llvm.amdgcn.image.sample.2darray.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 2723 2724declare <4 x float> @llvm.amdgcn.image.sample.c.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 2725declare <4 x float> @llvm.amdgcn.image.sample.c.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 2726declare <4 x float> @llvm.amdgcn.image.sample.cl.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 2727declare <4 x float> @llvm.amdgcn.image.sample.cl.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 2728declare <4 x float> @llvm.amdgcn.image.sample.c.cl.1d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 2729declare <4 x float> @llvm.amdgcn.image.sample.c.cl.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 2730 2731declare <4 x float> @llvm.amdgcn.image.sample.b.1d.v4f32.f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 2732declare <4 x float> @llvm.amdgcn.image.sample.b.2d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 2733declare <4 x float> @llvm.amdgcn.image.sample.c.b.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 2734declare <4 x float> @llvm.amdgcn.image.sample.c.b.2d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 2735declare <4 x float> @llvm.amdgcn.image.sample.b.cl.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 2736declare <4 x float> @llvm.amdgcn.image.sample.b.cl.2d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 2737declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 2738declare <4 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 2739 2740declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 2741declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 2742declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 2743declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 2744declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 2745declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 2746declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f32(i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 2747declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f32(i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 2748 2749declare <4 x float> @llvm.amdgcn.image.sample.l.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 2750declare <4 x float> @llvm.amdgcn.image.sample.l.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 2751declare <4 x float> @llvm.amdgcn.image.sample.c.l.1d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 2752declare <4 x float> @llvm.amdgcn.image.sample.c.l.2d.v4f32.f32(i32, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 2753 2754declare <4 x float> @llvm.amdgcn.image.sample.lz.1d.v4f32.f32(i32, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 2755declare <4 x float> @llvm.amdgcn.image.sample.lz.2d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 2756declare <4 x float> @llvm.amdgcn.image.sample.c.lz.1d.v4f32.f32(i32, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 2757declare <4 x float> @llvm.amdgcn.image.sample.c.lz.2d.v4f32.f32(i32, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 2758 2759declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 2760declare {float, i32} @llvm.amdgcn.image.sample.c.d.o.2darray.f32i32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 2761declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 2762declare {<2 x float>, i32} @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32i32.f32.f32(i32, i32, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 2763 2764attributes #0 = { nounwind } 2765attributes #1 = { nounwind readonly } 2766attributes #2 = { nounwind readnone } 2767