; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - %s | FileCheck -check-prefix=GFX6 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - %s | FileCheck -check-prefix=GFX10NSA %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -amdgpu-enable-vopd=0 -o - %s | FileCheck -check-prefix=GFX10NSA %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1200 -amdgpu-enable-delay-alu=0 -amdgpu-enable-vopd=0 -o - %s | FileCheck -check-prefix=GFX12 %s

; The check lines in this file are generated; regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.

; Plain gather4 on a 2D image with coordinates (s, t) and dmask operand 1.
define amdgpu_ps <4 x float> @gather4_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
; GFX6-LABEL: gather4_2d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b64 s[14:15], exec
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    s_mov_b32 s8, s10
; GFX6-NEXT:    s_mov_b32 s9, s11
; GFX6-NEXT:    s_mov_b32 s10, s12
; GFX6-NEXT:    s_mov_b32 s11, s13
; GFX6-NEXT:    s_wqm_b64 exec, exec
; GFX6-NEXT:    s_and_b64 exec, exec, s[14:15]
; GFX6-NEXT:    image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10NSA-LABEL: gather4_2d:
; GFX10NSA:       ; %bb.0: ; %main_body
; GFX10NSA-NEXT:    s_mov_b32 s1, exec_lo
; GFX10NSA-NEXT:    s_mov_b32 s0, s2
; GFX10NSA-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10NSA-NEXT:    s_and_b32 exec_lo, exec_lo, s1
; GFX10NSA-NEXT:    s_mov_b32 s1, s3
; GFX10NSA-NEXT:    s_mov_b32 s2, s4
; GFX10NSA-NEXT:    s_mov_b32 s3, s5
; GFX10NSA-NEXT:    s_mov_b32 s4, s6
; GFX10NSA-NEXT:    s_mov_b32 s5, s7
; GFX10NSA-NEXT:    s_mov_b32 s6, s8
; GFX10NSA-NEXT:    s_mov_b32 s7, s9
; GFX10NSA-NEXT:    s_mov_b32 s8, s10
; GFX10NSA-NEXT:    s_mov_b32 s9, s11
; GFX10NSA-NEXT:    s_mov_b32 s10, s12
; GFX10NSA-NEXT:    s_mov_b32 s11, s13
; GFX10NSA-NEXT:    image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10NSA-NEXT:    s_waitcnt vmcnt(0)
; GFX10NSA-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: gather4_2d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s1, exec_lo
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT:    s_and_b32 exec_lo, exec_lo, s1
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    s_mov_b32 s8, s10
; GFX12-NEXT:    s_mov_b32 s9, s11
; GFX12-NEXT:    s_mov_b32 s10, s12
; GFX12-NEXT:    s_mov_b32 s11, s13
; GFX12-NEXT:    image_gather4 v[0:3], [v0, v1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32 1, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; Same operation as gather4_2d, but the second-to-last i32 operand is 1 (0 in
; the other tests) and the intrinsic returns { <4 x float>, i32 }. The checks
; expect the tfe modifier and a zero-initialised five-register destination
; v[0:4]; only the vector element of the result is returned.
define amdgpu_ps <4 x float> @gather4_2d_tfe(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) {
; GFX6-LABEL: gather4_2d_tfe:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b64 s[14:15], exec
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    s_mov_b32 s8, s10
; GFX6-NEXT:    s_mov_b32 s9, s11
; GFX6-NEXT:    s_mov_b32 s10, s12
; GFX6-NEXT:    s_mov_b32 s11, s13
; GFX6-NEXT:    s_wqm_b64 exec, exec
; GFX6-NEXT:    v_mov_b32_e32 v5, v0
; GFX6-NEXT:    v_mov_b32_e32 v0, 0
; GFX6-NEXT:    v_mov_b32_e32 v6, v1
; GFX6-NEXT:    v_mov_b32_e32 v1, v0
; GFX6-NEXT:    v_mov_b32_e32 v2, v0
; GFX6-NEXT:    v_mov_b32_e32 v3, v0
; GFX6-NEXT:    v_mov_b32_e32 v4, v0
; GFX6-NEXT:    s_and_b64 exec, exec, s[14:15]
; GFX6-NEXT:    image_gather4 v[0:4], v[5:6], s[0:7], s[8:11] dmask:0x1 tfe
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10NSA-LABEL: gather4_2d_tfe:
; GFX10NSA:       ; %bb.0: ; %main_body
; GFX10NSA-NEXT:    s_mov_b32 s14, exec_lo
; GFX10NSA-NEXT:    s_mov_b32 s0, s2
; GFX10NSA-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10NSA-NEXT:    v_mov_b32_e32 v5, v0
; GFX10NSA-NEXT:    v_mov_b32_e32 v0, 0
; GFX10NSA-NEXT:    v_mov_b32_e32 v6, v1
; GFX10NSA-NEXT:    s_mov_b32 s1, s3
; GFX10NSA-NEXT:    s_mov_b32 s2, s4
; GFX10NSA-NEXT:    s_mov_b32 s3, s5
; GFX10NSA-NEXT:    s_mov_b32 s4, s6
; GFX10NSA-NEXT:    s_mov_b32 s5, s7
; GFX10NSA-NEXT:    s_mov_b32 s6, s8
; GFX10NSA-NEXT:    s_mov_b32 s7, s9
; GFX10NSA-NEXT:    s_mov_b32 s8, s10
; GFX10NSA-NEXT:    s_mov_b32 s9, s11
; GFX10NSA-NEXT:    s_mov_b32 s10, s12
; GFX10NSA-NEXT:    s_mov_b32 s11, s13
; GFX10NSA-NEXT:    v_mov_b32_e32 v1, v0
; GFX10NSA-NEXT:    v_mov_b32_e32 v2, v0
; GFX10NSA-NEXT:    v_mov_b32_e32 v3, v0
; GFX10NSA-NEXT:    v_mov_b32_e32 v4, v0
; GFX10NSA-NEXT:    s_and_b32 exec_lo, exec_lo, s14
; GFX10NSA-NEXT:    image_gather4 v[0:4], v[5:6], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D tfe
; GFX10NSA-NEXT:    s_waitcnt vmcnt(0)
; GFX10NSA-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: gather4_2d_tfe:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s14, exec_lo
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT:    v_mov_b32_e32 v5, v0
; GFX12-NEXT:    v_mov_b32_e32 v0, 0
; GFX12-NEXT:    v_mov_b32_e32 v6, v1
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    s_mov_b32 s8, s10
; GFX12-NEXT:    s_mov_b32 s9, s11
; GFX12-NEXT:    s_mov_b32 s10, s12
; GFX12-NEXT:    s_mov_b32 s11, s13
; GFX12-NEXT:    v_mov_b32_e32 v1, v0
; GFX12-NEXT:    v_mov_b32_e32 v2, v0
; GFX12-NEXT:    v_mov_b32_e32 v3, v0
; GFX12-NEXT:    v_mov_b32_e32 v4, v0
; GFX12-NEXT:    s_and_b32 exec_lo, exec_lo, s14
; GFX12-NEXT:    image_gather4 v[0:4], [v5, v6], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D tfe
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call { <4 x float>, i32 } @llvm.amdgcn.image.gather4.2d.sl_v4f32i32s.f32(i32 1, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 1, i32 0)
  %r = extractvalue { <4 x float>, i32 } %v, 0
  ret <4 x float> %r
}

; gather4 on a cube image with coordinates (s, t, face). The tahiti checks
; expect the da modifier; the newer targets expect dim:SQ_RSRC_IMG_CUBE.
define amdgpu_ps <4 x float> @gather4_cube(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %face) {
; GFX6-LABEL: gather4_cube:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b64 s[14:15], exec
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    s_mov_b32 s8, s10
; GFX6-NEXT:    s_mov_b32 s9, s11
; GFX6-NEXT:    s_mov_b32 s10, s12
; GFX6-NEXT:    s_mov_b32 s11, s13
; GFX6-NEXT:    s_wqm_b64 exec, exec
; GFX6-NEXT:    s_and_b64 exec, exec, s[14:15]
; GFX6-NEXT:    image_gather4 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 da
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10NSA-LABEL: gather4_cube:
; GFX10NSA:       ; %bb.0: ; %main_body
; GFX10NSA-NEXT:    s_mov_b32 s1, exec_lo
; GFX10NSA-NEXT:    s_mov_b32 s0, s2
; GFX10NSA-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10NSA-NEXT:    s_and_b32 exec_lo, exec_lo, s1
; GFX10NSA-NEXT:    s_mov_b32 s1, s3
; GFX10NSA-NEXT:    s_mov_b32 s2, s4
; GFX10NSA-NEXT:    s_mov_b32 s3, s5
; GFX10NSA-NEXT:    s_mov_b32 s4, s6
; GFX10NSA-NEXT:    s_mov_b32 s5, s7
; GFX10NSA-NEXT:    s_mov_b32 s6, s8
; GFX10NSA-NEXT:    s_mov_b32 s7, s9
; GFX10NSA-NEXT:    s_mov_b32 s8, s10
; GFX10NSA-NEXT:    s_mov_b32 s9, s11
; GFX10NSA-NEXT:    s_mov_b32 s10, s12
; GFX10NSA-NEXT:    s_mov_b32 s11, s13
; GFX10NSA-NEXT:    image_gather4 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_CUBE
; GFX10NSA-NEXT:    s_waitcnt vmcnt(0)
; GFX10NSA-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: gather4_cube:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s1, exec_lo
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT:    s_and_b32 exec_lo, exec_lo, s1
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    s_mov_b32 s8, s10
; GFX12-NEXT:    s_mov_b32 s9, s11
; GFX12-NEXT:    s_mov_b32 s10, s12
; GFX12-NEXT:    s_mov_b32 s11, s13
; GFX12-NEXT:    image_gather4 v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_CUBE
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.cube.v4f32.f32(i32 1, float %s, float %t, float %face, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; gather4 on a 2D array image with coordinates (s, t, slice). The tahiti
; checks expect the da modifier; the newer targets expect
; dim:SQ_RSRC_IMG_2D_ARRAY.
define amdgpu_ps <4 x float> @gather4_2darray(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %slice) {
; GFX6-LABEL: gather4_2darray:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b64 s[14:15], exec
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    s_mov_b32 s8, s10
; GFX6-NEXT:    s_mov_b32 s9, s11
; GFX6-NEXT:    s_mov_b32 s10, s12
; GFX6-NEXT:    s_mov_b32 s11, s13
; GFX6-NEXT:    s_wqm_b64 exec, exec
; GFX6-NEXT:    s_and_b64 exec, exec, s[14:15]
; GFX6-NEXT:    image_gather4 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 da
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10NSA-LABEL: gather4_2darray:
; GFX10NSA:       ; %bb.0: ; %main_body
; GFX10NSA-NEXT:    s_mov_b32 s1, exec_lo
; GFX10NSA-NEXT:    s_mov_b32 s0, s2
; GFX10NSA-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10NSA-NEXT:    s_and_b32 exec_lo, exec_lo, s1
; GFX10NSA-NEXT:    s_mov_b32 s1, s3
; GFX10NSA-NEXT:    s_mov_b32 s2, s4
; GFX10NSA-NEXT:    s_mov_b32 s3, s5
; GFX10NSA-NEXT:    s_mov_b32 s4, s6
; GFX10NSA-NEXT:    s_mov_b32 s5, s7
; GFX10NSA-NEXT:    s_mov_b32 s6, s8
; GFX10NSA-NEXT:    s_mov_b32 s7, s9
; GFX10NSA-NEXT:    s_mov_b32 s8, s10
; GFX10NSA-NEXT:    s_mov_b32 s9, s11
; GFX10NSA-NEXT:    s_mov_b32 s10, s12
; GFX10NSA-NEXT:    s_mov_b32 s11, s13
; GFX10NSA-NEXT:    image_gather4 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY
; GFX10NSA-NEXT:    s_waitcnt vmcnt(0)
; GFX10NSA-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: gather4_2darray:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s1, exec_lo
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT:    s_and_b32 exec_lo, exec_lo, s1
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    s_mov_b32 s8, s10
; GFX12-NEXT:    s_mov_b32 s9, s11
; GFX12-NEXT:    s_mov_b32 s10, s12
; GFX12-NEXT:    s_mov_b32 s11, s13
; GFX12-NEXT:    image_gather4 v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D_ARRAY
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.2darray.v4f32.f32(i32 1, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; gather4.c variant on a 2D image: operands are (zcompare, s, t).
define amdgpu_ps <4 x float> @gather4_c_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) {
; GFX6-LABEL: gather4_c_2d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b64 s[14:15], exec
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    s_mov_b32 s8, s10
; GFX6-NEXT:    s_mov_b32 s9, s11
; GFX6-NEXT:    s_mov_b32 s10, s12
; GFX6-NEXT:    s_mov_b32 s11, s13
; GFX6-NEXT:    s_wqm_b64 exec, exec
; GFX6-NEXT:    s_and_b64 exec, exec, s[14:15]
; GFX6-NEXT:    image_gather4_c v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10NSA-LABEL: gather4_c_2d:
; GFX10NSA:       ; %bb.0: ; %main_body
; GFX10NSA-NEXT:    s_mov_b32 s1, exec_lo
; GFX10NSA-NEXT:    s_mov_b32 s0, s2
; GFX10NSA-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10NSA-NEXT:    s_and_b32 exec_lo, exec_lo, s1
; GFX10NSA-NEXT:    s_mov_b32 s1, s3
; GFX10NSA-NEXT:    s_mov_b32 s2, s4
; GFX10NSA-NEXT:    s_mov_b32 s3, s5
; GFX10NSA-NEXT:    s_mov_b32 s4, s6
; GFX10NSA-NEXT:    s_mov_b32 s5, s7
; GFX10NSA-NEXT:    s_mov_b32 s6, s8
; GFX10NSA-NEXT:    s_mov_b32 s7, s9
; GFX10NSA-NEXT:    s_mov_b32 s8, s10
; GFX10NSA-NEXT:    s_mov_b32 s9, s11
; GFX10NSA-NEXT:    s_mov_b32 s10, s12
; GFX10NSA-NEXT:    s_mov_b32 s11, s13
; GFX10NSA-NEXT:    image_gather4_c v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10NSA-NEXT:    s_waitcnt vmcnt(0)
; GFX10NSA-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: gather4_c_2d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s1, exec_lo
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT:    s_and_b32 exec_lo, exec_lo, s1
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    s_mov_b32 s8, s10
; GFX12-NEXT:    s_mov_b32 s9, s11
; GFX12-NEXT:    s_mov_b32 s10, s12
; GFX12-NEXT:    s_mov_b32 s11, s13
; GFX12-NEXT:    image_gather4_c v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f32(i32 1, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; gather4.cl variant on a 2D image: operands are (s, t, clamp).
define amdgpu_ps <4 x float> @gather4_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %clamp) {
; GFX6-LABEL: gather4_cl_2d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b64 s[14:15], exec
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    s_mov_b32 s8, s10
; GFX6-NEXT:    s_mov_b32 s9, s11
; GFX6-NEXT:    s_mov_b32 s10, s12
; GFX6-NEXT:    s_mov_b32 s11, s13
; GFX6-NEXT:    s_wqm_b64 exec, exec
; GFX6-NEXT:    s_and_b64 exec, exec, s[14:15]
; GFX6-NEXT:    image_gather4_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10NSA-LABEL: gather4_cl_2d:
; GFX10NSA:       ; %bb.0: ; %main_body
; GFX10NSA-NEXT:    s_mov_b32 s1, exec_lo
; GFX10NSA-NEXT:    s_mov_b32 s0, s2
; GFX10NSA-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10NSA-NEXT:    s_and_b32 exec_lo, exec_lo, s1
; GFX10NSA-NEXT:    s_mov_b32 s1, s3
; GFX10NSA-NEXT:    s_mov_b32 s2, s4
; GFX10NSA-NEXT:    s_mov_b32 s3, s5
; GFX10NSA-NEXT:    s_mov_b32 s4, s6
; GFX10NSA-NEXT:    s_mov_b32 s5, s7
; GFX10NSA-NEXT:    s_mov_b32 s6, s8
; GFX10NSA-NEXT:    s_mov_b32 s7, s9
; GFX10NSA-NEXT:    s_mov_b32 s8, s10
; GFX10NSA-NEXT:    s_mov_b32 s9, s11
; GFX10NSA-NEXT:    s_mov_b32 s10, s12
; GFX10NSA-NEXT:    s_mov_b32 s11, s13
; GFX10NSA-NEXT:    image_gather4_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10NSA-NEXT:    s_waitcnt vmcnt(0)
; GFX10NSA-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: gather4_cl_2d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s1, exec_lo
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT:    s_and_b32 exec_lo, exec_lo, s1
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    s_mov_b32 s8, s10
; GFX12-NEXT:    s_mov_b32 s9, s11
; GFX12-NEXT:    s_mov_b32 s10, s12
; GFX12-NEXT:    s_mov_b32 s11, s13
; GFX12-NEXT:    image_gather4_cl v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f32(i32 1, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; gather4.c.cl variant on a 2D image: operands are (zcompare, s, t, clamp).
define amdgpu_ps <4 x float> @gather4_c_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %clamp) {
; GFX6-LABEL: gather4_c_cl_2d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b64 s[14:15], exec
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    s_mov_b32 s8, s10
; GFX6-NEXT:    s_mov_b32 s9, s11
; GFX6-NEXT:    s_mov_b32 s10, s12
; GFX6-NEXT:    s_mov_b32 s11, s13
; GFX6-NEXT:    s_wqm_b64 exec, exec
; GFX6-NEXT:    s_and_b64 exec, exec, s[14:15]
; GFX6-NEXT:    image_gather4_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10NSA-LABEL: gather4_c_cl_2d:
; GFX10NSA:       ; %bb.0: ; %main_body
; GFX10NSA-NEXT:    s_mov_b32 s1, exec_lo
; GFX10NSA-NEXT:    s_mov_b32 s0, s2
; GFX10NSA-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10NSA-NEXT:    s_and_b32 exec_lo, exec_lo, s1
; GFX10NSA-NEXT:    s_mov_b32 s1, s3
; GFX10NSA-NEXT:    s_mov_b32 s2, s4
; GFX10NSA-NEXT:    s_mov_b32 s3, s5
; GFX10NSA-NEXT:    s_mov_b32 s4, s6
; GFX10NSA-NEXT:    s_mov_b32 s5, s7
; GFX10NSA-NEXT:    s_mov_b32 s6, s8
; GFX10NSA-NEXT:    s_mov_b32 s7, s9
; GFX10NSA-NEXT:    s_mov_b32 s8, s10
; GFX10NSA-NEXT:    s_mov_b32 s9, s11
; GFX10NSA-NEXT:    s_mov_b32 s10, s12
; GFX10NSA-NEXT:    s_mov_b32 s11, s13
; GFX10NSA-NEXT:    image_gather4_c_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10NSA-NEXT:    s_waitcnt vmcnt(0)
; GFX10NSA-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: gather4_c_cl_2d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s1, exec_lo
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT:    s_and_b32 exec_lo, exec_lo, s1
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    s_mov_b32 s8, s10
; GFX12-NEXT:    s_mov_b32 s9, s11
; GFX12-NEXT:    s_mov_b32 s10, s12
; GFX12-NEXT:    s_mov_b32 s11, s13
; GFX12-NEXT:    image_gather4_c_cl v[0:3], [v0, v1, v2, v3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.2d.v4f32.f32(i32 1, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; gather4.b variant on a 2D image: operands are (bias, s, t).
define amdgpu_ps <4 x float> @gather4_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t) {
; GFX6-LABEL: gather4_b_2d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b64 s[14:15], exec
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    s_mov_b32 s8, s10
; GFX6-NEXT:    s_mov_b32 s9, s11
; GFX6-NEXT:    s_mov_b32 s10, s12
; GFX6-NEXT:    s_mov_b32 s11, s13
; GFX6-NEXT:    s_wqm_b64 exec, exec
; GFX6-NEXT:    s_and_b64 exec, exec, s[14:15]
; GFX6-NEXT:    image_gather4_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10NSA-LABEL: gather4_b_2d:
; GFX10NSA:       ; %bb.0: ; %main_body
; GFX10NSA-NEXT:    s_mov_b32 s1, exec_lo
; GFX10NSA-NEXT:    s_mov_b32 s0, s2
; GFX10NSA-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10NSA-NEXT:    s_and_b32 exec_lo, exec_lo, s1
; GFX10NSA-NEXT:    s_mov_b32 s1, s3
; GFX10NSA-NEXT:    s_mov_b32 s2, s4
; GFX10NSA-NEXT:    s_mov_b32 s3, s5
; GFX10NSA-NEXT:    s_mov_b32 s4, s6
; GFX10NSA-NEXT:    s_mov_b32 s5, s7
; GFX10NSA-NEXT:    s_mov_b32 s6, s8
; GFX10NSA-NEXT:    s_mov_b32 s7, s9
; GFX10NSA-NEXT:    s_mov_b32 s8, s10
; GFX10NSA-NEXT:    s_mov_b32 s9, s11
; GFX10NSA-NEXT:    s_mov_b32 s10, s12
; GFX10NSA-NEXT:    s_mov_b32 s11, s13
; GFX10NSA-NEXT:    image_gather4_b v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10NSA-NEXT:    s_waitcnt vmcnt(0)
; GFX10NSA-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: gather4_b_2d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s1, exec_lo
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT:    s_and_b32 exec_lo, exec_lo, s1
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    s_mov_b32 s8, s10
; GFX12-NEXT:    s_mov_b32 s9, s11
; GFX12-NEXT:    s_mov_b32 s10, s12
; GFX12-NEXT:    s_mov_b32 s11, s13
; GFX12-NEXT:    image_gather4_b v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f32.f32(i32 1, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; gather4.c.b variant on a 2D image: operands are (bias, zcompare, s, t).
define amdgpu_ps <4 x float> @gather4_c_b_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t) {
; GFX6-LABEL: gather4_c_b_2d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b64 s[14:15], exec
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    s_mov_b32 s8, s10
; GFX6-NEXT:    s_mov_b32 s9, s11
; GFX6-NEXT:    s_mov_b32 s10, s12
; GFX6-NEXT:    s_mov_b32 s11, s13
; GFX6-NEXT:    s_wqm_b64 exec, exec
; GFX6-NEXT:    s_and_b64 exec, exec, s[14:15]
; GFX6-NEXT:    image_gather4_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10NSA-LABEL: gather4_c_b_2d:
; GFX10NSA:       ; %bb.0: ; %main_body
; GFX10NSA-NEXT:    s_mov_b32 s1, exec_lo
; GFX10NSA-NEXT:    s_mov_b32 s0, s2
; GFX10NSA-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10NSA-NEXT:    s_and_b32 exec_lo, exec_lo, s1
; GFX10NSA-NEXT:    s_mov_b32 s1, s3
; GFX10NSA-NEXT:    s_mov_b32 s2, s4
; GFX10NSA-NEXT:    s_mov_b32 s3, s5
; GFX10NSA-NEXT:    s_mov_b32 s4, s6
; GFX10NSA-NEXT:    s_mov_b32 s5, s7
; GFX10NSA-NEXT:    s_mov_b32 s6, s8
; GFX10NSA-NEXT:    s_mov_b32 s7, s9
; GFX10NSA-NEXT:    s_mov_b32 s8, s10
; GFX10NSA-NEXT:    s_mov_b32 s9, s11
; GFX10NSA-NEXT:    s_mov_b32 s10, s12
; GFX10NSA-NEXT:    s_mov_b32 s11, s13
; GFX10NSA-NEXT:    image_gather4_c_b v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10NSA-NEXT:    s_waitcnt vmcnt(0)
; GFX10NSA-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: gather4_c_b_2d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s1, exec_lo
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT:    s_and_b32 exec_lo, exec_lo, s1
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    s_mov_b32 s8, s10
; GFX12-NEXT:    s_mov_b32 s9, s11
; GFX12-NEXT:    s_mov_b32 s10, s12
; GFX12-NEXT:    s_mov_b32 s11, s13
; GFX12-NEXT:    image_gather4_c_b v[0:3], [v0, v1, v2, v3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f32.f32(i32 1, float %bias, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; gather4.b.cl variant on a 2D image: operands are (bias, s, t, clamp).
define amdgpu_ps <4 x float> @gather4_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %s, float %t, float %clamp) {
; GFX6-LABEL: gather4_b_cl_2d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b64 s[14:15], exec
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    s_mov_b32 s8, s10
; GFX6-NEXT:    s_mov_b32 s9, s11
; GFX6-NEXT:    s_mov_b32 s10, s12
; GFX6-NEXT:    s_mov_b32 s11, s13
; GFX6-NEXT:    s_wqm_b64 exec, exec
; GFX6-NEXT:    s_and_b64 exec, exec, s[14:15]
; GFX6-NEXT:    image_gather4_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10NSA-LABEL: gather4_b_cl_2d:
; GFX10NSA:       ; %bb.0: ; %main_body
; GFX10NSA-NEXT:    s_mov_b32 s1, exec_lo
; GFX10NSA-NEXT:    s_mov_b32 s0, s2
; GFX10NSA-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10NSA-NEXT:    s_and_b32 exec_lo, exec_lo, s1
; GFX10NSA-NEXT:    s_mov_b32 s1, s3
; GFX10NSA-NEXT:    s_mov_b32 s2, s4
; GFX10NSA-NEXT:    s_mov_b32 s3, s5
; GFX10NSA-NEXT:    s_mov_b32 s4, s6
; GFX10NSA-NEXT:    s_mov_b32 s5, s7
; GFX10NSA-NEXT:    s_mov_b32 s6, s8
; GFX10NSA-NEXT:    s_mov_b32 s7, s9
; GFX10NSA-NEXT:    s_mov_b32 s8, s10
; GFX10NSA-NEXT:    s_mov_b32 s9, s11
; GFX10NSA-NEXT:    s_mov_b32 s10, s12
; GFX10NSA-NEXT:    s_mov_b32 s11, s13
; GFX10NSA-NEXT:    image_gather4_b_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10NSA-NEXT:    s_waitcnt vmcnt(0)
; GFX10NSA-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: gather4_b_cl_2d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s1, exec_lo
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT:    s_and_b32 exec_lo, exec_lo, s1
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    s_mov_b32 s8, s10
; GFX12-NEXT:    s_mov_b32 s9, s11
; GFX12-NEXT:    s_mov_b32 s10, s12
; GFX12-NEXT:    s_mov_b32 s11, s13
; GFX12-NEXT:    image_gather4_b_cl v[0:3], [v0, v1, v2, v3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f32.f32(i32 1, float %bias, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; gather4.c.b.cl variant on a 2D image: operands are
; (bias, zcompare, s, t, clamp). The gfx1200 checks expect the last two
; address registers to be printed as the range v[3:4].
define amdgpu_ps <4 x float> @gather4_c_b_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %bias, float %zcompare, float %s, float %t, float %clamp) {
; GFX6-LABEL: gather4_c_b_cl_2d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b64 s[14:15], exec
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    s_mov_b32 s8, s10
; GFX6-NEXT:    s_mov_b32 s9, s11
; GFX6-NEXT:    s_mov_b32 s10, s12
; GFX6-NEXT:    s_mov_b32 s11, s13
; GFX6-NEXT:    s_wqm_b64 exec, exec
; GFX6-NEXT:    s_and_b64 exec, exec, s[14:15]
; GFX6-NEXT:    image_gather4_c_b_cl v[0:3], v[0:4], s[0:7], s[8:11] dmask:0x1
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10NSA-LABEL: gather4_c_b_cl_2d:
; GFX10NSA:       ; %bb.0: ; %main_body
; GFX10NSA-NEXT:    s_mov_b32 s1, exec_lo
; GFX10NSA-NEXT:    s_mov_b32 s0, s2
; GFX10NSA-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX10NSA-NEXT:    s_and_b32 exec_lo, exec_lo, s1
; GFX10NSA-NEXT:    s_mov_b32 s1, s3
; GFX10NSA-NEXT:    s_mov_b32 s2, s4
; GFX10NSA-NEXT:    s_mov_b32 s3, s5
; GFX10NSA-NEXT:    s_mov_b32 s4, s6
; GFX10NSA-NEXT:    s_mov_b32 s5, s7
; GFX10NSA-NEXT:    s_mov_b32 s6, s8
; GFX10NSA-NEXT:    s_mov_b32 s7, s9
; GFX10NSA-NEXT:    s_mov_b32 s8, s10
; GFX10NSA-NEXT:    s_mov_b32 s9, s11
; GFX10NSA-NEXT:    s_mov_b32 s10, s12
; GFX10NSA-NEXT:    s_mov_b32 s11, s13
; GFX10NSA-NEXT:    image_gather4_c_b_cl v[0:3], v[0:4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10NSA-NEXT:    s_waitcnt vmcnt(0)
; GFX10NSA-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: gather4_c_b_cl_2d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s1, exec_lo
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_wqm_b32 exec_lo, exec_lo
; GFX12-NEXT:    s_and_b32 exec_lo, exec_lo, s1
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    s_mov_b32 s8, s10
; GFX12-NEXT:    s_mov_b32 s9, s11
; GFX12-NEXT:    s_mov_b32 s10, s12
; GFX12-NEXT:    s_mov_b32 s11, s13
; GFX12-NEXT:    image_gather4_c_b_cl v[0:3], [v0, v1, v2, v[3:4]], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f32(i32 1, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; gather4.l variant on a 2D image: operands are (s, t, lod). Unlike the tests
; above, the expected code contains no s_wqm / exec save-and-restore sequence.
define amdgpu_ps <4 x float> @gather4_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t, float %lod) {
; GFX6-LABEL: gather4_l_2d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    s_mov_b32 s8, s10
; GFX6-NEXT:    s_mov_b32 s9, s11
; GFX6-NEXT:    s_mov_b32 s10, s12
; GFX6-NEXT:    s_mov_b32 s11, s13
; GFX6-NEXT:    image_gather4_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10NSA-LABEL: gather4_l_2d:
; GFX10NSA:       ; %bb.0: ; %main_body
; GFX10NSA-NEXT:    s_mov_b32 s0, s2
; GFX10NSA-NEXT:    s_mov_b32 s1, s3
; GFX10NSA-NEXT:    s_mov_b32 s2, s4
; GFX10NSA-NEXT:    s_mov_b32 s3, s5
; GFX10NSA-NEXT:    s_mov_b32 s4, s6
; GFX10NSA-NEXT:    s_mov_b32 s5, s7
; GFX10NSA-NEXT:    s_mov_b32 s6, s8
; GFX10NSA-NEXT:    s_mov_b32 s7, s9
; GFX10NSA-NEXT:    s_mov_b32 s8, s10
; GFX10NSA-NEXT:    s_mov_b32 s9, s11
; GFX10NSA-NEXT:    s_mov_b32 s10, s12
; GFX10NSA-NEXT:    s_mov_b32 s11, s13
; GFX10NSA-NEXT:    image_gather4_l v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10NSA-NEXT:    s_waitcnt vmcnt(0)
; GFX10NSA-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: gather4_l_2d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
; GFX12-NEXT:    s_mov_b32 s4, s6
; GFX12-NEXT:    s_mov_b32 s5, s7
; GFX12-NEXT:    s_mov_b32 s6, s8
; GFX12-NEXT:    s_mov_b32 s7, s9
; GFX12-NEXT:    s_mov_b32 s8, s10
; GFX12-NEXT:    s_mov_b32 s9, s11
; GFX12-NEXT:    s_mov_b32 s10, s12
; GFX12-NEXT:    s_mov_b32 s11, s13
; GFX12-NEXT:    image_gather4_l v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX12-NEXT:    s_wait_samplecnt 0x0
; GFX12-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32 1, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; gather4_c_l on a 2D image: arguments are (zcompare, s, t, lod).
define amdgpu_ps <4 x float> @gather4_c_l_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t, float %lod) {
; GFX6-LABEL: gather4_c_l_2d:
; GFX6:       ; %bb.0: ; %main_body
; GFX6-NEXT:    s_mov_b32 s0, s2
; GFX6-NEXT:    s_mov_b32 s1, s3
; GFX6-NEXT:    s_mov_b32 s2, s4
; GFX6-NEXT:    s_mov_b32 s3, s5
; GFX6-NEXT:    s_mov_b32 s4, s6
; GFX6-NEXT:    s_mov_b32 s5, s7
; GFX6-NEXT:    s_mov_b32 s6, s8
; GFX6-NEXT:    s_mov_b32 s7, s9
; GFX6-NEXT:    s_mov_b32 s8, s10
; GFX6-NEXT:    s_mov_b32 s9, s11
; GFX6-NEXT:    s_mov_b32 s10, s12
; GFX6-NEXT:    s_mov_b32 s11, s13
; GFX6-NEXT:    image_gather4_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1
; GFX6-NEXT:    s_waitcnt vmcnt(0)
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX10NSA-LABEL: gather4_c_l_2d:
; GFX10NSA:       ; %bb.0: ; %main_body
; GFX10NSA-NEXT:    s_mov_b32 s0, s2
; GFX10NSA-NEXT:    s_mov_b32 s1, s3
; GFX10NSA-NEXT:    s_mov_b32 s2, s4
; GFX10NSA-NEXT:    s_mov_b32 s3, s5
; GFX10NSA-NEXT:    s_mov_b32 s4, s6
; GFX10NSA-NEXT:    s_mov_b32 s5, s7
; GFX10NSA-NEXT:    s_mov_b32 s6, s8
; GFX10NSA-NEXT:    s_mov_b32 s7, s9
; GFX10NSA-NEXT:    s_mov_b32 s8, s10
; GFX10NSA-NEXT:    s_mov_b32 s9, s11
; GFX10NSA-NEXT:    s_mov_b32 s10, s12
; GFX10NSA-NEXT:    s_mov_b32 s11, s13
; GFX10NSA-NEXT:    image_gather4_c_l v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10NSA-NEXT:    s_waitcnt vmcnt(0)
; GFX10NSA-NEXT:    ; return to shader part epilog
;
; GFX12-LABEL: gather4_c_l_2d:
; GFX12:       ; %bb.0: ; %main_body
; GFX12-NEXT:    s_mov_b32 s0, s2
; GFX12-NEXT:    s_mov_b32 s1, s3
; GFX12-NEXT:    s_mov_b32 s2, s4
; GFX12-NEXT:    s_mov_b32 s3, s5
;
GFX12-NEXT: s_mov_b32 s4, s6 880; GFX12-NEXT: s_mov_b32 s5, s7 881; GFX12-NEXT: s_mov_b32 s6, s8 882; GFX12-NEXT: s_mov_b32 s7, s9 883; GFX12-NEXT: s_mov_b32 s8, s10 884; GFX12-NEXT: s_mov_b32 s9, s11 885; GFX12-NEXT: s_mov_b32 s10, s12 886; GFX12-NEXT: s_mov_b32 s11, s13 887; GFX12-NEXT: image_gather4_c_l v[0:3], [v0, v1, v2, v3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D 888; GFX12-NEXT: s_wait_samplecnt 0x0 889; GFX12-NEXT: ; return to shader part epilog 890main_body: 891 %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32 1, float %zcompare, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) 892 ret <4 x float> %v 893} 894 895define amdgpu_ps <4 x float> @gather4_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) { 896; GFX6-LABEL: gather4_lz_2d: 897; GFX6: ; %bb.0: ; %main_body 898; GFX6-NEXT: s_mov_b32 s0, s2 899; GFX6-NEXT: s_mov_b32 s1, s3 900; GFX6-NEXT: s_mov_b32 s2, s4 901; GFX6-NEXT: s_mov_b32 s3, s5 902; GFX6-NEXT: s_mov_b32 s4, s6 903; GFX6-NEXT: s_mov_b32 s5, s7 904; GFX6-NEXT: s_mov_b32 s6, s8 905; GFX6-NEXT: s_mov_b32 s7, s9 906; GFX6-NEXT: s_mov_b32 s8, s10 907; GFX6-NEXT: s_mov_b32 s9, s11 908; GFX6-NEXT: s_mov_b32 s10, s12 909; GFX6-NEXT: s_mov_b32 s11, s13 910; GFX6-NEXT: image_gather4_lz v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 911; GFX6-NEXT: s_waitcnt vmcnt(0) 912; GFX6-NEXT: ; return to shader part epilog 913; 914; GFX10NSA-LABEL: gather4_lz_2d: 915; GFX10NSA: ; %bb.0: ; %main_body 916; GFX10NSA-NEXT: s_mov_b32 s0, s2 917; GFX10NSA-NEXT: s_mov_b32 s1, s3 918; GFX10NSA-NEXT: s_mov_b32 s2, s4 919; GFX10NSA-NEXT: s_mov_b32 s3, s5 920; GFX10NSA-NEXT: s_mov_b32 s4, s6 921; GFX10NSA-NEXT: s_mov_b32 s5, s7 922; GFX10NSA-NEXT: s_mov_b32 s6, s8 923; GFX10NSA-NEXT: s_mov_b32 s7, s9 924; GFX10NSA-NEXT: s_mov_b32 s8, s10 925; GFX10NSA-NEXT: s_mov_b32 s9, s11 926; GFX10NSA-NEXT: s_mov_b32 s10, s12 927; GFX10NSA-NEXT: s_mov_b32 s11, s13 928; GFX10NSA-NEXT: image_gather4_lz 
v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D 929; GFX10NSA-NEXT: s_waitcnt vmcnt(0) 930; GFX10NSA-NEXT: ; return to shader part epilog 931; 932; GFX12-LABEL: gather4_lz_2d: 933; GFX12: ; %bb.0: ; %main_body 934; GFX12-NEXT: s_mov_b32 s0, s2 935; GFX12-NEXT: s_mov_b32 s1, s3 936; GFX12-NEXT: s_mov_b32 s2, s4 937; GFX12-NEXT: s_mov_b32 s3, s5 938; GFX12-NEXT: s_mov_b32 s4, s6 939; GFX12-NEXT: s_mov_b32 s5, s7 940; GFX12-NEXT: s_mov_b32 s6, s8 941; GFX12-NEXT: s_mov_b32 s7, s9 942; GFX12-NEXT: s_mov_b32 s8, s10 943; GFX12-NEXT: s_mov_b32 s9, s11 944; GFX12-NEXT: s_mov_b32 s10, s12 945; GFX12-NEXT: s_mov_b32 s11, s13 946; GFX12-NEXT: image_gather4_lz v[0:3], [v0, v1], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D 947; GFX12-NEXT: s_wait_samplecnt 0x0 948; GFX12-NEXT: ; return to shader part epilog 949main_body: 950 %v = call <4 x float> @llvm.amdgcn.image.gather4.lz.2d.v4f32.f32(i32 1, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) 951 ret <4 x float> %v 952} 953 954define amdgpu_ps <4 x float> @gather4_c_lz_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %s, float %t) { 955; GFX6-LABEL: gather4_c_lz_2d: 956; GFX6: ; %bb.0: ; %main_body 957; GFX6-NEXT: s_mov_b32 s0, s2 958; GFX6-NEXT: s_mov_b32 s1, s3 959; GFX6-NEXT: s_mov_b32 s2, s4 960; GFX6-NEXT: s_mov_b32 s3, s5 961; GFX6-NEXT: s_mov_b32 s4, s6 962; GFX6-NEXT: s_mov_b32 s5, s7 963; GFX6-NEXT: s_mov_b32 s6, s8 964; GFX6-NEXT: s_mov_b32 s7, s9 965; GFX6-NEXT: s_mov_b32 s8, s10 966; GFX6-NEXT: s_mov_b32 s9, s11 967; GFX6-NEXT: s_mov_b32 s10, s12 968; GFX6-NEXT: s_mov_b32 s11, s13 969; GFX6-NEXT: image_gather4_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 970; GFX6-NEXT: s_waitcnt vmcnt(0) 971; GFX6-NEXT: ; return to shader part epilog 972; 973; GFX10NSA-LABEL: gather4_c_lz_2d: 974; GFX10NSA: ; %bb.0: ; %main_body 975; GFX10NSA-NEXT: s_mov_b32 s0, s2 976; GFX10NSA-NEXT: s_mov_b32 s1, s3 977; GFX10NSA-NEXT: s_mov_b32 s2, s4 978; GFX10NSA-NEXT: 
s_mov_b32 s3, s5 979; GFX10NSA-NEXT: s_mov_b32 s4, s6 980; GFX10NSA-NEXT: s_mov_b32 s5, s7 981; GFX10NSA-NEXT: s_mov_b32 s6, s8 982; GFX10NSA-NEXT: s_mov_b32 s7, s9 983; GFX10NSA-NEXT: s_mov_b32 s8, s10 984; GFX10NSA-NEXT: s_mov_b32 s9, s11 985; GFX10NSA-NEXT: s_mov_b32 s10, s12 986; GFX10NSA-NEXT: s_mov_b32 s11, s13 987; GFX10NSA-NEXT: image_gather4_c_lz v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D 988; GFX10NSA-NEXT: s_waitcnt vmcnt(0) 989; GFX10NSA-NEXT: ; return to shader part epilog 990; 991; GFX12-LABEL: gather4_c_lz_2d: 992; GFX12: ; %bb.0: ; %main_body 993; GFX12-NEXT: s_mov_b32 s0, s2 994; GFX12-NEXT: s_mov_b32 s1, s3 995; GFX12-NEXT: s_mov_b32 s2, s4 996; GFX12-NEXT: s_mov_b32 s3, s5 997; GFX12-NEXT: s_mov_b32 s4, s6 998; GFX12-NEXT: s_mov_b32 s5, s7 999; GFX12-NEXT: s_mov_b32 s6, s8 1000; GFX12-NEXT: s_mov_b32 s7, s9 1001; GFX12-NEXT: s_mov_b32 s8, s10 1002; GFX12-NEXT: s_mov_b32 s9, s11 1003; GFX12-NEXT: s_mov_b32 s10, s12 1004; GFX12-NEXT: s_mov_b32 s11, s13 1005; GFX12-NEXT: image_gather4_c_lz v[0:3], [v0, v1, v2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D 1006; GFX12-NEXT: s_wait_samplecnt 0x0 1007; GFX12-NEXT: ; return to shader part epilog 1008main_body: 1009 %v = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f32(i32 1, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) 1010 ret <4 x float> %v 1011} 1012 1013define amdgpu_ps <4 x float> @gather4_2d_dmask_2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) { 1014; GFX6-LABEL: gather4_2d_dmask_2: 1015; GFX6: ; %bb.0: ; %main_body 1016; GFX6-NEXT: s_mov_b64 s[14:15], exec 1017; GFX6-NEXT: s_mov_b32 s0, s2 1018; GFX6-NEXT: s_mov_b32 s1, s3 1019; GFX6-NEXT: s_mov_b32 s2, s4 1020; GFX6-NEXT: s_mov_b32 s3, s5 1021; GFX6-NEXT: s_mov_b32 s4, s6 1022; GFX6-NEXT: s_mov_b32 s5, s7 1023; GFX6-NEXT: s_mov_b32 s6, s8 1024; GFX6-NEXT: s_mov_b32 s7, s9 1025; GFX6-NEXT: s_mov_b32 s8, s10 1026; GFX6-NEXT: s_mov_b32 s9, s11 
1027; GFX6-NEXT: s_mov_b32 s10, s12 1028; GFX6-NEXT: s_mov_b32 s11, s13 1029; GFX6-NEXT: s_wqm_b64 exec, exec 1030; GFX6-NEXT: s_and_b64 exec, exec, s[14:15] 1031; GFX6-NEXT: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x2 1032; GFX6-NEXT: s_waitcnt vmcnt(0) 1033; GFX6-NEXT: ; return to shader part epilog 1034; 1035; GFX10NSA-LABEL: gather4_2d_dmask_2: 1036; GFX10NSA: ; %bb.0: ; %main_body 1037; GFX10NSA-NEXT: s_mov_b32 s1, exec_lo 1038; GFX10NSA-NEXT: s_mov_b32 s0, s2 1039; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo 1040; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s1 1041; GFX10NSA-NEXT: s_mov_b32 s1, s3 1042; GFX10NSA-NEXT: s_mov_b32 s2, s4 1043; GFX10NSA-NEXT: s_mov_b32 s3, s5 1044; GFX10NSA-NEXT: s_mov_b32 s4, s6 1045; GFX10NSA-NEXT: s_mov_b32 s5, s7 1046; GFX10NSA-NEXT: s_mov_b32 s6, s8 1047; GFX10NSA-NEXT: s_mov_b32 s7, s9 1048; GFX10NSA-NEXT: s_mov_b32 s8, s10 1049; GFX10NSA-NEXT: s_mov_b32 s9, s11 1050; GFX10NSA-NEXT: s_mov_b32 s10, s12 1051; GFX10NSA-NEXT: s_mov_b32 s11, s13 1052; GFX10NSA-NEXT: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x2 dim:SQ_RSRC_IMG_2D 1053; GFX10NSA-NEXT: s_waitcnt vmcnt(0) 1054; GFX10NSA-NEXT: ; return to shader part epilog 1055; 1056; GFX12-LABEL: gather4_2d_dmask_2: 1057; GFX12: ; %bb.0: ; %main_body 1058; GFX12-NEXT: s_mov_b32 s1, exec_lo 1059; GFX12-NEXT: s_mov_b32 s0, s2 1060; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo 1061; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s1 1062; GFX12-NEXT: s_mov_b32 s1, s3 1063; GFX12-NEXT: s_mov_b32 s2, s4 1064; GFX12-NEXT: s_mov_b32 s3, s5 1065; GFX12-NEXT: s_mov_b32 s4, s6 1066; GFX12-NEXT: s_mov_b32 s5, s7 1067; GFX12-NEXT: s_mov_b32 s6, s8 1068; GFX12-NEXT: s_mov_b32 s7, s9 1069; GFX12-NEXT: s_mov_b32 s8, s10 1070; GFX12-NEXT: s_mov_b32 s9, s11 1071; GFX12-NEXT: s_mov_b32 s10, s12 1072; GFX12-NEXT: s_mov_b32 s11, s13 1073; GFX12-NEXT: image_gather4 v[0:3], [v0, v1], s[0:7], s[8:11] dmask:0x2 dim:SQ_RSRC_IMG_2D 1074; GFX12-NEXT: s_wait_samplecnt 0x0 1075; GFX12-NEXT: ; return to 
shader part epilog 1076main_body: 1077 %v = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32 2, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) 1078 ret <4 x float> %v 1079} 1080 1081define amdgpu_ps <4 x float> @gather4_2d_dmask_4(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) { 1082; GFX6-LABEL: gather4_2d_dmask_4: 1083; GFX6: ; %bb.0: ; %main_body 1084; GFX6-NEXT: s_mov_b64 s[14:15], exec 1085; GFX6-NEXT: s_mov_b32 s0, s2 1086; GFX6-NEXT: s_mov_b32 s1, s3 1087; GFX6-NEXT: s_mov_b32 s2, s4 1088; GFX6-NEXT: s_mov_b32 s3, s5 1089; GFX6-NEXT: s_mov_b32 s4, s6 1090; GFX6-NEXT: s_mov_b32 s5, s7 1091; GFX6-NEXT: s_mov_b32 s6, s8 1092; GFX6-NEXT: s_mov_b32 s7, s9 1093; GFX6-NEXT: s_mov_b32 s8, s10 1094; GFX6-NEXT: s_mov_b32 s9, s11 1095; GFX6-NEXT: s_mov_b32 s10, s12 1096; GFX6-NEXT: s_mov_b32 s11, s13 1097; GFX6-NEXT: s_wqm_b64 exec, exec 1098; GFX6-NEXT: s_and_b64 exec, exec, s[14:15] 1099; GFX6-NEXT: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x4 1100; GFX6-NEXT: s_waitcnt vmcnt(0) 1101; GFX6-NEXT: ; return to shader part epilog 1102; 1103; GFX10NSA-LABEL: gather4_2d_dmask_4: 1104; GFX10NSA: ; %bb.0: ; %main_body 1105; GFX10NSA-NEXT: s_mov_b32 s1, exec_lo 1106; GFX10NSA-NEXT: s_mov_b32 s0, s2 1107; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo 1108; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s1 1109; GFX10NSA-NEXT: s_mov_b32 s1, s3 1110; GFX10NSA-NEXT: s_mov_b32 s2, s4 1111; GFX10NSA-NEXT: s_mov_b32 s3, s5 1112; GFX10NSA-NEXT: s_mov_b32 s4, s6 1113; GFX10NSA-NEXT: s_mov_b32 s5, s7 1114; GFX10NSA-NEXT: s_mov_b32 s6, s8 1115; GFX10NSA-NEXT: s_mov_b32 s7, s9 1116; GFX10NSA-NEXT: s_mov_b32 s8, s10 1117; GFX10NSA-NEXT: s_mov_b32 s9, s11 1118; GFX10NSA-NEXT: s_mov_b32 s10, s12 1119; GFX10NSA-NEXT: s_mov_b32 s11, s13 1120; GFX10NSA-NEXT: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D 1121; GFX10NSA-NEXT: s_waitcnt vmcnt(0) 1122; GFX10NSA-NEXT: ; return to shader part epilog 1123; 
1124; GFX12-LABEL: gather4_2d_dmask_4: 1125; GFX12: ; %bb.0: ; %main_body 1126; GFX12-NEXT: s_mov_b32 s1, exec_lo 1127; GFX12-NEXT: s_mov_b32 s0, s2 1128; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo 1129; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s1 1130; GFX12-NEXT: s_mov_b32 s1, s3 1131; GFX12-NEXT: s_mov_b32 s2, s4 1132; GFX12-NEXT: s_mov_b32 s3, s5 1133; GFX12-NEXT: s_mov_b32 s4, s6 1134; GFX12-NEXT: s_mov_b32 s5, s7 1135; GFX12-NEXT: s_mov_b32 s6, s8 1136; GFX12-NEXT: s_mov_b32 s7, s9 1137; GFX12-NEXT: s_mov_b32 s8, s10 1138; GFX12-NEXT: s_mov_b32 s9, s11 1139; GFX12-NEXT: s_mov_b32 s10, s12 1140; GFX12-NEXT: s_mov_b32 s11, s13 1141; GFX12-NEXT: image_gather4 v[0:3], [v0, v1], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D 1142; GFX12-NEXT: s_wait_samplecnt 0x0 1143; GFX12-NEXT: ; return to shader part epilog 1144main_body: 1145 %v = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32 4, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) 1146 ret <4 x float> %v 1147} 1148 1149define amdgpu_ps <4 x float> @gather4_2d_dmask_8(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %s, float %t) { 1150; GFX6-LABEL: gather4_2d_dmask_8: 1151; GFX6: ; %bb.0: ; %main_body 1152; GFX6-NEXT: s_mov_b64 s[14:15], exec 1153; GFX6-NEXT: s_mov_b32 s0, s2 1154; GFX6-NEXT: s_mov_b32 s1, s3 1155; GFX6-NEXT: s_mov_b32 s2, s4 1156; GFX6-NEXT: s_mov_b32 s3, s5 1157; GFX6-NEXT: s_mov_b32 s4, s6 1158; GFX6-NEXT: s_mov_b32 s5, s7 1159; GFX6-NEXT: s_mov_b32 s6, s8 1160; GFX6-NEXT: s_mov_b32 s7, s9 1161; GFX6-NEXT: s_mov_b32 s8, s10 1162; GFX6-NEXT: s_mov_b32 s9, s11 1163; GFX6-NEXT: s_mov_b32 s10, s12 1164; GFX6-NEXT: s_mov_b32 s11, s13 1165; GFX6-NEXT: s_wqm_b64 exec, exec 1166; GFX6-NEXT: s_and_b64 exec, exec, s[14:15] 1167; GFX6-NEXT: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x8 1168; GFX6-NEXT: s_waitcnt vmcnt(0) 1169; GFX6-NEXT: ; return to shader part epilog 1170; 1171; GFX10NSA-LABEL: gather4_2d_dmask_8: 1172; GFX10NSA: ; %bb.0: ; %main_body 
1173; GFX10NSA-NEXT: s_mov_b32 s1, exec_lo 1174; GFX10NSA-NEXT: s_mov_b32 s0, s2 1175; GFX10NSA-NEXT: s_wqm_b32 exec_lo, exec_lo 1176; GFX10NSA-NEXT: s_and_b32 exec_lo, exec_lo, s1 1177; GFX10NSA-NEXT: s_mov_b32 s1, s3 1178; GFX10NSA-NEXT: s_mov_b32 s2, s4 1179; GFX10NSA-NEXT: s_mov_b32 s3, s5 1180; GFX10NSA-NEXT: s_mov_b32 s4, s6 1181; GFX10NSA-NEXT: s_mov_b32 s5, s7 1182; GFX10NSA-NEXT: s_mov_b32 s6, s8 1183; GFX10NSA-NEXT: s_mov_b32 s7, s9 1184; GFX10NSA-NEXT: s_mov_b32 s8, s10 1185; GFX10NSA-NEXT: s_mov_b32 s9, s11 1186; GFX10NSA-NEXT: s_mov_b32 s10, s12 1187; GFX10NSA-NEXT: s_mov_b32 s11, s13 1188; GFX10NSA-NEXT: image_gather4 v[0:3], v[0:1], s[0:7], s[8:11] dmask:0x8 dim:SQ_RSRC_IMG_2D 1189; GFX10NSA-NEXT: s_waitcnt vmcnt(0) 1190; GFX10NSA-NEXT: ; return to shader part epilog 1191; 1192; GFX12-LABEL: gather4_2d_dmask_8: 1193; GFX12: ; %bb.0: ; %main_body 1194; GFX12-NEXT: s_mov_b32 s1, exec_lo 1195; GFX12-NEXT: s_mov_b32 s0, s2 1196; GFX12-NEXT: s_wqm_b32 exec_lo, exec_lo 1197; GFX12-NEXT: s_and_b32 exec_lo, exec_lo, s1 1198; GFX12-NEXT: s_mov_b32 s1, s3 1199; GFX12-NEXT: s_mov_b32 s2, s4 1200; GFX12-NEXT: s_mov_b32 s3, s5 1201; GFX12-NEXT: s_mov_b32 s4, s6 1202; GFX12-NEXT: s_mov_b32 s5, s7 1203; GFX12-NEXT: s_mov_b32 s6, s8 1204; GFX12-NEXT: s_mov_b32 s7, s9 1205; GFX12-NEXT: s_mov_b32 s8, s10 1206; GFX12-NEXT: s_mov_b32 s9, s11 1207; GFX12-NEXT: s_mov_b32 s10, s12 1208; GFX12-NEXT: s_mov_b32 s11, s13 1209; GFX12-NEXT: image_gather4 v[0:3], [v0, v1], s[0:7], s[8:11] dmask:0x8 dim:SQ_RSRC_IMG_2D 1210; GFX12-NEXT: s_wait_samplecnt 0x0 1211; GFX12-NEXT: ; return to shader part epilog 1212main_body: 1213 %v = call <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32 8, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0) 1214 ret <4 x float> %v 1215} 1216 1217declare <4 x float> @llvm.amdgcn.image.gather4.2d.v4f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0 1218declare { <4 x float>, i32 
} @llvm.amdgcn.image.gather4.2d.sl_v4f32i32s.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0 1219declare <4 x float> @llvm.amdgcn.image.gather4.cube.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0 1220declare <4 x float> @llvm.amdgcn.image.gather4.2darray.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0 1221declare <4 x float> @llvm.amdgcn.image.gather4.c.2d.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0 1222declare <4 x float> @llvm.amdgcn.image.gather4.cl.2d.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0 1223declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.2d.v4f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0 1224declare <4 x float> @llvm.amdgcn.image.gather4.b.2d.v4f32.f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0 1225declare <4 x float> @llvm.amdgcn.image.gather4.c.b.2d.v4f32.f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0 1226declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.2d.v4f32.f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0 1227declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.2d.v4f32.f32.f32(i32 immarg, float, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0 1228declare <4 x float> @llvm.amdgcn.image.gather4.l.2d.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0 1229declare <4 x float> @llvm.amdgcn.image.gather4.c.l.2d.v4f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0 1230declare <4 x float> 
@llvm.amdgcn.image.gather4.lz.2d.v4f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0 1231declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.2d.v4f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0 1232 1233attributes #0 = { nounwind readonly } 1234