; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tahiti -o - %s | FileCheck -check-prefix=GFX6 %s
; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -o - %s | FileCheck -check-prefix=GFX10 %s
; RUN: not --crash llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1100 -o - %s 2>&1 | FileCheck -check-prefix=GFX11-ERR %s

; Purpose of this file: GlobalISel (-global-isel) assembly checks for calls to the
; llvm.amdgcn.image.gather4.*.o.2d intrinsics, one amdgpu_ps function per variant.
; The tahiti and gfx1010 invocations compare the emitted assembly against the
; autogenerated assertions in each function. The gfx1100 invocation is run under
; "not --crash" and only has to produce the "cannot select" error matched below.
; Every function takes %rsrc and %samp as inreg arguments and returns the
; <4 x float> result of a single intrinsic call.

; GFX11-ERR: LLVM ERROR: cannot select: {{.*}} = G_AMDGPU_INTRIN_IMAGE_LOAD intrinsic(@llvm.amdgcn.image.gather4

; gather4.o: passes %offset, %s, %t. The expected output saves exec, executes
; s_wqm, then ANDs exec with the saved copy before the image_gather4_o.
define amdgpu_ps <4 x float> @gather4_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t) {
; GFX6-LABEL: gather4_o_2d:
; GFX6: ; %bb.0: ; %main_body
; GFX6-NEXT: s_mov_b64 s[14:15], exec
; GFX6-NEXT: s_mov_b32 s0, s2
; GFX6-NEXT: s_mov_b32 s1, s3
; GFX6-NEXT: s_mov_b32 s2, s4
; GFX6-NEXT: s_mov_b32 s3, s5
; GFX6-NEXT: s_mov_b32 s4, s6
; GFX6-NEXT: s_mov_b32 s5, s7
; GFX6-NEXT: s_mov_b32 s6, s8
; GFX6-NEXT: s_mov_b32 s7, s9
; GFX6-NEXT: s_mov_b32 s8, s10
; GFX6-NEXT: s_mov_b32 s9, s11
; GFX6-NEXT: s_mov_b32 s10, s12
; GFX6-NEXT: s_mov_b32 s11, s13
; GFX6-NEXT: s_wqm_b64 exec, exec
; GFX6-NEXT: s_and_b64 exec, exec, s[14:15]
; GFX6-NEXT: image_gather4_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: gather4_o_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s1, exec_lo
; GFX10-NEXT: s_mov_b32 s0, s2
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s1
; GFX10-NEXT: s_mov_b32 s1, s3
; GFX10-NEXT: s_mov_b32 s2, s4
; GFX10-NEXT: s_mov_b32 s3, s5
; GFX10-NEXT: s_mov_b32 s4, s6
; GFX10-NEXT: s_mov_b32 s5, s7
; GFX10-NEXT: s_mov_b32 s6, s8
; GFX10-NEXT: s_mov_b32 s7, s9
; GFX10-NEXT: s_mov_b32 s8, s10
; GFX10-NEXT: s_mov_b32 s9, s11
; GFX10-NEXT: s_mov_b32 s10, s12
; GFX10-NEXT: s_mov_b32 s11, s13
; GFX10-NEXT: image_gather4_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; gather4.c.o: passes %offset, %zcompare, %s, %t. The expected output contains
; the exec save / s_wqm / s_and sequence.
define amdgpu_ps <4 x float> @gather4_c_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t) {
; GFX6-LABEL: gather4_c_o_2d:
; GFX6: ; %bb.0: ; %main_body
; GFX6-NEXT: s_mov_b64 s[14:15], exec
; GFX6-NEXT: s_mov_b32 s0, s2
; GFX6-NEXT: s_mov_b32 s1, s3
; GFX6-NEXT: s_mov_b32 s2, s4
; GFX6-NEXT: s_mov_b32 s3, s5
; GFX6-NEXT: s_mov_b32 s4, s6
; GFX6-NEXT: s_mov_b32 s5, s7
; GFX6-NEXT: s_mov_b32 s6, s8
; GFX6-NEXT: s_mov_b32 s7, s9
; GFX6-NEXT: s_mov_b32 s8, s10
; GFX6-NEXT: s_mov_b32 s9, s11
; GFX6-NEXT: s_mov_b32 s10, s12
; GFX6-NEXT: s_mov_b32 s11, s13
; GFX6-NEXT: s_wqm_b64 exec, exec
; GFX6-NEXT: s_and_b64 exec, exec, s[14:15]
; GFX6-NEXT: image_gather4_c_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: gather4_c_o_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s1, exec_lo
; GFX10-NEXT: s_mov_b32 s0, s2
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s1
; GFX10-NEXT: s_mov_b32 s1, s3
; GFX10-NEXT: s_mov_b32 s2, s4
; GFX10-NEXT: s_mov_b32 s3, s5
; GFX10-NEXT: s_mov_b32 s4, s6
; GFX10-NEXT: s_mov_b32 s5, s7
; GFX10-NEXT: s_mov_b32 s6, s8
; GFX10-NEXT: s_mov_b32 s7, s9
; GFX10-NEXT: s_mov_b32 s8, s10
; GFX10-NEXT: s_mov_b32 s9, s11
; GFX10-NEXT: s_mov_b32 s10, s12
; GFX10-NEXT: s_mov_b32 s11, s13
; GFX10-NEXT: image_gather4_c_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; gather4.cl.o: passes %offset, %s, %t, %clamp. The expected output contains
; the exec save / s_wqm / s_and sequence.
define amdgpu_ps <4 x float> @gather4_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %clamp) {
; GFX6-LABEL: gather4_cl_o_2d:
; GFX6: ; %bb.0: ; %main_body
; GFX6-NEXT: s_mov_b64 s[14:15], exec
; GFX6-NEXT: s_mov_b32 s0, s2
; GFX6-NEXT: s_mov_b32 s1, s3
; GFX6-NEXT: s_mov_b32 s2, s4
; GFX6-NEXT: s_mov_b32 s3, s5
; GFX6-NEXT: s_mov_b32 s4, s6
; GFX6-NEXT: s_mov_b32 s5, s7
; GFX6-NEXT: s_mov_b32 s6, s8
; GFX6-NEXT: s_mov_b32 s7, s9
; GFX6-NEXT: s_mov_b32 s8, s10
; GFX6-NEXT: s_mov_b32 s9, s11
; GFX6-NEXT: s_mov_b32 s10, s12
; GFX6-NEXT: s_mov_b32 s11, s13
; GFX6-NEXT: s_wqm_b64 exec, exec
; GFX6-NEXT: s_and_b64 exec, exec, s[14:15]
; GFX6-NEXT: image_gather4_cl_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: gather4_cl_o_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s1, exec_lo
; GFX10-NEXT: s_mov_b32 s0, s2
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s1
; GFX10-NEXT: s_mov_b32 s1, s3
; GFX10-NEXT: s_mov_b32 s2, s4
; GFX10-NEXT: s_mov_b32 s3, s5
; GFX10-NEXT: s_mov_b32 s4, s6
; GFX10-NEXT: s_mov_b32 s5, s7
; GFX10-NEXT: s_mov_b32 s6, s8
; GFX10-NEXT: s_mov_b32 s7, s9
; GFX10-NEXT: s_mov_b32 s8, s10
; GFX10-NEXT: s_mov_b32 s9, s11
; GFX10-NEXT: s_mov_b32 s10, s12
; GFX10-NEXT: s_mov_b32 s11, s13
; GFX10-NEXT: image_gather4_cl_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; gather4.c.cl.o: passes %offset, %zcompare, %s, %t, %clamp. The expected output
; contains the exec save / s_wqm / s_and sequence.
define amdgpu_ps <4 x float> @gather4_c_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %clamp) {
; GFX6-LABEL: gather4_c_cl_o_2d:
; GFX6: ; %bb.0: ; %main_body
; GFX6-NEXT: s_mov_b64 s[14:15], exec
; GFX6-NEXT: s_mov_b32 s0, s2
; GFX6-NEXT: s_mov_b32 s1, s3
; GFX6-NEXT: s_mov_b32 s2, s4
; GFX6-NEXT: s_mov_b32 s3, s5
; GFX6-NEXT: s_mov_b32 s4, s6
; GFX6-NEXT: s_mov_b32 s5, s7
; GFX6-NEXT: s_mov_b32 s6, s8
; GFX6-NEXT: s_mov_b32 s7, s9
; GFX6-NEXT: s_mov_b32 s8, s10
; GFX6-NEXT: s_mov_b32 s9, s11
; GFX6-NEXT: s_mov_b32 s10, s12
; GFX6-NEXT: s_mov_b32 s11, s13
; GFX6-NEXT: s_wqm_b64 exec, exec
; GFX6-NEXT: s_and_b64 exec, exec, s[14:15]
; GFX6-NEXT: image_gather4_c_cl_o v[0:3], v[0:4], s[0:7], s[8:11] dmask:0x1
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: gather4_c_cl_o_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s1, exec_lo
; GFX10-NEXT: s_mov_b32 s0, s2
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s1
; GFX10-NEXT: s_mov_b32 s1, s3
; GFX10-NEXT: s_mov_b32 s2, s4
; GFX10-NEXT: s_mov_b32 s3, s5
; GFX10-NEXT: s_mov_b32 s4, s6
; GFX10-NEXT: s_mov_b32 s5, s7
; GFX10-NEXT: s_mov_b32 s6, s8
; GFX10-NEXT: s_mov_b32 s7, s9
; GFX10-NEXT: s_mov_b32 s8, s10
; GFX10-NEXT: s_mov_b32 s9, s11
; GFX10-NEXT: s_mov_b32 s10, s12
; GFX10-NEXT: s_mov_b32 s11, s13
; GFX10-NEXT: image_gather4_c_cl_o v[0:3], v[0:4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; gather4.b.o: passes %offset, %bias, %s, %t. The expected output contains the
; exec save / s_wqm / s_and sequence.
define amdgpu_ps <4 x float> @gather4_b_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %s, float %t) {
; GFX6-LABEL: gather4_b_o_2d:
; GFX6: ; %bb.0: ; %main_body
; GFX6-NEXT: s_mov_b64 s[14:15], exec
; GFX6-NEXT: s_mov_b32 s0, s2
; GFX6-NEXT: s_mov_b32 s1, s3
; GFX6-NEXT: s_mov_b32 s2, s4
; GFX6-NEXT: s_mov_b32 s3, s5
; GFX6-NEXT: s_mov_b32 s4, s6
; GFX6-NEXT: s_mov_b32 s5, s7
; GFX6-NEXT: s_mov_b32 s6, s8
; GFX6-NEXT: s_mov_b32 s7, s9
; GFX6-NEXT: s_mov_b32 s8, s10
; GFX6-NEXT: s_mov_b32 s9, s11
; GFX6-NEXT: s_mov_b32 s10, s12
; GFX6-NEXT: s_mov_b32 s11, s13
; GFX6-NEXT: s_wqm_b64 exec, exec
; GFX6-NEXT: s_and_b64 exec, exec, s[14:15]
; GFX6-NEXT: image_gather4_b_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: gather4_b_o_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s1, exec_lo
; GFX10-NEXT: s_mov_b32 s0, s2
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s1
; GFX10-NEXT: s_mov_b32 s1, s3
; GFX10-NEXT: s_mov_b32 s2, s4
; GFX10-NEXT: s_mov_b32 s3, s5
; GFX10-NEXT: s_mov_b32 s4, s6
; GFX10-NEXT: s_mov_b32 s5, s7
; GFX10-NEXT: s_mov_b32 s6, s8
; GFX10-NEXT: s_mov_b32 s7, s9
; GFX10-NEXT: s_mov_b32 s8, s10
; GFX10-NEXT: s_mov_b32 s9, s11
; GFX10-NEXT: s_mov_b32 s10, s12
; GFX10-NEXT: s_mov_b32 s11, s13
; GFX10-NEXT: image_gather4_b_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; gather4.c.b.o: passes %offset, %bias, %zcompare, %s, %t. The expected output
; contains the exec save / s_wqm / s_and sequence.
define amdgpu_ps <4 x float> @gather4_c_b_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %zcompare, float %s, float %t) {
; GFX6-LABEL: gather4_c_b_o_2d:
; GFX6: ; %bb.0: ; %main_body
; GFX6-NEXT: s_mov_b64 s[14:15], exec
; GFX6-NEXT: s_mov_b32 s0, s2
; GFX6-NEXT: s_mov_b32 s1, s3
; GFX6-NEXT: s_mov_b32 s2, s4
; GFX6-NEXT: s_mov_b32 s3, s5
; GFX6-NEXT: s_mov_b32 s4, s6
; GFX6-NEXT: s_mov_b32 s5, s7
; GFX6-NEXT: s_mov_b32 s6, s8
; GFX6-NEXT: s_mov_b32 s7, s9
; GFX6-NEXT: s_mov_b32 s8, s10
; GFX6-NEXT: s_mov_b32 s9, s11
; GFX6-NEXT: s_mov_b32 s10, s12
; GFX6-NEXT: s_mov_b32 s11, s13
; GFX6-NEXT: s_wqm_b64 exec, exec
; GFX6-NEXT: s_and_b64 exec, exec, s[14:15]
; GFX6-NEXT: image_gather4_c_b_o v[0:3], v[0:4], s[0:7], s[8:11] dmask:0x1
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: gather4_c_b_o_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s1, exec_lo
; GFX10-NEXT: s_mov_b32 s0, s2
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s1
; GFX10-NEXT: s_mov_b32 s1, s3
; GFX10-NEXT: s_mov_b32 s2, s4
; GFX10-NEXT: s_mov_b32 s3, s5
; GFX10-NEXT: s_mov_b32 s4, s6
; GFX10-NEXT: s_mov_b32 s5, s7
; GFX10-NEXT: s_mov_b32 s6, s8
; GFX10-NEXT: s_mov_b32 s7, s9
; GFX10-NEXT: s_mov_b32 s8, s10
; GFX10-NEXT: s_mov_b32 s9, s11
; GFX10-NEXT: s_mov_b32 s10, s12
; GFX10-NEXT: s_mov_b32 s11, s13
; GFX10-NEXT: image_gather4_c_b_o v[0:3], v[0:4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; gather4.b.cl.o: passes %offset, %bias, %s, %t, %clamp.
; NOTE(review) - unlike the other three bias variants in this file, the expected
; output here has no exec save / s_wqm / s_and sequence on either target.
; Presumably this mirrors what llc emits for this opcode; confirm by
; regenerating the assertions before relying on it.
define amdgpu_ps <4 x float> @gather4_b_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %s, float %t, float %clamp) {
; GFX6-LABEL: gather4_b_cl_o_2d:
; GFX6: ; %bb.0: ; %main_body
; GFX6-NEXT: s_mov_b32 s0, s2
; GFX6-NEXT: s_mov_b32 s1, s3
; GFX6-NEXT: s_mov_b32 s2, s4
; GFX6-NEXT: s_mov_b32 s3, s5
; GFX6-NEXT: s_mov_b32 s4, s6
; GFX6-NEXT: s_mov_b32 s5, s7
; GFX6-NEXT: s_mov_b32 s6, s8
; GFX6-NEXT: s_mov_b32 s7, s9
; GFX6-NEXT: s_mov_b32 s8, s10
; GFX6-NEXT: s_mov_b32 s9, s11
; GFX6-NEXT: s_mov_b32 s10, s12
; GFX6-NEXT: s_mov_b32 s11, s13
; GFX6-NEXT: image_gather4_b_cl_o v[0:3], v[0:4], s[0:7], s[8:11] dmask:0x1
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: gather4_b_cl_o_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s0, s2
; GFX10-NEXT: s_mov_b32 s1, s3
; GFX10-NEXT: s_mov_b32 s2, s4
; GFX10-NEXT: s_mov_b32 s3, s5
; GFX10-NEXT: s_mov_b32 s4, s6
; GFX10-NEXT: s_mov_b32 s5, s7
; GFX10-NEXT: s_mov_b32 s6, s8
; GFX10-NEXT: s_mov_b32 s7, s9
; GFX10-NEXT: s_mov_b32 s8, s10
; GFX10-NEXT: s_mov_b32 s9, s11
; GFX10-NEXT: s_mov_b32 s10, s12
; GFX10-NEXT: s_mov_b32 s11, s13
; GFX10-NEXT: image_gather4_b_cl_o v[0:3], v[0:4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; gather4.c.b.cl.o: passes %offset, %bias, %zcompare, %s, %t, %clamp. The
; expected output contains the exec save / s_wqm / s_and sequence.
define amdgpu_ps <4 x float> @gather4_c_b_cl_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %bias, float %zcompare, float %s, float %t, float %clamp) {
; GFX6-LABEL: gather4_c_b_cl_o_2d:
; GFX6: ; %bb.0: ; %main_body
; GFX6-NEXT: s_mov_b64 s[14:15], exec
; GFX6-NEXT: s_mov_b32 s0, s2
; GFX6-NEXT: s_mov_b32 s1, s3
; GFX6-NEXT: s_mov_b32 s2, s4
; GFX6-NEXT: s_mov_b32 s3, s5
; GFX6-NEXT: s_mov_b32 s4, s6
; GFX6-NEXT: s_mov_b32 s5, s7
; GFX6-NEXT: s_mov_b32 s6, s8
; GFX6-NEXT: s_mov_b32 s7, s9
; GFX6-NEXT: s_mov_b32 s8, s10
; GFX6-NEXT: s_mov_b32 s9, s11
; GFX6-NEXT: s_mov_b32 s10, s12
; GFX6-NEXT: s_mov_b32 s11, s13
; GFX6-NEXT: s_wqm_b64 exec, exec
; GFX6-NEXT: s_and_b64 exec, exec, s[14:15]
; GFX6-NEXT: image_gather4_c_b_cl_o v[0:3], v[0:5], s[0:7], s[8:11] dmask:0x1
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: gather4_c_b_cl_o_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s1, exec_lo
; GFX10-NEXT: s_mov_b32 s0, s2
; GFX10-NEXT: s_wqm_b32 exec_lo, exec_lo
; GFX10-NEXT: s_and_b32 exec_lo, exec_lo, s1
; GFX10-NEXT: s_mov_b32 s1, s3
; GFX10-NEXT: s_mov_b32 s2, s4
; GFX10-NEXT: s_mov_b32 s3, s5
; GFX10-NEXT: s_mov_b32 s4, s6
; GFX10-NEXT: s_mov_b32 s5, s7
; GFX10-NEXT: s_mov_b32 s6, s8
; GFX10-NEXT: s_mov_b32 s7, s9
; GFX10-NEXT: s_mov_b32 s8, s10
; GFX10-NEXT: s_mov_b32 s9, s11
; GFX10-NEXT: s_mov_b32 s10, s12
; GFX10-NEXT: s_mov_b32 s11, s13
; GFX10-NEXT: image_gather4_c_b_cl_o v[0:3], v[0:5], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32(i32 1, i32 %offset, float %bias, float %zcompare, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; gather4.l.o: passes %offset, %s, %t, %lod. The expected output has no
; s_wqm sequence; exec is not touched.
define amdgpu_ps <4 x float> @gather4_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t, float %lod) {
; GFX6-LABEL: gather4_l_o_2d:
; GFX6: ; %bb.0: ; %main_body
; GFX6-NEXT: s_mov_b32 s0, s2
; GFX6-NEXT: s_mov_b32 s1, s3
; GFX6-NEXT: s_mov_b32 s2, s4
; GFX6-NEXT: s_mov_b32 s3, s5
; GFX6-NEXT: s_mov_b32 s4, s6
; GFX6-NEXT: s_mov_b32 s5, s7
; GFX6-NEXT: s_mov_b32 s6, s8
; GFX6-NEXT: s_mov_b32 s7, s9
; GFX6-NEXT: s_mov_b32 s8, s10
; GFX6-NEXT: s_mov_b32 s9, s11
; GFX6-NEXT: s_mov_b32 s10, s12
; GFX6-NEXT: s_mov_b32 s11, s13
; GFX6-NEXT: image_gather4_l_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: gather4_l_o_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s0, s2
; GFX10-NEXT: s_mov_b32 s1, s3
; GFX10-NEXT: s_mov_b32 s2, s4
; GFX10-NEXT: s_mov_b32 s3, s5
; GFX10-NEXT: s_mov_b32 s4, s6
; GFX10-NEXT: s_mov_b32 s5, s7
; GFX10-NEXT: s_mov_b32 s6, s8
; GFX10-NEXT: s_mov_b32 s7, s9
; GFX10-NEXT: s_mov_b32 s8, s10
; GFX10-NEXT: s_mov_b32 s9, s11
; GFX10-NEXT: s_mov_b32 s10, s12
; GFX10-NEXT: s_mov_b32 s11, s13
; GFX10-NEXT: image_gather4_l_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; gather4.c.l.o: passes %offset, %zcompare, %s, %t, %lod. The expected output
; has no s_wqm sequence.
define amdgpu_ps <4 x float> @gather4_c_l_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t, float %lod) {
; GFX6-LABEL: gather4_c_l_o_2d:
; GFX6: ; %bb.0: ; %main_body
; GFX6-NEXT: s_mov_b32 s0, s2
; GFX6-NEXT: s_mov_b32 s1, s3
; GFX6-NEXT: s_mov_b32 s2, s4
; GFX6-NEXT: s_mov_b32 s3, s5
; GFX6-NEXT: s_mov_b32 s4, s6
; GFX6-NEXT: s_mov_b32 s5, s7
; GFX6-NEXT: s_mov_b32 s6, s8
; GFX6-NEXT: s_mov_b32 s7, s9
; GFX6-NEXT: s_mov_b32 s8, s10
; GFX6-NEXT: s_mov_b32 s9, s11
; GFX6-NEXT: s_mov_b32 s10, s12
; GFX6-NEXT: s_mov_b32 s11, s13
; GFX6-NEXT: image_gather4_c_l_o v[0:3], v[0:4], s[0:7], s[8:11] dmask:0x1
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: gather4_c_l_o_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s0, s2
; GFX10-NEXT: s_mov_b32 s1, s3
; GFX10-NEXT: s_mov_b32 s2, s4
; GFX10-NEXT: s_mov_b32 s3, s5
; GFX10-NEXT: s_mov_b32 s4, s6
; GFX10-NEXT: s_mov_b32 s5, s7
; GFX10-NEXT: s_mov_b32 s6, s8
; GFX10-NEXT: s_mov_b32 s7, s9
; GFX10-NEXT: s_mov_b32 s8, s10
; GFX10-NEXT: s_mov_b32 s9, s11
; GFX10-NEXT: s_mov_b32 s10, s12
; GFX10-NEXT: s_mov_b32 s11, s13
; GFX10-NEXT: image_gather4_c_l_o v[0:3], v[0:4], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, float %lod, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; gather4.lz.o: passes %offset, %s, %t. The expected output has no s_wqm
; sequence.
define amdgpu_ps <4 x float> @gather4_lz_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %s, float %t) {
; GFX6-LABEL: gather4_lz_o_2d:
; GFX6: ; %bb.0: ; %main_body
; GFX6-NEXT: s_mov_b32 s0, s2
; GFX6-NEXT: s_mov_b32 s1, s3
; GFX6-NEXT: s_mov_b32 s2, s4
; GFX6-NEXT: s_mov_b32 s3, s5
; GFX6-NEXT: s_mov_b32 s4, s6
; GFX6-NEXT: s_mov_b32 s5, s7
; GFX6-NEXT: s_mov_b32 s6, s8
; GFX6-NEXT: s_mov_b32 s7, s9
; GFX6-NEXT: s_mov_b32 s8, s10
; GFX6-NEXT: s_mov_b32 s9, s11
; GFX6-NEXT: s_mov_b32 s10, s12
; GFX6-NEXT: s_mov_b32 s11, s13
; GFX6-NEXT: image_gather4_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: gather4_lz_o_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s0, s2
; GFX10-NEXT: s_mov_b32 s1, s3
; GFX10-NEXT: s_mov_b32 s2, s4
; GFX10-NEXT: s_mov_b32 s3, s5
; GFX10-NEXT: s_mov_b32 s4, s6
; GFX10-NEXT: s_mov_b32 s5, s7
; GFX10-NEXT: s_mov_b32 s6, s8
; GFX10-NEXT: s_mov_b32 s7, s9
; GFX10-NEXT: s_mov_b32 s8, s10
; GFX10-NEXT: s_mov_b32 s9, s11
; GFX10-NEXT: s_mov_b32 s10, s12
; GFX10-NEXT: s_mov_b32 s11, s13
; GFX10-NEXT: image_gather4_lz_o v[0:3], v[0:2], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32(i32 1, i32 %offset, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; gather4.c.lz.o: passes %offset, %zcompare, %s, %t. The expected output has no
; s_wqm sequence.
define amdgpu_ps <4 x float> @gather4_c_lz_o_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %s, float %t) {
; GFX6-LABEL: gather4_c_lz_o_2d:
; GFX6: ; %bb.0: ; %main_body
; GFX6-NEXT: s_mov_b32 s0, s2
; GFX6-NEXT: s_mov_b32 s1, s3
; GFX6-NEXT: s_mov_b32 s2, s4
; GFX6-NEXT: s_mov_b32 s3, s5
; GFX6-NEXT: s_mov_b32 s4, s6
; GFX6-NEXT: s_mov_b32 s5, s7
; GFX6-NEXT: s_mov_b32 s6, s8
; GFX6-NEXT: s_mov_b32 s7, s9
; GFX6-NEXT: s_mov_b32 s8, s10
; GFX6-NEXT: s_mov_b32 s9, s11
; GFX6-NEXT: s_mov_b32 s10, s12
; GFX6-NEXT: s_mov_b32 s11, s13
; GFX6-NEXT: image_gather4_c_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1
; GFX6-NEXT: s_waitcnt vmcnt(0)
; GFX6-NEXT: ; return to shader part epilog
;
; GFX10-LABEL: gather4_c_lz_o_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: s_mov_b32 s0, s2
; GFX10-NEXT: s_mov_b32 s1, s3
; GFX10-NEXT: s_mov_b32 s2, s4
; GFX10-NEXT: s_mov_b32 s3, s5
; GFX10-NEXT: s_mov_b32 s4, s6
; GFX10-NEXT: s_mov_b32 s5, s7
; GFX10-NEXT: s_mov_b32 s6, s8
; GFX10-NEXT: s_mov_b32 s7, s9
; GFX10-NEXT: s_mov_b32 s8, s10
; GFX10-NEXT: s_mov_b32 s9, s11
; GFX10-NEXT: s_mov_b32 s10, s12
; GFX10-NEXT: s_mov_b32 s11, s13
; GFX10-NEXT: image_gather4_c_lz_o v[0:3], v[0:3], s[0:7], s[8:11] dmask:0x1 dim:SQ_RSRC_IMG_2D
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32(i32 1, i32 %offset, float %zcompare, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 false, i32 0, i32 0)
  ret <4 x float> %v
}

; Declarations of the twelve intrinsics called above, in the same order as the
; functions that use them. All share attribute group #0.
declare <4 x float> @llvm.amdgcn.image.gather4.o.2d.v4f32.f32(i32 immarg, i32, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.c.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.cl.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.c.cl.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.b.o.2d.v4f32.f32.f32(i32 immarg, i32, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.c.b.o.2d.v4f32.f32.f32(i32 immarg, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.b.cl.o.2d.v4f32.f32.f32(i32 immarg, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.c.b.cl.o.2d.v4f32.f32.f32(i32 immarg, i32, float, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.l.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.c.l.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.lz.o.2d.v4f32.f32(i32 immarg, i32, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0
declare <4 x float> @llvm.amdgcn.image.gather4.c.lz.o.2d.v4f32.f32(i32 immarg, i32, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #0

attributes #0 = { nounwind readonly }