; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s

; Codegen checks for the llvm.amdgcn.image.sample.cd.* intrinsic family when
; the derivative operands are `half` and the coordinate operands are `float`
; (the .v4f32.f16.f32 overloads). Each test is an amdgpu_ps function that
; passes %rsrc and %samp as inreg arguments and returns the <4 x float>
; sample result. The assertions expect the *_g16 form of the image_sample
; instruction on gfx1010. In the 2D cases the assertions also expect two
; v_perm_b32 instructions ahead of the sample (presumably packing pairs of
; half derivatives into one VGPR each; confirm against the backend if this
; matters to you).
;
; The assertion lines are autogenerated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.

; 1D, derivatives (dsdh, dsdv) + coordinate s.
define amdgpu_ps <4 x float> @sample_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) {
; GFX10-LABEL: sample_cd_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    image_sample_cd_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; 2D, derivatives (dsdh, dtdh, dsdv, dtdv) + coordinates (s, t).
define amdgpu_ps <4 x float> @sample_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
; GFX10-LABEL: sample_cd_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_perm_b32 v2, v3, v2, 0x5040100
; GFX10-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
; GFX10-NEXT:    image_sample_cd_g16 v[0:3], [v0, v2, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; 1D with a leading float %zcompare operand (the .c. variant).
define amdgpu_ps <4 x float> @sample_c_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) {
; GFX10-LABEL: sample_c_cd_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    image_sample_c_cd_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; 2D with a leading float %zcompare operand (the .c. variant).
define amdgpu_ps <4 x float> @sample_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
; GFX10-LABEL: sample_c_cd_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_perm_b32 v3, v4, v3, 0x5040100
; GFX10-NEXT:    v_perm_b32 v1, v2, v1, 0x5040100
; GFX10-NEXT:    image_sample_c_cd_g16 v[0:3], [v0, v1, v3, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; 1D with a trailing float %clamp operand (the .cl. variant).
define amdgpu_ps <4 x float> @sample_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s, float %clamp) {
; GFX10-LABEL: sample_cd_cl_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    image_sample_cd_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; 2D with a trailing float %clamp operand (the .cl. variant).
define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
; GFX10-LABEL: sample_cd_cl_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_perm_b32 v2, v3, v2, 0x5040100
; GFX10-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
; GFX10-NEXT:    image_sample_cd_cl_g16 v[0:3], [v0, v2, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; 1D with both %zcompare and %clamp operands (the .c. + .cl. variant).
define amdgpu_ps <4 x float> @sample_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp) {
; GFX10-LABEL: sample_c_cd_cl_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    image_sample_c_cd_cl_g16 v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; 2D with both %zcompare and %clamp operands (the .c. + .cl. variant).
; Unlike the other 2D cases, the expected output here passes the address as a
; contiguous register range (v[2:7]) and needs two extra v_mov_b32 copies.
define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) {
; GFX10-LABEL: sample_c_cd_cl_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_mov_b32_e32 v8, v2
; GFX10-NEXT:    v_mov_b32_e32 v2, v0
; GFX10-NEXT:    v_perm_b32 v4, v4, v3, 0x5040100
; GFX10-NEXT:    v_perm_b32 v3, v8, v1, 0x5040100
; GFX10-NEXT:    image_sample_c_cd_cl_g16 v[0:3], v[2:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; Declarations of the intrinsics exercised above. Operand order in every
; signature: i32, [float zcompare,] half derivatives, float coordinates,
; [float clamp,] <8 x i32> rsrc, <4 x i32> samp, i1, i32, i32.
declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; Only #1 is referenced by the declarations above; #0 and #2 are retained
; unchanged.
attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }
attributes #2 = { nounwind readnone }