; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s

; Codegen coverage for the llvm.amdgcn.image.sample.cd* intrinsics when the
; gradient and coordinate operands are half-typed (the .f16.f16 and .f32.f16
; overloads declared at the bottom of this file). Each case is compiled for
; gfx900 and gfx1010. The expected assembly carries the a16 modifier on both
; targets; on gfx1010 it additionally uses the _g16 opcode variants and an
; explicit dim: operand.
;
; The expected-assembly comment blocks inside each function are generated
; output (see the NOTE on the first line); the hand-written comments in this
; file only appear outside function bodies.

; sample.cd, 1D: three half operands (dsdh, dsdv, s). The expected assembly
; contains no v_perm_b32 and addresses v[0:2].
define amdgpu_ps <4 x float> @sample_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) {
; GFX9-LABEL: sample_cd_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    image_sample_cd v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_cd_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    image_sample_cd_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; sample.cd, 2D: six half operands. The expected assembly combines them with
; three v_perm_b32 instructions (selector 0x5040100) and addresses v[2:4].
; gfx900 first materializes the selector in s12; gfx1010 uses it as a literal.
define amdgpu_ps <4 x float> @sample_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
; GFX9-LABEL: sample_cd_2d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s12, 0x5040100
; GFX9-NEXT:    v_perm_b32 v4, v5, v4, s12
; GFX9-NEXT:    v_perm_b32 v3, v3, v2, s12
; GFX9-NEXT:    v_perm_b32 v2, v1, v0, s12
; GFX9-NEXT:    image_sample_cd v[0:3], v[2:4], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_cd_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_perm_b32 v4, v5, v4, 0x5040100
; GFX10-NEXT:    v_perm_b32 v3, v3, v2, 0x5040100
; GFX10-NEXT:    v_perm_b32 v2, v1, v0, 0x5040100
; GFX10-NEXT:    image_sample_cd_g16 v[0:3], v[2:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; sample.c.cd, 1D: a float %zcompare followed by three half operands. The
; expected assembly contains no v_perm_b32 and addresses v[0:3].
define amdgpu_ps <4 x float> @sample_c_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s) {
; GFX9-LABEL: sample_c_cd_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_c_cd_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    image_sample_c_cd_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; sample.c.cd, 2D: a float %zcompare followed by six half operands. The
; expected gfx900 assembly copies two inputs aside before the three
; v_perm_b32 instructions and addresses the contiguous range v[0:3]; the
; expected gfx1010 assembly uses a register list, [v0, v1, v3, v5].
define amdgpu_ps <4 x float> @sample_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) {
; GFX9-LABEL: sample_c_cd_2d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    v_mov_b32_e32 v7, v3
; GFX9-NEXT:    v_mov_b32_e32 v8, v2
; GFX9-NEXT:    s_mov_b32 s12, 0x5040100
; GFX9-NEXT:    v_perm_b32 v3, v6, v5, s12
; GFX9-NEXT:    v_perm_b32 v2, v4, v7, s12
; GFX9-NEXT:    v_perm_b32 v1, v8, v1, s12
; GFX9-NEXT:    image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_c_cd_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_perm_b32 v5, v6, v5, 0x5040100
; GFX10-NEXT:    v_perm_b32 v3, v4, v3, 0x5040100
; GFX10-NEXT:    v_perm_b32 v1, v2, v1, 0x5040100
; GFX10-NEXT:    image_sample_c_cd_g16 v[0:3], [v0, v1, v3, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; sample.cd.cl, 1D: four half operands (dsdh, dsdv, s, clamp). The expected
; assembly has a single v_perm_b32 and addresses v[0:2].
define amdgpu_ps <4 x float> @sample_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s, half %clamp) {
; GFX9-LABEL: sample_cd_cl_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s12, 0x5040100
; GFX9-NEXT:    v_perm_b32 v2, v3, v2, s12
; GFX9-NEXT:    image_sample_cd_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_cd_cl_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_perm_b32 v2, v3, v2, 0x5040100
; GFX10-NEXT:    image_sample_cd_cl_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; sample.cd.cl, 2D: seven half operands. The expected assembly has three
; v_perm_b32 instructions; gfx900 addresses v[3:6], gfx1010 uses the register
; list [v0, v2, v4, v6].
define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
; GFX9-LABEL: sample_cd_cl_2d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s12, 0x5040100
; GFX9-NEXT:    v_perm_b32 v5, v5, v4, s12
; GFX9-NEXT:    v_perm_b32 v4, v3, v2, s12
; GFX9-NEXT:    v_perm_b32 v3, v1, v0, s12
; GFX9-NEXT:    image_sample_cd_cl v[0:3], v[3:6], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_cd_cl_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_perm_b32 v4, v5, v4, 0x5040100
; GFX10-NEXT:    v_perm_b32 v2, v3, v2, 0x5040100
; GFX10-NEXT:    v_perm_b32 v0, v1, v0, 0x5040100
; GFX10-NEXT:    image_sample_cd_cl_g16 v[0:3], [v0, v2, v4, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; sample.c.cd.cl, 1D: a float %zcompare followed by four half operands. The
; expected assembly has a single v_perm_b32 and addresses v[0:3].
define amdgpu_ps <4 x float> @sample_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp) {
; GFX9-LABEL: sample_c_cd_cl_1d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s12, 0x5040100
; GFX9-NEXT:    v_perm_b32 v3, v4, v3, s12
; GFX9-NEXT:    image_sample_c_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_c_cd_cl_1d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_perm_b32 v3, v4, v3, 0x5040100
; GFX10-NEXT:    image_sample_c_cd_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; sample.c.cd.cl, 2D: a float %zcompare followed by seven half operands. The
; expected gfx900 assembly moves v7 and v0 into a fresh contiguous range and
; addresses v[7:11]; the expected gfx1010 assembly uses the register list
; [v0, v1, v3, v5, v7].
define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp) {
; GFX9-LABEL: sample_c_cd_cl_2d:
; GFX9:       ; %bb.0: ; %main_body
; GFX9-NEXT:    s_mov_b32 s12, 0x5040100
; GFX9-NEXT:    v_mov_b32_e32 v11, v7
; GFX9-NEXT:    v_mov_b32_e32 v7, v0
; GFX9-NEXT:    v_perm_b32 v10, v6, v5, s12
; GFX9-NEXT:    v_perm_b32 v9, v4, v3, s12
; GFX9-NEXT:    v_perm_b32 v8, v2, v1, s12
; GFX9-NEXT:    image_sample_c_cd_cl v[0:3], v[7:11], s[0:7], s[8:11] dmask:0xf a16
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: sample_c_cd_cl_2d:
; GFX10:       ; %bb.0: ; %main_body
; GFX10-NEXT:    v_perm_b32 v5, v6, v5, 0x5040100
; GFX10-NEXT:    v_perm_b32 v3, v4, v3, 0x5040100
; GFX10-NEXT:    v_perm_b32 v1, v2, v1, 0x5040100
; GFX10-NEXT:    image_sample_c_cd_cl_g16 v[0:3], [v0, v1, v3, v5, v7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT:    s_waitcnt vmcnt(0)
; GFX10-NEXT:    ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; Intrinsic declarations, one per test case above and in the same order.
declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f16(i32, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f16(i32, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32, float, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32, float, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; Only #1 is referenced (by the declarations above); #0 and #2 are defined
; but not used anywhere in this file.
attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }
attributes #2 = { nounwind readnone }