; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -global-isel -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10GISEL %s
; TODO: global-isel produces more code - there will need to be some more combines in the postregbankselectcombine phase
; Depends on some other changes to pass this test - those are in review separately
;
; Overview (review note): both llc invocations above target gfx1010. The first
; uses the default instruction selector and is verified under the GFX10 check
; prefix; the second adds -global-isel and is verified under the GFX10GISEL
; check prefix.
;
; The functions that precede the intrinsic declarations call image.sample
; intrinsics with the ".f32.f16" type suffix (float derivatives, half
; coordinates); every expected image instruction for them carries the a16
; modifier. The sample_g16_noa16_* functions after the declarations use the
; ".f16.f32" suffix (half derivatives, float coordinates) and expect the _g16
; opcode form without a16.
;
; The assertion lines are autogenerated; regenerate them with the script named
; on the first line rather than editing them by hand.

; 1D sample.d: two float derivatives, one half coordinate.
define amdgpu_ps <4 x float> @sample_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, half %s) {
; GFX10-LABEL: sample_d_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_d_1d:
; GFX10GISEL: ; %bb.0: ; %main_body
; GFX10GISEL-NEXT: image_sample_d v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f16(i32 15, float %dsdh, float %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; 2D sample.d: four float derivatives, two half coordinates. The expected code
; combines v4 and v5 into v4 before the image instruction (v_perm_b32 on the
; default path, v_and_b32 + v_lshl_or_b32 on the GlobalISel path).
define amdgpu_ps <4 x float> @sample_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t) {
; GFX10-LABEL: sample_d_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: v_perm_b32 v4, v5, v4, 0x5040100
; GFX10-NEXT: image_sample_d v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_d_2d:
; GFX10GISEL: ; %bb.0: ; %main_body
; GFX10GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
; GFX10GISEL-NEXT: v_lshl_or_b32 v4, v5, 16, v4
; GFX10GISEL-NEXT: image_sample_d v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f16(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; 3D sample.d: six float derivatives, three half coordinates.
define amdgpu_ps <4 x float> @sample_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %drdh, float %dsdv, float %dtdv, float %drdv, half %s, half %t, half %r) {
; GFX10-LABEL: sample_d_3d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: v_mov_b32_e32 v15, v8
; GFX10-NEXT: v_mov_b32_e32 v13, v5
; GFX10-NEXT: v_mov_b32_e32 v12, v4
; GFX10-NEXT: v_mov_b32_e32 v11, v3
; GFX10-NEXT: v_mov_b32_e32 v10, v2
; GFX10-NEXT: v_mov_b32_e32 v9, v1
; GFX10-NEXT: v_mov_b32_e32 v8, v0
; GFX10-NEXT: v_perm_b32 v14, v7, v6, 0x5040100
; GFX10-NEXT: image_sample_d v[0:3], v[8:15], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_d_3d:
; GFX10GISEL: ; %bb.0: ; %main_body
; GFX10GISEL-NEXT: v_mov_b32_e32 v9, v7
; GFX10GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v6
; GFX10GISEL-NEXT: v_mov_b32_e32 v7, v8
; GFX10GISEL-NEXT: v_lshl_or_b32 v6, v9, 16, v6
; GFX10GISEL-NEXT: image_sample_d v[0:3], v[0:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32.f16(i32 15, float %dsdh, float %dtdh, float %drdh, float %dsdv, float %dtdv, float %drdv, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; 1D sample.c.d: as sample_d_1d with a leading float %zcompare operand.
define amdgpu_ps <4 x float> @sample_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, half %s) {
; GFX10-LABEL: sample_c_d_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_c_d_1d:
; GFX10GISEL: ; %bb.0: ; %main_body
; GFX10GISEL-NEXT: image_sample_c_d v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; 2D sample.c.d: float %zcompare, four float derivatives, two half coordinates.
define amdgpu_ps <4 x float> @sample_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t) {
; GFX10-LABEL: sample_c_d_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: v_perm_b32 v5, v6, v5, 0x5040100
; GFX10-NEXT: image_sample_c_d v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_c_d_2d:
; GFX10GISEL: ; %bb.0: ; %main_body
; GFX10GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v5
; GFX10GISEL-NEXT: v_lshl_or_b32 v5, v6, 16, v5
; GFX10GISEL-NEXT: image_sample_c_d v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; 1D sample.d.cl: two float derivatives, half coordinate plus half %clamp.
define amdgpu_ps <4 x float> @sample_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, half %s, half %clamp) {
; GFX10-LABEL: sample_d_cl_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: v_perm_b32 v2, v3, v2, 0x5040100
; GFX10-NEXT: image_sample_d_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_d_cl_1d:
; GFX10GISEL: ; %bb.0: ; %main_body
; GFX10GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX10GISEL-NEXT: v_lshl_or_b32 v2, v3, 16, v2
; GFX10GISEL-NEXT: image_sample_d_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f16(i32 15, float %dsdh, float %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; 2D sample.d.cl: four float derivatives, two half coordinates plus half %clamp.
define amdgpu_ps <4 x float> @sample_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp) {
; GFX10-LABEL: sample_d_cl_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: v_mov_b32_e32 v11, v6
; GFX10-NEXT: v_mov_b32_e32 v9, v3
; GFX10-NEXT: v_mov_b32_e32 v8, v2
; GFX10-NEXT: v_mov_b32_e32 v7, v1
; GFX10-NEXT: v_mov_b32_e32 v6, v0
; GFX10-NEXT: v_perm_b32 v10, v5, v4, 0x5040100
; GFX10-NEXT: image_sample_d_cl v[0:3], v[6:11], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_d_cl_2d:
; GFX10GISEL: ; %bb.0: ; %main_body
; GFX10GISEL-NEXT: v_mov_b32_e32 v7, v5
; GFX10GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
; GFX10GISEL-NEXT: v_mov_b32_e32 v5, v6
; GFX10GISEL-NEXT: v_lshl_or_b32 v4, v7, 16, v4
; GFX10GISEL-NEXT: image_sample_d_cl v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f16(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; 1D sample.c.d.cl: float %zcompare, two float derivatives, half coordinate
; plus half %clamp.
define amdgpu_ps <4 x float> @sample_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, half %s, half %clamp) {
; GFX10-LABEL: sample_c_d_cl_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: v_perm_b32 v3, v4, v3, 0x5040100
; GFX10-NEXT: image_sample_c_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_c_d_cl_1d:
; GFX10GISEL: ; %bb.0: ; %main_body
; GFX10GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
; GFX10GISEL-NEXT: v_lshl_or_b32 v3, v4, 16, v3
; GFX10GISEL-NEXT: image_sample_c_d_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; 2D sample.c.d.cl: float %zcompare, four float derivatives, two half
; coordinates plus half %clamp.
define amdgpu_ps <4 x float> @sample_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp) {
; GFX10-LABEL: sample_c_d_cl_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: v_mov_b32_e32 v13, v7
; GFX10-NEXT: v_mov_b32_e32 v11, v4
; GFX10-NEXT: v_mov_b32_e32 v10, v3
; GFX10-NEXT: v_mov_b32_e32 v9, v2
; GFX10-NEXT: v_mov_b32_e32 v8, v1
; GFX10-NEXT: v_mov_b32_e32 v7, v0
; GFX10-NEXT: v_perm_b32 v12, v6, v5, 0x5040100
; GFX10-NEXT: image_sample_c_d_cl v[0:3], v[7:13], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_c_d_cl_2d:
; GFX10GISEL: ; %bb.0: ; %main_body
; GFX10GISEL-NEXT: v_mov_b32_e32 v8, v6
; GFX10GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v5
; GFX10GISEL-NEXT: v_mov_b32_e32 v6, v7
; GFX10GISEL-NEXT: v_lshl_or_b32 v5, v8, 16, v5
; GFX10GISEL-NEXT: image_sample_c_d_cl v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; 1D sample.cd: same operand layout as sample_d_1d, using the sample.cd
; intrinsic.
define amdgpu_ps <4 x float> @sample_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, half %s) {
; GFX10-LABEL: sample_cd_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_sample_cd v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_cd_1d:
; GFX10GISEL: ; %bb.0: ; %main_body
; GFX10GISEL-NEXT: image_sample_cd v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f16(i32 15, float %dsdh, float %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; 2D sample.cd: four float derivatives, two half coordinates.
define amdgpu_ps <4 x float> @sample_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t) {
; GFX10-LABEL: sample_cd_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: v_perm_b32 v4, v5, v4, 0x5040100
; GFX10-NEXT: image_sample_cd v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_cd_2d:
; GFX10GISEL: ; %bb.0: ; %main_body
; GFX10GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
; GFX10GISEL-NEXT: v_lshl_or_b32 v4, v5, 16, v4
; GFX10GISEL-NEXT: image_sample_cd v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f32.f16(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; 1D sample.c.cd: float %zcompare, two float derivatives, one half coordinate.
define amdgpu_ps <4 x float> @sample_c_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, half %s) {
; GFX10-LABEL: sample_c_cd_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_c_cd_1d:
; GFX10GISEL: ; %bb.0: ; %main_body
; GFX10GISEL-NEXT: image_sample_c_cd v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; 2D sample.c.cd: float %zcompare, four float derivatives, two half coordinates.
define amdgpu_ps <4 x float> @sample_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t) {
; GFX10-LABEL: sample_c_cd_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: v_perm_b32 v5, v6, v5, 0x5040100
; GFX10-NEXT: image_sample_c_cd v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_c_cd_2d:
; GFX10GISEL: ; %bb.0: ; %main_body
; GFX10GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v5
; GFX10GISEL-NEXT: v_lshl_or_b32 v5, v6, 16, v5
; GFX10GISEL-NEXT: image_sample_c_cd v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; 1D sample.cd.cl: two float derivatives, half coordinate plus half %clamp.
define amdgpu_ps <4 x float> @sample_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dsdv, half %s, half %clamp) {
; GFX10-LABEL: sample_cd_cl_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: v_perm_b32 v2, v3, v2, 0x5040100
; GFX10-NEXT: image_sample_cd_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_cd_cl_1d:
; GFX10GISEL: ; %bb.0: ; %main_body
; GFX10GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX10GISEL-NEXT: v_lshl_or_b32 v2, v3, 16, v2
; GFX10GISEL-NEXT: image_sample_cd_cl v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f32.f16(i32 15, float %dsdh, float %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; 2D sample.cd.cl: four float derivatives, two half coordinates plus half
; %clamp.
define amdgpu_ps <4 x float> @sample_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp) {
; GFX10-LABEL: sample_cd_cl_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: v_mov_b32_e32 v11, v6
; GFX10-NEXT: v_mov_b32_e32 v9, v3
; GFX10-NEXT: v_mov_b32_e32 v8, v2
; GFX10-NEXT: v_mov_b32_e32 v7, v1
; GFX10-NEXT: v_mov_b32_e32 v6, v0
; GFX10-NEXT: v_perm_b32 v10, v5, v4, 0x5040100
; GFX10-NEXT: image_sample_cd_cl v[0:3], v[6:11], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_cd_cl_2d:
; GFX10GISEL: ; %bb.0: ; %main_body
; GFX10GISEL-NEXT: v_mov_b32_e32 v7, v5
; GFX10GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4
; GFX10GISEL-NEXT: v_mov_b32_e32 v5, v6
; GFX10GISEL-NEXT: v_lshl_or_b32 v4, v7, 16, v4
; GFX10GISEL-NEXT: image_sample_cd_cl v[0:3], v[0:5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f32.f16(i32 15, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; 1D sample.c.cd.cl: float %zcompare, two float derivatives, half coordinate
; plus half %clamp.
define amdgpu_ps <4 x float> @sample_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dsdv, half %s, half %clamp) {
; GFX10-LABEL: sample_c_cd_cl_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: v_perm_b32 v3, v4, v3, 0x5040100
; GFX10-NEXT: image_sample_c_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_c_cd_cl_1d:
; GFX10GISEL: ; %bb.0: ; %main_body
; GFX10GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
; GFX10GISEL-NEXT: v_lshl_or_b32 v3, v4, 16, v3
; GFX10GISEL-NEXT: image_sample_c_cd_cl v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dsdv, half %s, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; 2D sample.c.cd.cl: float %zcompare, four float derivatives, two half
; coordinates plus half %clamp.
define amdgpu_ps <4 x float> @sample_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp) {
; GFX10-LABEL: sample_c_cd_cl_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: v_mov_b32_e32 v13, v7
; GFX10-NEXT: v_mov_b32_e32 v11, v4
; GFX10-NEXT: v_mov_b32_e32 v10, v3
; GFX10-NEXT: v_mov_b32_e32 v9, v2
; GFX10-NEXT: v_mov_b32_e32 v8, v1
; GFX10-NEXT: v_mov_b32_e32 v7, v0
; GFX10-NEXT: v_perm_b32 v12, v6, v5, 0x5040100
; GFX10-NEXT: image_sample_c_cd_cl v[0:3], v[7:13], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_c_cd_cl_2d:
; GFX10GISEL: ; %bb.0: ; %main_body
; GFX10GISEL-NEXT: v_mov_b32_e32 v8, v6
; GFX10GISEL-NEXT: v_and_b32_e32 v5, 0xffff, v5
; GFX10GISEL-NEXT: v_mov_b32_e32 v6, v7
; GFX10GISEL-NEXT: v_lshl_or_b32 v5, v8, 16, v5
; GFX10GISEL-NEXT: image_sample_c_cd_cl v[0:3], v[0:6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32 15, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; 2D-array sample.c.d.o returning a single float: i32 %offset, float
; %zcompare, four float derivatives, half %s/%t/%slice. The first call operand
; is i32 4 and the expected instruction shows dmask:0x4 with a one-register
; result.
define amdgpu_ps float @sample_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %slice) {
; GFX10-LABEL: sample_c_d_o_2darray_V1:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: v_mov_b32_e32 v15, v8
; GFX10-NEXT: v_mov_b32_e32 v13, v5
; GFX10-NEXT: v_mov_b32_e32 v12, v4
; GFX10-NEXT: v_mov_b32_e32 v11, v3
; GFX10-NEXT: v_mov_b32_e32 v10, v2
; GFX10-NEXT: v_mov_b32_e32 v9, v1
; GFX10-NEXT: v_mov_b32_e32 v8, v0
; GFX10-NEXT: v_perm_b32 v14, v7, v6, 0x5040100
; GFX10-NEXT: image_sample_c_d_o v0, v[8:15], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_c_d_o_2darray_V1:
; GFX10GISEL: ; %bb.0: ; %main_body
; GFX10GISEL-NEXT: v_mov_b32_e32 v9, v7
; GFX10GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v6
; GFX10GISEL-NEXT: v_mov_b32_e32 v7, v8
; GFX10GISEL-NEXT: v_lshl_or_b32 v6, v9, 16, v6
; GFX10GISEL-NEXT: image_sample_c_d_o v0, v[0:7], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY a16
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
main_body:
  %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f16(i32 4, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret float %v
}

; Same operands as sample_c_d_o_2darray_V1 but returning <2 x float>: the first
; call operand is i32 6 and the expected instruction shows dmask:0x6 with a
; two-register result.
define amdgpu_ps <2 x float> @sample_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %slice) {
; GFX10-LABEL: sample_c_d_o_2darray_V2:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: v_mov_b32_e32 v15, v8
; GFX10-NEXT: v_mov_b32_e32 v13, v5
; GFX10-NEXT: v_mov_b32_e32 v12, v4
; GFX10-NEXT: v_mov_b32_e32 v11, v3
; GFX10-NEXT: v_mov_b32_e32 v10, v2
; GFX10-NEXT: v_mov_b32_e32 v9, v1
; GFX10-NEXT: v_mov_b32_e32 v8, v0
; GFX10-NEXT: v_perm_b32 v14, v7, v6, 0x5040100
; GFX10-NEXT: image_sample_c_d_o v[0:1], v[8:15], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY a16
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_c_d_o_2darray_V2:
; GFX10GISEL: ; %bb.0: ; %main_body
; GFX10GISEL-NEXT: v_mov_b32_e32 v9, v7
; GFX10GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v6
; GFX10GISEL-NEXT: v_mov_b32_e32 v7, v8
; GFX10GISEL-NEXT: v_lshl_or_b32 v6, v9, 16, v6
; GFX10GISEL-NEXT: image_sample_c_d_o v[0:1], v[0:7], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY a16
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
main_body:
  %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32 6, i32 %offset, float %zcompare, float %dsdh, float %dtdh, float %dsdv, float %dtdv, half %s, half %t, half %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <2 x float> %v
}

; Declarations of the ".f32.f16" intrinsics called by the functions above.
; NOTE(review): attribute group #1 is referenced here but its definition is not
; in this part of the file - confirm it exists further down.
declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f32.f16(i32, float, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f32.f16(i32, float, float, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f32.f16(i32, float, float, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f32.f16(i32, float, float, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f32.f16(i32, float, float, float, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f32.f16(i32, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f32.f16(i32, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f32.f16(i32, float, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f32.f16(i32, float, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1

declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f32.f16(i32, float, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f32.f16(i32, float, float, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f32.f16(i32, float, float, float, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f32.f16(i32, float, float, float, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f32.f16(i32, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f32.f16(i32, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f32.f16(i32, float, float, float, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f32.f16(i32, float, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1

declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f32.f16(i32, i32, float, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1
declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f32.f16(i32, i32, float, float, float, float, float, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) #1

; The functions below swap the operand types: half derivatives with float
; coordinates (".f16.f32" suffix). Their expected instructions use the _g16
; opcode form and carry no a16 modifier.
; NOTE(review): declarations for these ".f16.f32" intrinsics are not in this
; part of the file - presumably they follow the last function; confirm.

; 1D sample.d: two half derivatives, one float coordinate.
define amdgpu_ps <4 x float> @sample_g16_noa16_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) {
; GFX10-LABEL: sample_g16_noa16_d_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_g16_noa16_d_1d:
; GFX10GISEL: ; %bb.0: ; %main_body
; GFX10GISEL-NEXT: image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; 2D sample.d: four half derivatives, two float coordinates. The expected code
; combines the derivative registers pairwise before the image instruction.
define amdgpu_ps <4 x float> @sample_g16_noa16_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
; GFX10-LABEL: sample_g16_noa16_d_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: v_perm_b32 v2, v3, v2, 0x5040100
; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX10-NEXT: image_sample_d_g16 v[0:3], [v0, v2, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_g16_noa16_d_2d:
; GFX10GISEL: ; %bb.0: ; %main_body
; GFX10GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX10GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0
; GFX10GISEL-NEXT: v_lshl_or_b32 v1, v3, 16, v2
; GFX10GISEL-NEXT: image_sample_d_g16 v[0:3], [v0, v1, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; 3D sample.d: six half derivatives, three float coordinates.
define amdgpu_ps <4 x float> @sample_g16_noa16_d_3d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r) {
; GFX10-LABEL: sample_g16_noa16_d_3d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: v_mov_b32_e32 v9, v3
; GFX10-NEXT: v_mov_b32_e32 v3, v2
; GFX10-NEXT: v_perm_b32 v2, v1, v0, 0x5040100
; GFX10-NEXT: v_perm_b32 v4, v4, v9, 0x5040100
; GFX10-NEXT: image_sample_d_g16 v[0:3], v[2:8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_g16_noa16_d_3d:
; GFX10GISEL: ; %bb.0: ; %main_body
; GFX10GISEL-NEXT: v_mov_b32_e32 v9, v3
; GFX10GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX10GISEL-NEXT: v_mov_b32_e32 v3, v2
; GFX10GISEL-NEXT: v_and_b32_e32 v9, 0xffff, v9
; GFX10GISEL-NEXT: v_lshl_or_b32 v2, v1, 16, v0
; GFX10GISEL-NEXT: v_lshl_or_b32 v4, v4, 16, v9
; GFX10GISEL-NEXT: image_sample_d_g16 v[0:3], v[2:8], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, float %s, float %t, float %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; 1D sample.c.d: float %zcompare, two half derivatives, one float coordinate.
define amdgpu_ps <4 x float> @sample_g16_noa16_c_d_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) {
; GFX10-LABEL: sample_g16_noa16_c_d_1d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: image_sample_c_d_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_g16_noa16_c_d_1d:
; GFX10GISEL: ; %bb.0: ; %main_body
; GFX10GISEL-NEXT: image_sample_c_d_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0)
  ret <4 x float> %v
}

; 2D sample.c.d: float %zcompare, four half derivatives, two float coordinates.
; NOTE(review): the body of this function continues past the end of the
; reviewed excerpt; the dangling %v below is completed by the following lines.
define amdgpu_ps <4 x float> @sample_g16_noa16_c_d_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) {
; GFX10-LABEL: sample_g16_noa16_c_d_2d:
; GFX10: ; %bb.0: ; %main_body
; GFX10-NEXT: v_perm_b32 v3, v4, v3, 0x5040100
; GFX10-NEXT: v_perm_b32 v1, v2, v1, 0x5040100
; GFX10-NEXT: image_sample_c_d_g16 v[0:3], [v0, v1, v3, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: ; return to shader part epilog
;
; GFX10GISEL-LABEL: sample_g16_noa16_c_d_2d:
; GFX10GISEL: ; %bb.0: ; %main_body
; GFX10GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX10GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3
; GFX10GISEL-NEXT: v_lshl_or_b32 v1, v2, 16, v1
; GFX10GISEL-NEXT: v_lshl_or_b32 v2, v4, 16, v3
; GFX10GISEL-NEXT: image_sample_c_d_g16 v[0:3], [v0, v1, v2, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D
; GFX10GISEL-NEXT: s_waitcnt vmcnt(0)
; GFX10GISEL-NEXT: ; return to shader part epilog
main_body:
  %v
= call <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 558 ret <4 x float> %v 559} 560 561define amdgpu_ps <4 x float> @sample_g16_noa16_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s, float %clamp) { 562; GFX10-LABEL: sample_g16_noa16_d_cl_1d: 563; GFX10: ; %bb.0: ; %main_body 564; GFX10-NEXT: image_sample_d_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 565; GFX10-NEXT: s_waitcnt vmcnt(0) 566; GFX10-NEXT: ; return to shader part epilog 567; 568; GFX10GISEL-LABEL: sample_g16_noa16_d_cl_1d: 569; GFX10GISEL: ; %bb.0: ; %main_body 570; GFX10GISEL-NEXT: image_sample_d_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 571; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) 572; GFX10GISEL-NEXT: ; return to shader part epilog 573main_body: 574 %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 575 ret <4 x float> %v 576} 577 578define amdgpu_ps <4 x float> @sample_g16_noa16_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) { 579; GFX10-LABEL: sample_g16_noa16_d_cl_2d: 580; GFX10: ; %bb.0: ; %main_body 581; GFX10-NEXT: v_perm_b32 v2, v3, v2, 0x5040100 582; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 583; GFX10-NEXT: image_sample_d_cl_g16 v[0:3], [v0, v2, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 584; GFX10-NEXT: s_waitcnt vmcnt(0) 585; GFX10-NEXT: ; return to shader part epilog 586; 587; GFX10GISEL-LABEL: sample_g16_noa16_d_cl_2d: 588; GFX10GISEL: ; %bb.0: ; %main_body 589; GFX10GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 590; GFX10GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 591; GFX10GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0 592; 
GFX10GISEL-NEXT: v_lshl_or_b32 v1, v3, 16, v2 593; GFX10GISEL-NEXT: image_sample_d_cl_g16 v[0:3], [v0, v1, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 594; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) 595; GFX10GISEL-NEXT: ; return to shader part epilog 596main_body: 597 %v = call <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 598 ret <4 x float> %v 599} 600 601define amdgpu_ps <4 x float> @sample_g16_noa16_c_d_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp) { 602; GFX10-LABEL: sample_g16_noa16_c_d_cl_1d: 603; GFX10: ; %bb.0: ; %main_body 604; GFX10-NEXT: image_sample_c_d_cl_g16 v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 605; GFX10-NEXT: s_waitcnt vmcnt(0) 606; GFX10-NEXT: ; return to shader part epilog 607; 608; GFX10GISEL-LABEL: sample_g16_noa16_c_d_cl_1d: 609; GFX10GISEL: ; %bb.0: ; %main_body 610; GFX10GISEL-NEXT: image_sample_c_d_cl_g16 v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 611; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) 612; GFX10GISEL-NEXT: ; return to shader part epilog 613main_body: 614 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 615 ret <4 x float> %v 616} 617 618define amdgpu_ps <4 x float> @sample_g16_noa16_c_d_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) { 619; GFX10-LABEL: sample_g16_noa16_c_d_cl_2d: 620; GFX10: ; %bb.0: ; %main_body 621; GFX10-NEXT: v_mov_b32_e32 v8, v2 622; GFX10-NEXT: v_mov_b32_e32 v2, v0 623; GFX10-NEXT: v_perm_b32 v4, v4, v3, 0x5040100 624; GFX10-NEXT: v_perm_b32 v3, v8, v1, 0x5040100 625; GFX10-NEXT: 
image_sample_c_d_cl_g16 v[0:3], v[2:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 626; GFX10-NEXT: s_waitcnt vmcnt(0) 627; GFX10-NEXT: ; return to shader part epilog 628; 629; GFX10GISEL-LABEL: sample_g16_noa16_c_d_cl_2d: 630; GFX10GISEL: ; %bb.0: ; %main_body 631; GFX10GISEL-NEXT: v_mov_b32_e32 v8, v2 632; GFX10GISEL-NEXT: v_mov_b32_e32 v2, v0 633; GFX10GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v1 634; GFX10GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v3 635; GFX10GISEL-NEXT: v_lshl_or_b32 v3, v8, 16, v0 636; GFX10GISEL-NEXT: v_lshl_or_b32 v4, v4, 16, v1 637; GFX10GISEL-NEXT: image_sample_c_d_cl_g16 v[0:3], v[2:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 638; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) 639; GFX10GISEL-NEXT: ; return to shader part epilog 640main_body: 641 %v = call <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 642 ret <4 x float> %v 643} 644 645define amdgpu_ps <4 x float> @sample_g16_noa16_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s) { 646; GFX10-LABEL: sample_g16_noa16_cd_1d: 647; GFX10: ; %bb.0: ; %main_body 648; GFX10-NEXT: image_sample_cd_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 649; GFX10-NEXT: s_waitcnt vmcnt(0) 650; GFX10-NEXT: ; return to shader part epilog 651; 652; GFX10GISEL-LABEL: sample_g16_noa16_cd_1d: 653; GFX10GISEL: ; %bb.0: ; %main_body 654; GFX10GISEL-NEXT: image_sample_cd_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 655; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) 656; GFX10GISEL-NEXT: ; return to shader part epilog 657main_body: 658 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 659 ret <4 x float> %v 660} 661 662define amdgpu_ps <4 x float> @sample_g16_noa16_cd_2d(<8 x 
i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) { 663; GFX10-LABEL: sample_g16_noa16_cd_2d: 664; GFX10: ; %bb.0: ; %main_body 665; GFX10-NEXT: v_perm_b32 v2, v3, v2, 0x5040100 666; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 667; GFX10-NEXT: image_sample_cd_g16 v[0:3], [v0, v2, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 668; GFX10-NEXT: s_waitcnt vmcnt(0) 669; GFX10-NEXT: ; return to shader part epilog 670; 671; GFX10GISEL-LABEL: sample_g16_noa16_cd_2d: 672; GFX10GISEL: ; %bb.0: ; %main_body 673; GFX10GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 674; GFX10GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 675; GFX10GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0 676; GFX10GISEL-NEXT: v_lshl_or_b32 v1, v3, 16, v2 677; GFX10GISEL-NEXT: image_sample_cd_g16 v[0:3], [v0, v1, v4, v5], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 678; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) 679; GFX10GISEL-NEXT: ; return to shader part epilog 680main_body: 681 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 682 ret <4 x float> %v 683} 684 685define amdgpu_ps <4 x float> @sample_g16_noa16_c_cd_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s) { 686; GFX10-LABEL: sample_g16_noa16_c_cd_1d: 687; GFX10: ; %bb.0: ; %main_body 688; GFX10-NEXT: image_sample_c_cd_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 689; GFX10-NEXT: s_waitcnt vmcnt(0) 690; GFX10-NEXT: ; return to shader part epilog 691; 692; GFX10GISEL-LABEL: sample_g16_noa16_c_cd_1d: 693; GFX10GISEL: ; %bb.0: ; %main_body 694; GFX10GISEL-NEXT: image_sample_c_cd_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 695; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) 696; GFX10GISEL-NEXT: ; return to shader part epilog 697main_body: 698 %v = call <4 x float> 
@llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 699 ret <4 x float> %v 700} 701 702define amdgpu_ps <4 x float> @sample_g16_noa16_c_cd_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t) { 703; GFX10-LABEL: sample_g16_noa16_c_cd_2d: 704; GFX10: ; %bb.0: ; %main_body 705; GFX10-NEXT: v_perm_b32 v3, v4, v3, 0x5040100 706; GFX10-NEXT: v_perm_b32 v1, v2, v1, 0x5040100 707; GFX10-NEXT: image_sample_c_cd_g16 v[0:3], [v0, v1, v3, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 708; GFX10-NEXT: s_waitcnt vmcnt(0) 709; GFX10-NEXT: ; return to shader part epilog 710; 711; GFX10GISEL-LABEL: sample_g16_noa16_c_cd_2d: 712; GFX10GISEL: ; %bb.0: ; %main_body 713; GFX10GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v1 714; GFX10GISEL-NEXT: v_and_b32_e32 v3, 0xffff, v3 715; GFX10GISEL-NEXT: v_lshl_or_b32 v1, v2, 16, v1 716; GFX10GISEL-NEXT: v_lshl_or_b32 v2, v4, 16, v3 717; GFX10GISEL-NEXT: image_sample_c_cd_g16 v[0:3], [v0, v1, v2, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 718; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) 719; GFX10GISEL-NEXT: ; return to shader part epilog 720main_body: 721 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 722 ret <4 x float> %v 723} 724 725define amdgpu_ps <4 x float> @sample_g16_noa16_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, float %s, float %clamp) { 726; GFX10-LABEL: sample_g16_noa16_cd_cl_1d: 727; GFX10: ; %bb.0: ; %main_body 728; GFX10-NEXT: image_sample_cd_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 729; GFX10-NEXT: s_waitcnt vmcnt(0) 730; GFX10-NEXT: ; return to shader part epilog 731; 732; GFX10GISEL-LABEL: 
sample_g16_noa16_cd_cl_1d: 733; GFX10GISEL: ; %bb.0: ; %main_body 734; GFX10GISEL-NEXT: image_sample_cd_cl_g16 v[0:3], v[0:3], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 735; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) 736; GFX10GISEL-NEXT: ; return to shader part epilog 737main_body: 738 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32 15, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 739 ret <4 x float> %v 740} 741 742define amdgpu_ps <4 x float> @sample_g16_noa16_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) { 743; GFX10-LABEL: sample_g16_noa16_cd_cl_2d: 744; GFX10: ; %bb.0: ; %main_body 745; GFX10-NEXT: v_perm_b32 v2, v3, v2, 0x5040100 746; GFX10-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 747; GFX10-NEXT: image_sample_cd_cl_g16 v[0:3], [v0, v2, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 748; GFX10-NEXT: s_waitcnt vmcnt(0) 749; GFX10-NEXT: ; return to shader part epilog 750; 751; GFX10GISEL-LABEL: sample_g16_noa16_cd_cl_2d: 752; GFX10GISEL: ; %bb.0: ; %main_body 753; GFX10GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 754; GFX10GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 755; GFX10GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0 756; GFX10GISEL-NEXT: v_lshl_or_b32 v1, v3, 16, v2 757; GFX10GISEL-NEXT: image_sample_cd_cl_g16 v[0:3], [v0, v1, v4, v5, v6], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 758; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) 759; GFX10GISEL-NEXT: ; return to shader part epilog 760main_body: 761 %v = call <4 x float> @llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 762 ret <4 x float> %v 763} 764 765define amdgpu_ps <4 x float> @sample_g16_noa16_c_cd_cl_1d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dsdv, float %s, 
float %clamp) { 766; GFX10-LABEL: sample_g16_noa16_c_cd_cl_1d: 767; GFX10: ; %bb.0: ; %main_body 768; GFX10-NEXT: image_sample_c_cd_cl_g16 v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 769; GFX10-NEXT: s_waitcnt vmcnt(0) 770; GFX10-NEXT: ; return to shader part epilog 771; 772; GFX10GISEL-LABEL: sample_g16_noa16_c_cd_cl_1d: 773; GFX10GISEL: ; %bb.0: ; %main_body 774; GFX10GISEL-NEXT: image_sample_c_cd_cl_g16 v[0:3], v[0:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D 775; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) 776; GFX10GISEL-NEXT: ; return to shader part epilog 777main_body: 778 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dsdv, float %s, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 779 ret <4 x float> %v 780} 781 782define amdgpu_ps <4 x float> @sample_g16_noa16_c_cd_cl_2d(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp) { 783; GFX10-LABEL: sample_g16_noa16_c_cd_cl_2d: 784; GFX10: ; %bb.0: ; %main_body 785; GFX10-NEXT: v_mov_b32_e32 v8, v2 786; GFX10-NEXT: v_mov_b32_e32 v2, v0 787; GFX10-NEXT: v_perm_b32 v4, v4, v3, 0x5040100 788; GFX10-NEXT: v_perm_b32 v3, v8, v1, 0x5040100 789; GFX10-NEXT: image_sample_c_cd_cl_g16 v[0:3], v[2:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 790; GFX10-NEXT: s_waitcnt vmcnt(0) 791; GFX10-NEXT: ; return to shader part epilog 792; 793; GFX10GISEL-LABEL: sample_g16_noa16_c_cd_cl_2d: 794; GFX10GISEL: ; %bb.0: ; %main_body 795; GFX10GISEL-NEXT: v_mov_b32_e32 v8, v2 796; GFX10GISEL-NEXT: v_mov_b32_e32 v2, v0 797; GFX10GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v1 798; GFX10GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v3 799; GFX10GISEL-NEXT: v_lshl_or_b32 v3, v8, 16, v0 800; GFX10GISEL-NEXT: v_lshl_or_b32 v4, v4, 16, v1 801; GFX10GISEL-NEXT: image_sample_c_cd_cl_g16 v[0:3], v[2:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D 802; GFX10GISEL-NEXT: 
s_waitcnt vmcnt(0) 803; GFX10GISEL-NEXT: ; return to shader part epilog 804main_body: 805 %v = call <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32 15, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %clamp, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 806 ret <4 x float> %v 807} 808 809define amdgpu_ps float @sample_g16_noa16_c_d_o_2darray_V1(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) { 810; GFX10-LABEL: sample_g16_noa16_c_d_o_2darray_V1: 811; GFX10: ; %bb.0: ; %main_body 812; GFX10-NEXT: v_mov_b32_e32 v9, v3 813; GFX10-NEXT: v_mov_b32_e32 v10, v2 814; GFX10-NEXT: v_mov_b32_e32 v3, v1 815; GFX10-NEXT: v_mov_b32_e32 v2, v0 816; GFX10-NEXT: v_perm_b32 v5, v5, v4, 0x5040100 817; GFX10-NEXT: v_perm_b32 v4, v9, v10, 0x5040100 818; GFX10-NEXT: image_sample_c_d_o_g16 v0, v[2:8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY 819; GFX10-NEXT: s_waitcnt vmcnt(0) 820; GFX10-NEXT: ; return to shader part epilog 821; 822; GFX10GISEL-LABEL: sample_g16_noa16_c_d_o_2darray_V1: 823; GFX10GISEL: ; %bb.0: ; %main_body 824; GFX10GISEL-NEXT: v_mov_b32_e32 v9, v2 825; GFX10GISEL-NEXT: v_mov_b32_e32 v10, v3 826; GFX10GISEL-NEXT: v_mov_b32_e32 v2, v0 827; GFX10GISEL-NEXT: v_mov_b32_e32 v3, v1 828; GFX10GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v4 829; GFX10GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v9 830; GFX10GISEL-NEXT: v_lshl_or_b32 v5, v5, 16, v1 831; GFX10GISEL-NEXT: v_lshl_or_b32 v4, v10, 16, v0 832; GFX10GISEL-NEXT: image_sample_c_d_o_g16 v0, v[2:8], s[0:7], s[8:11] dmask:0x4 dim:SQ_RSRC_IMG_2D_ARRAY 833; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) 834; GFX10GISEL-NEXT: ; return to shader part epilog 835main_body: 836 %v = call float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f32(i32 4, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x 
i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 837 ret float %v 838} 839 840define amdgpu_ps <2 x float> @sample_g16_noa16_c_d_o_2darray_V2(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice) { 841; GFX10-LABEL: sample_g16_noa16_c_d_o_2darray_V2: 842; GFX10: ; %bb.0: ; %main_body 843; GFX10-NEXT: v_mov_b32_e32 v9, v3 844; GFX10-NEXT: v_mov_b32_e32 v10, v2 845; GFX10-NEXT: v_mov_b32_e32 v3, v1 846; GFX10-NEXT: v_mov_b32_e32 v2, v0 847; GFX10-NEXT: v_perm_b32 v5, v5, v4, 0x5040100 848; GFX10-NEXT: v_perm_b32 v4, v9, v10, 0x5040100 849; GFX10-NEXT: image_sample_c_d_o_g16 v[0:1], v[2:8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY 850; GFX10-NEXT: s_waitcnt vmcnt(0) 851; GFX10-NEXT: ; return to shader part epilog 852; 853; GFX10GISEL-LABEL: sample_g16_noa16_c_d_o_2darray_V2: 854; GFX10GISEL: ; %bb.0: ; %main_body 855; GFX10GISEL-NEXT: v_mov_b32_e32 v9, v2 856; GFX10GISEL-NEXT: v_mov_b32_e32 v10, v3 857; GFX10GISEL-NEXT: v_mov_b32_e32 v2, v0 858; GFX10GISEL-NEXT: v_mov_b32_e32 v3, v1 859; GFX10GISEL-NEXT: v_and_b32_e32 v1, 0xffff, v4 860; GFX10GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v9 861; GFX10GISEL-NEXT: v_lshl_or_b32 v5, v5, 16, v1 862; GFX10GISEL-NEXT: v_lshl_or_b32 v4, v10, 16, v0 863; GFX10GISEL-NEXT: image_sample_c_d_o_g16 v[0:1], v[2:8], s[0:7], s[8:11] dmask:0x6 dim:SQ_RSRC_IMG_2D_ARRAY 864; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) 865; GFX10GISEL-NEXT: ; return to shader part epilog 866main_body: 867 %v = call <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32 6, i32 %offset, float %zcompare, half %dsdh, half %dtdh, half %dsdv, half %dtdv, float %s, float %t, float %slice, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 868 ret <2 x float> %v 869} 870 871declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 872declare <4 x float> 
@llvm.amdgcn.image.sample.d.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 873declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f32(i32, half, half, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 874declare <4 x float> @llvm.amdgcn.image.sample.c.d.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 875declare <4 x float> @llvm.amdgcn.image.sample.c.d.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 876declare <4 x float> @llvm.amdgcn.image.sample.d.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 877declare <4 x float> @llvm.amdgcn.image.sample.d.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 878declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 879declare <4 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 880 881declare <4 x float> @llvm.amdgcn.image.sample.cd.1d.v4f32.f16.f32(i32, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 882declare <4 x float> @llvm.amdgcn.image.sample.cd.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 883declare <4 x float> @llvm.amdgcn.image.sample.c.cd.1d.v4f32.f16.f32(i32, float, half, half, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 884declare <4 x float> @llvm.amdgcn.image.sample.c.cd.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 885declare <4 x float> @llvm.amdgcn.image.sample.cd.cl.1d.v4f32.f16.f32(i32, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 886declare <4 x float> 
@llvm.amdgcn.image.sample.cd.cl.2d.v4f32.f16.f32(i32, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 887declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.1d.v4f32.f16.f32(i32, float, half, half, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 888declare <4 x float> @llvm.amdgcn.image.sample.c.cd.cl.2d.v4f32.f16.f32(i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 889 890declare float @llvm.amdgcn.image.sample.c.d.o.2darray.f32.f16.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 891declare <2 x float> @llvm.amdgcn.image.sample.c.d.o.2darray.v2f32.f16.f32(i32, i32, float, half, half, half, half, float, float, float, <8 x i32>, <4 x i32>, i1, i32, i32) #1 892 893define amdgpu_ps <4 x float> @sample_d_1d_g16_a16(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dsdv, half %s) { 894; GFX10-LABEL: sample_d_1d_g16_a16: 895; GFX10: ; %bb.0: ; %main_body 896; GFX10-NEXT: image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 897; GFX10-NEXT: s_waitcnt vmcnt(0) 898; GFX10-NEXT: ; return to shader part epilog 899; 900; GFX10GISEL-LABEL: sample_d_1d_g16_a16: 901; GFX10GISEL: ; %bb.0: ; %main_body 902; GFX10GISEL-NEXT: image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_1D a16 903; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) 904; GFX10GISEL-NEXT: ; return to shader part epilog 905main_body: 906 %v = call <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32 15, half %dsdh, half %dsdv, half %s, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 907 ret <4 x float> %v 908} 909 910define amdgpu_ps <4 x float> @sample_d_2d_g16_a16(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t) { 911; GFX10-LABEL: sample_d_2d_g16_a16: 912; GFX10: ; %bb.0: ; %main_body 913; GFX10-NEXT: v_perm_b32 v4, v5, v4, 0x5040100 
914; GFX10-NEXT: v_perm_b32 v3, v3, v2, 0x5040100 915; GFX10-NEXT: v_perm_b32 v2, v1, v0, 0x5040100 916; GFX10-NEXT: image_sample_d_g16 v[0:3], v[2:4], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 917; GFX10-NEXT: s_waitcnt vmcnt(0) 918; GFX10-NEXT: ; return to shader part epilog 919; 920; GFX10GISEL-LABEL: sample_d_2d_g16_a16: 921; GFX10GISEL: ; %bb.0: ; %main_body 922; GFX10GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 923; GFX10GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 924; GFX10GISEL-NEXT: v_and_b32_e32 v4, 0xffff, v4 925; GFX10GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0 926; GFX10GISEL-NEXT: v_lshl_or_b32 v1, v3, 16, v2 927; GFX10GISEL-NEXT: v_lshl_or_b32 v2, v5, 16, v4 928; GFX10GISEL-NEXT: image_sample_d_g16 v[0:3], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_2D a16 929; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) 930; GFX10GISEL-NEXT: ; return to shader part epilog 931main_body: 932 %v = call <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %dsdv, half %dtdv, half %s, half %t, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 933 ret <4 x float> %v 934} 935 936define amdgpu_ps <4 x float> @sample_d_3d_g16_a16(<8 x i32> inreg %rsrc, <4 x i32> inreg %samp, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r) { 937; GFX10-LABEL: sample_d_3d_g16_a16: 938; GFX10: ; %bb.0: ; %main_body 939; GFX10-NEXT: v_mov_b32_e32 v12, v8 940; GFX10-NEXT: v_mov_b32_e32 v10, v5 941; GFX10-NEXT: v_mov_b32_e32 v8, v2 942; GFX10-NEXT: v_perm_b32 v11, v7, v6, 0x5040100 943; GFX10-NEXT: v_perm_b32 v9, v4, v3, 0x5040100 944; GFX10-NEXT: v_perm_b32 v7, v1, v0, 0x5040100 945; GFX10-NEXT: image_sample_d_g16 v[0:3], v[7:12], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16 946; GFX10-NEXT: s_waitcnt vmcnt(0) 947; GFX10-NEXT: ; return to shader part epilog 948; 949; GFX10GISEL-LABEL: sample_d_3d_g16_a16: 950; GFX10GISEL: ; %bb.0: ; %main_body 951; GFX10GISEL-NEXT: v_mov_b32_e32 v9, v3 952; 
GFX10GISEL-NEXT: v_mov_b32_e32 v10, v7 953; GFX10GISEL-NEXT: v_mov_b32_e32 v7, v8 954; GFX10GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 955; GFX10GISEL-NEXT: v_and_b32_e32 v6, 0xffff, v6 956; GFX10GISEL-NEXT: v_and_b32_e32 v8, 0xffff, v9 957; GFX10GISEL-NEXT: v_mov_b32_e32 v3, v2 958; GFX10GISEL-NEXT: v_lshl_or_b32 v2, v1, 16, v0 959; GFX10GISEL-NEXT: v_lshl_or_b32 v6, v10, 16, v6 960; GFX10GISEL-NEXT: v_lshl_or_b32 v4, v4, 16, v8 961; GFX10GISEL-NEXT: image_sample_d_g16 v[0:3], v[2:7], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D a16 962; GFX10GISEL-NEXT: s_waitcnt vmcnt(0) 963; GFX10GISEL-NEXT: ; return to shader part epilog 964main_body: 965 %v = call <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32 15, half %dsdh, half %dtdh, half %drdh, half %dsdv, half %dtdv, half %drdv, half %s, half %t, half %r, <8 x i32> %rsrc, <4 x i32> %samp, i1 0, i32 0, i32 0) 966 ret <4 x float> %v 967} 968 969declare <4 x float> @llvm.amdgcn.image.sample.d.1d.v4f32.f16.f16(i32, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) 970declare <4 x float> @llvm.amdgcn.image.sample.d.2d.v4f32.f16.f16(i32, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) 971declare <4 x float> @llvm.amdgcn.image.sample.d.3d.v4f32.f16.f16(i32, half, half, half, half, half, half, half, half, half, <8 x i32>, <4 x i32>, i1, i32, i32) 972 973attributes #0 = { nounwind } 974attributes #1 = { nounwind readonly } 975attributes #2 = { nounwind readnone } 976