; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s

; Test that image.sample LOD (_L), Level 0 (_LZ) and Derivative (_D)
; instructions are sunk across the branch and not left in the first block.
; Since the kill may terminate the shader, there might be no need to sample
; the image at all.
;
; Every sample result is consumed only in %endif1, so the expected output has
; no image_sample_* instruction before the branch and all eight after it.

; GCN-LABEL: {{^}}sinking_img_sample:
; GCN-NOT: image_sample_l v
; GCN-NOT: image_sample_lz v
; GCN-NOT: image_sample_c_lz v
; GCN-NOT: image_sample_c_l v
; GCN-NOT: image_sample_d v
; GCN-NOT: image_sample_c_d v
; GCN-NOT: image_sample_d_cl v
; GCN-NOT: image_sample_c_d_cl v
; GCN: branch
; GCN: image_sample_l v
; GCN: image_sample_lz v
; GCN: image_sample_c_lz v
; GCN: image_sample_c_l v
; GCN: image_sample_d v
; GCN: image_sample_c_d v
; GCN: image_sample_d_cl v
; GCN: image_sample_c_d_cl v
; GCN: exp null

define amdgpu_ps float @sinking_img_sample(i1 %cond) {
main_body:
  ; All samples are issued in the entry block but used only in %endif1.
  %i1 = call <3 x float> @llvm.amdgcn.image.sample.l.2d.v3f32.f32(i32 7, float poison, float poison, float poison, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0)
  %i2 = call <3 x float> @llvm.amdgcn.image.sample.lz.2d.v3f32.f32(i32 7, float poison, float poison, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0)
  %i3 = call <3 x float> @llvm.amdgcn.image.sample.c.lz.2d.v3f32.f32(i32 7, float poison, float poison, float poison, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0)
  %i4 = call <3 x float> @llvm.amdgcn.image.sample.c.l.2d.v3f32.f32(i32 7, float poison, float poison, float poison, float poison, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0)
  ; The derivative variants carry two overloaded float types in their name
  ; suffix, matching the declarations at the end of this file.
  %i5 = call <3 x float> @llvm.amdgcn.image.sample.d.2d.v3f32.f32.f32(i32 7, float poison, float poison, float poison, float poison, float poison, float poison, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0)
  %i6 = call <3 x float> @llvm.amdgcn.image.sample.c.d.2d.v3f32.f32.f32(i32 7, float poison, float poison, float poison, float poison, float poison, float poison, float poison, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0)
  %i7 = call <3 x float> @llvm.amdgcn.image.sample.d.cl.2d.v3f32.f32.f32(i32 7, float poison, float poison, float poison, float poison, float poison, float poison, float poison, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0)
  %i8 = call <3 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v3f32.f32.f32(i32 7, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0)
  br i1 %cond, label %endif1, label %if1

if1:                                              ; preds = %main_body
  ; Kill path: none of the sample results are needed here.
  call void @llvm.amdgcn.kill(i1 false) #4
  br label %exit

endif1:                                           ; preds = %main_body
  ; Chain element 1 of every sample result through fma so each one is live.
  %i22 = extractelement <3 x float> %i1, i32 1
  %i23 = call nsz arcp contract float @llvm.fma.f32(float %i22, float 0.000000e+00, float 0.000000e+00) #1
  %i24 = extractelement <3 x float> %i2, i32 1
  %i25 = call nsz arcp contract float @llvm.fma.f32(float %i23, float %i24, float 0.000000e+00) #1
  %i26 = extractelement <3 x float> %i3, i32 1
  %i27 = call nsz arcp contract float @llvm.fma.f32(float %i25, float %i26, float 0.000000e+00) #1
  %i28 = extractelement <3 x float> %i4, i32 1
  %i29 = call nsz arcp contract float @llvm.fma.f32(float %i27, float %i28, float 0.000000e+00) #1
  %i30 = extractelement <3 x float> %i5, i32 1
  %i31 = call nsz arcp contract float @llvm.fma.f32(float %i29, float %i30, float 0.000000e+00) #1
  %i32 = extractelement <3 x float> %i6, i32 1
  %i33 = call nsz arcp contract float @llvm.fma.f32(float %i31, float %i32, float 0.000000e+00) #1
  %i34 = extractelement <3 x float> %i7, i32 1
  %i35 = call nsz arcp contract float @llvm.fma.f32(float %i33, float %i34, float 0.000000e+00) #1
  %i36 = extractelement <3 x float> %i8, i32 1
  %i37 = call nsz arcp contract float @llvm.fma.f32(float %i35, float %i36, float 0.000000e+00) #1
  br label %exit

exit:                                             ; preds = %endif1, %if1
  %i38 = phi float [ poison, %if1 ], [ %i37, %endif1 ]
  ret float %i38
}


; Test that image.sample instructions which use WQM are marked as Convergent
; and will be left in the first block.
;
; Here the expectation is inverted: all seven samples must appear before the
; branch and none after it.

; GCN-LABEL: {{^}}no_sinking_img_sample:
; GCN: image_sample v
; GCN: image_sample_c v
; GCN: image_sample_cl v
; GCN: image_sample_c_cl v
; GCN: image_sample_b v
; GCN: image_sample_c_b v
; GCN: image_sample_b_cl v
; GCN: branch
; GCN-NOT: image_sample v
; GCN-NOT: image_sample_c v
; GCN-NOT: image_sample_cl v
; GCN-NOT: image_sample_c_cl v
; GCN-NOT: image_sample_b v
; GCN-NOT: image_sample_c_b v
; GCN-NOT: image_sample_b_cl v
; GCN: exp null

define amdgpu_ps float @no_sinking_img_sample(i1 %cond) {
main_body:
  %i1 = call <3 x float> @llvm.amdgcn.image.sample.2d.v3f32.f32(i32 7, float poison, float poison, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0)
  %i2 = call <3 x float> @llvm.amdgcn.image.sample.c.2d.v3f32.f32(i32 7, float poison, float poison, float poison, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0)
  %i3 = call <3 x float> @llvm.amdgcn.image.sample.cl.2d.v3f32.f32(i32 7, float poison, float poison, float poison, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0)
  %i4 = call <3 x float> @llvm.amdgcn.image.sample.c.cl.2d.v3f32.f32(i32 7, float poison, float poison, float poison, float poison, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0)
  ; The bias variants carry two overloaded float types in their name suffix,
  ; matching the declarations at the end of this file.
  %i5 = call <3 x float> @llvm.amdgcn.image.sample.b.2d.v3f32.f32.f32(i32 7, float poison, float poison, float poison, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0)
  %i6 = call <3 x float> @llvm.amdgcn.image.sample.c.b.2d.v3f32.f32.f32(i32 7, float poison, float poison, float poison, float poison, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0)
  %i7 = call <3 x float> @llvm.amdgcn.image.sample.b.cl.2d.v3f32.f32.f32(i32 7, float poison, float poison, float poison, float poison, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0)
  br i1 %cond, label %endif1, label %if1

if1:                                              ; preds = %main_body
  call void @llvm.amdgcn.kill(i1 false) #4
  br label %exit

endif1:                                           ; preds = %main_body
  ; Chain element 2 of every sample result through fma so each one is live.
  %i22 = extractelement <3 x float> %i1, i32 2
  %i23 = call nsz arcp contract float @llvm.fma.f32(float %i22, float 0.000000e+00, float 0.000000e+00) #1
  %i24 = extractelement <3 x float> %i2, i32 2
  %i25 = call nsz arcp contract float @llvm.fma.f32(float %i23, float %i24, float 0.000000e+00) #1
  %i26 = extractelement <3 x float> %i3, i32 2
  %i27 = call nsz arcp contract float @llvm.fma.f32(float %i25, float %i26, float 0.000000e+00) #1
  %i28 = extractelement <3 x float> %i4, i32 2
  %i29 = call nsz arcp contract float @llvm.fma.f32(float %i27, float %i28, float 0.000000e+00) #1
  %i30 = extractelement <3 x float> %i5, i32 2
  %i31 = call nsz arcp contract float @llvm.fma.f32(float %i29, float %i30, float 0.000000e+00) #1
  %i32 = extractelement <3 x float> %i6, i32 2
  %i33 = call nsz arcp contract float @llvm.fma.f32(float %i31, float %i32, float 0.000000e+00) #1
  %i34 = extractelement <3 x float> %i7, i32 2
  %i35 = call nsz arcp contract float @llvm.fma.f32(float %i33, float %i34, float 0.000000e+00) #1
  br label %exit

exit:                                             ; preds = %endif1, %if1
  %i36 = phi float [ poison, %if1 ], [ %i35, %endif1 ]
  ret float %i36
}

; Intrinsic declarations. Each name below is spelled exactly as at its call
; sites above, so every call binds to an explicit declaration carrying
; attribute group #3.

; Function Attrs: nounwind readonly willreturn
declare <3 x float> @llvm.amdgcn.image.sample.2d.v3f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #3
declare <3 x float> @llvm.amdgcn.image.sample.c.2d.v3f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #3
declare <3 x float> @llvm.amdgcn.image.sample.cl.2d.v3f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #3
declare <3 x float> @llvm.amdgcn.image.sample.c.cl.2d.v3f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #3
declare <3 x float> @llvm.amdgcn.image.sample.b.2d.v3f32.f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #3
declare <3 x float> @llvm.amdgcn.image.sample.c.b.2d.v3f32.f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #3
declare <3 x float> @llvm.amdgcn.image.sample.b.cl.2d.v3f32.f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #3
; NOTE(review): c.b.cl is declared but not called by either function above.
declare <3 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v3f32.f32.f32(i32 immarg, float, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #3
declare <3 x float> @llvm.amdgcn.image.sample.l.2d.v3f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #3
declare <3 x float> @llvm.amdgcn.image.sample.lz.2d.v3f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #3
declare <3 x float> @llvm.amdgcn.image.sample.c.lz.2d.v3f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #3
declare <3 x float> @llvm.amdgcn.image.sample.c.l.2d.v3f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #3
declare <3 x float> @llvm.amdgcn.image.sample.d.2d.v3f32.f32.f32(i32 immarg, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #3
declare <3 x float> @llvm.amdgcn.image.sample.c.d.2d.v3f32.f32.f32(i32 immarg, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #3
declare <3 x float> @llvm.amdgcn.image.sample.d.cl.2d.v3f32.f32.f32(i32 immarg, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #3
declare <3 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v3f32.f32.f32(i32 immarg, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #3

; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
declare float @llvm.fma.f32(float, float, float) #2

; Function Attrs: nounwind
declare void @llvm.amdgcn.kill(i1) #4

attributes #1 = { nounwind readnone }
attributes #2 = { nofree nosync nounwind readnone speculatable willreturn }
attributes #3 = { nounwind readonly willreturn }
attributes #4 = { nounwind }