xref: /llvm-project/llvm/test/CodeGen/AMDGPU/sink-image-sample.ll (revision b3924cb9ecc95aa428d48e58ef5f2629f5166e02)
1; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
2; RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
3
4; Test that the image.sample LOD (_L), level-zero (_LZ), and derivative (_D) instructions are sunk across the branch and not left in the first block. Since the kill may terminate the shader, there might be no need to sample the image at all.
5
6; GCN-LABEL: {{^}}sinking_img_sample:
7; GCN-NOT: image_sample_l v
8; GCN-NOT: image_sample_lz v
9; GCN-NOT: image_sample_c_lz v
10; GCN-NOT: image_sample_c_l v
11; GCN-NOT: image_sample_d v
12; GCN-NOT: image_sample_c_d v
13; GCN-NOT: image_sample_d_cl v
14; GCN-NOT: image_sample_c_d_cl v
15; GCN: branch
16; GCN: image_sample_l v
17; GCN: image_sample_lz v
18; GCN: image_sample_c_lz v
19; GCN: image_sample_c_l v
20; GCN: image_sample_d v
21; GCN: image_sample_c_d v
22; GCN: image_sample_d_cl v
23; GCN: image_sample_c_d_cl v
24; GCN: exp null
25
26define amdgpu_ps float @sinking_img_sample(i1 %cond) { ; eight image samples issued before a branch whose other arm only kills
27main_body:                                        ; every sample result below is read only in %endif1
28  %i1 = call <3 x float> @llvm.amdgcn.image.sample.l.2d.v3f32.f32(i32 7, float poison, float poison, float poison, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0)
29  %i2 = call <3 x float> @llvm.amdgcn.image.sample.lz.2d.v3f32.f32(i32 7, float poison, float poison, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0)
30  %i3 = call <3 x float> @llvm.amdgcn.image.sample.c.lz.2d.v3f32.f32(i32 7, float poison, float poison, float poison, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0)
31  %i4 = call <3 x float> @llvm.amdgcn.image.sample.c.l.2d.v3f32.f32(i32 7, float poison, float poison, float poison, float poison, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0)
32  %i5 = call <3 x float> @llvm.amdgcn.image.sample.d.2d.v3f32.f32.f32(i32 7, float poison, float poison, float poison, float poison, float poison, float poison, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0) ; derivative variants use the .f32.f32 suffix, matching the declarations in this file
33  %i6 = call <3 x float> @llvm.amdgcn.image.sample.c.d.2d.v3f32.f32.f32(i32 7, float poison, float poison, float poison, float poison, float poison, float poison, float poison, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0)
34  %i7 = call <3 x float> @llvm.amdgcn.image.sample.d.cl.2d.v3f32.f32.f32(i32 7, float poison, float poison, float poison, float poison, float poison, float poison, float poison, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0)
35  %i8 = call <3 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v3f32.f32.f32(i32 7, float poison, float poison, float poison, float poison, float poison, float poison, float poison, float poison, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0)
36  br i1 %cond, label %endif1, label %if1
37
38if1:                                              ; preds = %main_body
39  call void @llvm.amdgcn.kill(i1 false) #4 ; this arm uses none of the sample results
40  br label %exit
41
42endif1:                                           ; preds = %main_body
43  %i22 = extractelement <3 x float> %i1, i32 1 ; element 1 of each sample is chained through llvm.fma.f32 so every sample has a use here
44  %i23 = call nsz arcp contract float @llvm.fma.f32(float %i22, float 0.000000e+00, float 0.000000e+00) #1
45  %i24 = extractelement <3 x float> %i2, i32 1
46  %i25 = call nsz arcp contract float @llvm.fma.f32(float %i23, float %i24, float 0.000000e+00) #1
47  %i26 = extractelement <3 x float> %i3, i32 1
48  %i27 = call nsz arcp contract float @llvm.fma.f32(float %i25, float %i26, float 0.000000e+00) #1
49  %i28 = extractelement <3 x float> %i4, i32 1
50  %i29 = call nsz arcp contract float @llvm.fma.f32(float %i27, float %i28, float 0.000000e+00) #1
51  %i30 = extractelement <3 x float> %i5, i32 1
52  %i31 = call nsz arcp contract float @llvm.fma.f32(float %i29, float %i30, float 0.000000e+00) #1
53  %i32 = extractelement <3 x float> %i6, i32 1
54  %i33 = call nsz arcp contract float @llvm.fma.f32(float %i31, float %i32, float 0.000000e+00) #1
55  %i34 = extractelement <3 x float> %i7, i32 1
56  %i35 = call nsz arcp contract float @llvm.fma.f32(float %i33, float %i34, float 0.000000e+00) #1
57  %i36 = extractelement <3 x float> %i8, i32 1
58  %i37 = call nsz arcp contract float @llvm.fma.f32(float %i35, float %i36, float 0.000000e+00) #1
59  br label %exit
60
61exit:                                             ; preds = %endif1, %if1
62  %i38 = phi float [ poison, %if1 ], [ %i37, %endif1 ] ; the kill arm contributes poison; the other arm contributes the fma chain result
63  ret float %i38
64}
65
66
67; Test that image.sample instructions that use WQM are marked as convergent and are therefore left in the first block rather than sunk across the branch.
68
69; GCN-LABEL: {{^}}no_sinking_img_sample:
70; GCN: image_sample v
71; GCN: image_sample_c v
72; GCN: image_sample_cl v
73; GCN: image_sample_c_cl v
74; GCN: image_sample_b v
75; GCN: image_sample_c_b v
76; GCN: image_sample_b_cl v
77; GCN: branch
78; GCN-NOT: image_sample v
79; GCN-NOT: image_sample_c v
80; GCN-NOT: image_sample_cl v
81; GCN-NOT: image_sample_c_cl v
82; GCN-NOT: image_sample_b v
83; GCN-NOT: image_sample_c_b v
84; GCN-NOT: image_sample_b_cl v
85; GCN: exp null
86
87define amdgpu_ps float @no_sinking_img_sample(i1 %cond) { ; seven image samples issued before a branch whose other arm only kills
88main_body:                                        ; every sample result below is read only in %endif1
89  %smp = call <3 x float> @llvm.amdgcn.image.sample.2d.v3f32.f32(i32 7, float poison, float poison, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0)
90  %smp.c = call <3 x float> @llvm.amdgcn.image.sample.c.2d.v3f32.f32(i32 7, float poison, float poison, float poison, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0)
91  %smp.cl = call <3 x float> @llvm.amdgcn.image.sample.cl.2d.v3f32.f32(i32 7, float poison, float poison, float poison, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0)
92  %smp.c.cl = call <3 x float> @llvm.amdgcn.image.sample.c.cl.2d.v3f32.f32(i32 7, float poison, float poison, float poison, float poison, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0)
93  %smp.b = call <3 x float> @llvm.amdgcn.image.sample.b.2d.v3f32.f32(i32 7, float poison, float poison, float poison, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0)
94  %smp.c.b = call <3 x float> @llvm.amdgcn.image.sample.c.b.2d.v3f32.f32(i32 7, float poison, float poison, float poison, float poison, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0)
95  %smp.b.cl = call <3 x float> @llvm.amdgcn.image.sample.b.cl.2d.v3f32.f32.f32(i32 7, float poison, float poison, float poison, float poison, <8 x i32> poison, <4 x i32> poison, i1 false, i32 0, i32 0)
96  br i1 %cond, label %endif1, label %if1
97
98if1:                                              ; preds = %main_body
99  call void @llvm.amdgcn.kill(i1 false) #4 ; this arm uses none of the sample results
100  br label %exit
101
102endif1:                                           ; preds = %main_body
103  %z0 = extractelement <3 x float> %smp, i32 2 ; element 2 of each sample is chained through llvm.fma.f32 so every sample has a use here
104  %acc0 = call nsz arcp contract float @llvm.fma.f32(float %z0, float 0.000000e+00, float 0.000000e+00) #1
105  %z1 = extractelement <3 x float> %smp.c, i32 2
106  %acc1 = call nsz arcp contract float @llvm.fma.f32(float %acc0, float %z1, float 0.000000e+00) #1
107  %z2 = extractelement <3 x float> %smp.cl, i32 2
108  %acc2 = call nsz arcp contract float @llvm.fma.f32(float %acc1, float %z2, float 0.000000e+00) #1
109  %z3 = extractelement <3 x float> %smp.c.cl, i32 2
110  %acc3 = call nsz arcp contract float @llvm.fma.f32(float %acc2, float %z3, float 0.000000e+00) #1
111  %z4 = extractelement <3 x float> %smp.b, i32 2
112  %acc4 = call nsz arcp contract float @llvm.fma.f32(float %acc3, float %z4, float 0.000000e+00) #1
113  %z5 = extractelement <3 x float> %smp.c.b, i32 2
114  %acc5 = call nsz arcp contract float @llvm.fma.f32(float %acc4, float %z5, float 0.000000e+00) #1
115  %z6 = extractelement <3 x float> %smp.b.cl, i32 2
116  %acc6 = call nsz arcp contract float @llvm.fma.f32(float %acc5, float %z6, float 0.000000e+00) #1
117  br label %exit
118
119exit:                                             ; preds = %endif1, %if1
120  %result = phi float [ poison, %if1 ], [ %acc6, %endif1 ] ; the kill arm contributes poison; the other arm contributes the fma chain result
121  ret float %result
122}
123
124; Function Attrs: nounwind readonly willreturn
125declare <3 x float> @llvm.amdgcn.image.sample.2d.v3f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #3
126declare <3 x float> @llvm.amdgcn.image.sample.c.2d.v3f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #3
127declare <3 x float> @llvm.amdgcn.image.sample.cl.2d.v3f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #3
128declare <3 x float> @llvm.amdgcn.image.sample.c.cl.2d.v3f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #3
129declare <3 x float> @llvm.amdgcn.image.sample.b.2d.v3f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #3
130declare <3 x float> @llvm.amdgcn.image.sample.c.b.2d.v3f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #3
131declare <3 x float> @llvm.amdgcn.image.sample.b.cl.2d.v3f32.f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #3 ; name spelled exactly as at its call site in @no_sinking_img_sample
132declare <3 x float> @llvm.amdgcn.image.sample.c.b.cl.2d.v3f32.f32(i32 immarg, float, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #3 ; NOTE(review): no call to this one is visible in this file
133declare <3 x float> @llvm.amdgcn.image.sample.l.2d.v3f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #3
134declare <3 x float> @llvm.amdgcn.image.sample.lz.2d.v3f32.f32(i32 immarg, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #3
135declare <3 x float> @llvm.amdgcn.image.sample.c.lz.2d.v3f32.f32(i32 immarg, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #3
136declare <3 x float> @llvm.amdgcn.image.sample.c.l.2d.v3f32.f32(i32 immarg, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #3
137declare <3 x float> @llvm.amdgcn.image.sample.d.2d.v3f32.f32.f32(i32 immarg, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #3
138declare <3 x float> @llvm.amdgcn.image.sample.c.d.2d.v3f32.f32.f32(i32 immarg, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #3
139declare <3 x float> @llvm.amdgcn.image.sample.d.cl.2d.v3f32.f32.f32(i32 immarg, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #3
140declare <3 x float> @llvm.amdgcn.image.sample.c.d.cl.2d.v3f32.f32.f32(i32 immarg, float, float, float, float, float, float, float, float, <8 x i32>, <4 x i32>, i1 immarg, i32 immarg, i32 immarg) #3
141
142; Function Attrs: nofree nosync nounwind readnone speculatable willreturn
143declare float @llvm.fma.f32(float, float, float) #2
144
145; Function Attrs: nounwind
146declare void @llvm.amdgcn.kill(i1) #4
147
148attributes #1 = { nounwind readnone } ; attached to the llvm.fma.f32 call sites in both test functions
149attributes #2 = { nofree nosync nounwind readnone speculatable willreturn } ; attached to the llvm.fma.f32 declaration
150attributes #3 = { nounwind readonly willreturn } ; attached to every image.sample declaration
151attributes #4 = { nounwind } ; attached to the llvm.amdgcn.kill declaration and its call sites
152