xref: /llvm-project/llvm/test/CodeGen/AMDGPU/schedule-amdgpu-trackers.ll (revision 11b040192640ef3b1f481124c440f464ed6ec86a)
1; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 -verify-misched < %s | FileCheck --check-prefixes=CHECK,GFX11-PAL %s
2; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 -amdgpu-use-amdgpu-trackers=1 -verify-misched < %s | FileCheck --check-prefixes=CHECK,GFX11-PAL-GCNTRACKERS %s
3; RUN: llc -mtriple=amdgcn -mcpu=tonga -amdgpu-scalarize-global-loads=false -verify-misched < %s | FileCheck --check-prefixes=CHECK,TONGA %s
4; RUN: llc -mtriple=amdgcn -mcpu=tonga -amdgpu-scalarize-global-loads=false -amdgpu-use-amdgpu-trackers=1 -verify-misched < %s | FileCheck --check-prefixes=CHECK,TONGA-GCNTRACKERS %s
5; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -verify-misched < %s | FileCheck --check-prefixes=CHECK,GFX908 %s
6; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -amdgpu-use-amdgpu-trackers=1 -verify-misched < %s | FileCheck --check-prefixes=CHECK,GFX908-GCNTRACKERS %s
7; RUN: llc -mtriple=amdgcn -verify-misched < %s | FileCheck --check-prefixes=CHECK,GENERIC %s
8; RUN: llc -mtriple=amdgcn -amdgpu-use-amdgpu-trackers=1 -verify-misched < %s | FileCheck --check-prefixes=CHECK,GENERIC-GCNTRACKERS %s
9
10; The GCN trackers are sensitive to minor changes in register pressure (RP). As a result, they may avoid scheduling
11; certain instructions that, had they been scheduled, would have allowed other RP-reducing instructions to be scheduled.
12
13; CHECK-LABEL: {{^}}return_72xi32:
14; GFX11-PAL:    NumSgprs: 33
15; GFX11-PAL-GCNTRACKERS:    NumSgprs: 33
16; GFX11-PAL:    NumVgprs: 64
17; GFX11-PAL-GCNTRACKERS:    NumVgprs: 64
18; GFX11-PAL:    ScratchSize: 220
19; GFX11-PAL-GCNTRACKERS:    ScratchSize: 248
20
21
22; CHECK-LABEL: {{^}}call_72xi32:
23; GFX11-PAL:    NumSgprs: 37
24; GFX11-PAL-GCNTRACKERS:    NumSgprs: 37
25; GFX11-PAL:    NumVgprs: 64
26; GFX11-PAL-GCNTRACKERS:    NumVgprs: 64
27; GFX11-PAL:    ScratchSize: 2780
28; GFX11-PAL-GCNTRACKERS:    ScratchSize: 2808
29
30
31define amdgpu_gfx <72 x i32> @return_72xi32(<72 x i32> %val) #1 {
  ; Identity function: takes a <72 x i32> vector by value and returns it
  ; unchanged, using the amdgpu_gfx calling convention for both the argument
  ; and the return value.
  ; NOTE(review): attribute group #1 is defined outside the visible part of
  ; the file; its contents are not documented here.
32  ret <72 x i32> %val
33}
34
35define amdgpu_gfx void @call_72xi32() #1 {
  ; Caller side of the <72 x i32> pass/return test: invokes @return_72xi32
  ; twice, feeding a modified copy of the first result into the second call.
  ; Takes no arguments and returns nothing.
36entry:
  ; First call passes an all-zero vector.
37  %ret.0 = call amdgpu_gfx <72 x i32> @return_72xi32(<72 x i32> zeroinitializer)
  ; Overwrite element 0 with 42 and element 58 with 24, so the second call's
  ; argument is derived from the first call's return value.
38  %val.0 = insertelement <72 x i32> %ret.0, i32 42, i32 0
39  %val.1 = insertelement <72 x i32> %val.0, i32 24, i32 58
  ; Second call; its result (%ret.1) is never used.
40  %ret.1 = call amdgpu_gfx <72 x i32> @return_72xi32(<72 x i32> %val.1)
41  ret void
42}
43
44; CHECK-LABEL: {{^}}global_extload_v16f16_to_v16f64:
45; TONGA:    NumSgprs: 96
46; TONGA-GCNTRACKERS:    NumSgprs: 96
47; TONGA:    NumVgprs: 21
48; TONGA-GCNTRACKERS:    NumVgprs: 23
49; TONGA:    Occupancy: 8
50; TONGA-GCNTRACKERS:    Occupancy: 8
51
52
53define amdgpu_kernel void @global_extload_v16f16_to_v16f64(ptr addrspace(1) %out, ptr addrspace(1) %in)  {
  ; Kernel that loads a <16 x half> vector from %in, extends every element to
  ; double, and stores the resulting <16 x double> vector to %out.
  ; Both pointers are in addrspace(1) (the AMDGPU global address space).
54  %val = load <16 x half>, ptr addrspace(1) %in
  ; Floating-point extension half -> double, element-wise.
55  %cvt = fpext <16 x half> %val to <16 x double>
56  store <16 x double> %cvt, ptr addrspace(1) %out
57  ret void
58}
59
60; CHECK-LABEL: {{^}}constant_zextload_v64i16_to_v64i32:
61; GENERIC:    NumSgprs: 71
62; GENERIC-GCNTRACKERS:    NumSgprs: 45
63; GENERIC:    NumVgprs: 20
64; GENERIC-GCNTRACKERS:    NumVgprs: 20
65; GENERIC:    Occupancy: 7
66; GENERIC-GCNTRACKERS:    Occupancy: 10
67
68define amdgpu_kernel void @constant_zextload_v64i16_to_v64i32(ptr addrspace(1) %out, ptr addrspace(4) %in) {
  ; Kernel that loads a <64 x i16> vector from %in, zero-extends every element
  ; to i32, and stores the resulting <64 x i32> vector to %out.
  ; %in is in addrspace(4) (the AMDGPU constant address space); %out is in
  ; addrspace(1) (the AMDGPU global address space).
69  %load = load <64 x i16>, ptr addrspace(4) %in
  ; Zero extension i16 -> i32, element-wise.
70  %ext = zext <64 x i16> %load to <64 x i32>
71  store <64 x i32> %ext, ptr addrspace(1) %out
72  ret void
73}
74
75; CHECK-LABEL: {{^}}excess_soft_clause_reg_pressure:
76; GFX908:    NumSgprs: 64
77; GFX908-GCNTRACKERS:    NumSgprs: 64
78; GFX908:    NumVgprs: 43
79; GFX908-GCNTRACKERS:    NumVgprs: 39
80; GFX908:    Occupancy: 5
81; GFX908-GCNTRACKERS:    Occupancy: 6
82
83
84define protected amdgpu_kernel void @excess_soft_clause_reg_pressure(ptr addrspace(4) %wei_ptr, ptr addrspace(1) %out_ptr, ptr addrspace(1) %in) {
85entry:
86  %i = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
87  %i2 = load i64, ptr addrspace(4) %i, align 8
88  %i3 = tail call i32 @llvm.amdgcn.workgroup.id.x()
89  %i4 = shl i32 %i3, 8
90  %i5 = tail call i32 @llvm.amdgcn.workitem.id.x(), !range !5
91  %i6 = add i32 %i4, %i5
92  %i7 = trunc i64 %i2 to i32
93  %conv = add i32 %i6, %i7
94  %conv.frozen = freeze i32 %conv
95  %div = udiv i32 %conv.frozen, 49
96  %add.ptr22 = getelementptr inbounds float, ptr addrspace(4) %wei_ptr, i64 undef
97  %in.ptr1 = getelementptr inbounds float, ptr addrspace(1) %in, i32 %i5
98  br label %for.cond28.preheader
99
100for.cond28.preheader:                             ; preds = %for.cond28.preheader, %entry
101  %accum.sroa.110.0 = phi float [ 0.000000e+00, %entry ], [ %i251, %for.cond28.preheader ]
102  %accum.sroa.106.0 = phi float [ 0.000000e+00, %entry ], [ %i247, %for.cond28.preheader ]
103  %accum.sroa.102.0 = phi float [ 0.000000e+00, %entry ], [ %i243, %for.cond28.preheader ]
104  %accum.sroa.98.0 = phi float [ 0.000000e+00, %entry ], [ %i239, %for.cond28.preheader ]
105  %accum.sroa.94.0 = phi float [ 0.000000e+00, %entry ], [ %i235, %for.cond28.preheader ]
106  %accum.sroa.90.0 = phi float [ 0.000000e+00, %entry ], [ %i231, %for.cond28.preheader ]
107  %accum.sroa.86.0 = phi float [ 0.000000e+00, %entry ], [ %i227, %for.cond28.preheader ]
108  %accum.sroa.82.0 = phi float [ 0.000000e+00, %entry ], [ %i223, %for.cond28.preheader ]
109  %accum.sroa.78.0 = phi float [ 0.000000e+00, %entry ], [ %i219, %for.cond28.preheader ]
110  %accum.sroa.74.0 = phi float [ 0.000000e+00, %entry ], [ %i215, %for.cond28.preheader ]
111  %accum.sroa.70.0 = phi float [ 0.000000e+00, %entry ], [ %i211, %for.cond28.preheader ]
112  %accum.sroa.66.0 = phi float [ 0.000000e+00, %entry ], [ %i207, %for.cond28.preheader ]
113  %accum.sroa.62.0 = phi float [ 0.000000e+00, %entry ], [ %i203, %for.cond28.preheader ]
114  %accum.sroa.58.0 = phi float [ 0.000000e+00, %entry ], [ %i199, %for.cond28.preheader ]
115  %accum.sroa.54.0 = phi float [ 0.000000e+00, %entry ], [ %i195, %for.cond28.preheader ]
116  %accum.sroa.50.0 = phi float [ 0.000000e+00, %entry ], [ %i191, %for.cond28.preheader ]
117  %accum.sroa.46.0 = phi float [ 0.000000e+00, %entry ], [ %i187, %for.cond28.preheader ]
118  %accum.sroa.42.0 = phi float [ 0.000000e+00, %entry ], [ %i183, %for.cond28.preheader ]
119  %accum.sroa.38.0 = phi float [ 0.000000e+00, %entry ], [ %i179, %for.cond28.preheader ]
120  %accum.sroa.34.0 = phi float [ 0.000000e+00, %entry ], [ %i175, %for.cond28.preheader ]
121  %accum.sroa.30.0 = phi float [ 0.000000e+00, %entry ], [ %i171, %for.cond28.preheader ]
122  %accum.sroa.26.0 = phi float [ 0.000000e+00, %entry ], [ %i167, %for.cond28.preheader ]
123  %accum.sroa.22.0 = phi float [ 0.000000e+00, %entry ], [ %i163, %for.cond28.preheader ]
124  %accum.sroa.18.0 = phi float [ 0.000000e+00, %entry ], [ %i159, %for.cond28.preheader ]
125  %accum.sroa.14.0 = phi float [ 0.000000e+00, %entry ], [ %i155, %for.cond28.preheader ]
126  %accum.sroa.10.0 = phi float [ 0.000000e+00, %entry ], [ %i151, %for.cond28.preheader ]
127  %accum.sroa.6.0 = phi float [ 0.000000e+00, %entry ], [ %i147, %for.cond28.preheader ]
128  %accum.sroa.0.0 = phi float [ 0.000000e+00, %entry ], [ %i143, %for.cond28.preheader ]
129  %accum.sroa.114.0 = phi float [ 0.000000e+00, %entry ], [ %i255, %for.cond28.preheader ]
130  %accum.sroa.118.0 = phi float [ 0.000000e+00, %entry ], [ %i259, %for.cond28.preheader ]
131  %accum.sroa.122.0 = phi float [ 0.000000e+00, %entry ], [ %i263, %for.cond28.preheader ]
132  %accum.sroa.126.0 = phi float [ 0.000000e+00, %entry ], [ %i267, %for.cond28.preheader ]
133  %i_ptr.0288 = phi ptr addrspace(1) [ %in.ptr1, %entry ], [ %add.ptr47.3, %for.cond28.preheader ]
134  %w_ptr.0287 = phi ptr addrspace(4) [ %add.ptr22, %entry ], [ %add.ptr74, %for.cond28.preheader ]
135  %ci.0286 = phi i32 [ 0, %entry ], [ %inc116, %for.cond28.preheader ]
136  %i8 = load float, ptr addrspace(1) %i_ptr.0288, align 4
137  %add.ptr47 = getelementptr inbounds float, ptr addrspace(1) %i_ptr.0288, i64 49
138  %i9 = load float, ptr addrspace(1) %add.ptr47, align 4
139  %add.ptr47.1 = getelementptr inbounds float, ptr addrspace(1) %i_ptr.0288, i64 98
140  %i10 = load float, ptr addrspace(1) %add.ptr47.1, align 4
141  %add.ptr47.2 = getelementptr inbounds float, ptr addrspace(1) %i_ptr.0288, i64 147
142  %i11 = load float, ptr addrspace(1) %add.ptr47.2, align 4
143  %i12 = load float, ptr addrspace(4) %w_ptr.0287, align 4
144  %add.ptr66 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 1024
145  %i13 = load float, ptr addrspace(4) %add.ptr66, align 4
146  %add.ptr66.1 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 2048
147  %i14 = load float, ptr addrspace(4) %add.ptr66.1, align 4
148  %add.ptr66.2 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 3072
149  %i15 = load float, ptr addrspace(4) %add.ptr66.2, align 4
150  %add.ptr70 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 1
151  %i16 = load float, ptr addrspace(4) %add.ptr70, align 4
152  %add.ptr66.1291 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 1025
153  %i17 = load float, ptr addrspace(4) %add.ptr66.1291, align 4
154  %add.ptr66.1.1 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 2049
155  %i18 = load float, ptr addrspace(4) %add.ptr66.1.1, align 4
156  %add.ptr66.2.1 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 3073
157  %i19 = load float, ptr addrspace(4) %add.ptr66.2.1, align 4
158  %add.ptr70.1 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 2
159  %i20 = load float, ptr addrspace(4) %add.ptr70.1, align 4
160  %add.ptr66.2293 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 1026
161  %i21 = load float, ptr addrspace(4) %add.ptr66.2293, align 4
162  %add.ptr66.1.2 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 2050
163  %i22 = load float, ptr addrspace(4) %add.ptr66.1.2, align 4
164  %add.ptr66.2.2 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 3074
165  %i23 = load float, ptr addrspace(4) %add.ptr66.2.2, align 4
166  %add.ptr70.2 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 3
167  %i24 = load float, ptr addrspace(4) %add.ptr70.2, align 4
168  %add.ptr66.3 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 1027
169  %i25 = load float, ptr addrspace(4) %add.ptr66.3, align 4
170  %add.ptr66.1.3 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 2051
171  %i26 = load float, ptr addrspace(4) %add.ptr66.1.3, align 4
172  %add.ptr66.2.3 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 3075
173  %i27 = load float, ptr addrspace(4) %add.ptr66.2.3, align 4
174  %add.ptr70.3 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 4
175  %i28 = load float, ptr addrspace(4) %add.ptr70.3, align 4
176  %add.ptr66.4 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 1028
177  %i29 = load float, ptr addrspace(4) %add.ptr66.4, align 4
178  %add.ptr66.1.4 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 2052
179  %i30 = load float, ptr addrspace(4) %add.ptr66.1.4, align 4
180  %add.ptr66.2.4 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 3076
181  %i31 = load float, ptr addrspace(4) %add.ptr66.2.4, align 4
182  %add.ptr70.4 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 5
183  %i32 = load float, ptr addrspace(4) %add.ptr70.4, align 4
184  %add.ptr66.5 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 1029
185  %i33 = load float, ptr addrspace(4) %add.ptr66.5, align 4
186  %add.ptr66.1.5 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 2053
187  %i34 = load float, ptr addrspace(4) %add.ptr66.1.5, align 4
188  %add.ptr66.2.5 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 3077
189  %i35 = load float, ptr addrspace(4) %add.ptr66.2.5, align 4
190  %add.ptr70.5 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 6
191  %i36 = load float, ptr addrspace(4) %add.ptr70.5, align 4
192  %add.ptr66.6 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 1030
193  %i37 = load float, ptr addrspace(4) %add.ptr66.6, align 4
194  %add.ptr66.1.6 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 2054
195  %i38 = load float, ptr addrspace(4) %add.ptr66.1.6, align 4
196  %add.ptr66.2.6 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 3078
197  %i39 = load float, ptr addrspace(4) %add.ptr66.2.6, align 4
198  %add.ptr70.6 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 7
199  %i40 = load float, ptr addrspace(4) %add.ptr70.6, align 4
200  %add.ptr66.7 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 1031
201  %i41 = load float, ptr addrspace(4) %add.ptr66.7, align 4
202  %add.ptr66.1.7 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 2055
203  %i42 = load float, ptr addrspace(4) %add.ptr66.1.7, align 4
204  %add.ptr66.2.7 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 3079
205  %i43 = load float, ptr addrspace(4) %add.ptr66.2.7, align 4
206  %add.ptr70.7 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 8
207  %i44 = load float, ptr addrspace(4) %add.ptr70.7, align 4
208  %add.ptr66.8 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 1032
209  %i45 = load float, ptr addrspace(4) %add.ptr66.8, align 4
210  %add.ptr66.1.8 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 2056
211  %i46 = load float, ptr addrspace(4) %add.ptr66.1.8, align 4
212  %add.ptr66.2.8 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 3080
213  %i47 = load float, ptr addrspace(4) %add.ptr66.2.8, align 4
214  %add.ptr70.8 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 9
215  %i48 = load float, ptr addrspace(4) %add.ptr70.8, align 4
216  %add.ptr66.9 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 1033
217  %i49 = load float, ptr addrspace(4) %add.ptr66.9, align 4
218  %add.ptr66.1.9 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 2057
219  %i50 = load float, ptr addrspace(4) %add.ptr66.1.9, align 4
220  %add.ptr66.2.9 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 3081
221  %i51 = load float, ptr addrspace(4) %add.ptr66.2.9, align 4
222  %add.ptr70.9 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 10
223  %i52 = load float, ptr addrspace(4) %add.ptr70.9, align 4
224  %add.ptr66.10 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 1034
225  %i53 = load float, ptr addrspace(4) %add.ptr66.10, align 4
226  %add.ptr66.1.10 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 2058
227  %i54 = load float, ptr addrspace(4) %add.ptr66.1.10, align 4
228  %add.ptr66.2.10 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 3082
229  %i55 = load float, ptr addrspace(4) %add.ptr66.2.10, align 4
230  %add.ptr70.10 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 11
231  %i56 = load float, ptr addrspace(4) %add.ptr70.10, align 4
232  %add.ptr66.11 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 1035
233  %i57 = load float, ptr addrspace(4) %add.ptr66.11, align 4
234  %add.ptr66.1.11 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 2059
235  %i58 = load float, ptr addrspace(4) %add.ptr66.1.11, align 4
236  %add.ptr66.2.11 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 3083
237  %i59 = load float, ptr addrspace(4) %add.ptr66.2.11, align 4
238  %add.ptr70.11 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 12
239  %i60 = load float, ptr addrspace(4) %add.ptr70.11, align 4
240  %add.ptr66.12 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 1036
241  %i61 = load float, ptr addrspace(4) %add.ptr66.12, align 4
242  %add.ptr66.1.12 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 2060
243  %i62 = load float, ptr addrspace(4) %add.ptr66.1.12, align 4
244  %add.ptr66.2.12 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 3084
245  %i63 = load float, ptr addrspace(4) %add.ptr66.2.12, align 4
246  %add.ptr70.12 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 13
247  %i64 = load float, ptr addrspace(4) %add.ptr70.12, align 4
248  %add.ptr66.13 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 1037
249  %i65 = load float, ptr addrspace(4) %add.ptr66.13, align 4
250  %add.ptr66.1.13 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 2061
251  %i66 = load float, ptr addrspace(4) %add.ptr66.1.13, align 4
252  %add.ptr66.2.13 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 3085
253  %i67 = load float, ptr addrspace(4) %add.ptr66.2.13, align 4
254  %add.ptr70.13 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 14
255  %i68 = load float, ptr addrspace(4) %add.ptr70.13, align 4
256  %add.ptr66.14 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 1038
257  %i69 = load float, ptr addrspace(4) %add.ptr66.14, align 4
258  %add.ptr66.1.14 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 2062
259  %i70 = load float, ptr addrspace(4) %add.ptr66.1.14, align 4
260  %add.ptr66.2.14 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 3086
261  %i71 = load float, ptr addrspace(4) %add.ptr66.2.14, align 4
262  %add.ptr70.14 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 15
263  %i72 = load float, ptr addrspace(4) %add.ptr70.14, align 4
264  %add.ptr66.15 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 1039
265  %i73 = load float, ptr addrspace(4) %add.ptr66.15, align 4
266  %add.ptr66.1.15 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 2063
267  %i74 = load float, ptr addrspace(4) %add.ptr66.1.15, align 4
268  %add.ptr66.2.15 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 3087
269  %i75 = load float, ptr addrspace(4) %add.ptr66.2.15, align 4
270  %add.ptr70.15 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 16
271  %i76 = load float, ptr addrspace(4) %add.ptr70.15, align 4
272  %add.ptr66.16 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 1040
273  %i77 = load float, ptr addrspace(4) %add.ptr66.16, align 4
274  %add.ptr66.1.16 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 2064
275  %i78 = load float, ptr addrspace(4) %add.ptr66.1.16, align 4
276  %add.ptr66.2.16 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 3088
277  %i79 = load float, ptr addrspace(4) %add.ptr66.2.16, align 4
278  %add.ptr70.16 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 17
279  %i80 = load float, ptr addrspace(4) %add.ptr70.16, align 4
280  %add.ptr66.17 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 1041
281  %i81 = load float, ptr addrspace(4) %add.ptr66.17, align 4
282  %add.ptr66.1.17 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 2065
283  %i82 = load float, ptr addrspace(4) %add.ptr66.1.17, align 4
284  %add.ptr66.2.17 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 3089
285  %i83 = load float, ptr addrspace(4) %add.ptr66.2.17, align 4
286  %add.ptr70.17 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 18
287  %i84 = load float, ptr addrspace(4) %add.ptr70.17, align 4
288  %add.ptr66.18 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 1042
289  %i85 = load float, ptr addrspace(4) %add.ptr66.18, align 4
290  %add.ptr66.1.18 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 2066
291  %i86 = load float, ptr addrspace(4) %add.ptr66.1.18, align 4
292  %add.ptr66.2.18 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 3090
293  %i87 = load float, ptr addrspace(4) %add.ptr66.2.18, align 4
294  %add.ptr70.18 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 19
295  %i88 = load float, ptr addrspace(4) %add.ptr70.18, align 4
296  %add.ptr66.19 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 1043
297  %i89 = load float, ptr addrspace(4) %add.ptr66.19, align 4
298  %add.ptr66.1.19 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 2067
299  %i90 = load float, ptr addrspace(4) %add.ptr66.1.19, align 4
300  %add.ptr66.2.19 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 3091
301  %i91 = load float, ptr addrspace(4) %add.ptr66.2.19, align 4
302  %add.ptr70.19 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 20
303  %i92 = load float, ptr addrspace(4) %add.ptr70.19, align 4
304  %add.ptr66.20 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 1044
305  %i93 = load float, ptr addrspace(4) %add.ptr66.20, align 4
306  %add.ptr66.1.20 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 2068
307  %i94 = load float, ptr addrspace(4) %add.ptr66.1.20, align 4
308  %add.ptr66.2.20 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 3092
309  %i95 = load float, ptr addrspace(4) %add.ptr66.2.20, align 4
310  %add.ptr70.20 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 21
311  %i96 = load float, ptr addrspace(4) %add.ptr70.20, align 4
312  %add.ptr66.21 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 1045
313  %i97 = load float, ptr addrspace(4) %add.ptr66.21, align 4
314  %add.ptr66.1.21 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 2069
315  %i98 = load float, ptr addrspace(4) %add.ptr66.1.21, align 4
316  %add.ptr66.2.21 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 3093
317  %i99 = load float, ptr addrspace(4) %add.ptr66.2.21, align 4
318  %add.ptr70.21 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 22
319  %i100 = load float, ptr addrspace(4) %add.ptr70.21, align 4
320  %add.ptr66.22 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 1046
321  %i101 = load float, ptr addrspace(4) %add.ptr66.22, align 4
322  %add.ptr66.1.22 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 2070
323  %i102 = load float, ptr addrspace(4) %add.ptr66.1.22, align 4
324  %add.ptr66.2.22 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 3094
325  %i103 = load float, ptr addrspace(4) %add.ptr66.2.22, align 4
326  %add.ptr70.22 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 23
327  %i104 = load float, ptr addrspace(4) %add.ptr70.22, align 4
328  %add.ptr66.23 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 1047
329  %i105 = load float, ptr addrspace(4) %add.ptr66.23, align 4
330  %add.ptr66.1.23 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 2071
331  %i106 = load float, ptr addrspace(4) %add.ptr66.1.23, align 4
332  %add.ptr66.2.23 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 3095
333  %i107 = load float, ptr addrspace(4) %add.ptr66.2.23, align 4
334  %add.ptr70.23 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 24
335  %i108 = load float, ptr addrspace(4) %add.ptr70.23, align 4
336  %add.ptr66.24 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 1048
337  %i109 = load float, ptr addrspace(4) %add.ptr66.24, align 4
338  %add.ptr66.1.24 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 2072
339  %i110 = load float, ptr addrspace(4) %add.ptr66.1.24, align 4
340  %add.ptr66.2.24 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 3096
341  %i111 = load float, ptr addrspace(4) %add.ptr66.2.24, align 4
342  %add.ptr70.24 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 25
343  %i112 = load float, ptr addrspace(4) %add.ptr70.24, align 4
344  %add.ptr66.25 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 1049
345  %i113 = load float, ptr addrspace(4) %add.ptr66.25, align 4
346  %add.ptr66.1.25 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 2073
347  %i114 = load float, ptr addrspace(4) %add.ptr66.1.25, align 4
348  %add.ptr66.2.25 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 3097
349  %i115 = load float, ptr addrspace(4) %add.ptr66.2.25, align 4
350  %add.ptr70.25 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 26
351  %i116 = load float, ptr addrspace(4) %add.ptr70.25, align 4
352  %add.ptr66.26 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 1050
353  %i117 = load float, ptr addrspace(4) %add.ptr66.26, align 4
354  %add.ptr66.1.26 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 2074
355  %i118 = load float, ptr addrspace(4) %add.ptr66.1.26, align 4
356  %add.ptr66.2.26 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 3098
357  %i119 = load float, ptr addrspace(4) %add.ptr66.2.26, align 4
358  %add.ptr70.26 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 27
359  %i120 = load float, ptr addrspace(4) %add.ptr70.26, align 4
360  %add.ptr66.27 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 1051
361  %i121 = load float, ptr addrspace(4) %add.ptr66.27, align 4
362  %add.ptr66.1.27 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 2075
363  %i122 = load float, ptr addrspace(4) %add.ptr66.1.27, align 4
364  %add.ptr66.2.27 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 3099
365  %i123 = load float, ptr addrspace(4) %add.ptr66.2.27, align 4
366  %add.ptr70.27 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 28
367  %i124 = load float, ptr addrspace(4) %add.ptr70.27, align 4
368  %add.ptr66.28 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 1052
369  %i125 = load float, ptr addrspace(4) %add.ptr66.28, align 4
370  %add.ptr66.1.28 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 2076
371  %i126 = load float, ptr addrspace(4) %add.ptr66.1.28, align 4
372  %add.ptr66.2.28 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 3100
373  %i127 = load float, ptr addrspace(4) %add.ptr66.2.28, align 4
374  %add.ptr70.28 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 29
375  %i128 = load float, ptr addrspace(4) %add.ptr70.28, align 4
376  %add.ptr66.29 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 1053
377  %i129 = load float, ptr addrspace(4) %add.ptr66.29, align 4
378  %add.ptr66.1.29 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 2077
379  %i130 = load float, ptr addrspace(4) %add.ptr66.1.29, align 4
380  %add.ptr66.2.29 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 3101
381  %i131 = load float, ptr addrspace(4) %add.ptr66.2.29, align 4
382  %add.ptr70.29 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 30
383  %i132 = load float, ptr addrspace(4) %add.ptr70.29, align 4
384  %add.ptr66.30 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 1054
385  %i133 = load float, ptr addrspace(4) %add.ptr66.30, align 4
386  %add.ptr66.1.30 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 2078
387  %i134 = load float, ptr addrspace(4) %add.ptr66.1.30, align 4
388  %add.ptr66.2.30 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 3102
389  %i135 = load float, ptr addrspace(4) %add.ptr66.2.30, align 4
390  %add.ptr70.30 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 31
391  %i136 = load float, ptr addrspace(4) %add.ptr70.30, align 4
392  %add.ptr66.31 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 1055
393  %i137 = load float, ptr addrspace(4) %add.ptr66.31, align 4
394  %add.ptr66.1.31 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 2079
395  %i138 = load float, ptr addrspace(4) %add.ptr66.1.31, align 4
396  %add.ptr66.2.31 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 3103
397  %i139 = load float, ptr addrspace(4) %add.ptr66.2.31, align 4
398  %add.ptr47.3 = getelementptr inbounds float, ptr addrspace(1) %i_ptr.0288, i64 196
399  %i140 = tail call float @llvm.fmuladd.f32(float %i8, float %i12, float %accum.sroa.0.0)
400  %i141 = tail call float @llvm.fmuladd.f32(float %i9, float %i13, float %i140)
401  %i142 = tail call float @llvm.fmuladd.f32(float %i10, float %i14, float %i141)
402  %i143 = tail call float @llvm.fmuladd.f32(float %i11, float %i15, float %i142)
403  %i144 = tail call float @llvm.fmuladd.f32(float %i8, float %i16, float %accum.sroa.6.0)
404  %i145 = tail call float @llvm.fmuladd.f32(float %i9, float %i17, float %i144)
405  %i146 = tail call float @llvm.fmuladd.f32(float %i10, float %i18, float %i145)
406  %i147 = tail call float @llvm.fmuladd.f32(float %i11, float %i19, float %i146)
407  %i148 = tail call float @llvm.fmuladd.f32(float %i8, float %i20, float %accum.sroa.10.0)
408  %i149 = tail call float @llvm.fmuladd.f32(float %i9, float %i21, float %i148)
409  %i150 = tail call float @llvm.fmuladd.f32(float %i10, float %i22, float %i149)
410  %i151 = tail call float @llvm.fmuladd.f32(float %i11, float %i23, float %i150)
411  %i152 = tail call float @llvm.fmuladd.f32(float %i8, float %i24, float %accum.sroa.14.0)
412  %i153 = tail call float @llvm.fmuladd.f32(float %i9, float %i25, float %i152)
413  %i154 = tail call float @llvm.fmuladd.f32(float %i10, float %i26, float %i153)
414  %i155 = tail call float @llvm.fmuladd.f32(float %i11, float %i27, float %i154)
415  %i156 = tail call float @llvm.fmuladd.f32(float %i8, float %i28, float %accum.sroa.18.0)
416  %i157 = tail call float @llvm.fmuladd.f32(float %i9, float %i29, float %i156)
417  %i158 = tail call float @llvm.fmuladd.f32(float %i10, float %i30, float %i157)
418  %i159 = tail call float @llvm.fmuladd.f32(float %i11, float %i31, float %i158)
419  %i160 = tail call float @llvm.fmuladd.f32(float %i8, float %i32, float %accum.sroa.22.0)
420  %i161 = tail call float @llvm.fmuladd.f32(float %i9, float %i33, float %i160)
421  %i162 = tail call float @llvm.fmuladd.f32(float %i10, float %i34, float %i161)
422  %i163 = tail call float @llvm.fmuladd.f32(float %i11, float %i35, float %i162)
423  %i164 = tail call float @llvm.fmuladd.f32(float %i8, float %i36, float %accum.sroa.26.0)
424  %i165 = tail call float @llvm.fmuladd.f32(float %i9, float %i37, float %i164)
425  %i166 = tail call float @llvm.fmuladd.f32(float %i10, float %i38, float %i165)
426  %i167 = tail call float @llvm.fmuladd.f32(float %i11, float %i39, float %i166)
427  %i168 = tail call float @llvm.fmuladd.f32(float %i8, float %i40, float %accum.sroa.30.0)
428  %i169 = tail call float @llvm.fmuladd.f32(float %i9, float %i41, float %i168)
429  %i170 = tail call float @llvm.fmuladd.f32(float %i10, float %i42, float %i169)
430  %i171 = tail call float @llvm.fmuladd.f32(float %i11, float %i43, float %i170)
431  %i172 = tail call float @llvm.fmuladd.f32(float %i8, float %i44, float %accum.sroa.34.0)
432  %i173 = tail call float @llvm.fmuladd.f32(float %i9, float %i45, float %i172)
433  %i174 = tail call float @llvm.fmuladd.f32(float %i10, float %i46, float %i173)
434  %i175 = tail call float @llvm.fmuladd.f32(float %i11, float %i47, float %i174)
435  %i176 = tail call float @llvm.fmuladd.f32(float %i8, float %i48, float %accum.sroa.38.0)
436  %i177 = tail call float @llvm.fmuladd.f32(float %i9, float %i49, float %i176)
437  %i178 = tail call float @llvm.fmuladd.f32(float %i10, float %i50, float %i177)
438  %i179 = tail call float @llvm.fmuladd.f32(float %i11, float %i51, float %i178)
439  %i180 = tail call float @llvm.fmuladd.f32(float %i8, float %i52, float %accum.sroa.42.0)
440  %i181 = tail call float @llvm.fmuladd.f32(float %i9, float %i53, float %i180)
441  %i182 = tail call float @llvm.fmuladd.f32(float %i10, float %i54, float %i181)
442  %i183 = tail call float @llvm.fmuladd.f32(float %i11, float %i55, float %i182)
443  %i184 = tail call float @llvm.fmuladd.f32(float %i8, float %i56, float %accum.sroa.46.0)
444  %i185 = tail call float @llvm.fmuladd.f32(float %i9, float %i57, float %i184)
445  %i186 = tail call float @llvm.fmuladd.f32(float %i10, float %i58, float %i185)
446  %i187 = tail call float @llvm.fmuladd.f32(float %i11, float %i59, float %i186)
447  %i188 = tail call float @llvm.fmuladd.f32(float %i8, float %i60, float %accum.sroa.50.0)
448  %i189 = tail call float @llvm.fmuladd.f32(float %i9, float %i61, float %i188)
449  %i190 = tail call float @llvm.fmuladd.f32(float %i10, float %i62, float %i189)
450  %i191 = tail call float @llvm.fmuladd.f32(float %i11, float %i63, float %i190)
451  %i192 = tail call float @llvm.fmuladd.f32(float %i8, float %i64, float %accum.sroa.54.0)
452  %i193 = tail call float @llvm.fmuladd.f32(float %i9, float %i65, float %i192)
453  %i194 = tail call float @llvm.fmuladd.f32(float %i10, float %i66, float %i193)
454  %i195 = tail call float @llvm.fmuladd.f32(float %i11, float %i67, float %i194)
455  %i196 = tail call float @llvm.fmuladd.f32(float %i8, float %i68, float %accum.sroa.58.0)
456  %i197 = tail call float @llvm.fmuladd.f32(float %i9, float %i69, float %i196)
457  %i198 = tail call float @llvm.fmuladd.f32(float %i10, float %i70, float %i197)
458  %i199 = tail call float @llvm.fmuladd.f32(float %i11, float %i71, float %i198)
459  %i200 = tail call float @llvm.fmuladd.f32(float %i8, float %i72, float %accum.sroa.62.0)
460  %i201 = tail call float @llvm.fmuladd.f32(float %i9, float %i73, float %i200)
461  %i202 = tail call float @llvm.fmuladd.f32(float %i10, float %i74, float %i201)
462  %i203 = tail call float @llvm.fmuladd.f32(float %i11, float %i75, float %i202)
463  %i204 = tail call float @llvm.fmuladd.f32(float %i8, float %i76, float %accum.sroa.66.0)
464  %i205 = tail call float @llvm.fmuladd.f32(float %i9, float %i77, float %i204)
465  %i206 = tail call float @llvm.fmuladd.f32(float %i10, float %i78, float %i205)
466  %i207 = tail call float @llvm.fmuladd.f32(float %i11, float %i79, float %i206)
467  %i208 = tail call float @llvm.fmuladd.f32(float %i8, float %i80, float %accum.sroa.70.0)
468  %i209 = tail call float @llvm.fmuladd.f32(float %i9, float %i81, float %i208)
469  %i210 = tail call float @llvm.fmuladd.f32(float %i10, float %i82, float %i209)
470  %i211 = tail call float @llvm.fmuladd.f32(float %i11, float %i83, float %i210)
471  %i212 = tail call float @llvm.fmuladd.f32(float %i8, float %i84, float %accum.sroa.74.0)
472  %i213 = tail call float @llvm.fmuladd.f32(float %i9, float %i85, float %i212)
473  %i214 = tail call float @llvm.fmuladd.f32(float %i10, float %i86, float %i213)
474  %i215 = tail call float @llvm.fmuladd.f32(float %i11, float %i87, float %i214)
475  %i216 = tail call float @llvm.fmuladd.f32(float %i8, float %i88, float %accum.sroa.78.0)
476  %i217 = tail call float @llvm.fmuladd.f32(float %i9, float %i89, float %i216)
477  %i218 = tail call float @llvm.fmuladd.f32(float %i10, float %i90, float %i217)
478  %i219 = tail call float @llvm.fmuladd.f32(float %i11, float %i91, float %i218)
479  %i220 = tail call float @llvm.fmuladd.f32(float %i8, float %i92, float %accum.sroa.82.0)
480  %i221 = tail call float @llvm.fmuladd.f32(float %i9, float %i93, float %i220)
481  %i222 = tail call float @llvm.fmuladd.f32(float %i10, float %i94, float %i221)
482  %i223 = tail call float @llvm.fmuladd.f32(float %i11, float %i95, float %i222)
483  %i224 = tail call float @llvm.fmuladd.f32(float %i8, float %i96, float %accum.sroa.86.0)
484  %i225 = tail call float @llvm.fmuladd.f32(float %i9, float %i97, float %i224)
485  %i226 = tail call float @llvm.fmuladd.f32(float %i10, float %i98, float %i225)
486  %i227 = tail call float @llvm.fmuladd.f32(float %i11, float %i99, float %i226)
487  %i228 = tail call float @llvm.fmuladd.f32(float %i8, float %i100, float %accum.sroa.90.0)
488  %i229 = tail call float @llvm.fmuladd.f32(float %i9, float %i101, float %i228)
489  %i230 = tail call float @llvm.fmuladd.f32(float %i10, float %i102, float %i229)
490  %i231 = tail call float @llvm.fmuladd.f32(float %i11, float %i103, float %i230)
491  %i232 = tail call float @llvm.fmuladd.f32(float %i8, float %i104, float %accum.sroa.94.0)
492  %i233 = tail call float @llvm.fmuladd.f32(float %i9, float %i105, float %i232)
493  %i234 = tail call float @llvm.fmuladd.f32(float %i10, float %i106, float %i233)
494  %i235 = tail call float @llvm.fmuladd.f32(float %i11, float %i107, float %i234)
495  %i236 = tail call float @llvm.fmuladd.f32(float %i8, float %i108, float %accum.sroa.98.0)
496  %i237 = tail call float @llvm.fmuladd.f32(float %i9, float %i109, float %i236)
497  %i238 = tail call float @llvm.fmuladd.f32(float %i10, float %i110, float %i237)
498  %i239 = tail call float @llvm.fmuladd.f32(float %i11, float %i111, float %i238)
499  %i240 = tail call float @llvm.fmuladd.f32(float %i8, float %i112, float %accum.sroa.102.0)
500  %i241 = tail call float @llvm.fmuladd.f32(float %i9, float %i113, float %i240)
501  %i242 = tail call float @llvm.fmuladd.f32(float %i10, float %i114, float %i241)
502  %i243 = tail call float @llvm.fmuladd.f32(float %i11, float %i115, float %i242)
503  %i244 = tail call float @llvm.fmuladd.f32(float %i8, float %i116, float %accum.sroa.106.0)
504  %i245 = tail call float @llvm.fmuladd.f32(float %i9, float %i117, float %i244)
505  %i246 = tail call float @llvm.fmuladd.f32(float %i10, float %i118, float %i245)
506  %i247 = tail call float @llvm.fmuladd.f32(float %i11, float %i119, float %i246)
507  %i248 = tail call float @llvm.fmuladd.f32(float %i8, float %i120, float %accum.sroa.110.0)
508  %i249 = tail call float @llvm.fmuladd.f32(float %i9, float %i121, float %i248)
509  %i250 = tail call float @llvm.fmuladd.f32(float %i10, float %i122, float %i249)
510  %i251 = tail call float @llvm.fmuladd.f32(float %i11, float %i123, float %i250)
511  %i252 = tail call float @llvm.fmuladd.f32(float %i8, float %i124, float %accum.sroa.114.0)
512  %i253 = tail call float @llvm.fmuladd.f32(float %i9, float %i125, float %i252)
513  %i254 = tail call float @llvm.fmuladd.f32(float %i10, float %i126, float %i253)
514  %i255 = tail call float @llvm.fmuladd.f32(float %i11, float %i127, float %i254)
515  %i256 = tail call float @llvm.fmuladd.f32(float %i8, float %i128, float %accum.sroa.118.0)
516  %i257 = tail call float @llvm.fmuladd.f32(float %i9, float %i129, float %i256)
517  %i258 = tail call float @llvm.fmuladd.f32(float %i10, float %i130, float %i257)
518  %i259 = tail call float @llvm.fmuladd.f32(float %i11, float %i131, float %i258)
519  %i260 = tail call float @llvm.fmuladd.f32(float %i8, float %i132, float %accum.sroa.122.0)
520  %i261 = tail call float @llvm.fmuladd.f32(float %i9, float %i133, float %i260)
521  %i262 = tail call float @llvm.fmuladd.f32(float %i10, float %i134, float %i261)
522  %i263 = tail call float @llvm.fmuladd.f32(float %i11, float %i135, float %i262)
523  %i264 = tail call float @llvm.fmuladd.f32(float %i8, float %i136, float %accum.sroa.126.0)
524  %i265 = tail call float @llvm.fmuladd.f32(float %i9, float %i137, float %i264)
525  %i266 = tail call float @llvm.fmuladd.f32(float %i10, float %i138, float %i265)
526  %i267 = tail call float @llvm.fmuladd.f32(float %i11, float %i139, float %i266)
527  %add.ptr74 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 4096
528  %inc116 = add nuw nsw i32 %ci.0286, 1
529  %exitcond.not = icmp eq i32 %inc116, 512
530  br i1 %exitcond.not, label %for.cond.cleanup26, label %for.cond28.preheader
531
532for.cond.cleanup26:                               ; preds = %for.cond28.preheader
533  %mul119 = shl nuw nsw i32 undef, 1
534  %mul120 = mul i32 %div, 200704
535  %mul121 = mul i32 undef, 6272
536  %add122 = add i32 %mul120, %mul121
537  %mul123 = mul nuw nsw i32 undef, 28
538  %add124 = add i32 %add122, %mul123
539  %add126 = add i32 %add124, %mul119
540  %idx.ext127 = zext i32 %add126 to i64
541  %add.ptr128 = getelementptr inbounds float, ptr addrspace(1) %out_ptr, i64 %idx.ext127
542  store float %i143, ptr addrspace(1) %add.ptr128, align 4
543  %add.ptr184 = getelementptr inbounds float, ptr addrspace(1) %add.ptr128, i64 196
544  store float %i147, ptr addrspace(1) %add.ptr184, align 4
545  %add.ptr167.1 = getelementptr inbounds float, ptr addrspace(1) %add.ptr184, i64 14
546  store float 0.000000e+00, ptr addrspace(1) %add.ptr167.1, align 4
547  %add.ptr175.1.1 = getelementptr inbounds float, ptr addrspace(1) %add.ptr167.1, i64 1
548  store float 0.000000e+00, ptr addrspace(1) %add.ptr175.1.1, align 4
549  %add.ptr184.1 = getelementptr inbounds float, ptr addrspace(1) %add.ptr184, i64 196
550  store float %i151, ptr addrspace(1) %add.ptr184.1, align 4
551  %add.ptr184.2 = getelementptr inbounds float, ptr addrspace(1) %add.ptr184.1, i64 196
552  store float %i155, ptr addrspace(1) %add.ptr184.2, align 4
553  %add.ptr184.3 = getelementptr inbounds float, ptr addrspace(1) %add.ptr184.2, i64 196
554  store float %i159, ptr addrspace(1) %add.ptr184.3, align 4
555  %add.ptr184.4 = getelementptr inbounds float, ptr addrspace(1) %add.ptr184.3, i64 196
556  store float %i163, ptr addrspace(1) %add.ptr184.4, align 4
557  %add.ptr154.5 = getelementptr inbounds float, ptr addrspace(1) %add.ptr184.4, i64 1
558  store float 0.000000e+00, ptr addrspace(1) %add.ptr154.5, align 4
559  %add.ptr184.5 = getelementptr inbounds float, ptr addrspace(1) %add.ptr184.4, i64 196
560  store float %i167, ptr addrspace(1) %add.ptr184.5, align 4
561  %add.ptr154.6 = getelementptr inbounds float, ptr addrspace(1) %add.ptr184.5, i64 1
562  store float 0.000000e+00, ptr addrspace(1) %add.ptr154.6, align 4
563  %add.ptr184.6 = getelementptr inbounds float, ptr addrspace(1) %add.ptr184.5, i64 196
564  store float %i171, ptr addrspace(1) %add.ptr184.6, align 4
565  %add.ptr184.7 = getelementptr inbounds float, ptr addrspace(1) %add.ptr184.6, i64 196
566  store float %i175, ptr addrspace(1) %add.ptr184.7, align 4
567  %add.ptr167.8 = getelementptr inbounds float, ptr addrspace(1) %add.ptr184.7, i64 14
568  store float 0.000000e+00, ptr addrspace(1) %add.ptr167.8, align 4
569  %add.ptr175.1.8 = getelementptr inbounds float, ptr addrspace(1) %add.ptr167.8, i64 1
570  store float 0.000000e+00, ptr addrspace(1) %add.ptr175.1.8, align 4
571  %add.ptr184.8 = getelementptr inbounds float, ptr addrspace(1) %add.ptr184.7, i64 196
572  store float %i179, ptr addrspace(1) %add.ptr184.8, align 4
573  %add.ptr184.9 = getelementptr inbounds float, ptr addrspace(1) %add.ptr184.8, i64 196
574  store float %i183, ptr addrspace(1) %add.ptr184.9, align 4
575  %add.ptr184.10 = getelementptr inbounds float, ptr addrspace(1) %add.ptr184.9, i64 196
576  store float %i187, ptr addrspace(1) %add.ptr184.10, align 4
577  %add.ptr184.11 = getelementptr inbounds float, ptr addrspace(1) %add.ptr184.10, i64 196
578  store float %i191, ptr addrspace(1) %add.ptr184.11, align 4
579  %add.ptr184.12 = getelementptr inbounds float, ptr addrspace(1) %add.ptr184.11, i64 196
580  store float %i195, ptr addrspace(1) %add.ptr184.12, align 4
581  %add.ptr184.13 = getelementptr inbounds float, ptr addrspace(1) %add.ptr184.12, i64 196
582  store float %i199, ptr addrspace(1) %add.ptr184.13, align 4
583  %add.ptr184.14 = getelementptr inbounds float, ptr addrspace(1) %add.ptr184.13, i64 196
584  store float %i203, ptr addrspace(1) %add.ptr184.14, align 4
585  %add.ptr184.15 = getelementptr inbounds float, ptr addrspace(1) %add.ptr184.14, i64 196
586  store float %i207, ptr addrspace(1) %add.ptr184.15, align 4
587  %add.ptr184.16 = getelementptr inbounds float, ptr addrspace(1) %add.ptr184.15, i64 196
588  store float %i211, ptr addrspace(1) %add.ptr184.16, align 4
589  %add.ptr184.17 = getelementptr inbounds float, ptr addrspace(1) %add.ptr184.16, i64 196
590  store float %i215, ptr addrspace(1) %add.ptr184.17, align 4
591  %add.ptr184.18 = getelementptr inbounds float, ptr addrspace(1) %add.ptr184.17, i64 196
592  store float %i219, ptr addrspace(1) %add.ptr184.18, align 4
593  %add.ptr184.19 = getelementptr inbounds float, ptr addrspace(1) %add.ptr184.18, i64 196
594  store float %i223, ptr addrspace(1) %add.ptr184.19, align 4
595  %add.ptr184.20 = getelementptr inbounds float, ptr addrspace(1) %add.ptr184.19, i64 196
596  store float %i227, ptr addrspace(1) %add.ptr184.20, align 4
597  %add.ptr184.21 = getelementptr inbounds float, ptr addrspace(1) %add.ptr184.20, i64 196
598  store float %i231, ptr addrspace(1) %add.ptr184.21, align 4
599  %add.ptr184.22 = getelementptr inbounds float, ptr addrspace(1) %add.ptr184.21, i64 196
600  store float %i235, ptr addrspace(1) %add.ptr184.22, align 4
601  %add.ptr184.23 = getelementptr inbounds float, ptr addrspace(1) %add.ptr184.22, i64 196
602  store float %i239, ptr addrspace(1) %add.ptr184.23, align 4
603  %add.ptr184.24 = getelementptr inbounds float, ptr addrspace(1) %add.ptr184.23, i64 196
604  store float %i243, ptr addrspace(1) %add.ptr184.24, align 4
605  %add.ptr184.25 = getelementptr inbounds float, ptr addrspace(1) %add.ptr184.24, i64 196
606  store float %i247, ptr addrspace(1) %add.ptr184.25, align 4
607  %add.ptr184.26 = getelementptr inbounds float, ptr addrspace(1) %add.ptr184.25, i64 196
608  store float %i251, ptr addrspace(1) %add.ptr184.26, align 4
609  %add.ptr184.27 = getelementptr inbounds float, ptr addrspace(1) %add.ptr184.26, i64 196
610  store float %i255, ptr addrspace(1) %add.ptr184.27, align 4
611  %add.ptr184.28 = getelementptr inbounds float, ptr addrspace(1) %add.ptr184.27, i64 196
612  store float %i259, ptr addrspace(1) %add.ptr184.28, align 4
613  %add.ptr184.29 = getelementptr inbounds float, ptr addrspace(1) %add.ptr184.28, i64 196
614  store float %i263, ptr addrspace(1) %add.ptr184.29, align 4
615  %add.ptr184.30 = getelementptr inbounds float, ptr addrspace(1) %add.ptr184.29, i64 196
616  store float %i267, ptr addrspace(1) %add.ptr184.30, align 4
617  ret void
618}
619
620
621
; Intrinsic declarations.
; @llvm.fmuladd.f32 is the callee of the fmuladd chains in the function above
; and carries attribute group #2.
; The three AMDGPU intrinsics below carry attribute group #3.
; NOTE(review): their call sites are presumably earlier in the file; confirm.
622declare float @llvm.fmuladd.f32(float, float, float) #2
623declare i32 @llvm.amdgcn.workitem.id.x() #3
624declare i32 @llvm.amdgcn.workgroup.id.x() #3
; Returns a 4-byte-aligned pointer in address space 4.
625declare align 4 ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #3
626
; Numbered metadata nodes. The nodes that reference them are not visible in
; this part of the file.
; NOTE(review): !0-!3 each have four entries and look like OpenCL
; kernel-argument metadata (address space, access qualifier, type name, type
; qualifier); confirm against the kernel definition that references them.
627!0 = !{i32 1, i32 2, i32 1, i32 0}
628!1 = !{!"none", !"none", !"none", !"none"}
629!2 = !{!"ptr", !"ptr", !"ptr", !"float"}
630!3 = !{!"restrict const", !"restrict const", !"restrict", !""}
; NOTE(review): !4 (three i32 values) is presumably a work-group size and !5
; (two i32 values) presumably a value range; confirm where they are attached.
631!4 = !{i32 256, i32 1, i32 1}
632!5 = !{i32 0, i32 1024}
633
; Attribute groups referenced by the definitions and declarations in this file.
; #0 sets both "amdgpu-waves-per-eu" and "amdgpu-flat-work-group-size" to "1,1".
; NOTE(review): presumably attached to the kernel defined earlier; confirm.
634attributes #0 = { nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,1" }
; #1 is attached to @return_72xi32 at the top of the file; the GFX11-PAL check
; lines there expect NumVgprs: 64, the same value as "amdgpu-num-vgpr" here.
635attributes #1 = { nounwind "amdgpu-num-vgpr"="64" }
; #2 is used by the @llvm.fmuladd.f32 declaration.
636attributes #2 = { nofree nosync nounwind readnone speculatable willreturn }
; #3 is used by the three @llvm.amdgcn.* intrinsic declarations.
637attributes #3 = { nounwind readnone speculatable willreturn }
638