; Each target configuration is compiled twice: once with the default pressure
; tracking and once with -amdgpu-use-amdgpu-trackers=1. Every FileCheck
; invocation also enables the shared CHECK prefix so that the per-function
; label directives in this file are honored and each resource check is
; confined to the function it is written under.
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 -verify-misched < %s | FileCheck --check-prefixes=CHECK,GFX11-PAL %s
; RUN: llc -mtriple=amdgcn--amdpal -mcpu=gfx1100 -amdgpu-use-amdgpu-trackers=1 -verify-misched < %s | FileCheck --check-prefixes=CHECK,GFX11-PAL-GCNTRACKERS %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga -amdgpu-scalarize-global-loads=false -verify-misched < %s | FileCheck --check-prefixes=CHECK,TONGA %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga -amdgpu-scalarize-global-loads=false -amdgpu-use-amdgpu-trackers=1 -verify-misched < %s | FileCheck --check-prefixes=CHECK,TONGA-GCNTRACKERS %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -verify-misched < %s | FileCheck --check-prefixes=CHECK,GFX908 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx908 -amdgpu-use-amdgpu-trackers=1 -verify-misched < %s | FileCheck --check-prefixes=CHECK,GFX908-GCNTRACKERS %s
; RUN: llc -mtriple=amdgcn -verify-misched < %s | FileCheck --check-prefixes=CHECK,GENERIC %s
; RUN: llc -mtriple=amdgcn -amdgpu-use-amdgpu-trackers=1 -verify-misched < %s | FileCheck --check-prefixes=CHECK,GENERIC-GCNTRACKERS %s

; The GCN trackers are sensitive to minor changes in register pressure (RP):
; they will avoid scheduling certain instructions which, if scheduled, would
; allow scheduling of other instructions that reduce RP.

; CHECK-LABEL: {{^}}return_72xi32:
; GFX11-PAL: NumSgprs: 33
; GFX11-PAL-GCNTRACKERS: NumSgprs: 33
; GFX11-PAL: NumVgprs: 64
; GFX11-PAL-GCNTRACKERS: NumVgprs: 64
; GFX11-PAL: ScratchSize: 220
; GFX11-PAL-GCNTRACKERS: ScratchSize: 248


; CHECK-LABEL: {{^}}call_72xi32:
; GFX11-PAL: NumSgprs: 37
; GFX11-PAL-GCNTRACKERS: NumSgprs: 37
; GFX11-PAL: NumVgprs: 64
; GFX11-PAL-GCNTRACKERS: NumVgprs: 64
; GFX11-PAL: ScratchSize: 2780
; GFX11-PAL-GCNTRACKERS: ScratchSize: 2808


; Returns its 72-element i32 vector argument unchanged.
define amdgpu_gfx <72 x i32> @return_72xi32(<72 x i32> %val) #1 {
  ret <72 x i32> %val
}

; Calls @return_72xi32 on an all-zero vector, overwrites elements 0 and 58 of
; the result with 42 and 24, and passes the modified vector to a second call.
; The second call's result is unused.
define amdgpu_gfx void @call_72xi32() #1 {
entry:
  %ret.0 = call amdgpu_gfx <72 x i32> @return_72xi32(<72 x i32> zeroinitializer)
  %val.0 = insertelement <72 x i32> %ret.0, i32 42, i32 0
  %val.1 = insertelement <72 x i32> %val.0, i32 24, i32 58
  %ret.1 = call amdgpu_gfx <72 x i32> @return_72xi32(<72 x i32> %val.1)
  ret void
}

; CHECK-LABEL: {{^}}global_extload_v16f16_to_v16f64:
; TONGA: NumSgprs: 96
; TONGA-GCNTRACKERS: NumSgprs: 96
; TONGA: NumVgprs: 21
; TONGA-GCNTRACKERS: NumVgprs: 23
; TONGA: Occupancy: 8
; TONGA-GCNTRACKERS: Occupancy: 8


; Loads <16 x half> from %in (addrspace 1), extends every element to double,
; and stores the <16 x double> result to %out (addrspace 1).
define amdgpu_kernel void @global_extload_v16f16_to_v16f64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
  %val = load <16 x half>, ptr addrspace(1) %in
  %cvt = fpext <16 x half> %val to <16 x double>
  store <16 x double> %cvt, ptr addrspace(1) %out
  ret void
}

; CHECK-LABEL: {{^}}constant_zextload_v64i16_to_v64i32:
; GENERIC: NumSgprs: 71
; GENERIC-GCNTRACKERS: NumSgprs: 45
; GENERIC: NumVgprs: 20
; GENERIC-GCNTRACKERS: NumVgprs: 20
; GENERIC: Occupancy: 7
; GENERIC-GCNTRACKERS: Occupancy: 10

; Loads <64 x i16> from %in (addrspace 4), zero-extends every element to i32,
; and stores the <64 x i32> result to %out (addrspace 1).
define amdgpu_kernel void @constant_zextload_v64i16_to_v64i32(ptr addrspace(1) %out, ptr addrspace(4) %in) {
  %load = load <64 x i16>, ptr addrspace(4) %in
  %ext = zext <64 x i16> %load to <64 x i32>
  store <64 x i32> %ext, ptr addrspace(1) %out
  ret void
}

; CHECK-LABEL: {{^}}excess_soft_clause_reg_pressure:
; GFX908: NumSgprs: 64
; GFX908-GCNTRACKERS: NumSgprs: 64
; GFX908: NumVgprs: 43
; GFX908-GCNTRACKERS: NumVgprs: 39
; GFX908: Occupancy: 5
; GFX908-GCNTRACKERS: Occupancy: 6


define protected amdgpu_kernel void @excess_soft_clause_reg_pressure(ptr addrspace(4) %wei_ptr, ptr addrspace(1) %out_ptr, ptr addrspace(1) %in) {
entry:
  %i = tail call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
  %i2 = load i64, ptr addrspace(4) %i, align 8
  %i3 = tail call i32 @llvm.amdgcn.workgroup.id.x()
  %i4 = shl i32 %i3, 8
  %i5 = tail call i32 @llvm.amdgcn.workitem.id.x(), !range !5
  %i6 = add i32 %i4, %i5
  %i7 = trunc i64 %i2 to i32
  %conv = add i32 %i6, %i7
%conv.frozen = freeze i32 %conv 95 %div = udiv i32 %conv.frozen, 49 96 %add.ptr22 = getelementptr inbounds float, ptr addrspace(4) %wei_ptr, i64 undef 97 %in.ptr1 = getelementptr inbounds float, ptr addrspace(1) %in, i32 %i5 98 br label %for.cond28.preheader 99 100for.cond28.preheader: ; preds = %for.cond28.preheader, %entry 101 %accum.sroa.110.0 = phi float [ 0.000000e+00, %entry ], [ %i251, %for.cond28.preheader ] 102 %accum.sroa.106.0 = phi float [ 0.000000e+00, %entry ], [ %i247, %for.cond28.preheader ] 103 %accum.sroa.102.0 = phi float [ 0.000000e+00, %entry ], [ %i243, %for.cond28.preheader ] 104 %accum.sroa.98.0 = phi float [ 0.000000e+00, %entry ], [ %i239, %for.cond28.preheader ] 105 %accum.sroa.94.0 = phi float [ 0.000000e+00, %entry ], [ %i235, %for.cond28.preheader ] 106 %accum.sroa.90.0 = phi float [ 0.000000e+00, %entry ], [ %i231, %for.cond28.preheader ] 107 %accum.sroa.86.0 = phi float [ 0.000000e+00, %entry ], [ %i227, %for.cond28.preheader ] 108 %accum.sroa.82.0 = phi float [ 0.000000e+00, %entry ], [ %i223, %for.cond28.preheader ] 109 %accum.sroa.78.0 = phi float [ 0.000000e+00, %entry ], [ %i219, %for.cond28.preheader ] 110 %accum.sroa.74.0 = phi float [ 0.000000e+00, %entry ], [ %i215, %for.cond28.preheader ] 111 %accum.sroa.70.0 = phi float [ 0.000000e+00, %entry ], [ %i211, %for.cond28.preheader ] 112 %accum.sroa.66.0 = phi float [ 0.000000e+00, %entry ], [ %i207, %for.cond28.preheader ] 113 %accum.sroa.62.0 = phi float [ 0.000000e+00, %entry ], [ %i203, %for.cond28.preheader ] 114 %accum.sroa.58.0 = phi float [ 0.000000e+00, %entry ], [ %i199, %for.cond28.preheader ] 115 %accum.sroa.54.0 = phi float [ 0.000000e+00, %entry ], [ %i195, %for.cond28.preheader ] 116 %accum.sroa.50.0 = phi float [ 0.000000e+00, %entry ], [ %i191, %for.cond28.preheader ] 117 %accum.sroa.46.0 = phi float [ 0.000000e+00, %entry ], [ %i187, %for.cond28.preheader ] 118 %accum.sroa.42.0 = phi float [ 0.000000e+00, %entry ], [ %i183, %for.cond28.preheader ] 119 
%accum.sroa.38.0 = phi float [ 0.000000e+00, %entry ], [ %i179, %for.cond28.preheader ] 120 %accum.sroa.34.0 = phi float [ 0.000000e+00, %entry ], [ %i175, %for.cond28.preheader ] 121 %accum.sroa.30.0 = phi float [ 0.000000e+00, %entry ], [ %i171, %for.cond28.preheader ] 122 %accum.sroa.26.0 = phi float [ 0.000000e+00, %entry ], [ %i167, %for.cond28.preheader ] 123 %accum.sroa.22.0 = phi float [ 0.000000e+00, %entry ], [ %i163, %for.cond28.preheader ] 124 %accum.sroa.18.0 = phi float [ 0.000000e+00, %entry ], [ %i159, %for.cond28.preheader ] 125 %accum.sroa.14.0 = phi float [ 0.000000e+00, %entry ], [ %i155, %for.cond28.preheader ] 126 %accum.sroa.10.0 = phi float [ 0.000000e+00, %entry ], [ %i151, %for.cond28.preheader ] 127 %accum.sroa.6.0 = phi float [ 0.000000e+00, %entry ], [ %i147, %for.cond28.preheader ] 128 %accum.sroa.0.0 = phi float [ 0.000000e+00, %entry ], [ %i143, %for.cond28.preheader ] 129 %accum.sroa.114.0 = phi float [ 0.000000e+00, %entry ], [ %i255, %for.cond28.preheader ] 130 %accum.sroa.118.0 = phi float [ 0.000000e+00, %entry ], [ %i259, %for.cond28.preheader ] 131 %accum.sroa.122.0 = phi float [ 0.000000e+00, %entry ], [ %i263, %for.cond28.preheader ] 132 %accum.sroa.126.0 = phi float [ 0.000000e+00, %entry ], [ %i267, %for.cond28.preheader ] 133 %i_ptr.0288 = phi ptr addrspace(1) [ %in.ptr1, %entry ], [ %add.ptr47.3, %for.cond28.preheader ] 134 %w_ptr.0287 = phi ptr addrspace(4) [ %add.ptr22, %entry ], [ %add.ptr74, %for.cond28.preheader ] 135 %ci.0286 = phi i32 [ 0, %entry ], [ %inc116, %for.cond28.preheader ] 136 %i8 = load float, ptr addrspace(1) %i_ptr.0288, align 4 137 %add.ptr47 = getelementptr inbounds float, ptr addrspace(1) %i_ptr.0288, i64 49 138 %i9 = load float, ptr addrspace(1) %add.ptr47, align 4 139 %add.ptr47.1 = getelementptr inbounds float, ptr addrspace(1) %i_ptr.0288, i64 98 140 %i10 = load float, ptr addrspace(1) %add.ptr47.1, align 4 141 %add.ptr47.2 = getelementptr inbounds float, ptr addrspace(1) %i_ptr.0288, i64 147 
142 %i11 = load float, ptr addrspace(1) %add.ptr47.2, align 4 143 %i12 = load float, ptr addrspace(4) %w_ptr.0287, align 4 144 %add.ptr66 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 1024 145 %i13 = load float, ptr addrspace(4) %add.ptr66, align 4 146 %add.ptr66.1 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 2048 147 %i14 = load float, ptr addrspace(4) %add.ptr66.1, align 4 148 %add.ptr66.2 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 3072 149 %i15 = load float, ptr addrspace(4) %add.ptr66.2, align 4 150 %add.ptr70 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 1 151 %i16 = load float, ptr addrspace(4) %add.ptr70, align 4 152 %add.ptr66.1291 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 1025 153 %i17 = load float, ptr addrspace(4) %add.ptr66.1291, align 4 154 %add.ptr66.1.1 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 2049 155 %i18 = load float, ptr addrspace(4) %add.ptr66.1.1, align 4 156 %add.ptr66.2.1 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 3073 157 %i19 = load float, ptr addrspace(4) %add.ptr66.2.1, align 4 158 %add.ptr70.1 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 2 159 %i20 = load float, ptr addrspace(4) %add.ptr70.1, align 4 160 %add.ptr66.2293 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 1026 161 %i21 = load float, ptr addrspace(4) %add.ptr66.2293, align 4 162 %add.ptr66.1.2 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 2050 163 %i22 = load float, ptr addrspace(4) %add.ptr66.1.2, align 4 164 %add.ptr66.2.2 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 3074 165 %i23 = load float, ptr addrspace(4) %add.ptr66.2.2, align 4 166 %add.ptr70.2 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 3 167 %i24 = load float, ptr addrspace(4) %add.ptr70.2, align 4 168 %add.ptr66.3 = getelementptr inbounds float, ptr addrspace(4) 
%w_ptr.0287, i64 1027 169 %i25 = load float, ptr addrspace(4) %add.ptr66.3, align 4 170 %add.ptr66.1.3 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 2051 171 %i26 = load float, ptr addrspace(4) %add.ptr66.1.3, align 4 172 %add.ptr66.2.3 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 3075 173 %i27 = load float, ptr addrspace(4) %add.ptr66.2.3, align 4 174 %add.ptr70.3 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 4 175 %i28 = load float, ptr addrspace(4) %add.ptr70.3, align 4 176 %add.ptr66.4 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 1028 177 %i29 = load float, ptr addrspace(4) %add.ptr66.4, align 4 178 %add.ptr66.1.4 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 2052 179 %i30 = load float, ptr addrspace(4) %add.ptr66.1.4, align 4 180 %add.ptr66.2.4 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 3076 181 %i31 = load float, ptr addrspace(4) %add.ptr66.2.4, align 4 182 %add.ptr70.4 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 5 183 %i32 = load float, ptr addrspace(4) %add.ptr70.4, align 4 184 %add.ptr66.5 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 1029 185 %i33 = load float, ptr addrspace(4) %add.ptr66.5, align 4 186 %add.ptr66.1.5 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 2053 187 %i34 = load float, ptr addrspace(4) %add.ptr66.1.5, align 4 188 %add.ptr66.2.5 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 3077 189 %i35 = load float, ptr addrspace(4) %add.ptr66.2.5, align 4 190 %add.ptr70.5 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 6 191 %i36 = load float, ptr addrspace(4) %add.ptr70.5, align 4 192 %add.ptr66.6 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 1030 193 %i37 = load float, ptr addrspace(4) %add.ptr66.6, align 4 194 %add.ptr66.1.6 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 2054 195 %i38 = 
load float, ptr addrspace(4) %add.ptr66.1.6, align 4 196 %add.ptr66.2.6 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 3078 197 %i39 = load float, ptr addrspace(4) %add.ptr66.2.6, align 4 198 %add.ptr70.6 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 7 199 %i40 = load float, ptr addrspace(4) %add.ptr70.6, align 4 200 %add.ptr66.7 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 1031 201 %i41 = load float, ptr addrspace(4) %add.ptr66.7, align 4 202 %add.ptr66.1.7 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 2055 203 %i42 = load float, ptr addrspace(4) %add.ptr66.1.7, align 4 204 %add.ptr66.2.7 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 3079 205 %i43 = load float, ptr addrspace(4) %add.ptr66.2.7, align 4 206 %add.ptr70.7 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 8 207 %i44 = load float, ptr addrspace(4) %add.ptr70.7, align 4 208 %add.ptr66.8 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 1032 209 %i45 = load float, ptr addrspace(4) %add.ptr66.8, align 4 210 %add.ptr66.1.8 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 2056 211 %i46 = load float, ptr addrspace(4) %add.ptr66.1.8, align 4 212 %add.ptr66.2.8 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 3080 213 %i47 = load float, ptr addrspace(4) %add.ptr66.2.8, align 4 214 %add.ptr70.8 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 9 215 %i48 = load float, ptr addrspace(4) %add.ptr70.8, align 4 216 %add.ptr66.9 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 1033 217 %i49 = load float, ptr addrspace(4) %add.ptr66.9, align 4 218 %add.ptr66.1.9 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 2057 219 %i50 = load float, ptr addrspace(4) %add.ptr66.1.9, align 4 220 %add.ptr66.2.9 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 3081 221 %i51 = load float, ptr addrspace(4) 
%add.ptr66.2.9, align 4 222 %add.ptr70.9 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 10 223 %i52 = load float, ptr addrspace(4) %add.ptr70.9, align 4 224 %add.ptr66.10 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 1034 225 %i53 = load float, ptr addrspace(4) %add.ptr66.10, align 4 226 %add.ptr66.1.10 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 2058 227 %i54 = load float, ptr addrspace(4) %add.ptr66.1.10, align 4 228 %add.ptr66.2.10 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 3082 229 %i55 = load float, ptr addrspace(4) %add.ptr66.2.10, align 4 230 %add.ptr70.10 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 11 231 %i56 = load float, ptr addrspace(4) %add.ptr70.10, align 4 232 %add.ptr66.11 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 1035 233 %i57 = load float, ptr addrspace(4) %add.ptr66.11, align 4 234 %add.ptr66.1.11 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 2059 235 %i58 = load float, ptr addrspace(4) %add.ptr66.1.11, align 4 236 %add.ptr66.2.11 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 3083 237 %i59 = load float, ptr addrspace(4) %add.ptr66.2.11, align 4 238 %add.ptr70.11 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 12 239 %i60 = load float, ptr addrspace(4) %add.ptr70.11, align 4 240 %add.ptr66.12 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 1036 241 %i61 = load float, ptr addrspace(4) %add.ptr66.12, align 4 242 %add.ptr66.1.12 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 2060 243 %i62 = load float, ptr addrspace(4) %add.ptr66.1.12, align 4 244 %add.ptr66.2.12 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 3084 245 %i63 = load float, ptr addrspace(4) %add.ptr66.2.12, align 4 246 %add.ptr70.12 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 13 247 %i64 = load float, ptr addrspace(4) 
%add.ptr70.12, align 4 248 %add.ptr66.13 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 1037 249 %i65 = load float, ptr addrspace(4) %add.ptr66.13, align 4 250 %add.ptr66.1.13 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 2061 251 %i66 = load float, ptr addrspace(4) %add.ptr66.1.13, align 4 252 %add.ptr66.2.13 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 3085 253 %i67 = load float, ptr addrspace(4) %add.ptr66.2.13, align 4 254 %add.ptr70.13 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 14 255 %i68 = load float, ptr addrspace(4) %add.ptr70.13, align 4 256 %add.ptr66.14 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 1038 257 %i69 = load float, ptr addrspace(4) %add.ptr66.14, align 4 258 %add.ptr66.1.14 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 2062 259 %i70 = load float, ptr addrspace(4) %add.ptr66.1.14, align 4 260 %add.ptr66.2.14 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 3086 261 %i71 = load float, ptr addrspace(4) %add.ptr66.2.14, align 4 262 %add.ptr70.14 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 15 263 %i72 = load float, ptr addrspace(4) %add.ptr70.14, align 4 264 %add.ptr66.15 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 1039 265 %i73 = load float, ptr addrspace(4) %add.ptr66.15, align 4 266 %add.ptr66.1.15 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 2063 267 %i74 = load float, ptr addrspace(4) %add.ptr66.1.15, align 4 268 %add.ptr66.2.15 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 3087 269 %i75 = load float, ptr addrspace(4) %add.ptr66.2.15, align 4 270 %add.ptr70.15 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 16 271 %i76 = load float, ptr addrspace(4) %add.ptr70.15, align 4 272 %add.ptr66.16 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 1040 273 %i77 = load float, ptr addrspace(4) 
%add.ptr66.16, align 4 274 %add.ptr66.1.16 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 2064 275 %i78 = load float, ptr addrspace(4) %add.ptr66.1.16, align 4 276 %add.ptr66.2.16 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 3088 277 %i79 = load float, ptr addrspace(4) %add.ptr66.2.16, align 4 278 %add.ptr70.16 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 17 279 %i80 = load float, ptr addrspace(4) %add.ptr70.16, align 4 280 %add.ptr66.17 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 1041 281 %i81 = load float, ptr addrspace(4) %add.ptr66.17, align 4 282 %add.ptr66.1.17 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 2065 283 %i82 = load float, ptr addrspace(4) %add.ptr66.1.17, align 4 284 %add.ptr66.2.17 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 3089 285 %i83 = load float, ptr addrspace(4) %add.ptr66.2.17, align 4 286 %add.ptr70.17 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 18 287 %i84 = load float, ptr addrspace(4) %add.ptr70.17, align 4 288 %add.ptr66.18 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 1042 289 %i85 = load float, ptr addrspace(4) %add.ptr66.18, align 4 290 %add.ptr66.1.18 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 2066 291 %i86 = load float, ptr addrspace(4) %add.ptr66.1.18, align 4 292 %add.ptr66.2.18 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 3090 293 %i87 = load float, ptr addrspace(4) %add.ptr66.2.18, align 4 294 %add.ptr70.18 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 19 295 %i88 = load float, ptr addrspace(4) %add.ptr70.18, align 4 296 %add.ptr66.19 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 1043 297 %i89 = load float, ptr addrspace(4) %add.ptr66.19, align 4 298 %add.ptr66.1.19 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 2067 299 %i90 = load float, ptr addrspace(4) 
%add.ptr66.1.19, align 4 300 %add.ptr66.2.19 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 3091 301 %i91 = load float, ptr addrspace(4) %add.ptr66.2.19, align 4 302 %add.ptr70.19 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 20 303 %i92 = load float, ptr addrspace(4) %add.ptr70.19, align 4 304 %add.ptr66.20 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 1044 305 %i93 = load float, ptr addrspace(4) %add.ptr66.20, align 4 306 %add.ptr66.1.20 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 2068 307 %i94 = load float, ptr addrspace(4) %add.ptr66.1.20, align 4 308 %add.ptr66.2.20 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 3092 309 %i95 = load float, ptr addrspace(4) %add.ptr66.2.20, align 4 310 %add.ptr70.20 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 21 311 %i96 = load float, ptr addrspace(4) %add.ptr70.20, align 4 312 %add.ptr66.21 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 1045 313 %i97 = load float, ptr addrspace(4) %add.ptr66.21, align 4 314 %add.ptr66.1.21 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 2069 315 %i98 = load float, ptr addrspace(4) %add.ptr66.1.21, align 4 316 %add.ptr66.2.21 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 3093 317 %i99 = load float, ptr addrspace(4) %add.ptr66.2.21, align 4 318 %add.ptr70.21 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 22 319 %i100 = load float, ptr addrspace(4) %add.ptr70.21, align 4 320 %add.ptr66.22 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 1046 321 %i101 = load float, ptr addrspace(4) %add.ptr66.22, align 4 322 %add.ptr66.1.22 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 2070 323 %i102 = load float, ptr addrspace(4) %add.ptr66.1.22, align 4 324 %add.ptr66.2.22 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 3094 325 %i103 = load float, ptr 
addrspace(4) %add.ptr66.2.22, align 4 326 %add.ptr70.22 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 23 327 %i104 = load float, ptr addrspace(4) %add.ptr70.22, align 4 328 %add.ptr66.23 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 1047 329 %i105 = load float, ptr addrspace(4) %add.ptr66.23, align 4 330 %add.ptr66.1.23 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 2071 331 %i106 = load float, ptr addrspace(4) %add.ptr66.1.23, align 4 332 %add.ptr66.2.23 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 3095 333 %i107 = load float, ptr addrspace(4) %add.ptr66.2.23, align 4 334 %add.ptr70.23 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 24 335 %i108 = load float, ptr addrspace(4) %add.ptr70.23, align 4 336 %add.ptr66.24 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 1048 337 %i109 = load float, ptr addrspace(4) %add.ptr66.24, align 4 338 %add.ptr66.1.24 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 2072 339 %i110 = load float, ptr addrspace(4) %add.ptr66.1.24, align 4 340 %add.ptr66.2.24 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 3096 341 %i111 = load float, ptr addrspace(4) %add.ptr66.2.24, align 4 342 %add.ptr70.24 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 25 343 %i112 = load float, ptr addrspace(4) %add.ptr70.24, align 4 344 %add.ptr66.25 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 1049 345 %i113 = load float, ptr addrspace(4) %add.ptr66.25, align 4 346 %add.ptr66.1.25 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 2073 347 %i114 = load float, ptr addrspace(4) %add.ptr66.1.25, align 4 348 %add.ptr66.2.25 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 3097 349 %i115 = load float, ptr addrspace(4) %add.ptr66.2.25, align 4 350 %add.ptr70.25 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 26 351 %i116 = load 
float, ptr addrspace(4) %add.ptr70.25, align 4 352 %add.ptr66.26 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 1050 353 %i117 = load float, ptr addrspace(4) %add.ptr66.26, align 4 354 %add.ptr66.1.26 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 2074 355 %i118 = load float, ptr addrspace(4) %add.ptr66.1.26, align 4 356 %add.ptr66.2.26 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 3098 357 %i119 = load float, ptr addrspace(4) %add.ptr66.2.26, align 4 358 %add.ptr70.26 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 27 359 %i120 = load float, ptr addrspace(4) %add.ptr70.26, align 4 360 %add.ptr66.27 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 1051 361 %i121 = load float, ptr addrspace(4) %add.ptr66.27, align 4 362 %add.ptr66.1.27 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 2075 363 %i122 = load float, ptr addrspace(4) %add.ptr66.1.27, align 4 364 %add.ptr66.2.27 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 3099 365 %i123 = load float, ptr addrspace(4) %add.ptr66.2.27, align 4 366 %add.ptr70.27 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 28 367 %i124 = load float, ptr addrspace(4) %add.ptr70.27, align 4 368 %add.ptr66.28 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 1052 369 %i125 = load float, ptr addrspace(4) %add.ptr66.28, align 4 370 %add.ptr66.1.28 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 2076 371 %i126 = load float, ptr addrspace(4) %add.ptr66.1.28, align 4 372 %add.ptr66.2.28 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 3100 373 %i127 = load float, ptr addrspace(4) %add.ptr66.2.28, align 4 374 %add.ptr70.28 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 29 375 %i128 = load float, ptr addrspace(4) %add.ptr70.28, align 4 376 %add.ptr66.29 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 1053 377 %i129 
= load float, ptr addrspace(4) %add.ptr66.29, align 4 378 %add.ptr66.1.29 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 2077 379 %i130 = load float, ptr addrspace(4) %add.ptr66.1.29, align 4 380 %add.ptr66.2.29 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 3101 381 %i131 = load float, ptr addrspace(4) %add.ptr66.2.29, align 4 382 %add.ptr70.29 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 30 383 %i132 = load float, ptr addrspace(4) %add.ptr70.29, align 4 384 %add.ptr66.30 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 1054 385 %i133 = load float, ptr addrspace(4) %add.ptr66.30, align 4 386 %add.ptr66.1.30 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 2078 387 %i134 = load float, ptr addrspace(4) %add.ptr66.1.30, align 4 388 %add.ptr66.2.30 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 3102 389 %i135 = load float, ptr addrspace(4) %add.ptr66.2.30, align 4 390 %add.ptr70.30 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 31 391 %i136 = load float, ptr addrspace(4) %add.ptr70.30, align 4 392 %add.ptr66.31 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 1055 393 %i137 = load float, ptr addrspace(4) %add.ptr66.31, align 4 394 %add.ptr66.1.31 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 2079 395 %i138 = load float, ptr addrspace(4) %add.ptr66.1.31, align 4 396 %add.ptr66.2.31 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 3103 397 %i139 = load float, ptr addrspace(4) %add.ptr66.2.31, align 4 398 %add.ptr47.3 = getelementptr inbounds float, ptr addrspace(1) %i_ptr.0288, i64 196 399 %i140 = tail call float @llvm.fmuladd.f32(float %i8, float %i12, float %accum.sroa.0.0) 400 %i141 = tail call float @llvm.fmuladd.f32(float %i9, float %i13, float %i140) 401 %i142 = tail call float @llvm.fmuladd.f32(float %i10, float %i14, float %i141) 402 %i143 = tail call float @llvm.fmuladd.f32(float 
%i11, float %i15, float %i142) 403 %i144 = tail call float @llvm.fmuladd.f32(float %i8, float %i16, float %accum.sroa.6.0) 404 %i145 = tail call float @llvm.fmuladd.f32(float %i9, float %i17, float %i144) 405 %i146 = tail call float @llvm.fmuladd.f32(float %i10, float %i18, float %i145) 406 %i147 = tail call float @llvm.fmuladd.f32(float %i11, float %i19, float %i146) 407 %i148 = tail call float @llvm.fmuladd.f32(float %i8, float %i20, float %accum.sroa.10.0) 408 %i149 = tail call float @llvm.fmuladd.f32(float %i9, float %i21, float %i148) 409 %i150 = tail call float @llvm.fmuladd.f32(float %i10, float %i22, float %i149) 410 %i151 = tail call float @llvm.fmuladd.f32(float %i11, float %i23, float %i150) 411 %i152 = tail call float @llvm.fmuladd.f32(float %i8, float %i24, float %accum.sroa.14.0) 412 %i153 = tail call float @llvm.fmuladd.f32(float %i9, float %i25, float %i152) 413 %i154 = tail call float @llvm.fmuladd.f32(float %i10, float %i26, float %i153) 414 %i155 = tail call float @llvm.fmuladd.f32(float %i11, float %i27, float %i154) 415 %i156 = tail call float @llvm.fmuladd.f32(float %i8, float %i28, float %accum.sroa.18.0) 416 %i157 = tail call float @llvm.fmuladd.f32(float %i9, float %i29, float %i156) 417 %i158 = tail call float @llvm.fmuladd.f32(float %i10, float %i30, float %i157) 418 %i159 = tail call float @llvm.fmuladd.f32(float %i11, float %i31, float %i158) 419 %i160 = tail call float @llvm.fmuladd.f32(float %i8, float %i32, float %accum.sroa.22.0) 420 %i161 = tail call float @llvm.fmuladd.f32(float %i9, float %i33, float %i160) 421 %i162 = tail call float @llvm.fmuladd.f32(float %i10, float %i34, float %i161) 422 %i163 = tail call float @llvm.fmuladd.f32(float %i11, float %i35, float %i162) 423 %i164 = tail call float @llvm.fmuladd.f32(float %i8, float %i36, float %accum.sroa.26.0) 424 %i165 = tail call float @llvm.fmuladd.f32(float %i9, float %i37, float %i164) 425 %i166 = tail call float @llvm.fmuladd.f32(float %i10, float %i38, float %i165) 426 
%i167 = tail call float @llvm.fmuladd.f32(float %i11, float %i39, float %i166) 427 %i168 = tail call float @llvm.fmuladd.f32(float %i8, float %i40, float %accum.sroa.30.0) 428 %i169 = tail call float @llvm.fmuladd.f32(float %i9, float %i41, float %i168) 429 %i170 = tail call float @llvm.fmuladd.f32(float %i10, float %i42, float %i169) 430 %i171 = tail call float @llvm.fmuladd.f32(float %i11, float %i43, float %i170) 431 %i172 = tail call float @llvm.fmuladd.f32(float %i8, float %i44, float %accum.sroa.34.0) 432 %i173 = tail call float @llvm.fmuladd.f32(float %i9, float %i45, float %i172) 433 %i174 = tail call float @llvm.fmuladd.f32(float %i10, float %i46, float %i173) 434 %i175 = tail call float @llvm.fmuladd.f32(float %i11, float %i47, float %i174) 435 %i176 = tail call float @llvm.fmuladd.f32(float %i8, float %i48, float %accum.sroa.38.0) 436 %i177 = tail call float @llvm.fmuladd.f32(float %i9, float %i49, float %i176) 437 %i178 = tail call float @llvm.fmuladd.f32(float %i10, float %i50, float %i177) 438 %i179 = tail call float @llvm.fmuladd.f32(float %i11, float %i51, float %i178) 439 %i180 = tail call float @llvm.fmuladd.f32(float %i8, float %i52, float %accum.sroa.42.0) 440 %i181 = tail call float @llvm.fmuladd.f32(float %i9, float %i53, float %i180) 441 %i182 = tail call float @llvm.fmuladd.f32(float %i10, float %i54, float %i181) 442 %i183 = tail call float @llvm.fmuladd.f32(float %i11, float %i55, float %i182) 443 %i184 = tail call float @llvm.fmuladd.f32(float %i8, float %i56, float %accum.sroa.46.0) 444 %i185 = tail call float @llvm.fmuladd.f32(float %i9, float %i57, float %i184) 445 %i186 = tail call float @llvm.fmuladd.f32(float %i10, float %i58, float %i185) 446 %i187 = tail call float @llvm.fmuladd.f32(float %i11, float %i59, float %i186) 447 %i188 = tail call float @llvm.fmuladd.f32(float %i8, float %i60, float %accum.sroa.50.0) 448 %i189 = tail call float @llvm.fmuladd.f32(float %i9, float %i61, float %i188) 449 %i190 = tail call float 
@llvm.fmuladd.f32(float %i10, float %i62, float %i189) 450 %i191 = tail call float @llvm.fmuladd.f32(float %i11, float %i63, float %i190) 451 %i192 = tail call float @llvm.fmuladd.f32(float %i8, float %i64, float %accum.sroa.54.0) 452 %i193 = tail call float @llvm.fmuladd.f32(float %i9, float %i65, float %i192) 453 %i194 = tail call float @llvm.fmuladd.f32(float %i10, float %i66, float %i193) 454 %i195 = tail call float @llvm.fmuladd.f32(float %i11, float %i67, float %i194) 455 %i196 = tail call float @llvm.fmuladd.f32(float %i8, float %i68, float %accum.sroa.58.0) 456 %i197 = tail call float @llvm.fmuladd.f32(float %i9, float %i69, float %i196) 457 %i198 = tail call float @llvm.fmuladd.f32(float %i10, float %i70, float %i197) 458 %i199 = tail call float @llvm.fmuladd.f32(float %i11, float %i71, float %i198) 459 %i200 = tail call float @llvm.fmuladd.f32(float %i8, float %i72, float %accum.sroa.62.0) 460 %i201 = tail call float @llvm.fmuladd.f32(float %i9, float %i73, float %i200) 461 %i202 = tail call float @llvm.fmuladd.f32(float %i10, float %i74, float %i201) 462 %i203 = tail call float @llvm.fmuladd.f32(float %i11, float %i75, float %i202) 463 %i204 = tail call float @llvm.fmuladd.f32(float %i8, float %i76, float %accum.sroa.66.0) 464 %i205 = tail call float @llvm.fmuladd.f32(float %i9, float %i77, float %i204) 465 %i206 = tail call float @llvm.fmuladd.f32(float %i10, float %i78, float %i205) 466 %i207 = tail call float @llvm.fmuladd.f32(float %i11, float %i79, float %i206) 467 %i208 = tail call float @llvm.fmuladd.f32(float %i8, float %i80, float %accum.sroa.70.0) 468 %i209 = tail call float @llvm.fmuladd.f32(float %i9, float %i81, float %i208) 469 %i210 = tail call float @llvm.fmuladd.f32(float %i10, float %i82, float %i209) 470 %i211 = tail call float @llvm.fmuladd.f32(float %i11, float %i83, float %i210) 471 %i212 = tail call float @llvm.fmuladd.f32(float %i8, float %i84, float %accum.sroa.74.0) 472 %i213 = tail call float @llvm.fmuladd.f32(float %i9, float 
%i85, float %i212) 473 %i214 = tail call float @llvm.fmuladd.f32(float %i10, float %i86, float %i213) 474 %i215 = tail call float @llvm.fmuladd.f32(float %i11, float %i87, float %i214) 475 %i216 = tail call float @llvm.fmuladd.f32(float %i8, float %i88, float %accum.sroa.78.0) 476 %i217 = tail call float @llvm.fmuladd.f32(float %i9, float %i89, float %i216) 477 %i218 = tail call float @llvm.fmuladd.f32(float %i10, float %i90, float %i217) 478 %i219 = tail call float @llvm.fmuladd.f32(float %i11, float %i91, float %i218) 479 %i220 = tail call float @llvm.fmuladd.f32(float %i8, float %i92, float %accum.sroa.82.0) 480 %i221 = tail call float @llvm.fmuladd.f32(float %i9, float %i93, float %i220) 481 %i222 = tail call float @llvm.fmuladd.f32(float %i10, float %i94, float %i221) 482 %i223 = tail call float @llvm.fmuladd.f32(float %i11, float %i95, float %i222) 483 %i224 = tail call float @llvm.fmuladd.f32(float %i8, float %i96, float %accum.sroa.86.0) 484 %i225 = tail call float @llvm.fmuladd.f32(float %i9, float %i97, float %i224) 485 %i226 = tail call float @llvm.fmuladd.f32(float %i10, float %i98, float %i225) 486 %i227 = tail call float @llvm.fmuladd.f32(float %i11, float %i99, float %i226) 487 %i228 = tail call float @llvm.fmuladd.f32(float %i8, float %i100, float %accum.sroa.90.0) 488 %i229 = tail call float @llvm.fmuladd.f32(float %i9, float %i101, float %i228) 489 %i230 = tail call float @llvm.fmuladd.f32(float %i10, float %i102, float %i229) 490 %i231 = tail call float @llvm.fmuladd.f32(float %i11, float %i103, float %i230) 491 %i232 = tail call float @llvm.fmuladd.f32(float %i8, float %i104, float %accum.sroa.94.0) 492 %i233 = tail call float @llvm.fmuladd.f32(float %i9, float %i105, float %i232) 493 %i234 = tail call float @llvm.fmuladd.f32(float %i10, float %i106, float %i233) 494 %i235 = tail call float @llvm.fmuladd.f32(float %i11, float %i107, float %i234) 495 %i236 = tail call float @llvm.fmuladd.f32(float %i8, float %i108, float %accum.sroa.98.0) 496 
%i237 = tail call float @llvm.fmuladd.f32(float %i9, float %i109, float %i236) 497 %i238 = tail call float @llvm.fmuladd.f32(float %i10, float %i110, float %i237) 498 %i239 = tail call float @llvm.fmuladd.f32(float %i11, float %i111, float %i238) 499 %i240 = tail call float @llvm.fmuladd.f32(float %i8, float %i112, float %accum.sroa.102.0) 500 %i241 = tail call float @llvm.fmuladd.f32(float %i9, float %i113, float %i240) 501 %i242 = tail call float @llvm.fmuladd.f32(float %i10, float %i114, float %i241) 502 %i243 = tail call float @llvm.fmuladd.f32(float %i11, float %i115, float %i242) 503 %i244 = tail call float @llvm.fmuladd.f32(float %i8, float %i116, float %accum.sroa.106.0) 504 %i245 = tail call float @llvm.fmuladd.f32(float %i9, float %i117, float %i244) 505 %i246 = tail call float @llvm.fmuladd.f32(float %i10, float %i118, float %i245) 506 %i247 = tail call float @llvm.fmuladd.f32(float %i11, float %i119, float %i246) 507 %i248 = tail call float @llvm.fmuladd.f32(float %i8, float %i120, float %accum.sroa.110.0) 508 %i249 = tail call float @llvm.fmuladd.f32(float %i9, float %i121, float %i248) 509 %i250 = tail call float @llvm.fmuladd.f32(float %i10, float %i122, float %i249) 510 %i251 = tail call float @llvm.fmuladd.f32(float %i11, float %i123, float %i250) 511 %i252 = tail call float @llvm.fmuladd.f32(float %i8, float %i124, float %accum.sroa.114.0) 512 %i253 = tail call float @llvm.fmuladd.f32(float %i9, float %i125, float %i252) 513 %i254 = tail call float @llvm.fmuladd.f32(float %i10, float %i126, float %i253) 514 %i255 = tail call float @llvm.fmuladd.f32(float %i11, float %i127, float %i254) 515 %i256 = tail call float @llvm.fmuladd.f32(float %i8, float %i128, float %accum.sroa.118.0) 516 %i257 = tail call float @llvm.fmuladd.f32(float %i9, float %i129, float %i256) 517 %i258 = tail call float @llvm.fmuladd.f32(float %i10, float %i130, float %i257) 518 %i259 = tail call float @llvm.fmuladd.f32(float %i11, float %i131, float %i258) 519 %i260 = tail call 
float @llvm.fmuladd.f32(float %i8, float %i132, float %accum.sroa.122.0) 520 %i261 = tail call float @llvm.fmuladd.f32(float %i9, float %i133, float %i260) 521 %i262 = tail call float @llvm.fmuladd.f32(float %i10, float %i134, float %i261) 522 %i263 = tail call float @llvm.fmuladd.f32(float %i11, float %i135, float %i262) 523 %i264 = tail call float @llvm.fmuladd.f32(float %i8, float %i136, float %accum.sroa.126.0) 524 %i265 = tail call float @llvm.fmuladd.f32(float %i9, float %i137, float %i264) 525 %i266 = tail call float @llvm.fmuladd.f32(float %i10, float %i138, float %i265) 526 %i267 = tail call float @llvm.fmuladd.f32(float %i11, float %i139, float %i266) 527 %add.ptr74 = getelementptr inbounds float, ptr addrspace(4) %w_ptr.0287, i64 4096 528 %inc116 = add nuw nsw i32 %ci.0286, 1 529 %exitcond.not = icmp eq i32 %inc116, 512 530 br i1 %exitcond.not, label %for.cond.cleanup26, label %for.cond28.preheader 531 532for.cond.cleanup26: ; preds = %for.cond28.preheader 533 %mul119 = shl nuw nsw i32 undef, 1 534 %mul120 = mul i32 %div, 200704 535 %mul121 = mul i32 undef, 6272 536 %add122 = add i32 %mul120, %mul121 537 %mul123 = mul nuw nsw i32 undef, 28 538 %add124 = add i32 %add122, %mul123 539 %add126 = add i32 %add124, %mul119 540 %idx.ext127 = zext i32 %add126 to i64 541 %add.ptr128 = getelementptr inbounds float, ptr addrspace(1) %out_ptr, i64 %idx.ext127 542 store float %i143, ptr addrspace(1) %add.ptr128, align 4 543 %add.ptr184 = getelementptr inbounds float, ptr addrspace(1) %add.ptr128, i64 196 544 store float %i147, ptr addrspace(1) %add.ptr184, align 4 545 %add.ptr167.1 = getelementptr inbounds float, ptr addrspace(1) %add.ptr184, i64 14 546 store float 0.000000e+00, ptr addrspace(1) %add.ptr167.1, align 4 547 %add.ptr175.1.1 = getelementptr inbounds float, ptr addrspace(1) %add.ptr167.1, i64 1 548 store float 0.000000e+00, ptr addrspace(1) %add.ptr175.1.1, align 4 549 %add.ptr184.1 = getelementptr inbounds float, ptr addrspace(1) %add.ptr184, i64 196 550 
store float %i151, ptr addrspace(1) %add.ptr184.1, align 4 551 %add.ptr184.2 = getelementptr inbounds float, ptr addrspace(1) %add.ptr184.1, i64 196 552 store float %i155, ptr addrspace(1) %add.ptr184.2, align 4 553 %add.ptr184.3 = getelementptr inbounds float, ptr addrspace(1) %add.ptr184.2, i64 196 554 store float %i159, ptr addrspace(1) %add.ptr184.3, align 4 555 %add.ptr184.4 = getelementptr inbounds float, ptr addrspace(1) %add.ptr184.3, i64 196 556 store float %i163, ptr addrspace(1) %add.ptr184.4, align 4 557 %add.ptr154.5 = getelementptr inbounds float, ptr addrspace(1) %add.ptr184.4, i64 1 558 store float 0.000000e+00, ptr addrspace(1) %add.ptr154.5, align 4 559 %add.ptr184.5 = getelementptr inbounds float, ptr addrspace(1) %add.ptr184.4, i64 196 560 store float %i167, ptr addrspace(1) %add.ptr184.5, align 4 561 %add.ptr154.6 = getelementptr inbounds float, ptr addrspace(1) %add.ptr184.5, i64 1 562 store float 0.000000e+00, ptr addrspace(1) %add.ptr154.6, align 4 563 %add.ptr184.6 = getelementptr inbounds float, ptr addrspace(1) %add.ptr184.5, i64 196 564 store float %i171, ptr addrspace(1) %add.ptr184.6, align 4 565 %add.ptr184.7 = getelementptr inbounds float, ptr addrspace(1) %add.ptr184.6, i64 196 566 store float %i175, ptr addrspace(1) %add.ptr184.7, align 4 567 %add.ptr167.8 = getelementptr inbounds float, ptr addrspace(1) %add.ptr184.7, i64 14 568 store float 0.000000e+00, ptr addrspace(1) %add.ptr167.8, align 4 569 %add.ptr175.1.8 = getelementptr inbounds float, ptr addrspace(1) %add.ptr167.8, i64 1 570 store float 0.000000e+00, ptr addrspace(1) %add.ptr175.1.8, align 4 571 %add.ptr184.8 = getelementptr inbounds float, ptr addrspace(1) %add.ptr184.7, i64 196 572 store float %i179, ptr addrspace(1) %add.ptr184.8, align 4 573 %add.ptr184.9 = getelementptr inbounds float, ptr addrspace(1) %add.ptr184.8, i64 196 574 store float %i183, ptr addrspace(1) %add.ptr184.9, align 4 575 %add.ptr184.10 = getelementptr inbounds float, ptr addrspace(1) 
%add.ptr184.9, i64 196 576 store float %i187, ptr addrspace(1) %add.ptr184.10, align 4 577 %add.ptr184.11 = getelementptr inbounds float, ptr addrspace(1) %add.ptr184.10, i64 196 578 store float %i191, ptr addrspace(1) %add.ptr184.11, align 4 579 %add.ptr184.12 = getelementptr inbounds float, ptr addrspace(1) %add.ptr184.11, i64 196 580 store float %i195, ptr addrspace(1) %add.ptr184.12, align 4 581 %add.ptr184.13 = getelementptr inbounds float, ptr addrspace(1) %add.ptr184.12, i64 196 582 store float %i199, ptr addrspace(1) %add.ptr184.13, align 4 583 %add.ptr184.14 = getelementptr inbounds float, ptr addrspace(1) %add.ptr184.13, i64 196 584 store float %i203, ptr addrspace(1) %add.ptr184.14, align 4 585 %add.ptr184.15 = getelementptr inbounds float, ptr addrspace(1) %add.ptr184.14, i64 196 586 store float %i207, ptr addrspace(1) %add.ptr184.15, align 4 587 %add.ptr184.16 = getelementptr inbounds float, ptr addrspace(1) %add.ptr184.15, i64 196 588 store float %i211, ptr addrspace(1) %add.ptr184.16, align 4 589 %add.ptr184.17 = getelementptr inbounds float, ptr addrspace(1) %add.ptr184.16, i64 196 590 store float %i215, ptr addrspace(1) %add.ptr184.17, align 4 591 %add.ptr184.18 = getelementptr inbounds float, ptr addrspace(1) %add.ptr184.17, i64 196 592 store float %i219, ptr addrspace(1) %add.ptr184.18, align 4 593 %add.ptr184.19 = getelementptr inbounds float, ptr addrspace(1) %add.ptr184.18, i64 196 594 store float %i223, ptr addrspace(1) %add.ptr184.19, align 4 595 %add.ptr184.20 = getelementptr inbounds float, ptr addrspace(1) %add.ptr184.19, i64 196 596 store float %i227, ptr addrspace(1) %add.ptr184.20, align 4 597 %add.ptr184.21 = getelementptr inbounds float, ptr addrspace(1) %add.ptr184.20, i64 196 598 store float %i231, ptr addrspace(1) %add.ptr184.21, align 4 599 %add.ptr184.22 = getelementptr inbounds float, ptr addrspace(1) %add.ptr184.21, i64 196 600 store float %i235, ptr addrspace(1) %add.ptr184.22, align 4 601 %add.ptr184.23 = getelementptr 
inbounds float, ptr addrspace(1) %add.ptr184.22, i64 196 602 store float %i239, ptr addrspace(1) %add.ptr184.23, align 4 603 %add.ptr184.24 = getelementptr inbounds float, ptr addrspace(1) %add.ptr184.23, i64 196 604 store float %i243, ptr addrspace(1) %add.ptr184.24, align 4 605 %add.ptr184.25 = getelementptr inbounds float, ptr addrspace(1) %add.ptr184.24, i64 196 606 store float %i247, ptr addrspace(1) %add.ptr184.25, align 4 607 %add.ptr184.26 = getelementptr inbounds float, ptr addrspace(1) %add.ptr184.25, i64 196 608 store float %i251, ptr addrspace(1) %add.ptr184.26, align 4 609 %add.ptr184.27 = getelementptr inbounds float, ptr addrspace(1) %add.ptr184.26, i64 196 610 store float %i255, ptr addrspace(1) %add.ptr184.27, align 4 611 %add.ptr184.28 = getelementptr inbounds float, ptr addrspace(1) %add.ptr184.27, i64 196 612 store float %i259, ptr addrspace(1) %add.ptr184.28, align 4 613 %add.ptr184.29 = getelementptr inbounds float, ptr addrspace(1) %add.ptr184.28, i64 196 614 store float %i263, ptr addrspace(1) %add.ptr184.29, align 4 615 %add.ptr184.30 = getelementptr inbounds float, ptr addrspace(1) %add.ptr184.29, i64 196 616 store float %i267, ptr addrspace(1) %add.ptr184.30, align 4 617 ret void 618} 619 620 621
; ---------------------------------------------------------------------------
; Intrinsic declarations used by the test functions in this file.
; ---------------------------------------------------------------------------
; Float multiply-add intrinsic; every accumulation chain in
; @excess_soft_clause_reg_pressure is built from tail calls to it.
622declare float @llvm.fmuladd.f32(float, float, float) #2
; Work-item id (x); called in the entry block of
; @excess_soft_clause_reg_pressure with `!range !5` attached.
623declare i32 @llvm.amdgcn.workitem.id.x() #3
; Work-group id (x); called in the entry block of
; @excess_soft_clause_reg_pressure and shifted left by 8 there.
624declare i32 @llvm.amdgcn.workgroup.id.x() #3
; Returns an addrspace(4) pointer; the entry block of
; @excess_soft_clause_reg_pressure loads an i64 through it.
625declare align 4 ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr() #3 626
; ---------------------------------------------------------------------------
; Metadata nodes.
; NOTE(review): no use of !0-!4 is visible in the reviewed portion of this
; file; they look like leftover kernel-argument / work-group-size metadata from
; the original reproducer -- confirm they are unreferenced before removing.
; ---------------------------------------------------------------------------
627!0 = !{i32 1, i32 2, i32 1, i32 0} 628!1 = !{!"none", !"none", !"none", !"none"} 629!2 = !{!"ptr", !"ptr", !"ptr", !"float"} 630!3 = !{!"restrict const", !"restrict const", !"restrict", !""} 631!4 = !{i32 256, i32 1, i32 1}
; !5 is the `!range` operand on the workitem.id.x call in
; @excess_soft_clause_reg_pressure; as range metadata it denotes [0, 1024).
632!5 = !{i32 0, i32 1024} 633
; ---------------------------------------------------------------------------
; Attribute groups.
; ---------------------------------------------------------------------------
; NOTE(review): #0 is not referenced by any definition visible in the reviewed
; portion of this file -- TODO confirm whether it is still needed.
634attributes #0 = { nounwind "amdgpu-waves-per-eu"="1,1" "amdgpu-flat-work-group-size"="1,1" }
; #1 is applied to @return_72xi32 and @call_72xi32 and sets
; "amdgpu-num-vgpr" to "64".
635attributes #1 = { nounwind "amdgpu-num-vgpr"="64" }
; #2 is attached to the @llvm.fmuladd.f32 declaration.
636attributes #2 = { nofree nosync nounwind readnone speculatable willreturn }
; #3 is attached to the three @llvm.amdgcn.* intrinsic declarations.
637attributes #3 = { nounwind readnone
speculatable willreturn } 638