; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt < %s -S -passes=slp-vectorizer -mtriple=aarch64-unknown-linux -mattr=-fullfp16 | FileCheck %s --check-prefixes=CHECK,NOFP16
; RUN: opt < %s -S -passes=slp-vectorizer -mtriple=aarch64-unknown-linux -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,FULLFP16

define half @reduce_fast_half2(<2 x half> %vec2) {
; CHECK-LABEL: define half @reduce_fast_half2(
; CHECK-SAME: <2 x half> [[VEC2:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <2 x half> [[VEC2]], i64 0
; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <2 x half> [[VEC2]], i64 1
; CHECK-NEXT:    [[ADD1:%.*]] = fadd fast half [[ELT1]], [[ELT0]]
; CHECK-NEXT:    ret half [[ADD1]]
;
entry:
  %elt0 = extractelement <2 x half> %vec2, i64 0
  %elt1 = extractelement <2 x half> %vec2, i64 1
  %add1 = fadd fast half %elt1, %elt0
  ret half %add1
}

define half @reduce_half2(<2 x half> %vec2) {
; CHECK-LABEL: define half @reduce_half2(
; CHECK-SAME: <2 x half> [[VEC2:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <2 x half> [[VEC2]], i64 0
; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <2 x half> [[VEC2]], i64 1
; CHECK-NEXT:    [[ADD1:%.*]] = fadd half [[ELT1]], [[ELT0]]
; CHECK-NEXT:    ret half [[ADD1]]
;
entry:
  %elt0 = extractelement <2 x half> %vec2, i64 0
  %elt1 = extractelement <2 x half> %vec2, i64 1
  %add1 = fadd half %elt1, %elt0
  ret half %add1
}

define half @reduce_fast_half4(<4 x half> %vec4) {
; CHECK-LABEL: define half @reduce_fast_half4(
; CHECK-SAME: <4 x half> [[VEC4:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[TMP0:%.*]] = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> [[VEC4]])
; CHECK-NEXT:    ret half [[TMP0]]
;
entry:
  %elt0 = extractelement <4 x half> %vec4, i64 0
  %elt1 = extractelement <4 x half> %vec4, i64 1
  %elt2 = extractelement <4 x half> %vec4, i64 2
  %elt3 = extractelement <4 x half> %vec4, i64 3
  %add1 = fadd fast half %elt1, %elt0
  %add2 = fadd fast half %elt2, %add1
  %add3 = fadd fast half %elt3, %add2
  ret half %add3
}

define half @reduce_half4(<4 x half> %vec4) {
; CHECK-LABEL: define half @reduce_half4(
; CHECK-SAME: <4 x half> [[VEC4:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <4 x half> [[VEC4]], i64 0
; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <4 x half> [[VEC4]], i64 1
; CHECK-NEXT:    [[ELT2:%.*]] = extractelement <4 x half> [[VEC4]], i64 2
; CHECK-NEXT:    [[ELT3:%.*]] = extractelement <4 x half> [[VEC4]], i64 3
; CHECK-NEXT:    [[ADD1:%.*]] = fadd half [[ELT1]], [[ELT0]]
; CHECK-NEXT:    [[ADD2:%.*]] = fadd half [[ELT2]], [[ADD1]]
; CHECK-NEXT:    [[ADD3:%.*]] = fadd half [[ELT3]], [[ADD2]]
; CHECK-NEXT:    ret half [[ADD3]]
;
entry:
  %elt0 = extractelement <4 x half> %vec4, i64 0
  %elt1 = extractelement <4 x half> %vec4, i64 1
  %elt2 = extractelement <4 x half> %vec4, i64 2
  %elt3 = extractelement <4 x half> %vec4, i64 3
  %add1 = fadd half %elt1, %elt0
  %add2 = fadd half %elt2, %add1
  %add3 = fadd half %elt3, %add2
  ret half %add3
}

define half @reduce_fast_half8(<8 x half> %vec8) {
; NOFP16-LABEL: define half @reduce_fast_half8(
; NOFP16-SAME: <8 x half> [[VEC8:%.*]]) #[[ATTR0]] {
; NOFP16-NEXT:  [[ENTRY:.*:]]
; NOFP16-NEXT:    [[TMP0:%.*]] = shufflevector <8 x half> [[VEC8]], <8 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
; NOFP16-NEXT:    [[TMP1:%.*]] = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> [[TMP0]])
; NOFP16-NEXT:    [[TMP2:%.*]] = shufflevector <8 x half> [[VEC8]], <8 x half> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
; NOFP16-NEXT:    [[TMP3:%.*]] = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> [[TMP2]])
; NOFP16-NEXT:    [[OP_RDX3:%.*]] = fadd fast half [[TMP1]], [[TMP3]]
; NOFP16-NEXT:    ret half [[OP_RDX3]]
;
; FULLFP16-LABEL: define half @reduce_fast_half8(
; FULLFP16-SAME: <8 x half> [[VEC8:%.*]]) #[[ATTR0]] {
; FULLFP16-NEXT:  [[ENTRY:.*:]]
; FULLFP16-NEXT:    [[TMP0:%.*]] = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> [[VEC8]])
; FULLFP16-NEXT:    ret half [[TMP0]]
;
entry:
  %elt0 = extractelement <8 x half> %vec8, i64 0
  %elt1 = extractelement <8 x half> %vec8, i64 1
  %elt2 = extractelement <8 x half> %vec8, i64 2
  %elt3 = extractelement <8 x half> %vec8, i64 3
  %elt4 = extractelement <8 x half> %vec8, i64 4
  %elt5 = extractelement <8 x half> %vec8, i64 5
  %elt6 = extractelement <8 x half> %vec8, i64 6
  %elt7 = extractelement <8 x half> %vec8, i64 7
  %add1 = fadd fast half %elt1, %elt0
  %add2 = fadd fast half %elt2, %add1
  %add3 = fadd fast half %elt3, %add2
  %add4 = fadd fast half %elt4, %add3
  %add5 = fadd fast half %elt5, %add4
  %add6 = fadd fast half %elt6, %add5
  %add7 = fadd fast half %elt7, %add6
  ret half %add7
}

define half @reduce_half8(<8 x half> %vec8) {
; CHECK-LABEL: define half @reduce_half8(
; CHECK-SAME: <8 x half> [[VEC8:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <8 x half> [[VEC8]], i64 0
; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <8 x half> [[VEC8]], i64 1
; CHECK-NEXT:    [[ELT2:%.*]] = extractelement <8 x half> [[VEC8]], i64 2
; CHECK-NEXT:    [[ELT3:%.*]] = extractelement <8 x half> [[VEC8]], i64 3
; CHECK-NEXT:    [[ELT4:%.*]] = extractelement <8 x half> [[VEC8]], i64 4
; CHECK-NEXT:    [[ELT5:%.*]] = extractelement <8 x half> [[VEC8]], i64 5
; CHECK-NEXT:    [[ELT6:%.*]] = extractelement <8 x half> [[VEC8]], i64 6
; CHECK-NEXT:    [[ELT7:%.*]] = extractelement <8 x half> [[VEC8]], i64 7
; CHECK-NEXT:    [[ADD1:%.*]] = fadd half [[ELT1]], [[ELT0]]
; CHECK-NEXT:    [[ADD2:%.*]] = fadd half [[ELT2]], [[ADD1]]
; CHECK-NEXT:    [[ADD3:%.*]] = fadd half [[ELT3]], [[ADD2]]
; CHECK-NEXT:    [[ADD4:%.*]] = fadd half [[ELT4]], [[ADD3]]
; CHECK-NEXT:    [[ADD5:%.*]] = fadd half [[ELT5]], [[ADD4]]
; CHECK-NEXT:    [[ADD6:%.*]] = fadd half [[ELT6]], [[ADD5]]
; CHECK-NEXT:    [[ADD7:%.*]] = fadd half [[ELT7]], [[ADD6]]
; CHECK-NEXT:    ret half [[ADD7]]
;
entry:
  %elt0 = extractelement <8 x half> %vec8, i64 0
  %elt1 = extractelement <8 x half> %vec8, i64 1
  %elt2 = extractelement <8 x half> %vec8, i64 2
  %elt3 = extractelement <8 x half> %vec8, i64 3
  %elt4 = extractelement <8 x half> %vec8, i64 4
  %elt5 = extractelement <8 x half> %vec8, i64 5
  %elt6 = extractelement <8 x half> %vec8, i64 6
  %elt7 = extractelement <8 x half> %vec8, i64 7
  %add1 = fadd half %elt1, %elt0
  %add2 = fadd half %elt2, %add1
  %add3 = fadd half %elt3, %add2
  %add4 = fadd half %elt4, %add3
  %add5 = fadd half %elt5, %add4
  %add6 = fadd half %elt6, %add5
  %add7 = fadd half %elt7, %add6
  ret half %add7
}

define half @reduce_fast_half16(<16 x half> %vec16) {
; CHECK-LABEL: define half @reduce_fast_half16(
; CHECK-SAME: <16 x half> [[VEC16:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[TMP0:%.*]] = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> [[VEC16]])
; CHECK-NEXT:    ret half [[TMP0]]
;
entry:
  %elt0 = extractelement <16 x half> %vec16, i64 0
  %elt1 = extractelement <16 x half> %vec16, i64 1
  %elt2 = extractelement <16 x half> %vec16, i64 2
  %elt3 = extractelement <16 x half> %vec16, i64 3
  %elt4 = extractelement <16 x half> %vec16, i64 4
  %elt5 = extractelement <16 x half> %vec16, i64 5
  %elt6 = extractelement <16 x half> %vec16, i64 6
  %elt7 = extractelement <16 x half> %vec16, i64 7
  %elt8 = extractelement <16 x half> %vec16, i64 8
  %elt9 = extractelement <16 x half> %vec16, i64 9
  %elt10 = extractelement <16 x half> %vec16, i64 10
  %elt11 = extractelement <16 x half> %vec16, i64 11
  %elt12 = extractelement <16 x half> %vec16, i64 12
  %elt13 = extractelement <16 x half> %vec16, i64 13
  %elt14 = extractelement <16 x half> %vec16, i64 14
  %elt15 = extractelement <16 x half> %vec16, i64 15
  %add1 = fadd fast half %elt1, %elt0
  %add2 = fadd fast half %elt2, %add1
  %add3 = fadd fast half %elt3, %add2
  %add4 = fadd fast half %elt4, %add3
  %add5 = fadd fast half %elt5, %add4
  %add6 = fadd fast half %elt6, %add5
  %add7 = fadd fast half %elt7, %add6
  %add8 = fadd fast half %elt8, %add7
  %add9 = fadd fast half %elt9, %add8
  %add10 = fadd fast half %elt10, %add9
  %add11 = fadd fast half %elt11, %add10
  %add12 = fadd fast half %elt12, %add11
  %add13 = fadd fast half %elt13, %add12
  %add14 = fadd fast half %elt14, %add13
  %add15 = fadd fast half %elt15, %add14
  ret half %add15
}

define half @reduce_half16(<16 x half> %vec16) {
; CHECK-LABEL: define half @reduce_half16(
; CHECK-SAME: <16 x half> [[VEC16:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <16 x half> [[VEC16]], i64 0
; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <16 x half> [[VEC16]], i64 1
; CHECK-NEXT:    [[ELT2:%.*]] = extractelement <16 x half> [[VEC16]], i64 2
; CHECK-NEXT:    [[ELT3:%.*]] = extractelement <16 x half> [[VEC16]], i64 3
; CHECK-NEXT:    [[ELT4:%.*]] = extractelement <16 x half> [[VEC16]], i64 4
; CHECK-NEXT:    [[ELT5:%.*]] = extractelement <16 x half> [[VEC16]], i64 5
; CHECK-NEXT:    [[ELT6:%.*]] = extractelement <16 x half> [[VEC16]], i64 6
; CHECK-NEXT:    [[ELT7:%.*]] = extractelement <16 x half> [[VEC16]], i64 7
; CHECK-NEXT:    [[ELT8:%.*]] = extractelement <16 x half> [[VEC16]], i64 8
; CHECK-NEXT:    [[ELT9:%.*]] = extractelement <16 x half> [[VEC16]], i64 9
; CHECK-NEXT:    [[ELT10:%.*]] = extractelement <16 x half> [[VEC16]], i64 10
; CHECK-NEXT:    [[ELT11:%.*]] = extractelement <16 x half> [[VEC16]], i64 11
; CHECK-NEXT:    [[ELT12:%.*]] = extractelement <16 x half> [[VEC16]], i64 12
; CHECK-NEXT:    [[ELT13:%.*]] = extractelement <16 x half> [[VEC16]], i64 13
; CHECK-NEXT:    [[ELT14:%.*]] = extractelement <16 x half> [[VEC16]], i64 14
; CHECK-NEXT:    [[ELT15:%.*]] = extractelement <16 x half> [[VEC16]], i64 15
; CHECK-NEXT:    [[ADD1:%.*]] = fadd half [[ELT1]], [[ELT0]]
; CHECK-NEXT:    [[ADD2:%.*]] = fadd half [[ELT2]], [[ADD1]]
; CHECK-NEXT:    [[ADD3:%.*]] = fadd half [[ELT3]], [[ADD2]]
; CHECK-NEXT:    [[ADD4:%.*]] = fadd half [[ELT4]], [[ADD3]]
; CHECK-NEXT:    [[ADD5:%.*]] = fadd half [[ELT5]], [[ADD4]]
; CHECK-NEXT:    [[ADD6:%.*]] = fadd half [[ELT6]], [[ADD5]]
; CHECK-NEXT:    [[ADD7:%.*]] = fadd half [[ELT7]], [[ADD6]]
; CHECK-NEXT:    [[ADD8:%.*]] = fadd half [[ELT8]], [[ADD7]]
; CHECK-NEXT:    [[ADD9:%.*]] = fadd half [[ELT9]], [[ADD8]]
; CHECK-NEXT:    [[ADD10:%.*]] = fadd half [[ELT10]], [[ADD9]]
; CHECK-NEXT:    [[ADD11:%.*]] = fadd half [[ELT11]], [[ADD10]]
; CHECK-NEXT:    [[ADD12:%.*]] = fadd half [[ELT12]], [[ADD11]]
; CHECK-NEXT:    [[ADD13:%.*]] = fadd half [[ELT13]], [[ADD12]]
; CHECK-NEXT:    [[ADD14:%.*]] = fadd half [[ELT14]], [[ADD13]]
; CHECK-NEXT:    [[ADD15:%.*]] = fadd half [[ELT15]], [[ADD14]]
; CHECK-NEXT:    ret half [[ADD15]]
;
entry:
  %elt0 = extractelement <16 x half> %vec16, i64 0
  %elt1 = extractelement <16 x half> %vec16, i64 1
  %elt2 = extractelement <16 x half> %vec16, i64 2
  %elt3 = extractelement <16 x half> %vec16, i64 3
  %elt4 = extractelement <16 x half> %vec16, i64 4
  %elt5 = extractelement <16 x half> %vec16, i64 5
  %elt6 = extractelement <16 x half> %vec16, i64 6
  %elt7 = extractelement <16 x half> %vec16, i64 7
  %elt8 = extractelement <16 x half> %vec16, i64 8
  %elt9 = extractelement <16 x half> %vec16, i64 9
  %elt10 = extractelement <16 x half> %vec16, i64 10
  %elt11 = extractelement <16 x half> %vec16, i64 11
  %elt12 = extractelement <16 x half> %vec16, i64 12
  %elt13 = extractelement <16 x half> %vec16, i64 13
  %elt14 = extractelement <16 x half> %vec16, i64 14
  %elt15 = extractelement <16 x half> %vec16, i64 15
  %add1 = fadd half %elt1, %elt0
  %add2 = fadd half %elt2, %add1
  %add3 = fadd half %elt3, %add2
  %add4 = fadd half %elt4, %add3
  %add5 = fadd half %elt5, %add4
  %add6 = fadd half %elt6, %add5
  %add7 = fadd half %elt7, %add6
  %add8 = fadd half %elt8, %add7
  %add9 = fadd half %elt9, %add8
  %add10 = fadd half %elt10, %add9
  %add11 = fadd half %elt11, %add10
  %add12 = fadd half %elt12, %add11
  %add13 = fadd half %elt13, %add12
  %add14 = fadd half %elt14, %add13
  %add15 = fadd half %elt15, %add14
  ret half %add15
}

define float @reduce_fast_float2(<2 x float> %vec2) {
; CHECK-LABEL: define float @reduce_fast_float2(
; CHECK-SAME: <2 x float> [[VEC2:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <2 x float> [[VEC2]], i64 0
; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <2 x float> [[VEC2]], i64 1
; CHECK-NEXT:    [[ADD1:%.*]] = fadd fast float [[ELT1]], [[ELT0]]
; CHECK-NEXT:    ret float [[ADD1]]
;
entry:
  %elt0 = extractelement <2 x float> %vec2, i64 0
  %elt1 = extractelement <2 x float> %vec2, i64 1
  %add1 = fadd fast float %elt1, %elt0
  ret float %add1
}

define float @reduce_float2(<2 x float> %vec2) {
; CHECK-LABEL: define float @reduce_float2(
; CHECK-SAME: <2 x float> [[VEC2:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <2 x float> [[VEC2]], i64 0
; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <2 x float> [[VEC2]], i64 1
; CHECK-NEXT:    [[ADD1:%.*]] = fadd float [[ELT1]], [[ELT0]]
; CHECK-NEXT:    ret float [[ADD1]]
;
entry:
  %elt0 = extractelement <2 x float> %vec2, i64 0
  %elt1 = extractelement <2 x float> %vec2, i64 1
  %add1 = fadd float %elt1, %elt0
  ret float %add1
}

define float @reduce_fast_float4(<4 x float> %vec4) {
; CHECK-LABEL: define float @reduce_fast_float4(
; CHECK-SAME: <4 x float> [[VEC4:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[TMP0:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[VEC4]])
; CHECK-NEXT:    ret float [[TMP0]]
;
entry:
  %elt0 = extractelement <4 x float> %vec4, i64 0
  %elt1 = extractelement <4 x float> %vec4, i64 1
  %elt2 = extractelement <4 x float> %vec4, i64 2
  %elt3 = extractelement <4 x float> %vec4, i64 3
  %add1 = fadd fast float %elt1, %elt0
  %add2 = fadd fast float %elt2, %add1
  %add3 = fadd fast float %elt3, %add2
  ret float %add3
}

define float @reduce_float4(<4 x float> %vec4) {
; CHECK-LABEL: define float @reduce_float4(
; CHECK-SAME: <4 x float> [[VEC4:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <4 x float> [[VEC4]], i64 0
; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <4 x float> [[VEC4]], i64 1
; CHECK-NEXT:    [[ELT2:%.*]] = extractelement <4 x float> [[VEC4]], i64 2
; CHECK-NEXT:    [[ELT3:%.*]] = extractelement <4 x float> [[VEC4]], i64 3
; CHECK-NEXT:    [[ADD1:%.*]] = fadd float [[ELT1]], [[ELT0]]
; CHECK-NEXT:    [[ADD2:%.*]] = fadd float [[ELT2]], [[ADD1]]
; CHECK-NEXT:    [[ADD3:%.*]] = fadd float [[ELT3]], [[ADD2]]
; CHECK-NEXT:    ret float [[ADD3]]
;
entry:
  %elt0 = extractelement <4 x float> %vec4, i64 0
  %elt1 = extractelement <4 x float> %vec4, i64 1
  %elt2 = extractelement <4 x float> %vec4, i64 2
  %elt3 = extractelement <4 x float> %vec4, i64 3
  %add1 = fadd float %elt1, %elt0
  %add2 = fadd float %elt2, %add1
  %add3 = fadd float %elt3, %add2
  ret float %add3
}

define float @reduce_fast_float8(<8 x float> %vec8) {
; CHECK-LABEL: define float @reduce_fast_float8(
; CHECK-SAME: <8 x float> [[VEC8:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[TMP0:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> [[VEC8]])
; CHECK-NEXT:    ret float [[TMP0]]
;
entry:
  %elt0 = extractelement <8 x float> %vec8, i64 0
  %elt1 = extractelement <8 x float> %vec8, i64 1
  %elt2 = extractelement <8 x float> %vec8, i64 2
  %elt3 = extractelement <8 x float> %vec8, i64 3
  %elt4 = extractelement <8 x float> %vec8, i64 4
  %elt5 = extractelement <8 x float> %vec8, i64 5
  %elt6 = extractelement <8 x float> %vec8, i64 6
  %elt7 = extractelement <8 x float> %vec8, i64 7
  %add1 = fadd fast float %elt1, %elt0
  %add2 = fadd fast float %elt2, %add1
  %add3 = fadd fast float %elt3, %add2
  %add4 = fadd fast float %elt4, %add3
  %add5 = fadd fast float %elt5, %add4
  %add6 = fadd fast float %elt6, %add5
  %add7 = fadd fast float %elt7, %add6
  ret float %add7
}

define float @reduce_float8(<8 x float> %vec8) {
; CHECK-LABEL: define float @reduce_float8(
; CHECK-SAME: <8 x float> [[VEC8:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <8 x float> [[VEC8]], i64 0
; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <8 x float> [[VEC8]], i64 1
; CHECK-NEXT:    [[ELT2:%.*]] = extractelement <8 x float> [[VEC8]], i64 2
; CHECK-NEXT:    [[ELT3:%.*]] = extractelement <8 x float> [[VEC8]], i64 3
; CHECK-NEXT:    [[ELT4:%.*]] = extractelement <8 x float> [[VEC8]], i64 4
; CHECK-NEXT:    [[ELT5:%.*]] = extractelement <8 x float> [[VEC8]], i64 5
; CHECK-NEXT:    [[ELT6:%.*]] = extractelement <8 x float> [[VEC8]], i64 6
; CHECK-NEXT:    [[ELT7:%.*]] = extractelement <8 x float> [[VEC8]], i64 7
; CHECK-NEXT:    [[ADD1:%.*]] = fadd float [[ELT1]], [[ELT0]]
; CHECK-NEXT:    [[ADD2:%.*]] = fadd float [[ELT2]], [[ADD1]]
; CHECK-NEXT:    [[ADD3:%.*]] = fadd float [[ELT3]], [[ADD2]]
; CHECK-NEXT:    [[ADD4:%.*]] = fadd float [[ELT4]], [[ADD3]]
; CHECK-NEXT:    [[ADD5:%.*]] = fadd float [[ELT5]], [[ADD4]]
; CHECK-NEXT:    [[ADD6:%.*]] = fadd float [[ELT6]], [[ADD5]]
; CHECK-NEXT:    [[ADD7:%.*]] = fadd float [[ELT7]], [[ADD6]]
; CHECK-NEXT:    ret float [[ADD7]]
;
entry:
  %elt0 = extractelement <8 x float> %vec8, i64 0
  %elt1 = extractelement <8 x float> %vec8, i64 1
  %elt2 = extractelement <8 x float> %vec8, i64 2
  %elt3 = extractelement <8 x float> %vec8, i64 3
  %elt4 = extractelement <8 x float> %vec8, i64 4
  %elt5 = extractelement <8 x float> %vec8, i64 5
  %elt6 = extractelement <8 x float> %vec8, i64 6
  %elt7 = extractelement <8 x float> %vec8, i64 7
  %add1 = fadd float %elt1, %elt0
  %add2 = fadd float %elt2, %add1
  %add3 = fadd float %elt3, %add2
  %add4 = fadd float %elt4, %add3
  %add5 = fadd float %elt5, %add4
  %add6 = fadd float %elt6, %add5
  %add7 = fadd float %elt7, %add6
  ret float %add7
}

define double @reduce_fast_double2(<2 x double> %vec2) {
; CHECK-LABEL: define double @reduce_fast_double2(
; CHECK-SAME: <2 x double> [[VEC2:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <2 x double> [[VEC2]], i64 0
; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <2 x double> [[VEC2]], i64 1
; CHECK-NEXT:    [[ADD1:%.*]] = fadd fast double [[ELT1]], [[ELT0]]
; CHECK-NEXT:    ret double [[ADD1]]
;
entry:
  %elt0 = extractelement <2 x double> %vec2, i64 0
  %elt1 = extractelement <2 x double> %vec2, i64 1
  %add1 = fadd fast double %elt1, %elt0
  ret double %add1
}

define double @reduce_double2(<2 x double> %vec2) {
; CHECK-LABEL: define double @reduce_double2(
; CHECK-SAME: <2 x double> [[VEC2:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <2 x double> [[VEC2]], i64 0
; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <2 x double> [[VEC2]], i64 1
; CHECK-NEXT:    [[ADD1:%.*]] = fadd double [[ELT1]], [[ELT0]]
; CHECK-NEXT:    ret double [[ADD1]]
;
entry:
  %elt0 = extractelement <2 x double> %vec2, i64 0
  %elt1 = extractelement <2 x double> %vec2, i64 1
  %add1 = fadd double %elt1, %elt0
  ret double %add1
}

define double @reduce_fast_double4(<4 x double> %vec4) {
; CHECK-LABEL: define double @reduce_fast_double4(
; CHECK-SAME: <4 x double> [[VEC4:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[TMP0:%.*]] = call fast double @llvm.vector.reduce.fadd.v4f64(double 0.000000e+00, <4 x double> [[VEC4]])
; CHECK-NEXT:    ret double [[TMP0]]
;
entry:
  %elt0 = extractelement <4 x double> %vec4, i64 0
  %elt1 = extractelement <4 x double> %vec4, i64 1
  %elt2 = extractelement <4 x double> %vec4, i64 2
  %elt3 = extractelement <4 x double> %vec4, i64 3
  %add1 = fadd fast double %elt1, %elt0
  %add2 = fadd fast double %elt2, %add1
  %add3 = fadd fast double %elt3, %add2
  ret double %add3
}

define double @reduce_double4(<4 x double> %vec4) {
; CHECK-LABEL: define double @reduce_double4(
; CHECK-SAME: <4 x double> [[VEC4:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <4 x double> [[VEC4]], i64 0
; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <4 x double> [[VEC4]], i64 1
; CHECK-NEXT:    [[ELT2:%.*]] = extractelement <4 x double> [[VEC4]], i64 2
; CHECK-NEXT:    [[ELT3:%.*]] = extractelement <4 x double> [[VEC4]], i64 3
; CHECK-NEXT:    [[ADD1:%.*]] = fadd double [[ELT1]], [[ELT0]]
; CHECK-NEXT:    [[ADD2:%.*]] = fadd double [[ELT2]], [[ADD1]]
; CHECK-NEXT:    [[ADD3:%.*]] = fadd double [[ELT3]], [[ADD2]]
; CHECK-NEXT:    ret double [[ADD3]]
;
entry:
  %elt0 = extractelement <4 x double> %vec4, i64 0
  %elt1 = extractelement <4 x double> %vec4, i64 1
  %elt2 = extractelement <4 x double> %vec4, i64 2
  %elt3 = extractelement <4 x double> %vec4, i64 3
  %add1 = fadd double %elt1, %elt0
  %add2 = fadd double %elt2, %add1
  %add3 = fadd double %elt3, %add2
  ret double %add3
}

; Fixed iteration count. sum += a[i]
define float @reduce_fast_float_case1(ptr %a) {
; CHECK-LABEL: define float @reduce_fast_float_case1(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 4
; CHECK-NEXT:    [[GEP4:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 16
; CHECK-NEXT:    [[LOAD4:%.*]] = load float, ptr [[GEP4]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP0]])
; CHECK-NEXT:    [[ADD4:%.*]] = fadd fast float [[TMP1]], [[LOAD4]]
; CHECK-NEXT:    ret float [[ADD4]]
;
entry:
  %load = load float, ptr %a
  %gep = getelementptr inbounds i8, ptr %a, i64 4
  %load1 = load float, ptr %gep
  %add1 = fadd fast float %load1, %load
  %gep2 = getelementptr inbounds i8, ptr %a, i64 8
  %load2 = load float, ptr %gep2
  %add2 = fadd fast float %load2, %add1
  %gep3 = getelementptr inbounds i8, ptr %a, i64 12
  %load3 = load float, ptr %gep3
  %add3 = fadd fast float %load3, %add2
  %gep4 = getelementptr inbounds i8, ptr %a, i64 16
  %load4 = load float, ptr %gep4
  %add4 = fadd fast float %load4, %add3
  ret float %add4
}

; Fixed iteration count. sum += a[i]
define float @reduce_float_case1(ptr %a) {
; CHECK-LABEL: define float @reduce_float_case1(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[LOAD:%.*]] = load float, ptr [[A]], align 4
; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 4
; CHECK-NEXT:    [[LOAD1:%.*]] = load float, ptr [[GEP]], align 4
; CHECK-NEXT:    [[ADD1:%.*]] = fadd float [[LOAD1]], [[LOAD]]
; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 8
; CHECK-NEXT:    [[LOAD2:%.*]] = load float, ptr [[GEP2]], align 4
; CHECK-NEXT:    [[ADD2:%.*]] = fadd float [[LOAD2]], [[ADD1]]
; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 12
; CHECK-NEXT:    [[LOAD3:%.*]] = load float, ptr [[GEP3]], align 4
; CHECK-NEXT:    [[ADD3:%.*]] = fadd float [[LOAD3]], [[ADD2]]
; CHECK-NEXT:    [[GEP4:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 16
; CHECK-NEXT:    [[LOAD4:%.*]] = load float, ptr [[GEP4]], align 4
; CHECK-NEXT:    [[ADD4:%.*]] = fadd float [[LOAD4]], [[ADD3]]
; CHECK-NEXT:    ret float [[ADD4]]
;
entry:
  %load = load float, ptr %a
  %gep = getelementptr inbounds i8, ptr %a, i64 4
  %load1 = load float, ptr %gep
  %add1 = fadd float %load1, %load
  %gep2 = getelementptr inbounds i8, ptr %a, i64 8
  %load2 = load float, ptr %gep2
  %add2 = fadd float %load2, %add1
  %gep3 = getelementptr inbounds i8, ptr %a, i64 12
  %load3 = load float, ptr %gep3
  %add3 = fadd float %load3, %add2
  %gep4 = getelementptr inbounds i8, ptr %a, i64 16
  %load4 = load float, ptr %gep4
  %add4 = fadd float %load4, %add3
  ret float %add4
}

; Reduction needs a shuffle. See add2 and add3.
define float @reduce_fast_float_case2(ptr %a, ptr %b) {
; CHECK-LABEL: define float @reduce_fast_float_case2(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, ptr [[A]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, ptr [[B]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> poison, <4 x float> [[TMP1]], i64 0)
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x float> @llvm.vector.insert.v8f32.v4f32(<8 x float> [[TMP2]], <4 x float> [[TMP0]], i64 4)
; CHECK-NEXT:    [[RED3:%.*]] = call fast float @llvm.vector.reduce.fadd.v8f32(float 0.000000e+00, <8 x float> [[TMP3]])
; CHECK-NEXT:    ret float [[RED3]]
;
entry:
  %gepa1 = getelementptr inbounds float, ptr %a, i32 1
  %gepa2 = getelementptr inbounds float, ptr %a, i32 2
  %gepa3 = getelementptr inbounds float, ptr %a, i32 3
  %gepb1 = getelementptr inbounds float, ptr %b, i32 1
  %gepb2 = getelementptr inbounds float, ptr %b, i32 2
  %gepb3 = getelementptr inbounds float, ptr %b, i32 3
  %loada = load float, ptr %a
  %loada1 = load float, ptr %gepa1
  %loada2 = load float, ptr %gepa2
  %loada3 = load float, ptr %gepa3
  %loadb = load float, ptr %b
  %loadb1 = load float, ptr %gepb1
  %loadb2 = load float, ptr %gepb2
  %loadb3 = load float, ptr %gepb3
  %add = fadd fast float %loada, %loadb
  %add1 = fadd fast float %loada1, %loadb1
  %add2 = fadd fast float %loada3, %loadb2
  %add3 = fadd fast float %loada2, %loadb3
  %red1 = fadd fast float %add, %add1
  %red2 = fadd fast float %add2, %red1
  %red3 = fadd fast float %add3, %red2
  ret float %red3
}

; Reduction needs a shuffle. See add2 and add3.
define float @reduce_float_case2(ptr %a, ptr %b) {
; CHECK-LABEL: define float @reduce_float_case2(
; CHECK-SAME: ptr [[A:%.*]], ptr [[B:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[GEPA2:%.*]] = getelementptr inbounds float, ptr [[A]], i32 2
; CHECK-NEXT:    [[GEPA3:%.*]] = getelementptr inbounds float, ptr [[A]], i32 3
; CHECK-NEXT:    [[GEPB2:%.*]] = getelementptr inbounds float, ptr [[B]], i32 2
; CHECK-NEXT:    [[GEPB3:%.*]] = getelementptr inbounds float, ptr [[B]], i32 3
; CHECK-NEXT:    [[LOADA2:%.*]] = load float, ptr [[GEPA2]], align 4
; CHECK-NEXT:    [[LOADA3:%.*]] = load float, ptr [[GEPA3]], align 4
; CHECK-NEXT:    [[LOADB2:%.*]] = load float, ptr [[GEPB2]], align 4
; CHECK-NEXT:    [[LOADB3:%.*]] = load float, ptr [[GEPB3]], align 4
; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x float>, ptr [[A]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x float>, ptr [[B]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = fadd <2 x float> [[TMP0]], [[TMP1]]
; CHECK-NEXT:    [[ADD2:%.*]] = fadd float [[LOADA3]], [[LOADB2]]
; CHECK-NEXT:    [[ADD3:%.*]] = fadd float [[LOADA2]], [[LOADB3]]
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
; CHECK-NEXT:    [[RED1:%.*]] = fadd float [[TMP3]], [[TMP4]]
; CHECK-NEXT:    [[RED2:%.*]] = fadd float [[ADD2]], [[RED1]]
; CHECK-NEXT:    [[RED3:%.*]] = fadd float [[ADD3]], [[RED2]]
; CHECK-NEXT:    ret float [[RED3]]
;
entry:
  %gepa1 = getelementptr inbounds float, ptr %a, i32 1
  %gepa2 = getelementptr inbounds float, ptr %a, i32 2
  %gepa3 = getelementptr inbounds float, ptr %a, i32 3
  %gepb1 = getelementptr inbounds float, ptr %b, i32 1
  %gepb2 = getelementptr inbounds float, ptr %b, i32 2
  %gepb3 = getelementptr inbounds float, ptr %b, i32 3
  %loada = load float, ptr %a
  %loada1 = load float, ptr %gepa1
  %loada2 = load float, ptr %gepa2
  %loada3 = load float, ptr %gepa3
  %loadb = load float, ptr %b
  %loadb1 = load float, ptr %gepb1
  %loadb2 = load float, ptr %gepb2
  %loadb3 = load float, ptr %gepb3
  %add = fadd float %loada, %loadb
  %add1 = fadd float %loada1, %loadb1
  %add2 = fadd float %loada3, %loadb2
  %add3 = fadd float %loada2, %loadb3
  %red1 = fadd float %add, %add1
  %red2 = fadd float %add2, %red1
  %red3 = fadd float %add3, %red2
  ret float %red3
}

; Addition of log.
define float @reduce_fast_float_case3(ptr %a) {
; CHECK-LABEL: define float @reduce_fast_float_case3(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds float, ptr [[A]], i32 1
; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds float, ptr [[A]], i32 2
; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr inbounds float, ptr [[A]], i32 3
; CHECK-NEXT:    [[GEP4:%.*]] = getelementptr inbounds float, ptr [[A]], i32 4
; CHECK-NEXT:    [[GEP5:%.*]] = getelementptr inbounds float, ptr [[A]], i32 5
; CHECK-NEXT:    [[GEP6:%.*]] = getelementptr inbounds float, ptr [[A]], i32 6
; CHECK-NEXT:    [[GEP7:%.*]] = getelementptr inbounds float, ptr [[A]], i32 7
; CHECK-NEXT:    [[LOAD:%.*]] = load float, ptr [[A]], align 4
; CHECK-NEXT:    [[LOAD1:%.*]] = load float, ptr [[GEP1]], align 4
; CHECK-NEXT:    [[LOAD2:%.*]] = load float, ptr [[GEP2]], align 4
; CHECK-NEXT:    [[LOAD3:%.*]] = load float, ptr [[GEP3]], align 4
; CHECK-NEXT:    [[LOAD4:%.*]] = load float, ptr [[GEP4]], align 4
; CHECK-NEXT:    [[LOAD5:%.*]] = load float, ptr [[GEP5]], align 4
; CHECK-NEXT:    [[LOAD6:%.*]] = load float, ptr [[GEP6]], align 4
; CHECK-NEXT:    [[LOAD7:%.*]] = load float, ptr [[GEP7]], align 4
; CHECK-NEXT:    [[LOG:%.*]] = call fast float @llvm.log.f32(float [[LOAD]])
; CHECK-NEXT:    [[LOG1:%.*]] = call fast float @llvm.log.f32(float [[LOAD1]])
; CHECK-NEXT:    [[LOG2:%.*]] = call fast float @llvm.log.f32(float [[LOAD2]])
; CHECK-NEXT:    [[LOG3:%.*]] = call fast float @llvm.log.f32(float [[LOAD3]])
; CHECK-NEXT:    [[LOG4:%.*]] = call fast float @llvm.log.f32(float [[LOAD4]])
; CHECK-NEXT:    [[LOG5:%.*]] = call fast float @llvm.log.f32(float [[LOAD5]])
; CHECK-NEXT:    [[LOG6:%.*]] = call fast float @llvm.log.f32(float [[LOAD6]])
; CHECK-NEXT:    [[LOG7:%.*]] = call fast float @llvm.log.f32(float [[LOAD7]])
; CHECK-NEXT:    [[ADD1:%.*]] = fadd fast float [[LOG]], [[LOG1]]
; CHECK-NEXT:    [[ADD2:%.*]] = fadd fast float [[ADD1]], [[LOG2]]
; CHECK-NEXT:    [[ADD3:%.*]] = fadd fast float [[ADD2]], [[LOG3]]
; CHECK-NEXT:    [[ADD4:%.*]] = fadd fast float [[ADD3]], [[LOG4]]
; CHECK-NEXT:    [[ADD5:%.*]] = fadd fast float [[ADD4]], [[LOG5]]
; CHECK-NEXT:    [[ADD6:%.*]] = fadd fast float [[ADD5]], [[LOG6]]
; CHECK-NEXT:    [[ADD7:%.*]] = fadd fast float [[ADD6]], [[LOG7]]
; CHECK-NEXT:    ret float [[ADD7]]
;
entry:
  %gep1 = getelementptr inbounds float, ptr %a, i32 1
  %gep2 = getelementptr inbounds float, ptr %a, i32 2
  %gep3 = getelementptr inbounds float, ptr %a, i32 3
  %gep4 = getelementptr inbounds float, ptr %a, i32 4
  %gep5 = getelementptr inbounds float, ptr %a, i32 5
  %gep6 = getelementptr inbounds float, ptr %a, i32 6
  %gep7 = getelementptr inbounds float, ptr %a, i32 7
  %load = load float, ptr %a
  %load1 = load float, ptr %gep1
  %load2 = load float, ptr %gep2
  %load3 = load float, ptr %gep3
  %load4 = load float, ptr %gep4
  %load5 = load float, ptr %gep5
  %load6 = load float, ptr %gep6
  %load7 = load float, ptr %gep7
  %log = call fast float @llvm.log.f32(float %load)
  %log1 = call fast float @llvm.log.f32(float %load1)
  %log2 = call fast float @llvm.log.f32(float %load2)
  %log3 = call fast float @llvm.log.f32(float %load3)
  %log4 = call fast float @llvm.log.f32(float %load4)
  %log5 = call fast float @llvm.log.f32(float %load5)
  %log6 = call fast float @llvm.log.f32(float %load6)
  %log7 = call fast float @llvm.log.f32(float %load7)
  %add1 = fadd fast float %log, %log1
  %add2 = fadd fast float %add1, %log2
  %add3 = fadd fast float %add2, %log3
  %add4 = fadd fast float %add3, %log4
  %add5 = fadd fast float %add4, %log5
  %add6 = fadd fast float %add5, %log6
  %add7 = fadd fast float %add6, %log7
  ret float %add7
}

; Addition of log.
define float @reduce_float_case3(ptr %a) {
; CHECK-LABEL: define float @reduce_float_case3(
; CHECK-SAME: ptr [[A:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[ENTRY:.*:]]
; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds float, ptr [[A]], i32 1
; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds float, ptr [[A]], i32 2
; CHECK-NEXT:    [[GEP3:%.*]] = getelementptr inbounds float, ptr [[A]], i32 3
; CHECK-NEXT:    [[GEP4:%.*]] = getelementptr inbounds float, ptr [[A]], i32 4
; CHECK-NEXT:    [[GEP5:%.*]] = getelementptr inbounds float, ptr [[A]], i32 5
; CHECK-NEXT:    [[GEP6:%.*]] = getelementptr inbounds float, ptr [[A]], i32 6
; CHECK-NEXT:    [[GEP7:%.*]] = getelementptr inbounds float, ptr [[A]], i32 7
; CHECK-NEXT:    [[LOAD:%.*]] = load float, ptr [[A]], align 4
; CHECK-NEXT:    [[LOAD1:%.*]] = load float, ptr [[GEP1]], align 4
; CHECK-NEXT:    [[LOAD2:%.*]] = load float, ptr [[GEP2]], align 4
; CHECK-NEXT:    [[LOAD3:%.*]] = load float, ptr [[GEP3]], align 4
; CHECK-NEXT:    [[LOAD4:%.*]] = load float, ptr [[GEP4]], align 4
; CHECK-NEXT:    [[LOAD5:%.*]] = load float, ptr [[GEP5]], align 4
; CHECK-NEXT:    [[LOAD6:%.*]] = load float, ptr [[GEP6]], align 4
; CHECK-NEXT:    [[LOAD7:%.*]] = load float, ptr [[GEP7]], align 4
; CHECK-NEXT:    [[LOG:%.*]] = call float @llvm.log.f32(float [[LOAD]])
; CHECK-NEXT:    [[LOG1:%.*]] = call float @llvm.log.f32(float [[LOAD1]])
; CHECK-NEXT:    [[LOG2:%.*]] = call float @llvm.log.f32(float [[LOAD2]])
; CHECK-NEXT:    [[LOG3:%.*]] = call float @llvm.log.f32(float [[LOAD3]])
; CHECK-NEXT:    [[LOG4:%.*]] = call float @llvm.log.f32(float [[LOAD4]])
; CHECK-NEXT:    [[LOG5:%.*]] = call float @llvm.log.f32(float [[LOAD5]])
; CHECK-NEXT:    [[LOG6:%.*]] = call float @llvm.log.f32(float [[LOAD6]])
; CHECK-NEXT:    [[LOG7:%.*]] = call float @llvm.log.f32(float [[LOAD7]])
; CHECK-NEXT:    [[ADD1:%.*]] = fadd float [[LOG]], [[LOG1]]
; CHECK-NEXT:    [[ADD2:%.*]] = fadd float [[ADD1]], [[LOG2]]
; CHECK-NEXT:    [[ADD3:%.*]] = fadd float [[ADD2]], [[LOG3]]
; CHECK-NEXT:    [[ADD4:%.*]] = fadd float [[ADD3]], [[LOG4]]
; CHECK-NEXT:    [[ADD5:%.*]] = fadd float [[ADD4]], [[LOG5]]
; CHECK-NEXT:    [[ADD6:%.*]] = fadd float [[ADD5]], [[LOG6]]
; CHECK-NEXT:    [[ADD7:%.*]] = fadd float [[ADD6]], [[LOG7]]
; CHECK-NEXT:    ret float [[ADD7]]
;
entry:
  %gep1 = getelementptr inbounds float, ptr %a, i32 1
  %gep2 = getelementptr inbounds float, ptr %a, i32 2
  %gep3 = getelementptr inbounds float, ptr %a, i32 3
  %gep4 = getelementptr inbounds float, ptr %a, i32 4
  %gep5 = getelementptr inbounds float, ptr %a, i32 5
  %gep6 = getelementptr inbounds float, ptr %a, i32 6
  %gep7 = getelementptr inbounds float, ptr %a, i32 7
  %load = load float, ptr %a
  %load1 = load float, ptr %gep1
  %load2 = load float, ptr %gep2
  %load3 = load float, ptr %gep3
  %load4 = load float, ptr %gep4
  %load5 = load float, ptr %gep5
  %load6 = load float, ptr %gep6
  %load7 = load float, ptr %gep7
  %log = call float @llvm.log.f32(float %load)
  %log1 = call float @llvm.log.f32(float %load1)
  %log2 = call float @llvm.log.f32(float %load2)
  %log3 = call float @llvm.log.f32(float %load3)
  %log4 = call float @llvm.log.f32(float %load4)
  %log5 = call float @llvm.log.f32(float %load5)
  %log6 = call float @llvm.log.f32(float %load6)
  %log7 = call float @llvm.log.f32(float %load7)
  %add1 = fadd float %log, %log1
  %add2 = fadd float %add1, %log2
  %add3 = fadd float %add2, %log3
  %add4 = fadd float %add3, %log4
  %add5 = fadd float %add4, %log5
  %add6 = fadd float %add5, %log6
  %add7 = fadd float %add6, %log7
775 ret float %add7 776} 777 778define half @reduce_unordered_fast_half4(<4 x half> %vec4) { 779; CHECK-LABEL: define half @reduce_unordered_fast_half4( 780; CHECK-SAME: <4 x half> [[VEC4:%.*]]) #[[ATTR0]] { 781; CHECK-NEXT: [[ENTRY:.*:]] 782; CHECK-NEXT: [[TMP0:%.*]] = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> [[VEC4]]) 783; CHECK-NEXT: ret half [[TMP0]] 784; 785entry: 786 %elt0 = extractelement <4 x half> %vec4, i64 0 787 %elt1 = extractelement <4 x half> %vec4, i64 1 788 %elt2 = extractelement <4 x half> %vec4, i64 2 789 %elt3 = extractelement <4 x half> %vec4, i64 3 790 %add1 = fadd fast half %elt1, %elt0 791 %add2 = fadd fast half %elt2, %elt3 792 %add3 = fadd fast half %add1, %add2 793 ret half %add3 794} 795 796define half @reduce_unordered_half4(<4 x half> %vec4) { 797; CHECK-LABEL: define half @reduce_unordered_half4( 798; CHECK-SAME: <4 x half> [[VEC4:%.*]]) #[[ATTR0]] { 799; CHECK-NEXT: [[ENTRY:.*:]] 800; CHECK-NEXT: [[TMP0:%.*]] = shufflevector <4 x half> [[VEC4]], <4 x half> poison, <2 x i32> <i32 1, i32 2> 801; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x half> [[VEC4]], <4 x half> poison, <2 x i32> <i32 0, i32 3> 802; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x half> [[TMP0]], [[TMP1]] 803; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x half> [[TMP2]], i32 0 804; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x half> [[TMP2]], i32 1 805; CHECK-NEXT: [[ADD3:%.*]] = fadd half [[TMP3]], [[TMP4]] 806; CHECK-NEXT: ret half [[ADD3]] 807; 808entry: 809 %elt0 = extractelement <4 x half> %vec4, i64 0 810 %elt1 = extractelement <4 x half> %vec4, i64 1 811 %elt2 = extractelement <4 x half> %vec4, i64 2 812 %elt3 = extractelement <4 x half> %vec4, i64 3 813 %add1 = fadd half %elt1, %elt0 814 %add2 = fadd half %elt2, %elt3 815 %add3 = fadd half %add1, %add2 816 ret half %add3 817} 818