; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=slp-vectorizer,dce < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=fiji -passes=slp-vectorizer,dce < %s | FileCheck -check-prefixes=GCN,VI %s

define half @reduction_half4(<4 x half> %a) {
; GCN-LABEL: @reduction_half4(
; GCN-NEXT:  entry:
; GCN-NEXT:    [[TMP0:%.*]] = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> [[A:%.*]])
; GCN-NEXT:    ret half [[TMP0]]
;
entry:
  %elt0 = extractelement <4 x half> %a, i64 0
  %elt1 = extractelement <4 x half> %a, i64 1
  %elt2 = extractelement <4 x half> %a, i64 2
  %elt3 = extractelement <4 x half> %a, i64 3

  %add1 = fadd fast half %elt1, %elt0
  %add2 = fadd fast half %elt2, %add1
  %add3 = fadd fast half %elt3, %add2

  ret half %add3
}

define half @reduction_half8(<8 x half> %vec8) {
; GCN-LABEL: @reduction_half8(
; GCN-NEXT:  entry:
; GCN-NEXT:    [[TMP0:%.*]] = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> [[VEC8:%.*]])
; GCN-NEXT:    ret half [[TMP0]]
;
entry:
  %elt0 = extractelement <8 x half> %vec8, i64 0
  %elt1 = extractelement <8 x half> %vec8, i64 1
  %elt2 = extractelement <8 x half> %vec8, i64 2
  %elt3 = extractelement <8 x half> %vec8, i64 3
  %elt4 = extractelement <8 x half> %vec8, i64 4
  %elt5 = extractelement <8 x half> %vec8, i64 5
  %elt6 = extractelement <8 x half> %vec8, i64 6
  %elt7 = extractelement <8 x half> %vec8, i64 7

  %add1 = fadd fast half %elt1, %elt0
  %add2 = fadd fast half %elt2, %add1
  %add3 = fadd fast half %elt3, %add2
  %add4 = fadd fast half %elt4, %add3
  %add5 = fadd fast half %elt5, %add4
  %add6 = fadd fast half %elt6, %add5
  %add7 = fadd fast half %elt7, %add6

  ret half %add7
}

define half @reduction_half16(<16 x half> %vec16) {
; GFX9-LABEL: @reduction_half16(
; GFX9-NEXT:  entry:
; GFX9-NEXT:    [[TMP0:%.*]] = call fast half @llvm.vector.reduce.fadd.v16f16(half 0xH0000, <16 x half> [[VEC16:%.*]])
; GFX9-NEXT:    ret half [[TMP0]]
;
; VI-LABEL: @reduction_half16(
; VI-NEXT:  entry:
; VI-NEXT:    [[TMP0:%.*]] = shufflevector <16 x half> [[VEC16:%.*]], <16 x half> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; VI-NEXT:    [[TMP1:%.*]] = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> [[TMP0]])
; VI-NEXT:    [[TMP2:%.*]] = shufflevector <16 x half> [[VEC16]], <16 x half> poison, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; VI-NEXT:    [[TMP3:%.*]] = call fast half @llvm.vector.reduce.fadd.v8f16(half 0xH0000, <8 x half> [[TMP2]])
; VI-NEXT:    [[OP_RDX:%.*]] = fadd fast half [[TMP1]], [[TMP3]]
; VI-NEXT:    ret half [[OP_RDX]]
;
entry:
  %elt0 = extractelement <16 x half> %vec16, i64 0
  %elt1 = extractelement <16 x half> %vec16, i64 1
  %elt2 = extractelement <16 x half> %vec16, i64 2
  %elt3 = extractelement <16 x half> %vec16, i64 3
  %elt4 = extractelement <16 x half> %vec16, i64 4
  %elt5 = extractelement <16 x half> %vec16, i64 5
  %elt6 = extractelement <16 x half> %vec16, i64 6
  %elt7 = extractelement <16 x half> %vec16, i64 7
  %elt8 = extractelement <16 x half> %vec16, i64 8
  %elt9 = extractelement <16 x half> %vec16, i64 9
  %elt10 = extractelement <16 x half> %vec16, i64 10
  %elt11 = extractelement <16 x half> %vec16, i64 11
  %elt12 = extractelement <16 x half> %vec16, i64 12
  %elt13 = extractelement <16 x half> %vec16, i64 13
  %elt14 = extractelement <16 x half> %vec16, i64 14
  %elt15 = extractelement <16 x half> %vec16, i64 15

  %add1 = fadd fast half %elt1, %elt0
  %add2 = fadd fast half %elt2, %add1
  %add3 = fadd fast half %elt3, %add2
  %add4 = fadd fast half %elt4, %add3
  %add5 = fadd fast half %elt5, %add4
  %add6 = fadd fast half %elt6, %add5
  %add7 = fadd fast half %elt7, %add6
  %add8 = fadd fast half %elt8, %add7
  %add9 = fadd fast half %elt9, %add8
  %add10 = fadd fast half %elt10, %add9
  %add11 = fadd fast half %elt11, %add10
  %add12 = fadd fast half %elt12, %add11
  %add13 = fadd fast half %elt13, %add12
  %add14 = fadd fast half %elt14, %add13
  %add15 = fadd fast half %elt15, %add14

  ret half %add15
}

; FIXME: support vectorization;
define half @reduction_sub_half4(<4 x half> %a) {
; GCN-LABEL: @reduction_sub_half4(
; GCN-NEXT:  entry:
; GCN-NEXT:    [[ELT0:%.*]] = extractelement <4 x half> [[A:%.*]], i64 0
; GCN-NEXT:    [[ELT1:%.*]] = extractelement <4 x half> [[A]], i64 1
; GCN-NEXT:    [[ELT2:%.*]] = extractelement <4 x half> [[A]], i64 2
; GCN-NEXT:    [[ELT3:%.*]] = extractelement <4 x half> [[A]], i64 3
; GCN-NEXT:    [[ADD1:%.*]] = fsub fast half [[ELT1]], [[ELT0]]
; GCN-NEXT:    [[ADD2:%.*]] = fsub fast half [[ELT2]], [[ADD1]]
; GCN-NEXT:    [[ADD3:%.*]] = fsub fast half [[ELT3]], [[ADD2]]
; GCN-NEXT:    ret half [[ADD3]]
;
entry:
  %elt0 = extractelement <4 x half> %a, i64 0
  %elt1 = extractelement <4 x half> %a, i64 1
  %elt2 = extractelement <4 x half> %a, i64 2
  %elt3 = extractelement <4 x half> %a, i64 3

  %add1 = fsub fast half %elt1, %elt0
  %add2 = fsub fast half %elt2, %add1
  %add3 = fsub fast half %elt3, %add2

  ret half %add3
}

define i16 @reduction_v4i16(<4 x i16> %a) {
; GCN-LABEL: @reduction_v4i16(
; GCN-NEXT:  entry:
; GCN-NEXT:    [[TMP0:%.*]] = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> [[A:%.*]])
; GCN-NEXT:    ret i16 [[TMP0]]
;
entry:
  %elt0 = extractelement <4 x i16> %a, i64 0
  %elt1 = extractelement <4 x i16> %a, i64 1
  %elt2 = extractelement <4 x i16> %a, i64 2
  %elt3 = extractelement <4 x i16> %a, i64 3

  %add1 = add i16 %elt1, %elt0
  %add2 = add i16 %elt2, %add1
  %add3 = add i16 %elt3, %add2

  ret i16 %add3
}

define i16 @reduction_v8i16(<8 x i16> %vec8) {
; GCN-LABEL: @reduction_v8i16(
; GCN-NEXT:  entry:
; GCN-NEXT:    [[TMP0:%.*]] = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> [[VEC8:%.*]])
; GCN-NEXT:    ret i16 [[TMP0]]
;
entry:
  %elt0 = extractelement <8 x i16> %vec8, i64 0
  %elt1 = extractelement <8 x i16> %vec8, i64 1
  %elt2 = extractelement <8 x i16> %vec8, i64 2
  %elt3 = extractelement <8 x i16> %vec8, i64 3
  %elt4 = extractelement <8 x i16> %vec8, i64 4
  %elt5 = extractelement <8 x i16> %vec8, i64 5
  %elt6 = extractelement <8 x i16> %vec8, i64 6
  %elt7 = extractelement <8 x i16> %vec8, i64 7

  %add1 = add i16 %elt1, %elt0
  %add2 = add i16 %elt2, %add1
  %add3 = add i16 %elt3, %add2
  %add4 = add i16 %elt4, %add3
  %add5 = add i16 %elt5, %add4
  %add6 = add i16 %elt6, %add5
  %add7 = add i16 %elt7, %add6

  ret i16 %add7
}

define i16 @reduction_umin_v4i16(<4 x i16> %vec4) {
; GFX9-LABEL: @reduction_umin_v4i16(
; GFX9-NEXT:  entry:
; GFX9-NEXT:    [[TMP0:%.*]] = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> [[VEC4:%.*]])
; GFX9-NEXT:    ret i16 [[TMP0]]
;
; VI-LABEL: @reduction_umin_v4i16(
; VI-NEXT:  entry:
; VI-NEXT:    [[ELT0:%.*]] = extractelement <4 x i16> [[VEC4:%.*]], i64 0
; VI-NEXT:    [[ELT1:%.*]] = extractelement <4 x i16> [[VEC4]], i64 1
; VI-NEXT:    [[ELT2:%.*]] = extractelement <4 x i16> [[VEC4]], i64 2
; VI-NEXT:    [[ELT3:%.*]] = extractelement <4 x i16> [[VEC4]], i64 3
; VI-NEXT:    [[CMP1:%.*]] = icmp ult i16 [[ELT1]], [[ELT0]]
; VI-NEXT:    [[MIN1:%.*]] = select i1 [[CMP1]], i16 [[ELT1]], i16 [[ELT0]]
; VI-NEXT:    [[CMP2:%.*]] = icmp ult i16 [[ELT2]], [[MIN1]]
; VI-NEXT:    [[MIN2:%.*]] = select i1 [[CMP2]], i16 [[ELT2]], i16 [[MIN1]]
; VI-NEXT:    [[CMP3:%.*]] = icmp ult i16 [[ELT3]], [[MIN2]]
; VI-NEXT:    [[MIN3:%.*]] = select i1 [[CMP3]], i16 [[ELT3]], i16 [[MIN2]]
; VI-NEXT:    ret i16 [[MIN3]]
;
entry:
  %elt0 = extractelement <4 x i16> %vec4, i64 0
  %elt1 = extractelement <4 x i16> %vec4, i64 1
  %elt2 = extractelement <4 x i16> %vec4, i64 2
  %elt3 = extractelement <4 x i16> %vec4, i64 3

  %cmp1 = icmp ult i16 %elt1, %elt0
  %min1 = select i1 %cmp1, i16 %elt1, i16 %elt0
  %cmp2 = icmp ult i16 %elt2, %min1
  %min2 = select i1 %cmp2, i16 %elt2, i16 %min1
  %cmp3 = icmp ult i16 %elt3, %min2
  %min3 = select i1 %cmp3, i16 %elt3, i16 %min2

  ret i16 %min3
}

define i16 @reduction_icmp_v8i16(<8 x i16> %vec8) {
; GFX9-LABEL: @reduction_icmp_v8i16(
; GFX9-NEXT:  entry:
; GFX9-NEXT:    [[TMP0:%.*]] = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> [[VEC8:%.*]])
; GFX9-NEXT:    ret i16 [[TMP0]]
;
; VI-LABEL: @reduction_icmp_v8i16(
; VI-NEXT:  entry:
; VI-NEXT:    [[ELT0:%.*]] = extractelement <8 x i16> [[VEC8:%.*]], i64 0
; VI-NEXT:    [[ELT1:%.*]] = extractelement <8 x i16> [[VEC8]], i64 1
; VI-NEXT:    [[ELT2:%.*]] = extractelement <8 x i16> [[VEC8]], i64 2
; VI-NEXT:    [[ELT3:%.*]] = extractelement <8 x i16> [[VEC8]], i64 3
; VI-NEXT:    [[ELT4:%.*]] = extractelement <8 x i16> [[VEC8]], i64 4
; VI-NEXT:    [[ELT5:%.*]] = extractelement <8 x i16> [[VEC8]], i64 5
; VI-NEXT:    [[ELT6:%.*]] = extractelement <8 x i16> [[VEC8]], i64 6
; VI-NEXT:    [[ELT7:%.*]] = extractelement <8 x i16> [[VEC8]], i64 7
; VI-NEXT:    [[CMP0:%.*]] = icmp ult i16 [[ELT1]], [[ELT0]]
; VI-NEXT:    [[MIN1:%.*]] = select i1 [[CMP0]], i16 [[ELT1]], i16 [[ELT0]]
; VI-NEXT:    [[CMP1:%.*]] = icmp ult i16 [[ELT2]], [[MIN1]]
; VI-NEXT:    [[MIN2:%.*]] = select i1 [[CMP1]], i16 [[ELT2]], i16 [[MIN1]]
; VI-NEXT:    [[CMP2:%.*]] = icmp ult i16 [[ELT3]], [[MIN2]]
; VI-NEXT:    [[MIN3:%.*]] = select i1 [[CMP2]], i16 [[ELT3]], i16 [[MIN2]]
; VI-NEXT:    [[CMP3:%.*]] = icmp ult i16 [[ELT4]], [[MIN3]]
; VI-NEXT:    [[MIN4:%.*]] = select i1 [[CMP3]], i16 [[ELT4]], i16 [[MIN3]]
; VI-NEXT:    [[CMP4:%.*]] = icmp ult i16 [[ELT5]], [[MIN4]]
; VI-NEXT:    [[MIN5:%.*]] = select i1 [[CMP4]], i16 [[ELT5]], i16 [[MIN4]]
; VI-NEXT:    [[CMP5:%.*]] = icmp ult i16 [[ELT6]], [[MIN5]]
; VI-NEXT:    [[MIN6:%.*]] = select i1 [[CMP5]], i16 [[ELT6]], i16 [[MIN5]]
; VI-NEXT:    [[CMP6:%.*]] = icmp ult i16 [[ELT7]], [[MIN6]]
; VI-NEXT:    [[MIN7:%.*]] = select i1 [[CMP6]], i16 [[ELT7]], i16 [[MIN6]]
; VI-NEXT:    ret i16 [[MIN7]]
;
entry:
  %elt0 = extractelement <8 x i16> %vec8, i64 0
  %elt1 = extractelement <8 x i16> %vec8, i64 1
  %elt2 = extractelement <8 x i16> %vec8, i64 2
  %elt3 = extractelement <8 x i16> %vec8, i64 3
  %elt4 = extractelement <8 x i16> %vec8, i64 4
  %elt5 = extractelement <8 x i16> %vec8, i64 5
  %elt6 = extractelement <8 x i16> %vec8, i64 6
  %elt7 = extractelement <8 x i16> %vec8, i64 7

  %cmp0 = icmp ult i16 %elt1, %elt0
  %min1 = select i1 %cmp0, i16 %elt1, i16 %elt0
  %cmp1 = icmp ult i16 %elt2, %min1
  %min2 = select i1 %cmp1, i16 %elt2, i16 %min1
  %cmp2 = icmp ult i16 %elt3, %min2
  %min3 = select i1 %cmp2, i16 %elt3, i16 %min2

  %cmp3 = icmp ult i16 %elt4, %min3
  %min4 = select i1 %cmp3, i16 %elt4, i16 %min3
  %cmp4 = icmp ult i16 %elt5, %min4
  %min5 = select i1 %cmp4, i16 %elt5, i16 %min4

  %cmp5 = icmp ult i16 %elt6, %min5
  %min6 = select i1 %cmp5, i16 %elt6, i16 %min5
  %cmp6 = icmp ult i16 %elt7, %min6
  %min7 = select i1 %cmp6, i16 %elt7, i16 %min6

  ret i16 %min7
}

define i16 @reduction_smin_v16i16(<16 x i16> %vec16) {
; GFX9-LABEL: @reduction_smin_v16i16(
; GFX9-NEXT:  entry:
; GFX9-NEXT:    [[TMP0:%.*]] = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> [[VEC16:%.*]])
; GFX9-NEXT:    ret i16 [[TMP0]]
;
; VI-LABEL: @reduction_smin_v16i16(
; VI-NEXT:  entry:
; VI-NEXT:    [[ELT0:%.*]] = extractelement <16 x i16> [[VEC16:%.*]], i64 0
; VI-NEXT:    [[ELT1:%.*]] = extractelement <16 x i16> [[VEC16]], i64 1
; VI-NEXT:    [[ELT2:%.*]] = extractelement <16 x i16> [[VEC16]], i64 2
; VI-NEXT:    [[ELT3:%.*]] = extractelement <16 x i16> [[VEC16]], i64 3
; VI-NEXT:    [[ELT4:%.*]] = extractelement <16 x i16> [[VEC16]], i64 4
; VI-NEXT:    [[ELT5:%.*]] = extractelement <16 x i16> [[VEC16]], i64 5
; VI-NEXT:    [[ELT6:%.*]] = extractelement <16 x i16> [[VEC16]], i64 6
; VI-NEXT:    [[ELT7:%.*]] = extractelement <16 x i16> [[VEC16]], i64 7
; VI-NEXT:    [[ELT8:%.*]] = extractelement <16 x i16> [[VEC16]], i64 8
; VI-NEXT:    [[ELT9:%.*]] = extractelement <16 x i16> [[VEC16]], i64 9
; VI-NEXT:    [[ELT10:%.*]] = extractelement <16 x i16> [[VEC16]], i64 10
; VI-NEXT:    [[ELT11:%.*]] = extractelement <16 x i16> [[VEC16]], i64 11
; VI-NEXT:    [[ELT12:%.*]] = extractelement <16 x i16> [[VEC16]], i64 12
; VI-NEXT:    [[ELT13:%.*]] = extractelement <16 x i16> [[VEC16]], i64 13
; VI-NEXT:    [[ELT14:%.*]] = extractelement <16 x i16> [[VEC16]], i64 14
; VI-NEXT:    [[ELT15:%.*]] = extractelement <16 x i16> [[VEC16]], i64 15
; VI-NEXT:    [[CMP0:%.*]] = icmp slt i16 [[ELT1]], [[ELT0]]
; VI-NEXT:    [[MIN1:%.*]] = select i1 [[CMP0]], i16 [[ELT1]], i16 [[ELT0]]
; VI-NEXT:    [[CMP1:%.*]] = icmp slt i16 [[ELT2]], [[MIN1]]
; VI-NEXT:    [[MIN2:%.*]] = select i1 [[CMP1]], i16 [[ELT2]], i16 [[MIN1]]
; VI-NEXT:    [[CMP2:%.*]] = icmp slt i16 [[ELT3]], [[MIN2]]
; VI-NEXT:    [[MIN3:%.*]] = select i1 [[CMP2]], i16 [[ELT3]], i16 [[MIN2]]
; VI-NEXT:    [[CMP3:%.*]] = icmp slt i16 [[ELT4]], [[MIN3]]
; VI-NEXT:    [[MIN4:%.*]] = select i1 [[CMP3]], i16 [[ELT4]], i16 [[MIN3]]
; VI-NEXT:    [[CMP4:%.*]] = icmp slt i16 [[ELT5]], [[MIN4]]
; VI-NEXT:    [[MIN5:%.*]] = select i1 [[CMP4]], i16 [[ELT5]], i16 [[MIN4]]
; VI-NEXT:    [[CMP5:%.*]] = icmp slt i16 [[ELT6]], [[MIN5]]
; VI-NEXT:    [[MIN6:%.*]] = select i1 [[CMP5]], i16 [[ELT6]], i16 [[MIN5]]
; VI-NEXT:    [[CMP6:%.*]] = icmp slt i16 [[ELT7]], [[MIN6]]
; VI-NEXT:    [[MIN7:%.*]] = select i1 [[CMP6]], i16 [[ELT7]], i16 [[MIN6]]
; VI-NEXT:    [[CMP7:%.*]] = icmp slt i16 [[ELT8]], [[MIN7]]
; VI-NEXT:    [[MIN8:%.*]] = select i1 [[CMP7]], i16 [[ELT8]], i16 [[MIN7]]
; VI-NEXT:    [[CMP8:%.*]] = icmp slt i16 [[ELT9]], [[MIN8]]
; VI-NEXT:    [[MIN9:%.*]] = select i1 [[CMP8]], i16 [[ELT9]], i16 [[MIN8]]
; VI-NEXT:    [[CMP9:%.*]] = icmp slt i16 [[ELT10]], [[MIN9]]
; VI-NEXT:    [[MIN10:%.*]] = select i1 [[CMP9]], i16 [[ELT10]], i16 [[MIN9]]
; VI-NEXT:    [[CMP10:%.*]] = icmp slt i16 [[ELT11]], [[MIN10]]
; VI-NEXT:    [[MIN11:%.*]] = select i1 [[CMP10]], i16 [[ELT11]], i16 [[MIN10]]
; VI-NEXT:    [[CMP11:%.*]] = icmp slt i16 [[ELT12]], [[MIN11]]
; VI-NEXT:    [[MIN12:%.*]] = select i1 [[CMP11]], i16 [[ELT12]], i16 [[MIN11]]
; VI-NEXT:    [[CMP12:%.*]] = icmp slt i16 [[ELT13]], [[MIN12]]
; VI-NEXT:    [[MIN13:%.*]] = select i1 [[CMP12]], i16 [[ELT13]], i16 [[MIN12]]
; VI-NEXT:    [[CMP13:%.*]] = icmp slt i16 [[ELT14]], [[MIN13]]
; VI-NEXT:    [[MIN14:%.*]] = select i1 [[CMP13]], i16 [[ELT14]], i16 [[MIN13]]
; VI-NEXT:    [[CMP14:%.*]] = icmp slt i16 [[ELT15]], [[MIN14]]
; VI-NEXT:    [[MIN15:%.*]] = select i1 [[CMP14]], i16 [[ELT15]], i16 [[MIN14]]
; VI-NEXT:    ret i16 [[MIN15]]
;
entry:
  %elt0 = extractelement <16 x i16> %vec16, i64 0
  %elt1 = extractelement <16 x i16> %vec16, i64 1
  %elt2 = extractelement <16 x i16> %vec16, i64 2
  %elt3 = extractelement <16 x i16> %vec16, i64 3
  %elt4 = extractelement <16 x i16> %vec16, i64 4
  %elt5 = extractelement <16 x i16> %vec16, i64 5
  %elt6 = extractelement <16 x i16> %vec16, i64 6
  %elt7 = extractelement <16 x i16> %vec16, i64 7

  %elt8 = extractelement <16 x i16> %vec16, i64 8
  %elt9 = extractelement <16 x i16> %vec16, i64 9
  %elt10 = extractelement <16 x i16> %vec16, i64 10
  %elt11 = extractelement <16 x i16> %vec16, i64 11
  %elt12 = extractelement <16 x i16> %vec16, i64 12
  %elt13 = extractelement <16 x i16> %vec16, i64 13
  %elt14 = extractelement <16 x i16> %vec16, i64 14
  %elt15 = extractelement <16 x i16> %vec16, i64 15

  %cmp0 = icmp slt i16 %elt1, %elt0
  %min1 = select i1 %cmp0, i16 %elt1, i16 %elt0
  %cmp1 = icmp slt i16 %elt2, %min1
  %min2 = select i1 %cmp1, i16 %elt2, i16 %min1
  %cmp2 = icmp slt i16 %elt3, %min2
  %min3 = select i1 %cmp2, i16 %elt3, i16 %min2

  %cmp3 = icmp slt i16 %elt4, %min3
  %min4 = select i1 %cmp3, i16 %elt4, i16 %min3
  %cmp4 = icmp slt i16 %elt5, %min4
  %min5 = select i1 %cmp4, i16 %elt5, i16 %min4

  %cmp5 = icmp slt i16 %elt6, %min5
  %min6 = select i1 %cmp5, i16 %elt6, i16 %min5
  %cmp6 = icmp slt i16 %elt7, %min6
  %min7 = select i1 %cmp6, i16 %elt7, i16 %min6

  %cmp7 = icmp slt i16 %elt8, %min7
  %min8 = select i1 %cmp7, i16 %elt8, i16 %min7
  %cmp8 = icmp slt i16 %elt9, %min8
  %min9 = select i1 %cmp8, i16 %elt9, i16 %min8

  %cmp9 = icmp slt i16 %elt10, %min9
  %min10 = select i1 %cmp9, i16 %elt10, i16 %min9
  %cmp10 = icmp slt i16 %elt11, %min10
  %min11 = select i1 %cmp10, i16 %elt11, i16 %min10

  %cmp11 = icmp slt i16 %elt12, %min11
  %min12 = select i1 %cmp11, i16 %elt12, i16 %min11
  %cmp12 = icmp slt i16 %elt13, %min12
  %min13 = select i1 %cmp12, i16 %elt13, i16 %min12

  %cmp13 = icmp slt i16 %elt14, %min13
  %min14 = select i1 %cmp13, i16 %elt14, i16 %min13
  %cmp14 = icmp slt i16 %elt15, %min14
  %min15 = select i1 %cmp14, i16 %elt15, i16 %min14


  ret i16 %min15
}

define i16 @reduction_umax_v4i16(<4 x i16> %vec4) {
; GFX9-LABEL: @reduction_umax_v4i16(
; GFX9-NEXT:  entry:
; GFX9-NEXT:    [[TMP0:%.*]] = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> [[VEC4:%.*]])
; GFX9-NEXT:    ret i16 [[TMP0]]
;
; VI-LABEL: @reduction_umax_v4i16(
; VI-NEXT:  entry:
; VI-NEXT:    [[ELT0:%.*]] = extractelement <4 x i16> [[VEC4:%.*]], i64 0
; VI-NEXT:    [[ELT1:%.*]] = extractelement <4 x i16> [[VEC4]], i64 1
; VI-NEXT:    [[ELT2:%.*]] = extractelement <4 x i16> [[VEC4]], i64 2
; VI-NEXT:    [[ELT3:%.*]] = extractelement <4 x i16> [[VEC4]], i64 3
; VI-NEXT:    [[CMP1:%.*]] = icmp ugt i16 [[ELT1]], [[ELT0]]
; VI-NEXT:    [[MAX1:%.*]] = select i1 [[CMP1]], i16 [[ELT1]], i16 [[ELT0]]
; VI-NEXT:    [[CMP2:%.*]] = icmp ugt i16 [[ELT2]], [[MAX1]]
; VI-NEXT:    [[MAX2:%.*]] = select i1 [[CMP2]], i16 [[ELT2]], i16 [[MAX1]]
; VI-NEXT:    [[CMP3:%.*]] = icmp ugt i16 [[ELT3]], [[MAX2]]
; VI-NEXT:    [[MAX3:%.*]] = select i1 [[CMP3]], i16 [[ELT3]], i16 [[MAX2]]
; VI-NEXT:    ret i16 [[MAX3]]
;
entry:
  %elt0 = extractelement <4 x i16> %vec4, i64 0
  %elt1 = extractelement <4 x i16> %vec4, i64 1
  %elt2 = extractelement <4 x i16> %vec4, i64 2
  %elt3 = extractelement <4 x i16> %vec4, i64 3

  %cmp1 = icmp ugt i16 %elt1, %elt0
  %max1 = select i1 %cmp1, i16 %elt1, i16 %elt0
  %cmp2 = icmp ugt i16 %elt2, %max1
  %max2 = select i1 %cmp2, i16 %elt2, i16 %max1
  %cmp3 = icmp ugt i16 %elt3, %max2
  %max3 = select i1 %cmp3, i16 %elt3, i16 %max2

  ret i16 %max3
}

define i16 @reduction_smax_v4i16(<4 x i16> %vec4) {
; GFX9-LABEL: @reduction_smax_v4i16(
; GFX9-NEXT:  entry:
; GFX9-NEXT:    [[TMP0:%.*]] = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> [[VEC4:%.*]])
; GFX9-NEXT:    ret i16 [[TMP0]]
;
; VI-LABEL: @reduction_smax_v4i16(
; VI-NEXT:  entry:
; VI-NEXT:    [[ELT0:%.*]] = extractelement <4 x i16> [[VEC4:%.*]], i64 0
; VI-NEXT:    [[ELT1:%.*]] = extractelement <4 x i16> [[VEC4]], i64 1
; VI-NEXT:    [[ELT2:%.*]] = extractelement <4 x i16> [[VEC4]], i64 2
; VI-NEXT:    [[ELT3:%.*]] = extractelement <4 x i16> [[VEC4]], i64 3
; VI-NEXT:    [[CMP1:%.*]] = icmp sgt i16 [[ELT1]], [[ELT0]]
; VI-NEXT:    [[MAX1:%.*]] = select i1 [[CMP1]], i16 [[ELT1]], i16 [[ELT0]]
; VI-NEXT:    [[CMP2:%.*]] = icmp sgt i16 [[ELT2]], [[MAX1]]
; VI-NEXT:    [[MAX2:%.*]] = select i1 [[CMP2]], i16 [[ELT2]], i16 [[MAX1]]
; VI-NEXT:    [[CMP3:%.*]] = icmp sgt i16 [[ELT3]], [[MAX2]]
; VI-NEXT:    [[MAX3:%.*]] = select i1 [[CMP3]], i16 [[ELT3]], i16 [[MAX2]]
; VI-NEXT:    ret i16 [[MAX3]]
;
entry:
  %elt0 = extractelement <4 x i16> %vec4, i64 0
  %elt1 = extractelement <4 x i16> %vec4, i64 1
  %elt2 = extractelement <4 x i16> %vec4, i64 2
  %elt3 = extractelement <4 x i16> %vec4, i64 3

  %cmp1 = icmp sgt i16 %elt1, %elt0
  %max1 = select i1 %cmp1, i16 %elt1, i16 %elt0
  %cmp2 = icmp sgt i16 %elt2, %max1
  %max2 = select i1 %cmp2, i16 %elt2, i16 %max1
  %cmp3 = icmp sgt i16 %elt3, %max2
  %max3 = select i1 %cmp3, i16 %elt3, i16 %max2

  ret i16 %max3
}

; FIXME: Use fmaxnum intrinsics to match what InstCombine creates for fcmp+select
; with fastmath on the select.
define half @reduction_fmax_v4half(<4 x half> %vec4) {
; GCN-LABEL: @reduction_fmax_v4half(
; GCN-NEXT:  entry:
; GCN-NEXT:    [[ELT0:%.*]] = extractelement <4 x half> [[VEC4:%.*]], i64 0
; GCN-NEXT:    [[ELT1:%.*]] = extractelement <4 x half> [[VEC4]], i64 1
; GCN-NEXT:    [[ELT2:%.*]] = extractelement <4 x half> [[VEC4]], i64 2
; GCN-NEXT:    [[ELT3:%.*]] = extractelement <4 x half> [[VEC4]], i64 3
; GCN-NEXT:    [[CMP1:%.*]] = fcmp fast ogt half [[ELT1]], [[ELT0]]
; GCN-NEXT:    [[MAX1:%.*]] = select i1 [[CMP1]], half [[ELT1]], half [[ELT0]]
; GCN-NEXT:    [[CMP2:%.*]] = fcmp fast ogt half [[ELT2]], [[MAX1]]
; GCN-NEXT:    [[MAX2:%.*]] = select i1 [[CMP2]], half [[ELT2]], half [[MAX1]]
; GCN-NEXT:    [[CMP3:%.*]] = fcmp fast ogt half [[ELT3]], [[MAX2]]
; GCN-NEXT:    [[MAX3:%.*]] = select i1 [[CMP3]], half [[ELT3]], half [[MAX2]]
; GCN-NEXT:    ret half [[MAX3]]
;
entry:
  %elt0 = extractelement <4 x half> %vec4, i64 0
  %elt1 = extractelement <4 x half> %vec4, i64 1
  %elt2 = extractelement <4 x half> %vec4, i64 2
  %elt3 = extractelement <4 x half> %vec4, i64 3

  %cmp1 = fcmp fast ogt half %elt1, %elt0
  %max1 = select i1 %cmp1, half %elt1, half %elt0
  %cmp2 = fcmp fast ogt half %elt2, %max1
  %max2 = select i1 %cmp2, half %elt2, half %max1
  %cmp3 = fcmp fast ogt half %elt3, %max2
  %max3 = select i1 %cmp3, half %elt3, half %max2

  ret half %max3
}

; FIXME: Use fmaxnum intrinsics to match what InstCombine creates for fcmp+select
; with fastmath on the select.
define half @reduction_fmin_v4half(<4 x half> %vec4) {
; GCN-LABEL: @reduction_fmin_v4half(
; GCN-NEXT:  entry:
; GCN-NEXT:    [[ELT0:%.*]] = extractelement <4 x half> [[VEC4:%.*]], i64 0
; GCN-NEXT:    [[ELT1:%.*]] = extractelement <4 x half> [[VEC4]], i64 1
; GCN-NEXT:    [[ELT2:%.*]] = extractelement <4 x half> [[VEC4]], i64 2
; GCN-NEXT:    [[ELT3:%.*]] = extractelement <4 x half> [[VEC4]], i64 3
; GCN-NEXT:    [[CMP1:%.*]] = fcmp fast olt half [[ELT1]], [[ELT0]]
; GCN-NEXT:    [[MIN1:%.*]] = select i1 [[CMP1]], half [[ELT1]], half [[ELT0]]
; GCN-NEXT:    [[CMP2:%.*]] = fcmp fast olt half [[ELT2]], [[MIN1]]
; GCN-NEXT:    [[MIN2:%.*]] = select i1 [[CMP2]], half [[ELT2]], half [[MIN1]]
; GCN-NEXT:    [[CMP3:%.*]] = fcmp fast olt half [[ELT3]], [[MIN2]]
; GCN-NEXT:    [[MIN3:%.*]] = select i1 [[CMP3]], half [[ELT3]], half [[MIN2]]
; GCN-NEXT:    ret half [[MIN3]]
;
entry:
  %elt0 = extractelement <4 x half> %vec4, i64 0
  %elt1 = extractelement <4 x half> %vec4, i64 1
  %elt2 = extractelement <4 x half> %vec4, i64 2
  %elt3 = extractelement <4 x half> %vec4, i64 3

  %cmp1 = fcmp fast olt half %elt1, %elt0
  %min1 = select i1 %cmp1, half %elt1, half %elt0
  %cmp2 = fcmp fast olt half %elt2, %min1
  %min2 = select i1 %cmp2, half %elt2, half %min1
  %cmp3 = fcmp fast olt half %elt3, %min2
  %min3 = select i1 %cmp3, half %elt3, half %min2

  ret half %min3
}

; Tests to make sure reduction does not kick in. vega does not support packed math for types larger than 16 bits.
define float @reduction_v4float(<4 x float> %a) {
; GCN-LABEL: @reduction_v4float(
; GCN-NEXT:  entry:
; GCN-NEXT:    [[ELT0:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
; GCN-NEXT:    [[ELT1:%.*]] = extractelement <4 x float> [[A]], i64 1
; GCN-NEXT:    [[ELT2:%.*]] = extractelement <4 x float> [[A]], i64 2
; GCN-NEXT:    [[ELT3:%.*]] = extractelement <4 x float> [[A]], i64 3
; GCN-NEXT:    [[ADD1:%.*]] = fadd fast float [[ELT1]], [[ELT0]]
; GCN-NEXT:    [[ADD2:%.*]] = fadd fast float [[ELT2]], [[ADD1]]
; GCN-NEXT:    [[ADD3:%.*]] = fadd fast float [[ELT3]], [[ADD2]]
; GCN-NEXT:    ret float [[ADD3]]
;
entry:
  %elt0 = extractelement <4 x float> %a, i64 0
  %elt1 = extractelement <4 x float> %a, i64 1
  %elt2 = extractelement <4 x float> %a, i64 2
  %elt3 = extractelement <4 x float> %a, i64 3

  %add1 = fadd fast float %elt1, %elt0
  %add2 = fadd fast float %elt2, %add1
  %add3 = fadd fast float %elt3, %add2

  ret float %add3
}