110625958SPhilip Reames; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 220864d2cSLuke Lau; RUN: opt < %s -passes=slp-vectorizer -mtriple=riscv64 \ 320864d2cSLuke Lau; RUN: -mattr=+v,+zvfhmin,+zvfbfmin -riscv-v-slp-max-vf=0 -S \ 420864d2cSLuke Lau; RUN: | FileCheck %s --check-prefixes=CHECK,ZVFHMIN 520864d2cSLuke Lau; RUN: opt < %s -passes=slp-vectorizer -mtriple=riscv64 \ 620864d2cSLuke Lau; RUN: -mattr=+v,+zvfh,+zvfbfmin -riscv-v-slp-max-vf=0 -S \ 720864d2cSLuke Lau; RUN: | FileCheck %s --check-prefixes=CHECK,ZVFH,ZVL128 820864d2cSLuke Lau; RUN: opt < %s -passes=slp-vectorizer -mtriple=riscv64 \ 920864d2cSLuke Lau; RUN: -mattr=+v,+zvl256b,+zvfh,+zvfbfmin -riscv-v-slp-max-vf=0 -S \ 1020864d2cSLuke Lau; RUN: | FileCheck %s --check-prefixes=CHECK,ZVFH,ZVL256 1120864d2cSLuke Lau; RUN: opt < %s -passes=slp-vectorizer -mtriple=riscv64 \ 1220864d2cSLuke Lau; RUN: -mattr=+v,+zvl512b,+zvfh,+zvfbfmin -riscv-v-slp-max-vf=0 -S \ 1320864d2cSLuke Lau; RUN: | FileCheck %s --check-prefixes=CHECK,ZVFH,ZVL512 1410625958SPhilip Reames 1510625958SPhilip Reamestarget datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128" 1610625958SPhilip Reamestarget triple = "riscv64" 1710625958SPhilip Reames 1810625958SPhilip Reames; First batch of tests are simple reductions of various widths 1910625958SPhilip Reames 2010625958SPhilip Reamesdefine i64 @red_ld_2xi64(ptr %ptr) { 2110625958SPhilip Reames; CHECK-LABEL: @red_ld_2xi64( 2210625958SPhilip Reames; CHECK-NEXT: entry: 2310625958SPhilip Reames; CHECK-NEXT: [[LD0:%.*]] = load i64, ptr [[PTR:%.*]], align 8 2410625958SPhilip Reames; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 1 2510625958SPhilip Reames; CHECK-NEXT: [[LD1:%.*]] = load i64, ptr [[GEP]], align 8 2610625958SPhilip Reames; CHECK-NEXT: [[ADD_1:%.*]] = add nuw nsw i64 [[LD0]], [[LD1]] 2710625958SPhilip Reames; CHECK-NEXT: ret i64 [[ADD_1]] 2810625958SPhilip Reames; 2910625958SPhilip Reamesentry: 3010625958SPhilip Reames %ld0 = load 
i64, ptr %ptr 3110625958SPhilip Reames %gep = getelementptr inbounds i64, ptr %ptr, i64 1 3210625958SPhilip Reames %ld1 = load i64, ptr %gep 3310625958SPhilip Reames %add.1 = add nuw nsw i64 %ld0, %ld1 3410625958SPhilip Reames ret i64 %add.1 3510625958SPhilip Reames} 3610625958SPhilip Reames 3710625958SPhilip Reamesdefine i64 @red_ld_4xi64(ptr %ptr) { 3810625958SPhilip Reames; CHECK-LABEL: @red_ld_4xi64( 3910625958SPhilip Reames; CHECK-NEXT: entry: 4010625958SPhilip Reames; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i64>, ptr [[PTR:%.*]], align 8 4110625958SPhilip Reames; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP0]]) 4210625958SPhilip Reames; CHECK-NEXT: ret i64 [[TMP1]] 4310625958SPhilip Reames; 4410625958SPhilip Reamesentry: 4510625958SPhilip Reames %ld0 = load i64, ptr %ptr 4610625958SPhilip Reames %gep = getelementptr inbounds i64, ptr %ptr, i64 1 4710625958SPhilip Reames %ld1 = load i64, ptr %gep 4810625958SPhilip Reames %add.1 = add nuw nsw i64 %ld0, %ld1 4910625958SPhilip Reames %gep.1 = getelementptr inbounds i64, ptr %ptr, i64 2 5010625958SPhilip Reames %ld2 = load i64, ptr %gep.1 5110625958SPhilip Reames %add.2 = add nuw nsw i64 %add.1, %ld2 5210625958SPhilip Reames %gep.2 = getelementptr inbounds i64, ptr %ptr, i64 3 5310625958SPhilip Reames %ld3 = load i64, ptr %gep.2 5410625958SPhilip Reames %add.3 = add nuw nsw i64 %add.2, %ld3 5510625958SPhilip Reames ret i64 %add.3 5610625958SPhilip Reames} 5710625958SPhilip Reames 5810625958SPhilip Reamesdefine i64 @red_ld_8xi64(ptr %ptr) { 5910625958SPhilip Reames; CHECK-LABEL: @red_ld_8xi64( 6010625958SPhilip Reames; CHECK-NEXT: entry: 6110625958SPhilip Reames; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i64>, ptr [[PTR:%.*]], align 8 6210625958SPhilip Reames; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[TMP0]]) 6310625958SPhilip Reames; CHECK-NEXT: ret i64 [[TMP1]] 6410625958SPhilip Reames; 6510625958SPhilip Reamesentry: 6610625958SPhilip Reames %ld0 = 
load i64, ptr %ptr 6710625958SPhilip Reames %gep = getelementptr inbounds i64, ptr %ptr, i64 1 6810625958SPhilip Reames %ld1 = load i64, ptr %gep 6910625958SPhilip Reames %add.1 = add nuw nsw i64 %ld0, %ld1 7010625958SPhilip Reames %gep.1 = getelementptr inbounds i64, ptr %ptr, i64 2 7110625958SPhilip Reames %ld2 = load i64, ptr %gep.1 7210625958SPhilip Reames %add.2 = add nuw nsw i64 %add.1, %ld2 7310625958SPhilip Reames %gep.2 = getelementptr inbounds i64, ptr %ptr, i64 3 7410625958SPhilip Reames %ld3 = load i64, ptr %gep.2 7510625958SPhilip Reames %add.3 = add nuw nsw i64 %add.2, %ld3 7610625958SPhilip Reames %gep.3 = getelementptr inbounds i64, ptr %ptr, i64 4 7710625958SPhilip Reames %ld4 = load i64, ptr %gep.3 7810625958SPhilip Reames %add.4 = add nuw nsw i64 %add.3, %ld4 7910625958SPhilip Reames %gep.4 = getelementptr inbounds i64, ptr %ptr, i64 5 8010625958SPhilip Reames %ld5 = load i64, ptr %gep.4 8110625958SPhilip Reames %add.5 = add nuw nsw i64 %add.4, %ld5 8210625958SPhilip Reames %gep.5 = getelementptr inbounds i64, ptr %ptr, i64 6 8310625958SPhilip Reames %ld6 = load i64, ptr %gep.5 8410625958SPhilip Reames %add.6 = add nuw nsw i64 %add.5, %ld6 8510625958SPhilip Reames %gep.6 = getelementptr inbounds i64, ptr %ptr, i64 7 8610625958SPhilip Reames %ld7 = load i64, ptr %gep.6 8710625958SPhilip Reames %add.7 = add nuw nsw i64 %add.6, %ld7 8810625958SPhilip Reames ret i64 %add.7 8910625958SPhilip Reames} 9010625958SPhilip Reames 9110625958SPhilip Reamesdefine i64 @red_ld_16xi64(ptr %ptr) { 9210625958SPhilip Reames; CHECK-LABEL: @red_ld_16xi64( 9310625958SPhilip Reames; CHECK-NEXT: entry: 9410625958SPhilip Reames; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i64>, ptr [[PTR:%.*]], align 8 9510625958SPhilip Reames; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> [[TMP0]]) 9610625958SPhilip Reames; CHECK-NEXT: ret i64 [[TMP1]] 9710625958SPhilip Reames; 9810625958SPhilip Reamesentry: 9910625958SPhilip Reames %ld0 = load i64, ptr %ptr 
10010625958SPhilip Reames %gep = getelementptr inbounds i64, ptr %ptr, i64 1 10110625958SPhilip Reames %ld1 = load i64, ptr %gep 10210625958SPhilip Reames %add.1 = add nuw nsw i64 %ld0, %ld1 10310625958SPhilip Reames %gep.1 = getelementptr inbounds i64, ptr %ptr, i64 2 10410625958SPhilip Reames %ld2 = load i64, ptr %gep.1 10510625958SPhilip Reames %add.2 = add nuw nsw i64 %add.1, %ld2 10610625958SPhilip Reames %gep.2 = getelementptr inbounds i64, ptr %ptr, i64 3 10710625958SPhilip Reames %ld3 = load i64, ptr %gep.2 10810625958SPhilip Reames %add.3 = add nuw nsw i64 %add.2, %ld3 10910625958SPhilip Reames %gep.3 = getelementptr inbounds i64, ptr %ptr, i64 4 11010625958SPhilip Reames %ld4 = load i64, ptr %gep.3 11110625958SPhilip Reames %add.4 = add nuw nsw i64 %add.3, %ld4 11210625958SPhilip Reames %gep.4 = getelementptr inbounds i64, ptr %ptr, i64 5 11310625958SPhilip Reames %ld5 = load i64, ptr %gep.4 11410625958SPhilip Reames %add.5 = add nuw nsw i64 %add.4, %ld5 11510625958SPhilip Reames %gep.5 = getelementptr inbounds i64, ptr %ptr, i64 6 11610625958SPhilip Reames %ld6 = load i64, ptr %gep.5 11710625958SPhilip Reames %add.6 = add nuw nsw i64 %add.5, %ld6 11810625958SPhilip Reames %gep.6 = getelementptr inbounds i64, ptr %ptr, i64 7 11910625958SPhilip Reames %ld7 = load i64, ptr %gep.6 12010625958SPhilip Reames %add.7 = add nuw nsw i64 %add.6, %ld7 12110625958SPhilip Reames %gep.7 = getelementptr inbounds i64, ptr %ptr, i64 8 12210625958SPhilip Reames %ld8 = load i64, ptr %gep.7 12310625958SPhilip Reames %add.8 = add nuw nsw i64 %add.7, %ld8 12410625958SPhilip Reames %gep.8 = getelementptr inbounds i64, ptr %ptr, i64 9 12510625958SPhilip Reames %ld9 = load i64, ptr %gep.8 12610625958SPhilip Reames %add.9 = add nuw nsw i64 %add.8, %ld9 12710625958SPhilip Reames %gep.9 = getelementptr inbounds i64, ptr %ptr, i64 10 12810625958SPhilip Reames %ld10 = load i64, ptr %gep.9 12910625958SPhilip Reames %add.10 = add nuw nsw i64 %add.9, %ld10 13010625958SPhilip Reames 
%gep.10 = getelementptr inbounds i64, ptr %ptr, i64 11 13110625958SPhilip Reames %ld11 = load i64, ptr %gep.10 13210625958SPhilip Reames %add.11 = add nuw nsw i64 %add.10, %ld11 13310625958SPhilip Reames %gep.11 = getelementptr inbounds i64, ptr %ptr, i64 12 13410625958SPhilip Reames %ld12 = load i64, ptr %gep.11 13510625958SPhilip Reames %add.12 = add nuw nsw i64 %add.11, %ld12 13610625958SPhilip Reames %gep.12 = getelementptr inbounds i64, ptr %ptr, i64 13 13710625958SPhilip Reames %ld13 = load i64, ptr %gep.12 13810625958SPhilip Reames %add.13 = add nuw nsw i64 %add.12, %ld13 13910625958SPhilip Reames %gep.13 = getelementptr inbounds i64, ptr %ptr, i64 14 14010625958SPhilip Reames %ld14 = load i64, ptr %gep.13 14110625958SPhilip Reames %add.14 = add nuw nsw i64 %add.13, %ld14 14210625958SPhilip Reames %gep.14 = getelementptr inbounds i64, ptr %ptr, i64 15 14310625958SPhilip Reames %ld15 = load i64, ptr %gep.14 14410625958SPhilip Reames %add.15 = add nuw nsw i64 %add.14, %ld15 14510625958SPhilip Reames ret i64 %add.15 14610625958SPhilip Reames} 14710625958SPhilip Reames 148deb3ecf0SAlexey Bataev 149deb3ecf0SAlexey Bataevdefine i64 @red_strided_ld_16xi64(ptr %ptr) { 150deb3ecf0SAlexey Bataev; CHECK-LABEL: @red_strided_ld_16xi64( 151deb3ecf0SAlexey Bataev; CHECK-NEXT: entry: 15238fffa63SPaul Walker; CHECK-NEXT: [[TMP0:%.*]] = call <16 x i64> @llvm.experimental.vp.strided.load.v16i64.p0.i64(ptr align 8 [[PTR:%.*]], i64 16, <16 x i1> splat (i1 true), i32 16) 153799fd3d8SAlexey Bataev; CHECK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> [[TMP0]]) 154799fd3d8SAlexey Bataev; CHECK-NEXT: ret i64 [[TMP1]] 155deb3ecf0SAlexey Bataev; 156deb3ecf0SAlexey Bataeventry: 157deb3ecf0SAlexey Bataev %ld0 = load i64, ptr %ptr 158deb3ecf0SAlexey Bataev %gep = getelementptr inbounds i64, ptr %ptr, i64 2 159deb3ecf0SAlexey Bataev %ld1 = load i64, ptr %gep 160deb3ecf0SAlexey Bataev %add.1 = add nuw nsw i64 %ld0, %ld1 161deb3ecf0SAlexey Bataev %gep.1 = 
getelementptr inbounds i64, ptr %ptr, i64 4 162deb3ecf0SAlexey Bataev %ld2 = load i64, ptr %gep.1 163deb3ecf0SAlexey Bataev %add.2 = add nuw nsw i64 %add.1, %ld2 164deb3ecf0SAlexey Bataev %gep.2 = getelementptr inbounds i64, ptr %ptr, i64 6 165deb3ecf0SAlexey Bataev %ld3 = load i64, ptr %gep.2 166deb3ecf0SAlexey Bataev %add.3 = add nuw nsw i64 %add.2, %ld3 167deb3ecf0SAlexey Bataev %gep.3 = getelementptr inbounds i64, ptr %ptr, i64 8 168deb3ecf0SAlexey Bataev %ld4 = load i64, ptr %gep.3 169deb3ecf0SAlexey Bataev %add.4 = add nuw nsw i64 %add.3, %ld4 170deb3ecf0SAlexey Bataev %gep.4 = getelementptr inbounds i64, ptr %ptr, i64 10 171deb3ecf0SAlexey Bataev %ld5 = load i64, ptr %gep.4 172deb3ecf0SAlexey Bataev %add.5 = add nuw nsw i64 %add.4, %ld5 173deb3ecf0SAlexey Bataev %gep.5 = getelementptr inbounds i64, ptr %ptr, i64 12 174deb3ecf0SAlexey Bataev %ld6 = load i64, ptr %gep.5 175deb3ecf0SAlexey Bataev %add.6 = add nuw nsw i64 %add.5, %ld6 176deb3ecf0SAlexey Bataev %gep.6 = getelementptr inbounds i64, ptr %ptr, i64 14 177deb3ecf0SAlexey Bataev %ld7 = load i64, ptr %gep.6 178deb3ecf0SAlexey Bataev %add.7 = add nuw nsw i64 %add.6, %ld7 179deb3ecf0SAlexey Bataev %gep.7 = getelementptr inbounds i64, ptr %ptr, i64 16 180deb3ecf0SAlexey Bataev %ld8 = load i64, ptr %gep.7 181deb3ecf0SAlexey Bataev %add.8 = add nuw nsw i64 %add.7, %ld8 182deb3ecf0SAlexey Bataev %gep.8 = getelementptr inbounds i64, ptr %ptr, i64 18 183deb3ecf0SAlexey Bataev %ld9 = load i64, ptr %gep.8 184deb3ecf0SAlexey Bataev %add.9 = add nuw nsw i64 %add.8, %ld9 185deb3ecf0SAlexey Bataev %gep.9 = getelementptr inbounds i64, ptr %ptr, i64 20 186deb3ecf0SAlexey Bataev %ld10 = load i64, ptr %gep.9 187deb3ecf0SAlexey Bataev %add.10 = add nuw nsw i64 %add.9, %ld10 188deb3ecf0SAlexey Bataev %gep.10 = getelementptr inbounds i64, ptr %ptr, i64 22 189deb3ecf0SAlexey Bataev %ld11 = load i64, ptr %gep.10 190deb3ecf0SAlexey Bataev %add.11 = add nuw nsw i64 %add.10, %ld11 191deb3ecf0SAlexey Bataev %gep.11 = 
getelementptr inbounds i64, ptr %ptr, i64 24 192deb3ecf0SAlexey Bataev %ld12 = load i64, ptr %gep.11 193deb3ecf0SAlexey Bataev %add.12 = add nuw nsw i64 %add.11, %ld12 194deb3ecf0SAlexey Bataev %gep.12 = getelementptr inbounds i64, ptr %ptr, i64 26 195deb3ecf0SAlexey Bataev %ld13 = load i64, ptr %gep.12 196deb3ecf0SAlexey Bataev %add.13 = add nuw nsw i64 %add.12, %ld13 197deb3ecf0SAlexey Bataev %gep.13 = getelementptr inbounds i64, ptr %ptr, i64 28 198deb3ecf0SAlexey Bataev %ld14 = load i64, ptr %gep.13 199deb3ecf0SAlexey Bataev %add.14 = add nuw nsw i64 %add.13, %ld14 200deb3ecf0SAlexey Bataev %gep.14 = getelementptr inbounds i64, ptr %ptr, i64 30 201deb3ecf0SAlexey Bataev %ld15 = load i64, ptr %gep.14 202deb3ecf0SAlexey Bataev %add.15 = add nuw nsw i64 %add.14, %ld15 203deb3ecf0SAlexey Bataev ret i64 %add.15 204deb3ecf0SAlexey Bataev} 205deb3ecf0SAlexey Bataev 20610625958SPhilip Reames; Next batch tests different reduction kinds 20710625958SPhilip Reames 20810625958SPhilip Reames%struct.buf = type { [8 x i8] } 20910625958SPhilip Reames 21010625958SPhilip Reamesdefine i8 @reduce_and(ptr %a, ptr %b) { 21110625958SPhilip Reames; CHECK-LABEL: @reduce_and( 21210625958SPhilip Reames; CHECK-NEXT: entry: 21310625958SPhilip Reames; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_BUF:%.*]], ptr [[A:%.*]], i64 0, i32 0, i64 0 21410625958SPhilip Reames; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_BUF]], ptr [[B:%.*]], i64 0, i32 0, i64 0 21510625958SPhilip Reames; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 1 21610625958SPhilip Reames; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[ARRAYIDX3]], align 1 21710625958SPhilip Reames; CHECK-NEXT: [[TMP2:%.*]] = xor <8 x i8> [[TMP1]], [[TMP0]] 21810625958SPhilip Reames; CHECK-NEXT: [[TMP3:%.*]] = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> [[TMP2]]) 21910625958SPhilip Reames; CHECK-NEXT: [[OP_RDX:%.*]] = and i8 [[TMP3]], 1 22010625958SPhilip Reames; CHECK-NEXT: ret i8
[[OP_RDX]] 22110625958SPhilip Reames; 22210625958SPhilip Reamesentry: 22310625958SPhilip Reames %arrayidx = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 0 2242d69827cSNikita Popov %0 = load i8, ptr %arrayidx, align 1 22510625958SPhilip Reames %arrayidx3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 0 2262d69827cSNikita Popov %1 = load i8, ptr %arrayidx3, align 1 22710625958SPhilip Reames %xor12 = xor i8 %1, %0 22810625958SPhilip Reames %and13 = and i8 %xor12, 1 22910625958SPhilip Reames %arrayidx.1 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 1 2302d69827cSNikita Popov %2 = load i8, ptr %arrayidx.1, align 1 23110625958SPhilip Reames %arrayidx3.1 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 1 2322d69827cSNikita Popov %3 = load i8, ptr %arrayidx3.1, align 1 23310625958SPhilip Reames %xor12.1 = xor i8 %3, %2 23410625958SPhilip Reames %and13.1 = and i8 %xor12.1, %and13 23510625958SPhilip Reames %arrayidx.2 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 2 2362d69827cSNikita Popov %4 = load i8, ptr %arrayidx.2, align 1 23710625958SPhilip Reames %arrayidx3.2 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 2 2382d69827cSNikita Popov %5 = load i8, ptr %arrayidx3.2, align 1 23910625958SPhilip Reames %xor12.2 = xor i8 %5, %4 24010625958SPhilip Reames %and13.2 = and i8 %xor12.2, %and13.1 24110625958SPhilip Reames %arrayidx.3 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 3 2422d69827cSNikita Popov %6 = load i8, ptr %arrayidx.3, align 1 24310625958SPhilip Reames %arrayidx3.3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 3 2442d69827cSNikita Popov %7 = load i8, ptr %arrayidx3.3, align 1 24510625958SPhilip Reames %xor12.3 = xor i8 %7, %6 24610625958SPhilip Reames %and13.3 = and i8 %xor12.3, %and13.2 24710625958SPhilip Reames %arrayidx.4 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 4 2482d69827cSNikita Popov %8 = load i8, 
ptr %arrayidx.4, align 1 24910625958SPhilip Reames %arrayidx3.4 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 4 2502d69827cSNikita Popov %9 = load i8, ptr %arrayidx3.4, align 1 25110625958SPhilip Reames %xor12.4 = xor i8 %9, %8 25210625958SPhilip Reames %and13.4 = and i8 %xor12.4, %and13.3 25310625958SPhilip Reames %arrayidx.5 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 5 2542d69827cSNikita Popov %10 = load i8, ptr %arrayidx.5, align 1 25510625958SPhilip Reames %arrayidx3.5 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 5 2562d69827cSNikita Popov %11 = load i8, ptr %arrayidx3.5, align 1 25710625958SPhilip Reames %xor12.5 = xor i8 %11, %10 25810625958SPhilip Reames %and13.5 = and i8 %xor12.5, %and13.4 25910625958SPhilip Reames %arrayidx.6 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 6 2602d69827cSNikita Popov %12 = load i8, ptr %arrayidx.6, align 1 26110625958SPhilip Reames %arrayidx3.6 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 6 2622d69827cSNikita Popov %13 = load i8, ptr %arrayidx3.6, align 1 26310625958SPhilip Reames %xor12.6 = xor i8 %13, %12 26410625958SPhilip Reames %and13.6 = and i8 %xor12.6, %and13.5 26510625958SPhilip Reames %arrayidx.7 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 7 2662d69827cSNikita Popov %14 = load i8, ptr %arrayidx.7, align 1 26710625958SPhilip Reames %arrayidx3.7 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 7 2682d69827cSNikita Popov %15 = load i8, ptr %arrayidx3.7, align 1 26910625958SPhilip Reames %xor12.7 = xor i8 %15, %14 27010625958SPhilip Reames %and13.7 = and i8 %xor12.7, %and13.6 27110625958SPhilip Reames ret i8 %and13.7 27210625958SPhilip Reames} 27310625958SPhilip Reames 2749a82bda9Swangpcdefine i8 @reduce_or_1(ptr %a, ptr %b) { 2759a82bda9Swangpc; CHECK-LABEL: @reduce_or_1( 27610625958SPhilip Reames; CHECK-NEXT: entry: 27710625958SPhilip Reames; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr 
inbounds [[STRUCT_BUF:%.*]], ptr [[A:%.*]], i64 0, i32 0, i64 0 27810625958SPhilip Reames; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_BUF]], ptr [[B:%.*]], i64 0, i32 0, i64 0 27910625958SPhilip Reames; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 1 28010625958SPhilip Reames; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[ARRAYIDX3]], align 1 28110625958SPhilip Reames; CHECK-NEXT: [[TMP2:%.*]] = xor <8 x i8> [[TMP1]], [[TMP0]] 28210625958SPhilip Reames; CHECK-NEXT: [[TMP3:%.*]] = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> [[TMP2]]) 28310625958SPhilip Reames; CHECK-NEXT: ret i8 [[TMP3]] 28410625958SPhilip Reames; 28510625958SPhilip Reames 28610625958SPhilip Reamesentry: 28710625958SPhilip Reames %arrayidx = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 0 2882d69827cSNikita Popov %0 = load i8, ptr %arrayidx, align 1 28910625958SPhilip Reames %arrayidx3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 0 2902d69827cSNikita Popov %1 = load i8, ptr %arrayidx3, align 1 29110625958SPhilip Reames %xor12 = xor i8 %1, %0 29210625958SPhilip Reames %arrayidx.1 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 1 2932d69827cSNikita Popov %2 = load i8, ptr %arrayidx.1, align 1 29410625958SPhilip Reames %arrayidx3.1 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 1 2952d69827cSNikita Popov %3 = load i8, ptr %arrayidx3.1, align 1 29610625958SPhilip Reames %xor12.1 = xor i8 %3, %2 29710625958SPhilip Reames %or13.1 = or i8 %xor12.1, %xor12 29810625958SPhilip Reames %arrayidx.2 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 2 2992d69827cSNikita Popov %4 = load i8, ptr %arrayidx.2, align 1 30010625958SPhilip Reames %arrayidx3.2 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 2 3012d69827cSNikita Popov %5 = load i8, ptr %arrayidx3.2, align 1 30210625958SPhilip Reames %xor12.2 = xor i8 %5, %4 30310625958SPhilip Reames %or13.2 = or i8 %xor12.2, %or13.1 
30410625958SPhilip Reames %arrayidx.3 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 3 3052d69827cSNikita Popov %6 = load i8, ptr %arrayidx.3, align 1 30610625958SPhilip Reames %arrayidx3.3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 3 3072d69827cSNikita Popov %7 = load i8, ptr %arrayidx3.3, align 1 30810625958SPhilip Reames %xor12.3 = xor i8 %7, %6 30910625958SPhilip Reames %or13.3 = or i8 %xor12.3, %or13.2 31010625958SPhilip Reames %arrayidx.4 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 4 3112d69827cSNikita Popov %8 = load i8, ptr %arrayidx.4, align 1 31210625958SPhilip Reames %arrayidx3.4 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 4 3132d69827cSNikita Popov %9 = load i8, ptr %arrayidx3.4, align 1 31410625958SPhilip Reames %xor12.4 = xor i8 %9, %8 31510625958SPhilip Reames %or13.4 = or i8 %xor12.4, %or13.3 31610625958SPhilip Reames %arrayidx.5 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 5 3172d69827cSNikita Popov %10 = load i8, ptr %arrayidx.5, align 1 31810625958SPhilip Reames %arrayidx3.5 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 5 3192d69827cSNikita Popov %11 = load i8, ptr %arrayidx3.5, align 1 32010625958SPhilip Reames %xor12.5 = xor i8 %11, %10 32110625958SPhilip Reames %or13.5 = or i8 %xor12.5, %or13.4 32210625958SPhilip Reames %arrayidx.6 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 6 3232d69827cSNikita Popov %12 = load i8, ptr %arrayidx.6, align 1 32410625958SPhilip Reames %arrayidx3.6 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 6 3252d69827cSNikita Popov %13 = load i8, ptr %arrayidx3.6, align 1 32610625958SPhilip Reames %xor12.6 = xor i8 %13, %12 32710625958SPhilip Reames %or13.6 = or i8 %xor12.6, %or13.5 32810625958SPhilip Reames %arrayidx.7 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 7 3292d69827cSNikita Popov %14 = load i8, ptr %arrayidx.7, align 1 33010625958SPhilip Reames 
%arrayidx3.7 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 7 3312d69827cSNikita Popov %15 = load i8, ptr %arrayidx3.7, align 1 33210625958SPhilip Reames %xor12.7 = xor i8 %15, %14 33310625958SPhilip Reames %or13.7 = or i8 %xor12.7, %or13.6 33410625958SPhilip Reames ret i8 %or13.7 33510625958SPhilip Reames} 33610625958SPhilip Reames 3379a82bda9Swangpcdefine void @reduce_or_2() { 33820864d2cSLuke Lau; ZVFHMIN-LABEL: @reduce_or_2( 33920864d2cSLuke Lau; ZVFHMIN-NEXT: [[TMP1:%.*]] = shl i64 0, 0 34020864d2cSLuke Lau; ZVFHMIN-NEXT: [[TMP2:%.*]] = insertelement <16 x i64> <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 poison>, i64 [[TMP1]], i32 15 34120864d2cSLuke Lau; ZVFHMIN-NEXT: [[TMP3:%.*]] = icmp ult <16 x i64> [[TMP2]], zeroinitializer 34220864d2cSLuke Lau; ZVFHMIN-NEXT: [[TMP4:%.*]] = insertelement <16 x i64> <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 poison, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>, i64 [[TMP1]], i32 6 34320864d2cSLuke Lau; ZVFHMIN-NEXT: [[TMP5:%.*]] = icmp ult <16 x i64> [[TMP4]], zeroinitializer 34420864d2cSLuke Lau; ZVFHMIN-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP3]]) 34520864d2cSLuke Lau; ZVFHMIN-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP5]]) 34620864d2cSLuke Lau; ZVFHMIN-NEXT: [[OP_RDX:%.*]] = or i1 [[TMP6]], [[TMP7]] 34720864d2cSLuke Lau; ZVFHMIN-NEXT: br i1 [[OP_RDX]], label [[TMP9:%.*]], label [[TMP8:%.*]] 34820864d2cSLuke Lau; ZVFHMIN: 8: 34920864d2cSLuke Lau; ZVFHMIN-NEXT: ret void 35020864d2cSLuke Lau; ZVFHMIN: 9: 35120864d2cSLuke Lau; ZVFHMIN-NEXT: ret void 35220864d2cSLuke Lau; 353514b38cdSPhilip Reames; ZVL128-LABEL: @reduce_or_2( 354514b38cdSPhilip Reames; ZVL128-NEXT: [[TMP1:%.*]] = shl i64 0, 0 355514b38cdSPhilip Reames; ZVL128-NEXT: [[TMP2:%.*]] = insertelement <16 x i64> <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 
0, i64 0, i64 0, i64 poison>, i64 [[TMP1]], i32 15 356514b38cdSPhilip Reames; ZVL128-NEXT: [[TMP3:%.*]] = icmp ult <16 x i64> [[TMP2]], zeroinitializer 357514b38cdSPhilip Reames; ZVL128-NEXT: [[TMP4:%.*]] = insertelement <16 x i64> <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 poison, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>, i64 [[TMP1]], i32 6 358514b38cdSPhilip Reames; ZVL128-NEXT: [[TMP5:%.*]] = icmp ult <16 x i64> [[TMP4]], zeroinitializer 359514b38cdSPhilip Reames; ZVL128-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP3]]) 360514b38cdSPhilip Reames; ZVL128-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP5]]) 361514b38cdSPhilip Reames; ZVL128-NEXT: [[OP_RDX:%.*]] = or i1 [[TMP6]], [[TMP7]] 362514b38cdSPhilip Reames; ZVL128-NEXT: br i1 [[OP_RDX]], label [[TMP9:%.*]], label [[TMP8:%.*]] 363514b38cdSPhilip Reames; ZVL128: 8: 364514b38cdSPhilip Reames; ZVL128-NEXT: ret void 365514b38cdSPhilip Reames; ZVL128: 9: 366514b38cdSPhilip Reames; ZVL128-NEXT: ret void 367514b38cdSPhilip Reames; 368514b38cdSPhilip Reames; ZVL256-LABEL: @reduce_or_2( 369514b38cdSPhilip Reames; ZVL256-NEXT: [[TMP1:%.*]] = shl i64 0, 0 370514b38cdSPhilip Reames; ZVL256-NEXT: [[TMP2:%.*]] = insertelement <16 x i64> <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 poison>, i64 [[TMP1]], i32 15 371514b38cdSPhilip Reames; ZVL256-NEXT: [[TMP3:%.*]] = icmp ult <16 x i64> [[TMP2]], zeroinitializer 372514b38cdSPhilip Reames; ZVL256-NEXT: [[TMP4:%.*]] = insertelement <16 x i64> <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 poison, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>, i64 [[TMP1]], i32 6 373514b38cdSPhilip Reames; ZVL256-NEXT: [[TMP5:%.*]] = icmp ult <16 x i64> [[TMP4]], zeroinitializer 374514b38cdSPhilip Reames; ZVL256-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP3]]) 375514b38cdSPhilip Reames; ZVL256-NEXT: 
[[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP5]]) 376514b38cdSPhilip Reames; ZVL256-NEXT: [[OP_RDX:%.*]] = or i1 [[TMP6]], [[TMP7]] 377514b38cdSPhilip Reames; ZVL256-NEXT: br i1 [[OP_RDX]], label [[TMP9:%.*]], label [[TMP8:%.*]] 378514b38cdSPhilip Reames; ZVL256: 8: 379514b38cdSPhilip Reames; ZVL256-NEXT: ret void 380514b38cdSPhilip Reames; ZVL256: 9: 381514b38cdSPhilip Reames; ZVL256-NEXT: ret void 382514b38cdSPhilip Reames; 383514b38cdSPhilip Reames; ZVL512-LABEL: @reduce_or_2( 384514b38cdSPhilip Reames; ZVL512-NEXT: [[TMP1:%.*]] = shl i64 0, 0 385514b38cdSPhilip Reames; ZVL512-NEXT: [[TMP2:%.*]] = insertelement <32 x i64> <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 poison, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 poison, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>, i64 [[TMP1]], i32 15 386514b38cdSPhilip Reames; ZVL512-NEXT: [[TMP3:%.*]] = shufflevector <32 x i64> [[TMP2]], <32 x i64> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 15, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 387514b38cdSPhilip Reames; ZVL512-NEXT: [[TMP4:%.*]] = icmp ult <32 x i64> [[TMP3]], zeroinitializer 388514b38cdSPhilip Reames; ZVL512-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> [[TMP4]]) 389514b38cdSPhilip Reames; ZVL512-NEXT: br i1 [[TMP5]], label [[TMP7:%.*]], label [[TMP6:%.*]] 390514b38cdSPhilip Reames; ZVL512: 6: 391514b38cdSPhilip Reames; ZVL512-NEXT: ret void 392514b38cdSPhilip Reames; ZVL512: 7: 393514b38cdSPhilip Reames; ZVL512-NEXT: ret void 3949a82bda9Swangpc; 3959a82bda9Swangpc %1 = shl i64 0, 0 3969a82bda9Swangpc %2 = icmp ult i64 0, 0 3979a82bda9Swangpc %3 = icmp ult i64 0, 0 3989a82bda9Swangpc %4 = or i1 %2, %3 3999a82bda9Swangpc %5 = icmp ult i64 0, 0 4009a82bda9Swangpc 
%6 = or i1 %4, %5 4019a82bda9Swangpc %7 = icmp ult i64 0, 0 4029a82bda9Swangpc %8 = or i1 %6, %7 4039a82bda9Swangpc %9 = icmp ult i64 0, 0 4049a82bda9Swangpc %10 = or i1 %8, %9 4059a82bda9Swangpc %11 = icmp ult i64 0, 0 4069a82bda9Swangpc %12 = or i1 %10, %11 4079a82bda9Swangpc %13 = icmp ult i64 0, 0 4089a82bda9Swangpc %14 = or i1 %12, %13 4099a82bda9Swangpc %15 = icmp ult i64 0, 0 4109a82bda9Swangpc %16 = or i1 %14, %15 4119a82bda9Swangpc %17 = icmp ult i64 0, 0 4129a82bda9Swangpc %18 = or i1 %16, %17 4139a82bda9Swangpc %19 = icmp ult i64 0, 0 4149a82bda9Swangpc %20 = or i1 %18, %19 4159a82bda9Swangpc %21 = icmp ult i64 0, 0 4169a82bda9Swangpc %22 = or i1 %20, %21 4179a82bda9Swangpc %23 = icmp ult i64 0, 0 4189a82bda9Swangpc %24 = or i1 %22, %23 4199a82bda9Swangpc %25 = icmp ult i64 0, 0 4209a82bda9Swangpc %26 = or i1 %24, %25 4219a82bda9Swangpc %27 = icmp ult i64 0, 0 4229a82bda9Swangpc %28 = or i1 %26, %27 4239a82bda9Swangpc %29 = icmp ult i64 0, 0 4249a82bda9Swangpc %30 = or i1 %28, %29 4259a82bda9Swangpc %31 = icmp ult i64 %1, 0 4269a82bda9Swangpc %32 = or i1 %30, %31 4279a82bda9Swangpc %33 = icmp ult i64 0, 0 4289a82bda9Swangpc %34 = or i1 %32, %33 4299a82bda9Swangpc %35 = icmp ult i64 0, 0 4309a82bda9Swangpc %36 = or i1 %34, %35 4319a82bda9Swangpc %37 = icmp ult i64 0, 0 4329a82bda9Swangpc %38 = or i1 %36, %37 4339a82bda9Swangpc %39 = icmp ult i64 0, 0 4349a82bda9Swangpc %40 = or i1 %38, %39 4359a82bda9Swangpc %41 = icmp ult i64 0, 0 4369a82bda9Swangpc %42 = or i1 %40, %41 4379a82bda9Swangpc %43 = icmp ult i64 0, 0 4389a82bda9Swangpc %44 = or i1 %42, %43 4399a82bda9Swangpc %45 = icmp ult i64 %1, 0 4409a82bda9Swangpc %46 = or i1 %44, %45 4419a82bda9Swangpc %47 = icmp ult i64 0, 0 4429a82bda9Swangpc %48 = or i1 %46, %47 4439a82bda9Swangpc %49 = icmp ult i64 0, 0 4449a82bda9Swangpc %50 = or i1 %48, %49 4459a82bda9Swangpc %51 = icmp ult i64 0, 0 4469a82bda9Swangpc %52 = or i1 %50, %51 4479a82bda9Swangpc %53 = icmp ult i64 0, 0 4489a82bda9Swangpc %54 = or i1 
%52, %53 4499a82bda9Swangpc %55 = icmp ult i64 0, 0 4509a82bda9Swangpc %56 = or i1 %54, %55 4519a82bda9Swangpc %57 = icmp ult i64 0, 0 4529a82bda9Swangpc %58 = or i1 %56, %57 4539a82bda9Swangpc %59 = icmp ult i64 0, 0 4549a82bda9Swangpc %60 = or i1 %58, %59 4559a82bda9Swangpc %61 = icmp ult i64 0, 0 4569a82bda9Swangpc %62 = or i1 %60, %61 4579a82bda9Swangpc %63 = icmp ult i64 0, 0 4589a82bda9Swangpc %64 = or i1 %62, %63 4599a82bda9Swangpc br i1 %64, label %66, label %65 4609a82bda9Swangpc 4619a82bda9Swangpc65: ; preds = %0 4629a82bda9Swangpc ret void 4639a82bda9Swangpc 4649a82bda9Swangpc66: ; preds = %0 4659a82bda9Swangpc ret void 4669a82bda9Swangpc} 4679a82bda9Swangpc 46810625958SPhilip Reamesdefine i8 @reduce_xor(ptr %a, ptr %b) { 46910625958SPhilip Reames; CHECK-LABEL: @reduce_xor( 47010625958SPhilip Reames; CHECK-NEXT: entry: 47110625958SPhilip Reames; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_BUF:%.*]], ptr [[A:%.*]], i64 0, i32 0, i64 0 47210625958SPhilip Reames; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_BUF]], ptr [[B:%.*]], i64 0, i32 0, i64 0 47310625958SPhilip Reames; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 1 47410625958SPhilip Reames; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[ARRAYIDX3]], align 1 47510625958SPhilip Reames; CHECK-NEXT: [[TMP2:%.*]] = and <8 x i8> [[TMP1]], [[TMP0]] 47610625958SPhilip Reames; CHECK-NEXT: [[TMP3:%.*]] = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> [[TMP2]]) 47710625958SPhilip Reames; CHECK-NEXT: [[OP_RDX:%.*]] = xor i8 [[TMP3]], 1 47810625958SPhilip Reames; CHECK-NEXT: ret i8 [[OP_RDX]] 47910625958SPhilip Reames; 48010625958SPhilip Reamesentry: 48110625958SPhilip Reames %arrayidx = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 0 4822d69827cSNikita Popov %0 = load i8, ptr %arrayidx, align 1 48310625958SPhilip Reames %arrayidx3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 0 4842d69827cSNikita Popov %1 = load i8, ptr 
%arrayidx3, align 1 48510625958SPhilip Reames %and12 = and i8 %1, %0 48610625958SPhilip Reames %arrayidx.1 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 1 4872d69827cSNikita Popov %2 = load i8, ptr %arrayidx.1, align 1 48810625958SPhilip Reames %arrayidx3.1 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 1 4892d69827cSNikita Popov %3 = load i8, ptr %arrayidx3.1, align 1 49010625958SPhilip Reames %and12.1 = and i8 %3, %2 49110625958SPhilip Reames %4 = xor i8 %and12, %and12.1 49210625958SPhilip Reames %arrayidx.2 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 2 4932d69827cSNikita Popov %5 = load i8, ptr %arrayidx.2, align 1 49410625958SPhilip Reames %arrayidx3.2 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 2 4952d69827cSNikita Popov %6 = load i8, ptr %arrayidx3.2, align 1 49610625958SPhilip Reames %and12.2 = and i8 %6, %5 49710625958SPhilip Reames %7 = xor i8 %4, %and12.2 49810625958SPhilip Reames %arrayidx.3 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 3 4992d69827cSNikita Popov %8 = load i8, ptr %arrayidx.3, align 1 50010625958SPhilip Reames %arrayidx3.3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 3 5012d69827cSNikita Popov %9 = load i8, ptr %arrayidx3.3, align 1 50210625958SPhilip Reames %and12.3 = and i8 %9, %8 50310625958SPhilip Reames %10 = xor i8 %7, %and12.3 50410625958SPhilip Reames %arrayidx.4 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 4 5052d69827cSNikita Popov %11 = load i8, ptr %arrayidx.4, align 1 50610625958SPhilip Reames %arrayidx3.4 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 4 5072d69827cSNikita Popov %12 = load i8, ptr %arrayidx3.4, align 1 50810625958SPhilip Reames %and12.4 = and i8 %12, %11 50910625958SPhilip Reames %13 = xor i8 %10, %and12.4 51010625958SPhilip Reames %arrayidx.5 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 5 5112d69827cSNikita Popov %14 = load i8, ptr 
%arrayidx.5, align 1 51210625958SPhilip Reames %arrayidx3.5 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 5 5132d69827cSNikita Popov %15 = load i8, ptr %arrayidx3.5, align 1 51410625958SPhilip Reames %and12.5 = and i8 %15, %14 51510625958SPhilip Reames %16 = xor i8 %13, %and12.5 51610625958SPhilip Reames %arrayidx.6 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 6 5172d69827cSNikita Popov %17 = load i8, ptr %arrayidx.6, align 1 51810625958SPhilip Reames %arrayidx3.6 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 6 5192d69827cSNikita Popov %18 = load i8, ptr %arrayidx3.6, align 1 52010625958SPhilip Reames %and12.6 = and i8 %18, %17 52110625958SPhilip Reames %19 = xor i8 %16, %and12.6 52210625958SPhilip Reames %arrayidx.7 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 7 5232d69827cSNikita Popov %20 = load i8, ptr %arrayidx.7, align 1 52410625958SPhilip Reames %arrayidx3.7 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 7 5252d69827cSNikita Popov %21 = load i8, ptr %arrayidx3.7, align 1 52610625958SPhilip Reames %and12.7 = and i8 %21, %20 52710625958SPhilip Reames %22 = xor i8 %19, %and12.7 52810625958SPhilip Reames %xor13.7 = xor i8 %22, 1 52910625958SPhilip Reames ret i8 %xor13.7 53010625958SPhilip Reames} 53110625958SPhilip Reames 53210625958SPhilip Reames 53310625958SPhilip Reames 53410625958SPhilip Reamesdefine i8 @reduce_add(ptr %a, ptr %b) { 53510625958SPhilip Reames; CHECK-LABEL: @reduce_add( 53610625958SPhilip Reames; CHECK-NEXT: entry: 53710625958SPhilip Reames; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_BUF:%.*]], ptr [[A:%.*]], i64 0, i32 0, i64 0 53810625958SPhilip Reames; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_BUF]], ptr [[B:%.*]], i64 0, i32 0, i64 0 53910625958SPhilip Reames; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 1 54010625958SPhilip Reames; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr 
[[ARRAYIDX3]], align 1 54110625958SPhilip Reames; CHECK-NEXT: [[TMP2:%.*]] = and <8 x i8> [[TMP1]], [[TMP0]] 54210625958SPhilip Reames; CHECK-NEXT: [[TMP3:%.*]] = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> [[TMP2]]) 54310625958SPhilip Reames; CHECK-NEXT: [[OP_RDX:%.*]] = add i8 [[TMP3]], 1 54410625958SPhilip Reames; CHECK-NEXT: ret i8 [[OP_RDX]] 54510625958SPhilip Reames; 54610625958SPhilip Reamesentry: 54710625958SPhilip Reames %arrayidx = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 0 5482d69827cSNikita Popov %0 = load i8, ptr %arrayidx, align 1 54910625958SPhilip Reames %arrayidx3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 0 5502d69827cSNikita Popov %1 = load i8, ptr %arrayidx3, align 1 55110625958SPhilip Reames %and12 = and i8 %1, %0 55210625958SPhilip Reames %arrayidx.1 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 1 5532d69827cSNikita Popov %2 = load i8, ptr %arrayidx.1, align 1 55410625958SPhilip Reames %arrayidx3.1 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 1 5552d69827cSNikita Popov %3 = load i8, ptr %arrayidx3.1, align 1 55610625958SPhilip Reames %and12.1 = and i8 %3, %2 55710625958SPhilip Reames %4 = add i8 %and12, %and12.1 55810625958SPhilip Reames %arrayidx.2 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 2 5592d69827cSNikita Popov %5 = load i8, ptr %arrayidx.2, align 1 56010625958SPhilip Reames %arrayidx3.2 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 2 5612d69827cSNikita Popov %6 = load i8, ptr %arrayidx3.2, align 1 56210625958SPhilip Reames %and12.2 = and i8 %6, %5 56310625958SPhilip Reames %7 = add i8 %4, %and12.2 56410625958SPhilip Reames %arrayidx.3 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 3 5652d69827cSNikita Popov %8 = load i8, ptr %arrayidx.3, align 1 56610625958SPhilip Reames %arrayidx3.3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 3 5672d69827cSNikita Popov %9 = load i8, ptr 
%arrayidx3.3, align 1 56810625958SPhilip Reames %and12.3 = and i8 %9, %8 56910625958SPhilip Reames %10 = add i8 %7, %and12.3 57010625958SPhilip Reames %arrayidx.4 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 4 5712d69827cSNikita Popov %11 = load i8, ptr %arrayidx.4, align 1 57210625958SPhilip Reames %arrayidx3.4 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 4 5732d69827cSNikita Popov %12 = load i8, ptr %arrayidx3.4, align 1 57410625958SPhilip Reames %and12.4 = and i8 %12, %11 57510625958SPhilip Reames %13 = add i8 %10, %and12.4 57610625958SPhilip Reames %arrayidx.5 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 5 5772d69827cSNikita Popov %14 = load i8, ptr %arrayidx.5, align 1 57810625958SPhilip Reames %arrayidx3.5 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 5 5792d69827cSNikita Popov %15 = load i8, ptr %arrayidx3.5, align 1 58010625958SPhilip Reames %and12.5 = and i8 %15, %14 58110625958SPhilip Reames %16 = add i8 %13, %and12.5 58210625958SPhilip Reames %arrayidx.6 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 6 5832d69827cSNikita Popov %17 = load i8, ptr %arrayidx.6, align 1 58410625958SPhilip Reames %arrayidx3.6 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 6 5852d69827cSNikita Popov %18 = load i8, ptr %arrayidx3.6, align 1 58610625958SPhilip Reames %and12.6 = and i8 %18, %17 58710625958SPhilip Reames %19 = add i8 %16, %and12.6 58810625958SPhilip Reames %arrayidx.7 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 7 5892d69827cSNikita Popov %20 = load i8, ptr %arrayidx.7, align 1 59010625958SPhilip Reames %arrayidx3.7 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 7 5912d69827cSNikita Popov %21 = load i8, ptr %arrayidx3.7, align 1 59210625958SPhilip Reames %and12.7 = and i8 %21, %20 59310625958SPhilip Reames %22 = add i8 %19, %and12.7 59410625958SPhilip Reames %add13.7 = add i8 %22, 1 59510625958SPhilip Reames ret i8 
%add13.7 59610625958SPhilip Reames} 59710625958SPhilip Reames 59872ce9d1cSBen Shideclare i8 @llvm.smin.i8(i8, i8) 59972ce9d1cSBen Shi 60072ce9d1cSBen Shidefine i8 @reduce_smin(ptr %a, ptr %b) { 60172ce9d1cSBen Shi; CHECK-LABEL: @reduce_smin( 60272ce9d1cSBen Shi; CHECK-NEXT: entry: 60372ce9d1cSBen Shi; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_BUF:%.*]], ptr [[A:%.*]], i64 0, i32 0, i64 0 60472ce9d1cSBen Shi; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_BUF]], ptr [[B:%.*]], i64 0, i32 0, i64 0 60572ce9d1cSBen Shi; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 1 60672ce9d1cSBen Shi; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[ARRAYIDX3]], align 1 60772ce9d1cSBen Shi; CHECK-NEXT: [[TMP2:%.*]] = and <8 x i8> [[TMP1]], [[TMP0]] 60872ce9d1cSBen Shi; CHECK-NEXT: [[TMP3:%.*]] = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> [[TMP2]]) 60972ce9d1cSBen Shi; CHECK-NEXT: ret i8 [[TMP3]] 61072ce9d1cSBen Shi; 61172ce9d1cSBen Shientry: 61272ce9d1cSBen Shi %arrayidx = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 0 6132d69827cSNikita Popov %0 = load i8, ptr %arrayidx, align 1 61472ce9d1cSBen Shi %arrayidx3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 0 6152d69827cSNikita Popov %1 = load i8, ptr %arrayidx3, align 1 61672ce9d1cSBen Shi %and12 = and i8 %1, %0 61772ce9d1cSBen Shi %arrayidx.1 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 1 6182d69827cSNikita Popov %2 = load i8, ptr %arrayidx.1, align 1 61972ce9d1cSBen Shi %arrayidx3.1 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 1 6202d69827cSNikita Popov %3 = load i8, ptr %arrayidx3.1, align 1 62172ce9d1cSBen Shi %and12.1 = and i8 %3, %2 62272ce9d1cSBen Shi %4 = tail call i8 @llvm.smin.i8(i8 %and12, i8 %and12.1) 62372ce9d1cSBen Shi %arrayidx.2 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 2 6242d69827cSNikita Popov %5 = load i8, ptr %arrayidx.2, align 1 62572ce9d1cSBen Shi 
%arrayidx3.2 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 2 6262d69827cSNikita Popov %6 = load i8, ptr %arrayidx3.2, align 1 62772ce9d1cSBen Shi %and12.2 = and i8 %6, %5 62872ce9d1cSBen Shi %7 = tail call i8 @llvm.smin.i8(i8 %4, i8 %and12.2) 62972ce9d1cSBen Shi %arrayidx.3 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 3 6302d69827cSNikita Popov %8 = load i8, ptr %arrayidx.3, align 1 63172ce9d1cSBen Shi %arrayidx3.3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 3 6322d69827cSNikita Popov %9 = load i8, ptr %arrayidx3.3, align 1 63372ce9d1cSBen Shi %and12.3 = and i8 %9, %8 63472ce9d1cSBen Shi %10 = tail call i8 @llvm.smin.i8(i8 %7, i8 %and12.3) 63572ce9d1cSBen Shi %arrayidx.4 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 4 6362d69827cSNikita Popov %11 = load i8, ptr %arrayidx.4, align 1 63772ce9d1cSBen Shi %arrayidx3.4 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 4 6382d69827cSNikita Popov %12 = load i8, ptr %arrayidx3.4, align 1 63972ce9d1cSBen Shi %and12.4 = and i8 %12, %11 64072ce9d1cSBen Shi %13 = tail call i8 @llvm.smin.i8(i8 %10, i8 %and12.4) 64172ce9d1cSBen Shi %arrayidx.5 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 5 6422d69827cSNikita Popov %14 = load i8, ptr %arrayidx.5, align 1 64372ce9d1cSBen Shi %arrayidx3.5 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 5 6442d69827cSNikita Popov %15 = load i8, ptr %arrayidx3.5, align 1 64572ce9d1cSBen Shi %and12.5 = and i8 %15, %14 64672ce9d1cSBen Shi %16 = tail call i8 @llvm.smin.i8(i8 %13, i8 %and12.5) 64772ce9d1cSBen Shi %arrayidx.6 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 6 6482d69827cSNikita Popov %17 = load i8, ptr %arrayidx.6, align 1 64972ce9d1cSBen Shi %arrayidx3.6 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 6 6502d69827cSNikita Popov %18 = load i8, ptr %arrayidx3.6, align 1 65172ce9d1cSBen Shi %and12.6 = and i8 %18, %17 65272ce9d1cSBen Shi 
%19 = tail call i8 @llvm.smin.i8(i8 %16, i8 %and12.6) 65372ce9d1cSBen Shi %arrayidx.7 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 7 6542d69827cSNikita Popov %20 = load i8, ptr %arrayidx.7, align 1 65572ce9d1cSBen Shi %arrayidx3.7 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 7 6562d69827cSNikita Popov %21 = load i8, ptr %arrayidx3.7, align 1 65772ce9d1cSBen Shi %and12.7 = and i8 %21, %20 65872ce9d1cSBen Shi %22 = tail call i8 @llvm.smin.i8(i8 %19, i8 %and12.7) 65972ce9d1cSBen Shi ret i8 %22 66072ce9d1cSBen Shi} 66172ce9d1cSBen Shi 66272ce9d1cSBen Shideclare i8 @llvm.smax.i8(i8, i8) 66372ce9d1cSBen Shi 66472ce9d1cSBen Shidefine i8 @reduce_smax(ptr %a, ptr %b) { 66572ce9d1cSBen Shi; CHECK-LABEL: @reduce_smax( 66672ce9d1cSBen Shi; CHECK-NEXT: entry: 66772ce9d1cSBen Shi; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_BUF:%.*]], ptr [[A:%.*]], i64 0, i32 0, i64 0 66872ce9d1cSBen Shi; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_BUF]], ptr [[B:%.*]], i64 0, i32 0, i64 0 66972ce9d1cSBen Shi; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 1 67072ce9d1cSBen Shi; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[ARRAYIDX3]], align 1 67172ce9d1cSBen Shi; CHECK-NEXT: [[TMP2:%.*]] = and <8 x i8> [[TMP1]], [[TMP0]] 67272ce9d1cSBen Shi; CHECK-NEXT: [[TMP3:%.*]] = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> [[TMP2]]) 67372ce9d1cSBen Shi; CHECK-NEXT: ret i8 [[TMP3]] 67472ce9d1cSBen Shi; 67572ce9d1cSBen Shientry: 67672ce9d1cSBen Shi %arrayidx = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 0 6772d69827cSNikita Popov %0 = load i8, ptr %arrayidx, align 1 67872ce9d1cSBen Shi %arrayidx3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 0 6792d69827cSNikita Popov %1 = load i8, ptr %arrayidx3, align 1 68072ce9d1cSBen Shi %and12 = and i8 %1, %0 68172ce9d1cSBen Shi %arrayidx.1 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 1 6822d69827cSNikita Popov %2 
= load i8, ptr %arrayidx.1, align 1 68372ce9d1cSBen Shi %arrayidx3.1 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 1 6842d69827cSNikita Popov %3 = load i8, ptr %arrayidx3.1, align 1 68572ce9d1cSBen Shi %and12.1 = and i8 %3, %2 68672ce9d1cSBen Shi %4 = tail call i8 @llvm.smax.i8(i8 %and12, i8 %and12.1) 68772ce9d1cSBen Shi %arrayidx.2 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 2 6882d69827cSNikita Popov %5 = load i8, ptr %arrayidx.2, align 1 68972ce9d1cSBen Shi %arrayidx3.2 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 2 6902d69827cSNikita Popov %6 = load i8, ptr %arrayidx3.2, align 1 69172ce9d1cSBen Shi %and12.2 = and i8 %6, %5 69272ce9d1cSBen Shi %7 = tail call i8 @llvm.smax.i8(i8 %4, i8 %and12.2) 69372ce9d1cSBen Shi %arrayidx.3 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 3 6942d69827cSNikita Popov %8 = load i8, ptr %arrayidx.3, align 1 69572ce9d1cSBen Shi %arrayidx3.3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 3 6962d69827cSNikita Popov %9 = load i8, ptr %arrayidx3.3, align 1 69772ce9d1cSBen Shi %and12.3 = and i8 %9, %8 69872ce9d1cSBen Shi %10 = tail call i8 @llvm.smax.i8(i8 %7, i8 %and12.3) 69972ce9d1cSBen Shi %arrayidx.4 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 4 7002d69827cSNikita Popov %11 = load i8, ptr %arrayidx.4, align 1 70172ce9d1cSBen Shi %arrayidx3.4 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 4 7022d69827cSNikita Popov %12 = load i8, ptr %arrayidx3.4, align 1 70372ce9d1cSBen Shi %and12.4 = and i8 %12, %11 70472ce9d1cSBen Shi %13 = tail call i8 @llvm.smax.i8(i8 %10, i8 %and12.4) 70572ce9d1cSBen Shi %arrayidx.5 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 5 7062d69827cSNikita Popov %14 = load i8, ptr %arrayidx.5, align 1 70772ce9d1cSBen Shi %arrayidx3.5 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 5 7082d69827cSNikita Popov %15 = load i8, ptr %arrayidx3.5, align 1 
70972ce9d1cSBen Shi %and12.5 = and i8 %15, %14 71072ce9d1cSBen Shi %16 = tail call i8 @llvm.smax.i8(i8 %13, i8 %and12.5) 71172ce9d1cSBen Shi %arrayidx.6 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 6 7122d69827cSNikita Popov %17 = load i8, ptr %arrayidx.6, align 1 71372ce9d1cSBen Shi %arrayidx3.6 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 6 7142d69827cSNikita Popov %18 = load i8, ptr %arrayidx3.6, align 1 71572ce9d1cSBen Shi %and12.6 = and i8 %18, %17 71672ce9d1cSBen Shi %19 = tail call i8 @llvm.smax.i8(i8 %16, i8 %and12.6) 71772ce9d1cSBen Shi %arrayidx.7 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 7 7182d69827cSNikita Popov %20 = load i8, ptr %arrayidx.7, align 1 71972ce9d1cSBen Shi %arrayidx3.7 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 7 7202d69827cSNikita Popov %21 = load i8, ptr %arrayidx3.7, align 1 72172ce9d1cSBen Shi %and12.7 = and i8 %21, %20 72272ce9d1cSBen Shi %22 = tail call i8 @llvm.smax.i8(i8 %19, i8 %and12.7) 72372ce9d1cSBen Shi ret i8 %22 72472ce9d1cSBen Shi} 72572ce9d1cSBen Shi 72672ce9d1cSBen Shideclare i8 @llvm.umax.i8(i8, i8) 72772ce9d1cSBen Shi 72872ce9d1cSBen Shidefine i8 @reduce_umax(ptr %a, ptr %b) { 72972ce9d1cSBen Shi; CHECK-LABEL: @reduce_umax( 73072ce9d1cSBen Shi; CHECK-NEXT: entry: 73172ce9d1cSBen Shi; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_BUF:%.*]], ptr [[A:%.*]], i64 0, i32 0, i64 0 73272ce9d1cSBen Shi; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_BUF]], ptr [[B:%.*]], i64 0, i32 0, i64 0 73372ce9d1cSBen Shi; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 1 73472ce9d1cSBen Shi; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[ARRAYIDX3]], align 1 73572ce9d1cSBen Shi; CHECK-NEXT: [[TMP2:%.*]] = and <8 x i8> [[TMP1]], [[TMP0]] 73672ce9d1cSBen Shi; CHECK-NEXT: [[TMP3:%.*]] = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> [[TMP2]]) 73772ce9d1cSBen Shi; CHECK-NEXT: ret i8 [[TMP3]] 73872ce9d1cSBen 
Shi; 73972ce9d1cSBen Shientry: 74072ce9d1cSBen Shi %arrayidx = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 0 7412d69827cSNikita Popov %0 = load i8, ptr %arrayidx, align 1 74272ce9d1cSBen Shi %arrayidx3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 0 7432d69827cSNikita Popov %1 = load i8, ptr %arrayidx3, align 1 74472ce9d1cSBen Shi %and12 = and i8 %1, %0 74572ce9d1cSBen Shi %arrayidx.1 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 1 7462d69827cSNikita Popov %2 = load i8, ptr %arrayidx.1, align 1 74772ce9d1cSBen Shi %arrayidx3.1 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 1 7482d69827cSNikita Popov %3 = load i8, ptr %arrayidx3.1, align 1 74972ce9d1cSBen Shi %and12.1 = and i8 %3, %2 75072ce9d1cSBen Shi %4 = tail call i8 @llvm.umax.i8(i8 %and12, i8 %and12.1) 75172ce9d1cSBen Shi %arrayidx.2 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 2 7522d69827cSNikita Popov %5 = load i8, ptr %arrayidx.2, align 1 75372ce9d1cSBen Shi %arrayidx3.2 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 2 7542d69827cSNikita Popov %6 = load i8, ptr %arrayidx3.2, align 1 75572ce9d1cSBen Shi %and12.2 = and i8 %6, %5 75672ce9d1cSBen Shi %7 = tail call i8 @llvm.umax.i8(i8 %4, i8 %and12.2) 75772ce9d1cSBen Shi %arrayidx.3 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 3 7582d69827cSNikita Popov %8 = load i8, ptr %arrayidx.3, align 1 75972ce9d1cSBen Shi %arrayidx3.3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 3 7602d69827cSNikita Popov %9 = load i8, ptr %arrayidx3.3, align 1 76172ce9d1cSBen Shi %and12.3 = and i8 %9, %8 76272ce9d1cSBen Shi %10 = tail call i8 @llvm.umax.i8(i8 %7, i8 %and12.3) 76372ce9d1cSBen Shi %arrayidx.4 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 4 7642d69827cSNikita Popov %11 = load i8, ptr %arrayidx.4, align 1 76572ce9d1cSBen Shi %arrayidx3.4 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 4 
7662d69827cSNikita Popov %12 = load i8, ptr %arrayidx3.4, align 1 76772ce9d1cSBen Shi %and12.4 = and i8 %12, %11 76872ce9d1cSBen Shi %13 = tail call i8 @llvm.umax.i8(i8 %10, i8 %and12.4) 76972ce9d1cSBen Shi %arrayidx.5 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 5 7702d69827cSNikita Popov %14 = load i8, ptr %arrayidx.5, align 1 77172ce9d1cSBen Shi %arrayidx3.5 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 5 7722d69827cSNikita Popov %15 = load i8, ptr %arrayidx3.5, align 1 77372ce9d1cSBen Shi %and12.5 = and i8 %15, %14 77472ce9d1cSBen Shi %16 = tail call i8 @llvm.umax.i8(i8 %13, i8 %and12.5) 77572ce9d1cSBen Shi %arrayidx.6 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 6 7762d69827cSNikita Popov %17 = load i8, ptr %arrayidx.6, align 1 77772ce9d1cSBen Shi %arrayidx3.6 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 6 7782d69827cSNikita Popov %18 = load i8, ptr %arrayidx3.6, align 1 77972ce9d1cSBen Shi %and12.6 = and i8 %18, %17 78072ce9d1cSBen Shi %19 = tail call i8 @llvm.umax.i8(i8 %16, i8 %and12.6) 78172ce9d1cSBen Shi %arrayidx.7 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 7 7822d69827cSNikita Popov %20 = load i8, ptr %arrayidx.7, align 1 78372ce9d1cSBen Shi %arrayidx3.7 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 7 7842d69827cSNikita Popov %21 = load i8, ptr %arrayidx3.7, align 1 78572ce9d1cSBen Shi %and12.7 = and i8 %21, %20 78672ce9d1cSBen Shi %22 = tail call i8 @llvm.umax.i8(i8 %19, i8 %and12.7) 78772ce9d1cSBen Shi ret i8 %22 78872ce9d1cSBen Shi} 78972ce9d1cSBen Shi 79072ce9d1cSBen Shideclare i8 @llvm.umin.i8(i8, i8) 79172ce9d1cSBen Shi 79272ce9d1cSBen Shidefine i8 @reduce_umin(ptr %a, ptr %b) { 79372ce9d1cSBen Shi; CHECK-LABEL: @reduce_umin( 79472ce9d1cSBen Shi; CHECK-NEXT: entry: 79572ce9d1cSBen Shi; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_BUF:%.*]], ptr [[A:%.*]], i64 0, i32 0, i64 0 79672ce9d1cSBen Shi; CHECK-NEXT: 
[[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_BUF]], ptr [[B:%.*]], i64 0, i32 0, i64 0 79772ce9d1cSBen Shi; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 1 79872ce9d1cSBen Shi; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i8>, ptr [[ARRAYIDX3]], align 1 79972ce9d1cSBen Shi; CHECK-NEXT: [[TMP2:%.*]] = and <8 x i8> [[TMP1]], [[TMP0]] 80072ce9d1cSBen Shi; CHECK-NEXT: [[TMP3:%.*]] = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> [[TMP2]]) 80172ce9d1cSBen Shi; CHECK-NEXT: ret i8 [[TMP3]] 80272ce9d1cSBen Shi; 80372ce9d1cSBen Shientry: 80472ce9d1cSBen Shi %arrayidx = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 0 8052d69827cSNikita Popov %0 = load i8, ptr %arrayidx, align 1 80672ce9d1cSBen Shi %arrayidx3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 0 8072d69827cSNikita Popov %1 = load i8, ptr %arrayidx3, align 1 80872ce9d1cSBen Shi %and12 = and i8 %1, %0 80972ce9d1cSBen Shi %arrayidx.1 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 1 8102d69827cSNikita Popov %2 = load i8, ptr %arrayidx.1, align 1 81172ce9d1cSBen Shi %arrayidx3.1 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 1 8122d69827cSNikita Popov %3 = load i8, ptr %arrayidx3.1, align 1 81372ce9d1cSBen Shi %and12.1 = and i8 %3, %2 81472ce9d1cSBen Shi %4 = tail call i8 @llvm.umin.i8(i8 %and12, i8 %and12.1) 81572ce9d1cSBen Shi %arrayidx.2 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 2 8162d69827cSNikita Popov %5 = load i8, ptr %arrayidx.2, align 1 81772ce9d1cSBen Shi %arrayidx3.2 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 2 8182d69827cSNikita Popov %6 = load i8, ptr %arrayidx3.2, align 1 81972ce9d1cSBen Shi %and12.2 = and i8 %6, %5 82072ce9d1cSBen Shi %7 = tail call i8 @llvm.umin.i8(i8 %4, i8 %and12.2) 82172ce9d1cSBen Shi %arrayidx.3 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 3 8222d69827cSNikita Popov %8 = load i8, ptr %arrayidx.3, align 1 82372ce9d1cSBen Shi %arrayidx3.3 
= getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 3 8242d69827cSNikita Popov %9 = load i8, ptr %arrayidx3.3, align 1 82572ce9d1cSBen Shi %and12.3 = and i8 %9, %8 82672ce9d1cSBen Shi %10 = tail call i8 @llvm.umin.i8(i8 %7, i8 %and12.3) 82772ce9d1cSBen Shi %arrayidx.4 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 4 8282d69827cSNikita Popov %11 = load i8, ptr %arrayidx.4, align 1 82972ce9d1cSBen Shi %arrayidx3.4 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 4 8302d69827cSNikita Popov %12 = load i8, ptr %arrayidx3.4, align 1 83172ce9d1cSBen Shi %and12.4 = and i8 %12, %11 83272ce9d1cSBen Shi %13 = tail call i8 @llvm.umin.i8(i8 %10, i8 %and12.4) 83372ce9d1cSBen Shi %arrayidx.5 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 5 8342d69827cSNikita Popov %14 = load i8, ptr %arrayidx.5, align 1 83572ce9d1cSBen Shi %arrayidx3.5 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 5 8362d69827cSNikita Popov %15 = load i8, ptr %arrayidx3.5, align 1 83772ce9d1cSBen Shi %and12.5 = and i8 %15, %14 83872ce9d1cSBen Shi %16 = tail call i8 @llvm.umin.i8(i8 %13, i8 %and12.5) 83972ce9d1cSBen Shi %arrayidx.6 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 6 8402d69827cSNikita Popov %17 = load i8, ptr %arrayidx.6, align 1 84172ce9d1cSBen Shi %arrayidx3.6 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 6 8422d69827cSNikita Popov %18 = load i8, ptr %arrayidx3.6, align 1 84372ce9d1cSBen Shi %and12.6 = and i8 %18, %17 84472ce9d1cSBen Shi %19 = tail call i8 @llvm.umin.i8(i8 %16, i8 %and12.6) 84572ce9d1cSBen Shi %arrayidx.7 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 7 8462d69827cSNikita Popov %20 = load i8, ptr %arrayidx.7, align 1 84772ce9d1cSBen Shi %arrayidx3.7 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 7 8482d69827cSNikita Popov %21 = load i8, ptr %arrayidx3.7, align 1 84972ce9d1cSBen Shi %and12.7 = and i8 %21, %20 85072ce9d1cSBen Shi %22 = 
tail call i8 @llvm.umin.i8(i8 %19, i8 %and12.7) 85172ce9d1cSBen Shi ret i8 %22 85272ce9d1cSBen Shi} 85310625958SPhilip Reames 85410625958SPhilip Reames; Next batch exercises reductions involving zext of narrower loads 85510625958SPhilip Reames 85610625958SPhilip Reamesdefine i64 @red_zext_ld_2xi64(ptr %ptr) { 85710625958SPhilip Reames; CHECK-LABEL: @red_zext_ld_2xi64( 85810625958SPhilip Reames; CHECK-NEXT: entry: 85910625958SPhilip Reames; CHECK-NEXT: [[LD0:%.*]] = load i8, ptr [[PTR:%.*]], align 1 86010625958SPhilip Reames; CHECK-NEXT: [[ZEXT:%.*]] = zext i8 [[LD0]] to i64 86110625958SPhilip Reames; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1 86210625958SPhilip Reames; CHECK-NEXT: [[LD1:%.*]] = load i8, ptr [[GEP]], align 1 86310625958SPhilip Reames; CHECK-NEXT: [[ZEXT_1:%.*]] = zext i8 [[LD1]] to i64 86410625958SPhilip Reames; CHECK-NEXT: [[ADD_1:%.*]] = add nuw nsw i64 [[ZEXT]], [[ZEXT_1]] 86510625958SPhilip Reames; CHECK-NEXT: ret i64 [[ADD_1]] 86610625958SPhilip Reames; 86710625958SPhilip Reamesentry: 86810625958SPhilip Reames %ld0 = load i8, ptr %ptr 86910625958SPhilip Reames %zext = zext i8 %ld0 to i64 87010625958SPhilip Reames %gep = getelementptr inbounds i8, ptr %ptr, i64 1 87110625958SPhilip Reames %ld1 = load i8, ptr %gep 87210625958SPhilip Reames %zext.1 = zext i8 %ld1 to i64 87310625958SPhilip Reames %add.1 = add nuw nsw i64 %zext, %zext.1 87410625958SPhilip Reames ret i64 %add.1 87510625958SPhilip Reames} 87610625958SPhilip Reames 87710625958SPhilip Reamesdefine i64 @red_zext_ld_4xi64(ptr %ptr) { 87810625958SPhilip Reames; CHECK-LABEL: @red_zext_ld_4xi64( 87910625958SPhilip Reames; CHECK-NEXT: entry: 880*7523086aSAlexey Bataev; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[PTR:%.*]], align 1 881*7523086aSAlexey Bataev; CHECK-NEXT: [[TMP1:%.*]] = zext <4 x i8> [[TMP0]] to <4 x i16> 882*7523086aSAlexey Bataev; CHECK-NEXT: [[TMP2:%.*]] = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> [[TMP1]]) 883*7523086aSAlexey Bataev; 
CHECK-NEXT: [[ADD_3:%.*]] = zext i16 [[TMP2]] to i64 884a9888211SAlexey Bataev; CHECK-NEXT: ret i64 [[ADD_3]] 88510625958SPhilip Reames; 88610625958SPhilip Reamesentry: 88710625958SPhilip Reames %ld0 = load i8, ptr %ptr 88810625958SPhilip Reames %zext = zext i8 %ld0 to i64 88910625958SPhilip Reames %gep = getelementptr inbounds i8, ptr %ptr, i64 1 89010625958SPhilip Reames %ld1 = load i8, ptr %gep 89110625958SPhilip Reames %zext.1 = zext i8 %ld1 to i64 89210625958SPhilip Reames %add.1 = add nuw nsw i64 %zext, %zext.1 89310625958SPhilip Reames %gep.1 = getelementptr inbounds i8, ptr %ptr, i64 2 89410625958SPhilip Reames %ld2 = load i8, ptr %gep.1 89510625958SPhilip Reames %zext.2 = zext i8 %ld2 to i64 89610625958SPhilip Reames %add.2 = add nuw nsw i64 %add.1, %zext.2 89710625958SPhilip Reames %gep.2 = getelementptr inbounds i8, ptr %ptr, i64 3 89810625958SPhilip Reames %ld3 = load i8, ptr %gep.2 89910625958SPhilip Reames %zext.3 = zext i8 %ld3 to i64 90010625958SPhilip Reames %add.3 = add nuw nsw i64 %add.2, %zext.3 90110625958SPhilip Reames ret i64 %add.3 90210625958SPhilip Reames} 90310625958SPhilip Reames 90410625958SPhilip Reamesdefine i64 @red_zext_ld_8xi64(ptr %ptr) { 90510625958SPhilip Reames; CHECK-LABEL: @red_zext_ld_8xi64( 90610625958SPhilip Reames; CHECK-NEXT: entry: 90710625958SPhilip Reames; CHECK-NEXT: [[TMP0:%.*]] = load <8 x i8>, ptr [[PTR:%.*]], align 1 90810625958SPhilip Reames; CHECK-NEXT: [[TMP1:%.*]] = zext <8 x i8> [[TMP0]] to <8 x i64> 90910625958SPhilip Reames; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[TMP1]]) 91010625958SPhilip Reames; CHECK-NEXT: ret i64 [[TMP2]] 91110625958SPhilip Reames; 91210625958SPhilip Reamesentry: 91310625958SPhilip Reames %ld0 = load i8, ptr %ptr 91410625958SPhilip Reames %zext = zext i8 %ld0 to i64 91510625958SPhilip Reames %gep = getelementptr inbounds i8, ptr %ptr, i64 1 91610625958SPhilip Reames %ld1 = load i8, ptr %gep 91710625958SPhilip Reames %zext.1 = zext i8 %ld1 to i64 
91810625958SPhilip Reames %add.1 = add nuw nsw i64 %zext, %zext.1 91910625958SPhilip Reames %gep.1 = getelementptr inbounds i8, ptr %ptr, i64 2 92010625958SPhilip Reames %ld2 = load i8, ptr %gep.1 92110625958SPhilip Reames %zext.2 = zext i8 %ld2 to i64 92210625958SPhilip Reames %add.2 = add nuw nsw i64 %add.1, %zext.2 92310625958SPhilip Reames %gep.2 = getelementptr inbounds i8, ptr %ptr, i64 3 92410625958SPhilip Reames %ld3 = load i8, ptr %gep.2 92510625958SPhilip Reames %zext.3 = zext i8 %ld3 to i64 92610625958SPhilip Reames %add.3 = add nuw nsw i64 %add.2, %zext.3 92710625958SPhilip Reames %gep.3 = getelementptr inbounds i8, ptr %ptr, i64 4 92810625958SPhilip Reames %ld4 = load i8, ptr %gep.3 92910625958SPhilip Reames %zext.4 = zext i8 %ld4 to i64 93010625958SPhilip Reames %add.4 = add nuw nsw i64 %add.3, %zext.4 93110625958SPhilip Reames %gep.4 = getelementptr inbounds i8, ptr %ptr, i64 5 93210625958SPhilip Reames %ld5 = load i8, ptr %gep.4 93310625958SPhilip Reames %zext.5 = zext i8 %ld5 to i64 93410625958SPhilip Reames %add.5 = add nuw nsw i64 %add.4, %zext.5 93510625958SPhilip Reames %gep.5 = getelementptr inbounds i8, ptr %ptr, i64 6 93610625958SPhilip Reames %ld6 = load i8, ptr %gep.5 93710625958SPhilip Reames %zext.6 = zext i8 %ld6 to i64 93810625958SPhilip Reames %add.6 = add nuw nsw i64 %add.5, %zext.6 93910625958SPhilip Reames %gep.6 = getelementptr inbounds i8, ptr %ptr, i64 7 94010625958SPhilip Reames %ld7 = load i8, ptr %gep.6 94110625958SPhilip Reames %zext.7 = zext i8 %ld7 to i64 94210625958SPhilip Reames %add.7 = add nuw nsw i64 %add.6, %zext.7 94310625958SPhilip Reames ret i64 %add.7 94410625958SPhilip Reames} 94510625958SPhilip Reames 94610625958SPhilip Reamesdefine i64 @red_zext_ld_16xi64(ptr %ptr) { 94710625958SPhilip Reames; CHECK-LABEL: @red_zext_ld_16xi64( 94810625958SPhilip Reames; CHECK-NEXT: entry: 94910625958SPhilip Reames; CHECK-NEXT: [[TMP0:%.*]] = load <16 x i8>, ptr [[PTR:%.*]], align 1 95010625958SPhilip Reames; CHECK-NEXT: 
[[TMP1:%.*]] = zext <16 x i8> [[TMP0]] to <16 x i64> 95110625958SPhilip Reames; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> [[TMP1]]) 95210625958SPhilip Reames; CHECK-NEXT: ret i64 [[TMP2]] 95310625958SPhilip Reames; 95410625958SPhilip Reamesentry: 95510625958SPhilip Reames %ld0 = load i8, ptr %ptr 95610625958SPhilip Reames %zext = zext i8 %ld0 to i64 95710625958SPhilip Reames %gep = getelementptr inbounds i8, ptr %ptr, i64 1 95810625958SPhilip Reames %ld1 = load i8, ptr %gep 95910625958SPhilip Reames %zext.1 = zext i8 %ld1 to i64 96010625958SPhilip Reames %add.1 = add nuw nsw i64 %zext, %zext.1 96110625958SPhilip Reames %gep.1 = getelementptr inbounds i8, ptr %ptr, i64 2 96210625958SPhilip Reames %ld2 = load i8, ptr %gep.1 96310625958SPhilip Reames %zext.2 = zext i8 %ld2 to i64 96410625958SPhilip Reames %add.2 = add nuw nsw i64 %add.1, %zext.2 96510625958SPhilip Reames %gep.2 = getelementptr inbounds i8, ptr %ptr, i64 3 96610625958SPhilip Reames %ld3 = load i8, ptr %gep.2 96710625958SPhilip Reames %zext.3 = zext i8 %ld3 to i64 96810625958SPhilip Reames %add.3 = add nuw nsw i64 %add.2, %zext.3 96910625958SPhilip Reames %gep.3 = getelementptr inbounds i8, ptr %ptr, i64 4 97010625958SPhilip Reames %ld4 = load i8, ptr %gep.3 97110625958SPhilip Reames %zext.4 = zext i8 %ld4 to i64 97210625958SPhilip Reames %add.4 = add nuw nsw i64 %add.3, %zext.4 97310625958SPhilip Reames %gep.4 = getelementptr inbounds i8, ptr %ptr, i64 5 97410625958SPhilip Reames %ld5 = load i8, ptr %gep.4 97510625958SPhilip Reames %zext.5 = zext i8 %ld5 to i64 97610625958SPhilip Reames %add.5 = add nuw nsw i64 %add.4, %zext.5 97710625958SPhilip Reames %gep.5 = getelementptr inbounds i8, ptr %ptr, i64 6 97810625958SPhilip Reames %ld6 = load i8, ptr %gep.5 97910625958SPhilip Reames %zext.6 = zext i8 %ld6 to i64 98010625958SPhilip Reames %add.6 = add nuw nsw i64 %add.5, %zext.6 98110625958SPhilip Reames %gep.6 = getelementptr inbounds i8, ptr %ptr, i64 7 
98210625958SPhilip Reames %ld7 = load i8, ptr %gep.6 98310625958SPhilip Reames %zext.7 = zext i8 %ld7 to i64 98410625958SPhilip Reames %add.7 = add nuw nsw i64 %add.6, %zext.7 98510625958SPhilip Reames %gep.7 = getelementptr inbounds i8, ptr %ptr, i64 8 98610625958SPhilip Reames %ld8 = load i8, ptr %gep.7 98710625958SPhilip Reames %zext.8 = zext i8 %ld8 to i64 98810625958SPhilip Reames %add.8 = add nuw nsw i64 %add.7, %zext.8 98910625958SPhilip Reames %gep.8 = getelementptr inbounds i8, ptr %ptr, i64 9 99010625958SPhilip Reames %ld9 = load i8, ptr %gep.8 99110625958SPhilip Reames %zext.9 = zext i8 %ld9 to i64 99210625958SPhilip Reames %add.9 = add nuw nsw i64 %add.8, %zext.9 99310625958SPhilip Reames %gep.9 = getelementptr inbounds i8, ptr %ptr, i64 10 99410625958SPhilip Reames %ld10 = load i8, ptr %gep.9 99510625958SPhilip Reames %zext.10 = zext i8 %ld10 to i64 99610625958SPhilip Reames %add.10 = add nuw nsw i64 %add.9, %zext.10 99710625958SPhilip Reames %gep.10 = getelementptr inbounds i8, ptr %ptr, i64 11 99810625958SPhilip Reames %ld11 = load i8, ptr %gep.10 99910625958SPhilip Reames %zext.11 = zext i8 %ld11 to i64 100010625958SPhilip Reames %add.11 = add nuw nsw i64 %add.10, %zext.11 100110625958SPhilip Reames %gep.11 = getelementptr inbounds i8, ptr %ptr, i64 12 100210625958SPhilip Reames %ld12 = load i8, ptr %gep.11 100310625958SPhilip Reames %zext.12 = zext i8 %ld12 to i64 100410625958SPhilip Reames %add.12 = add nuw nsw i64 %add.11, %zext.12 100510625958SPhilip Reames %gep.12 = getelementptr inbounds i8, ptr %ptr, i64 13 100610625958SPhilip Reames %ld13 = load i8, ptr %gep.12 100710625958SPhilip Reames %zext.13 = zext i8 %ld13 to i64 100810625958SPhilip Reames %add.13 = add nuw nsw i64 %add.12, %zext.13 100910625958SPhilip Reames %gep.13 = getelementptr inbounds i8, ptr %ptr, i64 14 101010625958SPhilip Reames %ld14 = load i8, ptr %gep.13 101110625958SPhilip Reames %zext.14 = zext i8 %ld14 to i64 101210625958SPhilip Reames %add.14 = add nuw nsw i64 %add.13, 
%zext.14 101310625958SPhilip Reames %gep.14 = getelementptr inbounds i8, ptr %ptr, i64 15 101410625958SPhilip Reames %ld15 = load i8, ptr %gep.14 101510625958SPhilip Reames %zext.15 = zext i8 %ld15 to i64 101610625958SPhilip Reames %add.15 = add nuw nsw i64 %add.14, %zext.15 101710625958SPhilip Reames ret i64 %add.15 101810625958SPhilip Reames} 101910625958SPhilip Reames 1020e69f8bacSLuke Laudeclare i32 @llvm.abs.i32(i32, i1) 102110625958SPhilip Reames 1022e69f8bacSLuke Laudefine i32 @stride_sum_abs_diff(ptr %p, ptr %q, i64 %stride) { 1023e69f8bacSLuke Lau; CHECK-LABEL: @stride_sum_abs_diff( 1024f3d2609aSAlexey Bataev; CHECK-NEXT: [[P_2:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 [[STRIDE:%.*]] 1025f3d2609aSAlexey Bataev; CHECK-NEXT: [[Q_2:%.*]] = getelementptr inbounds i32, ptr [[Q:%.*]], i64 [[STRIDE]] 1026f3d2609aSAlexey Bataev; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[P]], align 4 1027f3d2609aSAlexey Bataev; CHECK-NEXT: [[TMP2:%.*]] = load <2 x i32>, ptr [[Q]], align 4 1028f23ea4cbSLuke Lau; CHECK-NEXT: [[TMP3:%.*]] = load <2 x i32>, ptr [[P_2]], align 4 1029f23ea4cbSLuke Lau; CHECK-NEXT: [[TMP4:%.*]] = load <2 x i32>, ptr [[Q_2]], align 4 1030f3d2609aSAlexey Bataev; CHECK-NEXT: [[TMP5:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> poison, <2 x i32> [[TMP1]], i64 0) 1031f3d2609aSAlexey Bataev; CHECK-NEXT: [[TMP6:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> [[TMP5]], <2 x i32> [[TMP3]], i64 2) 1032f3d2609aSAlexey Bataev; CHECK-NEXT: [[TMP7:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> poison, <2 x i32> [[TMP2]], i64 0) 1033f3d2609aSAlexey Bataev; CHECK-NEXT: [[TMP8:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> [[TMP7]], <2 x i32> [[TMP4]], i64 2) 1034f3d2609aSAlexey Bataev; CHECK-NEXT: [[TMP9:%.*]] = sub <4 x i32> [[TMP6]], [[TMP8]] 1035f3d2609aSAlexey Bataev; CHECK-NEXT: [[TMP10:%.*]] = call <4 x i32> @llvm.abs.v4i32(<4 x i32> [[TMP9]], i1 true) 1036f3d2609aSAlexey 
Bataev; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP10]]) 1037f3d2609aSAlexey Bataev; CHECK-NEXT: ret i32 [[TMP11]] 1038e69f8bacSLuke Lau; 1039e69f8bacSLuke Lau %x.0 = load i32, ptr %p 1040e69f8bacSLuke Lau %y.0 = load i32, ptr %q 1041e69f8bacSLuke Lau %sub.0 = sub i32 %x.0, %y.0 1042e69f8bacSLuke Lau %abs.0 = tail call i32 @llvm.abs.i32(i32 %sub.0, i1 true) 1043e69f8bacSLuke Lau 1044e69f8bacSLuke Lau %p.1 = getelementptr inbounds i32, ptr %p, i64 1 1045e69f8bacSLuke Lau %x.1 = load i32, ptr %p.1 1046e69f8bacSLuke Lau %q.1 = getelementptr inbounds i32, ptr %q, i64 1 1047e69f8bacSLuke Lau %y.1 = load i32, ptr %q.1 1048e69f8bacSLuke Lau %sub.1 = sub i32 %x.1, %y.1 1049e69f8bacSLuke Lau %abs.1 = tail call i32 @llvm.abs.i32(i32 %sub.1, i1 true) 1050e69f8bacSLuke Lau %sum.0 = add i32 %abs.0, %abs.1 1051e69f8bacSLuke Lau 1052e69f8bacSLuke Lau %p.2 = getelementptr inbounds i32, ptr %p, i64 %stride 1053e69f8bacSLuke Lau %q.2 = getelementptr inbounds i32, ptr %q, i64 %stride 1054e69f8bacSLuke Lau 1055e69f8bacSLuke Lau %x.2 = load i32, ptr %p.2 1056e69f8bacSLuke Lau %y.2 = load i32, ptr %q.2 1057e69f8bacSLuke Lau %sub.2 = sub i32 %x.2, %y.2 1058e69f8bacSLuke Lau %abs.2 = tail call i32 @llvm.abs.i32(i32 %sub.2, i1 true) 1059e69f8bacSLuke Lau %sum.1 = add i32 %sum.0, %abs.2 1060e69f8bacSLuke Lau 1061e69f8bacSLuke Lau %p.3 = getelementptr inbounds i32, ptr %p.2, i64 1 1062e69f8bacSLuke Lau %x.3 = load i32, ptr %p.3 1063e69f8bacSLuke Lau %q.3 = getelementptr inbounds i32, ptr %q.2, i64 1 1064e69f8bacSLuke Lau %y.3 = load i32, ptr %q.3 1065e69f8bacSLuke Lau %sub.3 = sub i32 %x.3, %y.3 1066e69f8bacSLuke Lau %abs.3 = tail call i32 @llvm.abs.i32(i32 %sub.3, i1 true) 1067e69f8bacSLuke Lau %sum.2 = add i32 %sum.1, %abs.3 1068e69f8bacSLuke Lau 1069e69f8bacSLuke Lau ret i32 %sum.2 1070e69f8bacSLuke Lau} 10711c9094a2SLuke Lau 10721c9094a2SLuke Laudefine i32 @reduce_sum_2arrays_a(ptr noalias %p, ptr noalias %q) { 10731c9094a2SLuke Lau; CHECK-LABEL: 
@reduce_sum_2arrays_a( 10741c9094a2SLuke Lau; CHECK-NEXT: entry: 1075f23ea4cbSLuke Lau; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[P:%.*]], align 1 1076f23ea4cbSLuke Lau; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[Q:%.*]], align 1 1077f3d2609aSAlexey Bataev; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i8> @llvm.vector.insert.v8i8.v4i8(<8 x i8> poison, <4 x i8> [[TMP0]], i64 0) 1078f3d2609aSAlexey Bataev; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.vector.insert.v8i8.v4i8(<8 x i8> [[TMP2]], <4 x i8> [[TMP1]], i64 4) 1079f3d2609aSAlexey Bataev; CHECK-NEXT: [[TMP4:%.*]] = zext <8 x i8> [[TMP3]] to <8 x i32> 1080f3d2609aSAlexey Bataev; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP4]]) 1081f3d2609aSAlexey Bataev; CHECK-NEXT: ret i32 [[TMP5]] 10821c9094a2SLuke Lau; 10831c9094a2SLuke Lauentry: 10841c9094a2SLuke Lau %x.0 = load i8, ptr %p, align 1 10851c9094a2SLuke Lau %conv = zext i8 %x.0 to i32 10861c9094a2SLuke Lau %y.0 = load i8, ptr %q, align 1 10871c9094a2SLuke Lau %conv3 = zext i8 %y.0 to i32 10881c9094a2SLuke Lau %add4 = add nuw nsw i32 %conv, %conv3 10891c9094a2SLuke Lau 10901c9094a2SLuke Lau %arrayidx.1 = getelementptr inbounds i8, ptr %p, i64 1 10911c9094a2SLuke Lau %x.1 = load i8, ptr %arrayidx.1, align 1 10921c9094a2SLuke Lau %conv.1 = zext i8 %x.1 to i32 10931c9094a2SLuke Lau %arrayidx2.1 = getelementptr inbounds i8, ptr %q, i64 1 10941c9094a2SLuke Lau %y.1 = load i8, ptr %arrayidx2.1, align 1 10951c9094a2SLuke Lau %conv3.1 = zext i8 %y.1 to i32 10961c9094a2SLuke Lau %add.1 = add nuw nsw i32 %add4, %conv.1 10971c9094a2SLuke Lau %add4.1 = add nuw nsw i32 %add.1, %conv3.1 10981c9094a2SLuke Lau 10991c9094a2SLuke Lau %arrayidx.2 = getelementptr inbounds i8, ptr %p, i64 2 11001c9094a2SLuke Lau %x.2 = load i8, ptr %arrayidx.2, align 1 11011c9094a2SLuke Lau %conv.2 = zext i8 %x.2 to i32 11021c9094a2SLuke Lau %arrayidx2.2 = getelementptr inbounds i8, ptr %q, i64 2 11031c9094a2SLuke Lau %y.2 = load i8, ptr %arrayidx2.2, align 
1 11041c9094a2SLuke Lau %conv3.2 = zext i8 %y.2 to i32 11051c9094a2SLuke Lau %add.2 = add nuw nsw i32 %add4.1, %conv.2 11061c9094a2SLuke Lau %add4.2 = add nuw nsw i32 %add.2, %conv3.2 11071c9094a2SLuke Lau 11081c9094a2SLuke Lau %arrayidx.3 = getelementptr inbounds i8, ptr %p, i64 3 11091c9094a2SLuke Lau %x.3 = load i8, ptr %arrayidx.3, align 1 11101c9094a2SLuke Lau %conv.3 = zext i8 %x.3 to i32 11111c9094a2SLuke Lau %arrayidx2.3 = getelementptr inbounds i8, ptr %q, i64 3 11121c9094a2SLuke Lau %y.3 = load i8, ptr %arrayidx2.3, align 1 11131c9094a2SLuke Lau %conv3.3 = zext i8 %y.3 to i32 11141c9094a2SLuke Lau %add.3 = add nuw nsw i32 %add4.2, %conv.3 11151c9094a2SLuke Lau %add4.3 = add nuw nsw i32 %add.3, %conv3.3 11161c9094a2SLuke Lau 11171c9094a2SLuke Lau ret i32 %add4.3 11181c9094a2SLuke Lau} 11191c9094a2SLuke Lau 11201c9094a2SLuke Laudefine i32 @reduce_sum_2arrays_b(ptr noalias noundef %x, ptr noalias %y) { 11211c9094a2SLuke Lau; CHECK-LABEL: @reduce_sum_2arrays_b( 11221c9094a2SLuke Lau; CHECK-NEXT: entry: 11231c9094a2SLuke Lau; CHECK-NEXT: [[TMP0:%.*]] = load <4 x i8>, ptr [[X:%.*]], align 1 1124f23ea4cbSLuke Lau; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, ptr [[Y:%.*]], align 1 1125f3d2609aSAlexey Bataev; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i8> @llvm.vector.insert.v8i8.v4i8(<8 x i8> poison, <4 x i8> [[TMP0]], i64 0) 1126f3d2609aSAlexey Bataev; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i8> @llvm.vector.insert.v8i8.v4i8(<8 x i8> [[TMP2]], <4 x i8> [[TMP1]], i64 4) 1127f3d2609aSAlexey Bataev; CHECK-NEXT: [[TMP4:%.*]] = zext <8 x i8> [[TMP3]] to <8 x i32> 1128f3d2609aSAlexey Bataev; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP4]]) 1129f3d2609aSAlexey Bataev; CHECK-NEXT: ret i32 [[TMP5]] 11301c9094a2SLuke Lau; 11311c9094a2SLuke Lau entry: 11321c9094a2SLuke Lau %0 = load i8, ptr %x, align 1 11331c9094a2SLuke Lau %conv = zext i8 %0 to i32 11341c9094a2SLuke Lau %arrayidx.1 = getelementptr inbounds i8, ptr %x, i64 1 11351c9094a2SLuke Lau 
%1 = load i8, ptr %arrayidx.1, align 1 11361c9094a2SLuke Lau %conv.1 = zext i8 %1 to i32 11371c9094a2SLuke Lau %add.1 = add nuw nsw i32 %conv, %conv.1 11381c9094a2SLuke Lau %arrayidx.2 = getelementptr inbounds i8, ptr %x, i64 2 11391c9094a2SLuke Lau %2 = load i8, ptr %arrayidx.2, align 1 11401c9094a2SLuke Lau %conv.2 = zext i8 %2 to i32 11411c9094a2SLuke Lau %add.2 = add nuw nsw i32 %add.1, %conv.2 11421c9094a2SLuke Lau %arrayidx.3 = getelementptr inbounds i8, ptr %x, i64 3 11431c9094a2SLuke Lau %3 = load i8, ptr %arrayidx.3, align 1 11441c9094a2SLuke Lau %conv.3 = zext i8 %3 to i32 11451c9094a2SLuke Lau %add.3 = add nuw nsw i32 %add.2, %conv.3 11461c9094a2SLuke Lau %4 = load i8, ptr %y, align 1 11471c9094a2SLuke Lau %conv9 = zext i8 %4 to i32 11481c9094a2SLuke Lau %add10 = add nuw nsw i32 %add.3, %conv9 11491c9094a2SLuke Lau %arrayidx8.1 = getelementptr inbounds i8, ptr %y, i64 1 11501c9094a2SLuke Lau %5 = load i8, ptr %arrayidx8.1, align 1 11511c9094a2SLuke Lau %conv9.1 = zext i8 %5 to i32 11521c9094a2SLuke Lau %add10.1 = add nuw nsw i32 %add10, %conv9.1 11531c9094a2SLuke Lau %arrayidx8.2 = getelementptr inbounds i8, ptr %y, i64 2 11541c9094a2SLuke Lau %6 = load i8, ptr %arrayidx8.2, align 1 11551c9094a2SLuke Lau %conv9.2 = zext i8 %6 to i32 11561c9094a2SLuke Lau %add10.2 = add nuw nsw i32 %add10.1, %conv9.2 11571c9094a2SLuke Lau %arrayidx8.3 = getelementptr inbounds i8, ptr %y, i64 3 11581c9094a2SLuke Lau %7 = load i8, ptr %arrayidx8.3, align 1 11591c9094a2SLuke Lau %conv9.3 = zext i8 %7 to i32 11601c9094a2SLuke Lau %add10.3 = add nuw nsw i32 %add10.2, %conv9.3 11611c9094a2SLuke Lau ret i32 %add10.3 11621c9094a2SLuke Lau} 116320864d2cSLuke Lau 116420864d2cSLuke Lau; Shouldn't vectorize to a reduction because we can't promote it 116520864d2cSLuke Laudefine bfloat @fadd_4xbf16(ptr %p) { 116620864d2cSLuke Lau; CHECK-LABEL: @fadd_4xbf16( 116720864d2cSLuke Lau; CHECK-NEXT: [[X0:%.*]] = load bfloat, ptr [[P:%.*]], align 2 116820864d2cSLuke Lau; CHECK-NEXT: [[P1:%.*]] 
= getelementptr bfloat, ptr [[P]], i32 1 116920864d2cSLuke Lau; CHECK-NEXT: [[X1:%.*]] = load bfloat, ptr [[P1]], align 2 117020864d2cSLuke Lau; CHECK-NEXT: [[P2:%.*]] = getelementptr bfloat, ptr [[P]], i32 2 117120864d2cSLuke Lau; CHECK-NEXT: [[X2:%.*]] = load bfloat, ptr [[P2]], align 2 117220864d2cSLuke Lau; CHECK-NEXT: [[P3:%.*]] = getelementptr bfloat, ptr [[P]], i32 3 117320864d2cSLuke Lau; CHECK-NEXT: [[X3:%.*]] = load bfloat, ptr [[P3]], align 2 117420864d2cSLuke Lau; CHECK-NEXT: [[R0:%.*]] = fadd fast bfloat [[X0]], [[X1]] 117520864d2cSLuke Lau; CHECK-NEXT: [[R1:%.*]] = fadd fast bfloat [[R0]], [[X2]] 117620864d2cSLuke Lau; CHECK-NEXT: [[R2:%.*]] = fadd fast bfloat [[R1]], [[X3]] 117720864d2cSLuke Lau; CHECK-NEXT: ret bfloat [[R2]] 117820864d2cSLuke Lau; 117920864d2cSLuke Lau %x0 = load bfloat, ptr %p 118020864d2cSLuke Lau %p1 = getelementptr bfloat, ptr %p, i32 1 118120864d2cSLuke Lau %x1 = load bfloat, ptr %p1 118220864d2cSLuke Lau %p2 = getelementptr bfloat, ptr %p, i32 2 118320864d2cSLuke Lau %x2 = load bfloat, ptr %p2 118420864d2cSLuke Lau %p3 = getelementptr bfloat, ptr %p, i32 3 118520864d2cSLuke Lau %x3 = load bfloat, ptr %p3 118620864d2cSLuke Lau 118720864d2cSLuke Lau %r0 = fadd fast bfloat %x0, %x1 118820864d2cSLuke Lau %r1 = fadd fast bfloat %r0, %x2 118920864d2cSLuke Lau %r2 = fadd fast bfloat %r1, %x3 119020864d2cSLuke Lau 119120864d2cSLuke Lau ret bfloat %r2 119220864d2cSLuke Lau} 119320864d2cSLuke Lau 119420864d2cSLuke Lau; Shouldn't vectorize to a reduction because there's no vfred{u,o}mul.vs 119520864d2cSLuke Laudefine bfloat @fmul_4xbf16(ptr %p) { 119620864d2cSLuke Lau; CHECK-LABEL: @fmul_4xbf16( 119720864d2cSLuke Lau; CHECK-NEXT: [[X0:%.*]] = load bfloat, ptr [[P:%.*]], align 2 119820864d2cSLuke Lau; CHECK-NEXT: [[P1:%.*]] = getelementptr bfloat, ptr [[P]], i32 1 119920864d2cSLuke Lau; CHECK-NEXT: [[X1:%.*]] = load bfloat, ptr [[P1]], align 2 120020864d2cSLuke Lau; CHECK-NEXT: [[P2:%.*]] = getelementptr bfloat, ptr [[P]], i32 2 
120120864d2cSLuke Lau; CHECK-NEXT: [[X2:%.*]] = load bfloat, ptr [[P2]], align 2 120220864d2cSLuke Lau; CHECK-NEXT: [[P3:%.*]] = getelementptr bfloat, ptr [[P]], i32 3 120320864d2cSLuke Lau; CHECK-NEXT: [[X3:%.*]] = load bfloat, ptr [[P3]], align 2 120420864d2cSLuke Lau; CHECK-NEXT: [[R0:%.*]] = fmul fast bfloat [[X0]], [[X1]] 120520864d2cSLuke Lau; CHECK-NEXT: [[R1:%.*]] = fmul fast bfloat [[R0]], [[X2]] 120620864d2cSLuke Lau; CHECK-NEXT: [[R2:%.*]] = fmul fast bfloat [[R1]], [[X3]] 120720864d2cSLuke Lau; CHECK-NEXT: ret bfloat [[R2]] 120820864d2cSLuke Lau; 120920864d2cSLuke Lau %x0 = load bfloat, ptr %p 121020864d2cSLuke Lau %p1 = getelementptr bfloat, ptr %p, i32 1 121120864d2cSLuke Lau %x1 = load bfloat, ptr %p1 121220864d2cSLuke Lau %p2 = getelementptr bfloat, ptr %p, i32 2 121320864d2cSLuke Lau %x2 = load bfloat, ptr %p2 121420864d2cSLuke Lau %p3 = getelementptr bfloat, ptr %p, i32 3 121520864d2cSLuke Lau %x3 = load bfloat, ptr %p3 121620864d2cSLuke Lau 121720864d2cSLuke Lau %r0 = fmul fast bfloat %x0, %x1 121820864d2cSLuke Lau %r1 = fmul fast bfloat %r0, %x2 121920864d2cSLuke Lau %r2 = fmul fast bfloat %r1, %x3 122020864d2cSLuke Lau 122120864d2cSLuke Lau ret bfloat %r2 122220864d2cSLuke Lau} 122320864d2cSLuke Lau 122420864d2cSLuke Lau; Shouldn't vectorize to a reduction on zvfhmin because we can't promote it 122520864d2cSLuke Laudefine half @fadd_4xf16(ptr %p) { 122620864d2cSLuke Lau; ZVFHMIN-LABEL: @fadd_4xf16( 122720864d2cSLuke Lau; ZVFHMIN-NEXT: [[X0:%.*]] = load half, ptr [[P:%.*]], align 2 122820864d2cSLuke Lau; ZVFHMIN-NEXT: [[P1:%.*]] = getelementptr half, ptr [[P]], i32 1 122920864d2cSLuke Lau; ZVFHMIN-NEXT: [[X1:%.*]] = load half, ptr [[P1]], align 2 123020864d2cSLuke Lau; ZVFHMIN-NEXT: [[P2:%.*]] = getelementptr half, ptr [[P]], i32 2 123120864d2cSLuke Lau; ZVFHMIN-NEXT: [[X2:%.*]] = load half, ptr [[P2]], align 2 123220864d2cSLuke Lau; ZVFHMIN-NEXT: [[P3:%.*]] = getelementptr half, ptr [[P]], i32 3 123320864d2cSLuke Lau; ZVFHMIN-NEXT: [[X3:%.*]] = 
load half, ptr [[P3]], align 2 123420864d2cSLuke Lau; ZVFHMIN-NEXT: [[R0:%.*]] = fadd fast half [[X0]], [[X1]] 123520864d2cSLuke Lau; ZVFHMIN-NEXT: [[R1:%.*]] = fadd fast half [[R0]], [[X2]] 123620864d2cSLuke Lau; ZVFHMIN-NEXT: [[R2:%.*]] = fadd fast half [[R1]], [[X3]] 123720864d2cSLuke Lau; ZVFHMIN-NEXT: ret half [[R2]] 123820864d2cSLuke Lau; 123920864d2cSLuke Lau; ZVFH-LABEL: @fadd_4xf16( 124020864d2cSLuke Lau; ZVFH-NEXT: [[TMP1:%.*]] = load <4 x half>, ptr [[P:%.*]], align 2 124120864d2cSLuke Lau; ZVFH-NEXT: [[TMP2:%.*]] = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> [[TMP1]]) 124220864d2cSLuke Lau; ZVFH-NEXT: ret half [[TMP2]] 124320864d2cSLuke Lau; 124420864d2cSLuke Lau %x0 = load half, ptr %p 124520864d2cSLuke Lau %p1 = getelementptr half, ptr %p, i32 1 124620864d2cSLuke Lau %x1 = load half, ptr %p1 124720864d2cSLuke Lau %p2 = getelementptr half, ptr %p, i32 2 124820864d2cSLuke Lau %x2 = load half, ptr %p2 124920864d2cSLuke Lau %p3 = getelementptr half, ptr %p, i32 3 125020864d2cSLuke Lau %x3 = load half, ptr %p3 125120864d2cSLuke Lau 125220864d2cSLuke Lau %r0 = fadd fast half %x0, %x1 125320864d2cSLuke Lau %r1 = fadd fast half %r0, %x2 125420864d2cSLuke Lau %r2 = fadd fast half %r1, %x3 125520864d2cSLuke Lau 125620864d2cSLuke Lau ret half %r2 125720864d2cSLuke Lau} 125820864d2cSLuke Lau 125920864d2cSLuke Lau; Shouldn't vectorize to a reduction because there's no vfred{u,o}mul.vs 126020864d2cSLuke Laudefine half @fmul_4xf16(ptr %p) { 126120864d2cSLuke Lau; CHECK-LABEL: @fmul_4xf16( 126220864d2cSLuke Lau; CHECK-NEXT: [[X0:%.*]] = load half, ptr [[P:%.*]], align 2 126320864d2cSLuke Lau; CHECK-NEXT: [[P1:%.*]] = getelementptr half, ptr [[P]], i32 1 126420864d2cSLuke Lau; CHECK-NEXT: [[X1:%.*]] = load half, ptr [[P1]], align 2 126520864d2cSLuke Lau; CHECK-NEXT: [[P2:%.*]] = getelementptr half, ptr [[P]], i32 2 126620864d2cSLuke Lau; CHECK-NEXT: [[X2:%.*]] = load half, ptr [[P2]], align 2 126720864d2cSLuke Lau; CHECK-NEXT: [[P3:%.*]] = 
getelementptr half, ptr [[P]], i32 3 126820864d2cSLuke Lau; CHECK-NEXT: [[X3:%.*]] = load half, ptr [[P3]], align 2 126920864d2cSLuke Lau; CHECK-NEXT: [[R0:%.*]] = fmul fast half [[X0]], [[X1]] 127020864d2cSLuke Lau; CHECK-NEXT: [[R1:%.*]] = fmul fast half [[R0]], [[X2]] 127120864d2cSLuke Lau; CHECK-NEXT: [[R2:%.*]] = fmul fast half [[R1]], [[X3]] 127220864d2cSLuke Lau; CHECK-NEXT: ret half [[R2]] 127320864d2cSLuke Lau; 127420864d2cSLuke Lau %x0 = load half, ptr %p 127520864d2cSLuke Lau %p1 = getelementptr half, ptr %p, i32 1 127620864d2cSLuke Lau %x1 = load half, ptr %p1 127720864d2cSLuke Lau %p2 = getelementptr half, ptr %p, i32 2 127820864d2cSLuke Lau %x2 = load half, ptr %p2 127920864d2cSLuke Lau %p3 = getelementptr half, ptr %p, i32 3 128020864d2cSLuke Lau %x3 = load half, ptr %p3 128120864d2cSLuke Lau 128220864d2cSLuke Lau %r0 = fmul fast half %x0, %x1 128320864d2cSLuke Lau %r1 = fmul fast half %r0, %x2 128420864d2cSLuke Lau %r2 = fmul fast half %r1, %x3 128520864d2cSLuke Lau 128620864d2cSLuke Lau ret half %r2 128720864d2cSLuke Lau} 1288