xref: /llvm-project/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll (revision 7523086a050d679370dfd86a0166d5f7168ffa09)
110625958SPhilip Reames; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
220864d2cSLuke Lau; RUN: opt < %s -passes=slp-vectorizer -mtriple=riscv64 \
320864d2cSLuke Lau; RUN: -mattr=+v,+zvfhmin,+zvfbfmin -riscv-v-slp-max-vf=0 -S \
420864d2cSLuke Lau; RUN: | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
520864d2cSLuke Lau; RUN: opt < %s -passes=slp-vectorizer -mtriple=riscv64 \
620864d2cSLuke Lau; RUN: -mattr=+v,+zvfh,+zvfbfmin -riscv-v-slp-max-vf=0 -S \
720864d2cSLuke Lau; RUN: | FileCheck %s --check-prefixes=CHECK,ZVFH,ZVL128
820864d2cSLuke Lau; RUN: opt < %s -passes=slp-vectorizer -mtriple=riscv64 \
920864d2cSLuke Lau; RUN: -mattr=+v,+zvl256b,+zvfh,+zvfbfmin -riscv-v-slp-max-vf=0 -S \
1020864d2cSLuke Lau; RUN: | FileCheck %s --check-prefixes=CHECK,ZVFH,ZVL256
1120864d2cSLuke Lau; RUN: opt < %s -passes=slp-vectorizer -mtriple=riscv64 \
1220864d2cSLuke Lau; RUN: -mattr=+v,+zvl512b,+zvfh,+zvfbfmin -riscv-v-slp-max-vf=0 -S \
1320864d2cSLuke Lau; RUN: | FileCheck %s --check-prefixes=CHECK,ZVFH,ZVL512
1410625958SPhilip Reames
1510625958SPhilip Reamestarget datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
1610625958SPhilip Reamestarget triple = "riscv64"
1710625958SPhilip Reames
1810625958SPhilip Reames; First batch of tests are simple reductions of various widths
1910625958SPhilip Reames
; 2-element add chain of contiguous loads. Per the CHECK lines this stays
; scalar: SLP does not form a <2 x i64> reduction here.
2010625958SPhilip Reamesdefine i64 @red_ld_2xi64(ptr %ptr) {
2110625958SPhilip Reames; CHECK-LABEL: @red_ld_2xi64(
2210625958SPhilip Reames; CHECK-NEXT:  entry:
2310625958SPhilip Reames; CHECK-NEXT:    [[LD0:%.*]] = load i64, ptr [[PTR:%.*]], align 8
2410625958SPhilip Reames; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 1
2510625958SPhilip Reames; CHECK-NEXT:    [[LD1:%.*]] = load i64, ptr [[GEP]], align 8
2610625958SPhilip Reames; CHECK-NEXT:    [[ADD_1:%.*]] = add nuw nsw i64 [[LD0]], [[LD1]]
2710625958SPhilip Reames; CHECK-NEXT:    ret i64 [[ADD_1]]
2810625958SPhilip Reames;
2910625958SPhilip Reamesentry:
3010625958SPhilip Reames  %ld0 = load i64, ptr %ptr
3110625958SPhilip Reames  %gep = getelementptr inbounds i64, ptr %ptr, i64 1
3210625958SPhilip Reames  %ld1 = load i64, ptr %gep
3310625958SPhilip Reames  %add.1 = add nuw nsw i64 %ld0, %ld1
3410625958SPhilip Reames  ret i64 %add.1
3510625958SPhilip Reames}
3610625958SPhilip Reames
; 4 contiguous i64 loads summed in a linear chain. Expected to vectorize
; into a single <4 x i64> load feeding @llvm.vector.reduce.add.v4i64.
3710625958SPhilip Reamesdefine i64 @red_ld_4xi64(ptr %ptr) {
3810625958SPhilip Reames; CHECK-LABEL: @red_ld_4xi64(
3910625958SPhilip Reames; CHECK-NEXT:  entry:
4010625958SPhilip Reames; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i64>, ptr [[PTR:%.*]], align 8
4110625958SPhilip Reames; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP0]])
4210625958SPhilip Reames; CHECK-NEXT:    ret i64 [[TMP1]]
4310625958SPhilip Reames;
4410625958SPhilip Reamesentry:
4510625958SPhilip Reames  %ld0 = load i64, ptr %ptr
4610625958SPhilip Reames  %gep = getelementptr inbounds i64, ptr %ptr, i64 1
4710625958SPhilip Reames  %ld1 = load i64, ptr %gep
4810625958SPhilip Reames  %add.1 = add nuw nsw i64 %ld0, %ld1
4910625958SPhilip Reames  %gep.1 = getelementptr inbounds i64, ptr %ptr, i64 2
5010625958SPhilip Reames  %ld2 = load i64, ptr %gep.1
5110625958SPhilip Reames  %add.2 = add nuw nsw i64 %add.1, %ld2
5210625958SPhilip Reames  %gep.2 = getelementptr inbounds i64, ptr %ptr, i64 3
5310625958SPhilip Reames  %ld3 = load i64, ptr %gep.2
5410625958SPhilip Reames  %add.3 = add nuw nsw i64 %add.2, %ld3
5510625958SPhilip Reames  ret i64 %add.3
5610625958SPhilip Reames}
5710625958SPhilip Reames
; 8 contiguous i64 loads summed in a linear chain. Expected to vectorize
; into a single <8 x i64> load feeding @llvm.vector.reduce.add.v8i64.
5810625958SPhilip Reamesdefine i64 @red_ld_8xi64(ptr %ptr) {
5910625958SPhilip Reames; CHECK-LABEL: @red_ld_8xi64(
6010625958SPhilip Reames; CHECK-NEXT:  entry:
6110625958SPhilip Reames; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i64>, ptr [[PTR:%.*]], align 8
6210625958SPhilip Reames; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[TMP0]])
6310625958SPhilip Reames; CHECK-NEXT:    ret i64 [[TMP1]]
6410625958SPhilip Reames;
6510625958SPhilip Reamesentry:
6610625958SPhilip Reames  %ld0 = load i64, ptr %ptr
6710625958SPhilip Reames  %gep = getelementptr inbounds i64, ptr %ptr, i64 1
6810625958SPhilip Reames  %ld1 = load i64, ptr %gep
6910625958SPhilip Reames  %add.1 = add nuw nsw i64 %ld0, %ld1
7010625958SPhilip Reames  %gep.1 = getelementptr inbounds i64, ptr %ptr, i64 2
7110625958SPhilip Reames  %ld2 = load i64, ptr %gep.1
7210625958SPhilip Reames  %add.2 = add nuw nsw i64 %add.1, %ld2
7310625958SPhilip Reames  %gep.2 = getelementptr inbounds i64, ptr %ptr, i64 3
7410625958SPhilip Reames  %ld3 = load i64, ptr %gep.2
7510625958SPhilip Reames  %add.3 = add nuw nsw i64 %add.2, %ld3
7610625958SPhilip Reames  %gep.3 = getelementptr inbounds i64, ptr %ptr, i64 4
7710625958SPhilip Reames  %ld4 = load i64, ptr %gep.3
7810625958SPhilip Reames  %add.4 = add nuw nsw i64 %add.3, %ld4
7910625958SPhilip Reames  %gep.4 = getelementptr inbounds i64, ptr %ptr, i64 5
8010625958SPhilip Reames  %ld5 = load i64, ptr %gep.4
8110625958SPhilip Reames  %add.5 = add nuw nsw i64 %add.4, %ld5
8210625958SPhilip Reames  %gep.5 = getelementptr inbounds i64, ptr %ptr, i64 6
8310625958SPhilip Reames  %ld6 = load i64, ptr %gep.5
8410625958SPhilip Reames  %add.6 = add nuw nsw i64 %add.5, %ld6
8510625958SPhilip Reames  %gep.6 = getelementptr inbounds i64, ptr %ptr, i64 7
8610625958SPhilip Reames  %ld7 = load i64, ptr %gep.6
8710625958SPhilip Reames  %add.7 = add nuw nsw i64 %add.6, %ld7
8810625958SPhilip Reames  ret i64 %add.7
8910625958SPhilip Reames}
9010625958SPhilip Reames
; 16 contiguous i64 loads summed in a linear chain. Expected to vectorize
; into a single <16 x i64> load feeding @llvm.vector.reduce.add.v16i64
; (-riscv-v-slp-max-vf=0 removes the SLP VF cap, allowing the wide VF).
9110625958SPhilip Reamesdefine i64 @red_ld_16xi64(ptr %ptr) {
9210625958SPhilip Reames; CHECK-LABEL: @red_ld_16xi64(
9310625958SPhilip Reames; CHECK-NEXT:  entry:
9410625958SPhilip Reames; CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i64>, ptr [[PTR:%.*]], align 8
9510625958SPhilip Reames; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> [[TMP0]])
9610625958SPhilip Reames; CHECK-NEXT:    ret i64 [[TMP1]]
9710625958SPhilip Reames;
9810625958SPhilip Reamesentry:
9910625958SPhilip Reames  %ld0 = load i64, ptr %ptr
10010625958SPhilip Reames  %gep = getelementptr inbounds i64, ptr %ptr, i64 1
10110625958SPhilip Reames  %ld1 = load i64, ptr %gep
10210625958SPhilip Reames  %add.1 = add nuw nsw i64 %ld0, %ld1
10310625958SPhilip Reames  %gep.1 = getelementptr inbounds i64, ptr %ptr, i64 2
10410625958SPhilip Reames  %ld2 = load i64, ptr %gep.1
10510625958SPhilip Reames  %add.2 = add nuw nsw i64 %add.1, %ld2
10610625958SPhilip Reames  %gep.2 = getelementptr inbounds i64, ptr %ptr, i64 3
10710625958SPhilip Reames  %ld3 = load i64, ptr %gep.2
10810625958SPhilip Reames  %add.3 = add nuw nsw i64 %add.2, %ld3
10910625958SPhilip Reames  %gep.3 = getelementptr inbounds i64, ptr %ptr, i64 4
11010625958SPhilip Reames  %ld4 = load i64, ptr %gep.3
11110625958SPhilip Reames  %add.4 = add nuw nsw i64 %add.3, %ld4
11210625958SPhilip Reames  %gep.4 = getelementptr inbounds i64, ptr %ptr, i64 5
11310625958SPhilip Reames  %ld5 = load i64, ptr %gep.4
11410625958SPhilip Reames  %add.5 = add nuw nsw i64 %add.4, %ld5
11510625958SPhilip Reames  %gep.5 = getelementptr inbounds i64, ptr %ptr, i64 6
11610625958SPhilip Reames  %ld6 = load i64, ptr %gep.5
11710625958SPhilip Reames  %add.6 = add nuw nsw i64 %add.5, %ld6
11810625958SPhilip Reames  %gep.6 = getelementptr inbounds i64, ptr %ptr, i64 7
11910625958SPhilip Reames  %ld7 = load i64, ptr %gep.6
12010625958SPhilip Reames  %add.7 = add nuw nsw i64 %add.6, %ld7
12110625958SPhilip Reames  %gep.7 = getelementptr inbounds i64, ptr %ptr, i64 8
12210625958SPhilip Reames  %ld8 = load i64, ptr %gep.7
12310625958SPhilip Reames  %add.8 = add nuw nsw i64 %add.7, %ld8
12410625958SPhilip Reames  %gep.8 = getelementptr inbounds i64, ptr %ptr, i64 9
12510625958SPhilip Reames  %ld9 = load i64, ptr %gep.8
12610625958SPhilip Reames  %add.9 = add nuw nsw i64 %add.8, %ld9
12710625958SPhilip Reames  %gep.9 = getelementptr inbounds i64, ptr %ptr, i64 10
12810625958SPhilip Reames  %ld10 = load i64, ptr %gep.9
12910625958SPhilip Reames  %add.10 = add nuw nsw i64 %add.9, %ld10
13010625958SPhilip Reames  %gep.10 = getelementptr inbounds i64, ptr %ptr, i64 11
13110625958SPhilip Reames  %ld11 = load i64, ptr %gep.10
13210625958SPhilip Reames  %add.11 = add nuw nsw i64 %add.10, %ld11
13310625958SPhilip Reames  %gep.11 = getelementptr inbounds i64, ptr %ptr, i64 12
13410625958SPhilip Reames  %ld12 = load i64, ptr %gep.11
13510625958SPhilip Reames  %add.12 = add nuw nsw i64 %add.11, %ld12
13610625958SPhilip Reames  %gep.12 = getelementptr inbounds i64, ptr %ptr, i64 13
13710625958SPhilip Reames  %ld13 = load i64, ptr %gep.12
13810625958SPhilip Reames  %add.13 = add nuw nsw i64 %add.12, %ld13
13910625958SPhilip Reames  %gep.13 = getelementptr inbounds i64, ptr %ptr, i64 14
14010625958SPhilip Reames  %ld14 = load i64, ptr %gep.13
14110625958SPhilip Reames  %add.14 = add nuw nsw i64 %add.13, %ld14
14210625958SPhilip Reames  %gep.14 = getelementptr inbounds i64, ptr %ptr, i64 15
14310625958SPhilip Reames  %ld15 = load i64, ptr %gep.14
14410625958SPhilip Reames  %add.15 = add nuw nsw i64 %add.14, %ld15
14510625958SPhilip Reames  ret i64 %add.15
14610625958SPhilip Reames}
14710625958SPhilip Reames
148deb3ecf0SAlexey Bataev
; Same 16-element add reduction, but the loads are at even i64 indices
; (0, 2, 4, ..., 30), i.e. a constant stride of 2 elements = 16 bytes.
; Expected to become @llvm.experimental.vp.strided.load with byte stride
; i64 16, feeding @llvm.vector.reduce.add.v16i64.
149deb3ecf0SAlexey Bataevdefine i64 @red_strided_ld_16xi64(ptr %ptr) {
150deb3ecf0SAlexey Bataev; CHECK-LABEL: @red_strided_ld_16xi64(
151deb3ecf0SAlexey Bataev; CHECK-NEXT:  entry:
15238fffa63SPaul Walker; CHECK-NEXT:    [[TMP0:%.*]] = call <16 x i64> @llvm.experimental.vp.strided.load.v16i64.p0.i64(ptr align 8 [[PTR:%.*]], i64 16, <16 x i1> splat (i1 true), i32 16)
153799fd3d8SAlexey Bataev; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> [[TMP0]])
154799fd3d8SAlexey Bataev; CHECK-NEXT:    ret i64 [[TMP1]]
155deb3ecf0SAlexey Bataev;
156deb3ecf0SAlexey Bataeventry:
157deb3ecf0SAlexey Bataev  %ld0 = load i64, ptr %ptr
158deb3ecf0SAlexey Bataev  %gep = getelementptr inbounds i64, ptr %ptr, i64 2
159deb3ecf0SAlexey Bataev  %ld1 = load i64, ptr %gep
160deb3ecf0SAlexey Bataev  %add.1 = add nuw nsw i64 %ld0, %ld1
161deb3ecf0SAlexey Bataev  %gep.1 = getelementptr inbounds i64, ptr %ptr, i64 4
162deb3ecf0SAlexey Bataev  %ld2 = load i64, ptr %gep.1
163deb3ecf0SAlexey Bataev  %add.2 = add nuw nsw i64 %add.1, %ld2
164deb3ecf0SAlexey Bataev  %gep.2 = getelementptr inbounds i64, ptr %ptr, i64 6
165deb3ecf0SAlexey Bataev  %ld3 = load i64, ptr %gep.2
166deb3ecf0SAlexey Bataev  %add.3 = add nuw nsw i64 %add.2, %ld3
167deb3ecf0SAlexey Bataev  %gep.3 = getelementptr inbounds i64, ptr %ptr, i64 8
168deb3ecf0SAlexey Bataev  %ld4 = load i64, ptr %gep.3
169deb3ecf0SAlexey Bataev  %add.4 = add nuw nsw i64 %add.3, %ld4
170deb3ecf0SAlexey Bataev  %gep.4 = getelementptr inbounds i64, ptr %ptr, i64 10
171deb3ecf0SAlexey Bataev  %ld5 = load i64, ptr %gep.4
172deb3ecf0SAlexey Bataev  %add.5 = add nuw nsw i64 %add.4, %ld5
173deb3ecf0SAlexey Bataev  %gep.5 = getelementptr inbounds i64, ptr %ptr, i64 12
174deb3ecf0SAlexey Bataev  %ld6 = load i64, ptr %gep.5
175deb3ecf0SAlexey Bataev  %add.6 = add nuw nsw i64 %add.5, %ld6
176deb3ecf0SAlexey Bataev  %gep.6 = getelementptr inbounds i64, ptr %ptr, i64 14
177deb3ecf0SAlexey Bataev  %ld7 = load i64, ptr %gep.6
178deb3ecf0SAlexey Bataev  %add.7 = add nuw nsw i64 %add.6, %ld7
179deb3ecf0SAlexey Bataev  %gep.7 = getelementptr inbounds i64, ptr %ptr, i64 16
180deb3ecf0SAlexey Bataev  %ld8 = load i64, ptr %gep.7
181deb3ecf0SAlexey Bataev  %add.8 = add nuw nsw i64 %add.7, %ld8
182deb3ecf0SAlexey Bataev  %gep.8 = getelementptr inbounds i64, ptr %ptr, i64 18
183deb3ecf0SAlexey Bataev  %ld9 = load i64, ptr %gep.8
184deb3ecf0SAlexey Bataev  %add.9 = add nuw nsw i64 %add.8, %ld9
185deb3ecf0SAlexey Bataev  %gep.9 = getelementptr inbounds i64, ptr %ptr, i64 20
186deb3ecf0SAlexey Bataev  %ld10 = load i64, ptr %gep.9
187deb3ecf0SAlexey Bataev  %add.10 = add nuw nsw i64 %add.9, %ld10
188deb3ecf0SAlexey Bataev  %gep.10 = getelementptr inbounds i64, ptr %ptr, i64 22
189deb3ecf0SAlexey Bataev  %ld11 = load i64, ptr %gep.10
190deb3ecf0SAlexey Bataev  %add.11 = add nuw nsw i64 %add.10, %ld11
191deb3ecf0SAlexey Bataev  %gep.11 = getelementptr inbounds i64, ptr %ptr, i64 24
192deb3ecf0SAlexey Bataev  %ld12 = load i64, ptr %gep.11
193deb3ecf0SAlexey Bataev  %add.12 = add nuw nsw i64 %add.11, %ld12
194deb3ecf0SAlexey Bataev  %gep.12 = getelementptr inbounds i64, ptr %ptr, i64 26
195deb3ecf0SAlexey Bataev  %ld13 = load i64, ptr %gep.12
196deb3ecf0SAlexey Bataev  %add.13 = add nuw nsw i64 %add.12, %ld13
197deb3ecf0SAlexey Bataev  %gep.13 = getelementptr inbounds i64, ptr %ptr, i64 28
198deb3ecf0SAlexey Bataev  %ld14 = load i64, ptr %gep.13
199deb3ecf0SAlexey Bataev  %add.14 = add nuw nsw i64 %add.13, %ld14
200deb3ecf0SAlexey Bataev  %gep.14 = getelementptr inbounds i64, ptr %ptr, i64 30
201deb3ecf0SAlexey Bataev  %ld15 = load i64, ptr %gep.14
202deb3ecf0SAlexey Bataev  %add.15 = add nuw nsw i64 %add.14, %ld15
203deb3ecf0SAlexey Bataev  ret i64 %add.15
204deb3ecf0SAlexey Bataev}
205deb3ecf0SAlexey Bataev
20610625958SPhilip Reames; Next batch tests different reduction kinds
20710625958SPhilip Reames
20810625958SPhilip Reames%struct.buf = type { [8 x i8] }
20910625958SPhilip Reames
; AND reduction: 8 pairs of i8 loads from %a/%b are xor'ed, then chained
; with 'and'. The first 'and' also folds in the constant 1, so the CHECK
; lines expect vector loads + xor + @llvm.vector.reduce.and.v8i8 followed
; by a scalar 'and i8 ..., 1' (OP_RDX).
21010625958SPhilip Reamesdefine i8 @reduce_and(ptr %a, ptr %b) {
21110625958SPhilip Reames; CHECK-LABEL: @reduce_and(
21210625958SPhilip Reames; CHECK-NEXT:  entry:
21310625958SPhilip Reames; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_BUF:%.*]], ptr [[A:%.*]], i64 0, i32 0, i64 0
21410625958SPhilip Reames; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_BUF]], ptr [[B:%.*]], i64 0, i32 0, i64 0
21510625958SPhilip Reames; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 1
21610625958SPhilip Reames; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[ARRAYIDX3]], align 1
21710625958SPhilip Reames; CHECK-NEXT:    [[TMP2:%.*]] = xor <8 x i8> [[TMP1]], [[TMP0]]
21810625958SPhilip Reames; CHECK-NEXT:    [[TMP3:%.*]] = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> [[TMP2]])
21910625958SPhilip Reames; CHECK-NEXT:    [[OP_RDX:%.*]] = and i8 [[TMP3]], 1
22010625958SPhilip Reames; CHECK-NEXT:    ret i8 [[OP_RDX]]
22110625958SPhilip Reames;
22210625958SPhilip Reamesentry:
22310625958SPhilip Reames  %arrayidx = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 0
2242d69827cSNikita Popov  %0 = load i8, ptr %arrayidx, align 1
22510625958SPhilip Reames  %arrayidx3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 0
2262d69827cSNikita Popov  %1 = load i8, ptr %arrayidx3, align 1
22710625958SPhilip Reames  %xor12 = xor i8 %1, %0
22810625958SPhilip Reames  %and13 = and i8 %xor12, 1
22910625958SPhilip Reames  %arrayidx.1 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 1
2302d69827cSNikita Popov  %2 = load i8, ptr %arrayidx.1, align 1
23110625958SPhilip Reames  %arrayidx3.1 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 1
2322d69827cSNikita Popov  %3 = load i8, ptr %arrayidx3.1, align 1
23310625958SPhilip Reames  %xor12.1 = xor i8 %3, %2
23410625958SPhilip Reames  %and13.1 = and i8 %xor12.1, %and13
23510625958SPhilip Reames  %arrayidx.2 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 2
2362d69827cSNikita Popov  %4 = load i8, ptr %arrayidx.2, align 1
23710625958SPhilip Reames  %arrayidx3.2 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 2
2382d69827cSNikita Popov  %5 = load i8, ptr %arrayidx3.2, align 1
23910625958SPhilip Reames  %xor12.2 = xor i8 %5, %4
24010625958SPhilip Reames  %and13.2 = and i8 %xor12.2, %and13.1
24110625958SPhilip Reames  %arrayidx.3 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 3
2422d69827cSNikita Popov  %6 = load i8, ptr %arrayidx.3, align 1
24310625958SPhilip Reames  %arrayidx3.3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 3
2442d69827cSNikita Popov  %7 = load i8, ptr %arrayidx3.3, align 1
24510625958SPhilip Reames  %xor12.3 = xor i8 %7, %6
24610625958SPhilip Reames  %and13.3 = and i8 %xor12.3, %and13.2
24710625958SPhilip Reames  %arrayidx.4 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 4
2482d69827cSNikita Popov  %8 = load i8, ptr %arrayidx.4, align 1
24910625958SPhilip Reames  %arrayidx3.4 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 4
2502d69827cSNikita Popov  %9 = load i8, ptr %arrayidx3.4, align 1
25110625958SPhilip Reames  %xor12.4 = xor i8 %9, %8
25210625958SPhilip Reames  %and13.4 = and i8 %xor12.4, %and13.3
25310625958SPhilip Reames  %arrayidx.5 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 5
2542d69827cSNikita Popov  %10 = load i8, ptr %arrayidx.5, align 1
25510625958SPhilip Reames  %arrayidx3.5 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 5
2562d69827cSNikita Popov  %11 = load i8, ptr %arrayidx3.5, align 1
25710625958SPhilip Reames  %xor12.5 = xor i8 %11, %10
25810625958SPhilip Reames  %and13.5 = and i8 %xor12.5, %and13.4
25910625958SPhilip Reames  %arrayidx.6 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 6
2602d69827cSNikita Popov  %12 = load i8, ptr %arrayidx.6, align 1
26110625958SPhilip Reames  %arrayidx3.6 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 6
2622d69827cSNikita Popov  %13 = load i8, ptr %arrayidx3.6, align 1
26310625958SPhilip Reames  %xor12.6 = xor i8 %13, %12
26410625958SPhilip Reames  %and13.6 = and i8 %xor12.6, %and13.5
26510625958SPhilip Reames  %arrayidx.7 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 7
2662d69827cSNikita Popov  %14 = load i8, ptr %arrayidx.7, align 1
26710625958SPhilip Reames  %arrayidx3.7 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 7
2682d69827cSNikita Popov  %15 = load i8, ptr %arrayidx3.7, align 1
26910625958SPhilip Reames  %xor12.7 = xor i8 %15, %14
27010625958SPhilip Reames  %and13.7 = and i8 %xor12.7, %and13.6
27110625958SPhilip Reames  ret i8 %and13.7
27210625958SPhilip Reames}
27310625958SPhilip Reames
; OR reduction: 8 pairs of i8 loads xor'ed then or-chained, with no extra
; constant operand, so the CHECK lines expect the reduction to collapse
; fully into @llvm.vector.reduce.or.v8i8 (no trailing OP_RDX).
2749a82bda9Swangpcdefine i8 @reduce_or_1(ptr %a, ptr %b) {
2759a82bda9Swangpc; CHECK-LABEL: @reduce_or_1(
27610625958SPhilip Reames; CHECK-NEXT:  entry:
27710625958SPhilip Reames; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_BUF:%.*]], ptr [[A:%.*]], i64 0, i32 0, i64 0
27810625958SPhilip Reames; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_BUF]], ptr [[B:%.*]], i64 0, i32 0, i64 0
27910625958SPhilip Reames; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 1
28010625958SPhilip Reames; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[ARRAYIDX3]], align 1
28110625958SPhilip Reames; CHECK-NEXT:    [[TMP2:%.*]] = xor <8 x i8> [[TMP1]], [[TMP0]]
28210625958SPhilip Reames; CHECK-NEXT:    [[TMP3:%.*]] = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> [[TMP2]])
28310625958SPhilip Reames; CHECK-NEXT:    ret i8 [[TMP3]]
28410625958SPhilip Reames;
28510625958SPhilip Reames
28610625958SPhilip Reamesentry:
28710625958SPhilip Reames  %arrayidx = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 0
2882d69827cSNikita Popov  %0 = load i8, ptr %arrayidx, align 1
28910625958SPhilip Reames  %arrayidx3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 0
2902d69827cSNikita Popov  %1 = load i8, ptr %arrayidx3, align 1
29110625958SPhilip Reames  %xor12 = xor i8 %1, %0
29210625958SPhilip Reames  %arrayidx.1 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 1
2932d69827cSNikita Popov  %2 = load i8, ptr %arrayidx.1, align 1
29410625958SPhilip Reames  %arrayidx3.1 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 1
2952d69827cSNikita Popov  %3 = load i8, ptr %arrayidx3.1, align 1
29610625958SPhilip Reames  %xor12.1 = xor i8 %3, %2
29710625958SPhilip Reames  %or13.1 = or i8 %xor12.1, %xor12
29810625958SPhilip Reames  %arrayidx.2 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 2
2992d69827cSNikita Popov  %4 = load i8, ptr %arrayidx.2, align 1
30010625958SPhilip Reames  %arrayidx3.2 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 2
3012d69827cSNikita Popov  %5 = load i8, ptr %arrayidx3.2, align 1
30210625958SPhilip Reames  %xor12.2 = xor i8 %5, %4
30310625958SPhilip Reames  %or13.2 = or i8 %xor12.2, %or13.1
30410625958SPhilip Reames  %arrayidx.3 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 3
3052d69827cSNikita Popov  %6 = load i8, ptr %arrayidx.3, align 1
30610625958SPhilip Reames  %arrayidx3.3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 3
3072d69827cSNikita Popov  %7 = load i8, ptr %arrayidx3.3, align 1
30810625958SPhilip Reames  %xor12.3 = xor i8 %7, %6
30910625958SPhilip Reames  %or13.3 = or i8 %xor12.3, %or13.2
31010625958SPhilip Reames  %arrayidx.4 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 4
3112d69827cSNikita Popov  %8 = load i8, ptr %arrayidx.4, align 1
31210625958SPhilip Reames  %arrayidx3.4 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 4
3132d69827cSNikita Popov  %9 = load i8, ptr %arrayidx3.4, align 1
31410625958SPhilip Reames  %xor12.4 = xor i8 %9, %8
31510625958SPhilip Reames  %or13.4 = or i8 %xor12.4, %or13.3
31610625958SPhilip Reames  %arrayidx.5 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 5
3172d69827cSNikita Popov  %10 = load i8, ptr %arrayidx.5, align 1
31810625958SPhilip Reames  %arrayidx3.5 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 5
3192d69827cSNikita Popov  %11 = load i8, ptr %arrayidx3.5, align 1
32010625958SPhilip Reames  %xor12.5 = xor i8 %11, %10
32110625958SPhilip Reames  %or13.5 = or i8 %xor12.5, %or13.4
32210625958SPhilip Reames  %arrayidx.6 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 6
3232d69827cSNikita Popov  %12 = load i8, ptr %arrayidx.6, align 1
32410625958SPhilip Reames  %arrayidx3.6 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 6
3252d69827cSNikita Popov  %13 = load i8, ptr %arrayidx3.6, align 1
32610625958SPhilip Reames  %xor12.6 = xor i8 %13, %12
32710625958SPhilip Reames  %or13.6 = or i8 %xor12.6, %or13.5
32810625958SPhilip Reames  %arrayidx.7 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 7
3292d69827cSNikita Popov  %14 = load i8, ptr %arrayidx.7, align 1
33010625958SPhilip Reames  %arrayidx3.7 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 7
3312d69827cSNikita Popov  %15 = load i8, ptr %arrayidx3.7, align 1
33210625958SPhilip Reames  %xor12.7 = xor i8 %15, %14
33310625958SPhilip Reames  %or13.7 = or i8 %xor12.7, %or13.6
33410625958SPhilip Reames  ret i8 %or13.7
33510625958SPhilip Reames}
33610625958SPhilip Reames
; OR reduction over 32 i1 icmp results feeding a branch. Only %1 (the shl)
; appears as a non-constant compare operand, at positions 15 and 22 of the
; chain. Per the CHECK lines, ZVL512 (with its wider minimum VLEN) forms a
; single <32 x i64> compare + @llvm.vector.reduce.or.v32i1, while the
; ZVFHMIN/ZVL128/ZVL256 configs split into two <16 x i64> halves whose
; v16i1 reductions are or'ed together (OP_RDX).
3379a82bda9Swangpcdefine void @reduce_or_2() {
33820864d2cSLuke Lau; ZVFHMIN-LABEL: @reduce_or_2(
33920864d2cSLuke Lau; ZVFHMIN-NEXT:    [[TMP1:%.*]] = shl i64 0, 0
34020864d2cSLuke Lau; ZVFHMIN-NEXT:    [[TMP2:%.*]] = insertelement <16 x i64> <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 poison>, i64 [[TMP1]], i32 15
34120864d2cSLuke Lau; ZVFHMIN-NEXT:    [[TMP3:%.*]] = icmp ult <16 x i64> [[TMP2]], zeroinitializer
34220864d2cSLuke Lau; ZVFHMIN-NEXT:    [[TMP4:%.*]] = insertelement <16 x i64> <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 poison, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>, i64 [[TMP1]], i32 6
34320864d2cSLuke Lau; ZVFHMIN-NEXT:    [[TMP5:%.*]] = icmp ult <16 x i64> [[TMP4]], zeroinitializer
34420864d2cSLuke Lau; ZVFHMIN-NEXT:    [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP3]])
34520864d2cSLuke Lau; ZVFHMIN-NEXT:    [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP5]])
34620864d2cSLuke Lau; ZVFHMIN-NEXT:    [[OP_RDX:%.*]] = or i1 [[TMP6]], [[TMP7]]
34720864d2cSLuke Lau; ZVFHMIN-NEXT:    br i1 [[OP_RDX]], label [[TMP9:%.*]], label [[TMP8:%.*]]
34820864d2cSLuke Lau; ZVFHMIN:       8:
34920864d2cSLuke Lau; ZVFHMIN-NEXT:    ret void
35020864d2cSLuke Lau; ZVFHMIN:       9:
35120864d2cSLuke Lau; ZVFHMIN-NEXT:    ret void
35220864d2cSLuke Lau;
353514b38cdSPhilip Reames; ZVL128-LABEL: @reduce_or_2(
354514b38cdSPhilip Reames; ZVL128-NEXT:    [[TMP1:%.*]] = shl i64 0, 0
355514b38cdSPhilip Reames; ZVL128-NEXT:    [[TMP2:%.*]] = insertelement <16 x i64> <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 poison>, i64 [[TMP1]], i32 15
356514b38cdSPhilip Reames; ZVL128-NEXT:    [[TMP3:%.*]] = icmp ult <16 x i64> [[TMP2]], zeroinitializer
357514b38cdSPhilip Reames; ZVL128-NEXT:    [[TMP4:%.*]] = insertelement <16 x i64> <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 poison, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>, i64 [[TMP1]], i32 6
358514b38cdSPhilip Reames; ZVL128-NEXT:    [[TMP5:%.*]] = icmp ult <16 x i64> [[TMP4]], zeroinitializer
359514b38cdSPhilip Reames; ZVL128-NEXT:    [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP3]])
360514b38cdSPhilip Reames; ZVL128-NEXT:    [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP5]])
361514b38cdSPhilip Reames; ZVL128-NEXT:    [[OP_RDX:%.*]] = or i1 [[TMP6]], [[TMP7]]
362514b38cdSPhilip Reames; ZVL128-NEXT:    br i1 [[OP_RDX]], label [[TMP9:%.*]], label [[TMP8:%.*]]
363514b38cdSPhilip Reames; ZVL128:       8:
364514b38cdSPhilip Reames; ZVL128-NEXT:    ret void
365514b38cdSPhilip Reames; ZVL128:       9:
366514b38cdSPhilip Reames; ZVL128-NEXT:    ret void
367514b38cdSPhilip Reames;
368514b38cdSPhilip Reames; ZVL256-LABEL: @reduce_or_2(
369514b38cdSPhilip Reames; ZVL256-NEXT:    [[TMP1:%.*]] = shl i64 0, 0
370514b38cdSPhilip Reames; ZVL256-NEXT:    [[TMP2:%.*]] = insertelement <16 x i64> <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 poison>, i64 [[TMP1]], i32 15
371514b38cdSPhilip Reames; ZVL256-NEXT:    [[TMP3:%.*]] = icmp ult <16 x i64> [[TMP2]], zeroinitializer
372514b38cdSPhilip Reames; ZVL256-NEXT:    [[TMP4:%.*]] = insertelement <16 x i64> <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 poison, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>, i64 [[TMP1]], i32 6
373514b38cdSPhilip Reames; ZVL256-NEXT:    [[TMP5:%.*]] = icmp ult <16 x i64> [[TMP4]], zeroinitializer
374514b38cdSPhilip Reames; ZVL256-NEXT:    [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP3]])
375514b38cdSPhilip Reames; ZVL256-NEXT:    [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP5]])
376514b38cdSPhilip Reames; ZVL256-NEXT:    [[OP_RDX:%.*]] = or i1 [[TMP6]], [[TMP7]]
377514b38cdSPhilip Reames; ZVL256-NEXT:    br i1 [[OP_RDX]], label [[TMP9:%.*]], label [[TMP8:%.*]]
378514b38cdSPhilip Reames; ZVL256:       8:
379514b38cdSPhilip Reames; ZVL256-NEXT:    ret void
380514b38cdSPhilip Reames; ZVL256:       9:
381514b38cdSPhilip Reames; ZVL256-NEXT:    ret void
382514b38cdSPhilip Reames;
383514b38cdSPhilip Reames; ZVL512-LABEL: @reduce_or_2(
384514b38cdSPhilip Reames; ZVL512-NEXT:    [[TMP1:%.*]] = shl i64 0, 0
385514b38cdSPhilip Reames; ZVL512-NEXT:    [[TMP2:%.*]] = insertelement <32 x i64> <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 poison, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 poison, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>, i64 [[TMP1]], i32 15
386514b38cdSPhilip Reames; ZVL512-NEXT:    [[TMP3:%.*]] = shufflevector <32 x i64> [[TMP2]], <32 x i64> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 15, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
387514b38cdSPhilip Reames; ZVL512-NEXT:    [[TMP4:%.*]] = icmp ult <32 x i64> [[TMP3]], zeroinitializer
388514b38cdSPhilip Reames; ZVL512-NEXT:    [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> [[TMP4]])
389514b38cdSPhilip Reames; ZVL512-NEXT:    br i1 [[TMP5]], label [[TMP7:%.*]], label [[TMP6:%.*]]
390514b38cdSPhilip Reames; ZVL512:       6:
391514b38cdSPhilip Reames; ZVL512-NEXT:    ret void
392514b38cdSPhilip Reames; ZVL512:       7:
393514b38cdSPhilip Reames; ZVL512-NEXT:    ret void
3949a82bda9Swangpc;
3959a82bda9Swangpc  %1 = shl i64 0, 0
3969a82bda9Swangpc  %2 = icmp ult i64 0, 0
3979a82bda9Swangpc  %3 = icmp ult i64 0, 0
3989a82bda9Swangpc  %4 = or i1 %2, %3
3999a82bda9Swangpc  %5 = icmp ult i64 0, 0
4009a82bda9Swangpc  %6 = or i1 %4, %5
4019a82bda9Swangpc  %7 = icmp ult i64 0, 0
4029a82bda9Swangpc  %8 = or i1 %6, %7
4039a82bda9Swangpc  %9 = icmp ult i64 0, 0
4049a82bda9Swangpc  %10 = or i1 %8, %9
4059a82bda9Swangpc  %11 = icmp ult i64 0, 0
4069a82bda9Swangpc  %12 = or i1 %10, %11
4079a82bda9Swangpc  %13 = icmp ult i64 0, 0
4089a82bda9Swangpc  %14 = or i1 %12, %13
4099a82bda9Swangpc  %15 = icmp ult i64 0, 0
4109a82bda9Swangpc  %16 = or i1 %14, %15
4119a82bda9Swangpc  %17 = icmp ult i64 0, 0
4129a82bda9Swangpc  %18 = or i1 %16, %17
4139a82bda9Swangpc  %19 = icmp ult i64 0, 0
4149a82bda9Swangpc  %20 = or i1 %18, %19
4159a82bda9Swangpc  %21 = icmp ult i64 0, 0
4169a82bda9Swangpc  %22 = or i1 %20, %21
4179a82bda9Swangpc  %23 = icmp ult i64 0, 0
4189a82bda9Swangpc  %24 = or i1 %22, %23
4199a82bda9Swangpc  %25 = icmp ult i64 0, 0
4209a82bda9Swangpc  %26 = or i1 %24, %25
4219a82bda9Swangpc  %27 = icmp ult i64 0, 0
4229a82bda9Swangpc  %28 = or i1 %26, %27
4239a82bda9Swangpc  %29 = icmp ult i64 0, 0
4249a82bda9Swangpc  %30 = or i1 %28, %29
4259a82bda9Swangpc  %31 = icmp ult i64 %1, 0
4269a82bda9Swangpc  %32 = or i1 %30, %31
4279a82bda9Swangpc  %33 = icmp ult i64 0, 0
4289a82bda9Swangpc  %34 = or i1 %32, %33
4299a82bda9Swangpc  %35 = icmp ult i64 0, 0
4309a82bda9Swangpc  %36 = or i1 %34, %35
4319a82bda9Swangpc  %37 = icmp ult i64 0, 0
4329a82bda9Swangpc  %38 = or i1 %36, %37
4339a82bda9Swangpc  %39 = icmp ult i64 0, 0
4349a82bda9Swangpc  %40 = or i1 %38, %39
4359a82bda9Swangpc  %41 = icmp ult i64 0, 0
4369a82bda9Swangpc  %42 = or i1 %40, %41
4379a82bda9Swangpc  %43 = icmp ult i64 0, 0
4389a82bda9Swangpc  %44 = or i1 %42, %43
4399a82bda9Swangpc  %45 = icmp ult i64 %1, 0
4409a82bda9Swangpc  %46 = or i1 %44, %45
4419a82bda9Swangpc  %47 = icmp ult i64 0, 0
4429a82bda9Swangpc  %48 = or i1 %46, %47
4439a82bda9Swangpc  %49 = icmp ult i64 0, 0
4449a82bda9Swangpc  %50 = or i1 %48, %49
4459a82bda9Swangpc  %51 = icmp ult i64 0, 0
4469a82bda9Swangpc  %52 = or i1 %50, %51
4479a82bda9Swangpc  %53 = icmp ult i64 0, 0
4489a82bda9Swangpc  %54 = or i1 %52, %53
4499a82bda9Swangpc  %55 = icmp ult i64 0, 0
4509a82bda9Swangpc  %56 = or i1 %54, %55
4519a82bda9Swangpc  %57 = icmp ult i64 0, 0
4529a82bda9Swangpc  %58 = or i1 %56, %57
4539a82bda9Swangpc  %59 = icmp ult i64 0, 0
4549a82bda9Swangpc  %60 = or i1 %58, %59
4559a82bda9Swangpc  %61 = icmp ult i64 0, 0
4569a82bda9Swangpc  %62 = or i1 %60, %61
4579a82bda9Swangpc  %63 = icmp ult i64 0, 0
4589a82bda9Swangpc  %64 = or i1 %62, %63
4599a82bda9Swangpc  br i1 %64, label %66, label %65
4609a82bda9Swangpc
4619a82bda9Swangpc65:                                               ; preds = %0
4629a82bda9Swangpc  ret void
4639a82bda9Swangpc
4649a82bda9Swangpc66:                                               ; preds = %0
4659a82bda9Swangpc  ret void
4669a82bda9Swangpc}
4679a82bda9Swangpc
46810625958SPhilip Reamesdefine i8 @reduce_xor(ptr %a, ptr %b) {
46910625958SPhilip Reames; CHECK-LABEL: @reduce_xor(
47010625958SPhilip Reames; CHECK-NEXT:  entry:
47110625958SPhilip Reames; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_BUF:%.*]], ptr [[A:%.*]], i64 0, i32 0, i64 0
47210625958SPhilip Reames; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_BUF]], ptr [[B:%.*]], i64 0, i32 0, i64 0
47310625958SPhilip Reames; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 1
47410625958SPhilip Reames; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[ARRAYIDX3]], align 1
47510625958SPhilip Reames; CHECK-NEXT:    [[TMP2:%.*]] = and <8 x i8> [[TMP1]], [[TMP0]]
47610625958SPhilip Reames; CHECK-NEXT:    [[TMP3:%.*]] = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> [[TMP2]])
47710625958SPhilip Reames; CHECK-NEXT:    [[OP_RDX:%.*]] = xor i8 [[TMP3]], 1
47810625958SPhilip Reames; CHECK-NEXT:    ret i8 [[OP_RDX]]
47910625958SPhilip Reames;
48010625958SPhilip Reamesentry:
48110625958SPhilip Reames  %arrayidx = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 0
4822d69827cSNikita Popov  %0 = load i8, ptr %arrayidx, align 1
48310625958SPhilip Reames  %arrayidx3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 0
4842d69827cSNikita Popov  %1 = load i8, ptr %arrayidx3, align 1
48510625958SPhilip Reames  %and12 = and i8 %1, %0
48610625958SPhilip Reames  %arrayidx.1 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 1
4872d69827cSNikita Popov  %2 = load i8, ptr %arrayidx.1, align 1
48810625958SPhilip Reames  %arrayidx3.1 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 1
4892d69827cSNikita Popov  %3 = load i8, ptr %arrayidx3.1, align 1
49010625958SPhilip Reames  %and12.1 = and i8 %3, %2
49110625958SPhilip Reames  %4 = xor i8 %and12, %and12.1
49210625958SPhilip Reames  %arrayidx.2 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 2
4932d69827cSNikita Popov  %5 = load i8, ptr %arrayidx.2, align 1
49410625958SPhilip Reames  %arrayidx3.2 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 2
4952d69827cSNikita Popov  %6 = load i8, ptr %arrayidx3.2, align 1
49610625958SPhilip Reames  %and12.2 = and i8 %6, %5
49710625958SPhilip Reames  %7 = xor i8 %4, %and12.2
49810625958SPhilip Reames  %arrayidx.3 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 3
4992d69827cSNikita Popov  %8 = load i8, ptr %arrayidx.3, align 1
50010625958SPhilip Reames  %arrayidx3.3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 3
5012d69827cSNikita Popov  %9 = load i8, ptr %arrayidx3.3, align 1
50210625958SPhilip Reames  %and12.3 = and i8 %9, %8
50310625958SPhilip Reames  %10 = xor i8 %7, %and12.3
50410625958SPhilip Reames  %arrayidx.4 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 4
5052d69827cSNikita Popov  %11 = load i8, ptr %arrayidx.4, align 1
50610625958SPhilip Reames  %arrayidx3.4 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 4
5072d69827cSNikita Popov  %12 = load i8, ptr %arrayidx3.4, align 1
50810625958SPhilip Reames  %and12.4 = and i8 %12, %11
50910625958SPhilip Reames  %13 = xor i8 %10, %and12.4
51010625958SPhilip Reames  %arrayidx.5 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 5
5112d69827cSNikita Popov  %14 = load i8, ptr %arrayidx.5, align 1
51210625958SPhilip Reames  %arrayidx3.5 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 5
5132d69827cSNikita Popov  %15 = load i8, ptr %arrayidx3.5, align 1
51410625958SPhilip Reames  %and12.5 = and i8 %15, %14
51510625958SPhilip Reames  %16 = xor i8 %13, %and12.5
51610625958SPhilip Reames  %arrayidx.6 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 6
5172d69827cSNikita Popov  %17 = load i8, ptr %arrayidx.6, align 1
51810625958SPhilip Reames  %arrayidx3.6 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 6
5192d69827cSNikita Popov  %18 = load i8, ptr %arrayidx3.6, align 1
52010625958SPhilip Reames  %and12.6 = and i8 %18, %17
52110625958SPhilip Reames  %19 = xor i8 %16, %and12.6
52210625958SPhilip Reames  %arrayidx.7 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 7
5232d69827cSNikita Popov  %20 = load i8, ptr %arrayidx.7, align 1
52410625958SPhilip Reames  %arrayidx3.7 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 7
5252d69827cSNikita Popov  %21 = load i8, ptr %arrayidx3.7, align 1
52610625958SPhilip Reames  %and12.7 = and i8 %21, %20
52710625958SPhilip Reames  %22 = xor i8 %19, %and12.7
52810625958SPhilip Reames  %xor13.7 = xor i8 %22, 1
52910625958SPhilip Reames  ret i8 %xor13.7
53010625958SPhilip Reames}
53110625958SPhilip Reames
53210625958SPhilip Reames
53310625958SPhilip Reames
53410625958SPhilip Reamesdefine i8 @reduce_add(ptr %a, ptr %b) {
53510625958SPhilip Reames; CHECK-LABEL: @reduce_add(
53610625958SPhilip Reames; CHECK-NEXT:  entry:
53710625958SPhilip Reames; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_BUF:%.*]], ptr [[A:%.*]], i64 0, i32 0, i64 0
53810625958SPhilip Reames; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_BUF]], ptr [[B:%.*]], i64 0, i32 0, i64 0
53910625958SPhilip Reames; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 1
54010625958SPhilip Reames; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[ARRAYIDX3]], align 1
54110625958SPhilip Reames; CHECK-NEXT:    [[TMP2:%.*]] = and <8 x i8> [[TMP1]], [[TMP0]]
54210625958SPhilip Reames; CHECK-NEXT:    [[TMP3:%.*]] = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> [[TMP2]])
54310625958SPhilip Reames; CHECK-NEXT:    [[OP_RDX:%.*]] = add i8 [[TMP3]], 1
54410625958SPhilip Reames; CHECK-NEXT:    ret i8 [[OP_RDX]]
54510625958SPhilip Reames;
54610625958SPhilip Reamesentry:
54710625958SPhilip Reames  %arrayidx = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 0
5482d69827cSNikita Popov  %0 = load i8, ptr %arrayidx, align 1
54910625958SPhilip Reames  %arrayidx3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 0
5502d69827cSNikita Popov  %1 = load i8, ptr %arrayidx3, align 1
55110625958SPhilip Reames  %and12 = and i8 %1, %0
55210625958SPhilip Reames  %arrayidx.1 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 1
5532d69827cSNikita Popov  %2 = load i8, ptr %arrayidx.1, align 1
55410625958SPhilip Reames  %arrayidx3.1 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 1
5552d69827cSNikita Popov  %3 = load i8, ptr %arrayidx3.1, align 1
55610625958SPhilip Reames  %and12.1 = and i8 %3, %2
55710625958SPhilip Reames  %4 = add i8 %and12, %and12.1
55810625958SPhilip Reames  %arrayidx.2 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 2
5592d69827cSNikita Popov  %5 = load i8, ptr %arrayidx.2, align 1
56010625958SPhilip Reames  %arrayidx3.2 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 2
5612d69827cSNikita Popov  %6 = load i8, ptr %arrayidx3.2, align 1
56210625958SPhilip Reames  %and12.2 = and i8 %6, %5
56310625958SPhilip Reames  %7 = add i8 %4, %and12.2
56410625958SPhilip Reames  %arrayidx.3 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 3
5652d69827cSNikita Popov  %8 = load i8, ptr %arrayidx.3, align 1
56610625958SPhilip Reames  %arrayidx3.3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 3
5672d69827cSNikita Popov  %9 = load i8, ptr %arrayidx3.3, align 1
56810625958SPhilip Reames  %and12.3 = and i8 %9, %8
56910625958SPhilip Reames  %10 = add i8 %7, %and12.3
57010625958SPhilip Reames  %arrayidx.4 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 4
5712d69827cSNikita Popov  %11 = load i8, ptr %arrayidx.4, align 1
57210625958SPhilip Reames  %arrayidx3.4 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 4
5732d69827cSNikita Popov  %12 = load i8, ptr %arrayidx3.4, align 1
57410625958SPhilip Reames  %and12.4 = and i8 %12, %11
57510625958SPhilip Reames  %13 = add i8 %10, %and12.4
57610625958SPhilip Reames  %arrayidx.5 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 5
5772d69827cSNikita Popov  %14 = load i8, ptr %arrayidx.5, align 1
57810625958SPhilip Reames  %arrayidx3.5 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 5
5792d69827cSNikita Popov  %15 = load i8, ptr %arrayidx3.5, align 1
58010625958SPhilip Reames  %and12.5 = and i8 %15, %14
58110625958SPhilip Reames  %16 = add i8 %13, %and12.5
58210625958SPhilip Reames  %arrayidx.6 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 6
5832d69827cSNikita Popov  %17 = load i8, ptr %arrayidx.6, align 1
58410625958SPhilip Reames  %arrayidx3.6 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 6
5852d69827cSNikita Popov  %18 = load i8, ptr %arrayidx3.6, align 1
58610625958SPhilip Reames  %and12.6 = and i8 %18, %17
58710625958SPhilip Reames  %19 = add i8 %16, %and12.6
58810625958SPhilip Reames  %arrayidx.7 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 7
5892d69827cSNikita Popov  %20 = load i8, ptr %arrayidx.7, align 1
59010625958SPhilip Reames  %arrayidx3.7 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 7
5912d69827cSNikita Popov  %21 = load i8, ptr %arrayidx3.7, align 1
59210625958SPhilip Reames  %and12.7 = and i8 %21, %20
59310625958SPhilip Reames  %22 = add i8 %19, %and12.7
59410625958SPhilip Reames  %add13.7 = add i8 %22, 1
59510625958SPhilip Reames  ret i8 %add13.7
59610625958SPhilip Reames}
59710625958SPhilip Reames
59872ce9d1cSBen Shideclare i8 @llvm.smin.i8(i8, i8)
59972ce9d1cSBen Shi
60072ce9d1cSBen Shidefine i8 @reduce_smin(ptr %a, ptr %b) {
60172ce9d1cSBen Shi; CHECK-LABEL: @reduce_smin(
60272ce9d1cSBen Shi; CHECK-NEXT:  entry:
60372ce9d1cSBen Shi; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_BUF:%.*]], ptr [[A:%.*]], i64 0, i32 0, i64 0
60472ce9d1cSBen Shi; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_BUF]], ptr [[B:%.*]], i64 0, i32 0, i64 0
60572ce9d1cSBen Shi; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 1
60672ce9d1cSBen Shi; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[ARRAYIDX3]], align 1
60772ce9d1cSBen Shi; CHECK-NEXT:    [[TMP2:%.*]] = and <8 x i8> [[TMP1]], [[TMP0]]
60872ce9d1cSBen Shi; CHECK-NEXT:    [[TMP3:%.*]] = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> [[TMP2]])
60972ce9d1cSBen Shi; CHECK-NEXT:    ret i8 [[TMP3]]
61072ce9d1cSBen Shi;
61172ce9d1cSBen Shientry:
61272ce9d1cSBen Shi  %arrayidx = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 0
6132d69827cSNikita Popov  %0 = load i8, ptr %arrayidx, align 1
61472ce9d1cSBen Shi  %arrayidx3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 0
6152d69827cSNikita Popov  %1 = load i8, ptr %arrayidx3, align 1
61672ce9d1cSBen Shi  %and12 = and i8 %1, %0
61772ce9d1cSBen Shi  %arrayidx.1 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 1
6182d69827cSNikita Popov  %2 = load i8, ptr %arrayidx.1, align 1
61972ce9d1cSBen Shi  %arrayidx3.1 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 1
6202d69827cSNikita Popov  %3 = load i8, ptr %arrayidx3.1, align 1
62172ce9d1cSBen Shi  %and12.1 = and i8 %3, %2
62272ce9d1cSBen Shi  %4 = tail call i8 @llvm.smin.i8(i8 %and12, i8 %and12.1)
62372ce9d1cSBen Shi  %arrayidx.2 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 2
6242d69827cSNikita Popov  %5 = load i8, ptr %arrayidx.2, align 1
62572ce9d1cSBen Shi  %arrayidx3.2 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 2
6262d69827cSNikita Popov  %6 = load i8, ptr %arrayidx3.2, align 1
62772ce9d1cSBen Shi  %and12.2 = and i8 %6, %5
62872ce9d1cSBen Shi  %7 = tail call i8 @llvm.smin.i8(i8 %4, i8 %and12.2)
62972ce9d1cSBen Shi  %arrayidx.3 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 3
6302d69827cSNikita Popov  %8 = load i8, ptr %arrayidx.3, align 1
63172ce9d1cSBen Shi  %arrayidx3.3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 3
6322d69827cSNikita Popov  %9 = load i8, ptr %arrayidx3.3, align 1
63372ce9d1cSBen Shi  %and12.3 = and i8 %9, %8
63472ce9d1cSBen Shi  %10 = tail call i8 @llvm.smin.i8(i8 %7, i8 %and12.3)
63572ce9d1cSBen Shi  %arrayidx.4 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 4
6362d69827cSNikita Popov  %11 = load i8, ptr %arrayidx.4, align 1
63772ce9d1cSBen Shi  %arrayidx3.4 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 4
6382d69827cSNikita Popov  %12 = load i8, ptr %arrayidx3.4, align 1
63972ce9d1cSBen Shi  %and12.4 = and i8 %12, %11
64072ce9d1cSBen Shi  %13 = tail call i8 @llvm.smin.i8(i8 %10, i8 %and12.4)
64172ce9d1cSBen Shi  %arrayidx.5 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 5
6422d69827cSNikita Popov  %14 = load i8, ptr %arrayidx.5, align 1
64372ce9d1cSBen Shi  %arrayidx3.5 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 5
6442d69827cSNikita Popov  %15 = load i8, ptr %arrayidx3.5, align 1
64572ce9d1cSBen Shi  %and12.5 = and i8 %15, %14
64672ce9d1cSBen Shi  %16 = tail call i8 @llvm.smin.i8(i8 %13, i8 %and12.5)
64772ce9d1cSBen Shi  %arrayidx.6 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 6
6482d69827cSNikita Popov  %17 = load i8, ptr %arrayidx.6, align 1
64972ce9d1cSBen Shi  %arrayidx3.6 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 6
6502d69827cSNikita Popov  %18 = load i8, ptr %arrayidx3.6, align 1
65172ce9d1cSBen Shi  %and12.6 = and i8 %18, %17
65272ce9d1cSBen Shi  %19 = tail call i8 @llvm.smin.i8(i8 %16, i8 %and12.6)
65372ce9d1cSBen Shi  %arrayidx.7 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 7
6542d69827cSNikita Popov  %20 = load i8, ptr %arrayidx.7, align 1
65572ce9d1cSBen Shi  %arrayidx3.7 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 7
6562d69827cSNikita Popov  %21 = load i8, ptr %arrayidx3.7, align 1
65772ce9d1cSBen Shi  %and12.7 = and i8 %21, %20
65872ce9d1cSBen Shi  %22 = tail call i8 @llvm.smin.i8(i8 %19, i8 %and12.7)
65972ce9d1cSBen Shi  ret i8 %22
66072ce9d1cSBen Shi}
66172ce9d1cSBen Shi
66272ce9d1cSBen Shideclare i8 @llvm.smax.i8(i8, i8)
66372ce9d1cSBen Shi
66472ce9d1cSBen Shidefine i8 @reduce_smax(ptr %a, ptr %b) {
66572ce9d1cSBen Shi; CHECK-LABEL: @reduce_smax(
66672ce9d1cSBen Shi; CHECK-NEXT:  entry:
66772ce9d1cSBen Shi; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_BUF:%.*]], ptr [[A:%.*]], i64 0, i32 0, i64 0
66872ce9d1cSBen Shi; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_BUF]], ptr [[B:%.*]], i64 0, i32 0, i64 0
66972ce9d1cSBen Shi; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 1
67072ce9d1cSBen Shi; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[ARRAYIDX3]], align 1
67172ce9d1cSBen Shi; CHECK-NEXT:    [[TMP2:%.*]] = and <8 x i8> [[TMP1]], [[TMP0]]
67272ce9d1cSBen Shi; CHECK-NEXT:    [[TMP3:%.*]] = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> [[TMP2]])
67372ce9d1cSBen Shi; CHECK-NEXT:    ret i8 [[TMP3]]
67472ce9d1cSBen Shi;
67572ce9d1cSBen Shientry:
67672ce9d1cSBen Shi  %arrayidx = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 0
6772d69827cSNikita Popov  %0 = load i8, ptr %arrayidx, align 1
67872ce9d1cSBen Shi  %arrayidx3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 0
6792d69827cSNikita Popov  %1 = load i8, ptr %arrayidx3, align 1
68072ce9d1cSBen Shi  %and12 = and i8 %1, %0
68172ce9d1cSBen Shi  %arrayidx.1 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 1
6822d69827cSNikita Popov  %2 = load i8, ptr %arrayidx.1, align 1
68372ce9d1cSBen Shi  %arrayidx3.1 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 1
6842d69827cSNikita Popov  %3 = load i8, ptr %arrayidx3.1, align 1
68572ce9d1cSBen Shi  %and12.1 = and i8 %3, %2
68672ce9d1cSBen Shi  %4 = tail call i8 @llvm.smax.i8(i8 %and12, i8 %and12.1)
68772ce9d1cSBen Shi  %arrayidx.2 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 2
6882d69827cSNikita Popov  %5 = load i8, ptr %arrayidx.2, align 1
68972ce9d1cSBen Shi  %arrayidx3.2 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 2
6902d69827cSNikita Popov  %6 = load i8, ptr %arrayidx3.2, align 1
69172ce9d1cSBen Shi  %and12.2 = and i8 %6, %5
69272ce9d1cSBen Shi  %7 = tail call i8 @llvm.smax.i8(i8 %4, i8 %and12.2)
69372ce9d1cSBen Shi  %arrayidx.3 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 3
6942d69827cSNikita Popov  %8 = load i8, ptr %arrayidx.3, align 1
69572ce9d1cSBen Shi  %arrayidx3.3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 3
6962d69827cSNikita Popov  %9 = load i8, ptr %arrayidx3.3, align 1
69772ce9d1cSBen Shi  %and12.3 = and i8 %9, %8
69872ce9d1cSBen Shi  %10 = tail call i8 @llvm.smax.i8(i8 %7, i8 %and12.3)
69972ce9d1cSBen Shi  %arrayidx.4 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 4
7002d69827cSNikita Popov  %11 = load i8, ptr %arrayidx.4, align 1
70172ce9d1cSBen Shi  %arrayidx3.4 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 4
7022d69827cSNikita Popov  %12 = load i8, ptr %arrayidx3.4, align 1
70372ce9d1cSBen Shi  %and12.4 = and i8 %12, %11
70472ce9d1cSBen Shi  %13 = tail call i8 @llvm.smax.i8(i8 %10, i8 %and12.4)
70572ce9d1cSBen Shi  %arrayidx.5 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 5
7062d69827cSNikita Popov  %14 = load i8, ptr %arrayidx.5, align 1
70772ce9d1cSBen Shi  %arrayidx3.5 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 5
7082d69827cSNikita Popov  %15 = load i8, ptr %arrayidx3.5, align 1
70972ce9d1cSBen Shi  %and12.5 = and i8 %15, %14
71072ce9d1cSBen Shi  %16 = tail call i8 @llvm.smax.i8(i8 %13, i8 %and12.5)
71172ce9d1cSBen Shi  %arrayidx.6 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 6
7122d69827cSNikita Popov  %17 = load i8, ptr %arrayidx.6, align 1
71372ce9d1cSBen Shi  %arrayidx3.6 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 6
7142d69827cSNikita Popov  %18 = load i8, ptr %arrayidx3.6, align 1
71572ce9d1cSBen Shi  %and12.6 = and i8 %18, %17
71672ce9d1cSBen Shi  %19 = tail call i8 @llvm.smax.i8(i8 %16, i8 %and12.6)
71772ce9d1cSBen Shi  %arrayidx.7 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 7
7182d69827cSNikita Popov  %20 = load i8, ptr %arrayidx.7, align 1
71972ce9d1cSBen Shi  %arrayidx3.7 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 7
7202d69827cSNikita Popov  %21 = load i8, ptr %arrayidx3.7, align 1
72172ce9d1cSBen Shi  %and12.7 = and i8 %21, %20
72272ce9d1cSBen Shi  %22 = tail call i8 @llvm.smax.i8(i8 %19, i8 %and12.7)
72372ce9d1cSBen Shi  ret i8 %22
72472ce9d1cSBen Shi}
72572ce9d1cSBen Shi
72672ce9d1cSBen Shideclare i8 @llvm.umax.i8(i8, i8)
72772ce9d1cSBen Shi
72872ce9d1cSBen Shidefine i8 @reduce_umax(ptr %a, ptr %b) {
72972ce9d1cSBen Shi; CHECK-LABEL: @reduce_umax(
73072ce9d1cSBen Shi; CHECK-NEXT:  entry:
73172ce9d1cSBen Shi; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_BUF:%.*]], ptr [[A:%.*]], i64 0, i32 0, i64 0
73272ce9d1cSBen Shi; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_BUF]], ptr [[B:%.*]], i64 0, i32 0, i64 0
73372ce9d1cSBen Shi; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 1
73472ce9d1cSBen Shi; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[ARRAYIDX3]], align 1
73572ce9d1cSBen Shi; CHECK-NEXT:    [[TMP2:%.*]] = and <8 x i8> [[TMP1]], [[TMP0]]
73672ce9d1cSBen Shi; CHECK-NEXT:    [[TMP3:%.*]] = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> [[TMP2]])
73772ce9d1cSBen Shi; CHECK-NEXT:    ret i8 [[TMP3]]
73872ce9d1cSBen Shi;
73972ce9d1cSBen Shientry:
74072ce9d1cSBen Shi  %arrayidx = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 0
7412d69827cSNikita Popov  %0 = load i8, ptr %arrayidx, align 1
74272ce9d1cSBen Shi  %arrayidx3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 0
7432d69827cSNikita Popov  %1 = load i8, ptr %arrayidx3, align 1
74472ce9d1cSBen Shi  %and12 = and i8 %1, %0
74572ce9d1cSBen Shi  %arrayidx.1 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 1
7462d69827cSNikita Popov  %2 = load i8, ptr %arrayidx.1, align 1
74772ce9d1cSBen Shi  %arrayidx3.1 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 1
7482d69827cSNikita Popov  %3 = load i8, ptr %arrayidx3.1, align 1
74972ce9d1cSBen Shi  %and12.1 = and i8 %3, %2
75072ce9d1cSBen Shi  %4 = tail call i8 @llvm.umax.i8(i8 %and12, i8 %and12.1)
75172ce9d1cSBen Shi  %arrayidx.2 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 2
7522d69827cSNikita Popov  %5 = load i8, ptr %arrayidx.2, align 1
75372ce9d1cSBen Shi  %arrayidx3.2 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 2
7542d69827cSNikita Popov  %6 = load i8, ptr %arrayidx3.2, align 1
75572ce9d1cSBen Shi  %and12.2 = and i8 %6, %5
75672ce9d1cSBen Shi  %7 = tail call i8 @llvm.umax.i8(i8 %4, i8 %and12.2)
75772ce9d1cSBen Shi  %arrayidx.3 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 3
7582d69827cSNikita Popov  %8 = load i8, ptr %arrayidx.3, align 1
75972ce9d1cSBen Shi  %arrayidx3.3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 3
7602d69827cSNikita Popov  %9 = load i8, ptr %arrayidx3.3, align 1
76172ce9d1cSBen Shi  %and12.3 = and i8 %9, %8
76272ce9d1cSBen Shi  %10 = tail call i8 @llvm.umax.i8(i8 %7, i8 %and12.3)
76372ce9d1cSBen Shi  %arrayidx.4 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 4
7642d69827cSNikita Popov  %11 = load i8, ptr %arrayidx.4, align 1
76572ce9d1cSBen Shi  %arrayidx3.4 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 4
7662d69827cSNikita Popov  %12 = load i8, ptr %arrayidx3.4, align 1
76772ce9d1cSBen Shi  %and12.4 = and i8 %12, %11
76872ce9d1cSBen Shi  %13 = tail call i8 @llvm.umax.i8(i8 %10, i8 %and12.4)
76972ce9d1cSBen Shi  %arrayidx.5 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 5
7702d69827cSNikita Popov  %14 = load i8, ptr %arrayidx.5, align 1
77172ce9d1cSBen Shi  %arrayidx3.5 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 5
7722d69827cSNikita Popov  %15 = load i8, ptr %arrayidx3.5, align 1
77372ce9d1cSBen Shi  %and12.5 = and i8 %15, %14
77472ce9d1cSBen Shi  %16 = tail call i8 @llvm.umax.i8(i8 %13, i8 %and12.5)
77572ce9d1cSBen Shi  %arrayidx.6 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 6
7762d69827cSNikita Popov  %17 = load i8, ptr %arrayidx.6, align 1
77772ce9d1cSBen Shi  %arrayidx3.6 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 6
7782d69827cSNikita Popov  %18 = load i8, ptr %arrayidx3.6, align 1
77972ce9d1cSBen Shi  %and12.6 = and i8 %18, %17
78072ce9d1cSBen Shi  %19 = tail call i8 @llvm.umax.i8(i8 %16, i8 %and12.6)
78172ce9d1cSBen Shi  %arrayidx.7 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 7
7822d69827cSNikita Popov  %20 = load i8, ptr %arrayidx.7, align 1
78372ce9d1cSBen Shi  %arrayidx3.7 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 7
7842d69827cSNikita Popov  %21 = load i8, ptr %arrayidx3.7, align 1
78572ce9d1cSBen Shi  %and12.7 = and i8 %21, %20
78672ce9d1cSBen Shi  %22 = tail call i8 @llvm.umax.i8(i8 %19, i8 %and12.7)
78772ce9d1cSBen Shi  ret i8 %22
78872ce9d1cSBen Shi}
78972ce9d1cSBen Shi
79072ce9d1cSBen Shideclare i8 @llvm.umin.i8(i8, i8)
79172ce9d1cSBen Shi
79272ce9d1cSBen Shidefine i8 @reduce_umin(ptr %a, ptr %b) {
79372ce9d1cSBen Shi; CHECK-LABEL: @reduce_umin(
79472ce9d1cSBen Shi; CHECK-NEXT:  entry:
79572ce9d1cSBen Shi; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_BUF:%.*]], ptr [[A:%.*]], i64 0, i32 0, i64 0
79672ce9d1cSBen Shi; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_BUF]], ptr [[B:%.*]], i64 0, i32 0, i64 0
79772ce9d1cSBen Shi; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 1
79872ce9d1cSBen Shi; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[ARRAYIDX3]], align 1
79972ce9d1cSBen Shi; CHECK-NEXT:    [[TMP2:%.*]] = and <8 x i8> [[TMP1]], [[TMP0]]
80072ce9d1cSBen Shi; CHECK-NEXT:    [[TMP3:%.*]] = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> [[TMP2]])
80172ce9d1cSBen Shi; CHECK-NEXT:    ret i8 [[TMP3]]
80272ce9d1cSBen Shi;
80372ce9d1cSBen Shientry:
80472ce9d1cSBen Shi  %arrayidx = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 0
8052d69827cSNikita Popov  %0 = load i8, ptr %arrayidx, align 1
80672ce9d1cSBen Shi  %arrayidx3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 0
8072d69827cSNikita Popov  %1 = load i8, ptr %arrayidx3, align 1
80872ce9d1cSBen Shi  %and12 = and i8 %1, %0
80972ce9d1cSBen Shi  %arrayidx.1 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 1
8102d69827cSNikita Popov  %2 = load i8, ptr %arrayidx.1, align 1
81172ce9d1cSBen Shi  %arrayidx3.1 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 1
8122d69827cSNikita Popov  %3 = load i8, ptr %arrayidx3.1, align 1
81372ce9d1cSBen Shi  %and12.1 = and i8 %3, %2
81472ce9d1cSBen Shi  %4 = tail call i8 @llvm.umin.i8(i8 %and12, i8 %and12.1)
81572ce9d1cSBen Shi  %arrayidx.2 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 2
8162d69827cSNikita Popov  %5 = load i8, ptr %arrayidx.2, align 1
81772ce9d1cSBen Shi  %arrayidx3.2 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 2
8182d69827cSNikita Popov  %6 = load i8, ptr %arrayidx3.2, align 1
81972ce9d1cSBen Shi  %and12.2 = and i8 %6, %5
82072ce9d1cSBen Shi  %7 = tail call i8 @llvm.umin.i8(i8 %4, i8 %and12.2)
82172ce9d1cSBen Shi  %arrayidx.3 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 3
8222d69827cSNikita Popov  %8 = load i8, ptr %arrayidx.3, align 1
82372ce9d1cSBen Shi  %arrayidx3.3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 3
8242d69827cSNikita Popov  %9 = load i8, ptr %arrayidx3.3, align 1
82572ce9d1cSBen Shi  %and12.3 = and i8 %9, %8
82672ce9d1cSBen Shi  %10 = tail call i8 @llvm.umin.i8(i8 %7, i8 %and12.3)
82772ce9d1cSBen Shi  %arrayidx.4 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 4
8282d69827cSNikita Popov  %11 = load i8, ptr %arrayidx.4, align 1
82972ce9d1cSBen Shi  %arrayidx3.4 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 4
8302d69827cSNikita Popov  %12 = load i8, ptr %arrayidx3.4, align 1
83172ce9d1cSBen Shi  %and12.4 = and i8 %12, %11
83272ce9d1cSBen Shi  %13 = tail call i8 @llvm.umin.i8(i8 %10, i8 %and12.4)
83372ce9d1cSBen Shi  %arrayidx.5 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 5
8342d69827cSNikita Popov  %14 = load i8, ptr %arrayidx.5, align 1
83572ce9d1cSBen Shi  %arrayidx3.5 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 5
8362d69827cSNikita Popov  %15 = load i8, ptr %arrayidx3.5, align 1
83772ce9d1cSBen Shi  %and12.5 = and i8 %15, %14
83872ce9d1cSBen Shi  %16 = tail call i8 @llvm.umin.i8(i8 %13, i8 %and12.5)
83972ce9d1cSBen Shi  %arrayidx.6 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 6
8402d69827cSNikita Popov  %17 = load i8, ptr %arrayidx.6, align 1
84172ce9d1cSBen Shi  %arrayidx3.6 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 6
8422d69827cSNikita Popov  %18 = load i8, ptr %arrayidx3.6, align 1
84372ce9d1cSBen Shi  %and12.6 = and i8 %18, %17
84472ce9d1cSBen Shi  %19 = tail call i8 @llvm.umin.i8(i8 %16, i8 %and12.6)
84572ce9d1cSBen Shi  %arrayidx.7 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 7
8462d69827cSNikita Popov  %20 = load i8, ptr %arrayidx.7, align 1
84772ce9d1cSBen Shi  %arrayidx3.7 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 7
8482d69827cSNikita Popov  %21 = load i8, ptr %arrayidx3.7, align 1
84972ce9d1cSBen Shi  %and12.7 = and i8 %21, %20
85072ce9d1cSBen Shi  %22 = tail call i8 @llvm.umin.i8(i8 %19, i8 %and12.7)
85172ce9d1cSBen Shi  ret i8 %22
85272ce9d1cSBen Shi}
85310625958SPhilip Reames
85410625958SPhilip Reames; Next batch exercise reductions involing zext of narrower loads
85510625958SPhilip Reames
85610625958SPhilip Reamesdefine i64 @red_zext_ld_2xi64(ptr %ptr) {
85710625958SPhilip Reames; CHECK-LABEL: @red_zext_ld_2xi64(
85810625958SPhilip Reames; CHECK-NEXT:  entry:
85910625958SPhilip Reames; CHECK-NEXT:    [[LD0:%.*]] = load i8, ptr [[PTR:%.*]], align 1
86010625958SPhilip Reames; CHECK-NEXT:    [[ZEXT:%.*]] = zext i8 [[LD0]] to i64
86110625958SPhilip Reames; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1
86210625958SPhilip Reames; CHECK-NEXT:    [[LD1:%.*]] = load i8, ptr [[GEP]], align 1
86310625958SPhilip Reames; CHECK-NEXT:    [[ZEXT_1:%.*]] = zext i8 [[LD1]] to i64
86410625958SPhilip Reames; CHECK-NEXT:    [[ADD_1:%.*]] = add nuw nsw i64 [[ZEXT]], [[ZEXT_1]]
86510625958SPhilip Reames; CHECK-NEXT:    ret i64 [[ADD_1]]
86610625958SPhilip Reames;
86710625958SPhilip Reamesentry:
86810625958SPhilip Reames  %ld0 = load i8, ptr %ptr
86910625958SPhilip Reames  %zext = zext i8 %ld0 to i64
87010625958SPhilip Reames  %gep = getelementptr inbounds i8, ptr %ptr, i64 1
87110625958SPhilip Reames  %ld1 = load i8, ptr %gep
87210625958SPhilip Reames  %zext.1 = zext i8 %ld1 to i64
87310625958SPhilip Reames  %add.1 = add nuw nsw i64 %zext, %zext.1
87410625958SPhilip Reames  ret i64 %add.1
87510625958SPhilip Reames}
87610625958SPhilip Reames
87710625958SPhilip Reamesdefine i64 @red_zext_ld_4xi64(ptr %ptr) {
87810625958SPhilip Reames; CHECK-LABEL: @red_zext_ld_4xi64(
87910625958SPhilip Reames; CHECK-NEXT:  entry:
880*7523086aSAlexey Bataev; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i8>, ptr [[PTR:%.*]], align 1
881*7523086aSAlexey Bataev; CHECK-NEXT:    [[TMP1:%.*]] = zext <4 x i8> [[TMP0]] to <4 x i16>
882*7523086aSAlexey Bataev; CHECK-NEXT:    [[TMP2:%.*]] = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> [[TMP1]])
883*7523086aSAlexey Bataev; CHECK-NEXT:    [[ADD_3:%.*]] = zext i16 [[TMP2]] to i64
884a9888211SAlexey Bataev; CHECK-NEXT:    ret i64 [[ADD_3]]
88510625958SPhilip Reames;
88610625958SPhilip Reamesentry:
88710625958SPhilip Reames  %ld0 = load i8, ptr %ptr
88810625958SPhilip Reames  %zext = zext i8 %ld0 to i64
88910625958SPhilip Reames  %gep = getelementptr inbounds i8, ptr %ptr, i64 1
89010625958SPhilip Reames  %ld1 = load i8, ptr %gep
89110625958SPhilip Reames  %zext.1 = zext i8 %ld1 to i64
89210625958SPhilip Reames  %add.1 = add nuw nsw i64 %zext, %zext.1
89310625958SPhilip Reames  %gep.1 = getelementptr inbounds i8, ptr %ptr, i64 2
89410625958SPhilip Reames  %ld2 = load i8, ptr %gep.1
89510625958SPhilip Reames  %zext.2 = zext i8 %ld2 to i64
89610625958SPhilip Reames  %add.2 = add nuw nsw i64 %add.1, %zext.2
89710625958SPhilip Reames  %gep.2 = getelementptr inbounds i8, ptr %ptr, i64 3
89810625958SPhilip Reames  %ld3 = load i8, ptr %gep.2
89910625958SPhilip Reames  %zext.3 = zext i8 %ld3 to i64
90010625958SPhilip Reames  %add.3 = add nuw nsw i64 %add.2, %zext.3
90110625958SPhilip Reames  ret i64 %add.3
90210625958SPhilip Reames}
90310625958SPhilip Reames
; An 8-wide linear chain of adds over zext'd consecutive i8 loads.  The CHECK
; lines verify SLP collapses it into one <8 x i8> load, a vector zext to
; <8 x i64>, and a single @llvm.vector.reduce.add call.
define i64 @red_zext_ld_8xi64(ptr %ptr) {
; CHECK-LABEL: @red_zext_ld_8xi64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i8>, ptr [[PTR:%.*]], align 1
; CHECK-NEXT:    [[TMP1:%.*]] = zext <8 x i8> [[TMP0]] to <8 x i64>
; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[TMP1]])
; CHECK-NEXT:    ret i64 [[TMP2]]
;
entry:
  %ld0 = load i8, ptr %ptr
  %zext = zext i8 %ld0 to i64
  %gep = getelementptr inbounds i8, ptr %ptr, i64 1
  %ld1 = load i8, ptr %gep
  %zext.1 = zext i8 %ld1 to i64
  %add.1 = add nuw nsw i64 %zext, %zext.1
  %gep.1 = getelementptr inbounds i8, ptr %ptr, i64 2
  %ld2 = load i8, ptr %gep.1
  %zext.2 = zext i8 %ld2 to i64
  %add.2 = add nuw nsw i64 %add.1, %zext.2
  %gep.2 = getelementptr inbounds i8, ptr %ptr, i64 3
  %ld3 = load i8, ptr %gep.2
  %zext.3 = zext i8 %ld3 to i64
  %add.3 = add nuw nsw i64 %add.2, %zext.3
  %gep.3 = getelementptr inbounds i8, ptr %ptr, i64 4
  %ld4 = load i8, ptr %gep.3
  %zext.4 = zext i8 %ld4 to i64
  %add.4 = add nuw nsw i64 %add.3, %zext.4
  %gep.4 = getelementptr inbounds i8, ptr %ptr, i64 5
  %ld5 = load i8, ptr %gep.4
  %zext.5 = zext i8 %ld5 to i64
  %add.5 = add nuw nsw i64 %add.4, %zext.5
  %gep.5 = getelementptr inbounds i8, ptr %ptr, i64 6
  %ld6 = load i8, ptr %gep.5
  %zext.6 = zext i8 %ld6 to i64
  %add.6 = add nuw nsw i64 %add.5, %zext.6
  %gep.6 = getelementptr inbounds i8, ptr %ptr, i64 7
  %ld7 = load i8, ptr %gep.6
  %zext.7 = zext i8 %ld7 to i64
  %add.7 = add nuw nsw i64 %add.6, %zext.7
  ret i64 %add.7
}
94510625958SPhilip Reames
; 16-wide variant of the zext'd i8 add-reduction above; expects a single
; <16 x i8> load, a zext to <16 x i64>, and one @llvm.vector.reduce.add call.
define i64 @red_zext_ld_16xi64(ptr %ptr) {
; CHECK-LABEL: @red_zext_ld_16xi64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, ptr [[PTR:%.*]], align 1
; CHECK-NEXT:    [[TMP1:%.*]] = zext <16 x i8> [[TMP0]] to <16 x i64>
; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> [[TMP1]])
; CHECK-NEXT:    ret i64 [[TMP2]]
;
entry:
  %ld0 = load i8, ptr %ptr
  %zext = zext i8 %ld0 to i64
  %gep = getelementptr inbounds i8, ptr %ptr, i64 1
  %ld1 = load i8, ptr %gep
  %zext.1 = zext i8 %ld1 to i64
  %add.1 = add nuw nsw i64 %zext, %zext.1
  %gep.1 = getelementptr inbounds i8, ptr %ptr, i64 2
  %ld2 = load i8, ptr %gep.1
  %zext.2 = zext i8 %ld2 to i64
  %add.2 = add nuw nsw i64 %add.1, %zext.2
  %gep.2 = getelementptr inbounds i8, ptr %ptr, i64 3
  %ld3 = load i8, ptr %gep.2
  %zext.3 = zext i8 %ld3 to i64
  %add.3 = add nuw nsw i64 %add.2, %zext.3
  %gep.3 = getelementptr inbounds i8, ptr %ptr, i64 4
  %ld4 = load i8, ptr %gep.3
  %zext.4 = zext i8 %ld4 to i64
  %add.4 = add nuw nsw i64 %add.3, %zext.4
  %gep.4 = getelementptr inbounds i8, ptr %ptr, i64 5
  %ld5 = load i8, ptr %gep.4
  %zext.5 = zext i8 %ld5 to i64
  %add.5 = add nuw nsw i64 %add.4, %zext.5
  %gep.5 = getelementptr inbounds i8, ptr %ptr, i64 6
  %ld6 = load i8, ptr %gep.5
  %zext.6 = zext i8 %ld6 to i64
  %add.6 = add nuw nsw i64 %add.5, %zext.6
  %gep.6 = getelementptr inbounds i8, ptr %ptr, i64 7
  %ld7 = load i8, ptr %gep.6
  %zext.7 = zext i8 %ld7 to i64
  %add.7 = add nuw nsw i64 %add.6, %zext.7
  %gep.7 = getelementptr inbounds i8, ptr %ptr, i64 8
  %ld8 = load i8, ptr %gep.7
  %zext.8 = zext i8 %ld8 to i64
  %add.8 = add nuw nsw i64 %add.7, %zext.8
  %gep.8 = getelementptr inbounds i8, ptr %ptr, i64 9
  %ld9 = load i8, ptr %gep.8
  %zext.9 = zext i8 %ld9 to i64
  %add.9 = add nuw nsw i64 %add.8, %zext.9
  %gep.9 = getelementptr inbounds i8, ptr %ptr, i64 10
  %ld10 = load i8, ptr %gep.9
  %zext.10 = zext i8 %ld10 to i64
  %add.10 = add nuw nsw i64 %add.9, %zext.10
  %gep.10 = getelementptr inbounds i8, ptr %ptr, i64 11
  %ld11 = load i8, ptr %gep.10
  %zext.11 = zext i8 %ld11 to i64
  %add.11 = add nuw nsw i64 %add.10, %zext.11
  %gep.11 = getelementptr inbounds i8, ptr %ptr, i64 12
  %ld12 = load i8, ptr %gep.11
  %zext.12 = zext i8 %ld12 to i64
  %add.12 = add nuw nsw i64 %add.11, %zext.12
  %gep.12 = getelementptr inbounds i8, ptr %ptr, i64 13
  %ld13 = load i8, ptr %gep.12
  %zext.13 = zext i8 %ld13 to i64
  %add.13 = add nuw nsw i64 %add.12, %zext.13
  %gep.13 = getelementptr inbounds i8, ptr %ptr, i64 14
  %ld14 = load i8, ptr %gep.13
  %zext.14 = zext i8 %ld14 to i64
  %add.14 = add nuw nsw i64 %add.13, %zext.14
  %gep.14 = getelementptr inbounds i8, ptr %ptr, i64 15
  %ld15 = load i8, ptr %gep.14
  %zext.15 = zext i8 %ld15 to i64
  %add.15 = add nuw nsw i64 %add.14, %zext.15
  ret i64 %add.15
}
101910625958SPhilip Reames
1020e69f8bacSLuke Laudeclare i32 @llvm.abs.i32(i32, i1)
102110625958SPhilip Reames
; Sum-of-absolute-differences over two pairs of adjacent i32s, where the second
; pair sits at a runtime %stride offset.  The CHECK lines verify SLP keeps the
; four <2 x i32> loads separate and glues them together with
; @llvm.vector.insert before the vector sub/abs/reduce.add.
define i32 @stride_sum_abs_diff(ptr %p, ptr %q, i64 %stride) {
; CHECK-LABEL: @stride_sum_abs_diff(
; CHECK-NEXT:    [[P_2:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 [[STRIDE:%.*]]
; CHECK-NEXT:    [[Q_2:%.*]] = getelementptr inbounds i32, ptr [[Q:%.*]], i64 [[STRIDE]]
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[P]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr [[Q]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i32>, ptr [[P_2]], align 4
; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x i32>, ptr [[Q_2]], align 4
; CHECK-NEXT:    [[TMP5:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> poison, <2 x i32> [[TMP1]], i64 0)
; CHECK-NEXT:    [[TMP6:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> [[TMP5]], <2 x i32> [[TMP3]], i64 2)
; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> poison, <2 x i32> [[TMP2]], i64 0)
; CHECK-NEXT:    [[TMP8:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> [[TMP7]], <2 x i32> [[TMP4]], i64 2)
; CHECK-NEXT:    [[TMP9:%.*]] = sub <4 x i32> [[TMP6]], [[TMP8]]
; CHECK-NEXT:    [[TMP10:%.*]] = call <4 x i32> @llvm.abs.v4i32(<4 x i32> [[TMP9]], i1 true)
; CHECK-NEXT:    [[TMP11:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP10]])
; CHECK-NEXT:    ret i32 [[TMP11]]
;
  %x.0 = load i32, ptr %p
  %y.0 = load i32, ptr %q
  %sub.0 = sub i32 %x.0, %y.0
  %abs.0 = tail call i32 @llvm.abs.i32(i32 %sub.0, i1 true)

  %p.1 = getelementptr inbounds i32, ptr %p, i64 1
  %x.1 = load i32, ptr %p.1
  %q.1 = getelementptr inbounds i32, ptr %q, i64 1
  %y.1 = load i32, ptr %q.1
  %sub.1 = sub i32 %x.1, %y.1
  %abs.1 = tail call i32 @llvm.abs.i32(i32 %sub.1, i1 true)
  %sum.0 = add i32 %abs.0, %abs.1

  %p.2 = getelementptr inbounds i32, ptr %p, i64 %stride
  %q.2 = getelementptr inbounds i32, ptr %q, i64 %stride

  %x.2 = load i32, ptr %p.2
  %y.2 = load i32, ptr %q.2
  %sub.2 = sub i32 %x.2, %y.2
  %abs.2 = tail call i32 @llvm.abs.i32(i32 %sub.2, i1 true)
  %sum.1 = add i32 %sum.0, %abs.2

  %p.3 = getelementptr inbounds i32, ptr %p.2, i64 1
  %x.3 = load i32, ptr %p.3
  %q.3 = getelementptr inbounds i32, ptr %q.2, i64 1
  %y.3 = load i32, ptr %q.3
  %sub.3 = sub i32 %x.3, %y.3
  %abs.3 = tail call i32 @llvm.abs.i32(i32 %sub.3, i1 true)
  %sum.2 = add i32 %sum.1, %abs.3

  ret i32 %sum.2
}
10711c9094a2SLuke Lau
; Interleaved zext-add reduction over two noalias i8 arrays (elements taken
; alternately from %p and %q).  Expects SLP to emit two <4 x i8> loads, join
; them via @llvm.vector.insert into <8 x i8>, then zext and reduce.add.
define i32 @reduce_sum_2arrays_a(ptr noalias %p, ptr noalias %q) {
; CHECK-LABEL: @reduce_sum_2arrays_a(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i8>, ptr [[P:%.*]], align 1
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr [[Q:%.*]], align 1
; CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i8> @llvm.vector.insert.v8i8.v4i8(<8 x i8> poison, <4 x i8> [[TMP0]], i64 0)
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.vector.insert.v8i8.v4i8(<8 x i8> [[TMP2]], <4 x i8> [[TMP1]], i64 4)
; CHECK-NEXT:    [[TMP4:%.*]] = zext <8 x i8> [[TMP3]] to <8 x i32>
; CHECK-NEXT:    [[TMP5:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP4]])
; CHECK-NEXT:    ret i32 [[TMP5]]
;
entry:
  %x.0 = load i8, ptr %p, align 1
  %conv = zext i8 %x.0 to i32
  %y.0 = load i8, ptr %q, align 1
  %conv3 = zext i8 %y.0 to i32
  %add4 = add nuw nsw i32 %conv, %conv3

  %arrayidx.1 = getelementptr inbounds i8, ptr %p, i64 1
  %x.1 = load i8, ptr %arrayidx.1, align 1
  %conv.1 = zext i8 %x.1 to i32
  %arrayidx2.1 = getelementptr inbounds i8, ptr %q, i64 1
  %y.1 = load i8, ptr %arrayidx2.1, align 1
  %conv3.1 = zext i8 %y.1 to i32
  %add.1 = add nuw nsw i32 %add4, %conv.1
  %add4.1 = add nuw nsw i32 %add.1, %conv3.1

  %arrayidx.2 = getelementptr inbounds i8, ptr %p, i64 2
  %x.2 = load i8, ptr %arrayidx.2, align 1
  %conv.2 = zext i8 %x.2 to i32
  %arrayidx2.2 = getelementptr inbounds i8, ptr %q, i64 2
  %y.2 = load i8, ptr %arrayidx2.2, align 1
  %conv3.2 = zext i8 %y.2 to i32
  %add.2 = add nuw nsw i32 %add4.1, %conv.2
  %add4.2 = add nuw nsw i32 %add.2, %conv3.2

  %arrayidx.3 = getelementptr inbounds i8, ptr %p, i64 3
  %x.3 = load i8, ptr %arrayidx.3, align 1
  %conv.3 = zext i8 %x.3 to i32
  %arrayidx2.3 = getelementptr inbounds i8, ptr %q, i64 3
  %y.3 = load i8, ptr %arrayidx2.3, align 1
  %conv3.3 = zext i8 %y.3 to i32
  %add.3 = add nuw nsw i32 %add4.2, %conv.3
  %add4.3 = add nuw nsw i32 %add.3, %conv3.3

  ret i32 %add4.3
}
11191c9094a2SLuke Lau
; Same two-array zext-add reduction as reduce_sum_2arrays_a, but with the
; scalar adds sequenced array-by-array (all of %x, then all of %y); expected
; vectorized output is identical to the _a variant.
define i32 @reduce_sum_2arrays_b(ptr noalias noundef %x, ptr noalias %y) {
; CHECK-LABEL: @reduce_sum_2arrays_b(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i8>, ptr [[X:%.*]], align 1
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr [[Y:%.*]], align 1
; CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i8> @llvm.vector.insert.v8i8.v4i8(<8 x i8> poison, <4 x i8> [[TMP0]], i64 0)
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.vector.insert.v8i8.v4i8(<8 x i8> [[TMP2]], <4 x i8> [[TMP1]], i64 4)
; CHECK-NEXT:    [[TMP4:%.*]] = zext <8 x i8> [[TMP3]] to <8 x i32>
; CHECK-NEXT:    [[TMP5:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP4]])
; CHECK-NEXT:    ret i32 [[TMP5]]
;
  entry:
  %0 = load i8, ptr %x, align 1
  %conv = zext i8 %0 to i32
  %arrayidx.1 = getelementptr inbounds i8, ptr %x, i64 1
  %1 = load i8, ptr %arrayidx.1, align 1
  %conv.1 = zext i8 %1 to i32
  %add.1 = add nuw nsw i32 %conv, %conv.1
  %arrayidx.2 = getelementptr inbounds i8, ptr %x, i64 2
  %2 = load i8, ptr %arrayidx.2, align 1
  %conv.2 = zext i8 %2 to i32
  %add.2 = add nuw nsw i32 %add.1, %conv.2
  %arrayidx.3 = getelementptr inbounds i8, ptr %x, i64 3
  %3 = load i8, ptr %arrayidx.3, align 1
  %conv.3 = zext i8 %3 to i32
  %add.3 = add nuw nsw i32 %add.2, %conv.3
  %4 = load i8, ptr %y, align 1
  %conv9 = zext i8 %4 to i32
  %add10 = add nuw nsw i32 %add.3, %conv9
  %arrayidx8.1 = getelementptr inbounds i8, ptr %y, i64 1
  %5 = load i8, ptr %arrayidx8.1, align 1
  %conv9.1 = zext i8 %5 to i32
  %add10.1 = add nuw nsw i32 %add10, %conv9.1
  %arrayidx8.2 = getelementptr inbounds i8, ptr %y, i64 2
  %6 = load i8, ptr %arrayidx8.2, align 1
  %conv9.2 = zext i8 %6 to i32
  %add10.2 = add nuw nsw i32 %add10.1, %conv9.2
  %arrayidx8.3 = getelementptr inbounds i8, ptr %y, i64 3
  %7 = load i8, ptr %arrayidx8.3, align 1
  %conv9.3 = zext i8 %7 to i32
  %add10.3 = add nuw nsw i32 %add10.2, %conv9.3
  ret i32 %add10.3
}
116320864d2cSLuke Lau
116420864d2cSLuke Lau; Shouldn't vectorize to a reduction because we can't promote it
; Negative test: the CHECK lines pin that the scalar fadd chain over bfloat
; loads is left untouched (no vector.reduce.fadd is formed) under every RUN
; configuration.
define bfloat @fadd_4xbf16(ptr %p) {
; CHECK-LABEL: @fadd_4xbf16(
; CHECK-NEXT:    [[X0:%.*]] = load bfloat, ptr [[P:%.*]], align 2
; CHECK-NEXT:    [[P1:%.*]] = getelementptr bfloat, ptr [[P]], i32 1
; CHECK-NEXT:    [[X1:%.*]] = load bfloat, ptr [[P1]], align 2
; CHECK-NEXT:    [[P2:%.*]] = getelementptr bfloat, ptr [[P]], i32 2
; CHECK-NEXT:    [[X2:%.*]] = load bfloat, ptr [[P2]], align 2
; CHECK-NEXT:    [[P3:%.*]] = getelementptr bfloat, ptr [[P]], i32 3
; CHECK-NEXT:    [[X3:%.*]] = load bfloat, ptr [[P3]], align 2
; CHECK-NEXT:    [[R0:%.*]] = fadd fast bfloat [[X0]], [[X1]]
; CHECK-NEXT:    [[R1:%.*]] = fadd fast bfloat [[R0]], [[X2]]
; CHECK-NEXT:    [[R2:%.*]] = fadd fast bfloat [[R1]], [[X3]]
; CHECK-NEXT:    ret bfloat [[R2]]
;
  %x0 = load bfloat, ptr %p
  %p1 = getelementptr bfloat, ptr %p, i32 1
  %x1 = load bfloat, ptr %p1
  %p2 = getelementptr bfloat, ptr %p, i32 2
  %x2 = load bfloat, ptr %p2
  %p3 = getelementptr bfloat, ptr %p, i32 3
  %x3 = load bfloat, ptr %p3

  %r0 = fadd fast bfloat %x0, %x1
  %r1 = fadd fast bfloat %r0, %x2
  %r2 = fadd fast bfloat %r1, %x3

  ret bfloat %r2
}
119320864d2cSLuke Lau
119420864d2cSLuke Lau; Shouldn't vectorize to a reduction because there's no vfred{u,o}mul.vs
; Negative test: scalar fmul chain over bfloat must remain scalar in all RUN
; configurations (the CHECK lines match the unmodified input IR).
define bfloat @fmul_4xbf16(ptr %p) {
; CHECK-LABEL: @fmul_4xbf16(
; CHECK-NEXT:    [[X0:%.*]] = load bfloat, ptr [[P:%.*]], align 2
; CHECK-NEXT:    [[P1:%.*]] = getelementptr bfloat, ptr [[P]], i32 1
; CHECK-NEXT:    [[X1:%.*]] = load bfloat, ptr [[P1]], align 2
; CHECK-NEXT:    [[P2:%.*]] = getelementptr bfloat, ptr [[P]], i32 2
; CHECK-NEXT:    [[X2:%.*]] = load bfloat, ptr [[P2]], align 2
; CHECK-NEXT:    [[P3:%.*]] = getelementptr bfloat, ptr [[P]], i32 3
; CHECK-NEXT:    [[X3:%.*]] = load bfloat, ptr [[P3]], align 2
; CHECK-NEXT:    [[R0:%.*]] = fmul fast bfloat [[X0]], [[X1]]
; CHECK-NEXT:    [[R1:%.*]] = fmul fast bfloat [[R0]], [[X2]]
; CHECK-NEXT:    [[R2:%.*]] = fmul fast bfloat [[R1]], [[X3]]
; CHECK-NEXT:    ret bfloat [[R2]]
;
  %x0 = load bfloat, ptr %p
  %p1 = getelementptr bfloat, ptr %p, i32 1
  %x1 = load bfloat, ptr %p1
  %p2 = getelementptr bfloat, ptr %p, i32 2
  %x2 = load bfloat, ptr %p2
  %p3 = getelementptr bfloat, ptr %p, i32 3
  %x3 = load bfloat, ptr %p3

  %r0 = fmul fast bfloat %x0, %x1
  %r1 = fmul fast bfloat %r0, %x2
  %r2 = fmul fast bfloat %r1, %x3

  ret bfloat %r2
}
122320864d2cSLuke Lau
122420864d2cSLuke Lau; Shouldn't vectorize to a reduction on zvfhmin because we can't promote it
; Split check prefixes: with only zvfhmin (ZVFHMIN) the half fadd chain stays
; scalar, while with zvfh (ZVFH) it is vectorized into a <4 x half> load plus
; a fast @llvm.vector.reduce.fadd with a 0.0 (0xH0000) start value.
define half @fadd_4xf16(ptr %p) {
; ZVFHMIN-LABEL: @fadd_4xf16(
; ZVFHMIN-NEXT:    [[X0:%.*]] = load half, ptr [[P:%.*]], align 2
; ZVFHMIN-NEXT:    [[P1:%.*]] = getelementptr half, ptr [[P]], i32 1
; ZVFHMIN-NEXT:    [[X1:%.*]] = load half, ptr [[P1]], align 2
; ZVFHMIN-NEXT:    [[P2:%.*]] = getelementptr half, ptr [[P]], i32 2
; ZVFHMIN-NEXT:    [[X2:%.*]] = load half, ptr [[P2]], align 2
; ZVFHMIN-NEXT:    [[P3:%.*]] = getelementptr half, ptr [[P]], i32 3
; ZVFHMIN-NEXT:    [[X3:%.*]] = load half, ptr [[P3]], align 2
; ZVFHMIN-NEXT:    [[R0:%.*]] = fadd fast half [[X0]], [[X1]]
; ZVFHMIN-NEXT:    [[R1:%.*]] = fadd fast half [[R0]], [[X2]]
; ZVFHMIN-NEXT:    [[R2:%.*]] = fadd fast half [[R1]], [[X3]]
; ZVFHMIN-NEXT:    ret half [[R2]]
;
; ZVFH-LABEL: @fadd_4xf16(
; ZVFH-NEXT:    [[TMP1:%.*]] = load <4 x half>, ptr [[P:%.*]], align 2
; ZVFH-NEXT:    [[TMP2:%.*]] = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> [[TMP1]])
; ZVFH-NEXT:    ret half [[TMP2]]
;
  %x0 = load half, ptr %p
  %p1 = getelementptr half, ptr %p, i32 1
  %x1 = load half, ptr %p1
  %p2 = getelementptr half, ptr %p, i32 2
  %x2 = load half, ptr %p2
  %p3 = getelementptr half, ptr %p, i32 3
  %x3 = load half, ptr %p3

  %r0 = fadd fast half %x0, %x1
  %r1 = fadd fast half %r0, %x2
  %r2 = fadd fast half %r1, %x3

  ret half %r2
}
125820864d2cSLuke Lau
125920864d2cSLuke Lau; Shouldn't vectorize to a reduction because there's no vfred{u,o}mul.vs
; Negative test: the half fmul chain must stay scalar under every RUN
; configuration, including zvfh (checked via the common CHECK prefix).
define half @fmul_4xf16(ptr %p) {
; CHECK-LABEL: @fmul_4xf16(
; CHECK-NEXT:    [[X0:%.*]] = load half, ptr [[P:%.*]], align 2
; CHECK-NEXT:    [[P1:%.*]] = getelementptr half, ptr [[P]], i32 1
; CHECK-NEXT:    [[X1:%.*]] = load half, ptr [[P1]], align 2
; CHECK-NEXT:    [[P2:%.*]] = getelementptr half, ptr [[P]], i32 2
; CHECK-NEXT:    [[X2:%.*]] = load half, ptr [[P2]], align 2
; CHECK-NEXT:    [[P3:%.*]] = getelementptr half, ptr [[P]], i32 3
; CHECK-NEXT:    [[X3:%.*]] = load half, ptr [[P3]], align 2
; CHECK-NEXT:    [[R0:%.*]] = fmul fast half [[X0]], [[X1]]
; CHECK-NEXT:    [[R1:%.*]] = fmul fast half [[R0]], [[X2]]
; CHECK-NEXT:    [[R2:%.*]] = fmul fast half [[R1]], [[X3]]
; CHECK-NEXT:    ret half [[R2]]
;
  %x0 = load half, ptr %p
  %p1 = getelementptr half, ptr %p, i32 1
  %x1 = load half, ptr %p1
  %p2 = getelementptr half, ptr %p, i32 2
  %x2 = load half, ptr %p2
  %p3 = getelementptr half, ptr %p, i32 3
  %x3 = load half, ptr %p3

  %r0 = fmul fast half %x0, %x1
  %r1 = fmul fast half %r0, %x2
  %r2 = fmul fast half %r1, %x3

  ret half %r2
}
1288