xref: /llvm-project/llvm/test/Transforms/SLPVectorizer/RISCV/reductions.ll (revision 7523086a050d679370dfd86a0166d5f7168ffa09)
110625958SPhilip Reames; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
220864d2cSLuke Lau; RUN: opt < %s -passes=slp-vectorizer -mtriple=riscv64 \
320864d2cSLuke Lau; RUN: -mattr=+v,+zvfhmin,+zvfbfmin -riscv-v-slp-max-vf=0 -S \
420864d2cSLuke Lau; RUN: | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
520864d2cSLuke Lau; RUN: opt < %s -passes=slp-vectorizer -mtriple=riscv64 \
620864d2cSLuke Lau; RUN: -mattr=+v,+zvfh,+zvfbfmin -riscv-v-slp-max-vf=0 -S \
720864d2cSLuke Lau; RUN: | FileCheck %s --check-prefixes=CHECK,ZVFH,ZVL128
820864d2cSLuke Lau; RUN: opt < %s -passes=slp-vectorizer -mtriple=riscv64 \
920864d2cSLuke Lau; RUN: -mattr=+v,+zvl256b,+zvfh,+zvfbfmin -riscv-v-slp-max-vf=0 -S \
1020864d2cSLuke Lau; RUN: | FileCheck %s --check-prefixes=CHECK,ZVFH,ZVL256
1120864d2cSLuke Lau; RUN: opt < %s -passes=slp-vectorizer -mtriple=riscv64 \
1220864d2cSLuke Lau; RUN: -mattr=+v,+zvl512b,+zvfh,+zvfbfmin -riscv-v-slp-max-vf=0 -S \
1320864d2cSLuke Lau; RUN: | FileCheck %s --check-prefixes=CHECK,ZVFH,ZVL512
1410625958SPhilip Reames
1510625958SPhilip Reamestarget datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
1610625958SPhilip Reamestarget triple = "riscv64"
1710625958SPhilip Reames
1810625958SPhilip Reames; First batch of tests are simple reductions of various widths
1910625958SPhilip Reames
; 2-element add chain of contiguous loads. Per the CHECK lines this stays
; scalar: SLP does not form a <2 x i64> reduction here.
2010625958SPhilip Reamesdefine i64 @red_ld_2xi64(ptr %ptr) {
2110625958SPhilip Reames; CHECK-LABEL: @red_ld_2xi64(
2210625958SPhilip Reames; CHECK-NEXT:  entry:
2310625958SPhilip Reames; CHECK-NEXT:    [[LD0:%.*]] = load i64, ptr [[PTR:%.*]], align 8
2410625958SPhilip Reames; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i64, ptr [[PTR]], i64 1
2510625958SPhilip Reames; CHECK-NEXT:    [[LD1:%.*]] = load i64, ptr [[GEP]], align 8
2610625958SPhilip Reames; CHECK-NEXT:    [[ADD_1:%.*]] = add nuw nsw i64 [[LD0]], [[LD1]]
2710625958SPhilip Reames; CHECK-NEXT:    ret i64 [[ADD_1]]
2810625958SPhilip Reames;
2910625958SPhilip Reamesentry:
3010625958SPhilip Reames  %ld0 = load i64, ptr %ptr
3110625958SPhilip Reames  %gep = getelementptr inbounds i64, ptr %ptr, i64 1
3210625958SPhilip Reames  %ld1 = load i64, ptr %gep
3310625958SPhilip Reames  %add.1 = add nuw nsw i64 %ld0, %ld1
3410625958SPhilip Reames  ret i64 %add.1
3510625958SPhilip Reames}
3610625958SPhilip Reames
; 4 contiguous i64 loads summed in a linear chain. Expected to vectorize
; into a single <4 x i64> load feeding @llvm.vector.reduce.add.v4i64.
3710625958SPhilip Reamesdefine i64 @red_ld_4xi64(ptr %ptr) {
3810625958SPhilip Reames; CHECK-LABEL: @red_ld_4xi64(
3910625958SPhilip Reames; CHECK-NEXT:  entry:
4010625958SPhilip Reames; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i64>, ptr [[PTR:%.*]], align 8
4110625958SPhilip Reames; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP0]])
4210625958SPhilip Reames; CHECK-NEXT:    ret i64 [[TMP1]]
4310625958SPhilip Reames;
4410625958SPhilip Reamesentry:
4510625958SPhilip Reames  %ld0 = load i64, ptr %ptr
4610625958SPhilip Reames  %gep = getelementptr inbounds i64, ptr %ptr, i64 1
4710625958SPhilip Reames  %ld1 = load i64, ptr %gep
4810625958SPhilip Reames  %add.1 = add nuw nsw i64 %ld0, %ld1
4910625958SPhilip Reames  %gep.1 = getelementptr inbounds i64, ptr %ptr, i64 2
5010625958SPhilip Reames  %ld2 = load i64, ptr %gep.1
5110625958SPhilip Reames  %add.2 = add nuw nsw i64 %add.1, %ld2
5210625958SPhilip Reames  %gep.2 = getelementptr inbounds i64, ptr %ptr, i64 3
5310625958SPhilip Reames  %ld3 = load i64, ptr %gep.2
5410625958SPhilip Reames  %add.3 = add nuw nsw i64 %add.2, %ld3
5510625958SPhilip Reames  ret i64 %add.3
5610625958SPhilip Reames}
5710625958SPhilip Reames
; 8 contiguous i64 loads summed in a linear chain. Expected to vectorize
; into a single <8 x i64> load feeding @llvm.vector.reduce.add.v8i64.
5810625958SPhilip Reamesdefine i64 @red_ld_8xi64(ptr %ptr) {
5910625958SPhilip Reames; CHECK-LABEL: @red_ld_8xi64(
6010625958SPhilip Reames; CHECK-NEXT:  entry:
6110625958SPhilip Reames; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i64>, ptr [[PTR:%.*]], align 8
6210625958SPhilip Reames; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[TMP0]])
6310625958SPhilip Reames; CHECK-NEXT:    ret i64 [[TMP1]]
6410625958SPhilip Reames;
6510625958SPhilip Reamesentry:
6610625958SPhilip Reames  %ld0 = load i64, ptr %ptr
6710625958SPhilip Reames  %gep = getelementptr inbounds i64, ptr %ptr, i64 1
6810625958SPhilip Reames  %ld1 = load i64, ptr %gep
6910625958SPhilip Reames  %add.1 = add nuw nsw i64 %ld0, %ld1
7010625958SPhilip Reames  %gep.1 = getelementptr inbounds i64, ptr %ptr, i64 2
7110625958SPhilip Reames  %ld2 = load i64, ptr %gep.1
7210625958SPhilip Reames  %add.2 = add nuw nsw i64 %add.1, %ld2
7310625958SPhilip Reames  %gep.2 = getelementptr inbounds i64, ptr %ptr, i64 3
7410625958SPhilip Reames  %ld3 = load i64, ptr %gep.2
7510625958SPhilip Reames  %add.3 = add nuw nsw i64 %add.2, %ld3
7610625958SPhilip Reames  %gep.3 = getelementptr inbounds i64, ptr %ptr, i64 4
7710625958SPhilip Reames  %ld4 = load i64, ptr %gep.3
7810625958SPhilip Reames  %add.4 = add nuw nsw i64 %add.3, %ld4
7910625958SPhilip Reames  %gep.4 = getelementptr inbounds i64, ptr %ptr, i64 5
8010625958SPhilip Reames  %ld5 = load i64, ptr %gep.4
8110625958SPhilip Reames  %add.5 = add nuw nsw i64 %add.4, %ld5
8210625958SPhilip Reames  %gep.5 = getelementptr inbounds i64, ptr %ptr, i64 6
8310625958SPhilip Reames  %ld6 = load i64, ptr %gep.5
8410625958SPhilip Reames  %add.6 = add nuw nsw i64 %add.5, %ld6
8510625958SPhilip Reames  %gep.6 = getelementptr inbounds i64, ptr %ptr, i64 7
8610625958SPhilip Reames  %ld7 = load i64, ptr %gep.6
8710625958SPhilip Reames  %add.7 = add nuw nsw i64 %add.6, %ld7
8810625958SPhilip Reames  ret i64 %add.7
8910625958SPhilip Reames}
9010625958SPhilip Reames
; 16 contiguous i64 loads summed in a linear chain. Expected to vectorize
; into a single <16 x i64> load feeding @llvm.vector.reduce.add.v16i64
; (-riscv-v-slp-max-vf=0 removes the SLP VF cap, allowing the wide VF).
9110625958SPhilip Reamesdefine i64 @red_ld_16xi64(ptr %ptr) {
9210625958SPhilip Reames; CHECK-LABEL: @red_ld_16xi64(
9310625958SPhilip Reames; CHECK-NEXT:  entry:
9410625958SPhilip Reames; CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i64>, ptr [[PTR:%.*]], align 8
9510625958SPhilip Reames; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> [[TMP0]])
9610625958SPhilip Reames; CHECK-NEXT:    ret i64 [[TMP1]]
9710625958SPhilip Reames;
9810625958SPhilip Reamesentry:
9910625958SPhilip Reames  %ld0 = load i64, ptr %ptr
10010625958SPhilip Reames  %gep = getelementptr inbounds i64, ptr %ptr, i64 1
10110625958SPhilip Reames  %ld1 = load i64, ptr %gep
10210625958SPhilip Reames  %add.1 = add nuw nsw i64 %ld0, %ld1
10310625958SPhilip Reames  %gep.1 = getelementptr inbounds i64, ptr %ptr, i64 2
10410625958SPhilip Reames  %ld2 = load i64, ptr %gep.1
10510625958SPhilip Reames  %add.2 = add nuw nsw i64 %add.1, %ld2
10610625958SPhilip Reames  %gep.2 = getelementptr inbounds i64, ptr %ptr, i64 3
10710625958SPhilip Reames  %ld3 = load i64, ptr %gep.2
10810625958SPhilip Reames  %add.3 = add nuw nsw i64 %add.2, %ld3
10910625958SPhilip Reames  %gep.3 = getelementptr inbounds i64, ptr %ptr, i64 4
11010625958SPhilip Reames  %ld4 = load i64, ptr %gep.3
11110625958SPhilip Reames  %add.4 = add nuw nsw i64 %add.3, %ld4
11210625958SPhilip Reames  %gep.4 = getelementptr inbounds i64, ptr %ptr, i64 5
11310625958SPhilip Reames  %ld5 = load i64, ptr %gep.4
11410625958SPhilip Reames  %add.5 = add nuw nsw i64 %add.4, %ld5
11510625958SPhilip Reames  %gep.5 = getelementptr inbounds i64, ptr %ptr, i64 6
11610625958SPhilip Reames  %ld6 = load i64, ptr %gep.5
11710625958SPhilip Reames  %add.6 = add nuw nsw i64 %add.5, %ld6
11810625958SPhilip Reames  %gep.6 = getelementptr inbounds i64, ptr %ptr, i64 7
11910625958SPhilip Reames  %ld7 = load i64, ptr %gep.6
12010625958SPhilip Reames  %add.7 = add nuw nsw i64 %add.6, %ld7
12110625958SPhilip Reames  %gep.7 = getelementptr inbounds i64, ptr %ptr, i64 8
12210625958SPhilip Reames  %ld8 = load i64, ptr %gep.7
12310625958SPhilip Reames  %add.8 = add nuw nsw i64 %add.7, %ld8
12410625958SPhilip Reames  %gep.8 = getelementptr inbounds i64, ptr %ptr, i64 9
12510625958SPhilip Reames  %ld9 = load i64, ptr %gep.8
12610625958SPhilip Reames  %add.9 = add nuw nsw i64 %add.8, %ld9
12710625958SPhilip Reames  %gep.9 = getelementptr inbounds i64, ptr %ptr, i64 10
12810625958SPhilip Reames  %ld10 = load i64, ptr %gep.9
12910625958SPhilip Reames  %add.10 = add nuw nsw i64 %add.9, %ld10
13010625958SPhilip Reames  %gep.10 = getelementptr inbounds i64, ptr %ptr, i64 11
13110625958SPhilip Reames  %ld11 = load i64, ptr %gep.10
13210625958SPhilip Reames  %add.11 = add nuw nsw i64 %add.10, %ld11
13310625958SPhilip Reames  %gep.11 = getelementptr inbounds i64, ptr %ptr, i64 12
13410625958SPhilip Reames  %ld12 = load i64, ptr %gep.11
13510625958SPhilip Reames  %add.12 = add nuw nsw i64 %add.11, %ld12
13610625958SPhilip Reames  %gep.12 = getelementptr inbounds i64, ptr %ptr, i64 13
13710625958SPhilip Reames  %ld13 = load i64, ptr %gep.12
13810625958SPhilip Reames  %add.13 = add nuw nsw i64 %add.12, %ld13
13910625958SPhilip Reames  %gep.13 = getelementptr inbounds i64, ptr %ptr, i64 14
14010625958SPhilip Reames  %ld14 = load i64, ptr %gep.13
14110625958SPhilip Reames  %add.14 = add nuw nsw i64 %add.13, %ld14
14210625958SPhilip Reames  %gep.14 = getelementptr inbounds i64, ptr %ptr, i64 15
14310625958SPhilip Reames  %ld15 = load i64, ptr %gep.14
14410625958SPhilip Reames  %add.15 = add nuw nsw i64 %add.14, %ld15
14510625958SPhilip Reames  ret i64 %add.15
14610625958SPhilip Reames}
14710625958SPhilip Reames
148deb3ecf0SAlexey Bataev
; Same 16-element add reduction, but the loads are at even i64 indices
; (0, 2, 4, ..., 30), i.e. a constant stride of 2 elements = 16 bytes.
; Expected to become @llvm.experimental.vp.strided.load with byte stride
; i64 16, feeding @llvm.vector.reduce.add.v16i64.
149deb3ecf0SAlexey Bataevdefine i64 @red_strided_ld_16xi64(ptr %ptr) {
150deb3ecf0SAlexey Bataev; CHECK-LABEL: @red_strided_ld_16xi64(
151deb3ecf0SAlexey Bataev; CHECK-NEXT:  entry:
15238fffa63SPaul Walker; CHECK-NEXT:    [[TMP0:%.*]] = call <16 x i64> @llvm.experimental.vp.strided.load.v16i64.p0.i64(ptr align 8 [[PTR:%.*]], i64 16, <16 x i1> splat (i1 true), i32 16)
153799fd3d8SAlexey Bataev; CHECK-NEXT:    [[TMP1:%.*]] = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> [[TMP0]])
154799fd3d8SAlexey Bataev; CHECK-NEXT:    ret i64 [[TMP1]]
155deb3ecf0SAlexey Bataev;
156deb3ecf0SAlexey Bataeventry:
157deb3ecf0SAlexey Bataev  %ld0 = load i64, ptr %ptr
158deb3ecf0SAlexey Bataev  %gep = getelementptr inbounds i64, ptr %ptr, i64 2
159deb3ecf0SAlexey Bataev  %ld1 = load i64, ptr %gep
160deb3ecf0SAlexey Bataev  %add.1 = add nuw nsw i64 %ld0, %ld1
161deb3ecf0SAlexey Bataev  %gep.1 = getelementptr inbounds i64, ptr %ptr, i64 4
162deb3ecf0SAlexey Bataev  %ld2 = load i64, ptr %gep.1
163deb3ecf0SAlexey Bataev  %add.2 = add nuw nsw i64 %add.1, %ld2
164deb3ecf0SAlexey Bataev  %gep.2 = getelementptr inbounds i64, ptr %ptr, i64 6
165deb3ecf0SAlexey Bataev  %ld3 = load i64, ptr %gep.2
166deb3ecf0SAlexey Bataev  %add.3 = add nuw nsw i64 %add.2, %ld3
167deb3ecf0SAlexey Bataev  %gep.3 = getelementptr inbounds i64, ptr %ptr, i64 8
168deb3ecf0SAlexey Bataev  %ld4 = load i64, ptr %gep.3
169deb3ecf0SAlexey Bataev  %add.4 = add nuw nsw i64 %add.3, %ld4
170deb3ecf0SAlexey Bataev  %gep.4 = getelementptr inbounds i64, ptr %ptr, i64 10
171deb3ecf0SAlexey Bataev  %ld5 = load i64, ptr %gep.4
172deb3ecf0SAlexey Bataev  %add.5 = add nuw nsw i64 %add.4, %ld5
173deb3ecf0SAlexey Bataev  %gep.5 = getelementptr inbounds i64, ptr %ptr, i64 12
174deb3ecf0SAlexey Bataev  %ld6 = load i64, ptr %gep.5
175deb3ecf0SAlexey Bataev  %add.6 = add nuw nsw i64 %add.5, %ld6
176deb3ecf0SAlexey Bataev  %gep.6 = getelementptr inbounds i64, ptr %ptr, i64 14
177deb3ecf0SAlexey Bataev  %ld7 = load i64, ptr %gep.6
178deb3ecf0SAlexey Bataev  %add.7 = add nuw nsw i64 %add.6, %ld7
179deb3ecf0SAlexey Bataev  %gep.7 = getelementptr inbounds i64, ptr %ptr, i64 16
180deb3ecf0SAlexey Bataev  %ld8 = load i64, ptr %gep.7
181deb3ecf0SAlexey Bataev  %add.8 = add nuw nsw i64 %add.7, %ld8
182deb3ecf0SAlexey Bataev  %gep.8 = getelementptr inbounds i64, ptr %ptr, i64 18
183deb3ecf0SAlexey Bataev  %ld9 = load i64, ptr %gep.8
184deb3ecf0SAlexey Bataev  %add.9 = add nuw nsw i64 %add.8, %ld9
185deb3ecf0SAlexey Bataev  %gep.9 = getelementptr inbounds i64, ptr %ptr, i64 20
186deb3ecf0SAlexey Bataev  %ld10 = load i64, ptr %gep.9
187deb3ecf0SAlexey Bataev  %add.10 = add nuw nsw i64 %add.9, %ld10
188deb3ecf0SAlexey Bataev  %gep.10 = getelementptr inbounds i64, ptr %ptr, i64 22
189deb3ecf0SAlexey Bataev  %ld11 = load i64, ptr %gep.10
190deb3ecf0SAlexey Bataev  %add.11 = add nuw nsw i64 %add.10, %ld11
191deb3ecf0SAlexey Bataev  %gep.11 = getelementptr inbounds i64, ptr %ptr, i64 24
192deb3ecf0SAlexey Bataev  %ld12 = load i64, ptr %gep.11
193deb3ecf0SAlexey Bataev  %add.12 = add nuw nsw i64 %add.11, %ld12
194deb3ecf0SAlexey Bataev  %gep.12 = getelementptr inbounds i64, ptr %ptr, i64 26
195deb3ecf0SAlexey Bataev  %ld13 = load i64, ptr %gep.12
196deb3ecf0SAlexey Bataev  %add.13 = add nuw nsw i64 %add.12, %ld13
197deb3ecf0SAlexey Bataev  %gep.13 = getelementptr inbounds i64, ptr %ptr, i64 28
198deb3ecf0SAlexey Bataev  %ld14 = load i64, ptr %gep.13
199deb3ecf0SAlexey Bataev  %add.14 = add nuw nsw i64 %add.13, %ld14
200deb3ecf0SAlexey Bataev  %gep.14 = getelementptr inbounds i64, ptr %ptr, i64 30
201deb3ecf0SAlexey Bataev  %ld15 = load i64, ptr %gep.14
202deb3ecf0SAlexey Bataev  %add.15 = add nuw nsw i64 %add.14, %ld15
203deb3ecf0SAlexey Bataev  ret i64 %add.15
204deb3ecf0SAlexey Bataev}
205deb3ecf0SAlexey Bataev
20610625958SPhilip Reames; Next batch tests different reduction kinds
20710625958SPhilip Reames
20810625958SPhilip Reames%struct.buf = type { [8 x i8] }
20910625958SPhilip Reames
; AND reduction: 8 pairs of i8 loads from %a/%b are xor'ed, then chained
; with 'and'. The first 'and' also folds in the constant 1, so the CHECK
; lines expect vector loads + xor + @llvm.vector.reduce.and.v8i8 followed
; by a scalar 'and i8 ..., 1' (OP_RDX).
21010625958SPhilip Reamesdefine i8 @reduce_and(ptr %a, ptr %b) {
21110625958SPhilip Reames; CHECK-LABEL: @reduce_and(
21210625958SPhilip Reames; CHECK-NEXT:  entry:
21310625958SPhilip Reames; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_BUF:%.*]], ptr [[A:%.*]], i64 0, i32 0, i64 0
21410625958SPhilip Reames; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_BUF]], ptr [[B:%.*]], i64 0, i32 0, i64 0
21510625958SPhilip Reames; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 1
21610625958SPhilip Reames; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[ARRAYIDX3]], align 1
21710625958SPhilip Reames; CHECK-NEXT:    [[TMP2:%.*]] = xor <8 x i8> [[TMP1]], [[TMP0]]
21810625958SPhilip Reames; CHECK-NEXT:    [[TMP3:%.*]] = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> [[TMP2]])
21910625958SPhilip Reames; CHECK-NEXT:    [[OP_RDX:%.*]] = and i8 [[TMP3]], 1
22010625958SPhilip Reames; CHECK-NEXT:    ret i8 [[OP_RDX]]
22110625958SPhilip Reames;
22210625958SPhilip Reamesentry:
22310625958SPhilip Reames  %arrayidx = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 0
2242d69827cSNikita Popov  %0 = load i8, ptr %arrayidx, align 1
22510625958SPhilip Reames  %arrayidx3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 0
2262d69827cSNikita Popov  %1 = load i8, ptr %arrayidx3, align 1
22710625958SPhilip Reames  %xor12 = xor i8 %1, %0
22810625958SPhilip Reames  %and13 = and i8 %xor12, 1
22910625958SPhilip Reames  %arrayidx.1 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 1
2302d69827cSNikita Popov  %2 = load i8, ptr %arrayidx.1, align 1
23110625958SPhilip Reames  %arrayidx3.1 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 1
2322d69827cSNikita Popov  %3 = load i8, ptr %arrayidx3.1, align 1
23310625958SPhilip Reames  %xor12.1 = xor i8 %3, %2
23410625958SPhilip Reames  %and13.1 = and i8 %xor12.1, %and13
23510625958SPhilip Reames  %arrayidx.2 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 2
2362d69827cSNikita Popov  %4 = load i8, ptr %arrayidx.2, align 1
23710625958SPhilip Reames  %arrayidx3.2 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 2
2382d69827cSNikita Popov  %5 = load i8, ptr %arrayidx3.2, align 1
23910625958SPhilip Reames  %xor12.2 = xor i8 %5, %4
24010625958SPhilip Reames  %and13.2 = and i8 %xor12.2, %and13.1
24110625958SPhilip Reames  %arrayidx.3 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 3
2422d69827cSNikita Popov  %6 = load i8, ptr %arrayidx.3, align 1
24310625958SPhilip Reames  %arrayidx3.3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 3
2442d69827cSNikita Popov  %7 = load i8, ptr %arrayidx3.3, align 1
24510625958SPhilip Reames  %xor12.3 = xor i8 %7, %6
24610625958SPhilip Reames  %and13.3 = and i8 %xor12.3, %and13.2
24710625958SPhilip Reames  %arrayidx.4 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 4
2482d69827cSNikita Popov  %8 = load i8, ptr %arrayidx.4, align 1
24910625958SPhilip Reames  %arrayidx3.4 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 4
2502d69827cSNikita Popov  %9 = load i8, ptr %arrayidx3.4, align 1
25110625958SPhilip Reames  %xor12.4 = xor i8 %9, %8
25210625958SPhilip Reames  %and13.4 = and i8 %xor12.4, %and13.3
25310625958SPhilip Reames  %arrayidx.5 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 5
2542d69827cSNikita Popov  %10 = load i8, ptr %arrayidx.5, align 1
25510625958SPhilip Reames  %arrayidx3.5 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 5
2562d69827cSNikita Popov  %11 = load i8, ptr %arrayidx3.5, align 1
25710625958SPhilip Reames  %xor12.5 = xor i8 %11, %10
25810625958SPhilip Reames  %and13.5 = and i8 %xor12.5, %and13.4
25910625958SPhilip Reames  %arrayidx.6 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 6
2602d69827cSNikita Popov  %12 = load i8, ptr %arrayidx.6, align 1
26110625958SPhilip Reames  %arrayidx3.6 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 6
2622d69827cSNikita Popov  %13 = load i8, ptr %arrayidx3.6, align 1
26310625958SPhilip Reames  %xor12.6 = xor i8 %13, %12
26410625958SPhilip Reames  %and13.6 = and i8 %xor12.6, %and13.5
26510625958SPhilip Reames  %arrayidx.7 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 7
2662d69827cSNikita Popov  %14 = load i8, ptr %arrayidx.7, align 1
26710625958SPhilip Reames  %arrayidx3.7 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 7
2682d69827cSNikita Popov  %15 = load i8, ptr %arrayidx3.7, align 1
26910625958SPhilip Reames  %xor12.7 = xor i8 %15, %14
27010625958SPhilip Reames  %and13.7 = and i8 %xor12.7, %and13.6
27110625958SPhilip Reames  ret i8 %and13.7
27210625958SPhilip Reames}
27310625958SPhilip Reames
; OR reduction: 8 pairs of i8 loads xor'ed then or-chained, with no extra
; constant operand, so the CHECK lines expect the reduction to collapse
; fully into @llvm.vector.reduce.or.v8i8 (no trailing OP_RDX).
2749a82bda9Swangpcdefine i8 @reduce_or_1(ptr %a, ptr %b) {
2759a82bda9Swangpc; CHECK-LABEL: @reduce_or_1(
27610625958SPhilip Reames; CHECK-NEXT:  entry:
27710625958SPhilip Reames; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_BUF:%.*]], ptr [[A:%.*]], i64 0, i32 0, i64 0
27810625958SPhilip Reames; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_BUF]], ptr [[B:%.*]], i64 0, i32 0, i64 0
27910625958SPhilip Reames; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 1
28010625958SPhilip Reames; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[ARRAYIDX3]], align 1
28110625958SPhilip Reames; CHECK-NEXT:    [[TMP2:%.*]] = xor <8 x i8> [[TMP1]], [[TMP0]]
28210625958SPhilip Reames; CHECK-NEXT:    [[TMP3:%.*]] = call i8 @llvm.vector.reduce.or.v8i8(<8 x i8> [[TMP2]])
28310625958SPhilip Reames; CHECK-NEXT:    ret i8 [[TMP3]]
28410625958SPhilip Reames;
28510625958SPhilip Reames
28610625958SPhilip Reamesentry:
28710625958SPhilip Reames  %arrayidx = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 0
2882d69827cSNikita Popov  %0 = load i8, ptr %arrayidx, align 1
28910625958SPhilip Reames  %arrayidx3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 0
2902d69827cSNikita Popov  %1 = load i8, ptr %arrayidx3, align 1
29110625958SPhilip Reames  %xor12 = xor i8 %1, %0
29210625958SPhilip Reames  %arrayidx.1 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 1
2932d69827cSNikita Popov  %2 = load i8, ptr %arrayidx.1, align 1
29410625958SPhilip Reames  %arrayidx3.1 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 1
2952d69827cSNikita Popov  %3 = load i8, ptr %arrayidx3.1, align 1
29610625958SPhilip Reames  %xor12.1 = xor i8 %3, %2
29710625958SPhilip Reames  %or13.1 = or i8 %xor12.1, %xor12
29810625958SPhilip Reames  %arrayidx.2 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 2
2992d69827cSNikita Popov  %4 = load i8, ptr %arrayidx.2, align 1
30010625958SPhilip Reames  %arrayidx3.2 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 2
3012d69827cSNikita Popov  %5 = load i8, ptr %arrayidx3.2, align 1
30210625958SPhilip Reames  %xor12.2 = xor i8 %5, %4
30310625958SPhilip Reames  %or13.2 = or i8 %xor12.2, %or13.1
30410625958SPhilip Reames  %arrayidx.3 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 3
3052d69827cSNikita Popov  %6 = load i8, ptr %arrayidx.3, align 1
30610625958SPhilip Reames  %arrayidx3.3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 3
3072d69827cSNikita Popov  %7 = load i8, ptr %arrayidx3.3, align 1
30810625958SPhilip Reames  %xor12.3 = xor i8 %7, %6
30910625958SPhilip Reames  %or13.3 = or i8 %xor12.3, %or13.2
31010625958SPhilip Reames  %arrayidx.4 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 4
3112d69827cSNikita Popov  %8 = load i8, ptr %arrayidx.4, align 1
31210625958SPhilip Reames  %arrayidx3.4 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 4
3132d69827cSNikita Popov  %9 = load i8, ptr %arrayidx3.4, align 1
31410625958SPhilip Reames  %xor12.4 = xor i8 %9, %8
31510625958SPhilip Reames  %or13.4 = or i8 %xor12.4, %or13.3
31610625958SPhilip Reames  %arrayidx.5 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 5
3172d69827cSNikita Popov  %10 = load i8, ptr %arrayidx.5, align 1
31810625958SPhilip Reames  %arrayidx3.5 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 5
3192d69827cSNikita Popov  %11 = load i8, ptr %arrayidx3.5, align 1
32010625958SPhilip Reames  %xor12.5 = xor i8 %11, %10
32110625958SPhilip Reames  %or13.5 = or i8 %xor12.5, %or13.4
32210625958SPhilip Reames  %arrayidx.6 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 6
3232d69827cSNikita Popov  %12 = load i8, ptr %arrayidx.6, align 1
32410625958SPhilip Reames  %arrayidx3.6 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 6
3252d69827cSNikita Popov  %13 = load i8, ptr %arrayidx3.6, align 1
32610625958SPhilip Reames  %xor12.6 = xor i8 %13, %12
32710625958SPhilip Reames  %or13.6 = or i8 %xor12.6, %or13.5
32810625958SPhilip Reames  %arrayidx.7 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 7
3292d69827cSNikita Popov  %14 = load i8, ptr %arrayidx.7, align 1
33010625958SPhilip Reames  %arrayidx3.7 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 7
3312d69827cSNikita Popov  %15 = load i8, ptr %arrayidx3.7, align 1
33210625958SPhilip Reames  %xor12.7 = xor i8 %15, %14
33310625958SPhilip Reames  %or13.7 = or i8 %xor12.7, %or13.6
33410625958SPhilip Reames  ret i8 %or13.7
33510625958SPhilip Reames}
33610625958SPhilip Reames
; OR reduction over 32 i1 icmp results feeding a branch. Only %1 (the shl)
; appears as a non-constant compare operand, at positions 15 and 22 of the
; chain. Per the CHECK lines, ZVL512 (with its wider minimum VLEN) forms a
; single <32 x i64> compare + @llvm.vector.reduce.or.v32i1, while the
; ZVFHMIN/ZVL128/ZVL256 configs split into two <16 x i64> halves whose
; v16i1 reductions are or'ed together (OP_RDX).
3379a82bda9Swangpcdefine void @reduce_or_2() {
33820864d2cSLuke Lau; ZVFHMIN-LABEL: @reduce_or_2(
33920864d2cSLuke Lau; ZVFHMIN-NEXT:    [[TMP1:%.*]] = shl i64 0, 0
34020864d2cSLuke Lau; ZVFHMIN-NEXT:    [[TMP2:%.*]] = insertelement <16 x i64> <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 poison>, i64 [[TMP1]], i32 15
34120864d2cSLuke Lau; ZVFHMIN-NEXT:    [[TMP3:%.*]] = icmp ult <16 x i64> [[TMP2]], zeroinitializer
34220864d2cSLuke Lau; ZVFHMIN-NEXT:    [[TMP4:%.*]] = insertelement <16 x i64> <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 poison, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>, i64 [[TMP1]], i32 6
34320864d2cSLuke Lau; ZVFHMIN-NEXT:    [[TMP5:%.*]] = icmp ult <16 x i64> [[TMP4]], zeroinitializer
34420864d2cSLuke Lau; ZVFHMIN-NEXT:    [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP3]])
34520864d2cSLuke Lau; ZVFHMIN-NEXT:    [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP5]])
34620864d2cSLuke Lau; ZVFHMIN-NEXT:    [[OP_RDX:%.*]] = or i1 [[TMP6]], [[TMP7]]
34720864d2cSLuke Lau; ZVFHMIN-NEXT:    br i1 [[OP_RDX]], label [[TMP9:%.*]], label [[TMP8:%.*]]
34820864d2cSLuke Lau; ZVFHMIN:       8:
34920864d2cSLuke Lau; ZVFHMIN-NEXT:    ret void
35020864d2cSLuke Lau; ZVFHMIN:       9:
35120864d2cSLuke Lau; ZVFHMIN-NEXT:    ret void
35220864d2cSLuke Lau;
353514b38cdSPhilip Reames; ZVL128-LABEL: @reduce_or_2(
354514b38cdSPhilip Reames; ZVL128-NEXT:    [[TMP1:%.*]] = shl i64 0, 0
355514b38cdSPhilip Reames; ZVL128-NEXT:    [[TMP2:%.*]] = insertelement <16 x i64> <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 poison>, i64 [[TMP1]], i32 15
356514b38cdSPhilip Reames; ZVL128-NEXT:    [[TMP3:%.*]] = icmp ult <16 x i64> [[TMP2]], zeroinitializer
357514b38cdSPhilip Reames; ZVL128-NEXT:    [[TMP4:%.*]] = insertelement <16 x i64> <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 poison, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>, i64 [[TMP1]], i32 6
358514b38cdSPhilip Reames; ZVL128-NEXT:    [[TMP5:%.*]] = icmp ult <16 x i64> [[TMP4]], zeroinitializer
359514b38cdSPhilip Reames; ZVL128-NEXT:    [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP3]])
360514b38cdSPhilip Reames; ZVL128-NEXT:    [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP5]])
361514b38cdSPhilip Reames; ZVL128-NEXT:    [[OP_RDX:%.*]] = or i1 [[TMP6]], [[TMP7]]
362514b38cdSPhilip Reames; ZVL128-NEXT:    br i1 [[OP_RDX]], label [[TMP9:%.*]], label [[TMP8:%.*]]
363514b38cdSPhilip Reames; ZVL128:       8:
364514b38cdSPhilip Reames; ZVL128-NEXT:    ret void
365514b38cdSPhilip Reames; ZVL128:       9:
366514b38cdSPhilip Reames; ZVL128-NEXT:    ret void
367514b38cdSPhilip Reames;
368514b38cdSPhilip Reames; ZVL256-LABEL: @reduce_or_2(
369514b38cdSPhilip Reames; ZVL256-NEXT:    [[TMP1:%.*]] = shl i64 0, 0
370514b38cdSPhilip Reames; ZVL256-NEXT:    [[TMP2:%.*]] = insertelement <16 x i64> <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 poison>, i64 [[TMP1]], i32 15
371514b38cdSPhilip Reames; ZVL256-NEXT:    [[TMP3:%.*]] = icmp ult <16 x i64> [[TMP2]], zeroinitializer
372514b38cdSPhilip Reames; ZVL256-NEXT:    [[TMP4:%.*]] = insertelement <16 x i64> <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 poison, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>, i64 [[TMP1]], i32 6
373514b38cdSPhilip Reames; ZVL256-NEXT:    [[TMP5:%.*]] = icmp ult <16 x i64> [[TMP4]], zeroinitializer
374514b38cdSPhilip Reames; ZVL256-NEXT:    [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP3]])
375514b38cdSPhilip Reames; ZVL256-NEXT:    [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP5]])
376514b38cdSPhilip Reames; ZVL256-NEXT:    [[OP_RDX:%.*]] = or i1 [[TMP6]], [[TMP7]]
377514b38cdSPhilip Reames; ZVL256-NEXT:    br i1 [[OP_RDX]], label [[TMP9:%.*]], label [[TMP8:%.*]]
378514b38cdSPhilip Reames; ZVL256:       8:
379514b38cdSPhilip Reames; ZVL256-NEXT:    ret void
380514b38cdSPhilip Reames; ZVL256:       9:
381514b38cdSPhilip Reames; ZVL256-NEXT:    ret void
382514b38cdSPhilip Reames;
383514b38cdSPhilip Reames; ZVL512-LABEL: @reduce_or_2(
384514b38cdSPhilip Reames; ZVL512-NEXT:    [[TMP1:%.*]] = shl i64 0, 0
385514b38cdSPhilip Reames; ZVL512-NEXT:    [[TMP2:%.*]] = insertelement <32 x i64> <i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 poison, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 poison, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0, i64 0>, i64 [[TMP1]], i32 15
386514b38cdSPhilip Reames; ZVL512-NEXT:    [[TMP3:%.*]] = shufflevector <32 x i64> [[TMP2]], <32 x i64> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 15, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
387514b38cdSPhilip Reames; ZVL512-NEXT:    [[TMP4:%.*]] = icmp ult <32 x i64> [[TMP3]], zeroinitializer
388514b38cdSPhilip Reames; ZVL512-NEXT:    [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> [[TMP4]])
389514b38cdSPhilip Reames; ZVL512-NEXT:    br i1 [[TMP5]], label [[TMP7:%.*]], label [[TMP6:%.*]]
390514b38cdSPhilip Reames; ZVL512:       6:
391514b38cdSPhilip Reames; ZVL512-NEXT:    ret void
392514b38cdSPhilip Reames; ZVL512:       7:
393514b38cdSPhilip Reames; ZVL512-NEXT:    ret void
3949a82bda9Swangpc;
3959a82bda9Swangpc  %1 = shl i64 0, 0
3969a82bda9Swangpc  %2 = icmp ult i64 0, 0
3979a82bda9Swangpc  %3 = icmp ult i64 0, 0
3989a82bda9Swangpc  %4 = or i1 %2, %3
3999a82bda9Swangpc  %5 = icmp ult i64 0, 0
4009a82bda9Swangpc  %6 = or i1 %4, %5
4019a82bda9Swangpc  %7 = icmp ult i64 0, 0
4029a82bda9Swangpc  %8 = or i1 %6, %7
4039a82bda9Swangpc  %9 = icmp ult i64 0, 0
4049a82bda9Swangpc  %10 = or i1 %8, %9
4059a82bda9Swangpc  %11 = icmp ult i64 0, 0
4069a82bda9Swangpc  %12 = or i1 %10, %11
4079a82bda9Swangpc  %13 = icmp ult i64 0, 0
4089a82bda9Swangpc  %14 = or i1 %12, %13
4099a82bda9Swangpc  %15 = icmp ult i64 0, 0
4109a82bda9Swangpc  %16 = or i1 %14, %15
4119a82bda9Swangpc  %17 = icmp ult i64 0, 0
4129a82bda9Swangpc  %18 = or i1 %16, %17
4139a82bda9Swangpc  %19 = icmp ult i64 0, 0
4149a82bda9Swangpc  %20 = or i1 %18, %19
4159a82bda9Swangpc  %21 = icmp ult i64 0, 0
4169a82bda9Swangpc  %22 = or i1 %20, %21
4179a82bda9Swangpc  %23 = icmp ult i64 0, 0
4189a82bda9Swangpc  %24 = or i1 %22, %23
4199a82bda9Swangpc  %25 = icmp ult i64 0, 0
4209a82bda9Swangpc  %26 = or i1 %24, %25
4219a82bda9Swangpc  %27 = icmp ult i64 0, 0
4229a82bda9Swangpc  %28 = or i1 %26, %27
4239a82bda9Swangpc  %29 = icmp ult i64 0, 0
4249a82bda9Swangpc  %30 = or i1 %28, %29
4259a82bda9Swangpc  %31 = icmp ult i64 %1, 0
4269a82bda9Swangpc  %32 = or i1 %30, %31
4279a82bda9Swangpc  %33 = icmp ult i64 0, 0
4289a82bda9Swangpc  %34 = or i1 %32, %33
4299a82bda9Swangpc  %35 = icmp ult i64 0, 0
4309a82bda9Swangpc  %36 = or i1 %34, %35
4319a82bda9Swangpc  %37 = icmp ult i64 0, 0
4329a82bda9Swangpc  %38 = or i1 %36, %37
4339a82bda9Swangpc  %39 = icmp ult i64 0, 0
4349a82bda9Swangpc  %40 = or i1 %38, %39
4359a82bda9Swangpc  %41 = icmp ult i64 0, 0
4369a82bda9Swangpc  %42 = or i1 %40, %41
4379a82bda9Swangpc  %43 = icmp ult i64 0, 0
4389a82bda9Swangpc  %44 = or i1 %42, %43
4399a82bda9Swangpc  %45 = icmp ult i64 %1, 0
4409a82bda9Swangpc  %46 = or i1 %44, %45
4419a82bda9Swangpc  %47 = icmp ult i64 0, 0
4429a82bda9Swangpc  %48 = or i1 %46, %47
4439a82bda9Swangpc  %49 = icmp ult i64 0, 0
4449a82bda9Swangpc  %50 = or i1 %48, %49
4459a82bda9Swangpc  %51 = icmp ult i64 0, 0
4469a82bda9Swangpc  %52 = or i1 %50, %51
4479a82bda9Swangpc  %53 = icmp ult i64 0, 0
4489a82bda9Swangpc  %54 = or i1 %52, %53
4499a82bda9Swangpc  %55 = icmp ult i64 0, 0
4509a82bda9Swangpc  %56 = or i1 %54, %55
4519a82bda9Swangpc  %57 = icmp ult i64 0, 0
4529a82bda9Swangpc  %58 = or i1 %56, %57
4539a82bda9Swangpc  %59 = icmp ult i64 0, 0
4549a82bda9Swangpc  %60 = or i1 %58, %59
4559a82bda9Swangpc  %61 = icmp ult i64 0, 0
4569a82bda9Swangpc  %62 = or i1 %60, %61
4579a82bda9Swangpc  %63 = icmp ult i64 0, 0
4589a82bda9Swangpc  %64 = or i1 %62, %63
4599a82bda9Swangpc  br i1 %64, label %66, label %65
4609a82bda9Swangpc
4619a82bda9Swangpc65:                                               ; preds = %0
4629a82bda9Swangpc  ret void
4639a82bda9Swangpc
4649a82bda9Swangpc66:                                               ; preds = %0
4659a82bda9Swangpc  ret void
4669a82bda9Swangpc}
4679a82bda9Swangpc
46810625958SPhilip Reamesdefine i8 @reduce_xor(ptr %a, ptr %b) {
46910625958SPhilip Reames; CHECK-LABEL: @reduce_xor(
47010625958SPhilip Reames; CHECK-NEXT:  entry:
47110625958SPhilip Reames; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_BUF:%.*]], ptr [[A:%.*]], i64 0, i32 0, i64 0
47210625958SPhilip Reames; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_BUF]], ptr [[B:%.*]], i64 0, i32 0, i64 0
47310625958SPhilip Reames; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 1
47410625958SPhilip Reames; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[ARRAYIDX3]], align 1
47510625958SPhilip Reames; CHECK-NEXT:    [[TMP2:%.*]] = and <8 x i8> [[TMP1]], [[TMP0]]
47610625958SPhilip Reames; CHECK-NEXT:    [[TMP3:%.*]] = call i8 @llvm.vector.reduce.xor.v8i8(<8 x i8> [[TMP2]])
47710625958SPhilip Reames; CHECK-NEXT:    [[OP_RDX:%.*]] = xor i8 [[TMP3]], 1
47810625958SPhilip Reames; CHECK-NEXT:    ret i8 [[OP_RDX]]
47910625958SPhilip Reames;
48010625958SPhilip Reamesentry:
48110625958SPhilip Reames  %arrayidx = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 0
4822d69827cSNikita Popov  %0 = load i8, ptr %arrayidx, align 1
48310625958SPhilip Reames  %arrayidx3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 0
4842d69827cSNikita Popov  %1 = load i8, ptr %arrayidx3, align 1
48510625958SPhilip Reames  %and12 = and i8 %1, %0
48610625958SPhilip Reames  %arrayidx.1 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 1
4872d69827cSNikita Popov  %2 = load i8, ptr %arrayidx.1, align 1
48810625958SPhilip Reames  %arrayidx3.1 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 1
4892d69827cSNikita Popov  %3 = load i8, ptr %arrayidx3.1, align 1
49010625958SPhilip Reames  %and12.1 = and i8 %3, %2
49110625958SPhilip Reames  %4 = xor i8 %and12, %and12.1
49210625958SPhilip Reames  %arrayidx.2 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 2
4932d69827cSNikita Popov  %5 = load i8, ptr %arrayidx.2, align 1
49410625958SPhilip Reames  %arrayidx3.2 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 2
4952d69827cSNikita Popov  %6 = load i8, ptr %arrayidx3.2, align 1
49610625958SPhilip Reames  %and12.2 = and i8 %6, %5
49710625958SPhilip Reames  %7 = xor i8 %4, %and12.2
49810625958SPhilip Reames  %arrayidx.3 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 3
4992d69827cSNikita Popov  %8 = load i8, ptr %arrayidx.3, align 1
50010625958SPhilip Reames  %arrayidx3.3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 3
5012d69827cSNikita Popov  %9 = load i8, ptr %arrayidx3.3, align 1
50210625958SPhilip Reames  %and12.3 = and i8 %9, %8
50310625958SPhilip Reames  %10 = xor i8 %7, %and12.3
50410625958SPhilip Reames  %arrayidx.4 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 4
5052d69827cSNikita Popov  %11 = load i8, ptr %arrayidx.4, align 1
50610625958SPhilip Reames  %arrayidx3.4 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 4
5072d69827cSNikita Popov  %12 = load i8, ptr %arrayidx3.4, align 1
50810625958SPhilip Reames  %and12.4 = and i8 %12, %11
50910625958SPhilip Reames  %13 = xor i8 %10, %and12.4
51010625958SPhilip Reames  %arrayidx.5 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 5
5112d69827cSNikita Popov  %14 = load i8, ptr %arrayidx.5, align 1
51210625958SPhilip Reames  %arrayidx3.5 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 5
5132d69827cSNikita Popov  %15 = load i8, ptr %arrayidx3.5, align 1
51410625958SPhilip Reames  %and12.5 = and i8 %15, %14
51510625958SPhilip Reames  %16 = xor i8 %13, %and12.5
51610625958SPhilip Reames  %arrayidx.6 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 6
5172d69827cSNikita Popov  %17 = load i8, ptr %arrayidx.6, align 1
51810625958SPhilip Reames  %arrayidx3.6 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 6
5192d69827cSNikita Popov  %18 = load i8, ptr %arrayidx3.6, align 1
52010625958SPhilip Reames  %and12.6 = and i8 %18, %17
52110625958SPhilip Reames  %19 = xor i8 %16, %and12.6
52210625958SPhilip Reames  %arrayidx.7 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 7
5232d69827cSNikita Popov  %20 = load i8, ptr %arrayidx.7, align 1
52410625958SPhilip Reames  %arrayidx3.7 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 7
5252d69827cSNikita Popov  %21 = load i8, ptr %arrayidx3.7, align 1
52610625958SPhilip Reames  %and12.7 = and i8 %21, %20
52710625958SPhilip Reames  %22 = xor i8 %19, %and12.7
52810625958SPhilip Reames  %xor13.7 = xor i8 %22, 1
52910625958SPhilip Reames  ret i8 %xor13.7
53010625958SPhilip Reames}
53110625958SPhilip Reames
53210625958SPhilip Reames
53310625958SPhilip Reames
53410625958SPhilip Reamesdefine i8 @reduce_add(ptr %a, ptr %b) {
53510625958SPhilip Reames; CHECK-LABEL: @reduce_add(
53610625958SPhilip Reames; CHECK-NEXT:  entry:
53710625958SPhilip Reames; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_BUF:%.*]], ptr [[A:%.*]], i64 0, i32 0, i64 0
53810625958SPhilip Reames; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_BUF]], ptr [[B:%.*]], i64 0, i32 0, i64 0
53910625958SPhilip Reames; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 1
54010625958SPhilip Reames; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[ARRAYIDX3]], align 1
54110625958SPhilip Reames; CHECK-NEXT:    [[TMP2:%.*]] = and <8 x i8> [[TMP1]], [[TMP0]]
54210625958SPhilip Reames; CHECK-NEXT:    [[TMP3:%.*]] = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> [[TMP2]])
54310625958SPhilip Reames; CHECK-NEXT:    [[OP_RDX:%.*]] = add i8 [[TMP3]], 1
54410625958SPhilip Reames; CHECK-NEXT:    ret i8 [[OP_RDX]]
54510625958SPhilip Reames;
54610625958SPhilip Reamesentry:
54710625958SPhilip Reames  %arrayidx = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 0
5482d69827cSNikita Popov  %0 = load i8, ptr %arrayidx, align 1
54910625958SPhilip Reames  %arrayidx3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 0
5502d69827cSNikita Popov  %1 = load i8, ptr %arrayidx3, align 1
55110625958SPhilip Reames  %and12 = and i8 %1, %0
55210625958SPhilip Reames  %arrayidx.1 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 1
5532d69827cSNikita Popov  %2 = load i8, ptr %arrayidx.1, align 1
55410625958SPhilip Reames  %arrayidx3.1 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 1
5552d69827cSNikita Popov  %3 = load i8, ptr %arrayidx3.1, align 1
55610625958SPhilip Reames  %and12.1 = and i8 %3, %2
55710625958SPhilip Reames  %4 = add i8 %and12, %and12.1
55810625958SPhilip Reames  %arrayidx.2 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 2
5592d69827cSNikita Popov  %5 = load i8, ptr %arrayidx.2, align 1
56010625958SPhilip Reames  %arrayidx3.2 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 2
5612d69827cSNikita Popov  %6 = load i8, ptr %arrayidx3.2, align 1
56210625958SPhilip Reames  %and12.2 = and i8 %6, %5
56310625958SPhilip Reames  %7 = add i8 %4, %and12.2
56410625958SPhilip Reames  %arrayidx.3 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 3
5652d69827cSNikita Popov  %8 = load i8, ptr %arrayidx.3, align 1
56610625958SPhilip Reames  %arrayidx3.3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 3
5672d69827cSNikita Popov  %9 = load i8, ptr %arrayidx3.3, align 1
56810625958SPhilip Reames  %and12.3 = and i8 %9, %8
56910625958SPhilip Reames  %10 = add i8 %7, %and12.3
57010625958SPhilip Reames  %arrayidx.4 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 4
5712d69827cSNikita Popov  %11 = load i8, ptr %arrayidx.4, align 1
57210625958SPhilip Reames  %arrayidx3.4 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 4
5732d69827cSNikita Popov  %12 = load i8, ptr %arrayidx3.4, align 1
57410625958SPhilip Reames  %and12.4 = and i8 %12, %11
57510625958SPhilip Reames  %13 = add i8 %10, %and12.4
57610625958SPhilip Reames  %arrayidx.5 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 5
5772d69827cSNikita Popov  %14 = load i8, ptr %arrayidx.5, align 1
57810625958SPhilip Reames  %arrayidx3.5 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 5
5792d69827cSNikita Popov  %15 = load i8, ptr %arrayidx3.5, align 1
58010625958SPhilip Reames  %and12.5 = and i8 %15, %14
58110625958SPhilip Reames  %16 = add i8 %13, %and12.5
58210625958SPhilip Reames  %arrayidx.6 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 6
5832d69827cSNikita Popov  %17 = load i8, ptr %arrayidx.6, align 1
58410625958SPhilip Reames  %arrayidx3.6 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 6
5852d69827cSNikita Popov  %18 = load i8, ptr %arrayidx3.6, align 1
58610625958SPhilip Reames  %and12.6 = and i8 %18, %17
58710625958SPhilip Reames  %19 = add i8 %16, %and12.6
58810625958SPhilip Reames  %arrayidx.7 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 7
5892d69827cSNikita Popov  %20 = load i8, ptr %arrayidx.7, align 1
59010625958SPhilip Reames  %arrayidx3.7 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 7
5912d69827cSNikita Popov  %21 = load i8, ptr %arrayidx3.7, align 1
59210625958SPhilip Reames  %and12.7 = and i8 %21, %20
59310625958SPhilip Reames  %22 = add i8 %19, %and12.7
59410625958SPhilip Reames  %add13.7 = add i8 %22, 1
59510625958SPhilip Reames  ret i8 %add13.7
59610625958SPhilip Reames}
59710625958SPhilip Reames
59872ce9d1cSBen Shideclare i8 @llvm.smin.i8(i8, i8)
59972ce9d1cSBen Shi
60072ce9d1cSBen Shidefine i8 @reduce_smin(ptr %a, ptr %b) {
60172ce9d1cSBen Shi; CHECK-LABEL: @reduce_smin(
60272ce9d1cSBen Shi; CHECK-NEXT:  entry:
60372ce9d1cSBen Shi; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_BUF:%.*]], ptr [[A:%.*]], i64 0, i32 0, i64 0
60472ce9d1cSBen Shi; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_BUF]], ptr [[B:%.*]], i64 0, i32 0, i64 0
60572ce9d1cSBen Shi; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 1
60672ce9d1cSBen Shi; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[ARRAYIDX3]], align 1
60772ce9d1cSBen Shi; CHECK-NEXT:    [[TMP2:%.*]] = and <8 x i8> [[TMP1]], [[TMP0]]
60872ce9d1cSBen Shi; CHECK-NEXT:    [[TMP3:%.*]] = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> [[TMP2]])
60972ce9d1cSBen Shi; CHECK-NEXT:    ret i8 [[TMP3]]
61072ce9d1cSBen Shi;
61172ce9d1cSBen Shientry:
61272ce9d1cSBen Shi  %arrayidx = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 0
6132d69827cSNikita Popov  %0 = load i8, ptr %arrayidx, align 1
61472ce9d1cSBen Shi  %arrayidx3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 0
6152d69827cSNikita Popov  %1 = load i8, ptr %arrayidx3, align 1
61672ce9d1cSBen Shi  %and12 = and i8 %1, %0
61772ce9d1cSBen Shi  %arrayidx.1 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 1
6182d69827cSNikita Popov  %2 = load i8, ptr %arrayidx.1, align 1
61972ce9d1cSBen Shi  %arrayidx3.1 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 1
6202d69827cSNikita Popov  %3 = load i8, ptr %arrayidx3.1, align 1
62172ce9d1cSBen Shi  %and12.1 = and i8 %3, %2
62272ce9d1cSBen Shi  %4 = tail call i8 @llvm.smin.i8(i8 %and12, i8 %and12.1)
62372ce9d1cSBen Shi  %arrayidx.2 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 2
6242d69827cSNikita Popov  %5 = load i8, ptr %arrayidx.2, align 1
62572ce9d1cSBen Shi  %arrayidx3.2 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 2
6262d69827cSNikita Popov  %6 = load i8, ptr %arrayidx3.2, align 1
62772ce9d1cSBen Shi  %and12.2 = and i8 %6, %5
62872ce9d1cSBen Shi  %7 = tail call i8 @llvm.smin.i8(i8 %4, i8 %and12.2)
62972ce9d1cSBen Shi  %arrayidx.3 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 3
6302d69827cSNikita Popov  %8 = load i8, ptr %arrayidx.3, align 1
63172ce9d1cSBen Shi  %arrayidx3.3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 3
6322d69827cSNikita Popov  %9 = load i8, ptr %arrayidx3.3, align 1
63372ce9d1cSBen Shi  %and12.3 = and i8 %9, %8
63472ce9d1cSBen Shi  %10 = tail call i8 @llvm.smin.i8(i8 %7, i8 %and12.3)
63572ce9d1cSBen Shi  %arrayidx.4 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 4
6362d69827cSNikita Popov  %11 = load i8, ptr %arrayidx.4, align 1
63772ce9d1cSBen Shi  %arrayidx3.4 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 4
6382d69827cSNikita Popov  %12 = load i8, ptr %arrayidx3.4, align 1
63972ce9d1cSBen Shi  %and12.4 = and i8 %12, %11
64072ce9d1cSBen Shi  %13 = tail call i8 @llvm.smin.i8(i8 %10, i8 %and12.4)
64172ce9d1cSBen Shi  %arrayidx.5 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 5
6422d69827cSNikita Popov  %14 = load i8, ptr %arrayidx.5, align 1
64372ce9d1cSBen Shi  %arrayidx3.5 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 5
6442d69827cSNikita Popov  %15 = load i8, ptr %arrayidx3.5, align 1
64572ce9d1cSBen Shi  %and12.5 = and i8 %15, %14
64672ce9d1cSBen Shi  %16 = tail call i8 @llvm.smin.i8(i8 %13, i8 %and12.5)
64772ce9d1cSBen Shi  %arrayidx.6 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 6
6482d69827cSNikita Popov  %17 = load i8, ptr %arrayidx.6, align 1
64972ce9d1cSBen Shi  %arrayidx3.6 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 6
6502d69827cSNikita Popov  %18 = load i8, ptr %arrayidx3.6, align 1
65172ce9d1cSBen Shi  %and12.6 = and i8 %18, %17
65272ce9d1cSBen Shi  %19 = tail call i8 @llvm.smin.i8(i8 %16, i8 %and12.6)
65372ce9d1cSBen Shi  %arrayidx.7 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 7
6542d69827cSNikita Popov  %20 = load i8, ptr %arrayidx.7, align 1
65572ce9d1cSBen Shi  %arrayidx3.7 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 7
6562d69827cSNikita Popov  %21 = load i8, ptr %arrayidx3.7, align 1
65772ce9d1cSBen Shi  %and12.7 = and i8 %21, %20
65872ce9d1cSBen Shi  %22 = tail call i8 @llvm.smin.i8(i8 %19, i8 %and12.7)
65972ce9d1cSBen Shi  ret i8 %22
66072ce9d1cSBen Shi}
66172ce9d1cSBen Shi
66272ce9d1cSBen Shideclare i8 @llvm.smax.i8(i8, i8)
66372ce9d1cSBen Shi
66472ce9d1cSBen Shidefine i8 @reduce_smax(ptr %a, ptr %b) {
66572ce9d1cSBen Shi; CHECK-LABEL: @reduce_smax(
66672ce9d1cSBen Shi; CHECK-NEXT:  entry:
66772ce9d1cSBen Shi; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_BUF:%.*]], ptr [[A:%.*]], i64 0, i32 0, i64 0
66872ce9d1cSBen Shi; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_BUF]], ptr [[B:%.*]], i64 0, i32 0, i64 0
66972ce9d1cSBen Shi; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 1
67072ce9d1cSBen Shi; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[ARRAYIDX3]], align 1
67172ce9d1cSBen Shi; CHECK-NEXT:    [[TMP2:%.*]] = and <8 x i8> [[TMP1]], [[TMP0]]
67272ce9d1cSBen Shi; CHECK-NEXT:    [[TMP3:%.*]] = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> [[TMP2]])
67372ce9d1cSBen Shi; CHECK-NEXT:    ret i8 [[TMP3]]
67472ce9d1cSBen Shi;
67572ce9d1cSBen Shientry:
67672ce9d1cSBen Shi  %arrayidx = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 0
6772d69827cSNikita Popov  %0 = load i8, ptr %arrayidx, align 1
67872ce9d1cSBen Shi  %arrayidx3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 0
6792d69827cSNikita Popov  %1 = load i8, ptr %arrayidx3, align 1
68072ce9d1cSBen Shi  %and12 = and i8 %1, %0
68172ce9d1cSBen Shi  %arrayidx.1 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 1
6822d69827cSNikita Popov  %2 = load i8, ptr %arrayidx.1, align 1
68372ce9d1cSBen Shi  %arrayidx3.1 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 1
6842d69827cSNikita Popov  %3 = load i8, ptr %arrayidx3.1, align 1
68572ce9d1cSBen Shi  %and12.1 = and i8 %3, %2
68672ce9d1cSBen Shi  %4 = tail call i8 @llvm.smax.i8(i8 %and12, i8 %and12.1)
68772ce9d1cSBen Shi  %arrayidx.2 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 2
6882d69827cSNikita Popov  %5 = load i8, ptr %arrayidx.2, align 1
68972ce9d1cSBen Shi  %arrayidx3.2 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 2
6902d69827cSNikita Popov  %6 = load i8, ptr %arrayidx3.2, align 1
69172ce9d1cSBen Shi  %and12.2 = and i8 %6, %5
69272ce9d1cSBen Shi  %7 = tail call i8 @llvm.smax.i8(i8 %4, i8 %and12.2)
69372ce9d1cSBen Shi  %arrayidx.3 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 3
6942d69827cSNikita Popov  %8 = load i8, ptr %arrayidx.3, align 1
69572ce9d1cSBen Shi  %arrayidx3.3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 3
6962d69827cSNikita Popov  %9 = load i8, ptr %arrayidx3.3, align 1
69772ce9d1cSBen Shi  %and12.3 = and i8 %9, %8
69872ce9d1cSBen Shi  %10 = tail call i8 @llvm.smax.i8(i8 %7, i8 %and12.3)
69972ce9d1cSBen Shi  %arrayidx.4 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 4
7002d69827cSNikita Popov  %11 = load i8, ptr %arrayidx.4, align 1
70172ce9d1cSBen Shi  %arrayidx3.4 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 4
7022d69827cSNikita Popov  %12 = load i8, ptr %arrayidx3.4, align 1
70372ce9d1cSBen Shi  %and12.4 = and i8 %12, %11
70472ce9d1cSBen Shi  %13 = tail call i8 @llvm.smax.i8(i8 %10, i8 %and12.4)
70572ce9d1cSBen Shi  %arrayidx.5 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 5
7062d69827cSNikita Popov  %14 = load i8, ptr %arrayidx.5, align 1
70772ce9d1cSBen Shi  %arrayidx3.5 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 5
7082d69827cSNikita Popov  %15 = load i8, ptr %arrayidx3.5, align 1
70972ce9d1cSBen Shi  %and12.5 = and i8 %15, %14
71072ce9d1cSBen Shi  %16 = tail call i8 @llvm.smax.i8(i8 %13, i8 %and12.5)
71172ce9d1cSBen Shi  %arrayidx.6 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 6
7122d69827cSNikita Popov  %17 = load i8, ptr %arrayidx.6, align 1
71372ce9d1cSBen Shi  %arrayidx3.6 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 6
7142d69827cSNikita Popov  %18 = load i8, ptr %arrayidx3.6, align 1
71572ce9d1cSBen Shi  %and12.6 = and i8 %18, %17
71672ce9d1cSBen Shi  %19 = tail call i8 @llvm.smax.i8(i8 %16, i8 %and12.6)
71772ce9d1cSBen Shi  %arrayidx.7 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 7
7182d69827cSNikita Popov  %20 = load i8, ptr %arrayidx.7, align 1
71972ce9d1cSBen Shi  %arrayidx3.7 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 7
7202d69827cSNikita Popov  %21 = load i8, ptr %arrayidx3.7, align 1
72172ce9d1cSBen Shi  %and12.7 = and i8 %21, %20
72272ce9d1cSBen Shi  %22 = tail call i8 @llvm.smax.i8(i8 %19, i8 %and12.7)
72372ce9d1cSBen Shi  ret i8 %22
72472ce9d1cSBen Shi}
72572ce9d1cSBen Shi
72672ce9d1cSBen Shideclare i8 @llvm.umax.i8(i8, i8)
72772ce9d1cSBen Shi
72872ce9d1cSBen Shidefine i8 @reduce_umax(ptr %a, ptr %b) {
72972ce9d1cSBen Shi; CHECK-LABEL: @reduce_umax(
73072ce9d1cSBen Shi; CHECK-NEXT:  entry:
73172ce9d1cSBen Shi; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_BUF:%.*]], ptr [[A:%.*]], i64 0, i32 0, i64 0
73272ce9d1cSBen Shi; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_BUF]], ptr [[B:%.*]], i64 0, i32 0, i64 0
73372ce9d1cSBen Shi; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 1
73472ce9d1cSBen Shi; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[ARRAYIDX3]], align 1
73572ce9d1cSBen Shi; CHECK-NEXT:    [[TMP2:%.*]] = and <8 x i8> [[TMP1]], [[TMP0]]
73672ce9d1cSBen Shi; CHECK-NEXT:    [[TMP3:%.*]] = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> [[TMP2]])
73772ce9d1cSBen Shi; CHECK-NEXT:    ret i8 [[TMP3]]
73872ce9d1cSBen Shi;
73972ce9d1cSBen Shientry:
74072ce9d1cSBen Shi  %arrayidx = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 0
7412d69827cSNikita Popov  %0 = load i8, ptr %arrayidx, align 1
74272ce9d1cSBen Shi  %arrayidx3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 0
7432d69827cSNikita Popov  %1 = load i8, ptr %arrayidx3, align 1
74472ce9d1cSBen Shi  %and12 = and i8 %1, %0
74572ce9d1cSBen Shi  %arrayidx.1 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 1
7462d69827cSNikita Popov  %2 = load i8, ptr %arrayidx.1, align 1
74772ce9d1cSBen Shi  %arrayidx3.1 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 1
7482d69827cSNikita Popov  %3 = load i8, ptr %arrayidx3.1, align 1
74972ce9d1cSBen Shi  %and12.1 = and i8 %3, %2
75072ce9d1cSBen Shi  %4 = tail call i8 @llvm.umax.i8(i8 %and12, i8 %and12.1)
75172ce9d1cSBen Shi  %arrayidx.2 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 2
7522d69827cSNikita Popov  %5 = load i8, ptr %arrayidx.2, align 1
75372ce9d1cSBen Shi  %arrayidx3.2 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 2
7542d69827cSNikita Popov  %6 = load i8, ptr %arrayidx3.2, align 1
75572ce9d1cSBen Shi  %and12.2 = and i8 %6, %5
75672ce9d1cSBen Shi  %7 = tail call i8 @llvm.umax.i8(i8 %4, i8 %and12.2)
75772ce9d1cSBen Shi  %arrayidx.3 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 3
7582d69827cSNikita Popov  %8 = load i8, ptr %arrayidx.3, align 1
75972ce9d1cSBen Shi  %arrayidx3.3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 3
7602d69827cSNikita Popov  %9 = load i8, ptr %arrayidx3.3, align 1
76172ce9d1cSBen Shi  %and12.3 = and i8 %9, %8
76272ce9d1cSBen Shi  %10 = tail call i8 @llvm.umax.i8(i8 %7, i8 %and12.3)
76372ce9d1cSBen Shi  %arrayidx.4 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 4
7642d69827cSNikita Popov  %11 = load i8, ptr %arrayidx.4, align 1
76572ce9d1cSBen Shi  %arrayidx3.4 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 4
7662d69827cSNikita Popov  %12 = load i8, ptr %arrayidx3.4, align 1
76772ce9d1cSBen Shi  %and12.4 = and i8 %12, %11
76872ce9d1cSBen Shi  %13 = tail call i8 @llvm.umax.i8(i8 %10, i8 %and12.4)
76972ce9d1cSBen Shi  %arrayidx.5 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 5
7702d69827cSNikita Popov  %14 = load i8, ptr %arrayidx.5, align 1
77172ce9d1cSBen Shi  %arrayidx3.5 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 5
7722d69827cSNikita Popov  %15 = load i8, ptr %arrayidx3.5, align 1
77372ce9d1cSBen Shi  %and12.5 = and i8 %15, %14
77472ce9d1cSBen Shi  %16 = tail call i8 @llvm.umax.i8(i8 %13, i8 %and12.5)
77572ce9d1cSBen Shi  %arrayidx.6 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 6
7762d69827cSNikita Popov  %17 = load i8, ptr %arrayidx.6, align 1
77772ce9d1cSBen Shi  %arrayidx3.6 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 6
7782d69827cSNikita Popov  %18 = load i8, ptr %arrayidx3.6, align 1
77972ce9d1cSBen Shi  %and12.6 = and i8 %18, %17
78072ce9d1cSBen Shi  %19 = tail call i8 @llvm.umax.i8(i8 %16, i8 %and12.6)
78172ce9d1cSBen Shi  %arrayidx.7 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 7
7822d69827cSNikita Popov  %20 = load i8, ptr %arrayidx.7, align 1
78372ce9d1cSBen Shi  %arrayidx3.7 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 7
7842d69827cSNikita Popov  %21 = load i8, ptr %arrayidx3.7, align 1
78572ce9d1cSBen Shi  %and12.7 = and i8 %21, %20
78672ce9d1cSBen Shi  %22 = tail call i8 @llvm.umax.i8(i8 %19, i8 %and12.7)
78772ce9d1cSBen Shi  ret i8 %22
78872ce9d1cSBen Shi}
78972ce9d1cSBen Shi
79072ce9d1cSBen Shideclare i8 @llvm.umin.i8(i8, i8)
79172ce9d1cSBen Shi
79272ce9d1cSBen Shidefine i8 @reduce_umin(ptr %a, ptr %b) {
79372ce9d1cSBen Shi; CHECK-LABEL: @reduce_umin(
79472ce9d1cSBen Shi; CHECK-NEXT:  entry:
79572ce9d1cSBen Shi; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [[STRUCT_BUF:%.*]], ptr [[A:%.*]], i64 0, i32 0, i64 0
79672ce9d1cSBen Shi; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds [[STRUCT_BUF]], ptr [[B:%.*]], i64 0, i32 0, i64 0
79772ce9d1cSBen Shi; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i8>, ptr [[ARRAYIDX]], align 1
79872ce9d1cSBen Shi; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i8>, ptr [[ARRAYIDX3]], align 1
79972ce9d1cSBen Shi; CHECK-NEXT:    [[TMP2:%.*]] = and <8 x i8> [[TMP1]], [[TMP0]]
80072ce9d1cSBen Shi; CHECK-NEXT:    [[TMP3:%.*]] = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> [[TMP2]])
80172ce9d1cSBen Shi; CHECK-NEXT:    ret i8 [[TMP3]]
80272ce9d1cSBen Shi;
80372ce9d1cSBen Shientry:
80472ce9d1cSBen Shi  %arrayidx = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 0
8052d69827cSNikita Popov  %0 = load i8, ptr %arrayidx, align 1
80672ce9d1cSBen Shi  %arrayidx3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 0
8072d69827cSNikita Popov  %1 = load i8, ptr %arrayidx3, align 1
80872ce9d1cSBen Shi  %and12 = and i8 %1, %0
80972ce9d1cSBen Shi  %arrayidx.1 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 1
8102d69827cSNikita Popov  %2 = load i8, ptr %arrayidx.1, align 1
81172ce9d1cSBen Shi  %arrayidx3.1 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 1
8122d69827cSNikita Popov  %3 = load i8, ptr %arrayidx3.1, align 1
81372ce9d1cSBen Shi  %and12.1 = and i8 %3, %2
81472ce9d1cSBen Shi  %4 = tail call i8 @llvm.umin.i8(i8 %and12, i8 %and12.1)
81572ce9d1cSBen Shi  %arrayidx.2 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 2
8162d69827cSNikita Popov  %5 = load i8, ptr %arrayidx.2, align 1
81772ce9d1cSBen Shi  %arrayidx3.2 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 2
8182d69827cSNikita Popov  %6 = load i8, ptr %arrayidx3.2, align 1
81972ce9d1cSBen Shi  %and12.2 = and i8 %6, %5
82072ce9d1cSBen Shi  %7 = tail call i8 @llvm.umin.i8(i8 %4, i8 %and12.2)
82172ce9d1cSBen Shi  %arrayidx.3 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 3
8222d69827cSNikita Popov  %8 = load i8, ptr %arrayidx.3, align 1
82372ce9d1cSBen Shi  %arrayidx3.3 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 3
8242d69827cSNikita Popov  %9 = load i8, ptr %arrayidx3.3, align 1
82572ce9d1cSBen Shi  %and12.3 = and i8 %9, %8
82672ce9d1cSBen Shi  %10 = tail call i8 @llvm.umin.i8(i8 %7, i8 %and12.3)
82772ce9d1cSBen Shi  %arrayidx.4 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 4
8282d69827cSNikita Popov  %11 = load i8, ptr %arrayidx.4, align 1
82972ce9d1cSBen Shi  %arrayidx3.4 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 4
8302d69827cSNikita Popov  %12 = load i8, ptr %arrayidx3.4, align 1
83172ce9d1cSBen Shi  %and12.4 = and i8 %12, %11
83272ce9d1cSBen Shi  %13 = tail call i8 @llvm.umin.i8(i8 %10, i8 %and12.4)
83372ce9d1cSBen Shi  %arrayidx.5 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 5
8342d69827cSNikita Popov  %14 = load i8, ptr %arrayidx.5, align 1
83572ce9d1cSBen Shi  %arrayidx3.5 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 5
8362d69827cSNikita Popov  %15 = load i8, ptr %arrayidx3.5, align 1
83772ce9d1cSBen Shi  %and12.5 = and i8 %15, %14
83872ce9d1cSBen Shi  %16 = tail call i8 @llvm.umin.i8(i8 %13, i8 %and12.5)
83972ce9d1cSBen Shi  %arrayidx.6 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 6
8402d69827cSNikita Popov  %17 = load i8, ptr %arrayidx.6, align 1
84172ce9d1cSBen Shi  %arrayidx3.6 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 6
8422d69827cSNikita Popov  %18 = load i8, ptr %arrayidx3.6, align 1
84372ce9d1cSBen Shi  %and12.6 = and i8 %18, %17
84472ce9d1cSBen Shi  %19 = tail call i8 @llvm.umin.i8(i8 %16, i8 %and12.6)
84572ce9d1cSBen Shi  %arrayidx.7 = getelementptr inbounds %struct.buf, ptr %a, i64 0, i32 0, i64 7
8462d69827cSNikita Popov  %20 = load i8, ptr %arrayidx.7, align 1
84772ce9d1cSBen Shi  %arrayidx3.7 = getelementptr inbounds %struct.buf, ptr %b, i64 0, i32 0, i64 7
8482d69827cSNikita Popov  %21 = load i8, ptr %arrayidx3.7, align 1
84972ce9d1cSBen Shi  %and12.7 = and i8 %21, %20
85072ce9d1cSBen Shi  %22 = tail call i8 @llvm.umin.i8(i8 %19, i8 %and12.7)
85172ce9d1cSBen Shi  ret i8 %22
85272ce9d1cSBen Shi}
85310625958SPhilip Reames
85410625958SPhilip Reames; Next batch exercise reductions involing zext of narrower loads
85510625958SPhilip Reames
85610625958SPhilip Reamesdefine i64 @red_zext_ld_2xi64(ptr %ptr) {
85710625958SPhilip Reames; CHECK-LABEL: @red_zext_ld_2xi64(
85810625958SPhilip Reames; CHECK-NEXT:  entry:
85910625958SPhilip Reames; CHECK-NEXT:    [[LD0:%.*]] = load i8, ptr [[PTR:%.*]], align 1
86010625958SPhilip Reames; CHECK-NEXT:    [[ZEXT:%.*]] = zext i8 [[LD0]] to i64
86110625958SPhilip Reames; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 1
86210625958SPhilip Reames; CHECK-NEXT:    [[LD1:%.*]] = load i8, ptr [[GEP]], align 1
86310625958SPhilip Reames; CHECK-NEXT:    [[ZEXT_1:%.*]] = zext i8 [[LD1]] to i64
86410625958SPhilip Reames; CHECK-NEXT:    [[ADD_1:%.*]] = add nuw nsw i64 [[ZEXT]], [[ZEXT_1]]
86510625958SPhilip Reames; CHECK-NEXT:    ret i64 [[ADD_1]]
86610625958SPhilip Reames;
86710625958SPhilip Reamesentry:
86810625958SPhilip Reames  %ld0 = load i8, ptr %ptr
86910625958SPhilip Reames  %zext = zext i8 %ld0 to i64
87010625958SPhilip Reames  %gep = getelementptr inbounds i8, ptr %ptr, i64 1
87110625958SPhilip Reames  %ld1 = load i8, ptr %gep
87210625958SPhilip Reames  %zext.1 = zext i8 %ld1 to i64
87310625958SPhilip Reames  %add.1 = add nuw nsw i64 %zext, %zext.1
87410625958SPhilip Reames  ret i64 %add.1
87510625958SPhilip Reames}
87610625958SPhilip Reames
87710625958SPhilip Reamesdefine i64 @red_zext_ld_4xi64(ptr %ptr) {
87810625958SPhilip Reames; CHECK-LABEL: @red_zext_ld_4xi64(
87910625958SPhilip Reames; CHECK-NEXT:  entry:
880*7523086aSAlexey Bataev; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i8>, ptr [[PTR:%.*]], align 1
881*7523086aSAlexey Bataev; CHECK-NEXT:    [[TMP1:%.*]] = zext <4 x i8> [[TMP0]] to <4 x i16>
882*7523086aSAlexey Bataev; CHECK-NEXT:    [[TMP2:%.*]] = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> [[TMP1]])
883*7523086aSAlexey Bataev; CHECK-NEXT:    [[ADD_3:%.*]] = zext i16 [[TMP2]] to i64
884a9888211SAlexey Bataev; CHECK-NEXT:    ret i64 [[ADD_3]]
88510625958SPhilip Reames;
88610625958SPhilip Reamesentry:
88710625958SPhilip Reames  %ld0 = load i8, ptr %ptr
88810625958SPhilip Reames  %zext = zext i8 %ld0 to i64
88910625958SPhilip Reames  %gep = getelementptr inbounds i8, ptr %ptr, i64 1
89010625958SPhilip Reames  %ld1 = load i8, ptr %gep
89110625958SPhilip Reames  %zext.1 = zext i8 %ld1 to i64
89210625958SPhilip Reames  %add.1 = add nuw nsw i64 %zext, %zext.1
89310625958SPhilip Reames  %gep.1 = getelementptr inbounds i8, ptr %ptr, i64 2
89410625958SPhilip Reames  %ld2 = load i8, ptr %gep.1
89510625958SPhilip Reames  %zext.2 = zext i8 %ld2 to i64
89610625958SPhilip Reames  %add.2 = add nuw nsw i64 %add.1, %zext.2
89710625958SPhilip Reames  %gep.2 = getelementptr inbounds i8, ptr %ptr, i64 3
89810625958SPhilip Reames  %ld3 = load i8, ptr %gep.2
89910625958SPhilip Reames  %zext.3 = zext i8 %ld3 to i64
90010625958SPhilip Reames  %add.3 = add nuw nsw i64 %add.2, %zext.3
90110625958SPhilip Reames  ret i64 %add.3
90210625958SPhilip Reames}
90310625958SPhilip Reames
; An 8-wide linear chain of adds over zext'd consecutive i8 loads.  The CHECK
; lines verify SLP collapses it into one <8 x i8> load, a vector zext to
; <8 x i64>, and a single @llvm.vector.reduce.add call.
define i64 @red_zext_ld_8xi64(ptr %ptr) {
; CHECK-LABEL: @red_zext_ld_8xi64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i8>, ptr [[PTR:%.*]], align 1
; CHECK-NEXT:    [[TMP1:%.*]] = zext <8 x i8> [[TMP0]] to <8 x i64>
; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vector.reduce.add.v8i64(<8 x i64> [[TMP1]])
; CHECK-NEXT:    ret i64 [[TMP2]]
;
entry:
  %ld0 = load i8, ptr %ptr
  %zext = zext i8 %ld0 to i64
  %gep = getelementptr inbounds i8, ptr %ptr, i64 1
  %ld1 = load i8, ptr %gep
  %zext.1 = zext i8 %ld1 to i64
  %add.1 = add nuw nsw i64 %zext, %zext.1
  %gep.1 = getelementptr inbounds i8, ptr %ptr, i64 2
  %ld2 = load i8, ptr %gep.1
  %zext.2 = zext i8 %ld2 to i64
  %add.2 = add nuw nsw i64 %add.1, %zext.2
  %gep.2 = getelementptr inbounds i8, ptr %ptr, i64 3
  %ld3 = load i8, ptr %gep.2
  %zext.3 = zext i8 %ld3 to i64
  %add.3 = add nuw nsw i64 %add.2, %zext.3
  %gep.3 = getelementptr inbounds i8, ptr %ptr, i64 4
  %ld4 = load i8, ptr %gep.3
  %zext.4 = zext i8 %ld4 to i64
  %add.4 = add nuw nsw i64 %add.3, %zext.4
  %gep.4 = getelementptr inbounds i8, ptr %ptr, i64 5
  %ld5 = load i8, ptr %gep.4
  %zext.5 = zext i8 %ld5 to i64
  %add.5 = add nuw nsw i64 %add.4, %zext.5
  %gep.5 = getelementptr inbounds i8, ptr %ptr, i64 6
  %ld6 = load i8, ptr %gep.5
  %zext.6 = zext i8 %ld6 to i64
  %add.6 = add nuw nsw i64 %add.5, %zext.6
  %gep.6 = getelementptr inbounds i8, ptr %ptr, i64 7
  %ld7 = load i8, ptr %gep.6
  %zext.7 = zext i8 %ld7 to i64
  %add.7 = add nuw nsw i64 %add.6, %zext.7
  ret i64 %add.7
}
94510625958SPhilip Reames
; 16-wide variant of the zext'd i8 add-reduction above; expects a single
; <16 x i8> load, a zext to <16 x i64>, and one @llvm.vector.reduce.add call.
define i64 @red_zext_ld_16xi64(ptr %ptr) {
; CHECK-LABEL: @red_zext_ld_16xi64(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <16 x i8>, ptr [[PTR:%.*]], align 1
; CHECK-NEXT:    [[TMP1:%.*]] = zext <16 x i8> [[TMP0]] to <16 x i64>
; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @llvm.vector.reduce.add.v16i64(<16 x i64> [[TMP1]])
; CHECK-NEXT:    ret i64 [[TMP2]]
;
entry:
  %ld0 = load i8, ptr %ptr
  %zext = zext i8 %ld0 to i64
  %gep = getelementptr inbounds i8, ptr %ptr, i64 1
  %ld1 = load i8, ptr %gep
  %zext.1 = zext i8 %ld1 to i64
  %add.1 = add nuw nsw i64 %zext, %zext.1
  %gep.1 = getelementptr inbounds i8, ptr %ptr, i64 2
  %ld2 = load i8, ptr %gep.1
  %zext.2 = zext i8 %ld2 to i64
  %add.2 = add nuw nsw i64 %add.1, %zext.2
  %gep.2 = getelementptr inbounds i8, ptr %ptr, i64 3
  %ld3 = load i8, ptr %gep.2
  %zext.3 = zext i8 %ld3 to i64
  %add.3 = add nuw nsw i64 %add.2, %zext.3
  %gep.3 = getelementptr inbounds i8, ptr %ptr, i64 4
  %ld4 = load i8, ptr %gep.3
  %zext.4 = zext i8 %ld4 to i64
  %add.4 = add nuw nsw i64 %add.3, %zext.4
  %gep.4 = getelementptr inbounds i8, ptr %ptr, i64 5
  %ld5 = load i8, ptr %gep.4
  %zext.5 = zext i8 %ld5 to i64
  %add.5 = add nuw nsw i64 %add.4, %zext.5
  %gep.5 = getelementptr inbounds i8, ptr %ptr, i64 6
  %ld6 = load i8, ptr %gep.5
  %zext.6 = zext i8 %ld6 to i64
  %add.6 = add nuw nsw i64 %add.5, %zext.6
  %gep.6 = getelementptr inbounds i8, ptr %ptr, i64 7
  %ld7 = load i8, ptr %gep.6
  %zext.7 = zext i8 %ld7 to i64
  %add.7 = add nuw nsw i64 %add.6, %zext.7
  %gep.7 = getelementptr inbounds i8, ptr %ptr, i64 8
  %ld8 = load i8, ptr %gep.7
  %zext.8 = zext i8 %ld8 to i64
  %add.8 = add nuw nsw i64 %add.7, %zext.8
  %gep.8 = getelementptr inbounds i8, ptr %ptr, i64 9
  %ld9 = load i8, ptr %gep.8
  %zext.9 = zext i8 %ld9 to i64
  %add.9 = add nuw nsw i64 %add.8, %zext.9
  %gep.9 = getelementptr inbounds i8, ptr %ptr, i64 10
  %ld10 = load i8, ptr %gep.9
  %zext.10 = zext i8 %ld10 to i64
  %add.10 = add nuw nsw i64 %add.9, %zext.10
  %gep.10 = getelementptr inbounds i8, ptr %ptr, i64 11
  %ld11 = load i8, ptr %gep.10
  %zext.11 = zext i8 %ld11 to i64
  %add.11 = add nuw nsw i64 %add.10, %zext.11
  %gep.11 = getelementptr inbounds i8, ptr %ptr, i64 12
  %ld12 = load i8, ptr %gep.11
  %zext.12 = zext i8 %ld12 to i64
  %add.12 = add nuw nsw i64 %add.11, %zext.12
  %gep.12 = getelementptr inbounds i8, ptr %ptr, i64 13
  %ld13 = load i8, ptr %gep.12
  %zext.13 = zext i8 %ld13 to i64
  %add.13 = add nuw nsw i64 %add.12, %zext.13
  %gep.13 = getelementptr inbounds i8, ptr %ptr, i64 14
  %ld14 = load i8, ptr %gep.13
  %zext.14 = zext i8 %ld14 to i64
  %add.14 = add nuw nsw i64 %add.13, %zext.14
  %gep.14 = getelementptr inbounds i8, ptr %ptr, i64 15
  %ld15 = load i8, ptr %gep.14
  %zext.15 = zext i8 %ld15 to i64
  %add.15 = add nuw nsw i64 %add.14, %zext.15
  ret i64 %add.15
}
101910625958SPhilip Reames
1020e69f8bacSLuke Laudeclare i32 @llvm.abs.i32(i32, i1)
102110625958SPhilip Reames
; Sum-of-absolute-differences over two pairs of adjacent i32s, where the second
; pair sits at a runtime %stride offset.  The CHECK lines verify SLP keeps the
; four <2 x i32> loads separate and glues them together with
; @llvm.vector.insert before the vector sub/abs/reduce.add.
define i32 @stride_sum_abs_diff(ptr %p, ptr %q, i64 %stride) {
; CHECK-LABEL: @stride_sum_abs_diff(
; CHECK-NEXT:    [[P_2:%.*]] = getelementptr inbounds i32, ptr [[P:%.*]], i64 [[STRIDE:%.*]]
; CHECK-NEXT:    [[Q_2:%.*]] = getelementptr inbounds i32, ptr [[Q:%.*]], i64 [[STRIDE]]
; CHECK-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[P]], align 4
; CHECK-NEXT:    [[TMP2:%.*]] = load <2 x i32>, ptr [[Q]], align 4
; CHECK-NEXT:    [[TMP3:%.*]] = load <2 x i32>, ptr [[P_2]], align 4
; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x i32>, ptr [[Q_2]], align 4
; CHECK-NEXT:    [[TMP5:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> poison, <2 x i32> [[TMP1]], i64 0)
; CHECK-NEXT:    [[TMP6:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> [[TMP5]], <2 x i32> [[TMP3]], i64 2)
; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> poison, <2 x i32> [[TMP2]], i64 0)
; CHECK-NEXT:    [[TMP8:%.*]] = call <4 x i32> @llvm.vector.insert.v4i32.v2i32(<4 x i32> [[TMP7]], <2 x i32> [[TMP4]], i64 2)
; CHECK-NEXT:    [[TMP9:%.*]] = sub <4 x i32> [[TMP6]], [[TMP8]]
; CHECK-NEXT:    [[TMP10:%.*]] = call <4 x i32> @llvm.abs.v4i32(<4 x i32> [[TMP9]], i1 true)
; CHECK-NEXT:    [[TMP11:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP10]])
; CHECK-NEXT:    ret i32 [[TMP11]]
;
  %x.0 = load i32, ptr %p
  %y.0 = load i32, ptr %q
  %sub.0 = sub i32 %x.0, %y.0
  %abs.0 = tail call i32 @llvm.abs.i32(i32 %sub.0, i1 true)

  %p.1 = getelementptr inbounds i32, ptr %p, i64 1
  %x.1 = load i32, ptr %p.1
  %q.1 = getelementptr inbounds i32, ptr %q, i64 1
  %y.1 = load i32, ptr %q.1
  %sub.1 = sub i32 %x.1, %y.1
  %abs.1 = tail call i32 @llvm.abs.i32(i32 %sub.1, i1 true)
  %sum.0 = add i32 %abs.0, %abs.1

  %p.2 = getelementptr inbounds i32, ptr %p, i64 %stride
  %q.2 = getelementptr inbounds i32, ptr %q, i64 %stride

  %x.2 = load i32, ptr %p.2
  %y.2 = load i32, ptr %q.2
  %sub.2 = sub i32 %x.2, %y.2
  %abs.2 = tail call i32 @llvm.abs.i32(i32 %sub.2, i1 true)
  %sum.1 = add i32 %sum.0, %abs.2

  %p.3 = getelementptr inbounds i32, ptr %p.2, i64 1
  %x.3 = load i32, ptr %p.3
  %q.3 = getelementptr inbounds i32, ptr %q.2, i64 1
  %y.3 = load i32, ptr %q.3
  %sub.3 = sub i32 %x.3, %y.3
  %abs.3 = tail call i32 @llvm.abs.i32(i32 %sub.3, i1 true)
  %sum.2 = add i32 %sum.1, %abs.3

  ret i32 %sum.2
}
10711c9094a2SLuke Lau
; Interleaved zext-add reduction over two noalias i8 arrays (elements taken
; alternately from %p and %q).  Expects SLP to emit two <4 x i8> loads, join
; them via @llvm.vector.insert into <8 x i8>, then zext and reduce.add.
define i32 @reduce_sum_2arrays_a(ptr noalias %p, ptr noalias %q) {
; CHECK-LABEL: @reduce_sum_2arrays_a(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i8>, ptr [[P:%.*]], align 1
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr [[Q:%.*]], align 1
; CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i8> @llvm.vector.insert.v8i8.v4i8(<8 x i8> poison, <4 x i8> [[TMP0]], i64 0)
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.vector.insert.v8i8.v4i8(<8 x i8> [[TMP2]], <4 x i8> [[TMP1]], i64 4)
; CHECK-NEXT:    [[TMP4:%.*]] = zext <8 x i8> [[TMP3]] to <8 x i32>
; CHECK-NEXT:    [[TMP5:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP4]])
; CHECK-NEXT:    ret i32 [[TMP5]]
;
entry:
  %x.0 = load i8, ptr %p, align 1
  %conv = zext i8 %x.0 to i32
  %y.0 = load i8, ptr %q, align 1
  %conv3 = zext i8 %y.0 to i32
  %add4 = add nuw nsw i32 %conv, %conv3

  %arrayidx.1 = getelementptr inbounds i8, ptr %p, i64 1
  %x.1 = load i8, ptr %arrayidx.1, align 1
  %conv.1 = zext i8 %x.1 to i32
  %arrayidx2.1 = getelementptr inbounds i8, ptr %q, i64 1
  %y.1 = load i8, ptr %arrayidx2.1, align 1
  %conv3.1 = zext i8 %y.1 to i32
  %add.1 = add nuw nsw i32 %add4, %conv.1
  %add4.1 = add nuw nsw i32 %add.1, %conv3.1

  %arrayidx.2 = getelementptr inbounds i8, ptr %p, i64 2
  %x.2 = load i8, ptr %arrayidx.2, align 1
  %conv.2 = zext i8 %x.2 to i32
  %arrayidx2.2 = getelementptr inbounds i8, ptr %q, i64 2
  %y.2 = load i8, ptr %arrayidx2.2, align 1
  %conv3.2 = zext i8 %y.2 to i32
  %add.2 = add nuw nsw i32 %add4.1, %conv.2
  %add4.2 = add nuw nsw i32 %add.2, %conv3.2

  %arrayidx.3 = getelementptr inbounds i8, ptr %p, i64 3
  %x.3 = load i8, ptr %arrayidx.3, align 1
  %conv.3 = zext i8 %x.3 to i32
  %arrayidx2.3 = getelementptr inbounds i8, ptr %q, i64 3
  %y.3 = load i8, ptr %arrayidx2.3, align 1
  %conv3.3 = zext i8 %y.3 to i32
  %add.3 = add nuw nsw i32 %add4.2, %conv.3
  %add4.3 = add nuw nsw i32 %add.3, %conv3.3

  ret i32 %add4.3
}
11191c9094a2SLuke Lau
; Same two-array zext-add reduction as reduce_sum_2arrays_a, but with the
; scalar adds sequenced array-by-array (all of %x, then all of %y); expected
; vectorized output is identical to the _a variant.
define i32 @reduce_sum_2arrays_b(ptr noalias noundef %x, ptr noalias %y) {
; CHECK-LABEL: @reduce_sum_2arrays_b(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i8>, ptr [[X:%.*]], align 1
; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i8>, ptr [[Y:%.*]], align 1
; CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i8> @llvm.vector.insert.v8i8.v4i8(<8 x i8> poison, <4 x i8> [[TMP0]], i64 0)
; CHECK-NEXT:    [[TMP3:%.*]] = call <8 x i8> @llvm.vector.insert.v8i8.v4i8(<8 x i8> [[TMP2]], <4 x i8> [[TMP1]], i64 4)
; CHECK-NEXT:    [[TMP4:%.*]] = zext <8 x i8> [[TMP3]] to <8 x i32>
; CHECK-NEXT:    [[TMP5:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP4]])
; CHECK-NEXT:    ret i32 [[TMP5]]
;
  entry:
  %0 = load i8, ptr %x, align 1
  %conv = zext i8 %0 to i32
  %arrayidx.1 = getelementptr inbounds i8, ptr %x, i64 1
  %1 = load i8, ptr %arrayidx.1, align 1
  %conv.1 = zext i8 %1 to i32
  %add.1 = add nuw nsw i32 %conv, %conv.1
  %arrayidx.2 = getelementptr inbounds i8, ptr %x, i64 2
  %2 = load i8, ptr %arrayidx.2, align 1
  %conv.2 = zext i8 %2 to i32
  %add.2 = add nuw nsw i32 %add.1, %conv.2
  %arrayidx.3 = getelementptr inbounds i8, ptr %x, i64 3
  %3 = load i8, ptr %arrayidx.3, align 1
  %conv.3 = zext i8 %3 to i32
  %add.3 = add nuw nsw i32 %add.2, %conv.3
  %4 = load i8, ptr %y, align 1
  %conv9 = zext i8 %4 to i32
  %add10 = add nuw nsw i32 %add.3, %conv9
  %arrayidx8.1 = getelementptr inbounds i8, ptr %y, i64 1
  %5 = load i8, ptr %arrayidx8.1, align 1
  %conv9.1 = zext i8 %5 to i32
  %add10.1 = add nuw nsw i32 %add10, %conv9.1
  %arrayidx8.2 = getelementptr inbounds i8, ptr %y, i64 2
  %6 = load i8, ptr %arrayidx8.2, align 1
  %conv9.2 = zext i8 %6 to i32
  %add10.2 = add nuw nsw i32 %add10.1, %conv9.2
  %arrayidx8.3 = getelementptr inbounds i8, ptr %y, i64 3
  %7 = load i8, ptr %arrayidx8.3, align 1
  %conv9.3 = zext i8 %7 to i32
  %add10.3 = add nuw nsw i32 %add10.2, %conv9.3
  ret i32 %add10.3
}
116320864d2cSLuke Lau
116420864d2cSLuke Lau; Shouldn't vectorize to a reduction because we can't promote it
; Negative test: the CHECK lines pin that the scalar fadd chain over bfloat
; loads is left untouched (no vector.reduce.fadd is formed) under every RUN
; configuration.
define bfloat @fadd_4xbf16(ptr %p) {
; CHECK-LABEL: @fadd_4xbf16(
; CHECK-NEXT:    [[X0:%.*]] = load bfloat, ptr [[P:%.*]], align 2
; CHECK-NEXT:    [[P1:%.*]] = getelementptr bfloat, ptr [[P]], i32 1
; CHECK-NEXT:    [[X1:%.*]] = load bfloat, ptr [[P1]], align 2
; CHECK-NEXT:    [[P2:%.*]] = getelementptr bfloat, ptr [[P]], i32 2
; CHECK-NEXT:    [[X2:%.*]] = load bfloat, ptr [[P2]], align 2
; CHECK-NEXT:    [[P3:%.*]] = getelementptr bfloat, ptr [[P]], i32 3
; CHECK-NEXT:    [[X3:%.*]] = load bfloat, ptr [[P3]], align 2
; CHECK-NEXT:    [[R0:%.*]] = fadd fast bfloat [[X0]], [[X1]]
; CHECK-NEXT:    [[R1:%.*]] = fadd fast bfloat [[R0]], [[X2]]
; CHECK-NEXT:    [[R2:%.*]] = fadd fast bfloat [[R1]], [[X3]]
; CHECK-NEXT:    ret bfloat [[R2]]
;
  %x0 = load bfloat, ptr %p
  %p1 = getelementptr bfloat, ptr %p, i32 1
  %x1 = load bfloat, ptr %p1
  %p2 = getelementptr bfloat, ptr %p, i32 2
  %x2 = load bfloat, ptr %p2
  %p3 = getelementptr bfloat, ptr %p, i32 3
  %x3 = load bfloat, ptr %p3

  %r0 = fadd fast bfloat %x0, %x1
  %r1 = fadd fast bfloat %r0, %x2
  %r2 = fadd fast bfloat %r1, %x3

  ret bfloat %r2
}
119320864d2cSLuke Lau
119420864d2cSLuke Lau; Shouldn't vectorize to a reduction because there's no vfred{u,o}mul.vs
; Negative test: scalar fmul chain over bfloat must remain scalar in all RUN
; configurations (the CHECK lines match the unmodified input IR).
define bfloat @fmul_4xbf16(ptr %p) {
; CHECK-LABEL: @fmul_4xbf16(
; CHECK-NEXT:    [[X0:%.*]] = load bfloat, ptr [[P:%.*]], align 2
; CHECK-NEXT:    [[P1:%.*]] = getelementptr bfloat, ptr [[P]], i32 1
; CHECK-NEXT:    [[X1:%.*]] = load bfloat, ptr [[P1]], align 2
; CHECK-NEXT:    [[P2:%.*]] = getelementptr bfloat, ptr [[P]], i32 2
; CHECK-NEXT:    [[X2:%.*]] = load bfloat, ptr [[P2]], align 2
; CHECK-NEXT:    [[P3:%.*]] = getelementptr bfloat, ptr [[P]], i32 3
; CHECK-NEXT:    [[X3:%.*]] = load bfloat, ptr [[P3]], align 2
; CHECK-NEXT:    [[R0:%.*]] = fmul fast bfloat [[X0]], [[X1]]
; CHECK-NEXT:    [[R1:%.*]] = fmul fast bfloat [[R0]], [[X2]]
; CHECK-NEXT:    [[R2:%.*]] = fmul fast bfloat [[R1]], [[X3]]
; CHECK-NEXT:    ret bfloat [[R2]]
;
  %x0 = load bfloat, ptr %p
  %p1 = getelementptr bfloat, ptr %p, i32 1
  %x1 = load bfloat, ptr %p1
  %p2 = getelementptr bfloat, ptr %p, i32 2
  %x2 = load bfloat, ptr %p2
  %p3 = getelementptr bfloat, ptr %p, i32 3
  %x3 = load bfloat, ptr %p3

  %r0 = fmul fast bfloat %x0, %x1
  %r1 = fmul fast bfloat %r0, %x2
  %r2 = fmul fast bfloat %r1, %x3

  ret bfloat %r2
}
122320864d2cSLuke Lau
122420864d2cSLuke Lau; Shouldn't vectorize to a reduction on zvfhmin because we can't promote it
; Split check prefixes: with only zvfhmin (ZVFHMIN) the half fadd chain stays
; scalar, while with zvfh (ZVFH) it is vectorized into a <4 x half> load plus
; a fast @llvm.vector.reduce.fadd with a 0.0 (0xH0000) start value.
define half @fadd_4xf16(ptr %p) {
; ZVFHMIN-LABEL: @fadd_4xf16(
; ZVFHMIN-NEXT:    [[X0:%.*]] = load half, ptr [[P:%.*]], align 2
; ZVFHMIN-NEXT:    [[P1:%.*]] = getelementptr half, ptr [[P]], i32 1
; ZVFHMIN-NEXT:    [[X1:%.*]] = load half, ptr [[P1]], align 2
; ZVFHMIN-NEXT:    [[P2:%.*]] = getelementptr half, ptr [[P]], i32 2
; ZVFHMIN-NEXT:    [[X2:%.*]] = load half, ptr [[P2]], align 2
; ZVFHMIN-NEXT:    [[P3:%.*]] = getelementptr half, ptr [[P]], i32 3
; ZVFHMIN-NEXT:    [[X3:%.*]] = load half, ptr [[P3]], align 2
; ZVFHMIN-NEXT:    [[R0:%.*]] = fadd fast half [[X0]], [[X1]]
; ZVFHMIN-NEXT:    [[R1:%.*]] = fadd fast half [[R0]], [[X2]]
; ZVFHMIN-NEXT:    [[R2:%.*]] = fadd fast half [[R1]], [[X3]]
; ZVFHMIN-NEXT:    ret half [[R2]]
;
; ZVFH-LABEL: @fadd_4xf16(
; ZVFH-NEXT:    [[TMP1:%.*]] = load <4 x half>, ptr [[P:%.*]], align 2
; ZVFH-NEXT:    [[TMP2:%.*]] = call fast half @llvm.vector.reduce.fadd.v4f16(half 0xH0000, <4 x half> [[TMP1]])
; ZVFH-NEXT:    ret half [[TMP2]]
;
  %x0 = load half, ptr %p
  %p1 = getelementptr half, ptr %p, i32 1
  %x1 = load half, ptr %p1
  %p2 = getelementptr half, ptr %p, i32 2
  %x2 = load half, ptr %p2
  %p3 = getelementptr half, ptr %p, i32 3
  %x3 = load half, ptr %p3

  %r0 = fadd fast half %x0, %x1
  %r1 = fadd fast half %r0, %x2
  %r2 = fadd fast half %r1, %x3

  ret half %r2
}
125820864d2cSLuke Lau
125920864d2cSLuke Lau; Shouldn't vectorize to a reduction because there's no vfred{u,o}mul.vs
; Negative test: the half fmul chain must stay scalar under every RUN
; configuration, including zvfh (checked via the common CHECK prefix).
define half @fmul_4xf16(ptr %p) {
; CHECK-LABEL: @fmul_4xf16(
; CHECK-NEXT:    [[X0:%.*]] = load half, ptr [[P:%.*]], align 2
; CHECK-NEXT:    [[P1:%.*]] = getelementptr half, ptr [[P]], i32 1
; CHECK-NEXT:    [[X1:%.*]] = load half, ptr [[P1]], align 2
; CHECK-NEXT:    [[P2:%.*]] = getelementptr half, ptr [[P]], i32 2
; CHECK-NEXT:    [[X2:%.*]] = load half, ptr [[P2]], align 2
; CHECK-NEXT:    [[P3:%.*]] = getelementptr half, ptr [[P]], i32 3
; CHECK-NEXT:    [[X3:%.*]] = load half, ptr [[P3]], align 2
; CHECK-NEXT:    [[R0:%.*]] = fmul fast half [[X0]], [[X1]]
; CHECK-NEXT:    [[R1:%.*]] = fmul fast half [[R0]], [[X2]]
; CHECK-NEXT:    [[R2:%.*]] = fmul fast half [[R1]], [[X3]]
; CHECK-NEXT:    ret half [[R2]]
;
  %x0 = load half, ptr %p
  %p1 = getelementptr half, ptr %p, i32 1
  %x1 = load half, ptr %p1
  %p2 = getelementptr half, ptr %p, i32 2
  %x2 = load half, ptr %p2
  %p3 = getelementptr half, ptr %p, i32 3
  %x3 = load half, ptr %p3

  %r0 = fmul fast half %x0, %x1
  %r1 = fmul fast half %r0, %x2
  %r2 = fmul fast half %r1, %x3

  ret half %r2
}
1288