xref: /llvm-project/llvm/test/Transforms/SLPVectorizer/RISCV/strided-loads-vectorized.ll (revision 343a810725f27bfe92fbd04a42d42aa9caaee7a6)
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=slp-vectorizer -S -mtriple=riscv64-unknown-linux-gnu -mattr=+v < %s | FileCheck %s --check-prefixes=CHECK,NO-ZVFHMIN-ZVFBFMIN
; RUN: opt -passes=slp-vectorizer -S -mtriple=riscv64-unknown-linux-gnu -mattr=+v,+zvfhmin,+zvfbfmin < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN-ZVFBFMIN
; test: eight scalar lanes computing %s[k] = %p[30 - 4*k] - %p[4*k] for
; k = 0..7 (element indices into a [48 x float] array). Every index is a
; constant, so the expected output replaces the 16 scalar loads with two
; llvm.experimental.vp.strided.load calls (byte stride 16 starting at element
; 0, byte stride -16 starting at element 30), one <8 x float> fsub and one
; vector store to %s.
; Note: the scalar results are named %add* although the operation is fsub.
define void @test(ptr %p, ptr noalias %s) {
; CHECK-LABEL: @test(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x float], ptr [[P:%.*]], i64 0, i64 0
; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 30
; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[S:%.*]], i64 0
; CHECK-NEXT:    [[TMP0:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX]], i64 16, <8 x i1> splat (i1 true), i32 8)
; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX1]], i64 -16, <8 x i1> splat (i1 true), i32 8)
; CHECK-NEXT:    [[TMP2:%.*]] = fsub fast <8 x float> [[TMP1]], [[TMP0]]
; CHECK-NEXT:    store <8 x float> [[TMP2]], ptr [[ARRAYIDX2]], align 4
; CHECK-NEXT:    ret void
;
entry:
  ; lane 0: %s[0] = %p[30] - %p[0]
  %arrayidx = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 0
  %i = load float, ptr %arrayidx, align 4
  %arrayidx1 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 30
  %i1 = load float, ptr %arrayidx1, align 4
  %add = fsub fast float %i1, %i
  %arrayidx2 = getelementptr inbounds float, ptr %s, i64 0
  store float %add, ptr %arrayidx2, align 4
  ; lane 1: %s[1] = %p[26] - %p[4]
  %arrayidx4 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 4
  %i2 = load float, ptr %arrayidx4, align 4
  %arrayidx6 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 26
  %i3 = load float, ptr %arrayidx6, align 4
  %add7 = fsub fast float %i3, %i2
  %arrayidx9 = getelementptr inbounds float, ptr %s, i64 1
  store float %add7, ptr %arrayidx9, align 4
  ; lane 2: %s[2] = %p[22] - %p[8]
  %arrayidx11 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 8
  %i4 = load float, ptr %arrayidx11, align 4
  %arrayidx13 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 22
  %i5 = load float, ptr %arrayidx13, align 4
  %add14 = fsub fast float %i5, %i4
  %arrayidx16 = getelementptr inbounds float, ptr %s, i64 2
  store float %add14, ptr %arrayidx16, align 4
  ; lane 3: %s[3] = %p[18] - %p[12]
  %arrayidx18 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 12
  %i6 = load float, ptr %arrayidx18, align 4
  %arrayidx20 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 18
  %i7 = load float, ptr %arrayidx20, align 4
  %add21 = fsub fast float %i7, %i6
  %arrayidx23 = getelementptr inbounds float, ptr %s, i64 3
  store float %add21, ptr %arrayidx23, align 4
  ; lane 4: %s[4] = %p[14] - %p[16]
  %arrayidx25 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 16
  %i8 = load float, ptr %arrayidx25, align 4
  %arrayidx27 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 14
  %i9 = load float, ptr %arrayidx27, align 4
  %add28 = fsub fast float %i9, %i8
  %arrayidx30 = getelementptr inbounds float, ptr %s, i64 4
  store float %add28, ptr %arrayidx30, align 4
  ; lane 5: %s[5] = %p[10] - %p[20]
  %arrayidx32 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 20
  %i10 = load float, ptr %arrayidx32, align 4
  %arrayidx34 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 10
  %i11 = load float, ptr %arrayidx34, align 4
  %add35 = fsub fast float %i11, %i10
  %arrayidx37 = getelementptr inbounds float, ptr %s, i64 5
  store float %add35, ptr %arrayidx37, align 4
  ; lane 6: %s[6] = %p[6] - %p[24]
  %arrayidx39 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 24
  %i12 = load float, ptr %arrayidx39, align 4
  %arrayidx41 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 6
  %i13 = load float, ptr %arrayidx41, align 4
  %add42 = fsub fast float %i13, %i12
  %arrayidx44 = getelementptr inbounds float, ptr %s, i64 6
  store float %add42, ptr %arrayidx44, align 4
  ; lane 7: %s[7] = %p[2] - %p[28]
  %arrayidx46 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 28
  %i14 = load float, ptr %arrayidx46, align 4
  %arrayidx48 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 2
  %i15 = load float, ptr %arrayidx48, align 4
  %add49 = fsub fast float %i15, %i14
  %arrayidx51 = getelementptr inbounds float, ptr %s, i64 7
  store float %add49, ptr %arrayidx51, align 4
  ret void
}
; test1: eight scalar lanes computing %s[k] = %p[30 - 4*k] - %p[k * %str] for
; k = 0..7, where %str is the zero-extended i32 %stride argument. The
; subtrahend indices are only known at run time, so the expected output builds
; the byte stride as `mul i64 %str, 4` for the first
; llvm.experimental.vp.strided.load; the minuend loads use constant indices
; and become a second strided load with byte stride -16 from element 30.
; Note: the scalar results are named %add* although the operation is fsub.
define void @test1(ptr %p, ptr noalias %s, i32 %stride) {
; CHECK-LABEL: @test1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[STR:%.*]] = zext i32 [[STRIDE:%.*]] to i64
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x float], ptr [[P:%.*]], i64 0, i64 0
; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 30
; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[S:%.*]], i64 0
; CHECK-NEXT:    [[TMP0:%.*]] = mul i64 [[STR]], 4
; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX]], i64 [[TMP0]], <8 x i1> splat (i1 true), i32 8)
; CHECK-NEXT:    [[TMP2:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX1]], i64 -16, <8 x i1> splat (i1 true), i32 8)
; CHECK-NEXT:    [[TMP3:%.*]] = fsub fast <8 x float> [[TMP2]], [[TMP1]]
; CHECK-NEXT:    store <8 x float> [[TMP3]], ptr [[ARRAYIDX2]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %str = zext i32 %stride to i64
  ; lane 0: %s[0] = %p[30] - %p[0]
  %arrayidx = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 0
  %i = load float, ptr %arrayidx, align 4
  %arrayidx1 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 30
  %i1 = load float, ptr %arrayidx1, align 4
  %add = fsub fast float %i1, %i
  %arrayidx2 = getelementptr inbounds float, ptr %s, i64 0
  store float %add, ptr %arrayidx2, align 4
  ; lane 1: %s[1] = %p[26] - %p[%str]
  %arrayidx4 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %str
  %i2 = load float, ptr %arrayidx4, align 4
  %arrayidx6 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 26
  %i3 = load float, ptr %arrayidx6, align 4
  %add7 = fsub fast float %i3, %i2
  %arrayidx9 = getelementptr inbounds float, ptr %s, i64 1
  store float %add7, ptr %arrayidx9, align 4
  ; lane 2: %s[2] = %p[22] - %p[2 * %str]
  %st1 = mul i64 %str, 2
  %arrayidx11 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st1
  %i4 = load float, ptr %arrayidx11, align 4
  %arrayidx13 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 22
  %i5 = load float, ptr %arrayidx13, align 4
  %add14 = fsub fast float %i5, %i4
  %arrayidx16 = getelementptr inbounds float, ptr %s, i64 2
  store float %add14, ptr %arrayidx16, align 4
  ; lane 3: %s[3] = %p[18] - %p[3 * %str]
  %st2 = mul i64 %str, 3
  %arrayidx18 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st2
  %i6 = load float, ptr %arrayidx18, align 4
  %arrayidx20 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 18
  %i7 = load float, ptr %arrayidx20, align 4
  %add21 = fsub fast float %i7, %i6
  %arrayidx23 = getelementptr inbounds float, ptr %s, i64 3
  store float %add21, ptr %arrayidx23, align 4
  ; lane 4: %s[4] = %p[14] - %p[4 * %str]
  %st3 = mul i64 %str, 4
  %arrayidx25 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st3
  %i8 = load float, ptr %arrayidx25, align 4
  %arrayidx27 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 14
  %i9 = load float, ptr %arrayidx27, align 4
  %add28 = fsub fast float %i9, %i8
  %arrayidx30 = getelementptr inbounds float, ptr %s, i64 4
  store float %add28, ptr %arrayidx30, align 4
  ; lane 5: %s[5] = %p[10] - %p[5 * %str]
  %st4 = mul i64 %str, 5
  %arrayidx32 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st4
  %i10 = load float, ptr %arrayidx32, align 4
  %arrayidx34 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 10
  %i11 = load float, ptr %arrayidx34, align 4
  %add35 = fsub fast float %i11, %i10
  %arrayidx37 = getelementptr inbounds float, ptr %s, i64 5
  store float %add35, ptr %arrayidx37, align 4
  ; lane 6: %s[6] = %p[6] - %p[6 * %str]
  %st5 = mul i64 %str, 6
  %arrayidx39 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st5
  %i12 = load float, ptr %arrayidx39, align 4
  %arrayidx41 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 6
  %i13 = load float, ptr %arrayidx41, align 4
  %add42 = fsub fast float %i13, %i12
  %arrayidx44 = getelementptr inbounds float, ptr %s, i64 6
  store float %add42, ptr %arrayidx44, align 4
  ; lane 7: %s[7] = %p[2] - %p[7 * %str]
  %st6 = mul i64 %str, 7
  %arrayidx46 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st6
  %i14 = load float, ptr %arrayidx46, align 4
  %arrayidx48 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 2
  %i15 = load float, ptr %arrayidx48, align 4
  %add49 = fsub fast float %i15, %i14
  %arrayidx51 = getelementptr inbounds float, ptr %s, i64 7
  store float %add49, ptr %arrayidx51, align 4
  ret void
}
; test2: eight scalar lanes computing %s[k] = %p[(7 - k) * %str] - %p[2 + 4*k]
; for k = 0..7, where %str is the zero-extended i32 %stride argument. Here the
; constant-index operand is the subtrahend (byte stride 16 from element 2) and
; the run-time-strided operand is the minuend, walked downwards from
; %p[7 * %str]; the expected output therefore builds its byte stride as
; `mul i64 %str, -4`.
; Note: the scalar results are named %add* although the operation is fsub.
define void @test2(ptr %p, ptr noalias %s, i32 %stride) {
; CHECK-LABEL: @test2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[STR:%.*]] = zext i32 [[STRIDE:%.*]] to i64
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x float], ptr [[P:%.*]], i64 0, i64 2
; CHECK-NEXT:    [[ST6:%.*]] = mul i64 [[STR]], 7
; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST6]]
; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[S:%.*]], i64 0
; CHECK-NEXT:    [[TMP0:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX]], i64 16, <8 x i1> splat (i1 true), i32 8)
; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[STR]], -4
; CHECK-NEXT:    [[TMP2:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX1]], i64 [[TMP1]], <8 x i1> splat (i1 true), i32 8)
; CHECK-NEXT:    [[TMP3:%.*]] = fsub fast <8 x float> [[TMP2]], [[TMP0]]
; CHECK-NEXT:    store <8 x float> [[TMP3]], ptr [[ARRAYIDX2]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %str = zext i32 %stride to i64
  ; lane 0: %s[0] = %p[7 * %str] - %p[2]
  %arrayidx = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 2
  %i = load float, ptr %arrayidx, align 4
  %st6 = mul i64 %str, 7
  %arrayidx1 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st6
  %i1 = load float, ptr %arrayidx1, align 4
  %add = fsub fast float %i1, %i
  %arrayidx2 = getelementptr inbounds float, ptr %s, i64 0
  store float %add, ptr %arrayidx2, align 4
  ; lane 1: %s[1] = %p[6 * %str] - %p[6]
  %arrayidx4 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 6
  %i2 = load float, ptr %arrayidx4, align 4
  %st5 = mul i64 %str, 6
  %arrayidx6 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st5
  %i3 = load float, ptr %arrayidx6, align 4
  %add7 = fsub fast float %i3, %i2
  %arrayidx9 = getelementptr inbounds float, ptr %s, i64 1
  store float %add7, ptr %arrayidx9, align 4
  ; lane 2: %s[2] = %p[5 * %str] - %p[10]
  %arrayidx11 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 10
  %i4 = load float, ptr %arrayidx11, align 4
  %st4 = mul i64 %str, 5
  %arrayidx13 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st4
  %i5 = load float, ptr %arrayidx13, align 4
  %add14 = fsub fast float %i5, %i4
  %arrayidx16 = getelementptr inbounds float, ptr %s, i64 2
  store float %add14, ptr %arrayidx16, align 4
  ; lane 3: %s[3] = %p[4 * %str] - %p[14]
  %arrayidx18 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 14
  %i6 = load float, ptr %arrayidx18, align 4
  %st3 = mul i64 %str, 4
  %arrayidx20 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st3
  %i7 = load float, ptr %arrayidx20, align 4
  %add21 = fsub fast float %i7, %i6
  %arrayidx23 = getelementptr inbounds float, ptr %s, i64 3
  store float %add21, ptr %arrayidx23, align 4
  ; lane 4: %s[4] = %p[3 * %str] - %p[18]
  ; (in this lane the mul sits between the first GEP and its load)
  %arrayidx25 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 18
  %st2 = mul i64 %str, 3
  %i8 = load float, ptr %arrayidx25, align 4
  %arrayidx27 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st2
  %i9 = load float, ptr %arrayidx27, align 4
  %add28 = fsub fast float %i9, %i8
  %arrayidx30 = getelementptr inbounds float, ptr %s, i64 4
  store float %add28, ptr %arrayidx30, align 4
  ; lane 5: %s[5] = %p[2 * %str] - %p[22]
  %arrayidx32 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 22
  %i10 = load float, ptr %arrayidx32, align 4
  %st1 = mul i64 %str, 2
  %arrayidx34 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st1
  %i11 = load float, ptr %arrayidx34, align 4
  %add35 = fsub fast float %i11, %i10
  %arrayidx37 = getelementptr inbounds float, ptr %s, i64 5
  store float %add35, ptr %arrayidx37, align 4
  ; lane 6: %s[6] = %p[%str] - %p[26]
  %arrayidx39 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 26
  %i12 = load float, ptr %arrayidx39, align 4
  %arrayidx41 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %str
  %i13 = load float, ptr %arrayidx41, align 4
  %add42 = fsub fast float %i13, %i12
  %arrayidx44 = getelementptr inbounds float, ptr %s, i64 6
  store float %add42, ptr %arrayidx44, align 4
  ; lane 7: %s[7] = %p[0] - %p[30]
  %arrayidx46 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 30
  %i14 = load float, ptr %arrayidx46, align 4
  %arrayidx48 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 0
  %i15 = load float, ptr %arrayidx48, align 4
  %add49 = fsub fast float %i15, %i14
  %arrayidx51 = getelementptr inbounds float, ptr %s, i64 7
  store float %add49, ptr %arrayidx51, align 4
  ret void
}
; test3: eight scalar lanes computing %s[k] = %p[30 - k] - %p[4*k] for
; k = 0..7 (element indices into a [48 x float] array). The subtrahend loads
; step by 4 elements (byte stride 16 from element 0); the minuend loads step
; by one element downwards from element 30, which the expected output encodes
; as a llvm.experimental.vp.strided.load with byte stride -4.
; Note: the scalar results are named %add* although the operation is fsub.
define void @test3(ptr %p, ptr noalias %s) {
; CHECK-LABEL: @test3(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x float], ptr [[P:%.*]], i64 0, i64 0
; CHECK-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 30
; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[S:%.*]], i64 0
; CHECK-NEXT:    [[TMP0:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX]], i64 16, <8 x i1> splat (i1 true), i32 8)
; CHECK-NEXT:    [[TMP2:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX1]], i64 -4, <8 x i1> splat (i1 true), i32 8)
; CHECK-NEXT:    [[TMP3:%.*]] = fsub fast <8 x float> [[TMP2]], [[TMP0]]
; CHECK-NEXT:    store <8 x float> [[TMP3]], ptr [[ARRAYIDX2]], align 4
; CHECK-NEXT:    ret void
;
entry:
  ; lane 0: %s[0] = %p[30] - %p[0]
  %arrayidx = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 0
  %i = load float, ptr %arrayidx, align 4
  %arrayidx1 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 30
  %i1 = load float, ptr %arrayidx1, align 4
  %add = fsub fast float %i1, %i
  %arrayidx2 = getelementptr inbounds float, ptr %s, i64 0
  store float %add, ptr %arrayidx2, align 4
  ; lane 1: %s[1] = %p[29] - %p[4]
  %arrayidx4 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 4
  %i2 = load float, ptr %arrayidx4, align 4
  %arrayidx6 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 29
  %i3 = load float, ptr %arrayidx6, align 4
  %add7 = fsub fast float %i3, %i2
  %arrayidx9 = getelementptr inbounds float, ptr %s, i64 1
  store float %add7, ptr %arrayidx9, align 4
  ; lane 2: %s[2] = %p[28] - %p[8]
  %arrayidx11 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 8
  %i4 = load float, ptr %arrayidx11, align 4
  %arrayidx13 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 28
  %i5 = load float, ptr %arrayidx13, align 4
  %add14 = fsub fast float %i5, %i4
  %arrayidx16 = getelementptr inbounds float, ptr %s, i64 2
  store float %add14, ptr %arrayidx16, align 4
  ; lane 3: %s[3] = %p[27] - %p[12]
  %arrayidx18 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 12
  %i6 = load float, ptr %arrayidx18, align 4
  %arrayidx20 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 27
  %i7 = load float, ptr %arrayidx20, align 4
  %add21 = fsub fast float %i7, %i6
  %arrayidx23 = getelementptr inbounds float, ptr %s, i64 3
  store float %add21, ptr %arrayidx23, align 4
  ; lane 4: %s[4] = %p[26] - %p[16]
  %arrayidx25 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 16
  %i8 = load float, ptr %arrayidx25, align 4
  %arrayidx27 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 26
  %i9 = load float, ptr %arrayidx27, align 4
  %add28 = fsub fast float %i9, %i8
  %arrayidx30 = getelementptr inbounds float, ptr %s, i64 4
  store float %add28, ptr %arrayidx30, align 4
  ; lane 5: %s[5] = %p[25] - %p[20]
  %arrayidx32 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 20
  %i10 = load float, ptr %arrayidx32, align 4
  %arrayidx34 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 25
  %i11 = load float, ptr %arrayidx34, align 4
  %add35 = fsub fast float %i11, %i10
  %arrayidx37 = getelementptr inbounds float, ptr %s, i64 5
  store float %add35, ptr %arrayidx37, align 4
  ; lane 6: %s[6] = %p[24] - %p[24] (both operands read the same element)
  %arrayidx39 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 24
  %i12 = load float, ptr %arrayidx39, align 4
  %arrayidx41 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 24
  %i13 = load float, ptr %arrayidx41, align 4
  %add42 = fsub fast float %i13, %i12
  %arrayidx44 = getelementptr inbounds float, ptr %s, i64 6
  store float %add42, ptr %arrayidx44, align 4
  ; lane 7: %s[7] = %p[23] - %p[28]
  %arrayidx46 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 28
  %i14 = load float, ptr %arrayidx46, align 4
  %arrayidx48 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 23
  %i15 = load float, ptr %arrayidx48, align 4
  %add49 = fsub fast float %i15, %i14
  %arrayidx51 = getelementptr inbounds float, ptr %s, i64 7
  store float %add49, ptr %arrayidx51, align 4
  ret void
}
3127b9bf80aSAlexey Bataev
313*343a8107SLuke Lau
314*343a8107SLuke Laudefine void @test_bf16(ptr %p, ptr noalias %s) {
315*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-LABEL: @test_bf16(
316*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT:  entry:
317*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P:%.*]], i64 0, i64 0
318*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[I:%.*]] = load bfloat, ptr [[ARRAYIDX]], align 4
319*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 30
320*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[I1:%.*]] = load bfloat, ptr [[ARRAYIDX1]], align 4
321*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ADD:%.*]] = fsub fast bfloat [[I1]], [[I]]
322*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds bfloat, ptr [[S:%.*]], i64 0
323*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT:    store bfloat [[ADD]], ptr [[ARRAYIDX2]], align 4
324*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 4
325*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[I2:%.*]] = load bfloat, ptr [[ARRAYIDX4]], align 4
326*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 26
327*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[I3:%.*]] = load bfloat, ptr [[ARRAYIDX6]], align 4
328*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ADD7:%.*]] = fsub fast bfloat [[I3]], [[I2]]
329*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds bfloat, ptr [[S]], i64 1
330*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT:    store bfloat [[ADD7]], ptr [[ARRAYIDX9]], align 4
331*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 8
332*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[I4:%.*]] = load bfloat, ptr [[ARRAYIDX11]], align 4
333*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 22
334*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[I5:%.*]] = load bfloat, ptr [[ARRAYIDX13]], align 4
335*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ADD14:%.*]] = fsub fast bfloat [[I5]], [[I4]]
336*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds bfloat, ptr [[S]], i64 2
337*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT:    store bfloat [[ADD14]], ptr [[ARRAYIDX16]], align 4
338*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX18:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 12
339*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[I6:%.*]] = load bfloat, ptr [[ARRAYIDX18]], align 4
340*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX20:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 18
341*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[I7:%.*]] = load bfloat, ptr [[ARRAYIDX20]], align 4
342*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ADD21:%.*]] = fsub fast bfloat [[I7]], [[I6]]
343*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX23:%.*]] = getelementptr inbounds bfloat, ptr [[S]], i64 3
344*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT:    store bfloat [[ADD21]], ptr [[ARRAYIDX23]], align 4
345*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX25:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 16
346*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[I8:%.*]] = load bfloat, ptr [[ARRAYIDX25]], align 4
347*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX27:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 14
348*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[I9:%.*]] = load bfloat, ptr [[ARRAYIDX27]], align 4
349*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ADD28:%.*]] = fsub fast bfloat [[I9]], [[I8]]
350*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX30:%.*]] = getelementptr inbounds bfloat, ptr [[S]], i64 4
351*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT:    store bfloat [[ADD28]], ptr [[ARRAYIDX30]], align 4
352*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX32:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 20
353*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[I10:%.*]] = load bfloat, ptr [[ARRAYIDX32]], align 4
354*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX34:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 10
355*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[I11:%.*]] = load bfloat, ptr [[ARRAYIDX34]], align 4
356*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ADD35:%.*]] = fsub fast bfloat [[I11]], [[I10]]
357*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX37:%.*]] = getelementptr inbounds bfloat, ptr [[S]], i64 5
358*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT:    store bfloat [[ADD35]], ptr [[ARRAYIDX37]], align 4
359*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX39:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 24
360*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[I12:%.*]] = load bfloat, ptr [[ARRAYIDX39]], align 4
361*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX41:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 6
362*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[I13:%.*]] = load bfloat, ptr [[ARRAYIDX41]], align 4
363*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ADD42:%.*]] = fsub fast bfloat [[I13]], [[I12]]
364*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX44:%.*]] = getelementptr inbounds bfloat, ptr [[S]], i64 6
365*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT:    store bfloat [[ADD42]], ptr [[ARRAYIDX44]], align 4
366*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX46:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 28
367*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[I14:%.*]] = load bfloat, ptr [[ARRAYIDX46]], align 4
368*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX48:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 2
369*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[I15:%.*]] = load bfloat, ptr [[ARRAYIDX48]], align 4
370*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ADD49:%.*]] = fsub fast bfloat [[I15]], [[I14]]
371*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX51:%.*]] = getelementptr inbounds bfloat, ptr [[S]], i64 7
372*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT:    store bfloat [[ADD49]], ptr [[ARRAYIDX51]], align 4
373*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT:    ret void
374*343a8107SLuke Lau;
375*343a8107SLuke Lau; ZVFHMIN-ZVFBFMIN-LABEL: @test_bf16(
376*343a8107SLuke Lau; ZVFHMIN-ZVFBFMIN-NEXT:  entry:
377*343a8107SLuke Lau; ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P:%.*]], i64 0, i64 0
378*343a8107SLuke Lau; ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 30
379*343a8107SLuke Lau; ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds bfloat, ptr [[S:%.*]], i64 0
380*343a8107SLuke Lau; ZVFHMIN-ZVFBFMIN-NEXT:    [[TMP15:%.*]] = call <8 x bfloat> @llvm.experimental.vp.strided.load.v8bf16.p0.i64(ptr align 4 [[ARRAYIDX]], i64 8, <8 x i1> splat (i1 true), i32 8)
381*343a8107SLuke Lau; ZVFHMIN-ZVFBFMIN-NEXT:    [[TMP7:%.*]] = call <8 x bfloat> @llvm.experimental.vp.strided.load.v8bf16.p0.i64(ptr align 4 [[ARRAYIDX1]], i64 -8, <8 x i1> splat (i1 true), i32 8)
382*343a8107SLuke Lau; ZVFHMIN-ZVFBFMIN-NEXT:    [[TMP16:%.*]] = fsub fast <8 x bfloat> [[TMP7]], [[TMP15]]
383*343a8107SLuke Lau; ZVFHMIN-ZVFBFMIN-NEXT:    store <8 x bfloat> [[TMP16]], ptr [[ARRAYIDX2]], align 4
384*343a8107SLuke Lau; ZVFHMIN-ZVFBFMIN-NEXT:    ret void
385*343a8107SLuke Lau;
386*343a8107SLuke Lauentry:
387*343a8107SLuke Lau  %arrayidx = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 0
388*343a8107SLuke Lau  %i = load bfloat, ptr %arrayidx, align 4
389*343a8107SLuke Lau  %arrayidx1 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 30
390*343a8107SLuke Lau  %i1 = load bfloat, ptr %arrayidx1, align 4
391*343a8107SLuke Lau  %add = fsub fast bfloat %i1, %i
392*343a8107SLuke Lau  %arrayidx2 = getelementptr inbounds bfloat, ptr %s, i64 0
393*343a8107SLuke Lau  store bfloat %add, ptr %arrayidx2, align 4
394*343a8107SLuke Lau  %arrayidx4 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 4
395*343a8107SLuke Lau  %i2 = load bfloat, ptr %arrayidx4, align 4
396*343a8107SLuke Lau  %arrayidx6 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 26
397*343a8107SLuke Lau  %i3 = load bfloat, ptr %arrayidx6, align 4
398*343a8107SLuke Lau  %add7 = fsub fast bfloat %i3, %i2
399*343a8107SLuke Lau  %arrayidx9 = getelementptr inbounds bfloat, ptr %s, i64 1
400*343a8107SLuke Lau  store bfloat %add7, ptr %arrayidx9, align 4
401*343a8107SLuke Lau  %arrayidx11 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 8
402*343a8107SLuke Lau  %i4 = load bfloat, ptr %arrayidx11, align 4
403*343a8107SLuke Lau  %arrayidx13 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 22
404*343a8107SLuke Lau  %i5 = load bfloat, ptr %arrayidx13, align 4
405*343a8107SLuke Lau  %add14 = fsub fast bfloat %i5, %i4
406*343a8107SLuke Lau  %arrayidx16 = getelementptr inbounds bfloat, ptr %s, i64 2
407*343a8107SLuke Lau  store bfloat %add14, ptr %arrayidx16, align 4
408*343a8107SLuke Lau  %arrayidx18 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 12
409*343a8107SLuke Lau  %i6 = load bfloat, ptr %arrayidx18, align 4
410*343a8107SLuke Lau  %arrayidx20 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 18
411*343a8107SLuke Lau  %i7 = load bfloat, ptr %arrayidx20, align 4
412*343a8107SLuke Lau  %add21 = fsub fast bfloat %i7, %i6
413*343a8107SLuke Lau  %arrayidx23 = getelementptr inbounds bfloat, ptr %s, i64 3
414*343a8107SLuke Lau  store bfloat %add21, ptr %arrayidx23, align 4
415*343a8107SLuke Lau  %arrayidx25 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 16
416*343a8107SLuke Lau  %i8 = load bfloat, ptr %arrayidx25, align 4
417*343a8107SLuke Lau  %arrayidx27 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 14
418*343a8107SLuke Lau  %i9 = load bfloat, ptr %arrayidx27, align 4
419*343a8107SLuke Lau  %add28 = fsub fast bfloat %i9, %i8
420*343a8107SLuke Lau  %arrayidx30 = getelementptr inbounds bfloat, ptr %s, i64 4
421*343a8107SLuke Lau  store bfloat %add28, ptr %arrayidx30, align 4
422*343a8107SLuke Lau  %arrayidx32 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 20
423*343a8107SLuke Lau  %i10 = load bfloat, ptr %arrayidx32, align 4
424*343a8107SLuke Lau  %arrayidx34 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 10
425*343a8107SLuke Lau  %i11 = load bfloat, ptr %arrayidx34, align 4
426*343a8107SLuke Lau  %add35 = fsub fast bfloat %i11, %i10
427*343a8107SLuke Lau  %arrayidx37 = getelementptr inbounds bfloat, ptr %s, i64 5
428*343a8107SLuke Lau  store bfloat %add35, ptr %arrayidx37, align 4
429*343a8107SLuke Lau  %arrayidx39 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 24
430*343a8107SLuke Lau  %i12 = load bfloat, ptr %arrayidx39, align 4
431*343a8107SLuke Lau  %arrayidx41 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 6
432*343a8107SLuke Lau  %i13 = load bfloat, ptr %arrayidx41, align 4
433*343a8107SLuke Lau  %add42 = fsub fast bfloat %i13, %i12
434*343a8107SLuke Lau  %arrayidx44 = getelementptr inbounds bfloat, ptr %s, i64 6
435*343a8107SLuke Lau  store bfloat %add42, ptr %arrayidx44, align 4
436*343a8107SLuke Lau  %arrayidx46 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 28
437*343a8107SLuke Lau  %i14 = load bfloat, ptr %arrayidx46, align 4
438*343a8107SLuke Lau  %arrayidx48 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 2
439*343a8107SLuke Lau  %i15 = load bfloat, ptr %arrayidx48, align 4
440*343a8107SLuke Lau  %add49 = fsub fast bfloat %i15, %i14
441*343a8107SLuke Lau  %arrayidx51 = getelementptr inbounds bfloat, ptr %s, i64 7
442*343a8107SLuke Lau  store bfloat %add49, ptr %arrayidx51, align 4
443*343a8107SLuke Lau  ret void
444*343a8107SLuke Lau}
445*343a8107SLuke Lau
; Half-precision variant of the strided-load test.
;
; Input: eight scalar `fsub fast half` operations. The subtrahends are read
; from %p at elements 0, 4, ..., 28 (ascending) and the minuends from elements
; 30, 26, ..., 2 (descending); the results go to %s[0] .. %s[7].
;
; Expected output:
;  * with only +v, the function stays fully scalar;
;  * with +zvfhmin,+zvfbfmin, the loads become two vp.strided.load calls with
;    byte strides 8 and -8, feeding one <8 x half> fsub and one vector store.
;
; The stores use `align 2`: consecutive half slots of %s are 2 bytes apart, so
; they cannot all be 4-byte aligned, and over-stating alignment is undefined
; behaviour. All offsets loaded from %p are multiples of 4 bytes, so the loads
; keep `align 4`.
;
; NOTE(review): the expectations below were adjusted to follow the store
; alignment; regenerate with utils/update_test_checks.py to confirm.
define void @test_f16(ptr %p, ptr noalias %s) {
; NO-ZVFHMIN-ZVFBFMIN-LABEL: @test_f16(
; NO-ZVFHMIN-ZVFBFMIN-NEXT:  entry:
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x half], ptr [[P:%.*]], i64 0, i64 0
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[I:%.*]] = load half, ptr [[ARRAYIDX]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 30
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[I1:%.*]] = load half, ptr [[ARRAYIDX1]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ADD:%.*]] = fsub fast half [[I1]], [[I]]
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds half, ptr [[S:%.*]], i64 0
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    store half [[ADD]], ptr [[ARRAYIDX2]], align 2
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[I2:%.*]] = load half, ptr [[ARRAYIDX4]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 26
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[I3:%.*]] = load half, ptr [[ARRAYIDX6]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ADD7:%.*]] = fsub fast half [[I3]], [[I2]]
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds half, ptr [[S]], i64 1
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    store half [[ADD7]], ptr [[ARRAYIDX9]], align 2
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 8
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[I4:%.*]] = load half, ptr [[ARRAYIDX11]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 22
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[I5:%.*]] = load half, ptr [[ARRAYIDX13]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ADD14:%.*]] = fsub fast half [[I5]], [[I4]]
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds half, ptr [[S]], i64 2
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    store half [[ADD14]], ptr [[ARRAYIDX16]], align 2
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX18:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 12
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[I6:%.*]] = load half, ptr [[ARRAYIDX18]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX20:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 18
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[I7:%.*]] = load half, ptr [[ARRAYIDX20]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ADD21:%.*]] = fsub fast half [[I7]], [[I6]]
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX23:%.*]] = getelementptr inbounds half, ptr [[S]], i64 3
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    store half [[ADD21]], ptr [[ARRAYIDX23]], align 2
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX25:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 16
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[I8:%.*]] = load half, ptr [[ARRAYIDX25]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX27:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 14
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[I9:%.*]] = load half, ptr [[ARRAYIDX27]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ADD28:%.*]] = fsub fast half [[I9]], [[I8]]
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX30:%.*]] = getelementptr inbounds half, ptr [[S]], i64 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    store half [[ADD28]], ptr [[ARRAYIDX30]], align 2
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX32:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 20
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[I10:%.*]] = load half, ptr [[ARRAYIDX32]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX34:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 10
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[I11:%.*]] = load half, ptr [[ARRAYIDX34]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ADD35:%.*]] = fsub fast half [[I11]], [[I10]]
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX37:%.*]] = getelementptr inbounds half, ptr [[S]], i64 5
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    store half [[ADD35]], ptr [[ARRAYIDX37]], align 2
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX39:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 24
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[I12:%.*]] = load half, ptr [[ARRAYIDX39]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX41:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 6
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[I13:%.*]] = load half, ptr [[ARRAYIDX41]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ADD42:%.*]] = fsub fast half [[I13]], [[I12]]
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX44:%.*]] = getelementptr inbounds half, ptr [[S]], i64 6
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    store half [[ADD42]], ptr [[ARRAYIDX44]], align 2
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX46:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 28
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[I14:%.*]] = load half, ptr [[ARRAYIDX46]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX48:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 2
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[I15:%.*]] = load half, ptr [[ARRAYIDX48]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ADD49:%.*]] = fsub fast half [[I15]], [[I14]]
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX51:%.*]] = getelementptr inbounds half, ptr [[S]], i64 7
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    store half [[ADD49]], ptr [[ARRAYIDX51]], align 2
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    ret void
;
; ZVFHMIN-ZVFBFMIN-LABEL: @test_f16(
; ZVFHMIN-ZVFBFMIN-NEXT:  entry:
; ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x half], ptr [[P:%.*]], i64 0, i64 0
; ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 30
; ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds half, ptr [[S:%.*]], i64 0
; ZVFHMIN-ZVFBFMIN-NEXT:    [[TMP15:%.*]] = call <8 x half> @llvm.experimental.vp.strided.load.v8f16.p0.i64(ptr align 4 [[ARRAYIDX]], i64 8, <8 x i1> splat (i1 true), i32 8)
; ZVFHMIN-ZVFBFMIN-NEXT:    [[TMP7:%.*]] = call <8 x half> @llvm.experimental.vp.strided.load.v8f16.p0.i64(ptr align 4 [[ARRAYIDX1]], i64 -8, <8 x i1> splat (i1 true), i32 8)
; ZVFHMIN-ZVFBFMIN-NEXT:    [[TMP16:%.*]] = fsub fast <8 x half> [[TMP7]], [[TMP15]]
; ZVFHMIN-ZVFBFMIN-NEXT:    store <8 x half> [[TMP16]], ptr [[ARRAYIDX2]], align 2
; ZVFHMIN-ZVFBFMIN-NEXT:    ret void
;
entry:
  ; Lane 0: p[30] - p[0] -> s[0]
  %arrayidx = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 0
  %i = load half, ptr %arrayidx, align 4
  %arrayidx1 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 30
  %i1 = load half, ptr %arrayidx1, align 4
  %add = fsub fast half %i1, %i
  %arrayidx2 = getelementptr inbounds half, ptr %s, i64 0
  store half %add, ptr %arrayidx2, align 2
  ; Lane 1: p[26] - p[4] -> s[1]
  %arrayidx4 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 4
  %i2 = load half, ptr %arrayidx4, align 4
  %arrayidx6 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 26
  %i3 = load half, ptr %arrayidx6, align 4
  %add7 = fsub fast half %i3, %i2
  %arrayidx9 = getelementptr inbounds half, ptr %s, i64 1
  store half %add7, ptr %arrayidx9, align 2
  ; Lane 2: p[22] - p[8] -> s[2]
  %arrayidx11 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 8
  %i4 = load half, ptr %arrayidx11, align 4
  %arrayidx13 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 22
  %i5 = load half, ptr %arrayidx13, align 4
  %add14 = fsub fast half %i5, %i4
  %arrayidx16 = getelementptr inbounds half, ptr %s, i64 2
  store half %add14, ptr %arrayidx16, align 2
  ; Lane 3: p[18] - p[12] -> s[3]
  %arrayidx18 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 12
  %i6 = load half, ptr %arrayidx18, align 4
  %arrayidx20 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 18
  %i7 = load half, ptr %arrayidx20, align 4
  %add21 = fsub fast half %i7, %i6
  %arrayidx23 = getelementptr inbounds half, ptr %s, i64 3
  store half %add21, ptr %arrayidx23, align 2
  ; Lane 4: p[14] - p[16] -> s[4]
  %arrayidx25 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 16
  %i8 = load half, ptr %arrayidx25, align 4
  %arrayidx27 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 14
  %i9 = load half, ptr %arrayidx27, align 4
  %add28 = fsub fast half %i9, %i8
  %arrayidx30 = getelementptr inbounds half, ptr %s, i64 4
  store half %add28, ptr %arrayidx30, align 2
  ; Lane 5: p[10] - p[20] -> s[5]
  %arrayidx32 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 20
  %i10 = load half, ptr %arrayidx32, align 4
  %arrayidx34 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 10
  %i11 = load half, ptr %arrayidx34, align 4
  %add35 = fsub fast half %i11, %i10
  %arrayidx37 = getelementptr inbounds half, ptr %s, i64 5
  store half %add35, ptr %arrayidx37, align 2
  ; Lane 6: p[6] - p[24] -> s[6]
  %arrayidx39 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 24
  %i12 = load half, ptr %arrayidx39, align 4
  %arrayidx41 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 6
  %i13 = load half, ptr %arrayidx41, align 4
  %add42 = fsub fast half %i13, %i12
  %arrayidx44 = getelementptr inbounds half, ptr %s, i64 6
  store half %add42, ptr %arrayidx44, align 2
  ; Lane 7: p[2] - p[28] -> s[7]
  %arrayidx46 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 28
  %i14 = load half, ptr %arrayidx46, align 4
  %arrayidx48 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 2
  %i15 = load half, ptr %arrayidx48, align 4
  %add49 = fsub fast half %i15, %i14
  %arrayidx51 = getelementptr inbounds half, ptr %s, i64 7
  store half %add49, ptr %arrayidx51, align 2
  ret void
}
577