17b9bf80aSAlexey Bataev; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 2*343a8107SLuke Lau; RUN: opt -passes=slp-vectorizer -S -mtriple=riscv64-unknown-linux-gnu -mattr=+v < %s | FileCheck %s --check-prefixes=CHECK,NO-ZVFHMIN-ZVFBFMIN 3*343a8107SLuke Lau; RUN: opt -passes=slp-vectorizer -S -mtriple=riscv64-unknown-linux-gnu -mattr=+v,+zvfhmin,+zvfbfmin < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN-ZVFBFMIN 4*343a8107SLuke Lau 57b9bf80aSAlexey Bataev 62d69827cSNikita Popovdefine void @test(ptr %p, ptr noalias %s) { 77b9bf80aSAlexey Bataev; CHECK-LABEL: @test( 87b9bf80aSAlexey Bataev; CHECK-NEXT: entry: 97b9bf80aSAlexey Bataev; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x float], ptr [[P:%.*]], i64 0, i64 0 107b9bf80aSAlexey Bataev; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 30 117b9bf80aSAlexey Bataev; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[S:%.*]], i64 0 1238fffa63SPaul Walker; CHECK-NEXT: [[TMP0:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX]], i64 16, <8 x i1> splat (i1 true), i32 8) 1338fffa63SPaul Walker; CHECK-NEXT: [[TMP1:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX1]], i64 -16, <8 x i1> splat (i1 true), i32 8) 14833a1cadSAlexey Bataev; CHECK-NEXT: [[TMP2:%.*]] = fsub fast <8 x float> [[TMP1]], [[TMP0]] 15833a1cadSAlexey Bataev; CHECK-NEXT: store <8 x float> [[TMP2]], ptr [[ARRAYIDX2]], align 4 167b9bf80aSAlexey Bataev; CHECK-NEXT: ret void 177b9bf80aSAlexey Bataev; 187b9bf80aSAlexey Bataeventry: 192d69827cSNikita Popov %arrayidx = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 0 202d69827cSNikita Popov %i = load float, ptr %arrayidx, align 4 212d69827cSNikita Popov %arrayidx1 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 30 222d69827cSNikita Popov %i1 = load float, ptr %arrayidx1, align 4 237b9bf80aSAlexey Bataev %add 
= fsub fast float %i1, %i 242d69827cSNikita Popov %arrayidx2 = getelementptr inbounds float, ptr %s, i64 0 252d69827cSNikita Popov store float %add, ptr %arrayidx2, align 4 262d69827cSNikita Popov %arrayidx4 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 4 272d69827cSNikita Popov %i2 = load float, ptr %arrayidx4, align 4 282d69827cSNikita Popov %arrayidx6 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 26 292d69827cSNikita Popov %i3 = load float, ptr %arrayidx6, align 4 307b9bf80aSAlexey Bataev %add7 = fsub fast float %i3, %i2 312d69827cSNikita Popov %arrayidx9 = getelementptr inbounds float, ptr %s, i64 1 322d69827cSNikita Popov store float %add7, ptr %arrayidx9, align 4 332d69827cSNikita Popov %arrayidx11 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 8 342d69827cSNikita Popov %i4 = load float, ptr %arrayidx11, align 4 352d69827cSNikita Popov %arrayidx13 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 22 362d69827cSNikita Popov %i5 = load float, ptr %arrayidx13, align 4 377b9bf80aSAlexey Bataev %add14 = fsub fast float %i5, %i4 382d69827cSNikita Popov %arrayidx16 = getelementptr inbounds float, ptr %s, i64 2 392d69827cSNikita Popov store float %add14, ptr %arrayidx16, align 4 402d69827cSNikita Popov %arrayidx18 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 12 412d69827cSNikita Popov %i6 = load float, ptr %arrayidx18, align 4 422d69827cSNikita Popov %arrayidx20 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 18 432d69827cSNikita Popov %i7 = load float, ptr %arrayidx20, align 4 447b9bf80aSAlexey Bataev %add21 = fsub fast float %i7, %i6 452d69827cSNikita Popov %arrayidx23 = getelementptr inbounds float, ptr %s, i64 3 462d69827cSNikita Popov store float %add21, ptr %arrayidx23, align 4 472d69827cSNikita Popov %arrayidx25 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 16 482d69827cSNikita Popov %i8 = load float, ptr %arrayidx25, align 4 492d69827cSNikita Popov %arrayidx27 = getelementptr 
inbounds [48 x float], ptr %p, i64 0, i64 14 502d69827cSNikita Popov %i9 = load float, ptr %arrayidx27, align 4 517b9bf80aSAlexey Bataev %add28 = fsub fast float %i9, %i8 522d69827cSNikita Popov %arrayidx30 = getelementptr inbounds float, ptr %s, i64 4 532d69827cSNikita Popov store float %add28, ptr %arrayidx30, align 4 542d69827cSNikita Popov %arrayidx32 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 20 552d69827cSNikita Popov %i10 = load float, ptr %arrayidx32, align 4 562d69827cSNikita Popov %arrayidx34 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 10 572d69827cSNikita Popov %i11 = load float, ptr %arrayidx34, align 4 587b9bf80aSAlexey Bataev %add35 = fsub fast float %i11, %i10 592d69827cSNikita Popov %arrayidx37 = getelementptr inbounds float, ptr %s, i64 5 602d69827cSNikita Popov store float %add35, ptr %arrayidx37, align 4 612d69827cSNikita Popov %arrayidx39 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 24 622d69827cSNikita Popov %i12 = load float, ptr %arrayidx39, align 4 632d69827cSNikita Popov %arrayidx41 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 6 642d69827cSNikita Popov %i13 = load float, ptr %arrayidx41, align 4 657b9bf80aSAlexey Bataev %add42 = fsub fast float %i13, %i12 662d69827cSNikita Popov %arrayidx44 = getelementptr inbounds float, ptr %s, i64 6 672d69827cSNikita Popov store float %add42, ptr %arrayidx44, align 4 682d69827cSNikita Popov %arrayidx46 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 28 692d69827cSNikita Popov %i14 = load float, ptr %arrayidx46, align 4 702d69827cSNikita Popov %arrayidx48 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 2 712d69827cSNikita Popov %i15 = load float, ptr %arrayidx48, align 4 727b9bf80aSAlexey Bataev %add49 = fsub fast float %i15, %i14 732d69827cSNikita Popov %arrayidx51 = getelementptr inbounds float, ptr %s, i64 7 742d69827cSNikita Popov store float %add49, ptr %arrayidx51, align 4 757b9bf80aSAlexey Bataev ret void 767b9bf80aSAlexey 
Bataev} 777b9bf80aSAlexey Bataev 782d69827cSNikita Popovdefine void @test1(ptr %p, ptr noalias %s, i32 %stride) { 797b9bf80aSAlexey Bataev; CHECK-LABEL: @test1( 807b9bf80aSAlexey Bataev; CHECK-NEXT: entry: 817b9bf80aSAlexey Bataev; CHECK-NEXT: [[STR:%.*]] = zext i32 [[STRIDE:%.*]] to i64 827b9bf80aSAlexey Bataev; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x float], ptr [[P:%.*]], i64 0, i64 0 837b9bf80aSAlexey Bataev; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 30 847b9bf80aSAlexey Bataev; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[S:%.*]], i64 0 851c2b79adSAlexey Bataev; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[STR]], 4 8638fffa63SPaul Walker; CHECK-NEXT: [[TMP1:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX]], i64 [[TMP0]], <8 x i1> splat (i1 true), i32 8) 8738fffa63SPaul Walker; CHECK-NEXT: [[TMP2:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX1]], i64 -16, <8 x i1> splat (i1 true), i32 8) 881c2b79adSAlexey Bataev; CHECK-NEXT: [[TMP3:%.*]] = fsub fast <8 x float> [[TMP2]], [[TMP1]] 891c2b79adSAlexey Bataev; CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[ARRAYIDX2]], align 4 907b9bf80aSAlexey Bataev; CHECK-NEXT: ret void 917b9bf80aSAlexey Bataev; 927b9bf80aSAlexey Bataeventry: 937b9bf80aSAlexey Bataev %str = zext i32 %stride to i64 942d69827cSNikita Popov %arrayidx = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 0 952d69827cSNikita Popov %i = load float, ptr %arrayidx, align 4 962d69827cSNikita Popov %arrayidx1 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 30 972d69827cSNikita Popov %i1 = load float, ptr %arrayidx1, align 4 987b9bf80aSAlexey Bataev %add = fsub fast float %i1, %i 992d69827cSNikita Popov %arrayidx2 = getelementptr inbounds float, ptr %s, i64 0 1002d69827cSNikita Popov store float %add, ptr %arrayidx2, align 4 1012d69827cSNikita Popov %arrayidx4 = 
getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %str 1022d69827cSNikita Popov %i2 = load float, ptr %arrayidx4, align 4 1032d69827cSNikita Popov %arrayidx6 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 26 1042d69827cSNikita Popov %i3 = load float, ptr %arrayidx6, align 4 1057b9bf80aSAlexey Bataev %add7 = fsub fast float %i3, %i2 1062d69827cSNikita Popov %arrayidx9 = getelementptr inbounds float, ptr %s, i64 1 1072d69827cSNikita Popov store float %add7, ptr %arrayidx9, align 4 1087b9bf80aSAlexey Bataev %st1 = mul i64 %str, 2 1092d69827cSNikita Popov %arrayidx11 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st1 1102d69827cSNikita Popov %i4 = load float, ptr %arrayidx11, align 4 1112d69827cSNikita Popov %arrayidx13 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 22 1122d69827cSNikita Popov %i5 = load float, ptr %arrayidx13, align 4 1137b9bf80aSAlexey Bataev %add14 = fsub fast float %i5, %i4 1142d69827cSNikita Popov %arrayidx16 = getelementptr inbounds float, ptr %s, i64 2 1152d69827cSNikita Popov store float %add14, ptr %arrayidx16, align 4 1167b9bf80aSAlexey Bataev %st2 = mul i64 %str, 3 1172d69827cSNikita Popov %arrayidx18 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st2 1182d69827cSNikita Popov %i6 = load float, ptr %arrayidx18, align 4 1192d69827cSNikita Popov %arrayidx20 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 18 1202d69827cSNikita Popov %i7 = load float, ptr %arrayidx20, align 4 1217b9bf80aSAlexey Bataev %add21 = fsub fast float %i7, %i6 1222d69827cSNikita Popov %arrayidx23 = getelementptr inbounds float, ptr %s, i64 3 1232d69827cSNikita Popov store float %add21, ptr %arrayidx23, align 4 1247b9bf80aSAlexey Bataev %st3 = mul i64 %str, 4 1252d69827cSNikita Popov %arrayidx25 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st3 1262d69827cSNikita Popov %i8 = load float, ptr %arrayidx25, align 4 1272d69827cSNikita Popov %arrayidx27 = getelementptr inbounds [48 x float], ptr %p, i64 
0, i64 14 1282d69827cSNikita Popov %i9 = load float, ptr %arrayidx27, align 4 1297b9bf80aSAlexey Bataev %add28 = fsub fast float %i9, %i8 1302d69827cSNikita Popov %arrayidx30 = getelementptr inbounds float, ptr %s, i64 4 1312d69827cSNikita Popov store float %add28, ptr %arrayidx30, align 4 1327b9bf80aSAlexey Bataev %st4 = mul i64 %str, 5 1332d69827cSNikita Popov %arrayidx32 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st4 1342d69827cSNikita Popov %i10 = load float, ptr %arrayidx32, align 4 1352d69827cSNikita Popov %arrayidx34 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 10 1362d69827cSNikita Popov %i11 = load float, ptr %arrayidx34, align 4 1377b9bf80aSAlexey Bataev %add35 = fsub fast float %i11, %i10 1382d69827cSNikita Popov %arrayidx37 = getelementptr inbounds float, ptr %s, i64 5 1392d69827cSNikita Popov store float %add35, ptr %arrayidx37, align 4 1407b9bf80aSAlexey Bataev %st5 = mul i64 %str, 6 1412d69827cSNikita Popov %arrayidx39 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st5 1422d69827cSNikita Popov %i12 = load float, ptr %arrayidx39, align 4 1432d69827cSNikita Popov %arrayidx41 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 6 1442d69827cSNikita Popov %i13 = load float, ptr %arrayidx41, align 4 1457b9bf80aSAlexey Bataev %add42 = fsub fast float %i13, %i12 1462d69827cSNikita Popov %arrayidx44 = getelementptr inbounds float, ptr %s, i64 6 1472d69827cSNikita Popov store float %add42, ptr %arrayidx44, align 4 1487b9bf80aSAlexey Bataev %st6 = mul i64 %str, 7 1492d69827cSNikita Popov %arrayidx46 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st6 1502d69827cSNikita Popov %i14 = load float, ptr %arrayidx46, align 4 1512d69827cSNikita Popov %arrayidx48 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 2 1522d69827cSNikita Popov %i15 = load float, ptr %arrayidx48, align 4 1537b9bf80aSAlexey Bataev %add49 = fsub fast float %i15, %i14 1542d69827cSNikita Popov %arrayidx51 = getelementptr inbounds 
float, ptr %s, i64 7 1552d69827cSNikita Popov store float %add49, ptr %arrayidx51, align 4 1567b9bf80aSAlexey Bataev ret void 1577b9bf80aSAlexey Bataev} 1587b9bf80aSAlexey Bataev 1592d69827cSNikita Popovdefine void @test2(ptr %p, ptr noalias %s, i32 %stride) { 1607b9bf80aSAlexey Bataev; CHECK-LABEL: @test2( 1617b9bf80aSAlexey Bataev; CHECK-NEXT: entry: 1627b9bf80aSAlexey Bataev; CHECK-NEXT: [[STR:%.*]] = zext i32 [[STRIDE:%.*]] to i64 1637b9bf80aSAlexey Bataev; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x float], ptr [[P:%.*]], i64 0, i64 2 1647b9bf80aSAlexey Bataev; CHECK-NEXT: [[ST6:%.*]] = mul i64 [[STR]], 7 1657b9bf80aSAlexey Bataev; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 [[ST6]] 1667b9bf80aSAlexey Bataev; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[S:%.*]], i64 0 16738fffa63SPaul Walker; CHECK-NEXT: [[TMP0:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX]], i64 16, <8 x i1> splat (i1 true), i32 8) 1681c2b79adSAlexey Bataev; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[STR]], -4 16938fffa63SPaul Walker; CHECK-NEXT: [[TMP2:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX1]], i64 [[TMP1]], <8 x i1> splat (i1 true), i32 8) 1701c2b79adSAlexey Bataev; CHECK-NEXT: [[TMP3:%.*]] = fsub fast <8 x float> [[TMP2]], [[TMP0]] 1711c2b79adSAlexey Bataev; CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[ARRAYIDX2]], align 4 1727b9bf80aSAlexey Bataev; CHECK-NEXT: ret void 1737b9bf80aSAlexey Bataev; 1747b9bf80aSAlexey Bataeventry: 1757b9bf80aSAlexey Bataev %str = zext i32 %stride to i64 1762d69827cSNikita Popov %arrayidx = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 2 1772d69827cSNikita Popov %i = load float, ptr %arrayidx, align 4 1787b9bf80aSAlexey Bataev %st6 = mul i64 %str, 7 1792d69827cSNikita Popov %arrayidx1 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st6 1802d69827cSNikita 
Popov %i1 = load float, ptr %arrayidx1, align 4 1817b9bf80aSAlexey Bataev %add = fsub fast float %i1, %i 1822d69827cSNikita Popov %arrayidx2 = getelementptr inbounds float, ptr %s, i64 0 1832d69827cSNikita Popov store float %add, ptr %arrayidx2, align 4 1842d69827cSNikita Popov %arrayidx4 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 6 1852d69827cSNikita Popov %i2 = load float, ptr %arrayidx4, align 4 1867b9bf80aSAlexey Bataev %st5 = mul i64 %str, 6 1872d69827cSNikita Popov %arrayidx6 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st5 1882d69827cSNikita Popov %i3 = load float, ptr %arrayidx6, align 4 1897b9bf80aSAlexey Bataev %add7 = fsub fast float %i3, %i2 1902d69827cSNikita Popov %arrayidx9 = getelementptr inbounds float, ptr %s, i64 1 1912d69827cSNikita Popov store float %add7, ptr %arrayidx9, align 4 1922d69827cSNikita Popov %arrayidx11 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 10 1932d69827cSNikita Popov %i4 = load float, ptr %arrayidx11, align 4 1947b9bf80aSAlexey Bataev %st4 = mul i64 %str, 5 1952d69827cSNikita Popov %arrayidx13 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st4 1962d69827cSNikita Popov %i5 = load float, ptr %arrayidx13, align 4 1977b9bf80aSAlexey Bataev %add14 = fsub fast float %i5, %i4 1982d69827cSNikita Popov %arrayidx16 = getelementptr inbounds float, ptr %s, i64 2 1992d69827cSNikita Popov store float %add14, ptr %arrayidx16, align 4 2002d69827cSNikita Popov %arrayidx18 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 14 2012d69827cSNikita Popov %i6 = load float, ptr %arrayidx18, align 4 2027b9bf80aSAlexey Bataev %st3 = mul i64 %str, 4 2032d69827cSNikita Popov %arrayidx20 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st3 2042d69827cSNikita Popov %i7 = load float, ptr %arrayidx20, align 4 2057b9bf80aSAlexey Bataev %add21 = fsub fast float %i7, %i6 2062d69827cSNikita Popov %arrayidx23 = getelementptr inbounds float, ptr %s, i64 3 2072d69827cSNikita Popov store float 
%add21, ptr %arrayidx23, align 4 2082d69827cSNikita Popov %arrayidx25 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 18 2097b9bf80aSAlexey Bataev %st2 = mul i64 %str, 3 2102d69827cSNikita Popov %i8 = load float, ptr %arrayidx25, align 4 2112d69827cSNikita Popov %arrayidx27 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st2 2122d69827cSNikita Popov %i9 = load float, ptr %arrayidx27, align 4 2137b9bf80aSAlexey Bataev %add28 = fsub fast float %i9, %i8 2142d69827cSNikita Popov %arrayidx30 = getelementptr inbounds float, ptr %s, i64 4 2152d69827cSNikita Popov store float %add28, ptr %arrayidx30, align 4 2162d69827cSNikita Popov %arrayidx32 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 22 2172d69827cSNikita Popov %i10 = load float, ptr %arrayidx32, align 4 2187b9bf80aSAlexey Bataev %st1 = mul i64 %str, 2 2192d69827cSNikita Popov %arrayidx34 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %st1 2202d69827cSNikita Popov %i11 = load float, ptr %arrayidx34, align 4 2217b9bf80aSAlexey Bataev %add35 = fsub fast float %i11, %i10 2222d69827cSNikita Popov %arrayidx37 = getelementptr inbounds float, ptr %s, i64 5 2232d69827cSNikita Popov store float %add35, ptr %arrayidx37, align 4 2242d69827cSNikita Popov %arrayidx39 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 26 2252d69827cSNikita Popov %i12 = load float, ptr %arrayidx39, align 4 2262d69827cSNikita Popov %arrayidx41 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 %str 2272d69827cSNikita Popov %i13 = load float, ptr %arrayidx41, align 4 2287b9bf80aSAlexey Bataev %add42 = fsub fast float %i13, %i12 2292d69827cSNikita Popov %arrayidx44 = getelementptr inbounds float, ptr %s, i64 6 2302d69827cSNikita Popov store float %add42, ptr %arrayidx44, align 4 2312d69827cSNikita Popov %arrayidx46 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 30 2322d69827cSNikita Popov %i14 = load float, ptr %arrayidx46, align 4 2332d69827cSNikita Popov %arrayidx48 = getelementptr 
inbounds [48 x float], ptr %p, i64 0, i64 0 2342d69827cSNikita Popov %i15 = load float, ptr %arrayidx48, align 4 2357b9bf80aSAlexey Bataev %add49 = fsub fast float %i15, %i14 2362d69827cSNikita Popov %arrayidx51 = getelementptr inbounds float, ptr %s, i64 7 2372d69827cSNikita Popov store float %add49, ptr %arrayidx51, align 4 2387b9bf80aSAlexey Bataev ret void 2397b9bf80aSAlexey Bataev} 2407b9bf80aSAlexey Bataev 2412d69827cSNikita Popovdefine void @test3(ptr %p, ptr noalias %s) { 2427b9bf80aSAlexey Bataev; CHECK-LABEL: @test3( 2437b9bf80aSAlexey Bataev; CHECK-NEXT: entry: 2447b9bf80aSAlexey Bataev; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x float], ptr [[P:%.*]], i64 0, i64 0 2450ab0c1d9SAlexey Bataev; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x float], ptr [[P]], i64 0, i64 30 2467b9bf80aSAlexey Bataev; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, ptr [[S:%.*]], i64 0 24738fffa63SPaul Walker; CHECK-NEXT: [[TMP0:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX]], i64 16, <8 x i1> splat (i1 true), i32 8) 24838fffa63SPaul Walker; CHECK-NEXT: [[TMP2:%.*]] = call <8 x float> @llvm.experimental.vp.strided.load.v8f32.p0.i64(ptr align 4 [[ARRAYIDX1]], i64 -4, <8 x i1> splat (i1 true), i32 8) 249833a1cadSAlexey Bataev; CHECK-NEXT: [[TMP3:%.*]] = fsub fast <8 x float> [[TMP2]], [[TMP0]] 250833a1cadSAlexey Bataev; CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[ARRAYIDX2]], align 4 2517b9bf80aSAlexey Bataev; CHECK-NEXT: ret void 2527b9bf80aSAlexey Bataev; 2537b9bf80aSAlexey Bataeventry: 2542d69827cSNikita Popov %arrayidx = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 0 2552d69827cSNikita Popov %i = load float, ptr %arrayidx, align 4 2562d69827cSNikita Popov %arrayidx1 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 30 2572d69827cSNikita Popov %i1 = load float, ptr %arrayidx1, align 4 2587b9bf80aSAlexey Bataev %add = fsub fast float %i1, %i 2592d69827cSNikita 
Popov %arrayidx2 = getelementptr inbounds float, ptr %s, i64 0 2602d69827cSNikita Popov store float %add, ptr %arrayidx2, align 4 2612d69827cSNikita Popov %arrayidx4 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 4 2622d69827cSNikita Popov %i2 = load float, ptr %arrayidx4, align 4 2632d69827cSNikita Popov %arrayidx6 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 29 2642d69827cSNikita Popov %i3 = load float, ptr %arrayidx6, align 4 2657b9bf80aSAlexey Bataev %add7 = fsub fast float %i3, %i2 2662d69827cSNikita Popov %arrayidx9 = getelementptr inbounds float, ptr %s, i64 1 2672d69827cSNikita Popov store float %add7, ptr %arrayidx9, align 4 2682d69827cSNikita Popov %arrayidx11 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 8 2692d69827cSNikita Popov %i4 = load float, ptr %arrayidx11, align 4 2702d69827cSNikita Popov %arrayidx13 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 28 2712d69827cSNikita Popov %i5 = load float, ptr %arrayidx13, align 4 2727b9bf80aSAlexey Bataev %add14 = fsub fast float %i5, %i4 2732d69827cSNikita Popov %arrayidx16 = getelementptr inbounds float, ptr %s, i64 2 2742d69827cSNikita Popov store float %add14, ptr %arrayidx16, align 4 2752d69827cSNikita Popov %arrayidx18 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 12 2762d69827cSNikita Popov %i6 = load float, ptr %arrayidx18, align 4 2772d69827cSNikita Popov %arrayidx20 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 27 2782d69827cSNikita Popov %i7 = load float, ptr %arrayidx20, align 4 2797b9bf80aSAlexey Bataev %add21 = fsub fast float %i7, %i6 2802d69827cSNikita Popov %arrayidx23 = getelementptr inbounds float, ptr %s, i64 3 2812d69827cSNikita Popov store float %add21, ptr %arrayidx23, align 4 2822d69827cSNikita Popov %arrayidx25 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 16 2832d69827cSNikita Popov %i8 = load float, ptr %arrayidx25, align 4 2842d69827cSNikita Popov %arrayidx27 = getelementptr inbounds [48 x float], 
ptr %p, i64 0, i64 26 2852d69827cSNikita Popov %i9 = load float, ptr %arrayidx27, align 4 2867b9bf80aSAlexey Bataev %add28 = fsub fast float %i9, %i8 2872d69827cSNikita Popov %arrayidx30 = getelementptr inbounds float, ptr %s, i64 4 2882d69827cSNikita Popov store float %add28, ptr %arrayidx30, align 4 2892d69827cSNikita Popov %arrayidx32 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 20 2902d69827cSNikita Popov %i10 = load float, ptr %arrayidx32, align 4 2912d69827cSNikita Popov %arrayidx34 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 25 2922d69827cSNikita Popov %i11 = load float, ptr %arrayidx34, align 4 2937b9bf80aSAlexey Bataev %add35 = fsub fast float %i11, %i10 2942d69827cSNikita Popov %arrayidx37 = getelementptr inbounds float, ptr %s, i64 5 2952d69827cSNikita Popov store float %add35, ptr %arrayidx37, align 4 2962d69827cSNikita Popov %arrayidx39 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 24 2972d69827cSNikita Popov %i12 = load float, ptr %arrayidx39, align 4 2982d69827cSNikita Popov %arrayidx41 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 24 2992d69827cSNikita Popov %i13 = load float, ptr %arrayidx41, align 4 3007b9bf80aSAlexey Bataev %add42 = fsub fast float %i13, %i12 3012d69827cSNikita Popov %arrayidx44 = getelementptr inbounds float, ptr %s, i64 6 3022d69827cSNikita Popov store float %add42, ptr %arrayidx44, align 4 3032d69827cSNikita Popov %arrayidx46 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 28 3042d69827cSNikita Popov %i14 = load float, ptr %arrayidx46, align 4 3052d69827cSNikita Popov %arrayidx48 = getelementptr inbounds [48 x float], ptr %p, i64 0, i64 23 3062d69827cSNikita Popov %i15 = load float, ptr %arrayidx48, align 4 3077b9bf80aSAlexey Bataev %add49 = fsub fast float %i15, %i14 3082d69827cSNikita Popov %arrayidx51 = getelementptr inbounds float, ptr %s, i64 7 3092d69827cSNikita Popov store float %add49, ptr %arrayidx51, align 4 3107b9bf80aSAlexey Bataev ret void 
3117b9bf80aSAlexey Bataev} 3127b9bf80aSAlexey Bataev 313*343a8107SLuke Lau 314*343a8107SLuke Laudefine void @test_bf16(ptr %p, ptr noalias %s) { 315*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-LABEL: @test_bf16( 316*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT: entry: 317*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P:%.*]], i64 0, i64 0 318*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I:%.*]] = load bfloat, ptr [[ARRAYIDX]], align 4 319*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 30 320*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I1:%.*]] = load bfloat, ptr [[ARRAYIDX1]], align 4 321*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD:%.*]] = fsub fast bfloat [[I1]], [[I]] 322*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds bfloat, ptr [[S:%.*]], i64 0 323*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT: store bfloat [[ADD]], ptr [[ARRAYIDX2]], align 4 324*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 4 325*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I2:%.*]] = load bfloat, ptr [[ARRAYIDX4]], align 4 326*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 26 327*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I3:%.*]] = load bfloat, ptr [[ARRAYIDX6]], align 4 328*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD7:%.*]] = fsub fast bfloat [[I3]], [[I2]] 329*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds bfloat, ptr [[S]], i64 1 330*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT: store bfloat [[ADD7]], ptr [[ARRAYIDX9]], align 4 331*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 8 332*343a8107SLuke Lau; 
NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I4:%.*]] = load bfloat, ptr [[ARRAYIDX11]], align 4 333*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 22 334*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I5:%.*]] = load bfloat, ptr [[ARRAYIDX13]], align 4 335*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD14:%.*]] = fsub fast bfloat [[I5]], [[I4]] 336*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds bfloat, ptr [[S]], i64 2 337*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT: store bfloat [[ADD14]], ptr [[ARRAYIDX16]], align 4 338*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 12 339*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I6:%.*]] = load bfloat, ptr [[ARRAYIDX18]], align 4 340*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 18 341*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I7:%.*]] = load bfloat, ptr [[ARRAYIDX20]], align 4 342*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD21:%.*]] = fsub fast bfloat [[I7]], [[I6]] 343*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX23:%.*]] = getelementptr inbounds bfloat, ptr [[S]], i64 3 344*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT: store bfloat [[ADD21]], ptr [[ARRAYIDX23]], align 4 345*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 16 346*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I8:%.*]] = load bfloat, ptr [[ARRAYIDX25]], align 4 347*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX27:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 14 348*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I9:%.*]] = load bfloat, ptr [[ARRAYIDX27]], align 4 349*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD28:%.*]] = fsub fast bfloat [[I9]], [[I8]] 
350*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX30:%.*]] = getelementptr inbounds bfloat, ptr [[S]], i64 4 351*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT: store bfloat [[ADD28]], ptr [[ARRAYIDX30]], align 4 352*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 20 353*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I10:%.*]] = load bfloat, ptr [[ARRAYIDX32]], align 4 354*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX34:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 10 355*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I11:%.*]] = load bfloat, ptr [[ARRAYIDX34]], align 4 356*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD35:%.*]] = fsub fast bfloat [[I11]], [[I10]] 357*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds bfloat, ptr [[S]], i64 5 358*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT: store bfloat [[ADD35]], ptr [[ARRAYIDX37]], align 4 359*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX39:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 24 360*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I12:%.*]] = load bfloat, ptr [[ARRAYIDX39]], align 4 361*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX41:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 6 362*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I13:%.*]] = load bfloat, ptr [[ARRAYIDX41]], align 4 363*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD42:%.*]] = fsub fast bfloat [[I13]], [[I12]] 364*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX44:%.*]] = getelementptr inbounds bfloat, ptr [[S]], i64 6 365*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT: store bfloat [[ADD42]], ptr [[ARRAYIDX44]], align 4 366*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX46:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 28 367*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I14:%.*]] = 
load bfloat, ptr [[ARRAYIDX46]], align 4 368*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX48:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 2 369*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[I15:%.*]] = load bfloat, ptr [[ARRAYIDX48]], align 4 370*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ADD49:%.*]] = fsub fast bfloat [[I15]], [[I14]] 371*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX51:%.*]] = getelementptr inbounds bfloat, ptr [[S]], i64 7 372*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT: store bfloat [[ADD49]], ptr [[ARRAYIDX51]], align 4 373*343a8107SLuke Lau; NO-ZVFHMIN-ZVFBFMIN-NEXT: ret void 374*343a8107SLuke Lau; 375*343a8107SLuke Lau; ZVFHMIN-ZVFBFMIN-LABEL: @test_bf16( 376*343a8107SLuke Lau; ZVFHMIN-ZVFBFMIN-NEXT: entry: 377*343a8107SLuke Lau; ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P:%.*]], i64 0, i64 0 378*343a8107SLuke Lau; ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x bfloat], ptr [[P]], i64 0, i64 30 379*343a8107SLuke Lau; ZVFHMIN-ZVFBFMIN-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds bfloat, ptr [[S:%.*]], i64 0 380*343a8107SLuke Lau; ZVFHMIN-ZVFBFMIN-NEXT: [[TMP15:%.*]] = call <8 x bfloat> @llvm.experimental.vp.strided.load.v8bf16.p0.i64(ptr align 4 [[ARRAYIDX]], i64 8, <8 x i1> splat (i1 true), i32 8) 381*343a8107SLuke Lau; ZVFHMIN-ZVFBFMIN-NEXT: [[TMP7:%.*]] = call <8 x bfloat> @llvm.experimental.vp.strided.load.v8bf16.p0.i64(ptr align 4 [[ARRAYIDX1]], i64 -8, <8 x i1> splat (i1 true), i32 8) 382*343a8107SLuke Lau; ZVFHMIN-ZVFBFMIN-NEXT: [[TMP16:%.*]] = fsub fast <8 x bfloat> [[TMP7]], [[TMP15]] 383*343a8107SLuke Lau; ZVFHMIN-ZVFBFMIN-NEXT: store <8 x bfloat> [[TMP16]], ptr [[ARRAYIDX2]], align 4 384*343a8107SLuke Lau; ZVFHMIN-ZVFBFMIN-NEXT: ret void 385*343a8107SLuke Lau; 386*343a8107SLuke Lauentry: 387*343a8107SLuke Lau %arrayidx = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 0 388*343a8107SLuke Lau %i = load 
bfloat, ptr %arrayidx, align 4 389*343a8107SLuke Lau %arrayidx1 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 30 390*343a8107SLuke Lau %i1 = load bfloat, ptr %arrayidx1, align 4 391*343a8107SLuke Lau %add = fsub fast bfloat %i1, %i 392*343a8107SLuke Lau %arrayidx2 = getelementptr inbounds bfloat, ptr %s, i64 0 393*343a8107SLuke Lau store bfloat %add, ptr %arrayidx2, align 4 394*343a8107SLuke Lau %arrayidx4 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 4 395*343a8107SLuke Lau %i2 = load bfloat, ptr %arrayidx4, align 4 396*343a8107SLuke Lau %arrayidx6 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 26 397*343a8107SLuke Lau %i3 = load bfloat, ptr %arrayidx6, align 4 398*343a8107SLuke Lau %add7 = fsub fast bfloat %i3, %i2 399*343a8107SLuke Lau %arrayidx9 = getelementptr inbounds bfloat, ptr %s, i64 1 400*343a8107SLuke Lau store bfloat %add7, ptr %arrayidx9, align 4 401*343a8107SLuke Lau %arrayidx11 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 8 402*343a8107SLuke Lau %i4 = load bfloat, ptr %arrayidx11, align 4 403*343a8107SLuke Lau %arrayidx13 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 22 404*343a8107SLuke Lau %i5 = load bfloat, ptr %arrayidx13, align 4 405*343a8107SLuke Lau %add14 = fsub fast bfloat %i5, %i4 406*343a8107SLuke Lau %arrayidx16 = getelementptr inbounds bfloat, ptr %s, i64 2 407*343a8107SLuke Lau store bfloat %add14, ptr %arrayidx16, align 4 408*343a8107SLuke Lau %arrayidx18 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 12 409*343a8107SLuke Lau %i6 = load bfloat, ptr %arrayidx18, align 4 410*343a8107SLuke Lau %arrayidx20 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 18 411*343a8107SLuke Lau %i7 = load bfloat, ptr %arrayidx20, align 4 412*343a8107SLuke Lau %add21 = fsub fast bfloat %i7, %i6 413*343a8107SLuke Lau %arrayidx23 = getelementptr inbounds bfloat, ptr %s, i64 3 414*343a8107SLuke Lau store bfloat %add21, ptr %arrayidx23, align 4 415*343a8107SLuke Lau 
%arrayidx25 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 16 416*343a8107SLuke Lau %i8 = load bfloat, ptr %arrayidx25, align 4 417*343a8107SLuke Lau %arrayidx27 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 14 418*343a8107SLuke Lau %i9 = load bfloat, ptr %arrayidx27, align 4 419*343a8107SLuke Lau %add28 = fsub fast bfloat %i9, %i8 420*343a8107SLuke Lau %arrayidx30 = getelementptr inbounds bfloat, ptr %s, i64 4 421*343a8107SLuke Lau store bfloat %add28, ptr %arrayidx30, align 4 422*343a8107SLuke Lau %arrayidx32 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 20 423*343a8107SLuke Lau %i10 = load bfloat, ptr %arrayidx32, align 4 424*343a8107SLuke Lau %arrayidx34 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 10 425*343a8107SLuke Lau %i11 = load bfloat, ptr %arrayidx34, align 4 426*343a8107SLuke Lau %add35 = fsub fast bfloat %i11, %i10 427*343a8107SLuke Lau %arrayidx37 = getelementptr inbounds bfloat, ptr %s, i64 5 428*343a8107SLuke Lau store bfloat %add35, ptr %arrayidx37, align 4 429*343a8107SLuke Lau %arrayidx39 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 24 430*343a8107SLuke Lau %i12 = load bfloat, ptr %arrayidx39, align 4 431*343a8107SLuke Lau %arrayidx41 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 6 432*343a8107SLuke Lau %i13 = load bfloat, ptr %arrayidx41, align 4 433*343a8107SLuke Lau %add42 = fsub fast bfloat %i13, %i12 434*343a8107SLuke Lau %arrayidx44 = getelementptr inbounds bfloat, ptr %s, i64 6 435*343a8107SLuke Lau store bfloat %add42, ptr %arrayidx44, align 4 436*343a8107SLuke Lau %arrayidx46 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 28 437*343a8107SLuke Lau %i14 = load bfloat, ptr %arrayidx46, align 4 438*343a8107SLuke Lau %arrayidx48 = getelementptr inbounds [48 x bfloat], ptr %p, i64 0, i64 2 439*343a8107SLuke Lau %i15 = load bfloat, ptr %arrayidx48, align 4 440*343a8107SLuke Lau %add49 = fsub fast bfloat %i15, %i14 441*343a8107SLuke Lau %arrayidx51 = 
getelementptr inbounds bfloat, ptr %s, i64 7
  store bfloat %add49, ptr %arrayidx51, align 4
  ret void
}

; test_f16: eight scalar `fsub fast half` operations feeding eight stores to
; consecutive elements %s[0] .. %s[7].
;   - The minuend of lane k is loaded from element 30 - 4*k of the
;     [48 x half] array at %p (indices 30, 26, ..., 2).
;   - The subtrahend of lane k is loaded from element 4*k (indices 0, 4, ..., 28).
; The assertions below encode two outcomes:
;   - NO-ZVFHMIN-ZVFBFMIN: the scalar code is expected to be left as written.
;   - ZVFHMIN-ZVFBFMIN: the loads are expected to become two
;     @llvm.experimental.vp.strided.load.v8f16 calls with byte strides 8 and -8
;     (4 half elements forwards / backwards), followed by one <8 x half> fsub
;     and one <8 x half> store.
; The check lines are produced by utils/update_test_checks.py; regenerate them
; with that script instead of editing them by hand.
; NOTE(review): every scalar access is annotated `align 4`, including the
; stores to odd half indices of %s (byte offsets 2, 6, 10, 14). Those cannot
; all be 4-byte aligned together with the store at offset 0 - presumably
; inherited from the float variant of this test; confirm whether `align 2` was
; intended before changing it, since the expected output depends on it.
define void @test_f16(ptr %p, ptr noalias %s) {
; NO-ZVFHMIN-ZVFBFMIN-LABEL: @test_f16(
; NO-ZVFHMIN-ZVFBFMIN-NEXT:  entry:
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x half], ptr [[P:%.*]], i64 0, i64 0
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[I:%.*]] = load half, ptr [[ARRAYIDX]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 30
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[I1:%.*]] = load half, ptr [[ARRAYIDX1]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ADD:%.*]] = fsub fast half [[I1]], [[I]]
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds half, ptr [[S:%.*]], i64 0
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    store half [[ADD]], ptr [[ARRAYIDX2]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[I2:%.*]] = load half, ptr [[ARRAYIDX4]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 26
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[I3:%.*]] = load half, ptr [[ARRAYIDX6]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ADD7:%.*]] = fsub fast half [[I3]], [[I2]]
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds half, ptr [[S]], i64 1
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    store half [[ADD7]], ptr [[ARRAYIDX9]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX11:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 8
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[I4:%.*]] = load half, ptr [[ARRAYIDX11]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 22
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[I5:%.*]] = load half, ptr [[ARRAYIDX13]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ADD14:%.*]] = fsub fast half [[I5]], [[I4]]
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX16:%.*]] = getelementptr inbounds half, ptr [[S]], i64 2
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    store half [[ADD14]], ptr [[ARRAYIDX16]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX18:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 12
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[I6:%.*]] = load half, ptr [[ARRAYIDX18]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX20:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 18
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[I7:%.*]] = load half, ptr [[ARRAYIDX20]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ADD21:%.*]] = fsub fast half [[I7]], [[I6]]
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX23:%.*]] = getelementptr inbounds half, ptr [[S]], i64 3
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    store half [[ADD21]], ptr [[ARRAYIDX23]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX25:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 16
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[I8:%.*]] = load half, ptr [[ARRAYIDX25]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX27:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 14
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[I9:%.*]] = load half, ptr [[ARRAYIDX27]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ADD28:%.*]] = fsub fast half [[I9]], [[I8]]
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX30:%.*]] = getelementptr inbounds half, ptr [[S]], i64 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    store half [[ADD28]], ptr [[ARRAYIDX30]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX32:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 20
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[I10:%.*]] = load half, ptr [[ARRAYIDX32]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX34:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 10
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[I11:%.*]] = load half, ptr [[ARRAYIDX34]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ADD35:%.*]] = fsub fast half [[I11]], [[I10]]
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX37:%.*]] = getelementptr inbounds half, ptr [[S]], i64 5
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    store half [[ADD35]], ptr [[ARRAYIDX37]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX39:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 24
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[I12:%.*]] = load half, ptr [[ARRAYIDX39]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX41:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 6
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[I13:%.*]] = load half, ptr [[ARRAYIDX41]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ADD42:%.*]] = fsub fast half [[I13]], [[I12]]
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX44:%.*]] = getelementptr inbounds half, ptr [[S]], i64 6
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    store half [[ADD42]], ptr [[ARRAYIDX44]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX46:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 28
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[I14:%.*]] = load half, ptr [[ARRAYIDX46]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX48:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 2
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[I15:%.*]] = load half, ptr [[ARRAYIDX48]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ADD49:%.*]] = fsub fast half [[I15]], [[I14]]
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX51:%.*]] = getelementptr inbounds half, ptr [[S]], i64 7
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    store half [[ADD49]], ptr [[ARRAYIDX51]], align 4
; NO-ZVFHMIN-ZVFBFMIN-NEXT:    ret void
;
; ZVFHMIN-ZVFBFMIN-LABEL: @test_f16(
; ZVFHMIN-ZVFBFMIN-NEXT:  entry:
; ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [48 x half], ptr [[P:%.*]], i64 0, i64 0
; ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX1:%.*]] = getelementptr inbounds [48 x half], ptr [[P]], i64 0, i64 30
; ZVFHMIN-ZVFBFMIN-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds half, ptr [[S:%.*]], i64 0
; ZVFHMIN-ZVFBFMIN-NEXT:    [[TMP15:%.*]] = call <8 x half> @llvm.experimental.vp.strided.load.v8f16.p0.i64(ptr align 4 [[ARRAYIDX]], i64 8, <8 x i1> splat (i1 true), i32 8)
; ZVFHMIN-ZVFBFMIN-NEXT:    [[TMP7:%.*]] = call <8 x half> @llvm.experimental.vp.strided.load.v8f16.p0.i64(ptr align 4 [[ARRAYIDX1]], i64 -8, <8 x i1> splat (i1 true), i32 8)
; ZVFHMIN-ZVFBFMIN-NEXT:    [[TMP16:%.*]] = fsub fast <8 x half> [[TMP7]], [[TMP15]]
; ZVFHMIN-ZVFBFMIN-NEXT:    store <8 x half> [[TMP16]], ptr [[ARRAYIDX2]], align 4
; ZVFHMIN-ZVFBFMIN-NEXT:    ret void
;
entry:
  %arrayidx = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 0
  %i = load half, ptr %arrayidx, align 4
  %arrayidx1 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 30
  %i1 = load half, ptr %arrayidx1, align 4
  %add = fsub fast half %i1, %i
  %arrayidx2 = getelementptr inbounds half, ptr %s, i64 0
  store half %add, ptr %arrayidx2, align 4
  %arrayidx4 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 4
  %i2 = load half, ptr %arrayidx4, align 4
  %arrayidx6 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 26
  %i3 = load half, ptr %arrayidx6, align 4
  %add7 = fsub fast half %i3, %i2
  %arrayidx9 = getelementptr inbounds half, ptr %s, i64 1
  store half %add7, ptr %arrayidx9, align 4
  %arrayidx11 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 8
  %i4 = load half, ptr %arrayidx11, align 4
  %arrayidx13 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 22
  %i5 = load half, ptr %arrayidx13, align 4
  %add14 = fsub fast half %i5, %i4
  %arrayidx16 = getelementptr inbounds half, ptr %s, i64 2
  store half %add14, ptr %arrayidx16, align 4
  %arrayidx18 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 12
  %i6 = load half, ptr %arrayidx18, align 4
  %arrayidx20 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 18
  %i7 = load half, ptr %arrayidx20, align 4
  %add21 = fsub fast half %i7, %i6
  %arrayidx23 = getelementptr inbounds half, ptr %s, i64 3
  store half %add21, ptr %arrayidx23, align 4
  %arrayidx25 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 16
  %i8 = load half, ptr %arrayidx25, align 4
  %arrayidx27 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 14
  %i9 = load half, ptr %arrayidx27, align 4
  %add28 = fsub fast half %i9, %i8
  %arrayidx30 = getelementptr inbounds half, ptr %s, i64 4
  store half %add28, ptr %arrayidx30, align 4
  %arrayidx32 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 20
  %i10 = load half, ptr %arrayidx32, align 4
  %arrayidx34 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 10
  %i11 = load half, ptr %arrayidx34, align 4
  %add35 = fsub fast half %i11, %i10
  %arrayidx37 = getelementptr inbounds half, ptr %s, i64 5
  store half %add35, ptr %arrayidx37, align 4
  %arrayidx39 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 24
  %i12 = load half, ptr %arrayidx39, align 4
  %arrayidx41 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 6
  %i13 = load half, ptr %arrayidx41, align 4
  %add42 = fsub fast half %i13, %i12
  %arrayidx44 = getelementptr inbounds half, ptr %s, i64 6
  store half %add42, ptr %arrayidx44, align 4
  %arrayidx46 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 28
  %i14 = load half, ptr %arrayidx46, align 4
  %arrayidx48 = getelementptr inbounds [48 x half], ptr %p, i64 0, i64 2
  %i15 = load half, ptr %arrayidx48, align 4
  %add49 = fsub fast half %i15, %i14
  %arrayidx51 = getelementptr inbounds half, ptr %s, i64 7
  store half %add49, ptr %arrayidx51, align 4
  ret void
}