; xref: /llvm-project/llvm/test/Transforms/SLPVectorizer/RISCV/vec3-base.ll (revision 3133acf1fbd1cc57ea8e74288ee9a0acd027d749)
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=slp-vectorizer -slp-vectorize-non-power-of-2 -mtriple=riscv64 -mattr=+v -S %s | FileCheck --check-prefixes=CHECK,NON-POW2 %s
; RUN: opt -passes=slp-vectorizer -slp-vectorize-non-power-of-2=false -mtriple=riscv64 -mattr=+v -S %s | FileCheck --check-prefixes=CHECK,POW2-ONLY %s
; Loads three consecutive i32 elements from %src, multiplies each by the
; constant 10 (nsw) and stores the products to three consecutive i32 slots
; at %dst.
; Expected with non-power-of-2 vectorization enabled: a single <3 x i32>
; load / mul / store.
; Expected with it disabled: a <2 x i32> load / mul / store for elements 0-1
; and scalar code for element 2.
define void @v3_load_i32_mul_by_constant_store(ptr %src, ptr %dst) {
; NON-POW2-LABEL: @v3_load_i32_mul_by_constant_store(
; NON-POW2-NEXT:  entry:
; NON-POW2-NEXT:    [[GEP_SRC_0:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i32 0
; NON-POW2-NEXT:    [[TMP0:%.*]] = load <3 x i32>, ptr [[GEP_SRC_0]], align 4
; NON-POW2-NEXT:    [[TMP1:%.*]] = mul nsw <3 x i32> [[TMP0]], splat (i32 10)
; NON-POW2-NEXT:    store <3 x i32> [[TMP1]], ptr [[DST:%.*]], align 4
; NON-POW2-NEXT:    ret void
;
; POW2-ONLY-LABEL: @v3_load_i32_mul_by_constant_store(
; POW2-ONLY-NEXT:  entry:
; POW2-ONLY-NEXT:    [[GEP_SRC_0:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i32 0
; POW2-ONLY-NEXT:    [[GEP_SRC_2:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 2
; POW2-ONLY-NEXT:    [[L_SRC_2:%.*]] = load i32, ptr [[GEP_SRC_2]], align 4
; POW2-ONLY-NEXT:    [[MUL_2:%.*]] = mul nsw i32 [[L_SRC_2]], 10
; POW2-ONLY-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr [[GEP_SRC_0]], align 4
; POW2-ONLY-NEXT:    [[TMP1:%.*]] = mul nsw <2 x i32> [[TMP0]], splat (i32 10)
; POW2-ONLY-NEXT:    store <2 x i32> [[TMP1]], ptr [[DST:%.*]], align 4
; POW2-ONLY-NEXT:    [[DST_2:%.*]] = getelementptr i32, ptr [[DST]], i32 2
; POW2-ONLY-NEXT:    store i32 [[MUL_2]], ptr [[DST_2]], align 4
; POW2-ONLY-NEXT:    ret void
;
entry:
  %gep.src.0 = getelementptr inbounds i32, ptr %src, i32 0
  %l.src.0 = load i32, ptr %gep.src.0, align 4
  %mul.0 = mul nsw i32 %l.src.0, 10

  %gep.src.1 = getelementptr inbounds i32, ptr %src, i32 1
  %l.src.1 = load i32, ptr %gep.src.1, align 4
  %mul.1 = mul nsw i32 %l.src.1, 10

  %gep.src.2 = getelementptr inbounds i32, ptr %src, i32 2
  %l.src.2 = load i32, ptr %gep.src.2, align 4
  %mul.2 = mul nsw i32 %l.src.2, 10

  store i32 %mul.0, ptr %dst

  %dst.1 = getelementptr i32, ptr %dst, i32 1
  store i32 %mul.1, ptr %dst.1

  %dst.2 = getelementptr i32, ptr %dst, i32 2
  store i32 %mul.2, ptr %dst.2

  ret void
}
; Should not be vectorized with an undef/poison element as padding, as
; division by undef/poison may cause UB.  Must use VL predication or
; masking instead, where RISCV wins.
;
; Divides the constant 10 (udiv) by each of three consecutive i32 elements
; loaded from %src and stores the quotients to three consecutive i32 slots
; at %dst.  The loaded values are the divisors.
; Expected with non-power-of-2 vectorization enabled: a single <3 x i32>
; load / udiv / store.
; Expected with it disabled: the code stays fully scalar.
define void @v3_load_i32_udiv_by_constant_store(ptr %src, ptr %dst) {
; NON-POW2-LABEL: @v3_load_i32_udiv_by_constant_store(
; NON-POW2-NEXT:  entry:
; NON-POW2-NEXT:    [[GEP_SRC_0:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i32 0
; NON-POW2-NEXT:    [[TMP0:%.*]] = load <3 x i32>, ptr [[GEP_SRC_0]], align 4
; NON-POW2-NEXT:    [[TMP1:%.*]] = udiv <3 x i32> splat (i32 10), [[TMP0]]
; NON-POW2-NEXT:    store <3 x i32> [[TMP1]], ptr [[DST:%.*]], align 4
; NON-POW2-NEXT:    ret void
;
; POW2-ONLY-LABEL: @v3_load_i32_udiv_by_constant_store(
; POW2-ONLY-NEXT:  entry:
; POW2-ONLY-NEXT:    [[GEP_SRC_0:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i32 0
; POW2-ONLY-NEXT:    [[L_SRC_0:%.*]] = load i32, ptr [[GEP_SRC_0]], align 4
; POW2-ONLY-NEXT:    [[MUL_0:%.*]] = udiv i32 10, [[L_SRC_0]]
; POW2-ONLY-NEXT:    [[GEP_SRC_1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 1
; POW2-ONLY-NEXT:    [[L_SRC_1:%.*]] = load i32, ptr [[GEP_SRC_1]], align 4
; POW2-ONLY-NEXT:    [[MUL_1:%.*]] = udiv i32 10, [[L_SRC_1]]
; POW2-ONLY-NEXT:    [[GEP_SRC_2:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 2
; POW2-ONLY-NEXT:    [[L_SRC_2:%.*]] = load i32, ptr [[GEP_SRC_2]], align 4
; POW2-ONLY-NEXT:    [[MUL_2:%.*]] = udiv i32 10, [[L_SRC_2]]
; POW2-ONLY-NEXT:    store i32 [[MUL_0]], ptr [[DST:%.*]], align 4
; POW2-ONLY-NEXT:    [[DST_1:%.*]] = getelementptr i32, ptr [[DST]], i32 1
; POW2-ONLY-NEXT:    store i32 [[MUL_1]], ptr [[DST_1]], align 4
; POW2-ONLY-NEXT:    [[DST_2:%.*]] = getelementptr i32, ptr [[DST]], i32 2
; POW2-ONLY-NEXT:    store i32 [[MUL_2]], ptr [[DST_2]], align 4
; POW2-ONLY-NEXT:    ret void
;
entry:
  %gep.src.0 = getelementptr inbounds i32, ptr %src, i32 0
  %l.src.0 = load i32, ptr %gep.src.0, align 4
  %mul.0 = udiv i32 10, %l.src.0

  %gep.src.1 = getelementptr inbounds i32, ptr %src, i32 1
  %l.src.1 = load i32, ptr %gep.src.1, align 4
  %mul.1 = udiv i32 10, %l.src.1

  %gep.src.2 = getelementptr inbounds i32, ptr %src, i32 2
  %l.src.2 = load i32, ptr %gep.src.2, align 4
  %mul.2 = udiv i32 10, %l.src.2

  store i32 %mul.0, ptr %dst

  %dst.1 = getelementptr i32, ptr %dst, i32 1
  store i32 %mul.1, ptr %dst.1

  %dst.2 = getelementptr i32, ptr %dst, i32 2
  store i32 %mul.2, ptr %dst.2

  ret void
}
; Loads three consecutive i32 elements from each of %src.1 and %src.2,
; multiplies them element-wise (nsw) and stores the products to three
; consecutive i32 slots at %dst.
; Expected with non-power-of-2 vectorization enabled: two <3 x i32> loads,
; one <3 x i32> mul and one <3 x i32> store.
; Expected with it disabled: <2 x i32> code for elements 0-1 and scalar code
; for element 2.
define void @v3_load_i32_mul_store(ptr %src.1, ptr %src.2, ptr %dst) {
; NON-POW2-LABEL: @v3_load_i32_mul_store(
; NON-POW2-NEXT:  entry:
; NON-POW2-NEXT:    [[GEP_SRC_1_0:%.*]] = getelementptr inbounds i32, ptr [[SRC_1:%.*]], i32 0
; NON-POW2-NEXT:    [[GEP_SRC_2_0:%.*]] = getelementptr inbounds i32, ptr [[SRC_2:%.*]], i32 0
; NON-POW2-NEXT:    [[TMP0:%.*]] = load <3 x i32>, ptr [[GEP_SRC_1_0]], align 4
; NON-POW2-NEXT:    [[TMP1:%.*]] = load <3 x i32>, ptr [[GEP_SRC_2_0]], align 4
; NON-POW2-NEXT:    [[TMP2:%.*]] = mul nsw <3 x i32> [[TMP0]], [[TMP1]]
; NON-POW2-NEXT:    store <3 x i32> [[TMP2]], ptr [[DST:%.*]], align 4
; NON-POW2-NEXT:    ret void
;
; POW2-ONLY-LABEL: @v3_load_i32_mul_store(
; POW2-ONLY-NEXT:  entry:
; POW2-ONLY-NEXT:    [[GEP_SRC_1_0:%.*]] = getelementptr inbounds i32, ptr [[SRC_1:%.*]], i32 0
; POW2-ONLY-NEXT:    [[GEP_SRC_2_0:%.*]] = getelementptr inbounds i32, ptr [[SRC_2:%.*]], i32 0
; POW2-ONLY-NEXT:    [[GEP_SRC_1_2:%.*]] = getelementptr inbounds i32, ptr [[SRC_1]], i32 2
; POW2-ONLY-NEXT:    [[L_SRC_1_2:%.*]] = load i32, ptr [[GEP_SRC_1_2]], align 4
; POW2-ONLY-NEXT:    [[GEP_SRC_2_2:%.*]] = getelementptr inbounds i32, ptr [[SRC_2]], i32 2
; POW2-ONLY-NEXT:    [[L_SRC_2_2:%.*]] = load i32, ptr [[GEP_SRC_2_2]], align 4
; POW2-ONLY-NEXT:    [[MUL_2:%.*]] = mul nsw i32 [[L_SRC_1_2]], [[L_SRC_2_2]]
; POW2-ONLY-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr [[GEP_SRC_1_0]], align 4
; POW2-ONLY-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[GEP_SRC_2_0]], align 4
; POW2-ONLY-NEXT:    [[TMP2:%.*]] = mul nsw <2 x i32> [[TMP0]], [[TMP1]]
; POW2-ONLY-NEXT:    store <2 x i32> [[TMP2]], ptr [[DST:%.*]], align 4
; POW2-ONLY-NEXT:    [[DST_2:%.*]] = getelementptr i32, ptr [[DST]], i32 2
; POW2-ONLY-NEXT:    store i32 [[MUL_2]], ptr [[DST_2]], align 4
; POW2-ONLY-NEXT:    ret void
;
entry:
  %gep.src.1.0 = getelementptr inbounds i32, ptr %src.1, i32 0
  %l.src.1.0 = load i32, ptr %gep.src.1.0, align 4
  %gep.src.2.0 = getelementptr inbounds i32, ptr %src.2, i32 0
  %l.src.2.0 = load i32, ptr %gep.src.2.0, align 4
  %mul.0 = mul nsw i32 %l.src.1.0, %l.src.2.0

  %gep.src.1.1 = getelementptr inbounds i32, ptr %src.1, i32 1
  %l.src.1.1 = load i32, ptr %gep.src.1.1, align 4
  %gep.src.2.1 = getelementptr inbounds i32, ptr %src.2, i32 1
  %l.src.2.1 = load i32, ptr %gep.src.2.1, align 4
  %mul.1 = mul nsw i32 %l.src.1.1, %l.src.2.1

  %gep.src.1.2 = getelementptr inbounds i32, ptr %src.1, i32 2
  %l.src.1.2 = load i32, ptr %gep.src.1.2, align 4
  %gep.src.2.2 = getelementptr inbounds i32, ptr %src.2, i32 2
  %l.src.2.2 = load i32, ptr %gep.src.2.2, align 4
  %mul.2 = mul nsw i32 %l.src.1.2, %l.src.2.2

  store i32 %mul.0, ptr %dst

  %dst.1 = getelementptr i32, ptr %dst, i32 1
  store i32 %mul.1, ptr %dst.1

  %dst.2 = getelementptr i32, ptr %dst, i32 2
  store i32 %mul.2, ptr %dst.2

  ret void
}
; Same element-wise multiply of three i32 pairs from %src.1 and %src.2 as
; @v3_load_i32_mul_store, but each product additionally has the constant 9
; added before it is stored to %dst.
; Expected with non-power-of-2 vectorization enabled: <3 x i32> loads, mul,
; add and store.
; Expected with it disabled: <2 x i32> code for elements 0-1 and a scalar
; mul / add / store for element 2.
define void @v3_load_i32_mul_add_const_store(ptr %src.1, ptr %src.2, ptr %dst) {
; NON-POW2-LABEL: @v3_load_i32_mul_add_const_store(
; NON-POW2-NEXT:  entry:
; NON-POW2-NEXT:    [[GEP_SRC_1_0:%.*]] = getelementptr inbounds i32, ptr [[SRC_1:%.*]], i32 0
; NON-POW2-NEXT:    [[GEP_SRC_2_0:%.*]] = getelementptr inbounds i32, ptr [[SRC_2:%.*]], i32 0
; NON-POW2-NEXT:    [[TMP0:%.*]] = load <3 x i32>, ptr [[GEP_SRC_1_0]], align 4
; NON-POW2-NEXT:    [[TMP1:%.*]] = load <3 x i32>, ptr [[GEP_SRC_2_0]], align 4
; NON-POW2-NEXT:    [[TMP2:%.*]] = mul nsw <3 x i32> [[TMP0]], [[TMP1]]
; NON-POW2-NEXT:    [[TMP3:%.*]] = add <3 x i32> [[TMP2]], splat (i32 9)
; NON-POW2-NEXT:    store <3 x i32> [[TMP3]], ptr [[DST:%.*]], align 4
; NON-POW2-NEXT:    ret void
;
; POW2-ONLY-LABEL: @v3_load_i32_mul_add_const_store(
; POW2-ONLY-NEXT:  entry:
; POW2-ONLY-NEXT:    [[GEP_SRC_1_0:%.*]] = getelementptr inbounds i32, ptr [[SRC_1:%.*]], i32 0
; POW2-ONLY-NEXT:    [[GEP_SRC_2_0:%.*]] = getelementptr inbounds i32, ptr [[SRC_2:%.*]], i32 0
; POW2-ONLY-NEXT:    [[GEP_SRC_1_2:%.*]] = getelementptr inbounds i32, ptr [[SRC_1]], i32 2
; POW2-ONLY-NEXT:    [[L_SRC_1_2:%.*]] = load i32, ptr [[GEP_SRC_1_2]], align 4
; POW2-ONLY-NEXT:    [[GEP_SRC_2_2:%.*]] = getelementptr inbounds i32, ptr [[SRC_2]], i32 2
; POW2-ONLY-NEXT:    [[L_SRC_2_2:%.*]] = load i32, ptr [[GEP_SRC_2_2]], align 4
; POW2-ONLY-NEXT:    [[MUL_2:%.*]] = mul nsw i32 [[L_SRC_1_2]], [[L_SRC_2_2]]
; POW2-ONLY-NEXT:    [[ADD_2:%.*]] = add i32 [[MUL_2]], 9
; POW2-ONLY-NEXT:    [[TMP0:%.*]] = load <2 x i32>, ptr [[GEP_SRC_1_0]], align 4
; POW2-ONLY-NEXT:    [[TMP1:%.*]] = load <2 x i32>, ptr [[GEP_SRC_2_0]], align 4
; POW2-ONLY-NEXT:    [[TMP2:%.*]] = mul nsw <2 x i32> [[TMP0]], [[TMP1]]
; POW2-ONLY-NEXT:    [[TMP3:%.*]] = add <2 x i32> [[TMP2]], splat (i32 9)
; POW2-ONLY-NEXT:    store <2 x i32> [[TMP3]], ptr [[DST:%.*]], align 4
; POW2-ONLY-NEXT:    [[DST_2:%.*]] = getelementptr i32, ptr [[DST]], i32 2
; POW2-ONLY-NEXT:    store i32 [[ADD_2]], ptr [[DST_2]], align 4
; POW2-ONLY-NEXT:    ret void
;
entry:
  %gep.src.1.0 = getelementptr inbounds i32, ptr %src.1, i32 0
  %l.src.1.0 = load i32, ptr %gep.src.1.0, align 4
  %gep.src.2.0 = getelementptr inbounds i32, ptr %src.2, i32 0
  %l.src.2.0 = load i32, ptr %gep.src.2.0, align 4
  %mul.0 = mul nsw i32 %l.src.1.0, %l.src.2.0
  %add.0 = add i32 %mul.0, 9

  %gep.src.1.1 = getelementptr inbounds i32, ptr %src.1, i32 1
  %l.src.1.1 = load i32, ptr %gep.src.1.1, align 4
  %gep.src.2.1 = getelementptr inbounds i32, ptr %src.2, i32 1
  %l.src.2.1 = load i32, ptr %gep.src.2.1, align 4
  %mul.1 = mul nsw i32 %l.src.1.1, %l.src.2.1
  %add.1 = add i32 %mul.1, 9

  %gep.src.1.2 = getelementptr inbounds i32, ptr %src.1, i32 2
  %l.src.1.2 = load i32, ptr %gep.src.1.2, align 4
  %gep.src.2.2 = getelementptr inbounds i32, ptr %src.2, i32 2
  %l.src.2.2 = load i32, ptr %gep.src.2.2, align 4
  %mul.2 = mul nsw i32 %l.src.1.2, %l.src.2.2
  %add.2 = add i32 %mul.2, 9

  store i32 %add.0, ptr %dst

  %dst.1 = getelementptr i32, ptr %dst, i32 1
  store i32 %add.1, ptr %dst.1

  %dst.2 = getelementptr i32, ptr %dst, i32 2
  store i32 %add.2, ptr %dst.2

  ret void
}
; Floating-point variant: loads three consecutive float elements from %src,
; adds the constant 10.0 to each (fadd without fast-math flags) and stores
; the sums to three consecutive float slots at %dst.
; Expected with non-power-of-2 vectorization enabled: a single <3 x float>
; load / fadd / store.
; Expected with it disabled: <2 x float> code for elements 0-1 and scalar
; code for element 2.
define void @v3_load_f32_fadd_fadd_by_constant_store(ptr %src, ptr %dst) {
; NON-POW2-LABEL: @v3_load_f32_fadd_fadd_by_constant_store(
; NON-POW2-NEXT:  entry:
; NON-POW2-NEXT:    [[GEP_SRC_0:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i32 0
; NON-POW2-NEXT:    [[TMP0:%.*]] = load <3 x float>, ptr [[GEP_SRC_0]], align 4
; NON-POW2-NEXT:    [[TMP1:%.*]] = fadd <3 x float> [[TMP0]], splat (float 1.000000e+01)
; NON-POW2-NEXT:    store <3 x float> [[TMP1]], ptr [[DST:%.*]], align 4
; NON-POW2-NEXT:    ret void
;
; POW2-ONLY-LABEL: @v3_load_f32_fadd_fadd_by_constant_store(
; POW2-ONLY-NEXT:  entry:
; POW2-ONLY-NEXT:    [[GEP_SRC_0:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i32 0
; POW2-ONLY-NEXT:    [[GEP_SRC_2:%.*]] = getelementptr inbounds float, ptr [[SRC]], i32 2
; POW2-ONLY-NEXT:    [[L_SRC_2:%.*]] = load float, ptr [[GEP_SRC_2]], align 4
; POW2-ONLY-NEXT:    [[FADD_2:%.*]] = fadd float [[L_SRC_2]], 1.000000e+01
; POW2-ONLY-NEXT:    [[TMP0:%.*]] = load <2 x float>, ptr [[GEP_SRC_0]], align 4
; POW2-ONLY-NEXT:    [[TMP1:%.*]] = fadd <2 x float> [[TMP0]], splat (float 1.000000e+01)
; POW2-ONLY-NEXT:    store <2 x float> [[TMP1]], ptr [[DST:%.*]], align 4
; POW2-ONLY-NEXT:    [[DST_2:%.*]] = getelementptr float, ptr [[DST]], i32 2
; POW2-ONLY-NEXT:    store float [[FADD_2]], ptr [[DST_2]], align 4
; POW2-ONLY-NEXT:    ret void
;
entry:
  %gep.src.0 = getelementptr inbounds float, ptr %src, i32 0
  %l.src.0 = load float, ptr %gep.src.0, align 4
  %fadd.0 = fadd float %l.src.0, 10.0

  %gep.src.1 = getelementptr inbounds float, ptr %src, i32 1
  %l.src.1 = load float, ptr %gep.src.1, align 4
  %fadd.1 = fadd float %l.src.1, 10.0

  %gep.src.2 = getelementptr inbounds float, ptr %src, i32 2
  %l.src.2 = load float, ptr %gep.src.2, align 4
  %fadd.2 = fadd float %l.src.2, 10.0

  store float %fadd.0, ptr %dst

  %dst.1 = getelementptr float, ptr %dst, i32 1
  store float %fadd.1, ptr %dst.1

  %dst.2 = getelementptr float, ptr %dst, i32 2
  store float %fadd.2, ptr %dst.2

  ret void
}
; Three i32 phis in %exit take the constants 1, 2 and 3 from %entry and 0
; from %invoke.cont8.loopexit (a block with no predecessors); their values
; are stored to three consecutive i32 slots at %dst.
; Expected with non-power-of-2 vectorization enabled: one <3 x i32> phi
; feeding one <3 x i32> store.
; Expected with it disabled: a <2 x i32> phi / store for elements 0-1 while
; the third phi and its store stay scalar.
define void @phi_store3(ptr %dst) {
; NON-POW2-LABEL: @phi_store3(
; NON-POW2-NEXT:  entry:
; NON-POW2-NEXT:    br label [[EXIT:%.*]]
; NON-POW2:       invoke.cont8.loopexit:
; NON-POW2-NEXT:    br label [[EXIT]]
; NON-POW2:       exit:
; NON-POW2-NEXT:    [[TMP0:%.*]] = phi <3 x i32> [ <i32 1, i32 2, i32 3>, [[ENTRY:%.*]] ], [ poison, [[INVOKE_CONT8_LOOPEXIT:%.*]] ]
; NON-POW2-NEXT:    store <3 x i32> [[TMP0]], ptr [[DST:%.*]], align 4
; NON-POW2-NEXT:    ret void
;
; POW2-ONLY-LABEL: @phi_store3(
; POW2-ONLY-NEXT:  entry:
; POW2-ONLY-NEXT:    br label [[EXIT:%.*]]
; POW2-ONLY:       invoke.cont8.loopexit:
; POW2-ONLY-NEXT:    br label [[EXIT]]
; POW2-ONLY:       exit:
; POW2-ONLY-NEXT:    [[P_2:%.*]] = phi i32 [ 3, [[ENTRY:%.*]] ], [ 0, [[INVOKE_CONT8_LOOPEXIT:%.*]] ]
; POW2-ONLY-NEXT:    [[TMP0:%.*]] = phi <2 x i32> [ <i32 1, i32 2>, [[ENTRY]] ], [ poison, [[INVOKE_CONT8_LOOPEXIT]] ]
; POW2-ONLY-NEXT:    [[DST_2:%.*]] = getelementptr i32, ptr [[DST:%.*]], i32 2
; POW2-ONLY-NEXT:    store <2 x i32> [[TMP0]], ptr [[DST]], align 4
; POW2-ONLY-NEXT:    store i32 [[P_2]], ptr [[DST_2]], align 4
; POW2-ONLY-NEXT:    ret void
;
entry:
  br label %exit

invoke.cont8.loopexit:                            ; No predecessors!
  br label %exit

exit:
  %p.0 = phi i32 [ 1, %entry ], [ 0, %invoke.cont8.loopexit ]
  %p.1 = phi i32 [ 2, %entry ], [ 0, %invoke.cont8.loopexit ]
  %p.2 = phi i32 [ 3, %entry ], [ 0, %invoke.cont8.loopexit ]

  %dst.1 = getelementptr i32, ptr %dst, i32 1
  %dst.2 = getelementptr i32, ptr %dst, i32 2

  store i32 %p.0, ptr %dst, align 4
  store i32 %p.1, ptr %dst.1, align 4
  store i32 %p.2, ptr %dst.2, align 4
  ret void
}
; Stores three i32 values to consecutive slots at %dst: element 0 is
; `add i32 0, 0`, elements 1 and 2 are `sub i32 0, 0`, so the first element
; uses a different opcode from the other two.
; Expected with non-power-of-2 vectorization enabled: a single
; `store <3 x i32> zeroinitializer`.
; Expected with it disabled: the scalar add and its store are kept for
; element 0, and elements 1-2 become one `store <2 x i32> zeroinitializer`.
define void @store_try_reorder(ptr %dst) {
; NON-POW2-LABEL: @store_try_reorder(
; NON-POW2-NEXT:  entry:
; NON-POW2-NEXT:    store <3 x i32> zeroinitializer, ptr [[DST:%.*]], align 4
; NON-POW2-NEXT:    ret void
;
; POW2-ONLY-LABEL: @store_try_reorder(
; POW2-ONLY-NEXT:  entry:
; POW2-ONLY-NEXT:    [[ADD:%.*]] = add i32 0, 0
; POW2-ONLY-NEXT:    store i32 [[ADD]], ptr [[DST:%.*]], align 4
; POW2-ONLY-NEXT:    [[ARRAYIDX_I1887:%.*]] = getelementptr i32, ptr [[DST]], i64 1
; POW2-ONLY-NEXT:    store <2 x i32> zeroinitializer, ptr [[ARRAYIDX_I1887]], align 4
; POW2-ONLY-NEXT:    ret void
;
entry:
  %add = add i32 0, 0
  store i32 %add, ptr %dst, align 4
  %add207 = sub i32 0, 0
  %arrayidx.i1887 = getelementptr i32, ptr %dst, i64 1
  store i32 %add207, ptr %arrayidx.i1887, align 4
  %add216 = sub i32 0, 0
  %arrayidx.i1891 = getelementptr i32, ptr %dst, i64 2
  store i32 %add216, ptr %arrayidx.i1891, align 4
  ret void
}
; The same float argument %0 is, three times over, extended to double,
; passed through llvm.fmuladd.f64 with a 0.0 multiplier and a 0.0 addend,
; truncated back to float and stored to consecutive float slots 0, 1 and 2
; of %Colour.
; Expected with non-power-of-2 vectorization enabled: %0 is splatted into a
; <3 x float>, followed by a <3 x ...> fpext / fmuladd / fptrunc / store
; chain.
; Expected with it disabled: the same chain at <2 x ...> width for elements
; 0-1, plus the scalar chain for element 2.
define void @vec3_fpext_cost(ptr %Colour, float %0) {
; NON-POW2-LABEL: @vec3_fpext_cost(
; NON-POW2-NEXT:  entry:
; NON-POW2-NEXT:    [[TMP1:%.*]] = insertelement <3 x float> poison, float [[TMP0:%.*]], i32 0
; NON-POW2-NEXT:    [[TMP2:%.*]] = shufflevector <3 x float> [[TMP1]], <3 x float> poison, <3 x i32> zeroinitializer
; NON-POW2-NEXT:    [[TMP3:%.*]] = fpext <3 x float> [[TMP2]] to <3 x double>
; NON-POW2-NEXT:    [[TMP4:%.*]] = call <3 x double> @llvm.fmuladd.v3f64(<3 x double> [[TMP3]], <3 x double> zeroinitializer, <3 x double> zeroinitializer)
; NON-POW2-NEXT:    [[TMP5:%.*]] = fptrunc <3 x double> [[TMP4]] to <3 x float>
; NON-POW2-NEXT:    store <3 x float> [[TMP5]], ptr [[COLOUR:%.*]], align 4
; NON-POW2-NEXT:    ret void
;
; POW2-ONLY-LABEL: @vec3_fpext_cost(
; POW2-ONLY-NEXT:  entry:
; POW2-ONLY-NEXT:    [[ARRAYIDX80:%.*]] = getelementptr float, ptr [[COLOUR:%.*]], i64 2
; POW2-ONLY-NEXT:    [[TMP1:%.*]] = insertelement <2 x float> poison, float [[TMP0:%.*]], i32 0
; POW2-ONLY-NEXT:    [[TMP2:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <2 x i32> zeroinitializer
; POW2-ONLY-NEXT:    [[TMP3:%.*]] = fpext <2 x float> [[TMP2]] to <2 x double>
; POW2-ONLY-NEXT:    [[TMP4:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[TMP3]], <2 x double> zeroinitializer, <2 x double> zeroinitializer)
; POW2-ONLY-NEXT:    [[TMP5:%.*]] = fptrunc <2 x double> [[TMP4]] to <2 x float>
; POW2-ONLY-NEXT:    store <2 x float> [[TMP5]], ptr [[COLOUR]], align 4
; POW2-ONLY-NEXT:    [[CONV78:%.*]] = fpext float [[TMP0]] to double
; POW2-ONLY-NEXT:    [[TMP6:%.*]] = call double @llvm.fmuladd.f64(double [[CONV78]], double 0.000000e+00, double 0.000000e+00)
; POW2-ONLY-NEXT:    [[CONV82:%.*]] = fptrunc double [[TMP6]] to float
; POW2-ONLY-NEXT:    store float [[CONV82]], ptr [[ARRAYIDX80]], align 4
; POW2-ONLY-NEXT:    ret void
;
entry:
  %arrayidx72 = getelementptr float, ptr %Colour, i64 1
  %arrayidx80 = getelementptr float, ptr %Colour, i64 2
  %conv62 = fpext float %0 to double
  %1 = call double @llvm.fmuladd.f64(double %conv62, double 0.000000e+00, double 0.000000e+00)
  %conv66 = fptrunc double %1 to float
  store float %conv66, ptr %Colour, align 4
  %conv70 = fpext float %0 to double
  %2 = call double @llvm.fmuladd.f64(double %conv70, double 0.000000e+00, double 0.000000e+00)
  %conv74 = fptrunc double %2 to float
  store float %conv74, ptr %arrayidx72, align 4
  %conv78 = fpext float %0 to double
  %3 = call double @llvm.fmuladd.f64(double %conv78, double 0.000000e+00, double 0.000000e+00)
  %conv82 = fptrunc double %3 to float
  store float %conv82, ptr %arrayidx80, align 4
  ret void
}
; A single fptrunc of %conv from double to float is stored to float slots
; 0, 1 and 2 of %dst.  Despite the function name, the only conversion in the
; body is an fptrunc.
; Expected in both runs: the three scalar stores are left unchanged.
define void @fpext_scatter(ptr %dst, double %conv) {
; CHECK-LABEL: @fpext_scatter(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CONV25:%.*]] = fptrunc double [[CONV:%.*]] to float
; CHECK-NEXT:    [[LENGTHS:%.*]] = getelementptr float, ptr [[DST:%.*]], i64 0
; CHECK-NEXT:    store float [[CONV25]], ptr [[LENGTHS]], align 4
; CHECK-NEXT:    [[ARRAYIDX32:%.*]] = getelementptr float, ptr [[DST]], i64 1
; CHECK-NEXT:    store float [[CONV25]], ptr [[ARRAYIDX32]], align 4
; CHECK-NEXT:    [[ARRAYIDX37:%.*]] = getelementptr float, ptr [[DST]], i64 2
; CHECK-NEXT:    store float [[CONV25]], ptr [[ARRAYIDX37]], align 4
; CHECK-NEXT:    ret void
;
entry:
  %conv25 = fptrunc double %conv to float
  %Lengths = getelementptr float, ptr %dst, i64 0
  store float %conv25, ptr %Lengths, align 4
  %arrayidx32 = getelementptr float, ptr %dst, i64 1
  store float %conv25, ptr %arrayidx32, align 4
  %arrayidx37 = getelementptr float, ptr %dst, i64 2
  store float %conv25, ptr %arrayidx37, align 4
  ret void
}
; Integer add reduction: loads three consecutive i32 elements from %src and
; returns their sum, computed as (e0 + e1) + e2.
; Expected in both runs: the scalar loads and adds are left unchanged.
define i32 @reduce_add(ptr %src) {
; CHECK-LABEL: @reduce_add(
; CHECK-NEXT:    [[GEP_SRC_0:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i32 0
; CHECK-NEXT:    [[L_SRC_0:%.*]] = load i32, ptr [[GEP_SRC_0]], align 4
; CHECK-NEXT:    [[GEP_SRC_1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 1
; CHECK-NEXT:    [[L_SRC_1:%.*]] = load i32, ptr [[GEP_SRC_1]], align 4
; CHECK-NEXT:    [[GEP_SRC_2:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 2
; CHECK-NEXT:    [[L_SRC_2:%.*]] = load i32, ptr [[GEP_SRC_2]], align 4
; CHECK-NEXT:    [[ADD_0:%.*]] = add i32 [[L_SRC_0]], [[L_SRC_1]]
; CHECK-NEXT:    [[ADD_1:%.*]] = add i32 [[ADD_0]], [[L_SRC_2]]
; CHECK-NEXT:    ret i32 [[ADD_1]]
;
  %gep.src.0 = getelementptr inbounds i32, ptr %src, i32 0
  %l.src.0 = load i32, ptr %gep.src.0, align 4
  %gep.src.1 = getelementptr inbounds i32, ptr %src, i32 1
  %l.src.1 = load i32, ptr %gep.src.1, align 4
  %gep.src.2 = getelementptr inbounds i32, ptr %src, i32 2
  %l.src.2 = load i32, ptr %gep.src.2, align 4

  %add.0 = add i32 %l.src.0, %l.src.1
  %add.1 = add i32 %add.0, %l.src.2
  ret i32 %add.1
}
; Floating-point add reduction: loads three consecutive float elements from
; %src and returns their sum, computed with `fadd fast` as (e0 + e1) + e2.
; Expected with non-power-of-2 vectorization enabled: one <3 x float> load
; fed to a fast llvm.vector.reduce.fadd.v3f32 call with a 0.0 start value.
; Expected with it disabled: the scalar loads and fadds are left unchanged.
define float @reduce_fadd(ptr %src) {
; NON-POW2-LABEL: @reduce_fadd(
; NON-POW2-NEXT:    [[GEP_SRC_0:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i32 0
; NON-POW2-NEXT:    [[TMP1:%.*]] = load <3 x float>, ptr [[GEP_SRC_0]], align 4
; NON-POW2-NEXT:    [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v3f32(float 0.000000e+00, <3 x float> [[TMP1]])
; NON-POW2-NEXT:    ret float [[TMP2]]
;
; POW2-ONLY-LABEL: @reduce_fadd(
; POW2-ONLY-NEXT:    [[GEP_SRC_0:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i32 0
; POW2-ONLY-NEXT:    [[L_SRC_0:%.*]] = load float, ptr [[GEP_SRC_0]], align 4
; POW2-ONLY-NEXT:    [[GEP_SRC_1:%.*]] = getelementptr inbounds float, ptr [[SRC]], i32 1
; POW2-ONLY-NEXT:    [[L_SRC_1:%.*]] = load float, ptr [[GEP_SRC_1]], align 4
; POW2-ONLY-NEXT:    [[GEP_SRC_2:%.*]] = getelementptr inbounds float, ptr [[SRC]], i32 2
; POW2-ONLY-NEXT:    [[L_SRC_2:%.*]] = load float, ptr [[GEP_SRC_2]], align 4
; POW2-ONLY-NEXT:    [[ADD_0:%.*]] = fadd fast float [[L_SRC_0]], [[L_SRC_1]]
; POW2-ONLY-NEXT:    [[ADD_1:%.*]] = fadd fast float [[ADD_0]], [[L_SRC_2]]
; POW2-ONLY-NEXT:    ret float [[ADD_1]]
;
  %gep.src.0 = getelementptr inbounds float, ptr %src, i32 0
  %l.src.0 = load float, ptr %gep.src.0, align 4
  %gep.src.1 = getelementptr inbounds float, ptr %src, i32 1
  %l.src.1 = load float, ptr %gep.src.1, align 4
  %gep.src.2 = getelementptr inbounds float, ptr %src, i32 2
  %l.src.2 = load float, ptr %gep.src.2, align 4

  %add.0 = fadd fast float %l.src.0, %l.src.1
  %add.1 = fadd fast float %add.0, %l.src.2
  ret float %add.1
}
4654dda564cSPhilip Reames
; reduce_add_after_mul: three consecutive i32 loads from %src, each multiplied
; by the constant 10 (`mul nsw`), then summed by a chain of two `add`s.
;   NON-POW2:  expects a <3 x i32> load, a vector `mul nsw` by splat (i32 10),
;              and @llvm.vector.reduce.add.v3i32.
;   POW2-ONLY: expects the scalar code to be left as-is.
4664dda564cSPhilip Reamesdefine i32 @reduce_add_after_mul(ptr %src) {
467ed03070eSPhilip Reames; NON-POW2-LABEL: @reduce_add_after_mul(
468ed03070eSPhilip Reames; NON-POW2-NEXT:    [[GEP_SRC_0:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i32 0
469ed03070eSPhilip Reames; NON-POW2-NEXT:    [[TMP1:%.*]] = load <3 x i32>, ptr [[GEP_SRC_0]], align 4
47038fffa63SPaul Walker; NON-POW2-NEXT:    [[TMP2:%.*]] = mul nsw <3 x i32> [[TMP1]], splat (i32 10)
471ed03070eSPhilip Reames; NON-POW2-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v3i32(<3 x i32> [[TMP2]])
472ed03070eSPhilip Reames; NON-POW2-NEXT:    ret i32 [[TMP3]]
473ed03070eSPhilip Reames;
474ed03070eSPhilip Reames; POW2-ONLY-LABEL: @reduce_add_after_mul(
475ed03070eSPhilip Reames; POW2-ONLY-NEXT:    [[GEP_SRC_0:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i32 0
476ed03070eSPhilip Reames; POW2-ONLY-NEXT:    [[L_SRC_0:%.*]] = load i32, ptr [[GEP_SRC_0]], align 4
477ed03070eSPhilip Reames; POW2-ONLY-NEXT:    [[GEP_SRC_1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 1
478ed03070eSPhilip Reames; POW2-ONLY-NEXT:    [[L_SRC_1:%.*]] = load i32, ptr [[GEP_SRC_1]], align 4
479ed03070eSPhilip Reames; POW2-ONLY-NEXT:    [[GEP_SRC_2:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 2
480ed03070eSPhilip Reames; POW2-ONLY-NEXT:    [[L_SRC_2:%.*]] = load i32, ptr [[GEP_SRC_2]], align 4
481ed03070eSPhilip Reames; POW2-ONLY-NEXT:    [[MUL_0:%.*]] = mul nsw i32 [[L_SRC_0]], 10
482ed03070eSPhilip Reames; POW2-ONLY-NEXT:    [[MUL_1:%.*]] = mul nsw i32 [[L_SRC_1]], 10
483ed03070eSPhilip Reames; POW2-ONLY-NEXT:    [[MUL_2:%.*]] = mul nsw i32 [[L_SRC_2]], 10
484ed03070eSPhilip Reames; POW2-ONLY-NEXT:    [[ADD_0:%.*]] = add i32 [[MUL_0]], [[MUL_1]]
485ed03070eSPhilip Reames; POW2-ONLY-NEXT:    [[ADD_1:%.*]] = add i32 [[ADD_0]], [[MUL_2]]
486ed03070eSPhilip Reames; POW2-ONLY-NEXT:    ret i32 [[ADD_1]]
4874dda564cSPhilip Reames;
4884dda564cSPhilip Reames  %gep.src.0 = getelementptr inbounds i32, ptr %src, i32 0
4894dda564cSPhilip Reames  %l.src.0 = load i32, ptr %gep.src.0, align 4
4904dda564cSPhilip Reames  %gep.src.1 = getelementptr inbounds i32, ptr %src, i32 1
4914dda564cSPhilip Reames  %l.src.1 = load i32, ptr %gep.src.1, align 4
4924dda564cSPhilip Reames  %gep.src.2 = getelementptr inbounds i32, ptr %src, i32 2
4934dda564cSPhilip Reames  %l.src.2 = load i32, ptr %gep.src.2, align 4
4944dda564cSPhilip Reames
4954dda564cSPhilip Reames  %mul.0 = mul nsw i32 %l.src.0, 10
4964dda564cSPhilip Reames  %mul.1 = mul nsw i32 %l.src.1, 10
4974dda564cSPhilip Reames  %mul.2 = mul nsw i32 %l.src.2, 10
4984dda564cSPhilip Reames
4994dda564cSPhilip Reames  %add.0 = add i32 %mul.0, %mul.1
5004dda564cSPhilip Reames  %add.1 = add i32 %add.0, %mul.2
5014dda564cSPhilip Reames  ret i32 %add.1
5024dda564cSPhilip Reames}
5034dda564cSPhilip Reames
; dot_product_i32: 3-element integer dot product. Loads three consecutive i32
; values from each of %a and %b, multiplies them pairwise (`mul nsw`), and sums
; the products with a chain of two `add`s.
;   NON-POW2:  expects two <3 x i32> loads, one vector `mul nsw`, and
;              @llvm.vector.reduce.add.v3i32.
;   POW2-ONLY: expects the scalar code to be left as-is.
504acb33a0cSPhilip Reamesdefine i32 @dot_product_i32(ptr %a, ptr %b) {
505ed03070eSPhilip Reames; NON-POW2-LABEL: @dot_product_i32(
506ed03070eSPhilip Reames; NON-POW2-NEXT:    [[GEP_A_0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 0
507ed03070eSPhilip Reames; NON-POW2-NEXT:    [[GEP_B_0:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i32 0
508ed03070eSPhilip Reames; NON-POW2-NEXT:    [[TMP1:%.*]] = load <3 x i32>, ptr [[GEP_A_0]], align 4
509ed03070eSPhilip Reames; NON-POW2-NEXT:    [[TMP2:%.*]] = load <3 x i32>, ptr [[GEP_B_0]], align 4
510ed03070eSPhilip Reames; NON-POW2-NEXT:    [[TMP3:%.*]] = mul nsw <3 x i32> [[TMP1]], [[TMP2]]
511ed03070eSPhilip Reames; NON-POW2-NEXT:    [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v3i32(<3 x i32> [[TMP3]])
512ed03070eSPhilip Reames; NON-POW2-NEXT:    ret i32 [[TMP4]]
513ed03070eSPhilip Reames;
514ed03070eSPhilip Reames; POW2-ONLY-LABEL: @dot_product_i32(
515ed03070eSPhilip Reames; POW2-ONLY-NEXT:    [[GEP_A_0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 0
516ed03070eSPhilip Reames; POW2-ONLY-NEXT:    [[L_A_0:%.*]] = load i32, ptr [[GEP_A_0]], align 4
517ed03070eSPhilip Reames; POW2-ONLY-NEXT:    [[GEP_A_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 1
518ed03070eSPhilip Reames; POW2-ONLY-NEXT:    [[L_A_1:%.*]] = load i32, ptr [[GEP_A_1]], align 4
519ed03070eSPhilip Reames; POW2-ONLY-NEXT:    [[GEP_A_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 2
520ed03070eSPhilip Reames; POW2-ONLY-NEXT:    [[L_A_2:%.*]] = load i32, ptr [[GEP_A_2]], align 4
521ed03070eSPhilip Reames; POW2-ONLY-NEXT:    [[GEP_B_0:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i32 0
522ed03070eSPhilip Reames; POW2-ONLY-NEXT:    [[L_B_0:%.*]] = load i32, ptr [[GEP_B_0]], align 4
523ed03070eSPhilip Reames; POW2-ONLY-NEXT:    [[GEP_B_1:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 1
524ed03070eSPhilip Reames; POW2-ONLY-NEXT:    [[L_B_1:%.*]] = load i32, ptr [[GEP_B_1]], align 4
525ed03070eSPhilip Reames; POW2-ONLY-NEXT:    [[GEP_B_2:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 2
526ed03070eSPhilip Reames; POW2-ONLY-NEXT:    [[L_B_2:%.*]] = load i32, ptr [[GEP_B_2]], align 4
527ed03070eSPhilip Reames; POW2-ONLY-NEXT:    [[MUL_0:%.*]] = mul nsw i32 [[L_A_0]], [[L_B_0]]
528ed03070eSPhilip Reames; POW2-ONLY-NEXT:    [[MUL_1:%.*]] = mul nsw i32 [[L_A_1]], [[L_B_1]]
529ed03070eSPhilip Reames; POW2-ONLY-NEXT:    [[MUL_2:%.*]] = mul nsw i32 [[L_A_2]], [[L_B_2]]
530ed03070eSPhilip Reames; POW2-ONLY-NEXT:    [[ADD_0:%.*]] = add i32 [[MUL_0]], [[MUL_1]]
531ed03070eSPhilip Reames; POW2-ONLY-NEXT:    [[ADD_1:%.*]] = add i32 [[ADD_0]], [[MUL_2]]
532ed03070eSPhilip Reames; POW2-ONLY-NEXT:    ret i32 [[ADD_1]]
5334dda564cSPhilip Reames;
5344dda564cSPhilip Reames  %gep.a.0 = getelementptr inbounds i32, ptr %a, i32 0
5354dda564cSPhilip Reames  %l.a.0 = load i32, ptr %gep.a.0, align 4
5364dda564cSPhilip Reames  %gep.a.1 = getelementptr inbounds i32, ptr %a, i32 1
5374dda564cSPhilip Reames  %l.a.1 = load i32, ptr %gep.a.1, align 4
5384dda564cSPhilip Reames  %gep.a.2 = getelementptr inbounds i32, ptr %a, i32 2
5394dda564cSPhilip Reames  %l.a.2 = load i32, ptr %gep.a.2, align 4
5404dda564cSPhilip Reames
5414dda564cSPhilip Reames  %gep.b.0 = getelementptr inbounds i32, ptr %b, i32 0
5424dda564cSPhilip Reames  %l.b.0 = load i32, ptr %gep.b.0, align 4
5434dda564cSPhilip Reames  %gep.b.1 = getelementptr inbounds i32, ptr %b, i32 1
5444dda564cSPhilip Reames  %l.b.1 = load i32, ptr %gep.b.1, align 4
5454dda564cSPhilip Reames  %gep.b.2 = getelementptr inbounds i32, ptr %b, i32 2
5464dda564cSPhilip Reames  %l.b.2 = load i32, ptr %gep.b.2, align 4
5474dda564cSPhilip Reames
5484dda564cSPhilip Reames  %mul.0 = mul nsw i32 %l.a.0, %l.b.0
5494dda564cSPhilip Reames  %mul.1 = mul nsw i32 %l.a.1, %l.b.1
5504dda564cSPhilip Reames  %mul.2 = mul nsw i32 %l.a.2, %l.b.2
5514dda564cSPhilip Reames
5524dda564cSPhilip Reames  %add.0 = add i32 %mul.0, %mul.1
5534dda564cSPhilip Reames  %add.1 = add i32 %add.0, %mul.2
5544dda564cSPhilip Reames  ret i32 %add.1
5554dda564cSPhilip Reames}
5564dda564cSPhilip Reames
55722ba3511SPhilip Reames; Same as above, except the reduction order has been perturbed.  This
55822ba3511SPhilip Reames; is checking for our ability to reorder.
; Concretely, the body matches @dot_product_i32 except that the first add takes
; its operands as (%mul.1, %mul.0) instead of (%mul.0, %mul.1).
;   NON-POW2:  expects the same <3 x i32> load / mul / reduce.add output as
;              the unperturbed variant.
;   POW2-ONLY: expects the scalar code to be left as-is, including the
;              swapped operand order on the first add.
55922ba3511SPhilip Reamesdefine i32 @dot_product_i32_reorder(ptr %a, ptr %b) {
56063e8a1b1SPhilip Reames; NON-POW2-LABEL: @dot_product_i32_reorder(
56163e8a1b1SPhilip Reames; NON-POW2-NEXT:    [[GEP_A_0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 0
56263e8a1b1SPhilip Reames; NON-POW2-NEXT:    [[GEP_B_0:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i32 0
56363e8a1b1SPhilip Reames; NON-POW2-NEXT:    [[TMP1:%.*]] = load <3 x i32>, ptr [[GEP_A_0]], align 4
56463e8a1b1SPhilip Reames; NON-POW2-NEXT:    [[TMP2:%.*]] = load <3 x i32>, ptr [[GEP_B_0]], align 4
56563e8a1b1SPhilip Reames; NON-POW2-NEXT:    [[TMP3:%.*]] = mul nsw <3 x i32> [[TMP1]], [[TMP2]]
56663e8a1b1SPhilip Reames; NON-POW2-NEXT:    [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v3i32(<3 x i32> [[TMP3]])
56763e8a1b1SPhilip Reames; NON-POW2-NEXT:    ret i32 [[TMP4]]
56863e8a1b1SPhilip Reames;
56963e8a1b1SPhilip Reames; POW2-ONLY-LABEL: @dot_product_i32_reorder(
57063e8a1b1SPhilip Reames; POW2-ONLY-NEXT:    [[GEP_A_0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 0
57163e8a1b1SPhilip Reames; POW2-ONLY-NEXT:    [[L_A_0:%.*]] = load i32, ptr [[GEP_A_0]], align 4
57263e8a1b1SPhilip Reames; POW2-ONLY-NEXT:    [[GEP_A_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 1
57363e8a1b1SPhilip Reames; POW2-ONLY-NEXT:    [[L_A_1:%.*]] = load i32, ptr [[GEP_A_1]], align 4
57463e8a1b1SPhilip Reames; POW2-ONLY-NEXT:    [[GEP_A_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 2
57563e8a1b1SPhilip Reames; POW2-ONLY-NEXT:    [[L_A_2:%.*]] = load i32, ptr [[GEP_A_2]], align 4
57663e8a1b1SPhilip Reames; POW2-ONLY-NEXT:    [[GEP_B_0:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i32 0
57763e8a1b1SPhilip Reames; POW2-ONLY-NEXT:    [[L_B_0:%.*]] = load i32, ptr [[GEP_B_0]], align 4
57863e8a1b1SPhilip Reames; POW2-ONLY-NEXT:    [[GEP_B_1:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 1
57963e8a1b1SPhilip Reames; POW2-ONLY-NEXT:    [[L_B_1:%.*]] = load i32, ptr [[GEP_B_1]], align 4
58063e8a1b1SPhilip Reames; POW2-ONLY-NEXT:    [[GEP_B_2:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 2
58163e8a1b1SPhilip Reames; POW2-ONLY-NEXT:    [[L_B_2:%.*]] = load i32, ptr [[GEP_B_2]], align 4
58263e8a1b1SPhilip Reames; POW2-ONLY-NEXT:    [[MUL_0:%.*]] = mul nsw i32 [[L_A_0]], [[L_B_0]]
58363e8a1b1SPhilip Reames; POW2-ONLY-NEXT:    [[MUL_1:%.*]] = mul nsw i32 [[L_A_1]], [[L_B_1]]
58463e8a1b1SPhilip Reames; POW2-ONLY-NEXT:    [[MUL_2:%.*]] = mul nsw i32 [[L_A_2]], [[L_B_2]]
58563e8a1b1SPhilip Reames; POW2-ONLY-NEXT:    [[ADD_0:%.*]] = add i32 [[MUL_1]], [[MUL_0]]
58663e8a1b1SPhilip Reames; POW2-ONLY-NEXT:    [[ADD_1:%.*]] = add i32 [[ADD_0]], [[MUL_2]]
58763e8a1b1SPhilip Reames; POW2-ONLY-NEXT:    ret i32 [[ADD_1]]
58822ba3511SPhilip Reames;
58922ba3511SPhilip Reames  %gep.a.0 = getelementptr inbounds i32, ptr %a, i32 0
59022ba3511SPhilip Reames  %l.a.0 = load i32, ptr %gep.a.0, align 4
59122ba3511SPhilip Reames  %gep.a.1 = getelementptr inbounds i32, ptr %a, i32 1
59222ba3511SPhilip Reames  %l.a.1 = load i32, ptr %gep.a.1, align 4
59322ba3511SPhilip Reames  %gep.a.2 = getelementptr inbounds i32, ptr %a, i32 2
59422ba3511SPhilip Reames  %l.a.2 = load i32, ptr %gep.a.2, align 4
59522ba3511SPhilip Reames
59622ba3511SPhilip Reames  %gep.b.0 = getelementptr inbounds i32, ptr %b, i32 0
59722ba3511SPhilip Reames  %l.b.0 = load i32, ptr %gep.b.0, align 4
59822ba3511SPhilip Reames  %gep.b.1 = getelementptr inbounds i32, ptr %b, i32 1
59922ba3511SPhilip Reames  %l.b.1 = load i32, ptr %gep.b.1, align 4
60022ba3511SPhilip Reames  %gep.b.2 = getelementptr inbounds i32, ptr %b, i32 2
60122ba3511SPhilip Reames  %l.b.2 = load i32, ptr %gep.b.2, align 4
60222ba3511SPhilip Reames
60322ba3511SPhilip Reames  %mul.0 = mul nsw i32 %l.a.0, %l.b.0
60422ba3511SPhilip Reames  %mul.1 = mul nsw i32 %l.a.1, %l.b.1
60522ba3511SPhilip Reames  %mul.2 = mul nsw i32 %l.a.2, %l.b.2
60622ba3511SPhilip Reames
  ; Perturbed order: lane 1's product is the first operand of the first add.
60722ba3511SPhilip Reames  %add.0 = add i32 %mul.1, %mul.0
60822ba3511SPhilip Reames  %add.1 = add i32 %add.0, %mul.2
60922ba3511SPhilip Reames  ret i32 %add.1
61022ba3511SPhilip Reames}
61122ba3511SPhilip Reames
; dot_product_fp32: 3-element float dot product. Loads three consecutive float
; values from each of %a and %b, multiplies them pairwise (`fmul fast`), and
; sums the products with a chain of two `fadd fast` instructions.
;   NON-POW2:  expects two <3 x float> loads, one vector `fmul fast`, and
;              @llvm.vector.reduce.fadd.v3f32.
;   POW2-ONLY: expects lanes 0 and 1 to be vectorized as <2 x float> (loads and
;              fmul) while lane 2 stays scalar; the fadd chain stays scalar
;              and reads the vector lanes through extractelement.
612acb33a0cSPhilip Reamesdefine float @dot_product_fp32(ptr %a, ptr %b) {
613ed03070eSPhilip Reames; NON-POW2-LABEL: @dot_product_fp32(
614ed03070eSPhilip Reames; NON-POW2-NEXT:    [[GEP_A_0:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i32 0
615ed03070eSPhilip Reames; NON-POW2-NEXT:    [[GEP_B_0:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i32 0
616ed03070eSPhilip Reames; NON-POW2-NEXT:    [[TMP1:%.*]] = load <3 x float>, ptr [[GEP_A_0]], align 4
617ed03070eSPhilip Reames; NON-POW2-NEXT:    [[TMP2:%.*]] = load <3 x float>, ptr [[GEP_B_0]], align 4
618ed03070eSPhilip Reames; NON-POW2-NEXT:    [[TMP3:%.*]] = fmul fast <3 x float> [[TMP1]], [[TMP2]]
6192c7786e9SPhilip Reames; NON-POW2-NEXT:    [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v3f32(float 0.000000e+00, <3 x float> [[TMP3]])
620ed03070eSPhilip Reames; NON-POW2-NEXT:    ret float [[TMP4]]
621ed03070eSPhilip Reames;
622ed03070eSPhilip Reames; POW2-ONLY-LABEL: @dot_product_fp32(
623ed03070eSPhilip Reames; POW2-ONLY-NEXT:    [[GEP_A_0:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i32 0
624ed03070eSPhilip Reames; POW2-ONLY-NEXT:    [[GEP_A_2:%.*]] = getelementptr inbounds float, ptr [[A]], i32 2
625ed03070eSPhilip Reames; POW2-ONLY-NEXT:    [[L_A_2:%.*]] = load float, ptr [[GEP_A_2]], align 4
626ed03070eSPhilip Reames; POW2-ONLY-NEXT:    [[GEP_B_0:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i32 0
627ed03070eSPhilip Reames; POW2-ONLY-NEXT:    [[GEP_B_2:%.*]] = getelementptr inbounds float, ptr [[B]], i32 2
628ed03070eSPhilip Reames; POW2-ONLY-NEXT:    [[L_B_2:%.*]] = load float, ptr [[GEP_B_2]], align 4
629ed03070eSPhilip Reames; POW2-ONLY-NEXT:    [[TMP1:%.*]] = load <2 x float>, ptr [[GEP_A_0]], align 4
630ed03070eSPhilip Reames; POW2-ONLY-NEXT:    [[TMP2:%.*]] = load <2 x float>, ptr [[GEP_B_0]], align 4
631ed03070eSPhilip Reames; POW2-ONLY-NEXT:    [[TMP3:%.*]] = fmul fast <2 x float> [[TMP1]], [[TMP2]]
632ed03070eSPhilip Reames; POW2-ONLY-NEXT:    [[MUL_2:%.*]] = fmul fast float [[L_A_2]], [[L_B_2]]
633ed03070eSPhilip Reames; POW2-ONLY-NEXT:    [[TMP4:%.*]] = extractelement <2 x float> [[TMP3]], i32 0
634ed03070eSPhilip Reames; POW2-ONLY-NEXT:    [[TMP5:%.*]] = extractelement <2 x float> [[TMP3]], i32 1
635ed03070eSPhilip Reames; POW2-ONLY-NEXT:    [[ADD_0:%.*]] = fadd fast float [[TMP4]], [[TMP5]]
636ed03070eSPhilip Reames; POW2-ONLY-NEXT:    [[ADD_1:%.*]] = fadd fast float [[ADD_0]], [[MUL_2]]
637ed03070eSPhilip Reames; POW2-ONLY-NEXT:    ret float [[ADD_1]]
638acb33a0cSPhilip Reames;
639acb33a0cSPhilip Reames  %gep.a.0 = getelementptr inbounds float, ptr %a, i32 0
640acb33a0cSPhilip Reames  %l.a.0 = load float, ptr %gep.a.0, align 4
641acb33a0cSPhilip Reames  %gep.a.1 = getelementptr inbounds float, ptr %a, i32 1
642acb33a0cSPhilip Reames  %l.a.1 = load float, ptr %gep.a.1, align 4
643acb33a0cSPhilip Reames  %gep.a.2 = getelementptr inbounds float, ptr %a, i32 2
644acb33a0cSPhilip Reames  %l.a.2 = load float, ptr %gep.a.2, align 4
645acb33a0cSPhilip Reames
646acb33a0cSPhilip Reames  %gep.b.0 = getelementptr inbounds float, ptr %b, i32 0
647acb33a0cSPhilip Reames  %l.b.0 = load float, ptr %gep.b.0, align 4
648acb33a0cSPhilip Reames  %gep.b.1 = getelementptr inbounds float, ptr %b, i32 1
649acb33a0cSPhilip Reames  %l.b.1 = load float, ptr %gep.b.1, align 4
650acb33a0cSPhilip Reames  %gep.b.2 = getelementptr inbounds float, ptr %b, i32 2
651acb33a0cSPhilip Reames  %l.b.2 = load float, ptr %gep.b.2, align 4
652acb33a0cSPhilip Reames
653acb33a0cSPhilip Reames  %mul.0 = fmul fast float %l.a.0, %l.b.0
654acb33a0cSPhilip Reames  %mul.1 = fmul fast float %l.a.1, %l.b.1
655acb33a0cSPhilip Reames  %mul.2 = fmul fast float %l.a.2, %l.b.2
656acb33a0cSPhilip Reames
657acb33a0cSPhilip Reames  %add.0 = fadd fast float %mul.0, %mul.1
658acb33a0cSPhilip Reames  %add.1 = fadd fast float %add.0, %mul.2
659acb33a0cSPhilip Reames  ret float %add.1
660acb33a0cSPhilip Reames}
661acb33a0cSPhilip Reames
66222ba3511SPhilip Reames; Same as above, except the reduction order has been perturbed.  This
66322ba3511SPhilip Reames; is checking for our ability to reorder.
; Concretely, the body matches @dot_product_fp32 except that the first fadd
; takes its operands as (%mul.1, %mul.0) instead of (%mul.0, %mul.1).
;   NON-POW2:  expects the same <3 x float> load / fmul / reduce.fadd output
;              as the unperturbed variant.
;   POW2-ONLY: expects lanes 0 and 1 vectorized as <2 x float> with lane 2
;              scalar; the first scalar fadd keeps the swapped order
;              (element 1 first, then element 0).
66422ba3511SPhilip Reamesdefine float @dot_product_fp32_reorder(ptr %a, ptr %b) {
66563e8a1b1SPhilip Reames; NON-POW2-LABEL: @dot_product_fp32_reorder(
66663e8a1b1SPhilip Reames; NON-POW2-NEXT:    [[GEP_A_0:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i32 0
66763e8a1b1SPhilip Reames; NON-POW2-NEXT:    [[GEP_B_0:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i32 0
66863e8a1b1SPhilip Reames; NON-POW2-NEXT:    [[TMP1:%.*]] = load <3 x float>, ptr [[GEP_A_0]], align 4
66963e8a1b1SPhilip Reames; NON-POW2-NEXT:    [[TMP2:%.*]] = load <3 x float>, ptr [[GEP_B_0]], align 4
67063e8a1b1SPhilip Reames; NON-POW2-NEXT:    [[TMP3:%.*]] = fmul fast <3 x float> [[TMP1]], [[TMP2]]
67163e8a1b1SPhilip Reames; NON-POW2-NEXT:    [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v3f32(float 0.000000e+00, <3 x float> [[TMP3]])
67263e8a1b1SPhilip Reames; NON-POW2-NEXT:    ret float [[TMP4]]
67363e8a1b1SPhilip Reames;
67463e8a1b1SPhilip Reames; POW2-ONLY-LABEL: @dot_product_fp32_reorder(
67563e8a1b1SPhilip Reames; POW2-ONLY-NEXT:    [[GEP_A_0:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i32 0
67663e8a1b1SPhilip Reames; POW2-ONLY-NEXT:    [[GEP_A_2:%.*]] = getelementptr inbounds float, ptr [[A]], i32 2
67763e8a1b1SPhilip Reames; POW2-ONLY-NEXT:    [[L_A_2:%.*]] = load float, ptr [[GEP_A_2]], align 4
67863e8a1b1SPhilip Reames; POW2-ONLY-NEXT:    [[GEP_B_0:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i32 0
67963e8a1b1SPhilip Reames; POW2-ONLY-NEXT:    [[GEP_B_2:%.*]] = getelementptr inbounds float, ptr [[B]], i32 2
68063e8a1b1SPhilip Reames; POW2-ONLY-NEXT:    [[L_B_2:%.*]] = load float, ptr [[GEP_B_2]], align 4
68163e8a1b1SPhilip Reames; POW2-ONLY-NEXT:    [[TMP1:%.*]] = load <2 x float>, ptr [[GEP_A_0]], align 4
68263e8a1b1SPhilip Reames; POW2-ONLY-NEXT:    [[TMP2:%.*]] = load <2 x float>, ptr [[GEP_B_0]], align 4
68363e8a1b1SPhilip Reames; POW2-ONLY-NEXT:    [[TMP3:%.*]] = fmul fast <2 x float> [[TMP1]], [[TMP2]]
68463e8a1b1SPhilip Reames; POW2-ONLY-NEXT:    [[MUL_2:%.*]] = fmul fast float [[L_A_2]], [[L_B_2]]
68563e8a1b1SPhilip Reames; POW2-ONLY-NEXT:    [[TMP4:%.*]] = extractelement <2 x float> [[TMP3]], i32 0
68663e8a1b1SPhilip Reames; POW2-ONLY-NEXT:    [[TMP5:%.*]] = extractelement <2 x float> [[TMP3]], i32 1
68763e8a1b1SPhilip Reames; POW2-ONLY-NEXT:    [[ADD_0:%.*]] = fadd fast float [[TMP5]], [[TMP4]]
68863e8a1b1SPhilip Reames; POW2-ONLY-NEXT:    [[ADD_1:%.*]] = fadd fast float [[ADD_0]], [[MUL_2]]
68963e8a1b1SPhilip Reames; POW2-ONLY-NEXT:    ret float [[ADD_1]]
69022ba3511SPhilip Reames;
69122ba3511SPhilip Reames  %gep.a.0 = getelementptr inbounds float, ptr %a, i32 0
69222ba3511SPhilip Reames  %l.a.0 = load float, ptr %gep.a.0, align 4
69322ba3511SPhilip Reames  %gep.a.1 = getelementptr inbounds float, ptr %a, i32 1
69422ba3511SPhilip Reames  %l.a.1 = load float, ptr %gep.a.1, align 4
69522ba3511SPhilip Reames  %gep.a.2 = getelementptr inbounds float, ptr %a, i32 2
69622ba3511SPhilip Reames  %l.a.2 = load float, ptr %gep.a.2, align 4
69722ba3511SPhilip Reames
69822ba3511SPhilip Reames  %gep.b.0 = getelementptr inbounds float, ptr %b, i32 0
69922ba3511SPhilip Reames  %l.b.0 = load float, ptr %gep.b.0, align 4
70022ba3511SPhilip Reames  %gep.b.1 = getelementptr inbounds float, ptr %b, i32 1
70122ba3511SPhilip Reames  %l.b.1 = load float, ptr %gep.b.1, align 4
70222ba3511SPhilip Reames  %gep.b.2 = getelementptr inbounds float, ptr %b, i32 2
70322ba3511SPhilip Reames  %l.b.2 = load float, ptr %gep.b.2, align 4
70422ba3511SPhilip Reames
70522ba3511SPhilip Reames  %mul.0 = fmul fast float %l.a.0, %l.b.0
70622ba3511SPhilip Reames  %mul.1 = fmul fast float %l.a.1, %l.b.1
70722ba3511SPhilip Reames  %mul.2 = fmul fast float %l.a.2, %l.b.2
70822ba3511SPhilip Reames
  ; Perturbed order: lane 1's product is the first operand of the first fadd.
70922ba3511SPhilip Reames  %add.0 = fadd fast float %mul.1, %mul.0
71022ba3511SPhilip Reames  %add.1 = fadd fast float %add.0, %mul.2
71122ba3511SPhilip Reames  ret float %add.1
71222ba3511SPhilip Reames}
71322ba3511SPhilip Reames
71422ba3511SPhilip Reames
; dot_product_fp64: 3-element double dot product. Loads three consecutive
; double values from each of %a and %b, multiplies them pairwise
; (`fmul fast`), and sums the products with a chain of two `fadd fast`
; instructions.
;   NON-POW2:  expects two <3 x double> loads, one vector `fmul fast`, and
;              @llvm.vector.reduce.fadd.v3f64.
;   POW2-ONLY: expects lanes 0 and 1 to be vectorized as <2 x double> (loads
;              and fmul) while lane 2 stays scalar; the fadd chain stays
;              scalar and reads the vector lanes through extractelement.
; NOTE(review): every double load here is `align 4`, and the expected vector
; loads keep `align 4` as well. Presumably intentional (or carried over from
; the float variant) -- confirm before changing, since the CHECK lines pin it.
715acb33a0cSPhilip Reamesdefine double @dot_product_fp64(ptr %a, ptr %b) {
716ed03070eSPhilip Reames; NON-POW2-LABEL: @dot_product_fp64(
717ed03070eSPhilip Reames; NON-POW2-NEXT:    [[GEP_A_0:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i32 0
718ed03070eSPhilip Reames; NON-POW2-NEXT:    [[GEP_B_0:%.*]] = getelementptr inbounds double, ptr [[B:%.*]], i32 0
719ed03070eSPhilip Reames; NON-POW2-NEXT:    [[TMP1:%.*]] = load <3 x double>, ptr [[GEP_A_0]], align 4
720ed03070eSPhilip Reames; NON-POW2-NEXT:    [[TMP2:%.*]] = load <3 x double>, ptr [[GEP_B_0]], align 4
721ed03070eSPhilip Reames; NON-POW2-NEXT:    [[TMP3:%.*]] = fmul fast <3 x double> [[TMP1]], [[TMP2]]
7222c7786e9SPhilip Reames; NON-POW2-NEXT:    [[TMP4:%.*]] = call fast double @llvm.vector.reduce.fadd.v3f64(double 0.000000e+00, <3 x double> [[TMP3]])
723ed03070eSPhilip Reames; NON-POW2-NEXT:    ret double [[TMP4]]
724ed03070eSPhilip Reames;
725ed03070eSPhilip Reames; POW2-ONLY-LABEL: @dot_product_fp64(
726ed03070eSPhilip Reames; POW2-ONLY-NEXT:    [[GEP_A_0:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i32 0
727ed03070eSPhilip Reames; POW2-ONLY-NEXT:    [[GEP_A_2:%.*]] = getelementptr inbounds double, ptr [[A]], i32 2
728ed03070eSPhilip Reames; POW2-ONLY-NEXT:    [[L_A_2:%.*]] = load double, ptr [[GEP_A_2]], align 4
729ed03070eSPhilip Reames; POW2-ONLY-NEXT:    [[GEP_B_0:%.*]] = getelementptr inbounds double, ptr [[B:%.*]], i32 0
730ed03070eSPhilip Reames; POW2-ONLY-NEXT:    [[GEP_B_2:%.*]] = getelementptr inbounds double, ptr [[B]], i32 2
731ed03070eSPhilip Reames; POW2-ONLY-NEXT:    [[L_B_2:%.*]] = load double, ptr [[GEP_B_2]], align 4
732ed03070eSPhilip Reames; POW2-ONLY-NEXT:    [[TMP1:%.*]] = load <2 x double>, ptr [[GEP_A_0]], align 4
733ed03070eSPhilip Reames; POW2-ONLY-NEXT:    [[TMP2:%.*]] = load <2 x double>, ptr [[GEP_B_0]], align 4
734ed03070eSPhilip Reames; POW2-ONLY-NEXT:    [[TMP3:%.*]] = fmul fast <2 x double> [[TMP1]], [[TMP2]]
735ed03070eSPhilip Reames; POW2-ONLY-NEXT:    [[MUL_2:%.*]] = fmul fast double [[L_A_2]], [[L_B_2]]
736ed03070eSPhilip Reames; POW2-ONLY-NEXT:    [[TMP4:%.*]] = extractelement <2 x double> [[TMP3]], i32 0
737ed03070eSPhilip Reames; POW2-ONLY-NEXT:    [[TMP5:%.*]] = extractelement <2 x double> [[TMP3]], i32 1
738ed03070eSPhilip Reames; POW2-ONLY-NEXT:    [[ADD_0:%.*]] = fadd fast double [[TMP4]], [[TMP5]]
739ed03070eSPhilip Reames; POW2-ONLY-NEXT:    [[ADD_1:%.*]] = fadd fast double [[ADD_0]], [[MUL_2]]
740ed03070eSPhilip Reames; POW2-ONLY-NEXT:    ret double [[ADD_1]]
741acb33a0cSPhilip Reames;
742acb33a0cSPhilip Reames  %gep.a.0 = getelementptr inbounds double, ptr %a, i32 0
743acb33a0cSPhilip Reames  %l.a.0 = load double, ptr %gep.a.0, align 4
744acb33a0cSPhilip Reames  %gep.a.1 = getelementptr inbounds double, ptr %a, i32 1
745acb33a0cSPhilip Reames  %l.a.1 = load double, ptr %gep.a.1, align 4
746acb33a0cSPhilip Reames  %gep.a.2 = getelementptr inbounds double, ptr %a, i32 2
747acb33a0cSPhilip Reames  %l.a.2 = load double, ptr %gep.a.2, align 4
748acb33a0cSPhilip Reames
749acb33a0cSPhilip Reames  %gep.b.0 = getelementptr inbounds double, ptr %b, i32 0
750acb33a0cSPhilip Reames  %l.b.0 = load double, ptr %gep.b.0, align 4
751acb33a0cSPhilip Reames  %gep.b.1 = getelementptr inbounds double, ptr %b, i32 1
752acb33a0cSPhilip Reames  %l.b.1 = load double, ptr %gep.b.1, align 4
753acb33a0cSPhilip Reames  %gep.b.2 = getelementptr inbounds double, ptr %b, i32 2
754acb33a0cSPhilip Reames  %l.b.2 = load double, ptr %gep.b.2, align 4
755acb33a0cSPhilip Reames
756acb33a0cSPhilip Reames  %mul.0 = fmul fast double %l.a.0, %l.b.0
757acb33a0cSPhilip Reames  %mul.1 = fmul fast double %l.a.1, %l.b.1
758acb33a0cSPhilip Reames  %mul.2 = fmul fast double %l.a.2, %l.b.2
759acb33a0cSPhilip Reames
760acb33a0cSPhilip Reames  %add.0 = fadd fast double %mul.0, %mul.1
761acb33a0cSPhilip Reames  %add.1 = fadd fast double %add.0, %mul.2
762acb33a0cSPhilip Reames  ret double %add.1
763acb33a0cSPhilip Reames}
764acb33a0cSPhilip Reames
765247d3ea8SPhilip Reames;; Covers a case where SLP would previously crash due to a
766247d3ea8SPhilip Reames;; missing bailout in TryToFindDuplicates for the case
767247d3ea8SPhilip Reames;; where a VL=3 list was vectorized directly (without
768247d3ea8SPhilip Reames;; a root instruction such as a store or reduce).
;; no_root_reshuffle: loads three doubles at byte offsets 0, 8 and 16 from
;; %ptr and returns (p[2] * p[2]) * p[1] + p[0] * p[0], all with `fast` flags.
;; Both RUN lines share the CHECK prefix here, and the expected output is
;; identical to the scalar input, i.e. nothing is vectorized.
769247d3ea8SPhilip Reamesdefine double @no_root_reshuffle(ptr  %ptr) {
770247d3ea8SPhilip Reames; CHECK-LABEL: @no_root_reshuffle(
771247d3ea8SPhilip Reames; CHECK-NEXT:  entry:
772247d3ea8SPhilip Reames; CHECK-NEXT:    [[TMP0:%.*]] = load double, ptr [[PTR:%.*]], align 8
773247d3ea8SPhilip Reames; CHECK-NEXT:    [[MUL:%.*]] = fmul fast double [[TMP0]], [[TMP0]]
774247d3ea8SPhilip Reames; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 8
775247d3ea8SPhilip Reames; CHECK-NEXT:    [[TMP1:%.*]] = load double, ptr [[ARRAYIDX2]], align 8
776247d3ea8SPhilip Reames; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 16
777247d3ea8SPhilip Reames; CHECK-NEXT:    [[TMP2:%.*]] = load double, ptr [[ARRAYIDX3]], align 8
778247d3ea8SPhilip Reames; CHECK-NEXT:    [[TMP3:%.*]] = fmul fast double [[TMP2]], [[TMP2]]
779247d3ea8SPhilip Reames; CHECK-NEXT:    [[MUL6:%.*]] = fmul fast double [[TMP3]], [[TMP1]]
780247d3ea8SPhilip Reames; CHECK-NEXT:    [[ADD:%.*]] = fadd fast double [[MUL6]], [[MUL]]
781247d3ea8SPhilip Reames; CHECK-NEXT:    ret double [[ADD]]
782247d3ea8SPhilip Reames;
783247d3ea8SPhilip Reamesentry:
784247d3ea8SPhilip Reames  %0 = load double, ptr %ptr, align 8
785247d3ea8SPhilip Reames  %mul = fmul fast double %0, %0
786247d3ea8SPhilip Reames  %arrayidx2 = getelementptr inbounds i8, ptr %ptr, i64 8
787247d3ea8SPhilip Reames  %1 = load double, ptr %arrayidx2, align 8
788247d3ea8SPhilip Reames  %arrayidx3 = getelementptr inbounds i8, ptr %ptr, i64 16
789247d3ea8SPhilip Reames  %2 = load double, ptr %arrayidx3, align 8
790247d3ea8SPhilip Reames  %3 = fmul fast double %2, %2
791247d3ea8SPhilip Reames  %mul6 = fmul fast double %3, %1
792247d3ea8SPhilip Reames  %add = fadd fast double %mul6, %mul
793247d3ea8SPhilip Reames  ret double %add
794247d3ea8SPhilip Reames}
795acb33a0cSPhilip Reames
; reduce_fadd_after_fmul_of_buildvec: the three inputs are scalar float
; arguments rather than loads. Each is multiplied by 10.0 (`fmul fast`) and the
; products are summed with a chain of two `fadd fast` instructions.
;   NON-POW2:  expects the arguments to be gathered into a <3 x float> with
;              three insertelements, then a vector `fmul fast` by
;              splat (float 1.000000e+01) and @llvm.vector.reduce.fadd.v3f32.
;   POW2-ONLY: expects the scalar code to be left as-is.
796fa8b737aSPhilip Reamesdefine float @reduce_fadd_after_fmul_of_buildvec(float %a, float %b, float %c) {
7977f6bbb3cSPhilip Reames; NON-POW2-LABEL: @reduce_fadd_after_fmul_of_buildvec(
7987f6bbb3cSPhilip Reames; NON-POW2-NEXT:    [[TMP1:%.*]] = insertelement <3 x float> poison, float [[A:%.*]], i32 0
7997f6bbb3cSPhilip Reames; NON-POW2-NEXT:    [[TMP2:%.*]] = insertelement <3 x float> [[TMP1]], float [[B:%.*]], i32 1
8007f6bbb3cSPhilip Reames; NON-POW2-NEXT:    [[TMP3:%.*]] = insertelement <3 x float> [[TMP2]], float [[C:%.*]], i32 2
80138fffa63SPaul Walker; NON-POW2-NEXT:    [[TMP4:%.*]] = fmul fast <3 x float> [[TMP3]], splat (float 1.000000e+01)
8027f6bbb3cSPhilip Reames; NON-POW2-NEXT:    [[TMP5:%.*]] = call fast float @llvm.vector.reduce.fadd.v3f32(float 0.000000e+00, <3 x float> [[TMP4]])
8037f6bbb3cSPhilip Reames; NON-POW2-NEXT:    ret float [[TMP5]]
8047f6bbb3cSPhilip Reames;
8057f6bbb3cSPhilip Reames; POW2-ONLY-LABEL: @reduce_fadd_after_fmul_of_buildvec(
8067f6bbb3cSPhilip Reames; POW2-ONLY-NEXT:    [[MUL_0:%.*]] = fmul fast float [[A:%.*]], 1.000000e+01
8077f6bbb3cSPhilip Reames; POW2-ONLY-NEXT:    [[MUL_1:%.*]] = fmul fast float [[B:%.*]], 1.000000e+01
8087f6bbb3cSPhilip Reames; POW2-ONLY-NEXT:    [[MUL_2:%.*]] = fmul fast float [[C:%.*]], 1.000000e+01
8097f6bbb3cSPhilip Reames; POW2-ONLY-NEXT:    [[ADD_0:%.*]] = fadd fast float [[MUL_0]], [[MUL_1]]
8107f6bbb3cSPhilip Reames; POW2-ONLY-NEXT:    [[ADD_1:%.*]] = fadd fast float [[ADD_0]], [[MUL_2]]
8117f6bbb3cSPhilip Reames; POW2-ONLY-NEXT:    ret float [[ADD_1]]
812fa8b737aSPhilip Reames;
813fa8b737aSPhilip Reames  %mul.0 = fmul fast float %a, 10.0
814fa8b737aSPhilip Reames  %mul.1 = fmul fast float %b, 10.0
815fa8b737aSPhilip Reames  %mul.2 = fmul fast float %c, 10.0
816fa8b737aSPhilip Reames
817fa8b737aSPhilip Reames  %add.0 = fadd fast float %mul.0, %mul.1
818fa8b737aSPhilip Reames  %add.1 = fadd fast float %add.0, %mul.2
819fa8b737aSPhilip Reames  ret float %add.1
820fa8b737aSPhilip Reames}
821fa8b737aSPhilip Reames
822fa8b737aSPhilip Reames
; Declarations of the llvm.fmuladd intrinsics for float and double.
; NOTE(review): none of the functions from @reduce_fadd onward call these;
; presumably they are used by tests earlier in the file -- confirm.
8234dda564cSPhilip Reamesdeclare float @llvm.fmuladd.f32(float, float, float)
8244dda564cSPhilip Reames
8254dda564cSPhilip Reamesdeclare double @llvm.fmuladd.f64(double, double, double)
826