14dda564cSPhilip Reames; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 24dda564cSPhilip Reames; RUN: opt -passes=slp-vectorizer -slp-vectorize-non-power-of-2 -mtriple=riscv64 -mattr=+v -S %s | FileCheck --check-prefixes=CHECK,NON-POW2 %s 34dda564cSPhilip Reames; RUN: opt -passes=slp-vectorizer -slp-vectorize-non-power-of-2=false -mtriple=riscv64 -mattr=+v -S %s | FileCheck --check-prefixes=CHECK,POW2-ONLY %s 44dda564cSPhilip Reames 54dda564cSPhilip Reamesdefine void @v3_load_i32_mul_by_constant_store(ptr %src, ptr %dst) { 64dda564cSPhilip Reames; NON-POW2-LABEL: @v3_load_i32_mul_by_constant_store( 74dda564cSPhilip Reames; NON-POW2-NEXT: entry: 84dda564cSPhilip Reames; NON-POW2-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i32 0 94dda564cSPhilip Reames; NON-POW2-NEXT: [[TMP0:%.*]] = load <3 x i32>, ptr [[GEP_SRC_0]], align 4 1038fffa63SPaul Walker; NON-POW2-NEXT: [[TMP1:%.*]] = mul nsw <3 x i32> [[TMP0]], splat (i32 10) 114dda564cSPhilip Reames; NON-POW2-NEXT: store <3 x i32> [[TMP1]], ptr [[DST:%.*]], align 4 124dda564cSPhilip Reames; NON-POW2-NEXT: ret void 134dda564cSPhilip Reames; 144dda564cSPhilip Reames; POW2-ONLY-LABEL: @v3_load_i32_mul_by_constant_store( 154dda564cSPhilip Reames; POW2-ONLY-NEXT: entry: 164dda564cSPhilip Reames; POW2-ONLY-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i32 0 174dda564cSPhilip Reames; POW2-ONLY-NEXT: [[GEP_SRC_2:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 2 184dda564cSPhilip Reames; POW2-ONLY-NEXT: [[L_SRC_2:%.*]] = load i32, ptr [[GEP_SRC_2]], align 4 194dda564cSPhilip Reames; POW2-ONLY-NEXT: [[MUL_2:%.*]] = mul nsw i32 [[L_SRC_2]], 10 204dda564cSPhilip Reames; POW2-ONLY-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[GEP_SRC_0]], align 4 2138fffa63SPaul Walker; POW2-ONLY-NEXT: [[TMP1:%.*]] = mul nsw <2 x i32> [[TMP0]], splat (i32 10) 224dda564cSPhilip Reames; POW2-ONLY-NEXT: store <2 x i32> [[TMP1]], ptr [[DST:%.*]], align 4 234dda564cSPhilip 
Reames; POW2-ONLY-NEXT: [[DST_2:%.*]] = getelementptr i32, ptr [[DST]], i32 2 244dda564cSPhilip Reames; POW2-ONLY-NEXT: store i32 [[MUL_2]], ptr [[DST_2]], align 4 254dda564cSPhilip Reames; POW2-ONLY-NEXT: ret void 264dda564cSPhilip Reames; 274dda564cSPhilip Reamesentry: 284dda564cSPhilip Reames %gep.src.0 = getelementptr inbounds i32, ptr %src, i32 0 294dda564cSPhilip Reames %l.src.0 = load i32, ptr %gep.src.0, align 4 304dda564cSPhilip Reames %mul.0 = mul nsw i32 %l.src.0, 10 314dda564cSPhilip Reames 324dda564cSPhilip Reames %gep.src.1 = getelementptr inbounds i32, ptr %src, i32 1 334dda564cSPhilip Reames %l.src.1 = load i32, ptr %gep.src.1, align 4 344dda564cSPhilip Reames %mul.1 = mul nsw i32 %l.src.1, 10 354dda564cSPhilip Reames 364dda564cSPhilip Reames %gep.src.2 = getelementptr inbounds i32, ptr %src, i32 2 374dda564cSPhilip Reames %l.src.2 = load i32, ptr %gep.src.2, align 4 384dda564cSPhilip Reames %mul.2 = mul nsw i32 %l.src.2, 10 394dda564cSPhilip Reames 404dda564cSPhilip Reames store i32 %mul.0, ptr %dst 414dda564cSPhilip Reames 424dda564cSPhilip Reames %dst.1 = getelementptr i32, ptr %dst, i32 1 434dda564cSPhilip Reames store i32 %mul.1, ptr %dst.1 444dda564cSPhilip Reames 454dda564cSPhilip Reames %dst.2 = getelementptr i32, ptr %dst, i32 2 464dda564cSPhilip Reames store i32 %mul.2, ptr %dst.2 474dda564cSPhilip Reames 484dda564cSPhilip Reames ret void 494dda564cSPhilip Reames} 504dda564cSPhilip Reames 514dda564cSPhilip Reames; Should not be vectorized with an undef/poison element as padding, as 524dda564cSPhilip Reames; division by undef/poison may cause UB. Must use VL predication or 534dda564cSPhilip Reames; masking instead, where RISCV wins. 
544dda564cSPhilip Reamesdefine void @v3_load_i32_udiv_by_constant_store(ptr %src, ptr %dst) { 554dda564cSPhilip Reames; NON-POW2-LABEL: @v3_load_i32_udiv_by_constant_store( 564dda564cSPhilip Reames; NON-POW2-NEXT: entry: 574dda564cSPhilip Reames; NON-POW2-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i32 0 584dda564cSPhilip Reames; NON-POW2-NEXT: [[TMP0:%.*]] = load <3 x i32>, ptr [[GEP_SRC_0]], align 4 5938fffa63SPaul Walker; NON-POW2-NEXT: [[TMP1:%.*]] = udiv <3 x i32> splat (i32 10), [[TMP0]] 604dda564cSPhilip Reames; NON-POW2-NEXT: store <3 x i32> [[TMP1]], ptr [[DST:%.*]], align 4 614dda564cSPhilip Reames; NON-POW2-NEXT: ret void 624dda564cSPhilip Reames; 634dda564cSPhilip Reames; POW2-ONLY-LABEL: @v3_load_i32_udiv_by_constant_store( 644dda564cSPhilip Reames; POW2-ONLY-NEXT: entry: 654dda564cSPhilip Reames; POW2-ONLY-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i32 0 664dda564cSPhilip Reames; POW2-ONLY-NEXT: [[L_SRC_0:%.*]] = load i32, ptr [[GEP_SRC_0]], align 4 674dda564cSPhilip Reames; POW2-ONLY-NEXT: [[MUL_0:%.*]] = udiv i32 10, [[L_SRC_0]] 684dda564cSPhilip Reames; POW2-ONLY-NEXT: [[GEP_SRC_1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 1 694dda564cSPhilip Reames; POW2-ONLY-NEXT: [[L_SRC_1:%.*]] = load i32, ptr [[GEP_SRC_1]], align 4 704dda564cSPhilip Reames; POW2-ONLY-NEXT: [[MUL_1:%.*]] = udiv i32 10, [[L_SRC_1]] 714dda564cSPhilip Reames; POW2-ONLY-NEXT: [[GEP_SRC_2:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 2 724dda564cSPhilip Reames; POW2-ONLY-NEXT: [[L_SRC_2:%.*]] = load i32, ptr [[GEP_SRC_2]], align 4 734dda564cSPhilip Reames; POW2-ONLY-NEXT: [[MUL_2:%.*]] = udiv i32 10, [[L_SRC_2]] 744dda564cSPhilip Reames; POW2-ONLY-NEXT: store i32 [[MUL_0]], ptr [[DST:%.*]], align 4 754dda564cSPhilip Reames; POW2-ONLY-NEXT: [[DST_1:%.*]] = getelementptr i32, ptr [[DST]], i32 1 764dda564cSPhilip Reames; POW2-ONLY-NEXT: store i32 [[MUL_1]], ptr [[DST_1]], align 4 774dda564cSPhilip Reames; 
POW2-ONLY-NEXT: [[DST_2:%.*]] = getelementptr i32, ptr [[DST]], i32 2 784dda564cSPhilip Reames; POW2-ONLY-NEXT: store i32 [[MUL_2]], ptr [[DST_2]], align 4 794dda564cSPhilip Reames; POW2-ONLY-NEXT: ret void 804dda564cSPhilip Reames; 814dda564cSPhilip Reamesentry: 824dda564cSPhilip Reames %gep.src.0 = getelementptr inbounds i32, ptr %src, i32 0 834dda564cSPhilip Reames %l.src.0 = load i32, ptr %gep.src.0, align 4 844dda564cSPhilip Reames %mul.0 = udiv i32 10, %l.src.0 854dda564cSPhilip Reames 864dda564cSPhilip Reames %gep.src.1 = getelementptr inbounds i32, ptr %src, i32 1 874dda564cSPhilip Reames %l.src.1 = load i32, ptr %gep.src.1, align 4 884dda564cSPhilip Reames %mul.1 = udiv i32 10, %l.src.1 894dda564cSPhilip Reames 904dda564cSPhilip Reames %gep.src.2 = getelementptr inbounds i32, ptr %src, i32 2 914dda564cSPhilip Reames %l.src.2 = load i32, ptr %gep.src.2, align 4 924dda564cSPhilip Reames %mul.2 = udiv i32 10, %l.src.2 934dda564cSPhilip Reames 944dda564cSPhilip Reames store i32 %mul.0, ptr %dst 954dda564cSPhilip Reames 964dda564cSPhilip Reames %dst.1 = getelementptr i32, ptr %dst, i32 1 974dda564cSPhilip Reames store i32 %mul.1, ptr %dst.1 984dda564cSPhilip Reames 994dda564cSPhilip Reames %dst.2 = getelementptr i32, ptr %dst, i32 2 1004dda564cSPhilip Reames store i32 %mul.2, ptr %dst.2 1014dda564cSPhilip Reames 1024dda564cSPhilip Reames ret void 1034dda564cSPhilip Reames} 1044dda564cSPhilip Reames 1054dda564cSPhilip Reames 1064dda564cSPhilip Reames 1074dda564cSPhilip Reamesdefine void @v3_load_i32_mul_store(ptr %src.1, ptr %src.2, ptr %dst) { 1084dda564cSPhilip Reames; NON-POW2-LABEL: @v3_load_i32_mul_store( 1094dda564cSPhilip Reames; NON-POW2-NEXT: entry: 1104dda564cSPhilip Reames; NON-POW2-NEXT: [[GEP_SRC_1_0:%.*]] = getelementptr inbounds i32, ptr [[SRC_1:%.*]], i32 0 1114dda564cSPhilip Reames; NON-POW2-NEXT: [[GEP_SRC_2_0:%.*]] = getelementptr inbounds i32, ptr [[SRC_2:%.*]], i32 0 1124dda564cSPhilip Reames; NON-POW2-NEXT: [[TMP0:%.*]] = load <3 x i32>, 
ptr [[GEP_SRC_1_0]], align 4 1134dda564cSPhilip Reames; NON-POW2-NEXT: [[TMP1:%.*]] = load <3 x i32>, ptr [[GEP_SRC_2_0]], align 4 1144dda564cSPhilip Reames; NON-POW2-NEXT: [[TMP2:%.*]] = mul nsw <3 x i32> [[TMP0]], [[TMP1]] 1154dda564cSPhilip Reames; NON-POW2-NEXT: store <3 x i32> [[TMP2]], ptr [[DST:%.*]], align 4 1164dda564cSPhilip Reames; NON-POW2-NEXT: ret void 1174dda564cSPhilip Reames; 1184dda564cSPhilip Reames; POW2-ONLY-LABEL: @v3_load_i32_mul_store( 1194dda564cSPhilip Reames; POW2-ONLY-NEXT: entry: 1204dda564cSPhilip Reames; POW2-ONLY-NEXT: [[GEP_SRC_1_0:%.*]] = getelementptr inbounds i32, ptr [[SRC_1:%.*]], i32 0 1214dda564cSPhilip Reames; POW2-ONLY-NEXT: [[GEP_SRC_2_0:%.*]] = getelementptr inbounds i32, ptr [[SRC_2:%.*]], i32 0 1224dda564cSPhilip Reames; POW2-ONLY-NEXT: [[GEP_SRC_1_2:%.*]] = getelementptr inbounds i32, ptr [[SRC_1]], i32 2 1234dda564cSPhilip Reames; POW2-ONLY-NEXT: [[L_SRC_1_2:%.*]] = load i32, ptr [[GEP_SRC_1_2]], align 4 1244dda564cSPhilip Reames; POW2-ONLY-NEXT: [[GEP_SRC_2_2:%.*]] = getelementptr inbounds i32, ptr [[SRC_2]], i32 2 1254dda564cSPhilip Reames; POW2-ONLY-NEXT: [[L_SRC_2_2:%.*]] = load i32, ptr [[GEP_SRC_2_2]], align 4 1264dda564cSPhilip Reames; POW2-ONLY-NEXT: [[MUL_2:%.*]] = mul nsw i32 [[L_SRC_1_2]], [[L_SRC_2_2]] 1274dda564cSPhilip Reames; POW2-ONLY-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[GEP_SRC_1_0]], align 4 1284dda564cSPhilip Reames; POW2-ONLY-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[GEP_SRC_2_0]], align 4 1294dda564cSPhilip Reames; POW2-ONLY-NEXT: [[TMP2:%.*]] = mul nsw <2 x i32> [[TMP0]], [[TMP1]] 1304dda564cSPhilip Reames; POW2-ONLY-NEXT: store <2 x i32> [[TMP2]], ptr [[DST:%.*]], align 4 1314dda564cSPhilip Reames; POW2-ONLY-NEXT: [[DST_2:%.*]] = getelementptr i32, ptr [[DST]], i32 2 1324dda564cSPhilip Reames; POW2-ONLY-NEXT: store i32 [[MUL_2]], ptr [[DST_2]], align 4 1334dda564cSPhilip Reames; POW2-ONLY-NEXT: ret void 1344dda564cSPhilip Reames; 1354dda564cSPhilip Reamesentry: 1364dda564cSPhilip Reames 
%gep.src.1.0 = getelementptr inbounds i32, ptr %src.1, i32 0 1374dda564cSPhilip Reames %l.src.1.0 = load i32, ptr %gep.src.1.0, align 4 1384dda564cSPhilip Reames %gep.src.2.0 = getelementptr inbounds i32, ptr %src.2, i32 0 1394dda564cSPhilip Reames %l.src.2.0 = load i32, ptr %gep.src.2.0, align 4 1404dda564cSPhilip Reames %mul.0 = mul nsw i32 %l.src.1.0, %l.src.2.0 1414dda564cSPhilip Reames 1424dda564cSPhilip Reames %gep.src.1.1 = getelementptr inbounds i32, ptr %src.1, i32 1 1434dda564cSPhilip Reames %l.src.1.1 = load i32, ptr %gep.src.1.1, align 4 1444dda564cSPhilip Reames %gep.src.2.1 = getelementptr inbounds i32, ptr %src.2, i32 1 1454dda564cSPhilip Reames %l.src.2.1 = load i32, ptr %gep.src.2.1, align 4 1464dda564cSPhilip Reames %mul.1 = mul nsw i32 %l.src.1.1, %l.src.2.1 1474dda564cSPhilip Reames 1484dda564cSPhilip Reames %gep.src.1.2 = getelementptr inbounds i32, ptr %src.1, i32 2 1494dda564cSPhilip Reames %l.src.1.2 = load i32, ptr %gep.src.1.2, align 4 1504dda564cSPhilip Reames %gep.src.2.2 = getelementptr inbounds i32, ptr %src.2, i32 2 1514dda564cSPhilip Reames %l.src.2.2 = load i32, ptr %gep.src.2.2, align 4 1524dda564cSPhilip Reames %mul.2 = mul nsw i32 %l.src.1.2, %l.src.2.2 1534dda564cSPhilip Reames 1544dda564cSPhilip Reames store i32 %mul.0, ptr %dst 1554dda564cSPhilip Reames 1564dda564cSPhilip Reames %dst.1 = getelementptr i32, ptr %dst, i32 1 1574dda564cSPhilip Reames store i32 %mul.1, ptr %dst.1 1584dda564cSPhilip Reames 1594dda564cSPhilip Reames %dst.2 = getelementptr i32, ptr %dst, i32 2 1604dda564cSPhilip Reames store i32 %mul.2, ptr %dst.2 1614dda564cSPhilip Reames 1624dda564cSPhilip Reames ret void 1634dda564cSPhilip Reames} 1644dda564cSPhilip Reames 1654dda564cSPhilip Reamesdefine void @v3_load_i32_mul_add_const_store(ptr %src.1, ptr %src.2, ptr %dst) { 1664dda564cSPhilip Reames; NON-POW2-LABEL: @v3_load_i32_mul_add_const_store( 1674dda564cSPhilip Reames; NON-POW2-NEXT: entry: 1684dda564cSPhilip Reames; NON-POW2-NEXT: [[GEP_SRC_1_0:%.*]] = 
getelementptr inbounds i32, ptr [[SRC_1:%.*]], i32 0 1694dda564cSPhilip Reames; NON-POW2-NEXT: [[GEP_SRC_2_0:%.*]] = getelementptr inbounds i32, ptr [[SRC_2:%.*]], i32 0 1704dda564cSPhilip Reames; NON-POW2-NEXT: [[TMP0:%.*]] = load <3 x i32>, ptr [[GEP_SRC_1_0]], align 4 1714dda564cSPhilip Reames; NON-POW2-NEXT: [[TMP1:%.*]] = load <3 x i32>, ptr [[GEP_SRC_2_0]], align 4 1724dda564cSPhilip Reames; NON-POW2-NEXT: [[TMP2:%.*]] = mul nsw <3 x i32> [[TMP0]], [[TMP1]] 17338fffa63SPaul Walker; NON-POW2-NEXT: [[TMP3:%.*]] = add <3 x i32> [[TMP2]], splat (i32 9) 1744dda564cSPhilip Reames; NON-POW2-NEXT: store <3 x i32> [[TMP3]], ptr [[DST:%.*]], align 4 1754dda564cSPhilip Reames; NON-POW2-NEXT: ret void 1764dda564cSPhilip Reames; 1774dda564cSPhilip Reames; POW2-ONLY-LABEL: @v3_load_i32_mul_add_const_store( 1784dda564cSPhilip Reames; POW2-ONLY-NEXT: entry: 1794dda564cSPhilip Reames; POW2-ONLY-NEXT: [[GEP_SRC_1_0:%.*]] = getelementptr inbounds i32, ptr [[SRC_1:%.*]], i32 0 1804dda564cSPhilip Reames; POW2-ONLY-NEXT: [[GEP_SRC_2_0:%.*]] = getelementptr inbounds i32, ptr [[SRC_2:%.*]], i32 0 1814dda564cSPhilip Reames; POW2-ONLY-NEXT: [[GEP_SRC_1_2:%.*]] = getelementptr inbounds i32, ptr [[SRC_1]], i32 2 1824dda564cSPhilip Reames; POW2-ONLY-NEXT: [[L_SRC_1_2:%.*]] = load i32, ptr [[GEP_SRC_1_2]], align 4 1834dda564cSPhilip Reames; POW2-ONLY-NEXT: [[GEP_SRC_2_2:%.*]] = getelementptr inbounds i32, ptr [[SRC_2]], i32 2 1844dda564cSPhilip Reames; POW2-ONLY-NEXT: [[L_SRC_2_2:%.*]] = load i32, ptr [[GEP_SRC_2_2]], align 4 1854dda564cSPhilip Reames; POW2-ONLY-NEXT: [[MUL_2:%.*]] = mul nsw i32 [[L_SRC_1_2]], [[L_SRC_2_2]] 1864dda564cSPhilip Reames; POW2-ONLY-NEXT: [[ADD_2:%.*]] = add i32 [[MUL_2]], 9 1874dda564cSPhilip Reames; POW2-ONLY-NEXT: [[TMP0:%.*]] = load <2 x i32>, ptr [[GEP_SRC_1_0]], align 4 1884dda564cSPhilip Reames; POW2-ONLY-NEXT: [[TMP1:%.*]] = load <2 x i32>, ptr [[GEP_SRC_2_0]], align 4 1894dda564cSPhilip Reames; POW2-ONLY-NEXT: [[TMP2:%.*]] = mul nsw <2 x i32> [[TMP0]], 
[[TMP1]] 19038fffa63SPaul Walker; POW2-ONLY-NEXT: [[TMP3:%.*]] = add <2 x i32> [[TMP2]], splat (i32 9) 1914dda564cSPhilip Reames; POW2-ONLY-NEXT: store <2 x i32> [[TMP3]], ptr [[DST:%.*]], align 4 1924dda564cSPhilip Reames; POW2-ONLY-NEXT: [[DST_2:%.*]] = getelementptr i32, ptr [[DST]], i32 2 1934dda564cSPhilip Reames; POW2-ONLY-NEXT: store i32 [[ADD_2]], ptr [[DST_2]], align 4 1944dda564cSPhilip Reames; POW2-ONLY-NEXT: ret void 1954dda564cSPhilip Reames; 1964dda564cSPhilip Reamesentry: 1974dda564cSPhilip Reames %gep.src.1.0 = getelementptr inbounds i32, ptr %src.1, i32 0 1984dda564cSPhilip Reames %l.src.1.0 = load i32, ptr %gep.src.1.0, align 4 1994dda564cSPhilip Reames %gep.src.2.0 = getelementptr inbounds i32, ptr %src.2, i32 0 2004dda564cSPhilip Reames %l.src.2.0 = load i32, ptr %gep.src.2.0, align 4 2014dda564cSPhilip Reames %mul.0 = mul nsw i32 %l.src.1.0, %l.src.2.0 2024dda564cSPhilip Reames %add.0 = add i32 %mul.0, 9 2034dda564cSPhilip Reames 2044dda564cSPhilip Reames %gep.src.1.1 = getelementptr inbounds i32, ptr %src.1, i32 1 2054dda564cSPhilip Reames %l.src.1.1 = load i32, ptr %gep.src.1.1, align 4 2064dda564cSPhilip Reames %gep.src.2.1 = getelementptr inbounds i32, ptr %src.2, i32 1 2074dda564cSPhilip Reames %l.src.2.1 = load i32, ptr %gep.src.2.1, align 4 2084dda564cSPhilip Reames %mul.1 = mul nsw i32 %l.src.1.1, %l.src.2.1 2094dda564cSPhilip Reames %add.1 = add i32 %mul.1, 9 2104dda564cSPhilip Reames 2114dda564cSPhilip Reames %gep.src.1.2 = getelementptr inbounds i32, ptr %src.1, i32 2 2124dda564cSPhilip Reames %l.src.1.2 = load i32, ptr %gep.src.1.2, align 4 2134dda564cSPhilip Reames %gep.src.2.2 = getelementptr inbounds i32, ptr %src.2, i32 2 2144dda564cSPhilip Reames %l.src.2.2 = load i32, ptr %gep.src.2.2, align 4 2154dda564cSPhilip Reames %mul.2 = mul nsw i32 %l.src.1.2, %l.src.2.2 2164dda564cSPhilip Reames %add.2 = add i32 %mul.2, 9 2174dda564cSPhilip Reames 2184dda564cSPhilip Reames store i32 %add.0, ptr %dst 2194dda564cSPhilip Reames 
2204dda564cSPhilip Reames %dst.1 = getelementptr i32, ptr %dst, i32 1 2214dda564cSPhilip Reames store i32 %add.1, ptr %dst.1 2224dda564cSPhilip Reames 2234dda564cSPhilip Reames %dst.2 = getelementptr i32, ptr %dst, i32 2 2244dda564cSPhilip Reames store i32 %add.2, ptr %dst.2 2254dda564cSPhilip Reames 2264dda564cSPhilip Reames ret void 2274dda564cSPhilip Reames} 2284dda564cSPhilip Reames 2294dda564cSPhilip Reamesdefine void @v3_load_f32_fadd_fadd_by_constant_store(ptr %src, ptr %dst) { 2304dda564cSPhilip Reames; NON-POW2-LABEL: @v3_load_f32_fadd_fadd_by_constant_store( 2314dda564cSPhilip Reames; NON-POW2-NEXT: entry: 2324dda564cSPhilip Reames; NON-POW2-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i32 0 2334dda564cSPhilip Reames; NON-POW2-NEXT: [[TMP0:%.*]] = load <3 x float>, ptr [[GEP_SRC_0]], align 4 23438fffa63SPaul Walker; NON-POW2-NEXT: [[TMP1:%.*]] = fadd <3 x float> [[TMP0]], splat (float 1.000000e+01) 2354dda564cSPhilip Reames; NON-POW2-NEXT: store <3 x float> [[TMP1]], ptr [[DST:%.*]], align 4 2364dda564cSPhilip Reames; NON-POW2-NEXT: ret void 2374dda564cSPhilip Reames; 2384dda564cSPhilip Reames; POW2-ONLY-LABEL: @v3_load_f32_fadd_fadd_by_constant_store( 2394dda564cSPhilip Reames; POW2-ONLY-NEXT: entry: 2404dda564cSPhilip Reames; POW2-ONLY-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i32 0 2414dda564cSPhilip Reames; POW2-ONLY-NEXT: [[GEP_SRC_2:%.*]] = getelementptr inbounds float, ptr [[SRC]], i32 2 2424dda564cSPhilip Reames; POW2-ONLY-NEXT: [[L_SRC_2:%.*]] = load float, ptr [[GEP_SRC_2]], align 4 2434dda564cSPhilip Reames; POW2-ONLY-NEXT: [[FADD_2:%.*]] = fadd float [[L_SRC_2]], 1.000000e+01 2444dda564cSPhilip Reames; POW2-ONLY-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[GEP_SRC_0]], align 4 24538fffa63SPaul Walker; POW2-ONLY-NEXT: [[TMP1:%.*]] = fadd <2 x float> [[TMP0]], splat (float 1.000000e+01) 2464dda564cSPhilip Reames; POW2-ONLY-NEXT: store <2 x float> [[TMP1]], ptr [[DST:%.*]], align 4 
2474dda564cSPhilip Reames; POW2-ONLY-NEXT: [[DST_2:%.*]] = getelementptr float, ptr [[DST]], i32 2 2484dda564cSPhilip Reames; POW2-ONLY-NEXT: store float [[FADD_2]], ptr [[DST_2]], align 4 2494dda564cSPhilip Reames; POW2-ONLY-NEXT: ret void 2504dda564cSPhilip Reames; 2514dda564cSPhilip Reamesentry: 2524dda564cSPhilip Reames %gep.src.0 = getelementptr inbounds float, ptr %src, i32 0 2534dda564cSPhilip Reames %l.src.0 = load float , ptr %gep.src.0, align 4 2544dda564cSPhilip Reames %fadd.0 = fadd float %l.src.0, 10.0 2554dda564cSPhilip Reames 2564dda564cSPhilip Reames %gep.src.1 = getelementptr inbounds float , ptr %src, i32 1 2574dda564cSPhilip Reames %l.src.1 = load float, ptr %gep.src.1, align 4 2584dda564cSPhilip Reames %fadd.1 = fadd float %l.src.1, 10.0 2594dda564cSPhilip Reames 2604dda564cSPhilip Reames %gep.src.2 = getelementptr inbounds float, ptr %src, i32 2 2614dda564cSPhilip Reames %l.src.2 = load float, ptr %gep.src.2, align 4 2624dda564cSPhilip Reames %fadd.2 = fadd float %l.src.2, 10.0 2634dda564cSPhilip Reames 2644dda564cSPhilip Reames store float %fadd.0, ptr %dst 2654dda564cSPhilip Reames 2664dda564cSPhilip Reames %dst.1 = getelementptr float, ptr %dst, i32 1 2674dda564cSPhilip Reames store float %fadd.1, ptr %dst.1 2684dda564cSPhilip Reames 2694dda564cSPhilip Reames %dst.2 = getelementptr float, ptr %dst, i32 2 2704dda564cSPhilip Reames store float %fadd.2, ptr %dst.2 2714dda564cSPhilip Reames 2724dda564cSPhilip Reames ret void 2734dda564cSPhilip Reames} 2744dda564cSPhilip Reames 2754dda564cSPhilip Reamesdefine void @phi_store3(ptr %dst) { 2764dda564cSPhilip Reames; NON-POW2-LABEL: @phi_store3( 2774dda564cSPhilip Reames; NON-POW2-NEXT: entry: 2784dda564cSPhilip Reames; NON-POW2-NEXT: br label [[EXIT:%.*]] 2794dda564cSPhilip Reames; NON-POW2: invoke.cont8.loopexit: 2804dda564cSPhilip Reames; NON-POW2-NEXT: br label [[EXIT]] 2814dda564cSPhilip Reames; NON-POW2: exit: 2824dda564cSPhilip Reames; NON-POW2-NEXT: [[TMP0:%.*]] = phi <3 x i32> [ <i32 1, i32 
2, i32 3>, [[ENTRY:%.*]] ], [ poison, [[INVOKE_CONT8_LOOPEXIT:%.*]] ] 2834dda564cSPhilip Reames; NON-POW2-NEXT: store <3 x i32> [[TMP0]], ptr [[DST:%.*]], align 4 2844dda564cSPhilip Reames; NON-POW2-NEXT: ret void 2854dda564cSPhilip Reames; 2864dda564cSPhilip Reames; POW2-ONLY-LABEL: @phi_store3( 2874dda564cSPhilip Reames; POW2-ONLY-NEXT: entry: 2884dda564cSPhilip Reames; POW2-ONLY-NEXT: br label [[EXIT:%.*]] 2894dda564cSPhilip Reames; POW2-ONLY: invoke.cont8.loopexit: 2904dda564cSPhilip Reames; POW2-ONLY-NEXT: br label [[EXIT]] 2914dda564cSPhilip Reames; POW2-ONLY: exit: 2924dda564cSPhilip Reames; POW2-ONLY-NEXT: [[P_2:%.*]] = phi i32 [ 3, [[ENTRY:%.*]] ], [ 0, [[INVOKE_CONT8_LOOPEXIT:%.*]] ] 2934dda564cSPhilip Reames; POW2-ONLY-NEXT: [[TMP0:%.*]] = phi <2 x i32> [ <i32 1, i32 2>, [[ENTRY]] ], [ poison, [[INVOKE_CONT8_LOOPEXIT]] ] 2944dda564cSPhilip Reames; POW2-ONLY-NEXT: [[DST_2:%.*]] = getelementptr i32, ptr [[DST:%.*]], i32 2 2954dda564cSPhilip Reames; POW2-ONLY-NEXT: store <2 x i32> [[TMP0]], ptr [[DST]], align 4 2964dda564cSPhilip Reames; POW2-ONLY-NEXT: store i32 [[P_2]], ptr [[DST_2]], align 4 2974dda564cSPhilip Reames; POW2-ONLY-NEXT: ret void 2984dda564cSPhilip Reames; 2994dda564cSPhilip Reamesentry: 3004dda564cSPhilip Reames br label %exit 3014dda564cSPhilip Reames 3024dda564cSPhilip Reamesinvoke.cont8.loopexit: ; No predecessors! 
3034dda564cSPhilip Reames br label %exit 3044dda564cSPhilip Reames 3054dda564cSPhilip Reamesexit: 3064dda564cSPhilip Reames %p.0 = phi i32 [ 1, %entry ], [ 0, %invoke.cont8.loopexit ] 3074dda564cSPhilip Reames %p.1 = phi i32 [ 2, %entry ], [ 0, %invoke.cont8.loopexit ] 3084dda564cSPhilip Reames %p.2 = phi i32 [ 3, %entry ], [ 0, %invoke.cont8.loopexit ] 3094dda564cSPhilip Reames 3104dda564cSPhilip Reames %dst.1 = getelementptr i32, ptr %dst, i32 1 3114dda564cSPhilip Reames %dst.2 = getelementptr i32, ptr %dst, i32 2 3124dda564cSPhilip Reames 3134dda564cSPhilip Reames store i32 %p.0, ptr %dst, align 4 3144dda564cSPhilip Reames store i32 %p.1, ptr %dst.1, align 4 3154dda564cSPhilip Reames store i32 %p.2, ptr %dst.2, align 4 3164dda564cSPhilip Reames ret void 3174dda564cSPhilip Reames} 3184dda564cSPhilip Reames 3194dda564cSPhilip Reamesdefine void @store_try_reorder(ptr %dst) { 3204dda564cSPhilip Reames; NON-POW2-LABEL: @store_try_reorder( 3214dda564cSPhilip Reames; NON-POW2-NEXT: entry: 3224dda564cSPhilip Reames; NON-POW2-NEXT: store <3 x i32> zeroinitializer, ptr [[DST:%.*]], align 4 3234dda564cSPhilip Reames; NON-POW2-NEXT: ret void 3244dda564cSPhilip Reames; 3254dda564cSPhilip Reames; POW2-ONLY-LABEL: @store_try_reorder( 3264dda564cSPhilip Reames; POW2-ONLY-NEXT: entry: 327*3133acf1SHan-Kuan Chen; POW2-ONLY-NEXT: [[ADD:%.*]] = add i32 0, 0 328*3133acf1SHan-Kuan Chen; POW2-ONLY-NEXT: store i32 [[ADD]], ptr [[DST:%.*]], align 4 329*3133acf1SHan-Kuan Chen; POW2-ONLY-NEXT: [[ARRAYIDX_I1887:%.*]] = getelementptr i32, ptr [[DST]], i64 1 330*3133acf1SHan-Kuan Chen; POW2-ONLY-NEXT: store <2 x i32> zeroinitializer, ptr [[ARRAYIDX_I1887]], align 4 3314dda564cSPhilip Reames; POW2-ONLY-NEXT: ret void 3324dda564cSPhilip Reames; 3334dda564cSPhilip Reamesentry: 3344dda564cSPhilip Reames %add = add i32 0, 0 3354dda564cSPhilip Reames store i32 %add, ptr %dst, align 4 3364dda564cSPhilip Reames %add207 = sub i32 0, 0 3374dda564cSPhilip Reames %arrayidx.i1887 = getelementptr i32, ptr 
%dst, i64 1 3384dda564cSPhilip Reames store i32 %add207, ptr %arrayidx.i1887, align 4 3394dda564cSPhilip Reames %add216 = sub i32 0, 0 3404dda564cSPhilip Reames %arrayidx.i1891 = getelementptr i32, ptr %dst, i64 2 3414dda564cSPhilip Reames store i32 %add216, ptr %arrayidx.i1891, align 4 3424dda564cSPhilip Reames ret void 3434dda564cSPhilip Reames} 3444dda564cSPhilip Reames 3454dda564cSPhilip Reamesdefine void @vec3_fpext_cost(ptr %Colour, float %0) { 3464dda564cSPhilip Reames; NON-POW2-LABEL: @vec3_fpext_cost( 3474dda564cSPhilip Reames; NON-POW2-NEXT: entry: 3484dda564cSPhilip Reames; NON-POW2-NEXT: [[TMP1:%.*]] = insertelement <3 x float> poison, float [[TMP0:%.*]], i32 0 3494dda564cSPhilip Reames; NON-POW2-NEXT: [[TMP2:%.*]] = shufflevector <3 x float> [[TMP1]], <3 x float> poison, <3 x i32> zeroinitializer 3504dda564cSPhilip Reames; NON-POW2-NEXT: [[TMP3:%.*]] = fpext <3 x float> [[TMP2]] to <3 x double> 3514dda564cSPhilip Reames; NON-POW2-NEXT: [[TMP4:%.*]] = call <3 x double> @llvm.fmuladd.v3f64(<3 x double> [[TMP3]], <3 x double> zeroinitializer, <3 x double> zeroinitializer) 3524dda564cSPhilip Reames; NON-POW2-NEXT: [[TMP5:%.*]] = fptrunc <3 x double> [[TMP4]] to <3 x float> 3534dda564cSPhilip Reames; NON-POW2-NEXT: store <3 x float> [[TMP5]], ptr [[COLOUR:%.*]], align 4 3544dda564cSPhilip Reames; NON-POW2-NEXT: ret void 3554dda564cSPhilip Reames; 3564dda564cSPhilip Reames; POW2-ONLY-LABEL: @vec3_fpext_cost( 3574dda564cSPhilip Reames; POW2-ONLY-NEXT: entry: 3584dda564cSPhilip Reames; POW2-ONLY-NEXT: [[ARRAYIDX80:%.*]] = getelementptr float, ptr [[COLOUR:%.*]], i64 2 3594dda564cSPhilip Reames; POW2-ONLY-NEXT: [[TMP1:%.*]] = insertelement <2 x float> poison, float [[TMP0:%.*]], i32 0 3604dda564cSPhilip Reames; POW2-ONLY-NEXT: [[TMP2:%.*]] = shufflevector <2 x float> [[TMP1]], <2 x float> poison, <2 x i32> zeroinitializer 3614dda564cSPhilip Reames; POW2-ONLY-NEXT: [[TMP3:%.*]] = fpext <2 x float> [[TMP2]] to <2 x double> 3624dda564cSPhilip Reames; 
POW2-ONLY-NEXT: [[TMP4:%.*]] = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> [[TMP3]], <2 x double> zeroinitializer, <2 x double> zeroinitializer) 3634dda564cSPhilip Reames; POW2-ONLY-NEXT: [[TMP5:%.*]] = fptrunc <2 x double> [[TMP4]] to <2 x float> 3644dda564cSPhilip Reames; POW2-ONLY-NEXT: store <2 x float> [[TMP5]], ptr [[COLOUR]], align 4 3654dda564cSPhilip Reames; POW2-ONLY-NEXT: [[CONV78:%.*]] = fpext float [[TMP0]] to double 3664dda564cSPhilip Reames; POW2-ONLY-NEXT: [[TMP6:%.*]] = call double @llvm.fmuladd.f64(double [[CONV78]], double 0.000000e+00, double 0.000000e+00) 3674dda564cSPhilip Reames; POW2-ONLY-NEXT: [[CONV82:%.*]] = fptrunc double [[TMP6]] to float 3684dda564cSPhilip Reames; POW2-ONLY-NEXT: store float [[CONV82]], ptr [[ARRAYIDX80]], align 4 3694dda564cSPhilip Reames; POW2-ONLY-NEXT: ret void 3704dda564cSPhilip Reames; 3714dda564cSPhilip Reamesentry: 3724dda564cSPhilip Reames %arrayidx72 = getelementptr float, ptr %Colour, i64 1 3734dda564cSPhilip Reames %arrayidx80 = getelementptr float, ptr %Colour, i64 2 3744dda564cSPhilip Reames %conv62 = fpext float %0 to double 3754dda564cSPhilip Reames %1 = call double @llvm.fmuladd.f64(double %conv62, double 0.000000e+00, double 0.000000e+00) 3764dda564cSPhilip Reames %conv66 = fptrunc double %1 to float 3774dda564cSPhilip Reames store float %conv66, ptr %Colour, align 4 3784dda564cSPhilip Reames %conv70 = fpext float %0 to double 3794dda564cSPhilip Reames %2 = call double @llvm.fmuladd.f64(double %conv70, double 0.000000e+00, double 0.000000e+00) 3804dda564cSPhilip Reames %conv74 = fptrunc double %2 to float 3814dda564cSPhilip Reames store float %conv74, ptr %arrayidx72, align 4 3824dda564cSPhilip Reames %conv78 = fpext float %0 to double 3834dda564cSPhilip Reames %3 = call double @llvm.fmuladd.f64(double %conv78, double 0.000000e+00, double 0.000000e+00) 3844dda564cSPhilip Reames %conv82 = fptrunc double %3 to float 3854dda564cSPhilip Reames store float %conv82, ptr %arrayidx80, align 4 
3864dda564cSPhilip Reames ret void 3874dda564cSPhilip Reames} 3884dda564cSPhilip Reames 3894dda564cSPhilip Reamesdefine void @fpext_scatter(ptr %dst, double %conv) { 3904dda564cSPhilip Reames; CHECK-LABEL: @fpext_scatter( 3914dda564cSPhilip Reames; CHECK-NEXT: entry: 3924dda564cSPhilip Reames; CHECK-NEXT: [[CONV25:%.*]] = fptrunc double [[CONV:%.*]] to float 3934dda564cSPhilip Reames; CHECK-NEXT: [[LENGTHS:%.*]] = getelementptr float, ptr [[DST:%.*]], i64 0 3944dda564cSPhilip Reames; CHECK-NEXT: store float [[CONV25]], ptr [[LENGTHS]], align 4 3954dda564cSPhilip Reames; CHECK-NEXT: [[ARRAYIDX32:%.*]] = getelementptr float, ptr [[DST]], i64 1 3964dda564cSPhilip Reames; CHECK-NEXT: store float [[CONV25]], ptr [[ARRAYIDX32]], align 4 3974dda564cSPhilip Reames; CHECK-NEXT: [[ARRAYIDX37:%.*]] = getelementptr float, ptr [[DST]], i64 2 3984dda564cSPhilip Reames; CHECK-NEXT: store float [[CONV25]], ptr [[ARRAYIDX37]], align 4 3994dda564cSPhilip Reames; CHECK-NEXT: ret void 4004dda564cSPhilip Reames; 4014dda564cSPhilip Reamesentry: 4024dda564cSPhilip Reames %conv25 = fptrunc double %conv to float 4034dda564cSPhilip Reames %Lengths = getelementptr float, ptr %dst, i64 0 4044dda564cSPhilip Reames store float %conv25, ptr %Lengths, align 4 4054dda564cSPhilip Reames %arrayidx32 = getelementptr float, ptr %dst, i64 1 4064dda564cSPhilip Reames store float %conv25, ptr %arrayidx32, align 4 4074dda564cSPhilip Reames %arrayidx37 = getelementptr float, ptr %dst, i64 2 4084dda564cSPhilip Reames store float %conv25, ptr %arrayidx37, align 4 4094dda564cSPhilip Reames ret void 4104dda564cSPhilip Reames} 4114dda564cSPhilip Reames 4124dda564cSPhilip Reamesdefine i32 @reduce_add(ptr %src) { 4134dda564cSPhilip Reames; CHECK-LABEL: @reduce_add( 4144dda564cSPhilip Reames; CHECK-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i32 0 4154dda564cSPhilip Reames; CHECK-NEXT: [[L_SRC_0:%.*]] = load i32, ptr [[GEP_SRC_0]], align 4 4164dda564cSPhilip Reames; CHECK-NEXT: 
[[GEP_SRC_1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 1 4174dda564cSPhilip Reames; CHECK-NEXT: [[L_SRC_1:%.*]] = load i32, ptr [[GEP_SRC_1]], align 4 4184dda564cSPhilip Reames; CHECK-NEXT: [[GEP_SRC_2:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 2 4194dda564cSPhilip Reames; CHECK-NEXT: [[L_SRC_2:%.*]] = load i32, ptr [[GEP_SRC_2]], align 4 4204dda564cSPhilip Reames; CHECK-NEXT: [[ADD_0:%.*]] = add i32 [[L_SRC_0]], [[L_SRC_1]] 4214dda564cSPhilip Reames; CHECK-NEXT: [[ADD_1:%.*]] = add i32 [[ADD_0]], [[L_SRC_2]] 4224dda564cSPhilip Reames; CHECK-NEXT: ret i32 [[ADD_1]] 4234dda564cSPhilip Reames; 4244dda564cSPhilip Reames %gep.src.0 = getelementptr inbounds i32, ptr %src, i32 0 4254dda564cSPhilip Reames %l.src.0 = load i32, ptr %gep.src.0, align 4 4264dda564cSPhilip Reames %gep.src.1 = getelementptr inbounds i32, ptr %src, i32 1 4274dda564cSPhilip Reames %l.src.1 = load i32, ptr %gep.src.1, align 4 4284dda564cSPhilip Reames %gep.src.2 = getelementptr inbounds i32, ptr %src, i32 2 4294dda564cSPhilip Reames %l.src.2 = load i32, ptr %gep.src.2, align 4 4304dda564cSPhilip Reames 4314dda564cSPhilip Reames %add.0 = add i32 %l.src.0, %l.src.1 4324dda564cSPhilip Reames %add.1 = add i32 %add.0, %l.src.2 4334dda564cSPhilip Reames ret i32 %add.1 4344dda564cSPhilip Reames} 4354dda564cSPhilip Reames 436acb33a0cSPhilip Reamesdefine float @reduce_fadd(ptr %src) { 437ed03070eSPhilip Reames; NON-POW2-LABEL: @reduce_fadd( 438ed03070eSPhilip Reames; NON-POW2-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i32 0 439ed03070eSPhilip Reames; NON-POW2-NEXT: [[TMP1:%.*]] = load <3 x float>, ptr [[GEP_SRC_0]], align 4 4402c7786e9SPhilip Reames; NON-POW2-NEXT: [[TMP2:%.*]] = call fast float @llvm.vector.reduce.fadd.v3f32(float 0.000000e+00, <3 x float> [[TMP1]]) 441ed03070eSPhilip Reames; NON-POW2-NEXT: ret float [[TMP2]] 442ed03070eSPhilip Reames; 443ed03070eSPhilip Reames; POW2-ONLY-LABEL: @reduce_fadd( 444ed03070eSPhilip Reames; POW2-ONLY-NEXT: 
[[GEP_SRC_0:%.*]] = getelementptr inbounds float, ptr [[SRC:%.*]], i32 0 445ed03070eSPhilip Reames; POW2-ONLY-NEXT: [[L_SRC_0:%.*]] = load float, ptr [[GEP_SRC_0]], align 4 446ed03070eSPhilip Reames; POW2-ONLY-NEXT: [[GEP_SRC_1:%.*]] = getelementptr inbounds float, ptr [[SRC]], i32 1 447ed03070eSPhilip Reames; POW2-ONLY-NEXT: [[L_SRC_1:%.*]] = load float, ptr [[GEP_SRC_1]], align 4 448ed03070eSPhilip Reames; POW2-ONLY-NEXT: [[GEP_SRC_2:%.*]] = getelementptr inbounds float, ptr [[SRC]], i32 2 449ed03070eSPhilip Reames; POW2-ONLY-NEXT: [[L_SRC_2:%.*]] = load float, ptr [[GEP_SRC_2]], align 4 450ed03070eSPhilip Reames; POW2-ONLY-NEXT: [[ADD_0:%.*]] = fadd fast float [[L_SRC_0]], [[L_SRC_1]] 451ed03070eSPhilip Reames; POW2-ONLY-NEXT: [[ADD_1:%.*]] = fadd fast float [[ADD_0]], [[L_SRC_2]] 452ed03070eSPhilip Reames; POW2-ONLY-NEXT: ret float [[ADD_1]] 453acb33a0cSPhilip Reames; 454acb33a0cSPhilip Reames %gep.src.0 = getelementptr inbounds float, ptr %src, i32 0 455acb33a0cSPhilip Reames %l.src.0 = load float, ptr %gep.src.0, align 4 456acb33a0cSPhilip Reames %gep.src.1 = getelementptr inbounds float, ptr %src, i32 1 457acb33a0cSPhilip Reames %l.src.1 = load float, ptr %gep.src.1, align 4 458acb33a0cSPhilip Reames %gep.src.2 = getelementptr inbounds float, ptr %src, i32 2 459acb33a0cSPhilip Reames %l.src.2 = load float, ptr %gep.src.2, align 4 460acb33a0cSPhilip Reames 461acb33a0cSPhilip Reames %add.0 = fadd fast float %l.src.0, %l.src.1 462acb33a0cSPhilip Reames %add.1 = fadd fast float %add.0, %l.src.2 463acb33a0cSPhilip Reames ret float %add.1 464acb33a0cSPhilip Reames} 4654dda564cSPhilip Reames 4664dda564cSPhilip Reamesdefine i32 @reduce_add_after_mul(ptr %src) { 467ed03070eSPhilip Reames; NON-POW2-LABEL: @reduce_add_after_mul( 468ed03070eSPhilip Reames; NON-POW2-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i32 0 469ed03070eSPhilip Reames; NON-POW2-NEXT: [[TMP1:%.*]] = load <3 x i32>, ptr [[GEP_SRC_0]], align 4 47038fffa63SPaul Walker; 
NON-POW2-NEXT: [[TMP2:%.*]] = mul nsw <3 x i32> [[TMP1]], splat (i32 10) 471ed03070eSPhilip Reames; NON-POW2-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v3i32(<3 x i32> [[TMP2]]) 472ed03070eSPhilip Reames; NON-POW2-NEXT: ret i32 [[TMP3]] 473ed03070eSPhilip Reames; 474ed03070eSPhilip Reames; POW2-ONLY-LABEL: @reduce_add_after_mul( 475ed03070eSPhilip Reames; POW2-ONLY-NEXT: [[GEP_SRC_0:%.*]] = getelementptr inbounds i32, ptr [[SRC:%.*]], i32 0 476ed03070eSPhilip Reames; POW2-ONLY-NEXT: [[L_SRC_0:%.*]] = load i32, ptr [[GEP_SRC_0]], align 4 477ed03070eSPhilip Reames; POW2-ONLY-NEXT: [[GEP_SRC_1:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 1 478ed03070eSPhilip Reames; POW2-ONLY-NEXT: [[L_SRC_1:%.*]] = load i32, ptr [[GEP_SRC_1]], align 4 479ed03070eSPhilip Reames; POW2-ONLY-NEXT: [[GEP_SRC_2:%.*]] = getelementptr inbounds i32, ptr [[SRC]], i32 2 480ed03070eSPhilip Reames; POW2-ONLY-NEXT: [[L_SRC_2:%.*]] = load i32, ptr [[GEP_SRC_2]], align 4 481ed03070eSPhilip Reames; POW2-ONLY-NEXT: [[MUL_0:%.*]] = mul nsw i32 [[L_SRC_0]], 10 482ed03070eSPhilip Reames; POW2-ONLY-NEXT: [[MUL_1:%.*]] = mul nsw i32 [[L_SRC_1]], 10 483ed03070eSPhilip Reames; POW2-ONLY-NEXT: [[MUL_2:%.*]] = mul nsw i32 [[L_SRC_2]], 10 484ed03070eSPhilip Reames; POW2-ONLY-NEXT: [[ADD_0:%.*]] = add i32 [[MUL_0]], [[MUL_1]] 485ed03070eSPhilip Reames; POW2-ONLY-NEXT: [[ADD_1:%.*]] = add i32 [[ADD_0]], [[MUL_2]] 486ed03070eSPhilip Reames; POW2-ONLY-NEXT: ret i32 [[ADD_1]] 4874dda564cSPhilip Reames; 4884dda564cSPhilip Reames %gep.src.0 = getelementptr inbounds i32, ptr %src, i32 0 4894dda564cSPhilip Reames %l.src.0 = load i32, ptr %gep.src.0, align 4 4904dda564cSPhilip Reames %gep.src.1 = getelementptr inbounds i32, ptr %src, i32 1 4914dda564cSPhilip Reames %l.src.1 = load i32, ptr %gep.src.1, align 4 4924dda564cSPhilip Reames %gep.src.2 = getelementptr inbounds i32, ptr %src, i32 2 4934dda564cSPhilip Reames %l.src.2 = load i32, ptr %gep.src.2, align 4 4944dda564cSPhilip Reames 
4954dda564cSPhilip Reames %mul.0 = mul nsw i32 %l.src.0, 10 4964dda564cSPhilip Reames %mul.1 = mul nsw i32 %l.src.1, 10 4974dda564cSPhilip Reames %mul.2 = mul nsw i32 %l.src.2, 10 4984dda564cSPhilip Reames 4994dda564cSPhilip Reames %add.0 = add i32 %mul.0, %mul.1 5004dda564cSPhilip Reames %add.1 = add i32 %add.0, %mul.2 5014dda564cSPhilip Reames ret i32 %add.1 5024dda564cSPhilip Reames} 5034dda564cSPhilip Reames 504acb33a0cSPhilip Reamesdefine i32 @dot_product_i32(ptr %a, ptr %b) { 505ed03070eSPhilip Reames; NON-POW2-LABEL: @dot_product_i32( 506ed03070eSPhilip Reames; NON-POW2-NEXT: [[GEP_A_0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 0 507ed03070eSPhilip Reames; NON-POW2-NEXT: [[GEP_B_0:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i32 0 508ed03070eSPhilip Reames; NON-POW2-NEXT: [[TMP1:%.*]] = load <3 x i32>, ptr [[GEP_A_0]], align 4 509ed03070eSPhilip Reames; NON-POW2-NEXT: [[TMP2:%.*]] = load <3 x i32>, ptr [[GEP_B_0]], align 4 510ed03070eSPhilip Reames; NON-POW2-NEXT: [[TMP3:%.*]] = mul nsw <3 x i32> [[TMP1]], [[TMP2]] 511ed03070eSPhilip Reames; NON-POW2-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v3i32(<3 x i32> [[TMP3]]) 512ed03070eSPhilip Reames; NON-POW2-NEXT: ret i32 [[TMP4]] 513ed03070eSPhilip Reames; 514ed03070eSPhilip Reames; POW2-ONLY-LABEL: @dot_product_i32( 515ed03070eSPhilip Reames; POW2-ONLY-NEXT: [[GEP_A_0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 0 516ed03070eSPhilip Reames; POW2-ONLY-NEXT: [[L_A_0:%.*]] = load i32, ptr [[GEP_A_0]], align 4 517ed03070eSPhilip Reames; POW2-ONLY-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 1 518ed03070eSPhilip Reames; POW2-ONLY-NEXT: [[L_A_1:%.*]] = load i32, ptr [[GEP_A_1]], align 4 519ed03070eSPhilip Reames; POW2-ONLY-NEXT: [[GEP_A_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 2 520ed03070eSPhilip Reames; POW2-ONLY-NEXT: [[L_A_2:%.*]] = load i32, ptr [[GEP_A_2]], align 4 521ed03070eSPhilip Reames; POW2-ONLY-NEXT: [[GEP_B_0:%.*]] = getelementptr 
inbounds i32, ptr [[B:%.*]], i32 0 522ed03070eSPhilip Reames; POW2-ONLY-NEXT: [[L_B_0:%.*]] = load i32, ptr [[GEP_B_0]], align 4 523ed03070eSPhilip Reames; POW2-ONLY-NEXT: [[GEP_B_1:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 1 524ed03070eSPhilip Reames; POW2-ONLY-NEXT: [[L_B_1:%.*]] = load i32, ptr [[GEP_B_1]], align 4 525ed03070eSPhilip Reames; POW2-ONLY-NEXT: [[GEP_B_2:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 2 526ed03070eSPhilip Reames; POW2-ONLY-NEXT: [[L_B_2:%.*]] = load i32, ptr [[GEP_B_2]], align 4 527ed03070eSPhilip Reames; POW2-ONLY-NEXT: [[MUL_0:%.*]] = mul nsw i32 [[L_A_0]], [[L_B_0]] 528ed03070eSPhilip Reames; POW2-ONLY-NEXT: [[MUL_1:%.*]] = mul nsw i32 [[L_A_1]], [[L_B_1]] 529ed03070eSPhilip Reames; POW2-ONLY-NEXT: [[MUL_2:%.*]] = mul nsw i32 [[L_A_2]], [[L_B_2]] 530ed03070eSPhilip Reames; POW2-ONLY-NEXT: [[ADD_0:%.*]] = add i32 [[MUL_0]], [[MUL_1]] 531ed03070eSPhilip Reames; POW2-ONLY-NEXT: [[ADD_1:%.*]] = add i32 [[ADD_0]], [[MUL_2]] 532ed03070eSPhilip Reames; POW2-ONLY-NEXT: ret i32 [[ADD_1]] 5334dda564cSPhilip Reames; 5344dda564cSPhilip Reames %gep.a.0 = getelementptr inbounds i32, ptr %a, i32 0 5354dda564cSPhilip Reames %l.a.0 = load i32, ptr %gep.a.0, align 4 5364dda564cSPhilip Reames %gep.a.1 = getelementptr inbounds i32, ptr %a, i32 1 5374dda564cSPhilip Reames %l.a.1 = load i32, ptr %gep.a.1, align 4 5384dda564cSPhilip Reames %gep.a.2 = getelementptr inbounds i32, ptr %a, i32 2 5394dda564cSPhilip Reames %l.a.2 = load i32, ptr %gep.a.2, align 4 5404dda564cSPhilip Reames 5414dda564cSPhilip Reames %gep.b.0 = getelementptr inbounds i32, ptr %b, i32 0 5424dda564cSPhilip Reames %l.b.0 = load i32, ptr %gep.b.0, align 4 5434dda564cSPhilip Reames %gep.b.1 = getelementptr inbounds i32, ptr %b, i32 1 5444dda564cSPhilip Reames %l.b.1 = load i32, ptr %gep.b.1, align 4 5454dda564cSPhilip Reames %gep.b.2 = getelementptr inbounds i32, ptr %b, i32 2 5464dda564cSPhilip Reames %l.b.2 = load i32, ptr %gep.b.2, align 4 5474dda564cSPhilip Reames 
5484dda564cSPhilip Reames %mul.0 = mul nsw i32 %l.a.0, %l.b.0 5494dda564cSPhilip Reames %mul.1 = mul nsw i32 %l.a.1, %l.b.1 5504dda564cSPhilip Reames %mul.2 = mul nsw i32 %l.a.2, %l.b.2 5514dda564cSPhilip Reames 5524dda564cSPhilip Reames %add.0 = add i32 %mul.0, %mul.1 5534dda564cSPhilip Reames %add.1 = add i32 %add.0, %mul.2 5544dda564cSPhilip Reames ret i32 %add.1 5554dda564cSPhilip Reames} 5564dda564cSPhilip Reames 55722ba3511SPhilip Reames; Same as above, except the reduction order has been perturbed. This 55822ba3511SPhilip Reames; is checking for our ability to reorder. 55922ba3511SPhilip Reamesdefine i32 @dot_product_i32_reorder(ptr %a, ptr %b) { 56063e8a1b1SPhilip Reames; NON-POW2-LABEL: @dot_product_i32_reorder( 56163e8a1b1SPhilip Reames; NON-POW2-NEXT: [[GEP_A_0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 0 56263e8a1b1SPhilip Reames; NON-POW2-NEXT: [[GEP_B_0:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i32 0 56363e8a1b1SPhilip Reames; NON-POW2-NEXT: [[TMP1:%.*]] = load <3 x i32>, ptr [[GEP_A_0]], align 4 56463e8a1b1SPhilip Reames; NON-POW2-NEXT: [[TMP2:%.*]] = load <3 x i32>, ptr [[GEP_B_0]], align 4 56563e8a1b1SPhilip Reames; NON-POW2-NEXT: [[TMP3:%.*]] = mul nsw <3 x i32> [[TMP1]], [[TMP2]] 56663e8a1b1SPhilip Reames; NON-POW2-NEXT: [[TMP4:%.*]] = call i32 @llvm.vector.reduce.add.v3i32(<3 x i32> [[TMP3]]) 56763e8a1b1SPhilip Reames; NON-POW2-NEXT: ret i32 [[TMP4]] 56863e8a1b1SPhilip Reames; 56963e8a1b1SPhilip Reames; POW2-ONLY-LABEL: @dot_product_i32_reorder( 57063e8a1b1SPhilip Reames; POW2-ONLY-NEXT: [[GEP_A_0:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 0 57163e8a1b1SPhilip Reames; POW2-ONLY-NEXT: [[L_A_0:%.*]] = load i32, ptr [[GEP_A_0]], align 4 57263e8a1b1SPhilip Reames; POW2-ONLY-NEXT: [[GEP_A_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 1 57363e8a1b1SPhilip Reames; POW2-ONLY-NEXT: [[L_A_1:%.*]] = load i32, ptr [[GEP_A_1]], align 4 57463e8a1b1SPhilip Reames; POW2-ONLY-NEXT: [[GEP_A_2:%.*]] = getelementptr inbounds 
i32, ptr [[A]], i32 2 57563e8a1b1SPhilip Reames; POW2-ONLY-NEXT: [[L_A_2:%.*]] = load i32, ptr [[GEP_A_2]], align 4 57663e8a1b1SPhilip Reames; POW2-ONLY-NEXT: [[GEP_B_0:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i32 0 57763e8a1b1SPhilip Reames; POW2-ONLY-NEXT: [[L_B_0:%.*]] = load i32, ptr [[GEP_B_0]], align 4 57863e8a1b1SPhilip Reames; POW2-ONLY-NEXT: [[GEP_B_1:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 1 57963e8a1b1SPhilip Reames; POW2-ONLY-NEXT: [[L_B_1:%.*]] = load i32, ptr [[GEP_B_1]], align 4 58063e8a1b1SPhilip Reames; POW2-ONLY-NEXT: [[GEP_B_2:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 2 58163e8a1b1SPhilip Reames; POW2-ONLY-NEXT: [[L_B_2:%.*]] = load i32, ptr [[GEP_B_2]], align 4 58263e8a1b1SPhilip Reames; POW2-ONLY-NEXT: [[MUL_0:%.*]] = mul nsw i32 [[L_A_0]], [[L_B_0]] 58363e8a1b1SPhilip Reames; POW2-ONLY-NEXT: [[MUL_1:%.*]] = mul nsw i32 [[L_A_1]], [[L_B_1]] 58463e8a1b1SPhilip Reames; POW2-ONLY-NEXT: [[MUL_2:%.*]] = mul nsw i32 [[L_A_2]], [[L_B_2]] 58563e8a1b1SPhilip Reames; POW2-ONLY-NEXT: [[ADD_0:%.*]] = add i32 [[MUL_1]], [[MUL_0]] 58663e8a1b1SPhilip Reames; POW2-ONLY-NEXT: [[ADD_1:%.*]] = add i32 [[ADD_0]], [[MUL_2]] 58763e8a1b1SPhilip Reames; POW2-ONLY-NEXT: ret i32 [[ADD_1]] 58822ba3511SPhilip Reames; 58922ba3511SPhilip Reames %gep.a.0 = getelementptr inbounds i32, ptr %a, i32 0 59022ba3511SPhilip Reames %l.a.0 = load i32, ptr %gep.a.0, align 4 59122ba3511SPhilip Reames %gep.a.1 = getelementptr inbounds i32, ptr %a, i32 1 59222ba3511SPhilip Reames %l.a.1 = load i32, ptr %gep.a.1, align 4 59322ba3511SPhilip Reames %gep.a.2 = getelementptr inbounds i32, ptr %a, i32 2 59422ba3511SPhilip Reames %l.a.2 = load i32, ptr %gep.a.2, align 4 59522ba3511SPhilip Reames 59622ba3511SPhilip Reames %gep.b.0 = getelementptr inbounds i32, ptr %b, i32 0 59722ba3511SPhilip Reames %l.b.0 = load i32, ptr %gep.b.0, align 4 59822ba3511SPhilip Reames %gep.b.1 = getelementptr inbounds i32, ptr %b, i32 1 59922ba3511SPhilip Reames %l.b.1 = load i32, ptr 
%gep.b.1, align 4 60022ba3511SPhilip Reames %gep.b.2 = getelementptr inbounds i32, ptr %b, i32 2 60122ba3511SPhilip Reames %l.b.2 = load i32, ptr %gep.b.2, align 4 60222ba3511SPhilip Reames 60322ba3511SPhilip Reames %mul.0 = mul nsw i32 %l.a.0, %l.b.0 60422ba3511SPhilip Reames %mul.1 = mul nsw i32 %l.a.1, %l.b.1 60522ba3511SPhilip Reames %mul.2 = mul nsw i32 %l.a.2, %l.b.2 60622ba3511SPhilip Reames 60722ba3511SPhilip Reames %add.0 = add i32 %mul.1, %mul.0 60822ba3511SPhilip Reames %add.1 = add i32 %add.0, %mul.2 60922ba3511SPhilip Reames ret i32 %add.1 61022ba3511SPhilip Reames} 61122ba3511SPhilip Reames 612acb33a0cSPhilip Reamesdefine float @dot_product_fp32(ptr %a, ptr %b) { 613ed03070eSPhilip Reames; NON-POW2-LABEL: @dot_product_fp32( 614ed03070eSPhilip Reames; NON-POW2-NEXT: [[GEP_A_0:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i32 0 615ed03070eSPhilip Reames; NON-POW2-NEXT: [[GEP_B_0:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i32 0 616ed03070eSPhilip Reames; NON-POW2-NEXT: [[TMP1:%.*]] = load <3 x float>, ptr [[GEP_A_0]], align 4 617ed03070eSPhilip Reames; NON-POW2-NEXT: [[TMP2:%.*]] = load <3 x float>, ptr [[GEP_B_0]], align 4 618ed03070eSPhilip Reames; NON-POW2-NEXT: [[TMP3:%.*]] = fmul fast <3 x float> [[TMP1]], [[TMP2]] 6192c7786e9SPhilip Reames; NON-POW2-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v3f32(float 0.000000e+00, <3 x float> [[TMP3]]) 620ed03070eSPhilip Reames; NON-POW2-NEXT: ret float [[TMP4]] 621ed03070eSPhilip Reames; 622ed03070eSPhilip Reames; POW2-ONLY-LABEL: @dot_product_fp32( 623ed03070eSPhilip Reames; POW2-ONLY-NEXT: [[GEP_A_0:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i32 0 624ed03070eSPhilip Reames; POW2-ONLY-NEXT: [[GEP_A_2:%.*]] = getelementptr inbounds float, ptr [[A]], i32 2 625ed03070eSPhilip Reames; POW2-ONLY-NEXT: [[L_A_2:%.*]] = load float, ptr [[GEP_A_2]], align 4 626ed03070eSPhilip Reames; POW2-ONLY-NEXT: [[GEP_B_0:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i32 0 
627ed03070eSPhilip Reames; POW2-ONLY-NEXT: [[GEP_B_2:%.*]] = getelementptr inbounds float, ptr [[B]], i32 2 628ed03070eSPhilip Reames; POW2-ONLY-NEXT: [[L_B_2:%.*]] = load float, ptr [[GEP_B_2]], align 4 629ed03070eSPhilip Reames; POW2-ONLY-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[GEP_A_0]], align 4 630ed03070eSPhilip Reames; POW2-ONLY-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr [[GEP_B_0]], align 4 631ed03070eSPhilip Reames; POW2-ONLY-NEXT: [[TMP3:%.*]] = fmul fast <2 x float> [[TMP1]], [[TMP2]] 632ed03070eSPhilip Reames; POW2-ONLY-NEXT: [[MUL_2:%.*]] = fmul fast float [[L_A_2]], [[L_B_2]] 633ed03070eSPhilip Reames; POW2-ONLY-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP3]], i32 0 634ed03070eSPhilip Reames; POW2-ONLY-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP3]], i32 1 635ed03070eSPhilip Reames; POW2-ONLY-NEXT: [[ADD_0:%.*]] = fadd fast float [[TMP4]], [[TMP5]] 636ed03070eSPhilip Reames; POW2-ONLY-NEXT: [[ADD_1:%.*]] = fadd fast float [[ADD_0]], [[MUL_2]] 637ed03070eSPhilip Reames; POW2-ONLY-NEXT: ret float [[ADD_1]] 638acb33a0cSPhilip Reames; 639acb33a0cSPhilip Reames %gep.a.0 = getelementptr inbounds float, ptr %a, i32 0 640acb33a0cSPhilip Reames %l.a.0 = load float, ptr %gep.a.0, align 4 641acb33a0cSPhilip Reames %gep.a.1 = getelementptr inbounds float, ptr %a, i32 1 642acb33a0cSPhilip Reames %l.a.1 = load float, ptr %gep.a.1, align 4 643acb33a0cSPhilip Reames %gep.a.2 = getelementptr inbounds float, ptr %a, i32 2 644acb33a0cSPhilip Reames %l.a.2 = load float, ptr %gep.a.2, align 4 645acb33a0cSPhilip Reames 646acb33a0cSPhilip Reames %gep.b.0 = getelementptr inbounds float, ptr %b, i32 0 647acb33a0cSPhilip Reames %l.b.0 = load float, ptr %gep.b.0, align 4 648acb33a0cSPhilip Reames %gep.b.1 = getelementptr inbounds float, ptr %b, i32 1 649acb33a0cSPhilip Reames %l.b.1 = load float, ptr %gep.b.1, align 4 650acb33a0cSPhilip Reames %gep.b.2 = getelementptr inbounds float, ptr %b, i32 2 651acb33a0cSPhilip Reames %l.b.2 = load float, ptr 
%gep.b.2, align 4 652acb33a0cSPhilip Reames 653acb33a0cSPhilip Reames %mul.0 = fmul fast float %l.a.0, %l.b.0 654acb33a0cSPhilip Reames %mul.1 = fmul fast float %l.a.1, %l.b.1 655acb33a0cSPhilip Reames %mul.2 = fmul fast float %l.a.2, %l.b.2 656acb33a0cSPhilip Reames 657acb33a0cSPhilip Reames %add.0 = fadd fast float %mul.0, %mul.1 658acb33a0cSPhilip Reames %add.1 = fadd fast float %add.0, %mul.2 659acb33a0cSPhilip Reames ret float %add.1 660acb33a0cSPhilip Reames} 661acb33a0cSPhilip Reames 66222ba3511SPhilip Reames; Same as above, except the reduction order has been perturbed. This 66322ba3511SPhilip Reames; is checking for our ability to reorder. 66422ba3511SPhilip Reamesdefine float @dot_product_fp32_reorder(ptr %a, ptr %b) { 66563e8a1b1SPhilip Reames; NON-POW2-LABEL: @dot_product_fp32_reorder( 66663e8a1b1SPhilip Reames; NON-POW2-NEXT: [[GEP_A_0:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i32 0 66763e8a1b1SPhilip Reames; NON-POW2-NEXT: [[GEP_B_0:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i32 0 66863e8a1b1SPhilip Reames; NON-POW2-NEXT: [[TMP1:%.*]] = load <3 x float>, ptr [[GEP_A_0]], align 4 66963e8a1b1SPhilip Reames; NON-POW2-NEXT: [[TMP2:%.*]] = load <3 x float>, ptr [[GEP_B_0]], align 4 67063e8a1b1SPhilip Reames; NON-POW2-NEXT: [[TMP3:%.*]] = fmul fast <3 x float> [[TMP1]], [[TMP2]] 67163e8a1b1SPhilip Reames; NON-POW2-NEXT: [[TMP4:%.*]] = call fast float @llvm.vector.reduce.fadd.v3f32(float 0.000000e+00, <3 x float> [[TMP3]]) 67263e8a1b1SPhilip Reames; NON-POW2-NEXT: ret float [[TMP4]] 67363e8a1b1SPhilip Reames; 67463e8a1b1SPhilip Reames; POW2-ONLY-LABEL: @dot_product_fp32_reorder( 67563e8a1b1SPhilip Reames; POW2-ONLY-NEXT: [[GEP_A_0:%.*]] = getelementptr inbounds float, ptr [[A:%.*]], i32 0 67663e8a1b1SPhilip Reames; POW2-ONLY-NEXT: [[GEP_A_2:%.*]] = getelementptr inbounds float, ptr [[A]], i32 2 67763e8a1b1SPhilip Reames; POW2-ONLY-NEXT: [[L_A_2:%.*]] = load float, ptr [[GEP_A_2]], align 4 67863e8a1b1SPhilip Reames; POW2-ONLY-NEXT: 
[[GEP_B_0:%.*]] = getelementptr inbounds float, ptr [[B:%.*]], i32 0 67963e8a1b1SPhilip Reames; POW2-ONLY-NEXT: [[GEP_B_2:%.*]] = getelementptr inbounds float, ptr [[B]], i32 2 68063e8a1b1SPhilip Reames; POW2-ONLY-NEXT: [[L_B_2:%.*]] = load float, ptr [[GEP_B_2]], align 4 68163e8a1b1SPhilip Reames; POW2-ONLY-NEXT: [[TMP1:%.*]] = load <2 x float>, ptr [[GEP_A_0]], align 4 68263e8a1b1SPhilip Reames; POW2-ONLY-NEXT: [[TMP2:%.*]] = load <2 x float>, ptr [[GEP_B_0]], align 4 68363e8a1b1SPhilip Reames; POW2-ONLY-NEXT: [[TMP3:%.*]] = fmul fast <2 x float> [[TMP1]], [[TMP2]] 68463e8a1b1SPhilip Reames; POW2-ONLY-NEXT: [[MUL_2:%.*]] = fmul fast float [[L_A_2]], [[L_B_2]] 68563e8a1b1SPhilip Reames; POW2-ONLY-NEXT: [[TMP4:%.*]] = extractelement <2 x float> [[TMP3]], i32 0 68663e8a1b1SPhilip Reames; POW2-ONLY-NEXT: [[TMP5:%.*]] = extractelement <2 x float> [[TMP3]], i32 1 68763e8a1b1SPhilip Reames; POW2-ONLY-NEXT: [[ADD_0:%.*]] = fadd fast float [[TMP5]], [[TMP4]] 68863e8a1b1SPhilip Reames; POW2-ONLY-NEXT: [[ADD_1:%.*]] = fadd fast float [[ADD_0]], [[MUL_2]] 68963e8a1b1SPhilip Reames; POW2-ONLY-NEXT: ret float [[ADD_1]] 69022ba3511SPhilip Reames; 69122ba3511SPhilip Reames %gep.a.0 = getelementptr inbounds float, ptr %a, i32 0 69222ba3511SPhilip Reames %l.a.0 = load float, ptr %gep.a.0, align 4 69322ba3511SPhilip Reames %gep.a.1 = getelementptr inbounds float, ptr %a, i32 1 69422ba3511SPhilip Reames %l.a.1 = load float, ptr %gep.a.1, align 4 69522ba3511SPhilip Reames %gep.a.2 = getelementptr inbounds float, ptr %a, i32 2 69622ba3511SPhilip Reames %l.a.2 = load float, ptr %gep.a.2, align 4 69722ba3511SPhilip Reames 69822ba3511SPhilip Reames %gep.b.0 = getelementptr inbounds float, ptr %b, i32 0 69922ba3511SPhilip Reames %l.b.0 = load float, ptr %gep.b.0, align 4 70022ba3511SPhilip Reames %gep.b.1 = getelementptr inbounds float, ptr %b, i32 1 70122ba3511SPhilip Reames %l.b.1 = load float, ptr %gep.b.1, align 4 70222ba3511SPhilip Reames %gep.b.2 = getelementptr inbounds float, ptr 
%b, i32 2 70322ba3511SPhilip Reames %l.b.2 = load float, ptr %gep.b.2, align 4 70422ba3511SPhilip Reames 70522ba3511SPhilip Reames %mul.0 = fmul fast float %l.a.0, %l.b.0 70622ba3511SPhilip Reames %mul.1 = fmul fast float %l.a.1, %l.b.1 70722ba3511SPhilip Reames %mul.2 = fmul fast float %l.a.2, %l.b.2 70822ba3511SPhilip Reames 70922ba3511SPhilip Reames %add.0 = fadd fast float %mul.1, %mul.0 71022ba3511SPhilip Reames %add.1 = fadd fast float %add.0, %mul.2 71122ba3511SPhilip Reames ret float %add.1 71222ba3511SPhilip Reames} 71322ba3511SPhilip Reames 71422ba3511SPhilip Reames 715acb33a0cSPhilip Reamesdefine double @dot_product_fp64(ptr %a, ptr %b) { 716ed03070eSPhilip Reames; NON-POW2-LABEL: @dot_product_fp64( 717ed03070eSPhilip Reames; NON-POW2-NEXT: [[GEP_A_0:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i32 0 718ed03070eSPhilip Reames; NON-POW2-NEXT: [[GEP_B_0:%.*]] = getelementptr inbounds double, ptr [[B:%.*]], i32 0 719ed03070eSPhilip Reames; NON-POW2-NEXT: [[TMP1:%.*]] = load <3 x double>, ptr [[GEP_A_0]], align 4 720ed03070eSPhilip Reames; NON-POW2-NEXT: [[TMP2:%.*]] = load <3 x double>, ptr [[GEP_B_0]], align 4 721ed03070eSPhilip Reames; NON-POW2-NEXT: [[TMP3:%.*]] = fmul fast <3 x double> [[TMP1]], [[TMP2]] 7222c7786e9SPhilip Reames; NON-POW2-NEXT: [[TMP4:%.*]] = call fast double @llvm.vector.reduce.fadd.v3f64(double 0.000000e+00, <3 x double> [[TMP3]]) 723ed03070eSPhilip Reames; NON-POW2-NEXT: ret double [[TMP4]] 724ed03070eSPhilip Reames; 725ed03070eSPhilip Reames; POW2-ONLY-LABEL: @dot_product_fp64( 726ed03070eSPhilip Reames; POW2-ONLY-NEXT: [[GEP_A_0:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i32 0 727ed03070eSPhilip Reames; POW2-ONLY-NEXT: [[GEP_A_2:%.*]] = getelementptr inbounds double, ptr [[A]], i32 2 728ed03070eSPhilip Reames; POW2-ONLY-NEXT: [[L_A_2:%.*]] = load double, ptr [[GEP_A_2]], align 4 729ed03070eSPhilip Reames; POW2-ONLY-NEXT: [[GEP_B_0:%.*]] = getelementptr inbounds double, ptr [[B:%.*]], i32 0 730ed03070eSPhilip 
Reames; POW2-ONLY-NEXT: [[GEP_B_2:%.*]] = getelementptr inbounds double, ptr [[B]], i32 2 731ed03070eSPhilip Reames; POW2-ONLY-NEXT: [[L_B_2:%.*]] = load double, ptr [[GEP_B_2]], align 4 732ed03070eSPhilip Reames; POW2-ONLY-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[GEP_A_0]], align 4 733ed03070eSPhilip Reames; POW2-ONLY-NEXT: [[TMP2:%.*]] = load <2 x double>, ptr [[GEP_B_0]], align 4 734ed03070eSPhilip Reames; POW2-ONLY-NEXT: [[TMP3:%.*]] = fmul fast <2 x double> [[TMP1]], [[TMP2]] 735ed03070eSPhilip Reames; POW2-ONLY-NEXT: [[MUL_2:%.*]] = fmul fast double [[L_A_2]], [[L_B_2]] 736ed03070eSPhilip Reames; POW2-ONLY-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[TMP3]], i32 0 737ed03070eSPhilip Reames; POW2-ONLY-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[TMP3]], i32 1 738ed03070eSPhilip Reames; POW2-ONLY-NEXT: [[ADD_0:%.*]] = fadd fast double [[TMP4]], [[TMP5]] 739ed03070eSPhilip Reames; POW2-ONLY-NEXT: [[ADD_1:%.*]] = fadd fast double [[ADD_0]], [[MUL_2]] 740ed03070eSPhilip Reames; POW2-ONLY-NEXT: ret double [[ADD_1]] 741acb33a0cSPhilip Reames; 742acb33a0cSPhilip Reames %gep.a.0 = getelementptr inbounds double, ptr %a, i32 0 743acb33a0cSPhilip Reames %l.a.0 = load double, ptr %gep.a.0, align 4 744acb33a0cSPhilip Reames %gep.a.1 = getelementptr inbounds double, ptr %a, i32 1 745acb33a0cSPhilip Reames %l.a.1 = load double, ptr %gep.a.1, align 4 746acb33a0cSPhilip Reames %gep.a.2 = getelementptr inbounds double, ptr %a, i32 2 747acb33a0cSPhilip Reames %l.a.2 = load double, ptr %gep.a.2, align 4 748acb33a0cSPhilip Reames 749acb33a0cSPhilip Reames %gep.b.0 = getelementptr inbounds double, ptr %b, i32 0 750acb33a0cSPhilip Reames %l.b.0 = load double, ptr %gep.b.0, align 4 751acb33a0cSPhilip Reames %gep.b.1 = getelementptr inbounds double, ptr %b, i32 1 752acb33a0cSPhilip Reames %l.b.1 = load double, ptr %gep.b.1, align 4 753acb33a0cSPhilip Reames %gep.b.2 = getelementptr inbounds double, ptr %b, i32 2 754acb33a0cSPhilip Reames %l.b.2 = load double, ptr 
%gep.b.2, align 4 755acb33a0cSPhilip Reames 756acb33a0cSPhilip Reames %mul.0 = fmul fast double %l.a.0, %l.b.0 757acb33a0cSPhilip Reames %mul.1 = fmul fast double %l.a.1, %l.b.1 758acb33a0cSPhilip Reames %mul.2 = fmul fast double %l.a.2, %l.b.2 759acb33a0cSPhilip Reames 760acb33a0cSPhilip Reames %add.0 = fadd fast double %mul.0, %mul.1 761acb33a0cSPhilip Reames %add.1 = fadd fast double %add.0, %mul.2 762acb33a0cSPhilip Reames ret double %add.1 763acb33a0cSPhilip Reames} 764acb33a0cSPhilip Reames 765247d3ea8SPhilip Reames;; Covers a case where SLP would previous crash due to a 766247d3ea8SPhilip Reames;; missing bailout in TryToFindDuplicates for the case 767247d3ea8SPhilip Reames;; where a VL=3 list was vectorized directly (without 768247d3ea8SPhilip Reames;; a root instruction such as a store or reduce). 769247d3ea8SPhilip Reamesdefine double @no_root_reshuffle(ptr %ptr) { 770247d3ea8SPhilip Reames; CHECK-LABEL: @no_root_reshuffle( 771247d3ea8SPhilip Reames; CHECK-NEXT: entry: 772247d3ea8SPhilip Reames; CHECK-NEXT: [[TMP0:%.*]] = load double, ptr [[PTR:%.*]], align 8 773247d3ea8SPhilip Reames; CHECK-NEXT: [[MUL:%.*]] = fmul fast double [[TMP0]], [[TMP0]] 774247d3ea8SPhilip Reames; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 8 775247d3ea8SPhilip Reames; CHECK-NEXT: [[TMP1:%.*]] = load double, ptr [[ARRAYIDX2]], align 8 776247d3ea8SPhilip Reames; CHECK-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i8, ptr [[PTR]], i64 16 777247d3ea8SPhilip Reames; CHECK-NEXT: [[TMP2:%.*]] = load double, ptr [[ARRAYIDX3]], align 8 778247d3ea8SPhilip Reames; CHECK-NEXT: [[TMP3:%.*]] = fmul fast double [[TMP2]], [[TMP2]] 779247d3ea8SPhilip Reames; CHECK-NEXT: [[MUL6:%.*]] = fmul fast double [[TMP3]], [[TMP1]] 780247d3ea8SPhilip Reames; CHECK-NEXT: [[ADD:%.*]] = fadd fast double [[MUL6]], [[MUL]] 781247d3ea8SPhilip Reames; CHECK-NEXT: ret double [[ADD]] 782247d3ea8SPhilip Reames; 783247d3ea8SPhilip Reamesentry: 784247d3ea8SPhilip Reames %0 = load 
double, ptr %ptr, align 8 785247d3ea8SPhilip Reames %mul = fmul fast double %0, %0 786247d3ea8SPhilip Reames %arrayidx2 = getelementptr inbounds i8, ptr %ptr, i64 8 787247d3ea8SPhilip Reames %1 = load double, ptr %arrayidx2, align 8 788247d3ea8SPhilip Reames %arrayidx3 = getelementptr inbounds i8, ptr %ptr, i64 16 789247d3ea8SPhilip Reames %2 = load double, ptr %arrayidx3, align 8 790247d3ea8SPhilip Reames %3 = fmul fast double %2, %2 791247d3ea8SPhilip Reames %mul6 = fmul fast double %3, %1 792247d3ea8SPhilip Reames %add = fadd fast double %mul6, %mul 793247d3ea8SPhilip Reames ret double %add 794247d3ea8SPhilip Reames} 795acb33a0cSPhilip Reames 796fa8b737aSPhilip Reamesdefine float @reduce_fadd_after_fmul_of_buildvec(float %a, float %b, float %c) { 7977f6bbb3cSPhilip Reames; NON-POW2-LABEL: @reduce_fadd_after_fmul_of_buildvec( 7987f6bbb3cSPhilip Reames; NON-POW2-NEXT: [[TMP1:%.*]] = insertelement <3 x float> poison, float [[A:%.*]], i32 0 7997f6bbb3cSPhilip Reames; NON-POW2-NEXT: [[TMP2:%.*]] = insertelement <3 x float> [[TMP1]], float [[B:%.*]], i32 1 8007f6bbb3cSPhilip Reames; NON-POW2-NEXT: [[TMP3:%.*]] = insertelement <3 x float> [[TMP2]], float [[C:%.*]], i32 2 80138fffa63SPaul Walker; NON-POW2-NEXT: [[TMP4:%.*]] = fmul fast <3 x float> [[TMP3]], splat (float 1.000000e+01) 8027f6bbb3cSPhilip Reames; NON-POW2-NEXT: [[TMP5:%.*]] = call fast float @llvm.vector.reduce.fadd.v3f32(float 0.000000e+00, <3 x float> [[TMP4]]) 8037f6bbb3cSPhilip Reames; NON-POW2-NEXT: ret float [[TMP5]] 8047f6bbb3cSPhilip Reames; 8057f6bbb3cSPhilip Reames; POW2-ONLY-LABEL: @reduce_fadd_after_fmul_of_buildvec( 8067f6bbb3cSPhilip Reames; POW2-ONLY-NEXT: [[MUL_0:%.*]] = fmul fast float [[A:%.*]], 1.000000e+01 8077f6bbb3cSPhilip Reames; POW2-ONLY-NEXT: [[MUL_1:%.*]] = fmul fast float [[B:%.*]], 1.000000e+01 8087f6bbb3cSPhilip Reames; POW2-ONLY-NEXT: [[MUL_2:%.*]] = fmul fast float [[C:%.*]], 1.000000e+01 8097f6bbb3cSPhilip Reames; POW2-ONLY-NEXT: [[ADD_0:%.*]] = fadd fast float [[MUL_0]], 
[[MUL_1]] 8107f6bbb3cSPhilip Reames; POW2-ONLY-NEXT: [[ADD_1:%.*]] = fadd fast float [[ADD_0]], [[MUL_2]] 8117f6bbb3cSPhilip Reames; POW2-ONLY-NEXT: ret float [[ADD_1]] 812fa8b737aSPhilip Reames; 813fa8b737aSPhilip Reames %mul.0 = fmul fast float %a, 10.0 814fa8b737aSPhilip Reames %mul.1 = fmul fast float %b, 10.0 815fa8b737aSPhilip Reames %mul.2 = fmul fast float %c, 10.0 816fa8b737aSPhilip Reames 817fa8b737aSPhilip Reames %add.0 = fadd fast float %mul.0, %mul.1 818fa8b737aSPhilip Reames %add.1 = fadd fast float %add.0, %mul.2 819fa8b737aSPhilip Reames ret float %add.1 820fa8b737aSPhilip Reames} 821fa8b737aSPhilip Reames 822fa8b737aSPhilip Reames 8234dda564cSPhilip Reamesdeclare float @llvm.fmuladd.f32(float, float, float) 8244dda564cSPhilip Reames 8254dda564cSPhilip Reamesdeclare double @llvm.fmuladd.f64(double, double, double) 826