; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -passes=slp-vectorizer -S < %s -mtriple=x86_64-unknown-linux -slp-threshold=-127 | FileCheck %s
; RUN: opt -passes=slp-vectorizer -S < %s -mtriple=x86_64-unknown-linux -slp-threshold=-115 -mattr=+avx2 | FileCheck %s --check-prefix=AVX2

; SLP-vectorizer test run in two configurations: plain x86-64 and x86-64 with
; +avx2, each with a negative -slp-threshold (presumably so that vectorization
; is accepted regardless of the cost estimate -- confirm against the pass
; options).
;
; The function computes four scalar lanes of add/mul/sdiv/sub/shl from its
; four i64 arguments. The entry-side incoming values of the four phis are
; truncs of `or` results whose operands come from lanes 0 and 1 of the add,
; mul, sdiv and (again) mul results. The expected output therefore builds both
; `or` operands by shuffling the already-vectorized add/mul/sdiv values; the
; two configurations differ only in the shuffle sequence that is expected.

define void @test(i64 %p0, i64 %p1, i64 %p2, i64 %p3) {
; CHECK-LABEL: @test(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x i64> poison, i64 [[P0:%.*]], i32 0
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x i64> [[TMP0]], i64 [[P1:%.*]], i32 1
; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x i64> [[TMP1]], i64 [[P2:%.*]], i32 2
; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x i64> [[TMP2]], i64 [[P3:%.*]], i32 3
; CHECK-NEXT:    [[TMP4:%.*]] = add <4 x i64> [[TMP3]], [[TMP3]]
; CHECK-NEXT:    [[TMP5:%.*]] = mul <4 x i64> [[TMP3]], [[TMP3]]
; CHECK-NEXT:    [[TMP6:%.*]] = sdiv <4 x i64> [[TMP3]], [[TMP3]]
; CHECK-NEXT:    [[TMP7:%.*]] = sub <4 x i64> [[TMP5]], [[TMP6]]
; CHECK-NEXT:    [[TMP8:%.*]] = shl <4 x i64> [[TMP4]], [[TMP7]]
; CHECK-NEXT:    [[TMP9:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <4 x i64> [[TMP6]], <4 x i64> [[TMP5]], <4 x i32> <i32 poison, i32 poison, i32 0, i32 4>
; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> [[TMP10]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT:    [[TMP12:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> [[TMP5]], <4 x i32> <i32 1, i32 5, i32 poison, i32 poison>
; CHECK-NEXT:    [[TMP13:%.*]] = shufflevector <4 x i64> [[TMP6]], <4 x i64> [[TMP5]], <4 x i32> <i32 poison, i32 poison, i32 1, i32 5>
; CHECK-NEXT:    [[TMP14:%.*]] = shufflevector <4 x i64> [[TMP12]], <4 x i64> [[TMP13]], <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT:    [[TMP15:%.*]] = or <4 x i64> [[TMP11]], [[TMP14]]
; CHECK-NEXT:    [[TMP16:%.*]] = trunc <4 x i64> [[TMP15]] to <4 x i32>
; CHECK-NEXT:    br label [[BB:%.*]]
; CHECK:       bb:
; CHECK-NEXT:    [[TMP17:%.*]] = phi <4 x i32> [ [[TMP18:%.*]], [[BB]] ], [ [[TMP16]], [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[TMP18]] = trunc <4 x i64> [[TMP8]] to <4 x i32>
; CHECK-NEXT:    br label [[BB]]
;
; AVX2-LABEL: @test(
; AVX2-NEXT:  entry:
; AVX2-NEXT:    [[TMP0:%.*]] = insertelement <4 x i64> poison, i64 [[P0:%.*]], i32 0
; AVX2-NEXT:    [[TMP1:%.*]] = insertelement <4 x i64> [[TMP0]], i64 [[P1:%.*]], i32 1
; AVX2-NEXT:    [[TMP2:%.*]] = insertelement <4 x i64> [[TMP1]], i64 [[P2:%.*]], i32 2
; AVX2-NEXT:    [[TMP3:%.*]] = insertelement <4 x i64> [[TMP2]], i64 [[P3:%.*]], i32 3
; AVX2-NEXT:    [[TMP4:%.*]] = add <4 x i64> [[TMP3]], [[TMP3]]
; AVX2-NEXT:    [[TMP5:%.*]] = mul <4 x i64> [[TMP3]], [[TMP3]]
; AVX2-NEXT:    [[TMP6:%.*]] = sdiv <4 x i64> [[TMP3]], [[TMP3]]
; AVX2-NEXT:    [[TMP7:%.*]] = sub <4 x i64> [[TMP5]], [[TMP6]]
; AVX2-NEXT:    [[TMP8:%.*]] = shl <4 x i64> [[TMP4]], [[TMP7]]
; AVX2-NEXT:    [[TMP9:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 poison, i32 4>
; AVX2-NEXT:    [[TMP10:%.*]] = shufflevector <4 x i64> [[TMP9]], <4 x i64> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 4, i32 3>
; AVX2-NEXT:    [[TMP11:%.*]] = shufflevector <4 x i64> [[TMP4]], <4 x i64> [[TMP5]], <4 x i32> <i32 1, i32 5, i32 poison, i32 5>
; AVX2-NEXT:    [[TMP12:%.*]] = shufflevector <4 x i64> [[TMP11]], <4 x i64> [[TMP6]], <4 x i32> <i32 0, i32 1, i32 5, i32 3>
; AVX2-NEXT:    [[TMP13:%.*]] = or <4 x i64> [[TMP10]], [[TMP12]]
; AVX2-NEXT:    [[TMP14:%.*]] = trunc <4 x i64> [[TMP13]] to <4 x i32>
; AVX2-NEXT:    br label [[BB:%.*]]
; AVX2:       bb:
; AVX2-NEXT:    [[TMP15:%.*]] = phi <4 x i32> [ [[TMP16:%.*]], [[BB]] ], [ [[TMP14]], [[ENTRY:%.*]] ]
; AVX2-NEXT:    [[TMP16]] = trunc <4 x i64> [[TMP8]] to <4 x i32>
; AVX2-NEXT:    br label [[BB]]
;
entry:
  ; Four independent lanes, one per argument: add, mul and sdiv of each
  ; argument with itself.
  %a0 = add i64 %p0, %p0
  %a1 = add i64 %p1, %p1
  %a2 = add i64 %p2, %p2
  %a3 = add i64 %p3, %p3
  %m0 = mul i64 %p0, %p0
  %m1 = mul i64 %p1, %p1
  %m2 = mul i64 %p2, %p2
  %m3 = mul i64 %p3, %p3
  %d0 = sdiv i64 %p0, %p0
  %d1 = sdiv i64 %p1, %p1
  %d2 = sdiv i64 %p2, %p2
  %d3 = sdiv i64 %p3, %p3
  ; Per-lane (mul - sdiv), used as the shift amount for the per-lane add.
  %s0 = sub i64 %m0, %d0
  %s1 = sub i64 %m1, %d1
  %s2 = sub i64 %m2, %d2
  %s3 = sub i64 %m3, %d3
  %shl1 = shl i64 %a0, %s0
  %shl2 = shl i64 %a1, %s1
  %shl3 = shl i64 %a2, %s2
  %shl4 = shl i64 %a3, %s3
  ; Each `or` combines lanes 0 and 1 of one of the operations above. %o3
  ; repeats the operands of %o1 (the mul pair); the expected shuffles in the
  ; assertions accordingly select the mul vector for both result lanes 1 and 3.
  %o0 = or i64 %a0, %a1
  %tt0 = trunc i64 %o0 to i32
  %o1 = or i64 %m0, %m1
  %tt1 = trunc i64 %o1 to i32
  %o2 = or i64 %d0, %d1
  %tt2 = trunc i64 %o2 to i32
  %o3 = or i64 %m0, %m1
  %tt3 = trunc i64 %o3 to i32
  br label %bb

bb:
  ; Self-loop with no exit: each phi takes the entry-side trunc when coming
  ; from %entry and the shl-derived trunc on the back edge from %bb.
  %phi0 = phi i32 [ %t1, %bb ], [ %tt0, %entry ]
  %phi1 = phi i32 [ %t2, %bb ], [ %tt1, %entry ]
  %phi2 = phi i32 [ %t3, %bb ], [ %tt2, %entry ]
  %phi3 = phi i32 [ %t4, %bb ], [ %tt3, %entry ]
  %t1 = trunc i64 %shl1 to i32
  %t2 = trunc i64 %shl2 to i32
  %t3 = trunc i64 %shl3 to i32
  %t4 = trunc i64 %shl4 to i32
  br label %bb
}