; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; RUN: %if x86-registered-target %{ opt -S -passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s --check-prefix X86 %}
; RUN: %if aarch64-registered-target %{ opt -S -passes=slp-vectorizer -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s --check-prefix AARCH64 %}

; Runs the SLP vectorizer over @test once per target triple (x86_64 and
; aarch64, each only when that backend is registered) and matches the output
; against the corresponding check prefix.
;
; The scalar input computes eight float values from double-precision
; fmul/fsub expressions, multiplies each by 0.0, compares the product with
; `fcmp oeq` against 0.0, and combines the eight i1 results through a chain
; of `select ..., i1 false`.
;
; Both expected outputs end in a single llvm.vector.reduce.and over a frozen
; <8 x i1> compare. They differ only in how the second operand of the
; `fmul <4 x double>` is assembled: the aarch64 output has one additional
; shufflevector that takes a lane from the fpext result.
;
; NOTE(review): this looks like a regression test for a specific vectorizer
; issue, but the file does not name it -- confirm against the commit history.
define i1 @test(float %0, double %1) {
; X86-LABEL: define i1 @test
; X86-SAME: (float [[TMP0:%.*]], double [[TMP1:%.*]]) {
; X86-NEXT: [[TMP3:%.*]] = insertelement <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float poison>, float [[TMP0]], i32 3
; X86-NEXT: [[TMP4:%.*]] = fpext <4 x float> [[TMP3]] to <4 x double>
; X86-NEXT: [[TMP5:%.*]] = insertelement <6 x double> <double poison, double poison, double poison, double poison, double poison, double 0.000000e+00>, double [[TMP1]], i32 4
; X86-NEXT: [[TMP6:%.*]] = shufflevector <4 x double> [[TMP4]], <4 x double> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison>
; X86-NEXT: [[TMP7:%.*]] = shufflevector <6 x double> [[TMP5]], <6 x double> [[TMP6]], <6 x i32> <i32 6, i32 7, i32 8, i32 9, i32 4, i32 5>
; X86-NEXT: [[TMP8:%.*]] = fmul <6 x double> zeroinitializer, [[TMP7]]
; X86-NEXT: [[TMP9:%.*]] = shufflevector <6 x double> [[TMP7]], <6 x double> [[TMP8]], <4 x i32> <i32 poison, i32 4, i32 11, i32 11>
; X86-NEXT: [[TMP10:%.*]] = shufflevector <4 x double> [[TMP9]], <4 x double> <double 0.000000e+00, double poison, double poison, double poison>, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
; X86-NEXT: [[TMP11:%.*]] = shufflevector <6 x double> [[TMP7]], <6 x double> poison, <4 x i32> <i32 2, i32 0, i32 1, i32 poison>
; X86-NEXT: [[TMP12:%.*]] = shufflevector <4 x double> [[TMP11]], <4 x double> <double poison, double poison, double poison, double 0.000000e+00>, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
; X86-NEXT: [[TMP13:%.*]] = fmul <4 x double> [[TMP10]], [[TMP12]]
; X86-NEXT: [[TMP14:%.*]] = call <8 x double> @llvm.vector.insert.v8f64.v4f64(<8 x double> <double poison, double poison, double poison, double poison, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00>, <4 x double> [[TMP13]], i64 0)
; X86-NEXT: [[TMP15:%.*]] = call <8 x double> @llvm.vector.insert.v8f64.v6f64(<8 x double> <double poison, double poison, double poison, double poison, double poison, double poison, double 0.000000e+00, double 0.000000e+00>, <6 x double> [[TMP8]], i64 0)
; X86-NEXT: [[TMP16:%.*]] = fsub <8 x double> [[TMP14]], [[TMP15]]
; X86-NEXT: [[TMP17:%.*]] = fmul <8 x double> [[TMP14]], [[TMP15]]
; X86-NEXT: [[TMP18:%.*]] = shufflevector <8 x double> [[TMP16]], <8 x double> [[TMP17]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 13, i32 14, i32 15>
; X86-NEXT: [[TMP19:%.*]] = fptrunc <8 x double> [[TMP18]] to <8 x float>
; X86-NEXT: [[TMP20:%.*]] = fmul <8 x float> [[TMP19]], zeroinitializer
; X86-NEXT: [[TMP21:%.*]] = fcmp oeq <8 x float> [[TMP20]], zeroinitializer
; X86-NEXT: [[TMP22:%.*]] = freeze <8 x i1> [[TMP21]]
; X86-NEXT: [[TMP23:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP22]])
; X86-NEXT: ret i1 [[TMP23]]
;
; AARCH64-LABEL: define i1 @test
; AARCH64-SAME: (float [[TMP0:%.*]], double [[TMP1:%.*]]) {
; AARCH64-NEXT: [[TMP3:%.*]] = insertelement <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float poison>, float [[TMP0]], i32 3
; AARCH64-NEXT: [[TMP4:%.*]] = fpext <4 x float> [[TMP3]] to <4 x double>
; AARCH64-NEXT: [[TMP5:%.*]] = insertelement <6 x double> <double poison, double poison, double poison, double poison, double poison, double 0.000000e+00>, double [[TMP1]], i32 4
; AARCH64-NEXT: [[TMP6:%.*]] = shufflevector <4 x double> [[TMP4]], <4 x double> poison, <6 x i32> <i32 0, i32 1, i32 2, i32 3, i32 poison, i32 poison>
; AARCH64-NEXT: [[TMP7:%.*]] = shufflevector <6 x double> [[TMP5]], <6 x double> [[TMP6]], <6 x i32> <i32 6, i32 7, i32 8, i32 9, i32 4, i32 5>
; AARCH64-NEXT: [[TMP8:%.*]] = fmul <6 x double> zeroinitializer, [[TMP7]]
; AARCH64-NEXT: [[TMP9:%.*]] = shufflevector <6 x double> [[TMP7]], <6 x double> [[TMP8]], <4 x i32> <i32 poison, i32 4, i32 11, i32 11>
; AARCH64-NEXT: [[TMP10:%.*]] = shufflevector <4 x double> [[TMP9]], <4 x double> <double 0.000000e+00, double poison, double poison, double poison>, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
; AARCH64-NEXT: [[TMP11:%.*]] = shufflevector <6 x double> [[TMP7]], <6 x double> poison, <4 x i32> <i32 2, i32 0, i32 poison, i32 poison>
; AARCH64-NEXT: [[TMP12:%.*]] = shufflevector <4 x double> [[TMP11]], <4 x double> <double poison, double poison, double poison, double 0.000000e+00>, <4 x i32> <i32 0, i32 1, i32 poison, i32 7>
; AARCH64-NEXT: [[TMP13:%.*]] = shufflevector <4 x double> [[TMP12]], <4 x double> [[TMP4]], <4 x i32> <i32 0, i32 1, i32 5, i32 3>
; AARCH64-NEXT: [[TMP14:%.*]] = fmul <4 x double> [[TMP10]], [[TMP13]]
; AARCH64-NEXT: [[TMP15:%.*]] = call <8 x double> @llvm.vector.insert.v8f64.v4f64(<8 x double> <double poison, double poison, double poison, double poison, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00, double 0.000000e+00>, <4 x double> [[TMP14]], i64 0)
; AARCH64-NEXT: [[TMP16:%.*]] = call <8 x double> @llvm.vector.insert.v8f64.v6f64(<8 x double> <double poison, double poison, double poison, double poison, double poison, double poison, double 0.000000e+00, double 0.000000e+00>, <6 x double> [[TMP8]], i64 0)
; AARCH64-NEXT: [[TMP17:%.*]] = fsub <8 x double> [[TMP15]], [[TMP16]]
; AARCH64-NEXT: [[TMP18:%.*]] = fmul <8 x double> [[TMP15]], [[TMP16]]
; AARCH64-NEXT: [[TMP19:%.*]] = shufflevector <8 x double> [[TMP17]], <8 x double> [[TMP18]], <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 13, i32 14, i32 15>
; AARCH64-NEXT: [[TMP20:%.*]] = fptrunc <8 x double> [[TMP19]] to <8 x float>
; AARCH64-NEXT: [[TMP21:%.*]] = fmul <8 x float> [[TMP20]], zeroinitializer
; AARCH64-NEXT: [[TMP22:%.*]] = fcmp oeq <8 x float> [[TMP21]], zeroinitializer
; AARCH64-NEXT: [[TMP23:%.*]] = freeze <8 x i1> [[TMP22]]
; AARCH64-NEXT: [[TMP24:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP23]])
; AARCH64-NEXT: ret i1 [[TMP24]]
;
  ; Widen the float argument and three float zero constants to double.
  %3 = fpext float %0 to double
  %4 = fpext float 0.000000e+00 to double
  %5 = fpext float 0.000000e+00 to double
  %6 = fpext float 0.000000e+00 to double
  ; Double-precision products and differences built from the widened values,
  ; the double argument %1 and 0.0 constants. Eight of them (%12, %16, %7,
  ; %21, %23, %25, %29, %31) are truncated back to float.
  %7 = fmul double 0.000000e+00, 0.000000e+00
  %8 = fmul double 0.000000e+00, %1
  %9 = fmul double 0.000000e+00, 0.000000e+00
  %10 = fmul double 0.000000e+00, %5
  %11 = fmul double 0.000000e+00, %6
  %12 = fsub double %10, %11
  %13 = fptrunc double %12 to float
  %14 = fmul double %9, 0.000000e+00
  %15 = fmul double 0.000000e+00, %3
  %16 = fsub double %14, %15
  %17 = fptrunc double %16 to float
  %18 = fptrunc double %7 to float
  %19 = fmul double %1, %6
  %20 = fmul double 0.000000e+00, %4
  %21 = fsub double %19, %20
  %22 = fptrunc double %21 to float
  %23 = fsub double 0.000000e+00, %8
  %24 = fptrunc double %23 to float
  %25 = fmul double 0.000000e+00, 0.000000e+00
  %26 = fptrunc double %25 to float
  %27 = fmul double %9, %4
  %28 = fmul double 0.000000e+00, %5
  %29 = fsub double %27, %28
  %30 = fptrunc double %29 to float
  %31 = fmul double %9, 0.000000e+00
  %32 = fptrunc double %31 to float
  ; Each truncated float is multiplied by 0.0 and compared (ordered-equal)
  ; with 0.0. The i1 results are chained with `select a, b, false`, consuming
  ; the floats in the order %13, %22, %30, %17, %24, %32, %18, %26.
  %33 = fmul float %13, 0.000000e+00
  %34 = fcmp oeq float %33, 0.000000e+00
  %35 = fmul float %22, 0.000000e+00
  %36 = fcmp oeq float %35, 0.000000e+00
  %37 = select i1 %34, i1 %36, i1 false
  %38 = fmul float %30, 0.000000e+00
  %39 = fcmp oeq float %38, 0.000000e+00
  %40 = select i1 %37, i1 %39, i1 false
  %41 = fmul float %17, 0.000000e+00
  %42 = fcmp oeq float %41, 0.000000e+00
  %43 = select i1 %40, i1 %42, i1 false
  %44 = fmul float %24, 0.000000e+00
  %45 = fcmp oeq float %44, 0.000000e+00
  %46 = select i1 %43, i1 %45, i1 false
  %47 = fmul float %32, 0.000000e+00
  %48 = fcmp oeq float %47, 0.000000e+00
  %49 = select i1 %46, i1 %48, i1 false
  %50 = fmul float %18, 0.000000e+00
  %51 = fcmp oeq float %50, 0.000000e+00
  %52 = select i1 %49, i1 %51, i1 false
  %53 = fmul float %26, 0.000000e+00
  %54 = fcmp oeq float %53, 0.000000e+00
  %55 = select i1 %52, i1 %54, i1 false
  ; The final link of the select chain is the function result.
  ret i1 %55
}