; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux -mattr=+avx2 | FileCheck %s
; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux -mattr=+avx2 -slp-threshold=-1 -slp-vectorize-hor-store | FileCheck %s --check-prefix=THRESH1
; RUN: opt < %s -passes=slp-vectorizer -S -mtriple=x86_64-unknown-linux -mattr=+avx2 -slp-threshold=-2 -slp-vectorize-hor-store | FileCheck %s --check-prefix=THRESH2

; Each function below builds a scalar fmul/fadd chain whose operands are
; extractelements of one <2 x float> argument. The first invocation uses the
; default SLP threshold; the other two lower it to -1 and -2 respectively.

@a = global float 0.000000e+00, align 4

; Both lanes of %x are extracted and squared, then summed. All three
; invocations expect the two scalar fmuls to become a single
; fmul <2 x float> on %x itself, with lanes extracted only for the final fadd.
define float @f(<2 x float> %x) {
; CHECK-LABEL: @f(
; CHECK-NEXT:    [[TMP1:%.*]] = fmul <2 x float> [[X:%.*]], [[X]]
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x float> [[TMP1]], i32 0
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
; CHECK-NEXT:    [[ADD:%.*]] = fadd float [[TMP2]], [[TMP3]]
; CHECK-NEXT:    ret float [[ADD]]
;
; THRESH1-LABEL: @f(
; THRESH1-NEXT:    [[TMP1:%.*]] = fmul <2 x float> [[X:%.*]], [[X]]
; THRESH1-NEXT:    [[TMP2:%.*]] = extractelement <2 x float> [[TMP1]], i32 0
; THRESH1-NEXT:    [[TMP3:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
; THRESH1-NEXT:    [[ADD:%.*]] = fadd float [[TMP2]], [[TMP3]]
; THRESH1-NEXT:    ret float [[ADD]]
;
; THRESH2-LABEL: @f(
; THRESH2-NEXT:    [[TMP1:%.*]] = fmul <2 x float> [[X:%.*]], [[X]]
; THRESH2-NEXT:    [[TMP2:%.*]] = extractelement <2 x float> [[TMP1]], i32 0
; THRESH2-NEXT:    [[TMP3:%.*]] = extractelement <2 x float> [[TMP1]], i32 1
; THRESH2-NEXT:    [[ADD:%.*]] = fadd float [[TMP2]], [[TMP3]]
; THRESH2-NEXT:    ret float [[ADD]]
;
  %x0 = extractelement <2 x float> %x, i32 0
  %x1 = extractelement <2 x float> %x, i32 1
  %x0x0 = fmul float %x0, %x0
  %x1x1 = fmul float %x1, %x1
  %add = fadd float %x0x0, %x1x1
  ret float %add
}

; Same multiply/add chain as @f, but the sum is stored to @a and the lane-0
; extract %x0 is returned directly, so it has a user besides the fmul.
; All three invocations expect the fmuls to be vectorized while the lane-0
; extractelement of %x is kept to feed the return.
define float @f_used_out_of_tree(<2 x float> %x) {
; CHECK-LABEL: @f_used_out_of_tree(
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0
; CHECK-NEXT:    [[TMP2:%.*]] = fmul <2 x float> [[X]], [[X]]
; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
; CHECK-NEXT:    [[ADD:%.*]] = fadd float [[TMP3]], [[TMP4]]
; CHECK-NEXT:    store float [[ADD]], ptr @a, align 4
; CHECK-NEXT:    ret float [[TMP1]]
;
; THRESH1-LABEL: @f_used_out_of_tree(
; THRESH1-NEXT:    [[TMP1:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0
; THRESH1-NEXT:    [[TMP2:%.*]] = fmul <2 x float> [[X]], [[X]]
; THRESH1-NEXT:    [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
; THRESH1-NEXT:    [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
; THRESH1-NEXT:    [[ADD:%.*]] = fadd float [[TMP3]], [[TMP4]]
; THRESH1-NEXT:    store float [[ADD]], ptr @a, align 4
; THRESH1-NEXT:    ret float [[TMP1]]
;
; THRESH2-LABEL: @f_used_out_of_tree(
; THRESH2-NEXT:    [[TMP1:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0
; THRESH2-NEXT:    [[TMP2:%.*]] = fmul <2 x float> [[X]], [[X]]
; THRESH2-NEXT:    [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
; THRESH2-NEXT:    [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
; THRESH2-NEXT:    [[ADD:%.*]] = fadd float [[TMP3]], [[TMP4]]
; THRESH2-NEXT:    store float [[ADD]], ptr @a, align 4
; THRESH2-NEXT:    ret float [[TMP1]]
;
  %x0 = extractelement <2 x float> %x, i32 0
  %x1 = extractelement <2 x float> %x, i32 1
  %x0x0 = fmul float %x0, %x0
  %x1x1 = fmul float %x1, %x1
  %add = fadd float %x0x0, %x1x1
  store float %add, ptr @a
  ret float %x0
}

; Here %x1 supplies three of the four multiply operands (x0*x1 and x1*x1).
; The default-threshold invocation expects the scalar code to be left as is.
; With -slp-threshold=-1 or -2 the fmuls are expected to be vectorized, using
; a shufflevector with mask <1, 1> on %x as one operand and %x as the other.
define float @f_used_twice_in_tree(<2 x float> %x) {
; CHECK-LABEL: @f_used_twice_in_tree(
; CHECK-NEXT:    [[X0:%.*]] = extractelement <2 x float> [[X:%.*]], i32 0
; CHECK-NEXT:    [[X1:%.*]] = extractelement <2 x float> [[X]], i32 1
; CHECK-NEXT:    [[X0X0:%.*]] = fmul float [[X0]], [[X1]]
; CHECK-NEXT:    [[X1X1:%.*]] = fmul float [[X1]], [[X1]]
; CHECK-NEXT:    [[ADD:%.*]] = fadd float [[X0X0]], [[X1X1]]
; CHECK-NEXT:    ret float [[ADD]]
;
; THRESH1-LABEL: @f_used_twice_in_tree(
; THRESH1-NEXT:    [[TMP1:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> <i32 1, i32 1>
; THRESH1-NEXT:    [[TMP2:%.*]] = fmul <2 x float> [[TMP1]], [[X]]
; THRESH1-NEXT:    [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
; THRESH1-NEXT:    [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
; THRESH1-NEXT:    [[ADD:%.*]] = fadd float [[TMP3]], [[TMP4]]
; THRESH1-NEXT:    ret float [[ADD]]
;
; THRESH2-LABEL: @f_used_twice_in_tree(
; THRESH2-NEXT:    [[TMP1:%.*]] = shufflevector <2 x float> [[X:%.*]], <2 x float> poison, <2 x i32> <i32 1, i32 1>
; THRESH2-NEXT:    [[TMP2:%.*]] = fmul <2 x float> [[TMP1]], [[X]]
; THRESH2-NEXT:    [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
; THRESH2-NEXT:    [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
; THRESH2-NEXT:    [[ADD:%.*]] = fadd float [[TMP3]], [[TMP4]]
; THRESH2-NEXT:    ret float [[ADD]]
;
  %x0 = extractelement <2 x float> %x, i32 0
  %x1 = extractelement <2 x float> %x, i32 1
  %x0x0 = fmul float %x0, %x1
  %x1x1 = fmul float %x1, %x1
  %add = fadd float %x0x0, %x1x1
  ret float %add
}
