; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -passes=slp-vectorizer %s -slp-threshold=-10 | FileCheck %s

; Both functions below contain the same two-lane float computation and differ
; only in the operand order of the final scalar fadd. The assertions expect the
; two lanes to be vectorized as <2 x float> in both cases.
; NOTE(review): the negative -slp-threshold presumably lets the vectorizer
; accept trees its cost model would otherwise reject - confirm against the
; option's description.

target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-gnu"

%structA = type { [2 x float] }

; Baseline: each lane is sitofp -> fsub fast (minus a float loaded from %J) ->
; fmul fast (squared). Lane 0 uses %xmin and the float at %J, lane 1 uses %ymin
; and the next float in the array. The final fadd takes lane 0 (%mul11) as its
; left operand, and the result is compared against 0.0 to drive the loop branch.
define void @test1(ptr nocapture readonly %J, i32 %xmin, i32 %ymin) {
; CHECK-LABEL: @test1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[XMIN:%.*]], i32 0
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[YMIN:%.*]], i32 1
; CHECK-NEXT:    br label [[FOR_BODY3_LR_PH:%.*]]
; CHECK:       for.body3.lr.ph:
; CHECK-NEXT:    [[TMP2:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x float>
; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x float>, ptr [[J:%.*]], align 4
; CHECK-NEXT:    [[TMP5:%.*]] = fsub fast <2 x float> [[TMP2]], [[TMP4]]
; CHECK-NEXT:    [[TMP6:%.*]] = fmul fast <2 x float> [[TMP5]], [[TMP5]]
; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x float> [[TMP6]], i32 0
; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x float> [[TMP6]], i32 1
; CHECK-NEXT:    [[ADD:%.*]] = fadd fast float [[TMP7]], [[TMP8]]
; CHECK-NEXT:    [[CMP:%.*]] = fcmp oeq float [[ADD]], 0.000000e+00
; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY3_LR_PH]], label [[FOR_END27:%.*]]
; CHECK:       for.end27:
; CHECK-NEXT:    ret void
;

entry:
  br label %for.body3.lr.ph

for.body3.lr.ph:
  %conv5 = sitofp i32 %ymin to float
  %conv = sitofp i32 %xmin to float
  %0 = load float, ptr %J, align 4
  %sub = fsub fast float %conv, %0
  %arrayidx9 = getelementptr inbounds %structA, ptr %J, i64 0, i32 0, i64 1
  %1 = load float, ptr %arrayidx9, align 4
  %sub10 = fsub fast float %conv5, %1
  %mul11 = fmul fast float %sub, %sub
  %mul12 = fmul fast float %sub10, %sub10
  %add = fadd fast float %mul11, %mul12
  %cmp = fcmp oeq float %add, 0.000000e+00
  br i1 %cmp, label %for.body3.lr.ph, label %for.end27

for.end27:
  ret void
}

; Same computation as @test1, but the final fadd takes lane 1 (%mul12) as its
; left operand. The assertions expect the same vector code, with only the order
; of the two extracted lanes feeding the scalar fadd swapped.
define void @test2(ptr nocapture readonly %J, i32 %xmin, i32 %ymin) {
; CHECK-LABEL: @test2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i32> poison, i32 [[XMIN:%.*]], i32 0
; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i32> [[TMP0]], i32 [[YMIN:%.*]], i32 1
; CHECK-NEXT:    br label [[FOR_BODY3_LR_PH:%.*]]
; CHECK:       for.body3.lr.ph:
; CHECK-NEXT:    [[TMP2:%.*]] = sitofp <2 x i32> [[TMP1]] to <2 x float>
; CHECK-NEXT:    [[TMP4:%.*]] = load <2 x float>, ptr [[J:%.*]], align 4
; CHECK-NEXT:    [[TMP5:%.*]] = fsub fast <2 x float> [[TMP2]], [[TMP4]]
; CHECK-NEXT:    [[TMP6:%.*]] = fmul fast <2 x float> [[TMP5]], [[TMP5]]
; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x float> [[TMP6]], i32 0
; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x float> [[TMP6]], i32 1
; CHECK-NEXT:    [[ADD:%.*]] = fadd fast float [[TMP8]], [[TMP7]]
; CHECK-NEXT:    [[CMP:%.*]] = fcmp oeq float [[ADD]], 0.000000e+00
; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY3_LR_PH]], label [[FOR_END27:%.*]]
; CHECK:       for.end27:
; CHECK-NEXT:    ret void
;

entry:
  br label %for.body3.lr.ph

for.body3.lr.ph:
  %conv5 = sitofp i32 %ymin to float
  %conv = sitofp i32 %xmin to float
  %0 = load float, ptr %J, align 4
  %sub = fsub fast float %conv, %0
  %arrayidx9 = getelementptr inbounds %structA, ptr %J, i64 0, i32 0, i64 1
  %1 = load float, ptr %arrayidx9, align 4
  %sub10 = fsub fast float %conv5, %1
  %mul11 = fmul fast float %sub, %sub
  %mul12 = fmul fast float %sub10, %sub10
  %add = fadd fast float %mul12, %mul11         ;;;<---- Operands commuted!!
  %cmp = fcmp oeq float %add, 0.000000e+00
  br i1 %cmp, label %for.body3.lr.ph, label %for.end27

for.end27:
  ret void
}