; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 2
; RUN: opt < %s -passes=loop-vectorize -mtriple=x86_64-unknown-linux -S -mcpu=slm | FileCheck %s
; REQUIRES: asserts

; This loop should not be vectorized on the X86 SLM architecture (-mcpu=slm).
; Vectorizing the 64-bit multiply is the wrong choice here because it can be
; done in a narrower mode (note that both multiply sources are sign-extended
; from 16 bits). In addition, addq/subq (quad word) have a high cost on SLM.
; Vectorizing this loop on SLM caused a performance regression of about 70%.

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; @no_vec accumulates, for i in [0, LastIndex):
;     (sext(InputData[i]) * sext(InputData[i + lag])) >> (sext(Scale) & 0xFFFFFFFF)
; in a 64-bit accumulator (arithmetic shift) and returns the sum truncated to
; i32. It returns 0 when LastIndex <= 0.
;
; The assertions below expect the scalar loop to come out of the vectorizer
; unchanged: no vector body is created; the only addition is the LCSSA phi in
; for.cond.cleanup.loopexit.
define i32 @no_vec(i32 %LastIndex, ptr nocapture readonly %InputData, i16 signext %lag, i16 signext %Scale) {
; CHECK-LABEL: define i32 @no_vec
; CHECK-SAME: (i32 [[LASTINDEX:%.*]], ptr readonly captures(none) [[INPUTDATA:%.*]], i16 signext [[LAG:%.*]], i16 signext [[SCALE:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP17:%.*]] = icmp sgt i32 [[LASTINDEX]], 0
; CHECK-NEXT:    br i1 [[CMP17]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_COND_CLEANUP:%.*]]
; CHECK:       for.body.lr.ph:
; CHECK-NEXT:    [[CONV5:%.*]] = sext i16 [[SCALE]] to i64
; CHECK-NEXT:    [[SH_PROM:%.*]] = and i64 [[CONV5]], 4294967295
; CHECK-NEXT:    [[TMP0:%.*]] = sext i16 [[LAG]] to i64
; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[LASTINDEX]] to i64
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.cond.cleanup.loopexit:
; CHECK-NEXT:    [[ADD7_LCSSA:%.*]] = phi i64 [ [[ADD7:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[CONV8:%.*]] = trunc i64 [[ADD7_LCSSA]] to i32
; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    [[ACCUMULATOR_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[CONV8]], [[FOR_COND_CLEANUP_LOOPEXIT:%.*]] ]
; CHECK-NEXT:    ret i32 [[ACCUMULATOR_0_LCSSA]]
; CHECK:       for.body:
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[ACCUMULATOR_018:%.*]] = phi i64 [ 0, [[FOR_BODY_LR_PH]] ], [ [[ADD7]], [[FOR_BODY]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[INPUTDATA]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[TMP1:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
; CHECK-NEXT:    [[CONV:%.*]] = sext i16 [[TMP1]] to i64
; CHECK-NEXT:    [[TMP2:%.*]] = add nsw i64 [[INDVARS_IV]], [[TMP0]]
; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i16, ptr [[INPUTDATA]], i64 [[TMP2]]
; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr [[ARRAYIDX3]], align 2
; CHECK-NEXT:    [[CONV4:%.*]] = sext i16 [[TMP3]] to i64
; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i64 [[CONV4]], [[CONV]]
; CHECK-NEXT:    [[SHR:%.*]] = ashr i64 [[MUL]], [[SH_PROM]]
; CHECK-NEXT:    [[ADD7]] = add i64 [[SHR]], [[ACCUMULATOR_018]]
; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]]
;
entry:
  ; Skip the loop entirely when there is nothing to iterate over.
  %cmp17 = icmp sgt i32 %LastIndex, 0
  br i1 %cmp17, label %for.body.lr.ph, label %for.cond.cleanup

for.body.lr.ph:                                   ; preds = %entry
  ; Loop-invariant values: the shift amount (low 32 bits of the sign-extended
  ; Scale), the sign-extended lag offset, and the zero-extended trip count.
  %conv5 = sext i16 %Scale to i64
  %sh_prom = and i64 %conv5, 4294967295
  %0 = sext i16 %lag to i64
  %wide.trip.count = zext i32 %LastIndex to i64
  br label %for.body

for.cond.cleanup.loopexit:                        ; preds = %for.body
  ; Narrow the 64-bit accumulator to the i32 return type.
  %conv8 = trunc i64 %add7 to i32
  br label %for.cond.cleanup

for.cond.cleanup:                                 ; preds = %for.cond.cleanup.loopexit, %entry
  %Accumulator.0.lcssa = phi i32 [ 0, %entry ], [ %conv8, %for.cond.cleanup.loopexit ]
  ret i32 %Accumulator.0.lcssa

for.body:                                         ; preds = %for.body, %for.body.lr.ph
  %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
  %Accumulator.018 = phi i64 [ 0, %for.body.lr.ph ], [ %add7, %for.body ]
  ; Load InputData[i] and widen it from i16 to i64.
  %arrayidx = getelementptr inbounds i16, ptr %InputData, i64 %indvars.iv
  %1 = load i16, ptr %arrayidx, align 2
  %conv = sext i16 %1 to i64
  ; Load InputData[i + lag] and widen it from i16 to i64.
  %2 = add nsw i64 %indvars.iv, %0
  %arrayidx3 = getelementptr inbounds i16, ptr %InputData, i64 %2
  %3 = load i16, ptr %arrayidx3, align 2
  %conv4 = sext i16 %3 to i64
  ; 64-bit multiply of two 16-bit-sourced values, arithmetic shift, accumulate.
  %mul = mul nsw i64 %conv4, %conv
  %shr = ashr i64 %mul, %sh_prom
  %add7 = add i64 %shr, %Accumulator.018
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
}