; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=slp-vectorizer -S | FileCheck %s

target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-gnu"

; These examples correspond to input code like:
;
;   void t(long * __restrict a, long * __restrict b) {
;     a[0] *= b[0];
;     a[1] *= b[1];
;   }
;
; If we SLP vectorise this then we end up with something like this because we
; don't have a mul.2d:
;
;        ldr     q0, [x1]
;        ldr     q1, [x0]
;        fmov    x8, d0
;        mov     x10, v0.d[1]
;        fmov    x9, d1
;        mov     x11, v1.d[1]
;        mul     x8, x9, x8
;        mul     x9, x11, x10
;        fmov    d0, x8
;        mov     v0.d[1], x9
;        str     q0, [x0]
;        ret
;
; If we don't SLP vectorise but scalarize this we get this instead:
;
;        ldp     x8, x9, [x1]
;        ldp     x10, x11, [x0]
;        mul     x9, x11, x9
;        mul     x8, x10, x8
;        stp     x8, x9, [x0]
;        ret
;
; The CHECK lines below therefore expect the two i64 multiplies to stay scalar.
define void @mul(ptr noalias nocapture %a, ptr noalias nocapture readonly %b) {
; CHECK-LABEL: @mul(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr [[B:%.*]], align 8
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr [[A:%.*]], align 8
; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i64 [[TMP1]], [[TMP0]]
; CHECK-NEXT:    store i64 [[MUL]], ptr [[A]], align 8
; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 1
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr [[ARRAYIDX2]], align 8
; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 1
; CHECK-NEXT:    [[TMP3:%.*]] = load i64, ptr [[ARRAYIDX3]], align 8
; CHECK-NEXT:    [[MUL4:%.*]] = mul nsw i64 [[TMP3]], [[TMP2]]
; CHECK-NEXT:    store i64 [[MUL4]], ptr [[ARRAYIDX3]], align 8
; CHECK-NEXT:    ret void
;
entry:
  %0 = load i64, ptr %b, align 8
  %1 = load i64, ptr %a, align 8
  %mul = mul nsw i64 %1, %0
  store i64 %mul, ptr %a, align 8
  %arrayidx2 = getelementptr inbounds i64, ptr %b, i64 1
  %2 = load i64, ptr %arrayidx2, align 8
  %arrayidx3 = getelementptr inbounds i64, ptr %a, i64 1
  %3 = load i64, ptr %arrayidx3, align 8
  %mul4 = mul nsw i64 %3, %2
  store i64 %mul4, ptr %arrayidx3, align 8
  ret void
}

; Similar example, but now a multiply-accumulate:
;
;   void x (long * __restrict a, long * __restrict b) {
;     a[0] *= b[0];
;     a[1] *= b[1];
;     a[0] += b[0];
;     a[1] += b[1];
;   }
;
; Multiply-accumulate test: for each of the two i64 lanes the IR computes
; a[i] * b[i] + b[i] and stores the result back to a[i]. The CHECK lines expect
; the scalar instructions to be left unchanged by the SLP vectorizer.
define void @mac(ptr noalias nocapture %a, ptr noalias nocapture readonly %b) {
; CHECK-LABEL: @mac(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr [[B:%.*]], align 8
; CHECK-NEXT:    [[TMP1:%.*]] = load i64, ptr [[A:%.*]], align 8
; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i64 [[TMP1]], [[TMP0]]
; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 1
; CHECK-NEXT:    [[TMP2:%.*]] = load i64, ptr [[ARRAYIDX2]], align 8
; CHECK-NEXT:    [[ARRAYIDX3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 1
; CHECK-NEXT:    [[TMP3:%.*]] = load i64, ptr [[ARRAYIDX3]], align 8
; CHECK-NEXT:    [[MUL4:%.*]] = mul nsw i64 [[TMP3]], [[TMP2]]
; CHECK-NEXT:    [[ADD:%.*]] = add nsw i64 [[MUL]], [[TMP0]]
; CHECK-NEXT:    store i64 [[ADD]], ptr [[A]], align 8
; CHECK-NEXT:    [[ADD9:%.*]] = add nsw i64 [[MUL4]], [[TMP2]]
; CHECK-NEXT:    store i64 [[ADD9]], ptr [[ARRAYIDX3]], align 8
; CHECK-NEXT:    ret void
;
entry:
  %0 = load i64, ptr %b, align 8
  %1 = load i64, ptr %a, align 8
  %mul = mul nsw i64 %1, %0
  %arrayidx2 = getelementptr inbounds i64, ptr %b, i64 1
  %2 = load i64, ptr %arrayidx2, align 8
  %arrayidx3 = getelementptr inbounds i64, ptr %a, i64 1
  %3 = load i64, ptr %arrayidx3, align 8
  %mul4 = mul nsw i64 %3, %2
  %add = add nsw i64 %mul, %0
  store i64 %add, ptr %a, align 8
  %add9 = add nsw i64 %mul4, %2
  store i64 %add9, ptr %arrayidx3, align 8
  ret void
}