; RUN: opt < %s -passes=loop-vectorize,dce,instcombine -force-vector-interleave=1 -S | FileCheck %s

; Loop-vectorizer test for add reductions whose result type is narrower than
; the i32 the scalar loop accumulates in. Each function below keeps its running
; sum in an i32 phi, masks it back to the narrow width on every iteration and
; truncates it once after the loop. The FileCheck expectations require the
; vector loop and the final horizontal reduction to use the narrow element type.
; Interleaving is forced to 1 on the command line above.

target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-gnu"

; CHECK-LABEL: @reduction_i8
;
; char reduction_i8(char *a, char *b, int n) {
;   char sum = 0;
;   for (int i = 0; i < n; ++i)
;     sum += (a[i] + b[i]);
;   return sum;
; }
;
; Expected vector loop: an i8 accumulator phi, two <16 x i8> loads (a and b)
; and two <16 x i8> adds.
;
; CHECK: vector.body:
; CHECK: phi <16 x i8>
; CHECK: load <16 x i8>
; CHECK: load <16 x i8>
; CHECK: add <16 x i8>
; CHECK: add <16 x i8>
;
; Expected middle block: a v16i8 horizontal add whose i8 result is then
; zero-extended to i32.
;
; CHECK: middle.block:
; CHECK: [[Rdx:%[a-zA-Z0-9.]+]] = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8>
; CHECK: zext i8 [[Rdx]] to i32
;
define i8 @reduction_i8(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %n) {
entry:
  ; Skip the loop entirely when n <= 0.
  %cmp.12 = icmp sgt i32 %n, 0
  br i1 %cmp.12, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:
  br label %for.body

; Loop exit: narrow the i32 accumulator to the i8 return type.
for.cond.for.cond.cleanup_crit_edge:
  %add5.lcssa = phi i32 [ %add5, %for.body ]
  %conv6 = trunc i32 %add5.lcssa to i8
  br label %for.cond.cleanup

; Returns 0 when reached directly from %entry, otherwise the truncated sum.
for.cond.cleanup:
  %sum.0.lcssa = phi i8 [ %conv6, %for.cond.for.cond.cleanup_crit_edge ], [ 0, %entry ]
  ret i8 %sum.0.lcssa

for.body:
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
  ; i32 running sum, starting at 0.
  %sum.013 = phi i32 [ %add5, %for.body ], [ 0, %for.body.preheader ]
  ; a[i], zero-extended to i32.
  %arrayidx = getelementptr inbounds i8, ptr %a, i64 %indvars.iv
  %0 = load i8, ptr %arrayidx, align 1
  %conv = zext i8 %0 to i32
  ; b[i], zero-extended to i32.
  %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %indvars.iv
  %1 = load i8, ptr %arrayidx2, align 1
  %conv3 = zext i8 %1 to i32
  ; Keep only the low 8 bits of the accumulator before adding.
  %conv4 = and i32 %sum.013, 255
  %add = add nuw nsw i32 %conv, %conv4
  %add5 = add nuw nsw i32 %add, %conv3
  ; Advance the index; leave the loop once its i32 truncation equals %n.
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %for.cond.for.cond.cleanup_crit_edge, label %for.body
}

; CHECK-LABEL: @reduction_i16_1
;
; short reduction_i16_1(short *a, short *b, int n) {
;   short sum = 0;
;   for (int i = 0; i < n; ++i)
;     sum += (a[i] + b[i]);
;   return sum;
; }
;
; Expected vector loop: an i16 accumulator phi, two <8 x i16> loads and two
; <8 x i16> adds.
;
; CHECK: vector.body:
; CHECK: phi <8 x i16>
; CHECK: load <8 x i16>
; CHECK: load <8 x i16>
; CHECK: add <8 x i16>
; CHECK: add <8 x i16>
;
; Expected middle block: a v8i16 horizontal add whose i16 result is then
; zero-extended to i32.
;
; CHECK: middle.block:
; CHECK: [[Rdx:%[a-zA-Z0-9.]+]] = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16>
; CHECK: zext i16 [[Rdx]] to i32
;
define i16 @reduction_i16_1(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %n) {
entry:
  ; Skip the loop entirely when n <= 0.
  %cmp.16 = icmp sgt i32 %n, 0
  br i1 %cmp.16, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:
  br label %for.body

; Loop exit: narrow the i32 accumulator to the i16 return type.
for.cond.for.cond.cleanup_crit_edge:
  %add5.lcssa = phi i32 [ %add5, %for.body ]
  %conv6 = trunc i32 %add5.lcssa to i16
  br label %for.cond.cleanup

; Returns 0 when reached directly from %entry, otherwise the truncated sum.
for.cond.cleanup:
  %sum.0.lcssa = phi i16 [ %conv6, %for.cond.for.cond.cleanup_crit_edge ], [ 0, %entry ]
  ret i16 %sum.0.lcssa

for.body:
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
  ; i32 running sum, starting at 0.
  %sum.017 = phi i32 [ %add5, %for.body ], [ 0, %for.body.preheader ]
  ; a[i], zero-extended to i32.
  %arrayidx = getelementptr inbounds i16, ptr %a, i64 %indvars.iv
  %0 = load i16, ptr %arrayidx, align 2
  %conv.14 = zext i16 %0 to i32
  ; b[i], zero-extended to i32.
  %arrayidx2 = getelementptr inbounds i16, ptr %b, i64 %indvars.iv
  %1 = load i16, ptr %arrayidx2, align 2
  %conv3.15 = zext i16 %1 to i32
  ; Keep only the low 16 bits of the accumulator before adding.
  %conv4.13 = and i32 %sum.017, 65535
  %add = add nuw nsw i32 %conv.14, %conv4.13
  %add5 = add nuw nsw i32 %add, %conv3.15
  ; Advance the index; leave the loop once its i32 truncation equals %n.
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %for.cond.for.cond.cleanup_crit_edge, label %for.body
}

; CHECK-LABEL: @reduction_i16_2
;
; short reduction_i16_2(char *a, char *b, int n) {
;   short sum = 0;
;   for (int i = 0; i < n; ++i)
;     sum += (a[i] + b[i]);
;   return sum;
; }
;
; Here the inputs are i8 but the sum is i16. Expected vector loop: an i16
; accumulator phi, two <16 x i8> loads each zero-extended to <16 x i16>, and
; two <16 x i16> adds.
;
; CHECK: vector.body:
; CHECK: phi <16 x i16>
; CHECK: [[Ld1:%[a-zA-Z0-9.]+]] = load <16 x i8>
; CHECK: zext <16 x i8> [[Ld1]] to <16 x i16>
; CHECK: [[Ld2:%[a-zA-Z0-9.]+]] = load <16 x i8>
; CHECK: zext <16 x i8> [[Ld2]] to <16 x i16>
; CHECK: add <16 x i16>
; CHECK: add <16 x i16>
;
; Expected middle block: a v16i16 horizontal add whose i16 result is then
; zero-extended to i32.
;
; CHECK: middle.block:
; CHECK: [[Rdx:%[a-zA-Z0-9.]+]] = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16>
; CHECK: zext i16 [[Rdx]] to i32
;
define i16 @reduction_i16_2(ptr nocapture readonly %a, ptr nocapture readonly %b, i32 %n) {
entry:
  ; Skip the loop entirely when n <= 0.
  %cmp.14 = icmp sgt i32 %n, 0
  br i1 %cmp.14, label %for.body.preheader, label %for.cond.cleanup

for.body.preheader:
  br label %for.body

; Loop exit: narrow the i32 accumulator to the i16 return type.
for.cond.for.cond.cleanup_crit_edge:
  %add5.lcssa = phi i32 [ %add5, %for.body ]
  %conv6 = trunc i32 %add5.lcssa to i16
  br label %for.cond.cleanup

; Returns 0 when reached directly from %entry, otherwise the truncated sum.
for.cond.cleanup:
  %sum.0.lcssa = phi i16 [ %conv6, %for.cond.for.cond.cleanup_crit_edge ], [ 0, %entry ]
  ret i16 %sum.0.lcssa

for.body:
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
  ; i32 running sum, starting at 0.
  %sum.015 = phi i32 [ %add5, %for.body ], [ 0, %for.body.preheader ]
  ; a[i], zero-extended from i8 to i32.
  %arrayidx = getelementptr inbounds i8, ptr %a, i64 %indvars.iv
  %0 = load i8, ptr %arrayidx, align 1
  %conv = zext i8 %0 to i32
  ; b[i], zero-extended from i8 to i32.
  %arrayidx2 = getelementptr inbounds i8, ptr %b, i64 %indvars.iv
  %1 = load i8, ptr %arrayidx2, align 1
  %conv3 = zext i8 %1 to i32
  ; Keep only the low 16 bits of the accumulator before adding.
  %conv4.13 = and i32 %sum.015, 65535
  %add = add nuw nsw i32 %conv, %conv4.13
  %add5 = add nuw nsw i32 %add, %conv3
  ; Advance the index; leave the loop once its i32 truncation equals %n.
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %for.cond.for.cond.cleanup_crit_edge, label %for.body
}
