; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mtriple riscv64-linux-gnu -mattr=+v,+f -S 2>%t | FileCheck %s -check-prefix=VLENUNK
; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -riscv-v-vector-bits-min=-1 -mtriple riscv64-linux-gnu -mattr=+v,+f -S 2>%t | FileCheck %s -check-prefix=VLEN128

; Note: +v implies a Zvl128b (i.e. minimal VLEN of 128), but as can be seen, we're currently
; not using that information unless an explicit vector width is set. (FIXME)

; A collection of fairly basic functional tests when both fixed and scalable vectorization is
; allowed. The primary goal of this is to check for crashes during cost modeling, but it also
; exercises the default heuristics in a useful way.

target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
target triple = "riscv64"

define void @vector_add(ptr noalias nocapture %a, i64 %v, i64 %n) {
; VLENUNK-LABEL: @vector_add(
; VLENUNK-NEXT: entry:
; VLENUNK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; VLENUNK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]]
; VLENUNK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; VLENUNK: vector.ph:
; VLENUNK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
; VLENUNK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i32 0
; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
; VLENUNK-NEXT: br label [[VECTOR_BODY:%.*]]
; VLENUNK: vector.body:
; VLENUNK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VLENUNK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
; VLENUNK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]]
; VLENUNK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0
; VLENUNK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 1 x i64>, ptr [[TMP4]], align 8
; VLENUNK-NEXT: [[TMP5:%.*]] = add <vscale x 1 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; VLENUNK-NEXT: store <vscale x 1 x i64> [[TMP5]], ptr [[TMP4]], align 8
; VLENUNK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
; VLENUNK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VLENUNK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; VLENUNK: middle.block:
; VLENUNK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; VLENUNK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; VLENUNK: scalar.ph:
; VLENUNK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; VLENUNK-NEXT: br label [[FOR_BODY:%.*]]
; VLENUNK: for.body:
; VLENUNK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLENUNK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; VLENUNK-NEXT: [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
; VLENUNK-NEXT: [[ADD:%.*]] = add i64 [[ELEM]], [[V]]
; VLENUNK-NEXT: store i64 [[ADD]], ptr [[ARRAYIDX]], align 8
; VLENUNK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLENUNK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLENUNK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
; VLENUNK: for.end:
; VLENUNK-NEXT: ret void
;
; VLEN128-LABEL: @vector_add(
; VLEN128-NEXT: entry:
; VLEN128-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; VLEN128-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]]
; VLEN128-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; VLEN128: vector.ph:
; VLEN128-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; VLEN128-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
; VLEN128-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; VLEN128-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i32 0
; VLEN128-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
; VLEN128-NEXT: br label [[VECTOR_BODY:%.*]]
; VLEN128: vector.body:
; VLEN128-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VLEN128-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
; VLEN128-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]]
; VLEN128-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0
; VLEN128-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 1 x i64>, ptr [[TMP4]], align 8
; VLEN128-NEXT: [[TMP5:%.*]] = add <vscale x 1 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; VLEN128-NEXT: store <vscale x 1 x i64> [[TMP5]], ptr [[TMP4]], align 8
; VLEN128-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
; VLEN128-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VLEN128-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; VLEN128: middle.block:
; VLEN128-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; VLEN128-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; VLEN128: scalar.ph:
; VLEN128-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; VLEN128-NEXT: br label [[FOR_BODY:%.*]]
; VLEN128: for.body:
; VLEN128-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLEN128-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; VLEN128-NEXT: [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
; VLEN128-NEXT: [[ADD:%.*]] = add i64 [[ELEM]], [[V]]
; VLEN128-NEXT: store i64 [[ADD]], ptr [[ARRAYIDX]], align 8
; VLEN128-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLEN128-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLEN128-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
; VLEN128: for.end:
; VLEN128-NEXT: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
  %elem = load i64, ptr %arrayidx
  %add = add i64 %elem, %v
  store i64 %add, ptr %arrayidx
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret void
}

; Same as above, but with an op type of i32. We currently have a bug around
; etype=ELEN profitability in the vectorizer, and having a smaller element
; width test allows us to highlight different aspects of codegen.
define void @vector_add_i32(ptr noalias nocapture %a, i32 %v, i64 %n) {
; VLENUNK-LABEL: @vector_add_i32(
; VLENUNK-NEXT: entry:
; VLENUNK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; VLENUNK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; VLENUNK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
; VLENUNK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; VLENUNK: vector.ph:
; VLENUNK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; VLENUNK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; VLENUNK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[V:%.*]], i32 0
; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[V]], i32 0
; VLENUNK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT2]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
; VLENUNK-NEXT: br label [[VECTOR_BODY:%.*]]
; VLENUNK: vector.body:
; VLENUNK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VLENUNK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
; VLENUNK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; VLENUNK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2
; VLENUNK-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], 0
; VLENUNK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 1
; VLENUNK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], [[TMP8]]
; VLENUNK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP4]]
; VLENUNK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP9]]
; VLENUNK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 0
; VLENUNK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i32>, ptr [[TMP12]], align 4
; VLENUNK-NEXT: [[TMP13:%.*]] = call i32 @llvm.vscale.i32()
; VLENUNK-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], 2
; VLENUNK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP14]]
; VLENUNK-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 2 x i32>, ptr [[TMP15]], align 4
; VLENUNK-NEXT: [[TMP16:%.*]] = add <vscale x 2 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; VLENUNK-NEXT: [[TMP17:%.*]] = add <vscale x 2 x i32> [[WIDE_LOAD1]], [[BROADCAST_SPLAT3]]
; VLENUNK-NEXT: store <vscale x 2 x i32> [[TMP16]], ptr [[TMP12]], align 4
; VLENUNK-NEXT: [[TMP18:%.*]] = call i32 @llvm.vscale.i32()
; VLENUNK-NEXT: [[TMP19:%.*]] = mul i32 [[TMP18]], 2
; VLENUNK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP19]]
; VLENUNK-NEXT: store <vscale x 2 x i32> [[TMP17]], ptr [[TMP20]], align 4
; VLENUNK-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64()
; VLENUNK-NEXT: [[TMP22:%.*]] = mul i64 [[TMP21]], 4
; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP22]]
; VLENUNK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VLENUNK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; VLENUNK: middle.block:
; VLENUNK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; VLENUNK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; VLENUNK: scalar.ph:
; VLENUNK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; VLENUNK-NEXT: br label [[FOR_BODY:%.*]]
; VLENUNK: for.body:
; VLENUNK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLENUNK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; VLENUNK-NEXT: [[ELEM:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; VLENUNK-NEXT: [[ADD:%.*]] = add i32 [[ELEM]], [[V]]
; VLENUNK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX]], align 4
; VLENUNK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLENUNK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLENUNK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; VLENUNK: for.end:
; VLENUNK-NEXT: ret void
;
; VLEN128-LABEL: @vector_add_i32(
; VLEN128-NEXT: entry:
; VLEN128-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; VLEN128-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; VLEN128-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
; VLEN128-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; VLEN128: vector.ph:
; VLEN128-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; VLEN128-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; VLEN128-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; VLEN128-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; VLEN128-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[V:%.*]], i32 0
; VLEN128-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
; VLEN128-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[V]], i32 0
; VLEN128-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT2]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
; VLEN128-NEXT: br label [[VECTOR_BODY:%.*]]
; VLEN128: vector.body:
; VLEN128-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VLEN128-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
; VLEN128-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; VLEN128-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2
; VLEN128-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], 0
; VLEN128-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 1
; VLEN128-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], [[TMP8]]
; VLEN128-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP4]]
; VLEN128-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP9]]
; VLEN128-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 0
; VLEN128-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i32>, ptr [[TMP12]], align 4
; VLEN128-NEXT: [[TMP13:%.*]] = call i32 @llvm.vscale.i32()
; VLEN128-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], 2
; VLEN128-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP14]]
; VLEN128-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 2 x i32>, ptr [[TMP15]], align 4
; VLEN128-NEXT: [[TMP16:%.*]] = add <vscale x 2 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; VLEN128-NEXT: [[TMP17:%.*]] = add <vscale x 2 x i32> [[WIDE_LOAD1]], [[BROADCAST_SPLAT3]]
; VLEN128-NEXT: store <vscale x 2 x i32> [[TMP16]], ptr [[TMP12]], align 4
; VLEN128-NEXT: [[TMP18:%.*]] = call i32 @llvm.vscale.i32()
; VLEN128-NEXT: [[TMP19:%.*]] = mul i32 [[TMP18]], 2
; VLEN128-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP19]]
; VLEN128-NEXT: store <vscale x 2 x i32> [[TMP17]], ptr [[TMP20]], align 4
; VLEN128-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64()
; VLEN128-NEXT: [[TMP22:%.*]] = mul i64 [[TMP21]], 4
; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP22]]
; VLEN128-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VLEN128-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; VLEN128: middle.block:
; VLEN128-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; VLEN128-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; VLEN128: scalar.ph:
; VLEN128-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; VLEN128-NEXT: br label [[FOR_BODY:%.*]]
; VLEN128: for.body:
; VLEN128-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLEN128-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; VLEN128-NEXT: [[ELEM:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; VLEN128-NEXT: [[ADD:%.*]] = add i32 [[ELEM]], [[V]]
; VLEN128-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX]], align 4
; VLEN128-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLEN128-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLEN128-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; VLEN128: for.end:
; VLEN128-NEXT: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
  %elem = load i32, ptr %arrayidx
  %add = add i32 %elem, %v
  store i32 %add, ptr %arrayidx
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret void
}


; a[b[i]] += v, mostly to exercise scatter/gather costing
; TODO: Currently fails to vectorize due to a memory conflict
define void @indexed_add(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %v, i64 %n) {
; VLENUNK-LABEL: @indexed_add(
; VLENUNK-NEXT: entry:
; VLENUNK-NEXT: br label [[FOR_BODY:%.*]]
; VLENUNK: for.body:
; VLENUNK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLENUNK-NEXT: [[BADDR:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[IV]]
; VLENUNK-NEXT: [[AIDX:%.*]] = load i64, ptr [[BADDR]], align 8
; VLENUNK-NEXT: [[AADDR:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[AIDX]]
; VLENUNK-NEXT: [[ELEM:%.*]] = load i64, ptr [[AADDR]], align 8
; VLENUNK-NEXT: [[ADD:%.*]] = add i64 [[ELEM]], [[V:%.*]]
; VLENUNK-NEXT: store i64 [[ADD]], ptr [[AADDR]], align 8
; VLENUNK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLENUNK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLENUNK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; VLENUNK: for.end:
; VLENUNK-NEXT: ret void
;
; VLEN128-LABEL: @indexed_add(
; VLEN128-NEXT: entry:
; VLEN128-NEXT: br label [[FOR_BODY:%.*]]
; VLEN128: for.body:
; VLEN128-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLEN128-NEXT: [[BADDR:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[IV]]
; VLEN128-NEXT: [[AIDX:%.*]] = load i64, ptr [[BADDR]], align 8
; VLEN128-NEXT: [[AADDR:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[AIDX]]
; VLEN128-NEXT: [[ELEM:%.*]] = load i64, ptr [[AADDR]], align 8
; VLEN128-NEXT: [[ADD:%.*]] = add i64 [[ELEM]], [[V:%.*]]
; VLEN128-NEXT: store i64 [[ADD]], ptr [[AADDR]], align 8
; VLEN128-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLEN128-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLEN128-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; VLEN128: for.end:
; VLEN128-NEXT: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %baddr = getelementptr inbounds i64, ptr %b, i64 %iv
  %aidx = load i64, ptr %baddr
  %aaddr = getelementptr inbounds i64, ptr %a, i64 %aidx
  %elem = load i64, ptr %aaddr
  %add = add i64 %elem, %v
  store i64 %add, ptr %aaddr
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret void
}

; a[b[i]] = v, exercise scatter support
define void @indexed_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %v, i64 %n) {
; VLENUNK-LABEL: @indexed_store(
; VLENUNK-NEXT: entry:
; VLENUNK-NEXT: br label [[FOR_BODY:%.*]]
; VLENUNK: for.body:
; VLENUNK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLENUNK-NEXT: [[BADDR:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[IV]]
; VLENUNK-NEXT: [[AIDX:%.*]] = load i64, ptr [[BADDR]], align 8
; VLENUNK-NEXT: [[AADDR:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[AIDX]]
; VLENUNK-NEXT: store i64 [[V:%.*]], ptr [[AADDR]], align 8
; VLENUNK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLENUNK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLENUNK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; VLENUNK: for.end:
; VLENUNK-NEXT: ret void
;
; VLEN128-LABEL: @indexed_store(
; VLEN128-NEXT: entry:
; VLEN128-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; VLEN128: vector.ph:
; VLEN128-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i32 0
; VLEN128-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
; VLEN128-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x i64> poison, i64 [[V]], i32 0
; VLEN128-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT2]], <2 x i64> poison, <2 x i32> zeroinitializer
; VLEN128-NEXT: br label [[VECTOR_BODY:%.*]]
; VLEN128: vector.body:
; VLEN128-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VLEN128-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; VLEN128-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2
; VLEN128-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP0]]
; VLEN128-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
; VLEN128-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
; VLEN128-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8
; VLEN128-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2
; VLEN128-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i64>, ptr [[TMP5]], align 8
; VLEN128-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], <2 x i64> [[WIDE_LOAD]]
; VLEN128-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], <2 x i64> [[WIDE_LOAD1]]
; VLEN128-NEXT: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> [[BROADCAST_SPLAT]], <2 x ptr> [[TMP6]], i32 8, <2 x i1> <i1 true, i1 true>)
; VLEN128-NEXT: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> [[BROADCAST_SPLAT3]], <2 x ptr> [[TMP7]], i32 8, <2 x i1> <i1 true, i1 true>)
; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; VLEN128-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; VLEN128-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; VLEN128: middle.block:
; VLEN128-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024
; VLEN128-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; VLEN128: scalar.ph:
; VLEN128-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; VLEN128-NEXT: br label [[FOR_BODY:%.*]]
; VLEN128: for.body:
; VLEN128-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLEN128-NEXT: [[BADDR:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
; VLEN128-NEXT: [[AIDX:%.*]] = load i64, ptr [[BADDR]], align 8
; VLEN128-NEXT: [[AADDR:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[AIDX]]
; VLEN128-NEXT: store i64 [[V]], ptr [[AADDR]], align 8
; VLEN128-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLEN128-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLEN128-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; VLEN128: for.end:
; VLEN128-NEXT: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %baddr = getelementptr inbounds i64, ptr %b, i64 %iv
  %aidx = load i64, ptr %baddr
  %aaddr = getelementptr inbounds i64, ptr %a, i64 %aidx
  store i64 %v, ptr %aaddr
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret void
}

define i64 @indexed_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %v, i64 %n) {
; VLENUNK-LABEL: @indexed_load(
; VLENUNK-NEXT: entry:
; VLENUNK-NEXT: br label [[FOR_BODY:%.*]]
; VLENUNK: for.body:
; VLENUNK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLENUNK-NEXT: [[SUM:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[SUM_NEXT:%.*]], [[FOR_BODY]] ]
; VLENUNK-NEXT: [[BADDR:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[IV]]
; VLENUNK-NEXT: [[AIDX:%.*]] = load i64, ptr [[BADDR]], align 8
; VLENUNK-NEXT: [[AADDR:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[AIDX]]
; VLENUNK-NEXT: [[ELEM:%.*]] = load i64, ptr [[AADDR]], align 8
; VLENUNK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLENUNK-NEXT: [[SUM_NEXT]] = add i64 [[SUM]], [[ELEM]]
; VLENUNK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLENUNK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; VLENUNK: for.end:
; VLENUNK-NEXT: [[SUM_NEXT_LCSSA:%.*]] = phi i64 [ [[SUM_NEXT]], [[FOR_BODY]] ]
; VLENUNK-NEXT: ret i64 [[SUM_NEXT_LCSSA]]
;
; VLEN128-LABEL: @indexed_load(
; VLEN128-NEXT: entry:
; VLEN128-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; VLEN128: vector.ph:
; VLEN128-NEXT: br label [[VECTOR_BODY:%.*]]
; VLEN128: vector.body:
; VLEN128-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VLEN128-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
; VLEN128-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
; VLEN128-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; VLEN128-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2
; VLEN128-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP0]]
; VLEN128-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
; VLEN128-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
; VLEN128-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8
; VLEN128-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2
; VLEN128-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x i64>, ptr [[TMP5]], align 8
; VLEN128-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], <2 x i64> [[WIDE_LOAD]]
; VLEN128-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], <2 x i64> [[WIDE_LOAD2]]
; VLEN128-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> [[TMP6]], i32 8, <2 x i1> <i1 true, i1 true>, <2 x i64> undef)
; VLEN128-NEXT: [[WIDE_MASKED_GATHER3:%.*]] = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> [[TMP7]], i32 8, <2 x i1> <i1 true, i1 true>, <2 x i64> undef)
; VLEN128-NEXT: [[TMP8]] = add <2 x i64> [[VEC_PHI]], [[WIDE_MASKED_GATHER]]
; VLEN128-NEXT: [[TMP9]] = add <2 x i64> [[VEC_PHI1]], [[WIDE_MASKED_GATHER3]]
; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; VLEN128-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; VLEN128-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; VLEN128: middle.block:
; VLEN128-NEXT: [[BIN_RDX:%.*]] = add <2 x i64> [[TMP9]], [[TMP8]]
; VLEN128-NEXT: [[TMP11:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[BIN_RDX]])
; VLEN128-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024
; VLEN128-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; VLEN128: scalar.ph:
; VLEN128-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; VLEN128-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ]
; VLEN128-NEXT: br label [[FOR_BODY:%.*]]
; VLEN128: for.body:
; VLEN128-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLEN128-NEXT: [[SUM:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[SUM_NEXT:%.*]], [[FOR_BODY]] ]
; VLEN128-NEXT: [[BADDR:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
; VLEN128-NEXT: [[AIDX:%.*]] = load i64, ptr [[BADDR]], align 8
; VLEN128-NEXT: [[AADDR:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[AIDX]]
; VLEN128-NEXT: [[ELEM:%.*]] = load i64, ptr [[AADDR]], align 8
; VLEN128-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLEN128-NEXT: [[SUM_NEXT]] = add i64 [[SUM]], [[ELEM]]
; VLEN128-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLEN128-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; VLEN128: for.end:
; VLEN128-NEXT: [[SUM_NEXT_LCSSA:%.*]] = phi i64 [ [[SUM_NEXT]], [[FOR_BODY]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ]
; VLEN128-NEXT: ret i64 [[SUM_NEXT_LCSSA]]
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %sum = phi i64 [0, %entry], [%sum.next, %for.body]
  %baddr = getelementptr inbounds i64, ptr %b, i64 %iv
  %aidx = load i64, ptr %baddr
  %aaddr = getelementptr inbounds i64, ptr %a, i64 %aidx
  %elem = load i64, ptr %aaddr
  %iv.next = add nuw nsw i64 %iv, 1
  %sum.next = add i64 %sum, %elem
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret i64 %sum.next
}

define void @splat_int(ptr noalias nocapture %a, i64 %v, i64 %n) {
; VLENUNK-LABEL: @splat_int(
; VLENUNK-NEXT: entry:
; VLENUNK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; VLENUNK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]]
; VLENUNK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; VLENUNK: vector.ph:
; VLENUNK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
; VLENUNK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i32 0
; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
; VLENUNK-NEXT: br label [[VECTOR_BODY:%.*]]
; VLENUNK: vector.body:
; VLENUNK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VLENUNK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
; VLENUNK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]]
; VLENUNK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0
; VLENUNK-NEXT: store <vscale x 1 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8
; VLENUNK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; VLENUNK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VLENUNK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; VLENUNK: middle.block:
; VLENUNK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; VLENUNK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; VLENUNK: scalar.ph:
; VLENUNK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; VLENUNK-NEXT: br label [[FOR_BODY:%.*]]
; VLENUNK: for.body:
; VLENUNK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLENUNK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; VLENUNK-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
; VLENUNK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLENUNK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLENUNK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; VLENUNK: for.end:
; VLENUNK-NEXT: ret void
;
; VLEN128-LABEL: @splat_int(
; VLEN128-NEXT: entry:
; VLEN128-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; VLEN128-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]]
; VLEN128-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; VLEN128: vector.ph:
; VLEN128-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; VLEN128-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
; VLEN128-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; VLEN128-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 [[V:%.*]], i32 0
; VLEN128-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
; VLEN128-NEXT: br label [[VECTOR_BODY:%.*]]
; VLEN128: vector.body:
; VLEN128-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VLEN128-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
; VLEN128-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]]
; VLEN128-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP3]], i32 0
; VLEN128-NEXT: store <vscale x 1 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8
; VLEN128-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; VLEN128-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VLEN128-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; VLEN128: middle.block:
; VLEN128-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; VLEN128-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; VLEN128: scalar.ph:
; VLEN128-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; VLEN128-NEXT: br label [[FOR_BODY:%.*]]
; VLEN128: for.body:
; VLEN128-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLEN128-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; VLEN128-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
; VLEN128-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLEN128-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLEN128-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
; VLEN128: for.end:
; VLEN128-NEXT: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
  store i64 %v, ptr %arrayidx
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret void
}

define void @splat_ptr(ptr noalias nocapture %a, ptr %v, i64 %n) {
; VLENUNK-LABEL: @splat_ptr(
; VLENUNK-NEXT: entry:
; VLENUNK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; VLENUNK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]]
; VLENUNK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; VLENUNK: vector.ph:
; VLENUNK-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
; VLENUNK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x ptr> poison, ptr [[V:%.*]], i32 0
; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 1 x ptr> poison, <vscale x 1 x i32> zeroinitializer
; VLENUNK-NEXT: br label [[VECTOR_BODY:%.*]]
; VLENUNK: vector.body:
; VLENUNK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VLENUNK-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
; VLENUNK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]]
; VLENUNK-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP3]], i32 0
; VLENUNK-NEXT: store <vscale x 1 x ptr> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8
; VLENUNK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; VLENUNK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VLENUNK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; VLENUNK: middle.block:
; VLENUNK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; VLENUNK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; VLENUNK: scalar.ph:
; VLENUNK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; VLENUNK-NEXT: br label [[FOR_BODY:%.*]]
; VLENUNK: for.body:
; VLENUNK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLENUNK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; VLENUNK-NEXT: store ptr [[V]], ptr [[ARRAYIDX]], align 8
; VLENUNK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLENUNK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLENUNK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; VLENUNK: for.end:
; VLENUNK-NEXT: ret void
;
; VLEN128-LABEL: @splat_ptr(
; VLEN128-NEXT: entry:
; VLEN128-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; VLEN128-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP0]]
; VLEN128-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; VLEN128: vector.ph:
; VLEN128-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64()
; VLEN128-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP1]]
; VLEN128-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; VLEN128-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 1 x ptr> poison, ptr [[V:%.*]], i32 0
; VLEN128-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 1 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 1 x ptr> poison, <vscale x 1 x i32> zeroinitializer
; VLEN128-NEXT: br label [[VECTOR_BODY:%.*]]
; VLEN128: vector.body:
; VLEN128-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VLEN128-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 0
; VLEN128-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP2]]
; VLEN128-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP3]], i32 0
; VLEN128-NEXT: store <vscale x 1 x ptr> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8
; VLEN128-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; VLEN128-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VLEN128-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; VLEN128: middle.block:
; VLEN128-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; VLEN128-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; VLEN128: scalar.ph:
; VLEN128-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; VLEN128-NEXT: br label [[FOR_BODY:%.*]]
; VLEN128: for.body:
; VLEN128-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLEN128-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; VLEN128-NEXT: store ptr [[V]], ptr [[ARRAYIDX]], align 8
; VLEN128-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLEN128-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLEN128-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
; VLEN128: for.end:
; VLEN128-NEXT: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
  store ptr %v, ptr %arrayidx
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret void
}