; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=loop-vectorize -scalable-vectorization=on -mtriple riscv64-linux-gnu -mattr=+v,+f -S 2>%t | FileCheck %s -check-prefix=VLENUNK
; RUN: opt < %s -passes=loop-vectorize -scalable-vectorization=on -riscv-v-vector-bits-min=-1 -mtriple riscv64-linux-gnu -mattr=+v,+f -S 2>%t | FileCheck %s -check-prefix=VLEN128

; Note: +v implies Zvl128b (i.e. a minimum VLEN of 128), but as can be seen, we're currently
; not using that information unless an explicit vector width is set. (FIXME)

; A collection of fairly basic functional tests when both fixed and scalable vectorization is
; allowed. The primary goal of this is to check for crashes during cost modeling, but it also
; exercises the default heuristics in a useful way.

target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
target triple = "riscv64"

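; a[i] += v, the simplest pattern in this file: a unit-stride load/add/store
; with a constant trip count of 1024.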
define void @vector_add(ptr noalias nocapture %a, i64 %v, i64 %n) {
; VLENUNK-LABEL: @vector_add(
; VLENUNK-NEXT: entry:
; VLENUNK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; VLENUNK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; VLENUNK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
; VLENUNK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; VLENUNK: vector.ph:
; VLENUNK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; VLENUNK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; VLENUNK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; VLENUNK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; VLENUNK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2
; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0
; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; VLENUNK-NEXT: br label [[VECTOR_BODY:%.*]]
; VLENUNK: vector.body:
; VLENUNK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VLENUNK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
; VLENUNK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]]
; VLENUNK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0
; VLENUNK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP6]], align 8
; VLENUNK-NEXT: [[TMP7:%.*]] = add <vscale x 2 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; VLENUNK-NEXT: store <vscale x 2 x i64> [[TMP7]], ptr [[TMP6]], align 8
; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
; VLENUNK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VLENUNK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; VLENUNK: middle.block:
; VLENUNK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; VLENUNK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; VLENUNK: scalar.ph:
; VLENUNK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; VLENUNK-NEXT: br label [[FOR_BODY:%.*]]
; VLENUNK: for.body:
; VLENUNK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLENUNK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; VLENUNK-NEXT: [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
; VLENUNK-NEXT: [[ADD:%.*]] = add i64 [[ELEM]], [[V]]
; VLENUNK-NEXT: store i64 [[ADD]], ptr [[ARRAYIDX]], align 8
; VLENUNK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLENUNK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLENUNK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; VLENUNK: for.end:
; VLENUNK-NEXT: ret void
;
; VLEN128-LABEL: @vector_add(
; VLEN128-NEXT: entry:
; VLEN128-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; VLEN128-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; VLEN128-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
; VLEN128-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; VLEN128: vector.ph:
; VLEN128-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; VLEN128-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
; VLEN128-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; VLEN128-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; VLEN128-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; VLEN128-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2
; VLEN128-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0
; VLEN128-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; VLEN128-NEXT: br label [[VECTOR_BODY:%.*]]
; VLEN128: vector.body:
; VLEN128-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VLEN128-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
; VLEN128-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]]
; VLEN128-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0
; VLEN128-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP6]], align 8
; VLEN128-NEXT: [[TMP7:%.*]] = add <vscale x 2 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; VLEN128-NEXT: store <vscale x 2 x i64> [[TMP7]], ptr [[TMP6]], align 8
; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
; VLEN128-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VLEN128-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; VLEN128: middle.block:
; VLEN128-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; VLEN128-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; VLEN128: scalar.ph:
; VLEN128-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; VLEN128-NEXT: br label [[FOR_BODY:%.*]]
; VLEN128: for.body:
; VLEN128-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLEN128-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; VLEN128-NEXT: [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
; VLEN128-NEXT: [[ADD:%.*]] = add i64 [[ELEM]], [[V]]
; VLEN128-NEXT: store i64 [[ADD]], ptr [[ARRAYIDX]], align 8
; VLEN128-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLEN128-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLEN128-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; VLEN128: for.end:
; VLEN128-NEXT: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
  %elem = load i64, ptr %arrayidx
  %add = add i64 %elem, %v
  store i64 %add, ptr %arrayidx
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret void
}

; Same as above, but with an i32 op type. We currently have a bug around
; etype=ELEN profitability in the vectorizer, and having a smaller element
; width test allows us to highlight different aspects of codegen.
define void @vector_add_i32(ptr noalias nocapture %a, i32 %v, i64 %n) {
; VLENUNK-LABEL: @vector_add_i32(
; VLENUNK-NEXT: entry:
; VLENUNK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; VLENUNK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; VLENUNK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
; VLENUNK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; VLENUNK: vector.ph:
; VLENUNK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; VLENUNK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; VLENUNK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; VLENUNK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; VLENUNK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 4
; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[V:%.*]], i64 0
; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; VLENUNK-NEXT: br label [[VECTOR_BODY:%.*]]
; VLENUNK: vector.body:
; VLENUNK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VLENUNK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
; VLENUNK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP4]]
; VLENUNK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0
; VLENUNK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP6]], align 4
; VLENUNK-NEXT: [[TMP7:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; VLENUNK-NEXT: store <vscale x 4 x i32> [[TMP7]], ptr [[TMP6]], align 4
; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
; VLENUNK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VLENUNK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; VLENUNK: middle.block:
; VLENUNK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; VLENUNK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; VLENUNK: scalar.ph:
; VLENUNK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; VLENUNK-NEXT: br label [[FOR_BODY:%.*]]
; VLENUNK: for.body:
; VLENUNK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLENUNK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; VLENUNK-NEXT: [[ELEM:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; VLENUNK-NEXT: [[ADD:%.*]] = add i32 [[ELEM]], [[V]]
; VLENUNK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX]], align 4
; VLENUNK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLENUNK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLENUNK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; VLENUNK: for.end:
; VLENUNK-NEXT: ret void
;
; VLEN128-LABEL: @vector_add_i32(
; VLEN128-NEXT: entry:
; VLEN128-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; VLEN128-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; VLEN128-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
; VLEN128-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; VLEN128: vector.ph:
; VLEN128-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; VLEN128-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; VLEN128-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; VLEN128-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; VLEN128-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; VLEN128-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 4
; VLEN128-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[V:%.*]], i64 0
; VLEN128-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; VLEN128-NEXT: br label [[VECTOR_BODY:%.*]]
; VLEN128: vector.body:
; VLEN128-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VLEN128-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
; VLEN128-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP4]]
; VLEN128-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 0
; VLEN128-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x i32>, ptr [[TMP6]], align 4
; VLEN128-NEXT: [[TMP7:%.*]] = add <vscale x 4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; VLEN128-NEXT: store <vscale x 4 x i32> [[TMP7]], ptr [[TMP6]], align 4
; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
; VLEN128-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VLEN128-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; VLEN128: middle.block:
; VLEN128-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; VLEN128-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; VLEN128: scalar.ph:
; VLEN128-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; VLEN128-NEXT: br label [[FOR_BODY:%.*]]
; VLEN128: for.body:
; VLEN128-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLEN128-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; VLEN128-NEXT: [[ELEM:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; VLEN128-NEXT: [[ADD:%.*]] = add i32 [[ELEM]], [[V]]
; VLEN128-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX]], align 4
; VLEN128-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLEN128-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLEN128-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; VLEN128: for.end:
; VLEN128-NEXT: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
  %elem = load i32, ptr %arrayidx
  %add = add i32 %elem, %v
  store i32 %add, ptr %arrayidx
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret void
}

; a[b[i]] += v, mostly to exercise scatter/gather costing
; TODO: Currently fails to vectorize due to a memory conflict
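; (Presumably the conflict: b[i] may repeat an index, so the gathered load and
; the scattered store could touch the same element of %a within a single vector
; iteration, and that dependence cannot be ruled out.)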
define void @indexed_add(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %v, i64 %n) {
; VLENUNK-LABEL: @indexed_add(
; VLENUNK-NEXT: entry:
; VLENUNK-NEXT: br label [[FOR_BODY:%.*]]
; VLENUNK: for.body:
; VLENUNK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLENUNK-NEXT: [[BADDR:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[IV]]
; VLENUNK-NEXT: [[AIDX:%.*]] = load i64, ptr [[BADDR]], align 8
; VLENUNK-NEXT: [[AADDR:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[AIDX]]
; VLENUNK-NEXT: [[ELEM:%.*]] = load i64, ptr [[AADDR]], align 8
; VLENUNK-NEXT: [[ADD:%.*]] = add i64 [[ELEM]], [[V:%.*]]
; VLENUNK-NEXT: store i64 [[ADD]], ptr [[AADDR]], align 8
; VLENUNK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLENUNK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLENUNK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; VLENUNK: for.end:
; VLENUNK-NEXT: ret void
;
; VLEN128-LABEL: @indexed_add(
; VLEN128-NEXT: entry:
; VLEN128-NEXT: br label [[FOR_BODY:%.*]]
; VLEN128: for.body:
; VLEN128-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLEN128-NEXT: [[BADDR:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[IV]]
; VLEN128-NEXT: [[AIDX:%.*]] = load i64, ptr [[BADDR]], align 8
; VLEN128-NEXT: [[AADDR:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[AIDX]]
; VLEN128-NEXT: [[ELEM:%.*]] = load i64, ptr [[AADDR]], align 8
; VLEN128-NEXT: [[ADD:%.*]] = add i64 [[ELEM]], [[V:%.*]]
; VLEN128-NEXT: store i64 [[ADD]], ptr [[AADDR]], align 8
; VLEN128-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLEN128-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLEN128-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; VLEN128: for.end:
; VLEN128-NEXT: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %baddr = getelementptr inbounds i64, ptr %b, i64 %iv
  %aidx = load i64, ptr %baddr
  %aaddr = getelementptr inbounds i64, ptr %a, i64 %aidx
  %elem = load i64, ptr %aaddr
  %add = add i64 %elem, %v
  store i64 %add, ptr %aaddr
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret void
}

; a[b[i]] = v, exercise scatter support
define void @indexed_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %v, i64 %n) {
; VLENUNK-LABEL: @indexed_store(
; VLENUNK-NEXT: entry:
; VLENUNK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; VLENUNK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; VLENUNK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
; VLENUNK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; VLENUNK: vector.ph:
; VLENUNK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; VLENUNK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; VLENUNK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; VLENUNK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; VLENUNK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2
; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0
; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; VLENUNK-NEXT: br label [[VECTOR_BODY:%.*]]
; VLENUNK: vector.body:
; VLENUNK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VLENUNK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
; VLENUNK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP4]]
; VLENUNK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0
; VLENUNK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP6]], align 8
; VLENUNK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], <vscale x 2 x i64> [[WIDE_LOAD]]
; VLENUNK-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> [[BROADCAST_SPLAT]], <vscale x 2 x ptr> [[TMP7]], i32 8, <vscale x 2 x i1> splat (i1 true))
; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
; VLENUNK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VLENUNK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; VLENUNK: middle.block:
; VLENUNK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; VLENUNK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; VLENUNK: scalar.ph:
; VLENUNK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; VLENUNK-NEXT: br label [[FOR_BODY:%.*]]
; VLENUNK: for.body:
; VLENUNK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLENUNK-NEXT: [[BADDR:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
; VLENUNK-NEXT: [[AIDX:%.*]] = load i64, ptr [[BADDR]], align 8
; VLENUNK-NEXT: [[AADDR:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[AIDX]]
; VLENUNK-NEXT: store i64 [[V]], ptr [[AADDR]], align 8
; VLENUNK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLENUNK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLENUNK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; VLENUNK: for.end:
; VLENUNK-NEXT: ret void
;
; VLEN128-LABEL: @indexed_store(
; VLEN128-NEXT: entry:
; VLEN128-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; VLEN128-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; VLEN128-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
; VLEN128-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; VLEN128: vector.ph:
; VLEN128-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; VLEN128-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
; VLEN128-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; VLEN128-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; VLEN128-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
; VLEN128-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 2
; VLEN128-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0
; VLEN128-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; VLEN128-NEXT: br label [[VECTOR_BODY:%.*]]
; VLEN128: vector.body:
; VLEN128-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VLEN128-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
; VLEN128-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP4]]
; VLEN128-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0
; VLEN128-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP6]], align 8
; VLEN128-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], <vscale x 2 x i64> [[WIDE_LOAD]]
; VLEN128-NEXT: call void @llvm.masked.scatter.nxv2i64.nxv2p0(<vscale x 2 x i64> [[BROADCAST_SPLAT]], <vscale x 2 x ptr> [[TMP7]], i32 8, <vscale x 2 x i1> splat (i1 true))
; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
; VLEN128-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VLEN128-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; VLEN128: middle.block:
; VLEN128-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; VLEN128-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; VLEN128: scalar.ph:
; VLEN128-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; VLEN128-NEXT: br label [[FOR_BODY:%.*]]
; VLEN128: for.body:
; VLEN128-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLEN128-NEXT: [[BADDR:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
; VLEN128-NEXT: [[AIDX:%.*]] = load i64, ptr [[BADDR]], align 8
; VLEN128-NEXT: [[AADDR:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[AIDX]]
; VLEN128-NEXT: store i64 [[V]], ptr [[AADDR]], align 8
; VLEN128-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLEN128-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLEN128-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; VLEN128: for.end:
; VLEN128-NEXT: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %baddr = getelementptr inbounds i64, ptr %b, i64 %iv
  %aidx = load i64, ptr %baddr
  %aaddr = getelementptr inbounds i64, ptr %a, i64 %aidx
  store i64 %v, ptr %aaddr
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret void
}

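; sum += a[b[i]], exercise gather support feeding an i64 add reduction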
define i64 @indexed_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %v, i64 %n) {
; VLENUNK-LABEL: @indexed_load(
; VLENUNK-NEXT: entry:
; VLENUNK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; VLENUNK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; VLENUNK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
; VLENUNK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; VLENUNK: vector.ph:
; VLENUNK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; VLENUNK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; VLENUNK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; VLENUNK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
; VLENUNK-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 2
; VLENUNK-NEXT: br label [[VECTOR_BODY:%.*]]
; VLENUNK: vector.body:
; VLENUNK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VLENUNK-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
; VLENUNK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
; VLENUNK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP4]]
; VLENUNK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0
; VLENUNK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP6]], align 8
; VLENUNK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], <vscale x 2 x i64> [[WIDE_LOAD]]
; VLENUNK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> [[TMP7]], i32 8, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> poison)
; VLENUNK-NEXT: [[TMP8]] = add <vscale x 2 x i64> [[VEC_PHI]], [[WIDE_MASKED_GATHER]]
; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP10]]
; VLENUNK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VLENUNK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; VLENUNK: middle.block:
; VLENUNK-NEXT: [[TMP12:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> [[TMP8]])
; VLENUNK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; VLENUNK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; VLENUNK: scalar.ph:
; VLENUNK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; VLENUNK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP12]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; VLENUNK-NEXT: br label [[FOR_BODY:%.*]]
; VLENUNK: for.body:
; VLENUNK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLENUNK-NEXT: [[SUM:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[SUM_NEXT:%.*]], [[FOR_BODY]] ]
; VLENUNK-NEXT: [[BADDR:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
; VLENUNK-NEXT: [[AIDX:%.*]] = load i64, ptr [[BADDR]], align 8
; VLENUNK-NEXT: [[AADDR:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[AIDX]]
; VLENUNK-NEXT: [[ELEM:%.*]] = load i64, ptr [[AADDR]], align 8
; VLENUNK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLENUNK-NEXT: [[SUM_NEXT]] = add i64 [[SUM]], [[ELEM]]
; VLENUNK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLENUNK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; VLENUNK: for.end:
; VLENUNK-NEXT: [[SUM_NEXT_LCSSA:%.*]] = phi i64 [ [[SUM_NEXT]], [[FOR_BODY]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ]
; VLENUNK-NEXT: ret i64 [[SUM_NEXT_LCSSA]]
;
; VLEN128-LABEL: @indexed_load(
; VLEN128-NEXT: entry:
; VLEN128-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; VLEN128-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; VLEN128-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
; VLEN128-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; VLEN128: vector.ph:
; VLEN128-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; VLEN128-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
; VLEN128-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; VLEN128-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; VLEN128-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
; VLEN128-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2
; VLEN128-NEXT: br label [[VECTOR_BODY:%.*]]
; VLEN128: vector.body:
; VLEN128-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VLEN128-NEXT: [[VEC_PHI:%.*]] = phi <vscale x 2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
; VLEN128-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0
; VLEN128-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP6]]
; VLEN128-NEXT: [[TMP8:%.*]] = getelementptr inbounds i64, ptr [[TMP7]], i32 0
; VLEN128-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i64>, ptr [[TMP8]], align 8
; VLEN128-NEXT: [[TMP9:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], <vscale x 2 x i64> [[WIDE_LOAD]]
; VLEN128-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64.nxv2p0(<vscale x 2 x ptr> [[TMP9]], i32 8, <vscale x 2 x i1> splat (i1 true), <vscale x 2 x i64> poison)
; VLEN128-NEXT: [[TMP10]] = add <vscale x 2 x i64> [[VEC_PHI]], [[WIDE_MASKED_GATHER]]
; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
; VLEN128-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VLEN128-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; VLEN128: middle.block:
; VLEN128-NEXT: [[TMP12:%.*]] = call i64 @llvm.vector.reduce.add.nxv2i64(<vscale x 2 x i64> [[TMP10]])
; VLEN128-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; VLEN128-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; VLEN128: scalar.ph:
; VLEN128-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; VLEN128-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP12]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
; VLEN128-NEXT: br label [[FOR_BODY:%.*]]
; VLEN128: for.body:
; VLEN128-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLEN128-NEXT: [[SUM:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[SUM_NEXT:%.*]], [[FOR_BODY]] ]
; VLEN128-NEXT: [[BADDR:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
; VLEN128-NEXT: [[AIDX:%.*]] = load i64, ptr [[BADDR]], align 8
; VLEN128-NEXT: [[AADDR:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[AIDX]]
; VLEN128-NEXT: [[ELEM:%.*]] = load i64, ptr [[AADDR]], align 8
; VLEN128-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLEN128-NEXT: [[SUM_NEXT]] = add i64 [[SUM]], [[ELEM]]
; VLEN128-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLEN128-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; VLEN128: for.end:
; VLEN128-NEXT: [[SUM_NEXT_LCSSA:%.*]] = phi i64 [ [[SUM_NEXT]], [[FOR_BODY]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ]
; VLEN128-NEXT: ret i64 [[SUM_NEXT_LCSSA]]
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %sum = phi i64 [ 0, %entry ], [ %sum.next, %for.body ]
  %baddr = getelementptr inbounds i64, ptr %b, i64 %iv
  %aidx = load i64, ptr %baddr
  %aaddr = getelementptr inbounds i64, ptr %a, i64 %aidx
  %elem = load i64, ptr %aaddr
  %iv.next = add nuw nsw i64 %iv, 1
  %sum.next = add i64 %sum, %elem
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret i64 %sum.next
}

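; a[i] = v, store of a splatted scalar i64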
define void @splat_int(ptr noalias nocapture %a, i64 %v, i64 %n) {
; VLENUNK-LABEL: @splat_int(
; VLENUNK-NEXT: entry:
; VLENUNK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; VLENUNK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; VLENUNK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
; VLENUNK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; VLENUNK: vector.ph:
; VLENUNK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; VLENUNK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; VLENUNK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; VLENUNK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; VLENUNK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2
; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0
; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; VLENUNK-NEXT: br label [[VECTOR_BODY:%.*]]
; VLENUNK: vector.body:
; VLENUNK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VLENUNK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
; VLENUNK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]]
; VLENUNK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0
; VLENUNK-NEXT: store <vscale x 2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP6]], align 8
; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]]
; VLENUNK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VLENUNK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; VLENUNK: middle.block:
; VLENUNK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; VLENUNK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; VLENUNK: scalar.ph:
; VLENUNK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; VLENUNK-NEXT: br label [[FOR_BODY:%.*]]
; VLENUNK: for.body:
; VLENUNK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLENUNK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; VLENUNK-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
; VLENUNK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLENUNK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLENUNK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
; VLENUNK: for.end:
; VLENUNK-NEXT: ret void
;
; VLEN128-LABEL: @splat_int(
; VLEN128-NEXT: entry:
; VLEN128-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; VLEN128-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; VLEN128-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
; VLEN128-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; VLEN128: vector.ph:
; VLEN128-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; VLEN128-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
; VLEN128-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; VLEN128-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; VLEN128-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; VLEN128-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2
; VLEN128-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[V:%.*]], i64 0
; VLEN128-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; VLEN128-NEXT: br label [[VECTOR_BODY:%.*]]
; VLEN128: vector.body:
; VLEN128-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VLEN128-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
; VLEN128-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]]
; VLEN128-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP5]], i32 0
; VLEN128-NEXT: store <vscale x 2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP6]], align 8
; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]]
; VLEN128-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VLEN128-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; VLEN128: middle.block:
; VLEN128-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; VLEN128-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; VLEN128: scalar.ph:
; VLEN128-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; VLEN128-NEXT: br label [[FOR_BODY:%.*]]
; VLEN128: for.body:
; VLEN128-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLEN128-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; VLEN128-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
; VLEN128-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLEN128-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLEN128-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
; VLEN128: for.end:
; VLEN128-NEXT: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
  store i64 %v, ptr %arrayidx
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret void
}

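; a[i] = v where v is a pointer, the same splat-store pattern with a pointer element type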
define void @splat_ptr(ptr noalias nocapture %a, ptr %v, i64 %n) {
; VLENUNK-LABEL: @splat_ptr(
; VLENUNK-NEXT: entry:
; VLENUNK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; VLENUNK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; VLENUNK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
; VLENUNK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; VLENUNK: vector.ph:
; VLENUNK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; VLENUNK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; VLENUNK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; VLENUNK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; VLENUNK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2
; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[V:%.*]], i64 0
; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
; VLENUNK-NEXT: br label [[VECTOR_BODY:%.*]]
; VLENUNK: vector.body:
; VLENUNK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VLENUNK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
; VLENUNK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]]
; VLENUNK-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i32 0
; VLENUNK-NEXT: store <vscale x 2 x ptr> [[BROADCAST_SPLAT]], ptr [[TMP6]], align 8
; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]]
; VLENUNK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VLENUNK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; VLENUNK: middle.block:
; VLENUNK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; VLENUNK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; VLENUNK: scalar.ph:
; VLENUNK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; VLENUNK-NEXT: br label [[FOR_BODY:%.*]]
; VLENUNK: for.body:
; VLENUNK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLENUNK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; VLENUNK-NEXT: store ptr [[V]], ptr [[ARRAYIDX]], align 8
; VLENUNK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLENUNK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLENUNK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
; VLENUNK: for.end:
; VLENUNK-NEXT: ret void
;
; VLEN128-LABEL: @splat_ptr(
; VLEN128-NEXT: entry:
; VLEN128-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; VLEN128-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
; VLEN128-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
; VLEN128-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; VLEN128: vector.ph:
; VLEN128-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; VLEN128-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
; VLEN128-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; VLEN128-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; VLEN128-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
; VLEN128-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 2
; VLEN128-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x ptr> poison, ptr [[V:%.*]], i64 0
; VLEN128-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x ptr> [[BROADCAST_SPLATINSERT]], <vscale x 2 x ptr> poison, <vscale x 2 x i32> zeroinitializer
; VLEN128-NEXT: br label [[VECTOR_BODY:%.*]]
; VLEN128: vector.body:
; VLEN128-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VLEN128-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
; VLEN128-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP4]]
; VLEN128-NEXT: [[TMP6:%.*]] = getelementptr inbounds ptr, ptr [[TMP5]], i32 0
; VLEN128-NEXT: store <vscale x 2 x ptr> [[BROADCAST_SPLAT]], ptr [[TMP6]], align 8
; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP8]]
; VLEN128-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VLEN128-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; VLEN128: middle.block:
; VLEN128-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; VLEN128-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; VLEN128: scalar.ph:
; VLEN128-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; VLEN128-NEXT: br label [[FOR_BODY:%.*]]
; VLEN128: for.body:
; VLEN128-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLEN128-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; VLEN128-NEXT: store ptr [[V]], ptr [[ARRAYIDX]], align 8
; VLEN128-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLEN128-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLEN128-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
; VLEN128: for.end:
; VLEN128-NEXT: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
  store ptr %v, ptr %arrayidx
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret void
}