; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -mtriple riscv64-linux-gnu -mattr=+v,+f -S 2>%t | FileCheck %s -check-prefix=VLENUNK
; RUN: opt < %s -loop-vectorize -scalable-vectorization=on -riscv-v-vector-bits-min=-1 -mtriple riscv64-linux-gnu -mattr=+v,+f -S 2>%t | FileCheck %s -check-prefix=VLEN128

; Note: +v implies a Zvl128b (i.e. a minimum VLEN of 128), but as can be seen, we're currently
; not using that information unless an explicit vector width is set. (FIXME)

; A collection of fairly basic functional tests for the case where both fixed and scalable
; vectorization are allowed. The primary goal is to check for crashes during cost modeling,
; but it also exercises the default heuristics in a useful way.

target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
target triple = "riscv64"

define void @vector_add(ptr noalias nocapture %a, i64 %v, i64 %n) {
; VLENUNK-LABEL: @vector_add(
; VLENUNK-NEXT: entry:
; VLENUNK-NEXT: br label [[FOR_BODY:%.*]]
; VLENUNK: for.body:
; VLENUNK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLENUNK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[IV]]
; VLENUNK-NEXT: [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
; VLENUNK-NEXT: [[ADD:%.*]] = add i64 [[ELEM]], [[V:%.*]]
; VLENUNK-NEXT: store i64 [[ADD]], ptr [[ARRAYIDX]], align 8
; VLENUNK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLENUNK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLENUNK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; VLENUNK: for.end:
; VLENUNK-NEXT: ret void
;
; VLEN128-LABEL: @vector_add(
; VLEN128-NEXT: entry:
; VLEN128-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; VLEN128: vector.ph:
; VLEN128-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i32 0
; VLEN128-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
; VLEN128-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x i64> poison, i64 [[V]], i32 0
; VLEN128-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT2]], <2 x i64> poison, <2 x i32> zeroinitializer
; VLEN128-NEXT: br label [[VECTOR_BODY:%.*]]
; VLEN128: vector.body:
; VLEN128-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VLEN128-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; VLEN128-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2
; VLEN128-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
; VLEN128-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
; VLEN128-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
; VLEN128-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8
; VLEN128-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2
; VLEN128-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i64>, ptr [[TMP5]], align 8
; VLEN128-NEXT: [[TMP6:%.*]] = add <2 x i64> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; VLEN128-NEXT: [[TMP7:%.*]] = add <2 x i64> [[WIDE_LOAD1]], [[BROADCAST_SPLAT3]]
; VLEN128-NEXT: store <2 x i64> [[TMP6]], ptr [[TMP4]], align 8
; VLEN128-NEXT: store <2 x i64> [[TMP7]], ptr [[TMP5]], align 8
; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; VLEN128-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; VLEN128-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; VLEN128: middle.block:
; VLEN128-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024
; VLEN128-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; VLEN128: scalar.ph:
; VLEN128-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; VLEN128-NEXT: br label [[FOR_BODY:%.*]]
; VLEN128: for.body:
; VLEN128-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLEN128-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; VLEN128-NEXT: [[ELEM:%.*]] = load i64, ptr [[ARRAYIDX]], align 8
; VLEN128-NEXT: [[ADD:%.*]] = add i64 [[ELEM]], [[V]]
; VLEN128-NEXT: store i64 [[ADD]], ptr [[ARRAYIDX]], align 8
; VLEN128-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLEN128-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLEN128-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
; VLEN128: for.end:
; VLEN128-NEXT: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
  %elem = load i64, ptr %arrayidx
  %add = add i64 %elem, %v
  store i64 %add, ptr %arrayidx
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret void
}

; Same as above, but with op type of i32. We currently have a bug around
; etype=ELEN profitability in the vectorizer, and having a smaller element
; width test allows us to highlight different aspects of codegen.
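; As the checks below show, both RUN lines currently pick a scalable VF
; (<vscale x 2 x i32>) for this i32 loop, unlike the i64 case above where
; VLENUNK stays scalar and VLEN128 uses fixed <2 x i64> vectors.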
define void @vector_add_i32(ptr noalias nocapture %a, i32 %v, i64 %n) {
; VLENUNK-LABEL: @vector_add_i32(
; VLENUNK-NEXT: entry:
; VLENUNK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; VLENUNK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; VLENUNK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
; VLENUNK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; VLENUNK: vector.ph:
; VLENUNK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; VLENUNK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; VLENUNK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; VLENUNK-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[V:%.*]], i32 0
; VLENUNK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
; VLENUNK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[V]], i32 0
; VLENUNK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT2]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
; VLENUNK-NEXT: br label [[VECTOR_BODY:%.*]]
; VLENUNK: vector.body:
; VLENUNK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VLENUNK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
; VLENUNK-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; VLENUNK-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2
; VLENUNK-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], 0
; VLENUNK-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 1
; VLENUNK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], [[TMP8]]
; VLENUNK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP4]]
; VLENUNK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP9]]
; VLENUNK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 0
; VLENUNK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i32>, ptr [[TMP12]], align 4
; VLENUNK-NEXT: [[TMP13:%.*]] = call i32 @llvm.vscale.i32()
; VLENUNK-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], 2
; VLENUNK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP14]]
; VLENUNK-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 2 x i32>, ptr [[TMP15]], align 4
; VLENUNK-NEXT: [[TMP16:%.*]] = add <vscale x 2 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; VLENUNK-NEXT: [[TMP17:%.*]] = add <vscale x 2 x i32> [[WIDE_LOAD1]], [[BROADCAST_SPLAT3]]
; VLENUNK-NEXT: store <vscale x 2 x i32> [[TMP16]], ptr [[TMP12]], align 4
; VLENUNK-NEXT: [[TMP18:%.*]] = call i32 @llvm.vscale.i32()
; VLENUNK-NEXT: [[TMP19:%.*]] = mul i32 [[TMP18]], 2
; VLENUNK-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP19]]
; VLENUNK-NEXT: store <vscale x 2 x i32> [[TMP17]], ptr [[TMP20]], align 4
; VLENUNK-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64()
; VLENUNK-NEXT: [[TMP22:%.*]] = mul i64 [[TMP21]], 4
; VLENUNK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP22]]
; VLENUNK-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VLENUNK-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; VLENUNK: middle.block:
; VLENUNK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; VLENUNK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; VLENUNK: scalar.ph:
; VLENUNK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; VLENUNK-NEXT: br label [[FOR_BODY:%.*]]
; VLENUNK: for.body:
; VLENUNK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLENUNK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; VLENUNK-NEXT: [[ELEM:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; VLENUNK-NEXT: [[ADD:%.*]] = add i32 [[ELEM]], [[V]]
; VLENUNK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX]], align 4
; VLENUNK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLENUNK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLENUNK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
; VLENUNK: for.end:
; VLENUNK-NEXT: ret void
;
; VLEN128-LABEL: @vector_add_i32(
; VLEN128-NEXT: entry:
; VLEN128-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; VLEN128-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; VLEN128-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1024, [[TMP1]]
; VLEN128-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; VLEN128: vector.ph:
; VLEN128-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; VLEN128-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; VLEN128-NEXT: [[N_MOD_VF:%.*]] = urem i64 1024, [[TMP3]]
; VLEN128-NEXT: [[N_VEC:%.*]] = sub i64 1024, [[N_MOD_VF]]
; VLEN128-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[V:%.*]], i32 0
; VLEN128-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
; VLEN128-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[V]], i32 0
; VLEN128-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <vscale x 2 x i32> [[BROADCAST_SPLATINSERT2]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
; VLEN128-NEXT: br label [[VECTOR_BODY:%.*]]
; VLEN128: vector.body:
; VLEN128-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VLEN128-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
; VLEN128-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64()
; VLEN128-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 2
; VLEN128-NEXT: [[TMP7:%.*]] = add i64 [[TMP6]], 0
; VLEN128-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 1
; VLEN128-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], [[TMP8]]
; VLEN128-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP4]]
; VLEN128-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP9]]
; VLEN128-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 0
; VLEN128-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 2 x i32>, ptr [[TMP12]], align 4
; VLEN128-NEXT: [[TMP13:%.*]] = call i32 @llvm.vscale.i32()
; VLEN128-NEXT: [[TMP14:%.*]] = mul i32 [[TMP13]], 2
; VLEN128-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP14]]
; VLEN128-NEXT: [[WIDE_LOAD1:%.*]] = load <vscale x 2 x i32>, ptr [[TMP15]], align 4
; VLEN128-NEXT: [[TMP16:%.*]] = add <vscale x 2 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]]
; VLEN128-NEXT: [[TMP17:%.*]] = add <vscale x 2 x i32> [[WIDE_LOAD1]], [[BROADCAST_SPLAT3]]
; VLEN128-NEXT: store <vscale x 2 x i32> [[TMP16]], ptr [[TMP12]], align 4
; VLEN128-NEXT: [[TMP18:%.*]] = call i32 @llvm.vscale.i32()
; VLEN128-NEXT: [[TMP19:%.*]] = mul i32 [[TMP18]], 2
; VLEN128-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP10]], i32 [[TMP19]]
; VLEN128-NEXT: store <vscale x 2 x i32> [[TMP17]], ptr [[TMP20]], align 4
; VLEN128-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64()
; VLEN128-NEXT: [[TMP22:%.*]] = mul i64 [[TMP21]], 4
; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP22]]
; VLEN128-NEXT: [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; VLEN128-NEXT: br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; VLEN128: middle.block:
; VLEN128-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, [[N_VEC]]
; VLEN128-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; VLEN128: scalar.ph:
; VLEN128-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; VLEN128-NEXT: br label [[FOR_BODY:%.*]]
; VLEN128: for.body:
; VLEN128-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLEN128-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]]
; VLEN128-NEXT: [[ELEM:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; VLEN128-NEXT: [[ADD:%.*]] = add i32 [[ELEM]], [[V]]
; VLEN128-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX]], align 4
; VLEN128-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLEN128-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLEN128-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; VLEN128: for.end:
; VLEN128-NEXT: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %iv
  %elem = load i32, ptr %arrayidx
  %add = add i32 %elem, %v
  store i32 %add, ptr %arrayidx
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret void
}


; a[b[i]] += v, mostly to exercise scatter/gather costing
; TODO: Currently fails to vectorize due to a memory conflict
define void @indexed_add(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %v, i64 %n) {
; VLENUNK-LABEL: @indexed_add(
; VLENUNK-NEXT: entry:
; VLENUNK-NEXT: br label [[FOR_BODY:%.*]]
; VLENUNK: for.body:
; VLENUNK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLENUNK-NEXT: [[BADDR:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[IV]]
; VLENUNK-NEXT: [[AIDX:%.*]] = load i64, ptr [[BADDR]], align 8
; VLENUNK-NEXT: [[AADDR:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[AIDX]]
; VLENUNK-NEXT: [[ELEM:%.*]] = load i64, ptr [[AADDR]], align 8
; VLENUNK-NEXT: [[ADD:%.*]] = add i64 [[ELEM]], [[V:%.*]]
; VLENUNK-NEXT: store i64 [[ADD]], ptr [[AADDR]], align 8
; VLENUNK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLENUNK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLENUNK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; VLENUNK: for.end:
; VLENUNK-NEXT: ret void
;
; VLEN128-LABEL: @indexed_add(
; VLEN128-NEXT: entry:
; VLEN128-NEXT: br label [[FOR_BODY:%.*]]
; VLEN128: for.body:
; VLEN128-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLEN128-NEXT: [[BADDR:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[IV]]
; VLEN128-NEXT: [[AIDX:%.*]] = load i64, ptr [[BADDR]], align 8
; VLEN128-NEXT: [[AADDR:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[AIDX]]
; VLEN128-NEXT: [[ELEM:%.*]] = load i64, ptr [[AADDR]], align 8
; VLEN128-NEXT: [[ADD:%.*]] = add i64 [[ELEM]], [[V:%.*]]
; VLEN128-NEXT: store i64 [[ADD]], ptr [[AADDR]], align 8
; VLEN128-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLEN128-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLEN128-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; VLEN128: for.end:
; VLEN128-NEXT: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %baddr = getelementptr inbounds i64, ptr %b, i64 %iv
  %aidx = load i64, ptr %baddr
  %aaddr = getelementptr inbounds i64, ptr %a, i64 %aidx
  %elem = load i64, ptr %aaddr
  %add = add i64 %elem, %v
  store i64 %add, ptr %aaddr
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret void
}

; a[b[i]] = v, exercise scatter support
define void @indexed_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %v, i64 %n) {
; VLENUNK-LABEL: @indexed_store(
; VLENUNK-NEXT: entry:
; VLENUNK-NEXT: br label [[FOR_BODY:%.*]]
; VLENUNK: for.body:
; VLENUNK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLENUNK-NEXT: [[BADDR:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[IV]]
; VLENUNK-NEXT: [[AIDX:%.*]] = load i64, ptr [[BADDR]], align 8
; VLENUNK-NEXT: [[AADDR:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[AIDX]]
; VLENUNK-NEXT: store i64 [[V:%.*]], ptr [[AADDR]], align 8
; VLENUNK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLENUNK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLENUNK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; VLENUNK: for.end:
; VLENUNK-NEXT: ret void
;
; VLEN128-LABEL: @indexed_store(
; VLEN128-NEXT: entry:
; VLEN128-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; VLEN128: vector.ph:
; VLEN128-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i32 0
; VLEN128-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
; VLEN128-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x i64> poison, i64 [[V]], i32 0
; VLEN128-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT2]], <2 x i64> poison, <2 x i32> zeroinitializer
; VLEN128-NEXT: br label [[VECTOR_BODY:%.*]]
; VLEN128: vector.body:
; VLEN128-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VLEN128-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; VLEN128-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2
; VLEN128-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP0]]
; VLEN128-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
; VLEN128-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
; VLEN128-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8
; VLEN128-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2
; VLEN128-NEXT: [[WIDE_LOAD1:%.*]] = load <2 x i64>, ptr [[TMP5]], align 8
; VLEN128-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], <2 x i64> [[WIDE_LOAD]]
; VLEN128-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], <2 x i64> [[WIDE_LOAD1]]
; VLEN128-NEXT: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> [[BROADCAST_SPLAT]], <2 x ptr> [[TMP6]], i32 8, <2 x i1> <i1 true, i1 true>)
; VLEN128-NEXT: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> [[BROADCAST_SPLAT3]], <2 x ptr> [[TMP7]], i32 8, <2 x i1> <i1 true, i1 true>)
; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; VLEN128-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; VLEN128-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; VLEN128: middle.block:
; VLEN128-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024
; VLEN128-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; VLEN128: scalar.ph:
; VLEN128-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; VLEN128-NEXT: br label [[FOR_BODY:%.*]]
; VLEN128: for.body:
; VLEN128-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLEN128-NEXT: [[BADDR:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
; VLEN128-NEXT: [[AIDX:%.*]] = load i64, ptr [[BADDR]], align 8
; VLEN128-NEXT: [[AADDR:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[AIDX]]
; VLEN128-NEXT: store i64 [[V]], ptr [[AADDR]], align 8
; VLEN128-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLEN128-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLEN128-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; VLEN128: for.end:
; VLEN128-NEXT: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %baddr = getelementptr inbounds i64, ptr %b, i64 %iv
  %aidx = load i64, ptr %baddr
  %aaddr = getelementptr inbounds i64, ptr %a, i64 %aidx
  store i64 %v, ptr %aaddr
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret void
}

define i64 @indexed_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %v, i64 %n) {
; VLENUNK-LABEL: @indexed_load(
; VLENUNK-NEXT: entry:
; VLENUNK-NEXT: br label [[FOR_BODY:%.*]]
; VLENUNK: for.body:
; VLENUNK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLENUNK-NEXT: [[SUM:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[SUM_NEXT:%.*]], [[FOR_BODY]] ]
; VLENUNK-NEXT: [[BADDR:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[IV]]
; VLENUNK-NEXT: [[AIDX:%.*]] = load i64, ptr [[BADDR]], align 8
; VLENUNK-NEXT: [[AADDR:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[AIDX]]
; VLENUNK-NEXT: [[ELEM:%.*]] = load i64, ptr [[AADDR]], align 8
; VLENUNK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLENUNK-NEXT: [[SUM_NEXT]] = add i64 [[SUM]], [[ELEM]]
; VLENUNK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLENUNK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; VLENUNK: for.end:
; VLENUNK-NEXT: [[SUM_NEXT_LCSSA:%.*]] = phi i64 [ [[SUM_NEXT]], [[FOR_BODY]] ]
; VLENUNK-NEXT: ret i64 [[SUM_NEXT_LCSSA]]
;
; VLEN128-LABEL: @indexed_load(
; VLEN128-NEXT: entry:
; VLEN128-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; VLEN128: vector.ph:
; VLEN128-NEXT: br label [[VECTOR_BODY:%.*]]
; VLEN128: vector.body:
; VLEN128-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VLEN128-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
; VLEN128-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
; VLEN128-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; VLEN128-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2
; VLEN128-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[B:%.*]], i64 [[TMP0]]
; VLEN128-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP1]]
; VLEN128-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
; VLEN128-NEXT: [[WIDE_LOAD:%.*]] = load <2 x i64>, ptr [[TMP4]], align 8
; VLEN128-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2
; VLEN128-NEXT: [[WIDE_LOAD2:%.*]] = load <2 x i64>, ptr [[TMP5]], align 8
; VLEN128-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], <2 x i64> [[WIDE_LOAD]]
; VLEN128-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[A]], <2 x i64> [[WIDE_LOAD2]]
; VLEN128-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> [[TMP6]], i32 8, <2 x i1> <i1 true, i1 true>, <2 x i64> undef)
; VLEN128-NEXT: [[WIDE_MASKED_GATHER3:%.*]] = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> [[TMP7]], i32 8, <2 x i1> <i1 true, i1 true>, <2 x i64> undef)
; VLEN128-NEXT: [[TMP8]] = add <2 x i64> [[VEC_PHI]], [[WIDE_MASKED_GATHER]]
; VLEN128-NEXT: [[TMP9]] = add <2 x i64> [[VEC_PHI1]], [[WIDE_MASKED_GATHER3]]
; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; VLEN128-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; VLEN128-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; VLEN128: middle.block:
; VLEN128-NEXT: [[BIN_RDX:%.*]] = add <2 x i64> [[TMP9]], [[TMP8]]
; VLEN128-NEXT: [[TMP11:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[BIN_RDX]])
; VLEN128-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024
; VLEN128-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; VLEN128: scalar.ph:
; VLEN128-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; VLEN128-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ]
; VLEN128-NEXT: br label [[FOR_BODY:%.*]]
; VLEN128: for.body:
; VLEN128-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLEN128-NEXT: [[SUM:%.*]] = phi i64 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[SUM_NEXT:%.*]], [[FOR_BODY]] ]
; VLEN128-NEXT: [[BADDR:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[IV]]
; VLEN128-NEXT: [[AIDX:%.*]] = load i64, ptr [[BADDR]], align 8
; VLEN128-NEXT: [[AADDR:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[AIDX]]
; VLEN128-NEXT: [[ELEM:%.*]] = load i64, ptr [[AADDR]], align 8
; VLEN128-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLEN128-NEXT: [[SUM_NEXT]] = add i64 [[SUM]], [[ELEM]]
; VLEN128-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLEN128-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
; VLEN128: for.end:
; VLEN128-NEXT: [[SUM_NEXT_LCSSA:%.*]] = phi i64 [ [[SUM_NEXT]], [[FOR_BODY]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ]
; VLEN128-NEXT: ret i64 [[SUM_NEXT_LCSSA]]
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %sum = phi i64 [0, %entry], [%sum.next, %for.body]
  %baddr = getelementptr inbounds i64, ptr %b, i64 %iv
  %aidx = load i64, ptr %baddr
  %aaddr = getelementptr inbounds i64, ptr %a, i64 %aidx
  %elem = load i64, ptr %aaddr
  %iv.next = add nuw nsw i64 %iv, 1
  %sum.next = add i64 %sum, %elem
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret i64 %sum.next
}

define void @splat_int(ptr noalias nocapture %a, i64 %v, i64 %n) {
; VLENUNK-LABEL: @splat_int(
; VLENUNK-NEXT: entry:
; VLENUNK-NEXT: br label [[FOR_BODY:%.*]]
; VLENUNK: for.body:
; VLENUNK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLENUNK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[IV]]
; VLENUNK-NEXT: store i64 [[V:%.*]], ptr [[ARRAYIDX]], align 8
; VLENUNK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLENUNK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLENUNK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; VLENUNK: for.end:
; VLENUNK-NEXT: ret void
;
; VLEN128-LABEL: @splat_int(
; VLEN128-NEXT: entry:
; VLEN128-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; VLEN128: vector.ph:
; VLEN128-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i32 0
; VLEN128-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
; VLEN128-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[V]], i32 0
; VLEN128-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> poison, <2 x i32> zeroinitializer
; VLEN128-NEXT: br label [[VECTOR_BODY:%.*]]
; VLEN128: vector.body:
; VLEN128-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VLEN128-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; VLEN128-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2
; VLEN128-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
; VLEN128-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
; VLEN128-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
; VLEN128-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8
; VLEN128-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2
; VLEN128-NEXT: store <2 x i64> [[BROADCAST_SPLAT2]], ptr [[TMP5]], align 8
; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; VLEN128-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; VLEN128-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; VLEN128: middle.block:
; VLEN128-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024
; VLEN128-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; VLEN128: scalar.ph:
; VLEN128-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; VLEN128-NEXT: br label [[FOR_BODY:%.*]]
; VLEN128: for.body:
; VLEN128-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLEN128-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; VLEN128-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
; VLEN128-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLEN128-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLEN128-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
; VLEN128: for.end:
; VLEN128-NEXT: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
  store i64 %v, ptr %arrayidx
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret void
}

define void @splat_ptr(ptr noalias nocapture %a, ptr %v, i64 %n) {
; VLENUNK-LABEL: @splat_ptr(
; VLENUNK-NEXT: entry:
; VLENUNK-NEXT: br label [[FOR_BODY:%.*]]
; VLENUNK: for.body:
; VLENUNK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLENUNK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[IV]]
; VLENUNK-NEXT: store ptr [[V:%.*]], ptr [[ARRAYIDX]], align 8
; VLENUNK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLENUNK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLENUNK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; VLENUNK: for.end:
; VLENUNK-NEXT: ret void
;
; VLEN128-LABEL: @splat_ptr(
; VLEN128-NEXT: entry:
; VLEN128-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; VLEN128: vector.ph:
; VLEN128-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x ptr> poison, ptr [[V:%.*]], i32 0
; VLEN128-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x ptr> [[BROADCAST_SPLATINSERT]], <2 x ptr> poison, <2 x i32> zeroinitializer
; VLEN128-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x ptr> poison, ptr [[V]], i32 0
; VLEN128-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x ptr> [[BROADCAST_SPLATINSERT1]], <2 x ptr> poison, <2 x i32> zeroinitializer
; VLEN128-NEXT: br label [[VECTOR_BODY:%.*]]
; VLEN128: vector.body:
; VLEN128-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VLEN128-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; VLEN128-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2
; VLEN128-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
; VLEN128-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
; VLEN128-NEXT: [[TMP4:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i32 0
; VLEN128-NEXT: store <2 x ptr> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8
; VLEN128-NEXT: [[TMP5:%.*]] = getelementptr inbounds ptr, ptr [[TMP2]], i32 2
; VLEN128-NEXT: store <2 x ptr> [[BROADCAST_SPLAT2]], ptr [[TMP5]], align 8
; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; VLEN128-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; VLEN128-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; VLEN128: middle.block:
; VLEN128-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024
; VLEN128-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; VLEN128: scalar.ph:
; VLEN128-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; VLEN128-NEXT: br label [[FOR_BODY:%.*]]
; VLEN128: for.body:
; VLEN128-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLEN128-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; VLEN128-NEXT: store ptr [[V]], ptr [[ARRAYIDX]], align 8
; VLEN128-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLEN128-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLEN128-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
; VLEN128: for.end:
; VLEN128-NEXT: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
  store ptr %v, ptr %arrayidx
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret void
}

define void @uniform_store(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %v, i64 %n) {
; VLENUNK-LABEL: @uniform_store(
; VLENUNK-NEXT: entry:
; VLENUNK-NEXT: br label [[FOR_BODY:%.*]]
; VLENUNK: for.body:
; VLENUNK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLENUNK-NEXT: store i64 [[V:%.*]], ptr [[B:%.*]], align 8
; VLENUNK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[IV]]
; VLENUNK-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
; VLENUNK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLENUNK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLENUNK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; VLENUNK: for.end:
; VLENUNK-NEXT: ret void
;
; VLEN128-LABEL: @uniform_store(
; VLEN128-NEXT: entry:
; VLEN128-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; VLEN128: vector.ph:
; VLEN128-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i32 0
; VLEN128-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
; VLEN128-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[V]], i32 0
; VLEN128-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> poison, <2 x i32> zeroinitializer
; VLEN128-NEXT: br label [[VECTOR_BODY:%.*]]
; VLEN128: vector.body:
; VLEN128-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VLEN128-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; VLEN128-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2
; VLEN128-NEXT: store i64 [[V]], ptr [[B:%.*]], align 8
; VLEN128-NEXT: store i64 [[V]], ptr [[B]], align 8
; VLEN128-NEXT: store i64 [[V]], ptr [[B]], align 8
; VLEN128-NEXT: store i64 [[V]], ptr [[B]], align 8
; VLEN128-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
; VLEN128-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
; VLEN128-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
; VLEN128-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8
; VLEN128-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2
; VLEN128-NEXT: store <2 x i64> [[BROADCAST_SPLAT2]], ptr [[TMP5]], align 8
; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; VLEN128-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; VLEN128-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; VLEN128: middle.block:
; VLEN128-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024
; VLEN128-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; VLEN128: scalar.ph:
; VLEN128-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; VLEN128-NEXT: br label [[FOR_BODY:%.*]]
; VLEN128: for.body:
; VLEN128-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLEN128-NEXT: store i64 [[V]], ptr [[B]], align 8
; VLEN128-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; VLEN128-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
; VLEN128-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLEN128-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLEN128-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
; VLEN128: for.end:
; VLEN128-NEXT: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  store i64 %v, ptr %b, align 8
  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
  store i64 %v, ptr %arrayidx
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret void
}

define void @uniform_store_unaligned(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %v, i64 %n) {
; VLENUNK-LABEL: @uniform_store_unaligned(
; VLENUNK-NEXT: entry:
; VLENUNK-NEXT: br label [[FOR_BODY:%.*]]
; VLENUNK: for.body:
; VLENUNK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLENUNK-NEXT: store i64 [[V:%.*]], ptr [[B:%.*]], align 1
; VLENUNK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[IV]]
; VLENUNK-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
; VLENUNK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLENUNK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLENUNK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; VLENUNK: for.end:
; VLENUNK-NEXT: ret void
;
; VLEN128-LABEL: @uniform_store_unaligned(
; VLEN128-NEXT: entry:
; VLEN128-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; VLEN128: vector.ph:
; VLEN128-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[V:%.*]], i32 0
; VLEN128-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
; VLEN128-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[V]], i32 0
; VLEN128-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> poison, <2 x i32> zeroinitializer
; VLEN128-NEXT: br label [[VECTOR_BODY:%.*]]
; VLEN128: vector.body:
; VLEN128-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VLEN128-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; VLEN128-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2
; VLEN128-NEXT: store i64 [[V]], ptr [[B:%.*]], align 1
; VLEN128-NEXT: store i64 [[V]], ptr [[B]], align 1
; VLEN128-NEXT: store i64 [[V]], ptr [[B]], align 1
; VLEN128-NEXT: store i64 [[V]], ptr [[B]], align 1
; VLEN128-NEXT: [[TMP2:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
; VLEN128-NEXT: [[TMP3:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
; VLEN128-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 0
; VLEN128-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP4]], align 8
; VLEN128-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[TMP2]], i32 2
; VLEN128-NEXT: store <2 x i64> [[BROADCAST_SPLAT2]], ptr [[TMP5]], align 8
; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; VLEN128-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; VLEN128-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; VLEN128: middle.block:
; VLEN128-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024
; VLEN128-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; VLEN128: scalar.ph:
; VLEN128-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; VLEN128-NEXT: br label [[FOR_BODY:%.*]]
; VLEN128: for.body:
; VLEN128-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLEN128-NEXT: store i64 [[V]], ptr [[B]], align 1
; VLEN128-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; VLEN128-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
; VLEN128-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLEN128-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLEN128-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
; VLEN128: for.end:
; VLEN128-NEXT: ret void
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  store i64 %v, ptr %b, align 1
  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
  store i64 %v, ptr %arrayidx
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret void
}

define i64 @uniform_load(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %n) {
; VLENUNK-LABEL: @uniform_load(
; VLENUNK-NEXT: entry:
; VLENUNK-NEXT: br label [[FOR_BODY:%.*]]
; VLENUNK: for.body:
; VLENUNK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLENUNK-NEXT: [[V:%.*]] = load i64, ptr [[B:%.*]], align 8
; VLENUNK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[IV]]
; VLENUNK-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
; VLENUNK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLENUNK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLENUNK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; VLENUNK: for.end:
; VLENUNK-NEXT: [[V_LCSSA:%.*]] = phi i64 [ [[V]], [[FOR_BODY]] ]
; VLENUNK-NEXT: ret i64 [[V_LCSSA]]
;
; VLEN128-LABEL: @uniform_load(
; VLEN128-NEXT: entry:
; VLEN128-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; VLEN128: vector.ph:
; VLEN128-NEXT: br label [[VECTOR_BODY:%.*]]
; VLEN128: vector.body:
; VLEN128-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VLEN128-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; VLEN128-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2
; VLEN128-NEXT: [[TMP2:%.*]] = load i64, ptr [[B:%.*]], align 8
; VLEN128-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i32 0
; VLEN128-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
; VLEN128-NEXT: [[TMP3:%.*]] = load i64, ptr [[B]], align 8
; VLEN128-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i32 0
; VLEN128-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> poison, <2 x i32> zeroinitializer
; VLEN128-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
; VLEN128-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
; VLEN128-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0
; VLEN128-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP6]], align 8
; VLEN128-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 2
; VLEN128-NEXT: store <2 x i64> [[BROADCAST_SPLAT2]], ptr [[TMP7]], align 8
; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; VLEN128-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; VLEN128-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; VLEN128: middle.block:
; VLEN128-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024
; VLEN128-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; VLEN128: scalar.ph:
; VLEN128-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; VLEN128-NEXT: br label [[FOR_BODY:%.*]]
; VLEN128: for.body:
; VLEN128-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLEN128-NEXT: [[V:%.*]] = load i64, ptr [[B]], align 8
; VLEN128-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; VLEN128-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
; VLEN128-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLEN128-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLEN128-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
; VLEN128: for.end:
; VLEN128-NEXT: [[V_LCSSA:%.*]] = phi i64 [ [[V]], [[FOR_BODY]] ], [ [[TMP3]], [[MIDDLE_BLOCK]] ]
; VLEN128-NEXT: ret i64 [[V_LCSSA]]
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %v = load i64, ptr %b, align 8
  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
  store i64 %v, ptr %arrayidx
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret i64 %v
}

define i64 @uniform_load_unaligned(ptr noalias nocapture %a, ptr noalias nocapture %b, i64 %n) {
; VLENUNK-LABEL: @uniform_load_unaligned(
; VLENUNK-NEXT: entry:
; VLENUNK-NEXT: br label [[FOR_BODY:%.*]]
; VLENUNK: for.body:
; VLENUNK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLENUNK-NEXT: [[V:%.*]] = load i64, ptr [[B:%.*]], align 1
; VLENUNK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[IV]]
; VLENUNK-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
; VLENUNK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLENUNK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLENUNK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]]
; VLENUNK: for.end:
; VLENUNK-NEXT: [[V_LCSSA:%.*]] = phi i64 [ [[V]], [[FOR_BODY]] ]
; VLENUNK-NEXT: ret i64 [[V_LCSSA]]
;
; VLEN128-LABEL: @uniform_load_unaligned(
; VLEN128-NEXT: entry:
; VLEN128-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; VLEN128: vector.ph:
; VLEN128-NEXT: br label [[VECTOR_BODY:%.*]]
; VLEN128: vector.body:
; VLEN128-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; VLEN128-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0
; VLEN128-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 2
; VLEN128-NEXT: [[TMP2:%.*]] = load i64, ptr [[B:%.*]], align 1
; VLEN128-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP2]], i32 0
; VLEN128-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
; VLEN128-NEXT: [[TMP3:%.*]] = load i64, ptr [[B]], align 1
; VLEN128-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[TMP3]], i32 0
; VLEN128-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> poison, <2 x i32> zeroinitializer
; VLEN128-NEXT: [[TMP4:%.*]] = getelementptr inbounds i64, ptr [[A:%.*]], i64 [[TMP0]]
; VLEN128-NEXT: [[TMP5:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP1]]
; VLEN128-NEXT: [[TMP6:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 0
; VLEN128-NEXT: store <2 x i64> [[BROADCAST_SPLAT]], ptr [[TMP6]], align 8
; VLEN128-NEXT: [[TMP7:%.*]] = getelementptr inbounds i64, ptr [[TMP4]], i32 2
; VLEN128-NEXT: store <2 x i64> [[BROADCAST_SPLAT2]], ptr [[TMP7]], align 8
; VLEN128-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; VLEN128-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; VLEN128-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
; VLEN128: middle.block:
; VLEN128-NEXT: [[CMP_N:%.*]] = icmp eq i64 1024, 1024
; VLEN128-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; VLEN128: scalar.ph:
; VLEN128-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; VLEN128-NEXT: br label [[FOR_BODY:%.*]]
; VLEN128: for.body:
; VLEN128-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
; VLEN128-NEXT: [[V:%.*]] = load i64, ptr [[B]], align 1
; VLEN128-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[IV]]
; VLEN128-NEXT: store i64 [[V]], ptr [[ARRAYIDX]], align 8
; VLEN128-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
; VLEN128-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
; VLEN128-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
; VLEN128: for.end:
; VLEN128-NEXT: [[V_LCSSA:%.*]] = phi i64 [ [[V]], [[FOR_BODY]] ], [ [[TMP3]], [[MIDDLE_BLOCK]] ]
; VLEN128-NEXT: ret i64 [[V_LCSSA]]
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
  %v = load i64, ptr %b, align 1
  %arrayidx = getelementptr inbounds i64, ptr %a, i64 %iv
  store i64 %v, ptr %arrayidx
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret i64 %v
}