; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt %s -S -riscv-gather-scatter-lowering -mtriple=riscv64 -mattr=+m,+v | FileCheck %s --check-prefixes=CHECK

%struct.foo = type { i32, i32, i32, i32 }

declare <vscale x 1 x i64> @llvm.stepvector.nxv1i64()

define <vscale x 1 x i64> @gather(ptr %a, i32 %len) {
; CHECK-LABEL: @gather(
; CHECK-NEXT:  vector.ph:
; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[LEN:%.*]] to i64
; CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[ACCUM:%.*]] = phi <vscale x 1 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr [[A:%.*]], i64 [[VEC_IND_SCALAR]], i32 3
; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT:    [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[TMP1]], i64 16, <vscale x 1 x i1> splat (i1 true), i32 [[TMP2]])
; CHECK-NEXT:    [[GATHER:%.*]] = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i64> undef, i32 [[TMP2]])
; CHECK-NEXT:    [[ACCUM_NEXT]] = add <vscale x 1 x i64> [[ACCUM]], [[GATHER]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP0]]
; CHECK-NEXT:    [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], [[TMP0]]
; CHECK-NEXT:    [[TMP4:%.*]] = icmp ne i64 [[INDEX_NEXT]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT:    br i1 [[TMP4]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    ret <vscale x 1 x i64> [[ACCUM_NEXT]]
;
vector.ph:
  %wide.trip.count = zext i32 %len to i64
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = tail call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
  %.splatinsert = insertelement <vscale x 1 x i64> poison, i64 %0, i64 0
  %.splat = shufflevector <vscale x 1 x i64> %.splatinsert, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %vec.ind = phi <vscale x 1 x i64> [ %1, %vector.ph ], [ %vec.ind.next, %vector.body ]
  %accum = phi <vscale x 1 x i64> [ zeroinitializer, %vector.ph ], [ %accum.next, %vector.body ]
  %2 = getelementptr inbounds %struct.foo, ptr %a, <vscale x 1 x i64> %vec.ind, i32 3
  %gather = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> %2, i32 8, <vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> undef)
  %accum.next = add <vscale x 1 x i64> %accum, %gather
  %index.next = add nuw i64 %index, %0
  %vec.ind.next = add <vscale x 1 x i64> %vec.ind, %.splat
  %3 = icmp ne i64 %index.next, %wide.trip.count
  br i1 %3, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret <vscale x 1 x i64> %accum.next
}
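
; An index built with "or disjoint" should be treated like an add: the gather
; becomes a strided load starting at element 1 with a 16 byte stride.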
define <vscale x 1 x i64> @gather_disjoint_or(ptr %a, i64 %len) {
; CHECK-LABEL: @gather_disjoint_or(
; CHECK-NEXT:  vector.ph:
; CHECK-NEXT:    [[VSCALE:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND_SCALAR:%.*]] = phi i64 [ 1, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[ACCUM:%.*]] = phi <vscale x 1 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr i64, ptr [[A:%.*]], i64 [[VEC_IND_SCALAR]]
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[TMP0]], i64 16, <vscale x 1 x i1> splat (i1 true), i32 [[TMP1]])
; CHECK-NEXT:    [[GATHER:%.*]] = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> poison, i32 [[TMP1]])
; CHECK-NEXT:    [[ACCUM_NEXT]] = add <vscale x 1 x i64> [[ACCUM]], [[GATHER]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[VSCALE]]
; CHECK-NEXT:    [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], 2
; CHECK-NEXT:    [[EXIT:%.*]] = icmp ne i64 [[INDEX_NEXT]], [[LEN:%.*]]
; CHECK-NEXT:    br i1 [[EXIT]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    ret <vscale x 1 x i64> [[ACCUM_NEXT]]
;
vector.ph:
  %vscale = call i64 @llvm.vscale.i64()
  %step = tail call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
  %step.mul2 = shl <vscale x 1 x i64> %step, splat (i64 1)
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %vec.ind = phi <vscale x 1 x i64> [ %step.mul2, %vector.ph ], [ %vec.ind.next, %vector.body ]

  %accum = phi <vscale x 1 x i64> [ zeroinitializer, %vector.ph ], [ %accum.next, %vector.body ]

  %vec.ind.or = or disjoint <vscale x 1 x i64> %vec.ind, splat (i64 1)

  %gep = getelementptr i64, ptr %a, <vscale x 1 x i64> %vec.ind.or
  %gather = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(
    <vscale x 1 x ptr> %gep,
    i32 8,
    <vscale x 1 x i1> splat (i1 true),
    <vscale x 1 x i64> poison
  )

  %accum.next = add <vscale x 1 x i64> %accum, %gather
  %index.next = add nuw i64 %index, %vscale
  %vec.ind.next = add <vscale x 1 x i64> %vec.ind, splat (i64 2)

  %exit = icmp ne i64 %index.next, %len
  br i1 %exit, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret <vscale x 1 x i64> %accum.next
}
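
; The step is loaded from %b on every iteration, so it is not loop-invariant.
; The gather is still strided within each iteration, so it can be converted as
; long as the new scalar induction is advanced by the same step.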
define <vscale x 1 x i64> @gather_non_invariant_step(ptr %a, ptr %b, i32 %len) {
; CHECK-LABEL: @gather_non_invariant_step(
; CHECK-NEXT:  vector.ph:
; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[LEN:%.*]] to i64
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND_SCALAR1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT_SCALAR1:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[ACCUM:%.*]] = phi <vscale x 1 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr [[A:%.*]], i64 [[VEC_IND_SCALAR1]], i32 3
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT:    [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[TMP0]], i64 16, <vscale x 1 x i1> splat (i1 true), i32 [[TMP1]])
; CHECK-NEXT:    [[GATHER:%.*]] = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i64> undef, i32 [[TMP1]])
; CHECK-NEXT:    [[ACCUM_NEXT]] = add <vscale x 1 x i64> [[ACCUM]], [[GATHER]]
; CHECK-NEXT:    [[B:%.*]] = getelementptr i64, ptr [[B1:%.*]], i64 [[VEC_IND_SCALAR]]
; CHECK-NEXT:    [[STEP:%.*]] = load i64, ptr [[B]], align 8
; CHECK-NEXT:    [[VEC_IND_NEXT_SCALAR]] = add nuw i64 [[VEC_IND_SCALAR]], [[STEP]]
; CHECK-NEXT:    [[VEC_IND_NEXT_SCALAR1]] = add i64 [[VEC_IND_SCALAR1]], [[STEP]]
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i64 [[VEC_IND_NEXT_SCALAR]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT:    br i1 [[TMP2]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    ret <vscale x 1 x i64> [[ACCUM_NEXT]]
;
vector.ph:
  %wide.trip.count = zext i32 %len to i64
  %1 = tail call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %vec.ind = phi <vscale x 1 x i64> [ %1, %vector.ph ], [ %vec.ind.next, %vector.body ]
  %accum = phi <vscale x 1 x i64> [ zeroinitializer, %vector.ph ], [ %accum.next, %vector.body ]
  %2 = getelementptr inbounds %struct.foo, ptr %a, <vscale x 1 x i64> %vec.ind, i32 3
  %gather = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> %2, i32 8, <vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> undef)
  %accum.next = add <vscale x 1 x i64> %accum, %gather

  %b.gep = getelementptr i64, ptr %b, i64 %index
  %step = load i64, ptr %b.gep
  %index.next = add nuw i64 %index, %step
  %.splatinsert = insertelement <vscale x 1 x i64> poison, i64 %step, i64 0
  %.splat = shufflevector <vscale x 1 x i64> %.splatinsert, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
  %vec.ind.next = add <vscale x 1 x i64> %vec.ind, %.splat
  %3 = icmp ne i64 %index.next, %wide.trip.count
  br i1 %3, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret <vscale x 1 x i64> %accum.next
}
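
; As above, but the index is (%vec.ind + 42) << 2, which folds into a start
; offset of 168 and a 64 byte stride, with the loaded step shifted to match.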
define <vscale x 1 x i64> @gather_non_invariant_step_shl(ptr %a, ptr %b, i32 %len) {
; CHECK-LABEL: @gather_non_invariant_step_shl(
; CHECK-NEXT:  vector.ph:
; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[LEN:%.*]] to i64
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND_SCALAR1:%.*]] = phi i64 [ 168, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT_SCALAR1:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[ACCUM:%.*]] = phi <vscale x 1 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr [[A:%.*]], i64 [[VEC_IND_SCALAR1]], i32 3
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT:    [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[TMP0]], i64 64, <vscale x 1 x i1> splat (i1 true), i32 [[TMP1]])
; CHECK-NEXT:    [[GATHER:%.*]] = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i64> undef, i32 [[TMP1]])
; CHECK-NEXT:    [[ACCUM_NEXT]] = add <vscale x 1 x i64> [[ACCUM]], [[GATHER]]
; CHECK-NEXT:    [[B:%.*]] = getelementptr i64, ptr [[B1:%.*]], i64 [[VEC_IND_SCALAR]]
; CHECK-NEXT:    [[STEP:%.*]] = load i64, ptr [[B]], align 8
; CHECK-NEXT:    [[STEP1:%.*]] = shl i64 [[STEP]], 2
; CHECK-NEXT:    [[VEC_IND_NEXT_SCALAR]] = add nuw i64 [[VEC_IND_SCALAR]], [[STEP]]
; CHECK-NEXT:    [[VEC_IND_NEXT_SCALAR1]] = add i64 [[VEC_IND_SCALAR1]], [[STEP1]]
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i64 [[VEC_IND_NEXT_SCALAR]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT:    br i1 [[TMP2]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    ret <vscale x 1 x i64> [[ACCUM_NEXT]]
;
vector.ph:
  %wide.trip.count = zext i32 %len to i64
  %1 = tail call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %vec.ind = phi <vscale x 1 x i64> [ %1, %vector.ph ], [ %vec.ind.next, %vector.body ]
  %accum = phi <vscale x 1 x i64> [ zeroinitializer, %vector.ph ], [ %accum.next, %vector.body ]

  %vec.ind.add = add <vscale x 1 x i64> %vec.ind, splat (i64 42)
  %vec.ind.shl = shl <vscale x 1 x i64> %vec.ind.add, splat (i64 2)

  %2 = getelementptr inbounds %struct.foo, ptr %a, <vscale x 1 x i64> %vec.ind.shl, i32 3
  %gather = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> %2, i32 8, <vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> undef)
  %accum.next = add <vscale x 1 x i64> %accum, %gather

  %b.gep = getelementptr i64, ptr %b, i64 %index
  %step = load i64, ptr %b.gep
  %index.next = add nuw i64 %index, %step
  %.splatinsert = insertelement <vscale x 1 x i64> poison, i64 %step, i64 0
  %.splat = shufflevector <vscale x 1 x i64> %.splatinsert, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
  %vec.ind.next = add <vscale x 1 x i64> %vec.ind, %.splat
  %3 = icmp ne i64 %index.next, %wide.trip.count
  br i1 %3, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret <vscale x 1 x i64> %accum.next
}

; Check that the operand of the binary op (%scale.splat in shl) always dominates
; the existing step value when we're adjusting it.
define <vscale x 1 x i64> @gather_splat_op_after_step(ptr %a, ptr %b, i32 %len) {
; CHECK-LABEL: @gather_splat_op_after_step(
; CHECK-NEXT:  vector.ph:
; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[LEN:%.*]] to i64
; CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
; CHECK-NEXT:    [[SCALE:%.*]] = load i64, ptr [[B:%.*]], align 8
; CHECK-NEXT:    [[STRIDE:%.*]] = shl i64 1, [[SCALE]]
; CHECK-NEXT:    [[STEP:%.*]] = shl i64 [[TMP0]], [[SCALE]]
; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[STRIDE]], 16
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[ACCUM:%.*]] = phi <vscale x 1 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr [[A:%.*]], i64 [[VEC_IND_SCALAR]], i32 3
; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT:    [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[TMP2]], i64 [[TMP1]], <vscale x 1 x i1> splat (i1 true), i32 [[TMP3]])
; CHECK-NEXT:    [[GATHER:%.*]] = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> [[TMP4]], <vscale x 1 x i64> undef, i32 [[TMP3]])
; CHECK-NEXT:    [[ACCUM_NEXT]] = add <vscale x 1 x i64> [[ACCUM]], [[GATHER]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP0]]
; CHECK-NEXT:    [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], [[STEP]]
; CHECK-NEXT:    [[TMP5:%.*]] = icmp ne i64 [[INDEX_NEXT]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT:    br i1 [[TMP5]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    ret <vscale x 1 x i64> [[ACCUM_NEXT]]
;
vector.ph:
  %wide.trip.count = zext i32 %len to i64
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = tail call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
  %.splatinsert = insertelement <vscale x 1 x i64> poison, i64 %0, i64 0
  %.splat = shufflevector <vscale x 1 x i64> %.splatinsert, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer

  %scale = load i64, ptr %b
  %scale.head = insertelement <vscale x 1 x i64> poison, i64 %scale, i64 0
  %scale.splat = shufflevector <vscale x 1 x i64> %scale.head, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %vec.ind = phi <vscale x 1 x i64> [ %1, %vector.ph ], [ %vec.ind.next, %vector.body ]
  %accum = phi <vscale x 1 x i64> [ zeroinitializer, %vector.ph ], [ %accum.next, %vector.body ]
  %vec.ind.shl = shl <vscale x 1 x i64> %vec.ind, %scale.splat
  %2 = getelementptr inbounds %struct.foo, ptr %a, <vscale x 1 x i64> %vec.ind.shl, i32 3
  %gather = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> %2, i32 8, <vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> undef)
  %accum.next = add <vscale x 1 x i64> %accum, %gather
  %index.next = add nuw i64 %index, %0
  %vec.ind.next = add <vscale x 1 x i64> %vec.ind, %.splat
  %3 = icmp ne i64 %index.next, %wide.trip.count
  br i1 %3, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret <vscale x 1 x i64> %accum.next
}
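
; Scatter counterpart of @gather: the masked.scatter over the struct field is
; converted to a strided store with a 16 byte stride.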
define void @scatter(ptr %a, i32 %len) {
; CHECK-LABEL: @scatter(
; CHECK-NEXT:  vector.ph:
; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[LEN:%.*]] to i64
; CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr [[A:%.*]], i64 [[VEC_IND_SCALAR]], i32 3
; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT:    call void @llvm.experimental.vp.strided.store.nxv1i64.p0.i64(<vscale x 1 x i64> zeroinitializer, ptr [[TMP1]], i64 16, <vscale x 1 x i1> splat (i1 true), i32 [[TMP2]])
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP0]]
; CHECK-NEXT:    [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], [[TMP0]]
; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[INDEX_NEXT]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT:    br i1 [[TMP3]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    ret void
;
vector.ph:
  %wide.trip.count = zext i32 %len to i64
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = tail call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
  %.splatinsert = insertelement <vscale x 1 x i64> poison, i64 %0, i64 0
  %.splat = shufflevector <vscale x 1 x i64> %.splatinsert, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %vec.ind = phi <vscale x 1 x i64> [ %1, %vector.ph ], [ %vec.ind.next, %vector.body ]
  %2 = getelementptr inbounds %struct.foo, ptr %a, <vscale x 1 x i64> %vec.ind, i32 3
  tail call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> zeroinitializer, <vscale x 1 x ptr> %2, i32 8, <vscale x 1 x i1> splat (i1 true))
  %index.next = add nuw i64 %index, %0
  %vec.ind.next = add <vscale x 1 x i64> %vec.ind, %.splat
  %3 = icmp ne i64 %index.next, %wide.trip.count
  br i1 %3, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret void
}
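
; Scatter version of @gather_non_invariant_step: the step is reloaded from %b
; each iteration, but the store is still strided within an iteration.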
define void @scatter_non_invariant_step(ptr %a, ptr %b, i32 %len) {
; CHECK-LABEL: @scatter_non_invariant_step(
; CHECK-NEXT:  vector.ph:
; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[LEN:%.*]] to i64
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND_SCALAR1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT_SCALAR1:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr [[A:%.*]], i64 [[VEC_IND_SCALAR1]], i32 3
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT:    call void @llvm.experimental.vp.strided.store.nxv1i64.p0.i64(<vscale x 1 x i64> zeroinitializer, ptr [[TMP0]], i64 16, <vscale x 1 x i1> splat (i1 true), i32 [[TMP1]])
; CHECK-NEXT:    [[B:%.*]] = getelementptr i64, ptr [[B1:%.*]], i64 [[VEC_IND_SCALAR]]
; CHECK-NEXT:    [[STEP:%.*]] = load i64, ptr [[B]], align 8
; CHECK-NEXT:    [[VEC_IND_NEXT_SCALAR]] = add nuw i64 [[VEC_IND_SCALAR]], [[STEP]]
; CHECK-NEXT:    [[VEC_IND_NEXT_SCALAR1]] = add i64 [[VEC_IND_SCALAR1]], [[STEP]]
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i64 [[VEC_IND_NEXT_SCALAR]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT:    br i1 [[TMP2]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    ret void
;
vector.ph:
  %wide.trip.count = zext i32 %len to i64
  %1 = tail call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %vec.ind = phi <vscale x 1 x i64> [ %1, %vector.ph ], [ %vec.ind.next, %vector.body ]
  %2 = getelementptr inbounds %struct.foo, ptr %a, <vscale x 1 x i64> %vec.ind, i32 3
  tail call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> zeroinitializer, <vscale x 1 x ptr> %2, i32 8, <vscale x 1 x i1> splat (i1 true))

  %b.gep = getelementptr i64, ptr %b, i64 %index
  %step = load i64, ptr %b.gep
  %index.next = add nuw i64 %index, %step
  %.splatinsert = insertelement <vscale x 1 x i64> poison, i64 %step, i64 0
  %.splat = shufflevector <vscale x 1 x i64> %.splatinsert, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
  %vec.ind.next = add <vscale x 1 x i64> %vec.ind, %.splat
  %3 = icmp ne i64 %index.next, %wide.trip.count
  br i1 %3, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret void
}
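
; Scatter version of @gather_non_invariant_step_shl: the (%vec.ind + 42) << 2
; index folds into a start offset of 168 and a 64 byte stride.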
define void @scatter_non_invariant_step_add_shl(ptr %a, ptr %b, i32 %len) {
; CHECK-LABEL: @scatter_non_invariant_step_add_shl(
; CHECK-NEXT:  vector.ph:
; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[LEN:%.*]] to i64
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND_SCALAR1:%.*]] = phi i64 [ 168, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT_SCALAR1:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr [[A:%.*]], i64 [[VEC_IND_SCALAR1]], i32 3
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT:    call void @llvm.experimental.vp.strided.store.nxv1i64.p0.i64(<vscale x 1 x i64> zeroinitializer, ptr [[TMP0]], i64 64, <vscale x 1 x i1> splat (i1 true), i32 [[TMP1]])
; CHECK-NEXT:    [[B:%.*]] = getelementptr i64, ptr [[B1:%.*]], i64 [[VEC_IND_SCALAR]]
; CHECK-NEXT:    [[STEP:%.*]] = load i64, ptr [[B]], align 8
; CHECK-NEXT:    [[STEP1:%.*]] = shl i64 [[STEP]], 2
; CHECK-NEXT:    [[VEC_IND_NEXT_SCALAR]] = add nuw i64 [[VEC_IND_SCALAR]], [[STEP]]
; CHECK-NEXT:    [[VEC_IND_NEXT_SCALAR1]] = add i64 [[VEC_IND_SCALAR1]], [[STEP1]]
; CHECK-NEXT:    [[TMP2:%.*]] = icmp ne i64 [[VEC_IND_NEXT_SCALAR]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT:    br i1 [[TMP2]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    ret void
;
vector.ph:
  %wide.trip.count = zext i32 %len to i64
  %1 = tail call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %vec.ind = phi <vscale x 1 x i64> [ %1, %vector.ph ], [ %vec.ind.next, %vector.body ]

  %vec.ind.add = add <vscale x 1 x i64> %vec.ind, splat (i64 42)
  %vec.ind.shl = shl <vscale x 1 x i64> %vec.ind.add, splat (i64 2)

  %2 = getelementptr inbounds %struct.foo, ptr %a, <vscale x 1 x i64> %vec.ind.shl, i32 3
  tail call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> zeroinitializer, <vscale x 1 x ptr> %2, i32 8, <vscale x 1 x i1> splat (i1 true))

  %b.gep = getelementptr i64, ptr %b, i64 %index
  %step = load i64, ptr %b.gep
  %index.next = add nuw i64 %index, %step
  %.splatinsert = insertelement <vscale x 1 x i64> poison, i64 %step, i64 0
  %.splat = shufflevector <vscale x 1 x i64> %.splatinsert, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
  %vec.ind.next = add <vscale x 1 x i64> %vec.ind, %.splat
  %3 = icmp ne i64 %index.next, %wide.trip.count
  br i1 %3, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret void
}
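
; The transform does not require a loop: a stride built from
; stepvector * splat(%stride) is recognized in straight-line code too.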
define <vscale x 1 x i64> @gather_loopless(ptr %p, i64 %stride) {
; CHECK-LABEL: @gather_loopless(
; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[STRIDE:%.*]], 4
; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT:    [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[P:%.*]], i64 [[TMP1]], <vscale x 1 x i1> splat (i1 true), i32 [[TMP2]])
; CHECK-NEXT:    [[X:%.*]] = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i64> poison, i32 [[TMP2]])
; CHECK-NEXT:    ret <vscale x 1 x i64> [[X]]
;
  %step = call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
  %splat.insert = insertelement <vscale x 1 x i64> poison, i64 %stride, i64 0
  %splat = shufflevector <vscale x 1 x i64> %splat.insert, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
  %offsets = mul <vscale x 1 x i64> %step, %splat
  %ptrs = getelementptr i32, ptr %p, <vscale x 1 x i64> %offsets
  %x = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(
    <vscale x 1 x ptr> %ptrs,
    i32 8,
    <vscale x 1 x i1> splat (i1 1),
    <vscale x 1 x i64> poison
  )
  ret <vscale x 1 x i64> %x
}

define <vscale x 1 x i64> @straightline_offset_add(ptr %p, i64 %offset) {
; CHECK-LABEL: @straightline_offset_add(
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 [[OFFSET:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT:    [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[TMP1]], i64 4, <vscale x 1 x i1> splat (i1 true), i32 [[TMP2]])
; CHECK-NEXT:    [[X:%.*]] = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i64> poison, i32 [[TMP2]])
; CHECK-NEXT:    ret <vscale x 1 x i64> [[X]]
;
  %step = call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
  %splat.insert = insertelement <vscale x 1 x i64> poison, i64 %offset, i64 0
  %splat = shufflevector <vscale x 1 x i64> %splat.insert, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
  %offsetv = add <vscale x 1 x i64> %step, %splat
  %ptrs = getelementptr i32, ptr %p, <vscale x 1 x i64> %offsetv
  %x = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(
    <vscale x 1 x ptr> %ptrs,
    i32 8,
    <vscale x 1 x i1> splat (i1 1),
    <vscale x 1 x i64> poison
  )
  ret <vscale x 1 x i64> %x
}
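
; A straight-line offset of 2*step "or disjoint" 1 becomes a load from
; %p plus one i32 element with an 8 byte stride.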
define <vscale x 1 x i64> @straightline_offset_disjoint_or(ptr %p, i64 %offset) {
; CHECK-LABEL: @straightline_offset_disjoint_or(
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i32, ptr [[P:%.*]], i64 1
; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT:    [[TMP3:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[TMP1]], i64 8, <vscale x 1 x i1> splat (i1 true), i32 [[TMP2]])
; CHECK-NEXT:    [[X:%.*]] = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> [[TMP3]], <vscale x 1 x i64> poison, i32 [[TMP2]])
; CHECK-NEXT:    ret <vscale x 1 x i64> [[X]]
;
  %step = call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
  %step.shl = shl <vscale x 1 x i64> %step, splat (i64 1)
  %offsetv = or disjoint <vscale x 1 x i64> %step.shl, splat (i64 1)
  %ptrs = getelementptr i32, ptr %p, <vscale x 1 x i64> %offsetv
  %x = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(
    <vscale x 1 x ptr> %ptrs,
    i32 8,
    <vscale x 1 x i1> splat (i1 true),
    <vscale x 1 x i64> poison
  )
  ret <vscale x 1 x i64> %x
}

define <vscale x 1 x i64> @straightline_offset_shl(ptr %p) {
; CHECK-LABEL: @straightline_offset_shl(
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[P:%.*]], i64 32, <vscale x 1 x i1> splat (i1 true), i32 [[TMP1]])
; CHECK-NEXT:    [[X:%.*]] = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> poison, i32 [[TMP1]])
; CHECK-NEXT:    ret <vscale x 1 x i64> [[X]]
;
  %step = call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
  %offset = shl <vscale x 1 x i64> %step, splat (i64 3)
  %ptrs = getelementptr i32, ptr %p, <vscale x 1 x i64> %offset
  %x = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(
    <vscale x 1 x ptr> %ptrs,
    i32 8,
    <vscale x 1 x i1> splat (i1 1),
    <vscale x 1 x i64> poison
  )
  ret <vscale x 1 x i64> %x
}

define <vscale x 1 x i64> @neg_shl_is_not_commutative(ptr %p) {
; CHECK-LABEL: @neg_shl_is_not_commutative(
; CHECK-NEXT:    [[STEP:%.*]] = call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
; CHECK-NEXT:    [[SPLAT_INSERT:%.*]] = insertelement <vscale x 1 x i64> poison, i64 3, i64 0
; CHECK-NEXT:    [[SPLAT:%.*]] = shufflevector <vscale x 1 x i64> [[SPLAT_INSERT]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
; CHECK-NEXT:    [[OFFSET:%.*]] = shl <vscale x 1 x i64> [[SPLAT]], [[STEP]]
; CHECK-NEXT:    [[PTRS:%.*]] = getelementptr i32, ptr [[P:%.*]], <vscale x 1 x i64> [[OFFSET]]
; CHECK-NEXT:    [[X:%.*]] = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> [[PTRS]], i32 8, <vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> poison)
; CHECK-NEXT:    ret <vscale x 1 x i64> [[X]]
;
  %step = call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
  %splat.insert = insertelement <vscale x 1 x i64> poison, i64 3, i64 0
  %splat = shufflevector <vscale x 1 x i64> %splat.insert, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
  %offset = shl <vscale x 1 x i64> %splat, %step
  %ptrs = getelementptr i32, ptr %p, <vscale x 1 x i64> %offset
  %x = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(
    <vscale x 1 x ptr> %ptrs,
    i32 8,
    <vscale x 1 x i1> splat (i1 1),
    <vscale x 1 x i64> poison
  )
  ret <vscale x 1 x i64> %x
}
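
; The shift amount is only known at runtime, so the stride has to be
; materialized as (1 << %shift) * 4 bytes.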
define <vscale x 1 x i64> @straightline_offset_shl_nonc(ptr %p, i64 %shift) {
; CHECK-LABEL: @straightline_offset_shl_nonc(
; CHECK-NEXT:    [[TMP1:%.*]] = shl i64 1, [[SHIFT:%.*]]
; CHECK-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP1]], 4
; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT:    [[TMP4:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[P:%.*]], i64 [[TMP2]], <vscale x 1 x i1> splat (i1 true), i32 [[TMP3]])
; CHECK-NEXT:    [[X:%.*]] = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> [[TMP4]], <vscale x 1 x i64> poison, i32 [[TMP3]])
; CHECK-NEXT:    ret <vscale x 1 x i64> [[X]]
;
  %step = call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
  %splat.insert = insertelement <vscale x 1 x i64> poison, i64 %shift, i64 0
  %splat = shufflevector <vscale x 1 x i64> %splat.insert, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
  %offset = shl <vscale x 1 x i64> %step, %splat
  %ptrs = getelementptr i32, ptr %p, <vscale x 1 x i64> %offset
  %x = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(
    <vscale x 1 x ptr> %ptrs,
    i32 8,
    <vscale x 1 x i1> splat (i1 1),
    <vscale x 1 x i64> poison
  )
  ret <vscale x 1 x i64> %x
}

define void @scatter_loopless(<vscale x 1 x i64> %x, ptr %p, i64 %stride) {
; CHECK-LABEL: @scatter_loopless(
; CHECK-NEXT:    [[TMP1:%.*]] = mul i64 [[STRIDE:%.*]], 4
; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT:    call void @llvm.experimental.vp.strided.store.nxv1i64.p0.i64(<vscale x 1 x i64> [[X:%.*]], ptr [[P:%.*]], i64 [[TMP1]], <vscale x 1 x i1> splat (i1 true), i32 [[TMP2]])
; CHECK-NEXT:    ret void
;
  %step = call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
  %splat.insert = insertelement <vscale x 1 x i64> poison, i64 %stride, i64 0
  %splat = shufflevector <vscale x 1 x i64> %splat.insert, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
  %offsets = mul <vscale x 1 x i64> %step, %splat
  %ptrs = getelementptr i32, ptr %p, <vscale x 1 x i64> %offsets
  call void @llvm.masked.scatter.nxv1i64.nxv1p0(
    <vscale x 1 x i64> %x,
    <vscale x 1 x ptr> %ptrs,
    i32 8,
    <vscale x 1 x i1> splat (i1 1)
  )
  ret void
}

; We previously crashed expecting a constant to be fixed length.
define void @constant_stride(<vscale x 1 x i64> %x, ptr %p, i64 %stride) {
; CHECK-LABEL: @constant_stride(
; CHECK-NEXT:    [[PTRS:%.*]] = getelementptr i32, ptr [[P:%.*]], <vscale x 1 x i64> zeroinitializer
; CHECK-NEXT:    call void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64> [[X:%.*]], <vscale x 1 x ptr> [[PTRS]], i32 8, <vscale x 1 x i1> splat (i1 true))
; CHECK-NEXT:    ret void
;
  %ptrs = getelementptr i32, ptr %p, <vscale x 1 x i64> zeroinitializer
  call void @llvm.masked.scatter.nxv1i64.nxv1p0(
    <vscale x 1 x i64> %x,
    <vscale x 1 x ptr> %ptrs,
    i32 8,
    <vscale x 1 x i1> splat (i1 1)
  )
  ret void
}
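
; A scalar-offset GEP on top of a vector of strided pointers keeps the
; stride; the offset only moves the base pointer.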
define <vscale x 1 x i64> @vector_base_scalar_offset(ptr %p, i64 %offset) {
; CHECK-LABEL: @vector_base_scalar_offset(
; CHECK-NEXT:    [[PTRS2OFFSET:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[OFFSET:%.*]]
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[PTRS2OFFSET]], i64 8, <vscale x 1 x i1> splat (i1 true), i32 [[TMP1]])
; CHECK-NEXT:    [[X:%.*]] = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> poison, i32 [[TMP1]])
; CHECK-NEXT:    ret <vscale x 1 x i64> [[X]]
;
  %step = call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
  %ptrs1 = getelementptr i64, ptr %p, <vscale x 1 x i64> %step
  %ptrs2 = getelementptr i64, <vscale x 1 x ptr> %ptrs1, i64 %offset
  %x = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(
    <vscale x 1 x ptr> %ptrs2,
    i32 8,
    <vscale x 1 x i1> splat (i1 1),
    <vscale x 1 x i64> poison
  )
  ret <vscale x 1 x i64> %x
}

define <vscale x 1 x i64> @splat_base_scalar_offset(ptr %p, i64 %offset) {
; CHECK-LABEL: @splat_base_scalar_offset(
; CHECK-NEXT:    [[PTRSOFFSET:%.*]] = getelementptr i64, ptr [[P:%.*]], i64 [[OFFSET:%.*]]
; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT:    [[TMP2:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[PTRSOFFSET]], i64 0, <vscale x 1 x i1> splat (i1 true), i32 [[TMP1]])
; CHECK-NEXT:    [[X:%.*]] = call <vscale x 1 x i64> @llvm.vp.select.nxv1i64(<vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> [[TMP2]], <vscale x 1 x i64> poison, i32 [[TMP1]])
; CHECK-NEXT:    ret <vscale x 1 x i64> [[X]]
;
  %head = insertelement <vscale x 1 x ptr> poison, ptr %p, i32 0
  %splat = shufflevector <vscale x 1 x ptr> %head, <vscale x 1 x ptr> poison, <vscale x 1 x i32> zeroinitializer
  %ptrs = getelementptr i64, <vscale x 1 x ptr> %splat, i64 %offset
  %x = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(
    <vscale x 1 x ptr> %ptrs,
    i32 8,
    <vscale x 1 x i1> splat (i1 1),
    <vscale x 1 x i64> poison
  )
  ret <vscale x 1 x i64> %x
}

; We shouldn't be able to determine a stride here.
define <vscale x 1 x i64> @nonstrided_base_scalar_offset(ptr %p, <vscale x 1 x i64> %v, i64 %offset) {
; CHECK-LABEL: @nonstrided_base_scalar_offset(
; CHECK-NEXT:    [[PTRS1:%.*]] = getelementptr i64, ptr [[P:%.*]], <vscale x 1 x i64> [[V:%.*]]
; CHECK-NEXT:    [[PTRS2:%.*]] = getelementptr i64, <vscale x 1 x ptr> [[PTRS1]], i64 [[OFFSET:%.*]]
; CHECK-NEXT:    [[X:%.*]] = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> [[PTRS2]], i32 8, <vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> poison)
; CHECK-NEXT:    ret <vscale x 1 x i64> [[X]]
;
  %ptrs1 = getelementptr i64, ptr %p, <vscale x 1 x i64> %v
  %ptrs2 = getelementptr i64, <vscale x 1 x ptr> %ptrs1, i64 %offset
  %x = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(
    <vscale x 1 x ptr> %ptrs2,
    i32 8,
    <vscale x 1 x i1> splat (i1 1),
    <vscale x 1 x i64> poison
  )
  ret <vscale x 1 x i64> %x
}

; We shouldn't be able to determine a scalar base here.
define <vscale x 1 x i64> @vector_base_vector_offset(ptr %p, <vscale x 1 x i64> %offset) {
; CHECK-LABEL: @vector_base_vector_offset(
; CHECK-NEXT:    [[STEP:%.*]] = call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
; CHECK-NEXT:    [[PTRS1:%.*]] = getelementptr i64, ptr [[P:%.*]], <vscale x 1 x i64> [[STEP]]
; CHECK-NEXT:    [[PTRS2:%.*]] = getelementptr i64, <vscale x 1 x ptr> [[PTRS1]], <vscale x 1 x i64> [[OFFSET:%.*]]
; CHECK-NEXT:    [[X:%.*]] = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr> [[PTRS2]], i32 8, <vscale x 1 x i1> splat (i1 true), <vscale x 1 x i64> poison)
; CHECK-NEXT:    ret <vscale x 1 x i64> [[X]]
;
  %step = call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
  %ptrs1 = getelementptr i64, ptr %p, <vscale x 1 x i64> %step
  %ptrs2 = getelementptr i64, <vscale x 1 x ptr> %ptrs1, <vscale x 1 x i64> %offset
  %x = call <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(
    <vscale x 1 x ptr> %ptrs2,
    i32 8,
    <vscale x 1 x i1> splat (i1 1),
    <vscale x 1 x i64> poison
  )
  ret <vscale x 1 x i64> %x
}

declare i64 @llvm.vscale.i64()
declare void @llvm.masked.scatter.nxv1i64.nxv1p0(<vscale x 1 x i64>, <vscale x 1 x ptr>, i32, <vscale x 1 x i1>)
declare <vscale x 1 x i64> @llvm.masked.gather.nxv1i64.nxv1p0(<vscale x 1 x ptr>, i32, <vscale x 1 x i1>, <vscale x 1 x i64>)
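
; A vp.gather carries its own EVL, so it is converted directly to a
; vp.strided.load that reuses that EVL (the constant 42 here).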
define <vscale x 1 x i64> @vp_gather(ptr %a, i32 %len) {
; CHECK-LABEL: @vp_gather(
; CHECK-NEXT:  vector.ph:
; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[LEN:%.*]] to i64
; CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND_SCALAR1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT_SCALAR1:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[ACCUM:%.*]] = phi <vscale x 1 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr [[A:%.*]], i64 [[VEC_IND_SCALAR1]], i32 3
; CHECK-NEXT:    [[GATHER:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[TMP2]], i64 16, <vscale x 1 x i1> splat (i1 true), i32 42)
; CHECK-NEXT:    [[ACCUM_NEXT]] = add <vscale x 1 x i64> [[ACCUM]], [[GATHER]]
; CHECK-NEXT:    [[VEC_IND_NEXT_SCALAR]] = add nuw i64 [[VEC_IND_SCALAR]], [[TMP0]]
; CHECK-NEXT:    [[VEC_IND_NEXT_SCALAR1]] = add i64 [[VEC_IND_SCALAR1]], [[TMP0]]
; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[VEC_IND_NEXT_SCALAR]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT:    br i1 [[TMP3]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    ret <vscale x 1 x i64> [[ACCUM_NEXT]]
;
vector.ph:
  %wide.trip.count = zext i32 %len to i64
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = tail call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
  %.splatinsert = insertelement <vscale x 1 x i64> poison, i64 %0, i64 0
  %.splat = shufflevector <vscale x 1 x i64> %.splatinsert, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %vec.ind = phi <vscale x 1 x i64> [ %1, %vector.ph ], [ %vec.ind.next, %vector.body ]
  %accum = phi <vscale x 1 x i64> [ zeroinitializer, %vector.ph ], [ %accum.next, %vector.body ]
  %2 = getelementptr inbounds %struct.foo, ptr %a, <vscale x 1 x i64> %vec.ind, i32 3
  %gather = call <vscale x 1 x i64> @llvm.vp.gather(<vscale x 1 x ptr> %2, <vscale x 1 x i1> splat (i1 true), i32 42)
  %accum.next = add <vscale x 1 x i64> %accum, %gather
  %index.next = add nuw i64 %index, %0
  %vec.ind.next = add <vscale x 1 x i64> %vec.ind, %.splat
  %3 = icmp ne i64 %index.next, %wide.trip.count
  br i1 %3, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret <vscale x 1 x i64> %accum.next
}
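
; Likewise, a vp.scatter is converted to a vp.strided.store with the same EVL.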
define void @vp_scatter(ptr %a, i32 %len) {
; CHECK-LABEL: @vp_scatter(
; CHECK-NEXT:  vector.ph:
; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[LEN:%.*]] to i64
; CHECK-NEXT:    [[TMP0:%.*]] = tail call i64 @llvm.vscale.i64()
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND_SCALAR1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT_SCALAR1:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr [[A:%.*]], i64 [[VEC_IND_SCALAR1]], i32 3
; CHECK-NEXT:    call void @llvm.experimental.vp.strided.store.nxv1i64.p0.i64(<vscale x 1 x i64> zeroinitializer, ptr [[TMP2]], i64 16, <vscale x 1 x i1> splat (i1 true), i32 42)
; CHECK-NEXT:    [[VEC_IND_NEXT_SCALAR]] = add nuw i64 [[VEC_IND_SCALAR]], [[TMP0]]
; CHECK-NEXT:    [[VEC_IND_NEXT_SCALAR1]] = add i64 [[VEC_IND_SCALAR1]], [[TMP0]]
; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[VEC_IND_NEXT_SCALAR]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT:    br i1 [[TMP3]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    ret void
;
vector.ph:
  %wide.trip.count = zext i32 %len to i64
  %0 = tail call i64 @llvm.vscale.i64()
  %1 = tail call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
  %.splatinsert = insertelement <vscale x 1 x i64> poison, i64 %0, i64 0
  %.splat = shufflevector <vscale x 1 x i64> %.splatinsert, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %vec.ind = phi <vscale x 1 x i64> [ %1, %vector.ph ], [ %vec.ind.next, %vector.body ]
  %2 = getelementptr inbounds %struct.foo, ptr %a, <vscale x 1 x i64> %vec.ind, i32 3
  tail call void @llvm.vp.scatter(<vscale x 1 x i64> zeroinitializer, <vscale x 1 x ptr> %2, <vscale x 1 x i1> splat (i1 true), i32 42)
  %index.next = add nuw i64 %index, %0
  %vec.ind.next = add <vscale x 1 x i64> %vec.ind, %.splat
  %3 = icmp ne i64 %index.next, %wide.trip.count
  br i1 %3, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret void
}

; Test that reflects what the loop vectorizer will generate for an EVL
; tail-folded loop.

define <vscale x 1 x i64> @evl_gather(ptr %a, i32 %len) {
; CHECK-LABEL: @evl_gather(
; CHECK-NEXT:  vector.ph:
; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[LEN:%.*]] to i64
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND_SCALAR1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT_SCALAR1:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[ACCUM:%.*]] = phi <vscale x 1 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[ELEMS:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[VEC_IND_SCALAR]]
; CHECK-NEXT:    [[EVL:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[ELEMS]], i32 1, i1 true)
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr [[A:%.*]], i64 [[VEC_IND_SCALAR1]], i32 3
; CHECK-NEXT:    [[GATHER:%.*]] = call <vscale x 1 x i64> @llvm.experimental.vp.strided.load.nxv1i64.p0.i64(ptr [[TMP0]], i64 16, <vscale x 1 x i1> splat (i1 true), i32 [[EVL]])
; CHECK-NEXT:    [[ACCUM_NEXT]] = add <vscale x 1 x i64> [[ACCUM]], [[GATHER]]
; CHECK-NEXT:    [[EVL_ZEXT:%.*]] = zext i32 [[EVL]] to i64
; CHECK-NEXT:    [[VEC_IND_NEXT_SCALAR]] = add nuw i64 [[VEC_IND_SCALAR]], [[EVL_ZEXT]]
; CHECK-NEXT:    [[VEC_IND_NEXT_SCALAR1]] = add i64 [[VEC_IND_SCALAR1]], [[EVL_ZEXT]]
; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[VEC_IND_NEXT_SCALAR]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT:    br i1 [[TMP3]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    ret <vscale x 1 x i64> [[ACCUM_NEXT]]
;
vector.ph:
  %wide.trip.count = zext i32 %len to i64
  %1 = tail call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %vec.ind = phi <vscale x 1 x i64> [ %1, %vector.ph ], [ %vec.ind.next, %vector.body ]
  %accum = phi <vscale x 1 x i64> [ zeroinitializer, %vector.ph ], [ %accum.next, %vector.body ]

  %elems = sub i64 %wide.trip.count, %index
  %evl = call i32 @llvm.experimental.get.vector.length.i64(i64 %elems, i32 1, i1 true)

  %2 = getelementptr inbounds %struct.foo, ptr %a, <vscale x 1 x i64> %vec.ind, i32 3
  %gather = call <vscale x 1 x i64> @llvm.vp.gather(<vscale x 1 x ptr> %2, <vscale x 1 x i1> splat (i1 true), i32 %evl)
  %accum.next = add <vscale x 1 x i64> %accum, %gather

  %evl.zext = zext i32 %evl to i64
  %index.next = add nuw i64 %index, %evl.zext
  %evl.splatinsert = insertelement <vscale x 1 x i64> poison, i64 %evl.zext, i64 0
  %evl.splat = shufflevector <vscale x 1 x i64> %evl.splatinsert, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
  %vec.ind.next = add <vscale x 1 x i64> %vec.ind, %evl.splat
  %3 = icmp ne i64 %index.next, %wide.trip.count
  br i1 %3, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret <vscale x 1 x i64> %accum.next
}

; Test that reflects what the loop vectorizer will generate for an EVL
; tail-folded loop.

define void @evl_scatter(ptr %a, i32 %len) {
; CHECK-LABEL: @evl_scatter(
; CHECK-NEXT:  vector.ph:
; CHECK-NEXT:    [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[LEN:%.*]] to i64
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[VEC_IND_SCALAR1:%.*]] = phi i64 [ 0, [[VECTOR_PH:%.*]] ], [ [[VEC_IND_NEXT_SCALAR1:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[ELEMS:%.*]] = sub i64 [[WIDE_TRIP_COUNT]], [[VEC_IND_SCALAR1]]
; CHECK-NEXT:    [[EVL:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[ELEMS]], i32 1, i1 true)
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr [[A:%.*]], i64 [[VEC_IND_SCALAR]], i32 3
; CHECK-NEXT:    call void @llvm.experimental.vp.strided.store.nxv1i64.p0.i64(<vscale x 1 x i64> zeroinitializer, ptr [[TMP0]], i64 16, <vscale x 1 x i1> splat (i1 true), i32 [[EVL]])
; CHECK-NEXT:    [[EVL_ZEXT:%.*]] = zext i32 [[EVL]] to i64
; CHECK-NEXT:    [[VEC_IND_NEXT_SCALAR1]] = add nuw i64 [[VEC_IND_SCALAR1]], [[EVL_ZEXT]]
; CHECK-NEXT:    [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], [[EVL_ZEXT]]
; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne i64 [[VEC_IND_NEXT_SCALAR1]], [[WIDE_TRIP_COUNT]]
; CHECK-NEXT:    br i1 [[TMP3]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    ret void
;
vector.ph:
  %wide.trip.count = zext i32 %len to i64
  %1 = tail call <vscale x 1 x i64> @llvm.stepvector.nxv1i64()
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %vector.ph
  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
  %vec.ind = phi <vscale x 1 x i64> [ %1, %vector.ph ], [ %vec.ind.next, %vector.body ]

  %elems = sub i64 %wide.trip.count, %index
  %evl = call i32 @llvm.experimental.get.vector.length.i64(i64 %elems, i32 1, i1 true)

  %2 = getelementptr inbounds %struct.foo, ptr %a, <vscale x 1 x i64> %vec.ind, i32 3
  tail call void @llvm.vp.scatter(<vscale x 1 x i64> zeroinitializer, <vscale x 1 x ptr> %2, <vscale x 1 x i1> splat (i1 true), i32 %evl)

  %evl.zext = zext i32 %evl to i64
  %index.next = add nuw i64 %index, %evl.zext
  %evl.splatinsert = insertelement <vscale x 1 x i64> poison, i64 %evl.zext, i64 0
  %evl.splat = shufflevector <vscale x 1 x i64> %evl.splatinsert, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
  %vec.ind.next = add <vscale x 1 x i64> %vec.ind, %evl.splat
  %3 = icmp ne i64 %index.next, %wide.trip.count
  br i1 %3, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret void
}