1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 2; RUN: opt %s -S -riscv-gather-scatter-lowering -mtriple=riscv64 -mattr=+m,+v,+zvl256b | FileCheck %s --check-prefixes=CHECK,V 3; RUN: opt %s -S -riscv-gather-scatter-lowering -mtriple=riscv64 -mattr=+m,+f,+zve32f,+zvl256b | FileCheck %s --check-prefixes=CHECK,ZVE32F 4 5%struct.foo = type { i32, i32, i32, i32 } 6 7; void gather(signed char * __restrict A, signed char * __restrict B) { 8; for (int i = 0; i != 1024; ++i) 9; A[i] += B[i * 5]; 10; } 11define void @gather(ptr noalias nocapture %A, ptr noalias nocapture readonly %B) { 12; CHECK-LABEL: @gather( 13; CHECK-NEXT: entry: 14; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 15; CHECK: vector.body: 16; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 17; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ] 18; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 [[VEC_IND_SCALAR]] 19; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i8> @llvm.experimental.vp.strided.load.v32i8.p0.i64(ptr [[TMP0]], i64 5, <32 x i1> splat (i1 true), i32 32) 20; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.vp.select.v32i8(<32 x i1> splat (i1 true), <32 x i8> [[TMP1]], <32 x i8> undef, i32 32) 21; CHECK-NEXT: [[I2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]] 22; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[I2]], align 1 23; CHECK-NEXT: [[I4:%.*]] = add <32 x i8> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]] 24; CHECK-NEXT: store <32 x i8> [[I4]], ptr [[I2]], align 1 25; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 26; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], 160 27; CHECK-NEXT: [[I6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 28; CHECK-NEXT: br i1 [[I6]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]] 29; CHECK: for.cond.cleanup: 30; CHECK-NEXT: ret void 31; 32entry: 33 br label %vector.body 34 35vector.body: ; preds = %vector.body, %entry 36 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] 37 %vec.ind = phi <32 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15, i64 16, i64 17, i64 18, i64 19, i64 20, i64 21, i64 22, i64 23, i64 24, i64 25, i64 26, i64 27, i64 28, i64 29, i64 30, i64 31>, %entry ], [ %vec.ind.next, %vector.body ] 38 %i = mul nuw nsw <32 x i64> %vec.ind, splat (i64 5) 39 %i1 = getelementptr inbounds i8, ptr %B, <32 x i64> %i 40 %wide.masked.gather = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> %i1, i32 1, <32 x i1> splat (i1 true), <32 x i8> undef) 41 %i2 = getelementptr inbounds i8, ptr %A, i64 %index 42 %wide.load = load <32 x i8>, ptr %i2, align 1 43 %i4 = add <32 x i8> %wide.load, %wide.masked.gather 44 store <32 x i8> %i4, ptr %i2, align 1 45 %index.next = add nuw i64 %index, 32 46 %vec.ind.next = add <32 x i64> %vec.ind, splat (i64 32) 47 %i6 = icmp eq i64 %index.next, 1024 48 br i1 %i6, label %for.cond.cleanup, label %vector.body 49 50for.cond.cleanup: ; preds = %vector.body 51 ret void 52} 53 54define void @gather_masked(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, <32 x i8> %maskedoff) { 55; CHECK-LABEL: @gather_masked( 56; CHECK-NEXT: entry: 57; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 58; CHECK: vector.body: 59; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 60; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ] 61; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 [[VEC_IND_SCALAR]] 62; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i8> @llvm.experimental.vp.strided.load.v32i8.p0.i64(ptr [[TMP0]], i64 5, <32 x i1> <i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true>, i32 32) 63; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.vp.select.v32i8(<32 x i1> <i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true>, <32 x i8> [[TMP1]], <32 x i8> [[MASKEDOFF:%.*]], i32 32) 64; CHECK-NEXT: [[I2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]] 65; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[I2]], align 1 66; CHECK-NEXT: [[I4:%.*]] = add <32 x i8> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]] 67; CHECK-NEXT: store <32 x i8> [[I4]], ptr [[I2]], align 1 68; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 69; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], 160 70; CHECK-NEXT: [[I6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 71; CHECK-NEXT: br i1 [[I6]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]] 72; CHECK: for.cond.cleanup: 73; CHECK-NEXT: ret void 74; 75entry: 76 br label %vector.body 77 78vector.body: ; preds = %vector.body, %entry 79 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] 80 %vec.ind = phi <32 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15, i64 16, i64 17, i64 18, i64 19, i64 20, i64 21, i64 22, i64 23, i64 24, i64 25, i64 26, i64 27, i64 28, i64 29, i64 30, i64 31>, %entry ], [ %vec.ind.next, %vector.body ] 81 %i = mul nuw nsw <32 x i64> %vec.ind, splat (i64 5) 82 %i1 = getelementptr inbounds i8, ptr %B, <32 x i64> %i 83 %wide.masked.gather = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> %i1, i32 1, <32 x i1> <i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true>, <32 x i8> %maskedoff) 84 %i2 = getelementptr inbounds i8, ptr %A, i64 %index 85 %wide.load = load <32 x i8>, ptr %i2, align 1 86 %i4 = add <32 x i8> %wide.load, %wide.masked.gather 87 store <32 x i8> %i4, ptr %i2, align 1 88 %index.next = add nuw i64 %index, 32 89 %vec.ind.next = add <32 x i64> %vec.ind, splat (i64 32) 90 %i6 = icmp eq i64 %index.next, 1024 91 br i1 %i6, label %for.cond.cleanup, label %vector.body 92 93for.cond.cleanup: ; preds = %vector.body 94 ret void 95} 96 97define void @gather_negative_stride(ptr noalias nocapture %A, ptr noalias nocapture readonly %B) { 98; CHECK-LABEL: @gather_negative_stride( 99; CHECK-NEXT: entry: 100; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 101; CHECK: vector.body: 102; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 103; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 155, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ] 104; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 [[VEC_IND_SCALAR]] 105; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i8> @llvm.experimental.vp.strided.load.v32i8.p0.i64(ptr [[TMP0]], i64 -5, <32 x i1> splat (i1 true), i32 32) 106; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.vp.select.v32i8(<32 x i1> splat (i1 true), <32 x i8> [[TMP1]], <32 x i8> undef, i32 32) 107; CHECK-NEXT: [[I2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]] 108; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[I2]], align 1 109; CHECK-NEXT: [[I4:%.*]] = add <32 x i8> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]] 110; CHECK-NEXT: store <32 x i8> [[I4]], ptr [[I2]], align 1 111; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 112; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], 160 113; CHECK-NEXT: [[I6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 114; CHECK-NEXT: br i1 [[I6]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]] 115; CHECK: for.cond.cleanup: 116; CHECK-NEXT: ret void 117; 118entry: 119 br label %vector.body 120 121vector.body: ; preds = %vector.body, %entry 122 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] 123 %vec.ind = phi <32 x i64> [ <i64 31, i64 30, i64 29, i64 28, i64 27, i64 26, i64 25, i64 24, i64 23, i64 22, i64 21, i64 20, i64 19, i64 18, i64 17, i64 16, i64 15, i64 14, i64 13, i64 12, i64 11, i64 10, i64 9, i64 8, i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>, %entry ], [ %vec.ind.next, %vector.body ] 124 %i = mul nuw nsw <32 x i64> %vec.ind, splat (i64 5) 125 %i1 = getelementptr inbounds i8, ptr %B, <32 x i64> %i 126 %wide.masked.gather = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> %i1, i32 1, <32 x i1> splat (i1 true), <32 x i8> undef) 127 %i2 = getelementptr inbounds i8, ptr %A, i64 %index 128 %wide.load = load <32 x i8>, ptr %i2, align 1 129 %i4 = add <32 x i8> %wide.load, %wide.masked.gather 130 store <32 x i8> %i4, ptr %i2, align 1 131 %index.next = add nuw i64 %index, 32 132 %vec.ind.next = add <32 x i64> %vec.ind, splat (i64 32) 133 %i6 = icmp eq i64 %index.next, 1024 134 br i1 %i6, label %for.cond.cleanup, label %vector.body 135 136for.cond.cleanup: ; preds = %vector.body 137 ret void 138} 139 140define void @gather_zero_stride(ptr noalias nocapture %A, ptr noalias nocapture readonly %B) { 141; CHECK-LABEL: @gather_zero_stride( 142; CHECK-NEXT: entry: 143; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 144; CHECK: vector.body: 145; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 146; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ] 147; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 [[VEC_IND_SCALAR]] 148; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i8> @llvm.experimental.vp.strided.load.v32i8.p0.i64(ptr [[TMP0]], i64 0, <32 x i1> splat (i1 true), i32 32) 149; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.vp.select.v32i8(<32 x i1> splat (i1 true), <32 x i8> [[TMP1]], <32 x i8> undef, i32 32) 150; CHECK-NEXT: [[I2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]] 151; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[I2]], align 1 152; CHECK-NEXT: [[I4:%.*]] = add <32 x i8> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]] 153; CHECK-NEXT: store <32 x i8> [[I4]], ptr [[I2]], align 1 154; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 155; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], 160 156; CHECK-NEXT: [[I6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 157; CHECK-NEXT: br i1 [[I6]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]] 158; CHECK: for.cond.cleanup: 159; CHECK-NEXT: ret void 160; 161entry: 162 br label %vector.body 163 164vector.body: ; preds = %vector.body, %entry 165 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] 166 %vec.ind = phi <32 x i64> [ zeroinitializer, %entry ], [ %vec.ind.next, %vector.body ] 167 %i = mul nuw nsw <32 x i64> %vec.ind, splat (i64 5) 168 %i1 = getelementptr inbounds i8, ptr %B, <32 x i64> %i 169 %wide.masked.gather = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> %i1, i32 1, <32 x i1> splat (i1 true), <32 x i8> undef) 170 %i2 = getelementptr inbounds i8, ptr %A, i64 %index 171 %wide.load = load <32 x i8>, ptr %i2, align 1 172 %i4 = add <32 x i8> %wide.load, %wide.masked.gather 173 store <32 x i8> %i4, ptr %i2, align 1 174 %index.next = add nuw i64 %index, 32 175 %vec.ind.next = add <32 x i64> %vec.ind, splat (i64 32) 176 %i6 = icmp eq i64 %index.next, 1024 177 br i1 %i6, label %for.cond.cleanup, label %vector.body 178 179for.cond.cleanup: ; preds = %vector.body 180 ret void 181} 182 183;void scatter(signed char * __restrict A, signed char * __restrict B) { 184; for (int i = 0; i < 1024; ++i) 185; A[i * 5] += B[i]; 186;} 187define void @scatter(ptr noalias nocapture %A, ptr noalias nocapture readonly %B) { 188; CHECK-LABEL: @scatter( 189; CHECK-NEXT: entry: 190; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 191; CHECK: vector.body: 192; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 193; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ] 194; CHECK-NEXT: [[I:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], i64 [[INDEX]] 195; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[I]], align 1 196; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[VEC_IND_SCALAR]] 197; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i8> @llvm.experimental.vp.strided.load.v32i8.p0.i64(ptr [[TMP0]], i64 5, <32 x i1> splat (i1 true), i32 32) 198; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.vp.select.v32i8(<32 x i1> splat (i1 true), <32 x i8> [[TMP1]], <32 x i8> undef, i32 32) 199; CHECK-NEXT: [[I4:%.*]] = add <32 x i8> [[WIDE_MASKED_GATHER]], [[WIDE_LOAD]] 200; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v32i8.p0.i64(<32 x i8> [[I4]], ptr [[TMP0]], i64 5, <32 x i1> splat (i1 true), i32 32) 201; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 202; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], 160 203; CHECK-NEXT: [[I5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 204; CHECK-NEXT: br i1 [[I5]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]] 205; CHECK: for.cond.cleanup: 206; CHECK-NEXT: ret void 207; 208entry: 209 br label %vector.body 210 211vector.body: ; preds = %vector.body, %entry 212 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] 213 %vec.ind = phi <32 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15, i64 16, i64 17, i64 18, i64 19, i64 20, i64 21, i64 22, i64 23, i64 24, i64 25, i64 26, i64 27, i64 28, i64 29, i64 30, i64 31>, %entry ], [ %vec.ind.next, %vector.body ] 214 %i = getelementptr inbounds i8, ptr %B, i64 %index 215 %wide.load = load <32 x i8>, ptr %i, align 1 216 %i2 = mul nuw nsw <32 x i64> %vec.ind, splat (i64 5) 217 %i3 = getelementptr inbounds i8, ptr %A, <32 x i64> %i2 218 %wide.masked.gather = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> %i3, i32 1, <32 x i1> splat (i1 true), <32 x i8> undef) 219 %i4 = add <32 x i8> %wide.masked.gather, %wide.load 220 call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> %i4, <32 x ptr> %i3, i32 1, <32 x i1> splat (i1 true)) 221 %index.next = add nuw i64 %index, 32 222 %vec.ind.next = add <32 x i64> %vec.ind, splat (i64 32) 223 %i5 = icmp eq i64 %index.next, 1024 224 br i1 %i5, label %for.cond.cleanup, label %vector.body 225 226for.cond.cleanup: ; preds = %vector.body 227 ret void 228} 229 230define void @scatter_masked(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, <32 x i8> %maskedoff) { 231; CHECK-LABEL: @scatter_masked( 232; CHECK-NEXT: entry: 233; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 234; CHECK: vector.body: 235; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 236; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ] 237; CHECK-NEXT: [[I:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], i64 [[INDEX]] 238; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[I]], align 1 239; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[VEC_IND_SCALAR]] 240; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i8> @llvm.experimental.vp.strided.load.v32i8.p0.i64(ptr [[TMP0]], i64 5, <32 x i1> <i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true>, i32 32) 241; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.vp.select.v32i8(<32 x i1> <i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true>, <32 x i8> [[TMP1]], <32 x i8> [[MASKEDOFF:%.*]], i32 32) 242; CHECK-NEXT: [[I4:%.*]] = add <32 x i8> [[WIDE_MASKED_GATHER]], [[WIDE_LOAD]] 243; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v32i8.p0.i64(<32 x i8> [[I4]], ptr [[TMP0]], i64 5, <32 x i1> <i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true>, i32 32) 244; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 245; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], 160 246; CHECK-NEXT: [[I5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 247; CHECK-NEXT: br i1 [[I5]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]] 248; CHECK: for.cond.cleanup: 249; CHECK-NEXT: ret void 250; 251entry: 252 br label %vector.body 253 254vector.body: ; preds = %vector.body, %entry 255 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] 256 %vec.ind = phi <32 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15, i64 16, i64 17, i64 18, i64 19, i64 20, i64 21, i64 22, i64 23, i64 24, i64 25, i64 26, i64 27, i64 28, i64 29, i64 30, i64 31>, %entry ], [ %vec.ind.next, %vector.body ] 257 %i = getelementptr inbounds i8, ptr %B, i64 %index 258 %wide.load = load <32 x i8>, ptr %i, align 1 259 %i2 = mul nuw nsw <32 x i64> %vec.ind, splat (i64 5) 260 %i3 = getelementptr inbounds i8, ptr %A, <32 x i64> %i2 261 %wide.masked.gather = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> %i3, i32 1, <32 x i1> <i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true>, <32 x i8> %maskedoff) 262 %i4 = add <32 x i8> %wide.masked.gather, %wide.load 263 call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> %i4, <32 x ptr> %i3, i32 1, <32 x i1> <i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true>) 264 %index.next = add nuw i64 %index, 32 265 %vec.ind.next = add <32 x i64> %vec.ind, splat (i64 32) 266 %i5 = icmp eq i64 %index.next, 1024 267 br i1 %i5, label %for.cond.cleanup, label %vector.body 268 269for.cond.cleanup: ; preds = %vector.body 270 ret void 271} 272 273; void gather_pow2(signed char * __restrict A, signed char * __restrict B) { 274; for (int i = 0; i != 1024; ++i) 275; A[i] += B[i * 4]; 276; } 277define void @gather_pow2(ptr noalias nocapture %A, ptr noalias nocapture readonly %B) { 278; CHECK-LABEL: @gather_pow2( 279; CHECK-NEXT: entry: 280; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 281; CHECK: vector.body: 282; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 283; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ] 284; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[B:%.*]], i64 [[VEC_IND_SCALAR]] 285; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP0]], i64 16, <8 x i1> splat (i1 true), i32 8) 286; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> splat (i1 true), <8 x i32> [[TMP1]], <8 x i32> undef, i32 8) 287; CHECK-NEXT: [[I2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] 288; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[I2]], align 1 289; CHECK-NEXT: [[I4:%.*]] = add <8 x i32> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]] 290; CHECK-NEXT: store <8 x i32> [[I4]], ptr [[I2]], align 1 291; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 292; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], 32 293; CHECK-NEXT: [[I6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 294; CHECK-NEXT: br i1 [[I6]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]] 295; CHECK: for.cond.cleanup: 296; CHECK-NEXT: ret void 297; 298entry: 299 br label %vector.body 300 301vector.body: ; preds = %vector.body, %entry 302 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] 303 %vec.ind = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, %entry ], [ %vec.ind.next, %vector.body ] 304 %i = shl nsw <8 x i64> %vec.ind, splat (i64 2) 305 %i1 = getelementptr inbounds i32, ptr %B, <8 x i64> %i 306 %wide.masked.gather = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %i1, i32 4, <8 x i1> splat (i1 true), <8 x i32> undef) 307 %i2 = getelementptr inbounds i32, ptr %A, i64 %index 308 %wide.load = load <8 x i32>, ptr %i2, align 1 309 %i4 = add <8 x i32> %wide.load, %wide.masked.gather 310 store <8 x i32> %i4, ptr %i2, align 1 311 %index.next = add nuw i64 %index, 8 312 %vec.ind.next = add <8 x i64> %vec.ind, splat (i64 8) 313 %i6 = icmp eq i64 %index.next, 1024 314 br i1 %i6, label %for.cond.cleanup, label %vector.body 315 316for.cond.cleanup: ; preds = %vector.body 317 ret void 318} 319 320define void @gather_unknown_pow2(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, i64 %shift) { 321; CHECK-LABEL: @gather_unknown_pow2( 322; CHECK-NEXT: entry: 323; CHECK-NEXT: [[STRIDE:%.*]] = shl i64 1, [[SHIFT:%.*]] 324; CHECK-NEXT: [[STEP:%.*]] = shl i64 8, [[SHIFT]] 325; CHECK-NEXT: [[TMP0:%.*]] = mul i64 [[STRIDE]], 4 326; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 327; CHECK: vector.body: 328; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 329; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ] 330; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[B:%.*]], i64 [[VEC_IND_SCALAR]] 331; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP1]], i64 [[TMP0]], <8 x i1> splat (i1 true), i32 8) 332; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> splat (i1 true), <8 x i32> [[TMP2]], <8 x i32> undef, i32 8) 333; CHECK-NEXT: [[I2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] 334; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[I2]], align 1 335; CHECK-NEXT: [[I4:%.*]] = add <8 x i32> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]] 336; CHECK-NEXT: store <8 x i32> [[I4]], ptr [[I2]], align 1 337; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 338; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], [[STEP]] 339; CHECK-NEXT: [[I6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 340; CHECK-NEXT: br i1 [[I6]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]] 341; CHECK: for.cond.cleanup: 342; CHECK-NEXT: ret void 343; 344entry: 345 %.splatinsert = insertelement <8 x i64> poison, i64 %shift, i64 0 346 %.splat = shufflevector <8 x i64> %.splatinsert, <8 x i64> poison, <8 x i32> zeroinitializer 347 br label %vector.body 348 349vector.body: ; preds = %vector.body, %entry 350 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] 351 %vec.ind = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, %entry ], [ %vec.ind.next, %vector.body ] 352 %i = shl nsw <8 x i64> %vec.ind, %.splat 353 %i1 = getelementptr inbounds i32, ptr %B, <8 x i64> %i 354 %wide.masked.gather = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %i1, i32 4, <8 x i1> splat (i1 true), <8 x i32> undef) 355 %i2 = getelementptr inbounds i32, ptr %A, i64 %index 356 %wide.load = load <8 x i32>, ptr %i2, align 1 357 %i4 = add <8 x i32> %wide.load, %wide.masked.gather 358 store <8 x i32> %i4, ptr %i2, align 1 359 %index.next = add nuw i64 %index, 8 360 %vec.ind.next = add <8 x i64> %vec.ind, splat (i64 8) 361 %i6 = icmp eq i64 %index.next, 1024 362 br i1 %i6, label %for.cond.cleanup, label %vector.body 363 364for.cond.cleanup: ; preds = %vector.body 365 ret void 366} 367 368define void @negative_shl_non_commute(ptr noalias nocapture %A, ptr noalias nocapture readonly %B, i64 %shift) { 369; CHECK-LABEL: @negative_shl_non_commute( 370; CHECK-NEXT: entry: 371; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <8 x i64> poison, i64 [[SHIFT:%.*]], i64 0 372; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <8 x i64> [[DOTSPLATINSERT]], <8 x i64> poison, <8 x i32> zeroinitializer 373; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 374; CHECK: vector.body: 375; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 376; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 377; CHECK-NEXT: [[I:%.*]] = shl nsw <8 x i64> [[DOTSPLAT]], [[VEC_IND]] 378; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], <8 x i64> [[I]] 379; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> [[I1]], i32 4, <8 x i1> splat (i1 true), <8 x i32> undef) 380; CHECK-NEXT: [[I2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] 381; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[I2]], align 1 382; CHECK-NEXT: [[I4:%.*]] = add <8 x i32> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]] 383; CHECK-NEXT: store <8 x i32> [[I4]], ptr [[I2]], align 1 384; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 385; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8) 386; CHECK-NEXT: [[I6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 387; CHECK-NEXT: br i1 [[I6]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]] 388; CHECK: for.cond.cleanup: 389; CHECK-NEXT: ret void 390; 391entry: 392 %.splatinsert = insertelement <8 x i64> poison, i64 %shift, i64 0 393 %.splat = shufflevector <8 x i64> %.splatinsert, <8 x i64> poison, <8 x i32> zeroinitializer 394 br label %vector.body 395 396vector.body: ; preds = %vector.body, %entry 397 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] 398 %vec.ind = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, %entry ], [ %vec.ind.next, %vector.body ] 399 %i = shl nsw <8 x i64> %.splat, %vec.ind 400 %i1 = getelementptr inbounds i32, ptr %B, <8 x i64> %i 401 %wide.masked.gather = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %i1, i32 4, <8 x i1> splat (i1 true), <8 x i32> undef) 402 %i2 = getelementptr inbounds i32, ptr %A, i64 %index 403 %wide.load = load <8 x i32>, ptr %i2, align 1 404 %i4 = add <8 x i32> %wide.load, %wide.masked.gather 405 store <8 x i32> %i4, ptr %i2, align 1 406 %index.next = add nuw i64 %index, 8 407 %vec.ind.next = add <8 x i64> %vec.ind, splat (i64 8) 408 %i6 = icmp eq i64 %index.next, 1024 409 br i1 %i6, label %for.cond.cleanup, label %vector.body 410 411for.cond.cleanup: ; preds = %vector.body 412 ret void 413} 414 415;void scatter_pow2(signed char * __restrict A, signed char * __restrict B) { 416; for (int i = 0; i < 1024; ++i) 417; A[i * 4] += B[i]; 418;} 419define void @scatter_pow2(ptr noalias nocapture %A, ptr noalias nocapture readonly %B) { 420; CHECK-LABEL: @scatter_pow2( 421; CHECK-NEXT: entry: 422; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 423; CHECK: vector.body: 424; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 425; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ] 426; CHECK-NEXT: [[I:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[INDEX]] 427; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[I]], align 1 428; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[VEC_IND_SCALAR]] 429; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP0]], i64 16, <8 x i1> splat (i1 true), i32 8) 430; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> splat (i1 true), <8 x i32> [[TMP1]], <8 x i32> undef, i32 8) 431; CHECK-NEXT: [[I4:%.*]] = add <8 x i32> [[WIDE_MASKED_GATHER]], [[WIDE_LOAD]] 432; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v8i32.p0.i64(<8 x i32> [[I4]], ptr [[TMP0]], i64 16, <8 x i1> splat (i1 true), i32 8) 433; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 434; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], 32 435; CHECK-NEXT: [[I5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 436; CHECK-NEXT: br i1 [[I5]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]] 437; CHECK: for.cond.cleanup: 438; CHECK-NEXT: ret void 439; 440entry: 441 br label %vector.body 442 443vector.body: ; preds = %vector.body, %entry 444 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] 445 %vec.ind = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, %entry ], [ %vec.ind.next, %vector.body ] 446 %i = getelementptr inbounds i32, ptr %B, i64 %index 447 %wide.load = load <8 x i32>, ptr %i, align 1 448 %i2 = shl nuw nsw <8 x i64> %vec.ind, splat (i64 2) 449 %i3 = getelementptr inbounds i32, ptr %A, <8 x i64> %i2 450 %wide.masked.gather = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %i3, i32 4, <8 x i1> splat (i1 true), <8 x i32> undef) 451 %i4 = add <8 x i32> %wide.masked.gather, %wide.load 452 call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %i4, <8 x ptr> %i3, i32 4, <8 x i1> splat (i1 true)) 453 %index.next = add nuw i64 %index, 8 454 %vec.ind.next = add <8 x i64> %vec.ind, splat (i64 8) 455 %i5 = icmp eq i64 %index.next, 1024 456 br i1 %i5, label %for.cond.cleanup, label %vector.body 457 458for.cond.cleanup: ; preds = %vector.body 459 ret void 460} 461 462;struct foo { 463; int a, b, c, d; 464;}; 465; 466;void struct_gather(int * __restrict A, struct foo * __restrict B) { 467; for (int i = 0; i < 1024; ++i) 468; A[i] += B[i].b; 469;} 470define void @struct_gather(ptr noalias nocapture %A, ptr noalias nocapture readonly %B) { 471; CHECK-LABEL: @struct_gather( 472; CHECK-NEXT: entry: 473; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 474; CHECK: vector.body: 475; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 476; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ] 477; CHECK-NEXT: [[VEC_IND_SCALAR1:%.*]] = phi i64 [ 8, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR2:%.*]], [[VECTOR_BODY]] ] 478; CHECK-NEXT: [[TMP0:%.*]] = getelementptr [[STRUCT_FOO:%.*]], ptr [[B:%.*]], i64 [[VEC_IND_SCALAR]], i32 1 479; CHECK-NEXT: [[TMP1:%.*]] = getelementptr [[STRUCT_FOO]], ptr [[B]], i64 [[VEC_IND_SCALAR1]], i32 1 480; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP0]], i64 16, <8 x i1> splat (i1 true), i32 8) 481; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> splat (i1 true), <8 x i32> [[TMP2]], <8 x i32> undef, i32 8) 482; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP1]], i64 16, <8 x i1> splat (i1 true), i32 8) 483; CHECK-NEXT: [[WIDE_MASKED_GATHER9:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> splat (i1 true), <8 x i32> [[TMP3]], <8 x i32> undef, i32 8) 484; CHECK-NEXT: [[I2:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDEX]] 485; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i32>, ptr [[I2]], align 4 486; CHECK-NEXT: [[I4:%.*]] = getelementptr inbounds i32, ptr [[I2]], i64 8 487; CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <8 x i32>, ptr [[I4]], align 4 488; CHECK-NEXT: [[I6:%.*]] = add nsw <8 x i32> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]] 489; CHECK-NEXT: [[I7:%.*]] = add nsw <8 x i32> [[WIDE_LOAD10]], [[WIDE_MASKED_GATHER9]] 490; CHECK-NEXT: store <8 x i32> [[I6]], ptr [[I2]], align 4 491; CHECK-NEXT: store <8 x i32> [[I7]], ptr [[I4]], align 4 492; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 493; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], 16 494; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR2]] = add i64 [[VEC_IND_SCALAR1]], 16 495; CHECK-NEXT: [[I10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 496; CHECK-NEXT: br i1 [[I10]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]] 497; CHECK: for.cond.cleanup: 498; CHECK-NEXT: ret void 499; 500entry: 501 br label %vector.body 502 503vector.body: ; preds = %vector.body, %entry 504 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] 505 %vec.ind = phi <8 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, %entry ], [ %vec.ind.next, %vector.body ] 506 %step.add = add <8 x i64> %vec.ind, splat (i64 8) 507 %i = getelementptr inbounds %struct.foo, ptr %B, <8 x i64> %vec.ind, i32 1 508 %i1 = getelementptr inbounds %struct.foo, ptr %B, <8 x i64> %step.add, i32 1 509 %wide.masked.gather = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %i, i32 4, <8 x i1> splat (i1 true), <8 x i32> undef) 510 %wide.masked.gather9 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %i1, i32 4, <8 x i1> splat (i1 true), <8 x i32> undef) 511 %i2 = getelementptr inbounds i32, ptr %A, i64 %index 512 %wide.load = load <8 x i32>, ptr %i2, align 4 513 %i4 = getelementptr inbounds i32, ptr %i2, i64 8 514 %wide.load10 = load <8 x i32>, ptr %i4, align 4 515 %i6 = add nsw <8 x i32> %wide.load, %wide.masked.gather 516 %i7 = add nsw <8 x i32> %wide.load10, %wide.masked.gather9 517 store <8 x i32> %i6, ptr %i2, align 4 518 store <8 x i32> %i7, ptr %i4, align 4 519 %index.next = add nuw i64 %index, 16 520 %vec.ind.next = add <8 x i64> %vec.ind, splat (i64 16) 521 %i10 = icmp eq i64 %index.next, 1024 522 br i1 %i10, label %for.cond.cleanup, label %vector.body 523 524for.cond.cleanup: ; preds = %vector.body 525 ret void 526} 527 528;void gather_unroll(int * __restrict A, int * __restrict B) { 529; for (int i = 0; i < 1024; i+= 4 ) { 530; A[i] += B[i * 4]; 531; A[i+1] += B[(i+1) * 4]; 532; A[i+2] += B[(i+2) * 4]; 533; A[i+3] += B[(i+3) * 4]; 534; } 535;} 536define void @gather_unroll(ptr noalias nocapture %A, ptr noalias nocapture readonly %B) { 537; CHECK-LABEL: @gather_unroll( 538; CHECK-NEXT: entry: 539; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 540; CHECK: vector.body: 541; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 542; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ] 543; CHECK-NEXT: [[VEC_IND_SCALAR1:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR2:%.*]], [[VECTOR_BODY]] ] 544; CHECK-NEXT: [[VEC_IND_SCALAR3:%.*]] = phi i64 [ 4, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR4:%.*]], [[VECTOR_BODY]] ] 545; CHECK-NEXT: [[VEC_IND_SCALAR5:%.*]] = phi i64 [ 1, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR6:%.*]], [[VECTOR_BODY]] ] 546; CHECK-NEXT: [[VEC_IND_SCALAR7:%.*]] = phi i64 [ 8, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR8:%.*]], [[VECTOR_BODY]] ] 547; CHECK-NEXT: [[VEC_IND_SCALAR9:%.*]] = phi i64 [ 2, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR10:%.*]], [[VECTOR_BODY]] ] 548; CHECK-NEXT: [[VEC_IND_SCALAR11:%.*]] = phi i64 [ 12, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR12:%.*]], [[VECTOR_BODY]] ] 549; CHECK-NEXT: [[VEC_IND_SCALAR13:%.*]] = phi i64 [ 3, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR14:%.*]], [[VECTOR_BODY]] ] 550; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[B:%.*]], i64 [[VEC_IND_SCALAR]] 551; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP0]], i64 64, <8 x i1> splat (i1 true), i32 8) 552; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> splat (i1 true), <8 x i32> [[TMP1]], <8 x i32> undef, i32 8) 553; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[VEC_IND_SCALAR1]] 554; CHECK-NEXT: [[TMP3:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP2]], i64 16, <8 x i1> splat (i1 true), i32 8) 555; CHECK-NEXT: [[WIDE_MASKED_GATHER52:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> splat (i1 true), <8 x i32> [[TMP3]], <8 x i32> undef, i32 8) 556; CHECK-NEXT: [[I3:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_GATHER52]], [[WIDE_MASKED_GATHER]] 557; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v8i32.p0.i64(<8 x i32> [[I3]], ptr [[TMP2]], i64 16, <8 x i1> splat (i1 true), i32 8) 558; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[B]], i64 [[VEC_IND_SCALAR3]] 559; CHECK-NEXT: [[TMP5:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP4]], i64 64, <8 x i1> splat (i1 true), i32 8) 560; CHECK-NEXT: [[WIDE_MASKED_GATHER53:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> splat (i1 true), <8 x i32> [[TMP5]], <8 x i32> undef, i32 8) 561; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[A]], i64 [[VEC_IND_SCALAR5]] 562; CHECK-NEXT: [[TMP7:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP6]], i64 16, <8 x i1> splat (i1 true), i32 8) 563; CHECK-NEXT: [[WIDE_MASKED_GATHER54:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> splat (i1 true), <8 x i32> [[TMP7]], <8 x i32> undef, i32 8) 564; CHECK-NEXT: [[I8:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_GATHER54]], [[WIDE_MASKED_GATHER53]] 565; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v8i32.p0.i64(<8 x i32> [[I8]], ptr [[TMP6]], i64 16, <8 x i1> splat (i1 true), i32 8) 566; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[B]], i64 [[VEC_IND_SCALAR7]] 567; CHECK-NEXT: [[TMP9:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP8]], i64 64, <8 x i1> splat (i1 true), i32 8) 568; CHECK-NEXT: [[WIDE_MASKED_GATHER55:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> splat (i1 true), <8 x i32> [[TMP9]], <8 x i32> undef, i32 8) 569; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[A]], i64 [[VEC_IND_SCALAR9]] 570; CHECK-NEXT: [[TMP11:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP10]], i64 16, <8 x i1> splat (i1 true), i32 8) 571; CHECK-NEXT: [[WIDE_MASKED_GATHER56:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> splat (i1 true), <8 x i32> [[TMP11]], <8 x i32> undef, i32 8) 572; CHECK-NEXT: [[I13:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_GATHER56]], [[WIDE_MASKED_GATHER55]] 573; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v8i32.p0.i64(<8 x i32> [[I13]], ptr [[TMP10]], i64 16, <8 x i1> splat (i1 true), i32 8) 574; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[B]], i64 [[VEC_IND_SCALAR11]] 575; CHECK-NEXT: [[TMP13:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP12]], i64 64, <8 x i1> splat (i1 true), i32 8) 576; CHECK-NEXT: [[WIDE_MASKED_GATHER57:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> splat (i1 true), <8 x i32> [[TMP13]], <8 x i32> undef, i32 8) 577; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[A]], i64 [[VEC_IND_SCALAR13]] 578; CHECK-NEXT: [[TMP15:%.*]] = call <8 x i32> @llvm.experimental.vp.strided.load.v8i32.p0.i64(ptr [[TMP14]], i64 16, <8 x i1> splat (i1 true), i32 8) 579; CHECK-NEXT: [[WIDE_MASKED_GATHER58:%.*]] = call <8 x i32> @llvm.vp.select.v8i32(<8 x i1> splat (i1 true), <8 x i32> [[TMP15]], <8 x i32> undef, i32 8) 580; CHECK-NEXT: [[I18:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_GATHER58]], [[WIDE_MASKED_GATHER57]] 581; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v8i32.p0.i64(<8 x i32> [[I18]], ptr [[TMP14]], i64 16, <8 x i1> splat (i1 true), i32 8) 582; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8 583; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add i64 [[VEC_IND_SCALAR]], 128 584; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR2]] = add i64 [[VEC_IND_SCALAR1]], 32 585; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR4]] = add i64 [[VEC_IND_SCALAR3]], 128 586; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR6]] = add i64 [[VEC_IND_SCALAR5]], 32 587; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR8]] = add i64 [[VEC_IND_SCALAR7]], 128 588; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR10]] = add i64 [[VEC_IND_SCALAR9]], 32 589; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR12]] = add i64 [[VEC_IND_SCALAR11]], 128 590; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR14]] = add i64 [[VEC_IND_SCALAR13]], 32 591; CHECK-NEXT: [[I19:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 592; CHECK-NEXT: br i1 [[I19]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]] 593; CHECK: for.cond.cleanup: 594; CHECK-NEXT: ret void 595; 596entry: 597 br label %vector.body 598 599vector.body: ; preds = %vector.body, %entry 600 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] 601 %vec.ind = phi <8 x i64> [ <i64 0, i64 4, i64 8, i64 12, i64 16, i64 20, i64 24, i64 28>, %entry ], [ %vec.ind.next, %vector.body ] 602 %i = shl nuw nsw <8 x i64> %vec.ind, splat (i64 2) 603 %i1 = getelementptr inbounds i32, ptr %B, <8 x i64> %i 604 %wide.masked.gather = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %i1, i32 4, <8 x i1> splat (i1 true), <8 x i32> undef) 605 %i2 = getelementptr inbounds i32, ptr %A, <8 x i64> %vec.ind 606 %wide.masked.gather52 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %i2, i32 4, <8 x i1> splat (i1 true), <8 x i32> undef) 607 %i3 = add nsw <8 x i32> %wide.masked.gather52, %wide.masked.gather 608 call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %i3, <8 x ptr> %i2, i32 4, <8 x i1> splat (i1 true)) 609 %i4 = or disjoint <8 x i64> %vec.ind, splat (i64 1) 610 %i5 = shl nsw <8 x i64> %i4, splat (i64 2) 611 %i6 = getelementptr inbounds i32, ptr %B, <8 x i64> %i5 612 %wide.masked.gather53 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %i6, i32 4, <8 x i1> splat (i1 true), <8 x i32> undef) 613 %i7 = getelementptr inbounds i32, ptr %A, <8 x i64> %i4 614 %wide.masked.gather54 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %i7, i32 4, <8 x i1> splat (i1 true), <8 x i32> undef) 615 %i8 = add nsw <8 x i32> %wide.masked.gather54, %wide.masked.gather53 616 call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %i8, <8 x ptr> %i7, i32 4, <8 x i1> splat (i1 true)) 617 %i9 = or disjoint <8 x i64> %vec.ind, splat (i64 2) 618 %i10 = shl nsw <8 x i64> %i9, splat (i64 2) 619 %i11 = getelementptr inbounds i32, ptr %B, <8 x i64> %i10 620 %wide.masked.gather55 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %i11, i32 4, <8 x i1> splat (i1 true), <8 x i32> undef) 621 %i12 = getelementptr inbounds i32, ptr %A, <8 x i64> %i9 622 %wide.masked.gather56 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %i12, i32 4, <8 x i1> splat (i1 true), <8 x i32> undef) 623 %i13 = add nsw <8 x i32> %wide.masked.gather56, %wide.masked.gather55 624 call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %i13, <8 x ptr> %i12, i32 4, <8 x i1> splat (i1 true)) 625 %i14 = or disjoint <8 x i64> %vec.ind, splat (i64 3) 626 %i15 = shl nsw <8 x i64> %i14, splat (i64 2) 627 %i16 = getelementptr inbounds i32, ptr %B, <8 x i64> %i15 628 %wide.masked.gather57 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %i16, i32 4, <8 x i1> splat (i1 true), <8 x i32> undef) 629 %i17 = getelementptr inbounds i32, ptr %A, <8 x i64> %i14 630 %wide.masked.gather58 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> %i17, i32 4, <8 x i1> splat (i1 true), <8 x i32> undef) 631 %i18 = add nsw <8 x i32> %wide.masked.gather58, %wide.masked.gather57 632 call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %i18, <8 x ptr> %i17, i32 4, <8 x i1> splat (i1 true)) 633 %index.next = add nuw i64 %index, 8 634 %vec.ind.next = add <8 x i64> %vec.ind, splat (i64 32) 635 %i19 = icmp eq i64 %index.next, 256 636 br i1 %i19, label %for.cond.cleanup, label %vector.body 637 638for.cond.cleanup: ; preds = %vector.body 639 ret void 640} 641 642declare <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr>, i32 immarg, <32 x i1>, <32 x i8>) 643declare <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr>, i32 immarg, <8 x i1>, <8 x i32>) 644declare void @llvm.masked.scatter.v32i8.v32p0(<32 x i8>, <32 x ptr>, i32 immarg, <32 x i1>) 645declare void @llvm.masked.scatter.v8i32.v8p0(<8 x i32>, <8 x ptr>, i32 immarg, <8 x i1>) 646 647; Make sure we don't crash in getTgtMemIntrinsic for a vector of pointers. 648define void @gather_of_pointers(ptr noalias nocapture %arg, ptr noalias nocapture readonly %arg1) { 649; V-LABEL: @gather_of_pointers( 650; V-NEXT: bb: 651; V-NEXT: br label [[BB2:%.*]] 652; V: bb2: 653; V-NEXT: [[I:%.*]] = phi i64 [ 0, [[BB:%.*]] ], [ [[I15:%.*]], [[BB2]] ] 654; V-NEXT: [[I3_SCALAR:%.*]] = phi i64 [ 0, [[BB]] ], [ [[I16_SCALAR:%.*]], [[BB2]] ] 655; V-NEXT: [[I3_SCALAR1:%.*]] = phi i64 [ 10, [[BB]] ], [ [[I16_SCALAR2:%.*]], [[BB2]] ] 656; V-NEXT: [[TMP0:%.*]] = getelementptr ptr, ptr [[ARG1:%.*]], i64 [[I3_SCALAR]] 657; V-NEXT: [[TMP1:%.*]] = getelementptr ptr, ptr [[ARG1]], i64 [[I3_SCALAR1]] 658; V-NEXT: [[TMP2:%.*]] = call <2 x ptr> @llvm.experimental.vp.strided.load.v2p0.p0.i64(ptr [[TMP0]], i64 40, <2 x i1> splat (i1 true), i32 2) 659; V-NEXT: [[I9:%.*]] = call <2 x ptr> @llvm.vp.select.v2p0(<2 x i1> splat (i1 true), <2 x ptr> [[TMP2]], <2 x ptr> undef, i32 2) 660; V-NEXT: [[TMP3:%.*]] = call <2 x ptr> @llvm.experimental.vp.strided.load.v2p0.p0.i64(ptr [[TMP1]], i64 40, <2 x i1> splat (i1 true), i32 2) 661; V-NEXT: [[I10:%.*]] = call <2 x ptr> @llvm.vp.select.v2p0(<2 x i1> splat (i1 true), <2 x ptr> [[TMP3]], <2 x ptr> undef, i32 2) 662; V-NEXT: [[I11:%.*]] = getelementptr inbounds ptr, ptr [[ARG:%.*]], i64 [[I]] 663; V-NEXT: store <2 x ptr> [[I9]], ptr [[I11]], align 8 664; V-NEXT: [[I13:%.*]] = getelementptr inbounds ptr, ptr [[I11]], i64 2 665; V-NEXT: store <2 x ptr> [[I10]], ptr [[I13]], align 8 666; V-NEXT: [[I15]] = add nuw i64 [[I]], 4 667; V-NEXT: [[I16_SCALAR]] = add i64 [[I3_SCALAR]], 20 668; V-NEXT: [[I16_SCALAR2]] = add i64 [[I3_SCALAR1]], 20 669; V-NEXT: [[I17:%.*]] = icmp eq i64 [[I15]], 1024 670; V-NEXT: br i1 [[I17]], label [[BB18:%.*]], label [[BB2]] 671; V: bb18: 672; V-NEXT: ret void 673; 674; ZVE32F-LABEL: @gather_of_pointers( 675; ZVE32F-NEXT: bb: 676; ZVE32F-NEXT: br label [[BB2:%.*]] 677; ZVE32F: bb2: 678; ZVE32F-NEXT: [[I:%.*]] = phi i64 [ 0, [[BB:%.*]] ], [ [[I15:%.*]], [[BB2]] ] 679; ZVE32F-NEXT: [[I3:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[BB]] ], [ [[I16:%.*]], [[BB2]] ] 680; ZVE32F-NEXT: [[I4:%.*]] = mul nuw nsw <2 x i64> [[I3]], splat (i64 5) 681; ZVE32F-NEXT: [[I5:%.*]] = mul <2 x i64> [[I3]], splat (i64 5) 682; ZVE32F-NEXT: [[I6:%.*]] = add <2 x i64> [[I5]], splat (i64 10) 683; ZVE32F-NEXT: [[I7:%.*]] = getelementptr inbounds ptr, ptr [[ARG1:%.*]], <2 x i64> [[I4]] 684; ZVE32F-NEXT: [[I8:%.*]] = getelementptr inbounds ptr, ptr [[ARG1]], <2 x i64> [[I6]] 685; ZVE32F-NEXT: [[I9:%.*]] = call <2 x ptr> @llvm.masked.gather.v2p0.v2p0(<2 x ptr> [[I7]], i32 8, <2 x i1> splat (i1 true), <2 x ptr> undef) 686; ZVE32F-NEXT: [[I10:%.*]] = call <2 x ptr> @llvm.masked.gather.v2p0.v2p0(<2 x ptr> [[I8]], i32 8, <2 x i1> splat (i1 true), <2 x ptr> undef) 687; ZVE32F-NEXT: [[I11:%.*]] = getelementptr inbounds ptr, ptr [[ARG:%.*]], i64 [[I]] 688; ZVE32F-NEXT: store <2 x ptr> [[I9]], ptr [[I11]], align 8 689; ZVE32F-NEXT: [[I13:%.*]] = getelementptr inbounds ptr, ptr [[I11]], i64 2 690; ZVE32F-NEXT: store <2 x ptr> [[I10]], ptr [[I13]], align 8 691; ZVE32F-NEXT: [[I15]] = add nuw i64 [[I]], 4 692; ZVE32F-NEXT: [[I16]] = add <2 x i64> [[I3]], splat (i64 4) 693; ZVE32F-NEXT: [[I17:%.*]] = icmp eq i64 [[I15]], 1024 694; ZVE32F-NEXT: br i1 [[I17]], label [[BB18:%.*]], label [[BB2]] 695; ZVE32F: bb18: 696; ZVE32F-NEXT: ret void 697; 698bb: 699 br label %bb2 700 701bb2: ; preds = %bb2, %bb 702 %i = phi i64 [ 0, %bb ], [ %i15, %bb2 ] 703 %i3 = phi <2 x i64> [ <i64 0, i64 1>, %bb ], [ %i16, %bb2 ] 704 %i4 = mul nuw nsw <2 x i64> %i3, splat (i64 5) 705 %i5 = mul <2 x i64> %i3, splat (i64 5) 706 %i6 = add <2 x i64> %i5, <i64 10, i64 10> 707 %i7 = getelementptr inbounds ptr, ptr %arg1, <2 x i64> %i4 708 %i8 = getelementptr inbounds ptr, ptr %arg1, <2 x i64> %i6 709 %i9 = call <2 x ptr> @llvm.masked.gather.v2p0.v2p0(<2 x ptr> %i7, i32 8, <2 x i1> splat (i1 true), <2 x ptr> undef) 710 %i10 = call <2 x ptr> @llvm.masked.gather.v2p0.v2p0(<2 x ptr> %i8, i32 8, <2 x i1> splat (i1 true), <2 x ptr> undef) 711 %i11 = getelementptr inbounds ptr, ptr %arg, i64 %i 712 store <2 x ptr> %i9, ptr %i11, align 8 713 %i13 = getelementptr inbounds ptr, ptr %i11, i64 2 714 store <2 x ptr> %i10, ptr %i13, align 8 715 %i15 = add nuw i64 %i, 4 716 %i16 = add <2 x i64> %i3, <i64 4, i64 4> 717 %i17 = icmp eq i64 %i15, 1024 718 br i1 %i17, label %bb18, label %bb2 719 720bb18: ; preds = %bb2 721 ret void 722} 723 724declare <2 x ptr> @llvm.masked.gather.v2p0.v2p0(<2 x ptr>, i32 immarg, <2 x i1>, <2 x ptr>) 725 726; Make sure we don't crash in getTgtMemIntrinsic for a vector of pointers. 727define void @scatter_of_pointers(ptr noalias nocapture %arg, ptr noalias nocapture readonly %arg1) { 728; V-LABEL: @scatter_of_pointers( 729; V-NEXT: bb: 730; V-NEXT: br label [[BB2:%.*]] 731; V: bb2: 732; V-NEXT: [[I:%.*]] = phi i64 [ 0, [[BB:%.*]] ], [ [[I15:%.*]], [[BB2]] ] 733; V-NEXT: [[I3_SCALAR:%.*]] = phi i64 [ 0, [[BB]] ], [ [[I16_SCALAR:%.*]], [[BB2]] ] 734; V-NEXT: [[I3_SCALAR1:%.*]] = phi i64 [ 10, [[BB]] ], [ [[I16_SCALAR2:%.*]], [[BB2]] ] 735; V-NEXT: [[I4:%.*]] = getelementptr inbounds ptr, ptr [[ARG1:%.*]], i64 [[I]] 736; V-NEXT: [[I6:%.*]] = load <2 x ptr>, ptr [[I4]], align 8 737; V-NEXT: [[I7:%.*]] = getelementptr inbounds ptr, ptr [[I4]], i64 2 738; V-NEXT: [[I9:%.*]] = load <2 x ptr>, ptr [[I7]], align 8 739; V-NEXT: [[TMP0:%.*]] = getelementptr ptr, ptr [[ARG:%.*]], i64 [[I3_SCALAR]] 740; V-NEXT: [[TMP1:%.*]] = getelementptr ptr, ptr [[ARG]], i64 [[I3_SCALAR1]] 741; V-NEXT: call void @llvm.experimental.vp.strided.store.v2p0.p0.i64(<2 x ptr> [[I6]], ptr [[TMP0]], i64 40, <2 x i1> splat (i1 true), i32 2) 742; V-NEXT: call void @llvm.experimental.vp.strided.store.v2p0.p0.i64(<2 x ptr> [[I9]], ptr [[TMP1]], i64 40, <2 x i1> splat (i1 true), i32 2) 743; V-NEXT: [[I15]] = add nuw i64 [[I]], 4 744; V-NEXT: [[I16_SCALAR]] = add i64 [[I3_SCALAR]], 20 745; V-NEXT: [[I16_SCALAR2]] = add i64 [[I3_SCALAR1]], 20 746; V-NEXT: [[I17:%.*]] = icmp eq i64 [[I15]], 1024 747; V-NEXT: br i1 [[I17]], label [[BB18:%.*]], label [[BB2]] 748; V: bb18: 749; V-NEXT: ret void 750; 751; ZVE32F-LABEL: @scatter_of_pointers( 752; ZVE32F-NEXT: bb: 753; ZVE32F-NEXT: br label [[BB2:%.*]] 754; ZVE32F: bb2: 755; ZVE32F-NEXT: [[I:%.*]] = phi i64 [ 0, [[BB:%.*]] ], [ [[I15:%.*]], [[BB2]] ] 756; ZVE32F-NEXT: [[I3:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[BB]] ], [ [[I16:%.*]], [[BB2]] ] 757; ZVE32F-NEXT: [[I4:%.*]] = getelementptr inbounds ptr, ptr [[ARG1:%.*]], i64 [[I]] 758; ZVE32F-NEXT: [[I6:%.*]] = load <2 x ptr>, ptr [[I4]], align 8 759; ZVE32F-NEXT: [[I7:%.*]] = getelementptr inbounds ptr, ptr [[I4]], i64 2 760; ZVE32F-NEXT: [[I9:%.*]] = load <2 x ptr>, ptr [[I7]], align 8 761; ZVE32F-NEXT: [[I10:%.*]] = mul nuw nsw <2 x i64> [[I3]], splat (i64 5) 762; ZVE32F-NEXT: [[I11:%.*]] = mul <2 x i64> [[I3]], splat (i64 5) 763; ZVE32F-NEXT: [[I12:%.*]] = add <2 x i64> [[I11]], splat (i64 10) 764; ZVE32F-NEXT: [[I13:%.*]] = getelementptr inbounds ptr, ptr [[ARG:%.*]], <2 x i64> [[I10]] 765; ZVE32F-NEXT: [[I14:%.*]] = getelementptr inbounds ptr, ptr [[ARG]], <2 x i64> [[I12]] 766; ZVE32F-NEXT: call void @llvm.masked.scatter.v2p0.v2p0(<2 x ptr> [[I6]], <2 x ptr> [[I13]], i32 8, <2 x i1> splat (i1 true)) 767; ZVE32F-NEXT: call void @llvm.masked.scatter.v2p0.v2p0(<2 x ptr> [[I9]], <2 x ptr> [[I14]], i32 8, <2 x i1> splat (i1 true)) 768; ZVE32F-NEXT: [[I15]] = add nuw i64 [[I]], 4 769; ZVE32F-NEXT: [[I16]] = add <2 x i64> [[I3]], splat (i64 4) 770; ZVE32F-NEXT: [[I17:%.*]] = icmp eq i64 [[I15]], 1024 771; ZVE32F-NEXT: br i1 [[I17]], label [[BB18:%.*]], label [[BB2]] 772; ZVE32F: bb18: 773; ZVE32F-NEXT: ret void 774; 775bb: 776 br label %bb2 777 778bb2: ; preds = %bb2, %bb 779 %i = phi i64 [ 0, %bb ], [ %i15, %bb2 ] 780 %i3 = phi <2 x i64> [ <i64 0, i64 1>, %bb ], [ %i16, %bb2 ] 781 %i4 = getelementptr inbounds ptr, ptr %arg1, i64 %i 782 %i6 = load <2 x ptr>, ptr %i4, align 8 783 %i7 = getelementptr inbounds ptr, ptr %i4, i64 2 784 %i9 = load <2 x ptr>, ptr %i7, align 8 785 %i10 = mul nuw nsw <2 x i64> %i3, splat (i64 5) 786 %i11 = mul <2 x i64> %i3, splat (i64 5) 787 %i12 = add <2 x i64> %i11, <i64 10, i64 10> 788 %i13 = getelementptr inbounds ptr, ptr %arg, <2 x i64> %i10 789 %i14 = getelementptr inbounds ptr, ptr %arg, <2 x i64> %i12 790 call void @llvm.masked.scatter.v2p0.v2p0(<2 x ptr> %i6, <2 x ptr> %i13, i32 8, <2 x i1> splat (i1 true)) 791 call void @llvm.masked.scatter.v2p0.v2p0(<2 x ptr> %i9, <2 x ptr> %i14, i32 8, <2 x i1> splat (i1 true)) 792 %i15 = add nuw i64 %i, 4 793 %i16 = add <2 x i64> %i3, <i64 4, i64 4> 794 %i17 = icmp eq i64 %i15, 1024 795 br i1 %i17, label %bb18, label %bb2 796 797bb18: ; preds = %bb2 798 ret void 799} 800 801declare void @llvm.masked.scatter.v2p0.v2p0(<2 x ptr>, <2 x ptr>, i32 immarg, <2 x i1>) 802 803define void @strided_load_startval_add_with_splat(ptr noalias nocapture %arg, ptr noalias nocapture readonly %arg1, i32 signext %arg2) { 804; CHECK-LABEL: @strided_load_startval_add_with_splat( 805; CHECK-NEXT: bb: 806; CHECK-NEXT: [[I:%.*]] = icmp eq i32 [[ARG2:%.*]], 1024 807; CHECK-NEXT: br i1 [[I]], label [[BB34:%.*]], label [[BB3:%.*]] 808; CHECK: bb3: 809; CHECK-NEXT: [[I4:%.*]] = sext i32 [[ARG2]] to i64 810; CHECK-NEXT: [[I5:%.*]] = sub i32 1023, [[ARG2]] 811; CHECK-NEXT: [[I6:%.*]] = zext i32 [[I5]] to i64 812; CHECK-NEXT: [[I7:%.*]] = add nuw nsw i64 [[I6]], 1 813; CHECK-NEXT: [[I8:%.*]] = icmp ult i32 [[I5]], 31 814; CHECK-NEXT: br i1 [[I8]], label [[BB32:%.*]], label [[BB9:%.*]] 815; CHECK: bb9: 816; CHECK-NEXT: [[I10:%.*]] = and i64 [[I7]], 8589934560 817; CHECK-NEXT: [[I11:%.*]] = add nsw i64 [[I10]], [[I4]] 818; CHECK-NEXT: [[START:%.*]] = mul i64 [[I4]], 5 819; CHECK-NEXT: br label [[BB15:%.*]] 820; CHECK: bb15: 821; CHECK-NEXT: [[I16:%.*]] = phi i64 [ 0, [[BB9]] ], [ [[I27:%.*]], [[BB15]] ] 822; CHECK-NEXT: [[I17_SCALAR:%.*]] = phi i64 [ [[START]], [[BB9]] ], [ [[I28_SCALAR:%.*]], [[BB15]] ] 823; CHECK-NEXT: [[I18:%.*]] = add i64 [[I16]], [[I4]] 824; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[ARG1:%.*]], i64 [[I17_SCALAR]] 825; CHECK-NEXT: [[TMP1:%.*]] = call <32 x i8> @llvm.experimental.vp.strided.load.v32i8.p0.i64(ptr [[TMP0]], i64 5, <32 x i1> splat (i1 true), i32 32) 826; CHECK-NEXT: [[I21:%.*]] = call <32 x i8> @llvm.vp.select.v32i8(<32 x i1> splat (i1 true), <32 x i8> [[TMP1]], <32 x i8> undef, i32 32) 827; CHECK-NEXT: [[I22:%.*]] = getelementptr inbounds i8, ptr [[ARG:%.*]], i64 [[I18]] 828; CHECK-NEXT: [[I24:%.*]] = load <32 x i8>, ptr [[I22]], align 1 829; CHECK-NEXT: [[I25:%.*]] = add <32 x i8> [[I24]], [[I21]] 830; CHECK-NEXT: store <32 x i8> [[I25]], ptr [[I22]], align 1 831; CHECK-NEXT: [[I27]] = add nuw i64 [[I16]], 32 832; CHECK-NEXT: [[I28_SCALAR]] = add i64 [[I17_SCALAR]], 160 833; CHECK-NEXT: [[I29:%.*]] = icmp eq i64 [[I27]], [[I10]] 834; CHECK-NEXT: br i1 [[I29]], label [[BB30:%.*]], label [[BB15]] 835; CHECK: bb30: 836; CHECK-NEXT: [[I31:%.*]] = icmp eq i64 [[I7]], [[I10]] 837; CHECK-NEXT: br i1 [[I31]], label [[BB34]], label [[BB32]] 838; CHECK: bb32: 839; CHECK-NEXT: [[I33:%.*]] = phi i64 [ [[I4]], [[BB3]] ], [ [[I11]], [[BB30]] ] 840; CHECK-NEXT: br label [[BB35:%.*]] 841; CHECK: bb34: 842; CHECK-NEXT: ret void 843; CHECK: bb35: 844; CHECK-NEXT: [[I36:%.*]] = phi i64 [ [[I43:%.*]], [[BB35]] ], [ [[I33]], [[BB32]] ] 845; CHECK-NEXT: [[I37:%.*]] = mul nsw i64 [[I36]], 5 846; CHECK-NEXT: [[I38:%.*]] = getelementptr inbounds i8, ptr [[ARG1]], i64 [[I37]] 847; CHECK-NEXT: [[I39:%.*]] = load i8, ptr [[I38]], align 1 848; CHECK-NEXT: [[I40:%.*]] = getelementptr inbounds i8, ptr [[ARG]], i64 [[I36]] 849; CHECK-NEXT: [[I41:%.*]] = load i8, ptr [[I40]], align 1 850; CHECK-NEXT: [[I42:%.*]] = add i8 [[I41]], [[I39]] 851; CHECK-NEXT: store i8 [[I42]], ptr [[I40]], align 1 852; CHECK-NEXT: [[I43]] = add nsw i64 [[I36]], 1 853; CHECK-NEXT: [[I44:%.*]] = trunc i64 [[I43]] to i32 854; CHECK-NEXT: [[I45:%.*]] = icmp eq i32 [[I44]], 1024 855; CHECK-NEXT: br i1 [[I45]], label [[BB34]], label [[BB35]] 856; 857bb: 858 %i = icmp eq i32 %arg2, 1024 859 br i1 %i, label %bb34, label %bb3 860 861bb3: ; preds = %bb 862 %i4 = sext i32 %arg2 to i64 863 %i5 = sub i32 1023, %arg2 864 %i6 = zext i32 %i5 to i64 865 %i7 = add nuw nsw i64 %i6, 1 866 %i8 = icmp ult i32 %i5, 31 867 br i1 %i8, label %bb32, label %bb9 868 869bb9: ; preds = %bb3 870 %i10 = and i64 %i7, 8589934560 871 %i11 = add nsw i64 %i10, %i4 872 %i12 = insertelement <32 x i64> poison, i64 %i4, i64 0 873 %i13 = shufflevector <32 x i64> %i12, <32 x i64> poison, <32 x i32> zeroinitializer 874 %i14 = add <32 x i64> %i13, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15, i64 16, i64 17, i64 18, i64 19, i64 20, i64 21, i64 22, i64 23, i64 24, i64 25, i64 26, i64 27, i64 28, i64 29, i64 30, i64 31> 875 br label %bb15 876 877bb15: ; preds = %bb15, %bb9 878 %i16 = phi i64 [ 0, %bb9 ], [ %i27, %bb15 ] 879 %i17 = phi <32 x i64> [ %i14, %bb9 ], [ %i28, %bb15 ] 880 %i18 = add i64 %i16, %i4 881 %i19 = mul nsw <32 x i64> %i17, splat (i64 5) 882 %i20 = getelementptr inbounds i8, ptr %arg1, <32 x i64> %i19 883 %i21 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> %i20, i32 1, <32 x i1> splat (i1 true), <32 x i8> undef) 884 %i22 = getelementptr inbounds i8, ptr %arg, i64 %i18 885 %i24 = load <32 x i8>, ptr %i22, align 1 886 %i25 = add <32 x i8> %i24, %i21 887 store <32 x i8> %i25, ptr %i22, align 1 888 %i27 = add nuw i64 %i16, 32 889 %i28 = add <32 x i64> %i17, splat (i64 32) 890 %i29 = icmp eq i64 %i27, %i10 891 br i1 %i29, label %bb30, label %bb15 892 893bb30: ; preds = %bb15 894 %i31 = icmp eq i64 %i7, %i10 895 br i1 %i31, label %bb34, label %bb32 896 897bb32: ; preds = %bb30, %bb3 898 %i33 = phi i64 [ %i4, %bb3 ], [ %i11, %bb30 ] 899 br label %bb35 900 901bb34: ; preds = %bb35, %bb30, %bb 902 ret void 903 904bb35: ; preds = %bb35, %bb32 905 %i36 = phi i64 [ %i43, %bb35 ], [ %i33, %bb32 ] 906 %i37 = mul nsw i64 %i36, 5 907 %i38 = getelementptr inbounds i8, ptr %arg1, i64 %i37 908 %i39 = load i8, ptr %i38, align 1 909 %i40 = getelementptr inbounds i8, ptr %arg, i64 %i36 910 %i41 = load i8, ptr %i40, align 1 911 %i42 = add i8 %i41, %i39 912 store i8 %i42, ptr %i40, align 1 913 %i43 = add nsw i64 %i36, 1 914 %i44 = trunc i64 %i43 to i32 915 %i45 = icmp eq i32 %i44, 1024 916 br i1 %i45, label %bb34, label %bb35 917} 918 919declare <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr>, i32 immarg, <16 x i1>, <16 x i8>) 920declare void @llvm.masked.scatter.v16i8.v16p0(<16 x i8>, <16 x ptr>, i32 immarg, <16 x i1>) 921 922define void @gather_no_scalar_remainder(ptr noalias nocapture noundef %arg, ptr noalias nocapture noundef readonly %arg1, i64 noundef %arg2) { 923; CHECK-LABEL: @gather_no_scalar_remainder( 924; CHECK-NEXT: bb: 925; CHECK-NEXT: [[I:%.*]] = shl i64 [[ARG2:%.*]], 4 926; CHECK-NEXT: [[I3:%.*]] = icmp eq i64 [[I]], 0 927; CHECK-NEXT: br i1 [[I3]], label [[BB16:%.*]], label [[BB2:%.*]] 928; CHECK: bb2: 929; CHECK-NEXT: br label [[BB4:%.*]] 930; CHECK: bb4: 931; CHECK-NEXT: [[I5:%.*]] = phi i64 [ [[I13:%.*]], [[BB4]] ], [ 0, [[BB2]] ] 932; CHECK-NEXT: [[I6_SCALAR:%.*]] = phi i64 [ 0, [[BB2]] ], [ [[I14_SCALAR:%.*]], [[BB4]] ] 933; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[ARG1:%.*]], i64 [[I6_SCALAR]] 934; CHECK-NEXT: [[TMP1:%.*]] = call <16 x i8> @llvm.experimental.vp.strided.load.v16i8.p0.i64(ptr [[TMP0]], i64 5, <16 x i1> splat (i1 true), i32 16) 935; CHECK-NEXT: [[I9:%.*]] = call <16 x i8> @llvm.vp.select.v16i8(<16 x i1> splat (i1 true), <16 x i8> [[TMP1]], <16 x i8> undef, i32 16) 936; CHECK-NEXT: [[I10:%.*]] = getelementptr inbounds i8, ptr [[ARG:%.*]], i64 [[I5]] 937; CHECK-NEXT: [[I11:%.*]] = load <16 x i8>, ptr [[I10]], align 1 938; CHECK-NEXT: [[I12:%.*]] = add <16 x i8> [[I11]], [[I9]] 939; CHECK-NEXT: store <16 x i8> [[I12]], ptr [[I10]], align 1 940; CHECK-NEXT: [[I13]] = add nuw i64 [[I5]], 16 941; CHECK-NEXT: [[I14_SCALAR]] = add i64 [[I6_SCALAR]], 80 942; CHECK-NEXT: [[I15:%.*]] = icmp eq i64 [[I13]], [[I]] 943; CHECK-NEXT: br i1 [[I15]], label [[BB16]], label [[BB4]] 944; CHECK: bb16: 945; CHECK-NEXT: ret void 946; 947bb: 948 %i = shl i64 %arg2, 4 949 %i3 = icmp eq i64 %i, 0 950 br i1 %i3, label %bb16, label %bb2 951 952bb2: ; preds = %bb 953 br label %bb4 954 955bb4: ; preds = %bb4, %bb2 956 %i5 = phi i64 [ %i13, %bb4 ], [ 0, %bb2 ] 957 %i6 = phi <16 x i64> [ %i14, %bb4 ], [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>, %bb2 ] 958 %i7 = mul <16 x i64> %i6, splat (i64 5) 959 %i8 = getelementptr inbounds i8, ptr %arg1, <16 x i64> %i7 960 %i9 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> %i8, i32 1, <16 x i1> splat (i1 true), <16 x i8> undef) 961 %i10 = getelementptr inbounds i8, ptr %arg, i64 %i5 962 %i11 = load <16 x i8>, ptr %i10, align 1 963 %i12 = add <16 x i8> %i11, %i9 964 store <16 x i8> %i12, ptr %i10, align 1 965 %i13 = add nuw i64 %i5, 16 966 %i14 = add <16 x i64> %i6, splat (i64 16) 967 %i15 = icmp eq i64 %i13, %i 968 br i1 %i15, label %bb16, label %bb4 969 970bb16: ; preds = %bb4, %bb 971 ret void 972} 973 974define <8 x i8> @broadcast_ptr_base(ptr %a) { 975; CHECK-LABEL: @broadcast_ptr_base( 976; CHECK-NEXT: entry: 977; CHECK-NEXT: [[TMP0:%.*]] = call <8 x i8> @llvm.experimental.vp.strided.load.v8i8.p0.i64(ptr [[A:%.*]], i64 64, <8 x i1> splat (i1 true), i32 8) 978; CHECK-NEXT: [[TMP1:%.*]] = call <8 x i8> @llvm.vp.select.v8i8(<8 x i1> splat (i1 true), <8 x i8> [[TMP0]], <8 x i8> poison, i32 8) 979; CHECK-NEXT: ret <8 x i8> [[TMP1]] 980; 981entry: 982 %0 = insertelement <8 x ptr> poison, ptr %a, i64 0 983 %1 = shufflevector <8 x ptr> %0, <8 x ptr> poison, <8 x i32> zeroinitializer 984 %2 = getelementptr i8, <8 x ptr> %1, <8 x i64> <i64 0, i64 64, i64 128, i64 192, i64 256, i64 320, i64 384, i64 448> 985 %3 = tail call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> %2, i32 1, <8 x i1> splat (i1 true), <8 x i8> poison) 986 ret <8 x i8> %3 987} 988 989declare <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr>, i32 immarg, <8 x i1>, <8 x i8>) 990 991define void @gather_narrow_idx(ptr noalias nocapture %A, ptr noalias nocapture readonly %B) { 992; CHECK-LABEL: @gather_narrow_idx( 993; CHECK-NEXT: entry: 994; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 995; CHECK: vector.body: 996; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 997; CHECK-NEXT: [[VEC_IND:%.*]] = phi <32 x i16> [ <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19, i16 20, i16 21, i16 22, i16 23, i16 24, i16 25, i16 26, i16 27, i16 28, i16 29, i16 30, i16 31>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 998; CHECK-NEXT: [[I:%.*]] = mul nuw nsw <32 x i16> [[VEC_IND]], splat (i16 5) 999; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], <32 x i16> [[I]] 1000; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> [[I1]], i32 1, <32 x i1> splat (i1 true), <32 x i8> undef) 1001; CHECK-NEXT: [[I2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]] 1002; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[I2]], align 1 1003; CHECK-NEXT: [[I4:%.*]] = add <32 x i8> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]] 1004; CHECK-NEXT: store <32 x i8> [[I4]], ptr [[I2]], align 1 1005; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 1006; CHECK-NEXT: [[VEC_IND_NEXT]] = add <32 x i16> [[VEC_IND]], splat (i16 32) 1007; CHECK-NEXT: [[I6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 1008; CHECK-NEXT: br i1 [[I6]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]] 1009; CHECK: for.cond.cleanup: 1010; CHECK-NEXT: ret void 1011; 1012entry: 1013 br label %vector.body 1014 1015vector.body: ; preds = %vector.body, %entry 1016 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] 1017 %vec.ind = phi <32 x i16> [ <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19, i16 20, i16 21, i16 22, i16 23, i16 24, i16 25, i16 26, i16 27, i16 28, i16 29, i16 30, i16 31>, %entry ], [ %vec.ind.next, %vector.body ] 1018 %i = mul nuw nsw <32 x i16> %vec.ind, splat (i16 5) 1019 %i1 = getelementptr inbounds i8, ptr %B, <32 x i16> %i 1020 %wide.masked.gather = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> %i1, i32 1, <32 x i1> splat (i1 true), <32 x i8> undef) 1021 %i2 = getelementptr inbounds i8, ptr %A, i64 %index 1022 %wide.load = load <32 x i8>, ptr %i2, align 1 1023 %i4 = add <32 x i8> %wide.load, %wide.masked.gather 1024 store <32 x i8> %i4, ptr %i2, align 1 1025 %index.next = add nuw i64 %index, 32 1026 %vec.ind.next = add <32 x i16> %vec.ind, splat (i16 32) 1027 %i6 = icmp eq i64 %index.next, 1024 1028 br i1 %i6, label %for.cond.cleanup, label %vector.body 1029 1030for.cond.cleanup: ; preds = %vector.body 1031 ret void 1032} 1033 1034define void @vp_gather(ptr noalias nocapture %A, ptr noalias nocapture readonly %B) { 1035; CHECK-LABEL: @vp_gather( 1036; CHECK-NEXT: entry: 1037; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 1038; CHECK: vector.body: 1039; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ] 1040; CHECK-NEXT: [[VEC_IND_SCALAR1:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR1:%.*]], [[VECTOR_BODY]] ] 1041; CHECK-NEXT: [[VEC_IND:%.*]] = phi <32 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15, i64 16, i64 17, i64 18, i64 19, i64 20, i64 21, i64 22, i64 23, i64 24, i64 25, i64 26, i64 27, i64 28, i64 29, i64 30, i64 31>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 1042; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[B:%.*]], i64 [[VEC_IND_SCALAR1]] 1043; CHECK-NEXT: [[ELEMS:%.*]] = sub i64 1024, [[VEC_IND_SCALAR]] 1044; CHECK-NEXT: [[EVL:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[ELEMS]], i32 32, i1 false) 1045; CHECK-NEXT: [[ODD:%.*]] = and <32 x i64> [[VEC_IND]], splat (i64 1) 1046; CHECK-NEXT: [[MASK:%.*]] = icmp ne <32 x i64> [[ODD]], zeroinitializer 1047; CHECK-NEXT: [[WIDE_VP_GATHER:%.*]] = call <32 x i8> @llvm.experimental.vp.strided.load.v32i8.p0.i64(ptr [[TMP0]], i64 5, <32 x i1> [[MASK]], i32 [[EVL]]) 1048; CHECK-NEXT: [[I2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[VEC_IND_SCALAR]] 1049; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[I2]], align 1 1050; CHECK-NEXT: [[I4:%.*]] = add <32 x i8> [[WIDE_LOAD]], [[WIDE_VP_GATHER]] 1051; CHECK-NEXT: store <32 x i8> [[I4]], ptr [[I2]], align 1 1052; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add nuw i64 [[VEC_IND_SCALAR]], 32 1053; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR1]] = add i64 [[VEC_IND_SCALAR1]], 160 1054; CHECK-NEXT: [[VEC_IND_NEXT]] = add <32 x i64> [[VEC_IND]], splat (i64 32) 1055; CHECK-NEXT: [[I6:%.*]] = icmp eq i64 [[VEC_IND_NEXT_SCALAR]], 1024 1056; CHECK-NEXT: br i1 [[I6]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]] 1057; CHECK: for.cond.cleanup: 1058; CHECK-NEXT: ret void 1059; 1060entry: 1061 br label %vector.body 1062 1063vector.body: ; preds = %vector.body, %entry 1064 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] 1065 %vec.ind = phi <32 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15, i64 16, i64 17, i64 18, i64 19, i64 20, i64 21, i64 22, i64 23, i64 24, i64 25, i64 26, i64 27, i64 28, i64 29, i64 30, i64 31>, %entry ], [ %vec.ind.next, %vector.body ] 1066 %i = mul nuw nsw <32 x i64> %vec.ind, splat (i64 5) 1067 %i1 = getelementptr inbounds i8, ptr %B, <32 x i64> %i 1068 1069 %elems = sub i64 1024, %index 1070 %evl = call i32 @llvm.experimental.get.vector.length.i64(i64 %elems, i32 32, i1 false) 1071 1072 %odd = and <32 x i64> %vec.ind, splat (i64 1) 1073 %mask = icmp ne <32 x i64> %odd, splat (i64 0) 1074 1075 %wide.vp.gather = call <32 x i8> @llvm.vp.gather(<32 x ptr> %i1, <32 x i1> %mask, i32 %evl) 1076 %i2 = getelementptr inbounds i8, ptr %A, i64 %index 1077 %wide.load = load <32 x i8>, ptr %i2, align 1 1078 %i4 = add <32 x i8> %wide.load, %wide.vp.gather 1079 store <32 x i8> %i4, ptr %i2, align 1 1080 %index.next = add nuw i64 %index, 32 1081 %vec.ind.next = add <32 x i64> %vec.ind, splat (i64 32) 1082 %i6 = icmp eq i64 %index.next, 1024 1083 br i1 %i6, label %for.cond.cleanup, label %vector.body 1084 1085for.cond.cleanup: ; preds = %vector.body 1086 ret void 1087} 1088 1089define void @vp_scatter(ptr noalias nocapture %A, ptr noalias nocapture readonly %B) { 1090; CHECK-LABEL: @vp_scatter( 1091; CHECK-NEXT: entry: 1092; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 1093; CHECK: vector.body: 1094; CHECK-NEXT: [[VEC_IND_SCALAR:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[VEC_IND_NEXT_SCALAR:%.*]], [[VECTOR_BODY]] ] 1095; CHECK-NEXT: [[VEC_IND_SCALAR1:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[VEC_IND_NEXT_SCALAR1:%.*]], [[VECTOR_BODY]] ] 1096; CHECK-NEXT: [[VEC_IND:%.*]] = phi <32 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15, i64 16, i64 17, i64 18, i64 19, i64 20, i64 21, i64 22, i64 23, i64 24, i64 25, i64 26, i64 27, i64 28, i64 29, i64 30, i64 31>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 1097; CHECK-NEXT: [[I:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], i64 [[VEC_IND_SCALAR]] 1098; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[I]], align 1 1099; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A:%.*]], i64 [[VEC_IND_SCALAR1]] 1100; CHECK-NEXT: [[ELEMS:%.*]] = sub i64 1024, [[VEC_IND_SCALAR]] 1101; CHECK-NEXT: [[EVL:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[ELEMS]], i32 32, i1 false) 1102; CHECK-NEXT: [[ODD:%.*]] = and <32 x i64> [[VEC_IND]], splat (i64 1) 1103; CHECK-NEXT: [[MASK:%.*]] = icmp ne <32 x i64> [[ODD]], zeroinitializer 1104; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.experimental.vp.strided.load.v32i8.p0.i64(ptr [[TMP0]], i64 5, <32 x i1> [[MASK]], i32 [[EVL]]) 1105; CHECK-NEXT: [[I4:%.*]] = add <32 x i8> [[WIDE_MASKED_GATHER]], [[WIDE_LOAD]] 1106; CHECK-NEXT: call void @llvm.experimental.vp.strided.store.v32i8.p0.i64(<32 x i8> [[I4]], ptr [[TMP0]], i64 5, <32 x i1> [[MASK]], i32 [[EVL]]) 1107; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR]] = add nuw i64 [[VEC_IND_SCALAR]], 32 1108; CHECK-NEXT: [[VEC_IND_NEXT_SCALAR1]] = add i64 [[VEC_IND_SCALAR1]], 160 1109; CHECK-NEXT: [[VEC_IND_NEXT]] = add <32 x i64> [[VEC_IND]], splat (i64 32) 1110; CHECK-NEXT: [[I5:%.*]] = icmp eq i64 [[VEC_IND_NEXT_SCALAR]], 1024 1111; CHECK-NEXT: br i1 [[I5]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]] 1112; CHECK: for.cond.cleanup: 1113; CHECK-NEXT: ret void 1114; 1115entry: 1116 br label %vector.body 1117 1118vector.body: ; preds = %vector.body, %entry 1119 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] 1120 %vec.ind = phi <32 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15, i64 16, i64 17, i64 18, i64 19, i64 20, i64 21, i64 22, i64 23, i64 24, i64 25, i64 26, i64 27, i64 28, i64 29, i64 30, i64 31>, %entry ], [ %vec.ind.next, %vector.body ] 1121 %i = getelementptr inbounds i8, ptr %B, i64 %index 1122 %wide.load = load <32 x i8>, ptr %i, align 1 1123 %i2 = mul nuw nsw <32 x i64> %vec.ind, splat (i64 5) 1124 %i3 = getelementptr inbounds i8, ptr %A, <32 x i64> %i2 1125 1126 1127 %elems = sub i64 1024, %index 1128 %evl = call i32 @llvm.experimental.get.vector.length.i64(i64 %elems, i32 32, i1 false) 1129 1130 %odd = and <32 x i64> %vec.ind, splat (i64 1) 1131 %mask = icmp ne <32 x i64> %odd, splat (i64 0) 1132 1133 %wide.masked.gather = call <32 x i8> @llvm.vp.gather(<32 x ptr> %i3, <32 x i1> %mask, i32 %evl) 1134 %i4 = add <32 x i8> %wide.masked.gather, %wide.load 1135 call void @llvm.vp.scatter(<32 x i8> %i4, <32 x ptr> %i3, <32 x i1> %mask, i32 %evl) 1136 %index.next = add nuw i64 %index, 32 1137 %vec.ind.next = add <32 x i64> %vec.ind, splat (i64 32) 1138 %i5 = icmp eq i64 %index.next, 1024 1139 br i1 %i5, label %for.cond.cleanup, label %vector.body 1140 1141for.cond.cleanup: ; preds = %vector.body 1142 ret void 1143} 1144