; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt %s -S -riscv-gather-scatter-lowering -mtriple=riscv64 -mattr=+m,+v,+zvl256b | FileCheck %s

; This contains negative tests for the strided load/store recognition in
; RISCVGatherScatterLowering.cpp

; Negative test for treating OR as ADD.
define void @gather_bad_or(ptr noalias nocapture %A, ptr noalias nocapture readonly %B) {
; CHECK-LABEL: @gather_bad_or(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <32 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15, i64 16, i64 17, i64 18, i64 19, i64 20, i64 21, i64 22, i64 23, i64 24, i64 25, i64 26, i64 27, i64 28, i64 29, i64 30, i64 31>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[I:%.*]] = mul nuw nsw <32 x i64> [[VEC_IND]], splat (i64 5)
; CHECK-NEXT:    [[OR:%.*]] = or <32 x i64> [[I]], splat (i64 1)
; CHECK-NEXT:    [[I1:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], <32 x i64> [[OR]]
; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> [[I1]], i32 1, <32 x i1> splat (i1 true), <32 x i8> undef)
; CHECK-NEXT:    [[I2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[I2]], align 1
; CHECK-NEXT:    [[I4:%.*]] = add <32 x i8> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]]
; CHECK-NEXT:    store <32 x i8> [[I4]], ptr [[I2]], align 1
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <32 x i64> [[VEC_IND]], splat (i64 32)
; CHECK-NEXT:    [[I6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-NEXT:    br i1 [[I6]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    ret void
;
entry:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %vec.ind = phi <32 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15, i64 16, i64 17, i64 18, i64 19, i64 20, i64 21, i64 22, i64 23, i64 24, i64 25, i64 26, i64 27, i64 28, i64 29, i64 30, i64 31>, %entry ], [ %vec.ind.next, %vector.body ]
  %i = mul nuw nsw <32 x i64> %vec.ind, splat (i64 5)
  %or = or <32 x i64> %i, splat (i64 1)
  %i1 = getelementptr inbounds i8, ptr %B, <32 x i64> %or
  %wide.masked.gather = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> %i1, i32 1, <32 x i1> splat (i1 true), <32 x i8> undef)
  %i2 = getelementptr inbounds i8, ptr %A, i64 %index
  %wide.load = load <32 x i8>, ptr %i2, align 1
  %i4 = add <32 x i8> %wide.load, %wide.masked.gather
  store <32 x i8> %i4, ptr %i2, align 1
  %index.next = add nuw i64 %index, 32
  %vec.ind.next = add <32 x i64> %vec.ind, splat (i64 32)
  %i6 = icmp eq i64 %index.next, 1024
  br i1 %i6, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret void
}
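
; A minimal scalar sketch (not exercised by any CHECK lines; @or_vs_add_sketch is a
; hypothetical helper, not part of the tested pattern) of why the or above cannot
; simply be folded as an add: or is only equivalent to add when the operands share
; no set bits, and with a stride of 5 the low bit of 5 * %index is not known to be zero.
define i64 @or_vs_add_sketch(i64 %index) {
entry:
  %mul = mul nuw nsw i64 %index, 5   ; low bit of %mul is unknown
  %or = or i64 %mul, 1               ; e.g. %index = 1: or(5, 1) = 5
  %add = add i64 %mul, 1             ;      %index = 1: add(5, 1) = 6
  %diff = sub i64 %add, %or          ; nonzero whenever %mul is odd
  ret i64 %diff
}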

; Don't transform since we might not handle wrap correctly with narrow indices.
define void @gather_narrow_index(ptr noalias nocapture %A, ptr noalias nocapture readonly %B) {
; CHECK-LABEL: @gather_narrow_index(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <32 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = mul nuw nsw <32 x i32> [[VEC_IND]], splat (i32 5)
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], <32 x i32> [[TMP0]]
; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> [[TMP1]], i32 1, <32 x i1> splat (i1 true), <32 x i8> undef)
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[TMP2]], align 1
; CHECK-NEXT:    [[TMP4:%.*]] = add <32 x i8> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]]
; CHECK-NEXT:    store <32 x i8> [[TMP4]], ptr [[TMP2]], align 1
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <32 x i32> [[VEC_IND]], splat (i32 32)
; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-NEXT:    br i1 [[TMP6]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    ret void
;
entry:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  %vec.ind = phi <32 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>, %entry ], [ %vec.ind.next, %vector.body ]
  %0 = mul nuw nsw <32 x i32> %vec.ind, splat (i32 5)
  %1 = getelementptr inbounds i8, ptr %B, <32 x i32> %0
  %wide.masked.gather = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> %1, i32 1, <32 x i1> splat (i1 true), <32 x i8> undef)
  %2 = getelementptr inbounds i8, ptr %A, i64 %index
  %wide.load = load <32 x i8>, ptr %2, align 1
  %3 = add <32 x i8> %wide.load, %wide.masked.gather
  store <32 x i8> %3, ptr %2, align 1
  %index.next = add nuw i64 %index, 32
  %vec.ind.next = add <32 x i32> %vec.ind, splat (i32 32)
  %4 = icmp eq i64 %index.next, 1024
  br i1 %4, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret void
}
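
; A minimal scalar sketch (not exercised by any CHECK lines; @narrow_index_wrap_sketch
; is a hypothetical helper) of the wrap concern above: the <32 x i32> induction update
; carries no nuw/nsw, so the narrow index arithmetic may wrap modulo 2^32 before the
; GEP sign-extends it, and a rewrite that computed the offsets in 64 bits could then
; produce different addresses.
define i64 @narrow_index_wrap_sketch(i32 %index) {
entry:
  %mul.narrow = mul i32 %index, 5            ; may wrap in 32 bits
  %off.narrow = sext i32 %mul.narrow to i64  ; offset as the narrow-index GEP would see it
  %index.wide = sext i32 %index to i64
  %off.wide = mul i64 %index.wide, 5         ; offset a 64-bit strided rewrite would compute
  %diff = sub i64 %off.wide, %off.narrow     ; nonzero once the i32 multiply wraps
  ret i64 %diff
}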

; The last element of the start value of the phi has the wrong stride.
define void @gather_broken_stride(ptr noalias nocapture %A, ptr noalias nocapture readonly %B) {
; CHECK-LABEL: @gather_broken_stride(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <32 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15, i64 16, i64 17, i64 18, i64 19, i64 20, i64 21, i64 22, i64 23, i64 24, i64 25, i64 26, i64 27, i64 28, i64 29, i64 30, i64 33>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[I:%.*]] = mul nuw nsw <32 x i64> [[VEC_IND]], splat (i64 5)
; CHECK-NEXT:    [[OR:%.*]] = or <32 x i64> [[I]], splat (i64 1)
; CHECK-NEXT:    [[I1:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], <32 x i64> [[OR]]
; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> [[I1]], i32 1, <32 x i1> splat (i1 true), <32 x i8> undef)
; CHECK-NEXT:    [[I2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[I2]], align 1
; CHECK-NEXT:    [[I4:%.*]] = add <32 x i8> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]]
; CHECK-NEXT:    store <32 x i8> [[I4]], ptr [[I2]], align 1
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <32 x i64> [[VEC_IND]], splat (i64 32)
; CHECK-NEXT:    [[I6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-NEXT:    br i1 [[I6]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
; CHECK:       for.cond.cleanup:
; CHECK-NEXT:    ret void
;
entry:
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %entry
  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
  ; The final lane is off-stride relative to the preceding 0, 1, ..., 30 sequence.
  %vec.ind = phi <32 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15, i64 16, i64 17, i64 18, i64 19, i64 20, i64 21, i64 22, i64 23, i64 24, i64 25, i64 26, i64 27, i64 28, i64 29, i64 30, i64 33>, %entry ], [ %vec.ind.next, %vector.body ]
  %i = mul nuw nsw <32 x i64> %vec.ind, splat (i64 5)
  %or = or <32 x i64> %i, splat (i64 1)
  %i1 = getelementptr inbounds i8, ptr %B, <32 x i64> %or
  %wide.masked.gather = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> %i1, i32 1, <32 x i1> splat (i1 true), <32 x i8> undef)
  %i2 = getelementptr inbounds i8, ptr %A, i64 %index
  %wide.load = load <32 x i8>, ptr %i2, align 1
  %i4 = add <32 x i8> %wide.load, %wide.masked.gather
  store <32 x i8> %i4, ptr %i2, align 1
  %index.next = add nuw i64 %index, 32
  %vec.ind.next = add <32 x i64> %vec.ind, splat (i64 32)
  %i6 = icmp eq i64 %index.next, 1024
  br i1 %i6, label %for.cond.cleanup, label %vector.body

for.cond.cleanup:                                 ; preds = %vector.body
  ret void
}

declare <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr>, i32 immarg, <32 x i1>, <32 x i8>)