1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py 2; RUN: opt %s -S -riscv-gather-scatter-lowering -mtriple=riscv64 -mattr=+m,+v,+zvl256b | FileCheck %s 3 4; This contains negative tests for the strided load/store recognition in 5; RISCVGatherScatterLowering.cpp 6 7; Negative test for treating OR as ADD. 8define void @gather_bad_or(ptr noalias nocapture %A, ptr noalias nocapture readonly %B) { 9; CHECK-LABEL: @gather_bad_or( 10; CHECK-NEXT: entry: 11; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 12; CHECK: vector.body: 13; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 14; CHECK-NEXT: [[VEC_IND:%.*]] = phi <32 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15, i64 16, i64 17, i64 18, i64 19, i64 20, i64 21, i64 22, i64 23, i64 24, i64 25, i64 26, i64 27, i64 28, i64 29, i64 30, i64 31>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 15; CHECK-NEXT: [[I:%.*]] = mul nuw nsw <32 x i64> [[VEC_IND]], splat (i64 5) 16; CHECK-NEXT: [[OR:%.*]] = or <32 x i64> [[I]], splat (i64 1) 17; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], <32 x i64> [[OR]] 18; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> [[I1]], i32 1, <32 x i1> splat (i1 true), <32 x i8> undef) 19; CHECK-NEXT: [[I2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]] 20; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[I2]], align 1 21; CHECK-NEXT: [[I4:%.*]] = add <32 x i8> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]] 22; CHECK-NEXT: store <32 x i8> [[I4]], ptr [[I2]], align 1 23; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 24; CHECK-NEXT: [[VEC_IND_NEXT]] = add <32 x i64> [[VEC_IND]], splat (i64 32) 25; CHECK-NEXT: [[I6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 26; CHECK-NEXT: br i1 [[I6]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]] 27; CHECK: for.cond.cleanup: 28; CHECK-NEXT: ret void 29; 30entry: 31 br label %vector.body 32 33vector.body: ; preds = %vector.body, %entry 34 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] 35 %vec.ind = phi <32 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15, i64 16, i64 17, i64 18, i64 19, i64 20, i64 21, i64 22, i64 23, i64 24, i64 25, i64 26, i64 27, i64 28, i64 29, i64 30, i64 31>, %entry ], [ %vec.ind.next, %vector.body ] 36 %i = mul nuw nsw <32 x i64> %vec.ind, <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5> 37 %or = or <32 x i64> %i, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1> 38 %i1 = getelementptr inbounds i8, ptr %B, <32 x i64> %or 39 %wide.masked.gather = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> %i1, i32 1, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <32 x i8> undef) 40 %i2 = getelementptr inbounds i8, ptr %A, i64 %index 41 %wide.load = load <32 x i8>, ptr %i2, align 1 42 %i4 = add <32 x i8> %wide.load, %wide.masked.gather 43 store <32 x i8> %i4, ptr %i2, align 1 44 %index.next = add nuw i64 %index, 32 45 %vec.ind.next = add <32 x i64> %vec.ind, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32> 46 %i6 = icmp eq i64 %index.next, 1024 47 br i1 %i6, label %for.cond.cleanup, label %vector.body 48 49for.cond.cleanup: ; preds = %vector.body 50 ret void 51} 52 53; Don't transform since we might not handle wrap correctly with narrow indices. 54define void @gather_narrow_index(ptr noalias nocapture %A, ptr noalias nocapture readonly %B) { 55; CHECK-LABEL: @gather_narrow_index( 56; CHECK-NEXT: entry: 57; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 58; CHECK: vector.body: 59; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 60; CHECK-NEXT: [[VEC_IND:%.*]] = phi <32 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 61; CHECK-NEXT: [[TMP0:%.*]] = mul nuw nsw <32 x i32> [[VEC_IND]], splat (i32 5) 62; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], <32 x i32> [[TMP0]] 63; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> [[TMP1]], i32 1, <32 x i1> splat (i1 true), <32 x i8> undef) 64; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]] 65; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[TMP2]], align 1 66; CHECK-NEXT: [[TMP4:%.*]] = add <32 x i8> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]] 67; CHECK-NEXT: store <32 x i8> [[TMP4]], ptr [[TMP2]], align 1 68; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 69; CHECK-NEXT: [[VEC_IND_NEXT]] = add <32 x i32> [[VEC_IND]], splat (i32 32) 70; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 71; CHECK-NEXT: br i1 [[TMP6]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]] 72; CHECK: for.cond.cleanup: 73; CHECK-NEXT: ret void 74; 75entry: 76 br label %vector.body 77 78vector.body: ; preds = %vector.body, %entry 79 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] 80 %vec.ind = phi <32 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>, %entry ], [ %vec.ind.next, %vector.body ] 81 %0 = mul nuw nsw <32 x i32> %vec.ind, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5> 82 %1 = getelementptr inbounds i8, ptr %B, <32 x i32> %0 83 %wide.masked.gather = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> %1, i32 1, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <32 x i8> undef) 84 %2 = getelementptr inbounds i8, ptr %A, i64 %index 85 %wide.load = load <32 x i8>, ptr %2, align 1 86 %3 = add <32 x i8> %wide.load, %wide.masked.gather 87 store <32 x i8> %3, ptr %2, align 1 88 %index.next = add nuw i64 %index, 32 89 %vec.ind.next = add <32 x i32> %vec.ind, <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32> 90 %4 = icmp eq i64 %index.next, 1024 91 br i1 %4, label %for.cond.cleanup, label %vector.body 92 93for.cond.cleanup: ; preds = %vector.body 94 ret void 95} 96 97; The last element of the start value of the phi has the wrong stride. 98define void @gather_broken_stride(ptr noalias nocapture %A, ptr noalias nocapture readonly %B) { 99; CHECK-LABEL: @gather_broken_stride( 100; CHECK-NEXT: entry: 101; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 102; CHECK: vector.body: 103; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 104; CHECK-NEXT: [[VEC_IND:%.*]] = phi <32 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15, i64 16, i64 17, i64 18, i64 19, i64 20, i64 21, i64 22, i64 23, i64 24, i64 25, i64 26, i64 27, i64 28, i64 29, i64 30, i64 32>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] 105; CHECK-NEXT: [[I:%.*]] = mul nuw nsw <32 x i64> [[VEC_IND]], splat (i64 5) 106; CHECK-NEXT: [[OR:%.*]] = or <32 x i64> [[I]], splat (i64 1) 107; CHECK-NEXT: [[I1:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], <32 x i64> [[OR]] 108; CHECK-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> [[I1]], i32 1, <32 x i1> splat (i1 true), <32 x i8> undef) 109; CHECK-NEXT: [[I2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]] 110; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[I2]], align 1 111; CHECK-NEXT: [[I4:%.*]] = add <32 x i8> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]] 112; CHECK-NEXT: store <32 x i8> [[I4]], ptr [[I2]], align 1 113; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32 114; CHECK-NEXT: [[VEC_IND_NEXT]] = add <32 x i64> [[VEC_IND]], splat (i64 32) 115; CHECK-NEXT: [[I6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 116; CHECK-NEXT: br i1 [[I6]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]] 117; CHECK: for.cond.cleanup: 118; CHECK-NEXT: ret void 119; 120entry: 121 br label %vector.body 122 123vector.body: ; preds = %vector.body, %entry 124 %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ] 125 %vec.ind = phi <32 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15, i64 16, i64 17, i64 18, i64 19, i64 20, i64 21, i64 22, i64 23, i64 24, i64 25, i64 26, i64 27, i64 28, i64 29, i64 30, i64 32>, %entry ], [ %vec.ind.next, %vector.body ] 126 %i = mul nuw nsw <32 x i64> %vec.ind, <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5> 127 %or = or <32 x i64> %i, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1> 128 %i1 = getelementptr inbounds i8, ptr %B, <32 x i64> %or 129 %wide.masked.gather = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> %i1, i32 1, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <32 x i8> undef) 130 %i2 = getelementptr inbounds i8, ptr %A, i64 %index 131 %wide.load = load <32 x i8>, ptr %i2, align 1 132 %i4 = add <32 x i8> %wide.load, %wide.masked.gather 133 store <32 x i8> %i4, ptr %i2, align 1 134 %index.next = add nuw i64 %index, 32 135 %vec.ind.next = add <32 x i64> %vec.ind, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32> 136 %i6 = icmp eq i64 %index.next, 1024 137 br i1 %i6, label %for.cond.cleanup, label %vector.body 138 139for.cond.cleanup: ; preds = %vector.body 140 ret void 141} 142 143declare <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr>, i32 immarg, <32 x i1>, <32 x i8>) 144