xref: /llvm-project/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-strided-load-store-negative.ll (revision 38fffa630ee80163dc65e759392ad29798905679)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt %s -S -riscv-gather-scatter-lowering -mtriple=riscv64 -mattr=+m,+v,+zvl256b | FileCheck %s
3
4; This contains negative tests for the strided load/store recognition in
5; RISCVGatherScatterLowering.cpp
6
7; Negative test for treating OR as ADD.
8define void @gather_bad_or(ptr noalias nocapture %A, ptr noalias nocapture readonly %B) {
9; CHECK-LABEL: @gather_bad_or(
10; CHECK-NEXT:  entry:
11; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
12; CHECK:       vector.body:
13; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
14; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <32 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15, i64 16, i64 17, i64 18, i64 19, i64 20, i64 21, i64 22, i64 23, i64 24, i64 25, i64 26, i64 27, i64 28, i64 29, i64 30, i64 31>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
15; CHECK-NEXT:    [[I:%.*]] = mul nuw nsw <32 x i64> [[VEC_IND]], splat (i64 5)
16; CHECK-NEXT:    [[OR:%.*]] = or <32 x i64> [[I]], splat (i64 1)
17; CHECK-NEXT:    [[I1:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], <32 x i64> [[OR]]
18; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> [[I1]], i32 1, <32 x i1> splat (i1 true), <32 x i8> undef)
19; CHECK-NEXT:    [[I2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]]
20; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[I2]], align 1
21; CHECK-NEXT:    [[I4:%.*]] = add <32 x i8> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]]
22; CHECK-NEXT:    store <32 x i8> [[I4]], ptr [[I2]], align 1
23; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
24; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <32 x i64> [[VEC_IND]], splat (i64 32)
25; CHECK-NEXT:    [[I6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
26; CHECK-NEXT:    br i1 [[I6]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
27; CHECK:       for.cond.cleanup:
28; CHECK-NEXT:    ret void
29;
30entry:
31  br label %vector.body
32
33vector.body:                                      ; preds = %vector.body, %entry
34  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
35  %vec.ind = phi <32 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15, i64 16, i64 17, i64 18, i64 19, i64 20, i64 21, i64 22, i64 23, i64 24, i64 25, i64 26, i64 27, i64 28, i64 29, i64 30, i64 31>, %entry ], [ %vec.ind.next, %vector.body ]
36  %i = mul nuw nsw <32 x i64> %vec.ind, <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>
37  %or = or <32 x i64> %i, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
38  %i1 = getelementptr inbounds i8, ptr %B, <32 x i64> %or
39  %wide.masked.gather = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> %i1, i32 1, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <32 x i8> undef)
40  %i2 = getelementptr inbounds i8, ptr %A, i64 %index
41  %wide.load = load <32 x i8>, ptr %i2, align 1
42  %i4 = add <32 x i8> %wide.load, %wide.masked.gather
43  store <32 x i8> %i4, ptr %i2, align 1
44  %index.next = add nuw i64 %index, 32
45  %vec.ind.next = add <32 x i64> %vec.ind, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
46  %i6 = icmp eq i64 %index.next, 1024
47  br i1 %i6, label %for.cond.cleanup, label %vector.body
48
49for.cond.cleanup:                                 ; preds = %vector.body
50  ret void
51}
52
53; Don't transform since we might not handle wrap correctly with narrow indices.
54define void @gather_narrow_index(ptr noalias nocapture %A, ptr noalias nocapture readonly %B) {
55; CHECK-LABEL: @gather_narrow_index(
56; CHECK-NEXT:  entry:
57; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
58; CHECK:       vector.body:
59; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
60; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <32 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
61; CHECK-NEXT:    [[TMP0:%.*]] = mul nuw nsw <32 x i32> [[VEC_IND]], splat (i32 5)
62; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], <32 x i32> [[TMP0]]
63; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> [[TMP1]], i32 1, <32 x i1> splat (i1 true), <32 x i8> undef)
64; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]]
65; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[TMP2]], align 1
66; CHECK-NEXT:    [[TMP4:%.*]] = add <32 x i8> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]]
67; CHECK-NEXT:    store <32 x i8> [[TMP4]], ptr [[TMP2]], align 1
68; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
69; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <32 x i32> [[VEC_IND]], splat (i32 32)
70; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
71; CHECK-NEXT:    br i1 [[TMP6]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
72; CHECK:       for.cond.cleanup:
73; CHECK-NEXT:    ret void
74;
75entry:
76  br label %vector.body
77
78vector.body:                                      ; preds = %vector.body, %entry
79  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
80  %vec.ind = phi <32 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>, %entry ], [ %vec.ind.next, %vector.body ]
81  %0 = mul nuw nsw <32 x i32> %vec.ind, <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5>
82  %1 = getelementptr inbounds i8, ptr %B, <32 x i32> %0
83  %wide.masked.gather = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> %1, i32 1, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <32 x i8> undef)
84  %2 = getelementptr inbounds i8, ptr %A, i64 %index
85  %wide.load = load <32 x i8>, ptr %2, align 1
86  %3 = add <32 x i8> %wide.load, %wide.masked.gather
87  store <32 x i8> %3, ptr %2, align 1
88  %index.next = add nuw i64 %index, 32
89  %vec.ind.next = add <32 x i32> %vec.ind, <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32>
90  %4 = icmp eq i64 %index.next, 1024
91  br i1 %4, label %for.cond.cleanup, label %vector.body
92
93for.cond.cleanup:                                 ; preds = %vector.body
94  ret void
95}
96
97; The last element of the start value of the phi has the wrong stride.
98define void @gather_broken_stride(ptr noalias nocapture %A, ptr noalias nocapture readonly %B) {
99; CHECK-LABEL: @gather_broken_stride(
100; CHECK-NEXT:  entry:
101; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
102; CHECK:       vector.body:
103; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
104; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <32 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15, i64 16, i64 17, i64 18, i64 19, i64 20, i64 21, i64 22, i64 23, i64 24, i64 25, i64 26, i64 27, i64 28, i64 29, i64 30, i64 32>, [[ENTRY]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
105; CHECK-NEXT:    [[I:%.*]] = mul nuw nsw <32 x i64> [[VEC_IND]], splat (i64 5)
106; CHECK-NEXT:    [[OR:%.*]] = or <32 x i64> [[I]], splat (i64 1)
107; CHECK-NEXT:    [[I1:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], <32 x i64> [[OR]]
108; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> [[I1]], i32 1, <32 x i1> splat (i1 true), <32 x i8> undef)
109; CHECK-NEXT:    [[I2:%.*]] = getelementptr inbounds i8, ptr [[A:%.*]], i64 [[INDEX]]
110; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[I2]], align 1
111; CHECK-NEXT:    [[I4:%.*]] = add <32 x i8> [[WIDE_LOAD]], [[WIDE_MASKED_GATHER]]
112; CHECK-NEXT:    store <32 x i8> [[I4]], ptr [[I2]], align 1
113; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
114; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <32 x i64> [[VEC_IND]], splat (i64 32)
115; CHECK-NEXT:    [[I6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
116; CHECK-NEXT:    br i1 [[I6]], label [[FOR_COND_CLEANUP:%.*]], label [[VECTOR_BODY]]
117; CHECK:       for.cond.cleanup:
118; CHECK-NEXT:    ret void
119;
120entry:
121  br label %vector.body
122
123vector.body:                                      ; preds = %vector.body, %entry
124  %index = phi i64 [ 0, %entry ], [ %index.next, %vector.body ]
125  %vec.ind = phi <32 x i64> [ <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15, i64 16, i64 17, i64 18, i64 19, i64 20, i64 21, i64 22, i64 23, i64 24, i64 25, i64 26, i64 27, i64 28, i64 29, i64 30, i64 32>, %entry ], [ %vec.ind.next, %vector.body ]
126  %i = mul nuw nsw <32 x i64> %vec.ind, <i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5, i64 5>
127  %or = or <32 x i64> %i, <i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1, i64 1>
128  %i1 = getelementptr inbounds i8, ptr %B, <32 x i64> %or
129  %wide.masked.gather = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> %i1, i32 1, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <32 x i8> undef)
130  %i2 = getelementptr inbounds i8, ptr %A, i64 %index
131  %wide.load = load <32 x i8>, ptr %i2, align 1
132  %i4 = add <32 x i8> %wide.load, %wide.masked.gather
133  store <32 x i8> %i4, ptr %i2, align 1
134  %index.next = add nuw i64 %index, 32
135  %vec.ind.next = add <32 x i64> %vec.ind, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
136  %i6 = icmp eq i64 %index.next, 1024
137  br i1 %i6, label %for.cond.cleanup, label %vector.body
138
139for.cond.cleanup:                                 ; preds = %vector.body
140  ret void
141}
142
143declare <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr>, i32 immarg, <32 x i1>, <32 x i8>)
144