xref: /llvm-project/llvm/test/Transforms/SLPVectorizer/X86/revectorized_rdx_crash.ll (revision 0e11e194167ff4e4959f0b908b9de5d3f5f801f5)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -mtriple=x86_64-unknown -passes=slp-vectorizer -S | FileCheck %s
3
4; REQUIRES: asserts
5
6; SLP crashed when it tried to delete an instruction that still had uses.
7; It tried to match a reduction on %i23, then on %i22, and so on.
8; When it reached %i18 it still failed to match a reduction there, but
9; succeeded with its operand pair: %i17, %i11.
10; It then popped instruction %i17 from the stack to make the next attempt at
11; matching a reduction, but that instruction had already been erased on the
12; prior iteration (it was matched and vectorized, which added a use of a
13; deleted instruction).
14
; Reproducer input: two chains of scalar i32 adds over overlapping loads from
; %p, joined by a tail of adds whose final value %i23 only feeds the phi %r.
; The autogenerated directives below pin the expected vectorizer output.
15define void @test(i1 %arg, ptr %p) {
16; CHECK-LABEL: @test(
17; CHECK-NEXT:  entry:
18; CHECK-NEXT:    br i1 %arg, label [[IF_END:%.*]], label [[FOR_COND_PREHEADER:%.*]]
19; CHECK:       for.cond.preheader:
20; CHECK-NEXT:    [[I:%.*]] = getelementptr inbounds [100 x i32], ptr %p, i64 0, i64 2
21; CHECK-NEXT:    [[I1:%.*]] = getelementptr inbounds [100 x i32], ptr %p, i64 0, i64 3
22; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i32>, ptr [[I]], align 8
23; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP0]])
24; CHECK-NEXT:    [[OP_RDX3:%.*]] = add i32 [[TMP1]], 0
25; CHECK-NEXT:    [[TMP2:%.*]] = load <4 x i32>, ptr [[I1]], align 4
26; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP2]])
27; CHECK-NEXT:    [[OP_RDX2:%.*]] = add i32 [[TMP3]], 0
28; CHECK-NEXT:    [[TMP4:%.*]] = mul i32 [[OP_RDX3]], 2
29; CHECK-NEXT:    [[OP_RDX:%.*]] = add i32 0, [[TMP4]]
30; CHECK-NEXT:    [[TMP5:%.*]] = mul i32 [[OP_RDX2]], 2
31; CHECK-NEXT:    [[OP_RDX1:%.*]] = add i32 [[OP_RDX]], [[TMP5]]
32; CHECK-NEXT:    br label [[IF_END]]
33; CHECK:       if.end:
34; CHECK-NEXT:    [[R:%.*]] = phi i32 [ [[OP_RDX1]], [[FOR_COND_PREHEADER]] ], [ 0, [[ENTRY:%.*]] ]
35; CHECK-NEXT:    ret void
36;
37entry:
38  br i1 %arg, label %if.end, label %for.cond.preheader
39
40for.cond.preheader:                               ; preds = %entry
; Addresses of elements 2 through 6 of the [100 x i32] array at %p.
41  %i = getelementptr inbounds [100 x i32], ptr %p, i64 0, i64 2
42  %i1 = getelementptr inbounds [100 x i32], ptr %p, i64 0, i64 3
43  %i2 = getelementptr inbounds [100 x i32], ptr %p, i64 0, i64 4
44  %i3 = getelementptr inbounds [100 x i32], ptr %p, i64 0, i64 5
45  %i4 = getelementptr inbounds [100 x i32], ptr %p, i64 0, i64 6
; First chain: load elements 2..5 and accumulate them, starting from 0, into %i9.
46  %ld0 = load i32, ptr %i, align 8
47  %ld1 = load i32, ptr %i1, align 4
48  %ld2 = load i32, ptr %i2, align 16
49  %ld3 = load i32, ptr %i3, align 4
50  %i5 = add i32 0, 0
51  %i6 = add i32 %i5, %ld3
52  %i7 = add i32 %i6, %ld2
53  %i8 = add i32 %i7, %ld1
54  %i9 = add i32 %i8, %ld0
; %i11 = %i9 + (%i9 + 0): the first chain's sum is used twice.
55  %i10 = add i32 %i9, 0
56  %i11 = add i32 %i9, %i10
; Second chain: load elements 3..6 (overlapping the first chain's addresses)
; and accumulate them, starting from 0, into %i16.
57  %ld4 = load i32, ptr %i1, align 4
58  %ld5 = load i32, ptr %i2, align 16
59  %ld6 = load i32, ptr %i3, align 4
60  %ld7 = load i32, ptr %i4, align 8
61  %i12 = add i32 0, 0
62  %i13 = add i32 %i12, %ld7
63  %i14 = add i32 %i13, %ld6
64  %i15 = add i32 %i14, %ld5
65  %i16 = add i32 %i15, %ld4
; %i17 (= %i16 + 0) is an operand of both %i18 and %i19; %i18 is where the
; two chains meet (%i17 + %i11).
66  %i17 = add i32 %i16, 0
67  %i18 = add i32 %i17, %i11
68  %i19 = add i32 %i17, %i18
; Tail of adds with a constant 0 operand, ending in %i23.
69  %i20 = add i32 0, %i19
70  %i21 = add i32 0, %i20
71  %i22 = add i32 0, %i21
72  %i23 = add i32 0, %i22
73  br label %if.end
74
75if.end:                                           ; preds = %for.cond.preheader, %entry
; %r is the only user of %i23 and has no users itself; the function returns void.
76  %r = phi i32 [ %i23, %for.cond.preheader ], [ 0, %entry ]
77  ret void
78}
79