xref: /llvm-project/llvm/test/CodeGen/ARM/ParallelDSP/inner-full-unroll.ll (revision 701890164d567866900f3087ffd2ad4da963111c)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -mtriple=thumbv7em -arm-parallel-dsp -dce -S %s -o - | FileCheck %s
3
; full_unroll: baseline case in which every term of the sum is a product.
;
; Input IR: a loop over i from 0 until i == N (entry skips it when N == 0).
; Each iteration loads one pointer from %b[i] and one from %c[i], then adds up
; four products of sign-extended i16 elements (indices 0..3 of the two loaded
; pointers) and stores the i32 sum to %a[i]. The .1/.2/.3 value suffixes
; suggest this is an already fully unrolled inner loop, as the test name says.
;
; Expected output (assertions below): the eight i16 loads are replaced by four
; i32 loads (offset 0 and i16 index 2 of each pointer, still align 2) feeding
; two chained @llvm.arm.smlad calls; the first starts from accumulator 0 and
; the second accumulates onto the first call's result, which is then stored.
4define void @full_unroll(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, ptr noalias nocapture readonly %c, i32 %N) {
5; CHECK-LABEL: @full_unroll(
6; CHECK-NEXT:  entry:
7; CHECK-NEXT:    [[CMP29:%.*]] = icmp eq i32 [[N:%.*]], 0
8; CHECK-NEXT:    br i1 [[CMP29]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY:%.*]]
9; CHECK:       for.cond.cleanup:
10; CHECK-NEXT:    ret void
11; CHECK:       for.body:
12; CHECK-NEXT:    [[I_030:%.*]] = phi i32 [ [[INC12:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
13; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[I_030]]
14; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds ptr, ptr [[B:%.*]], i32 [[I_030]]
15; CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ARRAYIDX5]], align 4
16; CHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds ptr, ptr [[C:%.*]], i32 [[I_030]]
17; CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[ARRAYIDX7]], align 4
18; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 2
19; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[TMP1]], align 2
20; CHECK-NEXT:    [[TMP6:%.*]] = call i32 @llvm.arm.smlad(i32 [[TMP5]], i32 [[TMP3]], i32 0)
21; CHECK-NEXT:    [[ARRAYIDX6_2:%.*]] = getelementptr inbounds i16, ptr [[TMP0]], i32 2
22; CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX6_2]], align 2
23; CHECK-NEXT:    [[ARRAYIDX8_2:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 2
24; CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX8_2]], align 2
25; CHECK-NEXT:    [[TMP11:%.*]] = call i32 @llvm.arm.smlad(i32 [[TMP10]], i32 [[TMP8]], i32 [[TMP6]])
26; CHECK-NEXT:    store i32 [[TMP11]], ptr [[ARRAYIDX]], align 4
27; CHECK-NEXT:    [[INC12]] = add nuw i32 [[I_030]], 1
28; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC12]], [[N]]
29; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]]
30;
31entry:
32  %cmp29 = icmp eq i32 %N, 0
33  br i1 %cmp29, label %for.cond.cleanup, label %for.body
34
35for.cond.cleanup:                                 ; preds = %for.body, %entry
36  ret void
37
38for.body:                                         ; preds = %entry, %for.body
39  %i.030 = phi i32 [ %inc12, %for.body ], [ 0, %entry ]
  ; %arrayidx is the output slot a[i]; %0 and %1 are the pointers b[i] and c[i].
40  %arrayidx = getelementptr inbounds i32, ptr %a, i32 %i.030
41  %arrayidx5 = getelementptr inbounds ptr, ptr %b, i32 %i.030
42  %0 = load ptr, ptr %arrayidx5, align 4
43  %arrayidx7 = getelementptr inbounds ptr, ptr %c, i32 %i.030
44  %1 = load ptr, ptr %arrayidx7, align 4
  ; Term 0: sext(c[i][0]) * sext(b[i][0]).
45  %2 = load i16, ptr %0, align 2
46  %conv = sext i16 %2 to i32
47  %3 = load i16, ptr %1, align 2
48  %conv9 = sext i16 %3 to i32
49  %mul = mul nsw i32 %conv9, %conv
  ; Term 1: sext(c[i][1]) * sext(b[i][1]), added to term 0.
50  %arrayidx6.1 = getelementptr inbounds i16, ptr %0, i32 1
51  %4 = load i16, ptr %arrayidx6.1, align 2
52  %conv.1 = sext i16 %4 to i32
53  %arrayidx8.1 = getelementptr inbounds i16, ptr %1, i32 1
54  %5 = load i16, ptr %arrayidx8.1, align 2
55  %conv9.1 = sext i16 %5 to i32
56  %mul.1 = mul nsw i32 %conv9.1, %conv.1
57  %add.1 = add nsw i32 %mul.1, %mul
  ; Term 2: sext(c[i][2]) * sext(b[i][2]), added to the running sum.
58  %arrayidx6.2 = getelementptr inbounds i16, ptr %0, i32 2
59  %6 = load i16, ptr %arrayidx6.2, align 2
60  %conv.2 = sext i16 %6 to i32
61  %arrayidx8.2 = getelementptr inbounds i16, ptr %1, i32 2
62  %7 = load i16, ptr %arrayidx8.2, align 2
63  %conv9.2 = sext i16 %7 to i32
64  %mul.2 = mul nsw i32 %conv9.2, %conv.2
65  %add.2 = add nsw i32 %mul.2, %add.1
  ; Term 3: sext(c[i][3]) * sext(b[i][3]), added to the running sum.
66  %arrayidx6.3 = getelementptr inbounds i16, ptr %0, i32 3
67  %8 = load i16, ptr %arrayidx6.3, align 2
68  %conv.3 = sext i16 %8 to i32
69  %arrayidx8.3 = getelementptr inbounds i16, ptr %1, i32 3
70  %9 = load i16, ptr %arrayidx8.3, align 2
71  %conv9.3 = sext i16 %9 to i32
72  %mul.3 = mul nsw i32 %conv9.3, %conv.3
73  %add.3 = add nsw i32 %mul.3, %add.2
  ; Write the four-term sum to a[i], then advance i and loop until i == N.
74  store i32 %add.3, ptr %arrayidx, align 4
75  %inc12 = add nuw i32 %i.030, 1
76  %exitcond = icmp eq i32 %inc12, %N
77  br i1 %exitcond, label %for.cond.cleanup, label %for.body
78}
79
; full_unroll_sub: same loop shape as a four-term multiply-accumulate, except
; that term 0 is a difference (sub nsw) of the two sign-extended element-0
; values instead of their product.
;
; Input IR: a loop over i from 0 until i == N (entry skips it when N == 0).
; Each iteration loads one pointer from %b[i] and one from %c[i], computes
; (c0 - b0) + c1*b1 + c2*b2 + c3*b3 on sign-extended i16 elements, and stores
; the i32 result to %a[i].
;
; Expected output (assertions below): the element 0 and element 1 computations
; stay as scalar i16 loads, sext, sub, mul and add. Only elements 2 and 3 are
; rewritten: one i32 load per pointer at i16 index 2 (still align 2) feeds a
; single @llvm.arm.smlad call whose accumulator operand is the scalar partial
; sum of terms 0 and 1.
80define void @full_unroll_sub(ptr noalias nocapture %a, ptr noalias nocapture readonly %b, ptr noalias nocapture readonly %c, i32 %N) {
81; CHECK-LABEL: @full_unroll_sub(
82; CHECK-NEXT:  entry:
83; CHECK-NEXT:    [[CMP29:%.*]] = icmp eq i32 [[N:%.*]], 0
84; CHECK-NEXT:    br i1 [[CMP29]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY:%.*]]
85; CHECK:       for.cond.cleanup:
86; CHECK-NEXT:    ret void
87; CHECK:       for.body:
88; CHECK-NEXT:    [[I_030:%.*]] = phi i32 [ [[INC12:%.*]], [[FOR_BODY]] ], [ 0, [[ENTRY:%.*]] ]
89; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[I_030]]
90; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds ptr, ptr [[B:%.*]], i32 [[I_030]]
91; CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[ARRAYIDX5]], align 4
92; CHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds ptr, ptr [[C:%.*]], i32 [[I_030]]
93; CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[ARRAYIDX7]], align 4
94; CHECK-NEXT:    [[TMP2:%.*]] = load i16, ptr [[TMP0]], align 2
95; CHECK-NEXT:    [[CONV:%.*]] = sext i16 [[TMP2]] to i32
96; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr [[TMP1]], align 2
97; CHECK-NEXT:    [[CONV9:%.*]] = sext i16 [[TMP3]] to i32
98; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 [[CONV9]], [[CONV]]
99; CHECK-NEXT:    [[ARRAYIDX6_1:%.*]] = getelementptr inbounds i16, ptr [[TMP0]], i32 1
100; CHECK-NEXT:    [[TMP4:%.*]] = load i16, ptr [[ARRAYIDX6_1]], align 2
101; CHECK-NEXT:    [[CONV_1:%.*]] = sext i16 [[TMP4]] to i32
102; CHECK-NEXT:    [[ARRAYIDX8_1:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 1
103; CHECK-NEXT:    [[TMP5:%.*]] = load i16, ptr [[ARRAYIDX8_1]], align 2
104; CHECK-NEXT:    [[CONV9_1:%.*]] = sext i16 [[TMP5]] to i32
105; CHECK-NEXT:    [[MUL_1:%.*]] = mul nsw i32 [[CONV9_1]], [[CONV_1]]
106; CHECK-NEXT:    [[ADD_1:%.*]] = add nsw i32 [[MUL_1]], [[SUB]]
107; CHECK-NEXT:    [[ARRAYIDX6_2:%.*]] = getelementptr inbounds i16, ptr [[TMP0]], i32 2
108; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX6_2]], align 2
109; CHECK-NEXT:    [[ARRAYIDX8_2:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 2
110; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX8_2]], align 2
111; CHECK-NEXT:    [[TMP10:%.*]] = call i32 @llvm.arm.smlad(i32 [[TMP9]], i32 [[TMP7]], i32 [[ADD_1]])
112; CHECK-NEXT:    store i32 [[TMP10]], ptr [[ARRAYIDX]], align 4
113; CHECK-NEXT:    [[INC12]] = add nuw i32 [[I_030]], 1
114; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC12]], [[N]]
115; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]]
116;
117entry:
118  %cmp29 = icmp eq i32 %N, 0
119  br i1 %cmp29, label %for.cond.cleanup, label %for.body
120
121for.cond.cleanup:                                 ; preds = %for.body, %entry
122  ret void
123
124for.body:                                         ; preds = %entry, %for.body
125  %i.030 = phi i32 [ %inc12, %for.body ], [ 0, %entry ]
  ; %arrayidx is the output slot a[i]; %0 and %1 are the pointers b[i] and c[i].
126  %arrayidx = getelementptr inbounds i32, ptr %a, i32 %i.030
127  %arrayidx5 = getelementptr inbounds ptr, ptr %b, i32 %i.030
128  %0 = load ptr, ptr %arrayidx5, align 4
129  %arrayidx7 = getelementptr inbounds ptr, ptr %c, i32 %i.030
130  %1 = load ptr, ptr %arrayidx7, align 4
  ; Term 0 is a subtraction, not a product: sext(c[i][0]) - sext(b[i][0]).
131  %2 = load i16, ptr %0, align 2
132  %conv = sext i16 %2 to i32
133  %3 = load i16, ptr %1, align 2
134  %conv9 = sext i16 %3 to i32
135  %sub = sub nsw i32 %conv9, %conv
  ; Term 1: sext(c[i][1]) * sext(b[i][1]), added to the difference above.
136  %arrayidx6.1 = getelementptr inbounds i16, ptr %0, i32 1
137  %4 = load i16, ptr %arrayidx6.1, align 2
138  %conv.1 = sext i16 %4 to i32
139  %arrayidx8.1 = getelementptr inbounds i16, ptr %1, i32 1
140  %5 = load i16, ptr %arrayidx8.1, align 2
141  %conv9.1 = sext i16 %5 to i32
142  %mul.1 = mul nsw i32 %conv9.1, %conv.1
143  %add.1 = add nsw i32 %mul.1, %sub
  ; Term 2: sext(c[i][2]) * sext(b[i][2]), added to the running sum.
144  %arrayidx6.2 = getelementptr inbounds i16, ptr %0, i32 2
145  %6 = load i16, ptr %arrayidx6.2, align 2
146  %conv.2 = sext i16 %6 to i32
147  %arrayidx8.2 = getelementptr inbounds i16, ptr %1, i32 2
148  %7 = load i16, ptr %arrayidx8.2, align 2
149  %conv9.2 = sext i16 %7 to i32
150  %mul.2 = mul nsw i32 %conv9.2, %conv.2
151  %add.2 = add nsw i32 %mul.2, %add.1
  ; Term 3: sext(c[i][3]) * sext(b[i][3]), added to the running sum.
152  %arrayidx6.3 = getelementptr inbounds i16, ptr %0, i32 3
153  %8 = load i16, ptr %arrayidx6.3, align 2
154  %conv.3 = sext i16 %8 to i32
155  %arrayidx8.3 = getelementptr inbounds i16, ptr %1, i32 3
156  %9 = load i16, ptr %arrayidx8.3, align 2
157  %conv9.3 = sext i16 %9 to i32
158  %mul.3 = mul nsw i32 %conv9.3, %conv.3
159  %add.3 = add nsw i32 %mul.3, %add.2
  ; Write the result to a[i], then advance i and loop until i == N.
160  store i32 %add.3, ptr %arrayidx, align 4
161  %inc12 = add nuw i32 %i.030, 1
162  %exitcond = icmp eq i32 %inc12, %N
163  br i1 %exitcond, label %for.cond.cleanup, label %for.body
164}
165