|
Revision tags: llvmorg-18.1.8, llvmorg-18.1.7, llvmorg-18.1.6, llvmorg-18.1.5, llvmorg-18.1.4, llvmorg-18.1.3, llvmorg-18.1.2, llvmorg-18.1.1, llvmorg-18.1.0, llvmorg-18.1.0-rc4, llvmorg-18.1.0-rc3, llvmorg-18.1.0-rc2, llvmorg-18.1.0-rc1, llvmorg-19-init, llvmorg-17.0.6, llvmorg-17.0.5, llvmorg-17.0.4, llvmorg-17.0.3, llvmorg-17.0.2, llvmorg-17.0.1, llvmorg-17.0.0, llvmorg-17.0.0-rc4, llvmorg-17.0.0-rc3, llvmorg-17.0.0-rc2, llvmorg-17.0.0-rc1, llvmorg-18-init, llvmorg-16.0.6, llvmorg-16.0.5, llvmorg-16.0.4, llvmorg-16.0.3, llvmorg-16.0.2 |
|
| #
8bba57b1 |
| 13-Apr-2023 |
Craig Topper <craig.topper@sifive.com> |
[LoopIdiomRecognize] Remove NUW flag from SCEV in getTripCount.
Based on the conversation in D147355.
Reviewed By: nikic
Differential Revision: https://reviews.llvm.org/D148170
|
|
Revision tags: llvmorg-16.0.1, llvmorg-16.0.0, llvmorg-16.0.0-rc4, llvmorg-16.0.0-rc3, llvmorg-16.0.0-rc2, llvmorg-16.0.0-rc1, llvmorg-17-init, llvmorg-15.0.7 |
|
| #
7a752e81 |
| 06-Jan-2023 |
Nikita Popov <npopov@redhat.com> |
[LoopIdiom] Convert tests to opaque pointers (NFC)
The differences here are due to SCEVExpander producing GEPs with explicit offset calculation, a known difference with opaque pointers.
|
| #
89f1876b |
| 06-Jan-2023 |
Nikita Popov <npopov@redhat.com> |
[LoopIdiom] Name instructions in test (NFC)
|
| #
48c6b272 |
| 07-Dec-2022 |
Roman Lebedev <lebedev.ri@gmail.com> |
[NFC] Port all LoopIdiom tests to `-passes=` syntax
|
|
Revision tags: llvmorg-15.0.6, llvmorg-15.0.5, llvmorg-15.0.4, llvmorg-15.0.3, working, llvmorg-15.0.2, llvmorg-15.0.1, llvmorg-15.0.0, llvmorg-15.0.0-rc3, llvmorg-15.0.0-rc2, llvmorg-15.0.0-rc1, llvmorg-16-init, llvmorg-14.0.6, llvmorg-14.0.5, llvmorg-14.0.4, llvmorg-14.0.3, llvmorg-14.0.2, llvmorg-14.0.1, llvmorg-14.0.0, llvmorg-14.0.0-rc4, llvmorg-14.0.0-rc3, llvmorg-14.0.0-rc2, llvmorg-14.0.0-rc1, llvmorg-15-init, llvmorg-13.0.1, llvmorg-13.0.1-rc3, llvmorg-13.0.1-rc2, llvmorg-13.0.1-rc1, llvmorg-13.0.0, llvmorg-13.0.0-rc4, llvmorg-13.0.0-rc3, llvmorg-13.0.0-rc2, llvmorg-13.0.0-rc1, llvmorg-14-init, llvmorg-12.0.1, llvmorg-12.0.1-rc4, llvmorg-12.0.1-rc3, llvmorg-12.0.1-rc2, llvmorg-12.0.1-rc1, llvmorg-12.0.0, llvmorg-12.0.0-rc5, llvmorg-12.0.0-rc4, llvmorg-12.0.0-rc3, llvmorg-12.0.0-rc2 |
|
| #
da1cdffb |
| 09-Feb-2021 |
Han Zhu <zhuhan@fb.com> |
[loop-idiom] Hoist loop memcpys to loop preheader
For a simple loop like: ``` struct S { int x; int y; char b; };
unsigned foo(S* __restrict__ a, S* b, int n) { for (int i = 0; i < n; i++)
[loop-idiom] Hoist loop memcpys to loop preheader
For a simple loop like: ``` struct S { int x; int y; char b; };
unsigned foo(S* __restrict__ a, S* b, int n) { for (int i = 0; i < n; i++) a[i] = b[i];
return sizeof(a[0]); } ``` We could eliminate the loop and convert it to a large memcpy of 12*n bytes. Currently this is not handled. Output of `opt -loop-idiom -S < memcpy_before.ll` ``` %struct.S = type { i32, i32, i8 }
define dso_local i32 @_Z3fooP1SS0_i(%struct.S* noalias nocapture %a, %struct.S* nocapture readonly %b, i32 %n) local_unnamed_addr { entry: %cmp7 = icmp sgt i32 %n, 0 br i1 %cmp7, label %for.body.preheader, label %for.cond.cleanup
for.body.preheader: ; preds = %entry br label %for.body
for.cond.cleanup.loopexit: ; preds = %for.body br label %for.cond.cleanup
for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry ret i32 12
for.body: ; preds = %for.body, %for.body.preheader %i.08 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] %idxprom = zext i32 %i.08 to i64 %arrayidx = getelementptr inbounds %struct.S, %struct.S* %b, i64 %idxprom %arrayidx2 = getelementptr inbounds %struct.S, %struct.S* %a, i64 %idxprom %0 = bitcast %struct.S* %arrayidx2 to i8* %1 = bitcast %struct.S* %arrayidx to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 dereferenceable(12) %0, i8* nonnull align 4 dereferenceable(12) %1, i64 12, i1 false) %inc = add nuw nsw i32 %i.08, 1 %cmp = icmp slt i32 %inc, %n br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit }
; Function Attrs: argmemonly nofree nosync nounwind willreturn declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #0
attributes #0 = { argmemonly nofree nosync nounwind willreturn }
``` The loop idiom pass currently only handles load and store instructions. Since struct S is too big to fit in a register, the loop body contains a memcpy intrinsic.
With this change, re-run `opt -loop-idiom -S < memcpy_before.ll`. The loop memcpy is promoted to loop preheader. For this trivial case, the loop is dead and will be removed by another pass. ``` %struct.S = type { i32, i32, i8 }
define dso_local i32 @_Z3fooP1SS0_i(%struct.S* noalias nocapture %a, %struct.S* nocapture readonly %b, i32 %n) local_unnamed_addr { entry: %a1 = bitcast %struct.S* %a to i8* %b2 = bitcast %struct.S* %b to i8* %cmp7 = icmp sgt i32 %n, 0 br i1 %cmp7, label %for.body.preheader, label %for.cond.cleanup
for.body.preheader: ; preds = %entry %0 = zext i32 %n to i64 %1 = mul nuw nsw i64 %0, 12 call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %a1, i8* align 4 %b2, i64 %1, i1 false) br label %for.body
for.cond.cleanup.loopexit: ; preds = %for.body br label %for.cond.cleanup
for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry ret i32 12
for.body: ; preds = %for.body, %for.body.preheader %i.08 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] %idxprom = zext i32 %i.08 to i64 %arrayidx = getelementptr inbounds %struct.S, %struct.S* %b, i64 %idxprom %arrayidx2 = getelementptr inbounds %struct.S, %struct.S* %a, i64 %idxprom %2 = bitcast %struct.S* %arrayidx2 to i8* %3 = bitcast %struct.S* %arrayidx to i8* %inc = add nuw nsw i32 %i.08, 1 %cmp = icmp slt i32 %inc, %n br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit }
; Function Attrs: argmemonly nofree nosync nounwind willreturn declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #0
attributes #0 = { argmemonly nofree nosync nounwind willreturn } ```
Reviewed By: zino
Differential Revision: https://reviews.llvm.org/D97667
show more ...
|
| #
75d6b8bb |
| 09-Feb-2021 |
Han Zhu <zhuhan@fb.com> |
[loop-idiom] Hoist loop memcpys to loop preheader
For a simple loop like: ``` struct S { int x; int y; char b; };
unsigned foo(S* __restrict__ a, S* b, int n) { for (int i = 0; i < n; i++)
[loop-idiom] Hoist loop memcpys to loop preheader
For a simple loop like: ``` struct S { int x; int y; char b; };
unsigned foo(S* __restrict__ a, S* b, int n) { for (int i = 0; i < n; i++) a[i] = b[i];
return sizeof(a[0]); } ``` We could eliminate the loop and convert it to a large memcpy of 12*n bytes. Currently this is not handled. Output of `opt -loop-idiom -S < memcpy_before.ll` ``` %struct.S = type { i32, i32, i8 }
define dso_local i32 @_Z3fooP1SS0_i(%struct.S* noalias nocapture %a, %struct.S* nocapture readonly %b, i32 %n) local_unnamed_addr { entry: %cmp7 = icmp sgt i32 %n, 0 br i1 %cmp7, label %for.body.preheader, label %for.cond.cleanup
for.body.preheader: ; preds = %entry br label %for.body
for.cond.cleanup.loopexit: ; preds = %for.body br label %for.cond.cleanup
for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry ret i32 12
for.body: ; preds = %for.body, %for.body.preheader %i.08 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] %idxprom = zext i32 %i.08 to i64 %arrayidx = getelementptr inbounds %struct.S, %struct.S* %b, i64 %idxprom %arrayidx2 = getelementptr inbounds %struct.S, %struct.S* %a, i64 %idxprom %0 = bitcast %struct.S* %arrayidx2 to i8* %1 = bitcast %struct.S* %arrayidx to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 dereferenceable(12) %0, i8* nonnull align 4 dereferenceable(12) %1, i64 12, i1 false) %inc = add nuw nsw i32 %i.08, 1 %cmp = icmp slt i32 %inc, %n br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit }
; Function Attrs: argmemonly nofree nosync nounwind willreturn declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #0
attributes #0 = { argmemonly nofree nosync nounwind willreturn }
``` The loop idiom pass currently only handles load and store instructions. Since struct S is too big to fit in a register, the loop body contains a memcpy intrinsic.
With this change, re-run `opt -loop-idiom -S < memcpy_before.ll`. The loop memcpy is promoted to loop preheader. For this trivial case, the loop is dead and will be removed by another pass. ``` %struct.S = type { i32, i32, i8 }
define dso_local i32 @_Z3fooP1SS0_i(%struct.S* noalias nocapture %a, %struct.S* nocapture readonly %b, i32 %n) local_unnamed_addr { entry: %a1 = bitcast %struct.S* %a to i8* %b2 = bitcast %struct.S* %b to i8* %cmp7 = icmp sgt i32 %n, 0 br i1 %cmp7, label %for.body.preheader, label %for.cond.cleanup
for.body.preheader: ; preds = %entry %0 = zext i32 %n to i64 %1 = mul nuw nsw i64 %0, 12 call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %a1, i8* align 4 %b2, i64 %1, i1 false) br label %for.body
for.cond.cleanup.loopexit: ; preds = %for.body br label %for.cond.cleanup
for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry ret i32 12
for.body: ; preds = %for.body, %for.body.preheader %i.08 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] %idxprom = zext i32 %i.08 to i64 %arrayidx = getelementptr inbounds %struct.S, %struct.S* %b, i64 %idxprom %arrayidx2 = getelementptr inbounds %struct.S, %struct.S* %a, i64 %idxprom %2 = bitcast %struct.S* %arrayidx2 to i8* %3 = bitcast %struct.S* %arrayidx to i8* %inc = add nuw nsw i32 %i.08, 1 %cmp = icmp slt i32 %inc, %n br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit }
; Function Attrs: argmemonly nofree nosync nounwind willreturn declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #0
attributes #0 = { argmemonly nofree nosync nounwind willreturn } ```
Reviewed By: zino
Differential Revision: https://reviews.llvm.org/D97667
show more ...
|
| #
92ddd3c1 |
| 09-Feb-2021 |
Han Zhu <zhuhan@fb.com> |
[loop-idiom] Hoist loop memcpys to loop preheader
For a simple loop like: ``` struct S { int x; int y; char b; };
unsigned foo(S* __restrict__ a, S* b, int n) { for (int i = 0; i < n; i++)
[loop-idiom] Hoist loop memcpys to loop preheader
For a simple loop like: ``` struct S { int x; int y; char b; };
unsigned foo(S* __restrict__ a, S* b, int n) { for (int i = 0; i < n; i++) a[i] = b[i];
return sizeof(a[0]); } ``` We could eliminate the loop and convert it to a large memcpy of 12*n bytes. Currently this is not handled. Output of `opt -loop-idiom -S < memcpy_before.ll` ``` %struct.S = type { i32, i32, i8 }
define dso_local i32 @_Z3fooP1SS0_i(%struct.S* noalias nocapture %a, %struct.S* nocapture readonly %b, i32 %n) local_unnamed_addr { entry: %cmp7 = icmp sgt i32 %n, 0 br i1 %cmp7, label %for.body.preheader, label %for.cond.cleanup
for.body.preheader: ; preds = %entry br label %for.body
for.cond.cleanup.loopexit: ; preds = %for.body br label %for.cond.cleanup
for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry ret i32 12
for.body: ; preds = %for.body, %for.body.preheader %i.08 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] %idxprom = zext i32 %i.08 to i64 %arrayidx = getelementptr inbounds %struct.S, %struct.S* %b, i64 %idxprom %arrayidx2 = getelementptr inbounds %struct.S, %struct.S* %a, i64 %idxprom %0 = bitcast %struct.S* %arrayidx2 to i8* %1 = bitcast %struct.S* %arrayidx to i8* call void @llvm.memcpy.p0i8.p0i8.i64(i8* nonnull align 4 dereferenceable(12) %0, i8* nonnull align 4 dereferenceable(12) %1, i64 12, i1 false) %inc = add nuw nsw i32 %i.08, 1 %cmp = icmp slt i32 %inc, %n br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit }
; Function Attrs: argmemonly nofree nosync nounwind willreturn declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #0
attributes #0 = { argmemonly nofree nosync nounwind willreturn }
``` The loop idiom pass currently only handles load and store instructions. Since struct S is too big to fit in a register, the loop body contains a memcpy intrinsic.
With this change, re-run `opt -loop-idiom -S < memcpy_before.ll`. The loop memcpy is promoted to loop preheader. For this trivial case, the loop is dead and will be removed by another pass. ``` %struct.S = type { i32, i32, i8 }
define dso_local i32 @_Z3fooP1SS0_i(%struct.S* noalias nocapture %a, %struct.S* nocapture readonly %b, i32 %n) local_unnamed_addr { entry: %a1 = bitcast %struct.S* %a to i8* %b2 = bitcast %struct.S* %b to i8* %cmp7 = icmp sgt i32 %n, 0 br i1 %cmp7, label %for.body.preheader, label %for.cond.cleanup
for.body.preheader: ; preds = %entry %0 = zext i32 %n to i64 %1 = mul nuw nsw i64 %0, 12 call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %a1, i8* align 4 %b2, i64 %1, i1 false) br label %for.body
for.cond.cleanup.loopexit: ; preds = %for.body br label %for.cond.cleanup
for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit, %entry ret i32 12
for.body: ; preds = %for.body, %for.body.preheader %i.08 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ] %idxprom = zext i32 %i.08 to i64 %arrayidx = getelementptr inbounds %struct.S, %struct.S* %b, i64 %idxprom %arrayidx2 = getelementptr inbounds %struct.S, %struct.S* %a, i64 %idxprom %2 = bitcast %struct.S* %arrayidx2 to i8* %3 = bitcast %struct.S* %arrayidx to i8* %inc = add nuw nsw i32 %i.08, 1 %cmp = icmp slt i32 %inc, %n br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit }
; Function Attrs: argmemonly nofree nosync nounwind willreturn declare void @llvm.memcpy.p0i8.p0i8.i64(i8* noalias nocapture writeonly, i8* noalias nocapture readonly, i64, i1 immarg) #0
attributes #0 = { argmemonly nofree nosync nounwind willreturn } ```
Reviewed By: zino
Differential Revision: https://reviews.llvm.org/D97667
show more ...
|
| #
deb50958 |
| 09-Feb-2021 |
Han Zhu <zhuhan@fb.com> |
[loop-idiom] Hoist loop memcpys to loop preheader
Summary:
Test Plan:
Reviewers:
Subscribers:
Tasks:
Tags:
Blame Revision:
Differential Revision: https://phabricator.intern.facebook.com/D2638
[loop-idiom] Hoist loop memcpys to loop preheader
Summary:
Test Plan:
Reviewers:
Subscribers:
Tasks:
Tags:
Blame Revision:
Differential Revision: https://phabricator.intern.facebook.com/D26380397
show more ...
|