; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=instcombine -S | FileCheck %s
target datalayout = "E-p:64:64:64-p1:32:32:32-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"

; Instcombine should be able to prove vector alignment in the
; presence of a few mild address computation tricks.

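; An informal sketch of why 16-byte alignment is provable in @test0: %e is
; formed by clearing the low four address bits (and %c, -16), so it is
; 16-byte aligned; %v = 2 * %u and %z = %y & -2 are both even, so the index
; %h = %i * %v + %z is even, and the byte offset 8 * %h (the size of a
; double) is a multiple of 16. Every store address %e + 8 * %h therefore
; keeps the 16-byte alignment of %e.
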
define void @test0(ptr %b, i64 %n, i64 %u, i64 %y) nounwind  {
; CHECK-LABEL: @test0(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[C:%.*]] = ptrtoint ptr [[B:%.*]] to i64
; CHECK-NEXT:    [[D:%.*]] = and i64 [[C]], -16
; CHECK-NEXT:    [[E:%.*]] = inttoptr i64 [[D]] to ptr
; CHECK-NEXT:    [[V:%.*]] = shl i64 [[U:%.*]], 1
; CHECK-NEXT:    [[Z:%.*]] = and i64 [[Y:%.*]], -2
; CHECK-NEXT:    [[T1421:%.*]] = icmp eq i64 [[N:%.*]], 0
; CHECK-NEXT:    br i1 [[T1421]], label [[RETURN:%.*]], label [[BB:%.*]]
; CHECK:       bb:
; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[INDVAR_NEXT:%.*]], [[BB]] ], [ 20, [[ENTRY:%.*]] ]
; CHECK-NEXT:    [[J:%.*]] = mul i64 [[I]], [[V]]
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr double, ptr [[E]], i64 [[J]]
; CHECK-NEXT:    [[T8:%.*]] = getelementptr double, ptr [[TMP0]], i64 [[Z]]
; CHECK-NEXT:    store <2 x double> zeroinitializer, ptr [[T8]], align 8
; CHECK-NEXT:    [[INDVAR_NEXT]] = add i64 [[I]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVAR_NEXT]], [[N]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[RETURN]], label [[BB]]
; CHECK:       return:
; CHECK-NEXT:    ret void
;
entry:
  %c = ptrtoint ptr %b to i64
  %d = and i64 %c, -16
  %e = inttoptr i64 %d to ptr
  %v = mul i64 %u, 2
  %z = and i64 %y, -2
  %t1421 = icmp eq i64 %n, 0
  br i1 %t1421, label %return, label %bb

bb:
  %i = phi i64 [ %indvar.next, %bb ], [ 20, %entry ]
  %j = mul i64 %i, %v
  %h = add i64 %j, %z
  %t8 = getelementptr double, ptr %e, i64 %h
  store <2 x double><double 0.0, double 0.0>, ptr %t8, align 8
  %indvar.next = add i64 %i, 1
  %exitcond = icmp eq i64 %indvar.next, %n
  br i1 %exitcond, label %return, label %bb

return:
  ret void
}

; When we see an unaligned load from an insufficiently aligned global or
; alloca, increase the alignment of the load, turning it into an aligned load.

@GLOBAL = internal global [4 x i32] zeroinitializer

define <16 x i8> @test1(<2 x i64> %x) {
; CHECK-LABEL: @test1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP:%.*]] = load <16 x i8>, ptr @GLOBAL, align 1
; CHECK-NEXT:    ret <16 x i8> [[TMP]]
;
entry:
  %tmp = load <16 x i8>, ptr @GLOBAL, align 1
  ret <16 x i8> %tmp
}

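; The same load, repeated through a non-default address space: per the
; datalayout, addrspace(1) pointers are 32 bits wide (p1:32:32:32), which is
; also why the constant GEP in the test below ends up with an i32 offset.
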
@GLOBAL_as1 = internal addrspace(1) global [4 x i32] zeroinitializer

define <16 x i8> @test1_as1(<2 x i64> %x) {
; CHECK-LABEL: @test1_as1(
; CHECK-NEXT:    [[TMP:%.*]] = load <16 x i8>, ptr addrspace(1) @GLOBAL_as1, align 1
; CHECK-NEXT:    ret <16 x i8> [[TMP]]
;
  %tmp = load <16 x i8>, ptr addrspace(1) @GLOBAL_as1, align 1
  ret <16 x i8> %tmp
}

@GLOBAL_as1_gep = internal addrspace(1) global [8 x i32] zeroinitializer

define <16 x i8> @test1_as1_gep(<2 x i64> %x) {
; CHECK-LABEL: @test1_as1_gep(
; CHECK-NEXT:    [[TMP:%.*]] = load <16 x i8>, ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) @GLOBAL_as1_gep, i32 16), align 1
; CHECK-NEXT:    ret <16 x i8> [[TMP]]
;
  %tmp = load <16 x i8>, ptr addrspace(1) getelementptr ([8 x i32], ptr addrspace(1) @GLOBAL_as1_gep, i16 0, i16 4), align 1
  ret <16 x i8> %tmp
}
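
; The source GEP above addresses element 4 of [8 x i32], a byte offset of
; 4 * 4 = 16; instcombine canonicalizes it to an offset-style i8 GEP whose
; i32 offset type matches the 32-bit index width of addrspace(1).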


; When a load or store lacks an explicit alignment, add one.

define double @test2(ptr %p, double %n) nounwind {
; CHECK-LABEL: @test2(
; CHECK-NEXT:    [[T:%.*]] = load double, ptr [[P:%.*]], align 8
; CHECK-NEXT:    store double [[N:%.*]], ptr [[P]], align 8
; CHECK-NEXT:    ret double [[T]]
;
  %t = load double, ptr %p
  store double %n, ptr %p
  ret double %t
}
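
; The align 8 above is the ABI alignment of double taken from the datalayout
; (f64:64:64): a load or store written without an explicit alignment is
; given the ABI alignment of its type.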

declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1) nounwind

declare void @use(ptr)

%struct.s = type { i32, i32, i32, i32 }

define void @test3(ptr sret(%struct.s) %a4) {
; Check that the alignment is bumped up to the alignment of the sret type.
; CHECK-LABEL: @test3(
; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr noundef nonnull align 4 dereferenceable(16) [[A4:%.*]], i8 0, i64 16, i1 false)
; CHECK-NEXT:    call void @use(ptr nonnull [[A4]])
; CHECK-NEXT:    ret void
;
  call void @llvm.memset.p0.i64(ptr %a4, i8 0, i64 16, i1 false)
  call void @use(ptr %a4)
  ret void
}
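
; %struct.s is four i32s, so the sret argument is known to point at 16
; dereferenceable bytes with the struct's 4-byte ABI alignment (from
; i32:32:32), which is exactly what gets attached to the memset call above.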

declare ptr @llvm.ptrmask.p0.i64(ptr, i64)

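; llvm.ptrmask returns a pointer with the provenance of its first operand
; and an address equal to the bitwise AND of that pointer's address and the
; mask, so a mask of -(2^k) guarantees 2^k-byte alignment. It is the direct
; form of the manual ptrtoint/and/inttoptr idiom used in @test0; roughly,
; for a mask of -8:
;
;   %aligned = call ptr @llvm.ptrmask.p0.i64(ptr %p, i64 -8)
;   ; ~ inttoptr (ptrtoint %p & -8) to ptr, keeping the provenance of %p
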
define <16 x i8> @ptrmask_align_unknown_ptr_align1(ptr align 1 %ptr, i64 %mask) {
; CHECK-LABEL: @ptrmask_align_unknown_ptr_align1(
; CHECK-NEXT:    [[ALIGNED:%.*]] = call ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 [[MASK:%.*]])
; CHECK-NEXT:    [[LOAD:%.*]] = load <16 x i8>, ptr [[ALIGNED]], align 1
; CHECK-NEXT:    ret <16 x i8> [[LOAD]]
;
  %aligned = call ptr @llvm.ptrmask.p0.i64(ptr %ptr, i64 %mask)
  %load = load <16 x i8>, ptr %aligned, align 1
  ret <16 x i8> %load
}

define <16 x i8> @ptrmask_align_unknown_ptr_align8(ptr align 8 %ptr, i64 %mask) {
; CHECK-LABEL: @ptrmask_align_unknown_ptr_align8(
; CHECK-NEXT:    [[ALIGNED:%.*]] = call align 8 ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 [[MASK:%.*]])
; CHECK-NEXT:    [[LOAD:%.*]] = load <16 x i8>, ptr [[ALIGNED]], align 1
; CHECK-NEXT:    ret <16 x i8> [[LOAD]]
;
  %aligned = call ptr @llvm.ptrmask.p0.i64(ptr %ptr, i64 %mask)
  %load = load <16 x i8>, ptr %aligned, align 1
  ret <16 x i8> %load
}
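
; With an unknown mask, nothing is known about the result of masking an
; align 1 pointer, but an align 8 source pointer survives any mask: an 'and'
; can only clear address bits, and bits already known to be zero stay zero.
; Hence the align 8 return attribute on the second call above.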

; Increase load align from 1 to 2
define <16 x i8> @ptrmask_align2_ptr_align1(ptr align 1 %ptr) {
; CHECK-LABEL: @ptrmask_align2_ptr_align1(
; CHECK-NEXT:    [[ALIGNED:%.*]] = call align 2 ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -2)
; CHECK-NEXT:    [[LOAD:%.*]] = load <16 x i8>, ptr [[ALIGNED]], align 1
; CHECK-NEXT:    ret <16 x i8> [[LOAD]]
;
  %aligned = call ptr @llvm.ptrmask.p0.i64(ptr %ptr, i64 -2)
  %load = load <16 x i8>, ptr %aligned, align 1
  ret <16 x i8> %load
}

; Increase load align from 1 to 4
define <16 x i8> @ptrmask_align4_ptr_align1(ptr align 1 %ptr) {
; CHECK-LABEL: @ptrmask_align4_ptr_align1(
; CHECK-NEXT:    [[ALIGNED:%.*]] = call align 4 ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -4)
; CHECK-NEXT:    [[LOAD:%.*]] = load <16 x i8>, ptr [[ALIGNED]], align 1
; CHECK-NEXT:    ret <16 x i8> [[LOAD]]
;
  %aligned = call ptr @llvm.ptrmask.p0.i64(ptr %ptr, i64 -4)
  %load = load <16 x i8>, ptr %aligned, align 1
  ret <16 x i8> %load
}

; Increase load align from 1 to 8
define <16 x i8> @ptrmask_align8_ptr_align1(ptr align 1 %ptr) {
; CHECK-LABEL: @ptrmask_align8_ptr_align1(
; CHECK-NEXT:    [[ALIGNED:%.*]] = call align 8 ptr @llvm.ptrmask.p0.i64(ptr [[PTR:%.*]], i64 -8)
; CHECK-NEXT:    [[LOAD:%.*]] = load <16 x i8>, ptr [[ALIGNED]], align 1
; CHECK-NEXT:    ret <16 x i8> [[LOAD]]
;
  %aligned = call ptr @llvm.ptrmask.p0.i64(ptr %ptr, i64 -8)
  %load = load <16 x i8>, ptr %aligned, align 1
  ret <16 x i8> %load
}

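; In the last two tests the mask is a no-op: and-ing with -8 clears only the
; low three address bits, and those are already known to be zero for an
; align 8 (or align 16) pointer, so the ptrmask call folds away entirely.
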
; Underlying alignment already the same as the alignment forced by ptrmask
define <16 x i8> @ptrmask_align8_ptr_align8(ptr align 8 %ptr) {
; CHECK-LABEL: @ptrmask_align8_ptr_align8(
; CHECK-NEXT:    [[LOAD:%.*]] = load <16 x i8>, ptr [[PTR:%.*]], align 1
; CHECK-NEXT:    ret <16 x i8> [[LOAD]]
;
  %aligned = call ptr @llvm.ptrmask.p0.i64(ptr %ptr, i64 -8)
  %load = load <16 x i8>, ptr %aligned, align 1
  ret <16 x i8> %load
}

; Underlying alignment greater than the alignment forced by ptrmask
define <16 x i8> @ptrmask_align8_ptr_align16(ptr align 16 %ptr) {
; CHECK-LABEL: @ptrmask_align8_ptr_align16(
; CHECK-NEXT:    [[LOAD:%.*]] = load <16 x i8>, ptr [[PTR:%.*]], align 1
; CHECK-NEXT:    ret <16 x i8> [[LOAD]]
;
  %aligned = call ptr @llvm.ptrmask.p0.i64(ptr %ptr, i64 -8)
  %load = load <16 x i8>, ptr %aligned, align 1
  ret <16 x i8> %load
}