; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -passes=slp-vectorizer -slp-threshold=-18 < %s | FileCheck %s

; Make sure there's no SCEV assert when the indices are for
; different-sized address spaces.

target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
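; In this datalayout the default (flat) address space uses 64-bit pointers,
; while addrspace(3) (LDS) and addrspace(5) (private) use 32-bit pointers.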

define void @slp_scev_assert(i32 %idx, i64 %tmp3) #0 {
; CHECK-LABEL: @slp_scev_assert(
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[TMP:%.*]] = addrspacecast ptr addrspace(5) undef to ptr
; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i8, ptr addrspace(5) undef, i32 [[IDX:%.*]]
; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP]], i64 [[TMP3:%.*]]
; CHECK-NEXT:    store i8 0, ptr addrspace(5) [[TMP2]], align 1
; CHECK-NEXT:    store i8 0, ptr [[TMP4]], align 1
; CHECK-NEXT:    ret void
;
bb:
  %tmp = addrspacecast ptr addrspace(5) undef to ptr
  %tmp2 = getelementptr inbounds i8, ptr addrspace(5) undef, i32 %idx
  %tmp4 = getelementptr inbounds i8, ptr %tmp, i64 %tmp3
  store i8 0, ptr addrspace(5) %tmp2
  store i8 0, ptr %tmp4
  ret void
}

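; The LDS pointer is 32-bit and the flat pointer is 64-bit, so the two GEP
; chains index in different widths; the loads and stores stay scalar.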
define void @multi_as_reduction_different_sized(ptr addrspace(3) %lds, i32 %idx0, i64 %idx1) #0 {
; CHECK-LABEL: @multi_as_reduction_different_sized(
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[FLAT:%.*]] = addrspacecast ptr addrspace(3) [[LDS:%.*]] to ptr
; CHECK-NEXT:    [[ADD0:%.*]] = add i32 [[IDX0:%.*]], 2
; CHECK-NEXT:    [[ADD1:%.*]] = add i64 [[IDX1:%.*]], 1
; CHECK-NEXT:    [[LDS_1:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[LDS]], i32 [[ADD0]]
; CHECK-NEXT:    [[FLAT_1:%.*]] = getelementptr inbounds i32, ptr [[FLAT]], i64 [[ADD1]]
; CHECK-NEXT:    [[LOAD_LDS_0:%.*]] = load i32, ptr addrspace(3) [[LDS]], align 4
; CHECK-NEXT:    [[LOAD_LDS_1:%.*]] = load i32, ptr addrspace(3) [[LDS_1]], align 4
; CHECK-NEXT:    [[LOAD_FLAT_0:%.*]] = load i32, ptr [[FLAT]], align 4
; CHECK-NEXT:    [[LOAD_FLAT_1:%.*]] = load i32, ptr [[FLAT_1]], align 4
; CHECK-NEXT:    [[SUB0:%.*]] = sub i32 [[LOAD_FLAT_0]], [[LOAD_LDS_0]]
; CHECK-NEXT:    [[SUB1:%.*]] = sub i32 [[LOAD_FLAT_1]], [[LOAD_LDS_1]]
; CHECK-NEXT:    store i32 [[SUB0]], ptr undef, align 4
; CHECK-NEXT:    store i32 [[SUB1]], ptr undef, align 4
; CHECK-NEXT:    ret void
;
bb:
  %flat = addrspacecast ptr addrspace(3) %lds to ptr
  %add0 = add i32 %idx0, 2
  %add1 = add i64 %idx1, 1

  %lds.1 = getelementptr inbounds i32, ptr addrspace(3) %lds, i32 %add0
  %flat.1 = getelementptr inbounds i32, ptr %flat, i64 %add1

  %load.lds.0 = load i32, ptr addrspace(3) %lds, align 4
  %load.lds.1 = load i32, ptr addrspace(3) %lds.1, align 4

  %load.flat.0 = load i32, ptr %flat, align 4
  %load.flat.1 = load i32, ptr %flat.1, align 4

  %sub0 = sub i32 %load.flat.0, %load.lds.0
  %sub1 = sub i32 %load.flat.1, %load.lds.1

  store i32 %sub0, ptr undef
  store i32 %sub1, ptr undef
  ret void
}

; This should vectorize if getUnderlyingObject were used.
define void @multi_as_reduction_same_size(ptr addrspace(1) %global, i64 %idx0, i64 %idx1) #0 {
; CHECK-LABEL: @multi_as_reduction_same_size(
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[FLAT:%.*]] = addrspacecast ptr addrspace(1) [[GLOBAL:%.*]] to ptr
; CHECK-NEXT:    [[ADD0:%.*]] = add i64 [[IDX0:%.*]], 2
; CHECK-NEXT:    [[ADD1:%.*]] = add i64 [[IDX1:%.*]], 1
; CHECK-NEXT:    [[GLOBAL_1:%.*]] = getelementptr inbounds i32, ptr addrspace(1) [[GLOBAL]], i64 [[ADD0]]
; CHECK-NEXT:    [[FLAT_1:%.*]] = getelementptr inbounds i32, ptr [[FLAT]], i64 [[ADD1]]
; CHECK-NEXT:    [[LOAD_GLOBAL_0:%.*]] = load i32, ptr addrspace(1) [[GLOBAL]], align 4
; CHECK-NEXT:    [[LOAD_GLOBAL_1:%.*]] = load i32, ptr addrspace(1) [[GLOBAL_1]], align 4
; CHECK-NEXT:    [[LOAD_FLAT_0:%.*]] = load i32, ptr [[FLAT]], align 4
; CHECK-NEXT:    [[LOAD_FLAT_1:%.*]] = load i32, ptr [[FLAT_1]], align 4
; CHECK-NEXT:    [[SUB0:%.*]] = sub i32 [[LOAD_FLAT_0]], [[LOAD_GLOBAL_0]]
; CHECK-NEXT:    [[SUB1:%.*]] = sub i32 [[LOAD_FLAT_1]], [[LOAD_GLOBAL_1]]
; CHECK-NEXT:    store i32 [[SUB0]], ptr undef, align 4
; CHECK-NEXT:    store i32 [[SUB1]], ptr undef, align 4
; CHECK-NEXT:    ret void
;
bb:
  %flat = addrspacecast ptr addrspace(1) %global to ptr
  %add0 = add i64 %idx0, 2
  %add1 = add i64 %idx1, 1

  %global.1 = getelementptr inbounds i32, ptr addrspace(1) %global, i64 %add0
  %flat.1 = getelementptr inbounds i32, ptr %flat, i64 %add1

  %load.global.0 = load i32, ptr addrspace(1) %global, align 4
  %load.global.1 = load i32, ptr addrspace(1) %global.1, align 4

  %load.flat.0 = load i32, ptr %flat, align 4
  %load.flat.1 = load i32, ptr %flat.1, align 4

  %sub0 = sub i32 %load.flat.0, %load.global.0
  %sub1 = sub i32 %load.flat.1, %load.global.1

  store i32 %sub0, ptr undef
  store i32 %sub1, ptr undef
  ret void
}

; This should vectorize if getUnderlyingObject were used.
; Both index adds are done in i64, even though the addrspace(3) pointer size is only 32 bits.
define void @multi_as_reduction_different_sized_noncanon(ptr addrspace(3) %lds, i64 %idx0, i64 %idx1) #0 {
; CHECK-LABEL: @multi_as_reduction_different_sized_noncanon(
; CHECK-NEXT:  bb:
; CHECK-NEXT:    [[FLAT:%.*]] = addrspacecast ptr addrspace(3) [[LDS:%.*]] to ptr
; CHECK-NEXT:    [[ADD0:%.*]] = add i64 [[IDX0:%.*]], 2
; CHECK-NEXT:    [[ADD1:%.*]] = add i64 [[IDX1:%.*]], 1
; CHECK-NEXT:    [[LDS_1:%.*]] = getelementptr inbounds i32, ptr addrspace(3) [[LDS]], i64 [[ADD0]]
; CHECK-NEXT:    [[FLAT_1:%.*]] = getelementptr inbounds i32, ptr [[FLAT]], i64 [[ADD1]]
; CHECK-NEXT:    [[LOAD_LDS_0:%.*]] = load i32, ptr addrspace(3) [[LDS]], align 4
; CHECK-NEXT:    [[LOAD_LDS_1:%.*]] = load i32, ptr addrspace(3) [[LDS_1]], align 4
; CHECK-NEXT:    [[LOAD_FLAT_0:%.*]] = load i32, ptr [[FLAT]], align 4
; CHECK-NEXT:    [[LOAD_FLAT_1:%.*]] = load i32, ptr [[FLAT_1]], align 4
; CHECK-NEXT:    [[SUB0:%.*]] = sub i32 [[LOAD_FLAT_0]], [[LOAD_LDS_0]]
; CHECK-NEXT:    [[SUB1:%.*]] = sub i32 [[LOAD_FLAT_1]], [[LOAD_LDS_1]]
; CHECK-NEXT:    store i32 [[SUB0]], ptr undef, align 4
; CHECK-NEXT:    store i32 [[SUB1]], ptr undef, align 4
; CHECK-NEXT:    ret void
;
bb:
  %flat = addrspacecast ptr addrspace(3) %lds to ptr
  %add0 = add i64 %idx0, 2
  %add1 = add i64 %idx1, 1

  %lds.1 = getelementptr inbounds i32, ptr addrspace(3) %lds, i64 %add0
  %flat.1 = getelementptr inbounds i32, ptr %flat, i64 %add1

  %load.lds.0 = load i32, ptr addrspace(3) %lds, align 4
  %load.lds.1 = load i32, ptr addrspace(3) %lds.1, align 4

  %load.flat.0 = load i32, ptr %flat, align 4
  %load.flat.1 = load i32, ptr %flat.1, align 4

  %sub0 = sub i32 %load.flat.0, %load.lds.0
  %sub1 = sub i32 %load.flat.1, %load.lds.1

  store i32 %sub0, ptr undef
  store i32 %sub1, ptr undef
  ret void
}

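; Make sure SLP does not crash on stores through pointers that are
; addrspacecast from 32-bit addrspace(3) pointers.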
define void @slp_crash_on_addrspacecast() {
; CHECK-LABEL: @slp_crash_on_addrspacecast(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i64, ptr addrspace(3) undef, i32 undef
; CHECK-NEXT:    [[P0:%.*]] = addrspacecast ptr addrspace(3) [[TMP0]] to ptr
; CHECK-NEXT:    store i64 undef, ptr [[P0]], align 8
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i64, ptr addrspace(3) undef, i32 undef
; CHECK-NEXT:    [[P1:%.*]] = addrspacecast ptr addrspace(3) [[TMP1]] to ptr
; CHECK-NEXT:    store i64 undef, ptr [[P1]], align 8
; CHECK-NEXT:    ret void
;
entry:
  %0 = getelementptr inbounds i64, ptr addrspace(3) undef, i32 undef
  %p0 = addrspacecast ptr addrspace(3) %0 to ptr
  store i64 undef, ptr %p0, align 8
  %1 = getelementptr inbounds i64, ptr addrspace(3) undef, i32 undef
  %p1 = addrspacecast ptr addrspace(3) %1 to ptr
  store i64 undef, ptr %p1, align 8
  ret void
}