xref: /llvm-project/llvm/test/Transforms/LoadStoreVectorizer/AMDGPU/complex-index.ll (revision eecb99c5f66c8491766628a2925587e20f3b1dbd)
1; RUN: opt -mtriple=amdgcn-amd-amdhsa -passes=load-store-vectorizer -S -o - %s | FileCheck %s
2; RUN: opt -mtriple=amdgcn-amd-amdhsa -aa-pipeline=basic-aa -passes='function(load-store-vectorizer)' -S -o - %s | FileCheck %s
3
; Module-level data layout. Per the LLVM LangRef layout-string grammar:
;   e          little-endian
;   p:64:64    default-address-space pointers are 64 bits, 64-bit aligned
;   p1:64:64   addrspace(1) pointers are 64 bits, 64-bit aligned
;   n32:64     native integer widths are 32 and 64 bits
;   S32        natural stack alignment is 32 bits
;   A5         allocas live in address space 5
; The remaining pN / vN entries give pointer and vector sizes/alignments for
; the other address spaces and vector widths.
4target datalayout = "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32-p7:160:256:256:32-p8:128:128-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
5
; External function taking an i32 and returning an i64. The symbol is the
; Itanium mangling of get_local_id(unsigned int). No definition is visible in
; this file. NOTE(review): presumably the OpenCL work-item builtin - confirm.
6declare i64 @_Z12get_local_idj(i32)
7
; External function taking an i32 and returning an i64. The symbol is the
; Itanium mangling of get_group_id(unsigned int). No definition is visible in
; this file. NOTE(review): presumably the OpenCL work-group builtin - confirm.
8declare i64 @_Z12get_group_idj(i32)
9
; LLVM intrinsic: fmuladd(a, b, c) yields a * b + c on doubles; the LangRef
; leaves it to the target whether the multiply-add is fused.
10declare double @llvm.fmuladd.f64(double, double, double)
11
12; CHECK-LABEL: @factorizedVsNonfactorizedAccess(
13; CHECK: load <2 x float>
14; CHECK: store <2 x float>
; Test input: a kernel that reads, rescales and writes back two float elements
; of %c whose indices are %add10 and %add10 | 1. The index is built from a long
; shift/mask/or/add chain instead of a simple "base + constant" form; the
; FileCheck expectations placed just above this function require the two scalar
; loads and the two scalar stores to each come out as one <2 x float> access.
;
; Parameter:
;   %c - addrspace(1) pointer (nocapture) to the float buffer being updated.
; Returns nothing.
15define amdgpu_kernel void @factorizedVsNonfactorizedAccess(ptr addrspace(1) nocapture %c) {
16entry:
  ; Two i64 ids obtained from the external helpers, both queried with argument 0.
17  %call = tail call i64 @_Z12get_local_idj(i32 0)
18  %call1 = tail call i64 @_Z12get_group_idj(i32 0)
  ; %div = local >> 4, %div2 = group >> 3 (logical shifts).
19  %div = lshr i64 %call, 4
20  %div2 = lshr i64 %call1, 3
  ; %mul = %div2 * 128, so its low 7 bits are zero.
21  %mul = shl i64 %div2, 7
  ; (local << 3) & 120 == (local & 15) * 8: only bits 3..6 can be set.
22  %rem = shl i64 %call, 3
23  %mul3 = and i64 %rem, 120
  ; The operands occupy disjoint bit ranges (see above), so this 'or' is an add.
24  %add = or i64 %mul, %mul3
  ; (group << 7) & 896 == (group & 7) * 128: only bits 7..9 can be set.
25  %rem4 = shl i64 %call1, 7
26  %mul5 = and i64 %rem4, 896
  ; %mul6 = (local >> 4) * 8.
27  %mul6 = shl nuw nsw i64 %div, 3
28  %add7 = add nuw i64 %mul5, %mul6
  ; Scale by 1024, then add the low part to form the final element index.
  ; Both addends are multiples of 8 (%mul9 has 10 zero low bits, %add has 3),
  ; so %add10 is a multiple of 8 as well.
29  %mul9 = shl i64 %add7, 10
30  %add10 = add i64 %mul9, %add
  ; First element: &c[%add10].
31  %arrayidx = getelementptr inbounds float, ptr addrspace(1) %c, i64 %add10
32  %load1 = load float, ptr addrspace(1) %arrayidx, align 4
  ; Widen to double, multiply by a constant, and round back through float.
33  %conv = fpext float %load1 to double
34  %mul11 = fmul double %conv, 0x3FEAB481D8F35506
35  %conv12 = fptrunc double %mul11 to float
36  %conv18 = fpext float %conv12 to double
  ; fmuladd(K, 0.0, x) = K * 0.0 + x, with x the value rounded through float.
37  %storeval1 = tail call double @llvm.fmuladd.f64(double 0x3FF4FFAFBBEC946A, double 0.000000e+00, double %conv18)
38  %cstoreval1 = fptrunc double %storeval1 to float
  ; Write the result back to the address it was loaded from. This store sits
  ; between the two loads in program order.
  ; NOTE(review): presumably the pass must prove it does not alias the second
  ; load in order to merge the loads - confirm against the pass.
39  store float %cstoreval1, ptr addrspace(1) %arrayidx, align 4
40
  ; Second element index. Bit 0 of %add10 is always clear (it is a multiple of
  ; 8), which is what makes the 'disjoint' flag valid and makes this equal to
  ; %add10 + 1, i.e. the float immediately after the first one.
41  %add23 = or disjoint i64 %add10, 1
42  %arrayidx24 = getelementptr inbounds float, ptr addrspace(1) %c, i64 %add23
43  %load2 = load float, ptr addrspace(1) %arrayidx24, align 4
  ; Same widen / scale / round-trip sequence as for the first element.
44  %conv25 = fpext float %load2 to double
45  %mul26 = fmul double %conv25, 0x3FEAB481D8F35506
46  %conv27 = fptrunc double %mul26 to float
47  %conv34 = fpext float %conv27 to double
48  %storeval2 = tail call double @llvm.fmuladd.f64(double 0x3FF4FFAFBBEC946A, double 0.000000e+00, double %conv34)
49  %cstoreval2 = fptrunc double %storeval2 to float
  ; Write the second result back to its own slot.
50  store float %cstoreval2, ptr addrspace(1) %arrayidx24, align 4
51  ret void
52}
53