xref: /llvm-project/llvm/test/CodeGen/AMDGPU/replace-store-of-insert-load.ll (revision 98ccc70b93a39a7ea3e26f7f5b5fe40d39b5a7e5)
1*98ccc70bSpvanhout; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2*98ccc70bSpvanhout; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck %s
3*98ccc70bSpvanhout
4*98ccc70bSpvanhout; Regression test for a bug in `DAGCombiner::replaceStoreOfInsertLoad` where
5*98ccc70bSpvanhout; Idx could be smaller than PtrVT, causing a MUL to be emitted with inconsistent
6*98ccc70bSpvanhout; LHS/RHS types.
7*98ccc70bSpvanhout
; testcase_0: loads a <4 x float> from %in, overwrites element 3 with %arg and
; stores the whole vector back to the same address. The expected assembly below
; contains no vector load and a single dword store at byte offset 12
; (element index 3 * 4-byte float).
8*98ccc70bSpvanhoutdefine void @testcase_0(ptr addrspace(1) %in, float %arg) {
9*98ccc70bSpvanhout; CHECK-LABEL: testcase_0:
10*98ccc70bSpvanhout; CHECK:       ; %bb.0:
11*98ccc70bSpvanhout; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12*98ccc70bSpvanhout; CHECK-NEXT:    global_store_dword v[0:1], v2, off offset:12
13*98ccc70bSpvanhout; CHECK-NEXT:    s_waitcnt vmcnt(0)
14*98ccc70bSpvanhout; CHECK-NEXT:    s_setpc_b64 s[30:31]
15*98ccc70bSpvanhout  %loaded = load <4 x float>, ptr addrspace(1) %in
16*98ccc70bSpvanhout  %modified = insertelement <4 x float> %loaded, float %arg, i64 3
17*98ccc70bSpvanhout  store <4 x float> %modified, ptr addrspace(1) %in
18*98ccc70bSpvanhout  ret void
19*98ccc70bSpvanhout}
20*98ccc70bSpvanhout
; testcase_1: same load / insertelement / store pattern on a <6 x float>
; (a non-power-of-two element count), replacing element 4. The expected
; assembly below is a single dword store at byte offset 16
; (element index 4 * 4-byte float).
21*98ccc70bSpvanhoutdefine void @testcase_1(ptr addrspace(1) %in, float %arg) {
22*98ccc70bSpvanhout; CHECK-LABEL: testcase_1:
23*98ccc70bSpvanhout; CHECK:       ; %bb.0:
24*98ccc70bSpvanhout; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
25*98ccc70bSpvanhout; CHECK-NEXT:    global_store_dword v[0:1], v2, off offset:16
26*98ccc70bSpvanhout; CHECK-NEXT:    s_waitcnt vmcnt(0)
27*98ccc70bSpvanhout; CHECK-NEXT:    s_setpc_b64 s[30:31]
28*98ccc70bSpvanhout  %loaded = load <6 x float>, ptr addrspace(1) %in
29*98ccc70bSpvanhout  %modified = insertelement <6 x float> %loaded, float %arg, i64 4
30*98ccc70bSpvanhout  store <6 x float> %modified, ptr addrspace(1) %in
31*98ccc70bSpvanhout  ret void
32*98ccc70bSpvanhout}
33*98ccc70bSpvanhout
; testcase_2: same pattern with 64-bit elements: loads a <4 x double> from %in,
; replaces element 1 with %arg and stores the vector back. The expected
; assembly below is a single dwordx2 store at byte offset 8
; (element index 1 * 8-byte double).
34*98ccc70bSpvanhoutdefine void @testcase_2(ptr addrspace(1) %in, double %arg) {
35*98ccc70bSpvanhout; CHECK-LABEL: testcase_2:
36*98ccc70bSpvanhout; CHECK:       ; %bb.0:
37*98ccc70bSpvanhout; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
38*98ccc70bSpvanhout; CHECK-NEXT:    global_store_dwordx2 v[0:1], v[2:3], off offset:8
39*98ccc70bSpvanhout; CHECK-NEXT:    s_waitcnt vmcnt(0)
40*98ccc70bSpvanhout; CHECK-NEXT:    s_setpc_b64 s[30:31]
41*98ccc70bSpvanhout  %loaded = load <4 x double>, ptr addrspace(1) %in
42*98ccc70bSpvanhout  %modified = insertelement <4 x double> %loaded, double %arg, i64 1
43*98ccc70bSpvanhout  store <4 x double> %modified, ptr addrspace(1) %in
44*98ccc70bSpvanhout  ret void
45*98ccc70bSpvanhout}
46*98ccc70bSpvanhout
; testcase_3: same pattern on a <8 x double>, replacing the last element
; (index 7). The expected assembly below is a single dwordx2 store at byte
; offset 56 (element index 7 * 8-byte double).
47*98ccc70bSpvanhoutdefine void @testcase_3(ptr addrspace(1) %in, double %arg) {
48*98ccc70bSpvanhout; CHECK-LABEL: testcase_3:
49*98ccc70bSpvanhout; CHECK:       ; %bb.0:
50*98ccc70bSpvanhout; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
51*98ccc70bSpvanhout; CHECK-NEXT:    global_store_dwordx2 v[0:1], v[2:3], off offset:56
52*98ccc70bSpvanhout; CHECK-NEXT:    s_waitcnt vmcnt(0)
53*98ccc70bSpvanhout; CHECK-NEXT:    s_setpc_b64 s[30:31]
54*98ccc70bSpvanhout  %loaded = load <8 x double>, ptr addrspace(1) %in
55*98ccc70bSpvanhout  %modified = insertelement <8 x double> %loaded, double %arg, i64 7
56*98ccc70bSpvanhout  store <8 x double> %modified, ptr addrspace(1) %in
57*98ccc70bSpvanhout  ret void
58*98ccc70bSpvanhout}
59