xref: /llvm-project/llvm/test/CodeGen/ARM/combine-vmovdrr.ll (revision bed1c7f061aa12417aa081e334afdba45767b938)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=thumbv7s-none-eabi %s -o - | FileCheck %s
3
; Declaration of the NEON vtbl2 intrinsic called by every test function in
; this file: three <8 x i8> operands and an <8 x i8> result.
4declare <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> %shuffle.i.i307, <8 x i8> %shuffle.i27.i308, <8 x i8> %vtbl2.i25.i)
5
6; Check that we get the motivating example:
7; The bitcasts force the values to go through the GPRs, whereas
8; they are defined on VPRs and used on VPRs.
9;
10define void @motivatingExample(ptr %addr, ptr %addr2) {
; Expected code: the <2 x i64> value is loaded straight into d16/d17 and
; vtbl.8 reads it from those same registers; no vmov to or from core
; registers appears between the load and the table lookup.
11; CHECK-LABEL: motivatingExample:
12; CHECK:       @ %bb.0:
13; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
14; CHECK-NEXT:    vldr d18, [r1]
15; CHECK-NEXT:    vtbl.8 d16, {d16, d17}, d18
16; CHECK-NEXT:    vstr d16, [r1]
17; CHECK-NEXT:    bx lr
; Both i64 halves of the vector loaded from %addr are extracted with constant
; indices (0 and 1), bitcast to <8 x i8>, and passed as the first two vtbl2
; operands. The third operand is loaded from %addr2, which also receives the
; result.
18  %shuffle.i.bc.i309 = load <2 x i64>, ptr %addr
19  %vtbl2.i25.i = load <8 x i8>, ptr %addr2
20  %shuffle.i.extract.i310 = extractelement <2 x i64> %shuffle.i.bc.i309, i32 0
21  %shuffle.i27.extract.i311 = extractelement <2 x i64> %shuffle.i.bc.i309, i32 1
22  %tmp45 = bitcast i64 %shuffle.i.extract.i310 to <8 x i8>
23  %tmp46 = bitcast i64 %shuffle.i27.extract.i311 to <8 x i8>
24  %vtbl2.i25.i313 = tail call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> %tmp45, <8 x i8> %tmp46, <8 x i8> %vtbl2.i25.i)
25  store <8 x i8> %vtbl2.i25.i313, ptr %addr2
26  ret void
27}
28
29; Check that we do not perform the transformation for a dynamic index.
30define void @dynamicIndex(ptr %addr, ptr %addr2, i32 %index) {
; Expected code: sp is realigned to 16 bytes (bfc #0, #4) and the loaded
; vector is stored to that stack slot. Two 32-bit words are then reloaded
; from offsets computed from %index (r2): word (2*index & 3) and word
; ((2*index + 1) & 3). d16 is rebuilt from those core registers with vmov
; before vtbl.8, while d17 is still used as loaded by vld1.64.
31; CHECK-LABEL: dynamicIndex:
32; CHECK:       @ %bb.0:
33; CHECK-NEXT:    .save {r4, r6, r7, lr}
34; CHECK-NEXT:    push {r4, r6, r7, lr}
35; CHECK-NEXT:    .setfp r7, sp, #8
36; CHECK-NEXT:    add r7, sp, #8
37; CHECK-NEXT:    .pad #16
38; CHECK-NEXT:    sub sp, #16
39; CHECK-NEXT:    mov r4, sp
40; CHECK-NEXT:    bfc r4, #0, #4
41; CHECK-NEXT:    mov sp, r4
42; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
43; CHECK-NEXT:    adds r0, r2, r2
44; CHECK-NEXT:    and r2, r0, #3
45; CHECK-NEXT:    adds r0, #1
46; CHECK-NEXT:    mov r12, sp
47; CHECK-NEXT:    and r0, r0, #3
48; CHECK-NEXT:    lsls r2, r2, #2
49; CHECK-NEXT:    mov r3, r12
50; CHECK-NEXT:    vst1.64 {d16, d17}, [r3:128], r2
51; CHECK-NEXT:    orr.w r0, r12, r0, lsl #2
52; CHECK-NEXT:    sub.w r4, r7, #8
53; CHECK-NEXT:    ldr r2, [r3]
54; CHECK-NEXT:    ldr r0, [r0]
55; CHECK-NEXT:    vldr d18, [r1]
56; CHECK-NEXT:    vmov d16, r2, r0
57; CHECK-NEXT:    vtbl.8 d16, {d16, d17}, d18
58; CHECK-NEXT:    vstr d16, [r1]
59; CHECK-NEXT:    mov sp, r4
60; CHECK-NEXT:    pop {r4, r6, r7, pc}
; Same IR as @motivatingExample except that the first extractelement uses the
; runtime value %index instead of constant 0; the second still uses constant
; index 1.
61  %shuffle.i.bc.i309 = load <2 x i64>, ptr %addr
62  %vtbl2.i25.i = load <8 x i8>, ptr %addr2
63  %shuffle.i.extract.i310 = extractelement <2 x i64> %shuffle.i.bc.i309, i32 %index
64  %shuffle.i27.extract.i311 = extractelement <2 x i64> %shuffle.i.bc.i309, i32 1
65  %tmp45 = bitcast i64 %shuffle.i.extract.i310 to <8 x i8>
66  %tmp46 = bitcast i64 %shuffle.i27.extract.i311 to <8 x i8>
67  %vtbl2.i25.i313 = tail call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> %tmp45, <8 x i8> %tmp46, <8 x i8> %vtbl2.i25.i)
68  store <8 x i8> %vtbl2.i25.i313, ptr %addr2
69  ret void
70}
71
72; Check that we do not perform the transformation when there are several uses
73; of the result of the bitcast.
74define i64 @severalUses(ptr %addr, ptr %addr2) {
; Expected code: one vmov copies d16 into core registers r0 and r2 (r2 is
; moved to r1 just before the return) to produce the i64 return value, while
; vtbl.8 still reads d16/d17 as loaded by vld1.64.
75; CHECK-LABEL: severalUses:
76; CHECK:       @ %bb.0:
77; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
78; CHECK-NEXT:    vmov r0, r2, d16
79; CHECK-NEXT:    vldr d18, [r1]
80; CHECK-NEXT:    vtbl.8 d16, {d16, d17}, d18
81; CHECK-NEXT:    vstr d16, [r1]
82; CHECK-NEXT:    mov r1, r2
83; CHECK-NEXT:    bx lr
; Same IR as @motivatingExample except that element 0
; (%shuffle.i.extract.i310) has a second use: besides feeding the bitcast
; passed to vtbl2, it is returned as the function's i64 result.
84  %shuffle.i.bc.i309 = load <2 x i64>, ptr %addr
85  %vtbl2.i25.i = load <8 x i8>, ptr %addr2
86  %shuffle.i.extract.i310 = extractelement <2 x i64> %shuffle.i.bc.i309, i32 0
87  %shuffle.i27.extract.i311 = extractelement <2 x i64> %shuffle.i.bc.i309, i32 1
88  %tmp45 = bitcast i64 %shuffle.i.extract.i310 to <8 x i8>
89  %tmp46 = bitcast i64 %shuffle.i27.extract.i311 to <8 x i8>
90  %vtbl2.i25.i313 = tail call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> %tmp45, <8 x i8> %tmp46, <8 x i8> %vtbl2.i25.i)
91  store <8 x i8> %vtbl2.i25.i313, ptr %addr2
92  ret i64 %shuffle.i.extract.i310
93}
94