; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2

;; Test how memcpy is optimized when ual is turned off. This is similar to
;; AArch64/arm64-misaligned-memcpy-inline.ll.

; RUN: llc --mtriple=loongarch32 --mattr=-ual < %s | FileCheck %s --check-prefix=LA32
; RUN: llc --mtriple=loongarch64 --mattr=-ual < %s | FileCheck %s --check-prefix=LA64

;; Small (16 bytes here) unaligned memcpy() should be a function call if
;; ual is turned off.
define void @t0(ptr %out, ptr %in) {
; LA32-LABEL: t0:
; LA32:       # %bb.0: # %entry
; LA32-NEXT:    addi.w $sp, $sp, -16
; LA32-NEXT:    .cfi_def_cfa_offset 16
; LA32-NEXT:    st.w $ra, $sp, 12 # 4-byte Folded Spill
; LA32-NEXT:    .cfi_offset 1, -4
; LA32-NEXT:    ori $a2, $zero, 16
; LA32-NEXT:    bl %plt(memcpy)
; LA32-NEXT:    ld.w $ra, $sp, 12 # 4-byte Folded Reload
; LA32-NEXT:    addi.w $sp, $sp, 16
; LA32-NEXT:    ret
;
; LA64-LABEL: t0:
; LA64:       # %bb.0: # %entry
; LA64-NEXT:    addi.d $sp, $sp, -16
; LA64-NEXT:    .cfi_def_cfa_offset 16
; LA64-NEXT:    st.d $ra, $sp, 8 # 8-byte Folded Spill
; LA64-NEXT:    .cfi_offset 1, -8
; LA64-NEXT:    ori $a2, $zero, 16
; LA64-NEXT:    bl %plt(memcpy)
; LA64-NEXT:    ld.d $ra, $sp, 8 # 8-byte Folded Reload
; LA64-NEXT:    addi.d $sp, $sp, 16
; LA64-NEXT:    ret
entry:
  call void @llvm.memcpy.p0.p0.i64(ptr %out, ptr %in, i64 16, i1 false)
  ret void
}

;; Small (16 bytes here) aligned memcpy() should be inlined even if
;; ual is turned off.
define void @t1(ptr align 8 %out, ptr align 8 %in) {
; LA32-LABEL: t1:
; LA32:       # %bb.0: # %entry
; LA32-NEXT:    ld.w $a2, $a1, 12
; LA32-NEXT:    st.w $a2, $a0, 12
; LA32-NEXT:    ld.w $a2, $a1, 8
; LA32-NEXT:    st.w $a2, $a0, 8
; LA32-NEXT:    ld.w $a2, $a1, 4
; LA32-NEXT:    st.w $a2, $a0, 4
; LA32-NEXT:    ld.w $a1, $a1, 0
; LA32-NEXT:    st.w $a1, $a0, 0
; LA32-NEXT:    ret
;
; LA64-LABEL: t1:
; LA64:       # %bb.0: # %entry
; LA64-NEXT:    ld.d $a2, $a1, 8
; LA64-NEXT:    st.d $a2, $a0, 8
; LA64-NEXT:    ld.d $a1, $a1, 0
; LA64-NEXT:    st.d $a1, $a0, 0
; LA64-NEXT:    ret
entry:
  call void @llvm.memcpy.p0.p0.i64(ptr align 8 %out, ptr align 8 %in, i64 16, i1 false)
  ret void
}

;; Tiny (4 bytes here) unaligned memcpy() should be inlined with byte-sized
;; loads and stores if ual is turned off.
define void @t2(ptr %out, ptr %in) {
; LA32-LABEL: t2:
; LA32:       # %bb.0: # %entry
; LA32-NEXT:    ld.b $a2, $a1, 3
; LA32-NEXT:    st.b $a2, $a0, 3
; LA32-NEXT:    ld.b $a2, $a1, 2
; LA32-NEXT:    st.b $a2, $a0, 2
; LA32-NEXT:    ld.b $a2, $a1, 1
; LA32-NEXT:    st.b $a2, $a0, 1
; LA32-NEXT:    ld.b $a1, $a1, 0
; LA32-NEXT:    st.b $a1, $a0, 0
; LA32-NEXT:    ret
;
; LA64-LABEL: t2:
; LA64:       # %bb.0: # %entry
; LA64-NEXT:    ld.b $a2, $a1, 3
; LA64-NEXT:    st.b $a2, $a0, 3
; LA64-NEXT:    ld.b $a2, $a1, 2
; LA64-NEXT:    st.b $a2, $a0, 2
; LA64-NEXT:    ld.b $a2, $a1, 1
; LA64-NEXT:    st.b $a2, $a0, 1
; LA64-NEXT:    ld.b $a1, $a1, 0
; LA64-NEXT:    st.b $a1, $a0, 0
; LA64-NEXT:    ret
entry:
  call void @llvm.memcpy.p0.p0.i64(ptr %out, ptr %in, i64 4, i1 false)
  ret void
}

@.str = private constant [22 x i8] c"preemption imbalance \00", align 1

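;; Small (22 bytes here) memcpy() from a constant string into a byte-aligned
;; stack buffer.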
define void @t3() {
; LA32-LABEL: t3:
; LA32:       # %bb.0: # %entry
; LA32-NEXT:    addi.w $sp, $sp, -64
; LA32-NEXT:    .cfi_def_cfa_offset 64
; LA32-NEXT:    pcalau12i $a0, %pc_hi20(.L.str)
; LA32-NEXT:    addi.w $a0, $a0, %pc_lo12(.L.str)
; LA32-NEXT:    ld.h $a1, $a0, 20
; LA32-NEXT:    ld.w $a2, $a0, 16
; LA32-NEXT:    st.h $a1, $sp, 20
; LA32-NEXT:    st.w $a2, $sp, 16
; LA32-NEXT:    ld.w $a1, $a0, 12
; LA32-NEXT:    ld.w $a2, $a0, 8
; LA32-NEXT:    ld.w $a3, $a0, 4
; LA32-NEXT:    ld.w $a0, $a0, 0
; LA32-NEXT:    st.w $a1, $sp, 12
; LA32-NEXT:    st.w $a2, $sp, 8
; LA32-NEXT:    st.w $a3, $sp, 4
; LA32-NEXT:    st.w $a0, $sp, 0
; LA32-NEXT:    addi.w $sp, $sp, 64
; LA32-NEXT:    ret
;
; LA64-LABEL: t3:
; LA64:       # %bb.0: # %entry
; LA64-NEXT:    addi.d $sp, $sp, -64
; LA64-NEXT:    .cfi_def_cfa_offset 64
; LA64-NEXT:    pcalau12i $a0, %pc_hi20(.L.str)
; LA64-NEXT:    addi.d $a0, $a0, %pc_lo12(.L.str)
; LA64-NEXT:    ld.h $a1, $a0, 20
; LA64-NEXT:    ld.w $a2, $a0, 16
; LA64-NEXT:    ld.d $a3, $a0, 8
; LA64-NEXT:    ld.d $a0, $a0, 0
; LA64-NEXT:    st.h $a1, $sp, 20
; LA64-NEXT:    st.w $a2, $sp, 16
; LA64-NEXT:    st.d $a3, $sp, 8
; LA64-NEXT:    st.d $a0, $sp, 0
; LA64-NEXT:    addi.d $sp, $sp, 64
; LA64-NEXT:    ret
entry:
  %msgbuf = alloca [64 x i8], align 1
  call void @llvm.memcpy.p0.p0.i64(ptr align 1 %msgbuf, ptr align 1 @.str, i64 22, i1 false)
  ret void
}

declare void @llvm.memcpy.p0.p0.i64(ptr nocapture, ptr nocapture readonly, i64, i1)