xref: /llvm-project/llvm/test/CodeGen/AArch64/vec-combine-compare-truncate-store.ll (revision 0b4688403672264ab451992a3461a0df113c3bd7)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2; RUN: llc -mtriple=aarch64-apple-darwin -mattr=+neon -verify-machineinstrs < %s | FileCheck %s
3
; Stores the <16 x i1> result of comparing each i8 lane of %vec against zero.
; The expected code inverts a cmeq-with-zero via bic against a constant-pool
; mask, rearranges the halves with ext + zip1, reduces with addv.8h and writes
; the result with a 16-bit store (str h0).
4define void @store_16_elements(<16 x i8> %vec, ptr %out) {
5; The mask is loaded from the constant pool (lCPI0_0); its contents are not checked here.
6; CHECK-LABEL: store_16_elements:
7; CHECK:       ; %bb.0:
8; CHECK-NEXT:  Lloh0:
9; CHECK-NEXT:    adrp x8, lCPI0_0@PAGE
10; CHECK-NEXT:    cmeq.16b v0, v0, #0
11; CHECK-NEXT:  Lloh1:
12; CHECK-NEXT:    ldr q1, [x8, lCPI0_0@PAGEOFF]
13; CHECK-NEXT:    bic.16b v0, v1, v0
14; CHECK-NEXT:    ext.16b v1, v0, v0, #8
15; CHECK-NEXT:    zip1.16b v0, v0, v1
16; CHECK-NEXT:    addv.8h h0, v0
17; CHECK-NEXT:    str h0, [x0]
18; CHECK-NEXT:    ret
19; CHECK-NEXT:    .loh AdrpLdr Lloh0, Lloh1
20
21; IR under test: compare every lane with zero and store the <16 x i1> result.
22
23  %cmp_result = icmp ne <16 x i8> %vec, zeroinitializer
24  store <16 x i1> %cmp_result, ptr %out
25  ret void
26}
27
; Stores the <8 x i1> result of comparing each i16 lane of %vec against zero.
; The expected code masks the inverted cmeq result with a constant-pool vector,
; reduces it with addv.8h and writes a single byte (strb).
28define void @store_8_elements(<8 x i16> %vec, ptr %out) {
29; CHECK-LABEL: store_8_elements:
30; CHECK:       ; %bb.0:
31; CHECK-NEXT:  Lloh2:
32; CHECK-NEXT:    adrp x8, lCPI1_0@PAGE
33; CHECK-NEXT:    cmeq.8h v0, v0, #0
34; CHECK-NEXT:  Lloh3:
35; CHECK-NEXT:    ldr q1, [x8, lCPI1_0@PAGEOFF]
36; CHECK-NEXT:    bic.16b v0, v1, v0
37; CHECK-NEXT:    addv.8h h0, v0
38; CHECK-NEXT:    fmov w8, s0
39; CHECK-NEXT:    strb w8, [x0]
40; CHECK-NEXT:    ret
41; CHECK-NEXT:    .loh AdrpLdr Lloh2, Lloh3
42
43
  ; Each lane is true where the corresponding input element is non-zero.
44  %cmp_result = icmp ne <8 x i16> %vec, zeroinitializer
45  store <8 x i1> %cmp_result, ptr %out
46  ret void
47}
48
; Stores the <4 x i1> result of comparing each i32 lane of %vec against zero.
; The expected code masks the inverted cmeq result with a constant-pool vector,
; reduces it with addv.4s and writes a single byte (strb).
49define void @store_4_elements(<4 x i32> %vec, ptr %out) {
50; CHECK-LABEL: store_4_elements:
51; CHECK:       ; %bb.0:
52; CHECK-NEXT:  Lloh4:
53; CHECK-NEXT:    adrp x8, lCPI2_0@PAGE
54; CHECK-NEXT:    cmeq.4s v0, v0, #0
55; CHECK-NEXT:  Lloh5:
56; CHECK-NEXT:    ldr q1, [x8, lCPI2_0@PAGEOFF]
57; CHECK-NEXT:    bic.16b v0, v1, v0
58; CHECK-NEXT:    addv.4s s0, v0
59; CHECK-NEXT:    fmov w8, s0
60; CHECK-NEXT:    strb w8, [x0]
61; CHECK-NEXT:    ret
62; CHECK-NEXT:    .loh AdrpLdr Lloh4, Lloh5
63
64
  ; Each lane is true where the corresponding input element is non-zero.
65  %cmp_result = icmp ne <4 x i32> %vec, zeroinitializer
66  store <4 x i1> %cmp_result, ptr %out
67  ret void
68}
69
; Stores the <2 x i1> result of comparing each i64 lane of %vec against zero.
; With only two lanes the expected reduction is a pairwise add (addp.2d)
; rather than addv; the result is still written as a single byte (strb).
70define void @store_2_elements(<2 x i64> %vec, ptr %out) {
71; CHECK-LABEL: store_2_elements:
72; CHECK:       ; %bb.0:
73; CHECK-NEXT:  Lloh6:
74; CHECK-NEXT:    adrp x8, lCPI3_0@PAGE
75; CHECK-NEXT:    cmeq.2d v0, v0, #0
76; CHECK-NEXT:  Lloh7:
77; CHECK-NEXT:    ldr q1, [x8, lCPI3_0@PAGEOFF]
78; CHECK-NEXT:    bic.16b v0, v1, v0
79; CHECK-NEXT:    addp.2d d0, v0
80; CHECK-NEXT:    fmov x8, d0
81; CHECK-NEXT:    strb w8, [x0]
82; CHECK-NEXT:    ret
83; CHECK-NEXT:    .loh AdrpLdr Lloh6, Lloh7
84
85
  ; Each lane is true where the corresponding input element is non-zero.
86  %cmp_result = icmp ne <2 x i64> %vec, zeroinitializer
87  store <2 x i1> %cmp_result, ptr %out
88  ret void
89}
90
; Stores a <4 x i1> produced by truncating <4 x i32>; the IR contains no
; compare. The expected code first turns the low bit of each lane into a lane
; mask (shl #31 followed by cmlt #0) and then applies the same
; and-with-constant / addv.4s / strb sequence as the compare-based tests.
91define void @add_trunc_compare_before_store(<4 x i32> %vec, ptr %out) {
92; CHECK-LABEL: add_trunc_compare_before_store:
93; CHECK:       ; %bb.0:
94; CHECK-NEXT:    shl.4s v0, v0, #31
95; CHECK-NEXT:  Lloh8:
96; CHECK-NEXT:    adrp x8, lCPI4_0@PAGE
97; CHECK-NEXT:  Lloh9:
98; CHECK-NEXT:    ldr q1, [x8, lCPI4_0@PAGEOFF]
99; CHECK-NEXT:    cmlt.4s v0, v0, #0
100; CHECK-NEXT:    and.16b v0, v0, v1
101; CHECK-NEXT:    addv.4s s0, v0
102; CHECK-NEXT:    fmov w8, s0
103; CHECK-NEXT:    strb w8, [x0]
104; CHECK-NEXT:    ret
105; CHECK-NEXT:    .loh AdrpLdr Lloh8, Lloh9
106
107
  ; Only the least significant bit of each i32 lane survives the truncate.
108  %trunc = trunc <4 x i32> %vec to <4 x i1>
109  store <4 x i1> %trunc, ptr %out
110  ret void
111}
112
; Stores a <4 x i1> that arrives directly as a function argument, so the IR
; has neither a compare nor a truncate. The expected code operates on 16-bit
; lanes of a 64-bit vector (shl.4h #15, cmlt.4h #0), masks with a constant
; loaded into a d register, reduces with addv.4h and writes one byte.
113define void @add_trunc_mask_unknown_vector_type(<4 x i1> %vec, ptr %out) {
114; CHECK-LABEL: add_trunc_mask_unknown_vector_type:
115; CHECK:       ; %bb.0:
116; CHECK-NEXT:    shl.4h v0, v0, #15
117; CHECK-NEXT:  Lloh10:
118; CHECK-NEXT:    adrp x8, lCPI5_0@PAGE
119; CHECK-NEXT:  Lloh11:
120; CHECK-NEXT:    ldr d1, [x8, lCPI5_0@PAGEOFF]
121; CHECK-NEXT:    cmlt.4h v0, v0, #0
122; CHECK-NEXT:    and.8b v0, v0, v1
123; CHECK-NEXT:    addv.4h h0, v0
124; CHECK-NEXT:    fmov w8, s0
125; CHECK-NEXT:    strb w8, [x0]
126; CHECK-NEXT:    ret
127; CHECK-NEXT:    .loh AdrpLdr Lloh10, Lloh11
128
129
130  store <4 x i1> %vec, ptr %out
131  ret void
132}
133
; 64-bit-vector variant: stores the <8 x i1> result of comparing each i8 lane
; of an <8 x i8> input against zero. The expected code uses the 8b forms
; throughout, reduces with addv.8b and stores the byte straight from the
; vector register (st1.b lane 0) without moving it to a general register.
134define void @store_8_elements_64_bit_vector(<8 x i8> %vec, ptr %out) {
135; CHECK-LABEL: store_8_elements_64_bit_vector:
136; CHECK:       ; %bb.0:
137; CHECK-NEXT:  Lloh12:
138; CHECK-NEXT:    adrp x8, lCPI6_0@PAGE
139; CHECK-NEXT:    cmeq.8b v0, v0, #0
140; CHECK-NEXT:  Lloh13:
141; CHECK-NEXT:    ldr d1, [x8, lCPI6_0@PAGEOFF]
142; CHECK-NEXT:    bic.8b v0, v1, v0
143; CHECK-NEXT:    addv.8b b0, v0
144; CHECK-NEXT:    st1.b { v0 }[0], [x0]
145; CHECK-NEXT:    ret
146; CHECK-NEXT:    .loh AdrpLdr Lloh12, Lloh13
147
148
  ; Each lane is true where the corresponding input element is non-zero.
149  %cmp_result = icmp ne <8 x i8> %vec, zeroinitializer
150  store <8 x i1> %cmp_result, ptr %out
151  ret void
152}
153
; 64-bit-vector variant: stores the <4 x i1> result of comparing each i16 lane
; of a <4 x i16> input against zero. The expected code masks with a constant
; loaded into a d register, reduces with addv.4h and writes one byte (strb).
154define void @store_4_elements_64_bit_vector(<4 x i16> %vec, ptr %out) {
155; CHECK-LABEL: store_4_elements_64_bit_vector:
156; CHECK:       ; %bb.0:
157; CHECK-NEXT:  Lloh14:
158; CHECK-NEXT:    adrp x8, lCPI7_0@PAGE
159; CHECK-NEXT:    cmeq.4h v0, v0, #0
160; CHECK-NEXT:  Lloh15:
161; CHECK-NEXT:    ldr d1, [x8, lCPI7_0@PAGEOFF]
162; CHECK-NEXT:    bic.8b v0, v1, v0
163; CHECK-NEXT:    addv.4h h0, v0
164; CHECK-NEXT:    fmov w8, s0
165; CHECK-NEXT:    strb w8, [x0]
166; CHECK-NEXT:    ret
167; CHECK-NEXT:    .loh AdrpLdr Lloh14, Lloh15
168
169
  ; Each lane is true where the corresponding input element is non-zero.
170  %cmp_result = icmp ne <4 x i16> %vec, zeroinitializer
171  store <4 x i1> %cmp_result, ptr %out
172  ret void
173}
174
; 64-bit-vector variant: stores the <2 x i1> result of comparing each i32 lane
; of a <2 x i32> input against zero. With two lanes the expected reduction is
; a pairwise add (addp.2s) and the result is written as one byte (strb).
175define void @store_2_elements_64_bit_vector(<2 x i32> %vec, ptr %out) {
176; CHECK-LABEL: store_2_elements_64_bit_vector:
177; CHECK:       ; %bb.0:
178; CHECK-NEXT:  Lloh16:
179; CHECK-NEXT:    adrp x8, lCPI8_0@PAGE
180; CHECK-NEXT:    cmeq.2s v0, v0, #0
181; CHECK-NEXT:  Lloh17:
182; CHECK-NEXT:    ldr d1, [x8, lCPI8_0@PAGEOFF]
183; CHECK-NEXT:    bic.8b v0, v1, v0
184; CHECK-NEXT:    addp.2s v0, v0, v0
185; CHECK-NEXT:    fmov w8, s0
186; CHECK-NEXT:    strb w8, [x0]
187; CHECK-NEXT:    ret
188; CHECK-NEXT:    .loh AdrpLdr Lloh16, Lloh17
189
190
  ; Each lane is true where the corresponding input element is non-zero.
191  %cmp_result = icmp ne <2 x i32> %vec, zeroinitializer
192  store <2 x i1> %cmp_result, ptr %out
193  ret void
194}
195
; Negative test: the compare result is sign-extended back to <16 x i8> and
; stored as a full vector, so no <N x i1> value reaches memory. The expected
; code is just cmtst plus a 128-bit store, with no constant-pool mask and no
; addv reduction.
196define void @no_combine_without_truncate(<16 x i8> %vec, ptr %out) {
197; CHECK-LABEL: no_combine_without_truncate:
198; CHECK:       ; %bb.0:
199; CHECK-NEXT:    cmtst.16b v0, v0, v0
200; CHECK-NEXT:    str q0, [x0]
201; CHECK-NEXT:    ret
202
203  %cmp_result = icmp ne <16 x i8> %vec, zeroinitializer
  ; Widen each i1 back to a full byte before the store.
204  %extended_result = sext <16 x i1> %cmp_result to <16 x i8>
205  store <16 x i8> %extended_result, ptr %out
206  ret void
207}
208
; Negative test: the truncate targets i8 lanes rather than i1, so the stored
; value is an ordinary <4 x i8>. The expected code narrows with xtn + uzp1 and
; writes 32 bits (str s0), with no constant-pool mask and no addv reduction.
209define void @no_combine_for_non_bool_truncate(<4 x i32> %vec, ptr %out) {
210; CHECK-LABEL: no_combine_for_non_bool_truncate:
211; CHECK:       ; %bb.0:
212; CHECK-NEXT:    xtn.4h v0, v0
213; CHECK-NEXT:    uzp1.8b v0, v0, v0
214; CHECK-NEXT:    str s0, [x0]
215; CHECK-NEXT:    ret
216
217  %trunc = trunc <4 x i32> %vec to <4 x i8>
218  store <4 x i8> %trunc, ptr %out
219  ret void
220}
221
; Negative test: the <4 x i1> is assembled from four scalar i1 arguments with
; insertelement. The expected code stays in general-purpose registers, OR-ing
; each argument into its bit position (%a at bit 0 through %d at bit 3) and
; storing one byte; no vector instructions are expected.
222define void @no_combine_for_build_vector(i1 %a, i1 %b, i1 %c, i1 %d, ptr %out) {
223; CHECK-LABEL: no_combine_for_build_vector:
224; CHECK:       ; %bb.0:
225; CHECK-NEXT:    orr w8, w0, w1, lsl #1
226; CHECK-NEXT:    orr w8, w8, w2, lsl #2
227; CHECK-NEXT:    orr w8, w8, w3, lsl #3
228; CHECK-NEXT:    strb w8, [x4]
229; CHECK-NEXT:    ret
230
  ; Build the vector one lane at a time, starting from an undef vector.
231  %1 =   insertelement <4 x i1> undef, i1 %a, i64 0
232  %2 =   insertelement <4 x i1>    %1, i1 %b, i64 1
233  %3 =   insertelement <4 x i1>    %2, i1 %c, i64 2
234  %vec = insertelement <4 x i1>    %3, i1 %d, i64 3
235  store <4 x i1> %vec, ptr %out
236  ret void
237}
238