xref: /llvm-project/llvm/test/CodeGen/AArch64/sitofp-to-tbl.ll (revision a497e987e5b09ab58efc7c6bef5ff68d4cd750f3)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2; RUN: llc -verify-machineinstrs < %s | FileCheck %s
3
; Every function in this file is compiled for AArch64 Linux, so the expected
; output in the FileCheck assertions below is AArch64 assembly.
4target triple = "aarch64-linux"
5
6; CHECK-LABEL: .LCPI0_0:
7; CHECK-NEXT:  .byte    255
8; CHECK-NEXT:  .byte    255
9; CHECK-NEXT:  .byte    255
10; CHECK-NEXT:  .byte    4
11; CHECK-NEXT:  .byte    255
12; CHECK-NEXT:  .byte    255
13; CHECK-NEXT:  .byte    255
14; CHECK-NEXT:  .byte    5
15; CHECK-NEXT:  .byte    255
16; CHECK-NEXT:  .byte    255
17; CHECK-NEXT:  .byte    255
18; CHECK-NEXT:  .byte    6
19; CHECK-NEXT:  .byte    255
20; CHECK-NEXT:  .byte    255
21; CHECK-NEXT:  .byte    255
22; CHECK-NEXT:  .byte    7
23; CHECK-NEXT:  .LCPI0_1:
24; CHECK-NEXT:  .byte    255
25; CHECK-NEXT:  .byte    255
26; CHECK-NEXT:  .byte    255
27; CHECK-NEXT:  .byte    0
28; CHECK-NEXT:  .byte    255
29; CHECK-NEXT:  .byte    255
30; CHECK-NEXT:  .byte    255
31; CHECK-NEXT:  .byte    1
32; CHECK-NEXT:  .byte    255
33; CHECK-NEXT:  .byte    255
34; CHECK-NEXT:  .byte    255
35; CHECK-NEXT:  .byte    2
36; CHECK-NEXT:  .byte    255
37; CHECK-NEXT:  .byte    255
38; CHECK-NEXT:  .byte    255
39; CHECK-NEXT:  .byte    3
40
; Positive test: sitofp <8 x i8> -> <8 x float> inside a loop is expected to be
; lowered with `tbl` shuffles rather than a chain of sign-extends.
;
; Expected shape of the generated code (see the assertions in the body):
;   * Two 16-byte masks (.LCPI0_0 / .LCPI0_1) are loaded once, before the loop.
;     Each group of four mask bytes is 255, 255, 255, N, i.e. source byte N is
;     placed in the last byte of a 32-bit lane and the other three bytes use
;     the out-of-range index 255. .LCPI0_0 selects bytes 4-7, .LCPI0_1 selects
;     bytes 0-3.
;   * Per iteration: one 8-byte load, two `tbl`, two `scvtf ..., #24`, and one
;     `stp` that writes the bytes 0-3 result first and the bytes 4-7 result
;     second.
;   * The `#24` fixed-point form of `scvtf` presumably compensates for the
;     source byte sitting 24 bits up in its lane (assumes little-endian lane
;     layout and that `tbl` yields 0 for index 255 -- see the Arm ISA docs).
41define void @sitofp_v8i8_to_v8f32(ptr %src, ptr %dst) {
42; CHECK-LABEL: sitofp_v8i8_to_v8f32:
43; CHECK:       // %bb.0: // %entry
44; CHECK-NEXT:    adrp x8, .LCPI0_0
45; CHECK-NEXT:    adrp x9, .LCPI0_1
46; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI0_0]
47; CHECK-NEXT:    ldr q1, [x9, :lo12:.LCPI0_1]
48; CHECK-NEXT:    mov x8, xzr
49; CHECK-NEXT:  .LBB0_1: // %loop
50; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
51; CHECK-NEXT:    ldr d2, [x0, x8, lsl #3]
52; CHECK-NEXT:    add x9, x1, x8, lsl #5
53; CHECK-NEXT:    add x8, x8, #1
54; CHECK-NEXT:    cmp x8, #1000
55; CHECK-NEXT:    tbl v3.16b, { v2.16b }, v0.16b
56; CHECK-NEXT:    tbl v2.16b, { v2.16b }, v1.16b
57; CHECK-NEXT:    scvtf v3.4s, v3.4s, #24
58; CHECK-NEXT:    scvtf v2.4s, v2.4s, #24
59; CHECK-NEXT:    stp q2, q3, [x9]
60; CHECK-NEXT:    b.eq .LBB0_1
61; CHECK-NEXT:  // %bb.2: // %exit
62; CHECK-NEXT:    ret
63entry:
64  br label %loop
65
66loop:
  ; %iv indexes both arrays: element %iv of %src (<8 x i8>) is converted and
  ; written to element %iv of %dst (<8 x float>).
67  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
68  %gep.src = getelementptr inbounds <8 x i8>, ptr %src, i64 %iv
69  %l = load <8 x i8>, ptr %gep.src
70  %conv = sitofp <8 x i8> %l to <8 x float>
71  %gep.dst = getelementptr inbounds <8 x float>, ptr %dst, i64 %iv
72  store <8 x float> %conv, ptr %gep.dst
73  %iv.next = add i64 %iv, 1
  ; NOTE(review): the back edge is taken when %iv.next == 1000 (hence the
  ; `b.eq` expected above), so the body executes only once. This looks
  ; inverted relative to a 1000-trip loop -- confirm before changing it, since
  ; the expected assembly depends on the current form.
74  %ec = icmp eq i64 %iv.next, 1000
75  br i1 %ec, label %loop, label %exit
76
77exit:
78  ret void
79}
80
81; CHECK-LABEL: .LCPI1_0:
82; CHECK-NEXT: .byte    255
83; CHECK-NEXT: .byte    255
84; CHECK-NEXT: .byte    255
85; CHECK-NEXT: .byte    12
86; CHECK-NEXT: .byte    255
87; CHECK-NEXT: .byte    255
88; CHECK-NEXT: .byte    255
89; CHECK-NEXT: .byte    13
90; CHECK-NEXT: .byte    255
91; CHECK-NEXT: .byte    255
92; CHECK-NEXT: .byte    255
93; CHECK-NEXT: .byte    14
94; CHECK-NEXT: .byte    255
95; CHECK-NEXT: .byte    255
96; CHECK-NEXT: .byte    255
97; CHECK-NEXT: .byte    15
98; CHECK-NEXT: .LCPI1_1:
99; CHECK-NEXT: .byte    255
100; CHECK-NEXT: .byte    255
101; CHECK-NEXT: .byte    255
102; CHECK-NEXT: .byte    8
103; CHECK-NEXT: .byte    255
104; CHECK-NEXT: .byte    255
105; CHECK-NEXT: .byte    255
106; CHECK-NEXT: .byte    9
107; CHECK-NEXT: .byte    255
108; CHECK-NEXT: .byte    255
109; CHECK-NEXT: .byte    255
110; CHECK-NEXT: .byte    10
111; CHECK-NEXT: .byte    255
112; CHECK-NEXT: .byte    255
113; CHECK-NEXT: .byte    255
114; CHECK-NEXT: .byte    11
115; CHECK-NEXT: .LCPI1_2:
116; CHECK-NEXT: .byte    255
117; CHECK-NEXT: .byte    255
118; CHECK-NEXT: .byte    255
119; CHECK-NEXT: .byte    4
120; CHECK-NEXT: .byte    255
121; CHECK-NEXT: .byte    255
122; CHECK-NEXT: .byte    255
123; CHECK-NEXT: .byte    5
124; CHECK-NEXT: .byte    255
125; CHECK-NEXT: .byte    255
126; CHECK-NEXT: .byte    255
127; CHECK-NEXT: .byte    6
128; CHECK-NEXT: .byte    255
129; CHECK-NEXT: .byte    255
130; CHECK-NEXT: .byte    255
131; CHECK-NEXT: .byte    7
132; CHECK-NEXT: .LCPI1_3:
133; CHECK-NEXT: .byte    255
134; CHECK-NEXT: .byte    255
135; CHECK-NEXT: .byte    255
136; CHECK-NEXT: .byte    0
137; CHECK-NEXT: .byte    255
138; CHECK-NEXT: .byte    255
139; CHECK-NEXT: .byte    255
140; CHECK-NEXT: .byte    1
141; CHECK-NEXT: .byte    255
142; CHECK-NEXT: .byte    255
143; CHECK-NEXT: .byte    255
144; CHECK-NEXT: .byte    2
145; CHECK-NEXT: .byte    255
146; CHECK-NEXT: .byte    255
147; CHECK-NEXT: .byte    255
148; CHECK-NEXT: .byte    3
149
; Positive test: sitofp <16 x i8> -> <16 x float> inside a loop is expected to
; be lowered with `tbl` shuffles.
;
; Expected shape of the generated code (see the assertions in the body):
;   * Four 16-byte masks are loaded once, before the loop. Each group of four
;     mask bytes is 255, 255, 255, N. .LCPI1_0 selects source bytes 12-15,
;     .LCPI1_1 bytes 8-11, .LCPI1_2 bytes 4-7 and .LCPI1_3 bytes 0-3.
;   * Per iteration: one 16-byte load, four `tbl`, four `scvtf ..., #24`, and
;     two `stp`. The results for bytes 0-3 and 4-7 go to [x9]; the results for
;     bytes 8-11 and 12-15 go to [x9, #32].
;   * The `#24` fixed-point form of `scvtf` presumably compensates for the
;     source byte sitting 24 bits up in its lane (assumes little-endian lane
;     layout and that `tbl` yields 0 for index 255 -- see the Arm ISA docs).
150define void @sitofp_v16i8_to_v16f32(ptr %src, ptr %dst) {
151; CHECK-LABEL: sitofp_v16i8_to_v16f32:
152; CHECK:       // %bb.0: // %entry
153; CHECK-NEXT:    adrp x8, .LCPI1_0
154; CHECK-NEXT:    adrp x9, .LCPI1_1
155; CHECK-NEXT:    adrp x10, .LCPI1_2
156; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI1_0]
157; CHECK-NEXT:    adrp x8, .LCPI1_3
158; CHECK-NEXT:    ldr q1, [x9, :lo12:.LCPI1_1]
159; CHECK-NEXT:    ldr q2, [x10, :lo12:.LCPI1_2]
160; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI1_3]
161; CHECK-NEXT:    mov x8, xzr
162; CHECK-NEXT:  .LBB1_1: // %loop
163; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
164; CHECK-NEXT:    ldr q4, [x0, x8, lsl #4]
165; CHECK-NEXT:    add x9, x1, x8, lsl #6
166; CHECK-NEXT:    add x8, x8, #1
167; CHECK-NEXT:    cmp x8, #1000
168; CHECK-NEXT:    tbl v5.16b, { v4.16b }, v0.16b
169; CHECK-NEXT:    tbl v6.16b, { v4.16b }, v1.16b
170; CHECK-NEXT:    tbl v7.16b, { v4.16b }, v2.16b
171; CHECK-NEXT:    tbl v4.16b, { v4.16b }, v3.16b
172; CHECK-NEXT:    scvtf v5.4s, v5.4s, #24
173; CHECK-NEXT:    scvtf v6.4s, v6.4s, #24
174; CHECK-NEXT:    scvtf v7.4s, v7.4s, #24
175; CHECK-NEXT:    scvtf v4.4s, v4.4s, #24
176; CHECK-NEXT:    stp q6, q5, [x9, #32]
177; CHECK-NEXT:    stp q4, q7, [x9]
178; CHECK-NEXT:    b.eq .LBB1_1
179; CHECK-NEXT:  // %bb.2: // %exit
180; CHECK-NEXT:    ret
181entry:
182  br label %loop
183
184loop:
  ; %iv indexes both arrays: element %iv of %src (<16 x i8>) is converted and
  ; written to element %iv of %dst (<16 x float>).
185  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
186  %gep.src = getelementptr inbounds <16 x i8>, ptr %src, i64 %iv
187  %l = load <16 x i8>, ptr %gep.src
188  %conv = sitofp <16 x i8> %l to <16 x float>
189  %gep.dst = getelementptr inbounds <16 x float>, ptr %dst, i64 %iv
190  store <16 x float> %conv, ptr %gep.dst
191  %iv.next = add i64 %iv, 1
  ; NOTE(review): the back edge is taken when %iv.next == 1000 (hence the
  ; `b.eq` expected above), so the body executes only once. This looks
  ; inverted relative to a 1000-trip loop -- confirm before changing it, since
  ; the expected assembly depends on the current form.
192  %ec = icmp eq i64 %iv.next, 1000
193  br i1 %ec, label %loop, label %exit
194
195exit:
196  ret void
197}
198
199
200; Negative test: conversions to f16/bf16 are currently not lowered via `tbl`.
; Only the f16 case (<8 x i8> -> <8 x half>) is exercised by this function.
;
; Expected shape of the generated code (see the assertions in the body): no
; constant-pool masks and no `tbl`. Each iteration loads 8 bytes, widens them
; with `sshll` to 16-bit and then with `sshll`/`sshll2` to two 32-bit vectors,
; converts both with plain `scvtf`, narrows back to half precision with
; `fcvtn`/`fcvtn2`, and stores one 16-byte result.
201define void @sitofp_v8i8_to_v8f16(ptr %src, ptr %dst) {
202; CHECK-LABEL: sitofp_v8i8_to_v8f16:
203; CHECK:       // %bb.0: // %entry
204; CHECK-NEXT:    mov x8, xzr
205; CHECK-NEXT:  .LBB2_1: // %loop
206; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
207; CHECK-NEXT:    ldr d0, [x0, x8, lsl #3]
208; CHECK-NEXT:    sshll v0.8h, v0.8b, #0
209; CHECK-NEXT:    sshll v1.4s, v0.4h, #0
210; CHECK-NEXT:    sshll2 v0.4s, v0.8h, #0
211; CHECK-NEXT:    scvtf v1.4s, v1.4s
212; CHECK-NEXT:    scvtf v0.4s, v0.4s
213; CHECK-NEXT:    fcvtn v1.4h, v1.4s
214; CHECK-NEXT:    fcvtn2 v1.8h, v0.4s
215; CHECK-NEXT:    str q1, [x1, x8, lsl #4]
216; CHECK-NEXT:    add x8, x8, #1
217; CHECK-NEXT:    cmp x8, #1000
218; CHECK-NEXT:    b.eq .LBB2_1
219; CHECK-NEXT:  // %bb.2: // %exit
220; CHECK-NEXT:    ret
221entry:
222  br label %loop
223
224loop:
  ; %iv indexes both arrays: element %iv of %src (<8 x i8>) is converted and
  ; written to element %iv of %dst (<8 x half>).
225  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
226  %gep.src = getelementptr inbounds <8 x i8>, ptr %src, i64 %iv
227  %l = load <8 x i8>, ptr %gep.src
228  %conv = sitofp <8 x i8> %l to <8 x half>
229  %gep.dst = getelementptr inbounds <8 x half>, ptr %dst, i64 %iv
230  store <8 x half> %conv, ptr %gep.dst
231  %iv.next = add i64 %iv, 1
  ; NOTE(review): the back edge is taken when %iv.next == 1000 (hence the
  ; `b.eq` expected above), so the body executes only once. This looks
  ; inverted relative to a 1000-trip loop -- confirm before changing it, since
  ; the expected assembly depends on the current form.
232  %ec = icmp eq i64 %iv.next, 1000
233  br i1 %ec, label %loop, label %exit
234
235exit:
236  ret void
237}
238
239
240; Negative test: conversion to double with the help of `tbl` is not
; implemented yet (TODO).
;
; Expected shape of the generated code (see the assertions in the body): no
; constant-pool masks and no `tbl`. Each iteration loads the two source bytes
; individually with sign-extending `ldrsb`, places them in the two 32-bit lanes
; of a vector (`fmov` + `mov v0.s[1]`), widens to 64-bit lanes with `sshll`,
; converts with plain `scvtf`, and stores one 16-byte result.
241define void @sitofp_v2i8_to_v2f64(ptr %src, ptr %dst) {
242; CHECK-LABEL: sitofp_v2i8_to_v2f64:
243; CHECK:       // %bb.0: // %entry
244; CHECK-NEXT:    mov x8, xzr
245; CHECK-NEXT:  .LBB3_1: // %loop
246; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
247; CHECK-NEXT:    add x9, x0, x8, lsl #1
248; CHECK-NEXT:    ldrsb w10, [x9]
249; CHECK-NEXT:    ldrsb w9, [x9, #1]
250; CHECK-NEXT:    fmov s0, w10
251; CHECK-NEXT:    mov v0.s[1], w9
252; CHECK-NEXT:    sshll v0.2d, v0.2s, #0
253; CHECK-NEXT:    scvtf v0.2d, v0.2d
254; CHECK-NEXT:    str q0, [x1, x8, lsl #4]
255; CHECK-NEXT:    add x8, x8, #1
256; CHECK-NEXT:    cmp x8, #1000
257; CHECK-NEXT:    b.eq .LBB3_1
258; CHECK-NEXT:  // %bb.2: // %exit
259; CHECK-NEXT:    ret
260entry:
261  br label %loop
262
263loop:
  ; %iv indexes both arrays: element %iv of %src (<2 x i8>) is converted and
  ; written to element %iv of %dst (<2 x double>).
264  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
265  %gep.src = getelementptr inbounds <2 x i8>, ptr %src, i64 %iv
266  %l = load <2 x i8>, ptr %gep.src
267  %conv = sitofp <2 x i8> %l to <2 x double>
268  %gep.dst = getelementptr inbounds <2 x double>, ptr %dst, i64 %iv
269  store <2 x double> %conv, ptr %gep.dst
270  %iv.next = add i64 %iv, 1
  ; NOTE(review): the back edge is taken when %iv.next == 1000 (hence the
  ; `b.eq` expected above), so the body executes only once. This looks
  ; inverted relative to a 1000-trip loop -- confirm before changing it, since
  ; the expected assembly depends on the current form.
271  %ec = icmp eq i64 %iv.next, 1000
272  br i1 %ec, label %loop, label %exit
273
274exit:
275  ret void
276}
277