xref: /llvm-project/llvm/test/CodeGen/AArch64/arm64-scvt.ll (revision 5ddce70ef0e5a641d7fea95e31fc5e2439cb98cb)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=arm64-eabi -mcpu=cyclone | FileCheck --check-prefixes=CHECK,CHECK-CYC %s
3; RUN: llc < %s -mtriple=arm64-eabi -mcpu=cortex-a57 | FileCheck --check-prefixes=CHECK,CHECK-A57 %s
4
; sitofp i32 -> float from a plain load: the value is expected to be loaded
; straight into s0 and converted with the FPR-source form of scvtf.
define float @t1(ptr nocapture %src) nounwind ssp {
; CHECK-LABEL: t1:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr s0, [x0]
; CHECK-NEXT:    scvtf s0, s0
; CHECK-NEXT:    ret
entry:
  %tmp1 = load i32, ptr %src, align 4
  %tmp2 = sitofp i32 %tmp1 to float
  ret float %tmp2
}
16
; uitofp i32 -> float from a plain load: the value is expected to be loaded
; straight into s0 and converted with the FPR-source form of ucvtf.
define float @t2(ptr nocapture %src) nounwind ssp {
; CHECK-LABEL: t2:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr s0, [x0]
; CHECK-NEXT:    ucvtf s0, s0
; CHECK-NEXT:    ret
entry:
  %tmp1 = load i32, ptr %src, align 4
  %tmp2 = uitofp i32 %tmp1 to float
  ret float %tmp2
}
28
; sitofp i64 -> double from a plain load: the value is expected to be loaded
; straight into d0 and converted with the FPR-source form of scvtf.
define double @t3(ptr nocapture %src) nounwind ssp {
; CHECK-LABEL: t3:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    scvtf d0, d0
; CHECK-NEXT:    ret
entry:
  %tmp1 = load i64, ptr %src, align 4
  %tmp2 = sitofp i64 %tmp1 to double
  ret double %tmp2
}
40
; uitofp i64 -> double from a plain load: the value is expected to be loaded
; straight into d0 and converted with the FPR-source form of ucvtf.
define double @t4(ptr nocapture %src) nounwind ssp {
; CHECK-LABEL: t4:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    ucvtf d0, d0
; CHECK-NEXT:    ret
entry:
  %tmp1 = load i64, ptr %src, align 4
  %tmp2 = uitofp i64 %tmp1 to double
  ret double %tmp2
}
52
53; rdar://13136456
; sitofp i32 -> double in an optsize function: the expected code loads the
; i32 into a GPR (w8) and uses the GPR-source form of scvtf.
define double @t5(ptr nocapture %src) nounwind ssp optsize {
; CHECK-LABEL: t5:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr w8, [x0]
; CHECK-NEXT:    scvtf d0, w8
; CHECK-NEXT:    ret
entry:
  %tmp1 = load i32, ptr %src, align 4
  %tmp2 = sitofp i32 %tmp1 to double
  ret double %tmp2
}
65
; Check that the value is loaded directly into an FP register when it is
; about to be converted to a floating-point value.
; This is much faster than loading into a GPR and then performing the
; GPR -> FPR conversion.
; <rdar://problem/14599607>
;
; Check the following patterns for signed/unsigned:
; 1. load with scaled imm to float.
; 2. load with scaled register to float.
; 3. load with scaled imm to double.
; 4. load with scaled register to double.
; 5. load with unscaled imm to float.
; 6. load with unscaled imm to double.
; With load sizes of 8, 16, 32, and 64 bits.
80
81; ********* 1. load with scaled imm to float. *********
; uitofp i8 -> float, load at a scaled immediate offset (#1): expect an FPR
; load (ldr b0) feeding ucvtf. The converted value is then squared.
define float @fct1(ptr nocapture %sp0) {
; CHECK-LABEL: fct1:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr b0, [x0, #1]
; CHECK-NEXT:    ucvtf s0, s0
; CHECK-NEXT:    fmul s0, s0, s0
; CHECK-NEXT:    ret
entry:
  %addr = getelementptr i8, ptr %sp0, i64 1
  %pix_sp0.0.copyload = load i8, ptr %addr, align 1
  %val = uitofp i8 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}
96
; uitofp i16 -> float, load at a scaled immediate offset (#2): expect an FPR
; load (ldr h0) feeding ucvtf. The converted value is then squared.
define float @fct2(ptr nocapture %sp0) {
; CHECK-LABEL: fct2:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr h0, [x0, #2]
; CHECK-NEXT:    ucvtf s0, s0
; CHECK-NEXT:    fmul s0, s0, s0
; CHECK-NEXT:    ret
entry:
  %addr = getelementptr i16, ptr %sp0, i64 1
  %pix_sp0.0.copyload = load i16, ptr %addr, align 1
  %val = uitofp i16 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}
111
; uitofp i32 -> float, load at a scaled immediate offset (#4): expect an FPR
; load (ldr s0) feeding ucvtf. The converted value is then squared.
define float @fct3(ptr nocapture %sp0) {
; CHECK-LABEL: fct3:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr s0, [x0, #4]
; CHECK-NEXT:    ucvtf s0, s0
; CHECK-NEXT:    fmul s0, s0, s0
; CHECK-NEXT:    ret
entry:
  %addr = getelementptr i32, ptr %sp0, i64 1
  %pix_sp0.0.copyload = load i32, ptr %addr, align 1
  %val = uitofp i32 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}
126
127; i64 -> f32 is not supported on floating point unit.
; uitofp i64 -> float, load at a scaled immediate offset (#8): the expected
; code keeps the i64 in a GPR (ldr x8) and uses the GPR-source ucvtf.
define float @fct4(ptr nocapture %sp0) {
; CHECK-LABEL: fct4:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr x8, [x0, #8]
; CHECK-NEXT:    ucvtf s0, x8
; CHECK-NEXT:    fmul s0, s0, s0
; CHECK-NEXT:    ret
entry:
  %addr = getelementptr i64, ptr %sp0, i64 1
  %pix_sp0.0.copyload = load i64, ptr %addr, align 1
  %val = uitofp i64 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}
142
143; ********* 2. load with scaled register to float. *********
; uitofp i8 -> float, load at a register offset: expect an FPR load
; (ldr b0, [x0, x1]) feeding ucvtf. The converted value is then squared.
define float @fct5(ptr nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct5:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr b0, [x0, x1]
; CHECK-NEXT:    ucvtf s0, s0
; CHECK-NEXT:    fmul s0, s0, s0
; CHECK-NEXT:    ret
entry:
  %addr = getelementptr i8, ptr %sp0, i64 %offset
  %pix_sp0.0.copyload = load i8, ptr %addr, align 1
  %val = uitofp i8 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}
158
; uitofp i16 -> float, load at a scaled register offset (lsl #1): expect an
; FPR load (ldr h0) feeding ucvtf. The converted value is then squared.
define float @fct6(ptr nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct6:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr h0, [x0, x1, lsl #1]
; CHECK-NEXT:    ucvtf s0, s0
; CHECK-NEXT:    fmul s0, s0, s0
; CHECK-NEXT:    ret
entry:
  %addr = getelementptr i16, ptr %sp0, i64 %offset
  %pix_sp0.0.copyload = load i16, ptr %addr, align 1
  %val = uitofp i16 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}
173
; uitofp i32 -> float, load at a scaled register offset (lsl #2): expect an
; FPR load (ldr s0) feeding ucvtf. The converted value is then squared.
define float @fct7(ptr nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct7:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr s0, [x0, x1, lsl #2]
; CHECK-NEXT:    ucvtf s0, s0
; CHECK-NEXT:    fmul s0, s0, s0
; CHECK-NEXT:    ret
entry:
  %addr = getelementptr i32, ptr %sp0, i64 %offset
  %pix_sp0.0.copyload = load i32, ptr %addr, align 1
  %val = uitofp i32 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}
188
189; i64 -> f32 is not supported on floating point unit.
; uitofp i64 -> float, load at a scaled register offset (lsl #3): the
; expected code keeps the i64 in a GPR (ldr x8) and uses the GPR-source ucvtf.
define float @fct8(ptr nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr x8, [x0, x1, lsl #3]
; CHECK-NEXT:    ucvtf s0, x8
; CHECK-NEXT:    fmul s0, s0, s0
; CHECK-NEXT:    ret
entry:
  %addr = getelementptr i64, ptr %sp0, i64 %offset
  %pix_sp0.0.copyload = load i64, ptr %addr, align 1
  %val = uitofp i64 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}
204
205
206; ********* 3. load with scaled imm to double. *********
; uitofp i8 -> double, load at a scaled immediate offset (#1): expect an FPR
; load (ldr b0) feeding ucvtf d0, d0. The converted value is then squared.
define double @fct9(ptr nocapture %sp0) {
; CHECK-LABEL: fct9:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr b0, [x0, #1]
; CHECK-NEXT:    ucvtf d0, d0
; CHECK-NEXT:    fmul d0, d0, d0
; CHECK-NEXT:    ret
entry:
  %addr = getelementptr i8, ptr %sp0, i64 1
  %pix_sp0.0.copyload = load i8, ptr %addr, align 1
  %val = uitofp i8 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}
221
; uitofp i16 -> double, load at a scaled immediate offset (#2): expect an FPR
; load (ldr h0) feeding ucvtf d0, d0. The converted value is then squared.
define double @fct10(ptr nocapture %sp0) {
; CHECK-LABEL: fct10:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr h0, [x0, #2]
; CHECK-NEXT:    ucvtf d0, d0
; CHECK-NEXT:    fmul d0, d0, d0
; CHECK-NEXT:    ret
entry:
  %addr = getelementptr i16, ptr %sp0, i64 1
  %pix_sp0.0.copyload = load i16, ptr %addr, align 1
  %val = uitofp i16 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}
236
; uitofp i32 -> double, load at a scaled immediate offset (#4): expect an FPR
; load (ldr s0) feeding ucvtf d0, d0. The converted value is then squared.
define double @fct11(ptr nocapture %sp0) {
; CHECK-LABEL: fct11:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr s0, [x0, #4]
; CHECK-NEXT:    ucvtf d0, d0
; CHECK-NEXT:    fmul d0, d0, d0
; CHECK-NEXT:    ret
entry:
  %addr = getelementptr i32, ptr %sp0, i64 1
  %pix_sp0.0.copyload = load i32, ptr %addr, align 1
  %val = uitofp i32 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}
251
; uitofp i64 -> double, load at a scaled immediate offset (#8): expect an FPR
; load (ldr d0) feeding ucvtf d0, d0. The converted value is then squared.
define double @fct12(ptr nocapture %sp0) {
; CHECK-LABEL: fct12:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr d0, [x0, #8]
; CHECK-NEXT:    ucvtf d0, d0
; CHECK-NEXT:    fmul d0, d0, d0
; CHECK-NEXT:    ret
entry:
  %addr = getelementptr i64, ptr %sp0, i64 1
  %pix_sp0.0.copyload = load i64, ptr %addr, align 1
  %val = uitofp i64 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}
266
267; ********* 4. load with scaled register to double. *********
; uitofp i8 -> double, load at a register offset: expect an FPR load
; (ldr b0, [x0, x1]) feeding ucvtf d0, d0. The result is then squared.
define double @fct13(ptr nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct13:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr b0, [x0, x1]
; CHECK-NEXT:    ucvtf d0, d0
; CHECK-NEXT:    fmul d0, d0, d0
; CHECK-NEXT:    ret
entry:
  %addr = getelementptr i8, ptr %sp0, i64 %offset
  %pix_sp0.0.copyload = load i8, ptr %addr, align 1
  %val = uitofp i8 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}
282
; uitofp i16 -> double, load at a scaled register offset (lsl #1): expect an
; FPR load (ldr h0) feeding ucvtf d0, d0. The result is then squared.
define double @fct14(ptr nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct14:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr h0, [x0, x1, lsl #1]
; CHECK-NEXT:    ucvtf d0, d0
; CHECK-NEXT:    fmul d0, d0, d0
; CHECK-NEXT:    ret
entry:
  %addr = getelementptr i16, ptr %sp0, i64 %offset
  %pix_sp0.0.copyload = load i16, ptr %addr, align 1
  %val = uitofp i16 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}
297
; uitofp i32 -> double, load at a scaled register offset (lsl #2): expect an
; FPR load (ldr s0) feeding ucvtf d0, d0. The result is then squared.
define double @fct15(ptr nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct15:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr s0, [x0, x1, lsl #2]
; CHECK-NEXT:    ucvtf d0, d0
; CHECK-NEXT:    fmul d0, d0, d0
; CHECK-NEXT:    ret
entry:
  %addr = getelementptr i32, ptr %sp0, i64 %offset
  %pix_sp0.0.copyload = load i32, ptr %addr, align 1
  %val = uitofp i32 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}
312
; uitofp i64 -> double, load at a scaled register offset (lsl #3): expect an
; FPR load (ldr d0) feeding ucvtf d0, d0. The result is then squared.
define double @fct16(ptr nocapture %sp0, i64 %offset) {
; CHECK-LABEL: fct16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr d0, [x0, x1, lsl #3]
; CHECK-NEXT:    ucvtf d0, d0
; CHECK-NEXT:    fmul d0, d0, d0
; CHECK-NEXT:    ret
entry:
  %addr = getelementptr i64, ptr %sp0, i64 %offset
  %pix_sp0.0.copyload = load i64, ptr %addr, align 1
  %val = uitofp i64 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}
327
328; ********* 5. load with unscaled imm to float. *********
; uitofp i8 -> float, address formed as ptr + (-1) via ptrtoint/inttoptr:
; expect an unscaled FPR load (ldur b0, [x0, #-1]) feeding ucvtf.
define float @fct17(ptr nocapture %sp0) {
; CHECK-LABEL: fct17:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldur b0, [x0, #-1]
; CHECK-NEXT:    ucvtf s0, s0
; CHECK-NEXT:    fmul s0, s0, s0
; CHECK-NEXT:    ret
entry:
  %bitcast = ptrtoint ptr %sp0 to i64
  %add = add i64 %bitcast, -1
  %addr = inttoptr i64 %add to ptr
  %pix_sp0.0.copyload = load i8, ptr %addr, align 1
  %val = uitofp i8 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}
345
; uitofp i16 -> float, address formed as ptr + 1 via ptrtoint/inttoptr:
; expect an unscaled FPR load (ldur h0, [x0, #1]) feeding ucvtf.
define float @fct18(ptr nocapture %sp0) {
; CHECK-LABEL: fct18:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur h0, [x0, #1]
; CHECK-NEXT:    ucvtf s0, s0
; CHECK-NEXT:    fmul s0, s0, s0
; CHECK-NEXT:    ret
  %bitcast = ptrtoint ptr %sp0 to i64
  %add = add i64 %bitcast, 1
  %addr = inttoptr i64 %add to ptr
  %pix_sp0.0.copyload = load i16, ptr %addr, align 1
  %val = uitofp i16 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}
361
; uitofp i32 -> float, address formed as ptr + 1 via ptrtoint/inttoptr:
; expect an unscaled FPR load (ldur s0, [x0, #1]) feeding ucvtf.
define float @fct19(ptr nocapture %sp0) {
; CHECK-LABEL: fct19:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur s0, [x0, #1]
; CHECK-NEXT:    ucvtf s0, s0
; CHECK-NEXT:    fmul s0, s0, s0
; CHECK-NEXT:    ret
  %bitcast = ptrtoint ptr %sp0 to i64
  %add = add i64 %bitcast, 1
  %addr = inttoptr i64 %add to ptr
  %pix_sp0.0.copyload = load i32, ptr %addr, align 1
  %val = uitofp i32 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}
377
378; i64 -> f32 is not supported on floating point unit.
; uitofp i64 -> float, address formed as ptr + 1 via ptrtoint/inttoptr: the
; expected code uses an unscaled GPR load (ldur x8) and the GPR-source ucvtf.
define float @fct20(ptr nocapture %sp0) {
; CHECK-LABEL: fct20:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur x8, [x0, #1]
; CHECK-NEXT:    ucvtf s0, x8
; CHECK-NEXT:    fmul s0, s0, s0
; CHECK-NEXT:    ret
  %bitcast = ptrtoint ptr %sp0 to i64
  %add = add i64 %bitcast, 1
  %addr = inttoptr i64 %add to ptr
  %pix_sp0.0.copyload = load i64, ptr %addr, align 1
  %val = uitofp i64 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}
395
396; ********* 6. load with unscaled imm to double. *********
; uitofp i8 -> double, address formed as ptr + (-1) via ptrtoint/inttoptr:
; expect an unscaled FPR load (ldur b0, [x0, #-1]) feeding ucvtf d0, d0.
define double @fct21(ptr nocapture %sp0) {
; CHECK-LABEL: fct21:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldur b0, [x0, #-1]
; CHECK-NEXT:    ucvtf d0, d0
; CHECK-NEXT:    fmul d0, d0, d0
; CHECK-NEXT:    ret
entry:
  %bitcast = ptrtoint ptr %sp0 to i64
  %add = add i64 %bitcast, -1
  %addr = inttoptr i64 %add to ptr
  %pix_sp0.0.copyload = load i8, ptr %addr, align 1
  %val = uitofp i8 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}
413
; uitofp i16 -> double, address formed as ptr + 1 via ptrtoint/inttoptr:
; expect an unscaled FPR load (ldur h0, [x0, #1]) feeding ucvtf d0, d0.
define double @fct22(ptr nocapture %sp0) {
; CHECK-LABEL: fct22:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur h0, [x0, #1]
; CHECK-NEXT:    ucvtf d0, d0
; CHECK-NEXT:    fmul d0, d0, d0
; CHECK-NEXT:    ret
  %bitcast = ptrtoint ptr %sp0 to i64
  %add = add i64 %bitcast, 1
  %addr = inttoptr i64 %add to ptr
  %pix_sp0.0.copyload = load i16, ptr %addr, align 1
  %val = uitofp i16 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}
429
; uitofp i32 -> double, address formed as ptr + 1 via ptrtoint/inttoptr:
; expect an unscaled FPR load (ldur s0, [x0, #1]) feeding ucvtf d0, d0.
define double @fct23(ptr nocapture %sp0) {
; CHECK-LABEL: fct23:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur s0, [x0, #1]
; CHECK-NEXT:    ucvtf d0, d0
; CHECK-NEXT:    fmul d0, d0, d0
; CHECK-NEXT:    ret
  %bitcast = ptrtoint ptr %sp0 to i64
  %add = add i64 %bitcast, 1
  %addr = inttoptr i64 %add to ptr
  %pix_sp0.0.copyload = load i32, ptr %addr, align 1
  %val = uitofp i32 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}
445
; uitofp i64 -> double, address formed as ptr + 1 via ptrtoint/inttoptr:
; expect an unscaled FPR load (ldur d0, [x0, #1]) feeding ucvtf d0, d0.
define double @fct24(ptr nocapture %sp0) {
; CHECK-LABEL: fct24:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur d0, [x0, #1]
; CHECK-NEXT:    ucvtf d0, d0
; CHECK-NEXT:    fmul d0, d0, d0
; CHECK-NEXT:    ret
  %bitcast = ptrtoint ptr %sp0 to i64
  %add = add i64 %bitcast, 1
  %addr = inttoptr i64 %add to ptr
  %pix_sp0.0.copyload = load i64, ptr %addr, align 1
  %val = uitofp i64 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}
462
463; ********* 1s. load with scaled imm to float. *********
; sitofp i8 -> float, load at a scaled immediate offset (#1).
; Cyclone: expect an FPR load (ldr b0) widened by two sshll steps, then scvtf.
; Cortex-A57: expect a sign-extending GPR load (ldrsb) and GPR-source scvtf.
define float @sfct1(ptr nocapture %sp0) {
; CHECK-CYC-LABEL: sfct1:
; CHECK-CYC:       // %bb.0: // %entry
; CHECK-CYC-NEXT:    ldr b0, [x0, #1]
; CHECK-CYC-NEXT:    sshll v0.8h, v0.8b, #0
; CHECK-CYC-NEXT:    sshll v0.4s, v0.4h, #0
; CHECK-CYC-NEXT:    scvtf s0, s0
; CHECK-CYC-NEXT:    fmul s0, s0, s0
; CHECK-CYC-NEXT:    ret
;
; CHECK-A57-LABEL: sfct1:
; CHECK-A57:       // %bb.0: // %entry
; CHECK-A57-NEXT:    ldrsb w8, [x0, #1]
; CHECK-A57-NEXT:    scvtf s0, w8
; CHECK-A57-NEXT:    fmul s0, s0, s0
; CHECK-A57-NEXT:    ret
entry:
  %addr = getelementptr i8, ptr %sp0, i64 1
  %pix_sp0.0.copyload = load i8, ptr %addr, align 1
  %val = sitofp i8 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}
487
; sitofp i16 -> float, load at a scaled immediate offset (#2).
; Cyclone: expect an FPR load (ldr h0) widened by one sshll, then scvtf.
; Cortex-A57: expect a sign-extending GPR load (ldrsh) and GPR-source scvtf.
define float @sfct2(ptr nocapture %sp0) {
; CHECK-CYC-LABEL: sfct2:
; CHECK-CYC:       // %bb.0: // %entry
; CHECK-CYC-NEXT:    ldr h0, [x0, #2]
; CHECK-CYC-NEXT:    sshll v0.4s, v0.4h, #0
; CHECK-CYC-NEXT:    scvtf s0, s0
; CHECK-CYC-NEXT:    fmul s0, s0, s0
; CHECK-CYC-NEXT:    ret
;
; CHECK-A57-LABEL: sfct2:
; CHECK-A57:       // %bb.0: // %entry
; CHECK-A57-NEXT:    ldrsh w8, [x0, #2]
; CHECK-A57-NEXT:    scvtf s0, w8
; CHECK-A57-NEXT:    fmul s0, s0, s0
; CHECK-A57-NEXT:    ret
entry:
  %addr = getelementptr i16, ptr %sp0, i64 1
  %pix_sp0.0.copyload = load i16, ptr %addr, align 1
  %val = sitofp i16 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}
510
; sitofp i32 -> float, load at a scaled immediate offset (#4): both RUN lines
; expect an FPR load (ldr s0) feeding scvtf. The result is then squared.
define float @sfct3(ptr nocapture %sp0) {
; CHECK-LABEL: sfct3:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr s0, [x0, #4]
; CHECK-NEXT:    scvtf s0, s0
; CHECK-NEXT:    fmul s0, s0, s0
; CHECK-NEXT:    ret
entry:
  %addr = getelementptr i32, ptr %sp0, i64 1
  %pix_sp0.0.copyload = load i32, ptr %addr, align 1
  %val = sitofp i32 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}
525
526; i64 -> f32 is not supported on floating point unit.
; sitofp i64 -> float, load at a scaled immediate offset (#8): the expected
; code keeps the i64 in a GPR (ldr x8) and uses the GPR-source scvtf.
define float @sfct4(ptr nocapture %sp0) {
; CHECK-LABEL: sfct4:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr x8, [x0, #8]
; CHECK-NEXT:    scvtf s0, x8
; CHECK-NEXT:    fmul s0, s0, s0
; CHECK-NEXT:    ret
entry:
  %addr = getelementptr i64, ptr %sp0, i64 1
  %pix_sp0.0.copyload = load i64, ptr %addr, align 1
  %val = sitofp i64 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}
541
542; ********* 2s. load with scaled register to float. *********
; sitofp i8 -> float, load at a register offset.
; Cyclone: expect an FPR load (ldr b0) widened by two sshll steps, then scvtf.
; Cortex-A57: expect a sign-extending GPR load (ldrsb) and GPR-source scvtf.
define float @sfct5(ptr nocapture %sp0, i64 %offset) {
; CHECK-CYC-LABEL: sfct5:
; CHECK-CYC:       // %bb.0: // %entry
; CHECK-CYC-NEXT:    ldr b0, [x0, x1]
; CHECK-CYC-NEXT:    sshll v0.8h, v0.8b, #0
; CHECK-CYC-NEXT:    sshll v0.4s, v0.4h, #0
; CHECK-CYC-NEXT:    scvtf s0, s0
; CHECK-CYC-NEXT:    fmul s0, s0, s0
; CHECK-CYC-NEXT:    ret
;
; CHECK-A57-LABEL: sfct5:
; CHECK-A57:       // %bb.0: // %entry
; CHECK-A57-NEXT:    ldrsb w8, [x0, x1]
; CHECK-A57-NEXT:    scvtf s0, w8
; CHECK-A57-NEXT:    fmul s0, s0, s0
; CHECK-A57-NEXT:    ret
entry:
  %addr = getelementptr i8, ptr %sp0, i64 %offset
  %pix_sp0.0.copyload = load i8, ptr %addr, align 1
  %val = sitofp i8 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}
566
; sitofp i16 -> float, load at a scaled register offset (lsl #1).
; Cyclone: expect an FPR load (ldr h0) widened by one sshll, then scvtf.
; Cortex-A57: expect a sign-extending GPR load (ldrsh) and GPR-source scvtf.
define float @sfct6(ptr nocapture %sp0, i64 %offset) {
; CHECK-CYC-LABEL: sfct6:
; CHECK-CYC:       // %bb.0: // %entry
; CHECK-CYC-NEXT:    ldr h0, [x0, x1, lsl #1]
; CHECK-CYC-NEXT:    sshll v0.4s, v0.4h, #0
; CHECK-CYC-NEXT:    scvtf s0, s0
; CHECK-CYC-NEXT:    fmul s0, s0, s0
; CHECK-CYC-NEXT:    ret
;
; CHECK-A57-LABEL: sfct6:
; CHECK-A57:       // %bb.0: // %entry
; CHECK-A57-NEXT:    ldrsh w8, [x0, x1, lsl #1]
; CHECK-A57-NEXT:    scvtf s0, w8
; CHECK-A57-NEXT:    fmul s0, s0, s0
; CHECK-A57-NEXT:    ret
entry:
  %addr = getelementptr i16, ptr %sp0, i64 %offset
  %pix_sp0.0.copyload = load i16, ptr %addr, align 1
  %val = sitofp i16 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}
589
; sitofp i32 -> float, load at a scaled register offset (lsl #2): both RUN
; lines expect an FPR load (ldr s0) feeding scvtf. The result is then squared.
define float @sfct7(ptr nocapture %sp0, i64 %offset) {
; CHECK-LABEL: sfct7:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr s0, [x0, x1, lsl #2]
; CHECK-NEXT:    scvtf s0, s0
; CHECK-NEXT:    fmul s0, s0, s0
; CHECK-NEXT:    ret
entry:
  %addr = getelementptr i32, ptr %sp0, i64 %offset
  %pix_sp0.0.copyload = load i32, ptr %addr, align 1
  %val = sitofp i32 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}
604
605; i64 -> f32 is not supported on floating point unit.
; sitofp i64 -> float, load at a scaled register offset (lsl #3): the
; expected code keeps the i64 in a GPR (ldr x8) and uses the GPR-source scvtf.
define float @sfct8(ptr nocapture %sp0, i64 %offset) {
; CHECK-LABEL: sfct8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr x8, [x0, x1, lsl #3]
; CHECK-NEXT:    scvtf s0, x8
; CHECK-NEXT:    fmul s0, s0, s0
; CHECK-NEXT:    ret
entry:
  %addr = getelementptr i64, ptr %sp0, i64 %offset
  %pix_sp0.0.copyload = load i64, ptr %addr, align 1
  %val = sitofp i64 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}
620
621; ********* 3s. load with scaled imm to double. *********
; sitofp i8 -> double, load at a scaled immediate offset (#1): both RUN lines
; expect a sign-extending GPR load (ldrsb) and the GPR-source scvtf.
define double @sfct9(ptr nocapture %sp0) {
; CHECK-LABEL: sfct9:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldrsb w8, [x0, #1]
; CHECK-NEXT:    scvtf d0, w8
; CHECK-NEXT:    fmul d0, d0, d0
; CHECK-NEXT:    ret
entry:
  %addr = getelementptr i8, ptr %sp0, i64 1
  %pix_sp0.0.copyload = load i8, ptr %addr, align 1
  %val = sitofp i8 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}
636
; sitofp i16 -> double, load at a scaled immediate offset (#2).
; Cyclone: expect an FPR load (ldr h0) widened by two sshll steps, then scvtf.
; Cortex-A57: expect a sign-extending GPR load (ldrsh) and GPR-source scvtf.
define double @sfct10(ptr nocapture %sp0) {
; CHECK-CYC-LABEL: sfct10:
; CHECK-CYC:       // %bb.0: // %entry
; CHECK-CYC-NEXT:    ldr h0, [x0, #2]
; CHECK-CYC-NEXT:    sshll v0.4s, v0.4h, #0
; CHECK-CYC-NEXT:    sshll v0.2d, v0.2s, #0
; CHECK-CYC-NEXT:    scvtf d0, d0
; CHECK-CYC-NEXT:    fmul d0, d0, d0
; CHECK-CYC-NEXT:    ret
;
; CHECK-A57-LABEL: sfct10:
; CHECK-A57:       // %bb.0: // %entry
; CHECK-A57-NEXT:    ldrsh w8, [x0, #2]
; CHECK-A57-NEXT:    scvtf d0, w8
; CHECK-A57-NEXT:    fmul d0, d0, d0
; CHECK-A57-NEXT:    ret
entry:
  %addr = getelementptr i16, ptr %sp0, i64 1
  %pix_sp0.0.copyload = load i16, ptr %addr, align 1
  %val = sitofp i16 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}
660
; sitofp i32 -> double, load at a scaled immediate offset (#4).
; Cyclone: expect an FPR load (ldr s0) widened by one sshll, then scvtf.
; Cortex-A57: expect a GPR load (ldr w8) and the GPR-source scvtf.
define double @sfct11(ptr nocapture %sp0) {
; CHECK-CYC-LABEL: sfct11:
; CHECK-CYC:       // %bb.0: // %entry
; CHECK-CYC-NEXT:    ldr s0, [x0, #4]
; CHECK-CYC-NEXT:    sshll v0.2d, v0.2s, #0
; CHECK-CYC-NEXT:    scvtf d0, d0
; CHECK-CYC-NEXT:    fmul d0, d0, d0
; CHECK-CYC-NEXT:    ret
;
; CHECK-A57-LABEL: sfct11:
; CHECK-A57:       // %bb.0: // %entry
; CHECK-A57-NEXT:    ldr w8, [x0, #4]
; CHECK-A57-NEXT:    scvtf d0, w8
; CHECK-A57-NEXT:    fmul d0, d0, d0
; CHECK-A57-NEXT:    ret
entry:
  %addr = getelementptr i32, ptr %sp0, i64 1
  %pix_sp0.0.copyload = load i32, ptr %addr, align 1
  %val = sitofp i32 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}
683
; sitofp i64 -> double, load at a scaled immediate offset (#8): both RUN lines
; expect an FPR load (ldr d0) feeding scvtf d0, d0. The result is then squared.
define double @sfct12(ptr nocapture %sp0) {
; CHECK-LABEL: sfct12:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr d0, [x0, #8]
; CHECK-NEXT:    scvtf d0, d0
; CHECK-NEXT:    fmul d0, d0, d0
; CHECK-NEXT:    ret
entry:
  %addr = getelementptr i64, ptr %sp0, i64 1
  %pix_sp0.0.copyload = load i64, ptr %addr, align 1
  %val = sitofp i64 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}
698
699; ********* 4s. load with scaled register to double. *********
; sitofp i8 -> double, load at a register offset: both RUN lines expect a
; sign-extending GPR load (ldrsb) and the GPR-source scvtf.
define double @sfct13(ptr nocapture %sp0, i64 %offset) {
; CHECK-LABEL: sfct13:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldrsb w8, [x0, x1]
; CHECK-NEXT:    scvtf d0, w8
; CHECK-NEXT:    fmul d0, d0, d0
; CHECK-NEXT:    ret
entry:
  %addr = getelementptr i8, ptr %sp0, i64 %offset
  %pix_sp0.0.copyload = load i8, ptr %addr, align 1
  %val = sitofp i8 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}
714
; sitofp i16 -> double, load at a scaled register offset (lsl #1).
; Cyclone: expect an FPR load (ldr h0) widened by two sshll steps, then scvtf.
; Cortex-A57: expect a sign-extending GPR load (ldrsh) and GPR-source scvtf.
define double @sfct14(ptr nocapture %sp0, i64 %offset) {
; CHECK-CYC-LABEL: sfct14:
; CHECK-CYC:       // %bb.0: // %entry
; CHECK-CYC-NEXT:    ldr h0, [x0, x1, lsl #1]
; CHECK-CYC-NEXT:    sshll v0.4s, v0.4h, #0
; CHECK-CYC-NEXT:    sshll v0.2d, v0.2s, #0
; CHECK-CYC-NEXT:    scvtf d0, d0
; CHECK-CYC-NEXT:    fmul d0, d0, d0
; CHECK-CYC-NEXT:    ret
;
; CHECK-A57-LABEL: sfct14:
; CHECK-A57:       // %bb.0: // %entry
; CHECK-A57-NEXT:    ldrsh w8, [x0, x1, lsl #1]
; CHECK-A57-NEXT:    scvtf d0, w8
; CHECK-A57-NEXT:    fmul d0, d0, d0
; CHECK-A57-NEXT:    ret
entry:
  %addr = getelementptr i16, ptr %sp0, i64 %offset
  %pix_sp0.0.copyload = load i16, ptr %addr, align 1
  %val = sitofp i16 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}
738
; sitofp i32 -> double, load at a scaled register offset (lsl #2).
; Cyclone: expect an FPR load (ldr s0) widened by one sshll, then scvtf.
; Cortex-A57: expect a GPR load (ldr w8) and the GPR-source scvtf.
define double @sfct15(ptr nocapture %sp0, i64 %offset) {
; CHECK-CYC-LABEL: sfct15:
; CHECK-CYC:       // %bb.0: // %entry
; CHECK-CYC-NEXT:    ldr s0, [x0, x1, lsl #2]
; CHECK-CYC-NEXT:    sshll v0.2d, v0.2s, #0
; CHECK-CYC-NEXT:    scvtf d0, d0
; CHECK-CYC-NEXT:    fmul d0, d0, d0
; CHECK-CYC-NEXT:    ret
;
; CHECK-A57-LABEL: sfct15:
; CHECK-A57:       // %bb.0: // %entry
; CHECK-A57-NEXT:    ldr w8, [x0, x1, lsl #2]
; CHECK-A57-NEXT:    scvtf d0, w8
; CHECK-A57-NEXT:    fmul d0, d0, d0
; CHECK-A57-NEXT:    ret
entry:
  %addr = getelementptr i32, ptr %sp0, i64 %offset
  %pix_sp0.0.copyload = load i32, ptr %addr, align 1
  %val = sitofp i32 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}
761
; sitofp i64 -> double, load at a scaled register offset (lsl #3): both RUN
; lines expect an FPR load (ldr d0) feeding scvtf d0, d0.
define double @sfct16(ptr nocapture %sp0, i64 %offset) {
; CHECK-LABEL: sfct16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr d0, [x0, x1, lsl #3]
; CHECK-NEXT:    scvtf d0, d0
; CHECK-NEXT:    fmul d0, d0, d0
; CHECK-NEXT:    ret
entry:
  %addr = getelementptr i64, ptr %sp0, i64 %offset
  %pix_sp0.0.copyload = load i64, ptr %addr, align 1
  %val = sitofp i64 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}
776
777; ********* 5s. load with unscaled imm to float. *********
; sitofp i8 -> float, address formed as ptr + (-1) via ptrtoint/inttoptr.
; Cyclone: expect an unscaled FPR load (ldur b0), two sshll steps, then scvtf.
; Cortex-A57: expect an unscaled sign-extending GPR load (ldursb) and the
; GPR-source scvtf.
define float @sfct17(ptr nocapture %sp0) {
; CHECK-CYC-LABEL: sfct17:
; CHECK-CYC:       // %bb.0: // %entry
; CHECK-CYC-NEXT:    ldur b0, [x0, #-1]
; CHECK-CYC-NEXT:    sshll v0.8h, v0.8b, #0
; CHECK-CYC-NEXT:    sshll v0.4s, v0.4h, #0
; CHECK-CYC-NEXT:    scvtf s0, s0
; CHECK-CYC-NEXT:    fmul s0, s0, s0
; CHECK-CYC-NEXT:    ret
;
; CHECK-A57-LABEL: sfct17:
; CHECK-A57:       // %bb.0: // %entry
; CHECK-A57-NEXT:    ldursb w8, [x0, #-1]
; CHECK-A57-NEXT:    scvtf s0, w8
; CHECK-A57-NEXT:    fmul s0, s0, s0
; CHECK-A57-NEXT:    ret
entry:
  %bitcast = ptrtoint ptr %sp0 to i64
  %add = add i64 %bitcast, -1
  %addr = inttoptr i64 %add to ptr
  %pix_sp0.0.copyload = load i8, ptr %addr, align 1
  %val = sitofp i8 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}
803
; sitofp i16 -> float, address formed as ptr + 1 via ptrtoint/inttoptr.
; Cyclone: expect an unscaled FPR load (ldur h0), one sshll, then scvtf.
; Cortex-A57: expect an unscaled sign-extending GPR load (ldursh) and the
; GPR-source scvtf.
define float @sfct18(ptr nocapture %sp0) {
; CHECK-CYC-LABEL: sfct18:
; CHECK-CYC:       // %bb.0:
; CHECK-CYC-NEXT:    ldur h0, [x0, #1]
; CHECK-CYC-NEXT:    sshll v0.4s, v0.4h, #0
; CHECK-CYC-NEXT:    scvtf s0, s0
; CHECK-CYC-NEXT:    fmul s0, s0, s0
; CHECK-CYC-NEXT:    ret
;
; CHECK-A57-LABEL: sfct18:
; CHECK-A57:       // %bb.0:
; CHECK-A57-NEXT:    ldursh w8, [x0, #1]
; CHECK-A57-NEXT:    scvtf s0, w8
; CHECK-A57-NEXT:    fmul s0, s0, s0
; CHECK-A57-NEXT:    ret
  %bitcast = ptrtoint ptr %sp0 to i64
  %add = add i64 %bitcast, 1
  %addr = inttoptr i64 %add to ptr
  %pix_sp0.0.copyload = load i16, ptr %addr, align 1
  %val = sitofp i16 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}
827
; sitofp i32 -> float, address formed as ptr + 1 via ptrtoint/inttoptr: both
; RUN lines expect an unscaled FPR load (ldur s0) feeding scvtf.
define float @sfct19(ptr nocapture %sp0) {
; CHECK-LABEL: sfct19:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur s0, [x0, #1]
; CHECK-NEXT:    scvtf s0, s0
; CHECK-NEXT:    fmul s0, s0, s0
; CHECK-NEXT:    ret
  %bitcast = ptrtoint ptr %sp0 to i64
  %add = add i64 %bitcast, 1
  %addr = inttoptr i64 %add to ptr
  %pix_sp0.0.copyload = load i32, ptr %addr, align 1
  %val = sitofp i32 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}
843
844; i64 -> f32 is not supported on floating point unit.
; sitofp i64 -> float, address formed as ptr + 1 via ptrtoint/inttoptr: the
; expected code uses an unscaled GPR load (ldur x8) and the GPR-source scvtf.
define float @sfct20(ptr nocapture %sp0) {
; CHECK-LABEL: sfct20:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur x8, [x0, #1]
; CHECK-NEXT:    scvtf s0, x8
; CHECK-NEXT:    fmul s0, s0, s0
; CHECK-NEXT:    ret
  %bitcast = ptrtoint ptr %sp0 to i64
  %add = add i64 %bitcast, 1
  %addr = inttoptr i64 %add to ptr
  %pix_sp0.0.copyload = load i64, ptr %addr, align 1
  %val = sitofp i64 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}
861
862; ********* 6s. load with unscaled imm to double. *********
; sitofp i8 -> double, address formed as ptr + (-1) via ptrtoint/inttoptr:
; both RUN lines expect an unscaled sign-extending GPR load (ldursb) and the
; GPR-source scvtf.
define double @sfct21(ptr nocapture %sp0) {
; CHECK-LABEL: sfct21:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldursb w8, [x0, #-1]
; CHECK-NEXT:    scvtf d0, w8
; CHECK-NEXT:    fmul d0, d0, d0
; CHECK-NEXT:    ret
entry:
  %bitcast = ptrtoint ptr %sp0 to i64
  %add = add i64 %bitcast, -1
  %addr = inttoptr i64 %add to ptr
  %pix_sp0.0.copyload = load i8, ptr %addr, align 1
  %val = sitofp i8 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}
879
; sitofp i16 -> double, address formed as ptr + 1 via ptrtoint/inttoptr.
; Cyclone: expect an unscaled FPR load (ldur h0), two sshll steps, then scvtf.
; Cortex-A57: expect an unscaled sign-extending GPR load (ldursh) and the
; GPR-source scvtf.
define double @sfct22(ptr nocapture %sp0) {
; CHECK-CYC-LABEL: sfct22:
; CHECK-CYC:       // %bb.0:
; CHECK-CYC-NEXT:    ldur h0, [x0, #1]
; CHECK-CYC-NEXT:    sshll v0.4s, v0.4h, #0
; CHECK-CYC-NEXT:    sshll v0.2d, v0.2s, #0
; CHECK-CYC-NEXT:    scvtf d0, d0
; CHECK-CYC-NEXT:    fmul d0, d0, d0
; CHECK-CYC-NEXT:    ret
;
; CHECK-A57-LABEL: sfct22:
; CHECK-A57:       // %bb.0:
; CHECK-A57-NEXT:    ldursh w8, [x0, #1]
; CHECK-A57-NEXT:    scvtf d0, w8
; CHECK-A57-NEXT:    fmul d0, d0, d0
; CHECK-A57-NEXT:    ret
  %bitcast = ptrtoint ptr %sp0 to i64
  %add = add i64 %bitcast, 1
  %addr = inttoptr i64 %add to ptr
  %pix_sp0.0.copyload = load i16, ptr %addr, align 1
  %val = sitofp i16 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}
904
; sitofp i32 -> double, address formed as ptr + 1 via ptrtoint/inttoptr.
; Cyclone: expect an unscaled FPR load (ldur s0), one sshll, then scvtf.
; Cortex-A57: expect an unscaled GPR load (ldur w8) and the GPR-source scvtf.
define double @sfct23(ptr nocapture %sp0) {
; CHECK-CYC-LABEL: sfct23:
; CHECK-CYC:       // %bb.0:
; CHECK-CYC-NEXT:    ldur s0, [x0, #1]
; CHECK-CYC-NEXT:    sshll v0.2d, v0.2s, #0
; CHECK-CYC-NEXT:    scvtf d0, d0
; CHECK-CYC-NEXT:    fmul d0, d0, d0
; CHECK-CYC-NEXT:    ret
;
; CHECK-A57-LABEL: sfct23:
; CHECK-A57:       // %bb.0:
; CHECK-A57-NEXT:    ldur w8, [x0, #1]
; CHECK-A57-NEXT:    scvtf d0, w8
; CHECK-A57-NEXT:    fmul d0, d0, d0
; CHECK-A57-NEXT:    ret
  %bitcast = ptrtoint ptr %sp0 to i64
  %add = add i64 %bitcast, 1
  %addr = inttoptr i64 %add to ptr
  %pix_sp0.0.copyload = load i32, ptr %addr, align 1
  %val = sitofp i32 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}
928
; sitofp i64 -> double, address formed as ptr + 1 via ptrtoint/inttoptr: both
; RUN lines expect an unscaled FPR load (ldur d0) feeding scvtf d0, d0.
define double @sfct24(ptr nocapture %sp0) {
; CHECK-LABEL: sfct24:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldur d0, [x0, #1]
; CHECK-NEXT:    scvtf d0, d0
; CHECK-NEXT:    fmul d0, d0, d0
; CHECK-NEXT:    ret
  %bitcast = ptrtoint ptr %sp0 to i64
  %add = add i64 %bitcast, 1
  %addr = inttoptr i64 %add to ptr
  %pix_sp0.0.copyload = load i64, ptr %addr, align 1
  %val = sitofp i64 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}
945
946; Check that we do not use SSHLL code sequence when code size is a concern.
; optsize variant of the i8 -> float signed conversion at ptr + (-1): both
; RUN lines expect the short ldursb + GPR-source scvtf sequence, with no
; sshll instructions.
define float @codesize_sfct17(ptr nocapture %sp0) optsize {
; CHECK-LABEL: codesize_sfct17:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldursb w8, [x0, #-1]
; CHECK-NEXT:    scvtf s0, w8
; CHECK-NEXT:    fmul s0, s0, s0
; CHECK-NEXT:    ret
entry:
  %bitcast = ptrtoint ptr %sp0 to i64
  %add = add i64 %bitcast, -1
  %addr = inttoptr i64 %add to ptr
  %pix_sp0.0.copyload = load i8, ptr %addr, align 1
  %val = sitofp i8 %pix_sp0.0.copyload to float
  %vmull.i = fmul float %val, %val
  ret float %vmull.i
}
963
; minsize variant of the i32 -> double signed conversion at a scaled immediate
; offset (#4): both RUN lines expect a GPR load (ldr w8) and the GPR-source
; scvtf, with no sshll instruction.
define double @codesize_sfct11(ptr nocapture %sp0) minsize {
; CHECK-LABEL: codesize_sfct11:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ldr w8, [x0, #4]
; CHECK-NEXT:    scvtf d0, w8
; CHECK-NEXT:    fmul d0, d0, d0
; CHECK-NEXT:    ret
entry:
  %addr = getelementptr i32, ptr %sp0, i64 1
  %pix_sp0.0.copyload = load i32, ptr %addr, align 1
  %val = sitofp i32 %pix_sp0.0.copyload to double
  %vmull.i = fmul double %val, %val
  ret double %vmull.i
}
978
979; Adding fp128 custom lowering makes these a little fragile since we have to
980; return the correct mix of Legal/Expand from the custom method.
981;
982; rdar://problem/14991489
983
; uitofp i128 -> float: expected to be lowered to a call to __floatuntisf,
; with x30 spilled and reloaded around the call.
define float @float_from_i128(i128 %in) {
; CHECK-LABEL: float_from_i128:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset w30, -16
; CHECK-NEXT:    bl __floatuntisf
; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %conv = uitofp i128 %in to float
  ret float %conv
}
996
; sitofp i128 -> double: expected to be lowered to a call to __floattidf,
; with x30 spilled and reloaded around the call.
define double @double_from_i128(i128 %in) {
; CHECK-LABEL: double_from_i128:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset w30, -16
; CHECK-NEXT:    bl __floattidf
; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %conv = sitofp i128 %in to double
  ret double %conv
}
1009
; uitofp i128 -> fp128: expected to be lowered to a call to __floatuntitf,
; with x30 spilled and reloaded around the call.
define fp128 @fp128_from_i128(i128 %in) {
; CHECK-LABEL: fp128_from_i128:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset w30, -16
; CHECK-NEXT:    bl __floatuntitf
; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %conv = uitofp i128 %in to fp128
  ret fp128 %conv
}
1022
; fptosi float -> i128: expected to be lowered to a call to __fixsfti, with
; x30 spilled and reloaded around the call.
define i128 @i128_from_float(float %in) {
; CHECK-LABEL: i128_from_float:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset w30, -16
; CHECK-NEXT:    bl __fixsfti
; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %conv = fptosi float %in to i128
  ret i128 %conv
}
1035
; fptoui double -> i128: expected to be lowered to a call to __fixunsdfti,
; with x30 spilled and reloaded around the call.
define i128 @i128_from_double(double %in) {
; CHECK-LABEL: i128_from_double:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset w30, -16
; CHECK-NEXT:    bl __fixunsdfti
; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %conv = fptoui double %in to i128
  ret i128 %conv
}
1048
; fptosi fp128 -> i128: expected to be lowered to a call to __fixtfti, with
; x30 spilled and reloaded around the call.
define i128 @i128_from_fp128(fp128 %in) {
; CHECK-LABEL: i128_from_fp128:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x30, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset w30, -16
; CHECK-NEXT:    bl __fixtfti
; CHECK-NEXT:    ldr x30, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    ret
  %conv = fptosi fp128 %in to i128
  ret i128 %conv
}
1061
1062