; xref: /llvm-project/llvm/test/CodeGen/AArch64/sve-fpext-load.ll (revision b24af43fdfa1b1242b7cb77540462212227c57c4)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s

; fpext <vscale x 2 x half> -> <vscale x 2 x double>
define <vscale x 2 x double> @ext2_f16_f64(ptr %ptr, i64 %index) {
; CHECK-LABEL: ext2_f16_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0]
; CHECK-NEXT:    fcvt z0.d, p0/m, z0.h
; CHECK-NEXT:    ret
  %load = load <vscale x 2 x half>, ptr %ptr, align 4
  %load.ext = fpext <vscale x 2 x half> %load to <vscale x 2 x double>
  ret <vscale x 2 x double> %load.ext
}
; fpext <vscale x 4 x half> -> <vscale x 4 x double>
define <vscale x 4 x double> @ext4_f16_f64(ptr %ptr, i64 %index) {
; CHECK-LABEL: ext4_f16_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    uunpklo z1.d, z0.s
; CHECK-NEXT:    uunpkhi z2.d, z0.s
; CHECK-NEXT:    movprfx z0, z1
; CHECK-NEXT:    fcvt z0.d, p0/m, z1.h
; CHECK-NEXT:    movprfx z1, z2
; CHECK-NEXT:    fcvt z1.d, p0/m, z2.h
; CHECK-NEXT:    ret
  %load = load <vscale x 4 x half>, ptr %ptr, align 4
  %load.ext = fpext <vscale x 4 x half> %load to <vscale x 4 x double>
  ret <vscale x 4 x double> %load.ext
}
; fpext <vscale x 8 x half> -> <vscale x 8 x double>
define <vscale x 8 x double> @ext8_f16_f64(ptr %ptr, i64 %index) {
; CHECK-LABEL: ext8_f16_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    uunpklo z1.s, z0.h
; CHECK-NEXT:    uunpkhi z0.s, z0.h
; CHECK-NEXT:    uunpklo z2.d, z1.s
; CHECK-NEXT:    uunpkhi z1.d, z1.s
; CHECK-NEXT:    uunpklo z3.d, z0.s
; CHECK-NEXT:    uunpkhi z4.d, z0.s
; CHECK-NEXT:    fcvt z1.d, p0/m, z1.h
; CHECK-NEXT:    movprfx z0, z2
; CHECK-NEXT:    fcvt z0.d, p0/m, z2.h
; CHECK-NEXT:    movprfx z2, z3
; CHECK-NEXT:    fcvt z2.d, p0/m, z3.h
; CHECK-NEXT:    movprfx z3, z4
; CHECK-NEXT:    fcvt z3.d, p0/m, z4.h
; CHECK-NEXT:    ret
  %load = load <vscale x 8 x half>, ptr %ptr, align 4
  %load.ext = fpext <vscale x 8 x half> %load to <vscale x 8 x double>
  ret <vscale x 8 x double> %load.ext
}
; fpext <vscale x 2 x float> -> <vscale x 2 x double>
define <vscale x 2 x double> @ext2_f32_f64(ptr %ptr, i64 %index) {
; CHECK-LABEL: ext2_f32_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0]
; CHECK-NEXT:    fcvt z0.d, p0/m, z0.s
; CHECK-NEXT:    ret
  %load = load <vscale x 2 x float>, ptr %ptr, align 4
  %load.ext = fpext <vscale x 2 x float> %load to <vscale x 2 x double>
  ret <vscale x 2 x double> %load.ext
}
; fpext <vscale x 4 x float> -> <vscale x 4 x double>
define <vscale x 4 x double> @ext4_f32_f64(ptr %ptr, i64 %index) {
; CHECK-LABEL: ext4_f32_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    uunpklo z1.d, z0.s
; CHECK-NEXT:    uunpkhi z2.d, z0.s
; CHECK-NEXT:    movprfx z0, z1
; CHECK-NEXT:    fcvt z0.d, p0/m, z1.s
; CHECK-NEXT:    movprfx z1, z2
; CHECK-NEXT:    fcvt z1.d, p0/m, z2.s
; CHECK-NEXT:    ret
  %load = load <vscale x 4 x float>, ptr %ptr, align 4
  %load.ext = fpext <vscale x 4 x float> %load to <vscale x 4 x double>
  ret <vscale x 4 x double> %load.ext
}