; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2,+bf16 -force-streaming -verify-machineinstrs < %s | FileCheck %s
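
; These tests cover the SME2 multi-vector saturating narrowing convert
; intrinsics (x2 and x4 register groups) in streaming mode. In each test the
; leading %unused argument appears intended to occupy z0 so that the real
; operands land in later registers, forcing the mov copies that assemble the
; consecutive register tuple (e.g. { z2.s, z3.s } or { z4.s - z7.s }) consumed
; by the instruction.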

;
; SQCVTN
;
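; Signed saturating extract narrow (signed inputs, signed results).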

; x2
define <vscale x 8 x i16> @multi_vector_qcvtn_x2_s16_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2) {
; CHECK-LABEL: multi_vector_qcvtn_x2_s16_s32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z3.d, z2.d
; CHECK-NEXT:    mov z2.d, z1.d
; CHECK-NEXT:    sqcvtn z0.h, { z2.s, z3.s }
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.sqcvtn.x2.nxv4i32(<vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2)
  ret <vscale x 8 x i16> %res
}

; x4
define <vscale x 16 x i8> @multi_vector_qcvtn_x4_s8_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zn4) {
; CHECK-LABEL: multi_vector_qcvtn_x4_s8_s32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    sqcvtn z0.b, { z4.s - z7.s }
; CHECK-NEXT:    ret
  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.sqcvtn.x4.nxv4i32(<vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zn4)
  ret <vscale x 16 x i8> %res
}

define <vscale x 8 x i16> @multi_vector_qcvtn_x4_s16_s64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, <vscale x 2 x i64> %zn4) {
; CHECK-LABEL: multi_vector_qcvtn_x4_s16_s64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    sqcvtn z0.h, { z4.d - z7.d }
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.sqcvtn.x4.nxv2i64(<vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, <vscale x 2 x i64> %zn4)
  ret <vscale x 8 x i16> %res
}

;
; UQCVTN
;
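; Unsigned saturating extract narrow (unsigned inputs, unsigned results).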

; x2
define <vscale x 8 x i16> @multi_vector_qcvtn_x2_u16_u32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1) {
; CHECK-LABEL: multi_vector_qcvtn_x2_u16_u32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z3.d, z2.d
; CHECK-NEXT:    mov z2.d, z1.d
; CHECK-NEXT:    uqcvtn z0.h, { z2.s, z3.s }
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.uqcvtn.x2.nxv4i32(<vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1)
  ret <vscale x 8 x i16> %res
}

; x4
define <vscale x 16 x i8> @multi_vector_qcvtn_x4_u8_u32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zn4) {
; CHECK-LABEL: multi_vector_qcvtn_x4_u8_u32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    uqcvtn z0.b, { z4.s - z7.s }
; CHECK-NEXT:    ret
  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.uqcvtn.x4.nxv4i32(<vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zn4)
  ret <vscale x 16 x i8> %res
}

define <vscale x 8 x i16> @multi_vector_qcvtn_x4_u16_u64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, <vscale x 2 x i64> %zn4) {
; CHECK-LABEL: multi_vector_qcvtn_x4_u16_u64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    uqcvtn z0.h, { z4.d - z7.d }
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.uqcvtn.x4.nxv2i64(<vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, <vscale x 2 x i64> %zn4)
  ret <vscale x 8 x i16> %res
}

;
; SQCVTUN
;
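; Signed saturating extract narrow to unsigned (signed inputs, unsigned results).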

; x2
define <vscale x 8 x i16> @multi_vector_qcvtn_x2_s16_u32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2) {
; CHECK-LABEL: multi_vector_qcvtn_x2_s16_u32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z3.d, z2.d
; CHECK-NEXT:    mov z2.d, z1.d
; CHECK-NEXT:    sqcvtun z0.h, { z2.s, z3.s }
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.sqcvtun.x2.nxv4i32(<vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2)
  ret <vscale x 8 x i16> %res
}

; x4
define <vscale x 16 x i8> @multi_vector_qcvtn_x4_u8_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zn4) {
; CHECK-LABEL: multi_vector_qcvtn_x4_u8_s32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    sqcvtun z0.b, { z4.s - z7.s }
; CHECK-NEXT:    ret
  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.sqcvtun.x4.nxv4i32(<vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zn4)
  ret <vscale x 16 x i8> %res
}

define <vscale x 8 x i16> @multi_vector_qcvtn_x4_u16_s64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, <vscale x 2 x i64> %zn4) {
; CHECK-LABEL: multi_vector_qcvtn_x4_u16_s64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    sqcvtun z0.h, { z4.d - z7.d }
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.sqcvtun.x4.nxv2i64(<vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, <vscale x 2 x i64> %zn4)
  ret <vscale x 8 x i16> %res
}

declare <vscale x 8 x i16> @llvm.aarch64.sve.uqcvtn.x2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqcvtn.x2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqcvtun.x2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.sqcvtn.x4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqcvtn.x4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.uqcvtn.x4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.uqcvtn.x4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.sqcvtun.x4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqcvtun.x4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)