; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2,+bf16 -force-streaming -verify-machineinstrs < %s | FileCheck %s

;
; SQCVTN
;

; x2
define <vscale x 8 x i16> @multi_vector_qcvtn_x2_s16_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2) {
; CHECK-LABEL: multi_vector_qcvtn_x2_s16_s32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z3.d, z2.d
; CHECK-NEXT:    mov z2.d, z1.d
; CHECK-NEXT:    sqcvtn z0.h, { z2.s, z3.s }
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.sqcvtn.x2.nxv4i32(<vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2)
  ret <vscale x 8 x i16> %res
}

; x4
define <vscale x 16 x i8> @multi_vector_qcvtn_x4_s8_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zn4) {
; CHECK-LABEL: multi_vector_qcvtn_x4_s8_s32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    sqcvtn z0.b, { z4.s - z7.s }
; CHECK-NEXT:    ret
  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.sqcvtn.x4.nxv4i32(<vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zn4)
  ret <vscale x 16 x i8> %res
}

define <vscale x 8 x i16> @multi_vector_qcvtn_x4_s16_s64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, <vscale x 2 x i64> %zn4) {
; CHECK-LABEL: multi_vector_qcvtn_x4_s16_s64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    sqcvtn z0.h, { z4.d - z7.d }
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.sqcvtn.x4.nxv2i64(<vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, <vscale x 2 x i64> %zn4)
  ret <vscale x 8 x i16> %res
}

;
; UQCVTN
;

; x2
define <vscale x 8 x i16> @multi_vector_qcvtn_x2_u16_u32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1) {
; CHECK-LABEL: multi_vector_qcvtn_x2_u16_u32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z3.d, z2.d
; CHECK-NEXT:    mov z2.d, z1.d
; CHECK-NEXT:    uqcvtn z0.h, { z2.s, z3.s }
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.uqcvtn.x2.nxv4i32(<vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1)
  ret <vscale x 8 x i16> %res
}

; x4
define <vscale x 16 x i8> @multi_vector_qcvtn_x4_u8_u32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zn4) {
; CHECK-LABEL: multi_vector_qcvtn_x4_u8_u32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    uqcvtn z0.b, { z4.s - z7.s }
; CHECK-NEXT:    ret
  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.uqcvtn.x4.nxv4i32(<vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zn4)
  ret <vscale x 16 x i8> %res
}

define <vscale x 8 x i16> @multi_vector_qcvtn_x4_u16_u64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, <vscale x 2 x i64> %zn4) {
; CHECK-LABEL: multi_vector_qcvtn_x4_u16_u64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    uqcvtn z0.h, { z4.d - z7.d }
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.uqcvtn.x4.nxv2i64(<vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, <vscale x 2 x i64> %zn4)
  ret <vscale x 8 x i16> %res
}

;
; SQCVTUN
;

; x2
define <vscale x 8 x i16> @multi_vector_qcvtn_x2_s16_u32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2) {
; CHECK-LABEL: multi_vector_qcvtn_x2_s16_u32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z3.d, z2.d
; CHECK-NEXT:    mov z2.d, z1.d
; CHECK-NEXT:    sqcvtun z0.h, { z2.s, z3.s }
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.sqcvtun.x2.nxv4i32(<vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2)
  ret <vscale x 8 x i16> %res
}

; x4
define <vscale x 16 x i8> @multi_vector_qcvtn_x4_u8_s32(<vscale x 4 x i32> %unused, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zn4) {
; CHECK-LABEL: multi_vector_qcvtn_x4_u8_s32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    sqcvtun z0.b, { z4.s - z7.s }
; CHECK-NEXT:    ret
  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.sqcvtun.x4.nxv4i32(<vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3, <vscale x 4 x i32> %zn4)
  ret <vscale x 16 x i8> %res
}

define <vscale x 8 x i16> @multi_vector_qcvtn_x4_u16_s64(<vscale x 2 x i64> %unused, <vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, <vscale x 2 x i64> %zn4) {
; CHECK-LABEL: multi_vector_qcvtn_x4_u16_s64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    sqcvtun z0.h, { z4.d - z7.d }
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.sqcvtun.x4.nxv2i64(<vscale x 2 x i64> %zn1, <vscale x 2 x i64> %zn2, <vscale x 2 x i64> %zn3, <vscale x 2 x i64> %zn4)
  ret <vscale x 8 x i16> %res
}

declare <vscale x 8 x i16> @llvm.aarch64.sve.uqcvtn.x2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqcvtn.x2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqcvtun.x2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.sqcvtn.x4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqcvtn.x4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.uqcvtn.x4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.uqcvtn.x4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
declare <vscale x 16 x i8> @llvm.aarch64.sve.sqcvtun.x4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.sqcvtun.x4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)