; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2 -force-streaming -verify-machineinstrs < %s | FileCheck %s

; SRSHL (Single, x2)

define { <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_rounding_shl_single_x2_s8(<vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm) {
; CHECK-LABEL: multi_vec_rounding_shl_single_x2_s8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    srshl { z4.b, z5.b }, { z4.b, z5.b }, z3.b
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.srshl.single.x2.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8> } %res
}

define { <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_rounding_shl_single_x2_s16(<vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm) {
; CHECK-LABEL: multi_vec_rounding_shl_single_x2_s16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    srshl { z4.h, z5.h }, { z4.h, z5.h }, z3.h
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.srshl.single.x2.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm)
  ret { <vscale x 8 x i16>, <vscale x 8 x i16> } %res
}

define { <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_rounding_shl_single_x2_s32(<vscale x 4 x i32> %dummy, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm) {
; CHECK-LABEL: multi_vec_rounding_shl_single_x2_s32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    srshl { z4.s, z5.s }, { z4.s, z5.s }, z3.s
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.srshl.single.x2.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm)
  ret { <vscale x 4 x i32>, <vscale x 4 x i32> } %res
}

define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_rounding_shl_single_x2_s64(<vscale x 2 x i64> %dummy, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm) {
; CHECK-LABEL: multi_vec_rounding_shl_single_x2_s64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    srshl { z4.d, z5.d }, { z4.d, z5.d }, z3.d
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.srshl.single.x2.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm)
  ret { <vscale x 2 x i64>, <vscale x 2 x i64> } %res
}

; SRSHL (Single, x4)

define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_rounding_shl_single_x4_s8(<vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4, <vscale x 16 x i8> %zm) {
; CHECK-LABEL: multi_vec_rounding_shl_single_x4_s8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    srshl { z24.b - z27.b }, { z24.b - z27.b }, z5.b
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
              @llvm.aarch64.sve.srshl.single.x4.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4, <vscale x 16 x i8> %zm)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res
}

define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_rounding_shl_single_x4_s16(<vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4, <vscale x 8 x i16> %zm) {
; CHECK-LABEL: multi_vec_rounding_shl_single_x4_s16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    srshl { z24.h - z27.h }, { z24.h - z27.h }, z5.h
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
              @llvm.aarch64.sve.srshl.single.x4.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4, <vscale x 8 x i16> %zm)
  ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res
}

define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_rounding_shl_single_x4_s32(<vscale x 4 x i32> %dummy, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4, <vscale x 4 x i32> %zm) {
; CHECK-LABEL: multi_vec_rounding_shl_single_x4_s32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    srshl { z24.s - z27.s }, { z24.s - z27.s }, z5.s
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
              @llvm.aarch64.sve.srshl.single.x4.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4, <vscale x 4 x i32> %zm)
  ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %res
}

define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_rounding_shl_single_x4_s64(<vscale x 2 x i64> %dummy, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4, <vscale x 2 x i64> %zm) {
; CHECK-LABEL: multi_vec_rounding_shl_single_x4_s64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    srshl { z24.d - z27.d }, { z24.d - z27.d }, z5.d
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
              @llvm.aarch64.sve.srshl.single.x4.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4, <vscale x 2 x i64> %zm)
  ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %res
}

; URSHL (Single, x2)

define { <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_rounding_shl_single_x2_u8(<vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm) {
; CHECK-LABEL: multi_vec_rounding_shl_single_x2_u8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    urshl { z4.b, z5.b }, { z4.b, z5.b }, z3.b
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.urshl.single.x2.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8> } %res
}

define { <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_rounding_shl_single_x2_u16(<vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm) {
; CHECK-LABEL: multi_vec_rounding_shl_single_x2_u16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    urshl { z4.h, z5.h }, { z4.h, z5.h }, z3.h
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.urshl.single.x2.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm)
  ret { <vscale x 8 x i16>, <vscale x 8 x i16> } %res
}

define { <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_rounding_shl_single_x2_u32(<vscale x 4 x i32> %dummy, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm) {
; CHECK-LABEL: multi_vec_rounding_shl_single_x2_u32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    urshl { z4.s, z5.s }, { z4.s, z5.s }, z3.s
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.urshl.single.x2.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm)
  ret { <vscale x 4 x i32>, <vscale x 4 x i32> } %res
}

define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_rounding_shl_single_x2_u64(<vscale x 2 x i64> %dummy, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm) {
; CHECK-LABEL: multi_vec_rounding_shl_single_x2_u64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    urshl { z4.d, z5.d }, { z4.d, z5.d }, z3.d
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.urshl.single.x2.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm)
  ret { <vscale x 2 x i64>, <vscale x 2 x i64> } %res
}

; URSHL (Single, x4)

define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_rounding_shl_single_x4_u8(<vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4, <vscale x 16 x i8> %zm) {
; CHECK-LABEL: multi_vec_rounding_shl_single_x4_u8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    urshl { z24.b - z27.b }, { z24.b - z27.b }, z5.b
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
              @llvm.aarch64.sve.urshl.single.x4.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4, <vscale x 16 x i8> %zm)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res
}

define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_rounding_shl_single_x4_u16(<vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4, <vscale x 8 x i16> %zm) {
; CHECK-LABEL: multi_vec_rounding_shl_single_x4_u16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    urshl { z24.h - z27.h }, { z24.h - z27.h }, z5.h
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
              @llvm.aarch64.sve.urshl.single.x4.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4, <vscale x 8 x i16> %zm)
  ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res
}

define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_rounding_shl_single_x4_u32(<vscale x 4 x i32> %dummy, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4, <vscale x 4 x i32> %zm) {
; CHECK-LABEL: multi_vec_rounding_shl_single_x4_u32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    urshl { z24.s - z27.s }, { z24.s - z27.s }, z5.s
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
              @llvm.aarch64.sve.urshl.single.x4.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4, <vscale x 4 x i32> %zm)
  ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %res
}

define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_rounding_shl_single_x4_u64(<vscale x 2 x i64> %dummy, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4, <vscale x 2 x i64> %zm) {
; CHECK-LABEL: multi_vec_rounding_shl_single_x4_u64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    urshl { z24.d - z27.d }, { z24.d - z27.d }, z5.d
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
              @llvm.aarch64.sve.urshl.single.x4.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4, <vscale x 2 x i64> %zm)
  ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %res
}

; SRSHL (Multi, x2)

define { <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_rounding_shl_x2_s8(<vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2) {
; CHECK-LABEL: multi_vec_rounding_shl_x2_s8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    srshl { z4.b, z5.b }, { z4.b, z5.b }, { z6.b, z7.b }
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.srshl.x2.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8> } %res
}

define { <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_rounding_shl_x2_s16(<vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2) {
; CHECK-LABEL: multi_vec_rounding_shl_x2_s16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    srshl { z4.h, z5.h }, { z4.h, z5.h }, { z6.h, z7.h }
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.srshl.x2.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2)
  ret { <vscale x 8 x i16>, <vscale x 8 x i16> } %res
}

define { <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_rounding_shl_x2_s32(<vscale x 4 x i32> %dummy, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2) {
; CHECK-LABEL: multi_vec_rounding_shl_x2_s32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    srshl { z4.s, z5.s }, { z4.s, z5.s }, { z6.s, z7.s }
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.srshl.x2.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2)
  ret { <vscale x 4 x i32>, <vscale x 4 x i32> } %res
}

define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_rounding_shl_x2_s64(<vscale x 2 x i64> %dummy, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2) {
; CHECK-LABEL: multi_vec_rounding_shl_x2_s64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    srshl { z4.d, z5.d }, { z4.d, z5.d }, { z6.d, z7.d }
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.srshl.x2.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2)
  ret { <vscale x 2 x i64>, <vscale x 2 x i64> } %res
}

; SRSHL (Multi, x4)

define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_rounding_shl_x4_s8(<vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4) {
; CHECK-LABEL: multi_vec_rounding_shl_x4_s8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z30.d, z7.d
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    mov z29.d, z6.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z28.d, z5.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    ld1b { z31.b }, p0/z, [x0]
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    srshl { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b }
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
              @llvm.aarch64.sve.srshl.x4.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4,
                                                 <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res
}

define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_rounding_shl_x4_s16(<vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4) {
; CHECK-LABEL: multi_vec_rounding_shl_x4_s16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z30.d, z7.d
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    mov z29.d, z6.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z28.d, z5.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    ld1h { z31.h }, p0/z, [x0]
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    srshl { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
              @llvm.aarch64.sve.srshl.x4.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4,
                                                 <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4)
  ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res
}

define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_rounding_shl_x4_s32(<vscale x 4 x i32> %dummy, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4) {
; CHECK-LABEL: multi_vec_rounding_shl_x4_s32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z30.d, z7.d
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    mov z29.d, z6.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z28.d, z5.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    ld1w { z31.s }, p0/z, [x0]
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    srshl { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
              @llvm.aarch64.sve.srshl.x4.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4,
                                                 <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4)
  ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %res
}

define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_rounding_shl_x4_s64(<vscale x 2 x i64> %dummy, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4) {
; CHECK-LABEL: multi_vec_rounding_shl_x4_s64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z30.d, z7.d
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov z29.d, z6.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z28.d, z5.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    ld1d { z31.d }, p0/z, [x0]
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    srshl { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
              @llvm.aarch64.sve.srshl.x4.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4,
                                                 <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4)
  ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %res
}

; URSHL (Multi, x2)

define { <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_rounding_uhl_x2_u8(<vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2) {
; CHECK-LABEL: multi_vec_rounding_uhl_x2_u8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    urshl { z4.b, z5.b }, { z4.b, z5.b }, { z6.b, z7.b }
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.urshl.x2.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8> } %res
}

define { <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_rounding_uhl_x2_u16(<vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2) {
; CHECK-LABEL: multi_vec_rounding_uhl_x2_u16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    urshl { z4.h, z5.h }, { z4.h, z5.h }, { z6.h, z7.h }
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.urshl.x2.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2)
  ret { <vscale x 8 x i16>, <vscale x 8 x i16> } %res
}

define { <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_rounding_uhl_x2_u32(<vscale x 4 x i32> %dummy, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2) {
; CHECK-LABEL: multi_vec_rounding_uhl_x2_u32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    urshl { z4.s, z5.s }, { z4.s, z5.s }, { z6.s, z7.s }
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.urshl.x2.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2)
  ret { <vscale x 4 x i32>, <vscale x 4 x i32> } %res
}

define { <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_rounding_uhl_x2_u64(<vscale x 2 x i64> %dummy, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2) {
; CHECK-LABEL: multi_vec_rounding_uhl_x2_u64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z7.d, z4.d
; CHECK-NEXT:    mov z5.d, z2.d
; CHECK-NEXT:    mov z6.d, z3.d
; CHECK-NEXT:    mov z4.d, z1.d
; CHECK-NEXT:    urshl { z4.d, z5.d }, { z4.d, z5.d }, { z6.d, z7.d }
; CHECK-NEXT:    mov z0.d, z4.d
; CHECK-NEXT:    mov z1.d, z5.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.urshl.x2.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2)
  ret { <vscale x 2 x i64>, <vscale x 2 x i64> } %res
}

; URSHL (Multi, x4)

define { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @multi_vec_rounding_shl_x4_u8(<vscale x 16 x i8> %dummy, <vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4, <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4) {
; CHECK-LABEL: multi_vec_rounding_shl_x4_u8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z30.d, z7.d
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    mov z29.d, z6.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z28.d, z5.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    ld1b { z31.b }, p0/z, [x0]
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    urshl { z24.b - z27.b }, { z24.b - z27.b }, { z28.b - z31.b }
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
              @llvm.aarch64.sve.urshl.x4.nxv16i8(<vscale x 16 x i8> %zdn1, <vscale x 16 x i8> %zdn2, <vscale x 16 x i8> %zdn3, <vscale x 16 x i8> %zdn4,
                                                 <vscale x 16 x i8> %zm1, <vscale x 16 x i8> %zm2, <vscale x 16 x i8> %zm3, <vscale x 16 x i8> %zm4)
  ret { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } %res
}

define { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @multi_vec_rounding_shl_x4_u16(<vscale x 8 x i16> %dummy, <vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4, <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4) {
; CHECK-LABEL: multi_vec_rounding_shl_x4_u16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z30.d, z7.d
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    mov z29.d, z6.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z28.d, z5.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    ld1h { z31.h }, p0/z, [x0]
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    urshl { z24.h - z27.h }, { z24.h - z27.h }, { z28.h - z31.h }
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
              @llvm.aarch64.sve.urshl.x4.nxv8i16(<vscale x 8 x i16> %zdn1, <vscale x 8 x i16> %zdn2, <vscale x 8 x i16> %zdn3, <vscale x 8 x i16> %zdn4,
                                                 <vscale x 8 x i16> %zm1, <vscale x 8 x i16> %zm2, <vscale x 8 x i16> %zm3, <vscale x 8 x i16> %zm4)
  ret { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } %res
}

define { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @multi_vec_rounding_shl_x4_u32(<vscale x 4 x i32> %dummy, <vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4, <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4) {
; CHECK-LABEL: multi_vec_rounding_shl_x4_u32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z30.d, z7.d
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    mov z29.d, z6.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z28.d, z5.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    ld1w { z31.s }, p0/z, [x0]
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    urshl { z24.s - z27.s }, { z24.s - z27.s }, { z28.s - z31.s }
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
              @llvm.aarch64.sve.urshl.x4.nxv4i32(<vscale x 4 x i32> %zdn1, <vscale x 4 x i32> %zdn2, <vscale x 4 x i32> %zdn3, <vscale x 4 x i32> %zdn4,
                                                 <vscale x 4 x i32> %zm1, <vscale x 4 x i32> %zm2, <vscale x 4 x i32> %zm3, <vscale x 4 x i32> %zm4)
  ret { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } %res
}

define { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @multi_vec_rounding_shl_x4_u64(<vscale x 2 x i64> %dummy, <vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4, <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4) {
; CHECK-LABEL: multi_vec_rounding_shl_x4_u64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z30.d, z7.d
; CHECK-NEXT:    mov z27.d, z4.d
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov z29.d, z6.d
; CHECK-NEXT:    mov z26.d, z3.d
; CHECK-NEXT:    mov z28.d, z5.d
; CHECK-NEXT:    mov z25.d, z2.d
; CHECK-NEXT:    ld1d { z31.d }, p0/z, [x0]
; CHECK-NEXT:    mov z24.d, z1.d
; CHECK-NEXT:    urshl { z24.d - z27.d }, { z24.d - z27.d }, { z28.d - z31.d }
; CHECK-NEXT:    mov z0.d, z24.d
; CHECK-NEXT:    mov z1.d, z25.d
; CHECK-NEXT:    mov z2.d, z26.d
; CHECK-NEXT:    mov z3.d, z27.d
; CHECK-NEXT:    ret
  %res = call { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
              @llvm.aarch64.sve.urshl.x4.nxv2i64(<vscale x 2 x i64> %zdn1, <vscale x 2 x i64> %zdn2, <vscale x 2 x i64> %zdn3, <vscale x 2 x i64> %zdn4,
                                                 <vscale x 2 x i64> %zm1, <vscale x 2 x i64> %zm2, <vscale x 2 x i64> %zm3, <vscale x 2 x i64> %zm4)
  ret { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } %res
}

declare { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.srshl.single.x2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.srshl.single.x2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.srshl.single.x2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.srshl.single.x2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.srshl.single.x4.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.srshl.single.x4.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.srshl.single.x4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.srshl.single.x4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.urshl.single.x2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.urshl.single.x2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.urshl.single.x2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.urshl.single.x2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.urshl.single.x4.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.urshl.single.x4.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.urshl.single.x4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.urshl.single.x4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.srshl.x2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.srshl.x2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.srshl.x2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.srshl.x2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
        @llvm.aarch64.sve.srshl.x4.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
        @llvm.aarch64.sve.srshl.x4.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
        @llvm.aarch64.sve.srshl.x4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
        @llvm.aarch64.sve.srshl.x4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare { <vscale x 16 x i8>, <vscale x 16 x i8> } @llvm.aarch64.sve.urshl.x2.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare { <vscale x 8 x i16>, <vscale x 8 x i16> } @llvm.aarch64.sve.urshl.x2.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.urshl.x2.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare { <vscale x 2 x i64>, <vscale x 2 x i64> } @llvm.aarch64.sve.urshl.x2.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)

declare { <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8> }
        @llvm.aarch64.sve.urshl.x4.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
declare { <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16> }
        @llvm.aarch64.sve.urshl.x4.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
declare { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> }
        @llvm.aarch64.sve.urshl.x4.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
declare { <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64> }
        @llvm.aarch64.sve.urshl.x4.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)