; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64 -mattr=+sve2 | FileCheck %s

define i64 @smlsl_i64(i64 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
; CHECK-LABEL: smlsl_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    smsubl x8, w4, w3, x0
; CHECK-NEXT:    smsubl x0, w2, w1, x8
; CHECK-NEXT:    ret
  %be = sext i32 %b to i64
  %ce = sext i32 %c to i64
  %de = sext i32 %d to i64
  %ee = sext i32 %e to i64
  %m1.neg = mul nsw i64 %ce, %be
  %m2.neg = mul nsw i64 %ee, %de
  %reass.add = add i64 %m2.neg, %m1.neg
  %s2 = sub i64 %a, %reass.add
  ret i64 %s2
}

define i64 @umlsl_i64(i64 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
; CHECK-LABEL: umlsl_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umsubl x8, w4, w3, x0
; CHECK-NEXT:    umsubl x0, w2, w1, x8
; CHECK-NEXT:    ret
  %be = zext i32 %b to i64
  %ce = zext i32 %c to i64
  %de = zext i32 %d to i64
  %ee = zext i32 %e to i64
  %m1.neg = mul nuw i64 %ce, %be
  %m2.neg = mul nuw i64 %ee, %de
  %reass.add = add i64 %m2.neg, %m1.neg
  %s2 = sub i64 %a, %reass.add
  ret i64 %s2
}

define i64 @mls_i64(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e) {
; CHECK-LABEL: mls_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    msub x8, x4, x3, x0
; CHECK-NEXT:    msub x0, x2, x1, x8
; CHECK-NEXT:    ret
  %m1.neg = mul i64 %c, %b
  %m2.neg = mul i64 %e, %d
  %reass.add = add i64 %m2.neg, %m1.neg
  %s2 = sub i64 %a, %reass.add
  ret i64 %s2
}

define i16 @mls_i16(i16 %a, i16 %b, i16 %c, i16 %d, i16 %e) {
; CHECK-LABEL: mls_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    msub w8, w4, w3, w0
; CHECK-NEXT:    msub w0, w2, w1, w8
; CHECK-NEXT:    ret
  %m1.neg = mul i16 %c, %b
  %m2.neg = mul i16 %e, %d
  %reass.add = add i16 %m2.neg, %m1.neg
  %s2 = sub i16 %a, %reass.add
  ret i16 %s2
}

define i64 @mla_i64(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e) {
; CHECK-LABEL: mla_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mul x8, x4, x3
; CHECK-NEXT:    madd x8, x2, x1, x8
; CHECK-NEXT:    add x0, x8, x0
; CHECK-NEXT:    ret
  %m1 = mul i64 %c, %b
  %m2 = mul i64 %e, %d
  %s1 = add i64 %m1, %m2
  %s2 = add i64 %s1, %a
  ret i64 %s2
}

define i64 @mls_i64_C(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e) {
; CHECK-LABEL: mls_i64_C:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mul x8, x2, x1
; CHECK-NEXT:    mov w9, #10 // =0xa
; CHECK-NEXT:    madd x8, x4, x3, x8
; CHECK-NEXT:    sub x0, x9, x8
; CHECK-NEXT:    ret
  %m1.neg = mul i64 %c, %b
  %m2.neg = mul i64 %e, %d
  %reass.add = add i64 %m2.neg, %m1.neg
  %s2 = sub i64 10, %reass.add
  ret i64 %s2
}

define i64 @umlsl_i64_muls(i64 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
; CHECK-LABEL: umlsl_i64_muls:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umull x8, w2, w3
; CHECK-NEXT:    umsubl x8, w4, w3, x8
; CHECK-NEXT:    umsubl x0, w2, w1, x8
; CHECK-NEXT:    ret
  %be = zext i32 %b to i64
  %ce = zext i32 %c to i64
  %de = zext i32 %d to i64
  %ee = zext i32 %e to i64
  %m1.neg = mul nuw i64 %ce, %be
  %m2.neg = mul nuw i64 %ee, %de
  %m3 = mul nuw i64 %ce, %de
  %reass.add = add i64 %m2.neg, %m1.neg
  %s2 = sub i64 %m3, %reass.add
  ret i64 %s2
}

define i64 @umlsl_i64_uses(i64 %a, i32 %b, i32 %c, i32 %d, i32 %e) {
; CHECK-LABEL: umlsl_i64_uses:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umull x8, w4, w3
; CHECK-NEXT:    umaddl x8, w2, w1, x8
; CHECK-NEXT:    sub x9, x0, x8
; CHECK-NEXT:    and x0, x8, x9
; CHECK-NEXT:    ret
  %be = zext i32 %b to i64
  %ce = zext i32 %c to i64
  %de = zext i32 %d to i64
  %ee = zext i32 %e to i64
  %m1.neg = mul nuw i64 %ce, %be
  %m2.neg = mul nuw i64 %ee, %de
  %reass.add = add i64 %m2.neg, %m1.neg
  %s2 = sub i64 %a, %reass.add
  %o = and i64 %reass.add, %s2
  ret i64 %o
}

define i64 @mla_i64_C(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e) {
; CHECK-LABEL: mla_i64_C:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mul x8, x2, x1
; CHECK-NEXT:    madd x8, x4, x3, x8
; CHECK-NEXT:    add x0, x8, #10
; CHECK-NEXT:    ret
  %m1.neg = mul i64 %c, %b
  %m2.neg = mul i64 %e, %d
  %reass.add = add i64 %m2.neg, %m1.neg
  %s2 = add i64 10, %reass.add
  ret i64 %s2
}

define i64 @mla_i64_uses(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e) {
; CHECK-LABEL: mla_i64_uses:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mul x8, x2, x1
; CHECK-NEXT:    madd x8, x4, x3, x8
; CHECK-NEXT:    add x9, x0, x8
; CHECK-NEXT:    eor x0, x8, x9
; CHECK-NEXT:    ret
  %m1.neg = mul i64 %c, %b
  %m2.neg = mul i64 %e, %d
  %reass.add = add i64 %m2.neg, %m1.neg
  %s2 = add i64 %a, %reass.add
  %o = xor i64 %reass.add, %s2
  ret i64 %o
}

define i64 @mla_i64_mul(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e) {
; CHECK-LABEL: mla_i64_mul:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mul x8, x2, x1
; CHECK-NEXT:    madd x9, x4, x3, x8
; CHECK-NEXT:    add x0, x8, x9
; CHECK-NEXT:    ret
  %m1.neg = mul i64 %c, %b
  %m2.neg = mul i64 %e, %d
  %reass.add = add i64 %m2.neg, %m1.neg
  %s2 = add i64 %m1.neg, %reass.add
  ret i64 %s2
}


define <8 x i16> @smlsl_v8i16(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d, <8 x i8> %e) {
; CHECK-LABEL: smlsl_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    smlsl v0.8h, v4.8b, v3.8b
; CHECK-NEXT:    smlsl v0.8h, v2.8b, v1.8b
; CHECK-NEXT:    ret
  %be = sext <8 x i8> %b to <8 x i16>
  %ce = sext <8 x i8> %c to <8 x i16>
  %de = sext <8 x i8> %d to <8 x i16>
  %ee = sext <8 x i8> %e to <8 x i16>
  %m1.neg = mul nsw <8 x i16> %ce, %be
  %m2.neg = mul nsw <8 x i16> %ee, %de
  %reass.add = add <8 x i16> %m2.neg, %m1.neg
  %s2 = sub <8 x i16> %a, %reass.add
  ret <8 x i16> %s2
}

define <8 x i16> @umlsl_v8i16(<8 x i16> %a, <8 x i8> %b, <8 x i8> %c, <8 x i8> %d, <8 x i8> %e) {
; CHECK-LABEL: umlsl_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    umlsl v0.8h, v4.8b, v3.8b
; CHECK-NEXT:    umlsl v0.8h, v2.8b, v1.8b
; CHECK-NEXT:    ret
  %be = zext <8 x i8> %b to <8 x i16>
  %ce = zext <8 x i8> %c to <8 x i16>
  %de = zext <8 x i8> %d to <8 x i16>
  %ee = zext <8 x i8> %e to <8 x i16>
  %m1.neg = mul nuw <8 x i16> %ce, %be
  %m2.neg = mul nuw <8 x i16> %ee, %de
  %reass.add = add <8 x i16> %m2.neg, %m1.neg
  %s2 = sub <8 x i16> %a, %reass.add
  ret <8 x i16> %s2
}

define <8 x i16> @mls_v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c, <8 x i16> %d, <8 x i16> %e) {
; CHECK-LABEL: mls_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mls v0.8h, v4.8h, v3.8h
; CHECK-NEXT:    mls v0.8h, v2.8h, v1.8h
; CHECK-NEXT:    ret
  %m1.neg = mul <8 x i16> %c, %b
  %m2.neg = mul <8 x i16> %e, %d
  %reass.add = add <8 x i16> %m2.neg, %m1.neg
  %s2 = sub <8 x i16> %a, %reass.add
  ret <8 x i16> %s2
}

define <8 x i16> @mla_v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c, <8 x i16> %d, <8 x i16> %e) {
; CHECK-LABEL: mla_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mul v3.8h, v4.8h, v3.8h
; CHECK-NEXT:    mla v3.8h, v2.8h, v1.8h
; CHECK-NEXT:    add v0.8h, v3.8h, v0.8h
; CHECK-NEXT:    ret
  %m1 = mul <8 x i16> %c, %b
  %m2 = mul <8 x i16> %e, %d
  %s1 = add <8 x i16> %m1, %m2
  %s2 = add <8 x i16> %s1, %a
  ret <8 x i16> %s2
}

define <8 x i16> @mls_v8i16_C(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c, <8 x i16> %d, <8 x i16> %e) {
; CHECK-LABEL: mls_v8i16_C:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v0.8h, #10
; CHECK-NEXT:    mls v0.8h, v4.8h, v3.8h
; CHECK-NEXT:    mls v0.8h, v2.8h, v1.8h
; CHECK-NEXT:    ret
  %m1.neg = mul <8 x i16> %c, %b
  %m2.neg = mul <8 x i16> %e, %d
  %reass.add = add <8 x i16> %m2.neg, %m1.neg
  %s2 = sub <8 x i16> <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>, %reass.add
  ret <8 x i16> %s2
}

define <8 x i16> @mla_v8i16_C(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c, <8 x i16> %d, <8 x i16> %e) {
; CHECK-LABEL: mla_v8i16_C:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mul v1.8h, v2.8h, v1.8h
; CHECK-NEXT:    movi v0.8h, #10
; CHECK-NEXT:    mla v1.8h, v4.8h, v3.8h
; CHECK-NEXT:    add v0.8h, v1.8h, v0.8h
; CHECK-NEXT:    ret
  %m1.neg = mul <8 x i16> %c, %b
  %m2.neg = mul <8 x i16> %e, %d
  %reass.add = add <8 x i16> %m2.neg, %m1.neg
  %s2 = add <8 x i16> <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>, %reass.add
  ret <8 x i16> %s2
}


define <vscale x 8 x i16> @smlsl_nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i8> %b, <vscale x 8 x i8> %c, <vscale x 8 x i8> %d, <vscale x 8 x i8> %e) {
; CHECK-LABEL: smlsl_nxv8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    sxtb z3.h, p0/m, z3.h
; CHECK-NEXT:    sxtb z4.h, p0/m, z4.h
; CHECK-NEXT:    sxtb z1.h, p0/m, z1.h
; CHECK-NEXT:    sxtb z2.h, p0/m, z2.h
; CHECK-NEXT:    mls z0.h, p0/m, z4.h, z3.h
; CHECK-NEXT:    mls z0.h, p0/m, z2.h, z1.h
; CHECK-NEXT:    ret
  %be = sext <vscale x 8 x i8> %b to <vscale x 8 x i16>
  %ce = sext <vscale x 8 x i8> %c to <vscale x 8 x i16>
  %de = sext <vscale x 8 x i8> %d to <vscale x 8 x i16>
  %ee = sext <vscale x 8 x i8> %e to <vscale x 8 x i16>
  %m1.neg = mul nsw <vscale x 8 x i16> %ce, %be
  %m2.neg = mul nsw <vscale x 8 x i16> %ee, %de
  %reass.add = add <vscale x 8 x i16> %m2.neg, %m1.neg
  %s2 = sub <vscale x 8 x i16> %a, %reass.add
  ret <vscale x 8 x i16> %s2
}

define <vscale x 8 x i16> @umlsl_nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i8> %b, <vscale x 8 x i8> %c, <vscale x 8 x i8> %d, <vscale x 8 x i8> %e) {
; CHECK-LABEL: umlsl_nxv8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and z3.h, z3.h, #0xff
; CHECK-NEXT:    and z4.h, z4.h, #0xff
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    and z1.h, z1.h, #0xff
; CHECK-NEXT:    and z2.h, z2.h, #0xff
; CHECK-NEXT:    mls z0.h, p0/m, z4.h, z3.h
; CHECK-NEXT:    mls z0.h, p0/m, z2.h, z1.h
; CHECK-NEXT:    ret
  %be = zext <vscale x 8 x i8> %b to <vscale x 8 x i16>
  %ce = zext <vscale x 8 x i8> %c to <vscale x 8 x i16>
  %de = zext <vscale x 8 x i8> %d to <vscale x 8 x i16>
  %ee = zext <vscale x 8 x i8> %e to <vscale x 8 x i16>
  %m1.neg = mul nuw <vscale x 8 x i16> %ce, %be
  %m2.neg = mul nuw <vscale x 8 x i16> %ee, %de
  %reass.add = add <vscale x 8 x i16> %m2.neg, %m1.neg
  %s2 = sub <vscale x 8 x i16> %a, %reass.add
  ret <vscale x 8 x i16> %s2
}

define <vscale x 8 x i16> @mls_nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, <vscale x 8 x i16> %d, <vscale x 8 x i16> %e) {
; CHECK-LABEL: mls_nxv8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    mls z0.h, p0/m, z4.h, z3.h
; CHECK-NEXT:    mls z0.h, p0/m, z2.h, z1.h
; CHECK-NEXT:    ret
  %m1.neg = mul <vscale x 8 x i16> %c, %b
  %m2.neg = mul <vscale x 8 x i16> %e, %d
  %reass.add = add <vscale x 8 x i16> %m2.neg, %m1.neg
  %s2 = sub <vscale x 8 x i16> %a, %reass.add
  ret <vscale x 8 x i16> %s2
}

define <vscale x 8 x i16> @mla_nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c, <vscale x 8 x i16> %d, <vscale x 8 x i16> %e) {
; CHECK-LABEL: mla_nxv8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mul z1.h, z2.h, z1.h
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    mla z1.h, p0/m, z4.h, z3.h
; CHECK-NEXT:    add z0.h, z1.h, z0.h
; CHECK-NEXT:    ret
  %m1 = mul <vscale x 8 x i16> %c, %b
  %m2 = mul <vscale x 8 x i16> %e, %d
  %s1 = add <vscale x 8 x i16> %m1, %m2
  %s2 = add <vscale x 8 x i16> %s1, %a
  ret <vscale x 8 x i16> %s2
}