; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=aarch64-none-eabi -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc -mtriple=aarch64-none-eabi -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI

; Code generation test for the integer `mul` instruction on AArch64, covering
; scalar types from i8 to i128 and fixed-width vectors of those element types.
;
; Every function is compiled twice by the two run lines above: once with the
; default instruction selector and once with -global-isel.  Assertions that
; use the un-suffixed prefix apply to both runs; assertions with the -SD or
; -GI suffix apply only to the default run or the -global-isel run.
;
; The assertion lines are generated.  Refresh them with
; utils/update_llc_test_checks.py rather than editing them by hand.

; --- Scalars -----------------------------------------------------------------
; i8, i16 and i32 are all expected to use one 32-bit multiply; i64 uses one
; 64-bit multiply.

define i8 @i8(i8 %a, i8 %b) {
; CHECK-LABEL: i8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mul w0, w0, w1
; CHECK-NEXT:    ret
entry:
  %s = mul i8 %a, %b
  ret i8 %s
}

define i16 @i16(i16 %a, i16 %b) {
; CHECK-LABEL: i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mul w0, w0, w1
; CHECK-NEXT:    ret
entry:
  %s = mul i16 %a, %b
  ret i16 %s
}

define i32 @i32(i32 %a, i32 %b) {
; CHECK-LABEL: i32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mul w0, w0, w1
; CHECK-NEXT:    ret
entry:
  %s = mul i32 %a, %b
  ret i32 %s
}

define i64 @i64(i64 %a, i64 %b) {
; CHECK-LABEL: i64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mul x0, x0, x1
; CHECK-NEXT:    ret
entry:
  %s = mul i64 %a, %b
  ret i64 %s
}

; The i128 product is expected to be assembled from 64-bit mul, umulh and madd
; (plus a separate add in the -global-isel output).
define i128 @i128(i128 %a, i128 %b) {
; CHECK-SD-LABEL: i128:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    umulh x8, x0, x2
; CHECK-SD-NEXT:    madd x8, x0, x3, x8
; CHECK-SD-NEXT:    mul x0, x0, x2
; CHECK-SD-NEXT:    madd x1, x1, x2, x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: i128:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    mul x9, x0, x3
; CHECK-GI-NEXT:    mul x8, x0, x2
; CHECK-GI-NEXT:    umulh x10, x0, x2
; CHECK-GI-NEXT:    madd x9, x1, x2, x9
; CHECK-GI-NEXT:    mov x0, x8
; CHECK-GI-NEXT:    add x1, x9, x10
; CHECK-GI-NEXT:    ret
entry:
  %s = mul i128 %a, %b
  ret i128 %s
}

; --- i8 vectors --------------------------------------------------------------
; The 2-, 3- and 4-element cases take their operands through memory: both
; inputs are loaded from %p1 and %p2 and the product is stored back to %p1.
; The expected code performs the multiply on wider lanes (.2s for v2i8, .4h or
; a widening umull for v3i8 / v4i8).

define void @v2i8(ptr %p1, ptr %p2) {
; CHECK-SD-LABEL: v2i8:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    ld1 { v0.b }[0], [x0]
; CHECK-SD-NEXT:    ld1 { v1.b }[0], [x1]
; CHECK-SD-NEXT:    add x8, x0, #1
; CHECK-SD-NEXT:    add x9, x1, #1
; CHECK-SD-NEXT:    ld1 { v0.b }[4], [x8]
; CHECK-SD-NEXT:    ld1 { v1.b }[4], [x9]
; CHECK-SD-NEXT:    mul v0.2s, v0.2s, v1.2s
; CHECK-SD-NEXT:    mov w8, v0.s[1]
; CHECK-SD-NEXT:    fmov w9, s0
; CHECK-SD-NEXT:    strb w9, [x0]
; CHECK-SD-NEXT:    strb w8, [x0, #1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: v2i8:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    ld1 { v0.b }[0], [x0]
; CHECK-GI-NEXT:    ld1 { v1.b }[0], [x1]
; CHECK-GI-NEXT:    ldr b2, [x0, #1]
; CHECK-GI-NEXT:    ldr b3, [x1, #1]
; CHECK-GI-NEXT:    mov v0.s[1], v2.s[0]
; CHECK-GI-NEXT:    mov v1.s[1], v3.s[0]
; CHECK-GI-NEXT:    mul v0.2s, v0.2s, v1.2s
; CHECK-GI-NEXT:    mov s1, v0.s[1]
; CHECK-GI-NEXT:    str b0, [x0]
; CHECK-GI-NEXT:    str b1, [x0, #1]
; CHECK-GI-NEXT:    ret
entry:
  %d = load <2 x i8>, ptr %p1
  %e = load <2 x i8>, ptr %p2
  %s = mul <2 x i8> %d, %e
  store <2 x i8> %s, ptr %p1
  ret void
}

define void @v3i8(ptr %p1, ptr %p2) {
; CHECK-SD-LABEL: v3i8:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    sub sp, sp, #16
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
; CHECK-SD-NEXT:    ldr s0, [x0]
; CHECK-SD-NEXT:    ldr s1, [x1]
; CHECK-SD-NEXT:    zip1 v0.8b, v0.8b, v0.8b
; CHECK-SD-NEXT:    zip1 v1.8b, v1.8b, v0.8b
; CHECK-SD-NEXT:    mul v0.4h, v0.4h, v1.4h
; CHECK-SD-NEXT:    uzp1 v1.8b, v0.8b, v0.8b
; CHECK-SD-NEXT:    umov w8, v0.h[2]
; CHECK-SD-NEXT:    str s1, [sp, #12]
; CHECK-SD-NEXT:    ldrh w9, [sp, #12]
; CHECK-SD-NEXT:    strb w8, [x0, #2]
; CHECK-SD-NEXT:    strh w9, [x0]
; CHECK-SD-NEXT:    add sp, sp, #16
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: v3i8:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    ldrb w8, [x0]
; CHECK-GI-NEXT:    ldrb w9, [x1]
; CHECK-GI-NEXT:    ldrb w10, [x0, #1]
; CHECK-GI-NEXT:    ldrb w11, [x1, #1]
; CHECK-GI-NEXT:    fmov s0, w8
; CHECK-GI-NEXT:    fmov s1, w9
; CHECK-GI-NEXT:    ldrb w8, [x0, #2]
; CHECK-GI-NEXT:    ldrb w9, [x1, #2]
; CHECK-GI-NEXT:    mov v0.h[1], w10
; CHECK-GI-NEXT:    mov v1.h[1], w11
; CHECK-GI-NEXT:    mov v0.h[2], w8
; CHECK-GI-NEXT:    mov v1.h[2], w9
; CHECK-GI-NEXT:    mul v0.4h, v0.4h, v1.4h
; CHECK-GI-NEXT:    mov h1, v0.h[1]
; CHECK-GI-NEXT:    mov h2, v0.h[2]
; CHECK-GI-NEXT:    str b0, [x0]
; CHECK-GI-NEXT:    str b1, [x0, #1]
; CHECK-GI-NEXT:    str b2, [x0, #2]
; CHECK-GI-NEXT:    ret
entry:
  %d = load <3 x i8>, ptr %p1
  %e = load <3 x i8>, ptr %p2
  %s = mul <3 x i8> %d, %e
  store <3 x i8> %s, ptr %p1
  ret void
}

define void @v4i8(ptr %p1, ptr %p2) {
; CHECK-SD-LABEL: v4i8:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    ldr s0, [x0]
; CHECK-SD-NEXT:    ldr s1, [x1]
; CHECK-SD-NEXT:    umull v0.8h, v0.8b, v1.8b
; CHECK-SD-NEXT:    xtn v0.8b, v0.8h
; CHECK-SD-NEXT:    str s0, [x0]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: v4i8:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    ldr w8, [x0]
; CHECK-GI-NEXT:    ldr w9, [x1]
; CHECK-GI-NEXT:    fmov s0, w8
; CHECK-GI-NEXT:    fmov s1, w9
; CHECK-GI-NEXT:    mov b2, v0.b[1]
; CHECK-GI-NEXT:    mov b3, v1.b[1]
; CHECK-GI-NEXT:    mov b4, v0.b[2]
; CHECK-GI-NEXT:    mov b5, v0.b[3]
; CHECK-GI-NEXT:    fmov w8, s2
; CHECK-GI-NEXT:    mov b2, v1.b[2]
; CHECK-GI-NEXT:    fmov w9, s3
; CHECK-GI-NEXT:    mov b3, v1.b[3]
; CHECK-GI-NEXT:    mov v0.h[1], w8
; CHECK-GI-NEXT:    mov v1.h[1], w9
; CHECK-GI-NEXT:    fmov w8, s4
; CHECK-GI-NEXT:    fmov w9, s2
; CHECK-GI-NEXT:    mov v0.h[2], w8
; CHECK-GI-NEXT:    mov v1.h[2], w9
; CHECK-GI-NEXT:    fmov w8, s5
; CHECK-GI-NEXT:    fmov w9, s3
; CHECK-GI-NEXT:    mov v0.h[3], w8
; CHECK-GI-NEXT:    mov v1.h[3], w9
; CHECK-GI-NEXT:    mul v0.4h, v0.4h, v1.4h
; CHECK-GI-NEXT:    uzp1 v0.8b, v0.8b, v0.8b
; CHECK-GI-NEXT:    fmov w8, s0
; CHECK-GI-NEXT:    str w8, [x0]
; CHECK-GI-NEXT:    ret
entry:
  %d = load <4 x i8>, ptr %p1
  %e = load <4 x i8>, ptr %p2
  %s = mul <4 x i8> %d, %e
  store <4 x i8> %s, ptr %p1
  ret void
}

; v8i8 and v16i8 are expected to be a single vector multiply from both
; selectors.
define <8 x i8> @v8i8(<8 x i8> %d, <8 x i8> %e) {
; CHECK-LABEL: v8i8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mul v0.8b, v0.8b, v1.8b
; CHECK-NEXT:    ret
entry:
  %s = mul <8 x i8> %d, %e
  ret <8 x i8> %s
}

define <16 x i8> @v16i8(<16 x i8> %d, <16 x i8> %e) {
; CHECK-LABEL: v16i8:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mul v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    ret
entry:
  %s = mul <16 x i8> %d, %e
  ret <16 x i8> %s
}

; v32i8 is expected to be two 128-bit multiplies; the two selectors differ only
; in the order in which the halves are emitted.
define <32 x i8> @v32i8(<32 x i8> %d, <32 x i8> %e) {
; CHECK-SD-LABEL: v32i8:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    mul v1.16b, v1.16b, v3.16b
; CHECK-SD-NEXT:    mul v0.16b, v0.16b, v2.16b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: v32i8:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    mul v0.16b, v0.16b, v2.16b
; CHECK-GI-NEXT:    mul v1.16b, v1.16b, v3.16b
; CHECK-GI-NEXT:    ret
entry:
  %s = mul <32 x i8> %d, %e
  ret <32 x i8> %s
}

; --- i16 vectors -------------------------------------------------------------
; As with the small i8 vectors, v2i16 and v3i16 load both operands from memory
; and store the product back to %p1.

define void @v2i16(ptr %p1, ptr %p2) {
; CHECK-SD-LABEL: v2i16:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    ld1 { v0.h }[0], [x0]
; CHECK-SD-NEXT:    ld1 { v1.h }[0], [x1]
; CHECK-SD-NEXT:    add x8, x0, #2
; CHECK-SD-NEXT:    add x9, x1, #2
; CHECK-SD-NEXT:    ld1 { v0.h }[2], [x8]
; CHECK-SD-NEXT:    ld1 { v1.h }[2], [x9]
; CHECK-SD-NEXT:    mul v0.2s, v0.2s, v1.2s
; CHECK-SD-NEXT:    mov w8, v0.s[1]
; CHECK-SD-NEXT:    fmov w9, s0
; CHECK-SD-NEXT:    strh w9, [x0]
; CHECK-SD-NEXT:    strh w8, [x0, #2]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: v2i16:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    ld1 { v0.h }[0], [x0]
; CHECK-GI-NEXT:    ld1 { v1.h }[0], [x1]
; CHECK-GI-NEXT:    ldr h2, [x0, #2]
; CHECK-GI-NEXT:    ldr h3, [x1, #2]
; CHECK-GI-NEXT:    mov v0.s[1], v2.s[0]
; CHECK-GI-NEXT:    mov v1.s[1], v3.s[0]
; CHECK-GI-NEXT:    mul v0.2s, v0.2s, v1.2s
; CHECK-GI-NEXT:    mov s1, v0.s[1]
; CHECK-GI-NEXT:    str h0, [x0]
; CHECK-GI-NEXT:    str h1, [x0, #2]
; CHECK-GI-NEXT:    ret
entry:
  %d = load <2 x i16>, ptr %p1
  %e = load <2 x i16>, ptr %p2
  %s = mul <2 x i16> %d, %e
  store <2 x i16> %s, ptr %p1
  ret void
}

define void @v3i16(ptr %p1, ptr %p2) {
; CHECK-SD-LABEL: v3i16:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    ldr d0, [x0]
; CHECK-SD-NEXT:    ldr d1, [x1]
; CHECK-SD-NEXT:    add x8, x0, #4
; CHECK-SD-NEXT:    mul v0.4h, v0.4h, v1.4h
; CHECK-SD-NEXT:    st1 { v0.h }[2], [x8]
; CHECK-SD-NEXT:    str s0, [x0]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: v3i16:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    ldr h0, [x0]
; CHECK-GI-NEXT:    ldr h1, [x1]
; CHECK-GI-NEXT:    add x8, x0, #2
; CHECK-GI-NEXT:    add x9, x1, #2
; CHECK-GI-NEXT:    add x10, x1, #4
; CHECK-GI-NEXT:    ld1 { v0.h }[1], [x8]
; CHECK-GI-NEXT:    ld1 { v1.h }[1], [x9]
; CHECK-GI-NEXT:    add x9, x0, #4
; CHECK-GI-NEXT:    ld1 { v0.h }[2], [x9]
; CHECK-GI-NEXT:    ld1 { v1.h }[2], [x10]
; CHECK-GI-NEXT:    mul v0.4h, v0.4h, v1.4h
; CHECK-GI-NEXT:    str h0, [x0]
; CHECK-GI-NEXT:    st1 { v0.h }[1], [x8]
; CHECK-GI-NEXT:    st1 { v0.h }[2], [x9]
; CHECK-GI-NEXT:    ret
entry:
  %d = load <3 x i16>, ptr %p1
  %e = load <3 x i16>, ptr %p2
  %s = mul <3 x i16> %d, %e
  store <3 x i16> %s, ptr %p1
  ret void
}

define <4 x i16> @v4i16(<4 x i16> %d, <4 x i16> %e) {
; CHECK-LABEL: v4i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mul v0.4h, v0.4h, v1.4h
; CHECK-NEXT:    ret
entry:
  %s = mul <4 x i16> %d, %e
  ret <4 x i16> %s
}

define <8 x i16> @v8i16(<8 x i16> %d, <8 x i16> %e) {
; CHECK-LABEL: v8i16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mul v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
entry:
  %s = mul <8 x i16> %d, %e
  ret <8 x i16> %s
}

define <16 x i16> @v16i16(<16 x i16> %d, <16 x i16> %e) {
; CHECK-SD-LABEL: v16i16:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    mul v1.8h, v1.8h, v3.8h
; CHECK-SD-NEXT:    mul v0.8h, v0.8h, v2.8h
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: v16i16:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    mul v0.8h, v0.8h, v2.8h
; CHECK-GI-NEXT:    mul v1.8h, v1.8h, v3.8h
; CHECK-GI-NEXT:    ret
entry:
  %s = mul <16 x i16> %d, %e
  ret <16 x i16> %s
}

; --- i32 vectors -------------------------------------------------------------
; v3i32 is expected to use the same 4-lane multiply as v4i32.

define <2 x i32> @v2i32(<2 x i32> %d, <2 x i32> %e) {
; CHECK-LABEL: v2i32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mul v0.2s, v0.2s, v1.2s
; CHECK-NEXT:    ret
entry:
  %s = mul <2 x i32> %d, %e
  ret <2 x i32> %s
}

define <3 x i32> @v3i32(<3 x i32> %d, <3 x i32> %e) {
; CHECK-LABEL: v3i32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mul v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
entry:
  %s = mul <3 x i32> %d, %e
  ret <3 x i32> %s
}

define <4 x i32> @v4i32(<4 x i32> %d, <4 x i32> %e) {
; CHECK-LABEL: v4i32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    mul v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    ret
entry:
  %s = mul <4 x i32> %d, %e
  ret <4 x i32> %s
}

define <8 x i32> @v8i32(<8 x i32> %d, <8 x i32> %e) {
; CHECK-SD-LABEL: v8i32:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    mul v1.4s, v1.4s, v3.4s
; CHECK-SD-NEXT:    mul v0.4s, v0.4s, v2.4s
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: v8i32:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    mul v0.4s, v0.4s, v2.4s
; CHECK-GI-NEXT:    mul v1.4s, v1.4s, v3.4s
; CHECK-GI-NEXT:    ret
entry:
  %s = mul <8 x i32> %d, %e
  ret <8 x i32> %s
}

; --- i64 vectors -------------------------------------------------------------
; No vector multiply appears in the expected output for 64-bit lanes: each lane
; is moved to a general-purpose register, multiplied with the scalar mul and
; moved back into the result vector.

define <2 x i64> @v2i64(<2 x i64> %d, <2 x i64> %e) {
; CHECK-SD-LABEL: v2i64:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    fmov x10, d1
; CHECK-SD-NEXT:    fmov x11, d0
; CHECK-SD-NEXT:    mov x8, v1.d[1]
; CHECK-SD-NEXT:    mov x9, v0.d[1]
; CHECK-SD-NEXT:    mul x10, x11, x10
; CHECK-SD-NEXT:    mul x8, x9, x8
; CHECK-SD-NEXT:    fmov d0, x10
; CHECK-SD-NEXT:    mov v0.d[1], x8
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: v2i64:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    fmov x8, d0
; CHECK-GI-NEXT:    fmov x9, d1
; CHECK-GI-NEXT:    mov x10, v0.d[1]
; CHECK-GI-NEXT:    mov x11, v1.d[1]
; CHECK-GI-NEXT:    mul x8, x8, x9
; CHECK-GI-NEXT:    mul x9, x10, x11
; CHECK-GI-NEXT:    mov v0.d[0], x8
; CHECK-GI-NEXT:    mov v0.d[1], x9
; CHECK-GI-NEXT:    ret
entry:
  %s = mul <2 x i64> %d, %e
  ret <2 x i64> %s
}

define <3 x i64> @v3i64(<3 x i64> %d, <3 x i64> %e) {
; CHECK-SD-LABEL: v3i64:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    // kill: def $d3 killed $d3 def $q3
; CHECK-SD-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-SD-NEXT:    fmov x8, d3
; CHECK-SD-NEXT:    fmov x9, d0
; CHECK-SD-NEXT:    // kill: def $d4 killed $d4 def $q4
; CHECK-SD-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-SD-NEXT:    // kill: def $d5 killed $d5 def $q5
; CHECK-SD-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-SD-NEXT:    fmov x10, d1
; CHECK-SD-NEXT:    fmov x11, d2
; CHECK-SD-NEXT:    mul x8, x9, x8
; CHECK-SD-NEXT:    fmov x9, d4
; CHECK-SD-NEXT:    mul x9, x10, x9
; CHECK-SD-NEXT:    fmov x10, d5
; CHECK-SD-NEXT:    fmov d0, x8
; CHECK-SD-NEXT:    mul x10, x11, x10
; CHECK-SD-NEXT:    fmov d1, x9
; CHECK-SD-NEXT:    fmov d2, x10
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: v3i64:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NEXT:    // kill: def $d3 killed $d3 def $q3
; CHECK-GI-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-GI-NEXT:    // kill: def $d4 killed $d4 def $q4
; CHECK-GI-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-GI-NEXT:    mov v3.d[1], v4.d[0]
; CHECK-GI-NEXT:    fmov x8, d0
; CHECK-GI-NEXT:    fmov x9, d3
; CHECK-GI-NEXT:    mov x10, v0.d[1]
; CHECK-GI-NEXT:    mov x11, v3.d[1]
; CHECK-GI-NEXT:    mul x8, x8, x9
; CHECK-GI-NEXT:    mul x9, x10, x11
; CHECK-GI-NEXT:    mov v0.d[0], x8
; CHECK-GI-NEXT:    fmov x8, d2
; CHECK-GI-NEXT:    mov v0.d[1], x9
; CHECK-GI-NEXT:    fmov x9, d5
; CHECK-GI-NEXT:    mul x8, x8, x9
; CHECK-GI-NEXT:    mov d1, v0.d[1]
; CHECK-GI-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT:    fmov d2, x8
; CHECK-GI-NEXT:    ret
entry:
  %s = mul <3 x i64> %d, %e
  ret <3 x i64> %s
}

define <4 x i64> @v4i64(<4 x i64> %d, <4 x i64> %e) {
; CHECK-SD-LABEL: v4i64:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    fmov x8, d2
; CHECK-SD-NEXT:    fmov x9, d0
; CHECK-SD-NEXT:    fmov x12, d1
; CHECK-SD-NEXT:    mov x10, v2.d[1]
; CHECK-SD-NEXT:    mov x11, v0.d[1]
; CHECK-SD-NEXT:    mov x13, v3.d[1]
; CHECK-SD-NEXT:    mov x14, v1.d[1]
; CHECK-SD-NEXT:    mul x8, x9, x8
; CHECK-SD-NEXT:    fmov x9, d3
; CHECK-SD-NEXT:    mul x10, x11, x10
; CHECK-SD-NEXT:    mul x9, x12, x9
; CHECK-SD-NEXT:    fmov d0, x8
; CHECK-SD-NEXT:    mul x11, x14, x13
; CHECK-SD-NEXT:    mov v0.d[1], x10
; CHECK-SD-NEXT:    fmov d1, x9
; CHECK-SD-NEXT:    mov v1.d[1], x11
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: v4i64:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    fmov x8, d0
; CHECK-GI-NEXT:    fmov x9, d2
; CHECK-GI-NEXT:    fmov x12, d3
; CHECK-GI-NEXT:    mov x10, v0.d[1]
; CHECK-GI-NEXT:    mov x11, v2.d[1]
; CHECK-GI-NEXT:    mov x13, v1.d[1]
; CHECK-GI-NEXT:    mov x14, v3.d[1]
; CHECK-GI-NEXT:    mul x8, x8, x9
; CHECK-GI-NEXT:    fmov x9, d1
; CHECK-GI-NEXT:    mul x10, x10, x11
; CHECK-GI-NEXT:    mul x9, x9, x12
; CHECK-GI-NEXT:    mov v0.d[0], x8
; CHECK-GI-NEXT:    mul x11, x13, x14
; CHECK-GI-NEXT:    mov v1.d[0], x9
; CHECK-GI-NEXT:    mov v0.d[1], x10
; CHECK-GI-NEXT:    mov v1.d[1], x11
; CHECK-GI-NEXT:    ret
entry:
  %s = mul <4 x i64> %d, %e
  ret <4 x i64> %s
}

; --- i128 vectors ------------------------------------------------------------
; Each element is expected to use the same mul / umulh / madd combination as
; the scalar i128 case.  In the 3- and 4-element cases the expected code also
; reads part of the second operand from the stack with ldp.

define <2 x i128> @v2i128(<2 x i128> %d, <2 x i128> %e) {
; CHECK-SD-LABEL: v2i128:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    umulh x8, x2, x6
; CHECK-SD-NEXT:    umulh x9, x0, x4
; CHECK-SD-NEXT:    madd x8, x2, x7, x8
; CHECK-SD-NEXT:    madd x9, x0, x5, x9
; CHECK-SD-NEXT:    madd x3, x3, x6, x8
; CHECK-SD-NEXT:    madd x1, x1, x4, x9
; CHECK-SD-NEXT:    mul x0, x0, x4
; CHECK-SD-NEXT:    mul x2, x2, x6
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: v2i128:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    mul x9, x0, x5
; CHECK-GI-NEXT:    mul x12, x2, x7
; CHECK-GI-NEXT:    mul x8, x0, x4
; CHECK-GI-NEXT:    umulh x10, x0, x4
; CHECK-GI-NEXT:    madd x11, x1, x4, x9
; CHECK-GI-NEXT:    mov x0, x8
; CHECK-GI-NEXT:    mul x9, x2, x6
; CHECK-GI-NEXT:    umulh x13, x2, x6
; CHECK-GI-NEXT:    add x1, x11, x10
; CHECK-GI-NEXT:    madd x12, x3, x6, x12
; CHECK-GI-NEXT:    mov x2, x9
; CHECK-GI-NEXT:    add x3, x12, x13
; CHECK-GI-NEXT:    ret
entry:
  %s = mul <2 x i128> %d, %e
  ret <2 x i128> %s
}

define <3 x i128> @v3i128(<3 x i128> %d, <3 x i128> %e) {
; CHECK-SD-LABEL: v3i128:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    umulh x9, x0, x6
; CHECK-SD-NEXT:    ldp x8, x10, [sp]
; CHECK-SD-NEXT:    madd x9, x0, x7, x9
; CHECK-SD-NEXT:    umulh x11, x2, x8
; CHECK-SD-NEXT:    madd x1, x1, x6, x9
; CHECK-SD-NEXT:    ldp x9, x12, [sp, #16]
; CHECK-SD-NEXT:    madd x10, x2, x10, x11
; CHECK-SD-NEXT:    umulh x13, x4, x9
; CHECK-SD-NEXT:    madd x3, x3, x8, x10
; CHECK-SD-NEXT:    madd x11, x4, x12, x13
; CHECK-SD-NEXT:    mul x0, x0, x6
; CHECK-SD-NEXT:    madd x5, x5, x9, x11
; CHECK-SD-NEXT:    mul x2, x2, x8
; CHECK-SD-NEXT:    mul x4, x4, x9
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: v3i128:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    ldp x10, x13, [sp]
; CHECK-GI-NEXT:    mul x9, x0, x7
; CHECK-GI-NEXT:    mul x8, x0, x6
; CHECK-GI-NEXT:    mul x13, x2, x13
; CHECK-GI-NEXT:    madd x12, x1, x6, x9
; CHECK-GI-NEXT:    mul x9, x2, x10
; CHECK-GI-NEXT:    umulh x14, x2, x10
; CHECK-GI-NEXT:    madd x10, x3, x10, x13
; CHECK-GI-NEXT:    ldp x13, x15, [sp, #16]
; CHECK-GI-NEXT:    mov x2, x9
; CHECK-GI-NEXT:    umulh x11, x0, x6
; CHECK-GI-NEXT:    mov x0, x8
; CHECK-GI-NEXT:    mul x15, x4, x15
; CHECK-GI-NEXT:    add x3, x10, x14
; CHECK-GI-NEXT:    umulh x16, x4, x13
; CHECK-GI-NEXT:    add x1, x12, x11
; CHECK-GI-NEXT:    madd x15, x5, x13, x15
; CHECK-GI-NEXT:    mul x4, x4, x13
; CHECK-GI-NEXT:    add x5, x15, x16
; CHECK-GI-NEXT:    ret
entry:
  %s = mul <3 x i128> %d, %e
  ret <3 x i128> %s
}

define <4 x i128> @v4i128(<4 x i128> %d, <4 x i128> %e) {
; CHECK-SD-LABEL: v4i128:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    ldp x8, x9, [sp]
; CHECK-SD-NEXT:    ldp x11, x12, [sp, #16]
; CHECK-SD-NEXT:    umulh x10, x0, x8
; CHECK-SD-NEXT:    umulh x13, x2, x11
; CHECK-SD-NEXT:    madd x9, x0, x9, x10
; CHECK-SD-NEXT:    madd x10, x2, x12, x13
; CHECK-SD-NEXT:    ldp x13, x14, [sp, #48]
; CHECK-SD-NEXT:    madd x1, x1, x8, x9
; CHECK-SD-NEXT:    madd x3, x3, x11, x10
; CHECK-SD-NEXT:    ldp x9, x10, [sp, #32]
; CHECK-SD-NEXT:    umulh x15, x6, x13
; CHECK-SD-NEXT:    umulh x12, x4, x9
; CHECK-SD-NEXT:    mul x0, x0, x8
; CHECK-SD-NEXT:    madd x10, x4, x10, x12
; CHECK-SD-NEXT:    madd x12, x6, x14, x15
; CHECK-SD-NEXT:    madd x5, x5, x9, x10
; CHECK-SD-NEXT:    madd x7, x7, x13, x12
; CHECK-SD-NEXT:    mul x2, x2, x11
; CHECK-SD-NEXT:    mul x4, x4, x9
; CHECK-SD-NEXT:    mul x6, x6, x13
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: v4i128:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    ldp x9, x10, [sp]
; CHECK-GI-NEXT:    ldp x15, x16, [sp, #32]
; CHECK-GI-NEXT:    mul x10, x0, x10
; CHECK-GI-NEXT:    mul x16, x4, x16
; CHECK-GI-NEXT:    madd x12, x1, x9, x10
; CHECK-GI-NEXT:    ldp x10, x13, [sp, #16]
; CHECK-GI-NEXT:    mul x8, x0, x9
; CHECK-GI-NEXT:    mul x13, x2, x13
; CHECK-GI-NEXT:    umulh x11, x0, x9
; CHECK-GI-NEXT:    mul x9, x2, x10
; CHECK-GI-NEXT:    umulh x14, x2, x10
; CHECK-GI-NEXT:    add x1, x12, x11
; CHECK-GI-NEXT:    madd x13, x3, x10, x13
; CHECK-GI-NEXT:    mov x2, x9
; CHECK-GI-NEXT:    mul x10, x4, x15
; CHECK-GI-NEXT:    umulh x17, x4, x15
; CHECK-GI-NEXT:    add x3, x13, x14
; CHECK-GI-NEXT:    madd x15, x5, x15, x16
; CHECK-GI-NEXT:    ldp x16, x18, [sp, #48]
; CHECK-GI-NEXT:    mov x4, x10
; CHECK-GI-NEXT:    mul x18, x6, x18
; CHECK-GI-NEXT:    umulh x0, x6, x16
; CHECK-GI-NEXT:    add x5, x15, x17
; CHECK-GI-NEXT:    madd x18, x7, x16, x18
; CHECK-GI-NEXT:    mul x6, x6, x16
; CHECK-GI-NEXT:    add x7, x18, x0
; CHECK-GI-NEXT:    mov x0, x8
; CHECK-GI-NEXT:    ret
entry:
  %s = mul <4 x i128> %d, %e
  ret <4 x i128> %s
}