; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE
; RUN: llc -mattr=+sve2 -force-streaming-compatible < %s | FileCheck %s --check-prefixes=CHECK,SVE2
; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s --check-prefixes=CHECK,SVE2
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE

; This test only tests the legal types for a given vector width, as mulh nodes
; do not get generated for non-legal types.

target triple = "aarch64-unknown-linux-gnu"

;
; SMULH
;

; <4 x i8>: both operands are sign-extended to i16, multiplied, shifted right
; and truncated back to i8. The SVE and SVE2 assertions expect sxtb + mul + lsr
; rather than an smulh instruction.
; NOTE(review): the shift amount here is 4, while the other i8 tests in this
; file shift by 8. Confirm that this is intentional; changing it requires
; regenerating the assertions with update_llc_test_checks.py.
define <4 x i8> @smulh_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
; SVE-LABEL: smulh_v4i8:
; SVE:       // %bb.0:
; SVE-NEXT:    ptrue p0.h, vl4
; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
; SVE-NEXT:    sxtb z0.h, p0/m, z0.h
; SVE-NEXT:    sxtb z1.h, p0/m, z1.h
; SVE-NEXT:    mul z0.h, p0/m, z0.h, z1.h
; SVE-NEXT:    lsr z0.h, z0.h, #4
; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
; SVE-NEXT:    ret
;
; SVE2-LABEL: smulh_v4i8:
; SVE2:       // %bb.0:
; SVE2-NEXT:    ptrue p0.h, vl4
; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
; SVE2-NEXT:    sxtb z0.h, p0/m, z0.h
; SVE2-NEXT:    sxtb z1.h, p0/m, z1.h
; SVE2-NEXT:    mul z0.h, z0.h, z1.h
; SVE2-NEXT:    lsr z0.h, z0.h, #4
; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
; SVE2-NEXT:    ret
;
; NONEON-NOSVE-LABEL: smulh_v4i8:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #32
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #8]
; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #14]
; NONEON-NOSVE-NEXT:    ldrsb w12, [sp, #22]
; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #12]
; NONEON-NOSVE-NEXT:    ldrsb w10, [sp, #10]
; NONEON-NOSVE-NEXT:    ldrsb w11, [sp, #8]
; NONEON-NOSVE-NEXT:    ldrsb w13, [sp, #20]
; NONEON-NOSVE-NEXT:    ldrsb w14, [sp, #18]
; NONEON-NOSVE-NEXT:    mul w8, w8, w12
; NONEON-NOSVE-NEXT:    ldrsb w12, [sp, #16]
; NONEON-NOSVE-NEXT:    mul w9, w9, w13
; NONEON-NOSVE-NEXT:    mul w10, w10, w14
; NONEON-NOSVE-NEXT:    mul w11, w11, w12
; NONEON-NOSVE-NEXT:    ubfx w8, w8, #4, #12
; NONEON-NOSVE-NEXT:    ubfx w9, w9, #4, #12
; NONEON-NOSVE-NEXT:    ubfx w10, w10, #4, #12
; NONEON-NOSVE-NEXT:    strh w8, [sp, #30]
; NONEON-NOSVE-NEXT:    ubfx w8, w11, #4, #12
; NONEON-NOSVE-NEXT:    strh w9, [sp, #28]
; NONEON-NOSVE-NEXT:    strh w10, [sp, #26]
; NONEON-NOSVE-NEXT:    strh w8, [sp, #24]
; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
; NONEON-NOSVE-NEXT:    add sp, sp, #32
; NONEON-NOSVE-NEXT:    ret
  %1 = sext <4 x i8> %op1 to <4 x i16>
  %2 = sext <4 x i8> %op2 to <4 x i16>
  %mul = mul <4 x i16> %1, %2
  %shr = lshr <4 x i16> %mul, <i16 4, i16 4, i16 4, i16 4>
  %res = trunc <4 x i16> %shr to <4 x i8>
  ret <4 x i8> %res
}

; <8 x i8>: sign-extend to i16, multiply, shift right by 8 and truncate. The
; SVE and SVE2 assertions expect a single smulh on .b elements.
define <8 x i8> @smulh_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
; SVE-LABEL: smulh_v8i8:
; SVE:       // %bb.0:
; SVE-NEXT:    ptrue p0.b, vl8
; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
; SVE-NEXT:    smulh z0.b, p0/m, z0.b, z1.b
; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
; SVE-NEXT:    ret
;
; SVE2-LABEL: smulh_v8i8:
; SVE2:       // %bb.0:
; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
; SVE2-NEXT:    smulh z0.b, z0.b, z1.b
; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
; SVE2-NEXT:    ret
;
; NONEON-NOSVE-LABEL: smulh_v8i8:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #32
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #8]
; NONEON-NOSVE-NEXT:    ldrsb w15, [sp, #15]
; NONEON-NOSVE-NEXT:    ldrsb w16, [sp, #23]
; NONEON-NOSVE-NEXT:    ldrsb w12, [sp, #12]
; NONEON-NOSVE-NEXT:    ldrsb w13, [sp, #13]
; NONEON-NOSVE-NEXT:    ldrsb w14, [sp, #14]
; NONEON-NOSVE-NEXT:    ldrsb w17, [sp, #22]
; NONEON-NOSVE-NEXT:    mul w15, w15, w16
; NONEON-NOSVE-NEXT:    ldrsb w16, [sp, #21]
; NONEON-NOSVE-NEXT:    ldrsb w18, [sp, #20]
; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #8]
; NONEON-NOSVE-NEXT:    mul w14, w14, w17
; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #9]
; NONEON-NOSVE-NEXT:    ldrsb w10, [sp, #10]
; NONEON-NOSVE-NEXT:    mul w13, w13, w16
; NONEON-NOSVE-NEXT:    ldrsb w11, [sp, #11]
; NONEON-NOSVE-NEXT:    ldrsb w17, [sp, #16]
; NONEON-NOSVE-NEXT:    mul w12, w12, w18
; NONEON-NOSVE-NEXT:    lsr w15, w15, #8
; NONEON-NOSVE-NEXT:    ldrsb w0, [sp, #19]
; NONEON-NOSVE-NEXT:    ldrsb w16, [sp, #18]
; NONEON-NOSVE-NEXT:    lsr w14, w14, #8
; NONEON-NOSVE-NEXT:    ldrsb w18, [sp, #17]
; NONEON-NOSVE-NEXT:    mul w8, w8, w17
; NONEON-NOSVE-NEXT:    lsr w13, w13, #8
; NONEON-NOSVE-NEXT:    mul w11, w11, w0
; NONEON-NOSVE-NEXT:    lsr w12, w12, #8
; NONEON-NOSVE-NEXT:    strb w15, [sp, #31]
; NONEON-NOSVE-NEXT:    mul w10, w10, w16
; NONEON-NOSVE-NEXT:    strb w14, [sp, #30]
; NONEON-NOSVE-NEXT:    mul w9, w9, w18
; NONEON-NOSVE-NEXT:    lsr w8, w8, #8
; NONEON-NOSVE-NEXT:    strb w13, [sp, #29]
; NONEON-NOSVE-NEXT:    lsr w11, w11, #8
; NONEON-NOSVE-NEXT:    strb w12, [sp, #28]
; NONEON-NOSVE-NEXT:    lsr w10, w10, #8
; NONEON-NOSVE-NEXT:    strb w8, [sp, #24]
; NONEON-NOSVE-NEXT:    lsr w9, w9, #8
; NONEON-NOSVE-NEXT:    strb w11, [sp, #27]
; NONEON-NOSVE-NEXT:    strb w10, [sp, #26]
; NONEON-NOSVE-NEXT:    strb w9, [sp, #25]
; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
; NONEON-NOSVE-NEXT:    add sp, sp, #32
; NONEON-NOSVE-NEXT:    ret
  %1 = sext <8 x i8> %op1 to <8 x i16>
  %2 = sext <8 x i8> %op2 to <8 x i16>
  %mul = mul <8 x i16> %1, %2
  %shr = lshr <8 x i16> %mul, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  %res = trunc <8 x i16> %shr to <8 x i8>
  ret <8 x i8> %res
}

; <16 x i8>: the SVE and SVE2 assertions expect a single smulh on .b elements.
define <16 x i8> @smulh_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
; SVE-LABEL: smulh_v16i8:
; SVE:       // %bb.0:
; SVE-NEXT:    ptrue p0.b, vl16
; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
; SVE-NEXT:    // kill: def $q1 killed $q1 def $z1
; SVE-NEXT:    smulh z0.b, p0/m, z0.b, z1.b
; SVE-NEXT:    // kill: def $q0 killed $q0 killed $z0
; SVE-NEXT:    ret
;
; SVE2-LABEL: smulh_v16i8:
; SVE2:       // %bb.0:
; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
; SVE2-NEXT:    smulh z0.b, z0.b, z1.b
; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
; SVE2-NEXT:    ret
;
; NONEON-NOSVE-LABEL: smulh_v16i8:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #160
; NONEON-NOSVE-NEXT:    str x27, [sp, #80] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT:    stp x26, x25, [sp, #96] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT:    stp x24, x23, [sp, #112] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT:    stp x22, x21, [sp, #128] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT:    stp x20, x19, [sp, #144] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 160
; NONEON-NOSVE-NEXT:    .cfi_offset w19, -8
; NONEON-NOSVE-NEXT:    .cfi_offset w20, -16
; NONEON-NOSVE-NEXT:    .cfi_offset w21, -24
; NONEON-NOSVE-NEXT:    .cfi_offset w22, -32
; NONEON-NOSVE-NEXT:    .cfi_offset w23, -40
; NONEON-NOSVE-NEXT:    .cfi_offset w24, -48
; NONEON-NOSVE-NEXT:    .cfi_offset w25, -56
; NONEON-NOSVE-NEXT:    .cfi_offset w26, -64
; NONEON-NOSVE-NEXT:    .cfi_offset w27, -80
; NONEON-NOSVE-NEXT:    str q0, [sp]
; NONEON-NOSVE-NEXT:    ldp d2, d0, [sp]
; NONEON-NOSVE-NEXT:    str q1, [sp, #16]
; NONEON-NOSVE-NEXT:    stp d0, d2, [sp, #40]
; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #16]
; NONEON-NOSVE-NEXT:    ldrsb w6, [sp, #44]
; NONEON-NOSVE-NEXT:    ldrsb w7, [sp,
#45] 198; NONEON-NOSVE-NEXT: ldrsb w19, [sp, #46] 199; NONEON-NOSVE-NEXT: ldrsb w20, [sp, #47] 200; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #48] 201; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #49] 202; NONEON-NOSVE-NEXT: str d0, [sp, #56] 203; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #50] 204; NONEON-NOSVE-NEXT: ldrsb w11, [sp, #51] 205; NONEON-NOSVE-NEXT: ldrsb w21, [sp, #63] 206; NONEON-NOSVE-NEXT: ldrsb w23, [sp, #62] 207; NONEON-NOSVE-NEXT: ldrsb w25, [sp, #61] 208; NONEON-NOSVE-NEXT: ldrsb w26, [sp, #60] 209; NONEON-NOSVE-NEXT: str d1, [sp, #88] 210; NONEON-NOSVE-NEXT: ldrsb w12, [sp, #52] 211; NONEON-NOSVE-NEXT: mul w20, w20, w21 212; NONEON-NOSVE-NEXT: ldrsb w13, [sp, #53] 213; NONEON-NOSVE-NEXT: ldrsb w15, [sp, #54] 214; NONEON-NOSVE-NEXT: mul w19, w19, w23 215; NONEON-NOSVE-NEXT: ldrsb w17, [sp, #55] 216; NONEON-NOSVE-NEXT: ldrsb w0, [sp, #40] 217; NONEON-NOSVE-NEXT: mul w7, w7, w25 218; NONEON-NOSVE-NEXT: ldrsb w2, [sp, #41] 219; NONEON-NOSVE-NEXT: ldrsb w3, [sp, #42] 220; NONEON-NOSVE-NEXT: mul w6, w6, w26 221; NONEON-NOSVE-NEXT: lsr w20, w20, #8 222; NONEON-NOSVE-NEXT: ldrsb w4, [sp, #43] 223; NONEON-NOSVE-NEXT: ldrsb w14, [sp, #88] 224; NONEON-NOSVE-NEXT: lsr w19, w19, #8 225; NONEON-NOSVE-NEXT: ldrsb w16, [sp, #89] 226; NONEON-NOSVE-NEXT: ldrsb w18, [sp, #90] 227; NONEON-NOSVE-NEXT: lsr w7, w7, #8 228; NONEON-NOSVE-NEXT: ldrsb w1, [sp, #91] 229; NONEON-NOSVE-NEXT: ldrsb w5, [sp, #92] 230; NONEON-NOSVE-NEXT: mul w9, w9, w16 231; NONEON-NOSVE-NEXT: lsr w6, w6, #8 232; NONEON-NOSVE-NEXT: ldrsb w22, [sp, #93] 233; NONEON-NOSVE-NEXT: ldrsb w24, [sp, #94] 234; NONEON-NOSVE-NEXT: mul w11, w11, w1 235; NONEON-NOSVE-NEXT: ldrsb w21, [sp, #95] 236; NONEON-NOSVE-NEXT: ldrsb w23, [sp, #56] 237; NONEON-NOSVE-NEXT: mul w12, w12, w5 238; NONEON-NOSVE-NEXT: ldrsb w27, [sp, #59] 239; NONEON-NOSVE-NEXT: ldrsb w25, [sp, #58] 240; NONEON-NOSVE-NEXT: mul w15, w15, w24 241; NONEON-NOSVE-NEXT: ldrsb w26, [sp, #57] 242; NONEON-NOSVE-NEXT: mul w0, w0, w23 243; NONEON-NOSVE-NEXT: lsr w11, w11, #8 
; NONEON-NOSVE-NEXT:    mul w4, w4, w27
; NONEON-NOSVE-NEXT:    lsr w12, w12, #8
; NONEON-NOSVE-NEXT:    lsr w9, w9, #8
; NONEON-NOSVE-NEXT:    mul w3, w3, w25
; NONEON-NOSVE-NEXT:    lsr w15, w15, #8
; NONEON-NOSVE-NEXT:    strb w20, [sp, #79]
; NONEON-NOSVE-NEXT:    mul w2, w2, w26
; NONEON-NOSVE-NEXT:    lsr w0, w0, #8
; NONEON-NOSVE-NEXT:    strb w19, [sp, #78]
; NONEON-NOSVE-NEXT:    mul w17, w17, w21
; NONEON-NOSVE-NEXT:    lsr w4, w4, #8
; NONEON-NOSVE-NEXT:    strb w7, [sp, #77]
; NONEON-NOSVE-NEXT:    mul w13, w13, w22
; NONEON-NOSVE-NEXT:    lsr w3, w3, #8
; NONEON-NOSVE-NEXT:    strb w6, [sp, #76]
; NONEON-NOSVE-NEXT:    mul w10, w10, w18
; NONEON-NOSVE-NEXT:    lsr w2, w2, #8
; NONEON-NOSVE-NEXT:    strb w4, [sp, #75]
; NONEON-NOSVE-NEXT:    mul w8, w8, w14
; NONEON-NOSVE-NEXT:    lsr w17, w17, #8
; NONEON-NOSVE-NEXT:    strb w3, [sp, #74]
; NONEON-NOSVE-NEXT:    lsr w13, w13, #8
; NONEON-NOSVE-NEXT:    strb w2, [sp, #73]
; NONEON-NOSVE-NEXT:    ldr x27, [sp, #80] // 8-byte Folded Reload
; NONEON-NOSVE-NEXT:    lsr w10, w10, #8
; NONEON-NOSVE-NEXT:    strb w0, [sp, #72]
; NONEON-NOSVE-NEXT:    lsr w8, w8, #8
; NONEON-NOSVE-NEXT:    strb w17, [sp, #71]
; NONEON-NOSVE-NEXT:    strb w15, [sp, #70]
; NONEON-NOSVE-NEXT:    ldp x20, x19, [sp, #144] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT:    strb w13, [sp, #69]
; NONEON-NOSVE-NEXT:    ldp x22, x21, [sp, #128] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT:    strb w12, [sp, #68]
; NONEON-NOSVE-NEXT:    ldp x24, x23, [sp, #112] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT:    strb w11, [sp, #67]
; NONEON-NOSVE-NEXT:    ldp x26, x25, [sp, #96] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT:    strb w10, [sp, #66]
; NONEON-NOSVE-NEXT:    strb w9, [sp, #65]
; NONEON-NOSVE-NEXT:    strb w8, [sp, #64]
; NONEON-NOSVE-NEXT:    ldr q0, [sp, #64]
; NONEON-NOSVE-NEXT:    add sp, sp, #160
; NONEON-NOSVE-NEXT:    ret
; Widen both <16 x i8> operands to i16, multiply, keep bits [15:8] of each
; product and narrow back to i8.
  %lhs = sext <16 x i8> %op1 to <16 x i16>
  %rhs = sext <16 x i8> %op2 to <16 x i16>
  %prod = mul <16 x i16> %lhs, %rhs
  %high = lshr <16 x i16> %prod, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  %narrow = trunc <16 x i16> %high to <16 x i8>
  ret <16 x i8> %narrow
}

; <32 x i8> loaded from %a and %b. The SVE and SVE2 assertions expect the
; operation to be split into two smulh instructions on .b elements.
define void @smulh_v32i8(ptr %a, ptr %b) {
; SVE-LABEL: smulh_v32i8:
; SVE:       // %bb.0:
; SVE-NEXT:    ldp q0, q3, [x1]
; SVE-NEXT:    ptrue p0.b, vl16
; SVE-NEXT:    ldp q1, q2, [x0]
; SVE-NEXT:    smulh z0.b, p0/m, z0.b, z1.b
; SVE-NEXT:    movprfx z1, z2
; SVE-NEXT:    smulh z1.b, p0/m, z1.b, z3.b
; SVE-NEXT:    stp q0, q1, [x0]
; SVE-NEXT:    ret
;
; SVE2-LABEL: smulh_v32i8:
; SVE2:       // %bb.0:
; SVE2-NEXT:    ldp q0, q3, [x1]
; SVE2-NEXT:    ldp q1, q2, [x0]
; SVE2-NEXT:    smulh z0.b, z1.b, z0.b
; SVE2-NEXT:    smulh z1.b, z2.b, z3.b
; SVE2-NEXT:    stp q0, q1, [x0]
; SVE2-NEXT:    ret
;
; NONEON-NOSVE-LABEL: smulh_v32i8:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #384
; NONEON-NOSVE-NEXT:    stp x29, x30, [sp, #288] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT:    stp x28, x27, [sp, #304] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT:    stp x26, x25, [sp, #320] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT:    stp x24, x23, [sp, #336] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT:    stp x22, x21, [sp, #352] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT:    stp x20, x19, [sp, #368] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 384
; NONEON-NOSVE-NEXT:    .cfi_offset w19, -8
; NONEON-NOSVE-NEXT:    .cfi_offset w20, -16
; NONEON-NOSVE-NEXT:    .cfi_offset w21, -24
; NONEON-NOSVE-NEXT:    .cfi_offset w22, -32
; NONEON-NOSVE-NEXT:    .cfi_offset w23, -40
; NONEON-NOSVE-NEXT:    .cfi_offset w24, -48
; NONEON-NOSVE-NEXT:    .cfi_offset w25, -56
; NONEON-NOSVE-NEXT:    .cfi_offset w26, -64
; NONEON-NOSVE-NEXT:    .cfi_offset w27, -72
; NONEON-NOSVE-NEXT:    .cfi_offset w28, -80
; NONEON-NOSVE-NEXT:    .cfi_offset w30, -88
; NONEON-NOSVE-NEXT:    .cfi_offset w29, -96
;
NONEON-NOSVE-NEXT: ldp q0, q1, [x0] 338; NONEON-NOSVE-NEXT: mov x29, x0 339; NONEON-NOSVE-NEXT: ldp q3, q2, [x1] 340; NONEON-NOSVE-NEXT: str q0, [sp, #128] 341; NONEON-NOSVE-NEXT: str q1, [sp, #160] 342; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #128] 343; NONEON-NOSVE-NEXT: str q3, [sp, #144] 344; NONEON-NOSVE-NEXT: str q2, [sp, #192] 345; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #176] 346; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #160] 347; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #184] 348; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #185] 349; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #120] // 8-byte Folded Spill 350; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #186] 351; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #187] 352; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #224] 353; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #144] 354; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #112] // 8-byte Folded Spill 355; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #188] 356; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #189] 357; NONEON-NOSVE-NEXT: ldrsb w13, [sp, #229] 358; NONEON-NOSVE-NEXT: ldrsb w11, [sp, #227] 359; NONEON-NOSVE-NEXT: ldrsb w12, [sp, #228] 360; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #104] // 8-byte Folded Spill 361; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #190] 362; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #191] 363; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #208] 364; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #192] 365; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #96] // 8-byte Folded Spill 366; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #176] 367; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #177] 368; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #226] 369; NONEON-NOSVE-NEXT: ldrsb w2, [sp, #214] 370; NONEON-NOSVE-NEXT: ldrsb w1, [sp, #215] 371; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #88] // 8-byte Folded Spill 372; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #178] 373; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #179] 374; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #240] 375; NONEON-NOSVE-NEXT: ldrsb w4, [sp, #212] 376; NONEON-NOSVE-NEXT: ldrsb w3, [sp, #213] 377; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #80] // 8-byte Folded Spill 378; 
NONEON-NOSVE-NEXT: ldrsb w9, [sp, #180] 379; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #181] 380; NONEON-NOSVE-NEXT: ldrsb w14, [sp, #247] 381; NONEON-NOSVE-NEXT: ldrsb w15, [sp, #246] 382; NONEON-NOSVE-NEXT: ldrsb w16, [sp, #244] 383; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #72] // 8-byte Folded Spill 384; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #182] 385; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #183] 386; NONEON-NOSVE-NEXT: mul w26, w12, w16 387; NONEON-NOSVE-NEXT: ldrsb w12, [sp, #242] 388; NONEON-NOSVE-NEXT: ldrsb w16, [sp, #250] 389; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #64] // 8-byte Folded Spill 390; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #232] 391; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #233] 392; NONEON-NOSVE-NEXT: mul w30, w10, w12 393; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #255] 394; NONEON-NOSVE-NEXT: ldrsb w12, [sp, #253] 395; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56] // 8-byte Folded Spill 396; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #234] 397; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #235] 398; NONEON-NOSVE-NEXT: ldrsb w0, [sp, #248] 399; NONEON-NOSVE-NEXT: ldrsb w18, [sp, #249] 400; NONEON-NOSVE-NEXT: ldrsb w6, [sp, #210] 401; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48] // 8-byte Folded Spill 402; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #236] 403; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #237] 404; NONEON-NOSVE-NEXT: ldrsb w5, [sp, #211] 405; NONEON-NOSVE-NEXT: ldrsb w19, [sp, #208] 406; NONEON-NOSVE-NEXT: ldrsb w7, [sp, #209] 407; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40] // 8-byte Folded Spill 408; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #238] 409; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #239] 410; NONEON-NOSVE-NEXT: ldrsb w21, [sp, #222] 411; NONEON-NOSVE-NEXT: ldrsb w20, [sp, #223] 412; NONEON-NOSVE-NEXT: ldrsb w23, [sp, #220] 413; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32] // 8-byte Folded Spill 414; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #224] 415; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #225] 416; NONEON-NOSVE-NEXT: ldrsb w22, [sp, #221] 417; NONEON-NOSVE-NEXT: ldrsb w24, [sp, #219] 418; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] // 
8-byte Folded Spill 419; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #230] 420; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #231] 421; NONEON-NOSVE-NEXT: mul w27, w8, w14 422; NONEON-NOSVE-NEXT: ldrsb w14, [sp, #245] 423; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #217] 424; NONEON-NOSVE-NEXT: mul w9, w9, w15 425; NONEON-NOSVE-NEXT: ldrsb w15, [sp, #251] 426; NONEON-NOSVE-NEXT: mul w25, w13, w14 427; NONEON-NOSVE-NEXT: ldrsb w13, [sp, #243] 428; NONEON-NOSVE-NEXT: lsr w14, w27, #8 429; NONEON-NOSVE-NEXT: ldrsb w27, [sp, #218] 430; NONEON-NOSVE-NEXT: lsr w17, w9, #8 431; NONEON-NOSVE-NEXT: mul w28, w11, w13 432; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #216] 433; NONEON-NOSVE-NEXT: strb w14, [sp, #287] 434; NONEON-NOSVE-NEXT: lsr w14, w25, #8 435; NONEON-NOSVE-NEXT: ldr w25, [sp, #24] // 4-byte Folded Reload 436; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #16] // 8-byte Folded Spill 437; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #241] 438; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #240] 439; NONEON-NOSVE-NEXT: strb w14, [sp, #285] 440; NONEON-NOSVE-NEXT: lsr w14, w28, #8 441; NONEON-NOSVE-NEXT: ldrsb w11, [sp, #254] 442; NONEON-NOSVE-NEXT: mul w8, w25, w8 443; NONEON-NOSVE-NEXT: ldr w25, [sp, #28] // 4-byte Folded Reload 444; NONEON-NOSVE-NEXT: ldrsb w13, [sp, #252] 445; NONEON-NOSVE-NEXT: strb w14, [sp, #283] 446; NONEON-NOSVE-NEXT: ldr w14, [sp, #40] // 4-byte Folded Reload 447; NONEON-NOSVE-NEXT: mul w9, w25, w9 448; NONEON-NOSVE-NEXT: ldr w25, [sp, #32] // 4-byte Folded Reload 449; NONEON-NOSVE-NEXT: strb w17, [sp, #286] 450; NONEON-NOSVE-NEXT: mul w12, w14, w12 451; NONEON-NOSVE-NEXT: lsr w8, w8, #8 452; NONEON-NOSVE-NEXT: lsr w17, w26, #8 453; NONEON-NOSVE-NEXT: mul w10, w25, w10 454; NONEON-NOSVE-NEXT: ldr w25, [sp, #36] // 4-byte Folded Reload 455; NONEON-NOSVE-NEXT: ldr w14, [sp, #44] // 4-byte Folded Reload 456; NONEON-NOSVE-NEXT: lsr w9, w9, #8 457; NONEON-NOSVE-NEXT: strb w8, [sp, #281] 458; NONEON-NOSVE-NEXT: mul w11, w25, w11 459; NONEON-NOSVE-NEXT: strb w17, [sp, #284] 460; NONEON-NOSVE-NEXT: lsr w17, w30, #8 
461; NONEON-NOSVE-NEXT: mul w13, w14, w13 462; NONEON-NOSVE-NEXT: lsr w8, w10, #8 463; NONEON-NOSVE-NEXT: ldr w10, [sp, #48] // 4-byte Folded Reload 464; NONEON-NOSVE-NEXT: strb w9, [sp, #280] 465; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #320] // 16-byte Folded Reload 466; NONEON-NOSVE-NEXT: lsr w9, w11, #8 467; NONEON-NOSVE-NEXT: mul w10, w10, w15 468; NONEON-NOSVE-NEXT: ldr w11, [sp, #52] // 4-byte Folded Reload 469; NONEON-NOSVE-NEXT: strb w8, [sp, #279] 470; NONEON-NOSVE-NEXT: lsr w8, w12, #8 471; NONEON-NOSVE-NEXT: ldr w12, [sp, #56] // 4-byte Folded Reload 472; NONEON-NOSVE-NEXT: mul w11, w11, w16 473; NONEON-NOSVE-NEXT: strb w9, [sp, #278] 474; NONEON-NOSVE-NEXT: lsr w9, w13, #8 475; NONEON-NOSVE-NEXT: mul w12, w12, w18 476; NONEON-NOSVE-NEXT: ldr w13, [sp, #60] // 4-byte Folded Reload 477; NONEON-NOSVE-NEXT: strb w8, [sp, #277] 478; NONEON-NOSVE-NEXT: lsr w8, w10, #8 479; NONEON-NOSVE-NEXT: ldr w10, [sp, #64] // 4-byte Folded Reload 480; NONEON-NOSVE-NEXT: strb w9, [sp, #276] 481; NONEON-NOSVE-NEXT: mul w13, w13, w0 482; NONEON-NOSVE-NEXT: lsr w9, w11, #8 483; NONEON-NOSVE-NEXT: ldr w11, [sp, #68] // 4-byte Folded Reload 484; NONEON-NOSVE-NEXT: mul w10, w10, w1 485; NONEON-NOSVE-NEXT: strb w8, [sp, #275] 486; NONEON-NOSVE-NEXT: lsr w8, w12, #8 487; NONEON-NOSVE-NEXT: mul w11, w11, w2 488; NONEON-NOSVE-NEXT: ldr w12, [sp, #72] // 4-byte Folded Reload 489; NONEON-NOSVE-NEXT: strb w9, [sp, #274] 490; NONEON-NOSVE-NEXT: lsr w9, w13, #8 491; NONEON-NOSVE-NEXT: ldr w13, [sp, #76] // 4-byte Folded Reload 492; NONEON-NOSVE-NEXT: strb w8, [sp, #273] 493; NONEON-NOSVE-NEXT: mul w12, w12, w3 494; NONEON-NOSVE-NEXT: lsr w8, w10, #8 495; NONEON-NOSVE-NEXT: ldr w10, [sp, #80] // 4-byte Folded Reload 496; NONEON-NOSVE-NEXT: mul w13, w13, w4 497; NONEON-NOSVE-NEXT: strb w9, [sp, #272] 498; NONEON-NOSVE-NEXT: lsr w9, w11, #8 499; NONEON-NOSVE-NEXT: mul w10, w10, w5 500; NONEON-NOSVE-NEXT: ldr w11, [sp, #84] // 4-byte Folded Reload 501; NONEON-NOSVE-NEXT: strb w8, [sp, #271] 
502; NONEON-NOSVE-NEXT: lsr w8, w12, #8 503; NONEON-NOSVE-NEXT: ldr w12, [sp, #88] // 4-byte Folded Reload 504; NONEON-NOSVE-NEXT: strb w9, [sp, #270] 505; NONEON-NOSVE-NEXT: mul w11, w11, w6 506; NONEON-NOSVE-NEXT: lsr w9, w13, #8 507; NONEON-NOSVE-NEXT: ldr w13, [sp, #92] // 4-byte Folded Reload 508; NONEON-NOSVE-NEXT: mul w12, w12, w7 509; NONEON-NOSVE-NEXT: strb w8, [sp, #269] 510; NONEON-NOSVE-NEXT: lsr w8, w10, #8 511; NONEON-NOSVE-NEXT: mul w13, w13, w19 512; NONEON-NOSVE-NEXT: ldr w10, [sp, #96] // 4-byte Folded Reload 513; NONEON-NOSVE-NEXT: strb w9, [sp, #268] 514; NONEON-NOSVE-NEXT: lsr w9, w11, #8 515; NONEON-NOSVE-NEXT: ldr w11, [sp, #100] // 4-byte Folded Reload 516; NONEON-NOSVE-NEXT: strb w8, [sp, #267] 517; NONEON-NOSVE-NEXT: mul w10, w10, w20 518; NONEON-NOSVE-NEXT: lsr w8, w12, #8 519; NONEON-NOSVE-NEXT: ldr w12, [sp, #104] // 4-byte Folded Reload 520; NONEON-NOSVE-NEXT: mul w11, w11, w21 521; NONEON-NOSVE-NEXT: strb w9, [sp, #266] 522; NONEON-NOSVE-NEXT: lsr w9, w13, #8 523; NONEON-NOSVE-NEXT: ldr w13, [sp, #108] // 4-byte Folded Reload 524; NONEON-NOSVE-NEXT: mul w12, w12, w22 525; NONEON-NOSVE-NEXT: strb w8, [sp, #265] 526; NONEON-NOSVE-NEXT: lsr w8, w10, #8 527; NONEON-NOSVE-NEXT: ldr w10, [sp, #112] // 4-byte Folded Reload 528; NONEON-NOSVE-NEXT: strb w9, [sp, #264] 529; NONEON-NOSVE-NEXT: mul w13, w13, w23 530; NONEON-NOSVE-NEXT: lsr w9, w11, #8 531; NONEON-NOSVE-NEXT: ldr w11, [sp, #116] // 4-byte Folded Reload 532; NONEON-NOSVE-NEXT: ldp w15, w14, [sp, #16] // 8-byte Folded Reload 533; NONEON-NOSVE-NEXT: mul w10, w10, w24 534; NONEON-NOSVE-NEXT: strb w8, [sp, #263] 535; NONEON-NOSVE-NEXT: lsr w8, w12, #8 536; NONEON-NOSVE-NEXT: mul w11, w11, w27 537; NONEON-NOSVE-NEXT: ldr w12, [sp, #120] // 4-byte Folded Reload 538; NONEON-NOSVE-NEXT: strb w9, [sp, #262] 539; NONEON-NOSVE-NEXT: lsr w9, w13, #8 540; NONEON-NOSVE-NEXT: ldr w13, [sp, #124] // 4-byte Folded Reload 541; NONEON-NOSVE-NEXT: strb w8, [sp, #261] 542; NONEON-NOSVE-NEXT: mul w12, 
w12, w15
; NONEON-NOSVE-NEXT:    lsr w8, w10, #8
; NONEON-NOSVE-NEXT:    strb w17, [sp, #282]
; NONEON-NOSVE-NEXT:    mul w13, w13, w14
; NONEON-NOSVE-NEXT:    strb w9, [sp, #260]
; NONEON-NOSVE-NEXT:    lsr w9, w11, #8
; NONEON-NOSVE-NEXT:    strb w8, [sp, #259]
; NONEON-NOSVE-NEXT:    ldp x20, x19, [sp, #368] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT:    lsr w8, w12, #8
; NONEON-NOSVE-NEXT:    strb w9, [sp, #258]
; NONEON-NOSVE-NEXT:    lsr w9, w13, #8
; NONEON-NOSVE-NEXT:    ldp x22, x21, [sp, #352] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT:    strb w8, [sp, #257]
; NONEON-NOSVE-NEXT:    ldp x24, x23, [sp, #336] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT:    strb w9, [sp, #256]
; NONEON-NOSVE-NEXT:    ldp x28, x27, [sp, #304] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #256]
; NONEON-NOSVE-NEXT:    stp q0, q1, [x29]
; NONEON-NOSVE-NEXT:    ldp x29, x30, [sp, #288] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT:    add sp, sp, #384
; NONEON-NOSVE-NEXT:    ret
; Load both <32 x i8> vectors, widen to i16, multiply, keep bits [15:8] of
; each product, narrow back to i8 and store the result over %a.
  %op1 = load <32 x i8>, ptr %a
  %op2 = load <32 x i8>, ptr %b
  %lhs = sext <32 x i8> %op1 to <32 x i16>
  %rhs = sext <32 x i8> %op2 to <32 x i16>
  %prod = mul <32 x i16> %lhs, %rhs
  %high = lshr <32 x i16> %prod, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  %narrow = trunc <32 x i16> %high to <32 x i8>
  store <32 x i8> %narrow, ptr %a
  ret void
}

; <2 x i16>: widen to i32, multiply, shift right by 16 and narrow. The SVE and
; SVE2 assertions expect sxth + mul + lsr #16 rather than an smulh.
define <2 x i16> @smulh_v2i16(<2 x i16> %op1, <2 x i16> %op2) {
; SVE-LABEL: smulh_v2i16:
; SVE:       // %bb.0:
; SVE-NEXT:    ptrue p0.s, vl2
; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
; SVE-NEXT:    sxth z0.s, p0/m, z0.s
; SVE-NEXT:    sxth z1.s, p0/m, z1.s
; SVE-NEXT:    mul z0.s, p0/m, z0.s, z1.s
; SVE-NEXT:    lsr z0.s, z0.s, #16
; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
; SVE-NEXT:    ret
;
; SVE2-LABEL: smulh_v2i16:
; SVE2:       // %bb.0:
; SVE2-NEXT:    ptrue p0.s, vl2
; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
; SVE2-NEXT:    sxth z0.s, p0/m, z0.s
; SVE2-NEXT:    sxth z1.s, p0/m, z1.s
; SVE2-NEXT:    mul z0.s, z0.s, z1.s
; SVE2-NEXT:    lsr z0.s, z0.s, #16
; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
; SVE2-NEXT:    ret
;
; NONEON-NOSVE-LABEL: smulh_v2i16:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #32
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #8]
; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #12]
; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #8]
; NONEON-NOSVE-NEXT:    ldrsh w10, [sp, #20]
; NONEON-NOSVE-NEXT:    ldrsh w11, [sp, #16]
; NONEON-NOSVE-NEXT:    mul w8, w8, w10
; NONEON-NOSVE-NEXT:    mul w9, w9, w11
; NONEON-NOSVE-NEXT:    lsr w8, w8, #16
; NONEON-NOSVE-NEXT:    lsr w9, w9, #16
; NONEON-NOSVE-NEXT:    stp w9, w8, [sp, #24]
; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
; NONEON-NOSVE-NEXT:    add sp, sp, #32
; NONEON-NOSVE-NEXT:    ret
  %lhs = sext <2 x i16> %op1 to <2 x i32>
  %rhs = sext <2 x i16> %op2 to <2 x i32>
  %prod = mul <2 x i32> %lhs, %rhs
  %high = lshr <2 x i32> %prod, <i32 16, i32 16>
  %narrow = trunc <2 x i32> %high to <2 x i16>
  ret <2 x i16> %narrow
}

; <4 x i16>: widen to i32, multiply, shift right by 16 and narrow. The SVE and
; SVE2 assertions expect a single smulh on .h elements.
define <4 x i16> @smulh_v4i16(<4 x i16> %op1, <4 x i16> %op2) {
; SVE-LABEL: smulh_v4i16:
; SVE:       // %bb.0:
; SVE-NEXT:    ptrue p0.h, vl4
; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
; SVE-NEXT:    smulh z0.h, p0/m, z0.h, z1.h
; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
; SVE-NEXT:    ret
;
; SVE2-LABEL: smulh_v4i16:
; SVE2:       // %bb.0:
; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
; SVE2-NEXT:    smulh z0.h, z0.h, z1.h
; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
; SVE2-NEXT:    ret
;
; NONEON-NOSVE-LABEL: smulh_v4i16:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #32
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #8]
; NONEON-NOSVE-NEXT:    ldrsh w11, [sp, #14]
; NONEON-NOSVE-NEXT:    ldrsh w12, [sp, #22]
; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #8]
; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #10]
; NONEON-NOSVE-NEXT:    ldrsh w10, [sp, #12]
; NONEON-NOSVE-NEXT:    ldrsh w13, [sp, #20]
; NONEON-NOSVE-NEXT:    ldrsh w14, [sp, #18]
; NONEON-NOSVE-NEXT:    mul w11, w11, w12
; NONEON-NOSVE-NEXT:    ldrsh w12, [sp, #16]
; NONEON-NOSVE-NEXT:    mul w10, w10, w13
; NONEON-NOSVE-NEXT:    mul w9, w9, w14
; NONEON-NOSVE-NEXT:    mul w8, w8, w12
; NONEON-NOSVE-NEXT:    lsr w11, w11, #16
; NONEON-NOSVE-NEXT:    lsr w10, w10, #16
; NONEON-NOSVE-NEXT:    lsr w9, w9, #16
; NONEON-NOSVE-NEXT:    strh w11, [sp, #30]
; NONEON-NOSVE-NEXT:    lsr w8, w8, #16
; NONEON-NOSVE-NEXT:    strh w10, [sp, #28]
; NONEON-NOSVE-NEXT:    strh w9, [sp, #26]
; NONEON-NOSVE-NEXT:    strh w8, [sp, #24]
; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
; NONEON-NOSVE-NEXT:    add sp, sp, #32
; NONEON-NOSVE-NEXT:    ret
  %lhs = sext <4 x i16> %op1 to <4 x i32>
  %rhs = sext <4 x i16> %op2 to <4 x i32>
  %prod = mul <4 x i32> %lhs, %rhs
  %high = lshr <4 x i32> %prod, <i32 16, i32 16, i32 16, i32 16>
  %narrow = trunc <4 x i32> %high to <4 x i16>
  ret <4 x i16> %narrow
}

; <8 x i16>: the SVE assertions expect a single smulh on .h elements.
define <8 x i16> @smulh_v8i16(<8 x i16> %op1, <8 x i16> %op2) {
; SVE-LABEL: smulh_v8i16:
; SVE:       // %bb.0:
; SVE-NEXT:    ptrue p0.h, vl8
; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
; SVE-NEXT:    // kill: def $q1 killed $q1 def $z1
; SVE-NEXT:    smulh z0.h, p0/m, z0.h, z1.h
; SVE-NEXT:    // kill: def $q0 killed $q0 killed $z0
; SVE-NEXT:    ret
;
; SVE2-LABEL: smulh_v8i16:
; SVE2:       // %bb.0:
; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
; SVE2-NEXT:    smulh
z0.h, z0.h, z1.h 693; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0 694; SVE2-NEXT: ret 695; 696; NONEON-NOSVE-LABEL: smulh_v8i16: 697; NONEON-NOSVE: // %bb.0: 698; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-80]! 699; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80 700; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] 701; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32] 702; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #16] 703; NONEON-NOSVE-NEXT: ldrsh w15, [sp, #38] 704; NONEON-NOSVE-NEXT: ldrsh w12, [sp, #32] 705; NONEON-NOSVE-NEXT: ldrsh w13, [sp, #34] 706; NONEON-NOSVE-NEXT: ldrsh w14, [sp, #36] 707; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #40] 708; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #42] 709; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48] 710; NONEON-NOSVE-NEXT: ldrsh w10, [sp, #44] 711; NONEON-NOSVE-NEXT: ldrsh w11, [sp, #46] 712; NONEON-NOSVE-NEXT: ldrsh w16, [sp, #54] 713; NONEON-NOSVE-NEXT: ldrsh w17, [sp, #52] 714; NONEON-NOSVE-NEXT: ldrsh w18, [sp, #50] 715; NONEON-NOSVE-NEXT: ldrsh w0, [sp, #62] 716; NONEON-NOSVE-NEXT: mul w15, w15, w16 717; NONEON-NOSVE-NEXT: ldrsh w16, [sp, #48] 718; NONEON-NOSVE-NEXT: mul w14, w14, w17 719; NONEON-NOSVE-NEXT: ldrsh w17, [sp, #56] 720; NONEON-NOSVE-NEXT: mul w13, w13, w18 721; NONEON-NOSVE-NEXT: ldrsh w18, [sp, #60] 722; NONEON-NOSVE-NEXT: mul w12, w12, w16 723; NONEON-NOSVE-NEXT: ldrsh w16, [sp, #58] 724; NONEON-NOSVE-NEXT: lsr w15, w15, #16 725; NONEON-NOSVE-NEXT: mul w11, w11, w0 726; NONEON-NOSVE-NEXT: lsr w14, w14, #16 727; NONEON-NOSVE-NEXT: mul w10, w10, w18 728; NONEON-NOSVE-NEXT: lsr w13, w13, #16 729; NONEON-NOSVE-NEXT: strh w15, [sp, #78] 730; NONEON-NOSVE-NEXT: mul w9, w9, w16 731; NONEON-NOSVE-NEXT: lsr w12, w12, #16 732; NONEON-NOSVE-NEXT: strh w14, [sp, #76] 733; NONEON-NOSVE-NEXT: mul w8, w8, w17 734; NONEON-NOSVE-NEXT: lsr w11, w11, #16 735; NONEON-NOSVE-NEXT: strh w13, [sp, #74] 736; NONEON-NOSVE-NEXT: lsr w10, w10, #16 737; NONEON-NOSVE-NEXT: strh w12, [sp, #72] 738; NONEON-NOSVE-NEXT: lsr w9, w9, #16 739; NONEON-NOSVE-NEXT: strh w11, [sp, #70] 
; NONEON-NOSVE-NEXT:    lsr w8, w8, #16
; NONEON-NOSVE-NEXT:    strh w10, [sp, #68]
; NONEON-NOSVE-NEXT:    strh w9, [sp, #66]
; NONEON-NOSVE-NEXT:    strh w8, [sp, #64]
; NONEON-NOSVE-NEXT:    ldr q0, [sp, #64]
; NONEON-NOSVE-NEXT:    add sp, sp, #80
; NONEON-NOSVE-NEXT:    ret
; Widen both <8 x i16> operands to i32, multiply, keep bits [31:16] of each
; product and narrow back to i16.
  %lhs = sext <8 x i16> %op1 to <8 x i32>
  %rhs = sext <8 x i16> %op2 to <8 x i32>
  %prod = mul <8 x i32> %lhs, %rhs
  %high = lshr <8 x i32> %prod, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
  %narrow = trunc <8 x i32> %high to <8 x i16>
  ret <8 x i16> %narrow
}

define void @smulh_v16i16(ptr %a, ptr %b) {
; SVE-LABEL: smulh_v16i16:
; SVE:       // %bb.0:
; SVE-NEXT:    ldp q0, q3, [x1]
; SVE-NEXT:    ptrue p0.h, vl8
; SVE-NEXT:    ldp q1, q2, [x0]
; SVE-NEXT:    smulh z0.h, p0/m, z0.h, z1.h
; SVE-NEXT:    movprfx z1, z2
; SVE-NEXT:    smulh z1.h, p0/m, z1.h, z3.h
; SVE-NEXT:    stp q0, q1, [x0]
; SVE-NEXT:    ret
;
; SVE2-LABEL: smulh_v16i16:
; SVE2:       // %bb.0:
; SVE2-NEXT:    ldp q0, q3, [x1]
; SVE2-NEXT:    ldp q1, q2, [x0]
; SVE2-NEXT:    smulh z0.h, z1.h, z0.h
; SVE2-NEXT:    smulh z1.h, z2.h, z3.h
; SVE2-NEXT:    stp q0, q1, [x0]
; SVE2-NEXT:    ret
;
; NONEON-NOSVE-LABEL: smulh_v16i16:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #240
; NONEON-NOSVE-NEXT:    stp x28, x27, [sp, #160] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT:    stp x26, x25, [sp, #176] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT:    stp x24, x23, [sp, #192] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT:    stp x22, x21, [sp, #208] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT:    stp x20, x19, [sp, #224] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 240
; NONEON-NOSVE-NEXT:    .cfi_offset w19, -8
; NONEON-NOSVE-NEXT:    .cfi_offset w20, -16
; NONEON-NOSVE-NEXT:    .cfi_offset w21, -24
; NONEON-NOSVE-NEXT:    .cfi_offset w22, -32
; NONEON-NOSVE-NEXT:    .cfi_offset w23, -40
; NONEON-NOSVE-NEXT:    .cfi_offset w24, -48
;
NONEON-NOSVE-NEXT: .cfi_offset w25, -56 792; NONEON-NOSVE-NEXT: .cfi_offset w26, -64 793; NONEON-NOSVE-NEXT: .cfi_offset w27, -72 794; NONEON-NOSVE-NEXT: .cfi_offset w28, -80 795; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] 796; NONEON-NOSVE-NEXT: ldp q3, q2, [x1] 797; NONEON-NOSVE-NEXT: str q0, [sp] 798; NONEON-NOSVE-NEXT: str q1, [sp, #32] 799; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] 800; NONEON-NOSVE-NEXT: str q3, [sp, #16] 801; NONEON-NOSVE-NEXT: str q2, [sp, #64] 802; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48] 803; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #32] 804; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #56] 805; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #58] 806; NONEON-NOSVE-NEXT: ldrsh w10, [sp, #60] 807; NONEON-NOSVE-NEXT: ldrsh w11, [sp, #62] 808; NONEON-NOSVE-NEXT: ldrsh w12, [sp, #48] 809; NONEON-NOSVE-NEXT: ldrsh w13, [sp, #50] 810; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #96] 811; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #16] 812; NONEON-NOSVE-NEXT: ldrsh w7, [sp, #96] 813; NONEON-NOSVE-NEXT: ldrsh w19, [sp, #98] 814; NONEON-NOSVE-NEXT: ldrsh w20, [sp, #100] 815; NONEON-NOSVE-NEXT: ldrsh w21, [sp, #102] 816; NONEON-NOSVE-NEXT: ldrsh w14, [sp, #52] 817; NONEON-NOSVE-NEXT: ldrsh w16, [sp, #54] 818; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80] 819; NONEON-NOSVE-NEXT: ldrsh w18, [sp, #104] 820; NONEON-NOSVE-NEXT: ldrsh w2, [sp, #106] 821; NONEON-NOSVE-NEXT: ldrsh w4, [sp, #108] 822; NONEON-NOSVE-NEXT: ldrsh w5, [sp, #110] 823; NONEON-NOSVE-NEXT: ldrsh w15, [sp, #88] 824; NONEON-NOSVE-NEXT: ldrsh w17, [sp, #90] 825; NONEON-NOSVE-NEXT: ldrsh w1, [sp, #92] 826; NONEON-NOSVE-NEXT: ldrsh w3, [sp, #94] 827; NONEON-NOSVE-NEXT: mul w8, w8, w15 828; NONEON-NOSVE-NEXT: ldrsh w6, [sp, #80] 829; NONEON-NOSVE-NEXT: ldrsh w23, [sp, #82] 830; NONEON-NOSVE-NEXT: mul w11, w11, w3 831; NONEON-NOSVE-NEXT: ldrsh w25, [sp, #84] 832; NONEON-NOSVE-NEXT: mul w13, w13, w23 833; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #64] 834; NONEON-NOSVE-NEXT: mul w14, w14, w25 835; NONEON-NOSVE-NEXT: lsr w8, w8, #16 836; NONEON-NOSVE-NEXT: 
mul w12, w12, w6 837; NONEON-NOSVE-NEXT: lsr w11, w11, #16 838; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #112] 839; NONEON-NOSVE-NEXT: mul w10, w10, w1 840; NONEON-NOSVE-NEXT: lsr w13, w13, #16 841; NONEON-NOSVE-NEXT: ldrsh w22, [sp, #118] 842; NONEON-NOSVE-NEXT: ldrsh w24, [sp, #116] 843; NONEON-NOSVE-NEXT: ldrsh w26, [sp, #114] 844; NONEON-NOSVE-NEXT: ldrsh w27, [sp, #112] 845; NONEON-NOSVE-NEXT: ldrsh w28, [sp, #126] 846; NONEON-NOSVE-NEXT: mul w9, w9, w17 847; NONEON-NOSVE-NEXT: mul w21, w21, w22 848; NONEON-NOSVE-NEXT: ldrsh w22, [sp, #86] 849; NONEON-NOSVE-NEXT: lsr w14, w14, #16 850; NONEON-NOSVE-NEXT: mul w20, w20, w24 851; NONEON-NOSVE-NEXT: ldrsh w24, [sp, #120] 852; NONEON-NOSVE-NEXT: lsr w12, w12, #16 853; NONEON-NOSVE-NEXT: mul w19, w19, w26 854; NONEON-NOSVE-NEXT: ldrsh w26, [sp, #124] 855; NONEON-NOSVE-NEXT: lsr w10, w10, #16 856; NONEON-NOSVE-NEXT: mul w7, w7, w27 857; NONEON-NOSVE-NEXT: ldrsh w27, [sp, #122] 858; NONEON-NOSVE-NEXT: lsr w21, w21, #16 859; NONEON-NOSVE-NEXT: mul w5, w5, w28 860; NONEON-NOSVE-NEXT: lsr w20, w20, #16 861; NONEON-NOSVE-NEXT: lsr w9, w9, #16 862; NONEON-NOSVE-NEXT: mul w4, w4, w26 863; NONEON-NOSVE-NEXT: lsr w19, w19, #16 864; NONEON-NOSVE-NEXT: strh w21, [sp, #158] 865; NONEON-NOSVE-NEXT: mul w2, w2, w27 866; NONEON-NOSVE-NEXT: lsr w7, w7, #16 867; NONEON-NOSVE-NEXT: strh w20, [sp, #156] 868; NONEON-NOSVE-NEXT: mul w18, w18, w24 869; NONEON-NOSVE-NEXT: lsr w5, w5, #16 870; NONEON-NOSVE-NEXT: strh w19, [sp, #154] 871; NONEON-NOSVE-NEXT: mul w16, w16, w22 872; NONEON-NOSVE-NEXT: lsr w4, w4, #16 873; NONEON-NOSVE-NEXT: strh w7, [sp, #152] 874; NONEON-NOSVE-NEXT: lsr w2, w2, #16 875; NONEON-NOSVE-NEXT: strh w5, [sp, #150] 876; NONEON-NOSVE-NEXT: lsr w18, w18, #16 877; NONEON-NOSVE-NEXT: strh w4, [sp, #148] 878; NONEON-NOSVE-NEXT: lsr w16, w16, #16 879; NONEON-NOSVE-NEXT: strh w2, [sp, #146] 880; NONEON-NOSVE-NEXT: strh w18, [sp, #144] 881; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #224] // 16-byte Folded Reload 882; 
NONEON-NOSVE-NEXT: strh w16, [sp, #142] 883; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #208] // 16-byte Folded Reload 884; NONEON-NOSVE-NEXT: strh w14, [sp, #140] 885; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #192] // 16-byte Folded Reload 886; NONEON-NOSVE-NEXT: strh w13, [sp, #138] 887; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #176] // 16-byte Folded Reload 888; NONEON-NOSVE-NEXT: strh w12, [sp, #136] 889; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #160] // 16-byte Folded Reload 890; NONEON-NOSVE-NEXT: strh w11, [sp, #134] 891; NONEON-NOSVE-NEXT: strh w10, [sp, #132] 892; NONEON-NOSVE-NEXT: strh w9, [sp, #130] 893; NONEON-NOSVE-NEXT: strh w8, [sp, #128] 894; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #128] 895; NONEON-NOSVE-NEXT: stp q0, q1, [x0] 896; NONEON-NOSVE-NEXT: add sp, sp, #240 897; NONEON-NOSVE-NEXT: ret 898 %op1 = load <16 x i16>, ptr %a 899 %op2 = load <16 x i16>, ptr %b 900 %1 = sext <16 x i16> %op1 to <16 x i32> 901 %2 = sext <16 x i16> %op2 to <16 x i32> 902 %mul = mul <16 x i32> %1, %2 903 %shr = lshr <16 x i32> %mul, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16> 904 %res = trunc <16 x i32> %shr to <16 x i16> 905 store <16 x i16> %res, ptr %a 906 ret void 907} 908 909define <2 x i32> @smulh_v2i32(<2 x i32> %op1, <2 x i32> %op2) { 910; SVE-LABEL: smulh_v2i32: 911; SVE: // %bb.0: 912; SVE-NEXT: ptrue p0.s, vl2 913; SVE-NEXT: // kill: def $d0 killed $d0 def $z0 914; SVE-NEXT: // kill: def $d1 killed $d1 def $z1 915; SVE-NEXT: smulh z0.s, p0/m, z0.s, z1.s 916; SVE-NEXT: // kill: def $d0 killed $d0 killed $z0 917; SVE-NEXT: ret 918; 919; SVE2-LABEL: smulh_v2i32: 920; SVE2: // %bb.0: 921; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0 922; SVE2-NEXT: // kill: def $d1 killed $d1 def $z1 923; SVE2-NEXT: smulh z0.s, z0.s, z1.s 924; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0 925; SVE2-NEXT: ret 926; 927; NONEON-NOSVE-LABEL: smulh_v2i32: 928; NONEON-NOSVE: // %bb.0: 929; NONEON-NOSVE-NEXT: 
sub sp, sp, #32 930; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 931; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] 932; NONEON-NOSVE-NEXT: ldpsw x8, x9, [sp, #8] 933; NONEON-NOSVE-NEXT: ldpsw x11, x10, [sp, #16] 934; NONEON-NOSVE-NEXT: smull x9, w9, w10 935; NONEON-NOSVE-NEXT: smull x8, w8, w11 936; NONEON-NOSVE-NEXT: lsr x9, x9, #32 937; NONEON-NOSVE-NEXT: lsr x8, x8, #32 938; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] 939; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] 940; NONEON-NOSVE-NEXT: add sp, sp, #32 941; NONEON-NOSVE-NEXT: ret 942 %1 = sext <2 x i32> %op1 to <2 x i64> 943 %2 = sext <2 x i32> %op2 to <2 x i64> 944 %mul = mul <2 x i64> %1, %2 945 %shr = lshr <2 x i64> %mul, <i64 32, i64 32> 946 %res = trunc <2 x i64> %shr to <2 x i32> 947 ret <2 x i32> %res 948} 949 950define <4 x i32> @smulh_v4i32(<4 x i32> %op1, <4 x i32> %op2) { 951; SVE-LABEL: smulh_v4i32: 952; SVE: // %bb.0: 953; SVE-NEXT: ptrue p0.s, vl4 954; SVE-NEXT: // kill: def $q0 killed $q0 def $z0 955; SVE-NEXT: // kill: def $q1 killed $q1 def $z1 956; SVE-NEXT: smulh z0.s, p0/m, z0.s, z1.s 957; SVE-NEXT: // kill: def $q0 killed $q0 killed $z0 958; SVE-NEXT: ret 959; 960; SVE2-LABEL: smulh_v4i32: 961; SVE2: // %bb.0: 962; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0 963; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1 964; SVE2-NEXT: smulh z0.s, z0.s, z1.s 965; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0 966; SVE2-NEXT: ret 967; 968; NONEON-NOSVE-LABEL: smulh_v4i32: 969; NONEON-NOSVE: // %bb.0: 970; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-80]! 
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 80
; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp]
; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #32]
; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #16]
; NONEON-NOSVE-NEXT:    ldpsw x8, x9, [sp, #40]
; NONEON-NOSVE-NEXT:    ldpsw x10, x11, [sp, #32]
; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #48]
; NONEON-NOSVE-NEXT:    ldpsw x13, x12, [sp, #48]
; NONEON-NOSVE-NEXT:    smull x11, w11, w12
; NONEON-NOSVE-NEXT:    ldpsw x12, x14, [sp, #56]
; NONEON-NOSVE-NEXT:    smull x10, w10, w13
; NONEON-NOSVE-NEXT:    lsr x11, x11, #32
; NONEON-NOSVE-NEXT:    smull x9, w9, w14
; NONEON-NOSVE-NEXT:    smull x8, w8, w12
; NONEON-NOSVE-NEXT:    lsr x10, x10, #32
; NONEON-NOSVE-NEXT:    lsr x9, x9, #32
; NONEON-NOSVE-NEXT:    stp w10, w11, [sp, #72]
; NONEON-NOSVE-NEXT:    lsr x8, x8, #32
; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #64]
; NONEON-NOSVE-NEXT:    ldr q0, [sp, #64]
; NONEON-NOSVE-NEXT:    add sp, sp, #80
; NONEON-NOSVE-NEXT:    ret
  %1 = sext <4 x i32> %op1 to <4 x i64>
  %2 = sext <4 x i32> %op2 to <4 x i64>
  %mul = mul <4 x i64> %1, %2
  %shr = lshr <4 x i64> %mul, <i64 32, i64 32, i64 32, i64 32>
  %res = trunc <4 x i64> %shr to <4 x i32>
  ret <4 x i32> %res
}

; Signed high-half multiply of two <8 x i32> vectors loaded from %a and %b;
; the result is stored back through %a.
define void @smulh_v8i32(ptr %a, ptr %b) {
; SVE-LABEL: smulh_v8i32:
; SVE:       // %bb.0:
; SVE-NEXT:    ldp q0, q3, [x1]
; SVE-NEXT:    ptrue p0.s, vl4
; SVE-NEXT:    ldp q1, q2, [x0]
; SVE-NEXT:    smulh z0.s, p0/m, z0.s, z1.s
; SVE-NEXT:    movprfx z1, z2
; SVE-NEXT:    smulh z1.s, p0/m, z1.s, z3.s
; SVE-NEXT:    stp q0, q1, [x0]
; SVE-NEXT:    ret
;
; SVE2-LABEL: smulh_v8i32:
; SVE2:       // %bb.0:
; SVE2-NEXT:    ldp q0, q3, [x1]
; SVE2-NEXT:    ldp q1, q2, [x0]
; SVE2-NEXT:    smulh z0.s, z1.s, z0.s
; SVE2-NEXT:    smulh z1.s, z2.s, z3.s
; SVE2-NEXT:    stp q0, q1, [x0]
; SVE2-NEXT:    ret
;
; NONEON-NOSVE-LABEL: smulh_v8i32:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #160
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 160
; NONEON-NOSVE-NEXT:    ldp q0, q1, [x0]
; NONEON-NOSVE-NEXT:    ldp q3, q2, [x1]
; NONEON-NOSVE-NEXT:    str q1, [sp, #32]
; NONEON-NOSVE-NEXT:    stp q0, q3, [sp]
; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp]
; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #48]
; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #32]
; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #96]
; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #16]
; NONEON-NOSVE-NEXT:    ldpsw x8, x9, [sp, #56]
; NONEON-NOSVE-NEXT:    ldpsw x10, x11, [sp, #48]
; NONEON-NOSVE-NEXT:    ldpsw x12, x13, [sp, #104]
; NONEON-NOSVE-NEXT:    ldpsw x14, x15, [sp, #96]
; NONEON-NOSVE-NEXT:    str q2, [sp, #64]
; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #80]
; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #64]
; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #112]
; NONEON-NOSVE-NEXT:    ldpsw x17, x16, [sp, #112]
; NONEON-NOSVE-NEXT:    smull x15, w15, w16
; NONEON-NOSVE-NEXT:    ldpsw x16, x18, [sp, #120]
; NONEON-NOSVE-NEXT:    smull x14, w14, w17
; NONEON-NOSVE-NEXT:    ldpsw x17, x1, [sp, #80]
; NONEON-NOSVE-NEXT:    smull x13, w13, w18
; NONEON-NOSVE-NEXT:    lsr x15, x15, #32
; NONEON-NOSVE-NEXT:    smull x12, w12, w16
; NONEON-NOSVE-NEXT:    lsr x14, x14, #32
; NONEON-NOSVE-NEXT:    ldpsw x16, x18, [sp, #88]
; NONEON-NOSVE-NEXT:    smull x11, w11, w1
; NONEON-NOSVE-NEXT:    lsr x13, x13, #32
; NONEON-NOSVE-NEXT:    stp w14, w15, [sp, #152]
; NONEON-NOSVE-NEXT:    smull x10, w10, w17
; NONEON-NOSVE-NEXT:    lsr x12, x12, #32
; NONEON-NOSVE-NEXT:    smull x9, w9, w18
; NONEON-NOSVE-NEXT:    smull x8, w8, w16
; NONEON-NOSVE-NEXT:    lsr x11, x11, #32
; NONEON-NOSVE-NEXT:    stp w12, w13, [sp, #144]
; NONEON-NOSVE-NEXT:    lsr x10, x10, #32
; NONEON-NOSVE-NEXT:    lsr x9, x9, #32
; NONEON-NOSVE-NEXT:    lsr x8, x8, #32
; NONEON-NOSVE-NEXT:    stp w10, w11, [sp, #136]
; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #128]
; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #128]
; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
; NONEON-NOSVE-NEXT:    add sp, sp, #160
; NONEON-NOSVE-NEXT:    ret
  %op1 = load <8 x i32>, ptr %a
  %op2 = load <8 x i32>, ptr %b
  %1 = sext <8 x i32> %op1 to <8 x i64>
  %2 = sext <8 x i32> %op2 to <8 x i64>
  %mul = mul <8 x i64> %1, %2
  %shr = lshr <8 x i64> %mul, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
  %res = trunc <8 x i64> %shr to <8 x i32>
  store <8 x i32> %res, ptr %a
  ret void
}

; Signed high-half multiply of two <1 x i64> arguments: sign-extend to i128,
; multiply, shift right by 64 and truncate back to i64. The shift amount is a
; one-element splat of 64 built with insertelement + shufflevector; the
; placeholder operands are poison (every lane is overwritten, so no
; placeholder lane reaches the result).
define <1 x i64> @smulh_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
; SVE-LABEL: smulh_v1i64:
; SVE:       // %bb.0:
; SVE-NEXT:    ptrue p0.d, vl1
; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
; SVE-NEXT:    smulh z0.d, p0/m, z0.d, z1.d
; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
; SVE-NEXT:    ret
;
; SVE2-LABEL: smulh_v1i64:
; SVE2:       // %bb.0:
; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
; SVE2-NEXT:    smulh z0.d, z0.d, z1.d
; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
; SVE2-NEXT:    ret
;
; NONEON-NOSVE-LABEL: smulh_v1i64:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #16
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT:    fmov x8, d0
; NONEON-NOSVE-NEXT:    fmov x9, d1
; NONEON-NOSVE-NEXT:    smulh x8, x8, x9
; NONEON-NOSVE-NEXT:    str x8, [sp, #8]
; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
; NONEON-NOSVE-NEXT:    add sp, sp, #16
; NONEON-NOSVE-NEXT:    ret
  %insert = insertelement <1 x i128> poison, i128 64, i128 0
  %splat = shufflevector <1 x i128> %insert, <1 x i128> poison, <1 x i32> zeroinitializer
  %1 = sext <1 x i64> %op1 to <1 x i128>
  %2 = sext <1 x i64> %op2 to <1 x i128>
  %mul = mul <1 x i128> %1, %2
  %shr = lshr <1 x i128> %mul, %splat
  %res = trunc <1 x i128> %shr to <1 x i64>
  ret <1 x i64> %res
}

; Signed high-half multiply of two <2 x i64> arguments.
define <2 x i64> @smulh_v2i64(<2 x i64>
%op1, <2 x i64> %op2) {
; SVE-LABEL: smulh_v2i64:
; SVE:       // %bb.0:
; SVE-NEXT:    ptrue p0.d, vl2
; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
; SVE-NEXT:    // kill: def $q1 killed $q1 def $z1
; SVE-NEXT:    smulh z0.d, p0/m, z0.d, z1.d
; SVE-NEXT:    // kill: def $q0 killed $q0 killed $z0
; SVE-NEXT:    ret
;
; SVE2-LABEL: smulh_v2i64:
; SVE2:       // %bb.0:
; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
; SVE2-NEXT:    smulh z0.d, z0.d, z1.d
; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
; SVE2-NEXT:    ret
;
; NONEON-NOSVE-LABEL: smulh_v2i64:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    stp q0, q1, [sp, #-64]!
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 64
; NONEON-NOSVE-NEXT:    ldp x9, x8, [sp]
; NONEON-NOSVE-NEXT:    ldp x11, x10, [sp, #16]
; NONEON-NOSVE-NEXT:    smulh x8, x8, x10
; NONEON-NOSVE-NEXT:    smulh x9, x9, x11
; NONEON-NOSVE-NEXT:    stp x9, x8, [sp, #32]
; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #32]
; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #48]
; NONEON-NOSVE-NEXT:    ldr q0, [sp, #48]
; NONEON-NOSVE-NEXT:    add sp, sp, #64
; NONEON-NOSVE-NEXT:    ret
  %1 = sext <2 x i64> %op1 to <2 x i128>
  %2 = sext <2 x i64> %op2 to <2 x i128>
  %mul = mul <2 x i128> %1, %2
  %shr = lshr <2 x i128> %mul, <i128 64, i128 64>
  %res = trunc <2 x i128> %shr to <2 x i64>
  ret <2 x i64> %res
}

; Signed high-half multiply of two <4 x i64> vectors loaded from %a and %b;
; the result is stored back through %a.
define void @smulh_v4i64(ptr %a, ptr %b) {
; SVE-LABEL: smulh_v4i64:
; SVE:       // %bb.0:
; SVE-NEXT:    ldp q0, q3, [x1]
; SVE-NEXT:    ptrue p0.d, vl2
; SVE-NEXT:    ldp q1, q2, [x0]
; SVE-NEXT:    smulh z0.d, p0/m, z0.d, z1.d
; SVE-NEXT:    movprfx z1, z2
; SVE-NEXT:    smulh z1.d, p0/m, z1.d, z3.d
; SVE-NEXT:    stp q0, q1, [x0]
; SVE-NEXT:    ret
;
; SVE2-LABEL: smulh_v4i64:
; SVE2:       // %bb.0:
; SVE2-NEXT:    ldp q0, q3, [x1]
; SVE2-NEXT:    ldp q1, q2, [x0]
; SVE2-NEXT:    smulh z0.d, z1.d, z0.d
; SVE2-NEXT:    smulh z1.d, z2.d, z3.d
; SVE2-NEXT:    stp q0, q1, [x0]
; SVE2-NEXT:    ret
;
; NONEON-NOSVE-LABEL: smulh_v4i64:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #128
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 128
; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
; NONEON-NOSVE-NEXT:    ldp q2, q3, [x1]
; NONEON-NOSVE-NEXT:    stp q1, q2, [sp]
; NONEON-NOSVE-NEXT:    ldp x11, x10, [sp]
; NONEON-NOSVE-NEXT:    stp q0, q3, [sp, #32]
; NONEON-NOSVE-NEXT:    ldp x13, x12, [sp, #16]
; NONEON-NOSVE-NEXT:    ldp x9, x8, [sp, #32]
; NONEON-NOSVE-NEXT:    smulh x10, x10, x12
; NONEON-NOSVE-NEXT:    ldp x14, x12, [sp, #48]
; NONEON-NOSVE-NEXT:    smulh x11, x11, x13
; NONEON-NOSVE-NEXT:    smulh x8, x8, x12
; NONEON-NOSVE-NEXT:    smulh x9, x9, x14
; NONEON-NOSVE-NEXT:    stp x11, x10, [sp, #64]
; NONEON-NOSVE-NEXT:    stp x9, x8, [sp, #80]
; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #80]
; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #112]
; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #64]
; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #96]
; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #96]
; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
; NONEON-NOSVE-NEXT:    add sp, sp, #128
; NONEON-NOSVE-NEXT:    ret
  %op1 = load <4 x i64>, ptr %a
  %op2 = load <4 x i64>, ptr %b
  %1 = sext <4 x i64> %op1 to <4 x i128>
  %2 = sext <4 x i64> %op2 to <4 x i128>
  %mul = mul <4 x i128> %1, %2
  %shr = lshr <4 x i128> %mul, <i128 64, i128 64, i128 64, i128 64>
  %res = trunc <4 x i128> %shr to <4 x i64>
  store <4 x i64> %res, ptr %a
  ret void
}

;
; UMULH
;

; Zero-extend two <4 x i8> arguments to i16, multiply, shift right and
; truncate. The shift amount in this function is 4 rather than the 8-bit
; element width, and the expected output below is a plain mul followed by
; lsr #4 instead of a umulh instruction.
define <4 x i8> @umulh_v4i8(<4 x i8> %op1, <4 x i8> %op2) {
; SVE-LABEL: umulh_v4i8:
; SVE:       // %bb.0:
; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
; SVE-NEXT:    ptrue p0.h, vl4
; SVE-NEXT:    and z0.h, z0.h, #0xff
; SVE-NEXT:    and z1.h, z1.h, #0xff
; SVE-NEXT:    mul z0.h, p0/m, z0.h, z1.h
; SVE-NEXT:    lsr z0.h, z0.h, #4
; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
; SVE-NEXT:    ret
;
; SVE2-LABEL: umulh_v4i8:
; SVE2:       // %bb.0:
; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
; SVE2-NEXT:    and z0.h, z0.h, #0xff
; SVE2-NEXT:    and z1.h, z1.h, #0xff
; SVE2-NEXT:    mul z0.h, z0.h, z1.h
; SVE2-NEXT:    lsr z0.h, z0.h, #4
; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
; SVE2-NEXT:    ret
;
; NONEON-NOSVE-LABEL: umulh_v4i8:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #32
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #8]
; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #14]
; NONEON-NOSVE-NEXT:    ldrb w12, [sp, #22]
; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #12]
; NONEON-NOSVE-NEXT:    ldrb w10, [sp, #10]
; NONEON-NOSVE-NEXT:    ldrb w11, [sp, #8]
; NONEON-NOSVE-NEXT:    ldrb w13, [sp, #20]
; NONEON-NOSVE-NEXT:    ldrb w14, [sp, #18]
; NONEON-NOSVE-NEXT:    mul w8, w8, w12
; NONEON-NOSVE-NEXT:    ldrb w12, [sp, #16]
; NONEON-NOSVE-NEXT:    mul w9, w9, w13
; NONEON-NOSVE-NEXT:    mul w10, w10, w14
; NONEON-NOSVE-NEXT:    mul w11, w11, w12
; NONEON-NOSVE-NEXT:    lsr w8, w8, #4
; NONEON-NOSVE-NEXT:    lsr w9, w9, #4
; NONEON-NOSVE-NEXT:    lsr w10, w10, #4
; NONEON-NOSVE-NEXT:    strh w8, [sp, #30]
; NONEON-NOSVE-NEXT:    lsr w8, w11, #4
; NONEON-NOSVE-NEXT:    strh w9, [sp, #28]
; NONEON-NOSVE-NEXT:    strh w10, [sp, #26]
; NONEON-NOSVE-NEXT:    strh w8, [sp, #24]
; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
; NONEON-NOSVE-NEXT:    add sp, sp, #32
; NONEON-NOSVE-NEXT:    ret
  %1 = zext <4 x i8> %op1 to <4 x i16>
  %2 = zext <4 x i8> %op2 to <4 x i16>
  %mul = mul <4 x i16> %1, %2
  %shr = lshr <4 x i16> %mul, <i16 4, i16 4, i16 4, i16 4>
  %res = trunc <4 x i16> %shr to <4 x i8>
  ret <4 x i8> %res
}

; Unsigned high-half multiply of two <8 x i8> arguments.
define <8 x i8>
@umulh_v8i8(<8 x i8> %op1, <8 x i8> %op2) {
; SVE-LABEL: umulh_v8i8:
; SVE:       // %bb.0:
; SVE-NEXT:    ptrue p0.b, vl8
; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
; SVE-NEXT:    umulh z0.b, p0/m, z0.b, z1.b
; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
; SVE-NEXT:    ret
;
; SVE2-LABEL: umulh_v8i8:
; SVE2:       // %bb.0:
; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
; SVE2-NEXT:    umulh z0.b, z0.b, z1.b
; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
; SVE2-NEXT:    ret
;
; NONEON-NOSVE-LABEL: umulh_v8i8:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #32
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #8]
; NONEON-NOSVE-NEXT:    ldrb w15, [sp, #15]
; NONEON-NOSVE-NEXT:    ldrb w16, [sp, #23]
; NONEON-NOSVE-NEXT:    ldrb w12, [sp, #12]
; NONEON-NOSVE-NEXT:    ldrb w13, [sp, #13]
; NONEON-NOSVE-NEXT:    ldrb w14, [sp, #14]
; NONEON-NOSVE-NEXT:    ldrb w17, [sp, #22]
; NONEON-NOSVE-NEXT:    mul w15, w15, w16
; NONEON-NOSVE-NEXT:    ldrb w16, [sp, #21]
; NONEON-NOSVE-NEXT:    ldrb w18, [sp, #20]
; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #8]
; NONEON-NOSVE-NEXT:    mul w14, w14, w17
; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #9]
; NONEON-NOSVE-NEXT:    ldrb w10, [sp, #10]
; NONEON-NOSVE-NEXT:    mul w13, w13, w16
; NONEON-NOSVE-NEXT:    ldrb w11, [sp, #11]
; NONEON-NOSVE-NEXT:    ldrb w17, [sp, #16]
; NONEON-NOSVE-NEXT:    mul w12, w12, w18
; NONEON-NOSVE-NEXT:    lsr w15, w15, #8
; NONEON-NOSVE-NEXT:    ldrb w0, [sp, #19]
; NONEON-NOSVE-NEXT:    ldrb w16, [sp, #18]
; NONEON-NOSVE-NEXT:    lsr w14, w14, #8
; NONEON-NOSVE-NEXT:    ldrb w18, [sp, #17]
; NONEON-NOSVE-NEXT:    mul w8, w8, w17
; NONEON-NOSVE-NEXT:    lsr w13, w13, #8
; NONEON-NOSVE-NEXT:    mul w11, w11, w0
; NONEON-NOSVE-NEXT:    lsr w12, w12, #8
; NONEON-NOSVE-NEXT:    strb w15, [sp, #31]
; NONEON-NOSVE-NEXT:    mul w10, w10, w16
; NONEON-NOSVE-NEXT:    strb w14, [sp, #30]
; NONEON-NOSVE-NEXT:    mul w9, w9, w18
; NONEON-NOSVE-NEXT:    lsr w8, w8, #8
; NONEON-NOSVE-NEXT:    strb w13, [sp, #29]
; NONEON-NOSVE-NEXT:    lsr w11, w11, #8
; NONEON-NOSVE-NEXT:    strb w12, [sp, #28]
; NONEON-NOSVE-NEXT:    lsr w10, w10, #8
; NONEON-NOSVE-NEXT:    strb w8, [sp, #24]
; NONEON-NOSVE-NEXT:    lsr w9, w9, #8
; NONEON-NOSVE-NEXT:    strb w11, [sp, #27]
; NONEON-NOSVE-NEXT:    strb w10, [sp, #26]
; NONEON-NOSVE-NEXT:    strb w9, [sp, #25]
; NONEON-NOSVE-NEXT:    ldr d0, [sp, #24]
; NONEON-NOSVE-NEXT:    add sp, sp, #32
; NONEON-NOSVE-NEXT:    ret
  %1 = zext <8 x i8> %op1 to <8 x i16>
  %2 = zext <8 x i8> %op2 to <8 x i16>
  %mul = mul <8 x i16> %1, %2
  %shr = lshr <8 x i16> %mul, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  %res = trunc <8 x i16> %shr to <8 x i8>
  ret <8 x i8> %res
}

; Unsigned high-half multiply of two <16 x i8> arguments: zero-extend to i16,
; multiply, shift right by 8 and truncate back to i8.
define <16 x i8> @umulh_v16i8(<16 x i8> %op1, <16 x i8> %op2) {
; SVE-LABEL: umulh_v16i8:
; SVE:       // %bb.0:
; SVE-NEXT:    ptrue p0.b, vl16
; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
; SVE-NEXT:    // kill: def $q1 killed $q1 def $z1
; SVE-NEXT:    umulh z0.b, p0/m, z0.b, z1.b
; SVE-NEXT:    // kill: def $q0 killed $q0 killed $z0
; SVE-NEXT:    ret
;
; SVE2-LABEL: umulh_v16i8:
; SVE2:       // %bb.0:
; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
; SVE2-NEXT:    umulh z0.b, z0.b, z1.b
; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
; SVE2-NEXT:    ret
;
; NONEON-NOSVE-LABEL: umulh_v16i8:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #160
; NONEON-NOSVE-NEXT:    str x27, [sp, #80] // 8-byte Folded Spill
; NONEON-NOSVE-NEXT:    stp x26, x25, [sp, #96] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT:    stp x24, x23, [sp, #112] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT:    stp x22, x21, [sp, #128] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT:    stp x20, x19, [sp, #144] // 16-byte Folded Spill
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 160
; NONEON-NOSVE-NEXT:    .cfi_offset w19, -8
; NONEON-NOSVE-NEXT:    .cfi_offset w20, -16
; NONEON-NOSVE-NEXT:    .cfi_offset w21, -24
; NONEON-NOSVE-NEXT:    .cfi_offset w22, -32
; NONEON-NOSVE-NEXT:    .cfi_offset w23, -40
; NONEON-NOSVE-NEXT:    .cfi_offset w24, -48
; NONEON-NOSVE-NEXT:    .cfi_offset w25, -56
; NONEON-NOSVE-NEXT:    .cfi_offset w26, -64
; NONEON-NOSVE-NEXT:    .cfi_offset w27, -80
; NONEON-NOSVE-NEXT:    str q0, [sp]
; NONEON-NOSVE-NEXT:    ldp d2, d0, [sp]
; NONEON-NOSVE-NEXT:    str q1, [sp, #16]
; NONEON-NOSVE-NEXT:    stp d0, d2, [sp, #40]
; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #16]
; NONEON-NOSVE-NEXT:    ldrb w6, [sp, #44]
; NONEON-NOSVE-NEXT:    ldrb w7, [sp, #45]
; NONEON-NOSVE-NEXT:    ldrb w19, [sp, #46]
; NONEON-NOSVE-NEXT:    ldrb w20, [sp, #47]
; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #48]
; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #49]
; NONEON-NOSVE-NEXT:    str d0, [sp, #56]
; NONEON-NOSVE-NEXT:    ldrb w10, [sp, #50]
; NONEON-NOSVE-NEXT:    ldrb w11, [sp, #51]
; NONEON-NOSVE-NEXT:    ldrb w21, [sp, #63]
; NONEON-NOSVE-NEXT:    ldrb w23, [sp, #62]
; NONEON-NOSVE-NEXT:    ldrb w25, [sp, #61]
; NONEON-NOSVE-NEXT:    ldrb w26, [sp, #60]
; NONEON-NOSVE-NEXT:    str d1, [sp, #88]
; NONEON-NOSVE-NEXT:    ldrb w12, [sp, #52]
; NONEON-NOSVE-NEXT:    mul w20, w20, w21
; NONEON-NOSVE-NEXT:    ldrb w13, [sp, #53]
; NONEON-NOSVE-NEXT:    ldrb w15, [sp, #54]
; NONEON-NOSVE-NEXT:    mul w19, w19, w23
; NONEON-NOSVE-NEXT:    ldrb w17, [sp, #55]
; NONEON-NOSVE-NEXT:    ldrb w0, [sp, #40]
; NONEON-NOSVE-NEXT:    mul w7, w7, w25
; NONEON-NOSVE-NEXT:    ldrb w2, [sp, #41]
; NONEON-NOSVE-NEXT:    ldrb w3, [sp, #42]
; NONEON-NOSVE-NEXT:    mul w6, w6, w26
; NONEON-NOSVE-NEXT:    lsr w20, w20, #8
; NONEON-NOSVE-NEXT:    ldrb w4, [sp, #43]
; NONEON-NOSVE-NEXT:    ldrb w14, [sp, #88]
; NONEON-NOSVE-NEXT:    lsr w19, w19, #8
; NONEON-NOSVE-NEXT:    ldrb w16, [sp, #89]
; NONEON-NOSVE-NEXT:    ldrb w18, [sp, #90]
; NONEON-NOSVE-NEXT:    lsr w7, w7, #8
; NONEON-NOSVE-NEXT:    ldrb w1, [sp, #91]
; NONEON-NOSVE-NEXT:    ldrb w5, [sp, #92]
; NONEON-NOSVE-NEXT:    mul w9, w9, w16
; NONEON-NOSVE-NEXT:    lsr w6, w6, #8
; NONEON-NOSVE-NEXT:    ldrb w22, [sp, #93]
; NONEON-NOSVE-NEXT:    ldrb w24, [sp, #94]
; NONEON-NOSVE-NEXT:    mul w11, w11, w1
; NONEON-NOSVE-NEXT:    ldrb w21, [sp, #95]
; NONEON-NOSVE-NEXT:    ldrb w23, [sp, #56]
; NONEON-NOSVE-NEXT:    mul w12, w12, w5
; NONEON-NOSVE-NEXT:    ldrb w27, [sp, #59]
; NONEON-NOSVE-NEXT:    ldrb w25, [sp, #58]
; NONEON-NOSVE-NEXT:    mul w15, w15, w24
; NONEON-NOSVE-NEXT:    ldrb w26, [sp, #57]
; NONEON-NOSVE-NEXT:    mul w0, w0, w23
; NONEON-NOSVE-NEXT:    lsr w11, w11, #8
; NONEON-NOSVE-NEXT:    mul w4, w4, w27
; NONEON-NOSVE-NEXT:    lsr w12, w12, #8
; NONEON-NOSVE-NEXT:    lsr w9, w9, #8
; NONEON-NOSVE-NEXT:    mul w3, w3, w25
; NONEON-NOSVE-NEXT:    lsr w15, w15, #8
; NONEON-NOSVE-NEXT:    strb w20, [sp, #79]
; NONEON-NOSVE-NEXT:    mul w2, w2, w26
; NONEON-NOSVE-NEXT:    lsr w0, w0, #8
; NONEON-NOSVE-NEXT:    strb w19, [sp, #78]
; NONEON-NOSVE-NEXT:    mul w17, w17, w21
; NONEON-NOSVE-NEXT:    lsr w4, w4, #8
; NONEON-NOSVE-NEXT:    strb w7, [sp, #77]
; NONEON-NOSVE-NEXT:    mul w13, w13, w22
; NONEON-NOSVE-NEXT:    lsr w3, w3, #8
; NONEON-NOSVE-NEXT:    strb w6, [sp, #76]
; NONEON-NOSVE-NEXT:    mul w10, w10, w18
; NONEON-NOSVE-NEXT:    lsr w2, w2, #8
; NONEON-NOSVE-NEXT:    strb w4, [sp, #75]
; NONEON-NOSVE-NEXT:    mul w8, w8, w14
; NONEON-NOSVE-NEXT:    lsr w17, w17, #8
; NONEON-NOSVE-NEXT:    strb w3, [sp, #74]
; NONEON-NOSVE-NEXT:    lsr w13, w13, #8
; NONEON-NOSVE-NEXT:    strb w2, [sp, #73]
; NONEON-NOSVE-NEXT:    ldr x27, [sp, #80] // 8-byte Folded Reload
; NONEON-NOSVE-NEXT:    lsr w10, w10, #8
; NONEON-NOSVE-NEXT:    strb w0, [sp, #72]
; NONEON-NOSVE-NEXT:    lsr w8, w8, #8
; NONEON-NOSVE-NEXT:    strb w17, [sp, #71]
; NONEON-NOSVE-NEXT:    strb w15, [sp, #70]
; NONEON-NOSVE-NEXT:    ldp x20, x19, [sp, #144] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT:    strb w13, [sp, #69]
; NONEON-NOSVE-NEXT:    ldp x22, x21, [sp, #128] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT:    strb w12, [sp, #68]
; NONEON-NOSVE-NEXT:    ldp x24, x23, [sp, #112] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT:    strb w11, [sp, #67]
; NONEON-NOSVE-NEXT:    ldp x26, x25, [sp, #96] // 16-byte Folded Reload
; NONEON-NOSVE-NEXT:    strb w10, [sp, #66]
; NONEON-NOSVE-NEXT:    strb w9, [sp, #65]
; NONEON-NOSVE-NEXT:    strb w8, [sp, #64]
; NONEON-NOSVE-NEXT:    ldr q0, [sp, #64]
; NONEON-NOSVE-NEXT:    add sp, sp, #160
; NONEON-NOSVE-NEXT:    ret
  %1 = zext <16 x i8> %op1 to <16 x i16>
  %2 = zext <16 x i8> %op2 to <16 x i16>
  %mul = mul <16 x i16> %1, %2
  %shr = lshr <16 x i16> %mul, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  %res = trunc <16 x i16> %shr to <16 x i8>
  ret <16 x i8> %res
}

define void @umulh_v32i8(ptr %a, ptr %b) {
; SVE-LABEL: umulh_v32i8:
; SVE:       // %bb.0:
; SVE-NEXT:    ldp q0, q3, [x1]
; SVE-NEXT:    ptrue p0.b, vl16
; SVE-NEXT:    ldp q1, q2, [x0]
; SVE-NEXT:    umulh z0.b, p0/m, z0.b, z1.b
; SVE-NEXT:    movprfx z1, z2
; SVE-NEXT:    umulh z1.b, p0/m, z1.b, z3.b
; SVE-NEXT:    stp q0, q1, [x0]
; SVE-NEXT:    ret
;
; SVE2-LABEL: umulh_v32i8:
; SVE2:       // %bb.0:
; SVE2-NEXT:    ldp q0, q3, [x1]
; SVE2-NEXT:    ldp q1, q2, [x0]
; SVE2-NEXT:    umulh z0.b, z1.b, z0.b
; SVE2-NEXT:    umulh z1.b, z2.b, z3.b
; SVE2-NEXT:    stp q0, q1, [x0]
; SVE2-NEXT:    ret
;
; NONEON-NOSVE-LABEL: umulh_v32i8:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #384
; NONEON-NOSVE-NEXT:    stp x29, x30,
[sp, #288] // 16-byte Folded Spill 1521; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #304] // 16-byte Folded Spill 1522; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #320] // 16-byte Folded Spill 1523; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #336] // 16-byte Folded Spill 1524; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #352] // 16-byte Folded Spill 1525; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #368] // 16-byte Folded Spill 1526; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 384 1527; NONEON-NOSVE-NEXT: .cfi_offset w19, -8 1528; NONEON-NOSVE-NEXT: .cfi_offset w20, -16 1529; NONEON-NOSVE-NEXT: .cfi_offset w21, -24 1530; NONEON-NOSVE-NEXT: .cfi_offset w22, -32 1531; NONEON-NOSVE-NEXT: .cfi_offset w23, -40 1532; NONEON-NOSVE-NEXT: .cfi_offset w24, -48 1533; NONEON-NOSVE-NEXT: .cfi_offset w25, -56 1534; NONEON-NOSVE-NEXT: .cfi_offset w26, -64 1535; NONEON-NOSVE-NEXT: .cfi_offset w27, -72 1536; NONEON-NOSVE-NEXT: .cfi_offset w28, -80 1537; NONEON-NOSVE-NEXT: .cfi_offset w30, -88 1538; NONEON-NOSVE-NEXT: .cfi_offset w29, -96 1539; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] 1540; NONEON-NOSVE-NEXT: mov x29, x0 1541; NONEON-NOSVE-NEXT: ldp q3, q2, [x1] 1542; NONEON-NOSVE-NEXT: str q0, [sp, #128] 1543; NONEON-NOSVE-NEXT: str q1, [sp, #160] 1544; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #128] 1545; NONEON-NOSVE-NEXT: str q3, [sp, #144] 1546; NONEON-NOSVE-NEXT: str q2, [sp, #192] 1547; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #176] 1548; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #160] 1549; NONEON-NOSVE-NEXT: ldrb w9, [sp, #184] 1550; NONEON-NOSVE-NEXT: ldrb w8, [sp, #185] 1551; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #120] // 8-byte Folded Spill 1552; NONEON-NOSVE-NEXT: ldrb w9, [sp, #186] 1553; NONEON-NOSVE-NEXT: ldrb w8, [sp, #187] 1554; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #224] 1555; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #144] 1556; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #112] // 8-byte Folded Spill 1557; NONEON-NOSVE-NEXT: ldrb w9, [sp, #188] 1558; NONEON-NOSVE-NEXT: ldrb w8, [sp, #189] 1559; NONEON-NOSVE-NEXT: ldrb w13, [sp, #229] 
1560; NONEON-NOSVE-NEXT: ldrb w11, [sp, #227] 1561; NONEON-NOSVE-NEXT: ldrb w12, [sp, #228] 1562; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #104] // 8-byte Folded Spill 1563; NONEON-NOSVE-NEXT: ldrb w9, [sp, #190] 1564; NONEON-NOSVE-NEXT: ldrb w8, [sp, #191] 1565; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #208] 1566; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #192] 1567; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #96] // 8-byte Folded Spill 1568; NONEON-NOSVE-NEXT: ldrb w9, [sp, #176] 1569; NONEON-NOSVE-NEXT: ldrb w8, [sp, #177] 1570; NONEON-NOSVE-NEXT: ldrb w10, [sp, #226] 1571; NONEON-NOSVE-NEXT: ldrb w2, [sp, #214] 1572; NONEON-NOSVE-NEXT: ldrb w1, [sp, #215] 1573; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #88] // 8-byte Folded Spill 1574; NONEON-NOSVE-NEXT: ldrb w9, [sp, #178] 1575; NONEON-NOSVE-NEXT: ldrb w8, [sp, #179] 1576; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #240] 1577; NONEON-NOSVE-NEXT: ldrb w4, [sp, #212] 1578; NONEON-NOSVE-NEXT: ldrb w3, [sp, #213] 1579; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #80] // 8-byte Folded Spill 1580; NONEON-NOSVE-NEXT: ldrb w9, [sp, #180] 1581; NONEON-NOSVE-NEXT: ldrb w8, [sp, #181] 1582; NONEON-NOSVE-NEXT: ldrb w14, [sp, #247] 1583; NONEON-NOSVE-NEXT: ldrb w15, [sp, #246] 1584; NONEON-NOSVE-NEXT: ldrb w16, [sp, #244] 1585; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #72] // 8-byte Folded Spill 1586; NONEON-NOSVE-NEXT: ldrb w9, [sp, #182] 1587; NONEON-NOSVE-NEXT: ldrb w8, [sp, #183] 1588; NONEON-NOSVE-NEXT: mul w26, w12, w16 1589; NONEON-NOSVE-NEXT: ldrb w12, [sp, #242] 1590; NONEON-NOSVE-NEXT: ldrb w16, [sp, #250] 1591; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #64] // 8-byte Folded Spill 1592; NONEON-NOSVE-NEXT: ldrb w9, [sp, #232] 1593; NONEON-NOSVE-NEXT: ldrb w8, [sp, #233] 1594; NONEON-NOSVE-NEXT: mul w30, w10, w12 1595; NONEON-NOSVE-NEXT: ldrb w10, [sp, #255] 1596; NONEON-NOSVE-NEXT: ldrb w12, [sp, #253] 1597; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #56] // 8-byte Folded Spill 1598; NONEON-NOSVE-NEXT: ldrb w9, [sp, #234] 1599; NONEON-NOSVE-NEXT: ldrb w8, [sp, #235] 1600; 
NONEON-NOSVE-NEXT: ldrb w0, [sp, #248] 1601; NONEON-NOSVE-NEXT: ldrb w18, [sp, #249] 1602; NONEON-NOSVE-NEXT: ldrb w6, [sp, #210] 1603; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #48] // 8-byte Folded Spill 1604; NONEON-NOSVE-NEXT: ldrb w9, [sp, #236] 1605; NONEON-NOSVE-NEXT: ldrb w8, [sp, #237] 1606; NONEON-NOSVE-NEXT: ldrb w5, [sp, #211] 1607; NONEON-NOSVE-NEXT: ldrb w19, [sp, #208] 1608; NONEON-NOSVE-NEXT: ldrb w7, [sp, #209] 1609; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #40] // 8-byte Folded Spill 1610; NONEON-NOSVE-NEXT: ldrb w9, [sp, #238] 1611; NONEON-NOSVE-NEXT: ldrb w8, [sp, #239] 1612; NONEON-NOSVE-NEXT: ldrb w21, [sp, #222] 1613; NONEON-NOSVE-NEXT: ldrb w20, [sp, #223] 1614; NONEON-NOSVE-NEXT: ldrb w23, [sp, #220] 1615; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #32] // 8-byte Folded Spill 1616; NONEON-NOSVE-NEXT: ldrb w9, [sp, #224] 1617; NONEON-NOSVE-NEXT: ldrb w8, [sp, #225] 1618; NONEON-NOSVE-NEXT: ldrb w22, [sp, #221] 1619; NONEON-NOSVE-NEXT: ldrb w24, [sp, #219] 1620; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] // 8-byte Folded Spill 1621; NONEON-NOSVE-NEXT: ldrb w9, [sp, #230] 1622; NONEON-NOSVE-NEXT: ldrb w8, [sp, #231] 1623; NONEON-NOSVE-NEXT: mul w27, w8, w14 1624; NONEON-NOSVE-NEXT: ldrb w14, [sp, #245] 1625; NONEON-NOSVE-NEXT: ldrb w8, [sp, #217] 1626; NONEON-NOSVE-NEXT: mul w9, w9, w15 1627; NONEON-NOSVE-NEXT: ldrb w15, [sp, #251] 1628; NONEON-NOSVE-NEXT: mul w25, w13, w14 1629; NONEON-NOSVE-NEXT: ldrb w13, [sp, #243] 1630; NONEON-NOSVE-NEXT: lsr w14, w27, #8 1631; NONEON-NOSVE-NEXT: ldrb w27, [sp, #218] 1632; NONEON-NOSVE-NEXT: lsr w17, w9, #8 1633; NONEON-NOSVE-NEXT: mul w28, w11, w13 1634; NONEON-NOSVE-NEXT: ldrb w9, [sp, #216] 1635; NONEON-NOSVE-NEXT: strb w14, [sp, #287] 1636; NONEON-NOSVE-NEXT: lsr w14, w25, #8 1637; NONEON-NOSVE-NEXT: ldr w25, [sp, #24] // 4-byte Folded Reload 1638; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #16] // 8-byte Folded Spill 1639; NONEON-NOSVE-NEXT: ldrb w8, [sp, #241] 1640; NONEON-NOSVE-NEXT: ldrb w9, [sp, #240] 1641; 
NONEON-NOSVE-NEXT: strb w14, [sp, #285] 1642; NONEON-NOSVE-NEXT: lsr w14, w28, #8 1643; NONEON-NOSVE-NEXT: ldrb w11, [sp, #254] 1644; NONEON-NOSVE-NEXT: mul w8, w25, w8 1645; NONEON-NOSVE-NEXT: ldr w25, [sp, #28] // 4-byte Folded Reload 1646; NONEON-NOSVE-NEXT: ldrb w13, [sp, #252] 1647; NONEON-NOSVE-NEXT: strb w14, [sp, #283] 1648; NONEON-NOSVE-NEXT: ldr w14, [sp, #40] // 4-byte Folded Reload 1649; NONEON-NOSVE-NEXT: mul w9, w25, w9 1650; NONEON-NOSVE-NEXT: ldr w25, [sp, #32] // 4-byte Folded Reload 1651; NONEON-NOSVE-NEXT: strb w17, [sp, #286] 1652; NONEON-NOSVE-NEXT: mul w12, w14, w12 1653; NONEON-NOSVE-NEXT: lsr w8, w8, #8 1654; NONEON-NOSVE-NEXT: lsr w17, w26, #8 1655; NONEON-NOSVE-NEXT: mul w10, w25, w10 1656; NONEON-NOSVE-NEXT: ldr w25, [sp, #36] // 4-byte Folded Reload 1657; NONEON-NOSVE-NEXT: ldr w14, [sp, #44] // 4-byte Folded Reload 1658; NONEON-NOSVE-NEXT: lsr w9, w9, #8 1659; NONEON-NOSVE-NEXT: strb w8, [sp, #281] 1660; NONEON-NOSVE-NEXT: mul w11, w25, w11 1661; NONEON-NOSVE-NEXT: strb w17, [sp, #284] 1662; NONEON-NOSVE-NEXT: lsr w17, w30, #8 1663; NONEON-NOSVE-NEXT: mul w13, w14, w13 1664; NONEON-NOSVE-NEXT: lsr w8, w10, #8 1665; NONEON-NOSVE-NEXT: ldr w10, [sp, #48] // 4-byte Folded Reload 1666; NONEON-NOSVE-NEXT: strb w9, [sp, #280] 1667; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #320] // 16-byte Folded Reload 1668; NONEON-NOSVE-NEXT: lsr w9, w11, #8 1669; NONEON-NOSVE-NEXT: mul w10, w10, w15 1670; NONEON-NOSVE-NEXT: ldr w11, [sp, #52] // 4-byte Folded Reload 1671; NONEON-NOSVE-NEXT: strb w8, [sp, #279] 1672; NONEON-NOSVE-NEXT: lsr w8, w12, #8 1673; NONEON-NOSVE-NEXT: ldr w12, [sp, #56] // 4-byte Folded Reload 1674; NONEON-NOSVE-NEXT: mul w11, w11, w16 1675; NONEON-NOSVE-NEXT: strb w9, [sp, #278] 1676; NONEON-NOSVE-NEXT: lsr w9, w13, #8 1677; NONEON-NOSVE-NEXT: mul w12, w12, w18 1678; NONEON-NOSVE-NEXT: ldr w13, [sp, #60] // 4-byte Folded Reload 1679; NONEON-NOSVE-NEXT: strb w8, [sp, #277] 1680; NONEON-NOSVE-NEXT: lsr w8, w10, #8 1681; 
NONEON-NOSVE-NEXT: ldr w10, [sp, #64] // 4-byte Folded Reload 1682; NONEON-NOSVE-NEXT: strb w9, [sp, #276] 1683; NONEON-NOSVE-NEXT: mul w13, w13, w0 1684; NONEON-NOSVE-NEXT: lsr w9, w11, #8 1685; NONEON-NOSVE-NEXT: ldr w11, [sp, #68] // 4-byte Folded Reload 1686; NONEON-NOSVE-NEXT: mul w10, w10, w1 1687; NONEON-NOSVE-NEXT: strb w8, [sp, #275] 1688; NONEON-NOSVE-NEXT: lsr w8, w12, #8 1689; NONEON-NOSVE-NEXT: mul w11, w11, w2 1690; NONEON-NOSVE-NEXT: ldr w12, [sp, #72] // 4-byte Folded Reload 1691; NONEON-NOSVE-NEXT: strb w9, [sp, #274] 1692; NONEON-NOSVE-NEXT: lsr w9, w13, #8 1693; NONEON-NOSVE-NEXT: ldr w13, [sp, #76] // 4-byte Folded Reload 1694; NONEON-NOSVE-NEXT: strb w8, [sp, #273] 1695; NONEON-NOSVE-NEXT: mul w12, w12, w3 1696; NONEON-NOSVE-NEXT: lsr w8, w10, #8 1697; NONEON-NOSVE-NEXT: ldr w10, [sp, #80] // 4-byte Folded Reload 1698; NONEON-NOSVE-NEXT: mul w13, w13, w4 1699; NONEON-NOSVE-NEXT: strb w9, [sp, #272] 1700; NONEON-NOSVE-NEXT: lsr w9, w11, #8 1701; NONEON-NOSVE-NEXT: mul w10, w10, w5 1702; NONEON-NOSVE-NEXT: ldr w11, [sp, #84] // 4-byte Folded Reload 1703; NONEON-NOSVE-NEXT: strb w8, [sp, #271] 1704; NONEON-NOSVE-NEXT: lsr w8, w12, #8 1705; NONEON-NOSVE-NEXT: ldr w12, [sp, #88] // 4-byte Folded Reload 1706; NONEON-NOSVE-NEXT: strb w9, [sp, #270] 1707; NONEON-NOSVE-NEXT: mul w11, w11, w6 1708; NONEON-NOSVE-NEXT: lsr w9, w13, #8 1709; NONEON-NOSVE-NEXT: ldr w13, [sp, #92] // 4-byte Folded Reload 1710; NONEON-NOSVE-NEXT: mul w12, w12, w7 1711; NONEON-NOSVE-NEXT: strb w8, [sp, #269] 1712; NONEON-NOSVE-NEXT: lsr w8, w10, #8 1713; NONEON-NOSVE-NEXT: mul w13, w13, w19 1714; NONEON-NOSVE-NEXT: ldr w10, [sp, #96] // 4-byte Folded Reload 1715; NONEON-NOSVE-NEXT: strb w9, [sp, #268] 1716; NONEON-NOSVE-NEXT: lsr w9, w11, #8 1717; NONEON-NOSVE-NEXT: ldr w11, [sp, #100] // 4-byte Folded Reload 1718; NONEON-NOSVE-NEXT: strb w8, [sp, #267] 1719; NONEON-NOSVE-NEXT: mul w10, w10, w20 1720; NONEON-NOSVE-NEXT: lsr w8, w12, #8 1721; NONEON-NOSVE-NEXT: ldr w12, [sp, 
#104] // 4-byte Folded Reload 1722; NONEON-NOSVE-NEXT: mul w11, w11, w21 1723; NONEON-NOSVE-NEXT: strb w9, [sp, #266] 1724; NONEON-NOSVE-NEXT: lsr w9, w13, #8 1725; NONEON-NOSVE-NEXT: ldr w13, [sp, #108] // 4-byte Folded Reload 1726; NONEON-NOSVE-NEXT: mul w12, w12, w22 1727; NONEON-NOSVE-NEXT: strb w8, [sp, #265] 1728; NONEON-NOSVE-NEXT: lsr w8, w10, #8 1729; NONEON-NOSVE-NEXT: ldr w10, [sp, #112] // 4-byte Folded Reload 1730; NONEON-NOSVE-NEXT: strb w9, [sp, #264] 1731; NONEON-NOSVE-NEXT: mul w13, w13, w23 1732; NONEON-NOSVE-NEXT: lsr w9, w11, #8 1733; NONEON-NOSVE-NEXT: ldr w11, [sp, #116] // 4-byte Folded Reload 1734; NONEON-NOSVE-NEXT: ldp w15, w14, [sp, #16] // 8-byte Folded Reload 1735; NONEON-NOSVE-NEXT: mul w10, w10, w24 1736; NONEON-NOSVE-NEXT: strb w8, [sp, #263] 1737; NONEON-NOSVE-NEXT: lsr w8, w12, #8 1738; NONEON-NOSVE-NEXT: mul w11, w11, w27 1739; NONEON-NOSVE-NEXT: ldr w12, [sp, #120] // 4-byte Folded Reload 1740; NONEON-NOSVE-NEXT: strb w9, [sp, #262] 1741; NONEON-NOSVE-NEXT: lsr w9, w13, #8 1742; NONEON-NOSVE-NEXT: ldr w13, [sp, #124] // 4-byte Folded Reload 1743; NONEON-NOSVE-NEXT: strb w8, [sp, #261] 1744; NONEON-NOSVE-NEXT: mul w12, w12, w15 1745; NONEON-NOSVE-NEXT: lsr w8, w10, #8 1746; NONEON-NOSVE-NEXT: strb w17, [sp, #282] 1747; NONEON-NOSVE-NEXT: mul w13, w13, w14 1748; NONEON-NOSVE-NEXT: strb w9, [sp, #260] 1749; NONEON-NOSVE-NEXT: lsr w9, w11, #8 1750; NONEON-NOSVE-NEXT: strb w8, [sp, #259] 1751; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #368] // 16-byte Folded Reload 1752; NONEON-NOSVE-NEXT: lsr w8, w12, #8 1753; NONEON-NOSVE-NEXT: strb w9, [sp, #258] 1754; NONEON-NOSVE-NEXT: lsr w9, w13, #8 1755; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #352] // 16-byte Folded Reload 1756; NONEON-NOSVE-NEXT: strb w8, [sp, #257] 1757; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #336] // 16-byte Folded Reload 1758; NONEON-NOSVE-NEXT: strb w9, [sp, #256] 1759; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #304] // 16-byte Folded Reload 1760; NONEON-NOSVE-NEXT: ldp q0, q1, 
[sp, #256] 1761; NONEON-NOSVE-NEXT: stp q0, q1, [x29] 1762; NONEON-NOSVE-NEXT: ldp x29, x30, [sp, #288] // 16-byte Folded Reload 1763; NONEON-NOSVE-NEXT: add sp, sp, #384 1764; NONEON-NOSVE-NEXT: ret 1765 %op1 = load <32 x i8>, ptr %a 1766 %op2 = load <32 x i8>, ptr %b 1767 %1 = zext <32 x i8> %op1 to <32 x i16> 1768 %2 = zext <32 x i8> %op2 to <32 x i16> 1769 %mul = mul <32 x i16> %1, %2 1770 %shr = lshr <32 x i16> %mul, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8> 1771 %res = trunc <32 x i16> %shr to <32 x i8> 1772 store <32 x i8> %res, ptr %a 1773 ret void 1774} 1775 1776define <2 x i16> @umulh_v2i16(<2 x i16> %op1, <2 x i16> %op2) { 1777; SVE-LABEL: umulh_v2i16: 1778; SVE: // %bb.0: 1779; SVE-NEXT: // kill: def $d1 killed $d1 def $z1 1780; SVE-NEXT: // kill: def $d0 killed $d0 def $z0 1781; SVE-NEXT: ptrue p0.s, vl2 1782; SVE-NEXT: and z0.s, z0.s, #0xffff 1783; SVE-NEXT: and z1.s, z1.s, #0xffff 1784; SVE-NEXT: mul z0.s, p0/m, z0.s, z1.s 1785; SVE-NEXT: lsr z0.s, z0.s, #16 1786; SVE-NEXT: // kill: def $d0 killed $d0 killed $z0 1787; SVE-NEXT: ret 1788; 1789; SVE2-LABEL: umulh_v2i16: 1790; SVE2: // %bb.0: 1791; SVE2-NEXT: // kill: def $d1 killed $d1 def $z1 1792; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0 1793; SVE2-NEXT: and z0.s, z0.s, #0xffff 1794; SVE2-NEXT: and z1.s, z1.s, #0xffff 1795; SVE2-NEXT: mul z0.s, z0.s, z1.s 1796; SVE2-NEXT: lsr z0.s, z0.s, #16 1797; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0 1798; SVE2-NEXT: ret 1799; 1800; NONEON-NOSVE-LABEL: umulh_v2i16: 1801; NONEON-NOSVE: // %bb.0: 1802; NONEON-NOSVE-NEXT: sub sp, sp, #32 1803; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 1804; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] 1805; NONEON-NOSVE-NEXT: ldrh w8, [sp, #12] 1806; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8] 1807; NONEON-NOSVE-NEXT: ldrh w10, [sp, #20] 1808; 
NONEON-NOSVE-NEXT: ldrh w11, [sp, #16] 1809; NONEON-NOSVE-NEXT: mul w8, w8, w10 1810; NONEON-NOSVE-NEXT: mul w9, w9, w11 1811; NONEON-NOSVE-NEXT: lsr w8, w8, #16 1812; NONEON-NOSVE-NEXT: lsr w9, w9, #16 1813; NONEON-NOSVE-NEXT: stp w9, w8, [sp, #24] 1814; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] 1815; NONEON-NOSVE-NEXT: add sp, sp, #32 1816; NONEON-NOSVE-NEXT: ret 1817 %1 = zext <2 x i16> %op1 to <2 x i32> 1818 %2 = zext <2 x i16> %op2 to <2 x i32> 1819 %mul = mul <2 x i32> %1, %2 1820 %shr = lshr <2 x i32> %mul, <i32 16, i32 16> 1821 %res = trunc <2 x i32> %shr to <2 x i16> 1822 ret <2 x i16> %res 1823} 1824 1825define <4 x i16> @umulh_v4i16(<4 x i16> %op1, <4 x i16> %op2) { 1826; SVE-LABEL: umulh_v4i16: 1827; SVE: // %bb.0: 1828; SVE-NEXT: ptrue p0.h, vl4 1829; SVE-NEXT: // kill: def $d0 killed $d0 def $z0 1830; SVE-NEXT: // kill: def $d1 killed $d1 def $z1 1831; SVE-NEXT: umulh z0.h, p0/m, z0.h, z1.h 1832; SVE-NEXT: // kill: def $d0 killed $d0 killed $z0 1833; SVE-NEXT: ret 1834; 1835; SVE2-LABEL: umulh_v4i16: 1836; SVE2: // %bb.0: 1837; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0 1838; SVE2-NEXT: // kill: def $d1 killed $d1 def $z1 1839; SVE2-NEXT: umulh z0.h, z0.h, z1.h 1840; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0 1841; SVE2-NEXT: ret 1842; 1843; NONEON-NOSVE-LABEL: umulh_v4i16: 1844; NONEON-NOSVE: // %bb.0: 1845; NONEON-NOSVE-NEXT: sub sp, sp, #32 1846; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 1847; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] 1848; NONEON-NOSVE-NEXT: ldrh w11, [sp, #14] 1849; NONEON-NOSVE-NEXT: ldrh w12, [sp, #22] 1850; NONEON-NOSVE-NEXT: ldrh w8, [sp, #8] 1851; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10] 1852; NONEON-NOSVE-NEXT: ldrh w10, [sp, #12] 1853; NONEON-NOSVE-NEXT: ldrh w13, [sp, #20] 1854; NONEON-NOSVE-NEXT: ldrh w14, [sp, #18] 1855; NONEON-NOSVE-NEXT: mul w11, w11, w12 1856; NONEON-NOSVE-NEXT: ldrh w12, [sp, #16] 1857; NONEON-NOSVE-NEXT: mul w10, w10, w13 1858; NONEON-NOSVE-NEXT: mul w9, w9, w14 1859; NONEON-NOSVE-NEXT: mul w8, 
w8, w12 1860; NONEON-NOSVE-NEXT: lsr w11, w11, #16 1861; NONEON-NOSVE-NEXT: lsr w10, w10, #16 1862; NONEON-NOSVE-NEXT: lsr w9, w9, #16 1863; NONEON-NOSVE-NEXT: strh w11, [sp, #30] 1864; NONEON-NOSVE-NEXT: lsr w8, w8, #16 1865; NONEON-NOSVE-NEXT: strh w10, [sp, #28] 1866; NONEON-NOSVE-NEXT: strh w9, [sp, #26] 1867; NONEON-NOSVE-NEXT: strh w8, [sp, #24] 1868; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] 1869; NONEON-NOSVE-NEXT: add sp, sp, #32 1870; NONEON-NOSVE-NEXT: ret 1871 %1 = zext <4 x i16> %op1 to <4 x i32> 1872 %2 = zext <4 x i16> %op2 to <4 x i32> 1873 %mul = mul <4 x i32> %1, %2 1874 %shr = lshr <4 x i32> %mul, <i32 16, i32 16, i32 16, i32 16> 1875 %res = trunc <4 x i32> %shr to <4 x i16> 1876 ret <4 x i16> %res 1877} 1878 1879define <8 x i16> @umulh_v8i16(<8 x i16> %op1, <8 x i16> %op2) { 1880; SVE-LABEL: umulh_v8i16: 1881; SVE: // %bb.0: 1882; SVE-NEXT: ptrue p0.h, vl8 1883; SVE-NEXT: // kill: def $q0 killed $q0 def $z0 1884; SVE-NEXT: // kill: def $q1 killed $q1 def $z1 1885; SVE-NEXT: umulh z0.h, p0/m, z0.h, z1.h 1886; SVE-NEXT: // kill: def $q0 killed $q0 killed $z0 1887; SVE-NEXT: ret 1888; 1889; SVE2-LABEL: umulh_v8i16: 1890; SVE2: // %bb.0: 1891; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0 1892; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1 1893; SVE2-NEXT: umulh z0.h, z0.h, z1.h 1894; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0 1895; SVE2-NEXT: ret 1896; 1897; NONEON-NOSVE-LABEL: umulh_v8i16: 1898; NONEON-NOSVE: // %bb.0: 1899; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-80]! 
1900; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 80 1901; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] 1902; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #32] 1903; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #16] 1904; NONEON-NOSVE-NEXT: ldrh w15, [sp, #38] 1905; NONEON-NOSVE-NEXT: ldrh w12, [sp, #32] 1906; NONEON-NOSVE-NEXT: ldrh w13, [sp, #34] 1907; NONEON-NOSVE-NEXT: ldrh w14, [sp, #36] 1908; NONEON-NOSVE-NEXT: ldrh w8, [sp, #40] 1909; NONEON-NOSVE-NEXT: ldrh w9, [sp, #42] 1910; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48] 1911; NONEON-NOSVE-NEXT: ldrh w10, [sp, #44] 1912; NONEON-NOSVE-NEXT: ldrh w11, [sp, #46] 1913; NONEON-NOSVE-NEXT: ldrh w16, [sp, #54] 1914; NONEON-NOSVE-NEXT: ldrh w17, [sp, #52] 1915; NONEON-NOSVE-NEXT: ldrh w18, [sp, #50] 1916; NONEON-NOSVE-NEXT: ldrh w0, [sp, #62] 1917; NONEON-NOSVE-NEXT: mul w15, w15, w16 1918; NONEON-NOSVE-NEXT: ldrh w16, [sp, #48] 1919; NONEON-NOSVE-NEXT: mul w14, w14, w17 1920; NONEON-NOSVE-NEXT: ldrh w17, [sp, #56] 1921; NONEON-NOSVE-NEXT: mul w13, w13, w18 1922; NONEON-NOSVE-NEXT: ldrh w18, [sp, #60] 1923; NONEON-NOSVE-NEXT: mul w12, w12, w16 1924; NONEON-NOSVE-NEXT: ldrh w16, [sp, #58] 1925; NONEON-NOSVE-NEXT: lsr w15, w15, #16 1926; NONEON-NOSVE-NEXT: mul w11, w11, w0 1927; NONEON-NOSVE-NEXT: lsr w14, w14, #16 1928; NONEON-NOSVE-NEXT: mul w10, w10, w18 1929; NONEON-NOSVE-NEXT: lsr w13, w13, #16 1930; NONEON-NOSVE-NEXT: strh w15, [sp, #78] 1931; NONEON-NOSVE-NEXT: mul w9, w9, w16 1932; NONEON-NOSVE-NEXT: lsr w12, w12, #16 1933; NONEON-NOSVE-NEXT: strh w14, [sp, #76] 1934; NONEON-NOSVE-NEXT: mul w8, w8, w17 1935; NONEON-NOSVE-NEXT: lsr w11, w11, #16 1936; NONEON-NOSVE-NEXT: strh w13, [sp, #74] 1937; NONEON-NOSVE-NEXT: lsr w10, w10, #16 1938; NONEON-NOSVE-NEXT: strh w12, [sp, #72] 1939; NONEON-NOSVE-NEXT: lsr w9, w9, #16 1940; NONEON-NOSVE-NEXT: strh w11, [sp, #70] 1941; NONEON-NOSVE-NEXT: lsr w8, w8, #16 1942; NONEON-NOSVE-NEXT: strh w10, [sp, #68] 1943; NONEON-NOSVE-NEXT: strh w9, [sp, #66] 1944; NONEON-NOSVE-NEXT: strh w8, [sp, #64] 1945; 
NONEON-NOSVE-NEXT: ldr q0, [sp, #64] 1946; NONEON-NOSVE-NEXT: add sp, sp, #80 1947; NONEON-NOSVE-NEXT: ret 1948 %1 = zext <8 x i16> %op1 to <8 x i32> 1949 %2 = zext <8 x i16> %op2 to <8 x i32> 1950 %mul = mul <8 x i32> %1, %2 1951 %shr = lshr <8 x i32> %mul, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16> 1952 %res = trunc <8 x i32> %shr to <8 x i16> 1953 ret <8 x i16> %res 1954} 1955 1956define void @umulh_v16i16(ptr %a, ptr %b) { 1957; SVE-LABEL: umulh_v16i16: 1958; SVE: // %bb.0: 1959; SVE-NEXT: ldp q0, q3, [x1] 1960; SVE-NEXT: ptrue p0.h, vl8 1961; SVE-NEXT: ldp q1, q2, [x0] 1962; SVE-NEXT: umulh z0.h, p0/m, z0.h, z1.h 1963; SVE-NEXT: movprfx z1, z2 1964; SVE-NEXT: umulh z1.h, p0/m, z1.h, z3.h 1965; SVE-NEXT: stp q0, q1, [x0] 1966; SVE-NEXT: ret 1967; 1968; SVE2-LABEL: umulh_v16i16: 1969; SVE2: // %bb.0: 1970; SVE2-NEXT: ldp q0, q3, [x1] 1971; SVE2-NEXT: ldp q1, q2, [x0] 1972; SVE2-NEXT: umulh z0.h, z1.h, z0.h 1973; SVE2-NEXT: umulh z1.h, z2.h, z3.h 1974; SVE2-NEXT: stp q0, q1, [x0] 1975; SVE2-NEXT: ret 1976; 1977; NONEON-NOSVE-LABEL: umulh_v16i16: 1978; NONEON-NOSVE: // %bb.0: 1979; NONEON-NOSVE-NEXT: sub sp, sp, #240 1980; NONEON-NOSVE-NEXT: stp x28, x27, [sp, #160] // 16-byte Folded Spill 1981; NONEON-NOSVE-NEXT: stp x26, x25, [sp, #176] // 16-byte Folded Spill 1982; NONEON-NOSVE-NEXT: stp x24, x23, [sp, #192] // 16-byte Folded Spill 1983; NONEON-NOSVE-NEXT: stp x22, x21, [sp, #208] // 16-byte Folded Spill 1984; NONEON-NOSVE-NEXT: stp x20, x19, [sp, #224] // 16-byte Folded Spill 1985; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 240 1986; NONEON-NOSVE-NEXT: .cfi_offset w19, -8 1987; NONEON-NOSVE-NEXT: .cfi_offset w20, -16 1988; NONEON-NOSVE-NEXT: .cfi_offset w21, -24 1989; NONEON-NOSVE-NEXT: .cfi_offset w22, -32 1990; NONEON-NOSVE-NEXT: .cfi_offset w23, -40 1991; NONEON-NOSVE-NEXT: .cfi_offset w24, -48 1992; NONEON-NOSVE-NEXT: .cfi_offset w25, -56 1993; NONEON-NOSVE-NEXT: .cfi_offset w26, -64 1994; NONEON-NOSVE-NEXT: .cfi_offset w27, -72 1995; 
NONEON-NOSVE-NEXT: .cfi_offset w28, -80 1996; NONEON-NOSVE-NEXT: ldp q0, q1, [x0] 1997; NONEON-NOSVE-NEXT: ldp q3, q2, [x1] 1998; NONEON-NOSVE-NEXT: str q0, [sp] 1999; NONEON-NOSVE-NEXT: str q1, [sp, #32] 2000; NONEON-NOSVE-NEXT: ldp d1, d0, [sp] 2001; NONEON-NOSVE-NEXT: str q3, [sp, #16] 2002; NONEON-NOSVE-NEXT: str q2, [sp, #64] 2003; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #48] 2004; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #32] 2005; NONEON-NOSVE-NEXT: ldrh w8, [sp, #56] 2006; NONEON-NOSVE-NEXT: ldrh w9, [sp, #58] 2007; NONEON-NOSVE-NEXT: ldrh w10, [sp, #60] 2008; NONEON-NOSVE-NEXT: ldrh w11, [sp, #62] 2009; NONEON-NOSVE-NEXT: ldrh w12, [sp, #48] 2010; NONEON-NOSVE-NEXT: ldrh w13, [sp, #50] 2011; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #96] 2012; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #16] 2013; NONEON-NOSVE-NEXT: ldrh w7, [sp, #96] 2014; NONEON-NOSVE-NEXT: ldrh w19, [sp, #98] 2015; NONEON-NOSVE-NEXT: ldrh w20, [sp, #100] 2016; NONEON-NOSVE-NEXT: ldrh w21, [sp, #102] 2017; NONEON-NOSVE-NEXT: ldrh w14, [sp, #52] 2018; NONEON-NOSVE-NEXT: ldrh w16, [sp, #54] 2019; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #80] 2020; NONEON-NOSVE-NEXT: ldrh w18, [sp, #104] 2021; NONEON-NOSVE-NEXT: ldrh w2, [sp, #106] 2022; NONEON-NOSVE-NEXT: ldrh w4, [sp, #108] 2023; NONEON-NOSVE-NEXT: ldrh w5, [sp, #110] 2024; NONEON-NOSVE-NEXT: ldrh w15, [sp, #88] 2025; NONEON-NOSVE-NEXT: ldrh w17, [sp, #90] 2026; NONEON-NOSVE-NEXT: ldrh w1, [sp, #92] 2027; NONEON-NOSVE-NEXT: ldrh w3, [sp, #94] 2028; NONEON-NOSVE-NEXT: mul w8, w8, w15 2029; NONEON-NOSVE-NEXT: ldrh w6, [sp, #80] 2030; NONEON-NOSVE-NEXT: ldrh w23, [sp, #82] 2031; NONEON-NOSVE-NEXT: mul w11, w11, w3 2032; NONEON-NOSVE-NEXT: ldrh w25, [sp, #84] 2033; NONEON-NOSVE-NEXT: mul w13, w13, w23 2034; NONEON-NOSVE-NEXT: ldp d1, d0, [sp, #64] 2035; NONEON-NOSVE-NEXT: mul w14, w14, w25 2036; NONEON-NOSVE-NEXT: lsr w8, w8, #16 2037; NONEON-NOSVE-NEXT: mul w12, w12, w6 2038; NONEON-NOSVE-NEXT: lsr w11, w11, #16 2039; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #112] 2040; 
NONEON-NOSVE-NEXT: mul w10, w10, w1 2041; NONEON-NOSVE-NEXT: lsr w13, w13, #16 2042; NONEON-NOSVE-NEXT: ldrh w22, [sp, #118] 2043; NONEON-NOSVE-NEXT: ldrh w24, [sp, #116] 2044; NONEON-NOSVE-NEXT: ldrh w26, [sp, #114] 2045; NONEON-NOSVE-NEXT: ldrh w27, [sp, #112] 2046; NONEON-NOSVE-NEXT: ldrh w28, [sp, #126] 2047; NONEON-NOSVE-NEXT: mul w9, w9, w17 2048; NONEON-NOSVE-NEXT: mul w21, w21, w22 2049; NONEON-NOSVE-NEXT: ldrh w22, [sp, #86] 2050; NONEON-NOSVE-NEXT: lsr w14, w14, #16 2051; NONEON-NOSVE-NEXT: mul w20, w20, w24 2052; NONEON-NOSVE-NEXT: ldrh w24, [sp, #120] 2053; NONEON-NOSVE-NEXT: lsr w12, w12, #16 2054; NONEON-NOSVE-NEXT: mul w19, w19, w26 2055; NONEON-NOSVE-NEXT: ldrh w26, [sp, #124] 2056; NONEON-NOSVE-NEXT: lsr w10, w10, #16 2057; NONEON-NOSVE-NEXT: mul w7, w7, w27 2058; NONEON-NOSVE-NEXT: ldrh w27, [sp, #122] 2059; NONEON-NOSVE-NEXT: lsr w21, w21, #16 2060; NONEON-NOSVE-NEXT: mul w5, w5, w28 2061; NONEON-NOSVE-NEXT: lsr w20, w20, #16 2062; NONEON-NOSVE-NEXT: lsr w9, w9, #16 2063; NONEON-NOSVE-NEXT: mul w4, w4, w26 2064; NONEON-NOSVE-NEXT: lsr w19, w19, #16 2065; NONEON-NOSVE-NEXT: strh w21, [sp, #158] 2066; NONEON-NOSVE-NEXT: mul w2, w2, w27 2067; NONEON-NOSVE-NEXT: lsr w7, w7, #16 2068; NONEON-NOSVE-NEXT: strh w20, [sp, #156] 2069; NONEON-NOSVE-NEXT: mul w18, w18, w24 2070; NONEON-NOSVE-NEXT: lsr w5, w5, #16 2071; NONEON-NOSVE-NEXT: strh w19, [sp, #154] 2072; NONEON-NOSVE-NEXT: mul w16, w16, w22 2073; NONEON-NOSVE-NEXT: lsr w4, w4, #16 2074; NONEON-NOSVE-NEXT: strh w7, [sp, #152] 2075; NONEON-NOSVE-NEXT: lsr w2, w2, #16 2076; NONEON-NOSVE-NEXT: strh w5, [sp, #150] 2077; NONEON-NOSVE-NEXT: lsr w18, w18, #16 2078; NONEON-NOSVE-NEXT: strh w4, [sp, #148] 2079; NONEON-NOSVE-NEXT: lsr w16, w16, #16 2080; NONEON-NOSVE-NEXT: strh w2, [sp, #146] 2081; NONEON-NOSVE-NEXT: strh w18, [sp, #144] 2082; NONEON-NOSVE-NEXT: ldp x20, x19, [sp, #224] // 16-byte Folded Reload 2083; NONEON-NOSVE-NEXT: strh w16, [sp, #142] 2084; NONEON-NOSVE-NEXT: ldp x22, x21, [sp, #208] // 
16-byte Folded Reload 2085; NONEON-NOSVE-NEXT: strh w14, [sp, #140] 2086; NONEON-NOSVE-NEXT: ldp x24, x23, [sp, #192] // 16-byte Folded Reload 2087; NONEON-NOSVE-NEXT: strh w13, [sp, #138] 2088; NONEON-NOSVE-NEXT: ldp x26, x25, [sp, #176] // 16-byte Folded Reload 2089; NONEON-NOSVE-NEXT: strh w12, [sp, #136] 2090; NONEON-NOSVE-NEXT: ldp x28, x27, [sp, #160] // 16-byte Folded Reload 2091; NONEON-NOSVE-NEXT: strh w11, [sp, #134] 2092; NONEON-NOSVE-NEXT: strh w10, [sp, #132] 2093; NONEON-NOSVE-NEXT: strh w9, [sp, #130] 2094; NONEON-NOSVE-NEXT: strh w8, [sp, #128] 2095; NONEON-NOSVE-NEXT: ldp q0, q1, [sp, #128] 2096; NONEON-NOSVE-NEXT: stp q0, q1, [x0] 2097; NONEON-NOSVE-NEXT: add sp, sp, #240 2098; NONEON-NOSVE-NEXT: ret 2099 %op1 = load <16 x i16>, ptr %a 2100 %op2 = load <16 x i16>, ptr %b 2101 %1 = zext <16 x i16> %op1 to <16 x i32> 2102 %2 = zext <16 x i16> %op2 to <16 x i32> 2103 %mul = mul <16 x i32> %1, %2 2104 %shr = lshr <16 x i32> %mul, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16> 2105 %res = trunc <16 x i32> %shr to <16 x i16> 2106 store <16 x i16> %res, ptr %a 2107 ret void 2108} 2109 2110define <2 x i32> @umulh_v2i32(<2 x i32> %op1, <2 x i32> %op2) { 2111; SVE-LABEL: umulh_v2i32: 2112; SVE: // %bb.0: 2113; SVE-NEXT: ptrue p0.s, vl2 2114; SVE-NEXT: // kill: def $d0 killed $d0 def $z0 2115; SVE-NEXT: // kill: def $d1 killed $d1 def $z1 2116; SVE-NEXT: umulh z0.s, p0/m, z0.s, z1.s 2117; SVE-NEXT: // kill: def $d0 killed $d0 killed $z0 2118; SVE-NEXT: ret 2119; 2120; SVE2-LABEL: umulh_v2i32: 2121; SVE2: // %bb.0: 2122; SVE2-NEXT: // kill: def $d0 killed $d0 def $z0 2123; SVE2-NEXT: // kill: def $d1 killed $d1 def $z1 2124; SVE2-NEXT: umulh z0.s, z0.s, z1.s 2125; SVE2-NEXT: // kill: def $d0 killed $d0 killed $z0 2126; SVE2-NEXT: ret 2127; 2128; NONEON-NOSVE-LABEL: umulh_v2i32: 2129; NONEON-NOSVE: // %bb.0: 2130; NONEON-NOSVE-NEXT: sub sp, sp, #32 2131; NONEON-NOSVE-NEXT: 
.cfi_def_cfa_offset 32 2132; NONEON-NOSVE-NEXT: stp d0, d1, [sp, #8] 2133; NONEON-NOSVE-NEXT: ldp w8, w9, [sp, #8] 2134; NONEON-NOSVE-NEXT: ldp w11, w10, [sp, #16] 2135; NONEON-NOSVE-NEXT: umull x9, w9, w10 2136; NONEON-NOSVE-NEXT: umull x8, w8, w11 2137; NONEON-NOSVE-NEXT: lsr x9, x9, #32 2138; NONEON-NOSVE-NEXT: lsr x8, x8, #32 2139; NONEON-NOSVE-NEXT: stp w8, w9, [sp, #24] 2140; NONEON-NOSVE-NEXT: ldr d0, [sp, #24] 2141; NONEON-NOSVE-NEXT: add sp, sp, #32 2142; NONEON-NOSVE-NEXT: ret 2143 %1 = zext <2 x i32> %op1 to <2 x i64> 2144 %2 = zext <2 x i32> %op2 to <2 x i64> 2145 %mul = mul <2 x i64> %1, %2 2146 %shr = lshr <2 x i64> %mul, <i64 32, i64 32> 2147 %res = trunc <2 x i64> %shr to <2 x i32> 2148 ret <2 x i32> %res 2149} 2150 2151define <4 x i32> @umulh_v4i32(<4 x i32> %op1, <4 x i32> %op2) { 2152; SVE-LABEL: umulh_v4i32: 2153; SVE: // %bb.0: 2154; SVE-NEXT: ptrue p0.s, vl4 2155; SVE-NEXT: // kill: def $q0 killed $q0 def $z0 2156; SVE-NEXT: // kill: def $q1 killed $q1 def $z1 2157; SVE-NEXT: umulh z0.s, p0/m, z0.s, z1.s 2158; SVE-NEXT: // kill: def $q0 killed $q0 killed $z0 2159; SVE-NEXT: ret 2160; 2161; SVE2-LABEL: umulh_v4i32: 2162; SVE2: // %bb.0: 2163; SVE2-NEXT: // kill: def $q0 killed $q0 def $z0 2164; SVE2-NEXT: // kill: def $q1 killed $q1 def $z1 2165; SVE2-NEXT: umulh z0.s, z0.s, z1.s 2166; SVE2-NEXT: // kill: def $q0 killed $q0 killed $z0 2167; SVE2-NEXT: ret 2168; 2169; NONEON-NOSVE-LABEL: umulh_v4i32: 2170; NONEON-NOSVE: // %bb.0: 2171; NONEON-NOSVE-NEXT: stp q0, q1, [sp, #-80]! 
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 80
; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp]
; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #32]
; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #16]
; NONEON-NOSVE-NEXT:    ldp w10, w11, [sp, #32]
; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #40]
; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #48]
; NONEON-NOSVE-NEXT:    ldp w13, w12, [sp, #48]
; NONEON-NOSVE-NEXT:    umull x11, w11, w12
; NONEON-NOSVE-NEXT:    ldp w12, w14, [sp, #56]
; NONEON-NOSVE-NEXT:    umull x10, w10, w13
; NONEON-NOSVE-NEXT:    lsr x11, x11, #32
; NONEON-NOSVE-NEXT:    umull x9, w9, w14
; NONEON-NOSVE-NEXT:    umull x8, w8, w12
; NONEON-NOSVE-NEXT:    lsr x10, x10, #32
; NONEON-NOSVE-NEXT:    lsr x9, x9, #32
; NONEON-NOSVE-NEXT:    stp w10, w11, [sp, #72]
; NONEON-NOSVE-NEXT:    lsr x8, x8, #32
; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #64]
; NONEON-NOSVE-NEXT:    ldr q0, [sp, #64]
; NONEON-NOSVE-NEXT:    add sp, sp, #80
; NONEON-NOSVE-NEXT:    ret
  %1 = zext <4 x i32> %op1 to <4 x i64>
  %2 = zext <4 x i32> %op2 to <4 x i64>
  %mul = mul <4 x i64> %1, %2
  %shr = lshr <4 x i64> %mul, <i64 32, i64 32, i64 32, i64 32>
  %res = trunc <4 x i64> %shr to <4 x i32>
  ret <4 x i32> %res
}

; Unsigned multiply-high on <8 x i32>, operating through memory: both operands
; are loaded from %a and %b, zero-extended to i64 lanes, multiplied, shifted
; right by 32 and truncated, and the result is stored back to %a.
; The expected output for the two SVE-based prefixes handles the vector as two
; 128-bit halves (one umulh on .s lanes per half); the predicated form is
; expected with +sve and the unpredicated form with +sve2/+sme. Without NEON or
; SVE the expected output goes through the stack and uses eight scalar
; umull + lsr #32 pairs.
; The shift amount is given directly as a constant vector; no separate splat
; value is built, so the body contains no unused instructions.
define void @umulh_v8i32(ptr %a, ptr %b) {
; SVE-LABEL: umulh_v8i32:
; SVE:       // %bb.0:
; SVE-NEXT:    ldp q0, q3, [x1]
; SVE-NEXT:    ptrue p0.s, vl4
; SVE-NEXT:    ldp q1, q2, [x0]
; SVE-NEXT:    umulh z0.s, p0/m, z0.s, z1.s
; SVE-NEXT:    movprfx z1, z2
; SVE-NEXT:    umulh z1.s, p0/m, z1.s, z3.s
; SVE-NEXT:    stp q0, q1, [x0]
; SVE-NEXT:    ret
;
; SVE2-LABEL: umulh_v8i32:
; SVE2:       // %bb.0:
; SVE2-NEXT:    ldp q0, q3, [x1]
; SVE2-NEXT:    ldp q1, q2, [x0]
; SVE2-NEXT:    umulh z0.s, z1.s, z0.s
; SVE2-NEXT:    umulh z1.s, z2.s, z3.s
; SVE2-NEXT:    stp q0, q1, [x0]
; SVE2-NEXT:    ret
;
; NONEON-NOSVE-LABEL: umulh_v8i32:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #160
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 160
; NONEON-NOSVE-NEXT:    ldp q0, q1, [x0]
; NONEON-NOSVE-NEXT:    ldp q3, q2, [x1]
; NONEON-NOSVE-NEXT:    str q1, [sp, #32]
; NONEON-NOSVE-NEXT:    stp q0, q3, [sp]
; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp]
; NONEON-NOSVE-NEXT:    str q2, [sp, #64]
; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #48]
; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #32]
; NONEON-NOSVE-NEXT:    ldp w8, w9, [sp, #56]
; NONEON-NOSVE-NEXT:    ldp w10, w11, [sp, #48]
; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #96]
; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #16]
; NONEON-NOSVE-NEXT:    ldp w14, w15, [sp, #96]
; NONEON-NOSVE-NEXT:    ldp w12, w13, [sp, #104]
; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #80]
; NONEON-NOSVE-NEXT:    ldp d1, d0, [sp, #64]
; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #112]
; NONEON-NOSVE-NEXT:    ldp w17, w16, [sp, #112]
; NONEON-NOSVE-NEXT:    umull x15, w15, w16
; NONEON-NOSVE-NEXT:    ldp w16, w18, [sp, #120]
; NONEON-NOSVE-NEXT:    umull x14, w14, w17
; NONEON-NOSVE-NEXT:    ldp w17, w1, [sp, #80]
; NONEON-NOSVE-NEXT:    umull x13, w13, w18
; NONEON-NOSVE-NEXT:    lsr x15, x15, #32
; NONEON-NOSVE-NEXT:    umull x12, w12, w16
; NONEON-NOSVE-NEXT:    lsr x14, x14, #32
; NONEON-NOSVE-NEXT:    ldp w16, w18, [sp, #88]
; NONEON-NOSVE-NEXT:    umull x11, w11, w1
; NONEON-NOSVE-NEXT:    lsr x13, x13, #32
; NONEON-NOSVE-NEXT:    stp w14, w15, [sp, #152]
; NONEON-NOSVE-NEXT:    umull x10, w10, w17
; NONEON-NOSVE-NEXT:    lsr x12, x12, #32
; NONEON-NOSVE-NEXT:    umull x9, w9, w18
; NONEON-NOSVE-NEXT:    umull x8, w8, w16
; NONEON-NOSVE-NEXT:    lsr x11, x11, #32
; NONEON-NOSVE-NEXT:    stp w12, w13, [sp, #144]
; NONEON-NOSVE-NEXT:    lsr x10, x10, #32
; NONEON-NOSVE-NEXT:    lsr x9, x9, #32
; NONEON-NOSVE-NEXT:    lsr x8, x8, #32
; NONEON-NOSVE-NEXT:    stp w10, w11, [sp, #136]
; NONEON-NOSVE-NEXT:    stp w8, w9, [sp, #128]
; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #128]
; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
; NONEON-NOSVE-NEXT:    add sp, sp, #160
; NONEON-NOSVE-NEXT:    ret
  %op1 = load <8 x i32>, ptr %a
  %op2 = load <8 x i32>, ptr %b
  %1 = zext <8 x i32> %op1 to <8 x i64>
  %2 = zext <8 x i32> %op2 to <8 x i64>
  %mul = mul <8 x i64> %1, %2
  %shr = lshr <8 x i64> %mul, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
  %res = trunc <8 x i64> %shr to <8 x i32>
  store <8 x i32> %res, ptr %a
  ret void
}

; Unsigned multiply-high on a single i64 lane passed and returned by value:
; operands are zero-extended to i128, multiplied, shifted right by 64 and
; truncated back to i64.
; Expected output: a "ptrue p0.d, vl1" plus predicated umulh with +sve, an
; unpredicated umulh with +sve2/+sme, and a scalar umulh on x-registers
; (moved in with fmov, returned via a stack slot) without NEON or SVE.
define <1 x i64> @umulh_v1i64(<1 x i64> %op1, <1 x i64> %op2) {
; SVE-LABEL: umulh_v1i64:
; SVE:       // %bb.0:
; SVE-NEXT:    ptrue p0.d, vl1
; SVE-NEXT:    // kill: def $d0 killed $d0 def $z0
; SVE-NEXT:    // kill: def $d1 killed $d1 def $z1
; SVE-NEXT:    umulh z0.d, p0/m, z0.d, z1.d
; SVE-NEXT:    // kill: def $d0 killed $d0 killed $z0
; SVE-NEXT:    ret
;
; SVE2-LABEL: umulh_v1i64:
; SVE2:       // %bb.0:
; SVE2-NEXT:    // kill: def $d0 killed $d0 def $z0
; SVE2-NEXT:    // kill: def $d1 killed $d1 def $z1
; SVE2-NEXT:    umulh z0.d, z0.d, z1.d
; SVE2-NEXT:    // kill: def $d0 killed $d0 killed $z0
; SVE2-NEXT:    ret
;
; NONEON-NOSVE-LABEL: umulh_v1i64:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #16
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT:    fmov x8, d0
; NONEON-NOSVE-NEXT:    fmov x9, d1
; NONEON-NOSVE-NEXT:    umulh x8, x8, x9
; NONEON-NOSVE-NEXT:    str x8, [sp, #8]
; NONEON-NOSVE-NEXT:    ldr d0, [sp, #8]
; NONEON-NOSVE-NEXT:    add sp, sp, #16
; NONEON-NOSVE-NEXT:    ret
  %1 = zext <1 x i64> %op1 to <1 x i128>
  %2 = zext <1 x i64> %op2 to <1 x i128>
  %mul = mul <1 x i128> %1, %2
  %shr = lshr <1 x i128> %mul, <i128 64>
  %res = trunc <1 x i128> %shr to <1 x i64>
  ret <1 x i64> %res
}

; Unsigned multiply-high on <2 x i64> passed and returned by value: operands
; are zero-extended to i128 lanes, multiplied, shifted right by 64 and
; truncated back to i64 lanes.
; Expected output: a "ptrue p0.d, vl2" plus predicated umulh with +sve, an
; unpredicated umulh with +sve2/+sme, and two scalar umulh instructions on
; values spilled to and reloaded from the stack without NEON or SVE.
define <2 x i64> @umulh_v2i64(<2 x i64> %op1, <2 x i64> %op2) {
; SVE-LABEL: umulh_v2i64:
; SVE:       // %bb.0:
; SVE-NEXT:    ptrue p0.d, vl2
; SVE-NEXT:    // kill: def $q0 killed $q0 def $z0
; SVE-NEXT:    // kill: def $q1 killed $q1 def $z1
; SVE-NEXT:    umulh z0.d, p0/m, z0.d, z1.d
; SVE-NEXT:    // kill: def $q0 killed $q0 killed $z0
; SVE-NEXT:    ret
;
; SVE2-LABEL: umulh_v2i64:
; SVE2:       // %bb.0:
; SVE2-NEXT:    // kill: def $q0 killed $q0 def $z0
; SVE2-NEXT:    // kill: def $q1 killed $q1 def $z1
; SVE2-NEXT:    umulh z0.d, z0.d, z1.d
; SVE2-NEXT:    // kill: def $q0 killed $q0 killed $z0
; SVE2-NEXT:    ret
;
; NONEON-NOSVE-LABEL: umulh_v2i64:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    stp q0, q1, [sp, #-64]!
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 64
; NONEON-NOSVE-NEXT:    ldp x9, x8, [sp]
; NONEON-NOSVE-NEXT:    ldp x11, x10, [sp, #16]
; NONEON-NOSVE-NEXT:    umulh x8, x8, x10
; NONEON-NOSVE-NEXT:    umulh x9, x9, x11
; NONEON-NOSVE-NEXT:    stp x9, x8, [sp, #32]
; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #32]
; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #48]
; NONEON-NOSVE-NEXT:    ldr q0, [sp, #48]
; NONEON-NOSVE-NEXT:    add sp, sp, #64
; NONEON-NOSVE-NEXT:    ret
  %1 = zext <2 x i64> %op1 to <2 x i128>
  %2 = zext <2 x i64> %op2 to <2 x i128>
  %mul = mul <2 x i128> %1, %2
  %shr = lshr <2 x i128> %mul, <i128 64, i128 64>
  %res = trunc <2 x i128> %shr to <2 x i64>
  ret <2 x i64> %res
}

; Unsigned multiply-high on <4 x i64>, operating through memory: both operands
; are loaded from %a and %b, zero-extended to i128 lanes, multiplied, shifted
; right by 64 and truncated, and the result is stored back to %a.
; The expected output for the two SVE-based prefixes handles the vector as two
; 128-bit halves (one umulh on .d lanes per half); without NEON or SVE the
; expected output uses four scalar umulh instructions via the stack.
define void @umulh_v4i64(ptr %a, ptr %b) {
; SVE-LABEL: umulh_v4i64:
; SVE:       // %bb.0:
; SVE-NEXT:    ldp q0, q3, [x1]
; SVE-NEXT:    ptrue p0.d, vl2
; SVE-NEXT:    ldp q1, q2, [x0]
; SVE-NEXT:    umulh z0.d, p0/m, z0.d, z1.d
; SVE-NEXT:    movprfx z1, z2
; SVE-NEXT:    umulh z1.d, p0/m, z1.d, z3.d
; SVE-NEXT:    stp q0, q1, [x0]
; SVE-NEXT:    ret
;
; SVE2-LABEL: umulh_v4i64:
; SVE2:       // %bb.0:
; SVE2-NEXT:    ldp q0, q3, [x1]
; SVE2-NEXT:    ldp q1, q2, [x0]
; SVE2-NEXT:    umulh z0.d, z1.d, z0.d
; SVE2-NEXT:    umulh z1.d, z2.d, z3.d
; SVE2-NEXT:    stp q0, q1, [x0]
; SVE2-NEXT:    ret
;
; NONEON-NOSVE-LABEL: umulh_v4i64:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #128
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 128
; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
; NONEON-NOSVE-NEXT:    ldp q2, q3, [x1]
; NONEON-NOSVE-NEXT:    stp q1, q2, [sp]
; NONEON-NOSVE-NEXT:    ldp x11, x10, [sp]
; NONEON-NOSVE-NEXT:    stp q0, q3, [sp, #32]
; NONEON-NOSVE-NEXT:    ldp x13, x12, [sp, #16]
; NONEON-NOSVE-NEXT:    ldp x9, x8, [sp, #32]
; NONEON-NOSVE-NEXT:    umulh x10, x10, x12
; NONEON-NOSVE-NEXT:    ldp x14, x12, [sp, #48]
; NONEON-NOSVE-NEXT:    umulh x11, x11, x13
; NONEON-NOSVE-NEXT:    umulh x8, x8, x12
; NONEON-NOSVE-NEXT:    umulh x9, x9, x14
; NONEON-NOSVE-NEXT:    stp x11, x10, [sp, #64]
; NONEON-NOSVE-NEXT:    stp x9, x8, [sp, #80]
; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #80]
; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #112]
; NONEON-NOSVE-NEXT:    ldp d0, d1, [sp, #64]
; NONEON-NOSVE-NEXT:    stp d0, d1, [sp, #96]
; NONEON-NOSVE-NEXT:    ldp q0, q1, [sp, #96]
; NONEON-NOSVE-NEXT:    stp q0, q1, [x0]
; NONEON-NOSVE-NEXT:    add sp, sp, #128
; NONEON-NOSVE-NEXT:    ret
  %op1 = load <4 x i64>, ptr %a
  %op2 = load <4 x i64>, ptr %b
  %1 = zext <4 x i64> %op1 to <4 x i128>
  %2 = zext <4 x i64> %op2 to <4 x i128>
  %mul = mul <4 x i128> %1, %2
  %shr = lshr <4 x i128> %mul, <i128 64, i128 64, i128 64, i128 64>
  %res = trunc <4 x i128> %shr to <4 x i64>
  store <4 x i64> %res, ptr %a
  ret void
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CHECK: {{.*}}