; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s

; Each function below multiplies an extended lane selection of %src1 by a
; splat of the scalar %src2. The function names encode the IR shape:
;   sext32 / zext32 / sext16 - extension kind and source element width.
;   0246 / 1357 / 0213 / ... - the lane-selection shuffle applied to %src1
;                              (the exact mask is given above each function).
;   0ext - %src2 is inserted and splatted in the narrow element type, and the
;          splat vector is extended afterwards.
;   ext0 - %src2 is extended as a scalar first, then inserted and splatted in
;          the wide element type.
; The order of the two name parts matches the operand order of the final mul.
; The assertion lines are generated from llc output; regenerate them with the
; script named in the NOTE line instead of editing them by hand.

; sext, mask <0, 2>, narrow splat extended afterwards: mul(lanes, splat).
define arm_aapcs_vfpcc <2 x i64> @sext32_0246_0ext(<4 x i32> %src1, i32 %src2) {
; CHECK-LABEL: sext32_0246_0ext:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov q2[2], q2[0], r0, r0
; CHECK-NEXT: vmullb.s32 q1, q0, q2
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
  %out1 = sext <2 x i32> %shuf1 to <2 x i64>
  %ins = insertelement <4 x i32> poison, i32 %src2, i32 0
  %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <2 x i32> zeroinitializer
  %out2 = sext <2 x i32> %shuf2 to <2 x i64>
  %out = mul <2 x i64> %out1, %out2
  ret <2 x i64> %out
}

; sext, mask <0, 2>, narrow splat extended afterwards: mul(splat, lanes).
define arm_aapcs_vfpcc <2 x i64> @sext32_0ext_0246(<4 x i32> %src1, i32 %src2) {
; CHECK-LABEL: sext32_0ext_0246:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov q2[2], q2[0], r0, r0
; CHECK-NEXT: vmullb.s32 q1, q2, q0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
  %out1 = sext <2 x i32> %shuf1 to <2 x i64>
  %ins = insertelement <4 x i32> poison, i32 %src2, i32 0
  %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <2 x i32> zeroinitializer
  %out2 = sext <2 x i32> %shuf2 to <2 x i64>
  %out = mul <2 x i64> %out2, %out1
  ret <2 x i64> %out
}

; sext, mask <0, 2>, scalar extended to i64 before the splat: mul(lanes, splat).
define arm_aapcs_vfpcc <2 x i64> @sext32_0246_ext0(<4 x i32> %src1, i32 %src2) {
; CHECK-LABEL: sext32_0246_ext0:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r7, lr}
; CHECK-NEXT: push {r4, r5, r7, lr}
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: vmov r3, s0
; CHECK-NEXT: umull lr, r12, r1, r0
; CHECK-NEXT: umull r2, r5, r3, r0
; CHECK-NEXT: vmov q0[2], q0[0], r2, lr
; CHECK-NEXT: asrs r2, r0, #31
; CHECK-NEXT: mla r4, r1, r2, r12
; CHECK-NEXT: asrs r1, r1, #31
; CHECK-NEXT: mla r2, r3, r2, r5
; CHECK-NEXT: asrs r3, r3, #31
; CHECK-NEXT: mla r1, r1, r0, r4
; CHECK-NEXT: mla r0, r3, r0, r2
; CHECK-NEXT: vmov q0[3], q0[1], r0, r1
; CHECK-NEXT: pop {r4, r5, r7, pc}
entry:
  %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
  %out1 = sext <2 x i32> %shuf1 to <2 x i64>
  %ext = sext i32 %src2 to i64
  %ins = insertelement <2 x i64> poison, i64 %ext, i32 0
  %shuf2 = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
  %out = mul <2 x i64> %out1, %shuf2
  ret <2 x i64> %out
}

; sext, mask <0, 2>, scalar extended to i64 before the splat: mul(splat, lanes).
define arm_aapcs_vfpcc <2 x i64> @sext32_ext0_0246(<4 x i32> %src1, i32 %src2) {
; CHECK-LABEL: sext32_ext0_0246:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r7, lr}
; CHECK-NEXT: push {r4, r5, r7, lr}
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: asrs r4, r0, #31
; CHECK-NEXT: vmov r3, s0
; CHECK-NEXT: umull lr, r12, r0, r1
; CHECK-NEXT: umull r2, r5, r0, r3
; CHECK-NEXT: vmov q0[2], q0[0], r2, lr
; CHECK-NEXT: asrs r2, r1, #31
; CHECK-NEXT: mla r2, r0, r2, r12
; CHECK-NEXT: mla r1, r4, r1, r2
; CHECK-NEXT: asrs r2, r3, #31
; CHECK-NEXT: mla r0, r0, r2, r5
; CHECK-NEXT: mla r0, r4, r3, r0
; CHECK-NEXT: vmov q0[3], q0[1], r0, r1
; CHECK-NEXT: pop {r4, r5, r7, pc}
entry:
  %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
  %out1 = sext <2 x i32> %shuf1 to <2 x i64>
  %ext = sext i32 %src2 to i64
  %ins = insertelement <2 x i64> poison, i64 %ext, i32 0
  %shuf2 = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
  %out = mul <2 x i64> %shuf2, %out1
  ret <2 x i64> %out
}

; sext, mask <1, 3>, narrow splat extended afterwards: mul(lanes, splat).
define arm_aapcs_vfpcc <2 x i64> @sext32_1357_0ext(<4 x i32> %src1, i32 %src2) {
; CHECK-LABEL: sext32_1357_0ext:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov q1[2], q1[0], r0, r0
; CHECK-NEXT: vrev64.32 q2, q0
; CHECK-NEXT: vmullb.s32 q0, q2, q1
; CHECK-NEXT: bx lr
entry:
  %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
  %out1 = sext <2 x i32> %shuf1 to <2 x i64>
  %ins = insertelement <4 x i32> poison, i32 %src2, i32 0
  %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <2 x i32> zeroinitializer
  %out2 = sext <2 x i32> %shuf2 to <2 x i64>
  %out = mul <2 x i64> %out1, %out2
  ret <2 x i64> %out
}

; sext, mask <1, 3>, narrow splat extended afterwards: mul(splat, lanes).
define arm_aapcs_vfpcc <2 x i64> @sext32_0ext_1357(<4 x i32> %src1, i32 %src2) {
; CHECK-LABEL: sext32_0ext_1357:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vrev64.32 q1, q0
; CHECK-NEXT: vmov q2[2], q2[0], r0, r0
; CHECK-NEXT: vmullb.s32 q0, q2, q1
; CHECK-NEXT: bx lr
entry:
  %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
  %out1 = sext <2 x i32> %shuf1 to <2 x i64>
  %ins = insertelement <4 x i32> poison, i32 %src2, i32 0
  %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <2 x i32> zeroinitializer
  %out2 = sext <2 x i32> %shuf2 to <2 x i64>
  %out = mul <2 x i64> %out2, %out1
  ret <2 x i64> %out
}

; sext, mask <1, 3>, scalar extended to i64 before the splat: mul(lanes, splat).
define arm_aapcs_vfpcc <2 x i64> @sext32_1357_ext0(<4 x i32> %src1, i32 %src2) {
; CHECK-LABEL: sext32_1357_ext0:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r7, lr}
; CHECK-NEXT: push {r4, r5, r7, lr}
; CHECK-NEXT: vrev64.32 q1, q0
; CHECK-NEXT: vmov r1, s6
; CHECK-NEXT: vmov r3, s4
; CHECK-NEXT: umull lr, r12, r1, r0
; CHECK-NEXT: umull r2, r5, r3, r0
; CHECK-NEXT: vmov q0[2], q0[0], r2, lr
; CHECK-NEXT: asrs r2, r0, #31
; CHECK-NEXT: mla r4, r1, r2, r12
; CHECK-NEXT: asrs r1, r1, #31
; CHECK-NEXT: mla r2, r3, r2, r5
; CHECK-NEXT: asrs r3, r3, #31
; CHECK-NEXT: mla r1, r1, r0, r4
; CHECK-NEXT: mla r0, r3, r0, r2
; CHECK-NEXT: vmov q0[3], q0[1], r0, r1
; CHECK-NEXT: pop {r4, r5, r7, pc}
entry:
  %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
  %out1 = sext <2 x i32> %shuf1 to <2 x i64>
  %ext = sext i32 %src2 to i64
  %ins = insertelement <2 x i64> poison, i64 %ext, i32 0
  %shuf2 = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
  %out = mul <2 x i64> %out1, %shuf2
  ret <2 x i64> %out
}

; sext, mask <1, 3>, scalar extended to i64 before the splat: mul(splat, lanes).
define arm_aapcs_vfpcc <2 x i64> @sext32_ext0_1357(<4 x i32> %src1, i32 %src2) {
; CHECK-LABEL: sext32_ext0_1357:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r7, lr}
; CHECK-NEXT: push {r4, r5, r7, lr}
; CHECK-NEXT: vrev64.32 q1, q0
; CHECK-NEXT: asrs r4, r0, #31
; CHECK-NEXT: vmov r1, s6
; CHECK-NEXT: vmov r3, s4
; CHECK-NEXT: umull lr, r12, r0, r1
; CHECK-NEXT: umull r2, r5, r0, r3
; CHECK-NEXT: vmov q0[2], q0[0], r2, lr
; CHECK-NEXT: asrs r2, r1, #31
; CHECK-NEXT: mla r2, r0, r2, r12
; CHECK-NEXT: mla r1, r4, r1, r2
; CHECK-NEXT: asrs r2, r3, #31
; CHECK-NEXT: mla r0, r0, r2, r5
; CHECK-NEXT: mla r0, r4, r3, r0
; CHECK-NEXT: vmov q0[3], q0[1], r0, r1
; CHECK-NEXT: pop {r4, r5, r7, pc}
entry:
  %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
  %out1 = sext <2 x i32> %shuf1 to <2 x i64>
  %ext = sext i32 %src2 to i64
  %ins = insertelement <2 x i64> poison, i64 %ext, i32 0
  %shuf2 = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
  %out = mul <2 x i64> %shuf2, %out1
  ret <2 x i64> %out
}

; sext, <8 x i32> source, mask <0, 2, 1, 3>, narrow splat extended afterwards: mul(lanes, splat).
define arm_aapcs_vfpcc <4 x i64> @sext32_0213_0ext(<8 x i32> %src1, i32 %src2) {
; CHECK-LABEL: sext32_0213_0ext:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vmov q3[2], q3[0], r0, r0
; CHECK-NEXT: vmov.f32 s17, s4
; CHECK-NEXT: vmov.f32 s0, s1
; CHECK-NEXT: vmullb.s32 q2, q4, q3
; CHECK-NEXT: vmov.f32 s2, s3
; CHECK-NEXT: vmullb.s32 q1, q0, q3
; CHECK-NEXT: vmov q0, q2
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: bx lr
entry:
  %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  %out1 = sext <4 x i32> %shuf1 to <4 x i64>
  %ins = insertelement <8 x i32> poison, i32 %src2, i32 0
  %shuf2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <4 x i32> zeroinitializer
  %out2 = sext <4 x i32> %shuf2 to <4 x i64>
  %out = mul <4 x i64> %out1, %out2
  ret <4 x i64> %out
}

; sext, <8 x i32> source, mask <0, 2, 1, 3>, narrow splat extended afterwards: mul(splat, lanes).
define arm_aapcs_vfpcc <4 x i64> @sext32_0ext_0213(<8 x i32> %src1, i32 %src2) {
; CHECK-LABEL: sext32_0ext_0213:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vmov q3[2], q3[0], r0, r0
; CHECK-NEXT: vmov.f32 s17, s4
; CHECK-NEXT: vmov.f32 s0, s1
; CHECK-NEXT: vmullb.s32 q2, q3, q4
; CHECK-NEXT: vmov.f32 s2, s3
; CHECK-NEXT: vmullb.s32 q1, q3, q0
; CHECK-NEXT: vmov q0, q2
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: bx lr
entry:
  %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  %out1 = sext <4 x i32> %shuf1 to <4 x i64>
  %ins = insertelement <8 x i32> poison, i32 %src2, i32 0
  %shuf2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <4 x i32> zeroinitializer
  %out2 = sext <4 x i32> %shuf2 to <4 x i64>
  %out = mul <4 x i64> %out2, %out1
  ret <4 x i64> %out
}

; sext, <8 x i32> source, mask <0, 2, 1, 3>, scalar extended to i64 before the splat: mul(lanes, splat).
define arm_aapcs_vfpcc <4 x i64> @sext32_0213_ext0(<8 x i32> %src1, i32 %src2) {
; CHECK-LABEL: sext32_0213_ext0:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r7, lr}
; CHECK-NEXT: push {r4, r5, r7, lr}
; CHECK-NEXT: vmov.f32 s4, s1
; CHECK-NEXT: vmov.f32 s6, s3
; CHECK-NEXT: vmov r3, s4
; CHECK-NEXT: vmov r1, s6
; CHECK-NEXT: umull r2, r5, r3, r0
; CHECK-NEXT: umull lr, r12, r1, r0
; CHECK-NEXT: vmov q1[2], q1[0], r2, lr
; CHECK-NEXT: asrs r2, r0, #31
; CHECK-NEXT: mla r4, r1, r2, r12
; CHECK-NEXT: asrs r1, r1, #31
; CHECK-NEXT: mla r5, r3, r2, r5
; CHECK-NEXT: asrs r3, r3, #31
; CHECK-NEXT: mla r1, r1, r0, r4
; CHECK-NEXT: mla r3, r3, r0, r5
; CHECK-NEXT: vmov q1[3], q1[1], r3, r1
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: umull r3, r5, r1, r0
; CHECK-NEXT: mla r5, r1, r2, r5
; CHECK-NEXT: asrs r1, r1, #31
; CHECK-NEXT: mla r12, r1, r0, r5
; CHECK-NEXT: vmov r5, s0
; CHECK-NEXT: umull r4, r1, r5, r0
; CHECK-NEXT: mla r1, r5, r2, r1
; CHECK-NEXT: asrs r2, r5, #31
; CHECK-NEXT: vmov q0[2], q0[0], r4, r3
; CHECK-NEXT: mla r0, r2, r0, r1
; CHECK-NEXT: vmov q0[3], q0[1], r0, r12
; CHECK-NEXT: pop {r4, r5, r7, pc}
entry:
  %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  %out1 = sext <4 x i32> %shuf1 to <4 x i64>
  %ext = sext i32 %src2 to i64
  %ins = insertelement <4 x i64> poison, i64 %ext, i32 0
  %shuf2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
  %out = mul <4 x i64> %out1, %shuf2
  ret <4 x i64> %out
}

; sext, <8 x i32> source, mask <0, 2, 1, 3>, scalar extended to i64 before the splat: mul(splat, lanes).
define arm_aapcs_vfpcc <4 x i64> @sext32_ext0_0213(<8 x i32> %src1, i32 %src2) {
; CHECK-LABEL: sext32_ext0_0213:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r7, lr}
; CHECK-NEXT: push {r4, r5, r7, lr}
; CHECK-NEXT: vmov.f32 s4, s1
; CHECK-NEXT: asrs r4, r0, #31
; CHECK-NEXT: vmov.f32 s6, s3
; CHECK-NEXT: vmov r3, s4
; CHECK-NEXT: vmov r1, s6
; CHECK-NEXT: umull r2, r5, r0, r3
; CHECK-NEXT: umull lr, r12, r0, r1
; CHECK-NEXT: vmov q1[2], q1[0], r2, lr
; CHECK-NEXT: asrs r2, r1, #31
; CHECK-NEXT: mla r2, r0, r2, r12
; CHECK-NEXT: mla r1, r4, r1, r2
; CHECK-NEXT: asrs r2, r3, #31
; CHECK-NEXT: mla r2, r0, r2, r5
; CHECK-NEXT: mla r2, r4, r3, r2
; CHECK-NEXT: vmov q1[3], q1[1], r2, r1
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: umull r2, r3, r0, r1
; CHECK-NEXT: asrs r5, r1, #31
; CHECK-NEXT: mla r3, r0, r5, r3
; CHECK-NEXT: mla r12, r4, r1, r3
; CHECK-NEXT: vmov r3, s0
; CHECK-NEXT: umull r5, r1, r0, r3
; CHECK-NEXT: vmov q0[2], q0[0], r5, r2
; CHECK-NEXT: asrs r2, r3, #31
; CHECK-NEXT: mla r0, r0, r2, r1
; CHECK-NEXT: mla r0, r4, r3, r0
; CHECK-NEXT: vmov q0[3], q0[1], r0, r12
; CHECK-NEXT: pop {r4, r5, r7, pc}
entry:
  %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  %out1 = sext <4 x i32> %shuf1 to <4 x i64>
  %ext = sext i32 %src2 to i64
  %ins = insertelement <4 x i64> poison, i64 %ext, i32 0
  %shuf2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
  %out = mul <4 x i64> %shuf2, %out1
  ret <4 x i64> %out
}

; zext, mask <0, 2>, narrow splat extended afterwards: mul(lanes, splat).
define arm_aapcs_vfpcc <2 x i64> @zext32_0246_0ext(<4 x i32> %src1, i32 %src2) {
; CHECK-LABEL: zext32_0246_0ext:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov q2[2], q2[0], r0, r0
; CHECK-NEXT: vmullb.u32 q1, q0, q2
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
  %out1 = zext <2 x i32> %shuf1 to <2 x i64>
  %ins = insertelement <4 x i32> poison, i32 %src2, i32 0
  %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <2 x i32> zeroinitializer
  %out2 = zext <2 x i32> %shuf2 to <2 x i64>
  %out = mul <2 x i64> %out1, %out2
  ret <2 x i64> %out
}

; zext, mask <0, 2>, narrow splat extended afterwards: mul(splat, lanes).
define arm_aapcs_vfpcc <2 x i64> @zext32_0ext_0246(<4 x i32> %src1, i32 %src2) {
; CHECK-LABEL: zext32_0ext_0246:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov q2[2], q2[0], r0, r0
; CHECK-NEXT: vmullb.u32 q1, q2, q0
; CHECK-NEXT: vmov q0, q1
; CHECK-NEXT: bx lr
entry:
  %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
  %out1 = zext <2 x i32> %shuf1 to <2 x i64>
  %ins = insertelement <4 x i32> poison, i32 %src2, i32 0
  %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <2 x i32> zeroinitializer
  %out2 = zext <2 x i32> %shuf2 to <2 x i64>
  %out = mul <2 x i64> %out2, %out1
  ret <2 x i64> %out
}

; zext, mask <0, 2>, scalar extended to i64 before the splat: mul(lanes, splat).
define arm_aapcs_vfpcc <2 x i64> @zext32_0246_ext0(<4 x i32> %src1, i32 %src2) {
; CHECK-LABEL: zext32_0246_ext0:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: vmov r3, s0
; CHECK-NEXT: umull r1, r2, r1, r0
; CHECK-NEXT: umull r0, r3, r3, r0
; CHECK-NEXT: vmov q0[2], q0[0], r0, r1
; CHECK-NEXT: vmov q0[3], q0[1], r3, r2
; CHECK-NEXT: bx lr
entry:
  %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
  %out1 = zext <2 x i32> %shuf1 to <2 x i64>
  %ext = zext i32 %src2 to i64
  %ins = insertelement <2 x i64> poison, i64 %ext, i32 0
  %shuf2 = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
  %out = mul <2 x i64> %out1, %shuf2
  ret <2 x i64> %out
}

; zext, mask <0, 2>, scalar extended to i64 before the splat: mul(splat, lanes).
define arm_aapcs_vfpcc <2 x i64> @zext32_ext0_0246(<4 x i32> %src1, i32 %src2) {
; CHECK-LABEL: zext32_ext0_0246:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: vmov r3, s0
; CHECK-NEXT: umull r1, r2, r0, r1
; CHECK-NEXT: umull r0, r3, r0, r3
; CHECK-NEXT: vmov q0[2], q0[0], r0, r1
; CHECK-NEXT: vmov q0[3], q0[1], r3, r2
; CHECK-NEXT: bx lr
entry:
  %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 0, i32 2>
  %out1 = zext <2 x i32> %shuf1 to <2 x i64>
  %ext = zext i32 %src2 to i64
  %ins = insertelement <2 x i64> poison, i64 %ext, i32 0
  %shuf2 = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
  %out = mul <2 x i64> %shuf2, %out1
  ret <2 x i64> %out
}

; zext, mask <1, 3>, narrow splat extended afterwards: mul(lanes, splat).
define arm_aapcs_vfpcc <2 x i64> @zext32_1357_0ext(<4 x i32> %src1, i32 %src2) {
; CHECK-LABEL: zext32_1357_0ext:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov q1[2], q1[0], r0, r0
; CHECK-NEXT: vrev64.32 q2, q0
; CHECK-NEXT: vmullb.u32 q0, q2, q1
; CHECK-NEXT: bx lr
entry:
  %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
  %out1 = zext <2 x i32> %shuf1 to <2 x i64>
  %ins = insertelement <4 x i32> poison, i32 %src2, i32 0
  %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <2 x i32> zeroinitializer
  %out2 = zext <2 x i32> %shuf2 to <2 x i64>
  %out = mul <2 x i64> %out1, %out2
  ret <2 x i64> %out
}

; zext, mask <1, 3>, narrow splat extended afterwards: mul(splat, lanes).
define arm_aapcs_vfpcc <2 x i64> @zext32_0ext_1357(<4 x i32> %src1, i32 %src2) {
; CHECK-LABEL: zext32_0ext_1357:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vrev64.32 q1, q0
; CHECK-NEXT: vmov q2[2], q2[0], r0, r0
; CHECK-NEXT: vmullb.u32 q0, q2, q1
; CHECK-NEXT: bx lr
entry:
  %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
  %out1 = zext <2 x i32> %shuf1 to <2 x i64>
  %ins = insertelement <4 x i32> poison, i32 %src2, i32 0
  %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <2 x i32> zeroinitializer
  %out2 = zext <2 x i32> %shuf2 to <2 x i64>
  %out = mul <2 x i64> %out2, %out1
  ret <2 x i64> %out
}

; zext, mask <1, 3>, scalar extended to i64 before the splat: mul(lanes, splat).
define arm_aapcs_vfpcc <2 x i64> @zext32_1357_ext0(<4 x i32> %src1, i32 %src2) {
; CHECK-LABEL: zext32_1357_ext0:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vrev64.32 q1, q0
; CHECK-NEXT: vmov r1, s6
; CHECK-NEXT: vmov r3, s4
; CHECK-NEXT: umull r1, r2, r1, r0
; CHECK-NEXT: umull r0, r3, r3, r0
; CHECK-NEXT: vmov q0[2], q0[0], r0, r1
; CHECK-NEXT: vmov q0[3], q0[1], r3, r2
; CHECK-NEXT: bx lr
entry:
  %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
  %out1 = zext <2 x i32> %shuf1 to <2 x i64>
  %ext = zext i32 %src2 to i64
  %ins = insertelement <2 x i64> poison, i64 %ext, i32 0
  %shuf2 = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
  %out = mul <2 x i64> %out1, %shuf2
  ret <2 x i64> %out
}

; zext, mask <1, 3>, scalar extended to i64 before the splat: mul(splat, lanes).
define arm_aapcs_vfpcc <2 x i64> @zext32_ext0_1357(<4 x i32> %src1, i32 %src2) {
; CHECK-LABEL: zext32_ext0_1357:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vrev64.32 q1, q0
; CHECK-NEXT: vmov r1, s6
; CHECK-NEXT: vmov r3, s4
; CHECK-NEXT: umull r1, r2, r0, r1
; CHECK-NEXT: umull r0, r3, r0, r3
; CHECK-NEXT: vmov q0[2], q0[0], r0, r1
; CHECK-NEXT: vmov q0[3], q0[1], r3, r2
; CHECK-NEXT: bx lr
entry:
  %shuf1 = shufflevector <4 x i32> %src1, <4 x i32> undef, <2 x i32> <i32 1, i32 3>
  %out1 = zext <2 x i32> %shuf1 to <2 x i64>
  %ext = zext i32 %src2 to i64
  %ins = insertelement <2 x i64> poison, i64 %ext, i32 0
  %shuf2 = shufflevector <2 x i64> %ins, <2 x i64> undef, <2 x i32> zeroinitializer
  %out = mul <2 x i64> %shuf2, %out1
  ret <2 x i64> %out
}

; zext, <8 x i32> source, mask <0, 2, 1, 3>, narrow splat extended afterwards: mul(lanes, splat).
define arm_aapcs_vfpcc <4 x i64> @zext32_0213_0ext(<8 x i32> %src1, i32 %src2) {
; CHECK-LABEL: zext32_0213_0ext:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vmov q3[2], q3[0], r0, r0
; CHECK-NEXT: vmov.f32 s17, s4
; CHECK-NEXT: vmov.f32 s0, s1
; CHECK-NEXT: vmullb.u32 q2, q4, q3
; CHECK-NEXT: vmov.f32 s2, s3
; CHECK-NEXT: vmullb.u32 q1, q0, q3
; CHECK-NEXT: vmov q0, q2
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: bx lr
entry:
  %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  %out1 = zext <4 x i32> %shuf1 to <4 x i64>
  %ins = insertelement <8 x i32> poison, i32 %src2, i32 0
  %shuf2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <4 x i32> zeroinitializer
  %out2 = zext <4 x i32> %shuf2 to <4 x i64>
  %out = mul <4 x i64> %out1, %out2
  ret <4 x i64> %out
}

; zext, <8 x i32> source, mask <0, 2, 1, 3>, narrow splat extended afterwards: mul(splat, lanes).
define arm_aapcs_vfpcc <4 x i64> @zext32_0ext_0213(<8 x i32> %src1, i32 %src2) {
; CHECK-LABEL: zext32_0ext_0213:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .vsave {d8, d9}
; CHECK-NEXT: vpush {d8, d9}
; CHECK-NEXT: vmov q4, q0
; CHECK-NEXT: vmov q3[2], q3[0], r0, r0
; CHECK-NEXT: vmov.f32 s17, s4
; CHECK-NEXT: vmov.f32 s0, s1
; CHECK-NEXT: vmullb.u32 q2, q3, q4
; CHECK-NEXT: vmov.f32 s2, s3
; CHECK-NEXT: vmullb.u32 q1, q3, q0
; CHECK-NEXT: vmov q0, q2
; CHECK-NEXT: vpop {d8, d9}
; CHECK-NEXT: bx lr
entry:
  %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  %out1 = zext <4 x i32> %shuf1 to <4 x i64>
  %ins = insertelement <8 x i32> poison, i32 %src2, i32 0
  %shuf2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <4 x i32> zeroinitializer
  %out2 = zext <4 x i32> %shuf2 to <4 x i64>
  %out = mul <4 x i64> %out2, %out1
  ret <4 x i64> %out
}

; zext, <8 x i32> source, mask <0, 2, 1, 3>, scalar extended to i64 before the splat: mul(lanes, splat).
define arm_aapcs_vfpcc <4 x i64> @zext32_0213_ext0(<8 x i32> %src1, i32 %src2) {
; CHECK-LABEL: zext32_0213_ext0:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: vmov r3, s0
; CHECK-NEXT: vmov.f32 s0, s1
; CHECK-NEXT: vmov.f32 s2, s3
; CHECK-NEXT: umull r1, r12, r1, r0
; CHECK-NEXT: umull r3, r2, r3, r0
; CHECK-NEXT: vmov q2[2], q2[0], r3, r1
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: vmov r3, s0
; CHECK-NEXT: vmov q2[3], q2[1], r2, r12
; CHECK-NEXT: vmov q0, q2
; CHECK-NEXT: umull r1, r2, r1, r0
; CHECK-NEXT: umull r0, r3, r3, r0
; CHECK-NEXT: vmov q1[2], q1[0], r0, r1
; CHECK-NEXT: vmov q1[3], q1[1], r3, r2
; CHECK-NEXT: bx lr
entry:
  %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  %out1 = zext <4 x i32> %shuf1 to <4 x i64>
  %ext = zext i32 %src2 to i64
  %ins = insertelement <4 x i64> poison, i64 %ext, i32 0
  %shuf2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
  %out = mul <4 x i64> %out1, %shuf2
  ret <4 x i64> %out
}

; zext, <8 x i32> source, mask <0, 2, 1, 3>, scalar extended to i64 before the splat: mul(splat, lanes).
define arm_aapcs_vfpcc <4 x i64> @zext32_ext0_0213(<8 x i32> %src1, i32 %src2) {
; CHECK-LABEL: zext32_ext0_0213:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: vmov r3, s0
; CHECK-NEXT: vmov.f32 s0, s1
; CHECK-NEXT: vmov.f32 s2, s3
; CHECK-NEXT: umull r1, r12, r0, r1
; CHECK-NEXT: umull r3, r2, r0, r3
; CHECK-NEXT: vmov q2[2], q2[0], r3, r1
; CHECK-NEXT: vmov r1, s2
; CHECK-NEXT: vmov r3, s0
; CHECK-NEXT: vmov q2[3], q2[1], r2, r12
; CHECK-NEXT: vmov q0, q2
; CHECK-NEXT: umull r1, r2, r0, r1
; CHECK-NEXT: umull r0, r3, r0, r3
; CHECK-NEXT: vmov q1[2], q1[0], r0, r1
; CHECK-NEXT: vmov q1[3], q1[1], r3, r2
; CHECK-NEXT: bx lr
entry:
  %shuf1 = shufflevector <8 x i32> %src1, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
  %out1 = zext <4 x i32> %shuf1 to <4 x i64>
  %ext = zext i32 %src2 to i64
  %ins = insertelement <4 x i64> poison, i64 %ext, i32 0
  %shuf2 = shufflevector <4 x i64> %ins, <4 x i64> undef, <4 x i32> zeroinitializer
  %out = mul <4 x i64> %shuf2, %out1
  ret <4 x i64> %out
}

; sext i16 -> i32, mask <0, 2, 4, 6>, narrow splat extended afterwards: mul(lanes, splat).
define arm_aapcs_vfpcc <4 x i32> @sext16_02468101214_0ext(<8 x i16> %src1, i16 %src2) {
; CHECK-LABEL: sext16_02468101214_0ext:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vdup.32 q1, r0
; CHECK-NEXT: vmullb.s16 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %out1 = sext <4 x i16> %shuf1 to <4 x i32>
  %ins = insertelement <8 x i16> poison, i16 %src2, i32 0
  %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <4 x i32> zeroinitializer
  %out2 = sext <4 x i16> %shuf2 to <4 x i32>
  %out = mul <4 x i32> %out1, %out2
  ret <4 x i32> %out
}

; sext i16 -> i32, mask <0, 2, 4, 6>, narrow splat extended afterwards: mul(splat, lanes).
define arm_aapcs_vfpcc <4 x i32> @sext16_0ext_02468101214(<8 x i16> %src1, i16 %src2) {
; CHECK-LABEL: sext16_0ext_02468101214:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vdup.32 q1, r0
; CHECK-NEXT: vmullb.s16 q0, q1, q0
; CHECK-NEXT: bx lr
entry:
  %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %out1 = sext <4 x i16> %shuf1 to <4 x i32>
  %ins = insertelement <8 x i16> poison, i16 %src2, i32 0
  %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <4 x i32> zeroinitializer
  %out2 = sext <4 x i16> %shuf2 to <4 x i32>
  %out = mul <4 x i32> %out2, %out1
  ret <4 x i32> %out
}

; sext i16 -> i32, mask <0, 2, 4, 6>, scalar extended to i32 before the splat: mul(lanes, splat).
define arm_aapcs_vfpcc <4 x i32> @sext16_02468101214_ext0(<8 x i16> %src1, i16 %src2) {
; CHECK-LABEL: sext16_02468101214_ext0:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmovlb.s16 q0, q0
; CHECK-NEXT: sxth r0, r0
; CHECK-NEXT: vmul.i32 q0, q0, r0
; CHECK-NEXT: bx lr
entry:
  %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %out1 = sext <4 x i16> %shuf1 to <4 x i32>
  %ext = sext i16 %src2 to i32
  %ins = insertelement <4 x i32> poison, i32 %ext, i32 0
  %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
  %out = mul <4 x i32> %out1, %shuf2
  ret <4 x i32> %out
}

; sext i16 -> i32, mask <0, 2, 4, 6>, scalar extended to i32 before the splat: mul(splat, lanes).
define arm_aapcs_vfpcc <4 x i32> @sext16_ext0_02468101214(<8 x i16> %src1, i16 %src2) {
; CHECK-LABEL: sext16_ext0_02468101214:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmovlb.s16 q0, q0
; CHECK-NEXT: sxth r0, r0
; CHECK-NEXT: vmul.i32 q0, q0, r0
; CHECK-NEXT: bx lr
entry:
  %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %out1 = sext <4 x i16> %shuf1 to <4 x i32>
  %ext = sext i16 %src2 to i32
  %ins = insertelement <4 x i32> poison, i32 %ext, i32 0
  %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
  %out = mul <4 x i32> %shuf2, %out1
  ret <4 x i32> %out
}

; sext i16 -> i32, mask <1, 3, 5, 7>, narrow splat extended afterwards: mul(lanes, splat).
define arm_aapcs_vfpcc <4 x i32> @sext16_13579111315_0ext(<8 x i16> %src1, i16 %src2) {
; CHECK-LABEL: sext16_13579111315_0ext:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vdup.32 q1, r0
; CHECK-NEXT: vrev32.16 q0, q0
; CHECK-NEXT: vmullb.s16 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %out1 = sext <4 x i16> %shuf1 to <4 x i32>
  %ins = insertelement <8 x i16> poison, i16 %src2, i32 0
  %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <4 x i32> zeroinitializer
  %out2 = sext <4 x i16> %shuf2 to <4 x i32>
  %out = mul <4 x i32> %out1, %out2
  ret <4 x i32> %out
}

; sext i16 -> i32, mask <1, 3, 5, 7>, narrow splat extended afterwards: mul(splat, lanes).
define arm_aapcs_vfpcc <4 x i32> @sext16_0ext_13579111315(<8 x i16> %src1, i16 %src2) {
; CHECK-LABEL: sext16_0ext_13579111315:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vrev32.16 q0, q0
; CHECK-NEXT: vdup.32 q1, r0
; CHECK-NEXT: vmullb.s16 q0, q1, q0
; CHECK-NEXT: bx lr
entry:
  %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %out1 = sext <4 x i16> %shuf1 to <4 x i32>
  %ins = insertelement <8 x i16> poison, i16 %src2, i32 0
  %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <4 x i32> zeroinitializer
  %out2 = sext <4 x i16> %shuf2 to <4 x i32>
  %out = mul <4 x i32> %out2, %out1
  ret <4 x i32> %out
}

; sext i16 -> i32, mask <1, 3, 5, 7>, scalar extended to i32 before the splat: mul(lanes, splat).
define arm_aapcs_vfpcc <4 x i32> @sext16_13579111315_ext0(<8 x i16> %src1, i16 %src2) {
; CHECK-LABEL: sext16_13579111315_ext0:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmovlt.s16 q0, q0
; CHECK-NEXT: sxth r0, r0
; CHECK-NEXT: vmul.i32 q0, q0, r0
; CHECK-NEXT: bx lr
entry:
  %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %out1 = sext <4 x i16> %shuf1 to <4 x i32>
  %ext = sext i16 %src2 to i32
  %ins = insertelement <4 x i32> poison, i32 %ext, i32 0
  %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
  %out = mul <4 x i32> %out1, %shuf2
  ret <4 x i32> %out
}

; sext i16 -> i32, mask <1, 3, 5, 7>, scalar extended to i32 before the splat: mul(splat, lanes).
define arm_aapcs_vfpcc <4 x i32> @sext16_ext0_13579111315(<8 x i16> %src1, i16 %src2) {
; CHECK-LABEL: sext16_ext0_13579111315:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmovlt.s16 q0, q0
; CHECK-NEXT: sxth r0, r0
; CHECK-NEXT: vmul.i32 q0, q0, r0
; CHECK-NEXT: bx lr
entry:
  %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
  %out1 = sext <4 x i16> %shuf1 to <4 x i32>
  %ext = sext i16 %src2 to i32
  %ins = insertelement <4 x i32> poison, i32 %ext, i32 0
  %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
  %out = mul <4 x i32> %shuf2, %out1
  ret <4 x i32> %out
}

; sext i16 -> i32, <16 x i16> source, mask <0, 2, 4, 6, 1, 3, 5, 7>, narrow splat extended afterwards: mul(lanes, splat).
define arm_aapcs_vfpcc <8 x i32> @sext16_02461357_0ext(<16 x i16> %src1, i16 %src2) {
; CHECK-LABEL: sext16_02461357_0ext:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vdup.16 q2, r0
; CHECK-NEXT: vrev32.16 q1, q0
; CHECK-NEXT: vmullb.s16 q1, q1, q2
; CHECK-NEXT: vmullb.s16 q0, q0, q2
; CHECK-NEXT: bx lr
entry:
  %shuf1 = shufflevector <16 x i16> %src1, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
  %out1 = sext <8 x i16> %shuf1 to <8 x i32>
  %ins = insertelement <16 x i16> poison, i16 %src2, i32 0
  %shuf2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <8 x i32> zeroinitializer
  %out2 = sext <8 x i16> %shuf2 to <8 x i32>
  %out = mul <8 x i32> %out1, %out2
  ret <8 x i32> %out
}

; sext i16 -> i32, <16 x i16> source, mask <0, 2, 4, 6, 1, 3, 5, 7>, narrow splat extended afterwards: mul(splat, lanes).
define arm_aapcs_vfpcc <8 x i32> @sext16_0ext_02461357(<16 x i16> %src1, i16 %src2) {
; CHECK-LABEL: sext16_0ext_02461357:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vrev32.16 q1, q0
; CHECK-NEXT: vdup.16 q2, r0
; CHECK-NEXT: vmullb.s16 q1, q2, q1
; CHECK-NEXT: vmullb.s16 q0, q2, q0
; CHECK-NEXT: bx lr
entry:
  %shuf1 = shufflevector <16 x i16> %src1, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
  %out1 = sext <8 x i16> %shuf1 to <8 x i32>
  %ins = insertelement <16 x i16> poison, i16 %src2, i32 0
  %shuf2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <8 x i32> zeroinitializer
  %out2 = sext <8 x i16> %shuf2 to <8 x i32>
  %out = mul <8 x i32> %out2, %out1
  ret <8 x i32> %out
}

; sext i16 -> i32, <16 x i16> source, mask <0, 2, 4, 6, 1, 3, 5, 7>, scalar extended to i32 before the splat: mul(lanes, splat).
define arm_aapcs_vfpcc <8 x i32> @sext16_02461357_ext0(<16 x i16> %src1, i16 %src2) {
; CHECK-LABEL: sext16_02461357_ext0:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmovlb.s16 q1, q0
; CHECK-NEXT: sxth r0, r0
; CHECK-NEXT: vmul.i32 q2, q1, r0
; CHECK-NEXT: vmovlt.s16 q0, q0
; CHECK-NEXT: vmul.i32 q1, q0, r0
; CHECK-NEXT: vmov q0, q2
; CHECK-NEXT: bx lr
entry:
  %shuf1 = shufflevector <16 x i16> %src1, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
  %out1 = sext <8 x i16> %shuf1 to <8 x i32>
  %ext = sext i16 %src2 to i32
  %ins = insertelement <8 x i32> poison, i32 %ext, i32 0
  %shuf2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer
  %out = mul <8 x i32> %out1, %shuf2
  ret <8 x i32> %out
}

; sext i16 -> i32, <16 x i16> source, mask <0, 2, 4, 6, 1, 3, 5, 7>, scalar extended to i32 before the splat: mul(splat, lanes).
define arm_aapcs_vfpcc <8 x i32> @sext16_ext0_02461357(<16 x i16> %src1, i16 %src2) {
; CHECK-LABEL: sext16_ext0_02461357:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmovlb.s16 q1, q0
; CHECK-NEXT: sxth r0, r0
; CHECK-NEXT: vmul.i32 q2, q1, r0
; CHECK-NEXT: vmovlt.s16 q0, q0
; CHECK-NEXT: vmul.i32 q1, q0, r0
; CHECK-NEXT: vmov q0, q2
; CHECK-NEXT: bx lr
entry:
  %shuf1 = shufflevector <16 x i16> %src1, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7>
  %out1 = sext <8 x i16> %shuf1 to <8 x i32>
  %ext = sext i16 %src2 to i32
  %ins = insertelement <8 x i32> poison, i32 %ext, i32 0
  %shuf2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer
  %out = mul <8 x i32> %shuf2, %out1
  ret <8 x i32> %out
}

define arm_aapcs_vfpcc <4 x i32> @zext16_02468101214_0ext(<8 x i16> %src1, i16 %src2) {
; CHECK-LABEL: zext16_02468101214_0ext:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vdup.32 q1, r0
; CHECK-NEXT: vmullb.u16 q0, q0, q1
; CHECK-NEXT: bx lr
entry:
  %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
  %out1 = zext <4 x i16>
%shuf1 to <4 x i32> 803 %ins = insertelement <8 x i16> poison, i16 %src2, i32 0 804 %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <4 x i32> zeroinitializer 805 %out2 = zext <4 x i16> %shuf2 to <4 x i32> 806 %out = mul <4 x i32> %out1, %out2 807 ret <4 x i32> %out 808} 809 810define arm_aapcs_vfpcc <4 x i32> @zext16_0ext_02468101214(<8 x i16> %src1, i16 %src2) { 811; CHECK-LABEL: zext16_0ext_02468101214: 812; CHECK: @ %bb.0: @ %entry 813; CHECK-NEXT: vdup.32 q1, r0 814; CHECK-NEXT: vmullb.u16 q0, q1, q0 815; CHECK-NEXT: bx lr 816entry: 817 %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6> 818 %out1 = zext <4 x i16> %shuf1 to <4 x i32> 819 %ins = insertelement <8 x i16> poison, i16 %src2, i32 0 820 %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <4 x i32> zeroinitializer 821 %out2 = zext <4 x i16> %shuf2 to <4 x i32> 822 %out = mul <4 x i32> %out2, %out1 823 ret <4 x i32> %out 824} 825 826define arm_aapcs_vfpcc <4 x i32> @zext16_02468101214_ext0(<8 x i16> %src1, i16 %src2) { 827; CHECK-LABEL: zext16_02468101214_ext0: 828; CHECK: @ %bb.0: @ %entry 829; CHECK-NEXT: vmovlb.u16 q0, q0 830; CHECK-NEXT: uxth r0, r0 831; CHECK-NEXT: vmul.i32 q0, q0, r0 832; CHECK-NEXT: bx lr 833entry: 834 %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6> 835 %out1 = zext <4 x i16> %shuf1 to <4 x i32> 836 %ext = zext i16 %src2 to i32 837 %ins = insertelement <4 x i32> poison, i32 %ext, i32 0 838 %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer 839 %out = mul <4 x i32> %out1, %shuf2 840 ret <4 x i32> %out 841} 842 843define arm_aapcs_vfpcc <4 x i32> @zext16_ext0_02468101214(<8 x i16> %src1, i16 %src2) { 844; CHECK-LABEL: zext16_ext0_02468101214: 845; CHECK: @ %bb.0: @ %entry 846; CHECK-NEXT: vmovlb.u16 q0, q0 847; CHECK-NEXT: uxth r0, r0 848; CHECK-NEXT: vmul.i32 q0, q0, r0 849; CHECK-NEXT: bx lr 850entry: 851 %shuf1 = shufflevector <8 x i16> %src1, <8 
x i16> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6> 852 %out1 = zext <4 x i16> %shuf1 to <4 x i32> 853 %ext = zext i16 %src2 to i32 854 %ins = insertelement <4 x i32> poison, i32 %ext, i32 0 855 %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer 856 %out = mul <4 x i32> %shuf2, %out1 857 ret <4 x i32> %out 858} 859 860define arm_aapcs_vfpcc <4 x i32> @zext16_13579111315_0ext(<8 x i16> %src1, i16 %src2) { 861; CHECK-LABEL: zext16_13579111315_0ext: 862; CHECK: @ %bb.0: @ %entry 863; CHECK-NEXT: vdup.32 q1, r0 864; CHECK-NEXT: vrev32.16 q0, q0 865; CHECK-NEXT: vmullb.u16 q0, q0, q1 866; CHECK-NEXT: bx lr 867entry: 868 %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7> 869 %out1 = zext <4 x i16> %shuf1 to <4 x i32> 870 %ins = insertelement <8 x i16> poison, i16 %src2, i32 0 871 %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <4 x i32> zeroinitializer 872 %out2 = zext <4 x i16> %shuf2 to <4 x i32> 873 %out = mul <4 x i32> %out1, %out2 874 ret <4 x i32> %out 875} 876 877define arm_aapcs_vfpcc <4 x i32> @zext16_0ext_13579111315(<8 x i16> %src1, i16 %src2) { 878; CHECK-LABEL: zext16_0ext_13579111315: 879; CHECK: @ %bb.0: @ %entry 880; CHECK-NEXT: vrev32.16 q0, q0 881; CHECK-NEXT: vdup.32 q1, r0 882; CHECK-NEXT: vmullb.u16 q0, q1, q0 883; CHECK-NEXT: bx lr 884entry: 885 %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7> 886 %out1 = zext <4 x i16> %shuf1 to <4 x i32> 887 %ins = insertelement <8 x i16> poison, i16 %src2, i32 0 888 %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <4 x i32> zeroinitializer 889 %out2 = zext <4 x i16> %shuf2 to <4 x i32> 890 %out = mul <4 x i32> %out2, %out1 891 ret <4 x i32> %out 892} 893 894define arm_aapcs_vfpcc <4 x i32> @zext16_13579111315_ext0(<8 x i16> %src1, i16 %src2) { 895; CHECK-LABEL: zext16_13579111315_ext0: 896; CHECK: @ %bb.0: @ %entry 897; CHECK-NEXT: vmovlt.u16 q0, q0 898; CHECK-NEXT: uxth r0, 
r0 899; CHECK-NEXT: vmul.i32 q0, q0, r0 900; CHECK-NEXT: bx lr 901entry: 902 %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7> 903 %out1 = zext <4 x i16> %shuf1 to <4 x i32> 904 %ext = zext i16 %src2 to i32 905 %ins = insertelement <4 x i32> poison, i32 %ext, i32 0 906 %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer 907 %out = mul <4 x i32> %out1, %shuf2 908 ret <4 x i32> %out 909} 910 911define arm_aapcs_vfpcc <4 x i32> @zext16_ext0_13579111315(<8 x i16> %src1, i16 %src2) { 912; CHECK-LABEL: zext16_ext0_13579111315: 913; CHECK: @ %bb.0: @ %entry 914; CHECK-NEXT: vmovlt.u16 q0, q0 915; CHECK-NEXT: uxth r0, r0 916; CHECK-NEXT: vmul.i32 q0, q0, r0 917; CHECK-NEXT: bx lr 918entry: 919 %shuf1 = shufflevector <8 x i16> %src1, <8 x i16> undef, <4 x i32> <i32 1, i32 3, i32 5, i32 7> 920 %out1 = zext <4 x i16> %shuf1 to <4 x i32> 921 %ext = zext i16 %src2 to i32 922 %ins = insertelement <4 x i32> poison, i32 %ext, i32 0 923 %shuf2 = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer 924 %out = mul <4 x i32> %shuf2, %out1 925 ret <4 x i32> %out 926} 927 928define arm_aapcs_vfpcc <8 x i32> @zext16_02461357_0ext(<16 x i16> %src1, i16 %src2) { 929; CHECK-LABEL: zext16_02461357_0ext: 930; CHECK: @ %bb.0: @ %entry 931; CHECK-NEXT: vdup.16 q2, r0 932; CHECK-NEXT: vrev32.16 q1, q0 933; CHECK-NEXT: vmullb.u16 q1, q1, q2 934; CHECK-NEXT: vmullb.u16 q0, q0, q2 935; CHECK-NEXT: bx lr 936entry: 937 %shuf1 = shufflevector <16 x i16> %src1, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7> 938 %out1 = zext <8 x i16> %shuf1 to <8 x i32> 939 %ins = insertelement <16 x i16> poison, i16 %src2, i32 0 940 %shuf2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <8 x i32> zeroinitializer 941 %out2 = zext <8 x i16> %shuf2 to <8 x i32> 942 %out = mul <8 x i32> %out1, %out2 943 ret <8 x i32> %out 944} 945 946define arm_aapcs_vfpcc <8 x i32> 
@zext16_0ext_02461357(<16 x i16> %src1, i16 %src2) { 947; CHECK-LABEL: zext16_0ext_02461357: 948; CHECK: @ %bb.0: @ %entry 949; CHECK-NEXT: vrev32.16 q1, q0 950; CHECK-NEXT: vdup.16 q2, r0 951; CHECK-NEXT: vmullb.u16 q1, q2, q1 952; CHECK-NEXT: vmullb.u16 q0, q2, q0 953; CHECK-NEXT: bx lr 954entry: 955 %shuf1 = shufflevector <16 x i16> %src1, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7> 956 %out1 = zext <8 x i16> %shuf1 to <8 x i32> 957 %ins = insertelement <16 x i16> poison, i16 %src2, i32 0 958 %shuf2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <8 x i32> zeroinitializer 959 %out2 = zext <8 x i16> %shuf2 to <8 x i32> 960 %out = mul <8 x i32> %out2, %out1 961 ret <8 x i32> %out 962} 963 964define arm_aapcs_vfpcc <8 x i32> @zext16_02461357_ext0(<16 x i16> %src1, i16 %src2) { 965; CHECK-LABEL: zext16_02461357_ext0: 966; CHECK: @ %bb.0: @ %entry 967; CHECK-NEXT: vmovlb.u16 q1, q0 968; CHECK-NEXT: uxth r0, r0 969; CHECK-NEXT: vmul.i32 q2, q1, r0 970; CHECK-NEXT: vmovlt.u16 q0, q0 971; CHECK-NEXT: vmul.i32 q1, q0, r0 972; CHECK-NEXT: vmov q0, q2 973; CHECK-NEXT: bx lr 974entry: 975 %shuf1 = shufflevector <16 x i16> %src1, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7> 976 %out1 = zext <8 x i16> %shuf1 to <8 x i32> 977 %ext = zext i16 %src2 to i32 978 %ins = insertelement <8 x i32> poison, i32 %ext, i32 0 979 %shuf2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer 980 %out = mul <8 x i32> %out1, %shuf2 981 ret <8 x i32> %out 982} 983 984define arm_aapcs_vfpcc <8 x i32> @zext16_ext0_02461357(<16 x i16> %src1, i16 %src2) { 985; CHECK-LABEL: zext16_ext0_02461357: 986; CHECK: @ %bb.0: @ %entry 987; CHECK-NEXT: vmovlb.u16 q1, q0 988; CHECK-NEXT: uxth r0, r0 989; CHECK-NEXT: vmul.i32 q2, q1, r0 990; CHECK-NEXT: vmovlt.u16 q0, q0 991; CHECK-NEXT: vmul.i32 q1, q0, r0 992; CHECK-NEXT: vmov q0, q2 993; CHECK-NEXT: bx lr 994entry: 995 %shuf1 = shufflevector <16 x i16> 
%src1, <16 x i16> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 1, i32 3, i32 5, i32 7> 996 %out1 = zext <8 x i16> %shuf1 to <8 x i32> 997 %ext = zext i16 %src2 to i32 998 %ins = insertelement <8 x i32> poison, i32 %ext, i32 0 999 %shuf2 = shufflevector <8 x i32> %ins, <8 x i32> undef, <8 x i32> zeroinitializer 1000 %out = mul <8 x i32> %shuf2, %out1 1001 ret <8 x i32> %out 1002} 1003 1004define arm_aapcs_vfpcc <8 x i16> @sext8_024681012141618202224262830_0ext(<16 x i8> %src1, i8 %src2) { 1005; CHECK-LABEL: sext8_024681012141618202224262830_0ext: 1006; CHECK: @ %bb.0: @ %entry 1007; CHECK-NEXT: vdup.16 q1, r0 1008; CHECK-NEXT: vmullb.s8 q0, q0, q1 1009; CHECK-NEXT: bx lr 1010entry: 1011 %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> 1012 %out1 = sext <8 x i8> %shuf1 to <8 x i16> 1013 %ins = insertelement <16 x i8> poison, i8 %src2, i32 0 1014 %shuf2 = shufflevector <16 x i8> %ins, <16 x i8> undef, <8 x i32> zeroinitializer 1015 %out2 = sext <8 x i8> %shuf2 to <8 x i16> 1016 %out = mul <8 x i16> %out1, %out2 1017 ret <8 x i16> %out 1018} 1019 1020define arm_aapcs_vfpcc <8 x i16> @sext8_0ext_024681012141618202224262830(<16 x i8> %src1, i8 %src2) { 1021; CHECK-LABEL: sext8_0ext_024681012141618202224262830: 1022; CHECK: @ %bb.0: @ %entry 1023; CHECK-NEXT: vdup.16 q1, r0 1024; CHECK-NEXT: vmullb.s8 q0, q1, q0 1025; CHECK-NEXT: bx lr 1026entry: 1027 %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> 1028 %out1 = sext <8 x i8> %shuf1 to <8 x i16> 1029 %ins = insertelement <16 x i8> poison, i8 %src2, i32 0 1030 %shuf2 = shufflevector <16 x i8> %ins, <16 x i8> undef, <8 x i32> zeroinitializer 1031 %out2 = sext <8 x i8> %shuf2 to <8 x i16> 1032 %out = mul <8 x i16> %out2, %out1 1033 ret <8 x i16> %out 1034} 1035 1036define arm_aapcs_vfpcc <8 x i16> @sext8_024681012141618202224262830_ext0(<16 x i8> %src1, i8 %src2) { 
1037; CHECK-LABEL: sext8_024681012141618202224262830_ext0: 1038; CHECK: @ %bb.0: @ %entry 1039; CHECK-NEXT: vmovlb.s8 q0, q0 1040; CHECK-NEXT: sxtb r0, r0 1041; CHECK-NEXT: vmul.i16 q0, q0, r0 1042; CHECK-NEXT: bx lr 1043entry: 1044 %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> 1045 %out1 = sext <8 x i8> %shuf1 to <8 x i16> 1046 %ext = sext i8 %src2 to i16 1047 %ins = insertelement <8 x i16> poison, i16 %ext, i32 0 1048 %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer 1049 %out = mul <8 x i16> %out1, %shuf2 1050 ret <8 x i16> %out 1051} 1052 1053define arm_aapcs_vfpcc <8 x i16> @sext8_ext0_024681012141618202224262830(<16 x i8> %src1, i8 %src2) { 1054; CHECK-LABEL: sext8_ext0_024681012141618202224262830: 1055; CHECK: @ %bb.0: @ %entry 1056; CHECK-NEXT: vmovlb.s8 q0, q0 1057; CHECK-NEXT: sxtb r0, r0 1058; CHECK-NEXT: vmul.i16 q0, q0, r0 1059; CHECK-NEXT: bx lr 1060entry: 1061 %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> 1062 %out1 = sext <8 x i8> %shuf1 to <8 x i16> 1063 %ext = sext i8 %src2 to i16 1064 %ins = insertelement <8 x i16> poison, i16 %ext, i32 0 1065 %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer 1066 %out = mul <8 x i16> %shuf2, %out1 1067 ret <8 x i16> %out 1068} 1069 1070define arm_aapcs_vfpcc <8 x i16> @sext8_135791113151719212325272931_0ext(<16 x i8> %src1, i8 %src2) { 1071; CHECK-LABEL: sext8_135791113151719212325272931_0ext: 1072; CHECK: @ %bb.0: @ %entry 1073; CHECK-NEXT: vdup.16 q1, r0 1074; CHECK-NEXT: vrev16.8 q0, q0 1075; CHECK-NEXT: vmullb.s8 q0, q0, q1 1076; CHECK-NEXT: bx lr 1077entry: 1078 %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> 1079 %out1 = sext <8 x i8> %shuf1 to <8 x i16> 1080 %ins = insertelement <16 x i8> poison, i8 %src2, 
i32 0 1081 %shuf2 = shufflevector <16 x i8> %ins, <16 x i8> undef, <8 x i32> zeroinitializer 1082 %out2 = sext <8 x i8> %shuf2 to <8 x i16> 1083 %out = mul <8 x i16> %out1, %out2 1084 ret <8 x i16> %out 1085} 1086 1087define arm_aapcs_vfpcc <8 x i16> @sext8_0ext_135791113151719212325272931(<16 x i8> %src1, i8 %src2) { 1088; CHECK-LABEL: sext8_0ext_135791113151719212325272931: 1089; CHECK: @ %bb.0: @ %entry 1090; CHECK-NEXT: vrev16.8 q0, q0 1091; CHECK-NEXT: vdup.16 q1, r0 1092; CHECK-NEXT: vmullb.s8 q0, q1, q0 1093; CHECK-NEXT: bx lr 1094entry: 1095 %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> 1096 %out1 = sext <8 x i8> %shuf1 to <8 x i16> 1097 %ins = insertelement <16 x i8> poison, i8 %src2, i32 0 1098 %shuf2 = shufflevector <16 x i8> %ins, <16 x i8> undef, <8 x i32> zeroinitializer 1099 %out2 = sext <8 x i8> %shuf2 to <8 x i16> 1100 %out = mul <8 x i16> %out2, %out1 1101 ret <8 x i16> %out 1102} 1103 1104define arm_aapcs_vfpcc <8 x i16> @sext8_135791113151719212325272931_ext0(<16 x i8> %src1, i8 %src2) { 1105; CHECK-LABEL: sext8_135791113151719212325272931_ext0: 1106; CHECK: @ %bb.0: @ %entry 1107; CHECK-NEXT: vmovlt.s8 q0, q0 1108; CHECK-NEXT: sxtb r0, r0 1109; CHECK-NEXT: vmul.i16 q0, q0, r0 1110; CHECK-NEXT: bx lr 1111entry: 1112 %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> 1113 %out1 = sext <8 x i8> %shuf1 to <8 x i16> 1114 %ext = sext i8 %src2 to i16 1115 %ins = insertelement <8 x i16> poison, i16 %ext, i32 0 1116 %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer 1117 %out = mul <8 x i16> %out1, %shuf2 1118 ret <8 x i16> %out 1119} 1120 1121define arm_aapcs_vfpcc <8 x i16> @sext8_ext0_135791113151719212325272931(<16 x i8> %src1, i8 %src2) { 1122; CHECK-LABEL: sext8_ext0_135791113151719212325272931: 1123; CHECK: @ %bb.0: @ %entry 1124; CHECK-NEXT: vmovlt.s8 q0, q0 
1125; CHECK-NEXT: sxtb r0, r0 1126; CHECK-NEXT: vmul.i16 q0, q0, r0 1127; CHECK-NEXT: bx lr 1128entry: 1129 %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> 1130 %out1 = sext <8 x i8> %shuf1 to <8 x i16> 1131 %ext = sext i8 %src2 to i16 1132 %ins = insertelement <8 x i16> poison, i16 %ext, i32 0 1133 %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer 1134 %out = mul <8 x i16> %shuf2, %out1 1135 ret <8 x i16> %out 1136} 1137 1138define arm_aapcs_vfpcc <16 x i16> @sext8_0246810121413579111315_0ext(<32 x i8> %src1, i8 %src2) { 1139; CHECK-LABEL: sext8_0246810121413579111315_0ext: 1140; CHECK: @ %bb.0: @ %entry 1141; CHECK-NEXT: vdup.8 q2, r0 1142; CHECK-NEXT: vrev16.8 q1, q0 1143; CHECK-NEXT: vmullb.s8 q1, q1, q2 1144; CHECK-NEXT: vmullb.s8 q0, q0, q2 1145; CHECK-NEXT: bx lr 1146entry: 1147 %shuf1 = shufflevector <32 x i8> %src1, <32 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> 1148 %out1 = sext <16 x i8> %shuf1 to <16 x i16> 1149 %ins = insertelement <32 x i8> poison, i8 %src2, i32 0 1150 %shuf2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <16 x i32> zeroinitializer 1151 %out2 = sext <16 x i8> %shuf2 to <16 x i16> 1152 %out = mul <16 x i16> %out1, %out2 1153 ret <16 x i16> %out 1154} 1155 1156define arm_aapcs_vfpcc <16 x i16> @sext8_0ext_0246810121413579111315(<32 x i8> %src1, i8 %src2) { 1157; CHECK-LABEL: sext8_0ext_0246810121413579111315: 1158; CHECK: @ %bb.0: @ %entry 1159; CHECK-NEXT: vrev16.8 q1, q0 1160; CHECK-NEXT: vdup.8 q2, r0 1161; CHECK-NEXT: vmullb.s8 q1, q2, q1 1162; CHECK-NEXT: vmullb.s8 q0, q2, q0 1163; CHECK-NEXT: bx lr 1164entry: 1165 %shuf1 = shufflevector <32 x i8> %src1, <32 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> 1166 %out1 = sext <16 x 
i8> %shuf1 to <16 x i16> 1167 %ins = insertelement <32 x i8> poison, i8 %src2, i32 0 1168 %shuf2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <16 x i32> zeroinitializer 1169 %out2 = sext <16 x i8> %shuf2 to <16 x i16> 1170 %out = mul <16 x i16> %out2, %out1 1171 ret <16 x i16> %out 1172} 1173 1174define arm_aapcs_vfpcc <16 x i16> @sext8_0246810121413579111315_ext0(<32 x i8> %src1, i8 %src2) { 1175; CHECK-LABEL: sext8_0246810121413579111315_ext0: 1176; CHECK: @ %bb.0: @ %entry 1177; CHECK-NEXT: vmovlb.s8 q1, q0 1178; CHECK-NEXT: sxtb r0, r0 1179; CHECK-NEXT: vmul.i16 q2, q1, r0 1180; CHECK-NEXT: vmovlt.s8 q0, q0 1181; CHECK-NEXT: vmul.i16 q1, q0, r0 1182; CHECK-NEXT: vmov q0, q2 1183; CHECK-NEXT: bx lr 1184entry: 1185 %shuf1 = shufflevector <32 x i8> %src1, <32 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> 1186 %out1 = sext <16 x i8> %shuf1 to <16 x i16> 1187 %ext = sext i8 %src2 to i16 1188 %ins = insertelement <16 x i16> poison, i16 %ext, i32 0 1189 %shuf2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer 1190 %out = mul <16 x i16> %out1, %shuf2 1191 ret <16 x i16> %out 1192} 1193 1194define arm_aapcs_vfpcc <16 x i16> @sext8_ext0_0246810121413579111315(<32 x i8> %src1, i8 %src2) { 1195; CHECK-LABEL: sext8_ext0_0246810121413579111315: 1196; CHECK: @ %bb.0: @ %entry 1197; CHECK-NEXT: vmovlb.s8 q1, q0 1198; CHECK-NEXT: sxtb r0, r0 1199; CHECK-NEXT: vmul.i16 q2, q1, r0 1200; CHECK-NEXT: vmovlt.s8 q0, q0 1201; CHECK-NEXT: vmul.i16 q1, q0, r0 1202; CHECK-NEXT: vmov q0, q2 1203; CHECK-NEXT: bx lr 1204entry: 1205 %shuf1 = shufflevector <32 x i8> %src1, <32 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> 1206 %out1 = sext <16 x i8> %shuf1 to <16 x i16> 1207 %ext = sext i8 %src2 to i16 1208 %ins = insertelement <16 x i16> poison, i16 %ext, i32 0 1209 
%shuf2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer 1210 %out = mul <16 x i16> %shuf2, %out1 1211 ret <16 x i16> %out 1212} 1213 1214define arm_aapcs_vfpcc <8 x i16> @zext8_024681012141618202224262830_0ext(<16 x i8> %src1, i8 %src2) { 1215; CHECK-LABEL: zext8_024681012141618202224262830_0ext: 1216; CHECK: @ %bb.0: @ %entry 1217; CHECK-NEXT: vdup.16 q1, r0 1218; CHECK-NEXT: vmullb.u8 q0, q0, q1 1219; CHECK-NEXT: bx lr 1220entry: 1221 %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> 1222 %out1 = zext <8 x i8> %shuf1 to <8 x i16> 1223 %ins = insertelement <16 x i8> poison, i8 %src2, i32 0 1224 %shuf2 = shufflevector <16 x i8> %ins, <16 x i8> undef, <8 x i32> zeroinitializer 1225 %out2 = zext <8 x i8> %shuf2 to <8 x i16> 1226 %out = mul <8 x i16> %out1, %out2 1227 ret <8 x i16> %out 1228} 1229 1230define arm_aapcs_vfpcc <8 x i16> @zext8_0ext_024681012141618202224262830(<16 x i8> %src1, i8 %src2) { 1231; CHECK-LABEL: zext8_0ext_024681012141618202224262830: 1232; CHECK: @ %bb.0: @ %entry 1233; CHECK-NEXT: vdup.16 q1, r0 1234; CHECK-NEXT: vmullb.u8 q0, q1, q0 1235; CHECK-NEXT: bx lr 1236entry: 1237 %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> 1238 %out1 = zext <8 x i8> %shuf1 to <8 x i16> 1239 %ins = insertelement <16 x i8> poison, i8 %src2, i32 0 1240 %shuf2 = shufflevector <16 x i8> %ins, <16 x i8> undef, <8 x i32> zeroinitializer 1241 %out2 = zext <8 x i8> %shuf2 to <8 x i16> 1242 %out = mul <8 x i16> %out2, %out1 1243 ret <8 x i16> %out 1244} 1245 1246define arm_aapcs_vfpcc <8 x i16> @zext8_024681012141618202224262830_ext0(<16 x i8> %src1, i8 %src2) { 1247; CHECK-LABEL: zext8_024681012141618202224262830_ext0: 1248; CHECK: @ %bb.0: @ %entry 1249; CHECK-NEXT: vmovlb.u8 q0, q0 1250; CHECK-NEXT: uxtb r0, r0 1251; CHECK-NEXT: vmul.i16 q0, q0, r0 1252; CHECK-NEXT: bx lr 1253entry: 
1254 %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> 1255 %out1 = zext <8 x i8> %shuf1 to <8 x i16> 1256 %ext = zext i8 %src2 to i16 1257 %ins = insertelement <8 x i16> poison, i16 %ext, i32 0 1258 %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer 1259 %out = mul <8 x i16> %out1, %shuf2 1260 ret <8 x i16> %out 1261} 1262 1263define arm_aapcs_vfpcc <8 x i16> @zext8_ext0_024681012141618202224262830(<16 x i8> %src1, i8 %src2) { 1264; CHECK-LABEL: zext8_ext0_024681012141618202224262830: 1265; CHECK: @ %bb.0: @ %entry 1266; CHECK-NEXT: vmovlb.u8 q0, q0 1267; CHECK-NEXT: uxtb r0, r0 1268; CHECK-NEXT: vmul.i16 q0, q0, r0 1269; CHECK-NEXT: bx lr 1270entry: 1271 %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14> 1272 %out1 = zext <8 x i8> %shuf1 to <8 x i16> 1273 %ext = zext i8 %src2 to i16 1274 %ins = insertelement <8 x i16> poison, i16 %ext, i32 0 1275 %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer 1276 %out = mul <8 x i16> %shuf2, %out1 1277 ret <8 x i16> %out 1278} 1279 1280define arm_aapcs_vfpcc <8 x i16> @zext8_135791113151719212325272931_0ext(<16 x i8> %src1, i8 %src2) { 1281; CHECK-LABEL: zext8_135791113151719212325272931_0ext: 1282; CHECK: @ %bb.0: @ %entry 1283; CHECK-NEXT: vdup.16 q1, r0 1284; CHECK-NEXT: vrev16.8 q0, q0 1285; CHECK-NEXT: vmullb.u8 q0, q0, q1 1286; CHECK-NEXT: bx lr 1287entry: 1288 %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> 1289 %out1 = zext <8 x i8> %shuf1 to <8 x i16> 1290 %ins = insertelement <16 x i8> poison, i8 %src2, i32 0 1291 %shuf2 = shufflevector <16 x i8> %ins, <16 x i8> undef, <8 x i32> zeroinitializer 1292 %out2 = zext <8 x i8> %shuf2 to <8 x i16> 1293 %out = mul <8 x i16> %out1, %out2 1294 ret <8 x i16> %out 1295} 1296 1297define 
arm_aapcs_vfpcc <8 x i16> @zext8_0ext_135791113151719212325272931(<16 x i8> %src1, i8 %src2) { 1298; CHECK-LABEL: zext8_0ext_135791113151719212325272931: 1299; CHECK: @ %bb.0: @ %entry 1300; CHECK-NEXT: vrev16.8 q0, q0 1301; CHECK-NEXT: vdup.16 q1, r0 1302; CHECK-NEXT: vmullb.u8 q0, q1, q0 1303; CHECK-NEXT: bx lr 1304entry: 1305 %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> 1306 %out1 = zext <8 x i8> %shuf1 to <8 x i16> 1307 %ins = insertelement <16 x i8> poison, i8 %src2, i32 0 1308 %shuf2 = shufflevector <16 x i8> %ins, <16 x i8> undef, <8 x i32> zeroinitializer 1309 %out2 = zext <8 x i8> %shuf2 to <8 x i16> 1310 %out = mul <8 x i16> %out2, %out1 1311 ret <8 x i16> %out 1312} 1313 1314define arm_aapcs_vfpcc <8 x i16> @zext8_135791113151719212325272931_ext0(<16 x i8> %src1, i8 %src2) { 1315; CHECK-LABEL: zext8_135791113151719212325272931_ext0: 1316; CHECK: @ %bb.0: @ %entry 1317; CHECK-NEXT: vmovlt.u8 q0, q0 1318; CHECK-NEXT: uxtb r0, r0 1319; CHECK-NEXT: vmul.i16 q0, q0, r0 1320; CHECK-NEXT: bx lr 1321entry: 1322 %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> 1323 %out1 = zext <8 x i8> %shuf1 to <8 x i16> 1324 %ext = zext i8 %src2 to i16 1325 %ins = insertelement <8 x i16> poison, i16 %ext, i32 0 1326 %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer 1327 %out = mul <8 x i16> %out1, %shuf2 1328 ret <8 x i16> %out 1329} 1330 1331define arm_aapcs_vfpcc <8 x i16> @zext8_ext0_135791113151719212325272931(<16 x i8> %src1, i8 %src2) { 1332; CHECK-LABEL: zext8_ext0_135791113151719212325272931: 1333; CHECK: @ %bb.0: @ %entry 1334; CHECK-NEXT: vmovlt.u8 q0, q0 1335; CHECK-NEXT: uxtb r0, r0 1336; CHECK-NEXT: vmul.i16 q0, q0, r0 1337; CHECK-NEXT: bx lr 1338entry: 1339 %shuf1 = shufflevector <16 x i8> %src1, <16 x i8> undef, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, 
i32 15> 1340 %out1 = zext <8 x i8> %shuf1 to <8 x i16> 1341 %ext = zext i8 %src2 to i16 1342 %ins = insertelement <8 x i16> poison, i16 %ext, i32 0 1343 %shuf2 = shufflevector <8 x i16> %ins, <8 x i16> undef, <8 x i32> zeroinitializer 1344 %out = mul <8 x i16> %shuf2, %out1 1345 ret <8 x i16> %out 1346} 1347 1348define arm_aapcs_vfpcc <16 x i16> @zext8_0246810121413579111315_0ext(<32 x i8> %src1, i8 %src2) { 1349; CHECK-LABEL: zext8_0246810121413579111315_0ext: 1350; CHECK: @ %bb.0: @ %entry 1351; CHECK-NEXT: vdup.8 q2, r0 1352; CHECK-NEXT: vrev16.8 q1, q0 1353; CHECK-NEXT: vmullb.u8 q1, q1, q2 1354; CHECK-NEXT: vmullb.u8 q0, q0, q2 1355; CHECK-NEXT: bx lr 1356entry: 1357 %shuf1 = shufflevector <32 x i8> %src1, <32 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> 1358 %out1 = zext <16 x i8> %shuf1 to <16 x i16> 1359 %ins = insertelement <32 x i8> poison, i8 %src2, i32 0 1360 %shuf2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <16 x i32> zeroinitializer 1361 %out2 = zext <16 x i8> %shuf2 to <16 x i16> 1362 %out = mul <16 x i16> %out1, %out2 1363 ret <16 x i16> %out 1364} 1365 1366define arm_aapcs_vfpcc <16 x i16> @zext8_0ext_0246810121413579111315(<32 x i8> %src1, i8 %src2) { 1367; CHECK-LABEL: zext8_0ext_0246810121413579111315: 1368; CHECK: @ %bb.0: @ %entry 1369; CHECK-NEXT: vrev16.8 q1, q0 1370; CHECK-NEXT: vdup.8 q2, r0 1371; CHECK-NEXT: vmullb.u8 q1, q2, q1 1372; CHECK-NEXT: vmullb.u8 q0, q2, q0 1373; CHECK-NEXT: bx lr 1374entry: 1375 %shuf1 = shufflevector <32 x i8> %src1, <32 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> 1376 %out1 = zext <16 x i8> %shuf1 to <16 x i16> 1377 %ins = insertelement <32 x i8> poison, i8 %src2, i32 0 1378 %shuf2 = shufflevector <32 x i8> %ins, <32 x i8> undef, <16 x i32> zeroinitializer 1379 %out2 = zext <16 x i8> %shuf2 to <16 x i16> 1380 
%out = mul <16 x i16> %out2, %out1 1381 ret <16 x i16> %out 1382} 1383 1384define arm_aapcs_vfpcc <16 x i16> @zext8_0246810121413579111315_ext0(<32 x i8> %src1, i8 %src2) { 1385; CHECK-LABEL: zext8_0246810121413579111315_ext0: 1386; CHECK: @ %bb.0: @ %entry 1387; CHECK-NEXT: vmovlb.u8 q1, q0 1388; CHECK-NEXT: uxtb r0, r0 1389; CHECK-NEXT: vmul.i16 q2, q1, r0 1390; CHECK-NEXT: vmovlt.u8 q0, q0 1391; CHECK-NEXT: vmul.i16 q1, q0, r0 1392; CHECK-NEXT: vmov q0, q2 1393; CHECK-NEXT: bx lr 1394entry: 1395 %shuf1 = shufflevector <32 x i8> %src1, <32 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> 1396 %out1 = zext <16 x i8> %shuf1 to <16 x i16> 1397 %ext = zext i8 %src2 to i16 1398 %ins = insertelement <16 x i16> poison, i16 %ext, i32 0 1399 %shuf2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer 1400 %out = mul <16 x i16> %out1, %shuf2 1401 ret <16 x i16> %out 1402} 1403 1404define arm_aapcs_vfpcc <16 x i16> @zext8_ext0_0246810121413579111315(<32 x i8> %src1, i8 %src2) { 1405; CHECK-LABEL: zext8_ext0_0246810121413579111315: 1406; CHECK: @ %bb.0: @ %entry 1407; CHECK-NEXT: vmovlb.u8 q1, q0 1408; CHECK-NEXT: uxtb r0, r0 1409; CHECK-NEXT: vmul.i16 q2, q1, r0 1410; CHECK-NEXT: vmovlt.u8 q0, q0 1411; CHECK-NEXT: vmul.i16 q1, q0, r0 1412; CHECK-NEXT: vmov q0, q2 1413; CHECK-NEXT: bx lr 1414entry: 1415 %shuf1 = shufflevector <32 x i8> %src1, <32 x i8> undef, <16 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14, i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> 1416 %out1 = zext <16 x i8> %shuf1 to <16 x i16> 1417 %ext = zext i8 %src2 to i16 1418 %ins = insertelement <16 x i16> poison, i16 %ext, i32 0 1419 %shuf2 = shufflevector <16 x i16> %ins, <16 x i16> undef, <16 x i32> zeroinitializer 1420 %out = mul <16 x i16> %shuf2, %out1 1421 ret <16 x i16> %out 1422} 1423