; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple aarch64-none-linux-gnu | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc < %s -mtriple aarch64-none-linux-gnu -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI

; Each function below multiplies an extended vector by a splatted (or
; shuffled) extended value. The expected AArch64 assembly is recorded for both
; llc invocations above: the default instruction selector and -global-isel.

; Supported combines

; Splat of a sign-extended i8 scalar times a sign-extended <8 x i8> vector.
define <8 x i16> @dupsext_v8i8_v8i16(i8 %src, <8 x i8> %b) {
; CHECK-SD-LABEL: dupsext_v8i8_v8i16:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    dup v1.8b, w0
; CHECK-SD-NEXT:    smull v0.8h, v1.8b, v0.8b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: dupsext_v8i8_v8i16:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    lsl w8, w0, #8
; CHECK-GI-NEXT:    sshll v0.8h, v0.8b, #0
; CHECK-GI-NEXT:    sbfx w8, w8, #8, #8
; CHECK-GI-NEXT:    dup v1.8h, w8
; CHECK-GI-NEXT:    mul v0.8h, v1.8h, v0.8h
; CHECK-GI-NEXT:    ret
entry:
  %in = sext i8 %src to i16
  %ext.b = sext <8 x i8> %b to <8 x i16>
  %broadcast.splatinsert = insertelement <8 x i16> undef, i16 %in, i16 0
  %broadcast.splat = shufflevector <8 x i16> %broadcast.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
  %out = mul nsw <8 x i16> %broadcast.splat, %ext.b
  ret <8 x i16> %out
}

; Splat of a zero-extended i8 scalar times a zero-extended <8 x i8> vector.
define <8 x i16> @dupzext_v8i8_v8i16(i8 %src, <8 x i8> %b) {
; CHECK-SD-LABEL: dupzext_v8i8_v8i16:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    dup v1.8b, w0
; CHECK-SD-NEXT:    umull v0.8h, v1.8b, v0.8b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: dupzext_v8i8_v8i16:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    and w8, w0, #0xff
; CHECK-GI-NEXT:    ushll v0.8h, v0.8b, #0
; CHECK-GI-NEXT:    dup v1.8h, w8
; CHECK-GI-NEXT:    mul v0.8h, v1.8h, v0.8h
; CHECK-GI-NEXT:    ret
entry:
  %in = zext i8 %src to i16
  %ext.b = zext <8 x i8> %b to <8 x i16>
  %broadcast.splatinsert = insertelement <8 x i16> undef, i16 %in, i16 0
  %broadcast.splat = shufflevector <8 x i16> %broadcast.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
  %out = mul nuw <8 x i16> %broadcast.splat, %ext.b
  ret <8 x i16> %out
}

; Splat of a sign-extended i16 scalar times a sign-extended <4 x i16> vector.
define <4 x i32> @dupsext_v4i16_v4i32(i16 %src, <4 x i16> %b) {
; CHECK-SD-LABEL: dupsext_v4i16_v4i32:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    dup v1.4h, w0
; CHECK-SD-NEXT:    smull v0.4s, v1.4h, v0.4h
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: dupsext_v4i16_v4i32:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    sxth w8, w0
; CHECK-GI-NEXT:    sshll v0.4s, v0.4h, #0
; CHECK-GI-NEXT:    dup v1.4s, w8
; CHECK-GI-NEXT:    mul v0.4s, v1.4s, v0.4s
; CHECK-GI-NEXT:    ret
entry:
  %in = sext i16 %src to i32
  %ext.b = sext <4 x i16> %b to <4 x i32>
  %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %in, i32 0
  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
  %out = mul nsw <4 x i32> %broadcast.splat, %ext.b
  ret <4 x i32> %out
}

; Splat of a zero-extended i16 scalar times a zero-extended <4 x i16> vector.
define <4 x i32> @dupzext_v4i16_v4i32(i16 %src, <4 x i16> %b) {
; CHECK-SD-LABEL: dupzext_v4i16_v4i32:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    dup v1.4h, w0
; CHECK-SD-NEXT:    umull v0.4s, v1.4h, v0.4h
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: dupzext_v4i16_v4i32:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    and w8, w0, #0xffff
; CHECK-GI-NEXT:    ushll v0.4s, v0.4h, #0
; CHECK-GI-NEXT:    dup v1.4s, w8
; CHECK-GI-NEXT:    mul v0.4s, v1.4s, v0.4s
; CHECK-GI-NEXT:    ret
entry:
  %in = zext i16 %src to i32
  %ext.b = zext <4 x i16> %b to <4 x i32>
  %broadcast.splatinsert = insertelement <4 x i32> undef, i32 %in, i32 0
  %broadcast.splat = shufflevector <4 x i32> %broadcast.splatinsert, <4 x i32> undef, <4 x i32> zeroinitializer
  %out = mul nuw <4 x i32> %broadcast.splat, %ext.b
  ret <4 x i32> %out
}

; Splat of a sign-extended i32 scalar times a sign-extended <2 x i32> vector.
define <2 x i64> @dupsext_v2i32_v2i64(i32 %src, <2 x i32> %b) {
; CHECK-SD-LABEL: dupsext_v2i32_v2i64:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    dup v1.2s, w0
; CHECK-SD-NEXT:    smull v0.2d, v1.2s, v0.2s
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: dupsext_v2i32_v2i64:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-GI-NEXT:    sxtw x8, w0
; CHECK-GI-NEXT:    sshll v0.2d, v0.2s, #0
; CHECK-GI-NEXT:    dup v1.2d, x8
; CHECK-GI-NEXT:    fmov x9, d0
; CHECK-GI-NEXT:    mov x11, v0.d[1]
; CHECK-GI-NEXT:    fmov x8, d1
; CHECK-GI-NEXT:    mov x10, v1.d[1]
; CHECK-GI-NEXT:    mul x8, x8, x9
; CHECK-GI-NEXT:    mul x9, x10, x11
; CHECK-GI-NEXT:    mov v0.d[0], x8
; CHECK-GI-NEXT:    mov v0.d[1], x9
; CHECK-GI-NEXT:    ret
entry:
  %in = sext i32 %src to i64
  %ext.b = sext <2 x i32> %b to <2 x i64>
  %broadcast.splatinsert = insertelement <2 x i64> undef, i64 %in, i64 0
  %broadcast.splat = shufflevector <2 x i64> %broadcast.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
  %out = mul nsw <2 x i64> %broadcast.splat, %ext.b
  ret <2 x i64> %out
}

; Splat of a zero-extended i32 scalar times a zero-extended <2 x i32> vector.
define <2 x i64> @dupzext_v2i32_v2i64(i32 %src, <2 x i32> %b) {
; CHECK-SD-LABEL: dupzext_v2i32_v2i64:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    dup v1.2s, w0
; CHECK-SD-NEXT:    umull v0.2d, v1.2s, v0.2s
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: dupzext_v2i32_v2i64:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    mov w8, w0
; CHECK-GI-NEXT:    ushll v0.2d, v0.2s, #0
; CHECK-GI-NEXT:    dup v1.2d, x8
; CHECK-GI-NEXT:    fmov x9, d0
; CHECK-GI-NEXT:    mov x11, v0.d[1]
; CHECK-GI-NEXT:    fmov x8, d1
; CHECK-GI-NEXT:    mov x10, v1.d[1]
; CHECK-GI-NEXT:    mul x8, x8, x9
; CHECK-GI-NEXT:    mul x9, x10, x11
; CHECK-GI-NEXT:    mov v0.d[0], x8
; CHECK-GI-NEXT:    mov v0.d[1], x9
; CHECK-GI-NEXT:    ret
entry:
  %in = zext i32 %src to i64
  %ext.b = zext <2 x i32> %b to <2 x i64>
  %broadcast.splatinsert = insertelement <2 x i64> undef, i64 %in, i64 0
  %broadcast.splat = shufflevector <2 x i64> %broadcast.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
  %out = mul nuw <2 x i64> %broadcast.splat, %ext.b
  ret <2 x i64> %out
}

; Same splat-times-vector multiply as the zero-extending <2 x i32> case, but
; the <2 x i64> product is truncated back to <2 x i32> before being returned.
; NOTE(review): the default-selector expectation below uses smull although
; both inputs are zero-extended; only the truncated low half of each product
; is returned. Confirm this is the intended output when regenerating.
define <2 x i32> @dupzext_v2i32_v2i64_trunc(i32 %src, <2 x i32> %b) {
; CHECK-SD-LABEL: dupzext_v2i32_v2i64_trunc:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    dup v1.2s, w0
; CHECK-SD-NEXT:    smull v0.2d, v1.2s, v0.2s
; CHECK-SD-NEXT:    xtn v0.2s, v0.2d
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: dupzext_v2i32_v2i64_trunc:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    mov w8, w0
; CHECK-GI-NEXT:    ushll v0.2d, v0.2s, #0
; CHECK-GI-NEXT:    dup v1.2d, x8
; CHECK-GI-NEXT:    fmov x9, d0
; CHECK-GI-NEXT:    mov x11, v0.d[1]
; CHECK-GI-NEXT:    fmov x8, d1
; CHECK-GI-NEXT:    mov x10, v1.d[1]
; CHECK-GI-NEXT:    mul x8, x8, x9
; CHECK-GI-NEXT:    mul x9, x10, x11
; CHECK-GI-NEXT:    mov v0.d[0], x8
; CHECK-GI-NEXT:    mov v0.d[1], x9
; CHECK-GI-NEXT:    xtn v0.2s, v0.2d
; CHECK-GI-NEXT:    ret
entry:
  %in = zext i32 %src to i64
  %ext.b = zext <2 x i32> %b to <2 x i64>
  %broadcast.splatinsert = insertelement <2 x i64> poison, i64 %in, i64 0
  %broadcast.splat = shufflevector <2 x i64> %broadcast.splatinsert, <2 x i64> poison, <2 x i32> zeroinitializer
  %prod = mul nuw <2 x i64> %broadcast.splat, %ext.b
  %out = trunc <2 x i64> %prod to <2 x i32>
  ret <2 x i32> %out
}

; Unsupported combines

; Splat of a sign-extended i8 scalar times a sign-extended <2 x i8> vector,
; producing <2 x i16>.
define <2 x i16> @dupsext_v2i8_v2i16(i8 %src, <2 x i8> %b) {
; CHECK-SD-LABEL: dupsext_v2i8_v2i16:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    sxtb w8, w0
; CHECK-SD-NEXT:    shl v0.2s, v0.2s, #24
; CHECK-SD-NEXT:    dup v1.2s, w8
; CHECK-SD-NEXT:    sshr v0.2s, v0.2s, #24
; CHECK-SD-NEXT:    mul v0.2s, v1.2s, v0.2s
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: dupsext_v2i8_v2i16:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    lsl w8, w0, #8
; CHECK-GI-NEXT:    shl v0.2s, v0.2s, #24
; CHECK-GI-NEXT:    sbfx w8, w8, #8, #8
; CHECK-GI-NEXT:    sshr v0.2s, v0.2s, #24
; CHECK-GI-NEXT:    dup v1.4h, w8
; CHECK-GI-NEXT:    ushll v1.4s, v1.4h, #0
; CHECK-GI-NEXT:    mul v0.2s, v1.2s, v0.2s
; CHECK-GI-NEXT:    ret
entry:
  %in = sext i8 %src to i16
  %ext.b = sext <2 x i8> %b to <2 x i16>
  %broadcast.splatinsert = insertelement <2 x i16> undef, i16 %in, i16 0
  %broadcast.splat = shufflevector <2 x i16> %broadcast.splatinsert, <2 x i16> undef, <2 x i32> zeroinitializer
  %out = mul nsw <2 x i16> %broadcast.splat, %ext.b
  ret <2 x i16> %out
}

; Splat of a zero-extended i16 scalar times a zero-extended <2 x i16> vector,
; producing <2 x i64>.
define <2 x i64> @dupzext_v2i16_v2i64(i16 %src, <2 x i16> %b) {
; CHECK-SD-LABEL: dupzext_v2i16_v2i64:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    movi d1, #0x00ffff0000ffff
; CHECK-SD-NEXT:    and w8, w0, #0xffff
; CHECK-SD-NEXT:    dup v2.2s, w8
; CHECK-SD-NEXT:    and v0.8b, v0.8b, v1.8b
; CHECK-SD-NEXT:    umull v0.2d, v2.2s, v0.2s
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: dupzext_v2i16_v2i64:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    movi v1.2d, #0x0000000000ffff
; CHECK-GI-NEXT:    ushll v0.2d, v0.2s, #0
; CHECK-GI-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-GI-NEXT:    and x8, x0, #0xffff
; CHECK-GI-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-GI-NEXT:    dup v1.2d, x8
; CHECK-GI-NEXT:    fmov x8, d1
; CHECK-GI-NEXT:    fmov x9, d0
; CHECK-GI-NEXT:    mov x10, v1.d[1]
; CHECK-GI-NEXT:    mov x11, v0.d[1]
; CHECK-GI-NEXT:    mul x8, x8, x9
; CHECK-GI-NEXT:    mul x9, x10, x11
; CHECK-GI-NEXT:    mov v0.d[0], x8
; CHECK-GI-NEXT:    mov v0.d[1], x9
; CHECK-GI-NEXT:    ret
entry:
  %in = zext i16 %src to i64
  %ext.b = zext <2 x i16> %b to <2 x i64>
  %broadcast.splatinsert = insertelement <2 x i64> undef, i64 %in, i64 0
  %broadcast.splat = shufflevector <2 x i64> %broadcast.splatinsert, <2 x i64> undef, <2 x i32> zeroinitializer
  %out = mul nuw <2 x i64> %broadcast.splat, %ext.b
  ret <2 x i64> %out
}

; The entries below are names only. They appear to enumerate further type
; combinations in this category (TODO confirm).
; dupsext_v4i8_v4i16
; dupsext_v2i8_v2i32
; dupsext_v4i8_v4i32
; dupsext_v2i8_v2i64
; dupsext_v2i16_v2i32
; dupsext_v2i16_v2i64
; dupzext_v2i8_v2i16
; dupzext_v4i8_v4i16
; dupzext_v2i8_v2i32
; dupzext_v4i8_v4i32
; dupzext_v2i8_v2i64
; dupzext_v2i16_v2i32
; dupzext_v2i16_v2i64
; NOTE(review): a function named dupzext_v2i16_v2i64 is defined earlier in
; this file, so the last entry of the list above may be stale.

; Unsupported states

; The scalar is inserted at lane 1 and the shuffle mask is a rotation, not a
; splat of lane 0.
define <8 x i16> @nonsplat_shuffleinsert(i8 %src, <8 x i8> %b) {
; CHECK-SD-LABEL: nonsplat_shuffleinsert:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    dup v1.8b, w0
; CHECK-SD-NEXT:    smull v0.8h, v1.8b, v0.8b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: nonsplat_shuffleinsert:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    lsl w8, w0, #8
; CHECK-GI-NEXT:    sshll v0.8h, v0.8b, #0
; CHECK-GI-NEXT:    sbfx w8, w8, #8, #8
; CHECK-GI-NEXT:    mov v1.h[1], w8
; CHECK-GI-NEXT:    ext v1.16b, v1.16b, v1.16b, #4
; CHECK-GI-NEXT:    mul v0.8h, v1.8h, v0.8h
; CHECK-GI-NEXT:    ret
entry:
  %in = sext i8 %src to i16
  %ext.b = sext <8 x i8> %b to <8 x i16>
  %broadcast.splatinsert = insertelement <8 x i16> undef, i16 %in, i16 1
  %broadcast.splat = shufflevector <8 x i16> %broadcast.splatinsert, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1>
  %out = mul nsw <8 x i16> %broadcast.splat, %ext.b
  ret <8 x i16> %out
}

; The multiplicand is built from four distinct sign-extended scalars with
; insertelement; there is no shufflevector.
define <4 x i32> @nonsplat_shuffleinsert2(<4 x i16> %b, i16 %b0, i16 %b1, i16 %b2, i16 %b3) {
; CHECK-SD-LABEL: nonsplat_shuffleinsert2:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    fmov s1, w0
; CHECK-SD-NEXT:    mov v1.h[1], w1
; CHECK-SD-NEXT:    mov v1.h[2], w2
; CHECK-SD-NEXT:    mov v1.h[3], w3
; CHECK-SD-NEXT:    smull v0.4s, v1.4h, v0.4h
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: nonsplat_shuffleinsert2:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    sxth w8, w0
; CHECK-GI-NEXT:    sshll v0.4s, v0.4h, #0
; CHECK-GI-NEXT:    mov v1.s[0], w8
; CHECK-GI-NEXT:    sxth w8, w1
; CHECK-GI-NEXT:    mov v1.s[1], w8
; CHECK-GI-NEXT:    sxth w8, w2
; CHECK-GI-NEXT:    mov v1.s[2], w8
; CHECK-GI-NEXT:    sxth w8, w3
; CHECK-GI-NEXT:    mov v1.s[3], w8
; CHECK-GI-NEXT:    mul v0.4s, v1.4s, v0.4s
; CHECK-GI-NEXT:    ret
entry:
  %s0 = sext i16 %b0 to i32
  %s1 = sext i16 %b1 to i32
  %s2 = sext i16 %b2 to i32
  %s3 = sext i16 %b3 to i32
  %ext.b = sext <4 x i16> %b to <4 x i32>
  %v0 = insertelement <4 x i32> undef, i32 %s0, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %s1, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 %s2, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 %s3, i32 3
  %out = mul nsw <4 x i32> %v3, %ext.b
  ret <4 x i32> %out
}

; The splatted value is a zero-extended i1 (the result of an icmp). The
; product is compared against zero, and'ed with %tmp, sign-extended to
; <16 x i8> and stored through %p.
define void @typei1_orig(i64 %a, ptr %p, ptr %q) {
; CHECK-SD-LABEL: typei1_orig:
; CHECK-SD:       // %bb.0:
; CHECK-SD-NEXT:    cmp x0, #0
; CHECK-SD-NEXT:    ldr q0, [x2]
; CHECK-SD-NEXT:    cset w8, gt
; CHECK-SD-NEXT:    dup v1.8h, w8
; CHECK-SD-NEXT:    cmtst v0.8h, v0.8h, v0.8h
; CHECK-SD-NEXT:    cmeq v1.8h, v1.8h, #0
; CHECK-SD-NEXT:    bic v0.16b, v0.16b, v1.16b
; CHECK-SD-NEXT:    xtn v0.8b, v0.8h
; CHECK-SD-NEXT:    str q0, [x1]
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: typei1_orig:
; CHECK-GI:       // %bb.0:
; CHECK-GI-NEXT:    ldr q1, [x2]
; CHECK-GI-NEXT:    cmp x0, #0
; CHECK-GI-NEXT:    movi v0.2d, #0xffffffffffffffff
; CHECK-GI-NEXT:    cset w8, gt
; CHECK-GI-NEXT:    neg v1.8h, v1.8h
; CHECK-GI-NEXT:    dup v2.8h, w8
; CHECK-GI-NEXT:    mvn v0.16b, v0.16b
; CHECK-GI-NEXT:    mul v1.8h, v1.8h, v2.8h
; CHECK-GI-NEXT:    cmeq v1.8h, v1.8h, #0
; CHECK-GI-NEXT:    mvn v1.16b, v1.16b
; CHECK-GI-NEXT:    uzp1 v0.16b, v1.16b, v0.16b
; CHECK-GI-NEXT:    shl v0.16b, v0.16b, #7
; CHECK-GI-NEXT:    sshr v0.16b, v0.16b, #7
; CHECK-GI-NEXT:    str q0, [x1]
; CHECK-GI-NEXT:    ret
  %tmp = xor <16 x i1> zeroinitializer, <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>
  %tmp6 = load <8 x i16>, ptr %q, align 2
  %tmp7 = sub <8 x i16> zeroinitializer, %tmp6
  %tmp8 = shufflevector <8 x i16> %tmp7, <8 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %tmp9 = icmp slt i64 0, %a
  %tmp10 = zext i1 %tmp9 to i16
  %tmp11 = insertelement <16 x i16> undef, i16 %tmp10, i64 0
  %tmp12 = shufflevector <16 x i16> %tmp11, <16 x i16> undef, <16 x i32> zeroinitializer
  %tmp13 = mul nuw <16 x i16> %tmp8, %tmp12
  %tmp14 = icmp ne <16 x i16> %tmp13, zeroinitializer
  %tmp15 = and <16 x i1> %tmp14, %tmp
  %tmp16 = sext <16 x i1> %tmp15 to <16 x i8>
  store <16 x i8> %tmp16, ptr %p, align 1
  ret void
}

; Splat of a zero-extended i1 scalar times a zero-extended <8 x i1> vector.
define <8 x i16> @typei1_v8i1_v8i16(i1 %src, <8 x i1> %b) {
; CHECK-SD-LABEL: typei1_v8i1_v8i16:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    movi v1.8b, #1
; CHECK-SD-NEXT:    and w8, w0, #0x1
; CHECK-SD-NEXT:    dup v2.8b, w8
; CHECK-SD-NEXT:    and v0.8b, v0.8b, v1.8b
; CHECK-SD-NEXT:    umull v0.8h, v2.8b, v0.8b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: typei1_v8i1_v8i16:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    movi v1.8h, #1
; CHECK-GI-NEXT:    ushll v0.8h, v0.8b, #0
; CHECK-GI-NEXT:    and w8, w0, #0x1
; CHECK-GI-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-GI-NEXT:    dup v1.8h, w8
; CHECK-GI-NEXT:    mul v0.8h, v1.8h, v0.8h
; CHECK-GI-NEXT:    ret
entry:
  %in = zext i1 %src to i16
  %ext.b = zext <8 x i1> %b to <8 x i16>
  %broadcast.splatinsert = insertelement <8 x i16> undef, i16 %in, i16 0
  %broadcast.splat = shufflevector <8 x i16> %broadcast.splatinsert, <8 x i16> undef, <8 x i32> zeroinitializer
  %out = mul nsw <8 x i16> %broadcast.splat, %ext.b
  ret <8 x i16> %out
}

; No insertelement: the shuffle rotates the sign-extended vector itself, and
; the result is multiplied by that same extended vector.
define <8 x i16> @missing_insert(<8 x i8> %b) {
; CHECK-SD-LABEL: missing_insert:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    ext v1.8b, v0.8b, v0.8b, #2
; CHECK-SD-NEXT:    smull v0.8h, v1.8b, v0.8b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: missing_insert:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    sshll v0.8h, v0.8b, #0
; CHECK-GI-NEXT:    ext v1.16b, v0.16b, v0.16b, #4
; CHECK-GI-NEXT:    mul v0.8h, v1.8h, v0.8h
; CHECK-GI-NEXT:    ret
entry:
  %ext.b = sext <8 x i8> %b to <8 x i16>
  %broadcast.splat = shufflevector <8 x i16> %ext.b, <8 x i16> undef, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1>
  %out = mul nsw <8 x i16> %broadcast.splat, %ext.b
  ret <8 x i16> %out
}

; Lane-reversing shuffle of a sign-extended <8 x i8> vector times another
; sign-extended vector.
define <8 x i16> @shufsext_v8i8_v8i16(<8 x i8> %src, <8 x i8> %b) {
; CHECK-SD-LABEL: shufsext_v8i8_v8i16:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    rev64 v0.8b, v0.8b
; CHECK-SD-NEXT:    smull v0.8h, v0.8b, v1.8b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: shufsext_v8i8_v8i16:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    sshll v0.8h, v0.8b, #0
; CHECK-GI-NEXT:    sshll v1.8h, v1.8b, #0
; CHECK-GI-NEXT:    rev64 v0.8h, v0.8h
; CHECK-GI-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECK-GI-NEXT:    mul v0.8h, v0.8h, v1.8h
; CHECK-GI-NEXT:    ret
entry:
  %in = sext <8 x i8> %src to <8 x i16>
  %ext.b = sext <8 x i8> %b to <8 x i16>
  %shuf = shufflevector <8 x i16> %in, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  %out = mul nsw <8 x i16> %shuf, %ext.b
  ret <8 x i16> %out
}

; Lane-swapping shuffle of a sign-extended <2 x i32> vector times another
; sign-extended vector.
define <2 x i64> @shufsext_v2i32_v2i64(<2 x i32> %src, <2 x i32> %b) {
; CHECK-SD-LABEL: shufsext_v2i32_v2i64:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    rev64 v0.2s, v0.2s
; CHECK-SD-NEXT:    smull v0.2d, v0.2s, v1.2s
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: shufsext_v2i32_v2i64:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    sshll v0.2d, v0.2s, #0
; CHECK-GI-NEXT:    sshll v1.2d, v1.2s, #0
; CHECK-GI-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECK-GI-NEXT:    fmov x9, d1
; CHECK-GI-NEXT:    mov x11, v1.d[1]
; CHECK-GI-NEXT:    fmov x8, d0
; CHECK-GI-NEXT:    mov x10, v0.d[1]
; CHECK-GI-NEXT:    mul x8, x8, x9
; CHECK-GI-NEXT:    mul x9, x10, x11
; CHECK-GI-NEXT:    mov v0.d[0], x8
; CHECK-GI-NEXT:    mov v0.d[1], x9
; CHECK-GI-NEXT:    ret
entry:
  %in = sext <2 x i32> %src to <2 x i64>
  %ext.b = sext <2 x i32> %b to <2 x i64>
  %shuf = shufflevector <2 x i64> %in, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
  %out = mul nsw <2 x i64> %shuf, %ext.b
  ret <2 x i64> %out
}

; Lane-reversing shuffle of a zero-extended <8 x i8> vector times another
; zero-extended vector.
define <8 x i16> @shufzext_v8i8_v8i16(<8 x i8> %src, <8 x i8> %b) {
; CHECK-SD-LABEL: shufzext_v8i8_v8i16:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    rev64 v0.8b, v0.8b
; CHECK-SD-NEXT:    umull v0.8h, v0.8b, v1.8b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: shufzext_v8i8_v8i16:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    ushll v0.8h, v0.8b, #0
; CHECK-GI-NEXT:    ushll v1.8h, v1.8b, #0
; CHECK-GI-NEXT:    rev64 v0.8h, v0.8h
; CHECK-GI-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECK-GI-NEXT:    mul v0.8h, v0.8h, v1.8h
; CHECK-GI-NEXT:    ret
entry:
  %in = zext <8 x i8> %src to <8 x i16>
  %ext.b = zext <8 x i8> %b to <8 x i16>
  %shuf = shufflevector <8 x i16> %in, <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  %out = mul nsw <8 x i16> %shuf, %ext.b
  ret <8 x i16> %out
}

; Lane-swapping shuffle of an extended <2 x i32> vector times another extended
; vector.
; NOTE(review): despite the "zext" in the name, both operands are extended
; with sext, so the IR matches shufsext_v2i32_v2i64. Confirm whether zext was
; intended; if the IR is changed, regenerate the assertions with
; utils/update_llc_test_checks.py rather than editing them by hand.
define <2 x i64> @shufzext_v2i32_v2i64(<2 x i32> %src, <2 x i32> %b) {
; CHECK-SD-LABEL: shufzext_v2i32_v2i64:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    rev64 v0.2s, v0.2s
; CHECK-SD-NEXT:    smull v0.2d, v0.2s, v1.2s
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: shufzext_v2i32_v2i64:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    sshll v0.2d, v0.2s, #0
; CHECK-GI-NEXT:    sshll v1.2d, v1.2s, #0
; CHECK-GI-NEXT:    ext v0.16b, v0.16b, v0.16b, #8
; CHECK-GI-NEXT:    fmov x9, d1
; CHECK-GI-NEXT:    mov x11, v1.d[1]
; CHECK-GI-NEXT:    fmov x8, d0
; CHECK-GI-NEXT:    mov x10, v0.d[1]
; CHECK-GI-NEXT:    mul x8, x8, x9
; CHECK-GI-NEXT:    mul x9, x10, x11
; CHECK-GI-NEXT:    mov v0.d[0], x8
; CHECK-GI-NEXT:    mov v0.d[1], x9
; CHECK-GI-NEXT:    ret
entry:
  %in = sext <2 x i32> %src to <2 x i64>
  %ext.b = sext <2 x i32> %b to <2 x i64>
  %shuf = shufflevector <2 x i64> %in, <2 x i64> undef, <2 x i32> <i32 1, i32 0>
  %out = mul nsw <2 x i64> %shuf, %ext.b
  ret <2 x i64> %out
}

; Two-input shuffle interleaving even lanes of two zero-extended vectors,
; times a third zero-extended vector.
define <8 x i16> @shufzext_v8i8_v8i16_twoin(<8 x i8> %src1, <8 x i8> %src2, <8 x i8> %b) {
; CHECK-SD-LABEL: shufzext_v8i8_v8i16_twoin:
; CHECK-SD:       // %bb.0: // %entry
; CHECK-SD-NEXT:    trn1 v0.8b, v0.8b, v1.8b
; CHECK-SD-NEXT:    umull v0.8h, v0.8b, v2.8b
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: shufzext_v8i8_v8i16_twoin:
; CHECK-GI:       // %bb.0: // %entry
; CHECK-GI-NEXT:    ushll v0.8h, v0.8b, #0
; CHECK-GI-NEXT:    ushll v1.8h, v1.8b, #0
; CHECK-GI-NEXT:    trn1 v0.8h, v0.8h, v1.8h
; CHECK-GI-NEXT:    ushll v1.8h, v2.8b, #0
; CHECK-GI-NEXT:    mul v0.8h, v0.8h, v1.8h
; CHECK-GI-NEXT:    ret
entry:
  %in1 = zext <8 x i8> %src1 to <8 x i16>
  %in2 = zext <8 x i8> %src2 to <8 x i16>
  %ext.b = zext <8 x i8> %b to <8 x i16>
  %shuf = shufflevector <8 x i16> %in1, <8 x i16> %in2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
  %out = mul nsw <8 x i16> %shuf, %ext.b
  ret <8 x i16> %out
}

; Same two-input shuffle, but the shuffle inputs mix extension kinds: %src1 is
; zero-extended and %src2 is sign-extended. Both llc invocations share one set
; of expectations for this function.
define <8 x i16> @shufszext_v8i8_v8i16_twoin(<8 x i8> %src1, <8 x i8> %src2, <8 x i8> %b) {
; CHECK-LABEL: shufszext_v8i8_v8i16_twoin:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
; CHECK-NEXT:    sshll v1.8h, v1.8b, #0
; CHECK-NEXT:    trn1 v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ushll v1.8h, v2.8b, #0
; CHECK-NEXT:    mul v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    ret
entry:
  %in1 = zext <8 x i8> %src1 to <8 x i16>
  %in2 = sext <8 x i8> %src2 to <8 x i16>
  %ext.b = zext <8 x i8> %b to <8 x i16>
  %shuf = shufflevector <8 x i16> %in1, <8 x i16> %in2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
  %out = mul nsw <8 x i16> %shuf, %ext.b
  ret <8 x i16> %out
}
