1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=armv7a-none-eabihf -mattr=+neon -verify-machineinstrs | FileCheck %s 3 4define <16 x i8> @ins16bw(<16 x i8> %tmp1, i8 %tmp2) { 5; CHECK-LABEL: ins16bw: 6; CHECK: @ %bb.0: 7; CHECK-NEXT: vmov.8 d1[7], r0 8; CHECK-NEXT: bx lr 9 %tmp3 = insertelement <16 x i8> %tmp1, i8 %tmp2, i32 15 10 ret <16 x i8> %tmp3 11} 12 13define <8 x i16> @ins8hw(<8 x i16> %tmp1, i16 %tmp2) { 14; CHECK-LABEL: ins8hw: 15; CHECK: @ %bb.0: 16; CHECK-NEXT: vmov.16 d1[2], r0 17; CHECK-NEXT: bx lr 18 %tmp3 = insertelement <8 x i16> %tmp1, i16 %tmp2, i32 6 19 ret <8 x i16> %tmp3 20} 21 22define <4 x i32> @ins4sw(<4 x i32> %tmp1, i32 %tmp2) { 23; CHECK-LABEL: ins4sw: 24; CHECK: @ %bb.0: 25; CHECK-NEXT: vmov.32 d1[0], r0 26; CHECK-NEXT: bx lr 27 %tmp3 = insertelement <4 x i32> %tmp1, i32 %tmp2, i32 2 28 ret <4 x i32> %tmp3 29} 30 31define <2 x i64> @ins2dw(<2 x i64> %tmp1, i64 %tmp2) { 32; CHECK-LABEL: ins2dw: 33; CHECK: @ %bb.0: 34; CHECK-NEXT: vmov.32 d1[0], r0 35; CHECK-NEXT: vmov.32 d1[1], r1 36; CHECK-NEXT: bx lr 37 %tmp3 = insertelement <2 x i64> %tmp1, i64 %tmp2, i32 1 38 ret <2 x i64> %tmp3 39} 40 41define <8 x i8> @ins8bw(<8 x i8> %tmp1, i8 %tmp2) { 42; CHECK-LABEL: ins8bw: 43; CHECK: @ %bb.0: 44; CHECK-NEXT: vmov.8 d0[5], r0 45; CHECK-NEXT: bx lr 46 %tmp3 = insertelement <8 x i8> %tmp1, i8 %tmp2, i32 5 47 ret <8 x i8> %tmp3 48} 49 50define <4 x i16> @ins4hw(<4 x i16> %tmp1, i16 %tmp2) { 51; CHECK-LABEL: ins4hw: 52; CHECK: @ %bb.0: 53; CHECK-NEXT: vmov.16 d0[3], r0 54; CHECK-NEXT: bx lr 55 %tmp3 = insertelement <4 x i16> %tmp1, i16 %tmp2, i32 3 56 ret <4 x i16> %tmp3 57} 58 59define <2 x i32> @ins2sw(<2 x i32> %tmp1, i32 %tmp2) { 60; CHECK-LABEL: ins2sw: 61; CHECK: @ %bb.0: 62; CHECK-NEXT: vmov.32 d0[1], r0 63; CHECK-NEXT: bx lr 64 %tmp3 = insertelement <2 x i32> %tmp1, i32 %tmp2, i32 1 65 ret <2 x i32> %tmp3 66} 67 68define <16 x i8> @ins16b16(<16 x i8> %tmp1, <16 x i8> %tmp2) { 69; CHECK-LABEL: ins16b16: 70; CHECK: @ %bb.0: 71; CHECK-NEXT: vmov.u8 r0, d0[2] 72; CHECK-NEXT: vmov.8 d3[7], r0 73; CHECK-NEXT: vorr q0, q1, q1 74; CHECK-NEXT: bx lr 75 %tmp3 = extractelement <16 x i8> %tmp1, i32 2 76 %tmp4 = insertelement <16 x i8> %tmp2, i8 %tmp3, i32 15 77 ret <16 x i8> %tmp4 78} 79 80define <8 x i16> @ins8h8(<8 x i16> %tmp1, <8 x i16> %tmp2) { 81; CHECK-LABEL: ins8h8: 82; CHECK: @ %bb.0: 83; CHECK-NEXT: vmov.u16 r0, d0[2] 84; CHECK-NEXT: vmov.16 d3[3], r0 85; CHECK-NEXT: vorr q0, q1, q1 86; CHECK-NEXT: bx lr 87 %tmp3 = extractelement <8 x i16> %tmp1, i32 2 88 %tmp4 = insertelement <8 x i16> %tmp2, i16 %tmp3, i32 7 89 ret <8 x i16> %tmp4 90} 91 92define <4 x i32> @ins4s4(<4 x i32> %tmp1, <4 x i32> %tmp2) { 93; CHECK-LABEL: ins4s4: 94; CHECK: @ %bb.0: 95; CHECK-NEXT: vmov.32 r0, d1[0] 96; CHECK-NEXT: vmov.32 d2[1], r0 97; CHECK-NEXT: vorr q0, q1, q1 98; CHECK-NEXT: bx lr 99 %tmp3 = extractelement <4 x i32> %tmp1, i32 2 100 %tmp4 = insertelement <4 x i32> %tmp2, i32 %tmp3, i32 1 101 ret <4 x i32> %tmp4 102} 103 104define <2 x i64> @ins2d2(<2 x i64> %tmp1, <2 x i64> %tmp2) { 105; CHECK-LABEL: ins2d2: 106; CHECK: @ %bb.0: 107; CHECK-NEXT: vmov r0, r1, d0 108; CHECK-NEXT: vmov.32 d3[0], r0 109; CHECK-NEXT: vmov.32 d3[1], r1 110; CHECK-NEXT: vorr q0, q1, q1 111; CHECK-NEXT: bx lr 112 %tmp3 = extractelement <2 x i64> %tmp1, i32 0 113 %tmp4 = insertelement <2 x i64> %tmp2, i64 %tmp3, i32 1 114 ret <2 x i64> %tmp4 115} 116 117define <4 x float> @ins4f4(<4 x float> %tmp1, <4 x float> %tmp2) { 118; CHECK-LABEL: ins4f4: 119; CHECK: @ %bb.0: 120; CHECK-NEXT: vmov.f32 s5, s2 121; CHECK-NEXT: vorr q0, q1, q1 122; CHECK-NEXT: bx lr 123 %tmp3 = extractelement <4 x float> %tmp1, i32 2 124 %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 1 125 ret <4 x float> %tmp4 126} 127 128define <2 x double> @ins2df2(<2 x double> %tmp1, <2 x double> %tmp2) { 129; CHECK-LABEL: ins2df2: 130; CHECK: @ %bb.0: 131; CHECK-NEXT: vorr d3, d0, d0 132; CHECK-NEXT: vorr q0, q1, q1 133; CHECK-NEXT: bx lr 134 %tmp3 = extractelement <2 x double> %tmp1, i32 0 135 %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1 136 ret <2 x double> %tmp4 137} 138 139define <16 x i8> @ins8b16(<8 x i8> %tmp1, <16 x i8> %tmp2) { 140; CHECK-LABEL: ins8b16: 141; CHECK: @ %bb.0: 142; CHECK-NEXT: vmov.u8 r0, d0[2] 143; CHECK-NEXT: vmov.8 d3[7], r0 144; CHECK-NEXT: vorr q0, q1, q1 145; CHECK-NEXT: bx lr 146 %tmp3 = extractelement <8 x i8> %tmp1, i32 2 147 %tmp4 = insertelement <16 x i8> %tmp2, i8 %tmp3, i32 15 148 ret <16 x i8> %tmp4 149} 150 151define <8 x i16> @ins4h8(<4 x i16> %tmp1, <8 x i16> %tmp2) { 152; CHECK-LABEL: ins4h8: 153; CHECK: @ %bb.0: 154; CHECK-NEXT: vmov.u16 r0, d0[2] 155; CHECK-NEXT: vmov.16 d3[3], r0 156; CHECK-NEXT: vorr q0, q1, q1 157; CHECK-NEXT: bx lr 158 %tmp3 = extractelement <4 x i16> %tmp1, i32 2 159 %tmp4 = insertelement <8 x i16> %tmp2, i16 %tmp3, i32 7 160 ret <8 x i16> %tmp4 161} 162 163define <4 x i32> @ins2s4(<2 x i32> %tmp1, <4 x i32> %tmp2) { 164; CHECK-LABEL: ins2s4: 165; CHECK: @ %bb.0: 166; CHECK-NEXT: vmov.32 r0, d0[1] 167; CHECK-NEXT: vmov.32 d2[1], r0 168; CHECK-NEXT: vorr q0, q1, q1 169; CHECK-NEXT: bx lr 170 %tmp3 = extractelement <2 x i32> %tmp1, i32 1 171 %tmp4 = insertelement <4 x i32> %tmp2, i32 %tmp3, i32 1 172 ret <4 x i32> %tmp4 173} 174 175define <2 x i64> @ins1d2(<1 x i64> %tmp1, <2 x i64> %tmp2) { 176; CHECK-LABEL: ins1d2: 177; CHECK: @ %bb.0: 178; CHECK-NEXT: vmov.32 r0, d0[0] 179; CHECK-NEXT: vmov.32 r1, d0[1] 180; CHECK-NEXT: vmov.32 d3[0], r0 181; CHECK-NEXT: vmov.32 d3[1], r1 182; CHECK-NEXT: vorr q0, q1, q1 183; CHECK-NEXT: bx lr 184 %tmp3 = extractelement <1 x i64> %tmp1, i32 0 185 %tmp4 = insertelement <2 x i64> %tmp2, i64 %tmp3, i32 1 186 ret <2 x i64> %tmp4 187} 188 189define <4 x float> @ins2f4(<2 x float> %tmp1, <4 x float> %tmp2) { 190; CHECK-LABEL: ins2f4: 191; CHECK: @ %bb.0: 192; CHECK-NEXT: vmov.f32 s5, s1 193; CHECK-NEXT: vorr q0, q1, q1 194; CHECK-NEXT: bx lr 195 %tmp3 = extractelement <2 x float> %tmp1, i32 1 196 %tmp4 = insertelement <4 x float> %tmp2, float %tmp3, i32 1 197 ret <4 x float> %tmp4 198} 199 200define <2 x double> @ins1f2(<1 x double> %tmp1, <2 x double> %tmp2) { 201; CHECK-LABEL: ins1f2: 202; CHECK: @ %bb.0: 203; CHECK-NEXT: vorr d3, d0, d0 204; CHECK-NEXT: vorr q0, q1, q1 205; CHECK-NEXT: bx lr 206 %tmp3 = extractelement <1 x double> %tmp1, i32 0 207 %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1 208 ret <2 x double> %tmp4 209} 210 211define <2 x double> @ins1f2_args_flipped(<2 x double> %tmp2, <1 x double> %tmp1) { 212; CHECK-LABEL: ins1f2_args_flipped: 213; CHECK: @ %bb.0: 214; CHECK-NEXT: vmov.f64 d1, d2 215; CHECK-NEXT: bx lr 216 %tmp3 = extractelement <1 x double> %tmp1, i32 0 217 %tmp4 = insertelement <2 x double> %tmp2, double %tmp3, i32 1 218 ret <2 x double> %tmp4 219} 220 221define <8 x i8> @ins16b8(<16 x i8> %tmp1, <8 x i8> %tmp2) { 222; CHECK-LABEL: ins16b8: 223; CHECK: @ %bb.0: 224; CHECK-NEXT: vmov.u8 r0, d0[2] 225; CHECK-NEXT: vmov.8 d2[7], r0 226; CHECK-NEXT: vorr d0, d2, d2 227; CHECK-NEXT: bx lr 228 %tmp3 = extractelement <16 x i8> %tmp1, i32 2 229 %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 7 230 ret <8 x i8> %tmp4 231} 232 233define <4 x i16> @ins8h4(<8 x i16> %tmp1, <4 x i16> %tmp2) { 234; CHECK-LABEL: ins8h4: 235; CHECK: @ %bb.0: 236; CHECK-NEXT: vmov.u16 r0, d0[2] 237; CHECK-NEXT: vmov.16 d2[3], r0 238; CHECK-NEXT: vorr d0, d2, d2 239; CHECK-NEXT: bx lr 240 %tmp3 = extractelement <8 x i16> %tmp1, i32 2 241 %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3 242 ret <4 x i16> %tmp4 243} 244 245define <2 x i32> @ins4s2(<4 x i32> %tmp1, <2 x i32> %tmp2) { 246; CHECK-LABEL: ins4s2: 247; CHECK: @ %bb.0: 248; CHECK-NEXT: vmov.32 r0, d1[0] 249; CHECK-NEXT: vmov.32 d2[1], r0 250; CHECK-NEXT: vorr d0, d2, d2 251; CHECK-NEXT: bx lr 252 %tmp3 = extractelement <4 x i32> %tmp1, i32 2 253 %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1 254 ret <2 x i32> %tmp4 255} 256 257define <1 x i64> @ins2d1(<2 x i64> %tmp1, <1 x i64> %tmp2) { 258; CHECK-LABEL: ins2d1: 259; CHECK: @ %bb.0: 260; CHECK-NEXT: @ kill: def $d0 killed $d0 killed $q0 261; CHECK-NEXT: bx lr 262 %tmp3 = extractelement <2 x i64> %tmp1, i32 0 263 %tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0 264 ret <1 x i64> %tmp4 265} 266 267define <2 x float> @ins4f2(<4 x float> %tmp1, <2 x float> %tmp2) { 268; CHECK-LABEL: ins4f2: 269; CHECK: @ %bb.0: 270; CHECK-NEXT: vmov.f32 s5, s2 271; CHECK-NEXT: vmov.f64 d0, d2 272; CHECK-NEXT: bx lr 273 %tmp3 = extractelement <4 x float> %tmp1, i32 2 274 %tmp4 = insertelement <2 x float> %tmp2, float %tmp3, i32 1 275 ret <2 x float> %tmp4 276} 277 278define <1 x double> @ins2f1(<2 x double> %tmp1, <1 x double> %tmp2) { 279; CHECK-LABEL: ins2f1: 280; CHECK: @ %bb.0: 281; CHECK-NEXT: vmov.f64 d0, d1 282; CHECK-NEXT: bx lr 283 %tmp3 = extractelement <2 x double> %tmp1, i32 1 284 %tmp4 = insertelement <1 x double> %tmp2, double %tmp3, i32 0 285 ret <1 x double> %tmp4 286} 287 288define <8 x i8> @ins8b8(<8 x i8> %tmp1, <8 x i8> %tmp2) { 289; CHECK-LABEL: ins8b8: 290; CHECK: @ %bb.0: 291; CHECK-NEXT: vmov.u8 r0, d0[2] 292; CHECK-NEXT: vmov.8 d1[4], r0 293; CHECK-NEXT: vorr d0, d1, d1 294; CHECK-NEXT: bx lr 295 %tmp3 = extractelement <8 x i8> %tmp1, i32 2 296 %tmp4 = insertelement <8 x i8> %tmp2, i8 %tmp3, i32 4 297 ret <8 x i8> %tmp4 298} 299 300define <4 x i16> @ins4h4(<4 x i16> %tmp1, <4 x i16> %tmp2) { 301; CHECK-LABEL: ins4h4: 302; CHECK: @ %bb.0: 303; CHECK-NEXT: vmov.u16 r0, d0[2] 304; CHECK-NEXT: vmov.16 d1[3], r0 305; CHECK-NEXT: vorr d0, d1, d1 306; CHECK-NEXT: bx lr 307 %tmp3 = extractelement <4 x i16> %tmp1, i32 2 308 %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 3 309 ret <4 x i16> %tmp4 310} 311 312define <2 x i32> @ins2s2(<2 x i32> %tmp1, <2 x i32> %tmp2) { 313; CHECK-LABEL: ins2s2: 314; CHECK: @ %bb.0: 315; CHECK-NEXT: vmov.32 r0, d0[0] 316; CHECK-NEXT: vmov.32 d1[1], r0 317; CHECK-NEXT: vorr d0, d1, d1 318; CHECK-NEXT: bx lr 319 %tmp3 = extractelement <2 x i32> %tmp1, i32 0 320 %tmp4 = insertelement <2 x i32> %tmp2, i32 %tmp3, i32 1 321 ret <2 x i32> %tmp4 322} 323 324define <1 x i64> @ins1d1(<1 x i64> %tmp1, <1 x i64> %tmp2) { 325; CHECK-LABEL: ins1d1: 326; CHECK: @ %bb.0: 327; CHECK-NEXT: bx lr 328 %tmp3 = extractelement <1 x i64> %tmp1, i32 0 329 %tmp4 = insertelement <1 x i64> %tmp2, i64 %tmp3, i32 0 330 ret <1 x i64> %tmp4 331} 332 333define <2 x float> @ins2f2(<2 x float> %tmp1, <2 x float> %tmp2) { 334; CHECK-LABEL: ins2f2: 335; CHECK: @ %bb.0: 336; CHECK-NEXT: vmov.f32 s3, s0 337; CHECK-NEXT: vmov.f64 d0, d1 338; CHECK-NEXT: bx lr 339 %tmp3 = extractelement <2 x float> %tmp1, i32 0 340 %tmp4 = insertelement <2 x float> %tmp2, float %tmp3, i32 1 341 ret <2 x float> %tmp4 342} 343 344define <1 x double> @ins1df1(<1 x double> %tmp1, <1 x double> %tmp2) { 345; CHECK-LABEL: ins1df1: 346; CHECK: @ %bb.0: 347; CHECK-NEXT: bx lr 348 %tmp3 = extractelement <1 x double> %tmp1, i32 0 349 %tmp4 = insertelement <1 x double> %tmp2, double %tmp3, i32 0 350 ret <1 x double> %tmp4 351} 352 353define i32 @umovw16b(<16 x i8> %tmp1) { 354; CHECK-LABEL: umovw16b: 355; CHECK: @ %bb.0: 356; CHECK-NEXT: vmov.u8 r0, d1[0] 357; CHECK-NEXT: bx lr 358 %tmp3 = extractelement <16 x i8> %tmp1, i32 8 359 %tmp4 = zext i8 %tmp3 to i32 360 ret i32 %tmp4 361} 362 363define i32 @umovw8h(<8 x i16> %tmp1) { 364; CHECK-LABEL: umovw8h: 365; CHECK: @ %bb.0: 366; CHECK-NEXT: vmov.u16 r0, d0[2] 367; CHECK-NEXT: bx lr 368 %tmp3 = extractelement <8 x i16> %tmp1, i32 2 369 %tmp4 = zext i16 %tmp3 to i32 370 ret i32 %tmp4 371} 372 373define i32 @umovw4s(<4 x i32> %tmp1) { 374; CHECK-LABEL: umovw4s: 375; CHECK: @ %bb.0: 376; CHECK-NEXT: vmov.32 r0, d1[0] 377; CHECK-NEXT: bx lr 378 %tmp3 = extractelement <4 x i32> %tmp1, i32 2 379 ret i32 %tmp3 380} 381 382define i64 @umovx2d(<2 x i64> %tmp1) { 383; CHECK-LABEL: umovx2d: 384; CHECK: @ %bb.0: 385; CHECK-NEXT: vmov r0, r1, d1 386; CHECK-NEXT: bx lr 387 %tmp3 = extractelement <2 x i64> %tmp1, i32 1 388 ret i64 %tmp3 389} 390 391define i32 @umovw8b(<8 x i8> %tmp1) { 392; CHECK-LABEL: umovw8b: 393; CHECK: @ %bb.0: 394; CHECK-NEXT: vmov.u8 r0, d0[7] 395; CHECK-NEXT: bx lr 396 %tmp3 = extractelement <8 x i8> %tmp1, i32 7 397 %tmp4 = zext i8 %tmp3 to i32 398 ret i32 %tmp4 399} 400 401define i32 @umovw4h(<4 x i16> %tmp1) { 402; CHECK-LABEL: umovw4h: 403; CHECK: @ %bb.0: 404; CHECK-NEXT: vmov.u16 r0, d0[2] 405; CHECK-NEXT: bx lr 406 %tmp3 = extractelement <4 x i16> %tmp1, i32 2 407 %tmp4 = zext i16 %tmp3 to i32 408 ret i32 %tmp4 409} 410 411define i32 @umovw2s(<2 x i32> %tmp1) { 412; CHECK-LABEL: umovw2s: 413; CHECK: @ %bb.0: 414; CHECK-NEXT: vmov.32 r0, d0[1] 415; CHECK-NEXT: bx lr 416 %tmp3 = extractelement <2 x i32> %tmp1, i32 1 417 ret i32 %tmp3 418} 419 420define i64 @umovx1d(<1 x i64> %tmp1) { 421; CHECK-LABEL: umovx1d: 422; CHECK: @ %bb.0: 423; CHECK-NEXT: vmov.32 r0, d0[0] 424; CHECK-NEXT: vmov.32 r1, d0[1] 425; CHECK-NEXT: bx lr 426 %tmp3 = extractelement <1 x i64> %tmp1, i32 0 427 ret i64 %tmp3 428} 429 430define i32 @smovw16b(<16 x i8> %tmp1) { 431; CHECK-LABEL: smovw16b: 432; CHECK: @ %bb.0: 433; CHECK-NEXT: vmov.s8 r0, d1[0] 434; CHECK-NEXT: add r0, r0, r0 435; CHECK-NEXT: bx lr 436 %tmp3 = extractelement <16 x i8> %tmp1, i32 8 437 %tmp4 = sext i8 %tmp3 to i32 438 %tmp5 = add i32 %tmp4, %tmp4 439 ret i32 %tmp5 440} 441 442define i32 @smovw8h(<8 x i16> %tmp1) { 443; CHECK-LABEL: smovw8h: 444; CHECK: @ %bb.0: 445; CHECK-NEXT: vmov.s16 r0, d0[2] 446; CHECK-NEXT: add r0, r0, r0 447; CHECK-NEXT: bx lr 448 %tmp3 = extractelement <8 x i16> %tmp1, i32 2 449 %tmp4 = sext i16 %tmp3 to i32 450 %tmp5 = add i32 %tmp4, %tmp4 451 ret i32 %tmp5 452} 453 454define i64 @smovx16b(<16 x i8> %tmp1) { 455; CHECK-LABEL: smovx16b: 456; CHECK: @ %bb.0: 457; CHECK-NEXT: vmov.s8 r0, d1[0] 458; CHECK-NEXT: asr r1, r0, #31 459; CHECK-NEXT: bx lr 460 %tmp3 = extractelement <16 x i8> %tmp1, i32 8 461 %tmp4 = sext i8 %tmp3 to i64 462 ret i64 %tmp4 463} 464 465define i64 @smovx8h(<8 x i16> %tmp1) { 466; CHECK-LABEL: smovx8h: 467; CHECK: @ %bb.0: 468; CHECK-NEXT: vmov.s16 r0, d0[2] 469; CHECK-NEXT: asr r1, r0, #31 470; CHECK-NEXT: bx lr 471 %tmp3 = extractelement <8 x i16> %tmp1, i32 2 472 %tmp4 = sext i16 %tmp3 to i64 473 ret i64 %tmp4 474} 475 476define i64 @smovx4s(<4 x i32> %tmp1) { 477; CHECK-LABEL: smovx4s: 478; CHECK: @ %bb.0: 479; CHECK-NEXT: vmov.32 r0, d1[0] 480; CHECK-NEXT: asr r1, r0, #31 481; CHECK-NEXT: bx lr 482 %tmp3 = extractelement <4 x i32> %tmp1, i32 2 483 %tmp4 = sext i32 %tmp3 to i64 484 ret i64 %tmp4 485} 486 487define i32 @smovw8b(<8 x i8> %tmp1) { 488; CHECK-LABEL: smovw8b: 489; CHECK: @ %bb.0: 490; CHECK-NEXT: vmov.s8 r0, d0[4] 491; CHECK-NEXT: add r0, r0, r0 492; CHECK-NEXT: bx lr 493 %tmp3 = extractelement <8 x i8> %tmp1, i32 4 494 %tmp4 = sext i8 %tmp3 to i32 495 %tmp5 = add i32 %tmp4, %tmp4 496 ret i32 %tmp5 497} 498 499define i32 @smovw4h(<4 x i16> %tmp1) { 500; CHECK-LABEL: smovw4h: 501; CHECK: @ %bb.0: 502; CHECK-NEXT: vmov.s16 r0, d0[2] 503; CHECK-NEXT: add r0, r0, r0 504; CHECK-NEXT: bx lr 505 %tmp3 = extractelement <4 x i16> %tmp1, i32 2 506 %tmp4 = sext i16 %tmp3 to i32 507 %tmp5 = add i32 %tmp4, %tmp4 508 ret i32 %tmp5 509} 510 511define i32 @smovx8b(<8 x i8> %tmp1) { 512; CHECK-LABEL: smovx8b: 513; CHECK: @ %bb.0: 514; CHECK-NEXT: vmov.s8 r0, d0[6] 515; CHECK-NEXT: bx lr 516 %tmp3 = extractelement <8 x i8> %tmp1, i32 6 517 %tmp4 = sext i8 %tmp3 to i32 518 ret i32 %tmp4 519} 520 521define i32 @smovx4h(<4 x i16> %tmp1) { 522; CHECK-LABEL: smovx4h: 523; CHECK: @ %bb.0: 524; CHECK-NEXT: vmov.s16 r0, d0[2] 525; CHECK-NEXT: bx lr 526 %tmp3 = extractelement <4 x i16> %tmp1, i32 2 527 %tmp4 = sext i16 %tmp3 to i32 528 ret i32 %tmp4 529} 530 531define i64 @smovx2s(<2 x i32> %tmp1) { 532; CHECK-LABEL: smovx2s: 533; CHECK: @ %bb.0: 534; CHECK-NEXT: vmov.32 r0, d0[1] 535; CHECK-NEXT: asr r1, r0, #31 536; CHECK-NEXT: bx lr 537 %tmp3 = extractelement <2 x i32> %tmp1, i32 1 538 %tmp4 = sext i32 %tmp3 to i64 539 ret i64 %tmp4 540} 541 542define <8 x i8> @test_vcopy_lane_s8(<8 x i8> %v1, <8 x i8> %v2) { 543; CHECK-LABEL: test_vcopy_lane_s8: 544; CHECK: @ %bb.0: 545; CHECK-NEXT: @ kill: def $d1 killed $d1 killed $q0 def $q0 546; CHECK-NEXT: vldr d16, .LCPI50_0 547; CHECK-NEXT: @ kill: def $d0 killed $d0 killed $q0 def $q0 548; CHECK-NEXT: vtbl.8 d0, {d0, d1}, d16 549; CHECK-NEXT: bx lr 550; CHECK-NEXT: .p2align 3 551; CHECK-NEXT: @ %bb.1: 552; CHECK-NEXT: .LCPI50_0: 553; CHECK-NEXT: .byte 0 @ 0x0 554; CHECK-NEXT: .byte 1 @ 0x1 555; CHECK-NEXT: .byte 2 @ 0x2 556; CHECK-NEXT: .byte 3 @ 0x3 557; CHECK-NEXT: .byte 4 @ 0x4 558; CHECK-NEXT: .byte 11 @ 0xb 559; CHECK-NEXT: .byte 6 @ 0x6 560; CHECK-NEXT: .byte 7 @ 0x7 561 %vset_lane = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 11, i32 6, i32 7> 562 ret <8 x i8> %vset_lane 563} 564 565define <16 x i8> @test_vcopyq_laneq_s8(<16 x i8> %v1, <16 x i8> %v2) { 566; CHECK-LABEL: test_vcopyq_laneq_s8: 567; CHECK: @ %bb.0: 568; CHECK-NEXT: @ kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 569; CHECK-NEXT: vldr d16, .LCPI51_0 570; CHECK-NEXT: @ kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 571; CHECK-NEXT: vtbl.8 d1, {d1, d2}, d16 572; CHECK-NEXT: @ kill: def $q0 killed $q0 killed $q0_q1 573; CHECK-NEXT: bx lr 574; CHECK-NEXT: .p2align 3 575; CHECK-NEXT: @ %bb.1: 576; CHECK-NEXT: .LCPI51_0: 577; CHECK-NEXT: .byte 0 @ 0x0 578; CHECK-NEXT: .byte 1 @ 0x1 579; CHECK-NEXT: .byte 2 @ 0x2 580; CHECK-NEXT: .byte 3 @ 0x3 581; CHECK-NEXT: .byte 4 @ 0x4 582; CHECK-NEXT: .byte 5 @ 0x5 583; CHECK-NEXT: .byte 14 @ 0xe 584; CHECK-NEXT: .byte 7 @ 0x7 585 %vset_lane = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 22, i32 15> 586 ret <16 x i8> %vset_lane 587} 588 589define <8 x i8> @test_vcopy_lane_swap_s8(<8 x i8> %v1, <8 x i8> %v2) { 590; CHECK-LABEL: test_vcopy_lane_swap_s8: 591; CHECK: @ %bb.0: 592; CHECK-NEXT: @ kill: def $d1 killed $d1 killed $q0 def $q0 593; CHECK-NEXT: vldr d16, .LCPI52_0 594; CHECK-NEXT: @ kill: def $d0 killed $d0 killed $q0 def $q0 595; CHECK-NEXT: vtbl.8 d0, {d0, d1}, d16 596; CHECK-NEXT: bx lr 597; CHECK-NEXT: .p2align 3 598; CHECK-NEXT: @ %bb.1: 599; CHECK-NEXT: .LCPI52_0: 600; CHECK-NEXT: .byte 8 @ 0x8 601; CHECK-NEXT: .byte 9 @ 0x9 602; CHECK-NEXT: .byte 10 @ 0xa 603; CHECK-NEXT: .byte 11 @ 0xb 604; CHECK-NEXT: .byte 12 @ 0xc 605; CHECK-NEXT: .byte 13 @ 0xd 606; CHECK-NEXT: .byte 14 @ 0xe 607; CHECK-NEXT: .byte 0 @ 0x0 608 %vset_lane = shufflevector <8 x i8> %v1, <8 x i8> %v2, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 0> 609 ret <8 x i8> %vset_lane 610} 611 612define <16 x i8> @test_vcopyq_laneq_swap_s8(<16 x i8> %v1, <16 x i8> %v2) { 613; CHECK-LABEL: test_vcopyq_laneq_swap_s8: 614; CHECK: @ %bb.0: 615; CHECK-NEXT: @ kill: def $q1 killed $q1 killed $q0_q1 def $q0_q1 616; CHECK-NEXT: vldr d16, .LCPI53_0 617; CHECK-NEXT: @ kill: def $q0 killed $q0 killed $q0_q1 def $q0_q1 618; CHECK-NEXT: vtbl.8 d2, {d1, d2}, d16 619; CHECK-NEXT: vorr q0, q1, q1 620; CHECK-NEXT: bx lr 621; CHECK-NEXT: .p2align 3 622; CHECK-NEXT: @ %bb.1: 623; CHECK-NEXT: .LCPI53_0: 624; CHECK-NEXT: .byte 7 @ 0x7 625; CHECK-NEXT: .byte 9 @ 0x9 626; CHECK-NEXT: .byte 10 @ 0xa 627; CHECK-NEXT: .byte 11 @ 0xb 628; CHECK-NEXT: .byte 12 @ 0xc 629; CHECK-NEXT: .byte 13 @ 0xd 630; CHECK-NEXT: .byte 14 @ 0xe 631; CHECK-NEXT: .byte 15 @ 0xf 632 %vset_lane = shufflevector <16 x i8> %v1, <16 x i8> %v2, <16 x i32> <i32 15, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> 633 ret <16 x i8> %vset_lane 634} 635 636define <8 x i8> @test_vdup_n_u8(i8 %v1) #0 { 637; CHECK-LABEL: test_vdup_n_u8: 638; CHECK: @ %bb.0: 639; CHECK-NEXT: vdup.8 d0, r0 640; CHECK-NEXT: bx lr 641 %vecinit.i = insertelement <8 x i8> undef, i8 %v1, i32 0 642 %vecinit1.i = insertelement <8 x i8> %vecinit.i, i8 %v1, i32 1 643 %vecinit2.i = insertelement <8 x i8> %vecinit1.i, i8 %v1, i32 2 644 %vecinit3.i = insertelement <8 x i8> %vecinit2.i, i8 %v1, i32 3 645 %vecinit4.i = insertelement <8 x i8> %vecinit3.i, i8 %v1, i32 4 646 %vecinit5.i = insertelement <8 x i8> %vecinit4.i, i8 %v1, i32 5 647 %vecinit6.i = insertelement <8 x i8> %vecinit5.i, i8 %v1, i32 6 648 %vecinit7.i = insertelement <8 x i8> %vecinit6.i, i8 %v1, i32 7 649 ret <8 x i8> %vecinit7.i 650} 651 652define <4 x i16> @test_vdup_n_u16(i16 %v1) #0 { 653; CHECK-LABEL: test_vdup_n_u16: 654; CHECK: @ %bb.0: 655; CHECK-NEXT: vdup.16 d0, r0 656; CHECK-NEXT: bx lr 657 %vecinit.i = insertelement <4 x i16> undef, i16 %v1, i32 0 658 %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %v1, i32 1 659 %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %v1, i32 2 660 %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %v1, i32 3 661 ret <4 x i16> %vecinit3.i 662} 663 664define <2 x i32> @test_vdup_n_u32(i32 %v1) #0 { 665; CHECK-LABEL: test_vdup_n_u32: 666; CHECK: @ %bb.0: 667; CHECK-NEXT: vdup.32 d0, r0 668; CHECK-NEXT: bx lr 669 %vecinit.i = insertelement <2 x i32> undef, i32 %v1, i32 0 670 %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %v1, i32 1 671 ret <2 x i32> %vecinit1.i 672} 673 674define <1 x i64> @test_vdup_n_u64(i64 %v1) #0 { 675; CHECK-LABEL: test_vdup_n_u64: 676; CHECK: @ %bb.0: 677; CHECK-NEXT: vmov.32 d0[0], r0 678; CHECK-NEXT: vmov.32 d0[1], r1 679; CHECK-NEXT: bx lr 680 %vecinit.i = insertelement <1 x i64> undef, i64 %v1, i32 0 681 ret <1 x i64> %vecinit.i 682} 683 684define <16 x i8> @test_vdupq_n_u8(i8 %v1) #0 { 685; CHECK-LABEL: test_vdupq_n_u8: 686; CHECK: @ %bb.0: 687; CHECK-NEXT: vdup.8 q0, r0 688; CHECK-NEXT: bx lr 689 %vecinit.i = insertelement <16 x i8> undef, i8 %v1, i32 0 690 %vecinit1.i = insertelement <16 x i8> %vecinit.i, i8 %v1, i32 1 691 %vecinit2.i = insertelement <16 x i8> %vecinit1.i, i8 %v1, i32 2 692 %vecinit3.i = insertelement <16 x i8> %vecinit2.i, i8 %v1, i32 3 693 %vecinit4.i = insertelement <16 x i8> %vecinit3.i, i8 %v1, i32 4 694 %vecinit5.i = insertelement <16 x i8> %vecinit4.i, i8 %v1, i32 5 695 %vecinit6.i = insertelement <16 x i8> %vecinit5.i, i8 %v1, i32 6 696 %vecinit7.i = insertelement <16 x i8> %vecinit6.i, i8 %v1, i32 7 697 %vecinit8.i = insertelement <16 x i8> %vecinit7.i, i8 %v1, i32 8 698 %vecinit9.i = insertelement <16 x i8> %vecinit8.i, i8 %v1, i32 9 699 %vecinit10.i = insertelement <16 x i8> %vecinit9.i, i8 %v1, i32 10 700 %vecinit11.i = insertelement <16 x i8> %vecinit10.i, i8 %v1, i32 11 701 %vecinit12.i = insertelement <16 x i8> %vecinit11.i, i8 %v1, i32 12 702 %vecinit13.i = insertelement <16 x i8> %vecinit12.i, i8 %v1, i32 13 703 %vecinit14.i = insertelement <16 x i8> %vecinit13.i, i8 %v1, i32 14 704 %vecinit15.i = insertelement <16 x i8> %vecinit14.i, i8 %v1, i32 15 705 ret <16 x i8> %vecinit15.i 706} 707 708define <8 x i16> @test_vdupq_n_u16(i16 %v1) #0 { 709; CHECK-LABEL: test_vdupq_n_u16: 710; CHECK: @ %bb.0: 711; CHECK-NEXT: vdup.16 q0, r0 712; CHECK-NEXT: bx lr 713 %vecinit.i = insertelement <8 x i16> undef, i16 %v1, i32 0 714 %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %v1, i32 1 715 %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %v1, i32 2 716 %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %v1, i32 3 717 %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %v1, i32 4 718 %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %v1, i32 5 719 %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %v1, i32 6 720 %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %v1, i32 7 721 ret <8 x i16> %vecinit7.i 722} 723 724define <4 x i32> @test_vdupq_n_u32(i32 %v1) #0 { 725; CHECK-LABEL: test_vdupq_n_u32: 726; CHECK: @ %bb.0: 727; CHECK-NEXT: vdup.32 q0, r0 728; CHECK-NEXT: bx lr 729 %vecinit.i = insertelement <4 x i32> undef, i32 %v1, i32 0 730 %vecinit1.i = insertelement <4 x i32> %vecinit.i, i32 %v1, i32 1 731 %vecinit2.i = insertelement <4 x i32> %vecinit1.i, i32 %v1, i32 2 732 %vecinit3.i = insertelement <4 x i32> %vecinit2.i, i32 %v1, i32 3 733 ret <4 x i32> %vecinit3.i 734} 735 736define <2 x i64> @test_vdupq_n_u64(i64 %v1) #0 { 737; CHECK-LABEL: test_vdupq_n_u64: 738; CHECK: @ %bb.0: 739; CHECK-NEXT: vmov.32 d0[0], r0 740; CHECK-NEXT: vmov.32 d0[1], r1 741; CHECK-NEXT: vorr d1, d0, d0 742; CHECK-NEXT: bx lr 743 %vecinit.i = insertelement <2 x i64> undef, i64 %v1, i32 0 744 %vecinit1.i = insertelement <2 x i64> %vecinit.i, i64 %v1, i32 1 745 ret <2 x i64> %vecinit1.i 746} 747 748define <8 x i8> @test_vdup_lane_s8(<8 x i8> %v1) #0 { 749; CHECK-LABEL: test_vdup_lane_s8: 750; CHECK: @ %bb.0: 751; CHECK-NEXT: vdup.8 d0, d0[5] 752; CHECK-NEXT: bx lr 753 %shuffle = shufflevector <8 x i8> %v1, <8 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5> 754 ret <8 x i8> %shuffle 755} 756 757define <4 x i16> @test_vdup_lane_s16(<4 x i16> %v1) #0 { 758; CHECK-LABEL: test_vdup_lane_s16: 759; CHECK: @ %bb.0: 760; CHECK-NEXT: vdup.16 d0, d0[2] 761; CHECK-NEXT: bx lr 762 %shuffle = shufflevector <4 x i16> %v1, <4 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2> 763 ret <4 x i16> %shuffle 764} 765 766define <2 x i32> @test_vdup_lane_s32(<2 x i32> %v1) #0 { 767; CHECK-LABEL: test_vdup_lane_s32: 768; CHECK: @ %bb.0: 769; CHECK-NEXT: vdup.32 d0, d0[1] 770; CHECK-NEXT: bx lr 771 %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <2 x i32> <i32 1, i32 1> 772 ret <2 x i32> %shuffle 773} 774 775define <16 x i8> @test_vdupq_lane_s8(<8 x i8> %v1) #0 { 776; CHECK-LABEL: test_vdupq_lane_s8: 777; CHECK: @ %bb.0: 778; CHECK-NEXT: @ kill: def $d0 killed $d0 def $q0 779; CHECK-NEXT: vdup.8 q0, d0[5] 780; CHECK-NEXT: bx lr 781 %shuffle = shufflevector <8 x i8> %v1, <8 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5> 782 ret <16 x i8> %shuffle 783} 784 785define <8 x i16> @test_vdupq_lane_s16(<4 x i16> %v1) #0 { 786; CHECK-LABEL: test_vdupq_lane_s16: 787; CHECK: @ %bb.0: 788; CHECK-NEXT: @ kill: def $d0 killed $d0 def $q0 789; CHECK-NEXT: vdup.16 q0, d0[2] 790; CHECK-NEXT: bx lr 791 %shuffle = shufflevector <4 x i16> %v1, <4 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2> 792 ret <8 x i16> %shuffle 793} 794 795define <4 x i32> @test_vdupq_lane_s32(<2 x i32> %v1) #0 { 796; CHECK-LABEL: test_vdupq_lane_s32: 797; CHECK: @ %bb.0: 798; CHECK-NEXT: @ kill: def $d0 killed $d0 def $q0 799; CHECK-NEXT: vdup.32 q0, d0[1] 800; CHECK-NEXT: bx lr 801 %shuffle = shufflevector <2 x i32> %v1, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 802 ret <4 x i32> %shuffle 803} 804 805define <2 x i64> @test_vdupq_lane_s64(<1 x i64> %v1) #0 { 806; CHECK-LABEL: test_vdupq_lane_s64: 807; CHECK: @ %bb.0: 808; CHECK-NEXT: @ kill: def $d0 killed $d0 def $q0 809; CHECK-NEXT: vmov.f64 d1, d0 810; CHECK-NEXT: bx lr 811 %shuffle = shufflevector <1 x i64> %v1, <1 x i64> undef, <2 x i32> zeroinitializer 812 ret <2 x i64> %shuffle 813} 814 815define <8 x i8> @test_vdup_laneq_s8(<16 x i8> %v1) #0 { 816; CHECK-LABEL: test_vdup_laneq_s8: 817; CHECK: @ %bb.0: 818; CHECK-NEXT: vdup.8 d0, d0[5] 819; CHECK-NEXT: bx lr 820 %shuffle = shufflevector <16 x i8> %v1, <16 x i8> undef, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5> 821 ret <8 x i8> %shuffle 822} 823 824define <4 x i16> @test_vdup_laneq_s16(<8 x i16> %v1) #0 { 825; CHECK-LABEL: test_vdup_laneq_s16: 826; CHECK: @ %bb.0: 827; CHECK-NEXT: vdup.16 d0, d0[2] 828; CHECK-NEXT: bx lr 829 %shuffle = shufflevector <8 x i16> %v1, <8 x i16> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 2> 830 ret <4 x i16> %shuffle 831} 832 833define <2 x i32> @test_vdup_laneq_s32(<4 x i32> %v1) #0 { 834; CHECK-LABEL: test_vdup_laneq_s32: 835; CHECK: @ %bb.0: 836; CHECK-NEXT: vdup.32 d0, d0[1] 837; CHECK-NEXT: bx lr 838 %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <2 x i32> <i32 1, i32 1> 839 ret <2 x i32> %shuffle 840} 841 842define <16 x i8> @test_vdupq_laneq_s8(<16 x i8> %v1) #0 { 843; CHECK-LABEL: test_vdupq_laneq_s8: 844; CHECK: @ %bb.0: 845; CHECK-NEXT: vdup.8 q0, d0[5] 846; CHECK-NEXT: bx lr 847 %shuffle = shufflevector <16 x i8> %v1, <16 x i8> undef, <16 x i32> <i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5, i32 5> 848 ret <16 x i8> %shuffle 849} 850 851define <8 x i16> @test_vdupq_laneq_s16(<8 x i16> %v1) #0 { 852; CHECK-LABEL: test_vdupq_laneq_s16: 853; CHECK: @ %bb.0: 854; CHECK-NEXT: vdup.16 q0, d0[2] 855; CHECK-NEXT: bx lr 856 %shuffle = shufflevector <8 x i16> %v1, <8 x i16> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2> 857 ret <8 x i16> %shuffle 858} 859 860define <4 x i32> @test_vdupq_laneq_s32(<4 x i32> %v1) #0 { 861; CHECK-LABEL: test_vdupq_laneq_s32: 862; CHECK: @ %bb.0: 863; CHECK-NEXT: vdup.32 q0, d0[1] 864; CHECK-NEXT: bx lr 865 %shuffle = shufflevector <4 x i32> %v1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 1, i32 1> 866 ret <4 x i32> %shuffle 867} 868 869define <2 x i64> @test_vdupq_laneq_s64(<2 x i64> %v1) #0 { 870; CHECK-LABEL: test_vdupq_laneq_s64: 871; CHECK: @ %bb.0: 872; CHECK-NEXT: vmov.f64 d1, d0 873; CHECK-NEXT: bx lr 874 %shuffle = shufflevector <2 x i64> %v1, <2 x i64> undef, <2 x i32> zeroinitializer 875 ret <2 x i64> %shuffle 876} 877 878define i64 @test_bitcastv8i8toi64(<8 x i8> %in) { 879; CHECK-LABEL: test_bitcastv8i8toi64: 880; CHECK: @ %bb.0: 881; CHECK-NEXT: vmov r0, r1, d0 882; CHECK-NEXT: bx lr 883 %res = bitcast <8 x i8> %in to i64 884 ret i64 %res 885} 886 887define i64 @test_bitcastv4i16toi64(<4 x i16> %in) { 888; CHECK-LABEL: test_bitcastv4i16toi64: 889; CHECK: @ %bb.0: 890; CHECK-NEXT: vmov r0, r1, d0 891; CHECK-NEXT: bx lr 892 %res = bitcast <4 x i16> %in to i64 893 ret i64 %res 894} 895 896define i64 @test_bitcastv2i32toi64(<2 x i32> %in) { 897; CHECK-LABEL: test_bitcastv2i32toi64: 898; CHECK: @ %bb.0: 899; CHECK-NEXT: vmov r0, r1, d0 900; CHECK-NEXT: bx lr 901 %res = bitcast <2 x i32> %in to i64 902 ret i64 %res 903} 904 905define i64 @test_bitcastv2f32toi64(<2 x float> %in) { 906; CHECK-LABEL: test_bitcastv2f32toi64: 907; CHECK: @ %bb.0: 908; CHECK-NEXT: vmov r0, r1, d0 909; CHECK-NEXT: bx lr 910 %res = bitcast <2 x float> %in to i64 911 ret i64 %res 912} 913 914define i64 @test_bitcastv1i64toi64(<1 x i64> %in) { 915; CHECK-LABEL: test_bitcastv1i64toi64: 916; CHECK: @ %bb.0: 917; CHECK-NEXT: vmov r0, r1, d0 918; CHECK-NEXT: bx lr 919 %res = bitcast <1 x i64> %in to i64 920 ret i64 %res 921} 922 923define i64 @test_bitcastv1f64toi64(<1 x double> %in) { 924; CHECK-LABEL: test_bitcastv1f64toi64: 925; CHECK: @ %bb.0: 926; CHECK-NEXT: vmov r0, r1, d0 927; CHECK-NEXT: bx lr 928 %res = bitcast <1 x double> %in to i64 929 ret i64 %res 930} 931 932define <8 x i8> @test_bitcasti64tov8i8(i64 %in) { 933; CHECK-LABEL: test_bitcasti64tov8i8: 934; CHECK: @ %bb.0: 935; CHECK-NEXT: vmov d0, r0, r1 936; CHECK-NEXT: bx lr 937 %res = bitcast i64 %in to <8 x i8> 938 ret <8 x i8> %res 939} 940 941define <4 x i16> @test_bitcasti64tov4i16(i64 %in) { 942; CHECK-LABEL: test_bitcasti64tov4i16: 943; CHECK: @ %bb.0: 944; CHECK-NEXT: vmov d0, r0, r1 945; CHECK-NEXT: bx lr 946 %res = bitcast i64 %in to <4 x i16> 947 ret <4 x i16> %res 948} 949 950define <2 x i32> @test_bitcasti64tov2i32(i64 %in) { 951; CHECK-LABEL: test_bitcasti64tov2i32: 952; CHECK: @ %bb.0: 953; CHECK-NEXT: vmov d0, r0, r1 954; CHECK-NEXT: bx lr 955 %res = bitcast i64 %in to <2 x i32> 956 ret <2 x i32> %res 957} 958 959define <2 x float> @test_bitcasti64tov2f32(i64 %in) { 960; CHECK-LABEL: test_bitcasti64tov2f32: 961; CHECK: @ %bb.0: 962; CHECK-NEXT: vmov d0, r0, r1 963; CHECK-NEXT: bx lr 964 %res = bitcast i64 %in to <2 x float> 965 ret <2 x float> %res 966} 967 968define <1 x i64> @test_bitcasti64tov1i64(i64 %in) { 969; CHECK-LABEL: test_bitcasti64tov1i64: 970; CHECK: @ %bb.0: 971; CHECK-NEXT: vmov d0, r0, r1 972; CHECK-NEXT: bx lr 973 %res = bitcast i64 %in to <1 x i64> 974 ret <1 x i64> %res 975} 976 977define <1 x double> @test_bitcasti64tov1f64(i64 %in) { 978; CHECK-LABEL: test_bitcasti64tov1f64: 979; CHECK: @ %bb.0: 980; CHECK-NEXT: vmov d0, r0, r1 981; CHECK-NEXT: bx lr 982 %res = bitcast i64 %in to <1 x double> 983 ret <1 x double> %res 984} 985 986define <1 x i64> @test_bitcastv8i8tov1f64(<8 x i8> %a) #0 { 987; CHECK-LABEL: test_bitcastv8i8tov1f64: 988; CHECK: @ %bb.0: 989; CHECK-NEXT: .save {r11, lr} 990; CHECK-NEXT: push {r11, lr} 991; CHECK-NEXT: vneg.s8 d16, d0 992; CHECK-NEXT: vmov r0, r1, d16 993; CHECK-NEXT: bl __aeabi_d2lz 994; CHECK-NEXT: vmov.32 d0[0], r0 995; CHECK-NEXT: vmov.32 d0[1], r1 996; CHECK-NEXT: pop {r11, pc} 997 %sub.i = sub <8 x i8> zeroinitializer, %a 998 %1 = bitcast <8 x i8> %sub.i to <1 x double> 999 %vcvt.i = fptosi <1 x double> %1 to <1 x i64> 1000 ret <1 x i64> %vcvt.i 1001} 1002 1003define <1 x i64> @test_bitcastv4i16tov1f64(<4 x i16> %a) #0 { 1004; CHECK-LABEL: test_bitcastv4i16tov1f64: 1005; CHECK: @ %bb.0: 1006; CHECK-NEXT: .save {r11, lr} 1007; CHECK-NEXT: push {r11, lr} 1008; CHECK-NEXT: vneg.s16 d16, d0 1009; CHECK-NEXT: vmov r0, r1, d16 1010; CHECK-NEXT: bl __aeabi_d2lz 1011; CHECK-NEXT: vmov.32 d0[0], r0 1012; CHECK-NEXT: vmov.32 d0[1], r1 1013; CHECK-NEXT: pop {r11, pc} 1014 %sub.i = sub <4 x i16> zeroinitializer, %a 1015 %1 = bitcast <4 x i16> %sub.i to <1 x double> 1016 %vcvt.i = fptosi <1 x double> %1 to <1 x i64> 1017 ret <1 x i64> %vcvt.i 1018} 1019 1020define <1 x i64> @test_bitcastv2i32tov1f64(<2 x i32> %a) #0 { 1021; CHECK-LABEL: test_bitcastv2i32tov1f64: 1022; CHECK: @ %bb.0: 1023; CHECK-NEXT: .save {r11, lr} 1024; CHECK-NEXT: push {r11, lr} 1025; CHECK-NEXT: vneg.s32 d16, d0 1026; CHECK-NEXT: vmov r0, r1, d16 1027; CHECK-NEXT: bl __aeabi_d2lz 1028; CHECK-NEXT: vmov.32 d0[0], r0 1029; CHECK-NEXT: vmov.32 d0[1], r1 1030; CHECK-NEXT: pop {r11, pc} 1031 %sub.i = sub <2 x i32> zeroinitializer, %a 1032 %1 = bitcast <2 x i32> %sub.i to <1 x double> 1033 %vcvt.i = fptosi <1 x double> %1 to <1 x i64> 1034 ret <1 x i64> %vcvt.i 1035} 1036 1037define <1 x i64> @test_bitcastv1i64tov1f64(<1 x i64> %a) #0 { 1038; CHECK-LABEL: test_bitcastv1i64tov1f64: 1039; CHECK: @ %bb.0: 1040; CHECK-NEXT: .save {r11, lr} 1041; CHECK-NEXT: push {r11, lr} 1042; CHECK-NEXT: vmov.i32 d16, #0x0 1043; CHECK-NEXT: vsub.i64 d16, d16, d0 1044; CHECK-NEXT: vmov r0, r1, d16 1045; CHECK-NEXT: bl __aeabi_d2lz 1046; CHECK-NEXT: vmov.32 d0[0], r0 1047; CHECK-NEXT: vmov.32 d0[1], r1 1048; CHECK-NEXT: pop {r11, pc} 1049 %sub.i = sub <1 x i64> zeroinitializer, %a 1050 %1 = bitcast <1 x i64> %sub.i to <1 x double> 1051 %vcvt.i = fptosi <1 x double> %1 to <1 x i64> 1052 ret <1 x i64> %vcvt.i 1053} 1054 1055define <1 x i64> @test_bitcastv2f32tov1f64(<2 x float> %a) #0 { 1056; CHECK-LABEL: test_bitcastv2f32tov1f64: 1057; CHECK: @ %bb.0: 1058; CHECK-NEXT: .save {r11, lr} 1059; CHECK-NEXT: push {r11, lr} 1060; CHECK-NEXT: vneg.f32 d16, d0 1061; CHECK-NEXT: vmov r0, r1, d16 1062; CHECK-NEXT: bl __aeabi_d2lz 1063; CHECK-NEXT: vmov.32 d0[0], r0 1064; CHECK-NEXT: vmov.32 d0[1], r1 1065; CHECK-NEXT: pop {r11, pc} 1066 %sub.i = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %a 1067 %1 = bitcast <2 x float> %sub.i to <1 x double> 1068 %vcvt.i = fptosi <1 x double> %1 to <1 x i64> 1069 ret <1 x i64> %vcvt.i 1070} 1071 1072define <8 x i8> @test_bitcastv1f64tov8i8(<1 x i64> %a) #0 { 1073; CHECK-LABEL: test_bitcastv1f64tov8i8: 1074; CHECK: @ %bb.0: 1075; CHECK-NEXT: .save {r11, lr} 1076; CHECK-NEXT: push {r11, lr} 1077; CHECK-NEXT: vmov.32 r0, d0[0] 1078; CHECK-NEXT: vmov.32 r1, d0[1] 1079; CHECK-NEXT: bl __aeabi_l2d 1080; CHECK-NEXT: vmov d16, r0, r1 1081; CHECK-NEXT: vneg.s8 d0, d16 1082; CHECK-NEXT: pop {r11, pc} 1083 %vcvt.i = sitofp <1 x i64> %a to <1 x double> 1084 %1 = bitcast <1 x double> %vcvt.i to <8 x i8> 1085 %sub.i = sub <8 x i8> zeroinitializer, %1 1086 ret <8 x i8> %sub.i 1087} 1088 1089define <4 x i16> @test_bitcastv1f64tov4i16(<1 x i64> %a) #0 { 1090; CHECK-LABEL: test_bitcastv1f64tov4i16: 1091; CHECK: @ %bb.0: 1092; CHECK-NEXT: .save {r11, lr} 1093; CHECK-NEXT: push {r11, lr} 1094; CHECK-NEXT: vmov.32 r0, d0[0] 1095; CHECK-NEXT: vmov.32 r1, d0[1] 1096; CHECK-NEXT: bl __aeabi_l2d 1097; CHECK-NEXT: vmov d16, r0, r1 1098; CHECK-NEXT: vneg.s16 d0, d16 1099; CHECK-NEXT: pop {r11, pc} 1100 %vcvt.i = sitofp <1 x i64> %a to <1 x double> 1101 %1 = bitcast <1 x double> %vcvt.i to <4 x i16> 1102 %sub.i = sub <4 x i16> zeroinitializer, %1 1103 ret <4 x i16> %sub.i 1104} 1105 1106define <2 x i32> @test_bitcastv1f64tov2i32(<1 x i64> %a) #0 { 1107; CHECK-LABEL: test_bitcastv1f64tov2i32: 1108; CHECK: @ %bb.0: 1109; CHECK-NEXT: .save {r11, lr} 1110; CHECK-NEXT: push {r11, lr} 1111; CHECK-NEXT: vmov.32 r0, d0[0] 1112; CHECK-NEXT: vmov.32 r1, d0[1] 1113; CHECK-NEXT: bl __aeabi_l2d 1114; CHECK-NEXT: vmov d16, r0, r1 1115; CHECK-NEXT: vneg.s32 d0, d16 1116; CHECK-NEXT: pop {r11, pc} 1117 %vcvt.i = sitofp <1 x i64> %a to <1 x double> 1118 %1 = bitcast <1 x double> %vcvt.i to <2 x i32> 1119 %sub.i = sub <2 x i32> zeroinitializer, %1 1120 ret <2 x i32> %sub.i 1121} 1122 1123define <1 x i64> @test_bitcastv1f64tov1i64(<1 x i64> %a) #0 { 1124; CHECK-LABEL: test_bitcastv1f64tov1i64: 1125; CHECK: @ %bb.0: 1126; CHECK-NEXT: .save {r11, lr} 1127; CHECK-NEXT: push {r11, lr} 1128; CHECK-NEXT: vmov.32 r0, d0[0] 1129; CHECK-NEXT: vmov.32 r1, d0[1] 1130; CHECK-NEXT: bl __aeabi_l2d 1131; CHECK-NEXT: vmov.i32 d16, #0x0 1132; CHECK-NEXT: vmov d17, r0, r1 1133; CHECK-NEXT: vsub.i64 d0, d16, d17 1134; CHECK-NEXT: pop {r11, pc} 1135 %vcvt.i = sitofp <1 x i64> %a to <1 x double> 1136 %1 = bitcast <1 x double> %vcvt.i to <1 x i64> 1137 %sub.i = sub <1 x i64> zeroinitializer, %1 1138 ret <1 x i64> %sub.i 1139} 1140 1141define <2 x float> @test_bitcastv1f64tov2f32(<1 x i64> %a) #0 { 1142; CHECK-LABEL: test_bitcastv1f64tov2f32: 1143; CHECK: @ %bb.0: 1144; CHECK-NEXT: .save {r11, lr} 1145; CHECK-NEXT: push {r11, lr} 1146; CHECK-NEXT: vmov.32 r0, d0[0] 1147; CHECK-NEXT: vmov.32 r1, d0[1] 1148; CHECK-NEXT: bl __aeabi_l2d 1149; CHECK-NEXT: vmov d16, r0, r1 1150; CHECK-NEXT: vneg.f32 d0, d16 1151; CHECK-NEXT: pop {r11, pc} 1152 %vcvt.i = sitofp <1 x i64> %a to <1 x double> 1153 %1 = bitcast <1 x double> %vcvt.i to <2 x float> 1154 %sub.i = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %1 1155 ret <2 x float> %sub.i 1156} 1157 1158; Test insert element into an undef vector 1159define <8 x i8> @scalar_to_vector_v8i8(i8 %a) { 1160; CHECK-LABEL: scalar_to_vector_v8i8: 1161; CHECK: @ %bb.0: 1162; CHECK-NEXT: vmov.8 d0[0], r0 1163; CHECK-NEXT: bx lr 1164 %b = insertelement <8 x i8> undef, i8 %a, i32 0 1165 ret <8 x i8> %b 1166} 1167 1168define <16 x i8> @scalar_to_vector_v16i8(i8 %a) { 1169; CHECK-LABEL: scalar_to_vector_v16i8: 1170; CHECK: @ %bb.0: 1171; CHECK-NEXT: vmov.8 d0[0], r0 1172; CHECK-NEXT: bx lr 1173 %b = insertelement <16 x i8> undef, i8 %a, i32 0 1174 ret <16 x i8> %b 1175} 1176 1177define <4 x i16> @scalar_to_vector_v4i16(i16 %a) { 1178; CHECK-LABEL: scalar_to_vector_v4i16: 1179; CHECK: @ %bb.0: 1180; CHECK-NEXT: vmov.16 d0[0], r0 1181; CHECK-NEXT: bx lr 1182 %b = insertelement <4 x i16> undef, i16 %a, i32 0 1183 ret <4 x i16> %b 1184} 1185 1186define <8 x i16> @scalar_to_vector_v8i16(i16 %a) { 1187; CHECK-LABEL: scalar_to_vector_v8i16: 1188; CHECK: @ %bb.0: 1189; CHECK-NEXT: vmov.16 d0[0], r0 1190; CHECK-NEXT: bx lr 1191 %b = insertelement <8 x i16> undef, i16 %a, i32 0 1192 ret <8 x i16> %b 1193} 1194 1195define <2 x i32> @scalar_to_vector_v2i32(i32 %a) { 1196; CHECK-LABEL: scalar_to_vector_v2i32: 1197; CHECK: @ %bb.0: 1198; CHECK-NEXT: vmov.32 d0[0], r0 1199; CHECK-NEXT: bx lr 1200 %b = insertelement <2 x i32> undef, i32 %a, i32 0 1201 ret <2 x i32> %b 1202} 1203 1204define <4 x i32> @scalar_to_vector_v4i32(i32 %a) { 1205; CHECK-LABEL: scalar_to_vector_v4i32: 1206; CHECK: @ %bb.0: 1207; CHECK-NEXT: vmov.32 d0[0], r0 1208; CHECK-NEXT: bx lr 1209 %b = insertelement <4 x i32> undef, i32 %a, i32 0 1210 ret <4 x i32> %b 1211} 1212 1213define <2 x i64> @scalar_to_vector_v2i64(i64 %a) { 1214; CHECK-LABEL: scalar_to_vector_v2i64: 1215; CHECK: @ %bb.0: 1216; CHECK-NEXT: vmov.32 d0[0], r0 1217; CHECK-NEXT: vmov.32 d0[1], r1 1218; CHECK-NEXT: bx lr 1219 %b = insertelement <2 x i64> undef, i64 %a, i32 0 1220 ret <2 x i64> %b 1221} 1222 1223define <8 x i8> @testDUPv1i8(<1 x i8> %a) { 1224; CHECK-LABEL: testDUPv1i8: 1225; CHECK: @ %bb.0: 1226; CHECK-NEXT: vdup.8 d0, r0 1227; CHECK-NEXT: bx lr 1228 %b = extractelement <1 x i8> %a, i32 0 1229 %c = insertelement <8 x i8> undef, i8 %b, i32 0 1230 %d = insertelement <8 x i8> %c, i8 %b, i32 1 1231 %e = insertelement <8 x i8> %d, i8 %b, i32 2 1232 %f = insertelement <8 x i8> %e, i8 %b, i32 3 1233 %g = insertelement <8 x i8> %f, i8 %b, i32 4 1234 %h = insertelement <8 x i8> %g, i8 %b, i32 5 1235 %i = insertelement <8 x i8> %h, i8 %b, i32 6 1236 %j = insertelement <8 x i8> %i, i8 %b, i32 7 1237 ret <8 x i8> %j 1238} 1239 1240define <8 x i16> @testDUPv1i16(<1 x i16> %a) { 1241; CHECK-LABEL: testDUPv1i16: 1242; CHECK: @ %bb.0: 1243; CHECK-NEXT: vdup.16 q0, r0 1244; CHECK-NEXT: bx lr 1245 %b = extractelement <1 x i16> %a, i32 0 1246 %c = insertelement <8 x i16> undef, i16 %b, i32 0 1247 %d = insertelement <8 x i16> %c, i16 %b, i32 1 1248 %e = insertelement <8 x i16> %d, i16 %b, i32 2 1249 %f = insertelement <8 x i16> %e, i16 %b, i32 3 1250 %g = insertelement <8 x i16> %f, i16 %b, i32 4 1251 %h = insertelement <8 x i16> %g, i16 %b, i32 5 1252 %i = insertelement <8 x i16> %h, i16 %b, i32 6 1253 %j = insertelement <8 x i16> %i, i16 %b, i32 7 1254 ret <8 x i16> %j 1255} 1256 1257define <4 x i32> @testDUPv1i32(<1 x i32> %a) { 1258; CHECK-LABEL: testDUPv1i32: 1259; CHECK: @ %bb.0: 1260; CHECK-NEXT: vdup.32 q0, r0 1261; CHECK-NEXT: bx lr 1262 %b = extractelement <1 x i32> %a, i32 0 1263 %c = insertelement <4 x i32> undef, i32 %b, i32 0 1264 %d = insertelement <4 x i32> %c, i32 %b, i32 1 1265 %e = insertelement <4 x i32> %d, i32 %b, i32 2 1266 %f = insertelement <4 x i32> %e, i32 %b, i32 3 1267 ret <4 x i32> %f 1268} 1269 1270define <8 x i8> @getl(<16 x i8> %x) #0 { 1271; CHECK-LABEL: getl: 1272; CHECK: @ %bb.0: 1273; CHECK-NEXT: @ kill: def $d0 killed $d0 killed $q0 1274; CHECK-NEXT: bx lr 1275 %vecext = extractelement <16 x i8> %x, i32 0 1276 %vecinit = insertelement <8 x i8> undef, i8 %vecext, i32 0 1277 %vecext1 = extractelement <16 x i8> %x, i32 1 1278 %vecinit2 = insertelement <8 x i8> %vecinit, i8 %vecext1, i32 1 1279 %vecext3 = extractelement <16 x i8> %x, i32 2 1280 %vecinit4 = insertelement <8 x i8> %vecinit2, i8 %vecext3, i32 2 1281 %vecext5 = extractelement <16 x i8> %x, i32 3 1282 %vecinit6 = insertelement <8 x i8> %vecinit4, i8 %vecext5, i32 3 1283 %vecext7 = extractelement <16 x i8> %x, i32 4 1284 %vecinit8 = insertelement <8 x i8> %vecinit6, i8 %vecext7, i32 4 1285 %vecext9 = extractelement <16 x i8> %x, i32 5 1286 %vecinit10 = insertelement <8 x i8> %vecinit8, i8 %vecext9, i32 5 1287 %vecext11 = extractelement <16 x i8> %x, i32 6 1288 %vecinit12 = insertelement <8 x i8> %vecinit10, i8 %vecext11, i32 6 1289 %vecext13 = extractelement <16 x i8> %x, i32 7 1290 %vecinit14 = insertelement <8 x i8> %vecinit12, i8 %vecext13, i32 7 1291 ret <8 x i8> %vecinit14 1292} 1293 1294define <4 x i16> @test_extracts_inserts_varidx_extract(<8 x i16> %x, i32 %idx) { 1295; CHECK-LABEL: test_extracts_inserts_varidx_extract: 1296; CHECK: @ %bb.0: 1297; CHECK-NEXT: .save {r11} 1298; CHECK-NEXT: push {r11} 1299; CHECK-NEXT: .setfp r11, sp 1300; CHECK-NEXT: mov r11, sp 1301; CHECK-NEXT: .pad #28 1302; CHECK-NEXT: sub sp, sp, #28 1303; CHECK-NEXT: bfc sp, #0, #4 1304; CHECK-NEXT: vmov.u16 r1, d0[1] 1305; CHECK-NEXT: and r0, r0, #7 1306; CHECK-NEXT: vmov.u16 r2, d0[2] 1307; CHECK-NEXT: mov r3, sp 1308; CHECK-NEXT: vmov.u16 r12, d0[3] 1309; CHECK-NEXT: lsl r0, r0, #1 1310; CHECK-NEXT: vst1.64 {d0, d1}, [r3:128], r0 1311; CHECK-NEXT: vld1.16 {d0[0]}, [r3:16] 1312; CHECK-NEXT: vmov.16 d0[1], r1 1313; CHECK-NEXT: vmov.16 d0[2], r2 1314; CHECK-NEXT: vmov.16 d0[3], r12 1315; CHECK-NEXT: mov sp, r11 1316; CHECK-NEXT: pop {r11} 1317; CHECK-NEXT: bx lr 1318 %tmp = extractelement <8 x i16> %x, i32 %idx 1319 %tmp2 = insertelement <4 x i16> undef, i16 %tmp, i32 0 1320 %tmp3 = extractelement <8 x i16> %x, i32 1 1321 %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 1 1322 %tmp5 = extractelement <8 x i16> %x, i32 2 1323 %tmp6 = insertelement <4 x i16> %tmp4, i16 %tmp5, i32 2 1324 %tmp7 = extractelement <8 x i16> %x, i32 3 1325 %tmp8 = insertelement <4 x i16> %tmp6, i16 %tmp7, i32 3 1326 ret <4 x i16> %tmp8 1327} 1328 1329define <4 x i16> @test_extracts_inserts_varidx_insert(<8 x i16> %x, i32 %idx) { 1330; CHECK-LABEL: test_extracts_inserts_varidx_insert: 1331; CHECK: @ %bb.0: 1332; CHECK-NEXT: .pad #8 1333; CHECK-NEXT: sub sp, sp, #8 1334; CHECK-NEXT: vmov.u16 r1, d0[1] 1335; CHECK-NEXT: and r0, r0, #3 1336; CHECK-NEXT: vmov.u16 r2, d0[2] 1337; CHECK-NEXT: mov r3, sp 1338; CHECK-NEXT: vmov.u16 r12, d0[3] 1339; CHECK-NEXT: orr r0, r3, r0, lsl #1 1340; CHECK-NEXT: vst1.16 {d0[0]}, [r0:16] 1341; CHECK-NEXT: vldr d0, [sp] 1342; CHECK-NEXT: vmov.16 d0[1], r1 1343; CHECK-NEXT: vmov.16 d0[2], r2 1344; CHECK-NEXT: vmov.16 d0[3], r12 1345; CHECK-NEXT: add sp, sp, #8 1346; CHECK-NEXT: bx lr 1347 %tmp = extractelement <8 x i16> %x, i32 0 1348 %tmp2 = insertelement <4 x i16> undef, i16 %tmp, i32 %idx 1349 %tmp3 = extractelement <8 x i16> %x, i32 1 1350 %tmp4 = insertelement <4 x i16> %tmp2, i16 %tmp3, i32 1 1351 %tmp5 = extractelement <8 x i16> %x, i32 2 1352 %tmp6 = insertelement <4 x i16> %tmp4, i16 %tmp5, i32 2 1353 %tmp7 = extractelement <8 x i16> %x, i32 3 1354 %tmp8 = insertelement <4 x i16> %tmp6, i16 %tmp7, i32 3 1355 ret <4 x i16> %tmp8 1356} 1357 1358define <4 x i16> @test_dup_v2i32_v4i16(<2 x i32> %a) { 1359; CHECK-LABEL: test_dup_v2i32_v4i16: 1360; CHECK: @ %bb.0: @ %entry 1361; CHECK-NEXT: vmov.32 r0, d0[1] 1362; CHECK-NEXT: vmov.16 d16[1], r0 1363; CHECK-NEXT: vdup.16 d0, d16[1] 1364; CHECK-NEXT: bx lr 1365entry: 1366 %x = extractelement <2 x i32> %a, i32 1 1367 %vget_lane = trunc i32 %x to i16 1368 %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0 1369 %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1 1370 %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2 1371 %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3 1372 ret <4 x i16> %vecinit3.i 1373} 1374 1375define <8 x i16> @test_dup_v4i32_v8i16(<4 x i32> %a) { 1376; CHECK-LABEL: test_dup_v4i32_v8i16: 1377; CHECK: @ %bb.0: @ %entry 1378; CHECK-NEXT: vmov.32 r0, d1[1] 1379; CHECK-NEXT: vmov.16 d16[3], r0 1380; CHECK-NEXT: vdup.16 q0, d16[3] 1381; CHECK-NEXT: bx lr 1382entry: 1383 %x = extractelement <4 x i32> %a, i32 3 1384 %vget_lane = trunc i32 %x to i16 1385 %vecinit.i = insertelement <8 x i16> undef, i16 %vget_lane, i32 0 1386 %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %vget_lane, i32 1 1387 %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %vget_lane, i32 2 1388 %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %vget_lane, i32 3 1389 %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %vget_lane, i32 4 1390 %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %vget_lane, i32 5 1391 %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %vget_lane, i32 6 1392 %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %vget_lane, i32 7 1393 ret <8 x i16> %vecinit7.i 1394} 1395 1396define <4 x i16> @test_dup_v1i64_v4i16(<1 x i64> %a) { 1397; CHECK-LABEL: test_dup_v1i64_v4i16: 1398; CHECK: @ %bb.0: @ %entry 1399; CHECK-NEXT: vmov.32 r0, d0[0] 1400; CHECK-NEXT: vmov.16 d16[0], r0 1401; CHECK-NEXT: vdup.16 d0, d16[0] 1402; CHECK-NEXT: bx lr 1403entry: 1404 %x = extractelement <1 x i64> %a, i32 0 1405 %vget_lane = trunc i64 %x to i16 1406 %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0 1407 %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1 1408 %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2 1409 %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3 1410 ret <4 x i16> %vecinit3.i 1411} 1412 1413define <2 x i32> @test_dup_v1i64_v2i32(<1 x i64> %a) { 1414; CHECK-LABEL: test_dup_v1i64_v2i32: 1415; CHECK: @ %bb.0: @ %entry 1416; CHECK-NEXT: vdup.32 d0, d0[0] 1417; CHECK-NEXT: bx lr 1418entry: 1419 %x = extractelement <1 x i64> %a, i32 0 1420 %vget_lane = trunc i64 %x to i32 1421 %vecinit.i = insertelement <2 x i32> undef, i32 %vget_lane, i32 0 1422 %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %vget_lane, i32 1 1423 ret <2 x i32> %vecinit1.i 1424} 1425 1426define <8 x i16> @test_dup_v2i64_v8i16(<2 x i64> %a) { 1427; CHECK-LABEL: test_dup_v2i64_v8i16: 1428; CHECK: @ %bb.0: @ %entry 1429; CHECK-NEXT: vmov.32 r0, d1[0] 1430; CHECK-NEXT: vmov.16 d16[2], r0 1431; CHECK-NEXT: vdup.16 q0, d16[2] 1432; CHECK-NEXT: bx lr 1433entry: 1434 %x = extractelement <2 x i64> %a, i32 1 1435 %vget_lane = trunc i64 %x to i16 1436 %vecinit.i = insertelement <8 x i16> undef, i16 %vget_lane, i32 0 1437 %vecinit1.i = insertelement <8 x i16> %vecinit.i, i16 %vget_lane, i32 1 1438 %vecinit2.i = insertelement <8 x i16> %vecinit1.i, i16 %vget_lane, i32 2 1439 %vecinit3.i = insertelement <8 x i16> %vecinit2.i, i16 %vget_lane, i32 3 1440 %vecinit4.i = insertelement <8 x i16> %vecinit3.i, i16 %vget_lane, i32 4 1441 %vecinit5.i = insertelement <8 x i16> %vecinit4.i, i16 %vget_lane, i32 5 1442 %vecinit6.i = insertelement <8 x i16> %vecinit5.i, i16 %vget_lane, i32 6 1443 %vecinit7.i = insertelement <8 x i16> %vecinit6.i, i16 %vget_lane, i32 7 1444 ret <8 x i16> %vecinit7.i 1445} 1446 1447define <4 x i32> @test_dup_v2i64_v4i32(<2 x i64> %a) { 1448; CHECK-LABEL: test_dup_v2i64_v4i32: 1449; CHECK: @ %bb.0: @ %entry 1450; CHECK-NEXT: vdup.32 q0, d1[0] 1451; CHECK-NEXT: bx lr 1452entry: 1453 %x = extractelement <2 x i64> %a, i32 1 1454 %vget_lane = trunc i64 %x to i32 1455 %vecinit.i = insertelement <4 x i32> undef, i32 %vget_lane, i32 0 1456 %vecinit1.i = insertelement <4 x i32> %vecinit.i, i32 %vget_lane, i32 1 1457 %vecinit2.i = insertelement <4 x i32> %vecinit1.i, i32 %vget_lane, i32 2 1458 %vecinit3.i = insertelement <4 x i32> %vecinit2.i, i32 %vget_lane, i32 3 1459 ret <4 x i32> %vecinit3.i 1460} 1461 1462define <4 x i16> @test_dup_v4i32_v4i16(<4 x i32> %a) { 1463; CHECK-LABEL: test_dup_v4i32_v4i16: 1464; CHECK: @ %bb.0: @ %entry 1465; CHECK-NEXT: vmov.32 r0, d0[1] 1466; CHECK-NEXT: vmov.16 d16[1], r0 1467; CHECK-NEXT: vdup.16 d0, d16[1] 1468; CHECK-NEXT: bx lr 1469entry: 1470 %x = extractelement <4 x i32> %a, i32 1 1471 %vget_lane = trunc i32 %x to i16 1472 %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0 1473 %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1 1474 %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2 1475 %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3 1476 ret <4 x i16> %vecinit3.i 1477} 1478 1479define <4 x i16> @test_dup_v2i64_v4i16(<2 x i64> %a) { 1480; CHECK-LABEL: test_dup_v2i64_v4i16: 1481; CHECK: @ %bb.0: @ %entry 1482; CHECK-NEXT: vmov.32 r0, d0[0] 1483; CHECK-NEXT: vmov.16 d16[0], r0 1484; CHECK-NEXT: vdup.16 d0, d16[0] 1485; CHECK-NEXT: bx lr 1486entry: 1487 %x = extractelement <2 x i64> %a, i32 0 1488 %vget_lane = trunc i64 %x to i16 1489 %vecinit.i = insertelement <4 x i16> undef, i16 %vget_lane, i32 0 1490 %vecinit1.i = insertelement <4 x i16> %vecinit.i, i16 %vget_lane, i32 1 1491 %vecinit2.i = insertelement <4 x i16> %vecinit1.i, i16 %vget_lane, i32 2 1492 %vecinit3.i = insertelement <4 x i16> %vecinit2.i, i16 %vget_lane, i32 3 1493 ret <4 x i16> %vecinit3.i 1494} 1495 1496define <2 x i32> @test_dup_v2i64_v2i32(<2 x i64> %a) { 1497; CHECK-LABEL: test_dup_v2i64_v2i32: 1498; CHECK: @ %bb.0: @ %entry 1499; CHECK-NEXT: vdup.32 d0, d0[0] 1500; CHECK-NEXT: bx lr 1501entry: 1502 %x = extractelement <2 x i64> %a, i32 0 1503 %vget_lane = trunc i64 %x to i32 1504 %vecinit.i = insertelement <2 x i32> undef, i32 %vget_lane, i32 0 1505 %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %vget_lane, i32 1 1506 ret <2 x i32> %vecinit1.i 1507} 1508 1509define <2 x i32> @test_concat_undef_v1i32(<2 x i32> %a) { 1510; CHECK-LABEL: test_concat_undef_v1i32: 1511; CHECK: @ %bb.0: @ %entry 1512; CHECK-NEXT: vdup.32 d0, d0[0] 1513; CHECK-NEXT: bx lr 1514entry: 1515 %0 = extractelement <2 x i32> %a, i32 0 1516 %vecinit1.i = insertelement <2 x i32> undef, i32 %0, i32 1 1517 ret <2 x i32> %vecinit1.i 1518} 1519 1520define <2 x i32> @test_concat_same_v1i32_v1i32(<2 x i32> %a) { 1521; CHECK-LABEL: test_concat_same_v1i32_v1i32: 1522; CHECK: @ %bb.0: @ %entry 1523; CHECK-NEXT: vdup.32 d0, d0[0] 1524; CHECK-NEXT: bx lr 1525entry: 1526 %0 = extractelement <2 x i32> %a, i32 0 1527 %vecinit.i = insertelement <2 x i32> undef, i32 %0, i32 0 1528 %vecinit1.i = insertelement <2 x i32> %vecinit.i, i32 %0, i32 1 1529 ret <2 x i32> %vecinit1.i 1530} 1531 1532 1533define <16 x i8> @test_concat_v16i8_v16i8_v16i8(<16 x i8> %x, <16 x i8> %y) #0 { 1534; CHECK-LABEL: test_concat_v16i8_v16i8_v16i8: 1535; CHECK: @ %bb.0: @ %entry 1536; CHECK-NEXT: vmov.f64 d1, d2 1537; CHECK-NEXT: bx lr 1538entry: 1539 %vecinit30 = shufflevector <16 x i8> %x, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> 1540 ret <16 x i8> %vecinit30 1541} 1542 1543define <16 x i8> @test_concat_v16i8_v8i8_v16i8(<8 x i8> %x, <16 x i8> %y) #0 { 1544; CHECK-LABEL: test_concat_v16i8_v8i8_v16i8: 1545; CHECK: @ %bb.0: @ %entry 1546; CHECK-NEXT: @ kill: def $d0 killed $d0 def $q0 1547; CHECK-NEXT: vmov.f64 d1, d2 1548; CHECK-NEXT: bx lr 1549entry: 1550 %vecext = extractelement <8 x i8> %x, i32 0 1551 %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0 1552 %vecext1 = extractelement <8 x i8> %x, i32 1 1553 %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1 1554 %vecext3 = extractelement <8 x i8> %x, i32 2 1555 %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2 1556 %vecext5 = extractelement <8 x i8> %x, i32 3 1557 %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3 1558 %vecext7 = extractelement <8 x i8> %x, i32 4 1559 %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4 1560 %vecext9 = extractelement <8 x i8> %x, i32 5 1561 %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5 1562 %vecext11 = extractelement <8 x i8> %x, i32 6 1563 %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6 1564 %vecext13 = extractelement <8 x i8> %x, i32 7 1565 %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7 1566 %vecinit30 = shufflevector <16 x i8> %vecinit14, <16 x i8> %y, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23> 1567 ret <16 x i8> %vecinit30 1568} 1569 1570define <16 x i8> @test_concat_v16i8_v16i8_v8i8(<16 x i8> %x, <8 x i8> %y) #0 { 1571; CHECK-LABEL: test_concat_v16i8_v16i8_v8i8: 1572; CHECK: @ %bb.0: @ %entry 1573; CHECK-NEXT: vmov.f64 d1, d2 1574; CHECK-NEXT: bx lr 1575entry: 1576 %vecext = extractelement <16 x i8> %x, i32 0 1577 %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0 1578 %vecext1 = extractelement <16 x i8> %x, i32 1 1579 %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1 1580 %vecext3 = extractelement <16 x i8> %x, i32 2 1581 %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2 1582 %vecext5 = extractelement <16 x i8> %x, i32 3 1583 %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3 1584 %vecext7 = extractelement <16 x i8> %x, i32 4 1585 %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4 1586 %vecext9 = extractelement <16 x i8> %x, i32 5 1587 %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5 1588 %vecext11 = extractelement <16 x i8> %x, i32 6 1589 %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6 1590 %vecext13 = extractelement <16 x i8> %x, i32 7 1591 %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7 1592 %vecext15 = extractelement <8 x i8> %y, i32 0 1593 %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8 1594 %vecext17 = extractelement <8 x i8> %y, i32 1 1595 %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9 1596 %vecext19 = extractelement <8 x i8> %y, i32 2 1597 %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10 1598 %vecext21 = extractelement <8 x i8> %y, i32 3 1599 %vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11 1600 %vecext23 = extractelement <8 x i8> %y, i32 4 1601 %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12 1602 %vecext25 = extractelement <8 x i8> %y, i32 5 1603 %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13 1604 %vecext27 = extractelement <8 x i8> %y, i32 6 1605 %vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14 1606 %vecext29 = extractelement <8 x i8> %y, i32 7 1607 %vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15 1608 ret <16 x i8> %vecinit30 1609} 1610 1611define <16 x i8> @test_concat_v16i8_v8i8_v8i8(<8 x i8> %x, <8 x i8> %y) #0 { 1612; CHECK-LABEL: test_concat_v16i8_v8i8_v8i8: 1613; CHECK: @ %bb.0: @ %entry 1614; CHECK-NEXT: @ kill: def $d1 killed $d1 killed $q0 def $q0 1615; CHECK-NEXT: @ kill: def $d0 killed $d0 killed $q0 def $q0 1616; CHECK-NEXT: bx lr 1617entry: 1618 %vecext = extractelement <8 x i8> %x, i32 0 1619 %vecinit = insertelement <16 x i8> undef, i8 %vecext, i32 0 1620 %vecext1 = extractelement <8 x i8> %x, i32 1 1621 %vecinit2 = insertelement <16 x i8> %vecinit, i8 %vecext1, i32 1 1622 %vecext3 = extractelement <8 x i8> %x, i32 2 1623 %vecinit4 = insertelement <16 x i8> %vecinit2, i8 %vecext3, i32 2 1624 %vecext5 = extractelement <8 x i8> %x, i32 3 1625 %vecinit6 = insertelement <16 x i8> %vecinit4, i8 %vecext5, i32 3 1626 %vecext7 = extractelement <8 x i8> %x, i32 4 1627 %vecinit8 = insertelement <16 x i8> %vecinit6, i8 %vecext7, i32 4 1628 %vecext9 = extractelement <8 x i8> %x, i32 5 1629 %vecinit10 = insertelement <16 x i8> %vecinit8, i8 %vecext9, i32 5 1630 %vecext11 = extractelement <8 x i8> %x, i32 6 1631 %vecinit12 = insertelement <16 x i8> %vecinit10, i8 %vecext11, i32 6 1632 %vecext13 = extractelement <8 x i8> %x, i32 7 1633 %vecinit14 = insertelement <16 x i8> %vecinit12, i8 %vecext13, i32 7 1634 %vecext15 = extractelement <8 x i8> %y, i32 0 1635 %vecinit16 = insertelement <16 x i8> %vecinit14, i8 %vecext15, i32 8 1636 %vecext17 = extractelement <8 x i8> %y, i32 1 1637 %vecinit18 = insertelement <16 x i8> %vecinit16, i8 %vecext17, i32 9 1638 %vecext19 = extractelement <8 x i8> %y, i32 2 1639 %vecinit20 = insertelement <16 x i8> %vecinit18, i8 %vecext19, i32 10 1640 %vecext21 = extractelement <8 x i8> %y, i32 3 1641 %vecinit22 = insertelement <16 x i8> %vecinit20, i8 %vecext21, i32 11 1642 %vecext23 = extractelement <8 x i8> %y, i32 4 1643 %vecinit24 = insertelement <16 x i8> %vecinit22, i8 %vecext23, i32 12 1644 %vecext25 = extractelement <8 x i8> %y, i32 5 1645 %vecinit26 = insertelement <16 x i8> %vecinit24, i8 %vecext25, i32 13 1646 %vecext27 = extractelement <8 x i8> %y, i32 6 1647 %vecinit28 = insertelement <16 x i8> %vecinit26, i8 %vecext27, i32 14 1648 %vecext29 = extractelement <8 x i8> %y, i32 7 1649 %vecinit30 = insertelement <16 x i8> %vecinit28, i8 %vecext29, i32 15 1650 ret <16 x i8> %vecinit30 1651} 1652 1653define <8 x i16> @test_concat_v8i16_v8i16_v8i16(<8 x i16> %x, <8 x i16> %y) #0 { 1654; CHECK-LABEL: test_concat_v8i16_v8i16_v8i16: 1655; CHECK: @ %bb.0: @ %entry 1656; CHECK-NEXT: vmov.f64 d1, d2 1657; CHECK-NEXT: bx lr 1658entry: 1659 %vecinit14 = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> 1660 ret <8 x i16> %vecinit14 1661} 1662 1663define <8 x i16> @test_concat_v8i16_v4i16_v8i16(<4 x i16> %x, <8 x i16> %y) #0 { 1664; CHECK-LABEL: test_concat_v8i16_v4i16_v8i16: 1665; CHECK: @ %bb.0: @ %entry 1666; CHECK-NEXT: @ kill: def $d0 killed $d0 def $q0 1667; CHECK-NEXT: vmov.f64 d1, d2 1668; CHECK-NEXT: bx lr 1669entry: 1670 %vecext = extractelement <4 x i16> %x, i32 0 1671 %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0 1672 %vecext1 = extractelement <4 x i16> %x, i32 1 1673 %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1 1674 %vecext3 = extractelement <4 x i16> %x, i32 2 1675 %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2 1676 %vecext5 = extractelement <4 x i16> %x, i32 3 1677 %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3 1678 %vecinit14 = shufflevector <8 x i16> %vecinit6, <8 x i16> %y, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> 1679 ret <8 x i16> %vecinit14 1680} 1681 1682define <8 x i16> @test_concat_v8i16_v8i16_v4i16(<8 x i16> %x, <4 x i16> %y) #0 { 1683; CHECK-LABEL: test_concat_v8i16_v8i16_v4i16: 1684; CHECK: @ %bb.0: @ %entry 1685; CHECK-NEXT: vmov.f64 d1, d2 1686; CHECK-NEXT: bx lr 1687entry: 1688 %vecext = extractelement <8 x i16> %x, i32 0 1689 %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0 1690 %vecext1 = extractelement <8 x i16> %x, i32 1 1691 %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1 1692 %vecext3 = extractelement <8 x i16> %x, i32 2 1693 %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2 1694 %vecext5 = extractelement <8 x i16> %x, i32 3 1695 %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3 1696 %vecext7 = extractelement <4 x i16> %y, i32 0 1697 %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4 1698 %vecext9 = extractelement <4 x i16> %y, i32 1 1699 %vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5 1700 %vecext11 = extractelement <4 x i16> %y, i32 2 1701 %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6 1702 %vecext13 = extractelement <4 x i16> %y, i32 3 1703 %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7 1704 ret <8 x i16> %vecinit14 1705} 1706 1707define <8 x i16> @test_concat_v8i16_v4i16_v4i16(<4 x i16> %x, <4 x i16> %y) #0 { 1708; CHECK-LABEL: test_concat_v8i16_v4i16_v4i16: 1709; CHECK: @ %bb.0: @ %entry 1710; CHECK-NEXT: @ kill: def $d1 killed $d1 killed $q0 def $q0 1711; CHECK-NEXT: @ kill: def $d0 killed $d0 killed $q0 def $q0 1712; CHECK-NEXT: bx lr 1713entry: 1714 %vecext = extractelement <4 x i16> %x, i32 0 1715 %vecinit = insertelement <8 x i16> undef, i16 %vecext, i32 0 1716 %vecext1 = extractelement <4 x i16> %x, i32 1 1717 %vecinit2 = insertelement <8 x i16> %vecinit, i16 %vecext1, i32 1 1718 %vecext3 = extractelement <4 x i16> %x, i32 2 1719 %vecinit4 = insertelement <8 x i16> %vecinit2, i16 %vecext3, i32 2 1720 %vecext5 = extractelement <4 x i16> %x, i32 3 1721 %vecinit6 = insertelement <8 x i16> %vecinit4, i16 %vecext5, i32 3 1722 %vecext7 = extractelement <4 x i16> %y, i32 0 1723 %vecinit8 = insertelement <8 x i16> %vecinit6, i16 %vecext7, i32 4 1724 %vecext9 = extractelement <4 x i16> %y, i32 1 1725 %vecinit10 = insertelement <8 x i16> %vecinit8, i16 %vecext9, i32 5 1726 %vecext11 = extractelement <4 x i16> %y, i32 2 1727 %vecinit12 = insertelement <8 x i16> %vecinit10, i16 %vecext11, i32 6 1728 %vecext13 = extractelement <4 x i16> %y, i32 3 1729 %vecinit14 = insertelement <8 x i16> %vecinit12, i16 %vecext13, i32 7 1730 ret <8 x i16> %vecinit14 1731} 1732 1733define <4 x i32> @test_concat_v4i32_v4i32_v4i32(<4 x i32> %x, <4 x i32> %y) #0 { 1734; CHECK-LABEL: test_concat_v4i32_v4i32_v4i32: 1735; CHECK: @ %bb.0: @ %entry 1736; CHECK-NEXT: vmov.f64 d1, d2 1737; CHECK-NEXT: bx lr 1738entry: 1739 %vecinit6 = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5> 1740 ret <4 x i32> %vecinit6 1741} 1742 1743define <4 x i32> @test_concat_v4i32_v2i32_v4i32(<2 x i32> %x, <4 x i32> %y) #0 { 1744; CHECK-LABEL: test_concat_v4i32_v2i32_v4i32: 1745; CHECK: @ %bb.0: @ %entry 1746; CHECK-NEXT: @ kill: def $d0 killed $d0 def $q0 1747; CHECK-NEXT: vmov.f64 d1, d2 1748; CHECK-NEXT: bx lr 1749entry: 1750 %vecext = extractelement <2 x i32> %x, i32 0 1751 %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0 1752 %vecext1 = extractelement <2 x i32> %x, i32 1 1753 %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1 1754 %vecinit6 = shufflevector <4 x i32> %vecinit2, <4 x i32> %y, <4 x i32> <i32 0, i32 1, i32 4, i32 5> 1755 ret <4 x i32> %vecinit6 1756} 1757 1758define <4 x i32> @test_concat_v4i32_v4i32_v2i32(<4 x i32> %x, <2 x i32> %y) #0 { 1759; CHECK-LABEL: test_concat_v4i32_v4i32_v2i32: 1760; CHECK: @ %bb.0: @ %entry 1761; CHECK-NEXT: vmov.f64 d1, d2 1762; CHECK-NEXT: bx lr 1763entry: 1764 %vecext = extractelement <4 x i32> %x, i32 0 1765 %vecinit = insertelement <4 x i32> undef, i32 %vecext, i32 0 1766 %vecext1 = extractelement <4 x i32> %x, i32 1 1767 %vecinit2 = insertelement <4 x i32> %vecinit, i32 %vecext1, i32 1 1768 %vecext3 = extractelement <2 x i32> %y, i32 0 1769 %vecinit4 = insertelement <4 x i32> %vecinit2, i32 %vecext3, i32 2 1770 %vecext5 = extractelement <2 x i32> %y, i32 1 1771 %vecinit6 = insertelement <4 x i32> %vecinit4, i32 %vecext5, i32 3 1772 ret <4 x i32> %vecinit6 1773} 1774 1775define <4 x i32> @test_concat_v4i32_v2i32_v2i32(<2 x i32> %x, <2 x i32> %y) #0 { 1776; CHECK-LABEL: test_concat_v4i32_v2i32_v2i32: 1777; CHECK: @ %bb.0: @ %entry 1778; CHECK-NEXT: @ kill: def $d1 killed $d1 killed $q0 def $q0 1779; CHECK-NEXT: @ kill: def $d0 killed $d0 killed $q0 def $q0 1780; CHECK-NEXT: bx lr 1781entry: 1782 %vecinit6 = shufflevector <2 x i32> %x, <2 x i32> %y, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1783 ret <4 x i32> %vecinit6 1784} 1785 1786define <2 x i64> @test_concat_v2i64_v2i64_v2i64(<2 x i64> %x, <2 x i64> %y) #0 { 1787; CHECK-LABEL: test_concat_v2i64_v2i64_v2i64: 1788; CHECK: @ %bb.0: @ %entry 1789; CHECK-NEXT: vmov.f64 d1, d2 1790; CHECK-NEXT: bx lr 1791entry: 1792 %vecinit2 = shufflevector <2 x i64> %x, <2 x i64> %y, <2 x i32> <i32 0, i32 2> 1793 ret <2 x i64> %vecinit2 1794} 1795 1796define <2 x i64> @test_concat_v2i64_v1i64_v2i64(<1 x i64> %x, <2 x i64> %y) #0 { 1797; CHECK-LABEL: test_concat_v2i64_v1i64_v2i64: 1798; CHECK: @ %bb.0: @ %entry 1799; CHECK-NEXT: @ kill: def $d0 killed $d0 def $q0 1800; CHECK-NEXT: vmov.f64 d1, d2 1801; CHECK-NEXT: bx lr 1802entry: 1803 %vecext = extractelement <1 x i64> %x, i32 0 1804 %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0 1805 %vecinit2 = shufflevector <2 x i64> %vecinit, <2 x i64> %y, <2 x i32> <i32 0, i32 2> 1806 ret <2 x i64> %vecinit2 1807} 1808 1809define <2 x i64> @test_concat_v2i64_v2i64_v1i64(<2 x i64> %x, <1 x i64> %y) #0 { 1810; CHECK-LABEL: test_concat_v2i64_v2i64_v1i64: 1811; CHECK: @ %bb.0: @ %entry 1812; CHECK-NEXT: vmov.f64 d1, d2 1813; CHECK-NEXT: bx lr 1814entry: 1815 %vecext = extractelement <2 x i64> %x, i32 0 1816 %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0 1817 %vecext1 = extractelement <1 x i64> %y, i32 0 1818 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1 1819 ret <2 x i64> %vecinit2 1820} 1821 1822define <2 x i64> @test_concat_v2i64_v1i64_v1i64(<1 x i64> %x, <1 x i64> %y) #0 { 1823; CHECK-LABEL: test_concat_v2i64_v1i64_v1i64: 1824; CHECK: @ %bb.0: @ %entry 1825; CHECK-NEXT: @ kill: def $d1 killed $d1 killed $q0 def $q0 1826; CHECK-NEXT: @ kill: def $d0 killed $d0 killed $q0 def $q0 1827; CHECK-NEXT: bx lr 1828entry: 1829 %vecext = extractelement <1 x i64> %x, i32 0 1830 %vecinit = insertelement <2 x i64> undef, i64 %vecext, i32 0 1831 %vecext1 = extractelement <1 x i64> %y, i32 0 1832 %vecinit2 = insertelement <2 x i64> %vecinit, i64 %vecext1, i32 1 1833 ret <2 x i64> %vecinit2 1834} 1835 1836 1837define <4 x i16> @concat_vector_v4i16_const() { 1838; CHECK-LABEL: concat_vector_v4i16_const: 1839; CHECK: @ %bb.0: 1840; CHECK-NEXT: vmov.i32 d0, #0x0 1841; CHECK-NEXT: bx lr 1842 %r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <4 x i32> zeroinitializer 1843 ret <4 x i16> %r 1844} 1845 1846define <4 x i16> @concat_vector_v4i16_const_one() { 1847; CHECK-LABEL: concat_vector_v4i16_const_one: 1848; CHECK: @ %bb.0: 1849; CHECK-NEXT: vmov.i16 d0, #0x1 1850; CHECK-NEXT: bx lr 1851 %r = shufflevector <1 x i16> <i16 1>, <1 x i16> undef, <4 x i32> zeroinitializer 1852 ret <4 x i16> %r 1853} 1854 1855define <4 x i32> @concat_vector_v4i32_const() { 1856; CHECK-LABEL: concat_vector_v4i32_const: 1857; CHECK: @ %bb.0: 1858; CHECK-NEXT: vmov.i32 q0, #0x0 1859; CHECK-NEXT: bx lr 1860 %r = shufflevector <1 x i32> zeroinitializer, <1 x i32> undef, <4 x i32> zeroinitializer 1861 ret <4 x i32> %r 1862} 1863 1864define <8 x i8> @concat_vector_v8i8_const() { 1865; CHECK-LABEL: concat_vector_v8i8_const: 1866; CHECK: @ %bb.0: 1867; CHECK-NEXT: vmov.i32 d0, #0x0 1868; CHECK-NEXT: bx lr 1869 %r = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <8 x i32> zeroinitializer 1870 ret <8 x i8> %r 1871} 1872 1873define <8 x i16> @concat_vector_v8i16_const() { 1874; CHECK-LABEL: concat_vector_v8i16_const: 1875; CHECK: @ %bb.0: 1876; CHECK-NEXT: vmov.i32 q0, #0x0 1877; CHECK-NEXT: bx lr 1878 %r = shufflevector <1 x i16> zeroinitializer, <1 x i16> undef, <8 x i32> zeroinitializer 1879 ret <8 x i16> %r 1880} 1881 1882define <8 x i16> @concat_vector_v8i16_const_one() { 1883; CHECK-LABEL: concat_vector_v8i16_const_one: 1884; CHECK: @ %bb.0: 1885; CHECK-NEXT: vmov.i16 q0, #0x1 1886; CHECK-NEXT: bx lr 1887 %r = shufflevector <1 x i16> <i16 1>, <1 x i16> undef, <8 x i32> zeroinitializer 1888 ret <8 x i16> %r 1889} 1890 1891define <16 x i8> @concat_vector_v16i8_const() { 1892; CHECK-LABEL: concat_vector_v16i8_const: 1893; CHECK: @ %bb.0: 1894; CHECK-NEXT: vmov.i32 q0, #0x0 1895; CHECK-NEXT: bx lr 1896 %r = shufflevector <1 x i8> zeroinitializer, <1 x i8> undef, <16 x i32> zeroinitializer 1897 ret <16 x i8> %r 1898} 1899 1900define <4 x i16> @concat_vector_v4i16(<1 x i16> %a) { 1901; CHECK-LABEL: concat_vector_v4i16: 1902; CHECK: @ %bb.0: 1903; CHECK-NEXT: vdup.16 d0, r0 1904; CHECK-NEXT: bx lr 1905 %r = shufflevector <1 x i16> %a, <1 x i16> undef, <4 x i32> zeroinitializer 1906 ret <4 x i16> %r 1907} 1908 1909define <4 x i32> @concat_vector_v4i32(<1 x i32> %a) { 1910; CHECK-LABEL: concat_vector_v4i32: 1911; CHECK: @ %bb.0: 1912; CHECK-NEXT: vdup.32 q0, r0 1913; CHECK-NEXT: bx lr 1914 %r = shufflevector <1 x i32> %a, <1 x i32> undef, <4 x i32> zeroinitializer 1915 ret <4 x i32> %r 1916} 1917 1918define <8 x i8> @concat_vector_v8i8(<1 x i8> %a) { 1919; CHECK-LABEL: concat_vector_v8i8: 1920; CHECK: @ %bb.0: 1921; CHECK-NEXT: vdup.8 d0, r0 1922; CHECK-NEXT: bx lr 1923 %r = shufflevector <1 x i8> %a, <1 x i8> undef, <8 x i32> zeroinitializer 1924 ret <8 x i8> %r 1925} 1926 1927define <8 x i16> @concat_vector_v8i16(<1 x i16> %a) { 1928; CHECK-LABEL: concat_vector_v8i16: 1929; CHECK: @ %bb.0: 1930; CHECK-NEXT: vdup.16 q0, r0 1931; CHECK-NEXT: bx lr 1932 %r = shufflevector <1 x i16> %a, <1 x i16> undef, <8 x i32> zeroinitializer 1933 ret <8 x i16> %r 1934} 1935 1936define <16 x i8> @concat_vector_v16i8(<1 x i8> %a) { 1937; CHECK-LABEL: concat_vector_v16i8: 1938; CHECK: @ %bb.0: 1939; CHECK-NEXT: vdup.8 q0, r0 1940; CHECK-NEXT: bx lr 1941 %r = shufflevector <1 x i8> %a, <1 x i8> undef, <16 x i32> zeroinitializer 1942 ret <16 x i8> %r 1943} 1944