; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-LE
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-LE
; RUN: llc -mtriple=thumbebv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-BE

; Splat of a scalar i32 argument: the value is inserted into lane 0 and then
; broadcast to all four lanes with an all-zero shuffle mask.
define arm_aapcs_vfpcc <4 x i32> @vdup_i32(i32 %src) {
; CHECK-LE-LABEL: vdup_i32:
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: vdup.32 q0, r0
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: vdup_i32:
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: vdup.32 q1, r0
; CHECK-BE-NEXT: vrev64.32 q0, q1
; CHECK-BE-NEXT: bx lr
entry:
  %head = insertelement <4 x i32> undef, i32 %src, i32 0
  %splat = shufflevector <4 x i32> %head, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %splat
}

; Same splat pattern for an i16 scalar broadcast to eight lanes.
define arm_aapcs_vfpcc <8 x i16> @vdup_i16(i16 %src) {
; CHECK-LE-LABEL: vdup_i16:
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: vdup.16 q0, r0
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: vdup_i16:
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: vdup.16 q1, r0
; CHECK-BE-NEXT: vrev64.16 q0, q1
; CHECK-BE-NEXT: bx lr
entry:
  %head = insertelement <8 x i16> undef, i16 %src, i32 0
  %splat = shufflevector <8 x i16> %head, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %splat
}

; Same splat pattern for an i8 scalar broadcast to sixteen lanes.
define arm_aapcs_vfpcc <16 x i8> @vdup_i8(i8 %src) {
; CHECK-LE-LABEL: vdup_i8:
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: vdup.8 q0, r0
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: vdup_i8:
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: vdup.8 q1, r0
; CHECK-BE-NEXT: vrev64.8 q0, q1
; CHECK-BE-NEXT: bx lr
entry:
  %head = insertelement <16 x i8> undef, i8 %src, i32 0
  %splat = shufflevector <16 x i8> %head, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %splat
}

; Splat of an i64 scalar to two lanes. The expected output is a pair of
; two-lane vmov instructions fed from r0/r1 rather than a vdup.
define arm_aapcs_vfpcc <2 x i64> @vdup_i64(i64 %src) {
; CHECK-LE-LABEL: vdup_i64:
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: vmov q0[2], q0[0], r0, r0
; CHECK-LE-NEXT: vmov q0[3], q0[1], r1, r1
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: vdup_i64:
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: vmov q1[2], q1[0], r0, r0
; CHECK-BE-NEXT: vmov q1[3], q1[1], r1, r1
; CHECK-BE-NEXT: vrev64.32 q0, q1
; CHECK-BE-NEXT: bx lr
entry:
  %head = insertelement <2 x i64> undef, i64 %src, i32 0
  %splat = shufflevector <2 x i64> %head, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %splat
}

; Splat of a float argument taken directly from the incoming parameter.
define arm_aapcs_vfpcc <4 x float> @vdup_f32_1(float %src) {
; CHECK-LE-LABEL: vdup_f32_1:
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: vmov r0, s0
; CHECK-LE-NEXT: vdup.32 q0, r0
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: vdup_f32_1:
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: vmov r0, s0
; CHECK-BE-NEXT: vdup.32 q1, r0
; CHECK-BE-NEXT: vrev64.32 q0, q1
; CHECK-BE-NEXT: bx lr
entry:
  %head = insertelement <4 x float> undef, float %src, i32 0
  %splat = shufflevector <4 x float> %head, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %splat
}

; Splat of a float that is first computed by an fadd of the two arguments.
define arm_aapcs_vfpcc <4 x float> @vdup_f32_2(float %src1, float %src2) {
; CHECK-LE-LABEL: vdup_f32_2:
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: vadd.f32 s0, s0, s1
; CHECK-LE-NEXT: vmov r0, s0
; CHECK-LE-NEXT: vdup.32 q0, r0
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: vdup_f32_2:
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: vadd.f32 s0, s0, s1
; CHECK-BE-NEXT: vmov r0, s0
; CHECK-BE-NEXT: vdup.32 q1, r0
; CHECK-BE-NEXT: vrev64.32 q0, q1
; CHECK-BE-NEXT: bx lr
entry:
  %sum = fadd float %src1, %src2
  %head = insertelement <4 x float> undef, float %sum, i32 0
  %splat = shufflevector <4 x float> %head, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %splat
}

; Float argument bitcast to i32, splatted as <4 x i32>, then bitcast back to
; <4 x float>.
define arm_aapcs_vfpcc <4 x float> @vdup_f32_1bc(float %src) {
; CHECK-LE-LABEL: vdup_f32_1bc:
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: vmov r0, s0
; CHECK-LE-NEXT: vdup.32 q0, r0
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: vdup_f32_1bc:
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: vmov r0, s0
; CHECK-BE-NEXT: vdup.32 q1, r0
; CHECK-BE-NEXT: vrev64.32 q0, q1
; CHECK-BE-NEXT: bx lr
entry:
  %bits = bitcast float %src to i32
  %head = insertelement <4 x i32> undef, i32 %bits, i32 0
  %splat = shufflevector <4 x i32> %head, <4 x i32> undef, <4 x i32> zeroinitializer
  %splatfp = bitcast <4 x i32> %splat to <4 x float>
  ret <4 x float> %splatfp
}

; As vdup_f32_1bc, but the splatted value is the result of an fadd.
define arm_aapcs_vfpcc <4 x float> @vdup_f32_2bc(float %src1, float %src2) {
; CHECK-LE-LABEL: vdup_f32_2bc:
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: vadd.f32 s0, s0, s1
; CHECK-LE-NEXT: vmov r0, s0
; CHECK-LE-NEXT: vdup.32 q0, r0
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: vdup_f32_2bc:
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: vadd.f32 s0, s0, s1
; CHECK-BE-NEXT: vmov r0, s0
; CHECK-BE-NEXT: vdup.32 q1, r0
; CHECK-BE-NEXT: vrev64.32 q0, q1
; CHECK-BE-NEXT: bx lr
entry:
  %sum = fadd float %src1, %src2
  %bits = bitcast float %sum to i32
  %head = insertelement <4 x i32> undef, i32 %bits, i32 0
  %splat = shufflevector <4 x i32> %head, <4 x i32> undef, <4 x i32> zeroinitializer
  %splatfp = bitcast <4 x i32> %splat to <4 x float>
  ret <4 x float> %splatfp
}

; Splat of a half value produced by an fadd of the two (unnamed) arguments.
define arm_aapcs_vfpcc <8 x half> @vdup_f16(half %0, half %1) {
; CHECK-LE-LABEL: vdup_f16:
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: vadd.f16 s0, s0, s1
; CHECK-LE-NEXT: vmov.f16 r0, s0
; CHECK-LE-NEXT: vdup.16 q0, r0
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: vdup_f16:
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: vadd.f16 s0, s0, s1
; CHECK-BE-NEXT: vmov.f16 r0, s0
; CHECK-BE-NEXT: vdup.16 q1, r0
; CHECK-BE-NEXT: vrev64.16 q0, q1
; CHECK-BE-NEXT: bx lr
entry:
  %sum = fadd half %0, %1
  %head = insertelement <8 x half> undef, half %sum, i32 0
  %splat = shufflevector <8 x half> %head, <8 x half> undef, <8 x i32> zeroinitializer
  ret <8 x half> %splat
}

; As vdup_f16, but the sum is bitcast to i16, splatted as <8 x i16>, and the
; vector is bitcast back to <8 x half>.
define arm_aapcs_vfpcc <8 x half> @vdup_f16_bc(half %0, half %1) {
; CHECK-LE-LABEL: vdup_f16_bc:
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: vadd.f16 s0, s0, s1
; CHECK-LE-NEXT: vmov.f16 r0, s0
; CHECK-LE-NEXT: vdup.16 q0, r0
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: vdup_f16_bc:
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: vadd.f16 s0, s0, s1
; CHECK-BE-NEXT: vmov.f16 r0, s0
; CHECK-BE-NEXT: vdup.16 q1, r0
; CHECK-BE-NEXT: vrev64.16 q0, q1
; CHECK-BE-NEXT: bx lr
entry:
  %sum = fadd half %0, %1
  %bits = bitcast half %sum to i16
  %head = insertelement <8 x i16> undef, i16 %bits, i32 0
  %splat = shufflevector <8 x i16> %head, <8 x i16> undef, <8 x i32> zeroinitializer
  %splatfp = bitcast <8 x i16> %splat to <8 x half>
  ret <8 x half> %splatfp
}

; Splat of a double to two lanes; all three runs share one set of expected
; output (two s-register moves).
define arm_aapcs_vfpcc <2 x double> @vdup_f64(double %src) {
; CHECK-LABEL: vdup_f64:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f32 s2, s0
; CHECK-NEXT: vmov.f32 s3, s1
; CHECK-NEXT: bx lr
entry:
  %head = insertelement <2 x double> undef, double %src, i32 0
  %splat = shufflevector <2 x double> %head, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %splat
}



; Lane splats: broadcast an existing lane of a vector argument.

; Broadcast lane 3 of a <4 x i32> argument.
define arm_aapcs_vfpcc <4 x i32> @vduplane_i32(<4 x i32> %src) {
; CHECK-LE-LABEL: vduplane_i32:
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: vmov r0, s3
; CHECK-LE-NEXT: vdup.32 q0, r0
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: vduplane_i32:
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: vrev64.32 q1, q0
; CHECK-BE-NEXT: vmov r0, s7
; CHECK-BE-NEXT: vdup.32 q1, r0
; CHECK-BE-NEXT: vrev64.32 q0, q1
; CHECK-BE-NEXT: bx lr
entry:
  %splat = shufflevector <4 x i32> %src, <4 x i32> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  ret <4 x i32> %splat
}

; Broadcast lane 3 of an <8 x i16> argument.
define arm_aapcs_vfpcc <8 x i16> @vduplane_i16(<8 x i16> %src) {
; CHECK-LE-LABEL: vduplane_i16:
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: vmov.u16 r0, q0[3]
; CHECK-LE-NEXT: vdup.16 q0, r0
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: vduplane_i16:
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: vrev64.16 q1, q0
; CHECK-BE-NEXT: vmov.u16 r0, q1[3]
; CHECK-BE-NEXT: vdup.16 q1, r0
; CHECK-BE-NEXT: vrev64.16 q0, q1
; CHECK-BE-NEXT: bx lr
entry:
  %splat = shufflevector <8 x i16> %src, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <8 x i16> %splat
}

; Broadcast lane 3 of a <16 x i8> argument.
define arm_aapcs_vfpcc <16 x i8> @vduplane_i8(<16 x i8> %src) {
; CHECK-LE-LABEL: vduplane_i8:
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: vmov.u8 r0, q0[3]
; CHECK-LE-NEXT: vdup.8 q0, r0
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: vduplane_i8:
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: vrev64.8 q1, q0
; CHECK-BE-NEXT: vmov.u8 r0, q1[3]
; CHECK-BE-NEXT: vdup.8 q1, r0
; CHECK-BE-NEXT: vrev64.8 q0, q1
; CHECK-BE-NEXT: bx lr
entry:
  %splat = shufflevector <16 x i8> %src, <16 x i8> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <16 x i8> %splat
}

; Broadcast lane 1 of a <2 x i64> argument; expected output is shared by all
; three runs.
define arm_aapcs_vfpcc <2 x i64> @vduplane_i64(<2 x i64> %src) {
; CHECK-LABEL: vduplane_i64:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f32 s0, s2
; CHECK-NEXT: vmov.f32 s1, s3
; CHECK-NEXT: bx lr
entry:
  %splat = shufflevector <2 x i64> %src, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x i64> %splat
}

; Broadcast lane 3 of a <4 x float> argument.
define arm_aapcs_vfpcc <4 x float> @vduplane_f32(<4 x float> %src) {
; CHECK-LE-LABEL: vduplane_f32:
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: vmov r0, s3
; CHECK-LE-NEXT: vdup.32 q0, r0
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: vduplane_f32:
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: vrev64.32 q1, q0
; CHECK-BE-NEXT: vmov r0, s7
; CHECK-BE-NEXT: vdup.32 q1, r0
; CHECK-BE-NEXT: vrev64.32 q0, q1
; CHECK-BE-NEXT: bx lr
entry:
  %splat = shufflevector <4 x float> %src, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  ret <4 x float> %splat
}

; Broadcast lane 3 of an <8 x half> argument.
define arm_aapcs_vfpcc <8 x half> @vduplane_f16(<8 x half> %src) {
; CHECK-LE-LABEL: vduplane_f16:
; CHECK-LE: @ %bb.0: @ %entry
; CHECK-LE-NEXT: vmov.u16 r0, q0[3]
; CHECK-LE-NEXT: vdup.16 q0, r0
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: vduplane_f16:
; CHECK-BE: @ %bb.0: @ %entry
; CHECK-BE-NEXT: vrev64.16 q1, q0
; CHECK-BE-NEXT: vmov.u16 r0, q1[3]
; CHECK-BE-NEXT: vdup.16 q1, r0
; CHECK-BE-NEXT: vrev64.16 q0, q1
; CHECK-BE-NEXT: bx lr
entry:
  %splat = shufflevector <8 x half> %src, <8 x half> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
  ret <8 x half> %splat
}

; Broadcast lane 1 of a <2 x double> argument; expected output is shared by
; all three runs.
define arm_aapcs_vfpcc <2 x double> @vduplane_f64(<2 x double> %src) {
; CHECK-LABEL: vduplane_f64:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vmov.f32 s0, s2
; CHECK-NEXT: vmov.f32 s1, s3
; CHECK-NEXT: bx lr
entry:
  %splat = shufflevector <2 x double> %src, <2 x double> undef, <2 x i32> <i32 1, i32 1>
  ret <2 x double> %splat
}


; Splat followed by an extract of lane 2: the expected output is just the
; return, i.e. the whole splat/extract chain folds back to the argument.
define arm_aapcs_vfpcc float @vdup_f32_extract(float %src) {
; CHECK-LABEL: vdup_f32_extract:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: bx lr
entry:
  %bits = bitcast float %src to i32
  %head = insertelement <4 x i32> undef, i32 %bits, i32 0
  %splat = shufflevector <4 x i32> %head, <4 x i32> undef, <4 x i32> zeroinitializer
  %splatfp = bitcast <4 x i32> %splat to <4 x float>
  %lane = extractelement <4 x float> %splatfp, i32 2
  ret float %lane
}

; Half variant of the splat-then-extract pattern; only the fadd remains in
; the expected output.
define arm_aapcs_vfpcc half @vdup_f16_extract(half %0, half %1) {
; CHECK-LABEL: vdup_f16_extract:
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: vadd.f16 s0, s0, s1
; CHECK-NEXT: bx lr
entry:
  %sum = fadd half %0, %1
  %bits = bitcast half %sum to i16
  %head = insertelement <8 x i16> undef, i16 %bits, i32 0
  %splat = shufflevector <8 x i16> %head, <8 x i16> undef, <8 x i32> zeroinitializer
  %splatfp = bitcast <8 x i16> %splat to <8 x half>
  %lane = extractelement <8 x half> %splatfp, i32 2
  ret half %lane
}


; Splats whose source vector comes from a bitcast of a scalar or of a vector
; with a different element type. These functions have no explicit block label.

; i64 bitcast to <4 x i16>; lane 0 is splatted to eight lanes.
define arm_aapcs_vfpcc <8 x i16> @bitcast_i64_v8i16(i64 %a) {
; CHECK-LE-LABEL: bitcast_i64_v8i16:
; CHECK-LE: @ %bb.0:
; CHECK-LE-NEXT: .pad #8
; CHECK-LE-NEXT: sub sp, #8
; CHECK-LE-NEXT: strd r0, r1, [sp]
; CHECK-LE-NEXT: mov r0, sp
; CHECK-LE-NEXT: vldrh.u32 q0, [r0]
; CHECK-LE-NEXT: vmov r0, s0
; CHECK-LE-NEXT: vdup.16 q0, r0
; CHECK-LE-NEXT: add sp, #8
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: bitcast_i64_v8i16:
; CHECK-BE: @ %bb.0:
; CHECK-BE-NEXT: .pad #8
; CHECK-BE-NEXT: sub sp, #8
; CHECK-BE-NEXT: strd r0, r1, [sp]
; CHECK-BE-NEXT: mov r0, sp
; CHECK-BE-NEXT: vldrh.u32 q0, [r0]
; CHECK-BE-NEXT: vmov r0, s0
; CHECK-BE-NEXT: vdup.16 q1, r0
; CHECK-BE-NEXT: vrev64.16 q0, q1
; CHECK-BE-NEXT: add sp, #8
; CHECK-BE-NEXT: bx lr
  %vec = bitcast i64 %a to <4 x i16>
  %splat = shufflevector <4 x i16> %vec, <4 x i16> poison, <8 x i32> zeroinitializer
  ret <8 x i16> %splat
}

; i128 bitcast to <8 x i16>; lane 0 is splatted.
define arm_aapcs_vfpcc <8 x i16> @bitcast_i128_v8i16(i128 %a) {
; CHECK-LE-LABEL: bitcast_i128_v8i16:
; CHECK-LE: @ %bb.0:
; CHECK-LE-NEXT: vdup.16 q0, r0
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: bitcast_i128_v8i16:
; CHECK-BE: @ %bb.0:
; CHECK-BE-NEXT: vmov.32 q0[0], r0
; CHECK-BE-NEXT: vrev32.16 q0, q0
; CHECK-BE-NEXT: vmov.u16 r0, q0[0]
; CHECK-BE-NEXT: vdup.16 q1, r0
; CHECK-BE-NEXT: vrev64.16 q0, q1
; CHECK-BE-NEXT: bx lr
  %vec = bitcast i128 %a to <8 x i16>
  %splat = shufflevector <8 x i16> %vec, <8 x i16> poison, <8 x i32> zeroinitializer
  ret <8 x i16> %splat
}

; i64 bitcast to <4 x i16>; lane 1 (rather than lane 0) is splatted.
define arm_aapcs_vfpcc <8 x i16> @bitcast_i64_v8i16_lane1(i64 %a) {
; CHECK-LE-LABEL: bitcast_i64_v8i16_lane1:
; CHECK-LE: @ %bb.0:
; CHECK-LE-NEXT: .pad #8
; CHECK-LE-NEXT: sub sp, #8
; CHECK-LE-NEXT: strd r0, r1, [sp]
; CHECK-LE-NEXT: mov r0, sp
; CHECK-LE-NEXT: vldrh.u32 q0, [r0]
; CHECK-LE-NEXT: vmov r0, s1
; CHECK-LE-NEXT: vdup.16 q0, r0
; CHECK-LE-NEXT: add sp, #8
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: bitcast_i64_v8i16_lane1:
; CHECK-BE: @ %bb.0:
; CHECK-BE-NEXT: .pad #8
; CHECK-BE-NEXT: sub sp, #8
; CHECK-BE-NEXT: strd r0, r1, [sp]
; CHECK-BE-NEXT: mov r0, sp
; CHECK-BE-NEXT: vldrh.u32 q0, [r0]
; CHECK-BE-NEXT: vmov r0, s1
; CHECK-BE-NEXT: vdup.16 q1, r0
; CHECK-BE-NEXT: vrev64.16 q0, q1
; CHECK-BE-NEXT: add sp, #8
; CHECK-BE-NEXT: bx lr
  %vec = bitcast i64 %a to <4 x i16>
  %splat = shufflevector <4 x i16> %vec, <4 x i16> poison, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  ret <8 x i16> %splat
}

; double bitcast to <4 x i16>; lane 0 is splatted to eight lanes.
define arm_aapcs_vfpcc <8 x i16> @bitcast_f64_v8i16(double %a) {
; CHECK-LE-LABEL: bitcast_f64_v8i16:
; CHECK-LE: @ %bb.0:
; CHECK-LE-NEXT: vmov.u16 r0, q0[0]
; CHECK-LE-NEXT: vdup.16 q0, r0
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: bitcast_f64_v8i16:
; CHECK-BE: @ %bb.0:
; CHECK-BE-NEXT: vrev64.16 q1, q0
; CHECK-BE-NEXT: vmov.u16 r0, q1[0]
; CHECK-BE-NEXT: vdup.16 q1, r0
; CHECK-BE-NEXT: vrev64.16 q0, q1
; CHECK-BE-NEXT: bx lr
  %vec = bitcast double %a to <4 x i16>
  %splat = shufflevector <4 x i16> %vec, <4 x i16> poison, <8 x i32> zeroinitializer
  ret <8 x i16> %splat
}

; i64 bitcast to <4 x half>; lane 0 is splatted to eight lanes.
define arm_aapcs_vfpcc <8 x half> @bitcast_i64_v8f16(i64 %a) {
; CHECK-LE-LABEL: bitcast_i64_v8f16:
; CHECK-LE: @ %bb.0:
; CHECK-LE-NEXT: vmov.32 q0[0], r0
; CHECK-LE-NEXT: vmov.u16 r0, q0[0]
; CHECK-LE-NEXT: vdup.16 q0, r0
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: bitcast_i64_v8f16:
; CHECK-BE: @ %bb.0:
; CHECK-BE-NEXT: vmov.32 q0[0], r0
; CHECK-BE-NEXT: vrev32.16 q0, q0
; CHECK-BE-NEXT: vmov.u16 r0, q0[0]
; CHECK-BE-NEXT: vdup.16 q1, r0
; CHECK-BE-NEXT: vrev64.16 q0, q1
; CHECK-BE-NEXT: bx lr
  %vec = bitcast i64 %a to <4 x half>
  %splat = shufflevector <4 x half> %vec, <4 x half> poison, <8 x i32> zeroinitializer
  ret <8 x half> %splat
}

; i64 bitcast to <1 x i64>; its single lane is splatted to two lanes.
; Review note - despite "v2f64" in the name, the result type is <2 x i64>.
define arm_aapcs_vfpcc <2 x i64> @bitcast_i64_v2f64(i64 %a) {
; CHECK-LE-LABEL: bitcast_i64_v2f64:
; CHECK-LE: @ %bb.0:
; CHECK-LE-NEXT: vmov q0[2], q0[0], r0, r0
; CHECK-LE-NEXT: vmov q0[3], q0[1], r1, r1
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: bitcast_i64_v2f64:
; CHECK-BE: @ %bb.0:
; CHECK-BE-NEXT: vmov q1[2], q1[0], r0, r0
; CHECK-BE-NEXT: vmov q1[3], q1[1], r1, r1
; CHECK-BE-NEXT: vrev64.32 q0, q1
; CHECK-BE-NEXT: bx lr
  %vec = bitcast i64 %a to <1 x i64>
  %splat = shufflevector <1 x i64> %vec, <1 x i64> poison, <2 x i32> zeroinitializer
  ret <2 x i64> %splat
}

; <2 x double> bitcast to <2 x i64>; lane 0 is splatted. Expected output is
; shared by all three runs.
define arm_aapcs_vfpcc <2 x i64> @bitcast_v2f64_v2i64(<2 x double> %a) {
; CHECK-LABEL: bitcast_v2f64_v2i64:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov.f32 s2, s0
; CHECK-NEXT: vmov.f32 s3, s1
; CHECK-NEXT: bx lr
  %vec = bitcast <2 x double> %a to <2 x i64>
  %splat = shufflevector <2 x i64> %vec, <2 x i64> poison, <2 x i32> zeroinitializer
  ret <2 x i64> %splat
}

; <8 x i16> bitcast to <2 x i64>; lane 0 is splatted. Expected output is
; shared by all three runs.
define arm_aapcs_vfpcc <2 x i64> @bitcast_v8i16_v2i64(<8 x i16> %a) {
; CHECK-LABEL: bitcast_v8i16_v2i64:
; CHECK: @ %bb.0:
; CHECK-NEXT: vmov.f32 s2, s0
; CHECK-NEXT: vmov.f32 s3, s1
; CHECK-NEXT: bx lr
  %vec = bitcast <8 x i16> %a to <2 x i64>
  %splat = shufflevector <2 x i64> %vec, <2 x i64> poison, <2 x i32> zeroinitializer
  ret <2 x i64> %splat
}

; <2 x i64> bitcast to <8 x i16>; lane 0 is splatted.
; Review note - despite "v2f64" in the name, the argument type is <2 x i64>.
define arm_aapcs_vfpcc <8 x i16> @bitcast_v2f64_v8i16(<2 x i64> %a) {
; CHECK-LE-LABEL: bitcast_v2f64_v8i16:
; CHECK-LE: @ %bb.0:
; CHECK-LE-NEXT: vmov.u16 r0, q0[0]
; CHECK-LE-NEXT: vdup.16 q0, r0
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: bitcast_v2f64_v8i16:
; CHECK-BE: @ %bb.0:
; CHECK-BE-NEXT: vrev64.16 q1, q0
; CHECK-BE-NEXT: vmov.u16 r0, q1[0]
; CHECK-BE-NEXT: vdup.16 q1, r0
; CHECK-BE-NEXT: vrev64.16 q0, q1
; CHECK-BE-NEXT: bx lr
  %vec = bitcast <2 x i64> %a to <8 x i16>
  %splat = shufflevector <8 x i16> %vec, <8 x i16> poison, <8 x i32> zeroinitializer
  ret <8 x i16> %splat
}

; i32 bitcast to <2 x i16>; lane 0 is splatted to eight lanes.
; Review note - the function name does not describe the pattern; presumably it
; was reduced from a larger case. Confirm against the originating change.
define arm_aapcs_vfpcc <8 x i16> @other_max_case(i32 %blockSize) {
; CHECK-LE-LABEL: other_max_case:
; CHECK-LE: @ %bb.0:
; CHECK-LE-NEXT: vdup.16 q0, r0
; CHECK-LE-NEXT: bx lr
;
; CHECK-BE-LABEL: other_max_case:
; CHECK-BE: @ %bb.0:
; CHECK-BE-NEXT: vmov.32 q0[0], r0
; CHECK-BE-NEXT: vrev32.16 q0, q0
; CHECK-BE-NEXT: vmov.u16 r0, q0[0]
; CHECK-BE-NEXT: vdup.16 q1, r0
; CHECK-BE-NEXT: vrev64.16 q0, q1
; CHECK-BE-NEXT: bx lr
  %halves = bitcast i32 %blockSize to <2 x i16>
  %splat = shufflevector <2 x i16> %halves, <2 x i16> poison, <8 x i32> zeroinitializer
  ret <8 x i16> %splat
}