; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s

; Single-instruction shufflevector tests on 128-bit vectors. Function names
; generally spell out the shuffle mask one character per result lane, with 'u'
; standing for an undef lane. Mask indices at or above the lane count select
; from the second operand (%s2).

; i16

; Mask <4..7, 0..3>: the upper four lanes of %s1 followed by the lower four.
define arm_aapcs_vfpcc <8 x i16> @shuffle_i16_45670123(<8 x i16> %s1, <8 x i16> %s2) {
; CHECK-LABEL: shuffle_i16_45670123:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s4, s2
; CHECK-NEXT:    vmov.f32 s6, s0
; CHECK-NEXT:    vmov.f32 s5, s3
; CHECK-NEXT:    vmov.f32 s7, s1
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %shuf = shufflevector <8 x i16> %s1, <8 x i16> %s2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i16> %shuf
}

; Adjacent lane pairs of %s1 taken in reverse pair order: (6,7) (4,5) (2,3) (0,1).
define arm_aapcs_vfpcc <8 x i16> @shuffle_i16_67452301(<8 x i16> %s1, <8 x i16> %s2) {
; CHECK-LABEL: shuffle_i16_67452301:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s4, s3
; CHECK-NEXT:    vmov.f32 s5, s2
; CHECK-NEXT:    vmov.f32 s6, s1
; CHECK-NEXT:    vmov.f32 s7, s0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %shuf = shufflevector <8 x i16> %s1, <8 x i16> %s2, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 1>
  ret <8 x i16> %shuf
}

; Full lane reversal of %s1.
define arm_aapcs_vfpcc <8 x i16> @shuffle_i16_76543210(<8 x i16> %s1, <8 x i16> %s2) {
; CHECK-LABEL: shuffle_i16_76543210:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev64.16 q1, q0
; CHECK-NEXT:    vmov.f32 s0, s6
; CHECK-NEXT:    vmov.f32 s1, s7
; CHECK-NEXT:    vmov.f32 s2, s4
; CHECK-NEXT:    vmov.f32 s3, s5
; CHECK-NEXT:    bx lr
entry:
  %shuf = shufflevector <8 x i16> %s1, <8 x i16> %s2, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x i16> %shuf
}

; Identity mask: the expected output is just the return.
define arm_aapcs_vfpcc <8 x i16> @shuffle_i16_01234567(<8 x i16> %s1, <8 x i16> %s2) {
; CHECK-LABEL: shuffle_i16_01234567:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    bx lr
entry:
  %shuf = shufflevector <8 x i16> %s1, <8 x i16> %s2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %shuf
}

; Lanes 0-3 of %s1 followed by indices 12-15, i.e. the upper four lanes of %s2.
define arm_aapcs_vfpcc <8 x i16> @shuffle_i16_0123cdef(<8 x i16> %s1, <8 x i16> %s2) {
; CHECK-LABEL: shuffle_i16_0123cdef:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s2, s6
; CHECK-NEXT:    vmov.f32 s3, s7
; CHECK-NEXT:    bx lr
entry:
  %shuf = shufflevector <8 x i16> %s1, <8 x i16> %s2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i16> %shuf
}

; Only the odd result lanes are defined (7, 5, 3, 1); even lanes are undef.
define arm_aapcs_vfpcc <8 x i16> @shuffle_i16_u7u5u3u1(<8 x i16> %s1, <8 x i16> %s2) {
; CHECK-LABEL: shuffle_i16_u7u5u3u1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s4, s3
; CHECK-NEXT:    vmov.f32 s5, s2
; CHECK-NEXT:    vmov.f32 s6, s1
; CHECK-NEXT:    vmov.f32 s7, s0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %shuf = shufflevector <8 x i16> %s1, <8 x i16> %s2, <8 x i32> <i32 undef, i32 7, i32 undef, i32 5, i32 undef, i32 3, i32 undef, i32 1>
  ret <8 x i16> %shuf
}

; Only the even result lanes are defined (6, 4, 2, 0); odd lanes are undef.
define arm_aapcs_vfpcc <8 x i16> @shuffle_i16_6u4u2u0u(<8 x i16> %s1, <8 x i16> %s2) {
; CHECK-LABEL: shuffle_i16_6u4u2u0u:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s4, s3
; CHECK-NEXT:    vmov.f32 s5, s2
; CHECK-NEXT:    vmov.f32 s6, s1
; CHECK-NEXT:    vmov.f32 s7, s0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %shuf = shufflevector <8 x i16> %s1, <8 x i16> %s2, <8 x i32> <i32 6, i32 undef, i32 4, i32 undef, i32 2, i32 undef, i32 0, i32 undef>
  ret <8 x i16> %shuf
}

; Only result lane 0 is defined, and it keeps element 0 in place.
define arm_aapcs_vfpcc <8 x i16> @shuffle_i16_0uuuuuuu(<8 x i16> %s1, <8 x i16> %s2) {
; CHECK-LABEL: shuffle_i16_0uuuuuuu:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    bx lr
entry:
  %shuf = shufflevector <8 x i16> %s1, <8 x i16> %s2, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %shuf
}

; Only result lane 4 is defined, taking element 0; the expected code extracts
; that element and duplicates it across the vector.
define arm_aapcs_vfpcc <8 x i16> @shuffle_i16_uuuu0uuu(<8 x i16> %s1, <8 x i16> %s2) {
; CHECK-LABEL: shuffle_i16_uuuu0uuu:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.u16 r0, q0[0]
; CHECK-NEXT:    vdup.16 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %shuf = shufflevector <8 x i16> %s1, <8 x i16> %s2, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 undef, i32 undef, i32 undef>
  ret <8 x i16> %shuf
}


; i8

; Groups of four bytes of %s1 taken in reverse group order.
define arm_aapcs_vfpcc <16 x i8> @shuffle_i8_cdef89ab45670123(<16 x i8> %s1, <16 x i8> %s2) {
; CHECK-LABEL: shuffle_i8_cdef89ab45670123:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s4, s3
; CHECK-NEXT:    vmov.f32 s5, s2
; CHECK-NEXT:    vmov.f32 s6, s1
; CHECK-NEXT:    vmov.f32 s7, s0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %shuf = shufflevector <16 x i8> %s1, <16 x i8> %s2, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
  ret <16 x i8> %shuf
}

; Byte pairs of %s1 taken in reverse pair order; the expected code moves every
; byte individually through r0.
define arm_aapcs_vfpcc <16 x i8> @shuffle_i8_efcdab8967452301(<16 x i8> %s1, <16 x i8> %s2) {
; CHECK-LABEL: shuffle_i8_efcdab8967452301:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov q1, q0
; CHECK-NEXT:    vmov.u8 r0, q0[14]
; CHECK-NEXT:    vmov.8 q0[0], r0
; CHECK-NEXT:    vmov.u8 r0, q1[15]
; CHECK-NEXT:    vmov.8 q0[1], r0
; CHECK-NEXT:    vmov.u8 r0, q1[12]
; CHECK-NEXT:    vmov.8 q0[2], r0
; CHECK-NEXT:    vmov.u8 r0, q1[13]
; CHECK-NEXT:    vmov.8 q0[3], r0
; CHECK-NEXT:    vmov.u8 r0, q1[10]
; CHECK-NEXT:    vmov.8 q0[4], r0
; CHECK-NEXT:    vmov.u8 r0, q1[11]
; CHECK-NEXT:    vmov.8 q0[5], r0
; CHECK-NEXT:    vmov.u8 r0, q1[8]
; CHECK-NEXT:    vmov.8 q0[6], r0
; CHECK-NEXT:    vmov.u8 r0, q1[9]
; CHECK-NEXT:    vmov.8 q0[7], r0
; CHECK-NEXT:    vmov.u8 r0, q1[6]
; CHECK-NEXT:    vmov.8 q0[8], r0
; CHECK-NEXT:    vmov.u8 r0, q1[7]
; CHECK-NEXT:    vmov.8 q0[9], r0
; CHECK-NEXT:    vmov.u8 r0, q1[4]
; CHECK-NEXT:    vmov.8 q0[10], r0
; CHECK-NEXT:    vmov.u8 r0, q1[5]
; CHECK-NEXT:    vmov.8 q0[11], r0
; CHECK-NEXT:    vmov.u8 r0, q1[2]
; CHECK-NEXT:    vmov.8 q0[12], r0
; CHECK-NEXT:    vmov.u8 r0, q1[3]
; CHECK-NEXT:    vmov.8 q0[13], r0
; CHECK-NEXT:    vmov.u8 r0, q1[0]
; CHECK-NEXT:    vmov.8 q0[14], r0
; CHECK-NEXT:    vmov.u8 r0, q1[1]
; CHECK-NEXT:    vmov.8 q0[15], r0
; CHECK-NEXT:    bx lr
entry:
  %shuf = shufflevector <16 x i8> %s1, <16 x i8> %s2, <16 x i32> <i32 14, i32 15, i32 12, i32 13, i32 10, i32 11, i32 8, i32 9, i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 1>
  ret <16 x i8> %shuf
}

; Full byte reversal of %s1.
define arm_aapcs_vfpcc <16 x i8> @shuffle_i8_fedcba9876543210(<16 x i8> %s1, <16 x i8> %s2) {
; CHECK-LABEL: shuffle_i8_fedcba9876543210:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev64.8 q1, q0
; CHECK-NEXT:    vmov.f32 s0, s6
; CHECK-NEXT:    vmov.f32 s1, s7
; CHECK-NEXT:    vmov.f32 s2, s4
; CHECK-NEXT:    vmov.f32 s3, s5
; CHECK-NEXT:    bx lr
entry:
  %shuf = shufflevector <16 x i8> %s1, <16 x i8> %s2, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <16 x i8> %shuf
}

; Identity mask: the expected output is just the return.
define arm_aapcs_vfpcc <16 x i8> @shuffle_i8_0123456789abcdef(<16 x i8> %s1, <16 x i8> %s2) {
; CHECK-LABEL: shuffle_i8_0123456789abcdef:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    bx lr
entry:
  %shuf = shufflevector <16 x i8> %s1, <16 x i8> %s2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %shuf
}

; Interleaves four-byte groups from both operands: %s1[0-3], %s2[0-3],
; %s1[4-7], %s2[4-7] (indices 16-23 address %s2).
define arm_aapcs_vfpcc <16 x i8> @shuffle_i8_0123ghij4567klmn(<16 x i8> %s1, <16 x i8> %s2) {
; CHECK-LABEL: shuffle_i8_0123ghij4567klmn:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s8, s0
; CHECK-NEXT:    vmov.f32 s9, s4
; CHECK-NEXT:    vmov.f32 s10, s1
; CHECK-NEXT:    vmov.f32 s11, s5
; CHECK-NEXT:    vmov q0, q2
; CHECK-NEXT:    bx lr
entry:
  %shuf = shufflevector <16 x i8> %s1, <16 x i8> %s2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23>
  ret <16 x i8> %shuf
}

; Reversed four-byte groups with one undef lane in each group.
define arm_aapcs_vfpcc <16 x i8> @shuffle_i8_cdeu89ub4u67u123(<16 x i8> %s1, <16 x i8> %s2) {
; CHECK-LABEL: shuffle_i8_cdeu89ub4u67u123:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s4, s3
; CHECK-NEXT:    vmov.f32 s5, s2
; CHECK-NEXT:    vmov.f32 s6, s1
; CHECK-NEXT:    vmov.f32 s7, s0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %shuf = shufflevector <16 x i8> %s1, <16 x i8> %s2, <16 x i32> <i32 12, i32 13, i32 14, i32 undef, i32 8, i32 9, i32 undef, i32 11, i32 4, i32 undef, i32 6, i32 7, i32 undef, i32 1, i32 2, i32 3>
  ret <16 x i8> %shuf
}

; Reversed four-byte groups with two undef lanes in each group.
define arm_aapcs_vfpcc <16 x i8> @shuffle_i8_cduu8uubuu67u12u(<16 x i8> %s1, <16 x i8> %s2) {
; CHECK-LABEL: shuffle_i8_cduu8uubuu67u12u:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s4, s3
; CHECK-NEXT:    vmov.f32 s5, s2
; CHECK-NEXT:    vmov.f32 s6, s1
; CHECK-NEXT:    vmov.f32 s7, s0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %shuf = shufflevector <16 x i8> %s1, <16 x i8> %s2, <16 x i32> <i32 12, i32 13, i32 undef, i32 undef, i32 8, i32 undef, i32 undef, i32 11, i32 undef, i32 undef, i32 6, i32 7, i32 undef, i32 1, i32 2, i32 undef>
  ret <16 x i8> %shuf
}

; Reversed four-byte groups with only one defined lane left in each group.
define arm_aapcs_vfpcc <16 x i8> @shuffle_i8_cuuuuuubuu6uuu2u(<16 x i8> %s1, <16 x i8> %s2) {
; CHECK-LABEL: shuffle_i8_cuuuuuubuu6uuu2u:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s4, s3
; CHECK-NEXT:    vmov.f32 s5, s2
; CHECK-NEXT:    vmov.f32 s6, s1
; CHECK-NEXT:    vmov.f32 s7, s0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %shuf = shufflevector <16 x i8> %s1, <16 x i8> %s2, <16 x i32> <i32 12, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 11, i32 undef, i32 undef, i32 6, i32 undef, i32 undef, i32 undef, i32 2, i32 undef>
  ret <16 x i8> %shuf
}

; Reversed four-byte groups, except that result lanes 8-11 are <4, 5, undef, 0>;
; the expected code fills lanes 8, 9 and 11 with per-byte moves.
; NOTE(review): the function name has 17 mask characters for a 16-lane mask;
; the mask below is what is actually tested.
define arm_aapcs_vfpcc <16 x i8> @shuffle_i8_cdef89ab45u700123(<16 x i8> %s1, <16 x i8> %s2) {
; CHECK-LABEL: shuffle_i8_cdef89ab45u700123:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.u8 r0, q0[4]
; CHECK-NEXT:    vmov.8 q1[8], r0
; CHECK-NEXT:    vmov.u8 r0, q0[5]
; CHECK-NEXT:    vmov.8 q1[9], r0
; CHECK-NEXT:    vmov.u8 r0, q0[0]
; CHECK-NEXT:    vmov.8 q1[11], r0
; CHECK-NEXT:    vmov.f32 s4, s3
; CHECK-NEXT:    vmov.f32 s5, s2
; CHECK-NEXT:    vmov.f32 s7, s0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %shuf = shufflevector <16 x i8> %s1, <16 x i8> %s2, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 undef, i32 0, i32 0, i32 1, i32 2, i32 3>
  ret <16 x i8> %shuf
}



; f16

; Mask <4..7, 0..3>: the upper four lanes of %s1 followed by the lower four.
define arm_aapcs_vfpcc <8 x half> @shuffle_f16_45670123(<8 x half> %s1, <8 x half> %s2) {
; CHECK-LABEL: shuffle_f16_45670123:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s4, s2
; CHECK-NEXT:    vmov.f32 s6, s0
; CHECK-NEXT:    vmov.f32 s5, s3
; CHECK-NEXT:    vmov.f32 s7, s1
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %shuf = shufflevector <8 x half> %s1, <8 x half> %s2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3>
  ret <8 x half> %shuf
}

; Adjacent lane pairs of %s1 taken in reverse pair order: (6,7) (4,5) (2,3) (0,1).
define arm_aapcs_vfpcc <8 x half> @shuffle_f16_67452301(<8 x half> %s1, <8 x half> %s2) {
; CHECK-LABEL: shuffle_f16_67452301:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s4, s3
; CHECK-NEXT:    vmov.f32 s5, s2
; CHECK-NEXT:    vmov.f32 s6, s1
; CHECK-NEXT:    vmov.f32 s7, s0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %shuf = shufflevector <8 x half> %s1, <8 x half> %s2, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 2, i32 3, i32 0, i32 1>
  ret <8 x half> %shuf
}

; Full lane reversal of %s1.
define arm_aapcs_vfpcc <8 x half> @shuffle_f16_76543210(<8 x half> %s1, <8 x half> %s2) {
; CHECK-LABEL: shuffle_f16_76543210:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev64.16 q1, q0
; CHECK-NEXT:    vmov.f32 s0, s6
; CHECK-NEXT:    vmov.f32 s1, s7
; CHECK-NEXT:    vmov.f32 s2, s4
; CHECK-NEXT:    vmov.f32 s3, s5
; CHECK-NEXT:    bx lr
entry:
  %shuf = shufflevector <8 x half> %s1, <8 x half> %s2, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
  ret <8 x half> %shuf
}

; Identity mask: the expected output is just the return.
define arm_aapcs_vfpcc <8 x half> @shuffle_f16_01234567(<8 x half> %s1, <8 x half> %s2) {
; CHECK-LABEL: shuffle_f16_01234567:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    bx lr
entry:
  %shuf = shufflevector <8 x half> %s1, <8 x half> %s2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x half> %shuf
}

; Lanes 0-3 of %s1 followed by indices 12-15, i.e. the upper four lanes of %s2.
define arm_aapcs_vfpcc <8 x half> @shuffle_f16_0123cdef(<8 x half> %s1, <8 x half> %s2) {
; CHECK-LABEL: shuffle_f16_0123cdef:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s2, s6
; CHECK-NEXT:    vmov.f32 s3, s7
; CHECK-NEXT:    bx lr
entry:
  %shuf = shufflevector <8 x half> %s1, <8 x half> %s2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
  ret <8 x half> %shuf
}

; Only the odd result lanes are defined (7, 5, 3, 1); even lanes are undef.
define arm_aapcs_vfpcc <8 x half> @shuffle_f16_u7u5u3u1(<8 x half> %s1, <8 x half> %s2) {
; CHECK-LABEL: shuffle_f16_u7u5u3u1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s4, s3
; CHECK-NEXT:    vmov.f32 s5, s2
; CHECK-NEXT:    vmov.f32 s6, s1
; CHECK-NEXT:    vmov.f32 s7, s0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %shuf = shufflevector <8 x half> %s1, <8 x half> %s2, <8 x i32> <i32 undef, i32 7, i32 undef, i32 5, i32 undef, i32 3, i32 undef, i32 1>
  ret <8 x half> %shuf
}

; Only the even result lanes are defined (6, 4, 2, 0); odd lanes are undef.
define arm_aapcs_vfpcc <8 x half> @shuffle_f16_6u4u2u0u(<8 x half> %s1, <8 x half> %s2) {
; CHECK-LABEL: shuffle_f16_6u4u2u0u:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s4, s3
; CHECK-NEXT:    vmov.f32 s5, s2
; CHECK-NEXT:    vmov.f32 s6, s1
; CHECK-NEXT:    vmov.f32 s7, s0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
entry:
  %shuf = shufflevector <8 x half> %s1, <8 x half> %s2, <8 x i32> <i32 6, i32 undef, i32 4, i32 undef, i32 2, i32 undef, i32 0, i32 undef>
  ret <8 x half> %shuf
}

; Only result lane 0 is defined, and it keeps element 0 in place.
define arm_aapcs_vfpcc <8 x half> @shuffle_f16_0uuuuuuu(<8 x half> %s1, <8 x half> %s2) {
; CHECK-LABEL: shuffle_f16_0uuuuuuu:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    bx lr
entry:
  %shuf = shufflevector <8 x half> %s1, <8 x half> %s2, <8 x i32> <i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x half> %shuf
}

; Only result lane 4 is defined, taking element 0; the expected code extracts
; that element and duplicates it across the vector.
define arm_aapcs_vfpcc <8 x half> @shuffle_f16_uuuu0uuu(<8 x half> %s1, <8 x half> %s2) {
; CHECK-LABEL: shuffle_f16_uuuu0uuu:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.u16 r0, q0[0]
; CHECK-NEXT:    vdup.16 q0, r0
; CHECK-NEXT:    bx lr
entry:
  %shuf = shufflevector <8 x half> %s1, <8 x half> %s2, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 0, i32 undef, i32 undef, i32 undef>
  ret <8 x half> %shuf
}