; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-MVE
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-MVEFP
; RUN: llc -mtriple=thumbebv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECKBE

; Codegen tests for interleaving shuffles (optionally followed by a trunc) on
; MVE. Three configurations are exercised by the run lines above: integer-only
; MVE, MVE with fullfp16, and a thumbeb (big-endian) target with fullfp16.
; Where the expected little-endian output is a single vmovnt/vmovnb, the
; big-endian expectation wraps it in vrev64 lane reversals.

; Interleave src1/src2 (src1 in the even positions), then truncate to i16.
; Expected little-endian output: one vmovnt.i32.
define arm_aapcs_vfpcc <8 x i16> @vmovn32_trunc1(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: vmovn32_trunc1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovnt.i32 q0, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn32_trunc1:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.32 q2, q1
; CHECKBE-NEXT:    vrev64.32 q1, q0
; CHECKBE-NEXT:    vmovnt.i32 q1, q2
; CHECKBE-NEXT:    vrev64.16 q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %strided.vec = shufflevector <4 x i32> %src1, <4 x i32> %src2, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
  %out = trunc <8 x i32> %strided.vec to <8 x i16>
  ret <8 x i16> %out
}

; Same as above with src2 in the even positions; the expected output needs an
; extra register copy to return the result in q0.
define arm_aapcs_vfpcc <8 x i16> @vmovn32_trunc2(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: vmovn32_trunc2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovnt.i32 q1, q0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn32_trunc2:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.32 q2, q0
; CHECKBE-NEXT:    vrev64.32 q3, q1
; CHECKBE-NEXT:    vmovnt.i32 q3, q2
; CHECKBE-NEXT:    vrev64.16 q0, q3
; CHECKBE-NEXT:    bx lr
entry:
  %strided.vec = shufflevector <4 x i32> %src1, <4 x i32> %src2, <8 x i32> <i32 4, i32 0, i32 5, i32 1, i32 6, i32 2, i32 7, i32 3>
  %out = trunc <8 x i32> %strided.vec to <8 x i16>
  ret <8 x i16> %out
}

; Single-source variant: every lane of src1 is duplicated before the trunc.
define arm_aapcs_vfpcc <8 x i16> @vmovn32_trunc3(<4 x i32> %src1) {
; CHECK-LABEL: vmovn32_trunc3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovnt.i32 q0, q0
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn32_trunc3:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.32 q1, q0
; CHECKBE-NEXT:    vmovnt.i32 q1, q1
; CHECKBE-NEXT:    vrev64.16 q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %strided.vec = shufflevector <4 x i32> %src1, <4 x i32> undef, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
  %out = trunc <8 x i32> %strided.vec to <8 x i16>
  ret <8 x i16> %out
}

; The interleave is expressed on i16 pairs through bitcasts. The expected
; output for all three runs goes through a 16-byte stack slot (vstrh.32 stores
; followed by a vector reload) instead of a vmovn.
define arm_aapcs_vfpcc <8 x i16> @vmovn32_trunc1_viabitcast(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-MVE-LABEL: vmovn32_trunc1_viabitcast:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    .pad #16
; CHECK-MVE-NEXT:    sub sp, #16
; CHECK-MVE-NEXT:    vmov.f32 s8, s2
; CHECK-MVE-NEXT:    mov r0, sp
; CHECK-MVE-NEXT:    vmov.f32 s9, s6
; CHECK-MVE-NEXT:    vmov.f32 s10, s3
; CHECK-MVE-NEXT:    vmov.f32 s11, s7
; CHECK-MVE-NEXT:    vstrh.32 q2, [r0, #8]
; CHECK-MVE-NEXT:    vmov.f32 s8, s0
; CHECK-MVE-NEXT:    vmov.f32 s9, s4
; CHECK-MVE-NEXT:    vmov.f32 s10, s1
; CHECK-MVE-NEXT:    vmov.f32 s11, s5
; CHECK-MVE-NEXT:    vstrh.32 q2, [r0]
; CHECK-MVE-NEXT:    vldrw.u32 q0, [r0]
; CHECK-MVE-NEXT:    add sp, #16
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: vmovn32_trunc1_viabitcast:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    .pad #16
; CHECK-MVEFP-NEXT:    sub sp, #16
; CHECK-MVEFP-NEXT:    mov r0, sp
; CHECK-MVEFP-NEXT:    vmov.f32 s8, s2
; CHECK-MVEFP-NEXT:    vmov.f32 s9, s6
; CHECK-MVEFP-NEXT:    vmov.f32 s10, s3
; CHECK-MVEFP-NEXT:    vmov.f32 s11, s7
; CHECK-MVEFP-NEXT:    vstrh.32 q2, [r0, #8]
; CHECK-MVEFP-NEXT:    vmov.f32 s8, s0
; CHECK-MVEFP-NEXT:    vmov.f32 s9, s4
; CHECK-MVEFP-NEXT:    vmov.f32 s10, s1
; CHECK-MVEFP-NEXT:    vmov.f32 s11, s5
; CHECK-MVEFP-NEXT:    vstrh.32 q2, [r0]
; CHECK-MVEFP-NEXT:    vldrw.u32 q0, [r0]
; CHECK-MVEFP-NEXT:    add sp, #16
; CHECK-MVEFP-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn32_trunc1_viabitcast:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    .pad #16
; CHECKBE-NEXT:    sub sp, #16
; CHECKBE-NEXT:    vrev64.32 q2, q1
; CHECKBE-NEXT:    vrev64.32 q1, q0
; CHECKBE-NEXT:    mov r0, sp
; CHECKBE-NEXT:    vmov.f32 s0, s6
; CHECKBE-NEXT:    vmov.f32 s1, s10
; CHECKBE-NEXT:    vmov.f32 s2, s7
; CHECKBE-NEXT:    vmov.f32 s3, s11
; CHECKBE-NEXT:    vstrh.32 q0, [r0, #8]
; CHECKBE-NEXT:    vmov.f32 s0, s4
; CHECKBE-NEXT:    vmov.f32 s1, s8
; CHECKBE-NEXT:    vmov.f32 s2, s5
; CHECKBE-NEXT:    vmov.f32 s3, s9
; CHECKBE-NEXT:    vstrh.32 q0, [r0]
; CHECKBE-NEXT:    vldrb.u8 q1, [r0]
; CHECKBE-NEXT:    vrev64.8 q0, q1
; CHECKBE-NEXT:    add sp, #16
; CHECKBE-NEXT:    bx lr
entry:
  %b1 = bitcast <4 x i32> %src1 to <8 x i16>
  %b2 = bitcast <4 x i32> %src2 to <8 x i16>
  %s = shufflevector <8 x i16> %b1, <8 x i16> %b2, <16 x i32> <i32 0, i32 1, i32 8, i32 9, i32 2, i32 3, i32 10, i32 11, i32 4, i32 5, i32 12, i32 13, i32 6, i32 7, i32 14, i32 15>
  %b3 = bitcast <16 x i16> %s to <8 x i32>
  %out = trunc <8 x i32> %b3 to <8 x i16>
  ret <8 x i16> %out
}


; i16 -> i8 counterparts of the three interleave+trunc tests above; the
; expected instruction is vmovnt.i16.
define arm_aapcs_vfpcc <16 x i8> @vmovn16_trunc1(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: vmovn16_trunc1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovnt.i16 q0, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn16_trunc1:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.16 q2, q1
; CHECKBE-NEXT:    vrev64.16 q1, q0
; CHECKBE-NEXT:    vmovnt.i16 q1, q2
; CHECKBE-NEXT:    vrev64.8 q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %strided.vec = shufflevector <8 x i16> %src1, <8 x i16> %src2, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  %out = trunc <16 x i16> %strided.vec to <16 x i8>
  ret <16 x i8> %out
}

define arm_aapcs_vfpcc <16 x i8> @vmovn16_trunc2(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: vmovn16_trunc2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovnt.i16 q1, q0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn16_trunc2:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.16 q2, q0
; CHECKBE-NEXT:    vrev64.16 q3, q1
; CHECKBE-NEXT:    vmovnt.i16 q3, q2
; CHECKBE-NEXT:    vrev64.8 q0, q3
; CHECKBE-NEXT:    bx lr
entry:
  %strided.vec = shufflevector <8 x i16> %src1, <8 x i16> %src2, <16 x i32> <i32 8, i32 0, i32 9, i32 1, i32 10, i32 2, i32 11, i32 3, i32 12, i32 4, i32 13, i32 5, i32 14, i32 6, i32 15, i32 7>
  %out = trunc <16 x i16> %strided.vec to <16 x i8>
  ret <16 x i8> %out
}

define arm_aapcs_vfpcc <16 x i8> @vmovn16_trunc3(<8 x i16> %src1) {
; CHECK-LABEL: vmovn16_trunc3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovnt.i16 q0, q0
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn16_trunc3:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.16 q1, q0
; CHECKBE-NEXT:    vmovnt.i16 q1, q1
; CHECKBE-NEXT:    vrev64.8 q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %strided.vec = shufflevector <8 x i16> %src1, <8 x i16> undef, <16 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3, i32 4, i32 4, i32 5, i32 5, i32 6, i32 6, i32 7, i32 7>
  %out = trunc <16 x i16> %strided.vec to <16 x i8>
  ret <16 x i8> %out
}



; <2 x i64> two-source shuffles. The expected output is plain vmov.f32 lane
; copies and is the same for the little- and big-endian runs.
define arm_aapcs_vfpcc <2 x i64> @vmovn64_t1(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: vmovn64_t1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s2, s4
; CHECK-NEXT:    vmov.f32 s3, s5
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn64_t1:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vmov.f32 s2, s4
; CHECKBE-NEXT:    vmov.f32 s3, s5
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> <i32 0, i32 2>
  ret <2 x i64> %out
}

define arm_aapcs_vfpcc <2 x i64> @vmovn64_t2(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: vmovn64_t2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s6, s0
; CHECK-NEXT:    vmov.f32 s7, s1
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn64_t2:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vmov.f32 s6, s0
; CHECKBE-NEXT:    vmov.f32 s7, s1
; CHECKBE-NEXT:    vmov q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> <i32 2, i32 0>
  ret <2 x i64> %out
}

define arm_aapcs_vfpcc <2 x i64> @vmovn64_b1(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: vmovn64_b1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s2, s6
; CHECK-NEXT:    vmov.f32 s3, s7
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn64_b1:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vmov.f32 s2, s6
; CHECKBE-NEXT:    vmov.f32 s3, s7
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> <i32 0, i32 3>
  ret <2 x i64> %out
}

define arm_aapcs_vfpcc <2 x i64> @vmovn64_b2(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: vmovn64_b2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s4, s6
; CHECK-NEXT:    vmov.f32 s6, s0
; CHECK-NEXT:    vmov.f32 s5, s7
; CHECK-NEXT:    vmov.f32 s7, s1
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn64_b2:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vmov.f32 s4, s6
; CHECKBE-NEXT:    vmov.f32 s6, s0
; CHECKBE-NEXT:    vmov.f32 s5, s7
; CHECKBE-NEXT:    vmov.f32 s7, s1
; CHECKBE-NEXT:    vmov q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> <i32 3, i32 0>
  ret <2 x i64> %out
}

define arm_aapcs_vfpcc <2 x i64> @vmovn64_b3(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: vmovn64_b3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s0, s2
; CHECK-NEXT:    vmov.f32 s2, s4
; CHECK-NEXT:    vmov.f32 s1, s3
; CHECK-NEXT:    vmov.f32 s3, s5
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn64_b3:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vmov.f32 s0, s2
; CHECKBE-NEXT:    vmov.f32 s2, s4
; CHECKBE-NEXT:    vmov.f32 s1, s3
; CHECKBE-NEXT:    vmov.f32 s3, s5
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> <i32 1, i32 2>
  ret <2 x i64> %out
}

define arm_aapcs_vfpcc <2 x i64> @vmovn64_b4(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: vmovn64_b4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s6, s2
; CHECK-NEXT:    vmov.f32 s7, s3
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn64_b4:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vmov.f32 s6, s2
; CHECKBE-NEXT:    vmov.f32 s7, s3
; CHECKBE-NEXT:    vmov q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> <i32 2, i32 1>
  ret <2 x i64> %out
}



; <4 x i32> shuffles with no trunc. The expected output is vmov.f32 lane
; copies; the big-endian run adds vrev64.32 around them.
define arm_aapcs_vfpcc <4 x i32> @vmovn32_t1(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: vmovn32_t1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s1, s4
; CHECK-NEXT:    vmov.f32 s3, s6
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn32_t1:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.32 q2, q1
; CHECKBE-NEXT:    vrev64.32 q1, q0
; CHECKBE-NEXT:    vmov.f32 s5, s8
; CHECKBE-NEXT:    vmov.f32 s7, s10
; CHECKBE-NEXT:    vrev64.32 q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <4 x i32> %src1, <4 x i32> %src2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  ret <4 x i32> %out
}

define arm_aapcs_vfpcc <4 x i32> @vmovn32_t2(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: vmovn32_t2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s5, s0
; CHECK-NEXT:    vmov.f32 s7, s2
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn32_t2:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.32 q2, q0
; CHECKBE-NEXT:    vrev64.32 q3, q1
; CHECKBE-NEXT:    vmov.f32 s13, s8
; CHECKBE-NEXT:    vmov.f32 s15, s10
; CHECKBE-NEXT:    vrev64.32 q0, q3
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <4 x i32> %src1, <4 x i32> %src2, <4 x i32> <i32 4, i32 0, i32 6, i32 2>
  ret <4 x i32> %out
}

define arm_aapcs_vfpcc <4 x i32> @vmovn32_b1(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: vmovn32_b1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s1, s5
; CHECK-NEXT:    vmov.f32 s3, s7
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn32_b1:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.32 q2, q1
; CHECKBE-NEXT:    vrev64.32 q1, q0
; CHECKBE-NEXT:    vmov.f32 s5, s9
; CHECKBE-NEXT:    vmov.f32 s7, s11
; CHECKBE-NEXT:    vrev64.32 q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <4 x i32> %src1, <4 x i32> %src2, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x i32> %out
}

define arm_aapcs_vfpcc <4 x i32> @vmovn32_b2(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: vmovn32_b2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s4, s5
; CHECK-NEXT:    vmov.f32 s6, s7
; CHECK-NEXT:    vmov.f32 s5, s0
; CHECK-NEXT:    vmov.f32 s7, s2
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn32_b2:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.32 q2, q0
; CHECKBE-NEXT:    vrev64.32 q0, q1
; CHECKBE-NEXT:    vmov.f32 s4, s1
; CHECKBE-NEXT:    vmov.f32 s5, s8
; CHECKBE-NEXT:    vmov.f32 s6, s3
; CHECKBE-NEXT:    vmov.f32 s7, s10
; CHECKBE-NEXT:    vrev64.32 q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <4 x i32> %src1, <4 x i32> %src2, <4 x i32> <i32 5, i32 0, i32 7, i32 2>
  ret <4 x i32> %out
}

define arm_aapcs_vfpcc <4 x i32> @vmovn32_b3(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: vmovn32_b3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s0, s1
; CHECK-NEXT:    vmov.f32 s2, s3
; CHECK-NEXT:    vmov.f32 s1, s4
; CHECK-NEXT:    vmov.f32 s3, s6
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn32_b3:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.32 q2, q1
; CHECKBE-NEXT:    vrev64.32 q1, q0
; CHECKBE-NEXT:    vmov.f32 s4, s5
; CHECKBE-NEXT:    vmov.f32 s6, s7
; CHECKBE-NEXT:    vmov.f32 s5, s8
; CHECKBE-NEXT:    vmov.f32 s7, s10
; CHECKBE-NEXT:    vrev64.32 q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <4 x i32> %src1, <4 x i32> %src2, <4 x i32> <i32 1, i32 4, i32 3, i32 6>
  ret <4 x i32> %out
}

define arm_aapcs_vfpcc <4 x i32> @vmovn32_b4(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: vmovn32_b4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s5, s1
; CHECK-NEXT:    vmov.f32 s7, s3
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn32_b4:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.32 q2, q0
; CHECKBE-NEXT:    vrev64.32 q3, q1
; CHECKBE-NEXT:    vmov.f32 s13, s9
; CHECKBE-NEXT:    vmov.f32 s15, s11
; CHECKBE-NEXT:    vrev64.32 q0, q3
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <4 x i32> %src1, <4 x i32> %src2, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
  ret <4 x i32> %out
}

define arm_aapcs_vfpcc <4 x i32> @vmovn32_single_t(<4 x i32> %src1) {
; CHECK-LABEL: vmovn32_single_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s1, s0
; CHECK-NEXT:    vmov.f32 s3, s2
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn32_single_t:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.32 q1, q0
; CHECKBE-NEXT:    vmov.f32 s5, s4
; CHECKBE-NEXT:    vmov.f32 s7, s6
; CHECKBE-NEXT:    vrev64.32 q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <4 x i32> %src1, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  ret <4 x i32> %out
}




; <8 x i16> shuffles with no trunc. t1/t2 are expected to select vmovnt.i32 and
; b1/b4 vmovnb.i32. b2/b3 are not expected to select a vmovn: the integer-only
; run expects per-lane moves and the fullfp16 runs expect vmovx.f16/vins.f16.
define arm_aapcs_vfpcc <8 x i16> @vmovn16_t1(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: vmovn16_t1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovnt.i32 q0, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn16_t1:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.16 q2, q1
; CHECKBE-NEXT:    vrev64.16 q1, q0
; CHECKBE-NEXT:    vmovnt.i32 q1, q2
; CHECKBE-NEXT:    vrev64.16 q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <8 x i16> %src1, <8 x i16> %src2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
  ret <8 x i16> %out
}

define arm_aapcs_vfpcc <8 x i16> @vmovn16_t2(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: vmovn16_t2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovnt.i32 q1, q0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn16_t2:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.16 q2, q0
; CHECKBE-NEXT:    vrev64.16 q3, q1
; CHECKBE-NEXT:    vmovnt.i32 q3, q2
; CHECKBE-NEXT:    vrev64.16 q0, q3
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <8 x i16> %src1, <8 x i16> %src2, <8 x i32> <i32 8, i32 0, i32 10, i32 2, i32 12, i32 4, i32 14, i32 6>
  ret <8 x i16> %out
}

define arm_aapcs_vfpcc <8 x i16> @vmovn16_b1(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: vmovn16_b1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovnb.i32 q1, q0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn16_b1:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.16 q2, q0
; CHECKBE-NEXT:    vrev64.16 q3, q1
; CHECKBE-NEXT:    vmovnb.i32 q3, q2
; CHECKBE-NEXT:    vrev64.16 q0, q3
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <8 x i16> %src1, <8 x i16> %src2, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ret <8 x i16> %out
}

define arm_aapcs_vfpcc <8 x i16> @vmovn16_b2(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-MVE-LABEL: vmovn16_b2:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    vmov q2, q0
; CHECK-MVE-NEXT:    vmov.u16 r0, q1[1]
; CHECK-MVE-NEXT:    vmov.16 q0[0], r0
; CHECK-MVE-NEXT:    vmov.u16 r0, q2[0]
; CHECK-MVE-NEXT:    vmov.16 q0[1], r0
; CHECK-MVE-NEXT:    vmov.u16 r0, q1[3]
; CHECK-MVE-NEXT:    vmov.16 q0[2], r0
; CHECK-MVE-NEXT:    vmov.u16 r0, q2[2]
; CHECK-MVE-NEXT:    vmov.16 q0[3], r0
; CHECK-MVE-NEXT:    vmov.u16 r0, q1[5]
; CHECK-MVE-NEXT:    vmov.16 q0[4], r0
; CHECK-MVE-NEXT:    vmov.u16 r0, q2[4]
; CHECK-MVE-NEXT:    vmov.16 q0[5], r0
; CHECK-MVE-NEXT:    vmov.u16 r0, q1[7]
; CHECK-MVE-NEXT:    vmov.16 q0[6], r0
; CHECK-MVE-NEXT:    vmov.u16 r0, q2[6]
; CHECK-MVE-NEXT:    vmov.16 q0[7], r0
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: vmovn16_b2:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    vmovx.f16 s4, s4
; CHECK-MVEFP-NEXT:    vmovx.f16 s5, s5
; CHECK-MVEFP-NEXT:    vmovx.f16 s6, s6
; CHECK-MVEFP-NEXT:    vmovx.f16 s7, s7
; CHECK-MVEFP-NEXT:    vins.f16 s4, s0
; CHECK-MVEFP-NEXT:    vins.f16 s5, s1
; CHECK-MVEFP-NEXT:    vins.f16 s6, s2
; CHECK-MVEFP-NEXT:    vins.f16 s7, s3
; CHECK-MVEFP-NEXT:    vmov q0, q1
; CHECK-MVEFP-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn16_b2:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.16 q2, q0
; CHECKBE-NEXT:    vrev64.16 q0, q1
; CHECKBE-NEXT:    vmovx.f16 s4, s0
; CHECKBE-NEXT:    vmovx.f16 s5, s1
; CHECKBE-NEXT:    vmovx.f16 s6, s2
; CHECKBE-NEXT:    vmovx.f16 s7, s3
; CHECKBE-NEXT:    vins.f16 s4, s8
; CHECKBE-NEXT:    vins.f16 s5, s9
; CHECKBE-NEXT:    vins.f16 s6, s10
; CHECKBE-NEXT:    vins.f16 s7, s11
; CHECKBE-NEXT:    vrev64.16 q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <8 x i16> %src1, <8 x i16> %src2, <8 x i32> <i32 9, i32 0, i32 11, i32 2, i32 13, i32 4, i32 15, i32 6>
  ret <8 x i16> %out
}

define arm_aapcs_vfpcc <8 x i16> @vmovn16_b3(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-MVE-LABEL: vmovn16_b3:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    vmov.u16 r0, q0[1]
; CHECK-MVE-NEXT:    vmov q2, q0
; CHECK-MVE-NEXT:    vmov.16 q0[0], r0
; CHECK-MVE-NEXT:    vmov.u16 r0, q1[0]
; CHECK-MVE-NEXT:    vmov.16 q0[1], r0
; CHECK-MVE-NEXT:    vmov.u16 r0, q2[3]
; CHECK-MVE-NEXT:    vmov.16 q0[2], r0
; CHECK-MVE-NEXT:    vmov.u16 r0, q1[2]
; CHECK-MVE-NEXT:    vmov.16 q0[3], r0
; CHECK-MVE-NEXT:    vmov.u16 r0, q2[5]
; CHECK-MVE-NEXT:    vmov.16 q0[4], r0
; CHECK-MVE-NEXT:    vmov.u16 r0, q1[4]
; CHECK-MVE-NEXT:    vmov.16 q0[5], r0
; CHECK-MVE-NEXT:    vmov.u16 r0, q2[7]
; CHECK-MVE-NEXT:    vmov.16 q0[6], r0
; CHECK-MVE-NEXT:    vmov.u16 r0, q1[6]
; CHECK-MVE-NEXT:    vmov.16 q0[7], r0
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: vmovn16_b3:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    vmovx.f16 s0, s0
; CHECK-MVEFP-NEXT:    vmovx.f16 s1, s1
; CHECK-MVEFP-NEXT:    vmovx.f16 s2, s2
; CHECK-MVEFP-NEXT:    vmovx.f16 s3, s3
; CHECK-MVEFP-NEXT:    vins.f16 s0, s4
; CHECK-MVEFP-NEXT:    vins.f16 s1, s5
; CHECK-MVEFP-NEXT:    vins.f16 s2, s6
; CHECK-MVEFP-NEXT:    vins.f16 s3, s7
; CHECK-MVEFP-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn16_b3:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.16 q2, q1
; CHECKBE-NEXT:    vrev64.16 q1, q0
; CHECKBE-NEXT:    vmovx.f16 s4, s4
; CHECKBE-NEXT:    vmovx.f16 s5, s5
; CHECKBE-NEXT:    vmovx.f16 s6, s6
; CHECKBE-NEXT:    vmovx.f16 s7, s7
; CHECKBE-NEXT:    vins.f16 s4, s8
; CHECKBE-NEXT:    vins.f16 s5, s9
; CHECKBE-NEXT:    vins.f16 s6, s10
; CHECKBE-NEXT:    vins.f16 s7, s11
; CHECKBE-NEXT:    vrev64.16 q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <8 x i16> %src1, <8 x i16> %src2, <8 x i32> <i32 1, i32 8, i32 3, i32 10, i32 5, i32 12, i32 7, i32 14>
  ret <8 x i16> %out
}

define arm_aapcs_vfpcc <8 x i16> @vmovn16_b4(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: vmovn16_b4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovnb.i32 q0, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn16_b4:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.16 q2, q1
; CHECKBE-NEXT:    vrev64.16 q1, q0
; CHECKBE-NEXT:    vmovnb.i32 q1, q2
; CHECKBE-NEXT:    vrev64.16 q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <8 x i16> %src1, <8 x i16> %src2, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
  ret <8 x i16> %out
}

define arm_aapcs_vfpcc <8 x i16> @vmovn16_single_t(<8 x i16> %src1) {
; CHECK-LABEL: vmovn16_single_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovnt.i32 q0, q0
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn16_single_t:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.16 q1, q0
; CHECKBE-NEXT:    vmovnt.i32 q1, q1
; CHECKBE-NEXT:    vrev64.16 q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <8 x i16> %src1, <8 x i16> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
  ret <8 x i16> %out
}


; <16 x i8> shuffles with no trunc. b1/b2 are expected to select vmovnt.i16 and
; t1/t4 vmovnb.i16; t2/t3 are expected to expand into per-byte lane moves.
; NOTE(review): the _b/_t suffixes here are used in the opposite sense to the
; vmovn16_* tests above (where _t selects vmovnt and _b selects vmovnb) -
; confirm whether that naming is intentional.
define arm_aapcs_vfpcc <16 x i8> @vmovn8_b1(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: vmovn8_b1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovnt.i16 q0, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn8_b1:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.8 q2, q1
; CHECKBE-NEXT:    vrev64.8 q1, q0
; CHECKBE-NEXT:    vmovnt.i16 q1, q2
; CHECKBE-NEXT:    vrev64.8 q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <16 x i8> %src1, <16 x i8> %src2, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
  ret <16 x i8> %out
}

define arm_aapcs_vfpcc <16 x i8> @vmovn8_b2(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: vmovn8_b2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovnt.i16 q1, q0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn8_b2:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.8 q2, q0
; CHECKBE-NEXT:    vrev64.8 q3, q1
; CHECKBE-NEXT:    vmovnt.i16 q3, q2
; CHECKBE-NEXT:    vrev64.8 q0, q3
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <16 x i8> %src1, <16 x i8> %src2, <16 x i32> <i32 16, i32 0, i32 18, i32 2, i32 20, i32 4, i32 22, i32 6, i32 24, i32 8, i32 26, i32 10, i32 28, i32 12, i32 30, i32 14>
  ret <16 x i8> %out
}

define arm_aapcs_vfpcc <16 x i8> @vmovn8_t1(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: vmovn8_t1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovnb.i16 q1, q0
; CHECK-NEXT:    vmov q0, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn8_t1:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.8 q2, q0
; CHECKBE-NEXT:    vrev64.8 q3, q1
; CHECKBE-NEXT:    vmovnb.i16 q3, q2
; CHECKBE-NEXT:    vrev64.8 q0, q3
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <16 x i8> %src1, <16 x i8> %src2, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
  ret <16 x i8> %out
}

define arm_aapcs_vfpcc <16 x i8> @vmovn8_t2(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: vmovn8_t2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov q2, q0
; CHECK-NEXT:    vmov.u8 r0, q1[1]
; CHECK-NEXT:    vmov.8 q0[0], r0
; CHECK-NEXT:    vmov.u8 r0, q2[0]
; CHECK-NEXT:    vmov.8 q0[1], r0
; CHECK-NEXT:    vmov.u8 r0, q1[3]
; CHECK-NEXT:    vmov.8 q0[2], r0
; CHECK-NEXT:    vmov.u8 r0, q2[2]
; CHECK-NEXT:    vmov.8 q0[3], r0
; CHECK-NEXT:    vmov.u8 r0, q1[5]
; CHECK-NEXT:    vmov.8 q0[4], r0
; CHECK-NEXT:    vmov.u8 r0, q2[4]
; CHECK-NEXT:    vmov.8 q0[5], r0
; CHECK-NEXT:    vmov.u8 r0, q1[7]
; CHECK-NEXT:    vmov.8 q0[6], r0
; CHECK-NEXT:    vmov.u8 r0, q2[6]
; CHECK-NEXT:    vmov.8 q0[7], r0
; CHECK-NEXT:    vmov.u8 r0, q1[9]
; CHECK-NEXT:    vmov.8 q0[8], r0
; CHECK-NEXT:    vmov.u8 r0, q2[8]
; CHECK-NEXT:    vmov.8 q0[9], r0
; CHECK-NEXT:    vmov.u8 r0, q1[11]
; CHECK-NEXT:    vmov.8 q0[10], r0
; CHECK-NEXT:    vmov.u8 r0, q2[10]
; CHECK-NEXT:    vmov.8 q0[11], r0
; CHECK-NEXT:    vmov.u8 r0, q1[13]
; CHECK-NEXT:    vmov.8 q0[12], r0
; CHECK-NEXT:    vmov.u8 r0, q2[12]
; CHECK-NEXT:    vmov.8 q0[13], r0
; CHECK-NEXT:    vmov.u8 r0, q1[15]
; CHECK-NEXT:    vmov.8 q0[14], r0
; CHECK-NEXT:    vmov.u8 r0, q2[14]
; CHECK-NEXT:    vmov.8 q0[15], r0
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn8_t2:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.8 q2, q1
; CHECKBE-NEXT:    vrev64.8 q3, q0
; CHECKBE-NEXT:    vmov.u8 r0, q2[1]
; CHECKBE-NEXT:    vmov.8 q1[0], r0
; CHECKBE-NEXT:    vmov.u8 r0, q3[0]
; CHECKBE-NEXT:    vmov.8 q1[1], r0
; CHECKBE-NEXT:    vmov.u8 r0, q2[3]
; CHECKBE-NEXT:    vmov.8 q1[2], r0
; CHECKBE-NEXT:    vmov.u8 r0, q3[2]
; CHECKBE-NEXT:    vmov.8 q1[3], r0
; CHECKBE-NEXT:    vmov.u8 r0, q2[5]
; CHECKBE-NEXT:    vmov.8 q1[4], r0
; CHECKBE-NEXT:    vmov.u8 r0, q3[4]
; CHECKBE-NEXT:    vmov.8 q1[5], r0
; CHECKBE-NEXT:    vmov.u8 r0, q2[7]
; CHECKBE-NEXT:    vmov.8 q1[6], r0
; CHECKBE-NEXT:    vmov.u8 r0, q3[6]
; CHECKBE-NEXT:    vmov.8 q1[7], r0
; CHECKBE-NEXT:    vmov.u8 r0, q2[9]
; CHECKBE-NEXT:    vmov.8 q1[8], r0
; CHECKBE-NEXT:    vmov.u8 r0, q3[8]
; CHECKBE-NEXT:    vmov.8 q1[9], r0
; CHECKBE-NEXT:    vmov.u8 r0, q2[11]
; CHECKBE-NEXT:    vmov.8 q1[10], r0
; CHECKBE-NEXT:    vmov.u8 r0, q3[10]
; CHECKBE-NEXT:    vmov.8 q1[11], r0
; CHECKBE-NEXT:    vmov.u8 r0, q2[13]
; CHECKBE-NEXT:    vmov.8 q1[12], r0
; CHECKBE-NEXT:    vmov.u8 r0, q3[12]
; CHECKBE-NEXT:    vmov.8 q1[13], r0
; CHECKBE-NEXT:    vmov.u8 r0, q2[15]
; CHECKBE-NEXT:    vmov.8 q1[14], r0
; CHECKBE-NEXT:    vmov.u8 r0, q3[14]
; CHECKBE-NEXT:    vmov.8 q1[15], r0
; CHECKBE-NEXT:    vrev64.8 q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <16 x i8> %src1, <16 x i8> %src2, <16 x i32> <i32 17, i32 0, i32 19, i32 2, i32 21, i32 4, i32 23, i32 6, i32 25, i32 8, i32 27, i32 10, i32 29, i32 12, i32 31, i32 14>
  ret <16 x i8> %out
}

define arm_aapcs_vfpcc <16 x i8> @vmovn8_t3(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: vmovn8_t3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.u8 r0, q0[1]
; CHECK-NEXT:    vmov q2, q0
; CHECK-NEXT:    vmov.8 q0[0], r0
; CHECK-NEXT:    vmov.u8 r0, q1[0]
; CHECK-NEXT:    vmov.8 q0[1], r0
; CHECK-NEXT:    vmov.u8 r0, q2[3]
; CHECK-NEXT:    vmov.8 q0[2], r0
; CHECK-NEXT:    vmov.u8 r0, q1[2]
; CHECK-NEXT:    vmov.8 q0[3], r0
; CHECK-NEXT:    vmov.u8 r0, q2[5]
; CHECK-NEXT:    vmov.8 q0[4], r0
; CHECK-NEXT:    vmov.u8 r0, q1[4]
; CHECK-NEXT:    vmov.8 q0[5], r0
; CHECK-NEXT:    vmov.u8 r0, q2[7]
; CHECK-NEXT:    vmov.8 q0[6], r0
; CHECK-NEXT:    vmov.u8 r0, q1[6]
; CHECK-NEXT:    vmov.8 q0[7], r0
; CHECK-NEXT:    vmov.u8 r0, q2[9]
; CHECK-NEXT:    vmov.8 q0[8], r0
; CHECK-NEXT:    vmov.u8 r0, q1[8]
; CHECK-NEXT:    vmov.8 q0[9], r0
; CHECK-NEXT:    vmov.u8 r0, q2[11]
; CHECK-NEXT:    vmov.8 q0[10], r0
; CHECK-NEXT:    vmov.u8 r0, q1[10]
; CHECK-NEXT:    vmov.8 q0[11], r0
; CHECK-NEXT:    vmov.u8 r0, q2[13]
; CHECK-NEXT:    vmov.8 q0[12], r0
; CHECK-NEXT:    vmov.u8 r0, q1[12]
; CHECK-NEXT:    vmov.8 q0[13], r0
; CHECK-NEXT:    vmov.u8 r0, q2[15]
; CHECK-NEXT:    vmov.8 q0[14], r0
; CHECK-NEXT:    vmov.u8 r0, q1[14]
; CHECK-NEXT:    vmov.8 q0[15], r0
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn8_t3:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.8 q3, q0
; CHECKBE-NEXT:    vrev64.8 q0, q1
; CHECKBE-NEXT:    vmov.u8 r0, q3[1]
; CHECKBE-NEXT:    vmov.8 q2[0], r0
; CHECKBE-NEXT:    vmov.u8 r0, q0[0]
; CHECKBE-NEXT:    vmov.8 q2[1], r0
; CHECKBE-NEXT:    vmov.u8 r0, q3[3]
; CHECKBE-NEXT:    vmov.8 q2[2], r0
; CHECKBE-NEXT:    vmov.u8 r0, q0[2]
; CHECKBE-NEXT:    vmov.8 q2[3], r0
; CHECKBE-NEXT:    vmov.u8 r0, q3[5]
; CHECKBE-NEXT:    vmov.8 q2[4], r0
; CHECKBE-NEXT:    vmov.u8 r0, q0[4]
; CHECKBE-NEXT:    vmov.8 q2[5], r0
; CHECKBE-NEXT:    vmov.u8 r0, q3[7]
; CHECKBE-NEXT:    vmov.8 q2[6], r0
; CHECKBE-NEXT:    vmov.u8 r0, q0[6]
; CHECKBE-NEXT:    vmov.8 q2[7], r0
; CHECKBE-NEXT:    vmov.u8 r0, q3[9]
; CHECKBE-NEXT:    vmov.8 q2[8], r0
; CHECKBE-NEXT:    vmov.u8 r0, q0[8]
; CHECKBE-NEXT:    vmov.8 q2[9], r0
; CHECKBE-NEXT:    vmov.u8 r0, q3[11]
; CHECKBE-NEXT:    vmov.8 q2[10], r0
; CHECKBE-NEXT:    vmov.u8 r0, q0[10]
; CHECKBE-NEXT:    vmov.8 q2[11], r0
; CHECKBE-NEXT:    vmov.u8 r0, q3[13]
; CHECKBE-NEXT:    vmov.8 q2[12], r0
; CHECKBE-NEXT:    vmov.u8 r0, q0[12]
; CHECKBE-NEXT:    vmov.8 q2[13], r0
; CHECKBE-NEXT:    vmov.u8 r0, q3[15]
; CHECKBE-NEXT:    vmov.8 q2[14], r0
; CHECKBE-NEXT:    vmov.u8 r0, q0[14]
; CHECKBE-NEXT:    vmov.8 q2[15], r0
; CHECKBE-NEXT:    vrev64.8 q0, q2
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <16 x i8> %src1, <16 x i8> %src2, <16 x i32> <i32 1, i32 16, i32 3, i32 18, i32 5, i32 20, i32 7, i32 22, i32 9, i32 24, i32 11, i32 26, i32 13, i32 28, i32 15, i32 30>
  ret <16 x i8> %out
}

define arm_aapcs_vfpcc <16 x i8> @vmovn8_t4(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: vmovn8_t4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovnb.i16 q0, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn8_t4:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.8 q2, q1
; CHECKBE-NEXT:    vrev64.8 q1, q0
; CHECKBE-NEXT:    vmovnb.i16 q1, q2
; CHECKBE-NEXT:    vrev64.8 q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <16 x i8> %src1, <16 x i8> %src2, <16 x i32> <i32 16, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 13, i32 30, i32 15>
  ret <16 x i8> %out
}

define arm_aapcs_vfpcc <16 x i8> @vmovn8_single_t(<16 x i8> %src1) {
; CHECK-LABEL: vmovn8_single_t:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovnt.i16 q0, q0
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn8_single_t:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.8 q1, q0
; CHECKBE-NEXT:    vmovnt.i16 q1, q1
; CHECKBE-NEXT:    vrev64.8 q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <16 x i8> %src1, <16 x i8> undef, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
  ret <16 x i8> %out
}


; Interleave+trunc where one shuffle input is a vreinterpretq of undef.
; With the second input undef the expected output is just the return.
define arm_aapcs_vfpcc <8 x i16> @vmovn32trunct_undef2(<8 x i16> %a) {
; CHECK-LABEL: vmovn32trunct_undef2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn32trunct_undef2:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    bx lr
entry:
  %c1 = call <4 x i32> @llvm.arm.mve.vreinterpretq.v4i32.v8i16(<8 x i16> %a)
  %c2 = call <4 x i32> @llvm.arm.mve.vreinterpretq.v4i32.v8i16(<8 x i16> undef)
  %strided.vec = shufflevector <4 x i32> %c1, <4 x i32> %c2, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
  %out = trunc <8 x i32> %strided.vec to <8 x i16>
  ret <8 x i16> %out
}

; With the first input undef the expected output is vmovnt.i32 q0, q0.
define arm_aapcs_vfpcc <8 x i16> @vmovn32trunct_undef1(<8 x i16> %a) {
; CHECK-LABEL: vmovn32trunct_undef1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovnt.i32 q0, q0
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn32trunct_undef1:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.16 q1, q0
; CHECKBE-NEXT:    vmovnt.i32 q1, q1
; CHECKBE-NEXT:    vrev64.16 q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %c1 = call <4 x i32> @llvm.arm.mve.vreinterpretq.v4i32.v8i16(<8 x i16> undef)
  %c2 = call <4 x i32> @llvm.arm.mve.vreinterpretq.v4i32.v8i16(<8 x i16> %a)
  %strided.vec = shufflevector <4 x i32> %c1, <4 x i32> %c2, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
  %out = trunc <8 x i32> %strided.vec to <8 x i16>
  ret <8 x i16> %out
}

; Shuffle-only variants with an undef input. The little-endian expectation is
; just the return; the big-endian expectation is the two vrev64 conversions.
define arm_aapcs_vfpcc <8 x i16> @vmovn16b_undef2(<16 x i8> %a) {
; CHECK-LABEL: vmovn16b_undef2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn16b_undef2:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.8 q1, q0
; CHECKBE-NEXT:    vrev64.16 q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %c1 = call <8 x i16> @llvm.arm.mve.vreinterpretq.v8i16.v16i8(<16 x i8> %a)
  %c2 = call <8 x i16> @llvm.arm.mve.vreinterpretq.v8i16.v16i8(<16 x i8> undef)
  %out = shufflevector <8 x i16> %c1, <8 x i16> %c2, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ret <8 x i16> %out
}

define arm_aapcs_vfpcc <8 x i16> @vmovn16b_undef1(<16 x i8> %a) {
; CHECK-LABEL: vmovn16b_undef1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn16b_undef1:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.8 q1, q0
; CHECKBE-NEXT:    vrev64.16 q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %c1 = call <8 x i16> @llvm.arm.mve.vreinterpretq.v8i16.v16i8(<16 x i8> undef)
  %c2 = call <8 x i16> @llvm.arm.mve.vreinterpretq.v8i16.v16i8(<16 x i8> %a)
  %out = shufflevector <8 x i16> %c1, <8 x i16> %c2, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ret <8 x i16> %out
}

; The mask reuses source lane 1 for two odd result lanes, so it is not a
; regular interleave. The expected output is per-lane vmov.16 inserts.
define arm_aapcs_vfpcc <8 x i16> @vmovn32_badlanes(<4 x i32> %src1) {
; CHECK-MVE-LABEL: vmovn32_badlanes:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    vmov r0, r1, d0
; CHECK-MVE-NEXT:    vmov.16 q1[1], r0
; CHECK-MVE-NEXT:    vmov r0, s2
; CHECK-MVE-NEXT:    vmov.16 q1[3], r1
; CHECK-MVE-NEXT:    vmov.16 q1[5], r1
; CHECK-MVE-NEXT:    vmov.16 q1[7], r0
; CHECK-MVE-NEXT:    vmov q0, q1
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: vmovn32_badlanes:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    vmov r1, r2, d0
; CHECK-MVEFP-NEXT:    vmov r0, s2
; CHECK-MVEFP-NEXT:    vmov.16 q0[1], r1
; CHECK-MVEFP-NEXT:    vmov.16 q0[3], r2
; CHECK-MVEFP-NEXT:    vmov.16 q0[5], r2
; CHECK-MVEFP-NEXT:    vmov.16 q0[7], r0
; CHECK-MVEFP-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn32_badlanes:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.32 q1, q0
; CHECKBE-NEXT:    vmov r0, r1, d2
; CHECKBE-NEXT:    vmov r2, s6
; CHECKBE-NEXT:    vmov.16 q1[1], r0
; CHECKBE-NEXT:    vmov.16 q1[3], r1
; CHECKBE-NEXT:    vmov.16 q1[5], r1
; CHECKBE-NEXT:    vmov.16 q1[7], r2
; CHECKBE-NEXT:    vrev64.16 q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %strided.vec = shufflevector <4 x i32> %src1, <4 x i32> undef, <8 x i32> <i32 4, i32 0, i32 5, i32 1, i32 6, i32 1, i32 7, i32 2>
  %out = trunc <8 x i32> %strided.vec to <8 x i16>
  ret <8 x i16> %out
}

; i16 -> i8 counterparts of the vmovn32trunct_undef tests.
define arm_aapcs_vfpcc <16 x i8> @vmovn16trunct_undef2(<16 x i8> %a) {
; CHECK-LABEL: vmovn16trunct_undef2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn16trunct_undef2:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    bx lr
entry:
  %c1 = call <8 x i16> @llvm.arm.mve.vreinterpretq.v8i16.v16i8(<16 x i8> %a)
  %c2 = call <8 x i16> @llvm.arm.mve.vreinterpretq.v8i16.v16i8(<16 x i8> undef)
  %strided.vec = shufflevector <8 x i16> %c1, <8 x i16> %c2, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  %out = trunc <16 x i16> %strided.vec to <16 x i8>
  ret <16 x i8> %out
}

define arm_aapcs_vfpcc <16 x i8> @vmovn16trunct_undef1(<16 x i8> %a) {
; CHECK-LABEL: vmovn16trunct_undef1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovnt.i16 q0, q0
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn16trunct_undef1:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.8 q1, q0
; CHECKBE-NEXT:    vmovnt.i16 q1, q1
; CHECKBE-NEXT:    vrev64.8 q0, q1
; CHECKBE-NEXT:    bx lr
entry:
  %c1 = call <8 x i16> @llvm.arm.mve.vreinterpretq.v8i16.v16i8(<16 x i8> undef)
  %c2 = call <8 x i16> @llvm.arm.mve.vreinterpretq.v8i16.v16i8(<16 x i8> %a)
  %strided.vec = shufflevector <8 x i16> %c1, <8 x i16> %c2, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  %out = trunc <16 x i16> %strided.vec to <16 x i8>
  ret <16 x i8> %out
}

declare <4 x i32> @llvm.arm.mve.vreinterpretq.v4i32.v8i16(<8 x i16>)
declare <8 x i16> @llvm.arm.mve.vreinterpretq.v8i16.v16i8(<16 x i8>)