; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=armv8-arm-none-eabi -verify-machineinstrs %s -o - | FileCheck %s
; RUN: llc -mtriple=armebv8-arm-none-eabi -verify-machineinstrs %s -o - | FileCheck %s --check-prefix=CHECKBE

; This is the same as Thumb2/mve-vmovn.ll, testing the same patterns for neon
; under both LE and BE. The vmovn instruction is very different between
; mve and neon, so these tests are not necessarily expected to generate a (neon)
; vmovn.
;
; In every shufflevector mask below, indices [0, N) select lanes of %src1 and
; indices [N, 2N) select lanes of %src2, where N is the input lane count.

; Interleave the i32 lanes of %src1 and %src2 (src1 lane first in each pair),
; then truncate the <8 x i32> result to <8 x i16>.
define arm_aapcs_vfpcc <8 x i16> @vmovn32_trunc1(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: vmovn32_trunc1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vzip.32 q0, q1
; CHECK-NEXT:    vmovn.i32 d17, q1
; CHECK-NEXT:    vmovn.i32 d16, q0
; CHECK-NEXT:    vorr q0, q8, q8
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn32_trunc1:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.32 q8, q1
; CHECKBE-NEXT:    vrev64.32 q9, q0
; CHECKBE-NEXT:    vzip.32 q9, q8
; CHECKBE-NEXT:    vmovn.i32 d17, q8
; CHECKBE-NEXT:    vmovn.i32 d16, q9
; CHECKBE-NEXT:    vrev64.16 q0, q8
; CHECKBE-NEXT:    bx lr
entry:
  %strided.vec = shufflevector <4 x i32> %src1, <4 x i32> %src2, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
  %out = trunc <8 x i32> %strided.vec to <8 x i16>
  ret <8 x i16> %out
}

; Same as vmovn32_trunc1 but with the src2 lane first in each interleaved pair.
define arm_aapcs_vfpcc <8 x i16> @vmovn32_trunc2(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: vmovn32_trunc2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vzip.32 q1, q0
; CHECK-NEXT:    vmovn.i32 d1, q0
; CHECK-NEXT:    vmovn.i32 d0, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn32_trunc2:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.32 q8, q0
; CHECKBE-NEXT:    vrev64.32 q9, q1
; CHECKBE-NEXT:    vzip.32 q9, q8
; CHECKBE-NEXT:    vmovn.i32 d17, q8
; CHECKBE-NEXT:    vmovn.i32 d16, q9
; CHECKBE-NEXT:    vrev64.16 q0, q8
; CHECKBE-NEXT:    bx lr
entry:
  %strided.vec = shufflevector <4 x i32> %src1, <4 x i32> %src2, <8 x i32> <i32 4, i32 0, i32 5, i32 1, i32 6, i32 2, i32 7, i32 3>
  %out = trunc <8 x i32> %strided.vec to <8 x i16>
  ret <8 x i16> %out
}

; Interleave the i16 lanes of %src1 and %src2 (src1 lane first in each pair),
; then truncate the <16 x i16> result to <16 x i8>.
define arm_aapcs_vfpcc <16 x i8> @vmovn16_trunc1(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: vmovn16_trunc1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vzip.16 q0, q1
; CHECK-NEXT:    vmovn.i16 d17, q1
; CHECK-NEXT:    vmovn.i16 d16, q0
; CHECK-NEXT:    vorr q0, q8, q8
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn16_trunc1:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.16 q8, q1
; CHECKBE-NEXT:    vrev64.16 q9, q0
; CHECKBE-NEXT:    vzip.16 q9, q8
; CHECKBE-NEXT:    vmovn.i16 d17, q8
; CHECKBE-NEXT:    vmovn.i16 d16, q9
; CHECKBE-NEXT:    vrev64.8 q0, q8
; CHECKBE-NEXT:    bx lr
entry:
  %strided.vec = shufflevector <8 x i16> %src1, <8 x i16> %src2, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  %out = trunc <16 x i16> %strided.vec to <16 x i8>
  ret <16 x i8> %out
}

; Same as vmovn16_trunc1 but with the src2 lane first in each interleaved pair.
define arm_aapcs_vfpcc <16 x i8> @vmovn16_trunc2(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: vmovn16_trunc2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vzip.16 q1, q0
; CHECK-NEXT:    vmovn.i16 d1, q0
; CHECK-NEXT:    vmovn.i16 d0, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn16_trunc2:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.16 q8, q0
; CHECKBE-NEXT:    vrev64.16 q9, q1
; CHECKBE-NEXT:    vzip.16 q9, q8
; CHECKBE-NEXT:    vmovn.i16 d17, q8
; CHECKBE-NEXT:    vmovn.i16 d16, q9
; CHECKBE-NEXT:    vrev64.8 q0, q8
; CHECKBE-NEXT:    bx lr
entry:
  %strided.vec = shufflevector <8 x i16> %src1, <8 x i16> %src2, <16 x i32> <i32 8, i32 0, i32 9, i32 1, i32 10, i32 2, i32 11, i32 3, i32 12, i32 4, i32 13, i32 5, i32 14, i32 6, i32 15, i32 7>
  %out = trunc <16 x i16> %strided.vec to <16 x i8>
  ret <16 x i8> %out
}


; 64-bit lanes, mask <0, 2>: lane 0 of %src1 followed by lane 0 of %src2.
define arm_aapcs_vfpcc <2 x i64> @vmovn64_t1(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: vmovn64_t1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f64 d1, d2
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn64_t1:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vmov.f64 d1, d2
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> <i32 0, i32 2>
  ret <2 x i64> %out
}

; 64-bit lanes, mask <2, 0>: lane 0 of %src2 followed by lane 0 of %src1.
define arm_aapcs_vfpcc <2 x i64> @vmovn64_t2(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: vmovn64_t2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vorr d3, d0, d0
; CHECK-NEXT:    vorr q0, q1, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn64_t2:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vorr d3, d0, d0
; CHECKBE-NEXT:    vorr q0, q1, q1
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> <i32 2, i32 0>
  ret <2 x i64> %out
}

; 64-bit lanes, mask <0, 3>: lane 0 of %src1 followed by lane 1 of %src2.
define arm_aapcs_vfpcc <2 x i64> @vmovn64_b1(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: vmovn64_b1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f64 d1, d3
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn64_b1:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vmov.f64 d1, d3
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> <i32 0, i32 3>
  ret <2 x i64> %out
}

; 64-bit lanes, mask <3, 0>: lane 1 of %src2 followed by lane 0 of %src1.
define arm_aapcs_vfpcc <2 x i64> @vmovn64_b2(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: vmovn64_b2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f64 d16, d3
; CHECK-NEXT:    vorr d17, d0, d0
; CHECK-NEXT:    vorr q0, q8, q8
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn64_b2:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vmov.f64 d16, d3
; CHECKBE-NEXT:    vorr d17, d0, d0
; CHECKBE-NEXT:    vorr q0, q8, q8
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> <i32 3, i32 0>
  ret <2 x i64> %out
}

; 64-bit lanes, mask <1, 2>: lane 1 of %src1 followed by lane 0 of %src2.
define arm_aapcs_vfpcc <2 x i64> @vmovn64_b3(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: vmovn64_b3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f64 d0, d1
; CHECK-NEXT:    vmov.f64 d1, d2
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn64_b3:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vmov.f64 d0, d1
; CHECKBE-NEXT:    vmov.f64 d1, d2
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> <i32 1, i32 2>
  ret <2 x i64> %out
}

; 64-bit lanes, mask <2, 1>: lane 0 of %src2 followed by lane 1 of %src1.
define arm_aapcs_vfpcc <2 x i64> @vmovn64_b4(<2 x i64> %src1, <2 x i64> %src2) {
; CHECK-LABEL: vmovn64_b4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vorr d3, d1, d1
; CHECK-NEXT:    vorr q0, q1, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn64_b4:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vorr d3, d1, d1
; CHECKBE-NEXT:    vorr q0, q1, q1
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> <i32 2, i32 1>
  ret <2 x i64> %out
}



; 32-bit lanes, mask <0, 4, 2, 6>: even lanes of %src1 interleaved with even
; lanes of %src2 (src1 first). Little-endian expects a single vtrn.32.
define arm_aapcs_vfpcc <4 x i32> @vmovn32_t1(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: vmovn32_t1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vtrn.32 q0, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn32_t1:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.32 q8, q1
; CHECKBE-NEXT:    vrev64.32 q9, q0
; CHECKBE-NEXT:    vtrn.32 q9, q8
; CHECKBE-NEXT:    vrev64.32 q0, q9
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <4 x i32> %src1, <4 x i32> %src2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  ret <4 x i32> %out
}

; 32-bit lanes, mask <4, 0, 6, 2>: even lanes of %src2 interleaved with even
; lanes of %src1 (src2 first).
define arm_aapcs_vfpcc <4 x i32> @vmovn32_t2(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: vmovn32_t2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vtrn.32 q1, q0
; CHECK-NEXT:    vorr q0, q1, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn32_t2:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.32 q8, q0
; CHECKBE-NEXT:    vrev64.32 q9, q1
; CHECKBE-NEXT:    vtrn.32 q9, q8
; CHECKBE-NEXT:    vrev64.32 q0, q9
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <4 x i32> %src1, <4 x i32> %src2, <4 x i32> <i32 4, i32 0, i32 6, i32 2>
  ret <4 x i32> %out
}

; 32-bit lanes, mask <0, 5, 2, 7>: even lanes of %src1 interleaved with odd
; lanes of %src2 (src1 first).
define arm_aapcs_vfpcc <4 x i32> @vmovn32_b1(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: vmovn32_b1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev64.32 q8, q0
; CHECK-NEXT:    vtrn.32 q8, q1
; CHECK-NEXT:    vorr q0, q1, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn32_b1:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.32 q8, q0
; CHECKBE-NEXT:    vrev64.32 q9, q1
; CHECKBE-NEXT:    vrev64.32 q8, q8
; CHECKBE-NEXT:    vtrn.32 q8, q9
; CHECKBE-NEXT:    vrev64.32 q0, q9
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <4 x i32> %src1, <4 x i32> %src2, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x i32> %out
}

; 32-bit lanes, mask <5, 0, 7, 2>: odd lanes of %src2 interleaved with even
; lanes of %src1 (src2 first).
define arm_aapcs_vfpcc <4 x i32> @vmovn32_b2(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: vmovn32_b2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vext.32 q8, q0, q0, #1
; CHECK-NEXT:    vtrn.32 q8, q1
; CHECK-NEXT:    vext.32 q0, q1, q1, #1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn32_b2:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.32 q8, q0
; CHECKBE-NEXT:    vrev64.32 q9, q1
; CHECKBE-NEXT:    vext.32 q8, q8, q8, #1
; CHECKBE-NEXT:    vtrn.32 q8, q9
; CHECKBE-NEXT:    vext.32 q8, q9, q9, #1
; CHECKBE-NEXT:    vrev64.32 q0, q8
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <4 x i32> %src1, <4 x i32> %src2, <4 x i32> <i32 5, i32 0, i32 7, i32 2>
  ret <4 x i32> %out
}

; 32-bit lanes, mask <1, 4, 3, 6>: odd lanes of %src1 interleaved with even
; lanes of %src2 (src1 first).
define arm_aapcs_vfpcc <4 x i32> @vmovn32_b3(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: vmovn32_b3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vorr q8, q0, q0
; CHECK-NEXT:    vtrn.32 q8, q1
; CHECK-NEXT:    vtrn.32 q0, q8
; CHECK-NEXT:    vorr q0, q8, q8
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn32_b3:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.32 q9, q0
; CHECKBE-NEXT:    vrev64.32 q8, q1
; CHECKBE-NEXT:    vorr q10, q9, q9
; CHECKBE-NEXT:    vtrn.32 q10, q8
; CHECKBE-NEXT:    vtrn.32 q9, q10
; CHECKBE-NEXT:    vrev64.32 q0, q10
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <4 x i32> %src1, <4 x i32> %src2, <4 x i32> <i32 1, i32 4, i32 3, i32 6>
  ret <4 x i32> %out
}

; 32-bit lanes, mask <4, 1, 6, 3>: even lanes of %src2 interleaved with odd
; lanes of %src1 (src2 first).
define arm_aapcs_vfpcc <4 x i32> @vmovn32_b4(<4 x i32> %src1, <4 x i32> %src2) {
; CHECK-LABEL: vmovn32_b4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vorr q8, q0, q0
; CHECK-NEXT:    vtrn.32 q8, q1
; CHECK-NEXT:    vtrn.32 q8, q0
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn32_b4:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.32 q9, q0
; CHECKBE-NEXT:    vrev64.32 q8, q1
; CHECKBE-NEXT:    vorr q10, q9, q9
; CHECKBE-NEXT:    vtrn.32 q10, q8
; CHECKBE-NEXT:    vtrn.32 q10, q9
; CHECKBE-NEXT:    vrev64.32 q0, q9
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <4 x i32> %src1, <4 x i32> %src2, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
  ret <4 x i32> %out
}




; 16-bit lanes, mask <0, 8, 2, 10, ...>: even lanes of %src1 interleaved with
; even lanes of %src2 (src1 first). Little-endian expects a single vtrn.16.
define arm_aapcs_vfpcc <8 x i16> @vmovn16_t1(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: vmovn16_t1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vtrn.16 q0, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn16_t1:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.16 q8, q1
; CHECKBE-NEXT:    vrev64.16 q9, q0
; CHECKBE-NEXT:    vtrn.16 q9, q8
; CHECKBE-NEXT:    vrev64.16 q0, q9
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <8 x i16> %src1, <8 x i16> %src2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
  ret <8 x i16> %out
}

; 16-bit lanes, mask <8, 0, 10, 2, ...>: even lanes of %src2 interleaved with
; even lanes of %src1 (src2 first).
define arm_aapcs_vfpcc <8 x i16> @vmovn16_t2(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: vmovn16_t2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vtrn.16 q1, q0
; CHECK-NEXT:    vorr q0, q1, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn16_t2:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.16 q8, q0
; CHECKBE-NEXT:    vrev64.16 q9, q1
; CHECKBE-NEXT:    vtrn.16 q9, q8
; CHECKBE-NEXT:    vrev64.16 q0, q9
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <8 x i16> %src1, <8 x i16> %src2, <8 x i32> <i32 8, i32 0, i32 10, i32 2, i32 12, i32 4, i32 14, i32 6>
  ret <8 x i16> %out
}

; 16-bit lanes, mask <0, 9, 2, 11, ...>: even lanes of %src1 interleaved with
; odd lanes of %src2 (src1 first).
define arm_aapcs_vfpcc <8 x i16> @vmovn16_b1(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: vmovn16_b1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev32.16 d16, d1
; CHECK-NEXT:    vrev32.16 d17, d0
; CHECK-NEXT:    vtrn.16 d16, d3
; CHECK-NEXT:    vtrn.16 d17, d2
; CHECK-NEXT:    vorr q0, q1, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn16_b1:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.16 d16, d1
; CHECKBE-NEXT:    vrev64.16 d17, d0
; CHECKBE-NEXT:    vrev64.16 d19, d3
; CHECKBE-NEXT:    vrev32.16 d16, d16
; CHECKBE-NEXT:    vrev64.16 d18, d2
; CHECKBE-NEXT:    vrev32.16 d17, d17
; CHECKBE-NEXT:    vtrn.16 d16, d19
; CHECKBE-NEXT:    vtrn.16 d17, d18
; CHECKBE-NEXT:    vrev64.16 q0, q9
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <8 x i16> %src1, <8 x i16> %src2, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  ret <8 x i16> %out
}

; 16-bit lanes, mask <9, 0, 11, 2, ...>: odd lanes of %src2 interleaved with
; even lanes of %src1 (src2 first).
define arm_aapcs_vfpcc <8 x i16> @vmovn16_b2(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: vmovn16_b2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vorr d17, d3, d3
; CHECK-NEXT:    vtrn.16 d17, d1
; CHECK-NEXT:    vorr d16, d2, d2
; CHECK-NEXT:    vtrn.16 d16, d0
; CHECK-NEXT:    vtrn.16 d3, d17
; CHECK-NEXT:    vtrn.16 d2, d16
; CHECK-NEXT:    vorr q0, q8, q8
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn16_b2:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.16 d17, d3
; CHECKBE-NEXT:    vorr d21, d17, d17
; CHECKBE-NEXT:    vrev64.16 d16, d1
; CHECKBE-NEXT:    vrev64.16 d19, d2
; CHECKBE-NEXT:    vrev64.16 d18, d0
; CHECKBE-NEXT:    vtrn.16 d21, d16
; CHECKBE-NEXT:    vorr d20, d19, d19
; CHECKBE-NEXT:    vtrn.16 d20, d18
; CHECKBE-NEXT:    vtrn.16 d17, d21
; CHECKBE-NEXT:    vtrn.16 d19, d20
; CHECKBE-NEXT:    vrev64.16 q0, q10
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <8 x i16> %src1, <8 x i16> %src2, <8 x i32> <i32 9, i32 0, i32 11, i32 2, i32 13, i32 4, i32 15, i32 6>
  ret <8 x i16> %out
}

; 16-bit lanes, mask <1, 8, 3, 10, ...>: odd lanes of %src1 interleaved with
; even lanes of %src2 (src1 first).
define arm_aapcs_vfpcc <8 x i16> @vmovn16_b3(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: vmovn16_b3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vorr d17, d1, d1
; CHECK-NEXT:    vtrn.16 d17, d3
; CHECK-NEXT:    vorr d16, d0, d0
; CHECK-NEXT:    vtrn.16 d16, d2
; CHECK-NEXT:    vtrn.16 d1, d17
; CHECK-NEXT:    vtrn.16 d0, d16
; CHECK-NEXT:    vorr q0, q8, q8
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn16_b3:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.16 d17, d1
; CHECKBE-NEXT:    vorr d21, d17, d17
; CHECKBE-NEXT:    vrev64.16 d16, d3
; CHECKBE-NEXT:    vrev64.16 d19, d0
; CHECKBE-NEXT:    vrev64.16 d18, d2
; CHECKBE-NEXT:    vtrn.16 d21, d16
; CHECKBE-NEXT:    vorr d20, d19, d19
; CHECKBE-NEXT:    vtrn.16 d20, d18
; CHECKBE-NEXT:    vtrn.16 d17, d21
; CHECKBE-NEXT:    vtrn.16 d19, d20
; CHECKBE-NEXT:    vrev64.16 q0, q10
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <8 x i16> %src1, <8 x i16> %src2, <8 x i32> <i32 1, i32 8, i32 3, i32 10, i32 5, i32 12, i32 7, i32 14>
  ret <8 x i16> %out
}

; 16-bit lanes, mask <8, 1, 10, 3, ...>: even lanes of %src2 interleaved with
; odd lanes of %src1 (src2 first).
define arm_aapcs_vfpcc <8 x i16> @vmovn16_b4(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: vmovn16_b4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vrev32.16 d16, d3
; CHECK-NEXT:    vrev32.16 d17, d2
; CHECK-NEXT:    vtrn.16 d16, d1
; CHECK-NEXT:    vtrn.16 d17, d0
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn16_b4:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.16 d16, d3
; CHECKBE-NEXT:    vrev64.16 d17, d2
; CHECKBE-NEXT:    vrev64.16 d19, d1
; CHECKBE-NEXT:    vrev32.16 d16, d16
; CHECKBE-NEXT:    vrev64.16 d18, d0
; CHECKBE-NEXT:    vrev32.16 d17, d17
; CHECKBE-NEXT:    vtrn.16 d16, d19
; CHECKBE-NEXT:    vtrn.16 d17, d18
; CHECKBE-NEXT:    vrev64.16 q0, q9
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <8 x i16> %src1, <8 x i16> %src2, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
  ret <8 x i16> %out
}


; 8-bit lanes, mask <0, 16, 2, 18, ...>: even lanes of %src1 interleaved with
; even lanes of %src2 (src1 first). Little-endian expects a single vtrn.8.
; NOTE(review): this mask shape is named "t1" in the 16- and 32-bit tests above;
; the t/b suffixes of the 8-bit tests appear swapped relative to them. Confirm
; against Thumb2/mve-vmovn.ll before renaming anything.
define arm_aapcs_vfpcc <16 x i8> @vmovn8_b1(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: vmovn8_b1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vtrn.8 q0, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn8_b1:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.8 q8, q1
; CHECKBE-NEXT:    vrev64.8 q9, q0
; CHECKBE-NEXT:    vtrn.8 q9, q8
; CHECKBE-NEXT:    vrev64.8 q0, q9
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <16 x i8> %src1, <16 x i8> %src2, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
  ret <16 x i8> %out
}

; 8-bit lanes, mask <16, 0, 18, 2, ...>: even lanes of %src2 interleaved with
; even lanes of %src1 (src2 first).
define arm_aapcs_vfpcc <16 x i8> @vmovn8_b2(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: vmovn8_b2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vtrn.8 q1, q0
; CHECK-NEXT:    vorr q0, q1, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: vmovn8_b2:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.8 q8, q0
; CHECKBE-NEXT:    vrev64.8 q9, q1
; CHECKBE-NEXT:    vtrn.8 q9, q8
; CHECKBE-NEXT:    vrev64.8 q0, q9
; CHECKBE-NEXT:    bx lr
entry:
  %out = shufflevector <16 x i8> %src1, <16 x i8> %src2, <16 x i32> <i32 16, i32 0, i32 18, i32 2, i32 20, i32 4, i32 22, i32 6, i32 24, i32 8, i32 26, i32 10, i32 28, i32 12, i32 30, i32 14>
  ret <16 x i8> %out
}

; 8-bit lanes, mask <0, 17, 2, 19, ...>: even lanes of %src1 interleaved with
; odd lanes of %src2 (src1 first). The expected code is a pair of vtbl.8 table
; lookups driven by a constant-pool index vector.
define arm_aapcs_vfpcc <16 x i8> @vmovn8_t1(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: vmovn8_t1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vorr q2, q0, q0
; CHECK-NEXT:    vldr d16, .LCPI24_0
; CHECK-NEXT:    vorr d6, d3, d3
; CHECK-NEXT:    vtbl.8 d1, {d5, d6}, d16
; CHECK-NEXT:    vorr d5, d2, d2
; CHECK-NEXT:    vtbl.8 d0, {d4, d5}, d16
; CHECK-NEXT:    bx lr
; CHECK-NEXT:    .p2align 3
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI24_0:
; CHECK-NEXT:    .byte 0 @ 0x0
; CHECK-NEXT:    .byte 9 @ 0x9
; CHECK-NEXT:    .byte 2 @ 0x2
; CHECK-NEXT:    .byte 11 @ 0xb
; CHECK-NEXT:    .byte 4 @ 0x4
; CHECK-NEXT:    .byte 13 @ 0xd
; CHECK-NEXT:    .byte 6 @ 0x6
; CHECK-NEXT:    .byte 15 @ 0xf
;
; CHECKBE-LABEL: vmovn8_t1:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vldr d16, .LCPI24_0
; CHECKBE-NEXT:    vrev64.8 d19, d3
; CHECKBE-NEXT:    vrev64.8 d21, d2
; CHECKBE-NEXT:    vrev64.8 d18, d1
; CHECKBE-NEXT:    vrev64.8 d16, d16
; CHECKBE-NEXT:    vrev64.8 d20, d0
; CHECKBE-NEXT:    vtbl.8 d19, {d18, d19}, d16
; CHECKBE-NEXT:    vtbl.8 d18, {d20, d21}, d16
; CHECKBE-NEXT:    vrev64.8 q0, q9
; CHECKBE-NEXT:    bx lr
; CHECKBE-NEXT:    .p2align 3
; CHECKBE-NEXT:  @ %bb.1:
; CHECKBE-NEXT:  .LCPI24_0:
; CHECKBE-NEXT:    .byte 0 @ 0x0
; CHECKBE-NEXT:    .byte 9 @ 0x9
; CHECKBE-NEXT:    .byte 2 @ 0x2
; CHECKBE-NEXT:    .byte 11 @ 0xb
; CHECKBE-NEXT:    .byte 4 @ 0x4
; CHECKBE-NEXT:    .byte 13 @ 0xd
; CHECKBE-NEXT:    .byte 6 @ 0x6
; CHECKBE-NEXT:    .byte 15 @ 0xf
entry:
  %out = shufflevector <16 x i8> %src1, <16 x i8> %src2, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
  ret <16 x i8> %out
}

; 8-bit lanes, mask <17, 0, 19, 2, ...>: odd lanes of %src2 interleaved with
; even lanes of %src1 (src2 first). Expected code uses vtbl.8 lookups.
define arm_aapcs_vfpcc <16 x i8> @vmovn8_t2(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: vmovn8_t2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    @ kill: def $q1 killed $q1 def $d2_d3_d4
; CHECK-NEXT:    vldr d18, .LCPI25_0
; CHECK-NEXT:    vorr d4, d1, d1
; CHECK-NEXT:    vtbl.8 d17, {d3, d4}, d18
; CHECK-NEXT:    vorr d3, d0, d0
; CHECK-NEXT:    vtbl.8 d16, {d2, d3}, d18
; CHECK-NEXT:    vorr q0, q8, q8
; CHECK-NEXT:    bx lr
; CHECK-NEXT:    .p2align 3
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI25_0:
; CHECK-NEXT:    .byte 1 @ 0x1
; CHECK-NEXT:    .byte 8 @ 0x8
; CHECK-NEXT:    .byte 3 @ 0x3
; CHECK-NEXT:    .byte 10 @ 0xa
; CHECK-NEXT:    .byte 5 @ 0x5
; CHECK-NEXT:    .byte 12 @ 0xc
; CHECK-NEXT:    .byte 7 @ 0x7
; CHECK-NEXT:    .byte 14 @ 0xe
;
; CHECKBE-LABEL: vmovn8_t2:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vldr d16, .LCPI25_0
; CHECKBE-NEXT:    vrev64.8 d19, d1
; CHECKBE-NEXT:    vrev64.8 d21, d0
; CHECKBE-NEXT:    vrev64.8 d18, d3
; CHECKBE-NEXT:    vrev64.8 d16, d16
; CHECKBE-NEXT:    vrev64.8 d20, d2
; CHECKBE-NEXT:    vtbl.8 d19, {d18, d19}, d16
; CHECKBE-NEXT:    vtbl.8 d18, {d20, d21}, d16
; CHECKBE-NEXT:    vrev64.8 q0, q9
; CHECKBE-NEXT:    bx lr
; CHECKBE-NEXT:    .p2align 3
; CHECKBE-NEXT:  @ %bb.1:
; CHECKBE-NEXT:  .LCPI25_0:
; CHECKBE-NEXT:    .byte 1 @ 0x1
; CHECKBE-NEXT:    .byte 8 @ 0x8
; CHECKBE-NEXT:    .byte 3 @ 0x3
; CHECKBE-NEXT:    .byte 10 @ 0xa
; CHECKBE-NEXT:    .byte 5 @ 0x5
; CHECKBE-NEXT:    .byte 12 @ 0xc
; CHECKBE-NEXT:    .byte 7 @ 0x7
; CHECKBE-NEXT:    .byte 14 @ 0xe
entry:
  %out = shufflevector <16 x i8> %src1, <16 x i8> %src2, <16 x i32> <i32 17, i32 0, i32 19, i32 2, i32 21, i32 4, i32 23, i32 6, i32 25, i32 8, i32 27, i32 10, i32 29, i32 12, i32 31, i32 14>
  ret <16 x i8> %out
}

; 8-bit lanes, mask <1, 16, 3, 18, ...>: odd lanes of %src1 interleaved with
; even lanes of %src2 (src1 first). Expected code uses vtbl.8 lookups.
define arm_aapcs_vfpcc <16 x i8> @vmovn8_t3(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: vmovn8_t3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vorr q2, q0, q0
; CHECK-NEXT:    vldr d16, .LCPI26_0
; CHECK-NEXT:    vorr d6, d3, d3
; CHECK-NEXT:    vtbl.8 d1, {d5, d6}, d16
; CHECK-NEXT:    vorr d5, d2, d2
; CHECK-NEXT:    vtbl.8 d0, {d4, d5}, d16
; CHECK-NEXT:    bx lr
; CHECK-NEXT:    .p2align 3
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI26_0:
; CHECK-NEXT:    .byte 1 @ 0x1
; CHECK-NEXT:    .byte 8 @ 0x8
; CHECK-NEXT:    .byte 3 @ 0x3
; CHECK-NEXT:    .byte 10 @ 0xa
; CHECK-NEXT:    .byte 5 @ 0x5
; CHECK-NEXT:    .byte 12 @ 0xc
; CHECK-NEXT:    .byte 7 @ 0x7
; CHECK-NEXT:    .byte 14 @ 0xe
;
; CHECKBE-LABEL: vmovn8_t3:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vldr d16, .LCPI26_0
; CHECKBE-NEXT:    vrev64.8 d19, d3
; CHECKBE-NEXT:    vrev64.8 d21, d2
; CHECKBE-NEXT:    vrev64.8 d18, d1
; CHECKBE-NEXT:    vrev64.8 d16, d16
; CHECKBE-NEXT:    vrev64.8 d20, d0
; CHECKBE-NEXT:    vtbl.8 d19, {d18, d19}, d16
; CHECKBE-NEXT:    vtbl.8 d18, {d20, d21}, d16
; CHECKBE-NEXT:    vrev64.8 q0, q9
; CHECKBE-NEXT:    bx lr
; CHECKBE-NEXT:    .p2align 3
; CHECKBE-NEXT:  @ %bb.1:
; CHECKBE-NEXT:  .LCPI26_0:
; CHECKBE-NEXT:    .byte 1 @ 0x1
; CHECKBE-NEXT:    .byte 8 @ 0x8
; CHECKBE-NEXT:    .byte 3 @ 0x3
; CHECKBE-NEXT:    .byte 10 @ 0xa
; CHECKBE-NEXT:    .byte 5 @ 0x5
; CHECKBE-NEXT:    .byte 12 @ 0xc
; CHECKBE-NEXT:    .byte 7 @ 0x7
; CHECKBE-NEXT:    .byte 14 @ 0xe
entry:
  %out = shufflevector <16 x i8> %src1, <16 x i8> %src2, <16 x i32> <i32 1, i32 16, i32 3, i32 18, i32 5, i32 20, i32 7, i32 22, i32 9, i32 24, i32 11, i32 26, i32 13, i32 28, i32 15, i32 30>
  ret <16 x i8> %out
}

; 8-bit lanes, mask <16, 1, 18, 3, ...>: even lanes of %src2 interleaved with
; odd lanes of %src1 (src2 first). Expected code uses vtbl.8 lookups.
define arm_aapcs_vfpcc <16 x i8> @vmovn8_t4(<16 x i8> %src1, <16 x i8> %src2) {
; CHECK-LABEL: vmovn8_t4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    @ kill: def $q1 killed $q1 def $d2_d3_d4
; CHECK-NEXT:    vldr d18, .LCPI27_0
; CHECK-NEXT:    vorr d4, d1, d1
; CHECK-NEXT:    vtbl.8 d17, {d3, d4}, d18
; CHECK-NEXT:    vorr d3, d0, d0
; CHECK-NEXT:    vtbl.8 d16, {d2, d3}, d18
; CHECK-NEXT:    vorr q0, q8, q8
; CHECK-NEXT:    bx lr
; CHECK-NEXT:    .p2align 3
; CHECK-NEXT:  @ %bb.1:
; CHECK-NEXT:  .LCPI27_0:
; CHECK-NEXT:    .byte 0 @ 0x0
; CHECK-NEXT:    .byte 9 @ 0x9
; CHECK-NEXT:    .byte 2 @ 0x2
; CHECK-NEXT:    .byte 11 @ 0xb
; CHECK-NEXT:    .byte 4 @ 0x4
; CHECK-NEXT:    .byte 13 @ 0xd
; CHECK-NEXT:    .byte 6 @ 0x6
; CHECK-NEXT:    .byte 15 @ 0xf
;
; CHECKBE-LABEL: vmovn8_t4:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vldr d16, .LCPI27_0
; CHECKBE-NEXT:    vrev64.8 d19, d1
; CHECKBE-NEXT:    vrev64.8 d21, d0
; CHECKBE-NEXT:    vrev64.8 d18, d3
; CHECKBE-NEXT:    vrev64.8 d16, d16
; CHECKBE-NEXT:    vrev64.8 d20, d2
; CHECKBE-NEXT:    vtbl.8 d19, {d18, d19}, d16
; CHECKBE-NEXT:    vtbl.8 d18, {d20, d21}, d16
; CHECKBE-NEXT:    vrev64.8 q0, q9
; CHECKBE-NEXT:    bx lr
; CHECKBE-NEXT:    .p2align 3
; CHECKBE-NEXT:  @ %bb.1:
; CHECKBE-NEXT:  .LCPI27_0:
; CHECKBE-NEXT:    .byte 0 @ 0x0
; CHECKBE-NEXT:    .byte 9 @ 0x9
; CHECKBE-NEXT:    .byte 2 @ 0x2
; CHECKBE-NEXT:    .byte 11 @ 0xb
; CHECKBE-NEXT:    .byte 4 @ 0x4
; CHECKBE-NEXT:    .byte 13 @ 0xd
; CHECKBE-NEXT:    .byte 6 @ 0x6
; CHECKBE-NEXT:    .byte 15 @ 0xf
entry:
  %out = shufflevector <16 x i8> %src1, <16 x i8> %src2, <16 x i32> <i32 16, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 13, i32 30, i32 15>
  ret <16 x i8> %out
}

; Scalarized variant: every i16 lane of %src1 and %src2 is extracted, truncated
; to i8 and inserted into a <16 x i8> result, with %src1 lanes landing in the
; even byte positions and %src2 lanes in the odd ones. Little-endian expects
; this to collapse to a single vtrn.8.
define arm_aapcs_vfpcc <16 x i8> @test(<8 x i16> %src1, <8 x i16> %src2) {
; CHECK-LABEL: test:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vtrn.8 q0, q1
; CHECK-NEXT:    bx lr
;
; CHECKBE-LABEL: test:
; CHECKBE:       @ %bb.0: @ %entry
; CHECKBE-NEXT:    vrev64.16 q8, q1
; CHECKBE-NEXT:    vrev64.16 q9, q0
; CHECKBE-NEXT:    vtrn.8 q9, q8
; CHECKBE-NEXT:    vrev64.8 q0, q9
; CHECKBE-NEXT:    bx lr
entry:
  ; Pull out each i16 lane of %src1.
  %a0 = extractelement <8 x i16> %src1, i32 0
  %a1 = extractelement <8 x i16> %src1, i32 1
  %a2 = extractelement <8 x i16> %src1, i32 2
  %a3 = extractelement <8 x i16> %src1, i32 3
  %a4 = extractelement <8 x i16> %src1, i32 4
  %a5 = extractelement <8 x i16> %src1, i32 5
  %a6 = extractelement <8 x i16> %src1, i32 6
  %a7 = extractelement <8 x i16> %src1, i32 7

  ; Pull out each i16 lane of %src2.
  %b0 = extractelement <8 x i16> %src2, i32 0
  %b1 = extractelement <8 x i16> %src2, i32 1
  %b2 = extractelement <8 x i16> %src2, i32 2
  %b3 = extractelement <8 x i16> %src2, i32 3
  %b4 = extractelement <8 x i16> %src2, i32 4
  %b5 = extractelement <8 x i16> %src2, i32 5
  %b6 = extractelement <8 x i16> %src2, i32 6
  %b7 = extractelement <8 x i16> %src2, i32 7

  ; Narrow every extracted lane from i16 to i8.
  %s0 = trunc i16 %a0 to i8
  %s1 = trunc i16 %a1 to i8
  %s2 = trunc i16 %a2 to i8
  %s3 = trunc i16 %a3 to i8
  %s4 = trunc i16 %a4 to i8
  %s5 = trunc i16 %a5 to i8
  %s6 = trunc i16 %a6 to i8
  %s7 = trunc i16 %a7 to i8
  %t0 = trunc i16 %b0 to i8
  %t1 = trunc i16 %b1 to i8
  %t2 = trunc i16 %b2 to i8
  %t3 = trunc i16 %b3 to i8
  %t4 = trunc i16 %b4 to i8
  %t5 = trunc i16 %b5 to i8
  %t6 = trunc i16 %b6 to i8
  %t7 = trunc i16 %b7 to i8

  ; %src1 values fill the even byte positions (0, 2, ..., 14) ...
  %r0 = insertelement <16 x i8> undef, i8 %s0, i32 0
  %r1 = insertelement <16 x i8> %r0, i8 %s1, i32 2
  %r2 = insertelement <16 x i8> %r1, i8 %s2, i32 4
  %r3 = insertelement <16 x i8> %r2, i8 %s3, i32 6
  %r4 = insertelement <16 x i8> %r3, i8 %s4, i32 8
  %r5 = insertelement <16 x i8> %r4, i8 %s5, i32 10
  %r6 = insertelement <16 x i8> %r5, i8 %s6, i32 12
  %r7 = insertelement <16 x i8> %r6, i8 %s7, i32 14
  ; ... and %src2 values fill the odd byte positions (1, 3, ..., 15), so all
  ; 16 lanes of the initial undef vector are overwritten.
  %r10 = insertelement <16 x i8> %r7, i8 %t0, i32 1
  %r11 = insertelement <16 x i8> %r10, i8 %t1, i32 3
  %r12 = insertelement <16 x i8> %r11, i8 %t2, i32 5
  %r13 = insertelement <16 x i8> %r12, i8 %t3, i32 7
  %r14 = insertelement <16 x i8> %r13, i8 %t4, i32 9
  %r15 = insertelement <16 x i8> %r14, i8 %t5, i32 11
  %r16 = insertelement <16 x i8> %r15, i8 %t6, i32 13
  %r17 = insertelement <16 x i8> %r16, i8 %t7, i32 15

  ret <16 x i8> %r17
}