; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-MVE
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve,+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-MVEFP

; Every function below combines two vector inputs (or the two halves of one
; wider input) with a shufflevector, optionally truncates the result, and
; stores it to %dest. The assertions record the instruction sequence llc emits
; for each shuffle mask under both llc invocations above.

; --- Interleaving shuffle followed by a truncate to half-width elements ---

define arm_aapcs_vfpcc void @vmovn32_trunc1(<4 x i32> %src1, <4 x i32> %src2, ptr %dest) {
; CHECK-LABEL: vmovn32_trunc1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovnt.i32 q0, q1
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %strided.vec = shufflevector <4 x i32> %src1, <4 x i32> %src2, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
  %out = trunc <8 x i32> %strided.vec to <8 x i16>
  store <8 x i16> %out, ptr %dest, align 8
  ret void
}

define arm_aapcs_vfpcc void @vmovn32_trunc2(<4 x i32> %src1, <4 x i32> %src2, ptr %dest) {
; CHECK-LABEL: vmovn32_trunc2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovnt.i32 q1, q0
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %strided.vec = shufflevector <4 x i32> %src1, <4 x i32> %src2, <8 x i32> <i32 4, i32 0, i32 5, i32 1, i32 6, i32 2, i32 7, i32 3>
  %out = trunc <8 x i32> %strided.vec to <8 x i16>
  store <8 x i16> %out, ptr %dest, align 8
  ret void
}

; The *_onesrc variants use the same masks, but both halves come from a single
; double-width argument. The second shuffle operand is never indexed by the
; mask, so it is written as poison rather than the deprecated undef.

define arm_aapcs_vfpcc void @vmovn32_trunc1_onesrc(<8 x i32> %src1, ptr %dest) {
; CHECK-LABEL: vmovn32_trunc1_onesrc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovnt.i32 q0, q1
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %strided.vec = shufflevector <8 x i32> %src1, <8 x i32> poison, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
  %out = trunc <8 x i32> %strided.vec to <8 x i16>
  store <8 x i16> %out, ptr %dest, align 8
  ret void
}

define arm_aapcs_vfpcc void @vmovn32_trunc2_onesrc(<8 x i32> %src1, ptr %dest) {
; CHECK-LABEL: vmovn32_trunc2_onesrc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovnt.i32 q1, q0
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %strided.vec = shufflevector <8 x i32> %src1, <8 x i32> poison, <8 x i32> <i32 4, i32 0, i32 5, i32 1, i32 6, i32 2, i32 7, i32 3>
  %out = trunc <8 x i32> %strided.vec to <8 x i16>
  store <8 x i16> %out, ptr %dest, align 8
  ret void
}

define arm_aapcs_vfpcc void @vmovn16_trunc1(<8 x i16> %src1, <8 x i16> %src2, ptr %dest) {
; CHECK-LABEL: vmovn16_trunc1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovnt.i16 q0, q1
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %strided.vec = shufflevector <8 x i16> %src1, <8 x i16> %src2, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  %out = trunc <16 x i16> %strided.vec to <16 x i8>
  store <16 x i8> %out, ptr %dest, align 8
  ret void
}

define arm_aapcs_vfpcc void @vmovn16_trunc2(<8 x i16> %src1, <8 x i16> %src2, ptr %dest) {
; CHECK-LABEL: vmovn16_trunc2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovnt.i16 q1, q0
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %strided.vec = shufflevector <8 x i16> %src1, <8 x i16> %src2, <16 x i32> <i32 8, i32 0, i32 9, i32 1, i32 10, i32 2, i32 11, i32 3, i32 12, i32 4, i32 13, i32 5, i32 14, i32 6, i32 15, i32 7>
  %out = trunc <16 x i16> %strided.vec to <16 x i8>
  store <16 x i8> %out, ptr %dest, align 8
  ret void
}

define arm_aapcs_vfpcc void @vmovn16_trunc1_onesrc(<16 x i16> %src1, ptr %dest) {
; CHECK-LABEL: vmovn16_trunc1_onesrc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovnt.i16 q0, q1
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %strided.vec = shufflevector <16 x i16> %src1, <16 x i16> poison, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  %out = trunc <16 x i16> %strided.vec to <16 x i8>
  store <16 x i8> %out, ptr %dest, align 8
  ret void
}

define arm_aapcs_vfpcc void @vmovn16_trunc2_onesrc(<16 x i16> %src1, ptr %dest) {
; CHECK-LABEL: vmovn16_trunc2_onesrc:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovnt.i16 q1, q0
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %strided.vec = shufflevector <16 x i16> %src1, <16 x i16> poison, <16 x i32> <i32 8, i32 0, i32 9, i32 1, i32 10, i32 2, i32 11, i32 3, i32 12, i32 4, i32 13, i32 5, i32 14, i32 6, i32 15, i32 7>
  %out = trunc <16 x i16> %strided.vec to <16 x i8>
  store <16 x i8> %out, ptr %dest, align 8
  ret void
}


; --- Two-element i64 shuffles ---
; The assertions show these being handled with vmov.f32 register moves.

define arm_aapcs_vfpcc void @vmovn64_t1(<2 x i64> %src1, <2 x i64> %src2, ptr %dest) {
; CHECK-LABEL: vmovn64_t1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s2, s4
; CHECK-NEXT:    vmov.f32 s3, s5
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> <i32 0, i32 2>
  store <2 x i64> %out, ptr %dest, align 8
  ret void
}

define arm_aapcs_vfpcc void @vmovn64_t2(<2 x i64> %src1, <2 x i64> %src2, ptr %dest) {
; CHECK-LABEL: vmovn64_t2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s6, s0
; CHECK-NEXT:    vmov.f32 s7, s1
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> <i32 2, i32 0>
  store <2 x i64> %out, ptr %dest, align 8
  ret void
}

define arm_aapcs_vfpcc void @vmovn64_b1(<2 x i64> %src1, <2 x i64> %src2, ptr %dest) {
; CHECK-LABEL: vmovn64_b1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s2, s6
; CHECK-NEXT:    vmov.f32 s3, s7
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> <i32 0, i32 3>
  store <2 x i64> %out, ptr %dest, align 8
  ret void
}

define arm_aapcs_vfpcc void @vmovn64_b2(<2 x i64> %src1, <2 x i64> %src2, ptr %dest) {
; CHECK-LABEL: vmovn64_b2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s4, s6
; CHECK-NEXT:    vmov.f32 s6, s0
; CHECK-NEXT:    vmov.f32 s5, s7
; CHECK-NEXT:    vmov.f32 s7, s1
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> <i32 3, i32 0>
  store <2 x i64> %out, ptr %dest, align 8
  ret void
}

define arm_aapcs_vfpcc void @vmovn64_b3(<2 x i64> %src1, <2 x i64> %src2, ptr %dest) {
; CHECK-LABEL: vmovn64_b3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s0, s2
; CHECK-NEXT:    vmov.f32 s2, s4
; CHECK-NEXT:    vmov.f32 s1, s3
; CHECK-NEXT:    vmov.f32 s3, s5
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> <i32 1, i32 2>
  store <2 x i64> %out, ptr %dest, align 8
  ret void
}

define arm_aapcs_vfpcc void @vmovn64_b4(<2 x i64> %src1, <2 x i64> %src2, ptr %dest) {
; CHECK-LABEL: vmovn64_b4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s6, s2
; CHECK-NEXT:    vmov.f32 s7, s3
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <2 x i64> %src1, <2 x i64> %src2, <2 x i32> <i32 2, i32 1>
  store <2 x i64> %out, ptr %dest, align 8
  ret void
}



; --- Four-element i32 shuffles mixing alternating lanes of the two inputs ---

define arm_aapcs_vfpcc void @vmovn32_t1(<4 x i32> %src1, <4 x i32> %src2, ptr %dest) {
; CHECK-LABEL: vmovn32_t1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s1, s4
; CHECK-NEXT:    vmov.f32 s3, s6
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <4 x i32> %src1, <4 x i32> %src2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  store <4 x i32> %out, ptr %dest, align 8
  ret void
}

define arm_aapcs_vfpcc void @vmovn32_t2(<4 x i32> %src1, <4 x i32> %src2, ptr %dest) {
; CHECK-LABEL: vmovn32_t2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s5, s0
; CHECK-NEXT:    vmov.f32 s7, s2
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <4 x i32> %src1, <4 x i32> %src2, <4 x i32> <i32 4, i32 0, i32 6, i32 2>
  store <4 x i32> %out, ptr %dest, align 8
  ret void
}

define arm_aapcs_vfpcc void @vmovn32_b1(<4 x i32> %src1, <4 x i32> %src2, ptr %dest) {
; CHECK-LABEL: vmovn32_b1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s1, s5
; CHECK-NEXT:    vmov.f32 s3, s7
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <4 x i32> %src1, <4 x i32> %src2, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  store <4 x i32> %out, ptr %dest, align 8
  ret void
}

define arm_aapcs_vfpcc void @vmovn32_b2(<4 x i32> %src1, <4 x i32> %src2, ptr %dest) {
; CHECK-LABEL: vmovn32_b2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s4, s5
; CHECK-NEXT:    vmov.f32 s6, s7
; CHECK-NEXT:    vmov.f32 s5, s0
; CHECK-NEXT:    vmov.f32 s7, s2
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <4 x i32> %src1, <4 x i32> %src2, <4 x i32> <i32 5, i32 0, i32 7, i32 2>
  store <4 x i32> %out, ptr %dest, align 8
  ret void
}

define arm_aapcs_vfpcc void @vmovn32_b3(<4 x i32> %src1, <4 x i32> %src2, ptr %dest) {
; CHECK-LABEL: vmovn32_b3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s0, s1
; CHECK-NEXT:    vmov.f32 s2, s3
; CHECK-NEXT:    vmov.f32 s1, s4
; CHECK-NEXT:    vmov.f32 s3, s6
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <4 x i32> %src1, <4 x i32> %src2, <4 x i32> <i32 1, i32 4, i32 3, i32 6>
  store <4 x i32> %out, ptr %dest, align 8
  ret void
}

define arm_aapcs_vfpcc void @vmovn32_b4(<4 x i32> %src1, <4 x i32> %src2, ptr %dest) {
; CHECK-LABEL: vmovn32_b4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.f32 s5, s1
; CHECK-NEXT:    vmov.f32 s7, s3
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <4 x i32> %src1, <4 x i32> %src2, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
  store <4 x i32> %out, ptr %dest, align 8
  ret void
}




; --- Eight-element i16 shuffles ---
; Only vmovn16_b2 and vmovn16_b3 produce different code under the two llc
; invocations, which is why they carry separate prefix-specific assertions.

define arm_aapcs_vfpcc void @vmovn16_t1(<8 x i16> %src1, <8 x i16> %src2, ptr %dest) {
; CHECK-LABEL: vmovn16_t1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovnt.i32 q0, q1
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <8 x i16> %src1, <8 x i16> %src2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
  store <8 x i16> %out, ptr %dest, align 8
  ret void
}

define arm_aapcs_vfpcc void @vmovn16_t2(<8 x i16> %src1, <8 x i16> %src2, ptr %dest) {
; CHECK-LABEL: vmovn16_t2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovnt.i32 q1, q0
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <8 x i16> %src1, <8 x i16> %src2, <8 x i32> <i32 8, i32 0, i32 10, i32 2, i32 12, i32 4, i32 14, i32 6>
  store <8 x i16> %out, ptr %dest, align 8
  ret void
}

define arm_aapcs_vfpcc void @vmovn16_b1(<8 x i16> %src1, <8 x i16> %src2, ptr %dest) {
; CHECK-LABEL: vmovn16_b1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovnb.i32 q1, q0
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <8 x i16> %src1, <8 x i16> %src2, <8 x i32> <i32 0, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 15>
  store <8 x i16> %out, ptr %dest, align 8
  ret void
}

define arm_aapcs_vfpcc void @vmovn16_b2(<8 x i16> %src1, <8 x i16> %src2, ptr %dest) {
; CHECK-MVE-LABEL: vmovn16_b2:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    vmov.u16 r1, q1[1]
; CHECK-MVE-NEXT:    vmov.16 q2[0], r1
; CHECK-MVE-NEXT:    vmov.u16 r1, q0[0]
; CHECK-MVE-NEXT:    vmov.16 q2[1], r1
; CHECK-MVE-NEXT:    vmov.u16 r1, q1[3]
; CHECK-MVE-NEXT:    vmov.16 q2[2], r1
; CHECK-MVE-NEXT:    vmov.u16 r1, q0[2]
; CHECK-MVE-NEXT:    vmov.16 q2[3], r1
; CHECK-MVE-NEXT:    vmov.u16 r1, q1[5]
; CHECK-MVE-NEXT:    vmov.16 q2[4], r1
; CHECK-MVE-NEXT:    vmov.u16 r1, q0[4]
; CHECK-MVE-NEXT:    vmov.16 q2[5], r1
; CHECK-MVE-NEXT:    vmov.u16 r1, q1[7]
; CHECK-MVE-NEXT:    vmov.16 q2[6], r1
; CHECK-MVE-NEXT:    vmov.u16 r1, q0[6]
; CHECK-MVE-NEXT:    vmov.16 q2[7], r1
; CHECK-MVE-NEXT:    vstrw.32 q2, [r0]
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: vmovn16_b2:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    vmovx.f16 s4, s4
; CHECK-MVEFP-NEXT:    vmovx.f16 s5, s5
; CHECK-MVEFP-NEXT:    vmovx.f16 s6, s6
; CHECK-MVEFP-NEXT:    vmovx.f16 s7, s7
; CHECK-MVEFP-NEXT:    vins.f16 s4, s0
; CHECK-MVEFP-NEXT:    vins.f16 s5, s1
; CHECK-MVEFP-NEXT:    vins.f16 s6, s2
; CHECK-MVEFP-NEXT:    vins.f16 s7, s3
; CHECK-MVEFP-NEXT:    vstrw.32 q1, [r0]
; CHECK-MVEFP-NEXT:    bx lr
entry:
  %out = shufflevector <8 x i16> %src1, <8 x i16> %src2, <8 x i32> <i32 9, i32 0, i32 11, i32 2, i32 13, i32 4, i32 15, i32 6>
  store <8 x i16> %out, ptr %dest, align 8
  ret void
}

define arm_aapcs_vfpcc void @vmovn16_b3(<8 x i16> %src1, <8 x i16> %src2, ptr %dest) {
; CHECK-MVE-LABEL: vmovn16_b3:
; CHECK-MVE:       @ %bb.0: @ %entry
; CHECK-MVE-NEXT:    vmov.u16 r1, q0[1]
; CHECK-MVE-NEXT:    vmov.16 q2[0], r1
; CHECK-MVE-NEXT:    vmov.u16 r1, q1[0]
; CHECK-MVE-NEXT:    vmov.16 q2[1], r1
; CHECK-MVE-NEXT:    vmov.u16 r1, q0[3]
; CHECK-MVE-NEXT:    vmov.16 q2[2], r1
; CHECK-MVE-NEXT:    vmov.u16 r1, q1[2]
; CHECK-MVE-NEXT:    vmov.16 q2[3], r1
; CHECK-MVE-NEXT:    vmov.u16 r1, q0[5]
; CHECK-MVE-NEXT:    vmov.16 q2[4], r1
; CHECK-MVE-NEXT:    vmov.u16 r1, q1[4]
; CHECK-MVE-NEXT:    vmov.16 q2[5], r1
; CHECK-MVE-NEXT:    vmov.u16 r1, q0[7]
; CHECK-MVE-NEXT:    vmov.16 q2[6], r1
; CHECK-MVE-NEXT:    vmov.u16 r1, q1[6]
; CHECK-MVE-NEXT:    vmov.16 q2[7], r1
; CHECK-MVE-NEXT:    vstrw.32 q2, [r0]
; CHECK-MVE-NEXT:    bx lr
;
; CHECK-MVEFP-LABEL: vmovn16_b3:
; CHECK-MVEFP:       @ %bb.0: @ %entry
; CHECK-MVEFP-NEXT:    vmovx.f16 s0, s0
; CHECK-MVEFP-NEXT:    vmovx.f16 s1, s1
; CHECK-MVEFP-NEXT:    vmovx.f16 s2, s2
; CHECK-MVEFP-NEXT:    vmovx.f16 s3, s3
; CHECK-MVEFP-NEXT:    vins.f16 s0, s4
; CHECK-MVEFP-NEXT:    vins.f16 s1, s5
; CHECK-MVEFP-NEXT:    vins.f16 s2, s6
; CHECK-MVEFP-NEXT:    vins.f16 s3, s7
; CHECK-MVEFP-NEXT:    vstrw.32 q0, [r0]
; CHECK-MVEFP-NEXT:    bx lr
entry:
  %out = shufflevector <8 x i16> %src1, <8 x i16> %src2, <8 x i32> <i32 1, i32 8, i32 3, i32 10, i32 5, i32 12, i32 7, i32 14>
  store <8 x i16> %out, ptr %dest, align 8
  ret void
}

define arm_aapcs_vfpcc void @vmovn16_b4(<8 x i16> %src1, <8 x i16> %src2, ptr %dest) {
; CHECK-LABEL: vmovn16_b4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovnb.i32 q0, q1
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <8 x i16> %src1, <8 x i16> %src2, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
  store <8 x i16> %out, ptr %dest, align 8
  ret void
}


; --- Sixteen-element i8 shuffles ---

define arm_aapcs_vfpcc void @vmovn8_b1(<16 x i8> %src1, <16 x i8> %src2, ptr %dest) {
; CHECK-LABEL: vmovn8_b1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovnt.i16 q0, q1
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <16 x i8> %src1, <16 x i8> %src2, <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
  store <16 x i8> %out, ptr %dest, align 8
  ret void
}

define arm_aapcs_vfpcc void @vmovn8_b2(<16 x i8> %src1, <16 x i8> %src2, ptr %dest) {
; CHECK-LABEL: vmovn8_b2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovnt.i16 q1, q0
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <16 x i8> %src1, <16 x i8> %src2, <16 x i32> <i32 16, i32 0, i32 18, i32 2, i32 20, i32 4, i32 22, i32 6, i32 24, i32 8, i32 26, i32 10, i32 28, i32 12, i32 30, i32 14>
  store <16 x i8> %out, ptr %dest, align 8
  ret void
}

define arm_aapcs_vfpcc void @vmovn8_t1(<16 x i8> %src1, <16 x i8> %src2, ptr %dest) {
; CHECK-LABEL: vmovn8_t1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovnb.i16 q1, q0
; CHECK-NEXT:    vstrw.32 q1, [r0]
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <16 x i8> %src1, <16 x i8> %src2, <16 x i32> <i32 0, i32 17, i32 2, i32 19, i32 4, i32 21, i32 6, i32 23, i32 8, i32 25, i32 10, i32 27, i32 12, i32 29, i32 14, i32 31>
  store <16 x i8> %out, ptr %dest, align 8
  ret void
}

define arm_aapcs_vfpcc void @vmovn8_t2(<16 x i8> %src1, <16 x i8> %src2, ptr %dest) {
; CHECK-LABEL: vmovn8_t2:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.u8 r1, q1[1]
; CHECK-NEXT:    vmov.8 q2[0], r1
; CHECK-NEXT:    vmov.u8 r1, q0[0]
; CHECK-NEXT:    vmov.8 q2[1], r1
; CHECK-NEXT:    vmov.u8 r1, q1[3]
; CHECK-NEXT:    vmov.8 q2[2], r1
; CHECK-NEXT:    vmov.u8 r1, q0[2]
; CHECK-NEXT:    vmov.8 q2[3], r1
; CHECK-NEXT:    vmov.u8 r1, q1[5]
; CHECK-NEXT:    vmov.8 q2[4], r1
; CHECK-NEXT:    vmov.u8 r1, q0[4]
; CHECK-NEXT:    vmov.8 q2[5], r1
; CHECK-NEXT:    vmov.u8 r1, q1[7]
; CHECK-NEXT:    vmov.8 q2[6], r1
; CHECK-NEXT:    vmov.u8 r1, q0[6]
; CHECK-NEXT:    vmov.8 q2[7], r1
; CHECK-NEXT:    vmov.u8 r1, q1[9]
; CHECK-NEXT:    vmov.8 q2[8], r1
; CHECK-NEXT:    vmov.u8 r1, q0[8]
; CHECK-NEXT:    vmov.8 q2[9], r1
; CHECK-NEXT:    vmov.u8 r1, q1[11]
; CHECK-NEXT:    vmov.8 q2[10], r1
; CHECK-NEXT:    vmov.u8 r1, q0[10]
; CHECK-NEXT:    vmov.8 q2[11], r1
; CHECK-NEXT:    vmov.u8 r1, q1[13]
; CHECK-NEXT:    vmov.8 q2[12], r1
; CHECK-NEXT:    vmov.u8 r1, q0[12]
; CHECK-NEXT:    vmov.8 q2[13], r1
; CHECK-NEXT:    vmov.u8 r1, q1[15]
; CHECK-NEXT:    vmov.8 q2[14], r1
; CHECK-NEXT:    vmov.u8 r1, q0[14]
; CHECK-NEXT:    vmov.8 q2[15], r1
; CHECK-NEXT:    vstrw.32 q2, [r0]
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <16 x i8> %src1, <16 x i8> %src2, <16 x i32> <i32 17, i32 0, i32 19, i32 2, i32 21, i32 4, i32 23, i32 6, i32 25, i32 8, i32 27, i32 10, i32 29, i32 12, i32 31, i32 14>
  store <16 x i8> %out, ptr %dest, align 8
  ret void
}

define arm_aapcs_vfpcc void @vmovn8_t3(<16 x i8> %src1, <16 x i8> %src2, ptr %dest) {
; CHECK-LABEL: vmovn8_t3:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.u8 r1, q0[1]
; CHECK-NEXT:    vmov.8 q2[0], r1
; CHECK-NEXT:    vmov.u8 r1, q1[0]
; CHECK-NEXT:    vmov.8 q2[1], r1
; CHECK-NEXT:    vmov.u8 r1, q0[3]
; CHECK-NEXT:    vmov.8 q2[2], r1
; CHECK-NEXT:    vmov.u8 r1, q1[2]
; CHECK-NEXT:    vmov.8 q2[3], r1
; CHECK-NEXT:    vmov.u8 r1, q0[5]
; CHECK-NEXT:    vmov.8 q2[4], r1
; CHECK-NEXT:    vmov.u8 r1, q1[4]
; CHECK-NEXT:    vmov.8 q2[5], r1
; CHECK-NEXT:    vmov.u8 r1, q0[7]
; CHECK-NEXT:    vmov.8 q2[6], r1
; CHECK-NEXT:    vmov.u8 r1, q1[6]
; CHECK-NEXT:    vmov.8 q2[7], r1
; CHECK-NEXT:    vmov.u8 r1, q0[9]
; CHECK-NEXT:    vmov.8 q2[8], r1
; CHECK-NEXT:    vmov.u8 r1, q1[8]
; CHECK-NEXT:    vmov.8 q2[9], r1
; CHECK-NEXT:    vmov.u8 r1, q0[11]
; CHECK-NEXT:    vmov.8 q2[10], r1
; CHECK-NEXT:    vmov.u8 r1, q1[10]
; CHECK-NEXT:    vmov.8 q2[11], r1
; CHECK-NEXT:    vmov.u8 r1, q0[13]
; CHECK-NEXT:    vmov.8 q2[12], r1
; CHECK-NEXT:    vmov.u8 r1, q1[12]
; CHECK-NEXT:    vmov.8 q2[13], r1
; CHECK-NEXT:    vmov.u8 r1, q0[15]
; CHECK-NEXT:    vmov.8 q2[14], r1
; CHECK-NEXT:    vmov.u8 r1, q1[14]
; CHECK-NEXT:    vmov.8 q2[15], r1
; CHECK-NEXT:    vstrw.32 q2, [r0]
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <16 x i8> %src1, <16 x i8> %src2, <16 x i32> <i32 1, i32 16, i32 3, i32 18, i32 5, i32 20, i32 7, i32 22, i32 9, i32 24, i32 11, i32 26, i32 13, i32 28, i32 15, i32 30>
  store <16 x i8> %out, ptr %dest, align 8
  ret void
}

define arm_aapcs_vfpcc void @vmovn8_t4(<16 x i8> %src1, <16 x i8> %src2, ptr %dest) {
; CHECK-LABEL: vmovn8_t4:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovnb.i16 q0, q1
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    bx lr
entry:
  %out = shufflevector <16 x i8> %src1, <16 x i8> %src2, <16 x i32> <i32 16, i32 1, i32 18, i32 3, i32 20, i32 5, i32 22, i32 7, i32 24, i32 9, i32 26, i32 11, i32 28, i32 13, i32 30, i32 15>
  store <16 x i8> %out, ptr %dest, align 8
  ret void
}