; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s

; Codegen tests for integer vector sign extension, zero extension and
; truncation with MVE enabled (-mattr=+mve). Every function uses the
; arm_aapcs_vfpcc calling convention and has a single block named "entry";
; that block name appears in the expected "@ %entry" assembly comment, so it
; must not be renamed without regenerating the assertions.

; ---------------------------------------------------------------------------
; In-register sign extension written as a shl / ashr exact pair.
; A shift amount of N on a W-bit lane sign-extends from the low (W - N) bits;
; the last component of each function name gives that narrow width.
; ---------------------------------------------------------------------------

define arm_aapcs_vfpcc <4 x i32> @sext_v4i32_v4i32_v4i1(<4 x i32> %m) {
; CHECK-LABEL: sext_v4i32_v4i32_v4i1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshl.i32 q0, q0, #31
; CHECK-NEXT:    vshr.s32 q0, q0, #31
; CHECK-NEXT:    bx lr
entry:
  %shl = shl <4 x i32> %m, <i32 31, i32 31, i32 31, i32 31>
  %shr = ashr exact <4 x i32> %shl, <i32 31, i32 31, i32 31, i32 31>
  ret <4 x i32> %shr
}

define arm_aapcs_vfpcc <4 x i32> @sext_v4i32_v4i32_v4i8(<4 x i32> %m) {
; CHECK-LABEL: sext_v4i32_v4i32_v4i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlb.s8 q0, q0
; CHECK-NEXT:    vmovlb.s16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %shl = shl <4 x i32> %m, <i32 24, i32 24, i32 24, i32 24>
  %shr = ashr exact <4 x i32> %shl, <i32 24, i32 24, i32 24, i32 24>
  ret <4 x i32> %shr
}

define arm_aapcs_vfpcc <4 x i32> @sext_v4i32_v4i32_v4i16(<4 x i32> %m) {
; CHECK-LABEL: sext_v4i32_v4i32_v4i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlb.s16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %shl = shl <4 x i32> %m, <i32 16, i32 16, i32 16, i32 16>
  %shr = ashr exact <4 x i32> %shl, <i32 16, i32 16, i32 16, i32 16>
  ret <4 x i32> %shr
}

define arm_aapcs_vfpcc <8 x i16> @sext_v8i16_v8i16_v8i8(<8 x i16> %m) {
; CHECK-LABEL: sext_v8i16_v8i16_v8i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlb.s8 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %shl = shl <8 x i16> %m, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  %shr = ashr exact <8 x i16> %shl, <i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8, i16 8>
  ret <8 x i16> %shr
}

define arm_aapcs_vfpcc <8 x i16> @sext_v8i16_v8i16_v8i1(<8 x i16> %m) {
; CHECK-LABEL: sext_v8i16_v8i16_v8i1:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vshl.i16 q0, q0, #15
; CHECK-NEXT:    vshr.s16 q0, q0, #15
; CHECK-NEXT:    bx lr
entry:
  %shl = shl <8 x i16> %m, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  %shr = ashr exact <8 x i16> %shl, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  ret <8 x i16> %shr
}

; The expected code for the 64-bit lane cases moves the lanes through
; general-purpose registers and rebuilds the vector with paired lane inserts.
define arm_aapcs_vfpcc <2 x i64> @sext_v2i64_v2i64_v2i32(<2 x i64> %m) {
; CHECK-LABEL: sext_v2i64_v2i64_v2i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s2
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    vmov q0[2], q0[0], r1, r0
; CHECK-NEXT:    asrs r0, r0, #31
; CHECK-NEXT:    asrs r1, r1, #31
; CHECK-NEXT:    vmov q0[3], q0[1], r1, r0
; CHECK-NEXT:    bx lr
entry:
  %shl = shl <2 x i64> %m, <i64 32, i64 32>
  %shr = ashr exact <2 x i64> %shl, <i64 32, i64 32>
  ret <2 x i64> %shr
}

; Shift amount 29 keeps 64 - 29 = 35 significant bits, i.e. the low word plus
; 3 bits of the high word, which matches the 3-bit sbfx in the expected output.
define arm_aapcs_vfpcc <2 x i64> @sext_v2i64_v2i64_v2i35(<2 x i64> %m) {
; CHECK-LABEL: sext_v2i64_v2i64_v2i35:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, r1, d1
; CHECK-NEXT:    vmov r2, r3, d0
; CHECK-NEXT:    vmov q0[2], q0[0], r2, r0
; CHECK-NEXT:    sbfx r0, r1, #0, #3
; CHECK-NEXT:    sbfx r1, r3, #0, #3
; CHECK-NEXT:    vmov q0[3], q0[1], r1, r0
; CHECK-NEXT:    bx lr
entry:
  %shl = shl <2 x i64> %m, <i64 29, i64 29>
  %shr = ashr exact <2 x i64> %shl, <i64 29, i64 29>
  ret <2 x i64> %shr
}

; ---------------------------------------------------------------------------
; Explicit sext instructions.
; ---------------------------------------------------------------------------

define arm_aapcs_vfpcc <8 x i16> @sext_v8i8_v8i16(<8 x i8> %src) {
; CHECK-LABEL: sext_v8i8_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlb.s8 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = sext <8 x i8> %src to <8 x i16>
  ret <8 x i16> %0
}

define arm_aapcs_vfpcc <4 x i32> @sext_v4i16_v4i32(<4 x i16> %src) {
; CHECK-LABEL: sext_v4i16_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlb.s16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = sext <4 x i16> %src to <4 x i32>
  ret <4 x i32> %0
}

define arm_aapcs_vfpcc <4 x i32> @sext_v4i8_v4i32(<4 x i8> %src) {
; CHECK-LABEL: sext_v4i8_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlb.s8 q0, q0
; CHECK-NEXT:    vmovlb.s16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = sext <4 x i8> %src to <4 x i32>
  ret <4 x i32> %0
}

; The results below are wider than one q register. The expected code spills
; the source to a stack slot and reloads it with sign-extending loads, one
; load per result register.
define arm_aapcs_vfpcc <16 x i16> @sext_v16i8_v16i16(<16 x i8> %src) {
; CHECK-LABEL: sext_v16i8_v16i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .pad #16
; CHECK-NEXT:    sub sp, #16
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    vldrb.s16 q0, [r0]
; CHECK-NEXT:    vldrb.s16 q1, [r0, #8]
; CHECK-NEXT:    add sp, #16
; CHECK-NEXT:    bx lr
entry:
  %0 = sext <16 x i8> %src to <16 x i16>
  ret <16 x i16> %0
}

define arm_aapcs_vfpcc <8 x i32> @sext_v8i16_v8i32(<8 x i16> %src) {
; CHECK-LABEL: sext_v8i16_v8i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .pad #16
; CHECK-NEXT:    sub sp, #16
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    vldrh.s32 q0, [r0]
; CHECK-NEXT:    vldrh.s32 q1, [r0, #8]
; CHECK-NEXT:    add sp, #16
; CHECK-NEXT:    bx lr
entry:
  %0 = sext <8 x i16> %src to <8 x i32>
  ret <8 x i32> %0
}

; i8 -> i32 on 16 lanes: the expected code extends in two steps (byte ->
; halfword, then halfword -> word), using three 16-byte stack slots.
define arm_aapcs_vfpcc <16 x i32> @sext_v16i8_v16i32(<16 x i8> %src) {
; CHECK-LABEL: sext_v16i8_v16i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .pad #48
; CHECK-NEXT:    sub sp, #48
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    add r1, sp, #32
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    vldrb.s16 q0, [r0]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    vldrb.s16 q0, [r0, #8]
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    vldrh.s32 q0, [r1]
; CHECK-NEXT:    vldrh.s32 q1, [r1, #8]
; CHECK-NEXT:    vldrh.s32 q2, [r0]
; CHECK-NEXT:    vldrh.s32 q3, [r0, #8]
; CHECK-NEXT:    add sp, #48
; CHECK-NEXT:    bx lr
entry:
  %0 = sext <16 x i8> %src to <16 x i32>
  ret <16 x i32> %0
}

define arm_aapcs_vfpcc <2 x i64> @sext_v2i32_v2i64(<2 x i32> %src) {
; CHECK-LABEL: sext_v2i32_v2i64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov r0, s2
; CHECK-NEXT:    vmov r1, s0
; CHECK-NEXT:    vmov q0[2], q0[0], r1, r0
; CHECK-NEXT:    asrs r0, r0, #31
; CHECK-NEXT:    asrs r1, r1, #31
; CHECK-NEXT:    vmov q0[3], q0[1], r1, r0
; CHECK-NEXT:    bx lr
entry:
  %0 = sext <2 x i32> %src to <2 x i64>
  ret <2 x i64> %0
}

; ---------------------------------------------------------------------------
; Explicit zext instructions.
; ---------------------------------------------------------------------------

define arm_aapcs_vfpcc <8 x i16> @zext_v8i8_v8i16(<8 x i8> %src) {
; CHECK-LABEL: zext_v8i8_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlb.u8 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext <8 x i8> %src to <8 x i16>
  ret <8 x i16> %0
}

define arm_aapcs_vfpcc <4 x i32> @zext_v4i16_v4i32(<4 x i16> %src) {
; CHECK-LABEL: zext_v4i16_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmovlb.u16 q0, q0
; CHECK-NEXT:    bx lr
entry:
  %0 = zext <4 x i16> %src to <4 x i32>
  ret <4 x i32> %0
}

; Unlike the sext counterpart, the expected code here is a single mask with
; 0xff per lane rather than two widening moves.
define arm_aapcs_vfpcc <4 x i32> @zext_v4i8_v4i32(<4 x i8> %src) {
; CHECK-LABEL: zext_v4i8_v4i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.i32 q1, #0xff
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext <4 x i8> %src to <4 x i32>
  ret <4 x i32> %0
}

define arm_aapcs_vfpcc <16 x i16> @zext_v16i8_v16i16(<16 x i8> %src) {
; CHECK-LABEL: zext_v16i8_v16i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .pad #16
; CHECK-NEXT:    sub sp, #16
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    vldrb.u16 q0, [r0]
; CHECK-NEXT:    vldrb.u16 q1, [r0, #8]
; CHECK-NEXT:    add sp, #16
; CHECK-NEXT:    bx lr
entry:
  %0 = zext <16 x i8> %src to <16 x i16>
  ret <16 x i16> %0
}

define arm_aapcs_vfpcc <8 x i32> @zext_v8i16_v8i32(<8 x i16> %src) {
; CHECK-LABEL: zext_v8i16_v8i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .pad #16
; CHECK-NEXT:    sub sp, #16
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    vldrh.u32 q0, [r0]
; CHECK-NEXT:    vldrh.u32 q1, [r0, #8]
; CHECK-NEXT:    add sp, #16
; CHECK-NEXT:    bx lr
entry:
  %0 = zext <8 x i16> %src to <8 x i32>
  ret <8 x i32> %0
}

define arm_aapcs_vfpcc <16 x i32> @zext_v16i8_v16i32(<16 x i8> %src) {
; CHECK-LABEL: zext_v16i8_v16i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .pad #48
; CHECK-NEXT:    sub sp, #48
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    add r1, sp, #32
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    vldrb.u16 q0, [r0]
; CHECK-NEXT:    vstrw.32 q0, [r1]
; CHECK-NEXT:    vldrb.u16 q0, [r0, #8]
; CHECK-NEXT:    add r0, sp, #16
; CHECK-NEXT:    vstrw.32 q0, [r0]
; CHECK-NEXT:    vldrh.u32 q0, [r1]
; CHECK-NEXT:    vldrh.u32 q1, [r1, #8]
; CHECK-NEXT:    vldrh.u32 q2, [r0]
; CHECK-NEXT:    vldrh.u32 q3, [r0, #8]
; CHECK-NEXT:    add sp, #48
; CHECK-NEXT:    bx lr
entry:
  %0 = zext <16 x i8> %src to <16 x i32>
  ret <16 x i32> %0
}

define arm_aapcs_vfpcc <2 x i64> @zext_v2i32_v2i64(<2 x i32> %src) {
; CHECK-LABEL: zext_v2i32_v2i64:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    vmov.i64 q1, #0xffffffff
; CHECK-NEXT:    vand q0, q0, q1
; CHECK-NEXT:    bx lr
entry:
  %0 = zext <2 x i32> %src to <2 x i64>
  ret <2 x i64> %0
}

; ---------------------------------------------------------------------------
; Explicit trunc instructions.
; For the cases whose source fits in one q register the expected output is
; only the return instruction.
; ---------------------------------------------------------------------------

define arm_aapcs_vfpcc <8 x i8> @trunc_v8i16_v8i8(<8 x i16> %src) {
; CHECK-LABEL: trunc_v8i16_v8i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    bx lr
entry:
  %0 = trunc <8 x i16> %src to <8 x i8>
  ret <8 x i8> %0
}

define arm_aapcs_vfpcc <4 x i16> @trunc_v4i32_v4i16(<4 x i32> %src) {
; CHECK-LABEL: trunc_v4i32_v4i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    bx lr
entry:
  %0 = trunc <4 x i32> %src to <4 x i16>
  ret <4 x i16> %0
}

define arm_aapcs_vfpcc <4 x i8> @trunc_v4i32_v4i8(<4 x i32> %src) {
; CHECK-LABEL: trunc_v4i32_v4i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    bx lr
entry:
  %0 = trunc <4 x i32> %src to <4 x i8>
  ret <4 x i8> %0
}

; The sources below span several q registers. The expected code narrows each
; register with a truncating store into one 16-byte stack slot and reloads the
; packed result as a single vector.
define arm_aapcs_vfpcc <16 x i8> @trunc_v16i16_v16i8(<16 x i16> %src) {
; CHECK-LABEL: trunc_v16i16_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .pad #16
; CHECK-NEXT:    sub sp, #16
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vstrb.16 q1, [r0, #8]
; CHECK-NEXT:    vstrb.16 q0, [r0]
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    add sp, #16
; CHECK-NEXT:    bx lr
entry:
  %0 = trunc <16 x i16> %src to <16 x i8>
  ret <16 x i8> %0
}

define arm_aapcs_vfpcc <8 x i16> @trunc_v8i32_v8i16(<8 x i32> %src) {
; CHECK-LABEL: trunc_v8i32_v8i16:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .pad #16
; CHECK-NEXT:    sub sp, #16
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vstrh.32 q1, [r0, #8]
; CHECK-NEXT:    vstrh.32 q0, [r0]
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    add sp, #16
; CHECK-NEXT:    bx lr
entry:
  %0 = trunc <8 x i32> %src to <8 x i16>
  ret <8 x i16> %0
}

define arm_aapcs_vfpcc <16 x i8> @trunc_v16i32_v16i8(<16 x i32> %src) {
; CHECK-LABEL: trunc_v16i32_v16i8:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .pad #16
; CHECK-NEXT:    sub sp, #16
; CHECK-NEXT:    mov r0, sp
; CHECK-NEXT:    vstrb.32 q3, [r0, #12]
; CHECK-NEXT:    vstrb.32 q2, [r0, #8]
; CHECK-NEXT:    vstrb.32 q1, [r0, #4]
; CHECK-NEXT:    vstrb.32 q0, [r0]
; CHECK-NEXT:    vldrw.u32 q0, [r0]
; CHECK-NEXT:    add sp, #16
; CHECK-NEXT:    bx lr
entry:
  %0 = trunc <16 x i32> %src to <16 x i8>
  ret <16 x i8> %0
}

define arm_aapcs_vfpcc <2 x i32> @trunc_v2i64_v2i32(<2 x i64> %src) {
; CHECK-LABEL: trunc_v2i64_v2i32:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    bx lr
entry:
  %0 = trunc <2 x i64> %src to <2 x i32>
  ret <2 x i32> %0
}