; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc -mtriple=aarch64-apple-darwin -verify-machineinstrs < %s | FileCheck %s

define <4 x i32> @test_compress_v4i32(<4 x i32> %vec, <4 x i1> %mask) {
; CHECK-LABEL: test_compress_v4i32:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    ushll.4s v1, v1, #0
; CHECK-NEXT:    mov x8, sp
; CHECK-NEXT:    str s0, [sp]
; CHECK-NEXT:    shl.4s v1, v1, #31
; CHECK-NEXT:    cmlt.4s v1, v1, #0
; CHECK-NEXT:    mov.s w9, v1[1]
; CHECK-NEXT:    mov.s w10, v1[2]
; CHECK-NEXT:    fmov w11, s1
; CHECK-NEXT:    bfi x8, x11, #2, #1
; CHECK-NEXT:    and x11, x11, #0x1
; CHECK-NEXT:    and x9, x9, #0x1
; CHECK-NEXT:    and w10, w10, #0x1
; CHECK-NEXT:    add x9, x11, x9
; CHECK-NEXT:    mov x11, sp
; CHECK-NEXT:    st1.s { v0 }[1], [x8]
; CHECK-NEXT:    add w10, w9, w10
; CHECK-NEXT:    orr x9, x11, x9, lsl #2
; CHECK-NEXT:    bfi x11, x10, #2, #2
; CHECK-NEXT:    st1.s { v0 }[2], [x9]
; CHECK-NEXT:    st1.s { v0 }[3], [x11]
; CHECK-NEXT:    ldr q0, [sp], #16
; CHECK-NEXT:    ret
    %out = call <4 x i32> @llvm.experimental.vector.compress(<4 x i32> %vec, <4 x i1> %mask, <4 x i32> undef)
    ret <4 x i32> %out
}

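; With a passthru operand, the passthru vector is spilled to the stack slot
; first, so lanes beyond the number of selected elements keep their passthru
; values; the index of the final store is clamped to stay inside the slot.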
define <4 x i32> @test_compress_v4i32_with_passthru(<4 x i32> %vec, <4 x i1> %mask, <4 x i32> %passthru) {
; CHECK-LABEL: test_compress_v4i32_with_passthru:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    str q2, [sp, #-16]!
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    ushll.4s v1, v1, #0
; CHECK-NEXT:    movi.4s v3, #1
; CHECK-NEXT:    mov x12, sp
; CHECK-NEXT:    mov x10, sp
; CHECK-NEXT:    mov x9, sp
; CHECK-NEXT:    mov x14, sp
; CHECK-NEXT:    mov w15, #3 ; =0x3
; CHECK-NEXT:    shl.4s v1, v1, #31
; CHECK-NEXT:    cmlt.4s v1, v1, #0
; CHECK-NEXT:    and.16b v3, v1, v3
; CHECK-NEXT:    mov.s w8, v1[1]
; CHECK-NEXT:    fmov w16, s1
; CHECK-NEXT:    mov.s w11, v1[2]
; CHECK-NEXT:    mov.s w13, v1[3]
; CHECK-NEXT:    addv.4s s2, v3
; CHECK-NEXT:    bfi x12, x16, #2, #1
; CHECK-NEXT:    and x16, x16, #0x1
; CHECK-NEXT:    and x8, x8, #0x1
; CHECK-NEXT:    add x8, x16, x8
; CHECK-NEXT:    and x11, x11, #0x1
; CHECK-NEXT:    and x13, x13, #0x1
; CHECK-NEXT:    fmov w16, s2
; CHECK-NEXT:    add x11, x8, x11
; CHECK-NEXT:    orr x8, x9, x8, lsl #2
; CHECK-NEXT:    add x13, x11, x13
; CHECK-NEXT:    bfi x14, x11, #2, #2
; CHECK-NEXT:    cmp x13, #3
; CHECK-NEXT:    bfi x10, x16, #2, #2
; CHECK-NEXT:    mov.s w16, v0[3]
; CHECK-NEXT:    csel x11, x13, x15, lo
; CHECK-NEXT:    ldr w10, [x10]
; CHECK-NEXT:    str s0, [sp]
; CHECK-NEXT:    st1.s { v0 }[1], [x12]
; CHECK-NEXT:    st1.s { v0 }[2], [x8]
; CHECK-NEXT:    orr x8, x9, x11, lsl #2
; CHECK-NEXT:    csel w9, w16, w10, hi
; CHECK-NEXT:    st1.s { v0 }[3], [x14]
; CHECK-NEXT:    str w9, [x8]
; CHECK-NEXT:    ldr q0, [sp], #16
; CHECK-NEXT:    ret
    %out = call <4 x i32> @llvm.experimental.vector.compress(<4 x i32> %vec, <4 x i1> %mask, <4 x i32> %passthru)
    ret <4 x i32> %out
}

define <2 x double> @test_compress_v2f64(<2 x double> %vec, <2 x i1> %mask) {
; CHECK-LABEL: test_compress_v2f64:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    ushll.2d v1, v1, #0
; CHECK-NEXT:    mov x8, sp
; CHECK-NEXT:    str d0, [sp]
; CHECK-NEXT:    shl.2d v1, v1, #63
; CHECK-NEXT:    cmlt.2d v1, v1, #0
; CHECK-NEXT:    fmov x9, d1
; CHECK-NEXT:    bfi x8, x9, #3, #1
; CHECK-NEXT:    st1.d { v0 }[1], [x8]
; CHECK-NEXT:    ldr q0, [sp], #16
; CHECK-NEXT:    ret
    %out = call <2 x double> @llvm.experimental.vector.compress.v2f64(<2 x double> %vec, <2 x i1> %mask, <2 x double> undef)
    ret <2 x double> %out
}

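; For i8 elements, each lane's store address is a running sum of the preceding
; mask bits (a prefix popcount), masked with bfxil to stay inside the 16-byte
; stack slot.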
define <16 x i8> @test_compress_v16i8(<16 x i8> %vec, <16 x i1> %mask) {
; CHECK-LABEL: test_compress_v16i8:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    shl.16b v1, v1, #7
; CHECK-NEXT:    mov x12, sp
; CHECK-NEXT:    mov x8, sp
; CHECK-NEXT:    st1.b { v0 }[0], [x8]
; CHECK-NEXT:    mov x13, sp
; CHECK-NEXT:    cmlt.16b v1, v1, #0
; CHECK-NEXT:    umov.b w9, v1[0]
; CHECK-NEXT:    umov.b w10, v1[1]
; CHECK-NEXT:    umov.b w11, v1[2]
; CHECK-NEXT:    umov.b w14, v1[3]
; CHECK-NEXT:    bfxil x12, x9, #0, #1
; CHECK-NEXT:    and x10, x10, #0x1
; CHECK-NEXT:    and x9, x9, #0x1
; CHECK-NEXT:    add x9, x9, x10
; CHECK-NEXT:    umov.b w10, v1[4]
; CHECK-NEXT:    and x11, x11, #0x1
; CHECK-NEXT:    st1.b { v0 }[1], [x12]
; CHECK-NEXT:    orr x12, x8, x9
; CHECK-NEXT:    add x9, x9, x11
; CHECK-NEXT:    umov.b w11, v1[5]
; CHECK-NEXT:    and x14, x14, #0x1
; CHECK-NEXT:    st1.b { v0 }[2], [x12]
; CHECK-NEXT:    add x14, x9, x14
; CHECK-NEXT:    umov.b w12, v1[6]
; CHECK-NEXT:    orr x9, x8, x9
; CHECK-NEXT:    and x10, x10, #0x1
; CHECK-NEXT:    st1.b { v0 }[3], [x9]
; CHECK-NEXT:    orr x9, x8, x14
; CHECK-NEXT:    add x10, x14, x10
; CHECK-NEXT:    umov.b w14, v1[7]
; CHECK-NEXT:    st1.b { v0 }[4], [x9]
; CHECK-NEXT:    and x11, x11, #0x1
; CHECK-NEXT:    bfxil x13, x10, #0, #4
; CHECK-NEXT:    mov x9, sp
; CHECK-NEXT:    add x10, x10, x11
; CHECK-NEXT:    umov.b w11, v1[8]
; CHECK-NEXT:    and x12, x12, #0x1
; CHECK-NEXT:    bfxil x9, x10, #0, #4
; CHECK-NEXT:    st1.b { v0 }[5], [x13]
; CHECK-NEXT:    umov.b w13, v1[9]
; CHECK-NEXT:    add x10, x10, x12
; CHECK-NEXT:    mov x12, sp
; CHECK-NEXT:    and x14, x14, #0x1
; CHECK-NEXT:    st1.b { v0 }[6], [x9]
; CHECK-NEXT:    umov.b w9, v1[10]
; CHECK-NEXT:    bfxil x12, x10, #0, #4
; CHECK-NEXT:    add x10, x10, x14
; CHECK-NEXT:    mov x14, sp
; CHECK-NEXT:    and x11, x11, #0x1
; CHECK-NEXT:    bfxil x14, x10, #0, #4
; CHECK-NEXT:    add x10, x10, x11
; CHECK-NEXT:    mov x11, sp
; CHECK-NEXT:    and x13, x13, #0x1
; CHECK-NEXT:    st1.b { v0 }[7], [x12]
; CHECK-NEXT:    mov x12, sp
; CHECK-NEXT:    bfxil x11, x10, #0, #4
; CHECK-NEXT:    add x10, x10, x13
; CHECK-NEXT:    umov.b w13, v1[11]
; CHECK-NEXT:    st1.b { v0 }[8], [x14]
; CHECK-NEXT:    umov.b w14, v1[12]
; CHECK-NEXT:    and x9, x9, #0x1
; CHECK-NEXT:    bfxil x12, x10, #0, #4
; CHECK-NEXT:    add x9, x10, x9
; CHECK-NEXT:    mov x10, sp
; CHECK-NEXT:    st1.b { v0 }[9], [x11]
; CHECK-NEXT:    umov.b w11, v1[13]
; CHECK-NEXT:    bfxil x10, x9, #0, #4
; CHECK-NEXT:    st1.b { v0 }[10], [x12]
; CHECK-NEXT:    umov.b w12, v1[14]
; CHECK-NEXT:    and x13, x13, #0x1
; CHECK-NEXT:    and x14, x14, #0x1
; CHECK-NEXT:    add x9, x9, x13
; CHECK-NEXT:    st1.b { v0 }[11], [x10]
; CHECK-NEXT:    mov x10, sp
; CHECK-NEXT:    add x13, x9, x14
; CHECK-NEXT:    mov x14, sp
; CHECK-NEXT:    bfxil x10, x9, #0, #4
; CHECK-NEXT:    and x9, x11, #0x1
; CHECK-NEXT:    mov x11, sp
; CHECK-NEXT:    add x9, x13, x9
; CHECK-NEXT:    and w12, w12, #0x1
; CHECK-NEXT:    bfxil x14, x13, #0, #4
; CHECK-NEXT:    bfxil x11, x9, #0, #4
; CHECK-NEXT:    add w9, w9, w12
; CHECK-NEXT:    st1.b { v0 }[12], [x10]
; CHECK-NEXT:    bfxil x8, x9, #0, #4
; CHECK-NEXT:    st1.b { v0 }[13], [x14]
; CHECK-NEXT:    st1.b { v0 }[14], [x11]
; CHECK-NEXT:    st1.b { v0 }[15], [x8]
; CHECK-NEXT:    ldr q0, [sp], #16
; CHECK-NEXT:    ret
    %out = call <16 x i8> @llvm.experimental.vector.compress(<16 x i8> %vec, <16 x i1> %mask, <16 x i8> undef)
    ret <16 x i8> %out
}

define <8 x i32> @test_compress_large(<8 x i32> %vec, <8 x i1> %mask) {
; CHECK-LABEL: test_compress_large:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    sub sp, sp, #32
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    ; kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    umov.b w9, v2[0]
; CHECK-NEXT:    umov.b w10, v2[1]
; CHECK-NEXT:    mov x12, sp
; CHECK-NEXT:    umov.b w11, v2[2]
; CHECK-NEXT:    umov.b w13, v2[3]
; CHECK-NEXT:    mov x8, sp
; CHECK-NEXT:    umov.b w14, v2[4]
; CHECK-NEXT:    str s0, [sp]
; CHECK-NEXT:    and x10, x10, #0x1
; CHECK-NEXT:    and x15, x9, #0x1
; CHECK-NEXT:    bfi x12, x9, #2, #1
; CHECK-NEXT:    and x9, x11, #0x1
; CHECK-NEXT:    add x10, x15, x10
; CHECK-NEXT:    umov.b w11, v2[5]
; CHECK-NEXT:    add x9, x10, x9
; CHECK-NEXT:    orr x15, x8, x10, lsl #2
; CHECK-NEXT:    umov.b w10, v2[6]
; CHECK-NEXT:    st1.s { v0 }[1], [x12]
; CHECK-NEXT:    add x12, x8, x9, lsl #2
; CHECK-NEXT:    and x13, x13, #0x1
; CHECK-NEXT:    st1.s { v0 }[2], [x15]
; CHECK-NEXT:    add x9, x9, x13
; CHECK-NEXT:    st1.s { v0 }[3], [x12]
; CHECK-NEXT:    and x12, x14, #0x1
; CHECK-NEXT:    and x11, x11, #0x1
; CHECK-NEXT:    add x12, x9, x12
; CHECK-NEXT:    and w10, w10, #0x1
; CHECK-NEXT:    and x9, x9, #0x7
; CHECK-NEXT:    add x11, x12, x11
; CHECK-NEXT:    and x12, x12, #0x7
; CHECK-NEXT:    str s1, [x8, x9, lsl #2]
; CHECK-NEXT:    add w10, w11, w10
; CHECK-NEXT:    and x11, x11, #0x7
; CHECK-NEXT:    add x12, x8, x12, lsl #2
; CHECK-NEXT:    and x10, x10, #0x7
; CHECK-NEXT:    add x9, x8, x11, lsl #2
; CHECK-NEXT:    add x8, x8, x10, lsl #2
; CHECK-NEXT:    st1.s { v1 }[1], [x12]
; CHECK-NEXT:    st1.s { v1 }[2], [x9]
; CHECK-NEXT:    st1.s { v1 }[3], [x8]
; CHECK-NEXT:    ldp q0, q1, [sp], #32
; CHECK-NEXT:    ret
    %out = call <8 x i32> @llvm.experimental.vector.compress(<8 x i32> %vec, <8 x i1> %mask, <8 x i32> undef)
    ret <8 x i32> %out
}

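; A compress whose vector, mask, and passthru are all constants folds away
; completely and the result is loaded straight from the constant pool.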
define <4 x i32> @test_compress_all_const() {
; CHECK-LABEL: test_compress_all_const:
; CHECK:       ; %bb.0:
; CHECK-NEXT:  Lloh0:
; CHECK-NEXT:    adrp x8, lCPI5_0@PAGE
; CHECK-NEXT:  Lloh1:
; CHECK-NEXT:    ldr q0, [x8, lCPI5_0@PAGEOFF]
; CHECK-NEXT:    ret
; CHECK-NEXT:    .loh AdrpLdr Lloh0, Lloh1
    %out = call <4 x i32> @llvm.experimental.vector.compress(<4 x i32> <i32 3, i32 5, i32 7, i32 9>,
                                                             <4 x i1> <i1 0, i1 1, i1 0, i1 1>,
                                                             <4 x i32> undef)
    ret <4 x i32> %out
}

define <4 x i32> @test_compress_const_mask(<4 x i32> %vec) {
; CHECK-LABEL: test_compress_const_mask:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    mov.s v0[1], v0[3]
; CHECK-NEXT:    ret
    %out = call <4 x i32> @llvm.experimental.vector.compress(<4 x i32> %vec, <4 x i1> <i1 1, i1 undef, i1 0, i1 1>, <4 x i32> undef)
    ret <4 x i32> %out
}

define <4 x i32> @test_compress_const_mask_passthrough(<4 x i32> %vec, <4 x i32> %passthru) {
; CHECK-LABEL: test_compress_const_mask_passthrough:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    mov.d v1[0], v0[1]
; CHECK-NEXT:    mov.s v1[0], v0[0]
; CHECK-NEXT:    mov.16b v0, v1
; CHECK-NEXT:    ret
    %out = call <4 x i32> @llvm.experimental.vector.compress(<4 x i32> %vec, <4 x i1> <i1 1, i1 undef, i1 0, i1 1>, <4 x i32> %passthru)
    ret <4 x i32> %out
}

define <4 x i32> @test_compress_const_mask_const_passthrough(<4 x i32> %vec) {
; CHECK-LABEL: test_compress_const_mask_const_passthrough:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    mov.s v0[1], v0[3]
; CHECK-NEXT:    mov w8, #7 ; =0x7
; CHECK-NEXT:    mov.s v0[2], w8
; CHECK-NEXT:    mov w8, #8 ; =0x8
; CHECK-NEXT:    mov.s v0[3], w8
; CHECK-NEXT:    ret
    %out = call <4 x i32> @llvm.experimental.vector.compress(<4 x i32> %vec, <4 x i1> <i1 1, i1 0, i1 0, i1 1>, <4 x i32> <i32 5, i32 6, i32 7, i32 8>)
    ret <4 x i32> %out
}

; We pass a placeholder value in the first argument of the const_splat*_mask
; and undef_mask tests to check that they are converted to a no-op: the
; lowering either copies the second vector input register to the return
; register or does nothing at all.
define <4 x i32> @test_compress_const_splat1_mask(<4 x i32> %ignore, <4 x i32> %vec) {
; CHECK-LABEL: test_compress_const_splat1_mask:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    mov.16b v0, v1
; CHECK-NEXT:    ret
    %out = call <4 x i32> @llvm.experimental.vector.compress(<4 x i32> %vec, <4 x i1> splat (i1 -1), <4 x i32> undef)
    ret <4 x i32> %out
}
define <4 x i32> @test_compress_const_splat0_mask(<4 x i32> %ignore, <4 x i32> %vec) {
; CHECK-LABEL: test_compress_const_splat0_mask:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ret
    %out = call <4 x i32> @llvm.experimental.vector.compress(<4 x i32> %vec, <4 x i1> splat (i1 0), <4 x i32> undef)
    ret <4 x i32> %out
}
define <4 x i32> @test_compress_undef_mask(<4 x i32> %ignore, <4 x i32> %vec) {
; CHECK-LABEL: test_compress_undef_mask:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ret
    %out = call <4 x i32> @llvm.experimental.vector.compress(<4 x i32> %vec, <4 x i1> undef, <4 x i32> undef)
    ret <4 x i32> %out
}
define <4 x i32> @test_compress_const_splat0_mask_with_passthru(<4 x i32> %ignore, <4 x i32> %vec, <4 x i32> %passthru) {
; CHECK-LABEL: test_compress_const_splat0_mask_with_passthru:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    mov.16b v0, v2
; CHECK-NEXT:    ret
    %out = call <4 x i32> @llvm.experimental.vector.compress(<4 x i32> %vec, <4 x i1> splat (i1 0), <4 x i32> %passthru)
    ret <4 x i32> %out
}
define <4 x i32> @test_compress_const_splat0_mask_without_passthru(<4 x i32> %ignore, <4 x i32> %vec) {
; CHECK-LABEL: test_compress_const_splat0_mask_without_passthru:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    ret
    %out = call <4 x i32> @llvm.experimental.vector.compress(<4 x i32> %vec, <4 x i1> splat (i1 0), <4 x i32> undef)
    ret <4 x i32> %out
}

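; <4 x i8> is not a legal vector type, so the elements are promoted to i16
; lanes before being compressed through the stack.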
define <4 x i8> @test_compress_small(<4 x i8> %vec, <4 x i1> %mask) {
; CHECK-LABEL: test_compress_small:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    shl.4h v1, v1, #15
; CHECK-NEXT:    add x8, sp, #8
; CHECK-NEXT:    ; kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    str h0, [sp, #8]
; CHECK-NEXT:    cmlt.4h v1, v1, #0
; CHECK-NEXT:    umov.h w9, v1[0]
; CHECK-NEXT:    umov.h w10, v1[1]
; CHECK-NEXT:    umov.h w11, v1[2]
; CHECK-NEXT:    bfi x8, x9, #1, #1
; CHECK-NEXT:    and x10, x10, #0x1
; CHECK-NEXT:    and x9, x9, #0x1
; CHECK-NEXT:    add x9, x9, x10
; CHECK-NEXT:    and w11, w11, #0x1
; CHECK-NEXT:    add x10, sp, #8
; CHECK-NEXT:    add w11, w9, w11
; CHECK-NEXT:    orr x9, x10, x9, lsl #1
; CHECK-NEXT:    st1.h { v0 }[1], [x8]
; CHECK-NEXT:    bfi x10, x11, #1, #2
; CHECK-NEXT:    st1.h { v0 }[2], [x9]
; CHECK-NEXT:    st1.h { v0 }[3], [x10]
; CHECK-NEXT:    ldr d0, [sp, #8]
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ret
    %out = call <4 x i8> @llvm.experimental.vector.compress(<4 x i8> %vec, <4 x i1> %mask, <4 x i8> undef)
    ret <4 x i8> %out
}

define <4 x i4> @test_compress_illegal_element_type(<4 x i4> %vec, <4 x i1> %mask) {
; CHECK-LABEL: test_compress_illegal_element_type:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    shl.4h v1, v1, #15
; CHECK-NEXT:    add x8, sp, #8
; CHECK-NEXT:    ; kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    str h0, [sp, #8]
; CHECK-NEXT:    cmlt.4h v1, v1, #0
; CHECK-NEXT:    umov.h w9, v1[0]
; CHECK-NEXT:    umov.h w10, v1[1]
; CHECK-NEXT:    umov.h w11, v1[2]
; CHECK-NEXT:    bfi x8, x9, #1, #1
; CHECK-NEXT:    and x10, x10, #0x1
; CHECK-NEXT:    and x9, x9, #0x1
; CHECK-NEXT:    add x9, x9, x10
; CHECK-NEXT:    and w11, w11, #0x1
; CHECK-NEXT:    add x10, sp, #8
; CHECK-NEXT:    add w11, w9, w11
; CHECK-NEXT:    orr x9, x10, x9, lsl #1
; CHECK-NEXT:    st1.h { v0 }[1], [x8]
; CHECK-NEXT:    bfi x10, x11, #1, #2
; CHECK-NEXT:    st1.h { v0 }[2], [x9]
; CHECK-NEXT:    st1.h { v0 }[3], [x10]
; CHECK-NEXT:    ldr d0, [sp, #8]
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ret
    %out = call <4 x i4> @llvm.experimental.vector.compress(<4 x i4> %vec, <4 x i1> %mask, <4 x i4> undef)
    ret <4 x i4> %out
}

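; Narrow vectors are widened first: <3 x i32> is handled as <4 x i32>, with
; the i1 mask elements arriving in w0-w2 and being rebuilt into a vector
; before the usual stack-based expansion.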
define <3 x i32> @test_compress_narrow(<3 x i32> %vec, <3 x i1> %mask) {
; CHECK-LABEL: test_compress_narrow:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    movi.2d v1, #0000000000000000
; CHECK-NEXT:    mov x11, sp
; CHECK-NEXT:    str s0, [sp]
; CHECK-NEXT:    mov.h v1[0], w0
; CHECK-NEXT:    mov.h v1[1], w1
; CHECK-NEXT:    mov.h v1[2], w2
; CHECK-NEXT:    ushll.4s v1, v1, #0
; CHECK-NEXT:    shl.4s v1, v1, #31
; CHECK-NEXT:    cmlt.4s v1, v1, #0
; CHECK-NEXT:    mov.s w8, v1[1]
; CHECK-NEXT:    mov.s w9, v1[2]
; CHECK-NEXT:    fmov w10, s1
; CHECK-NEXT:    bfi x11, x10, #2, #1
; CHECK-NEXT:    and x10, x10, #0x1
; CHECK-NEXT:    and x8, x8, #0x1
; CHECK-NEXT:    and w9, w9, #0x1
; CHECK-NEXT:    add x8, x10, x8
; CHECK-NEXT:    mov x10, sp
; CHECK-NEXT:    st1.s { v0 }[1], [x11]
; CHECK-NEXT:    add w9, w8, w9
; CHECK-NEXT:    orr x8, x10, x8, lsl #2
; CHECK-NEXT:    bfi x10, x9, #2, #2
; CHECK-NEXT:    st1.s { v0 }[2], [x8]
; CHECK-NEXT:    st1.s { v0 }[3], [x10]
; CHECK-NEXT:    ldr q0, [sp], #16
; CHECK-NEXT:    ret
    %out = call <3 x i32> @llvm.experimental.vector.compress(<3 x i32> %vec, <3 x i1> %mask, <3 x i32> undef)
    ret <3 x i32> %out
}

define <3 x i3> @test_compress_narrow_illegal_element_type(<3 x i3> %vec, <3 x i1> %mask) {
; CHECK-LABEL: test_compress_narrow_illegal_element_type:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    movi.2d v0, #0000000000000000
; CHECK-NEXT:    add x10, sp, #8
; CHECK-NEXT:    strh w0, [sp, #8]
; CHECK-NEXT:    mov.h v0[0], w3
; CHECK-NEXT:    mov.h v0[1], w4
; CHECK-NEXT:    mov.h v0[2], w5
; CHECK-NEXT:    shl.4h v0, v0, #15
; CHECK-NEXT:    cmlt.4h v0, v0, #0
; CHECK-NEXT:    umov.h w8, v0[0]
; CHECK-NEXT:    umov.h w9, v0[1]
; CHECK-NEXT:    and x9, x9, #0x1
; CHECK-NEXT:    and x11, x8, #0x1
; CHECK-NEXT:    bfi x10, x8, #1, #1
; CHECK-NEXT:    add x8, x11, x9
; CHECK-NEXT:    add x9, sp, #8
; CHECK-NEXT:    orr x8, x9, x8, lsl #1
; CHECK-NEXT:    strh w1, [x10]
; CHECK-NEXT:    strh w2, [x8]
; CHECK-NEXT:    ldr d0, [sp, #8]
; CHECK-NEXT:    umov.h w0, v0[0]
; CHECK-NEXT:    umov.h w1, v0[1]
; CHECK-NEXT:    umov.h w2, v0[2]
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ret
    %out = call <3 x i3> @llvm.experimental.vector.compress(<3 x i3> %vec, <3 x i1> %mask, <3 x i3> undef)
    ret <3 x i3> %out
}