; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=aarch64-apple-darwin -mattr=+neon -aarch64-enable-collect-loh=false -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD
; RUN: llc -mtriple=aarch64-apple-darwin -mattr=+neon -aarch64-enable-collect-loh=false -global-isel -global-isel-abort=2 -verify-machineinstrs < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI

; Basic tests from input vector to bitmask
; IR generated from clang for:
; __builtin_convertvector + reinterpret_cast<uint16&>

; CHECK-GI:       warning: Instruction selection used fallback path for convert_to_bitmask2
; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for clang_builtins_undef_concat_convert_to_bitmask4
; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for convert_to_bitmask_2xi32
; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for convert_to_bitmask_8xi2
; CHECK-GI-NEXT:  warning: Instruction selection used fallback path for no_direct_convert_for_bad_concat

; <16 x i8> input: every lane is compared against zero (icmp ne) and the
; <16 x i1> result is bitcast to an i16 mask.
define i16 @convert_to_bitmask16(<16 x i8> %vec) {
; Bits used in mask
; CHECK-SD-LABEL: convert_to_bitmask16:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    adrp x8, lCPI0_0@PAGE
; CHECK-SD-NEXT:    cmeq.16b v0, v0, #0
; CHECK-SD-NEXT:    ldr q1, [x8, lCPI0_0@PAGEOFF]
; CHECK-SD-NEXT:    bic.16b v0, v1, v0
; CHECK-SD-NEXT:    ext.16b v1, v0, v0, #8
; CHECK-SD-NEXT:    zip1.16b v0, v0, v1
; CHECK-SD-NEXT:    addv.8h h0, v0
; CHECK-SD-NEXT:    fmov w0, s0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: convert_to_bitmask16:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    sub sp, sp, #16
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
; CHECK-GI-NEXT:    cmeq.16b v0, v0, #0
; CHECK-GI-NEXT:    mvn.16b v0, v0
; CHECK-GI-NEXT:    umov.b w8, v0[1]
; CHECK-GI-NEXT:    umov.b w9, v0[0]
; CHECK-GI-NEXT:    umov.b w10, v0[2]
; CHECK-GI-NEXT:    umov.b w11, v0[3]
; CHECK-GI-NEXT:    and w8, w8, #0x1
; CHECK-GI-NEXT:    bfi w9, w8, #1, #31
; CHECK-GI-NEXT:    and w8, w10, #0x1
; CHECK-GI-NEXT:    umov.b w10, v0[4]
; CHECK-GI-NEXT:    orr w8, w9, w8, lsl #2
; CHECK-GI-NEXT:    and w9, w11, #0x1
; CHECK-GI-NEXT:    umov.b w11, v0[5]
; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #3
; CHECK-GI-NEXT:    and w9, w10, #0x1
; CHECK-GI-NEXT:    umov.b w10, v0[6]
; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #4
; CHECK-GI-NEXT:    and w9, w11, #0x1
; CHECK-GI-NEXT:    umov.b w11, v0[7]
; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #5
; CHECK-GI-NEXT:    and w9, w10, #0x1
; CHECK-GI-NEXT:    umov.b w10, v0[8]
; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #6
; CHECK-GI-NEXT:    and w9, w11, #0x1
; CHECK-GI-NEXT:    umov.b w11, v0[9]
; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #7
; CHECK-GI-NEXT:    and w9, w10, #0x1
; CHECK-GI-NEXT:    umov.b w10, v0[10]
; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #8
; CHECK-GI-NEXT:    and w9, w11, #0x1
; CHECK-GI-NEXT:    umov.b w11, v0[11]
; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #9
; CHECK-GI-NEXT:    and w9, w10, #0x1
; CHECK-GI-NEXT:    umov.b w10, v0[12]
; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #10
; CHECK-GI-NEXT:    and w9, w11, #0x1
; CHECK-GI-NEXT:    umov.b w11, v0[13]
; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #11
; CHECK-GI-NEXT:    and w9, w10, #0x1
; CHECK-GI-NEXT:    umov.b w10, v0[14]
; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #12
; CHECK-GI-NEXT:    and w9, w11, #0x1
; CHECK-GI-NEXT:    umov.b w11, v0[15]
; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #13
; CHECK-GI-NEXT:    and w9, w10, #0x1
; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #14
; CHECK-GI-NEXT:    and w9, w11, #0x1
; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #15
; CHECK-GI-NEXT:    strh w8, [sp, #14]
; CHECK-GI-NEXT:    and w0, w8, #0xffff
; CHECK-GI-NEXT:    add sp, sp, #16
; CHECK-GI-NEXT:    ret

; Actual conversion

  %cmp_result = icmp ne <16 x i8> %vec, zeroinitializer
  %bitmask = bitcast <16 x i1> %cmp_result to i16
  ret i16 %bitmask
}

; <8 x i16> input: the <8 x i1> compare result is bitcast to i8 and then
; zero-extended to the i16 return type.
define i16 @convert_to_bitmask8(<8 x i16> %vec) {
; CHECK-SD-LABEL: convert_to_bitmask8:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    adrp x8, lCPI1_0@PAGE
; CHECK-SD-NEXT:    cmeq.8h v0, v0, #0
; CHECK-SD-NEXT:    ldr q1, [x8, lCPI1_0@PAGEOFF]
; CHECK-SD-NEXT:    bic.16b v0, v1, v0
; CHECK-SD-NEXT:    addv.8h h0, v0
; CHECK-SD-NEXT:    fmov w8, s0
; CHECK-SD-NEXT:    and w0, w8, #0xff
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: convert_to_bitmask8:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    sub sp, sp, #16
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
; CHECK-GI-NEXT:    cmeq.8h v0, v0, #0
; CHECK-GI-NEXT:    mvn.16b v0, v0
; CHECK-GI-NEXT:    xtn.8b v0, v0
; CHECK-GI-NEXT:    umov.b w8, v0[1]
; CHECK-GI-NEXT:    umov.b w9, v0[0]
; CHECK-GI-NEXT:    umov.b w10, v0[2]
; CHECK-GI-NEXT:    umov.b w11, v0[3]
; CHECK-GI-NEXT:    and w8, w8, #0x1
; CHECK-GI-NEXT:    bfi w9, w8, #1, #31
; CHECK-GI-NEXT:    and w8, w10, #0x1
; CHECK-GI-NEXT:    umov.b w10, v0[4]
; CHECK-GI-NEXT:    orr w8, w9, w8, lsl #2
; CHECK-GI-NEXT:    and w9, w11, #0x1
; CHECK-GI-NEXT:    umov.b w11, v0[5]
; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #3
; CHECK-GI-NEXT:    and w9, w10, #0x1
; CHECK-GI-NEXT:    umov.b w10, v0[6]
; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #4
; CHECK-GI-NEXT:    and w9, w11, #0x1
; CHECK-GI-NEXT:    umov.b w11, v0[7]
; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #5
; CHECK-GI-NEXT:    and w9, w10, #0x1
; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #6
; CHECK-GI-NEXT:    and w9, w11, #0x1
; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #7
; CHECK-GI-NEXT:    strb w8, [sp, #15]
; CHECK-GI-NEXT:    and w0, w8, #0xff
; CHECK-GI-NEXT:    add sp, sp, #16
; CHECK-GI-NEXT:    ret


  %cmp_result = icmp ne <8 x i16> %vec, zeroinitializer
  %bitmask = bitcast <8 x i1> %cmp_result to i8
  %extended_bitmask = zext i8 %bitmask to i16
  ret i16 %extended_bitmask
}

; <4 x i32> input: the <4 x i1> compare result is bitcast to an i4 mask.
define i4 @convert_to_bitmask4(<4 x i32> %vec) {
; CHECK-SD-LABEL: convert_to_bitmask4:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    adrp x8, lCPI2_0@PAGE
; CHECK-SD-NEXT:    cmeq.4s v0, v0, #0
; CHECK-SD-NEXT:    ldr q1, [x8, lCPI2_0@PAGEOFF]
; CHECK-SD-NEXT:    bic.16b v0, v1, v0
; CHECK-SD-NEXT:    addv.4s s0, v0
; CHECK-SD-NEXT:    fmov w0, s0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: convert_to_bitmask4:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    sub sp, sp, #16
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
; CHECK-GI-NEXT:    cmeq.4s v0, v0, #0
; CHECK-GI-NEXT:    mvn.16b v0, v0
; CHECK-GI-NEXT:    mov.s w8, v0[1]
; CHECK-GI-NEXT:    mov.s w9, v0[2]
; CHECK-GI-NEXT:    fmov w11, s0
; CHECK-GI-NEXT:    mov.s w10, v0[3]
; CHECK-GI-NEXT:    and w8, w8, #0x1
; CHECK-GI-NEXT:    bfi w11, w8, #1, #31
; CHECK-GI-NEXT:    and w8, w9, #0x1
; CHECK-GI-NEXT:    and w9, w10, #0x1
; CHECK-GI-NEXT:    orr w8, w11, w8, lsl #2
; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #3
; CHECK-GI-NEXT:    strb w8, [sp, #15]
; CHECK-GI-NEXT:    and w0, w8, #0xff
; CHECK-GI-NEXT:    add sp, sp, #16
; CHECK-GI-NEXT:    ret


  %cmp_result = icmp ne <4 x i32> %vec, zeroinitializer
  %bitmask = bitcast <4 x i1> %cmp_result to i4
  ret i4 %bitmask
}

; <2 x i64> input: the <2 x i1> compare result is bitcast to i2 and then
; zero-extended to i8. This function appears in the GlobalISel fallback
; warnings above and is checked with the prefix common to both RUN lines.
define i8 @convert_to_bitmask2(<2 x i64> %vec) {
; CHECK-LABEL: convert_to_bitmask2:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    adrp x8, lCPI3_0@PAGE
; CHECK-NEXT:    cmeq.2d v0, v0, #0
; CHECK-NEXT:    ldr q1, [x8, lCPI3_0@PAGEOFF]
; CHECK-NEXT:    bic.16b v0, v1, v0
; CHECK-NEXT:    addp.2d d0, v0
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    and w0, w8, #0x3
; CHECK-NEXT:    ret


  %cmp_result = icmp ne <2 x i64> %vec, zeroinitializer
  %bitmask = bitcast <2 x i1> %cmp_result to i2
  %extended_bitmask = zext i2 %bitmask to i8
  ret i8 %extended_bitmask
}

; Clang's __builtin_convertvector adds an undef vector concat for vectors with <8 elements.
; The <4 x i1> compare result is widened to <8 x i1> by a shufflevector whose
; upper four mask elements are undef, then bitcast to i8. This function appears
; in the GlobalISel fallback warnings at the top of the file.
define i8 @clang_builtins_undef_concat_convert_to_bitmask4(<4 x i32> %vec) {
; CHECK-LABEL: clang_builtins_undef_concat_convert_to_bitmask4:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    adrp x8, lCPI4_0@PAGE
; CHECK-NEXT:    cmeq.4s v0, v0, #0
; CHECK-NEXT:    ldr q1, [x8, lCPI4_0@PAGEOFF]
; CHECK-NEXT:    bic.16b v0, v1, v0
; CHECK-NEXT:    addv.4s s0, v0
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret


  %cmp_result = icmp ne <4 x i32> %vec, zeroinitializer
  %vector_pad = shufflevector <4 x i1> %cmp_result, <4 x i1> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  %bitmask = bitcast <8 x i1> %vector_pad to i8
  ret i8 %bitmask
}


; No compare in the chain: the two inputs are ANDed, truncated to <4 x i1>,
; and the result is bitcast to i4.
define i4 @convert_to_bitmask_no_compare(<4 x i32> %vec1, <4 x i32> %vec2) {
; CHECK-SD-LABEL: convert_to_bitmask_no_compare:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    and.16b v0, v0, v1
; CHECK-SD-NEXT:    adrp x8, lCPI5_0@PAGE
; CHECK-SD-NEXT:    ldr q1, [x8, lCPI5_0@PAGEOFF]
; CHECK-SD-NEXT:    shl.4s v0, v0, #31
; CHECK-SD-NEXT:    cmlt.4s v0, v0, #0
; CHECK-SD-NEXT:    and.16b v0, v0, v1
; CHECK-SD-NEXT:    addv.4s s0, v0
; CHECK-SD-NEXT:    fmov w0, s0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: convert_to_bitmask_no_compare:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    sub sp, sp, #16
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
; CHECK-GI-NEXT:    and.16b v0, v0, v1
; CHECK-GI-NEXT:    mov.s w8, v0[1]
; CHECK-GI-NEXT:    mov.s w9, v0[2]
; CHECK-GI-NEXT:    fmov w11, s0
; CHECK-GI-NEXT:    mov.s w10, v0[3]
; CHECK-GI-NEXT:    and w8, w8, #0x1
; CHECK-GI-NEXT:    bfi w11, w8, #1, #31
; CHECK-GI-NEXT:    and w8, w9, #0x1
; CHECK-GI-NEXT:    and w9, w10, #0x1
; CHECK-GI-NEXT:    orr w8, w11, w8, lsl #2
; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #3
; CHECK-GI-NEXT:    strb w8, [sp, #15]
; CHECK-GI-NEXT:    and w0, w8, #0xff
; CHECK-GI-NEXT:    add sp, sp, #16
; CHECK-GI-NEXT:    ret


  %cmp = and <4 x i32> %vec1, %vec2
  %trunc = trunc <4 x i32> %cmp to <4 x i1>
  %bitmask = bitcast <4 x i1> %trunc to i4
  ret i4 %bitmask
}

; Two compares of %vec1 (ne zero, eq %vec2) are ANDed as <4 x i1> before the
; bitcast to i4.
define i4 @convert_to_bitmask_with_compare_chain(<4 x i32> %vec1, <4 x i32> %vec2) {
; CHECK-SD-LABEL: convert_to_bitmask_with_compare_chain:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    cmeq.4s v2, v0, #0
; CHECK-SD-NEXT:    cmeq.4s v0, v0, v1
; CHECK-SD-NEXT:    adrp x8, lCPI6_0@PAGE
; CHECK-SD-NEXT:    ldr q1, [x8, lCPI6_0@PAGEOFF]
; CHECK-SD-NEXT:    bic.16b v0, v0, v2
; CHECK-SD-NEXT:    and.16b v0, v0, v1
; CHECK-SD-NEXT:    addv.4s s0, v0
; CHECK-SD-NEXT:    fmov w0, s0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: convert_to_bitmask_with_compare_chain:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    sub sp, sp, #16
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
; CHECK-GI-NEXT:    cmeq.4s v2, v0, #0
; CHECK-GI-NEXT:    cmeq.4s v0, v0, v1
; CHECK-GI-NEXT:    bic.16b v0, v0, v2
; CHECK-GI-NEXT:    mov.s w8, v0[1]
; CHECK-GI-NEXT:    mov.s w9, v0[2]
; CHECK-GI-NEXT:    fmov w11, s0
; CHECK-GI-NEXT:    mov.s w10, v0[3]
; CHECK-GI-NEXT:    and w8, w8, #0x1
; CHECK-GI-NEXT:    bfi w11, w8, #1, #31
; CHECK-GI-NEXT:    and w8, w9, #0x1
; CHECK-GI-NEXT:    and w9, w10, #0x1
; CHECK-GI-NEXT:    orr w8, w11, w8, lsl #2
; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #3
; CHECK-GI-NEXT:    strb w8, [sp, #15]
; CHECK-GI-NEXT:    and w0, w8, #0xff
; CHECK-GI-NEXT:    add sp, sp, #16
; CHECK-GI-NEXT:    ret


  %cmp1 = icmp ne <4 x i32> %vec1, zeroinitializer
  %cmp2 = icmp eq <4 x i32> %vec1, %vec2
  %cmp3 = and <4 x i1> %cmp1, %cmp2
  %bitmask = bitcast <4 x i1> %cmp3 to i4
  ret i4 %bitmask
}

; One AND operand is a compare of %vec1, the other a trunc of %vec2 to
; <4 x i1>.
define i4 @convert_to_bitmask_with_trunc_in_chain(<4 x i32> %vec1, <4 x i32> %vec2) {
; CHECK-SD-LABEL: convert_to_bitmask_with_trunc_in_chain:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    cmeq.4s v0, v0, #0
; CHECK-SD-NEXT:    adrp x8, lCPI7_0@PAGE
; CHECK-SD-NEXT:    bic.16b v0, v1, v0
; CHECK-SD-NEXT:    ldr q1, [x8, lCPI7_0@PAGEOFF]
; CHECK-SD-NEXT:    shl.4s v0, v0, #31
; CHECK-SD-NEXT:    cmlt.4s v0, v0, #0
; CHECK-SD-NEXT:    and.16b v0, v0, v1
; CHECK-SD-NEXT:    addv.4s s0, v0
; CHECK-SD-NEXT:    fmov w0, s0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: convert_to_bitmask_with_trunc_in_chain:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    sub sp, sp, #16
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
; CHECK-GI-NEXT:    cmeq.4s v0, v0, #0
; CHECK-GI-NEXT:    bic.16b v0, v1, v0
; CHECK-GI-NEXT:    mov.s w8, v0[1]
; CHECK-GI-NEXT:    mov.s w9, v0[2]
; CHECK-GI-NEXT:    fmov w11, s0
; CHECK-GI-NEXT:    mov.s w10, v0[3]
; CHECK-GI-NEXT:    and w8, w8, #0x1
; CHECK-GI-NEXT:    bfi w11, w8, #1, #31
; CHECK-GI-NEXT:    and w8, w9, #0x1
; CHECK-GI-NEXT:    and w9, w10, #0x1
; CHECK-GI-NEXT:    orr w8, w11, w8, lsl #2
; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #3
; CHECK-GI-NEXT:    strb w8, [sp, #15]
; CHECK-GI-NEXT:    and w0, w8, #0xff
; CHECK-GI-NEXT:    add sp, sp, #16
; CHECK-GI-NEXT:    ret


  %cmp1 = icmp ne <4 x i32> %vec1, zeroinitializer
  %trunc_vec = trunc <4 x i32> %vec2 to <4 x i1>
  %and_res = and <4 x i1> %cmp1, %trunc_vec
  %bitmask = bitcast <4 x i1> %and_res to i4
  ret i4 %bitmask
}

; The two compare results pass through a chain of and/or/xor operations with
; constant <4 x i1> operands before the final bitcast to i4.
define i4 @convert_to_bitmask_with_unknown_type_in_long_chain(<4 x i32> %vec1, <4 x i32> %vec2) {
; CHECK-SD-LABEL: convert_to_bitmask_with_unknown_type_in_long_chain:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    cmeq.4s v0, v0, #0
; CHECK-SD-NEXT:    cmeq.4s v1, v1, #0
; CHECK-SD-NEXT:    adrp x8, lCPI8_0@PAGE
; CHECK-SD-NEXT:    movi d2, #0x000000ffffffff
; CHECK-SD-NEXT:    movi d3, #0x00ffffffffffff
; CHECK-SD-NEXT:    bic.16b v0, v1, v0
; CHECK-SD-NEXT:    movi d1, #0xffff0000ffff0000
; CHECK-SD-NEXT:    xtn.4h v0, v0
; CHECK-SD-NEXT:    orr.8b v0, v0, v2
; CHECK-SD-NEXT:    movi d2, #0x00ffffffff0000
; CHECK-SD-NEXT:    eor.8b v1, v0, v1
; CHECK-SD-NEXT:    eor.8b v0, v0, v2
; CHECK-SD-NEXT:    mov.h v1[2], wzr
; CHECK-SD-NEXT:    orr.8b v0, v0, v3
; CHECK-SD-NEXT:    orr.8b v0, v1, v0
; CHECK-SD-NEXT:    ldr d1, [x8, lCPI8_0@PAGEOFF]
; CHECK-SD-NEXT:    shl.4h v0, v0, #15
; CHECK-SD-NEXT:    cmlt.4h v0, v0, #0
; CHECK-SD-NEXT:    and.8b v0, v0, v1
; CHECK-SD-NEXT:    addv.4h h0, v0
; CHECK-SD-NEXT:    fmov w0, s0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: convert_to_bitmask_with_unknown_type_in_long_chain:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    sub sp, sp, #16
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
; CHECK-GI-NEXT:    mov w8, #1 ; =0x1
; CHECK-GI-NEXT:    mov w9, #0 ; =0x0
; CHECK-GI-NEXT:    cmeq.4s v5, v0, #0
; CHECK-GI-NEXT:    fmov s2, w8
; CHECK-GI-NEXT:    fmov s4, w9
; CHECK-GI-NEXT:    cmeq.4s v1, v1, #0
; CHECK-GI-NEXT:    mov.16b v3, v2
; CHECK-GI-NEXT:    mov.16b v0, v4
; CHECK-GI-NEXT:    mov.h v4[1], w8
; CHECK-GI-NEXT:    bic.16b v1, v1, v5
; CHECK-GI-NEXT:    mov.16b v5, v2
; CHECK-GI-NEXT:    mov.h v2[1], w8
; CHECK-GI-NEXT:    mov.h v3[1], w8
; CHECK-GI-NEXT:    mov.h v0[1], w8
; CHECK-GI-NEXT:    mov.h v5[1], w8
; CHECK-GI-NEXT:    mov.h v4[2], w8
; CHECK-GI-NEXT:    xtn.4h v1, v1
; CHECK-GI-NEXT:    mov.h v2[2], w8
; CHECK-GI-NEXT:    mov.h v3[2], w9
; CHECK-GI-NEXT:    mov.h v0[2], w9
; CHECK-GI-NEXT:    mov.h v5[2], w9
; CHECK-GI-NEXT:    mov.h v4[3], w9
; CHECK-GI-NEXT:    mov.h v2[3], w9
; CHECK-GI-NEXT:    mov.h v3[3], w9
; CHECK-GI-NEXT:    mov.h v0[3], w8
; CHECK-GI-NEXT:    mov.h v5[3], w8
; CHECK-GI-NEXT:    orr.8b v1, v1, v3
; CHECK-GI-NEXT:    eor.8b v0, v1, v0
; CHECK-GI-NEXT:    eor.8b v1, v4, v1
; CHECK-GI-NEXT:    and.8b v0, v0, v5
; CHECK-GI-NEXT:    orr.8b v1, v2, v1
; CHECK-GI-NEXT:    orr.8b v0, v0, v1
; CHECK-GI-NEXT:    ushll.4s v0, v0, #0
; CHECK-GI-NEXT:    mov.s w8, v0[1]
; CHECK-GI-NEXT:    mov.s w9, v0[2]
; CHECK-GI-NEXT:    fmov w11, s0
; CHECK-GI-NEXT:    mov.s w10, v0[3]
; CHECK-GI-NEXT:    and w8, w8, #0x1
; CHECK-GI-NEXT:    bfi w11, w8, #1, #31
; CHECK-GI-NEXT:    and w8, w9, #0x1
; CHECK-GI-NEXT:    and w9, w10, #0x1
; CHECK-GI-NEXT:    orr w8, w11, w8, lsl #2
; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #3
; CHECK-GI-NEXT:    strb w8, [sp, #15]
; CHECK-GI-NEXT:    and w0, w8, #0xff
; CHECK-GI-NEXT:    add sp, sp, #16
; CHECK-GI-NEXT:    ret


  %cmp1 = icmp ne <4 x i32> %vec1, zeroinitializer
  %cmp2 = icmp eq <4 x i32> %vec2, zeroinitializer

  ; Artificially make this a long chain to hide the original type
  %chain1 = and <4 x i1> %cmp1, %cmp2;
  %chain2 = or <4 x i1> %chain1, <i1 1, i1 1, i1 0, i1 0>;
  %chain3 = xor <4 x i1> %chain2, <i1 0, i1 1, i1 0, i1 1>;
  %chain4 = and <4 x i1> %chain3, <i1 1, i1 1, i1 0, i1 1>;
  %chain5 = or <4 x i1> %chain4, <i1 1, i1 1, i1 1, i1 0>;
  %chain6 = xor <4 x i1> <i1 0, i1 1, i1 1, i1 0>, %chain2;
  %chain7 = or <4 x i1> %chain5, %chain6;
  %bitmask = bitcast <4 x i1> %chain7 to i4
  ret i4 %bitmask
}

; The two ORed compares come from inputs with different element types
; (<4 x i16> and <4 x i32>).
define i4 @convert_to_bitmask_with_different_types_in_chain(<4 x i16> %vec1, <4 x i32> %vec2) {
; CHECK-SD-LABEL: convert_to_bitmask_with_different_types_in_chain:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    cmeq.4s v1, v1, #0
; CHECK-SD-NEXT:    cmeq.4h v0, v0, #0
; CHECK-SD-NEXT:    adrp x8, lCPI9_0@PAGE
; CHECK-SD-NEXT:    xtn.4h v1, v1
; CHECK-SD-NEXT:    orn.8b v0, v1, v0
; CHECK-SD-NEXT:    ldr d1, [x8, lCPI9_0@PAGEOFF]
; CHECK-SD-NEXT:    and.8b v0, v0, v1
; CHECK-SD-NEXT:    addv.4h h0, v0
; CHECK-SD-NEXT:    fmov w0, s0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: convert_to_bitmask_with_different_types_in_chain:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    sub sp, sp, #16
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
; CHECK-GI-NEXT:    cmeq.4s v1, v1, #0
; CHECK-GI-NEXT:    cmeq.4h v0, v0, #0
; CHECK-GI-NEXT:    xtn.4h v1, v1
; CHECK-GI-NEXT:    orn.8b v0, v1, v0
; CHECK-GI-NEXT:    ushll.4s v0, v0, #0
; CHECK-GI-NEXT:    mov.s w8, v0[1]
; CHECK-GI-NEXT:    mov.s w9, v0[2]
; CHECK-GI-NEXT:    fmov w11, s0
; CHECK-GI-NEXT:    mov.s w10, v0[3]
; CHECK-GI-NEXT:    and w8, w8, #0x1
; CHECK-GI-NEXT:    bfi w11, w8, #1, #31
; CHECK-GI-NEXT:    and w8, w9, #0x1
; CHECK-GI-NEXT:    and w9, w10, #0x1
; CHECK-GI-NEXT:    orr w8, w11, w8, lsl #2
; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #3
; CHECK-GI-NEXT:    strb w8, [sp, #15]
; CHECK-GI-NEXT:    and w0, w8, #0xff
; CHECK-GI-NEXT:    add sp, sp, #16
; CHECK-GI-NEXT:    ret


  %cmp1 = icmp ne <4 x i16> %vec1, zeroinitializer
  %cmp2 = icmp eq <4 x i32> %vec2, zeroinitializer
  %chain1 = or <4 x i1> %cmp1, %cmp2
  %bitmask = bitcast <4 x i1> %chain1 to i4
  ret i4 %bitmask
}

; The <16 x i1> argument is bitcast to i16 directly; the function contains no
; compare.
define i16 @convert_to_bitmask_without_knowing_type(<16 x i1> %vec) {
; CHECK-SD-LABEL: convert_to_bitmask_without_knowing_type:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    shl.16b v0, v0, #7
; CHECK-SD-NEXT:    adrp x8, lCPI10_0@PAGE
; CHECK-SD-NEXT:    ldr q1, [x8, lCPI10_0@PAGEOFF]
; CHECK-SD-NEXT:    cmlt.16b v0, v0, #0
; CHECK-SD-NEXT:    and.16b v0, v0, v1
; CHECK-SD-NEXT:    ext.16b v1, v0, v0, #8
; CHECK-SD-NEXT:    zip1.16b v0, v0, v1
; CHECK-SD-NEXT:    addv.8h h0, v0
; CHECK-SD-NEXT:    fmov w0, s0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: convert_to_bitmask_without_knowing_type:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    sub sp, sp, #16
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
; CHECK-GI-NEXT:    umov.b w8, v0[1]
; CHECK-GI-NEXT:    umov.b w9, v0[0]
; CHECK-GI-NEXT:    umov.b w10, v0[2]
; CHECK-GI-NEXT:    umov.b w11, v0[3]
; CHECK-GI-NEXT:    and w8, w8, #0x1
; CHECK-GI-NEXT:    bfi w9, w8, #1, #31
; CHECK-GI-NEXT:    and w8, w10, #0x1
; CHECK-GI-NEXT:    umov.b w10, v0[4]
; CHECK-GI-NEXT:    orr w8, w9, w8, lsl #2
; CHECK-GI-NEXT:    and w9, w11, #0x1
; CHECK-GI-NEXT:    umov.b w11, v0[5]
; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #3
; CHECK-GI-NEXT:    and w9, w10, #0x1
; CHECK-GI-NEXT:    umov.b w10, v0[6]
; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #4
; CHECK-GI-NEXT:    and w9, w11, #0x1
; CHECK-GI-NEXT:    umov.b w11, v0[7]
; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #5
; CHECK-GI-NEXT:    and w9, w10, #0x1
; CHECK-GI-NEXT:    umov.b w10, v0[8]
; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #6
; CHECK-GI-NEXT:    and w9, w11, #0x1
; CHECK-GI-NEXT:    umov.b w11, v0[9]
; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #7
; CHECK-GI-NEXT:    and w9, w10, #0x1
; CHECK-GI-NEXT:    umov.b w10, v0[10]
; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #8
; CHECK-GI-NEXT:    and w9, w11, #0x1
; CHECK-GI-NEXT:    umov.b w11, v0[11]
; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #9
; CHECK-GI-NEXT:    and w9, w10, #0x1
; CHECK-GI-NEXT:    umov.b w10, v0[12]
; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #10
; CHECK-GI-NEXT:    and w9, w11, #0x1
; CHECK-GI-NEXT:    umov.b w11, v0[13]
; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #11
; CHECK-GI-NEXT:    and w9, w10, #0x1
; CHECK-GI-NEXT:    umov.b w10, v0[14]
; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #12
; CHECK-GI-NEXT:    and w9, w11, #0x1
; CHECK-GI-NEXT:    umov.b w11, v0[15]
; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #13
; CHECK-GI-NEXT:    and w9, w10, #0x1
; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #14
; CHECK-GI-NEXT:    and w9, w11, #0x1
; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #15
; CHECK-GI-NEXT:    strh w8, [sp, #14]
; CHECK-GI-NEXT:    and w0, w8, #0xffff
; CHECK-GI-NEXT:    add sp, sp, #16
; CHECK-GI-NEXT:    ret

  %bitmask = bitcast <16 x i1> %vec to i16
  ret i16 %bitmask
}

; <2 x i32> input with an i2 result. This function appears in the GlobalISel
; fallback warnings at the top of the file.
define i2 @convert_to_bitmask_2xi32(<2 x i32> %vec) {
; CHECK-LABEL: convert_to_bitmask_2xi32:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    adrp x8, lCPI11_0@PAGE
; CHECK-NEXT:    cmeq.2s v0, v0, #0
; CHECK-NEXT:    ldr d1, [x8, lCPI11_0@PAGEOFF]
; CHECK-NEXT:    bic.8b v0, v1, v0
; CHECK-NEXT:    addp.2s v0, v0, v0
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret

  %cmp_result = icmp ne <2 x i32> %vec, zeroinitializer
  %bitmask = bitcast <2 x i1> %cmp_result to i2
  ret i2 %bitmask
}

; <4 x i8> input with an i4 result.
define i4 @convert_to_bitmask_4xi8(<4 x i8> %vec) {
; CHECK-SD-LABEL: convert_to_bitmask_4xi8:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    bic.4h v0, #255, lsl #8
; CHECK-SD-NEXT:    adrp x8, lCPI12_0@PAGE
; CHECK-SD-NEXT:    ldr d1, [x8, lCPI12_0@PAGEOFF]
; CHECK-SD-NEXT:    cmeq.4h v0, v0, #0
; CHECK-SD-NEXT:    bic.8b v0, v1, v0
; CHECK-SD-NEXT:    addv.4h h0, v0
; CHECK-SD-NEXT:    fmov w0, s0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: convert_to_bitmask_4xi8:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    sub sp, sp, #16
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
; CHECK-GI-NEXT:    mov w8, #0 ; =0x0
; CHECK-GI-NEXT:    uzp1.8b v0, v0, v0
; CHECK-GI-NEXT:    fmov s1, w8
; CHECK-GI-NEXT:    mov.b v1[1], w8
; CHECK-GI-NEXT:    mov.b v1[2], w8
; CHECK-GI-NEXT:    mov.b v1[3], w8
; CHECK-GI-NEXT:    cmeq.8b v0, v0, v1
; CHECK-GI-NEXT:    mvn.8b v0, v0
; CHECK-GI-NEXT:    umov.b w8, v0[0]
; CHECK-GI-NEXT:    umov.b w9, v0[1]
; CHECK-GI-NEXT:    mov.s v1[0], w8
; CHECK-GI-NEXT:    umov.b w8, v0[2]
; CHECK-GI-NEXT:    mov.s v1[1], w9
; CHECK-GI-NEXT:    umov.b w9, v0[3]
; CHECK-GI-NEXT:    mov.s v1[2], w8
; CHECK-GI-NEXT:    mov.s v1[3], w9
; CHECK-GI-NEXT:    mov.s w8, v1[1]
; CHECK-GI-NEXT:    mov.s w9, v1[2]
; CHECK-GI-NEXT:    fmov w11, s1
; CHECK-GI-NEXT:    mov.s w10, v1[3]
; CHECK-GI-NEXT:    and w8, w8, #0x1
; CHECK-GI-NEXT:    bfi w11, w8, #1, #31
; CHECK-GI-NEXT:    and w8, w9, #0x1
; CHECK-GI-NEXT:    and w9, w10, #0x1
; CHECK-GI-NEXT:    orr w8, w11, w8, lsl #2
; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #3
; CHECK-GI-NEXT:    strb w8, [sp, #15]
; CHECK-GI-NEXT:    and w0, w8, #0xff
; CHECK-GI-NEXT:    add sp, sp, #16
; CHECK-GI-NEXT:    ret

  %cmp_result = icmp ne <4 x i8> %vec, zeroinitializer
  %bitmask = bitcast <4 x i1> %cmp_result to i4
  ret i4 %bitmask
}

; <8 x i2> input with an i8 result. This function appears in the GlobalISel
; fallback warnings at the top of the file.
define i8 @convert_to_bitmask_8xi2(<8 x i2> %vec) {
; CHECK-LABEL: convert_to_bitmask_8xi2:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    movi.8b v1, #3
; CHECK-NEXT:    adrp x8, lCPI13_0@PAGE
; CHECK-NEXT:    and.8b v0, v0, v1
; CHECK-NEXT:    ldr d1, [x8, lCPI13_0@PAGEOFF]
; CHECK-NEXT:    cmeq.8b v0, v0, #0
; CHECK-NEXT:    bic.8b v0, v1, v0
; CHECK-NEXT:    addv.8b b0, v0
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret

  %cmp_result = icmp ne <8 x i2> %vec, zeroinitializer
  %bitmask = bitcast <8 x i1> %cmp_result to i8
  ret i8 %bitmask
}

; Floating-point variant: the mask comes from `fcmp one` against zero on
; <4 x float>.
define i4 @convert_to_bitmask_float(<4 x float> %vec) {
; CHECK-SD-LABEL: convert_to_bitmask_float:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    fcmgt.4s v1, v0, #0.0
; CHECK-SD-NEXT:    fcmlt.4s v0, v0, #0.0
; CHECK-SD-NEXT:    adrp x8, lCPI14_0@PAGE
; CHECK-SD-NEXT:    orr.16b v0, v0, v1
; CHECK-SD-NEXT:    ldr q1, [x8, lCPI14_0@PAGEOFF]
; CHECK-SD-NEXT:    and.16b v0, v0, v1
; CHECK-SD-NEXT:    addv.4s s0, v0
; CHECK-SD-NEXT:    fmov w0, s0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: convert_to_bitmask_float:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    sub sp, sp, #16
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
; CHECK-GI-NEXT:    fcmgt.4s v1, v0, #0.0
; CHECK-GI-NEXT:    fcmlt.4s v0, v0, #0.0
; CHECK-GI-NEXT:    orr.16b v0, v0, v1
; CHECK-GI-NEXT:    mov.s w8, v0[1]
; CHECK-GI-NEXT:    mov.s w9, v0[2]
; CHECK-GI-NEXT:    fmov w11, s0
; CHECK-GI-NEXT:    mov.s w10, v0[3]
; CHECK-GI-NEXT:    and w8, w8, #0x1
; CHECK-GI-NEXT:    bfi w11, w8, #1, #31
; CHECK-GI-NEXT:    and w8, w9, #0x1
; CHECK-GI-NEXT:    and w9, w10, #0x1
; CHECK-GI-NEXT:    orr w8, w11, w8, lsl #2
; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #3
; CHECK-GI-NEXT:    strb w8, [sp, #15]
; CHECK-GI-NEXT:    and w0, w8, #0xff
; CHECK-GI-NEXT:    add sp, sp, #16
; CHECK-GI-NEXT:    ret


  %cmp_result = fcmp one <4 x float> %vec, zeroinitializer
  %bitmask = bitcast <4 x i1> %cmp_result to i4
  ret i4 %bitmask
}

; Larger vector types don't map directly, but they can be split/truncated and then converted.
; After the comparison against 0, this is truncated to <8 x i16>, which is valid again.
; <8 x i32> input: every lane is compared against zero (icmp ne) and the
; <8 x i1> result is bitcast to an i8 mask.
define i8 @convert_large_vector(<8 x i32> %vec) {
; CHECK-SD-LABEL: convert_large_vector:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    sub sp, sp, #16
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
; CHECK-SD-NEXT:    cmeq.4s v1, v1, #0
; CHECK-SD-NEXT:    cmeq.4s v0, v0, #0
; CHECK-SD-NEXT:    adrp x8, lCPI15_0@PAGE
; CHECK-SD-NEXT:    uzp1.8h v0, v0, v1
; CHECK-SD-NEXT:    ldr q1, [x8, lCPI15_0@PAGEOFF]
; CHECK-SD-NEXT:    bic.16b v0, v1, v0
; CHECK-SD-NEXT:    addv.8h h0, v0
; CHECK-SD-NEXT:    fmov w8, s0
; CHECK-SD-NEXT:    and w0, w8, #0xff
; CHECK-SD-NEXT:    add sp, sp, #16
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: convert_large_vector:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    sub sp, sp, #16
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
; CHECK-GI-NEXT:    cmeq.4s v0, v0, #0
; CHECK-GI-NEXT:    cmeq.4s v1, v1, #0
; CHECK-GI-NEXT:    mvn.16b v0, v0
; CHECK-GI-NEXT:    mvn.16b v1, v1
; CHECK-GI-NEXT:    uzp1.8h v0, v0, v1
; CHECK-GI-NEXT:    xtn.8b v0, v0
; CHECK-GI-NEXT:    umov.b w8, v0[1]
; CHECK-GI-NEXT:    umov.b w9, v0[0]
; CHECK-GI-NEXT:    umov.b w10, v0[2]
; CHECK-GI-NEXT:    umov.b w11, v0[3]
; CHECK-GI-NEXT:    and w8, w8, #0x1
; CHECK-GI-NEXT:    bfi w9, w8, #1, #31
; CHECK-GI-NEXT:    and w8, w10, #0x1
; CHECK-GI-NEXT:    umov.b w10, v0[4]
; CHECK-GI-NEXT:    orr w8, w9, w8, lsl #2
; CHECK-GI-NEXT:    and w9, w11, #0x1
; CHECK-GI-NEXT:    umov.b w11, v0[5]
; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #3
; CHECK-GI-NEXT:    and w9, w10, #0x1
; CHECK-GI-NEXT:    umov.b w10, v0[6]
; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #4
; CHECK-GI-NEXT:    and w9, w11, #0x1
; CHECK-GI-NEXT:    umov.b w11, v0[7]
; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #5
; CHECK-GI-NEXT:    and w9, w10, #0x1
; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #6
; CHECK-GI-NEXT:    and w9, w11, #0x1
; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #7
; CHECK-GI-NEXT:    strb w8, [sp, #15]
; CHECK-GI-NEXT:    and w0, w8, #0xff
; CHECK-GI-NEXT:    add sp, sp, #16
; CHECK-GI-NEXT:    ret


  %cmp_result = icmp ne <8 x i32> %vec, zeroinitializer
  %bitmask = bitcast <8 x i1> %cmp_result to i8
  ret i8 %bitmask
}

; <4 x i22> input: every lane is compared against zero (icmp ne) and the
; <4 x i1> result is bitcast to an i4 mask.
define i4 @convert_legalized_illegal_element_size(<4 x i22> %vec) {
; CHECK-SD-LABEL: convert_legalized_illegal_element_size:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    movi.4s v1, #63, msl #16
; CHECK-SD-NEXT:    adrp x8, lCPI16_0@PAGE
; CHECK-SD-NEXT:    cmtst.4s v0, v0, v1
; CHECK-SD-NEXT:    ldr d1, [x8, lCPI16_0@PAGEOFF]
; CHECK-SD-NEXT:    xtn.4h v0, v0
; CHECK-SD-NEXT:    and.8b v0, v0, v1
; CHECK-SD-NEXT:    addv.4h h0, v0
; CHECK-SD-NEXT:    fmov w0, s0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: convert_legalized_illegal_element_size:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    sub sp, sp, #16
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
; CHECK-GI-NEXT:    movi.4s v1, #63, msl #16
; CHECK-GI-NEXT:    and.16b v0, v0, v1
; CHECK-GI-NEXT:    cmeq.4s v0, v0, #0
; CHECK-GI-NEXT:    mvn.16b v0, v0
; CHECK-GI-NEXT:    mov.s w8, v0[1]
; CHECK-GI-NEXT:    mov.s w9, v0[2]
; CHECK-GI-NEXT:    fmov w11, s0
; CHECK-GI-NEXT:    mov.s w10, v0[3]
; CHECK-GI-NEXT:    and w8, w8, #0x1
; CHECK-GI-NEXT:    bfi w11, w8, #1, #31
; CHECK-GI-NEXT:    and w8, w9, #0x1
; CHECK-GI-NEXT:    and w9, w10, #0x1
; CHECK-GI-NEXT:    orr w8, w11, w8, lsl #2
; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #3
; CHECK-GI-NEXT:    strb w8, [sp, #15]
; CHECK-GI-NEXT:    and w0, w8, #0xff
; CHECK-GI-NEXT:    add sp, sp, #16
; CHECK-GI-NEXT:    ret

  %cmp_result = icmp ne <4 x i22> %vec, zeroinitializer
  %bitmask = bitcast <4 x i1> %cmp_result to i4
  ret i4 %bitmask
}

; This may still be converted as a v8i8 after the vector concat (but not as v4iX).
; The compare result is widened from 4 to 8 lanes with a shuffle whose first
; four mask entries are undefined and whose last four select the compare
; lanes, so the meaningful bits sit in the upper half of the i8 result.
; The expected code moves each compare lane individually into byte lanes
; 4-7 before running the shl/cmlt/and/addv mask sequence.
define i8 @no_direct_convert_for_bad_concat(<4 x i32> %vec) {
; CHECK-LABEL: no_direct_convert_for_bad_concat:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    cmtst.4s v0, v0, v0
; CHECK-NEXT:    adrp x8, lCPI17_0@PAGE
; CHECK-NEXT:    xtn.4h v0, v0
; CHECK-NEXT:    umov.h w9, v0[0]
; CHECK-NEXT:    mov.b v1[4], w9
; CHECK-NEXT:    umov.h w9, v0[1]
; CHECK-NEXT:    mov.b v1[5], w9
; CHECK-NEXT:    umov.h w9, v0[2]
; CHECK-NEXT:    mov.b v1[6], w9
; CHECK-NEXT:    umov.h w9, v0[3]
; CHECK-NEXT:    mov.b v1[7], w9
; CHECK-NEXT:    shl.8b v0, v1, #7
; CHECK-NEXT:    ldr d1, [x8, lCPI17_0@PAGEOFF]
; CHECK-NEXT:    cmlt.8b v0, v0, #0
; CHECK-NEXT:    and.8b v0, v0, v1
; CHECK-NEXT:    addv.8b b0, v0
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret

  %cmp_result = icmp ne <4 x i32> %vec, zeroinitializer
  %vector_pad = shufflevector <4 x i1> poison, <4 x i1> %cmp_result, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 4, i32 5, i32 6, i32 7>
  %bitmask = bitcast <8 x i1> %vector_pad to i8
  ret i8 %bitmask
}

; The <8 x i1> compare result is returned as a vector; there is no bitcast
; to a scalar. The expected code is only the compare followed by a narrowing
; xtn, with no mask reduction.
define <8 x i1> @no_convert_without_direct_bitcast(<8 x i16> %vec) {
; CHECK-SD-LABEL: no_convert_without_direct_bitcast:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    cmtst.8h v0, v0, v0
; CHECK-SD-NEXT:    xtn.8b v0, v0
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: no_convert_without_direct_bitcast:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    cmeq.8h v0, v0, #0
; CHECK-GI-NEXT:    mvn.16b v0, v0
; CHECK-GI-NEXT:    xtn.8b v0, v0
; CHECK-GI-NEXT:    ret

  %cmp_result = icmp ne <8 x i16> %vec, zeroinitializer
  ret <8 x i1> %cmp_result
}

; A six-element compare bitcast to i6. The expected code for both selectors
; extracts the lanes one at a time and assembles the result with scalar
; and/bfi/orr instructions, then masks it to six bits.
define i6 @no_combine_illegal_num_elements(<6 x i32> %vec) {
; CHECK-SD-LABEL: no_combine_illegal_num_elements:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    sub sp, sp, #16
; CHECK-SD-NEXT:    .cfi_def_cfa_offset 16
; CHECK-SD-NEXT:    fmov s0, w0
; CHECK-SD-NEXT:    fmov s1, w4
; CHECK-SD-NEXT:    mov.s v0[1], w1
; CHECK-SD-NEXT:    mov.s v1[1], w5
; CHECK-SD-NEXT:    mov.s v0[2], w2
; CHECK-SD-NEXT:    cmeq.4s v1, v1, #0
; CHECK-SD-NEXT:    mov.s v0[3], w3
; CHECK-SD-NEXT:    cmeq.4s v0, v0, #0
; CHECK-SD-NEXT:    uzp1.8h v0, v0, v1
; CHECK-SD-NEXT:    mvn.16b v0, v0
; CHECK-SD-NEXT:    xtn.8b v0, v0
; CHECK-SD-NEXT:    umov.b w8, v0[0]
; CHECK-SD-NEXT:    umov.b w9, v0[1]
; CHECK-SD-NEXT:    umov.b w10, v0[2]
; CHECK-SD-NEXT:    and w8, w8, #0x1
; CHECK-SD-NEXT:    bfi w8, w9, #1, #1
; CHECK-SD-NEXT:    umov.b w9, v0[3]
; CHECK-SD-NEXT:    bfi w8, w10, #2, #1
; CHECK-SD-NEXT:    umov.b w10, v0[4]
; CHECK-SD-NEXT:    bfi w8, w9, #3, #1
; CHECK-SD-NEXT:    umov.b w9, v0[5]
; CHECK-SD-NEXT:    bfi w8, w10, #4, #1
; CHECK-SD-NEXT:    orr w8, w8, w9, lsl #5
; CHECK-SD-NEXT:    and w0, w8, #0x3f
; CHECK-SD-NEXT:    add sp, sp, #16
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: no_combine_illegal_num_elements:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    sub sp, sp, #16
; CHECK-GI-NEXT:    .cfi_def_cfa_offset 16
; CHECK-GI-NEXT:    mov.s v0[0], w0
; CHECK-GI-NEXT:    mov.s v1[0], w4
; CHECK-GI-NEXT:    mov.s v2[0], wzr
; CHECK-GI-NEXT:    mov.s v0[1], w1
; CHECK-GI-NEXT:    mov.s v1[1], w5
; CHECK-GI-NEXT:    mov.s v2[1], wzr
; CHECK-GI-NEXT:    mov.s v0[2], w2
; CHECK-GI-NEXT:    cmeq.4s v1, v1, v2
; CHECK-GI-NEXT:    mvn.16b v1, v1
; CHECK-GI-NEXT:    mov.s v0[3], w3
; CHECK-GI-NEXT:    cmeq.4s v0, v0, #0
; CHECK-GI-NEXT:    mvn.16b v0, v0
; CHECK-GI-NEXT:    mov.s w8, v0[1]
; CHECK-GI-NEXT:    mov.s w9, v0[2]
; CHECK-GI-NEXT:    mov.s w10, v0[3]
; CHECK-GI-NEXT:    mov.h v0[1], w8
; CHECK-GI-NEXT:    mov.s w8, v1[1]
; CHECK-GI-NEXT:    mov.h v0[2], w9
; CHECK-GI-NEXT:    mov.h v0[3], w10
; CHECK-GI-NEXT:    mov.h v0[4], v1[0]
; CHECK-GI-NEXT:    mov.h v0[5], w8
; CHECK-GI-NEXT:    umov.h w8, v0[1]
; CHECK-GI-NEXT:    umov.h w9, v0[0]
; CHECK-GI-NEXT:    umov.h w10, v0[2]
; CHECK-GI-NEXT:    umov.h w11, v0[3]
; CHECK-GI-NEXT:    and w8, w8, #0x1
; CHECK-GI-NEXT:    bfi w9, w8, #1, #31
; CHECK-GI-NEXT:    and w8, w10, #0x1
; CHECK-GI-NEXT:    umov.h w10, v0[4]
; CHECK-GI-NEXT:    orr w8, w9, w8, lsl #2
; CHECK-GI-NEXT:    and w9, w11, #0x1
; CHECK-GI-NEXT:    umov.h w11, v0[5]
; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #3
; CHECK-GI-NEXT:    and w9, w10, #0x1
; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #4
; CHECK-GI-NEXT:    and w9, w11, #0x1
; CHECK-GI-NEXT:    orr w8, w8, w9, lsl #5
; CHECK-GI-NEXT:    and w8, w8, #0x3f
; CHECK-GI-NEXT:    strb w8, [sp, #15]
; CHECK-GI-NEXT:    and w0, w8, #0xff
; CHECK-GI-NEXT:    add sp, sp, #16
; CHECK-GI-NEXT:    ret

  %cmp_result = icmp ne <6 x i32> %vec, zeroinitializer
  %bitmask = bitcast <6 x i1> %cmp_result to i6
  ret i6 %bitmask
}

; Only apply the combine when casting a vector to a scalar.
; The bitcast below produces <2 x i8>, i.e. a vector result rather than a
; scalar one.
define <2 x i8> @vector_to_vector_cast(<16 x i1> %arg) nounwind {
; CHECK-SD-LABEL: vector_to_vector_cast:
; CHECK-SD:       ; %bb.0:
; CHECK-SD-NEXT:    sub sp, sp, #16
; CHECK-SD-NEXT:    shl.16b v0, v0, #7
; CHECK-SD-NEXT:    adrp x8, lCPI20_0@PAGE
; CHECK-SD-NEXT:    ldr q1, [x8, lCPI20_0@PAGEOFF]
; CHECK-SD-NEXT:    add x8, sp, #14
; CHECK-SD-NEXT:    cmlt.16b v0, v0, #0
; CHECK-SD-NEXT:    and.16b v0, v0, v1
; CHECK-SD-NEXT:    ext.16b v1, v0, v0, #8
; CHECK-SD-NEXT:    zip1.16b v0, v0, v1
; CHECK-SD-NEXT:    addv.8h h0, v0
; CHECK-SD-NEXT:    str h0, [sp, #14]
; CHECK-SD-NEXT:    ld1.b { v0 }[0], [x8]
; CHECK-SD-NEXT:    orr x8, x8, #0x1
; CHECK-SD-NEXT:    ld1.b { v0 }[4], [x8]
; CHECK-SD-NEXT:    ; kill: def $d0 killed $d0 killed $q0
; CHECK-SD-NEXT:    add sp, sp, #16
; CHECK-SD-NEXT:    ret
;
; CHECK-GI-LABEL: vector_to_vector_cast:
; CHECK-GI:       ; %bb.0:
; CHECK-GI-NEXT:    sub sp, sp, #16
; CHECK-GI-NEXT:    umov.b w8, v0[1]
; CHECK-GI-NEXT:    mov d1, v0[1]
; CHECK-GI-NEXT:    umov.b w10, v0[1]
; CHECK-GI-NEXT:    umov.b w9, v0[0]
; CHECK-GI-NEXT:    umov.b w13, v0[0]
; CHECK-GI-NEXT:    umov.b w14, v0[2]
; CHECK-GI-NEXT:    umov.b w15, v0[3]
; CHECK-GI-NEXT:    umov.b w11, v0[2]
; CHECK-GI-NEXT:    umov.b w16, v0[4]
; CHECK-GI-NEXT:    umov.b w17, v0[5]
; CHECK-GI-NEXT:    umov.b w12, v0[3]
; CHECK-GI-NEXT:    and w8, w8, #0x1
; CHECK-GI-NEXT:    and w10, w10, #0x1
; CHECK-GI-NEXT:    umov.b w0, v1[1]
; CHECK-GI-NEXT:    bfi w9, w8, #1, #31
; CHECK-GI-NEXT:    bfi w13, w10, #1, #31
; CHECK-GI-NEXT:    and w14, w14, #0x1
; CHECK-GI-NEXT:    umov.b w8, v1[0]
; CHECK-GI-NEXT:    umov.b w10, v1[2]
; CHECK-GI-NEXT:    and w15, w15, #0x1
; CHECK-GI-NEXT:    orr w13, w13, w14, lsl #2
; CHECK-GI-NEXT:    umov.b w14, v1[3]
; CHECK-GI-NEXT:    and w11, w11, #0x1
; CHECK-GI-NEXT:    and w0, w0, #0x1
; CHECK-GI-NEXT:    and w16, w16, #0x1
; CHECK-GI-NEXT:    orr w9, w9, w11, lsl #2
; CHECK-GI-NEXT:    orr w13, w13, w15, lsl #3
; CHECK-GI-NEXT:    umov.b w15, v1[4]
; CHECK-GI-NEXT:    umov.b w11, v0[6]
; CHECK-GI-NEXT:    bfi w8, w0, #1, #31
; CHECK-GI-NEXT:    and w10, w10, #0x1
; CHECK-GI-NEXT:    and w17, w17, #0x1
; CHECK-GI-NEXT:    orr w13, w13, w16, lsl #4
; CHECK-GI-NEXT:    and w14, w14, #0x1
; CHECK-GI-NEXT:    umov.b w0, v0[7]
; CHECK-GI-NEXT:    orr w8, w8, w10, lsl #2
; CHECK-GI-NEXT:    umov.b w10, v1[5]
; CHECK-GI-NEXT:    umov.b w16, v1[6]
; CHECK-GI-NEXT:    orr w13, w13, w17, lsl #5
; CHECK-GI-NEXT:    umov.b w17, v0[4]
; CHECK-GI-NEXT:    and w15, w15, #0x1
; CHECK-GI-NEXT:    orr w8, w8, w14, lsl #3
; CHECK-GI-NEXT:    and w12, w12, #0x1
; CHECK-GI-NEXT:    and w11, w11, #0x1
; CHECK-GI-NEXT:    umov.b w14, v1[7]
; CHECK-GI-NEXT:    orr w9, w9, w12, lsl #3
; CHECK-GI-NEXT:    orr w11, w13, w11, lsl #6
; CHECK-GI-NEXT:    orr w8, w8, w15, lsl #4
; CHECK-GI-NEXT:    umov.b w15, v0[5]
; CHECK-GI-NEXT:    and w10, w10, #0x1
; CHECK-GI-NEXT:    and w0, w0, #0x1
; CHECK-GI-NEXT:    and w12, w17, #0x1
; CHECK-GI-NEXT:    umov.b w13, v0[1]
; CHECK-GI-NEXT:    orr w8, w8, w10, lsl #5
; CHECK-GI-NEXT:    and w16, w16, #0x1
; CHECK-GI-NEXT:    orr w9, w9, w12, lsl #4
; CHECK-GI-NEXT:    umov.b w10, v0[0]
; CHECK-GI-NEXT:    orr w11, w11, w0, lsl #7
; CHECK-GI-NEXT:    and w14, w14, #0x1
; CHECK-GI-NEXT:    and w12, w15, #0x1
; CHECK-GI-NEXT:    umov.b w15, v0[2]
; CHECK-GI-NEXT:    orr w8, w8, w16, lsl #6
; CHECK-GI-NEXT:    orr w9, w9, w12, lsl #5
; CHECK-GI-NEXT:    umov.b w12, v0[6]
; CHECK-GI-NEXT:    strb w11, [sp, #8]
; CHECK-GI-NEXT:    and w11, w13, #0x1
; CHECK-GI-NEXT:    umov.b w13, v0[3]
; CHECK-GI-NEXT:    orr w8, w8, w14, lsl #7
; CHECK-GI-NEXT:    umov.b w14, v0[7]
; CHECK-GI-NEXT:    ldr b0, [sp, #8]
; CHECK-GI-NEXT:    bfi w10, w11, #1, #31
; CHECK-GI-NEXT:    and w11, w15, #0x1
; CHECK-GI-NEXT:    strb w8, [sp, #9]
; CHECK-GI-NEXT:    umov.b w15, v0[4]
; CHECK-GI-NEXT:    and w8, w12, #0x1
; CHECK-GI-NEXT:    orr w10, w10, w11, lsl #2
; CHECK-GI-NEXT:    orr w8, w9, w8, lsl #6
; CHECK-GI-NEXT:    and w9, w13, #0x1
; CHECK-GI-NEXT:    umov.b w11, v0[1]
; CHECK-GI-NEXT:    orr w9, w10, w9, lsl #3
; CHECK-GI-NEXT:    umov.b w10, v0[5]
; CHECK-GI-NEXT:    umov.b w12, v0[0]
; CHECK-GI-NEXT:    and w13, w14, #0x1
; CHECK-GI-NEXT:    umov.b w16, v0[2]
; CHECK-GI-NEXT:    umov.b w17, v0[3]
; CHECK-GI-NEXT:    and w14, w15, #0x1
; CHECK-GI-NEXT:    umov.b w15, v0[2]
; CHECK-GI-NEXT:    orr w8, w8, w13, lsl #7
; CHECK-GI-NEXT:    orr w9, w9, w14, lsl #4
; CHECK-GI-NEXT:    umov.b w13, v0[6]
; CHECK-GI-NEXT:    and w11, w11, #0x1
; CHECK-GI-NEXT:    umov.b w14, v0[3]
; CHECK-GI-NEXT:    strb w8, [sp, #10]
; CHECK-GI-NEXT:    and w8, w10, #0x1
; CHECK-GI-NEXT:    bfi w12, w11, #1, #31
; CHECK-GI-NEXT:    orr w8, w9, w8, lsl #5
; CHECK-GI-NEXT:    umov.b w10, v0[4]
; CHECK-GI-NEXT:    and w9, w15, #0x1
; CHECK-GI-NEXT:    umov.b w11, v0[7]
; CHECK-GI-NEXT:    umov.b w15, v0[1]
; CHECK-GI-NEXT:    orr w9, w12, w9, lsl #2
; CHECK-GI-NEXT:    umov.b w12, v0[5]
; CHECK-GI-NEXT:    and w13, w13, #0x1
; CHECK-GI-NEXT:    and w14, w14, #0x1
; CHECK-GI-NEXT:    orr w8, w8, w13, lsl #6
; CHECK-GI-NEXT:    umov.b w13, v0[0]
; CHECK-GI-NEXT:    orr w9, w9, w14, lsl #3
; CHECK-GI-NEXT:    and w10, w10, #0x1
; CHECK-GI-NEXT:    umov.b w14, v0[6]
; CHECK-GI-NEXT:    and w11, w11, #0x1
; CHECK-GI-NEXT:    and w15, w15, #0x1
; CHECK-GI-NEXT:    umov.b w0, v0[3]
; CHECK-GI-NEXT:    orr w9, w9, w10, lsl #4
; CHECK-GI-NEXT:    and w10, w12, #0x1
; CHECK-GI-NEXT:    umov.b w12, v0[7]
; CHECK-GI-NEXT:    orr w8, w8, w11, lsl #7
; CHECK-GI-NEXT:    bfi w13, w15, #1, #31
; CHECK-GI-NEXT:    and w11, w16, #0x1
; CHECK-GI-NEXT:    orr w9, w9, w10, lsl #5
; CHECK-GI-NEXT:    and w10, w14, #0x1
; CHECK-GI-NEXT:    umov.b w14, v0[4]
; CHECK-GI-NEXT:    strb w8, [sp, #11]
; CHECK-GI-NEXT:    umov.b w15, v0[1]
; CHECK-GI-NEXT:    umov.b w16, v0[3]
; CHECK-GI-NEXT:    orr w8, w9, w10, lsl #6
; CHECK-GI-NEXT:    orr w9, w13, w11, lsl #2
; CHECK-GI-NEXT:    and w10, w12, #0x1
; CHECK-GI-NEXT:    and w11, w17, #0x1
; CHECK-GI-NEXT:    umov.b w12, v0[5]
; CHECK-GI-NEXT:    umov.b w17, v0[0]
; CHECK-GI-NEXT:    orr w8, w8, w10, lsl #7
; CHECK-GI-NEXT:    orr w9, w9, w11, lsl #3
; CHECK-GI-NEXT:    umov.b w10, v0[1]
; CHECK-GI-NEXT:    and w11, w14, #0x1
; CHECK-GI-NEXT:    umov.b w14, v0[0]
; CHECK-GI-NEXT:    and w15, w15, #0x1
; CHECK-GI-NEXT:    orr w9, w9, w11, lsl #4
; CHECK-GI-NEXT:    umov.b w11, v0[2]
; CHECK-GI-NEXT:    umov.b w13, v0[6]
; CHECK-GI-NEXT:    and w12, w12, #0x1
; CHECK-GI-NEXT:    bfi w17, w15, #1, #31
; CHECK-GI-NEXT:    umov.b w15, v0[5]
; CHECK-GI-NEXT:    orr w9, w9, w12, lsl #5
; CHECK-GI-NEXT:    and w10, w10, #0x1
; CHECK-GI-NEXT:    umov.b w12, v0[2]
; CHECK-GI-NEXT:    bfi w14, w10, #1, #31
; CHECK-GI-NEXT:    umov.b w10, v0[4]
; CHECK-GI-NEXT:    ldr b1, [sp, #9]
; CHECK-GI-NEXT:    and w11, w11, #0x1
; CHECK-GI-NEXT:    and w13, w13, #0x1
; CHECK-GI-NEXT:    strb w8, [sp, #12]
; CHECK-GI-NEXT:    orr w11, w14, w11, lsl #2
; CHECK-GI-NEXT:    and w14, w16, #0x1
; CHECK-GI-NEXT:    umov.b w16, v0[4]
; CHECK-GI-NEXT:    and w12, w12, #0x1
; CHECK-GI-NEXT:    and w15, w15, #0x1
; CHECK-GI-NEXT:    orr w9, w9, w13, lsl #6
; CHECK-GI-NEXT:    orr w11, w11, w14, lsl #3
; CHECK-GI-NEXT:    orr w12, w17, w12, lsl #2
; CHECK-GI-NEXT:    and w10, w10, #0x1
; CHECK-GI-NEXT:    and w17, w0, #0x1
; CHECK-GI-NEXT:    umov.b w0, v0[5]
; CHECK-GI-NEXT:    umov.b w14, v0[6]
; CHECK-GI-NEXT:    orr w10, w11, w10, lsl #4
; CHECK-GI-NEXT:    orr w12, w12, w17, lsl #3
; CHECK-GI-NEXT:    umov.b w11, v0[7]
; CHECK-GI-NEXT:    and w16, w16, #0x1
; CHECK-GI-NEXT:    umov.b w17, v0[6]
; CHECK-GI-NEXT:    orr w10, w10, w15, lsl #5
; CHECK-GI-NEXT:    umov.b w15, v0[7]
; CHECK-GI-NEXT:    orr w12, w12, w16, lsl #4
; CHECK-GI-NEXT:    and w16, w0, #0x1
; CHECK-GI-NEXT:    umov.b w0, v0[7]
; CHECK-GI-NEXT:    and w14, w14, #0x1
; CHECK-GI-NEXT:    orr w12, w12, w16, lsl #5
; CHECK-GI-NEXT:    orr w10, w10, w14, lsl #6
; CHECK-GI-NEXT:    and w11, w11, #0x1
; CHECK-GI-NEXT:    and w13, w17, #0x1
; CHECK-GI-NEXT:    orr w9, w9, w11, lsl #7
; CHECK-GI-NEXT:    mov.s v0[1], v1[0]
; CHECK-GI-NEXT:    orr w11, w12, w13, lsl #6
; CHECK-GI-NEXT:    and w12, w15, #0x1
; CHECK-GI-NEXT:    ; kill: def $d0 killed $d0 killed $q0
; CHECK-GI-NEXT:    orr w8, w10, w12, lsl #7
; CHECK-GI-NEXT:    and w10, w0, #0x1
; CHECK-GI-NEXT:    strb w9, [sp, #13]
; CHECK-GI-NEXT:    orr w9, w11, w10, lsl #7
; CHECK-GI-NEXT:    strb w8, [sp, #14]
; CHECK-GI-NEXT:    strb w9, [sp, #15]
; CHECK-GI-NEXT:    add sp, sp, #16
; CHECK-GI-NEXT:    ret
  %bc = bitcast <16 x i1> %arg to <2 x i8>
  ret <2 x i8> %bc
}