; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=aarch64-apple-darwin -mattr=+neon -verify-machineinstrs < %s | FileCheck %s

; Every function in this file stores a vector to %out. The first group stores
; <N x i1> values and expects the lanes to be reduced to a single scalar that
; is written with one narrow store. The no_combine_* functions at the end
; expect that reduction sequence NOT to appear.
;
; NOTE(review): the constant-pool operands (lCPIn_0) are loaded but their
; contents are not asserted anywhere in this file; they presumably hold one
; distinct bit per lane - confirm against the llc output if that matters.

; icmp ne on <16 x i8>, stored as <16 x i1>.
; Expected: the inverted cmeq result selects bits of the constant via bic, the
; two 8-byte halves are interleaved (ext + zip1), and addv.8h produces the
; 16-bit value written by a single "str h0".
define void @store_16_elements(<16 x i8> %vec, ptr %out) {
; CHECK-LABEL: store_16_elements:
; CHECK:       ; %bb.0:
; CHECK-NEXT:  Lloh0:
; CHECK-NEXT:    adrp x8, lCPI0_0@PAGE
; CHECK-NEXT:    cmeq.16b v0, v0, #0
; CHECK-NEXT:  Lloh1:
; CHECK-NEXT:    ldr q1, [x8, lCPI0_0@PAGEOFF]
; CHECK-NEXT:    bic.16b v0, v1, v0
; CHECK-NEXT:    ext.16b v1, v0, v0, #8
; CHECK-NEXT:    zip1.16b v0, v0, v1
; CHECK-NEXT:    addv.8h h0, v0
; CHECK-NEXT:    str h0, [x0]
; CHECK-NEXT:    ret
; CHECK-NEXT:    .loh AdrpLdr Lloh0, Lloh1

  %cmp_result = icmp ne <16 x i8> %vec, zeroinitializer
  store <16 x i1> %cmp_result, ptr %out
  ret void
}

; icmp ne on <8 x i16>, stored as <8 x i1>.
; Expected: cmeq + bic against the constant, addv.8h, then a one-byte strb.
define void @store_8_elements(<8 x i16> %vec, ptr %out) {
; CHECK-LABEL: store_8_elements:
; CHECK:       ; %bb.0:
; CHECK-NEXT:  Lloh2:
; CHECK-NEXT:    adrp x8, lCPI1_0@PAGE
; CHECK-NEXT:    cmeq.8h v0, v0, #0
; CHECK-NEXT:  Lloh3:
; CHECK-NEXT:    ldr q1, [x8, lCPI1_0@PAGEOFF]
; CHECK-NEXT:    bic.16b v0, v1, v0
; CHECK-NEXT:    addv.8h h0, v0
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    strb w8, [x0]
; CHECK-NEXT:    ret
; CHECK-NEXT:    .loh AdrpLdr Lloh2, Lloh3

  %cmp_result = icmp ne <8 x i16> %vec, zeroinitializer
  store <8 x i1> %cmp_result, ptr %out
  ret void
}

; icmp ne on <4 x i32>, stored as <4 x i1>.
; Expected: same shape as the 8-element case, reduced with addv.4s.
define void @store_4_elements(<4 x i32> %vec, ptr %out) {
; CHECK-LABEL: store_4_elements:
; CHECK:       ; %bb.0:
; CHECK-NEXT:  Lloh4:
; CHECK-NEXT:    adrp x8, lCPI2_0@PAGE
; CHECK-NEXT:    cmeq.4s v0, v0, #0
; CHECK-NEXT:  Lloh5:
; CHECK-NEXT:    ldr q1, [x8, lCPI2_0@PAGEOFF]
; CHECK-NEXT:    bic.16b v0, v1, v0
; CHECK-NEXT:    addv.4s s0, v0
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    strb w8, [x0]
; CHECK-NEXT:    ret
; CHECK-NEXT:    .loh AdrpLdr Lloh4, Lloh5

  %cmp_result = icmp ne <4 x i32> %vec, zeroinitializer
  store <4 x i1> %cmp_result, ptr %out
  ret void
}

; icmp ne on <2 x i64>, stored as <2 x i1>.
; Expected: the two lanes are combined with a pairwise add (addp.2d) instead
; of an across-vector addv, then written with strb.
define void @store_2_elements(<2 x i64> %vec, ptr %out) {
; CHECK-LABEL: store_2_elements:
; CHECK:       ; %bb.0:
; CHECK-NEXT:  Lloh6:
; CHECK-NEXT:    adrp x8, lCPI3_0@PAGE
; CHECK-NEXT:    cmeq.2d v0, v0, #0
; CHECK-NEXT:  Lloh7:
; CHECK-NEXT:    ldr q1, [x8, lCPI3_0@PAGEOFF]
; CHECK-NEXT:    bic.16b v0, v1, v0
; CHECK-NEXT:    addp.2d d0, v0
; CHECK-NEXT:    fmov x8, d0
; CHECK-NEXT:    strb w8, [x0]
; CHECK-NEXT:    ret
; CHECK-NEXT:    .loh AdrpLdr Lloh6, Lloh7

  %cmp_result = icmp ne <2 x i64> %vec, zeroinitializer
  store <2 x i1> %cmp_result, ptr %out
  ret void
}

; trunc <4 x i32> to <4 x i1> with no explicit compare in the IR.
; Expected: shl #31 followed by cmlt #0 turns bit 0 of each lane into a full
; lane mask before the and/addv reduction and the strb.
define void @add_trunc_compare_before_store(<4 x i32> %vec, ptr %out) {
; CHECK-LABEL: add_trunc_compare_before_store:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    shl.4s v0, v0, #31
; CHECK-NEXT:  Lloh8:
; CHECK-NEXT:    adrp x8, lCPI4_0@PAGE
; CHECK-NEXT:  Lloh9:
; CHECK-NEXT:    ldr q1, [x8, lCPI4_0@PAGEOFF]
; CHECK-NEXT:    cmlt.4s v0, v0, #0
; CHECK-NEXT:    and.16b v0, v0, v1
; CHECK-NEXT:    addv.4s s0, v0
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    strb w8, [x0]
; CHECK-NEXT:    ret
; CHECK-NEXT:    .loh AdrpLdr Lloh8, Lloh9

  %trunc = trunc <4 x i32> %vec to <4 x i1>
  store <4 x i1> %trunc, ptr %out
  ret void
}

; A <4 x i1> argument stored directly, so the IR gives no wider element type.
; Expected: the value is handled as four 16-bit lanes in a 64-bit register
; (shl.4h #15, cmlt.4h, and.8b, addv.4h) and written with strb.
define void @add_trunc_mask_unknown_vector_type(<4 x i1> %vec, ptr %out) {
; CHECK-LABEL: add_trunc_mask_unknown_vector_type:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    shl.4h v0, v0, #15
; CHECK-NEXT:  Lloh10:
; CHECK-NEXT:    adrp x8, lCPI5_0@PAGE
; CHECK-NEXT:  Lloh11:
; CHECK-NEXT:    ldr d1, [x8, lCPI5_0@PAGEOFF]
; CHECK-NEXT:    cmlt.4h v0, v0, #0
; CHECK-NEXT:    and.8b v0, v0, v1
; CHECK-NEXT:    addv.4h h0, v0
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    strb w8, [x0]
; CHECK-NEXT:    ret
; CHECK-NEXT:    .loh AdrpLdr Lloh10, Lloh11

  store <4 x i1> %vec, ptr %out
  ret void
}

; icmp ne on a 64-bit <8 x i8> vector, stored as <8 x i1>.
; Expected: 8b-wide cmeq/bic with a d-register constant, addv.8b, and a lane-0
; byte store (st1.b) straight from the vector register.
define void @store_8_elements_64_bit_vector(<8 x i8> %vec, ptr %out) {
; CHECK-LABEL: store_8_elements_64_bit_vector:
; CHECK:       ; %bb.0:
; CHECK-NEXT:  Lloh12:
; CHECK-NEXT:    adrp x8, lCPI6_0@PAGE
; CHECK-NEXT:    cmeq.8b v0, v0, #0
; CHECK-NEXT:  Lloh13:
; CHECK-NEXT:    ldr d1, [x8, lCPI6_0@PAGEOFF]
; CHECK-NEXT:    bic.8b v0, v1, v0
; CHECK-NEXT:    addv.8b b0, v0
; CHECK-NEXT:    st1.b { v0 }[0], [x0]
; CHECK-NEXT:    ret
; CHECK-NEXT:    .loh AdrpLdr Lloh12, Lloh13

  %cmp_result = icmp ne <8 x i8> %vec, zeroinitializer
  store <8 x i1> %cmp_result, ptr %out
  ret void
}

; icmp ne on a 64-bit <4 x i16> vector, stored as <4 x i1>.
; Expected: cmeq.4h + bic.8b, addv.4h, fmov to a GPR, strb.
define void @store_4_elements_64_bit_vector(<4 x i16> %vec, ptr %out) {
; CHECK-LABEL: store_4_elements_64_bit_vector:
; CHECK:       ; %bb.0:
; CHECK-NEXT:  Lloh14:
; CHECK-NEXT:    adrp x8, lCPI7_0@PAGE
; CHECK-NEXT:    cmeq.4h v0, v0, #0
; CHECK-NEXT:  Lloh15:
; CHECK-NEXT:    ldr d1, [x8, lCPI7_0@PAGEOFF]
; CHECK-NEXT:    bic.8b v0, v1, v0
; CHECK-NEXT:    addv.4h h0, v0
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    strb w8, [x0]
; CHECK-NEXT:    ret
; CHECK-NEXT:    .loh AdrpLdr Lloh14, Lloh15

  %cmp_result = icmp ne <4 x i16> %vec, zeroinitializer
  store <4 x i1> %cmp_result, ptr %out
  ret void
}

; icmp ne on a 64-bit <2 x i32> vector, stored as <2 x i1>.
; Expected: the two lanes are combined with the vector form of addp
; (addp.2s v0, v0, v0) before the strb.
define void @store_2_elements_64_bit_vector(<2 x i32> %vec, ptr %out) {
; CHECK-LABEL: store_2_elements_64_bit_vector:
; CHECK:       ; %bb.0:
; CHECK-NEXT:  Lloh16:
; CHECK-NEXT:    adrp x8, lCPI8_0@PAGE
; CHECK-NEXT:    cmeq.2s v0, v0, #0
; CHECK-NEXT:  Lloh17:
; CHECK-NEXT:    ldr d1, [x8, lCPI8_0@PAGEOFF]
; CHECK-NEXT:    bic.8b v0, v1, v0
; CHECK-NEXT:    addp.2s v0, v0, v0
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    strb w8, [x0]
; CHECK-NEXT:    ret
; CHECK-NEXT:    .loh AdrpLdr Lloh16, Lloh17

  %cmp_result = icmp ne <2 x i32> %vec, zeroinitializer
  store <2 x i1> %cmp_result, ptr %out
  ret void
}

; Negative case: the compare result is sign-extended back to <16 x i8> and
; stored at full width.
; Expected: only cmtst and a 128-bit store; no constant load or reduction.
define void @no_combine_without_truncate(<16 x i8> %vec, ptr %out) {
; CHECK-LABEL: no_combine_without_truncate:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    cmtst.16b v0, v0, v0
; CHECK-NEXT:    str q0, [x0]
; CHECK-NEXT:    ret

  %cmp_result = icmp ne <16 x i8> %vec, zeroinitializer
  %extended_result = sext <16 x i1> %cmp_result to <16 x i8>
  store <16 x i8> %extended_result, ptr %out
  ret void
}

; Negative case: the truncate produces <4 x i8>, not a vector of i1.
; Expected: plain narrowing (xtn, uzp1) and a 32-bit store.
define void @no_combine_for_non_bool_truncate(<4 x i32> %vec, ptr %out) {
; CHECK-LABEL: no_combine_for_non_bool_truncate:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    xtn.4h v0, v0
; CHECK-NEXT:    uzp1.8b v0, v0, v0
; CHECK-NEXT:    str s0, [x0]
; CHECK-NEXT:    ret

  %trunc = trunc <4 x i32> %vec to <4 x i8>
  store <4 x i8> %trunc, ptr %out
  ret void
}

; Negative case: the <4 x i1> is assembled from four scalar i1 arguments.
; Expected: the bits are merged in general-purpose registers with shifted orr
; instructions and written with strb; no vector instructions are used.
define void @no_combine_for_build_vector(i1 %a, i1 %b, i1 %c, i1 %d, ptr %out) {
; CHECK-LABEL: no_combine_for_build_vector:
; CHECK:       ; %bb.0:
; CHECK-NEXT:    orr w8, w0, w1, lsl #1
; CHECK-NEXT:    orr w8, w8, w2, lsl #2
; CHECK-NEXT:    orr w8, w8, w3, lsl #3
; CHECK-NEXT:    strb w8, [x4]
; CHECK-NEXT:    ret

  ; Seed the chain with poison rather than the deprecated undef; all four
  ; lanes are overwritten below, so the stored value does not depend on it.
  %1 = insertelement <4 x i1> poison, i1 %a, i64 0
  %2 = insertelement <4 x i1> %1, i1 %b, i64 1
  %3 = insertelement <4 x i1> %2, i1 %c, i64 2
  %vec = insertelement <4 x i1> %3, i1 %d, i64 3
  store <4 x i1> %vec, ptr %out
  ret void
}