; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=arm64-apple-ios -o - %s | FileCheck %s

; Every function below selects between an integer vector and zeroinitializer
; using a vector icmp as the condition. Most of them also extend the compared
; value and use that extension as the non-zero select operand.

; The select operand %op is an independent argument; no extension of %a exists.
; Expected output compares at byte width (cmhi.16b) and sign-extends the mask.
define <16 x i32> @no_existing_zext(<16 x i8> %a, <16 x i32> %op) {
; CHECK-LABEL: no_existing_zext:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: movi.16b v5, #10
; CHECK-NEXT: cmhi.16b v0, v0, v5
; CHECK-NEXT: sshll.8h v5, v0, #0
; CHECK-NEXT: sshll2.8h v0, v0, #0
; CHECK-NEXT: sshll2.4s v16, v0, #0
; CHECK-NEXT: sshll.4s v6, v5, #0
; CHECK-NEXT: sshll.4s v7, v0, #0
; CHECK-NEXT: sshll2.4s v5, v5, #0
; CHECK-NEXT: and.16b v4, v4, v16
; CHECK-NEXT: and.16b v0, v1, v6
; CHECK-NEXT: and.16b v1, v2, v5
; CHECK-NEXT: and.16b v2, v3, v7
; CHECK-NEXT: mov.16b v3, v4
; CHECK-NEXT: ret
entry:
  %cmp = icmp ugt <16 x i8> %a, <i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10>
  %sel = select <16 x i1> %cmp, <16 x i32> %op, <16 x i32> zeroinitializer
  ret <16 x i32> %sel
}

; zext of %a is the select operand, but %a is compared (sgt) against the
; argument %b rather than a constant splat. Expected output keeps the compare
; at byte width (cmgt.16b) and sign-extends the mask.
define <16 x i32> @second_compare_operand_not_splat(<16 x i8> %a, <16 x i8> %b) {
; CHECK-LABEL: second_compare_operand_not_splat:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: cmgt.16b v1, v0, v1
; CHECK-NEXT: ushll.8h v2, v0, #0
; CHECK-NEXT: ushll2.8h v0, v0, #0
; CHECK-NEXT: sshll.8h v3, v1, #0
; CHECK-NEXT: sshll2.8h v1, v1, #0
; CHECK-NEXT: ushll.4s v4, v2, #0
; CHECK-NEXT: ushll.4s v5, v0, #0
; CHECK-NEXT: ushll2.4s v2, v2, #0
; CHECK-NEXT: ushll2.4s v6, v0, #0
; CHECK-NEXT: sshll.4s v0, v3, #0
; CHECK-NEXT: sshll.4s v7, v1, #0
; CHECK-NEXT: sshll2.4s v16, v3, #0
; CHECK-NEXT: sshll2.4s v1, v1, #0
; CHECK-NEXT: and.16b v0, v4, v0
; CHECK-NEXT: and.16b v3, v6, v1
; CHECK-NEXT: and.16b v1, v2, v16
; CHECK-NEXT: and.16b v2, v5, v7
; CHECK-NEXT: ret
entry:
  %ext = zext <16 x i8> %a to <16 x i32>
  %cmp = icmp sgt <16 x i8> %a, %b
  %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer
  ret <16 x i32> %sel
}

; zext of %a is the select operand while %a is compared with a signed
; predicate (sgt) against splat 10. Expected output keeps the compare at byte
; width (cmgt.16b) and sign-extends the mask.
define <16 x i32> @same_zext_used_in_cmp_signed_pred_and_select(<16 x i8> %a) {
; CHECK-LABEL: same_zext_used_in_cmp_signed_pred_and_select:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: movi.16b v1, #10
; CHECK-NEXT: ushll.8h v2, v0, #0
; CHECK-NEXT: ushll.4s v4, v2, #0
; CHECK-NEXT: ushll2.4s v2, v2, #0
; CHECK-NEXT: cmgt.16b v1, v0, v1
; CHECK-NEXT: ushll2.8h v0, v0, #0
; CHECK-NEXT: sshll.8h v3, v1, #0
; CHECK-NEXT: sshll2.8h v1, v1, #0
; CHECK-NEXT: ushll.4s v5, v0, #0
; CHECK-NEXT: ushll2.4s v6, v0, #0
; CHECK-NEXT: sshll.4s v0, v3, #0
; CHECK-NEXT: sshll.4s v7, v1, #0
; CHECK-NEXT: sshll2.4s v16, v3, #0
; CHECK-NEXT: sshll2.4s v1, v1, #0
; CHECK-NEXT: and.16b v0, v4, v0
; CHECK-NEXT: and.16b v3, v6, v1
; CHECK-NEXT: and.16b v1, v2, v16
; CHECK-NEXT: and.16b v2, v5, v7
; CHECK-NEXT: ret
entry:
  %ext = zext <16 x i8> %a to <16 x i32>
  %cmp = icmp sgt <16 x i8> %a, <i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10>
  %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer
  ret <16 x i32> %sel
}

; zext <8 x i8> -> <8 x i64> with an unsigned compare (ugt splat 10) on the
; narrow value. Expected output compares the widened lanes (cmhi.2d).
define <8 x i64> @same_zext_used_in_cmp_unsigned_pred_and_select_v8i64(<8 x i8> %a) {
; CHECK-LABEL: same_zext_used_in_cmp_unsigned_pred_and_select_v8i64:
; CHECK: ; %bb.0:
; CHECK-NEXT: ushll.8h v0, v0, #0
; CHECK-NEXT: mov w8, #10 ; =0xa
; CHECK-NEXT: dup.2d v2, x8
; CHECK-NEXT: ushll.4s v1, v0, #0
; CHECK-NEXT: ushll2.4s v0, v0, #0
; CHECK-NEXT: ushll.2d v3, v1, #0
; CHECK-NEXT: ushll2.2d v4, v0, #0
; CHECK-NEXT: ushll2.2d v1, v1, #0
; CHECK-NEXT: ushll.2d v5, v0, #0
; CHECK-NEXT: cmhi.2d v0, v3, v2
; CHECK-NEXT: cmhi.2d v7, v1, v2
; CHECK-NEXT: cmhi.2d v6, v5, v2
; CHECK-NEXT: cmhi.2d v2, v4, v2
; CHECK-NEXT: and.16b v0, v3, v0
; CHECK-NEXT: and.16b v1, v1, v7
; CHECK-NEXT: and.16b v3, v4, v2
; CHECK-NEXT: and.16b v2, v5, v6
; CHECK-NEXT: ret
  %ext = zext <8 x i8> %a to <8 x i64>
  %cmp = icmp ugt <8 x i8> %a, <i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10>
  %sel = select <8 x i1> %cmp, <8 x i64> %ext, <8 x i64> zeroinitializer
  ret <8 x i64> %sel
}


; zext <16 x i8> -> <16 x i32> with ugt splat 10 on the narrow value.
; Expected output compares the widened lanes (cmhi.4s).
define <16 x i32> @same_zext_used_in_cmp_unsigned_pred_and_select_v16i32(<16 x i8> %a) {
; CHECK-LABEL: same_zext_used_in_cmp_unsigned_pred_and_select_v16i32:
; CHECK: ; %bb.0:
; CHECK-NEXT: ushll.8h v2, v0, #0
; CHECK-NEXT: ushll2.8h v0, v0, #0
; CHECK-NEXT: movi.4s v1, #10
; CHECK-NEXT: ushll.4s v3, v2, #0
; CHECK-NEXT: ushll2.4s v4, v0, #0
; CHECK-NEXT: ushll2.4s v2, v2, #0
; CHECK-NEXT: ushll.4s v5, v0, #0
; CHECK-NEXT: cmhi.4s v0, v3, v1
; CHECK-NEXT: cmhi.4s v7, v2, v1
; CHECK-NEXT: cmhi.4s v6, v5, v1
; CHECK-NEXT: cmhi.4s v1, v4, v1
; CHECK-NEXT: and.16b v0, v3, v0
; CHECK-NEXT: and.16b v3, v4, v1
; CHECK-NEXT: and.16b v1, v2, v7
; CHECK-NEXT: and.16b v2, v5, v6
; CHECK-NEXT: ret
  %ext = zext <16 x i8> %a to <16 x i32>
  %cmp = icmp ugt <16 x i8> %a, <i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10>
  %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer
  ret <16 x i32> %sel
}

; zext <8 x i8> -> <8 x i32> with ugt splat 10 on the narrow value.
; Expected output compares the widened lanes (cmhi.4s).
define <8 x i32> @same_zext_used_in_cmp_unsigned_pred_and_select_v8i32(<8 x i8> %a) {
; CHECK-LABEL: same_zext_used_in_cmp_unsigned_pred_and_select_v8i32:
; CHECK: ; %bb.0:
; CHECK-NEXT: ushll.8h v0, v0, #0
; CHECK-NEXT: movi.4s v1, #10
; CHECK-NEXT: ushll2.4s v2, v0, #0
; CHECK-NEXT: ushll.4s v0, v0, #0
; CHECK-NEXT: cmhi.4s v3, v0, v1
; CHECK-NEXT: cmhi.4s v1, v2, v1
; CHECK-NEXT: and.16b v1, v2, v1
; CHECK-NEXT: and.16b v0, v0, v3
; CHECK-NEXT: ret
  %ext = zext <8 x i8> %a to <8 x i32>
  %cmp = icmp ugt <8 x i8> %a, <i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10>
  %sel = select <8 x i1> %cmp, <8 x i32> %ext, <8 x i32> zeroinitializer
  ret <8 x i32> %sel
}

; Same as the v8i32 case but the source is <8 x i16>, so a single widening
; step is needed. Expected output compares the widened lanes (cmhi.4s).
define <8 x i32> @same_zext_used_in_cmp_unsigned_pred_and_select_v8i32_2(<8 x i16> %a) {
; CHECK-LABEL: same_zext_used_in_cmp_unsigned_pred_and_select_v8i32_2:
; CHECK: ; %bb.0:
; CHECK-NEXT: movi.4s v1, #10
; CHECK-NEXT: ushll2.4s v2, v0, #0
; CHECK-NEXT: ushll.4s v0, v0, #0
; CHECK-NEXT: cmhi.4s v3, v0, v1
; CHECK-NEXT: cmhi.4s v1, v2, v1
; CHECK-NEXT: and.16b v1, v2, v1
; CHECK-NEXT: and.16b v0, v0, v3
; CHECK-NEXT: ret
  %ext = zext <8 x i16> %a to <8 x i32>
  %cmp = icmp ugt <8 x i16> %a, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
  %sel = select <8 x i1> %cmp, <8 x i32> %ext, <8 x i32> zeroinitializer
  ret <8 x i32> %sel
}


; Source element type is i15. Expected output first clears the top bit of each
; 16-bit lane (bic.8h #128, lsl #8), then widens and compares with cmhi.4s.
define <8 x i32> @same_zext_used_in_cmp_unsigned_pred_and_select_v8i32_from_v8i15(<8 x i15> %a) {
; CHECK-LABEL: same_zext_used_in_cmp_unsigned_pred_and_select_v8i32_from_v8i15:
; CHECK: ; %bb.0:
; CHECK-NEXT: bic.8h v0, #128, lsl #8
; CHECK-NEXT: movi.4s v1, #10
; CHECK-NEXT: ushll2.4s v2, v0, #0
; CHECK-NEXT: ushll.4s v0, v0, #0
; CHECK-NEXT: cmhi.4s v3, v0, v1
; CHECK-NEXT: cmhi.4s v1, v2, v1
; CHECK-NEXT: and.16b v1, v2, v1
; CHECK-NEXT: and.16b v0, v0, v3
; CHECK-NEXT: ret
  %ext = zext <8 x i15> %a to <8 x i32>
  %cmp = icmp ugt <8 x i15> %a, <i15 10, i15 10, i15 10, i15 10, i15 10, i15 10, i15 10, i15 10>
  %sel = select <8 x i1> %cmp, <8 x i32> %ext, <8 x i32> zeroinitializer
  ret <8 x i32> %sel
}

; Seven-element vectors. Expected output compares at 8h width (cmhi.8h),
; sign-extends the mask, and moves the seven result lanes into w0-w6.
define <7 x i32> @same_zext_used_in_cmp_unsigned_pred_and_select_v7i32(<7 x i16> %a) {
; CHECK-LABEL: same_zext_used_in_cmp_unsigned_pred_and_select_v7i32:
; CHECK: ; %bb.0:
; CHECK-NEXT: movi.8h v1, #10
; CHECK-NEXT: ushll.4s v2, v0, #0
; CHECK-NEXT: cmhi.8h v1, v0, v1
; CHECK-NEXT: ushll2.4s v0, v0, #0
; CHECK-NEXT: sshll.4s v3, v1, #0
; CHECK-NEXT: sshll2.4s v1, v1, #0
; CHECK-NEXT: and.16b v2, v2, v3
; CHECK-NEXT: and.16b v0, v0, v1
; CHECK-NEXT: mov.s w1, v2[1]
; CHECK-NEXT: mov.s w2, v2[2]
; CHECK-NEXT: mov.s w3, v2[3]
; CHECK-NEXT: mov.s w5, v0[1]
; CHECK-NEXT: mov.s w6, v0[2]
; CHECK-NEXT: fmov w0, s2
; CHECK-NEXT: fmov w4, s0
; CHECK-NEXT: ret
  %ext = zext <7 x i16> %a to <7 x i32>
  %cmp = icmp ugt <7 x i16> %a, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
  %sel = select <7 x i1> %cmp, <7 x i32> %ext, <7 x i32> zeroinitializer
  ret <7 x i32> %sel
}

; Three-element vectors, <3 x i8> -> <3 x i32>. Expected output builds the
; input from w0-w2, compares at 4h width (cmhi.4h) against a constant-pool
; operand, and sign-extends the mask.
; NOTE(review): the function name says v3i16 but the result type is <3 x i32>.
define <3 x i32> @same_zext_used_in_cmp_unsigned_pred_and_select_v3i16(<3 x i8> %a) {
; CHECK-LABEL: same_zext_used_in_cmp_unsigned_pred_and_select_v3i16:
; CHECK: ; %bb.0:
; CHECK-NEXT: fmov s0, w0
; CHECK-NEXT: Lloh0:
; CHECK-NEXT: adrp x8, lCPI9_0@PAGE
; CHECK-NEXT: movi.2d v3, #0x0000ff000000ff
; CHECK-NEXT: Lloh1:
; CHECK-NEXT: ldr d2, [x8, lCPI9_0@PAGEOFF]
; CHECK-NEXT: mov.h v0[1], w1
; CHECK-NEXT: mov.h v0[2], w2
; CHECK-NEXT: ushll.4s v1, v0, #0
; CHECK-NEXT: bic.4h v0, #255, lsl #8
; CHECK-NEXT: cmhi.4h v0, v0, v2
; CHECK-NEXT: and.16b v1, v1, v3
; CHECK-NEXT: sshll.4s v0, v0, #0
; CHECK-NEXT: and.16b v0, v1, v0
; CHECK-NEXT: ret
; CHECK-NEXT: .loh AdrpLdr Lloh0, Lloh1
  %ext = zext <3 x i8> %a to <3 x i32>
  %cmp = icmp ugt <3 x i8> %a, <i8 10, i8 10, i8 10>
  %sel = select <3 x i1> %cmp, <3 x i32> %ext, <3 x i32> zeroinitializer
  ret <3 x i32> %sel
}

; <4 x i16> -> <4 x i32>: the result fits one 128-bit register. Expected
; output is a single widening followed by cmhi.4s and and.16b.
define <4 x i32> @same_zext_used_in_cmp_unsigned_pred_and_select_v4i32(<4 x i16> %a) {
; CHECK-LABEL: same_zext_used_in_cmp_unsigned_pred_and_select_v4i32:
; CHECK: ; %bb.0:
; CHECK-NEXT: movi.4s v1, #10
; CHECK-NEXT: ushll.4s v0, v0, #0
; CHECK-NEXT: cmhi.4s v1, v0, v1
; CHECK-NEXT: and.16b v0, v0, v1
; CHECK-NEXT: ret
  %ext = zext <4 x i16> %a to <4 x i32>
  %cmp = icmp ugt <4 x i16> %a, <i16 10, i16 10, i16 10, i16 10>
  %sel = select <4 x i1> %cmp, <4 x i32> %ext, <4 x i32> zeroinitializer
  ret <4 x i32> %sel
}

; <2 x i16> -> <2 x i32>. Expected output masks each 32-bit lane with 0xffff
; (movi d1 + and.8b) instead of widening, then compares with cmhi.2s.
define <2 x i32> @same_zext_used_in_cmp_unsigned_pred_and_select_v2i32(<2 x i16> %a) {
; CHECK-LABEL: same_zext_used_in_cmp_unsigned_pred_and_select_v2i32:
; CHECK: ; %bb.0:
; CHECK-NEXT: movi d1, #0x00ffff0000ffff
; CHECK-NEXT: movi.2s v2, #10
; CHECK-NEXT: and.8b v0, v0, v1
; CHECK-NEXT: cmhi.2s v1, v0, v2
; CHECK-NEXT: and.8b v0, v0, v1
; CHECK-NEXT: ret
  %ext = zext <2 x i16> %a to <2 x i32>
  %cmp = icmp ugt <2 x i16> %a, <i16 10, i16 10>
  %sel = select <2 x i1> %cmp, <2 x i32> %ext, <2 x i32> zeroinitializer
  ret <2 x i32> %sel
}

; Equality predicate with a zext operand. Expected output compares the widened
; lanes with cmeq.4s.
define <8 x i32> @same_zext_used_in_cmp_eq_and_select_v8i32(<8 x i16> %a) {
; CHECK-LABEL: same_zext_used_in_cmp_eq_and_select_v8i32:
; CHECK: ; %bb.0:
; CHECK-NEXT: movi.4s v1, #10
; CHECK-NEXT: ushll2.4s v2, v0, #0
; CHECK-NEXT: ushll.4s v0, v0, #0
; CHECK-NEXT: cmeq.4s v3, v0, v1
; CHECK-NEXT: cmeq.4s v1, v2, v1
; CHECK-NEXT: and.16b v1, v2, v1
; CHECK-NEXT: and.16b v0, v0, v3
; CHECK-NEXT: ret
  %ext = zext <8 x i16> %a to <8 x i32>
  %cmp = icmp eq <8 x i16> %a, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
  %sel = select <8 x i1> %cmp, <8 x i32> %ext, <8 x i32> zeroinitializer
  ret <8 x i32> %sel
}

; Equality predicate with an i13 source element type. Expected output clears
; the top three bits of each 16-bit lane (bic.8h #224, lsl #8) before widening
; and comparing with cmeq.4s.
define <8 x i32> @same_zext_used_in_cmp_eq_and_select_v8i32_from_v8i13(<8 x i13> %a) {
; CHECK-LABEL: same_zext_used_in_cmp_eq_and_select_v8i32_from_v8i13:
; CHECK: ; %bb.0:
; CHECK-NEXT: bic.8h v0, #224, lsl #8
; CHECK-NEXT: movi.4s v1, #10
; CHECK-NEXT: ushll2.4s v2, v0, #0
; CHECK-NEXT: ushll.4s v0, v0, #0
; CHECK-NEXT: cmeq.4s v3, v0, v1
; CHECK-NEXT: cmeq.4s v1, v2, v1
; CHECK-NEXT: and.16b v1, v2, v1
; CHECK-NEXT: and.16b v0, v0, v3
; CHECK-NEXT: ret
  %ext = zext <8 x i13> %a to <8 x i32>
  %cmp = icmp eq <8 x i13> %a, <i13 10, i13 10, i13 10, i13 10, i13 10, i13 10, i13 10, i13 10>
  %sel = select <8 x i1> %cmp, <8 x i32> %ext, <8 x i32> zeroinitializer
  ret <8 x i32> %sel
}

; Inequality predicate with a zext operand. Expected output compares the
; widened lanes with cmeq.4s and applies the inverted mask with bic.16b.
; NOTE(review): the name says v8i32 but the types are <16 x i8> -> <16 x i32>.
define <16 x i32> @same_zext_used_in_cmp_ne_and_select_v8i32(<16 x i8> %a) {
; CHECK-LABEL: same_zext_used_in_cmp_ne_and_select_v8i32:
; CHECK: ; %bb.0:
; CHECK-NEXT: ushll.8h v2, v0, #0
; CHECK-NEXT: ushll2.8h v0, v0, #0
; CHECK-NEXT: movi.4s v1, #10
; CHECK-NEXT: ushll.4s v3, v2, #0
; CHECK-NEXT: ushll2.4s v4, v0, #0
; CHECK-NEXT: ushll2.4s v2, v2, #0
; CHECK-NEXT: ushll.4s v5, v0, #0
; CHECK-NEXT: cmeq.4s v0, v3, v1
; CHECK-NEXT: cmeq.4s v7, v2, v1
; CHECK-NEXT: cmeq.4s v6, v5, v1
; CHECK-NEXT: cmeq.4s v1, v4, v1
; CHECK-NEXT: bic.16b v0, v3, v0
; CHECK-NEXT: bic.16b v3, v4, v1
; CHECK-NEXT: bic.16b v1, v2, v7
; CHECK-NEXT: bic.16b v2, v5, v6
; CHECK-NEXT: ret
  %ext = zext <16 x i8> %a to <16 x i32>
  %cmp = icmp ne <16 x i8> %a, <i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10>
  %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer
  ret <16 x i32> %sel
}

; A variation of @same_zext_used_in_cmp_unsigned_pred_and_select, with
; multiple users of the compare.
; %cmp feeds both the returned <16 x i32> select and a <16 x i64> select whose
; result is stored to %ptr. Expected output keeps the compare at byte width
; (cmhi.16b) and sign-extends the mask to both 4s and 2d lanes.
define <16 x i32> @same_zext_used_in_cmp_unsigned_pred_and_select_other_use(<16 x i8> %a, <16 x i64> %v, ptr %ptr) {
; CHECK-LABEL: same_zext_used_in_cmp_unsigned_pred_and_select_other_use:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: movi.16b v16, #10
; CHECK-NEXT: ushll.8h v19, v0, #0
; CHECK-NEXT: ldr q21, [sp]
; CHECK-NEXT: ushll.4s v24, v19, #0
; CHECK-NEXT: ushll2.4s v19, v19, #0
; CHECK-NEXT: cmhi.16b v16, v0, v16
; CHECK-NEXT: ushll2.8h v0, v0, #0
; CHECK-NEXT: sshll2.8h v17, v16, #0
; CHECK-NEXT: sshll.8h v16, v16, #0
; CHECK-NEXT: ushll.4s v25, v0, #0
; CHECK-NEXT: ushll2.4s v0, v0, #0
; CHECK-NEXT: sshll2.4s v18, v17, #0
; CHECK-NEXT: sshll.4s v17, v17, #0
; CHECK-NEXT: sshll2.4s v22, v16, #0
; CHECK-NEXT: sshll.4s v16, v16, #0
; CHECK-NEXT: sshll2.2d v20, v18, #0
; CHECK-NEXT: sshll.2d v23, v18, #0
; CHECK-NEXT: sshll2.2d v26, v17, #0
; CHECK-NEXT: sshll.2d v27, v17, #0
; CHECK-NEXT: and.16b v20, v21, v20
; CHECK-NEXT: sshll2.2d v21, v22, #0
; CHECK-NEXT: and.16b v7, v7, v23
; CHECK-NEXT: sshll.2d v23, v22, #0
; CHECK-NEXT: and.16b v6, v6, v26
; CHECK-NEXT: sshll2.2d v26, v16, #0
; CHECK-NEXT: and.16b v5, v5, v27
; CHECK-NEXT: stp q7, q20, [x0, #96]
; CHECK-NEXT: sshll.2d v20, v16, #0
; CHECK-NEXT: and.16b v21, v4, v21
; CHECK-NEXT: and.16b v4, v0, v18
; CHECK-NEXT: and.16b v7, v3, v23
; CHECK-NEXT: and.16b v3, v19, v22
; CHECK-NEXT: stp q5, q6, [x0, #64]
; CHECK-NEXT: and.16b v0, v24, v16
; CHECK-NEXT: and.16b v6, v2, v26
; CHECK-NEXT: and.16b v2, v25, v17
; CHECK-NEXT: and.16b v5, v1, v20
; CHECK-NEXT: mov.16b v1, v3
; CHECK-NEXT: mov.16b v3, v4
; CHECK-NEXT: stp q7, q21, [x0, #32]
; CHECK-NEXT: stp q5, q6, [x0]
; CHECK-NEXT: ret
entry:
  %ext = zext <16 x i8> %a to <16 x i32>
  %cmp = icmp ugt <16 x i8> %a, <i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10>
  %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer
  %sel.2 = select <16 x i1> %cmp, <16 x i64> %v, <16 x i64> zeroinitializer
  store <16 x i64> %sel.2, ptr %ptr
  ret <16 x i32> %sel
}

; sext of %a is the select operand and %a is compared with a signed predicate
; (sgt splat 10). Expected output compares the sign-extended lanes (cmgt.4s).
define <16 x i32> @same_sext_used_in_cmp_signed_pred_and_select_v16i32(<16 x i8> %a) {
; CHECK-LABEL: same_sext_used_in_cmp_signed_pred_and_select_v16i32:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: sshll.8h v2, v0, #0
; CHECK-NEXT: sshll2.8h v0, v0, #0
; CHECK-NEXT: movi.4s v1, #10
; CHECK-NEXT: sshll.4s v3, v2, #0
; CHECK-NEXT: sshll2.4s v4, v0, #0
; CHECK-NEXT: sshll2.4s v2, v2, #0
; CHECK-NEXT: sshll.4s v5, v0, #0
; CHECK-NEXT: cmgt.4s v0, v3, v1
; CHECK-NEXT: cmgt.4s v7, v2, v1
; CHECK-NEXT: cmgt.4s v6, v5, v1
; CHECK-NEXT: cmgt.4s v1, v4, v1
; CHECK-NEXT: and.16b v0, v3, v0
; CHECK-NEXT: and.16b v3, v4, v1
; CHECK-NEXT: and.16b v1, v2, v7
; CHECK-NEXT: and.16b v2, v5, v6
; CHECK-NEXT: ret
entry:
  %ext = sext <16 x i8> %a to <16 x i32>
  %cmp = icmp sgt <16 x i8> %a, <i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10>
  %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer
  ret <16 x i32> %sel
}

; Equality predicate with a sext operand. Expected output compares the
; sign-extended lanes with cmeq.4s.
define <8 x i32> @same_sext_used_in_cmp_eq_and_select_v8i32(<8 x i16> %a) {
; CHECK-LABEL: same_sext_used_in_cmp_eq_and_select_v8i32:
; CHECK: ; %bb.0:
; CHECK-NEXT: movi.4s v1, #10
; CHECK-NEXT: sshll2.4s v2, v0, #0
; CHECK-NEXT: sshll.4s v0, v0, #0
; CHECK-NEXT: cmeq.4s v3, v0, v1
; CHECK-NEXT: cmeq.4s v1, v2, v1
; CHECK-NEXT: and.16b v1, v2, v1
; CHECK-NEXT: and.16b v0, v0, v3
; CHECK-NEXT: ret
  %ext = sext <8 x i16> %a to <8 x i32>
  %cmp = icmp eq <8 x i16> %a, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
  %sel = select <8 x i1> %cmp, <8 x i32> %ext, <8 x i32> zeroinitializer
  ret <8 x i32> %sel
}

; Equality predicate with an i13 source and sext. Expected output widens to
; 4s lanes, sign-extends from 13 bits with shl/sshr #19, then uses cmeq.4s.
define <8 x i32> @same_sext_used_in_cmp_eq_and_select_v8i32_from_v8i13(<8 x i13> %a) {
; CHECK-LABEL: same_sext_used_in_cmp_eq_and_select_v8i32_from_v8i13:
; CHECK: ; %bb.0:
; CHECK-NEXT: ushll.4s v2, v0, #0
; CHECK-NEXT: ushll2.4s v0, v0, #0
; CHECK-NEXT: movi.4s v1, #10
; CHECK-NEXT: shl.4s v0, v0, #19
; CHECK-NEXT: shl.4s v2, v2, #19
; CHECK-NEXT: sshr.4s v0, v0, #19
; CHECK-NEXT: sshr.4s v2, v2, #19
; CHECK-NEXT: cmeq.4s v3, v2, v1
; CHECK-NEXT: cmeq.4s v1, v0, v1
; CHECK-NEXT: and.16b v1, v0, v1
; CHECK-NEXT: and.16b v0, v2, v3
; CHECK-NEXT: ret
  %ext = sext <8 x i13> %a to <8 x i32>
  %cmp = icmp eq <8 x i13> %a, <i13 10, i13 10, i13 10, i13 10, i13 10, i13 10, i13 10, i13 10>
  %sel = select <8 x i1> %cmp, <8 x i32> %ext, <8 x i32> zeroinitializer
  ret <8 x i32> %sel
}

; Inequality predicate with a sext operand. Expected output compares the
; sign-extended lanes with cmeq.4s and applies the inverted mask with bic.16b.
; NOTE(review): the name says v8i32 but the types are <16 x i8> -> <16 x i32>.
define <16 x i32> @same_sext_used_in_cmp_ne_and_select_v8i32(<16 x i8> %a) {
; CHECK-LABEL: same_sext_used_in_cmp_ne_and_select_v8i32:
; CHECK: ; %bb.0:
; CHECK-NEXT: sshll.8h v2, v0, #0
; CHECK-NEXT: sshll2.8h v0, v0, #0
; CHECK-NEXT: movi.4s v1, #10
; CHECK-NEXT: sshll.4s v3, v2, #0
; CHECK-NEXT: sshll2.4s v4, v0, #0
; CHECK-NEXT: sshll2.4s v2, v2, #0
; CHECK-NEXT: sshll.4s v5, v0, #0
; CHECK-NEXT: cmeq.4s v0, v3, v1
; CHECK-NEXT: cmeq.4s v7, v2, v1
; CHECK-NEXT: cmeq.4s v6, v5, v1
; CHECK-NEXT: cmeq.4s v1, v4, v1
; CHECK-NEXT: bic.16b v0, v3, v0
; CHECK-NEXT: bic.16b v3, v4, v1
; CHECK-NEXT: bic.16b v1, v2, v7
; CHECK-NEXT: bic.16b v2, v5, v6
; CHECK-NEXT: ret
  %ext = sext <16 x i8> %a to <16 x i32>
  %cmp = icmp ne <16 x i8> %a, <i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10>
  %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer
  ret <16 x i32> %sel
}

; sext <8 x i16> -> <8 x i32> with sgt splat 10. Expected output compares the
; sign-extended lanes with cmgt.4s.
define <8 x i32> @same_sext_used_in_cmp_signed_pred_and_select_v8i32(<8 x i16> %a) {
; CHECK-LABEL: same_sext_used_in_cmp_signed_pred_and_select_v8i32:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: movi.4s v1, #10
; CHECK-NEXT: sshll2.4s v2, v0, #0
; CHECK-NEXT: sshll.4s v0, v0, #0
; CHECK-NEXT: cmgt.4s v3, v0, v1
; CHECK-NEXT: cmgt.4s v1, v2, v1
; CHECK-NEXT: and.16b v1, v2, v1
; CHECK-NEXT: and.16b v0, v0, v3
; CHECK-NEXT: ret
entry:
  %ext = sext <8 x i16> %a to <8 x i32>
  %cmp = icmp sgt <8 x i16> %a, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
  %sel = select <8 x i1> %cmp, <8 x i32> %ext, <8 x i32> zeroinitializer
  ret <8 x i32> %sel
}

; i15 source with sext. Expected output widens to 4s lanes, sign-extends from
; 15 bits with shl/sshr #17, then compares with cmge.4s.
; NOTE(review): the name says "unsigned_pred" but the IR predicate is sge
; (signed) - confirm which one is intended.
define <8 x i32> @same_sext_used_in_cmp_unsigned_pred_and_select_v8i32_from_v8i15(<8 x i15> %a) {
; CHECK-LABEL: same_sext_used_in_cmp_unsigned_pred_and_select_v8i32_from_v8i15:
; CHECK: ; %bb.0:
; CHECK-NEXT: ushll.4s v2, v0, #0
; CHECK-NEXT: ushll2.4s v0, v0, #0
; CHECK-NEXT: movi.4s v1, #10
; CHECK-NEXT: shl.4s v0, v0, #17
; CHECK-NEXT: shl.4s v2, v2, #17
; CHECK-NEXT: sshr.4s v0, v0, #17
; CHECK-NEXT: sshr.4s v2, v2, #17
; CHECK-NEXT: cmge.4s v3, v2, v1
; CHECK-NEXT: cmge.4s v1, v0, v1
; CHECK-NEXT: and.16b v1, v0, v1
; CHECK-NEXT: and.16b v0, v2, v3
; CHECK-NEXT: ret
  %ext = sext <8 x i15> %a to <8 x i32>
  %cmp = icmp sge <8 x i15> %a, <i15 10, i15 10, i15 10, i15 10, i15 10, i15 10, i15 10, i15 10>
  %sel = select <8 x i1> %cmp, <8 x i32> %ext, <8 x i32> zeroinitializer
  ret <8 x i32> %sel
}

; sext of %a is the select operand while %a is compared with an unsigned
; predicate (ugt splat 10). Expected output keeps the compare at byte width
; (cmhi.16b), combines value and mask at 8h width (and.8b on 64-bit halves),
; and only then widens to 4s with sshll.
define <16 x i32> @same_sext_used_in_cmp_unsigned_pred_and_select(<16 x i8> %a) {
; CHECK-LABEL: same_sext_used_in_cmp_unsigned_pred_and_select:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: movi.16b v1, #10
; CHECK-NEXT: sshll.8h v2, v0, #0
; CHECK-NEXT: ext.16b v4, v2, v2, #8
; CHECK-NEXT: cmhi.16b v1, v0, v1
; CHECK-NEXT: sshll2.8h v0, v0, #0
; CHECK-NEXT: sshll.8h v3, v1, #0
; CHECK-NEXT: sshll2.8h v1, v1, #0
; CHECK-NEXT: ext.16b v5, v0, v0, #8
; CHECK-NEXT: ext.16b v6, v3, v3, #8
; CHECK-NEXT: ext.16b v7, v1, v1, #8
; CHECK-NEXT: and.8b v2, v2, v3
; CHECK-NEXT: and.8b v1, v0, v1
; CHECK-NEXT: sshll.4s v0, v2, #0
; CHECK-NEXT: and.8b v3, v5, v7
; CHECK-NEXT: and.8b v4, v4, v6
; CHECK-NEXT: sshll.4s v2, v1, #0
; CHECK-NEXT: sshll.4s v3, v3, #0
; CHECK-NEXT: sshll.4s v1, v4, #0
; CHECK-NEXT: ret
entry:
  %ext = sext <16 x i8> %a to <16 x i32>
  %cmp = icmp ugt <16 x i8> %a, <i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10>
  %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer
  ret <16 x i32> %sel
}

; zext of %a is the select operand and %a is compared with sgt against splat
; -1. Expected output keeps the compare at byte width as cmge.16b against #0
; and sign-extends the mask.
define <16 x i32> @same_zext_used_in_cmp_signed_pred_and_select_can_convert_to_unsigned_pred(<16 x i8> %a) {
; CHECK-LABEL: same_zext_used_in_cmp_signed_pred_and_select_can_convert_to_unsigned_pred:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: cmge.16b v1, v0, #0
; CHECK-NEXT: ushll.8h v2, v0, #0
; CHECK-NEXT: ushll2.8h v0, v0, #0
; CHECK-NEXT: sshll.8h v3, v1, #0
; CHECK-NEXT: sshll2.8h v1, v1, #0
; CHECK-NEXT: ushll.4s v4, v2, #0
; CHECK-NEXT: ushll.4s v5, v0, #0
; CHECK-NEXT: ushll2.4s v2, v2, #0
; CHECK-NEXT: ushll2.4s v6, v0, #0
; CHECK-NEXT: sshll.4s v0, v3, #0
; CHECK-NEXT: sshll.4s v7, v1, #0
; CHECK-NEXT: sshll2.4s v16, v3, #0
; CHECK-NEXT: sshll2.4s v1, v1, #0
; CHECK-NEXT: and.16b v0, v4, v0
; CHECK-NEXT: and.16b v3, v6, v1
; CHECK-NEXT: and.16b v1, v2, v16
; CHECK-NEXT: and.16b v2, v5, v7
; CHECK-NEXT: ret
entry:
  %ext = zext <16 x i8> %a to <16 x i32>
  %cmp = icmp sgt <16 x i8> %a, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer
  ret <16 x i32> %sel
}

; Loop of eight iterations (%iv steps by 16 until it reaches 128). Each
; iteration loads <16 x i8>, selects zext(%load) or zero on "sgt -1", and
; stores the <16 x i32> result. Expected output loads four index vectors from
; the constant pool before the loop and widens with four tbl.16b lookups; the
; compare stays at byte width (cmge.16b #0).
define void @extension_in_loop_v16i8_to_v16i32(ptr %src, ptr %dst) {
; CHECK-LABEL: extension_in_loop_v16i8_to_v16i32:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: Lloh2:
; CHECK-NEXT: adrp x8, lCPI24_0@PAGE
; CHECK-NEXT: Lloh3:
; CHECK-NEXT: adrp x9, lCPI24_1@PAGE
; CHECK-NEXT: Lloh4:
; CHECK-NEXT: adrp x10, lCPI24_2@PAGE
; CHECK-NEXT: Lloh5:
; CHECK-NEXT: ldr q0, [x8, lCPI24_0@PAGEOFF]
; CHECK-NEXT: Lloh6:
; CHECK-NEXT: adrp x8, lCPI24_3@PAGE
; CHECK-NEXT: Lloh7:
; CHECK-NEXT: ldr q1, [x9, lCPI24_1@PAGEOFF]
; CHECK-NEXT: Lloh8:
; CHECK-NEXT: ldr q2, [x10, lCPI24_2@PAGEOFF]
; CHECK-NEXT: Lloh9:
; CHECK-NEXT: ldr q3, [x8, lCPI24_3@PAGEOFF]
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: LBB24_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr q4, [x0, x8]
; CHECK-NEXT: add x8, x8, #16
; CHECK-NEXT: cmp x8, #128
; CHECK-NEXT: cmge.16b v5, v4, #0
; CHECK-NEXT: tbl.16b v7, { v4 }, v0
; CHECK-NEXT: tbl.16b v16, { v4 }, v1
; CHECK-NEXT: tbl.16b v18, { v4 }, v2
; CHECK-NEXT: tbl.16b v4, { v4 }, v3
; CHECK-NEXT: sshll2.8h v6, v5, #0
; CHECK-NEXT: sshll.8h v5, v5, #0
; CHECK-NEXT: sshll2.4s v17, v6, #0
; CHECK-NEXT: sshll.4s v6, v6, #0
; CHECK-NEXT: sshll2.4s v19, v5, #0
; CHECK-NEXT: sshll.4s v5, v5, #0
; CHECK-NEXT: and.16b v7, v7, v17
; CHECK-NEXT: and.16b v6, v16, v6
; CHECK-NEXT: and.16b v16, v18, v19
; CHECK-NEXT: and.16b v4, v4, v5
; CHECK-NEXT: stp q6, q7, [x1, #32]
; CHECK-NEXT: stp q4, q16, [x1], #64
; CHECK-NEXT: b.ne LBB24_1
; CHECK-NEXT: ; %bb.2: ; %exit
; CHECK-NEXT: ret
; CHECK-NEXT: .loh AdrpLdr Lloh6, Lloh9
; CHECK-NEXT: .loh AdrpLdr Lloh4, Lloh8
; CHECK-NEXT: .loh AdrpLdr Lloh3, Lloh7
; CHECK-NEXT: .loh AdrpAdrp Lloh2, Lloh6
; CHECK-NEXT: .loh AdrpLdr Lloh2, Lloh5
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %src.gep = getelementptr i8, ptr %src, i64 %iv
  %load = load <16 x i8>, ptr %src.gep
  %cmp = icmp sgt <16 x i8> %load, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %ext = zext <16 x i8> %load to <16 x i32>
  %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer
  %dst.gep = getelementptr i32, ptr %dst, i64 %iv
  store <16 x i32> %sel, ptr %dst.gep
  %iv.next = add nuw i64 %iv, 16
  %ec = icmp eq i64 %iv.next, 128
  br i1 %ec, label %exit, label %loop

exit:
  ret void
}

; Same loop shape as @extension_in_loop_v16i8_to_v16i32, but the widening is
; written as a shufflevector with zeroinitializer followed by a bitcast to
; <16 x i32>. Every group of four mask entries is three zero-vector lanes
; (index 16) followed by one lane of %load. Expected output again uses four
; tbl.16b lookups with index vectors from the constant pool.
define void @extension_in_loop_as_shuffle_v16i8_to_v16i32(ptr %src, ptr %dst) {
; CHECK-LABEL: extension_in_loop_as_shuffle_v16i8_to_v16i32:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: Lloh10:
; CHECK-NEXT: adrp x8, lCPI25_0@PAGE
; CHECK-NEXT: Lloh11:
; CHECK-NEXT: adrp x9, lCPI25_1@PAGE
; CHECK-NEXT: Lloh12:
; CHECK-NEXT: adrp x10, lCPI25_2@PAGE
; CHECK-NEXT: Lloh13:
; CHECK-NEXT: ldr q0, [x8, lCPI25_0@PAGEOFF]
; CHECK-NEXT: Lloh14:
; CHECK-NEXT: adrp x8, lCPI25_3@PAGE
; CHECK-NEXT: Lloh15:
; CHECK-NEXT: ldr q1, [x9, lCPI25_1@PAGEOFF]
; CHECK-NEXT: Lloh16:
; CHECK-NEXT: ldr q2, [x10, lCPI25_2@PAGEOFF]
; CHECK-NEXT: Lloh17:
; CHECK-NEXT: ldr q3, [x8, lCPI25_3@PAGEOFF]
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: LBB25_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr q4, [x0, x8]
; CHECK-NEXT: add x8, x8, #16
; CHECK-NEXT: cmp x8, #128
; CHECK-NEXT: cmge.16b v5, v4, #0
; CHECK-NEXT: tbl.16b v7, { v4 }, v0
; CHECK-NEXT: tbl.16b v16, { v4 }, v1
; CHECK-NEXT: tbl.16b v18, { v4 }, v2
; CHECK-NEXT: tbl.16b v4, { v4 }, v3
; CHECK-NEXT: sshll2.8h v6, v5, #0
; CHECK-NEXT: sshll.8h v5, v5, #0
; CHECK-NEXT: sshll2.4s v17, v6, #0
; CHECK-NEXT: sshll.4s v6, v6, #0
; CHECK-NEXT: sshll2.4s v19, v5, #0
; CHECK-NEXT: sshll.4s v5, v5, #0
; CHECK-NEXT: and.16b v7, v7, v17
; CHECK-NEXT: and.16b v6, v16, v6
; CHECK-NEXT: and.16b v16, v18, v19
; CHECK-NEXT: and.16b v4, v4, v5
; CHECK-NEXT: stp q6, q7, [x1, #32]
; CHECK-NEXT: stp q4, q16, [x1], #64
; CHECK-NEXT: b.ne LBB25_1
; CHECK-NEXT: ; %bb.2: ; %exit
; CHECK-NEXT: ret
; CHECK-NEXT: .loh AdrpLdr Lloh14, Lloh17
; CHECK-NEXT: .loh AdrpLdr Lloh12, Lloh16
; CHECK-NEXT: .loh AdrpLdr Lloh11, Lloh15
; CHECK-NEXT: .loh AdrpAdrp Lloh10, Lloh14
; CHECK-NEXT: .loh AdrpLdr Lloh10, Lloh13
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %src.gep = getelementptr i8, ptr %src, i64 %iv
  %load = load <16 x i8>, ptr %src.gep
  %cmp = icmp sgt <16 x i8> %load, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %ext.shuf = shufflevector <16 x i8> %load, <16 x i8> zeroinitializer, <64 x i32> <i32 16, i32 16, i32 16, i32 0, i32 16, i32 16, i32 16, i32 1, i32 16, i32 16, i32 16, i32 2, i32 16, i32 16, i32 16, i32 3, i32 16, i32 16, i32 16, i32 4, i32 16, i32 16, i32 16, i32 5, i32 16, i32 16, i32 16, i32 6, i32 16, i32 16, i32 16, i32 7, i32 16, i32 16, i32 16, i32 8, i32 16, i32 16, i32 16, i32 9, i32 16, i32 16, i32 16, i32 10, i32 16, i32 16, i32 16, i32 11, i32 16, i32 16, i32 16, i32 12, i32 16, i32 16, i32 16, i32 13, i32 16, i32 16, i32 16, i32 14, i32 16, i32 16, i32 16, i32 15>
  %ext = bitcast <64 x i8> %ext.shuf to <16 x i32>
  %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer
  %dst.gep = getelementptr i32, ptr %dst, i64 %iv
  store <16 x i32> %sel, ptr %dst.gep
  %iv.next = add nuw i64 %iv, 16
  %ec = icmp eq i64 %iv.next, 128
  br i1 %ec, label %exit, label %loop

exit:
  ret void
}

; Same as @extension_in_loop_as_shuffle_v16i8_to_v16i32 except that the first
; shuffle mask entry is 1 (a lane of %load) instead of 16 (a zero lane), so
; the first four-byte group is no longer three zeros plus one source byte.
; Expected output still uses four tbl.16b lookups with constant-pool indices.
define void @shuffle_in_loop_is_no_extend_v16i8_to_v16i32(ptr %src, ptr %dst) {
; CHECK-LABEL: shuffle_in_loop_is_no_extend_v16i8_to_v16i32:
; CHECK: ; %bb.0: ; %entry
; CHECK-NEXT: Lloh18:
; CHECK-NEXT: adrp x8, lCPI26_0@PAGE
; CHECK-NEXT: Lloh19:
; CHECK-NEXT: adrp x9, lCPI26_1@PAGE
; CHECK-NEXT: Lloh20:
; CHECK-NEXT: adrp x10, lCPI26_2@PAGE
; CHECK-NEXT: Lloh21:
; CHECK-NEXT: ldr q0, [x8, lCPI26_0@PAGEOFF]
; CHECK-NEXT: Lloh22:
; CHECK-NEXT: adrp x8, lCPI26_3@PAGE
; CHECK-NEXT: Lloh23:
; CHECK-NEXT: ldr q1, [x9, lCPI26_1@PAGEOFF]
; CHECK-NEXT: Lloh24:
; CHECK-NEXT: ldr q2, [x10, lCPI26_2@PAGEOFF]
; CHECK-NEXT: Lloh25:
; CHECK-NEXT: ldr q3, [x8, lCPI26_3@PAGEOFF]
; CHECK-NEXT: mov x8, xzr
; CHECK-NEXT: LBB26_1: ; %loop
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
; CHECK-NEXT: ldr q4, [x0, x8]
; CHECK-NEXT: add x8, x8, #16
; CHECK-NEXT: cmp x8, #128
; CHECK-NEXT: cmge.16b v5, v4, #0
; CHECK-NEXT: tbl.16b v7, { v4 }, v0
; CHECK-NEXT: tbl.16b v16, { v4 }, v1
; CHECK-NEXT: tbl.16b v18, { v4 }, v2
; CHECK-NEXT: tbl.16b v4, { v4 }, v3
; CHECK-NEXT: sshll2.8h v6, v5, #0
; CHECK-NEXT: sshll.8h v5, v5, #0
; CHECK-NEXT: sshll2.4s v17, v6, #0
; CHECK-NEXT: sshll.4s v6, v6, #0
; CHECK-NEXT: sshll2.4s v19, v5, #0
; CHECK-NEXT: sshll.4s v5, v5, #0
; CHECK-NEXT: and.16b v7, v7, v17
; CHECK-NEXT: and.16b v6, v16, v6
; CHECK-NEXT: and.16b v16, v18, v19
; CHECK-NEXT: and.16b v4, v4, v5
; CHECK-NEXT: stp q6, q7, [x1, #32]
; CHECK-NEXT: stp q4, q16, [x1], #64
; CHECK-NEXT: b.ne LBB26_1
; CHECK-NEXT: ; %bb.2: ; %exit
; CHECK-NEXT: ret
; CHECK-NEXT: .loh AdrpLdr Lloh22, Lloh25
; CHECK-NEXT: .loh AdrpLdr Lloh20, Lloh24
; CHECK-NEXT: .loh AdrpLdr Lloh19, Lloh23
; CHECK-NEXT: .loh AdrpAdrp Lloh18, Lloh22
; CHECK-NEXT: .loh AdrpLdr Lloh18, Lloh21
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %src.gep = getelementptr i8, ptr %src, i64 %iv
  %load = load <16 x i8>, ptr %src.gep
  %cmp = icmp sgt <16 x i8> %load, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %ext.shuf = shufflevector <16 x i8> %load, <16 x i8> zeroinitializer, <64 x i32> <i32 1, i32 16, i32 16, i32 0, i32 16, i32 16, i32 16, i32 1, i32 16, i32 16, i32 16, i32 2, i32 16, i32 16, i32 16, i32 3, i32 16, i32 16, i32 16, i32 4, i32 16, i32 16, i32 16, i32 5, i32 16, i32 16, i32 16, i32 6, i32 16, i32 16, i32 16, i32 7, i32 16, i32 16, i32 16, i32 8, i32 16, i32 16, i32 16, i32 9, i32 16, i32 16, i32 16, i32 10, i32 16, i32 16, i32 16, i32 11, i32 16, i32 16, i32 16, i32 12, i32 16, i32 16, i32 16, i32 13, i32 16, i32 16, i32 16, i32 14, i32 16, i32 16, i32 16, i32 15>
  %ext = bitcast <64 x i8> %ext.shuf to <16 x i32>
  %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer
  %dst.gep = getelementptr i32, ptr %dst, i64 %iv
  store <16 x i32> %sel, ptr %dst.gep
  %iv.next = add nuw i64 %iv, 16
  %ec = icmp eq i64 %iv.next, 128
  br i1 %ec, label %exit, label %loop

exit:
  ret void
}