; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s

; This test has an instruction that gets sunk into the loop and that is an
; active.lane.mask operand (%exitcount.ptrcnt.to.int = ptrtoint). We
; need to make sure it is loop invariant.
;
; The expected output below is a tail-predicated loop (dlstp.8 / letp) whose
; element count is computed once in the preheader (subs r4, r0, r1).

define i32 @a(ptr readnone %b, ptr %c) {
; CHECK-LABEL: a:
; CHECK:       @ %bb.0: @ %entry
; CHECK-NEXT:    .save {r4, lr}
; CHECK-NEXT:    push {r4, lr}
; CHECK-NEXT:    cmp r0, r1
; CHECK-NEXT:    it ls
; CHECK-NEXT:    popls {r4, pc}
; CHECK-NEXT:  .LBB0_1: @ %while.body.preheader
; CHECK-NEXT:    subs r4, r0, r1
; CHECK-NEXT:    movs r2, #0
; CHECK-NEXT:    mov r3, r1
; CHECK-NEXT:    dlstp.8 lr, r4
; CHECK-NEXT:  .LBB0_2: @ %vector.body
; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
; CHECK-NEXT:    adds r0, r1, r2
; CHECK-NEXT:    vidup.u8 q0, r0, #1
; CHECK-NEXT:    adds r2, #16
; CHECK-NEXT:    vstrb.8 q0, [r3], #16
; CHECK-NEXT:    letp lr, .LBB0_2
; CHECK-NEXT:  @ %bb.3: @ %while.end
; CHECK-NEXT:    pop {r4, pc}
entry:
  ; Only enter the loop when %b is (unsigned) above %c.
  %cmp3 = icmp ugt ptr %b, %c
  br i1 %cmp3, label %while.body.preheader, label %while.end

while.body.preheader:                             ; preds = %entry
  ; %exitcount.ptrcnt.to.int is %b offset by -%c, converted to an integer.
  ; It is the second operand of the lane mask in the loop below.
  %c5 = ptrtoint ptr %c to i32
  %0 = sub i32 0, %c5
  %uglygep = getelementptr i8, ptr %b, i32 %0
  %exitcount.ptrcnt.to.int = ptrtoint ptr %uglygep to i32
  ; %n.vec = the count rounded up to a multiple of 16 (add 15, clear low 4 bits).
  %n.rnd.up = add i32 %exitcount.ptrcnt.to.int, 15
  %n.vec = and i32 %n.rnd.up, -16
  br label %vector.body

vector.body:                                      ; preds = %vector.body, %while.body.preheader
  %index = phi i32 [ 0, %while.body.preheader ], [ %index.next, %vector.body ]
  ; Form sixteen byte pointers: %c + (%index | 0) ... %c + (%index | 15).
  %next.gep = getelementptr i8, ptr %c, i32 %index
  %1 = or disjoint i32 %index, 1
  %next.gep7 = getelementptr i8, ptr %c, i32 %1
  %2 = or disjoint i32 %index, 2
  %next.gep8 = getelementptr i8, ptr %c, i32 %2
  %3 = or disjoint i32 %index, 3
  %next.gep9 = getelementptr i8, ptr %c, i32 %3
  %4 = or disjoint i32 %index, 4
  %next.gep10 = getelementptr i8, ptr %c, i32 %4
  %5 = or disjoint i32 %index, 5
  %next.gep11 = getelementptr i8, ptr %c, i32 %5
  %6 = or disjoint i32 %index, 6
  %next.gep12 = getelementptr i8, ptr %c, i32 %6
  %7 = or disjoint i32 %index, 7
  %next.gep13 = getelementptr i8, ptr %c, i32 %7
  %8 = or disjoint i32 %index, 8
  %next.gep14 = getelementptr i8, ptr %c, i32 %8
  %9 = or disjoint i32 %index, 9
  %next.gep15 = getelementptr i8, ptr %c, i32 %9
  %10 = or disjoint i32 %index, 10
  %next.gep16 = getelementptr i8, ptr %c, i32 %10
  %11 = or disjoint i32 %index, 11
  %next.gep17 = getelementptr i8, ptr %c, i32 %11
  %12 = or disjoint i32 %index, 12
  %next.gep18 = getelementptr i8, ptr %c, i32 %12
  %13 = or disjoint i32 %index, 13
  %next.gep19 = getelementptr i8, ptr %c, i32 %13
  %14 = or disjoint i32 %index, 14
  %next.gep20 = getelementptr i8, ptr %c, i32 %14
  %15 = or disjoint i32 %index, 15
  %next.gep21 = getelementptr i8, ptr %c, i32 %15
  ; Pack the sixteen pointers into a single <16 x ptr> vector, lane by lane.
  %16 = insertelement <16 x ptr> poison, ptr %next.gep, i32 0
  %17 = insertelement <16 x ptr> %16, ptr %next.gep7, i32 1
  %18 = insertelement <16 x ptr> %17, ptr %next.gep8, i32 2
  %19 = insertelement <16 x ptr> %18, ptr %next.gep9, i32 3
  %20 = insertelement <16 x ptr> %19, ptr %next.gep10, i32 4
  %21 = insertelement <16 x ptr> %20, ptr %next.gep11, i32 5
  %22 = insertelement <16 x ptr> %21, ptr %next.gep12, i32 6
  %23 = insertelement <16 x ptr> %22, ptr %next.gep13, i32 7
  %24 = insertelement <16 x ptr> %23, ptr %next.gep14, i32 8
  %25 = insertelement <16 x ptr> %24, ptr %next.gep15, i32 9
  %26 = insertelement <16 x ptr> %25, ptr %next.gep16, i32 10
  %27 = insertelement <16 x ptr> %26, ptr %next.gep17, i32 11
  %28 = insertelement <16 x ptr> %27, ptr %next.gep18, i32 12
  %29 = insertelement <16 x ptr> %28, ptr %next.gep19, i32 13
  %30 = insertelement <16 x ptr> %29, ptr %next.gep20, i32 14
  %31 = insertelement <16 x ptr> %30, ptr %next.gep21, i32 15
  ; The mask's second operand is the preheader value under test.
  %active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 %index, i32 %exitcount.ptrcnt.to.int)
  ; Truncate each lane's address to its low byte and store it under the mask.
  %32 = ptrtoint <16 x ptr> %31 to <16 x i32>
  %33 = trunc <16 x i32> %32 to <16 x i8>
  call void @llvm.masked.store.v16i8.p0(<16 x i8> %33, ptr %next.gep, i32 1, <16 x i1> %active.lane.mask)
  %index.next = add i32 %index, 16
  %34 = icmp eq i32 %index.next, %n.vec
  br i1 %34, label %while.end, label %vector.body

while.end:                                        ; preds = %vector.body, %entry
  ret i32 undef
}

declare <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32, i32)
declare void @llvm.masked.store.v16i8.p0(<16 x i8>, ptr, i32 immarg, <16 x i1>)