xref: /llvm-project/llvm/test/CodeGen/Thumb2/mve-tailpred-loopinvariant.ll (revision eecb99c5f66c8491766628a2925587e20f3b1dbd)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s
3
4; This test has an instruction that gets sunk into the loop and that is an
5; active.lane.mask operand (%exitcount.ptrcnt.to.int = ptrtoint). We
6; need to make sure it is loop invariant.
7
; @a: if %b is unsigned-greater than %c, run a 16-lane vector loop over
; (%b - %c) byte elements starting at %c. Each iteration stores, under a
; get.active.lane.mask predicate, the low byte of each of the 16 addresses
; %c + (%index | k), k = 0..15, to memory at %c + %index. The returned i32
; is undef.
8define i32 @a(ptr readnone %b, ptr %c) {
9; CHECK-LABEL: a:
10; CHECK:       @ %bb.0: @ %entry
11; CHECK-NEXT:    .save {r4, lr}
12; CHECK-NEXT:    push {r4, lr}
13; CHECK-NEXT:    cmp r0, r1
14; CHECK-NEXT:    it ls
15; CHECK-NEXT:    popls {r4, pc}
16; CHECK-NEXT:  .LBB0_1: @ %while.body.preheader
17; CHECK-NEXT:    subs r4, r0, r1
18; CHECK-NEXT:    movs r2, #0
19; CHECK-NEXT:    mov r3, r1
20; CHECK-NEXT:    dlstp.8 lr, r4
21; CHECK-NEXT:  .LBB0_2: @ %vector.body
22; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
23; CHECK-NEXT:    adds r0, r1, r2
24; CHECK-NEXT:    vidup.u8 q0, r0, #1
25; CHECK-NEXT:    adds r2, #16
26; CHECK-NEXT:    vstrb.8 q0, [r3], #16
27; CHECK-NEXT:    letp lr, .LBB0_2
28; CHECK-NEXT:  @ %bb.3: @ %while.end
29; CHECK-NEXT:    pop {r4, pc}
30entry:
; Enter the loop only when %b > %c (unsigned); otherwise go straight to
; %while.end.
31  %cmp3 = icmp ugt ptr %b, %c
32  br i1 %cmp3, label %while.body.preheader, label %while.end
33
34while.body.preheader:                             ; preds = %entry
; Element count for the lane mask: %b - %c, formed as
; ptrtoint(%b + (0 - ptrtoint(%c))). %exitcount.ptrcnt.to.int is defined
; here, outside the loop, and is the second operand of the
; get.active.lane.mask call in %vector.body.
35  %c5 = ptrtoint ptr %c to i32
36  %0 = sub i32 0, %c5
37  %uglygep = getelementptr i8, ptr %b, i32 %0
38  %exitcount.ptrcnt.to.int = ptrtoint ptr %uglygep to i32
; %n.vec is the element count rounded up to a multiple of 16; it is the
; bound compared against %index.next for the loop exit.
39  %n.rnd.up = add i32 %exitcount.ptrcnt.to.int, 15
40  %n.vec = and i32 %n.rnd.up, -16
41  br label %vector.body
42
43vector.body:                                      ; preds = %vector.body, %while.body.preheader
; %index starts at 0 and advances by 16 per iteration.
44  %index = phi i32 [ 0, %while.body.preheader ], [ %index.next, %vector.body ]
; Form the 16 scalar addresses %c + (%index | k) for k = 0..15
; (%next.gep is the k = 0 case and is also the store address below).
45  %next.gep = getelementptr i8, ptr %c, i32 %index
46  %1 = or disjoint i32 %index, 1
47  %next.gep7 = getelementptr i8, ptr %c, i32 %1
48  %2 = or disjoint i32 %index, 2
49  %next.gep8 = getelementptr i8, ptr %c, i32 %2
50  %3 = or disjoint i32 %index, 3
51  %next.gep9 = getelementptr i8, ptr %c, i32 %3
52  %4 = or disjoint i32 %index, 4
53  %next.gep10 = getelementptr i8, ptr %c, i32 %4
54  %5 = or disjoint i32 %index, 5
55  %next.gep11 = getelementptr i8, ptr %c, i32 %5
56  %6 = or disjoint i32 %index, 6
57  %next.gep12 = getelementptr i8, ptr %c, i32 %6
58  %7 = or disjoint i32 %index, 7
59  %next.gep13 = getelementptr i8, ptr %c, i32 %7
60  %8 = or disjoint i32 %index, 8
61  %next.gep14 = getelementptr i8, ptr %c, i32 %8
62  %9 = or disjoint i32 %index, 9
63  %next.gep15 = getelementptr i8, ptr %c, i32 %9
64  %10 = or disjoint i32 %index, 10
65  %next.gep16 = getelementptr i8, ptr %c, i32 %10
66  %11 = or disjoint i32 %index, 11
67  %next.gep17 = getelementptr i8, ptr %c, i32 %11
68  %12 = or disjoint i32 %index, 12
69  %next.gep18 = getelementptr i8, ptr %c, i32 %12
70  %13 = or disjoint i32 %index, 13
71  %next.gep19 = getelementptr i8, ptr %c, i32 %13
72  %14 = or disjoint i32 %index, 14
73  %next.gep20 = getelementptr i8, ptr %c, i32 %14
74  %15 = or disjoint i32 %index, 15
75  %next.gep21 = getelementptr i8, ptr %c, i32 %15
; Pack the 16 addresses into a <16 x ptr> vector, lane k holding address k.
76  %16 = insertelement <16 x ptr> poison, ptr %next.gep, i32 0
77  %17 = insertelement <16 x ptr> %16, ptr %next.gep7, i32 1
78  %18 = insertelement <16 x ptr> %17, ptr %next.gep8, i32 2
79  %19 = insertelement <16 x ptr> %18, ptr %next.gep9, i32 3
80  %20 = insertelement <16 x ptr> %19, ptr %next.gep10, i32 4
81  %21 = insertelement <16 x ptr> %20, ptr %next.gep11, i32 5
82  %22 = insertelement <16 x ptr> %21, ptr %next.gep12, i32 6
83  %23 = insertelement <16 x ptr> %22, ptr %next.gep13, i32 7
84  %24 = insertelement <16 x ptr> %23, ptr %next.gep14, i32 8
85  %25 = insertelement <16 x ptr> %24, ptr %next.gep15, i32 9
86  %26 = insertelement <16 x ptr> %25, ptr %next.gep16, i32 10
87  %27 = insertelement <16 x ptr> %26, ptr %next.gep17, i32 11
88  %28 = insertelement <16 x ptr> %27, ptr %next.gep18, i32 12
89  %29 = insertelement <16 x ptr> %28, ptr %next.gep19, i32 13
90  %30 = insertelement <16 x ptr> %29, ptr %next.gep20, i32 14
91  %31 = insertelement <16 x ptr> %30, ptr %next.gep21, i32 15
; Lane predicate built from the current %index and the preheader-defined
; element count.
92  %active.lane.mask = call <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32 %index, i32 %exitcount.ptrcnt.to.int)
; Convert the addresses to i32, truncate each to i8, and store the 16 bytes
; at %next.gep under the lane mask.
93  %32 = ptrtoint <16 x ptr> %31 to <16 x i32>
94  %33 = trunc <16 x i32> %32 to <16 x i8>
95  call void @llvm.masked.store.v16i8.p0(<16 x i8> %33, ptr %next.gep, i32 1, <16 x i1> %active.lane.mask)
; Step to the next group of 16 lanes; exit once %index.next equals %n.vec.
96  %index.next = add i32 %index, 16
97  %34 = icmp eq i32 %index.next, %n.vec
98  br i1 %34, label %while.end, label %vector.body
99
100while.end:                                        ; preds = %vector.body, %entry
101  ret i32 undef
102}
103
; Intrinsics called from @a's %vector.body: the lane-mask generator and the
; masked byte-vector store that consumes its result.
104declare <16 x i1> @llvm.get.active.lane.mask.v16i1.i32(i32, i32)
105declare void @llvm.masked.store.v16i8.p0(<16 x i8>, ptr, i32 immarg, <16 x i1>)
106