xref: /llvm-project/llvm/test/CodeGen/AArch64/neon-scalarize-histogram.ll (revision 2b15c4a62be6ceab124cb2505ae8dc6a98ba6e7d)
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc -mtriple=aarch64 -verify-machineinstrs < %s -o - | FileCheck %s

;; This test exercises the default lowering of the histogram intrinsic to scalarized code.

;; Masked histogram update over two explicit bucket pointers with a runtime
;; i64 increment. The expected output is fully scalarized: bit 0 of each mask
;; lane is tested (tbnz/tbz), and every active lane performs its own
;; ldr / add / str on the bucket address extracted from %buckets.
define void @histogram_i64(<2 x ptr> %buckets, i64 %inc, <2 x i1> %mask) {
; CHECK-LABEL: histogram_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    fmov w8, s1
; CHECK-NEXT:    tbnz w8, #0, .LBB0_3
; CHECK-NEXT:  // %bb.1: // %else
; CHECK-NEXT:    mov w8, v1.s[1]
; CHECK-NEXT:    tbnz w8, #0, .LBB0_4
; CHECK-NEXT:  .LBB0_2: // %else2
; CHECK-NEXT:    ret
; CHECK-NEXT:  .LBB0_3: // %cond.histogram.update
; CHECK-NEXT:    fmov x8, d0
; CHECK-NEXT:    ldr x9, [x8]
; CHECK-NEXT:    add x9, x9, x0
; CHECK-NEXT:    str x9, [x8]
; CHECK-NEXT:    mov w8, v1.s[1]
; CHECK-NEXT:    tbz w8, #0, .LBB0_2
; CHECK-NEXT:  .LBB0_4: // %cond.histogram.update1
; CHECK-NEXT:    mov x8, v0.d[1]
; CHECK-NEXT:    ldr x9, [x8]
; CHECK-NEXT:    add x9, x9, x0
; CHECK-NEXT:    str x9, [x8]
; CHECK-NEXT:    ret
  ;; %buckets is the fixed-width <2 x ptr>, so the overload suffix is spelled
  ;; v2p0; the nxv2p0 spelling denotes a scalable <vscale x 2 x ptr>.
  call void @llvm.experimental.vector.histogram.add.v2p0.i64(<2 x ptr> %buckets, i64 %inc, <2 x i1> %mask)
  ret void
}

;; Masked histogram update of i32 buckets addressed as %base + %indices, with
;; the constant increment 1. The expected output builds the four addresses in
;; vector registers (indices sign-extended and shifted left by 2, then added to
;; the broadcast base), tests bit 0 of each mask lane, and emits one
;; ldr / add #1 / str sequence per active lane.
define void @histogram_i32_literal(ptr %base, <4 x i32> %indices, <4 x i1> %mask) {
; CHECK-LABEL: histogram_i32_literal:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v2.2d, x0
; CHECK-NEXT:    sshll v3.2d, v0.2s, #2
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    umov w8, v1.h[0]
; CHECK-NEXT:    add v3.2d, v2.2d, v3.2d
; CHECK-NEXT:    tbz w8, #0, .LBB1_2
; CHECK-NEXT:  // %bb.1: // %cond.histogram.update
; CHECK-NEXT:    fmov x8, d3
; CHECK-NEXT:    ldr w9, [x8]
; CHECK-NEXT:    add w9, w9, #1
; CHECK-NEXT:    str w9, [x8]
; CHECK-NEXT:  .LBB1_2: // %else
; CHECK-NEXT:    umov w8, v1.h[1]
; CHECK-NEXT:    sshll2 v0.2d, v0.4s, #2
; CHECK-NEXT:    tbz w8, #0, .LBB1_4
; CHECK-NEXT:  // %bb.3: // %cond.histogram.update1
; CHECK-NEXT:    mov x8, v3.d[1]
; CHECK-NEXT:    ldr w9, [x8]
; CHECK-NEXT:    add w9, w9, #1
; CHECK-NEXT:    str w9, [x8]
; CHECK-NEXT:  .LBB1_4: // %else2
; CHECK-NEXT:    umov w8, v1.h[2]
; CHECK-NEXT:    add v0.2d, v2.2d, v0.2d
; CHECK-NEXT:    tbnz w8, #0, .LBB1_7
; CHECK-NEXT:  // %bb.5: // %else4
; CHECK-NEXT:    umov w8, v1.h[3]
; CHECK-NEXT:    tbnz w8, #0, .LBB1_8
; CHECK-NEXT:  .LBB1_6: // %else6
; CHECK-NEXT:    ret
; CHECK-NEXT:  .LBB1_7: // %cond.histogram.update3
; CHECK-NEXT:    fmov x8, d0
; CHECK-NEXT:    ldr w9, [x8]
; CHECK-NEXT:    add w9, w9, #1
; CHECK-NEXT:    str w9, [x8]
; CHECK-NEXT:    umov w8, v1.h[3]
; CHECK-NEXT:    tbz w8, #0, .LBB1_6
; CHECK-NEXT:  .LBB1_8: // %cond.histogram.update5
; CHECK-NEXT:    mov x8, v0.d[1]
; CHECK-NEXT:    ldr w9, [x8]
; CHECK-NEXT:    add w9, w9, #1
; CHECK-NEXT:    str w9, [x8]
; CHECK-NEXT:    ret

  ;; One bucket address per lane: %base advanced by the lane's i32 index.
  %buckets = getelementptr i32, ptr %base, <4 x i32> %indices
  ;; %buckets is the fixed-width <4 x ptr>, so the overload suffix is spelled
  ;; v4p0; the nxv4p0 spelling denotes a scalable <vscale x 4 x ptr>.
  call void @llvm.experimental.vector.histogram.add.v4p0.i32(<4 x ptr> %buckets, i32 1, <4 x i1> %mask)
  ret void
}

;; Same bucket addressing and constant increment as the masked i32 case, but
;; with a constant all-true mask. The expected output contains no mask tests or
;; branches: the four ldr / add #1 / str sequences are emitted back to back in
;; lane order.
define void @histogram_i32_literal_alltruemask(ptr %base, <4 x i32> %indices) {
; CHECK-LABEL: histogram_i32_literal_alltruemask:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v1.2d, x0
; CHECK-NEXT:    sshll v2.2d, v0.2s, #2
; CHECK-NEXT:    sshll2 v0.2d, v0.4s, #2
; CHECK-NEXT:    add v2.2d, v1.2d, v2.2d
; CHECK-NEXT:    add v0.2d, v1.2d, v0.2d
; CHECK-NEXT:    fmov x8, d2
; CHECK-NEXT:    mov x9, v2.d[1]
; CHECK-NEXT:    ldr w10, [x8]
; CHECK-NEXT:    add w10, w10, #1
; CHECK-NEXT:    str w10, [x8]
; CHECK-NEXT:    ldr w8, [x9]
; CHECK-NEXT:    add w8, w8, #1
; CHECK-NEXT:    str w8, [x9]
; CHECK-NEXT:    fmov x8, d0
; CHECK-NEXT:    mov x9, v0.d[1]
; CHECK-NEXT:    ldr w10, [x8]
; CHECK-NEXT:    add w10, w10, #1
; CHECK-NEXT:    str w10, [x8]
; CHECK-NEXT:    ldr w8, [x9]
; CHECK-NEXT:    add w8, w8, #1
; CHECK-NEXT:    str w8, [x9]
; CHECK-NEXT:    ret

  ;; One bucket address per lane: %base advanced by the lane's i32 index.
  %buckets = getelementptr i32, ptr %base, <4 x i32> %indices
  ;; %buckets is the fixed-width <4 x ptr>, so the overload suffix is spelled
  ;; v4p0; the nxv4p0 spelling denotes a scalable <vscale x 4 x ptr>.
  call void @llvm.experimental.vector.histogram.add.v4p0.i32(<4 x ptr> %buckets, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
  ret void
}
