; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc -mtriple=aarch64 -verify-machineinstrs < %s -o - | FileCheck %s

;; This test exercises the default lowering of the histogram to scalarized code.
;;
;; Each function calls @llvm.experimental.vector.histogram.add on fixed-width
;; vectors. The intrinsic name suffix encodes the pointer-vector type and the
;; increment type (e.g. v2p0.i64 for <2 x ptr> with an i64 increment).

;; Two lanes, variable i64 increment, variable mask. The expected code tests
;; bit 0 of each mask lane (tbz/tbnz) and, for each enabled lane, performs a
;; scalar load / add / store on that lane's bucket pointer.
define void @histogram_i64(<2 x ptr> %buckets, i64 %inc, <2 x i1> %mask) {
; CHECK-LABEL: histogram_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    fmov w8, s1
; CHECK-NEXT:    tbnz w8, #0, .LBB0_3
; CHECK-NEXT:  // %bb.1: // %else
; CHECK-NEXT:    mov w8, v1.s[1]
; CHECK-NEXT:    tbnz w8, #0, .LBB0_4
; CHECK-NEXT:  .LBB0_2: // %else2
; CHECK-NEXT:    ret
; CHECK-NEXT:  .LBB0_3: // %cond.histogram.update
; CHECK-NEXT:    fmov x8, d0
; CHECK-NEXT:    ldr x9, [x8]
; CHECK-NEXT:    add x9, x9, x0
; CHECK-NEXT:    str x9, [x8]
; CHECK-NEXT:    mov w8, v1.s[1]
; CHECK-NEXT:    tbz w8, #0, .LBB0_2
; CHECK-NEXT:  .LBB0_4: // %cond.histogram.update1
; CHECK-NEXT:    mov x8, v0.d[1]
; CHECK-NEXT:    ldr x9, [x8]
; CHECK-NEXT:    add x9, x9, x0
; CHECK-NEXT:    str x9, [x8]
; CHECK-NEXT:    ret
  call void @llvm.experimental.vector.histogram.add.v2p0.i64(<2 x ptr> %buckets, i64 %inc, <2 x i1> %mask)
  ret void
}

;; Four lanes, bucket addresses formed by a vector GEP of i32 elements off a
;; scalar base, literal increment of 1, variable mask. The expected code
;; widens the i32 indices with a left shift of 2 (sshll/sshll2 #2), adds the
;; splatted base, and then conditionally updates each lane's bucket.
define void @histogram_i32_literal(ptr %base, <4 x i32> %indices, <4 x i1> %mask) {
; CHECK-LABEL: histogram_i32_literal:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v2.2d, x0
; CHECK-NEXT:    sshll v3.2d, v0.2s, #2
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    umov w8, v1.h[0]
; CHECK-NEXT:    add v3.2d, v2.2d, v3.2d
; CHECK-NEXT:    tbz w8, #0, .LBB1_2
; CHECK-NEXT:  // %bb.1: // %cond.histogram.update
; CHECK-NEXT:    fmov x8, d3
; CHECK-NEXT:    ldr w9, [x8]
; CHECK-NEXT:    add w9, w9, #1
; CHECK-NEXT:    str w9, [x8]
; CHECK-NEXT:  .LBB1_2: // %else
; CHECK-NEXT:    umov w8, v1.h[1]
; CHECK-NEXT:    sshll2 v0.2d, v0.4s, #2
; CHECK-NEXT:    tbz w8, #0, .LBB1_4
; CHECK-NEXT:  // %bb.3: // %cond.histogram.update1
; CHECK-NEXT:    mov x8, v3.d[1]
; CHECK-NEXT:    ldr w9, [x8]
; CHECK-NEXT:    add w9, w9, #1
; CHECK-NEXT:    str w9, [x8]
; CHECK-NEXT:  .LBB1_4: // %else2
; CHECK-NEXT:    umov w8, v1.h[2]
; CHECK-NEXT:    add v0.2d, v2.2d, v0.2d
; CHECK-NEXT:    tbnz w8, #0, .LBB1_7
; CHECK-NEXT:  // %bb.5: // %else4
; CHECK-NEXT:    umov w8, v1.h[3]
; CHECK-NEXT:    tbnz w8, #0, .LBB1_8
; CHECK-NEXT:  .LBB1_6: // %else6
; CHECK-NEXT:    ret
; CHECK-NEXT:  .LBB1_7: // %cond.histogram.update3
; CHECK-NEXT:    fmov x8, d0
; CHECK-NEXT:    ldr w9, [x8]
; CHECK-NEXT:    add w9, w9, #1
; CHECK-NEXT:    str w9, [x8]
; CHECK-NEXT:    umov w8, v1.h[3]
; CHECK-NEXT:    tbz w8, #0, .LBB1_6
; CHECK-NEXT:  .LBB1_8: // %cond.histogram.update5
; CHECK-NEXT:    mov x8, v0.d[1]
; CHECK-NEXT:    ldr w9, [x8]
; CHECK-NEXT:    add w9, w9, #1
; CHECK-NEXT:    str w9, [x8]
; CHECK-NEXT:    ret

  %buckets = getelementptr i32, ptr %base, <4 x i32> %indices
  call void @llvm.experimental.vector.histogram.add.v4p0.i32(<4 x ptr> %buckets, i32 1, <4 x i1> %mask)
  ret void
}

;; Same as histogram_i32_literal but with a constant all-true mask. The
;; expected code contains no mask tests or branches: all four buckets are
;; updated unconditionally, one lane after another.
define void @histogram_i32_literal_alltruemask(ptr %base, <4 x i32> %indices) {
; CHECK-LABEL: histogram_i32_literal_alltruemask:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v1.2d, x0
; CHECK-NEXT:    sshll v2.2d, v0.2s, #2
; CHECK-NEXT:    sshll2 v0.2d, v0.4s, #2
; CHECK-NEXT:    add v2.2d, v1.2d, v2.2d
; CHECK-NEXT:    add v0.2d, v1.2d, v0.2d
; CHECK-NEXT:    fmov x8, d2
; CHECK-NEXT:    mov x9, v2.d[1]
; CHECK-NEXT:    ldr w10, [x8]
; CHECK-NEXT:    add w10, w10, #1
; CHECK-NEXT:    str w10, [x8]
; CHECK-NEXT:    ldr w8, [x9]
; CHECK-NEXT:    add w8, w8, #1
; CHECK-NEXT:    str w8, [x9]
; CHECK-NEXT:    fmov x8, d0
; CHECK-NEXT:    mov x9, v0.d[1]
; CHECK-NEXT:    ldr w10, [x8]
; CHECK-NEXT:    add w10, w10, #1
; CHECK-NEXT:    str w10, [x8]
; CHECK-NEXT:    ldr w8, [x9]
; CHECK-NEXT:    add w8, w8, #1
; CHECK-NEXT:    str w8, [x9]
; CHECK-NEXT:    ret

  %buckets = getelementptr i32, ptr %base, <4 x i32> %indices
  call void @llvm.experimental.vector.histogram.add.v4p0.i32(<4 x ptr> %buckets, i32 1, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
  ret void
}