; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible  < %s | FileCheck %s
; RUN: llc -mattr=+sme -force-streaming  < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE


target triple = "aarch64-unknown-linux-gnu"

; Masked gather of <2 x i64>: expanded into per-lane conditional loads (the
; %cond.load blocks in the checked output), with the <2 x i1> mask first
; materialised as a bitmask byte on the stack (strb w8, [sp, ...]).
define <2 x i64> @masked_gather_v2i64(ptr %a, ptr %b) vscale_range(2, 2) {
; CHECK-LABEL: masked_gather_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    cmpeq p1.d, p0/z, z0.d, #0
; CHECK-NEXT:    index z0.d, #1, #1
; CHECK-NEXT:    mov z1.d, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    and z0.d, z1.d, z0.d
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    uaddv d0, p0, z0.d
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fmov x8, d0
; CHECK-NEXT:    strb w8, [sp, #12]
; CHECK-NEXT:    and w8, w8, #0xff
; CHECK-NEXT:    tbz w8, #0, .LBB0_2
; CHECK-NEXT:  // %bb.1: // %cond.load
; CHECK-NEXT:    fmov x9, d1
; CHECK-NEXT:    ld1rd { z0.d }, p0/z, [x9]
; CHECK-NEXT:    tbnz w8, #1, .LBB0_3
; CHECK-NEXT:    b .LBB0_4
; CHECK-NEXT:  .LBB0_2:
; CHECK-NEXT:    adrp x9, .LCPI0_0
; CHECK-NEXT:    ldr q0, [x9, :lo12:.LCPI0_0]
; CHECK-NEXT:    tbz w8, #1, .LBB0_4
; CHECK-NEXT:  .LBB0_3: // %cond.load1
; CHECK-NEXT:    mov w8, #1 // =0x1
; CHECK-NEXT:    index z2.d, #0, #1
; CHECK-NEXT:    mov z1.d, z1.d[1]
; CHECK-NEXT:    mov z3.d, x8
; CHECK-NEXT:    fmov x8, d1
; CHECK-NEXT:    cmpeq p0.d, p0/z, z2.d, z3.d
; CHECK-NEXT:    ldr x8, [x8]
; CHECK-NEXT:    mov z0.d, p0/m, x8
; CHECK-NEXT:  .LBB0_4: // %else2
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: masked_gather_v2i64:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #144
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 144
; NONEON-NOSVE-NEXT:    ldr q0, [x0]
; NONEON-NOSVE-NEXT:    ldr q1, [x1]
; NONEON-NOSVE-NEXT:    mov w8, #2 // =0x2
; NONEON-NOSVE-NEXT:    str q0, [sp, #112]
; NONEON-NOSVE-NEXT:    ldp x10, x9, [sp, #112]
; NONEON-NOSVE-NEXT:    cmp x9, #0
; NONEON-NOSVE-NEXT:    csel x8, x8, xzr, eq
; NONEON-NOSVE-NEXT:    cmp x10, #0
; NONEON-NOSVE-NEXT:    csetm x9, eq
; NONEON-NOSVE-NEXT:    sub w8, w8, w9
; NONEON-NOSVE-NEXT:    strb w8, [sp, #140]
; NONEON-NOSVE-NEXT:    and w8, w8, #0xff
; NONEON-NOSVE-NEXT:    tbz w8, #0, .LBB0_2
; NONEON-NOSVE-NEXT:  // %bb.1: // %cond.load
; NONEON-NOSVE-NEXT:    str q1, [sp, #96]
; NONEON-NOSVE-NEXT:    ldr x9, [sp, #96]
; NONEON-NOSVE-NEXT:    ldr x9, [x9]
; NONEON-NOSVE-NEXT:    str x9, [sp, #80]
; NONEON-NOSVE-NEXT:    ldr q0, [sp, #80]
; NONEON-NOSVE-NEXT:    tbnz w8, #1, .LBB0_3
; NONEON-NOSVE-NEXT:    b .LBB0_4
; NONEON-NOSVE-NEXT:  .LBB0_2:
; NONEON-NOSVE-NEXT:    adrp x9, .LCPI0_0
; NONEON-NOSVE-NEXT:    ldr q0, [x9, :lo12:.LCPI0_0]
; NONEON-NOSVE-NEXT:    tbz w8, #1, .LBB0_4
; NONEON-NOSVE-NEXT:  .LBB0_3: // %cond.load1
; NONEON-NOSVE-NEXT:    str q1, [sp, #64]
; NONEON-NOSVE-NEXT:    ldr x8, [sp, #72]
; NONEON-NOSVE-NEXT:    ldr x8, [x8]
; NONEON-NOSVE-NEXT:    str q0, [sp]
; NONEON-NOSVE-NEXT:    ldr x9, [sp]
; NONEON-NOSVE-NEXT:    str x8, [sp, #48]
; NONEON-NOSVE-NEXT:    ldr q0, [sp, #48]
; NONEON-NOSVE-NEXT:    str q0, [sp, #16]
; NONEON-NOSVE-NEXT:    ldr x8, [sp, #16]
; NONEON-NOSVE-NEXT:    stp x9, x8, [sp, #32]
; NONEON-NOSVE-NEXT:    ldr q0, [sp, #32]
; NONEON-NOSVE-NEXT:  .LBB0_4: // %else2
; NONEON-NOSVE-NEXT:    add sp, sp, #144
; NONEON-NOSVE-NEXT:    ret
  %vals = load <2 x i64>, ptr %a
  %ptrs = load <2 x ptr>, ptr %b
  %mask = icmp eq <2 x i64> %vals, zeroinitializer
  %res = call <2 x i64> @llvm.masked.gather.v2i64(<2 x ptr> %ptrs, i32 8, <2 x i1> %mask, <2 x i64> poison)
  ret <2 x i64> %res
}

; Masked scatter of <2 x i64>: expanded into per-lane conditional stores (the
; %cond.store blocks in the checked output), driven by the same stack-bitmask
; lowering of the <2 x i1> mask as the gather test above.
define void @masked_scatter_v2i64(ptr %a, ptr %b) vscale_range(2, 2) {
; CHECK-LABEL: masked_scatter_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    index z1.d, #1, #1
; CHECK-NEXT:    cmpeq p1.d, p0/z, z0.d, #0
; CHECK-NEXT:    mov z2.d, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    and z1.d, z2.d, z1.d
; CHECK-NEXT:    uaddv d1, p0, z1.d
; CHECK-NEXT:    fmov x8, d1
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    strb w8, [sp, #12]
; CHECK-NEXT:    and w8, w8, #0xff
; CHECK-NEXT:    tbnz w8, #0, .LBB1_3
; CHECK-NEXT:  // %bb.1: // %else
; CHECK-NEXT:    tbnz w8, #1, .LBB1_4
; CHECK-NEXT:  .LBB1_2: // %else2
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ret
; CHECK-NEXT:  .LBB1_3: // %cond.store
; CHECK-NEXT:    fmov x9, d0
; CHECK-NEXT:    fmov x10, d1
; CHECK-NEXT:    str x9, [x10]
; CHECK-NEXT:    tbz w8, #1, .LBB1_2
; CHECK-NEXT:  .LBB1_4: // %cond.store1
; CHECK-NEXT:    mov z0.d, z0.d[1]
; CHECK-NEXT:    mov z1.d, z1.d[1]
; CHECK-NEXT:    fmov x8, d0
; CHECK-NEXT:    fmov x9, d1
; CHECK-NEXT:    str x8, [x9]
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: masked_scatter_v2i64:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #96
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 96
; NONEON-NOSVE-NEXT:    ldr q1, [x0]
; NONEON-NOSVE-NEXT:    ldr q0, [x1]
; NONEON-NOSVE-NEXT:    mov w8, #2 // =0x2
; NONEON-NOSVE-NEXT:    str q1, [sp, #64]
; NONEON-NOSVE-NEXT:    ldp x10, x9, [sp, #64]
; NONEON-NOSVE-NEXT:    cmp x9, #0
; NONEON-NOSVE-NEXT:    csel x8, x8, xzr, eq
; NONEON-NOSVE-NEXT:    cmp x10, #0
; NONEON-NOSVE-NEXT:    csetm x9, eq
; NONEON-NOSVE-NEXT:    sub w8, w8, w9
; NONEON-NOSVE-NEXT:    strb w8, [sp, #92]
; NONEON-NOSVE-NEXT:    and w8, w8, #0xff
; NONEON-NOSVE-NEXT:    tbnz w8, #0, .LBB1_3
; NONEON-NOSVE-NEXT:  // %bb.1: // %else
; NONEON-NOSVE-NEXT:    tbnz w8, #1, .LBB1_4
; NONEON-NOSVE-NEXT:  .LBB1_2: // %else2
; NONEON-NOSVE-NEXT:    add sp, sp, #96
; NONEON-NOSVE-NEXT:    ret
; NONEON-NOSVE-NEXT:  .LBB1_3: // %cond.store
; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #32]
; NONEON-NOSVE-NEXT:    ldr x9, [sp, #32]
; NONEON-NOSVE-NEXT:    ldr x10, [sp, #48]
; NONEON-NOSVE-NEXT:    str x9, [x10]
; NONEON-NOSVE-NEXT:    tbz w8, #1, .LBB1_2
; NONEON-NOSVE-NEXT:  .LBB1_4: // %cond.store1
; NONEON-NOSVE-NEXT:    stp q1, q0, [sp]
; NONEON-NOSVE-NEXT:    ldr x8, [sp, #8]
; NONEON-NOSVE-NEXT:    ldr x9, [sp, #24]
; NONEON-NOSVE-NEXT:    str x8, [x9]
; NONEON-NOSVE-NEXT:    add sp, sp, #96
; NONEON-NOSVE-NEXT:    ret
  %vals = load <2 x i64>, ptr %a
  %ptrs = load <2 x ptr>, ptr %b
  %mask = icmp eq <2 x i64> %vals, zeroinitializer
  call void @llvm.masked.scatter.v2i64(<2 x i64> %vals, <2 x ptr> %ptrs, i32 8, <2 x i1> %mask)
  ret void
}

declare void @llvm.masked.scatter.v2i64(<2 x i64>, <2 x ptr>, i32, <2 x i1>)
declare <2 x i64> @llvm.masked.gather.v2i64(<2 x ptr>, i32, <2 x i1>, <2 x i64>)
