; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE

; Code generation test for fixed-length <2 x i64> masked gather and masked
; scatter on AArch64. The three llc invocations above cover:
;   * +sve with -force-streaming-compatible   (default prefix)
;   * +sme with -force-streaming              (default prefix, same expected output)
;   * -force-streaming-compatible, no -mattr  (NONEON-NOSVE prefix)
; The expected assembly below is generated by the script named on the first
; line; regenerate it with that script instead of editing it by hand.

target triple = "aarch64-unknown-linux-gnu"

; Loads a <2 x i64> from %a and a <2 x ptr> from %b, builds a mask of the lanes
; of the loaded value that are equal to zero, and gathers an i64 through each
; pointer whose lane is set in the mask (passthru is poison). The expected
; output handles the two lanes separately in %cond.load and %cond.load1.
define <2 x i64> @masked_gather_v2i64(ptr %a, ptr %b) vscale_range(2, 2) {
; CHECK-LABEL: masked_gather_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    cmpeq p1.d, p0/z, z0.d, #0
; CHECK-NEXT:    index z0.d, #1, #1
; CHECK-NEXT:    mov z1.d, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    and z0.d, z1.d, z0.d
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    uaddv d0, p0, z0.d
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    fmov x8, d0
; CHECK-NEXT:    strb w8, [sp, #12]
; CHECK-NEXT:    and w8, w8, #0xff
; CHECK-NEXT:    tbz w8, #0, .LBB0_2
; CHECK-NEXT:  // %bb.1: // %cond.load
; CHECK-NEXT:    fmov x9, d1
; CHECK-NEXT:    ld1rd { z0.d }, p0/z, [x9]
; CHECK-NEXT:    tbnz w8, #1, .LBB0_3
; CHECK-NEXT:    b .LBB0_4
; CHECK-NEXT:  .LBB0_2:
; CHECK-NEXT:    adrp x9, .LCPI0_0
; CHECK-NEXT:    ldr q0, [x9, :lo12:.LCPI0_0]
; CHECK-NEXT:    tbz w8, #1, .LBB0_4
; CHECK-NEXT:  .LBB0_3: // %cond.load1
; CHECK-NEXT:    mov w8, #1 // =0x1
; CHECK-NEXT:    index z2.d, #0, #1
; CHECK-NEXT:    mov z1.d, z1.d[1]
; CHECK-NEXT:    mov z3.d, x8
; CHECK-NEXT:    fmov x8, d1
; CHECK-NEXT:    cmpeq p0.d, p0/z, z2.d, z3.d
; CHECK-NEXT:    ldr x8, [x8]
; CHECK-NEXT:    mov z0.d, p0/m, x8
; CHECK-NEXT:  .LBB0_4: // %else2
; CHECK-NEXT:    // kill: def $q0 killed $q0 killed $z0
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: masked_gather_v2i64:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #144
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 144
; NONEON-NOSVE-NEXT:    ldr q0, [x0]
; NONEON-NOSVE-NEXT:    ldr q1, [x1]
; NONEON-NOSVE-NEXT:    mov w8, #2 // =0x2
; NONEON-NOSVE-NEXT:    str q0, [sp, #112]
; NONEON-NOSVE-NEXT:    ldp x10, x9, [sp, #112]
; NONEON-NOSVE-NEXT:    cmp x9, #0
; NONEON-NOSVE-NEXT:    csel x8, x8, xzr, eq
; NONEON-NOSVE-NEXT:    cmp x10, #0
; NONEON-NOSVE-NEXT:    csetm x9, eq
; NONEON-NOSVE-NEXT:    sub w8, w8, w9
; NONEON-NOSVE-NEXT:    strb w8, [sp, #140]
; NONEON-NOSVE-NEXT:    and w8, w8, #0xff
; NONEON-NOSVE-NEXT:    tbz w8, #0, .LBB0_2
; NONEON-NOSVE-NEXT:  // %bb.1: // %cond.load
; NONEON-NOSVE-NEXT:    str q1, [sp, #96]
; NONEON-NOSVE-NEXT:    ldr x9, [sp, #96]
; NONEON-NOSVE-NEXT:    ldr x9, [x9]
; NONEON-NOSVE-NEXT:    str x9, [sp, #80]
; NONEON-NOSVE-NEXT:    ldr q0, [sp, #80]
; NONEON-NOSVE-NEXT:    tbnz w8, #1, .LBB0_3
; NONEON-NOSVE-NEXT:    b .LBB0_4
; NONEON-NOSVE-NEXT:  .LBB0_2:
; NONEON-NOSVE-NEXT:    adrp x9, .LCPI0_0
; NONEON-NOSVE-NEXT:    ldr q0, [x9, :lo12:.LCPI0_0]
; NONEON-NOSVE-NEXT:    tbz w8, #1, .LBB0_4
; NONEON-NOSVE-NEXT:  .LBB0_3: // %cond.load1
; NONEON-NOSVE-NEXT:    str q1, [sp, #64]
; NONEON-NOSVE-NEXT:    ldr x8, [sp, #72]
; NONEON-NOSVE-NEXT:    ldr x8, [x8]
; NONEON-NOSVE-NEXT:    str q0, [sp]
; NONEON-NOSVE-NEXT:    ldr x9, [sp]
; NONEON-NOSVE-NEXT:    str x8, [sp, #48]
; NONEON-NOSVE-NEXT:    ldr q0, [sp, #48]
; NONEON-NOSVE-NEXT:    str q0, [sp, #16]
; NONEON-NOSVE-NEXT:    ldr x8, [sp, #16]
; NONEON-NOSVE-NEXT:    stp x9, x8, [sp, #32]
; NONEON-NOSVE-NEXT:    ldr q0, [sp, #32]
; NONEON-NOSVE-NEXT:  .LBB0_4: // %else2
; NONEON-NOSVE-NEXT:    add sp, sp, #144
; NONEON-NOSVE-NEXT:    ret
  %vals = load <2 x i64>, ptr %a
  %ptrs = load <2 x ptr>, ptr %b
  %mask = icmp eq <2 x i64> %vals, zeroinitializer
  %res = call <2 x i64> @llvm.masked.gather.v2i64(<2 x ptr> %ptrs, i32 8, <2 x i1> %mask, <2 x i64> poison)
  ret <2 x i64> %res
}

; Loads a <2 x i64> from %a and a <2 x ptr> from %b, builds a mask of the lanes
; of the loaded value that are equal to zero, and scatters each masked lane of
; the loaded value through the pointer in the same lane. The expected output
; handles the two lanes separately in %cond.store and %cond.store1.
define void @masked_scatter_v2i64(ptr %a, ptr %b) vscale_range(2, 2) {
; CHECK-LABEL: masked_scatter_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    ldr q0, [x0]
; CHECK-NEXT:    index z1.d, #1, #1
; CHECK-NEXT:    cmpeq p1.d, p0/z, z0.d, #0
; CHECK-NEXT:    mov z2.d, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    and z1.d, z2.d, z1.d
; CHECK-NEXT:    uaddv d1, p0, z1.d
; CHECK-NEXT:    fmov x8, d1
; CHECK-NEXT:    ldr q1, [x1]
; CHECK-NEXT:    strb w8, [sp, #12]
; CHECK-NEXT:    and w8, w8, #0xff
; CHECK-NEXT:    tbnz w8, #0, .LBB1_3
; CHECK-NEXT:  // %bb.1: // %else
; CHECK-NEXT:    tbnz w8, #1, .LBB1_4
; CHECK-NEXT:  .LBB1_2: // %else2
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ret
; CHECK-NEXT:  .LBB1_3: // %cond.store
; CHECK-NEXT:    fmov x9, d0
; CHECK-NEXT:    fmov x10, d1
; CHECK-NEXT:    str x9, [x10]
; CHECK-NEXT:    tbz w8, #1, .LBB1_2
; CHECK-NEXT:  .LBB1_4: // %cond.store1
; CHECK-NEXT:    mov z0.d, z0.d[1]
; CHECK-NEXT:    mov z1.d, z1.d[1]
; CHECK-NEXT:    fmov x8, d0
; CHECK-NEXT:    fmov x9, d1
; CHECK-NEXT:    str x8, [x9]
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: masked_scatter_v2i64:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #96
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 96
; NONEON-NOSVE-NEXT:    ldr q1, [x0]
; NONEON-NOSVE-NEXT:    ldr q0, [x1]
; NONEON-NOSVE-NEXT:    mov w8, #2 // =0x2
; NONEON-NOSVE-NEXT:    str q1, [sp, #64]
; NONEON-NOSVE-NEXT:    ldp x10, x9, [sp, #64]
; NONEON-NOSVE-NEXT:    cmp x9, #0
; NONEON-NOSVE-NEXT:    csel x8, x8, xzr, eq
; NONEON-NOSVE-NEXT:    cmp x10, #0
; NONEON-NOSVE-NEXT:    csetm x9, eq
; NONEON-NOSVE-NEXT:    sub w8, w8, w9
; NONEON-NOSVE-NEXT:    strb w8, [sp, #92]
; NONEON-NOSVE-NEXT:    and w8, w8, #0xff
; NONEON-NOSVE-NEXT:    tbnz w8, #0, .LBB1_3
; NONEON-NOSVE-NEXT:  // %bb.1: // %else
; NONEON-NOSVE-NEXT:    tbnz w8, #1, .LBB1_4
; NONEON-NOSVE-NEXT:  .LBB1_2: // %else2
; NONEON-NOSVE-NEXT:    add sp, sp, #96
; NONEON-NOSVE-NEXT:    ret
; NONEON-NOSVE-NEXT:  .LBB1_3: // %cond.store
; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #32]
; NONEON-NOSVE-NEXT:    ldr x9, [sp, #32]
; NONEON-NOSVE-NEXT:    ldr x10, [sp, #48]
; NONEON-NOSVE-NEXT:    str x9, [x10]
; NONEON-NOSVE-NEXT:    tbz w8, #1, .LBB1_2
; NONEON-NOSVE-NEXT:  .LBB1_4: // %cond.store1
; NONEON-NOSVE-NEXT:    stp q1, q0, [sp]
; NONEON-NOSVE-NEXT:    ldr x8, [sp, #8]
; NONEON-NOSVE-NEXT:    ldr x9, [sp, #24]
; NONEON-NOSVE-NEXT:    str x8, [x9]
; NONEON-NOSVE-NEXT:    add sp, sp, #96
; NONEON-NOSVE-NEXT:    ret
  %vals = load <2 x i64>, ptr %a
  %ptrs = load <2 x ptr>, ptr %b
  %mask = icmp eq <2 x i64> %vals, zeroinitializer
  call void @llvm.masked.scatter.v2i64(<2 x i64> %vals, <2 x ptr> %ptrs, i32 8, <2 x i1> %mask)
  ret void
}

; Intrinsic declarations used by the two functions above.
declare void @llvm.masked.scatter.v2i64(<2 x i64>, <2 x ptr>, i32, <2 x i1>)
declare <2 x i64> @llvm.masked.gather.v2i64(<2 x ptr>, i32, <2 x i1>, <2 x i64>)