; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve < %s | FileCheck %s

; Code generation tests for llvm.masked.gather on scalable vectors whose
; addresses are supplied as a <vscale x 2 x ptr> (or derived from one via a
; vector getelementptr). Every test expects a single SVE gather load, plus
; whatever extra instructions the extension / passthru / scaling requires.

;
; Integer gathers: zero-extended to i64 (i8/i16/i32) or returned as-is (i64).
;

define <vscale x 2 x i64> @masked_gather_nxv2i8(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1b { z0.d }, p0/z, [z0.d]
; CHECK-NEXT:    ret
  %vals = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x ptr> %ptrs, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x i8> undef)
  %vals.zext = zext <vscale x 2 x i8> %vals to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %vals.zext
}

define <vscale x 2 x i64> @masked_gather_nxv2i16(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [z0.d]
; CHECK-NEXT:    ret
  %vals = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x ptr> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x i16> undef)
  %vals.zext = zext <vscale x 2 x i16> %vals to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %vals.zext
}

define <vscale x 2 x i64> @masked_gather_nxv2i32(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z0.d }, p0/z, [z0.d]
; CHECK-NEXT:    ret
  %vals = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x ptr> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x i32> undef)
  %vals.zext = zext <vscale x 2 x i32> %vals to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %vals.zext
}

define <vscale x 2 x i64> @masked_gather_nxv2i64(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [z0.d]
; CHECK-NEXT:    ret
  %vals = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64(<vscale x 2 x ptr> %ptrs, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x i64> undef)
  ret <vscale x 2 x i64> %vals
}

;
; Floating-point gathers: the loaded value is returned without conversion.
;

define <vscale x 2 x half> @masked_gather_nxv2f16(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [z0.d]
; CHECK-NEXT:    ret
  %vals = call <vscale x 2 x half> @llvm.masked.gather.nxv2f16(<vscale x 2 x ptr> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x half> undef)
  ret <vscale x 2 x half> %vals
}

; This is the only function carrying attribute group #0, which enables +bf16
; in addition to +sve.
define <vscale x 2 x bfloat> @masked_gather_nxv2bf16(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %mask) #0 {
; CHECK-LABEL: masked_gather_nxv2bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [z0.d]
; CHECK-NEXT:    ret
  %vals = call <vscale x 2 x bfloat> @llvm.masked.gather.nxv2bf16(<vscale x 2 x ptr> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x bfloat> undef)
  ret <vscale x 2 x bfloat> %vals
}

define <vscale x 2 x float> @masked_gather_nxv2f32(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z0.d }, p0/z, [z0.d]
; CHECK-NEXT:    ret
  %vals = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32(<vscale x 2 x ptr> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x float> undef)
  ret <vscale x 2 x float> %vals
}

define <vscale x 2 x double> @masked_gather_nxv2f64(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [z0.d]
; CHECK-NEXT:    ret
  %vals = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64(<vscale x 2 x ptr> %ptrs, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x double> undef)
  ret <vscale x 2 x double> %vals
}

;
; Integer gathers sign-extended to i64: the sext is expected to fold into the
; signed form of the gather load (ld1sb / ld1sh / ld1sw).
;

define <vscale x 2 x i64> @masked_sgather_nxv2i8(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_sgather_nxv2i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sb { z0.d }, p0/z, [z0.d]
; CHECK-NEXT:    ret
  %vals = call <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x ptr> %ptrs, i32 1, <vscale x 2 x i1> %mask, <vscale x 2 x i8> undef)
  %vals.sext = sext <vscale x 2 x i8> %vals to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %vals.sext
}

define <vscale x 2 x i64> @masked_sgather_nxv2i16(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_sgather_nxv2i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sh { z0.d }, p0/z, [z0.d]
; CHECK-NEXT:    ret
  %vals = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x ptr> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x i16> undef)
  %vals.sext = sext <vscale x 2 x i16> %vals to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %vals.sext
}

define <vscale x 2 x i64> @masked_sgather_nxv2i32(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_sgather_nxv2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sw { z0.d }, p0/z, [z0.d]
; CHECK-NEXT:    ret
  %vals = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x ptr> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x i32> undef)
  %vals.sext = sext <vscale x 2 x i32> %vals to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %vals.sext
}

;
; Passthru handling.
;

; The passthru is an nxv2i32 value and the gather result is sign-extended to
; i64, so inactive lanes must contain the sign-extended passthru. The expected
; code therefore extends z1 (ptrue + sxtw) before selecting it into the
; result; selecting the raw z1.d would leave the upper 32 bits of inactive
; lanes unspecified.
; NOTE(review): instruction order below was not verified against llc; rerun
; utils/update_llc_test_checks.py to confirm the exact schedule.
define <vscale x 2 x i64> @masked_gather_passthru(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %mask, <vscale x 2 x i32> %passthru) {
; CHECK-LABEL: masked_gather_passthru:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sw { z0.d }, p0/z, [z0.d]
; CHECK-NEXT:    ptrue p1.d
; CHECK-NEXT:    sxtw z1.d, p1/m, z1.d
; CHECK-NEXT:    sel z0.d, p0, z0.d, z1.d
; CHECK-NEXT:    ret
  %vals = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x ptr> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x i32> %passthru)
  %vals.sext = sext <vscale x 2 x i32> %vals to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %vals.sext
}

; A zeroinitializer passthru needs no select: the gather uses zeroing
; predication (p0/z), so inactive lanes are already zero.
define <vscale x 2 x i64> @masked_gather_passthru_0(<vscale x 2 x ptr> %ptrs, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_passthru_0:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sw { z0.d }, p0/z, [z0.d]
; CHECK-NEXT:    ret
  %vals = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x ptr> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x i32> zeroinitializer)
  %vals.sext = sext <vscale x 2 x i32> %vals to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %vals.sext
}

;
; Addresses formed by a vector GEP whose stride differs from the loaded
; element size: the offsets are expected to be scaled explicitly before an
; unscaled [x0, z0.d] gather.
;

; Stride is 24 bytes (three i64 fields), not a power of two, so the scaling
; is expected to be a multiply.
%i64_x3 = type { i64, i64, i64}
define <vscale x 2 x i64> @masked_gather_non_power_of_two_based_scaling(ptr %base, <vscale x 2 x i64> %offsets, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_non_power_of_two_based_scaling:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mul z0.d, z0.d, #24
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, z0.d]
; CHECK-NEXT:    ret
  %ptrs = getelementptr inbounds %i64_x3, ptr %base, <vscale x 2 x i64> %offsets
  %vals = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64(<vscale x 2 x ptr> %ptrs, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x i64> undef)
  ret <vscale x 2 x i64> %vals
}

; Stride is 32 bytes (four i64 fields): a power of two, but not the 8-byte
; element size, so the scaling is expected to be a left shift by 5.
%i64_x4 = type { i64, i64, i64, i64}
define <vscale x 2 x i64> @masked_gather_non_element_type_based_scaling(ptr %base, <vscale x 2 x i64> %offsets, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_non_element_type_based_scaling:
; CHECK:       // %bb.0:
; CHECK-NEXT:    lsl z0.d, z0.d, #5
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, z0.d]
; CHECK-NEXT:    ret
  %ptrs = getelementptr inbounds %i64_x4, ptr %base, <vscale x 2 x i64> %offsets
  %vals = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64(<vscale x 2 x ptr> %ptrs, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x i64> undef)
  ret <vscale x 2 x i64> %vals
}

; Intrinsic declarations used by the tests above.
declare <vscale x 2 x i8> @llvm.masked.gather.nxv2i8(<vscale x 2 x ptr>, i32, <vscale x 2 x i1>, <vscale x 2 x i8>)
declare <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x ptr>, i32, <vscale x 2 x i1>, <vscale x 2 x i16>)
declare <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x ptr>, i32, <vscale x 2 x i1>, <vscale x 2 x i32>)
declare <vscale x 2 x i64> @llvm.masked.gather.nxv2i64(<vscale x 2 x ptr>, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
declare <vscale x 2 x half> @llvm.masked.gather.nxv2f16(<vscale x 2 x ptr>, i32, <vscale x 2 x i1>, <vscale x 2 x half>)
declare <vscale x 2 x bfloat> @llvm.masked.gather.nxv2bf16(<vscale x 2 x ptr>, i32, <vscale x 2 x i1>, <vscale x 2 x bfloat>)
declare <vscale x 2 x float> @llvm.masked.gather.nxv2f32(<vscale x 2 x ptr>, i32, <vscale x 2 x i1>, <vscale x 2 x float>)
declare <vscale x 2 x double> @llvm.masked.gather.nxv2f64(<vscale x 2 x ptr>, i32, <vscale x 2 x i1>, <vscale x 2 x double>)

; Attribute group for the bfloat test: SVE plus the bf16 extension.
attributes #0 = { "target-features"="+sve,+bf16" }