; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -aarch64-enable-mgather-combine=0 < %s | FileCheck %s
; RUN: llc -mtriple=aarch64--linux-gnu -mattr=+sve -aarch64-enable-mgather-combine=1 < %s | FileCheck %s

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; unscaled unpacked 32-bit offsets
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

define <vscale x 2 x i64> @masked_gather_nxv2i16(ptr %base, <vscale x 2 x i32> %offsets, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0, z0.d, sxtw #1]
; CHECK-NEXT:    ret
  %ptrs = getelementptr i16, ptr %base, <vscale x 2 x i32> %offsets
  %vals = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x ptr> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x i16> undef)
  %vals.zext = zext <vscale x 2 x i16> %vals to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %vals.zext
}

define <vscale x 2 x i64> @masked_gather_nxv2i32(ptr %base, <vscale x 2 x i32> %offsets, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0, z0.d, sxtw #2]
; CHECK-NEXT:    ret
  %ptrs = getelementptr i32, ptr %base, <vscale x 2 x i32> %offsets
  %vals = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x ptr> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x i32> undef)
  %vals.zext = zext <vscale x 2 x i32> %vals to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %vals.zext
}

define <vscale x 2 x i64> @masked_gather_nxv2i64(ptr %base, <vscale x 2 x i32> %offsets, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, z0.d, sxtw #3]
; CHECK-NEXT:    ret
  %ptrs = getelementptr i64, ptr %base, <vscale x 2 x i32> %offsets
  %vals = call <vscale x 2 x i64> @llvm.masked.gather.nxv2i64(<vscale x 2 x ptr> %ptrs, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x i64> undef)
  ret <vscale x 2 x i64> %vals
}

define <vscale x 2 x half> @masked_gather_nxv2f16(ptr %base, <vscale x 2 x i32> %offsets, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0, z0.d, sxtw #1]
; CHECK-NEXT:    ret
  %ptrs = getelementptr half, ptr %base, <vscale x 2 x i32> %offsets
  %vals = call <vscale x 2 x half> @llvm.masked.gather.nxv2f16(<vscale x 2 x ptr> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x half> undef)
  ret <vscale x 2 x half> %vals
}

define <vscale x 2 x bfloat> @masked_gather_nxv2bf16(ptr %base, <vscale x 2 x i32> %offsets, <vscale x 2 x i1> %mask) #0 {
; CHECK-LABEL: masked_gather_nxv2bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0, z0.d, sxtw #1]
; CHECK-NEXT:    ret
  %ptrs = getelementptr bfloat, ptr %base, <vscale x 2 x i32> %offsets
  %vals = call <vscale x 2 x bfloat> @llvm.masked.gather.nxv2bf16(<vscale x 2 x ptr> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x bfloat> undef)
  ret <vscale x 2 x bfloat> %vals
}

define <vscale x 2 x float> @masked_gather_nxv2f32(ptr %base, <vscale x 2 x i32> %offsets, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0, z0.d, sxtw #2]
; CHECK-NEXT:    ret
  %ptrs = getelementptr float, ptr %base, <vscale x 2 x i32> %offsets
  %vals = call <vscale x 2 x float> @llvm.masked.gather.nxv2f32(<vscale x 2 x ptr> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x float> undef)
  ret <vscale x 2 x float> %vals
}

define <vscale x 2 x double> @masked_gather_nxv2f64(ptr %base, <vscale x 2 x i32> %offsets, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, z0.d, sxtw #3]
; CHECK-NEXT:    ret
  %ptrs = getelementptr double, ptr %base, <vscale x 2 x i32> %offsets
  %vals = call <vscale x 2 x double> @llvm.masked.gather.nxv2f64(<vscale x 2 x ptr> %ptrs, i32 8, <vscale x 2 x i1> %mask, <vscale x 2 x double> undef)
  ret <vscale x 2 x double> %vals
}

define <vscale x 2 x i64> @masked_sgather_nxv2i16(ptr %base, <vscale x 2 x i32> %offsets, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_sgather_nxv2i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sh { z0.d }, p0/z, [x0, z0.d, sxtw #1]
; CHECK-NEXT:    ret
  %ptrs = getelementptr i16, ptr %base, <vscale x 2 x i32> %offsets
  %vals = call <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x ptr> %ptrs, i32 2, <vscale x 2 x i1> %mask, <vscale x 2 x i16> undef)
  %vals.sext = sext <vscale x 2 x i16> %vals to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %vals.sext
}

define <vscale x 2 x i64> @masked_sgather_nxv2i32(ptr %base, <vscale x 2 x i32> %offsets, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: masked_sgather_nxv2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sw { z0.d }, p0/z, [x0, z0.d, sxtw #2]
; CHECK-NEXT:    ret
  %ptrs = getelementptr i32, ptr %base, <vscale x 2 x i32> %offsets
  %vals = call <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x ptr> %ptrs, i32 4, <vscale x 2 x i1> %mask, <vscale x 2 x i32> undef)
  %vals.sext = sext <vscale x 2 x i32> %vals to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %vals.sext
}

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; unscaled packed 32-bit offsets
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

define <vscale x 4 x i32> @masked_gather_nxv4i16(ptr %base, <vscale x 4 x i32> %offsets, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0, z0.s, sxtw #1]
; CHECK-NEXT:    ret
  %ptrs = getelementptr i16, ptr %base, <vscale x 4 x i32> %offsets
  %vals = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16(<vscale x 4 x ptr> %ptrs, i32 2, <vscale x 4 x i1> %mask, <vscale x 4 x i16> undef)
  %vals.zext = zext <vscale x 4 x i16> %vals to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %vals.zext
}

define <vscale x 4 x i32> @masked_gather_nxv4i32(ptr %base, <vscale x 4 x i32> %offsets, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0, z0.s, sxtw #2]
; CHECK-NEXT:    ret
  %ptrs = getelementptr i32, ptr %base, <vscale x 4 x i32> %offsets
  %vals = call <vscale x 4 x i32> @llvm.masked.gather.nxv4i32(<vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x i32> undef)
  ret <vscale x 4 x i32> %vals
}

define <vscale x 4 x half> @masked_gather_nxv4f16(ptr %base, <vscale x 4 x i32> %offsets, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0, z0.s, sxtw #1]
; CHECK-NEXT:    ret
  %ptrs = getelementptr half, ptr %base, <vscale x 4 x i32> %offsets
  %vals = call <vscale x 4 x half> @llvm.masked.gather.nxv4f16(<vscale x 4 x ptr> %ptrs, i32 2, <vscale x 4 x i1> %mask, <vscale x 4 x half> undef)
  ret <vscale x 4 x half> %vals
}

define <vscale x 4 x bfloat> @masked_gather_nxv4bf16(ptr %base, <vscale x 4 x i32> %offsets, <vscale x 4 x i1> %mask) #0 {
; CHECK-LABEL: masked_gather_nxv4bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0, z0.s, sxtw #1]
; CHECK-NEXT:    ret
  %ptrs = getelementptr bfloat, ptr %base, <vscale x 4 x i32> %offsets
  %vals = call <vscale x 4 x bfloat> @llvm.masked.gather.nxv4bf16(<vscale x 4 x ptr> %ptrs, i32 2, <vscale x 4 x i1> %mask, <vscale x 4 x bfloat> undef)
  ret <vscale x 4 x bfloat> %vals
}

define <vscale x 4 x float> @masked_gather_nxv4f32(ptr %base, <vscale x 4 x i32> %offsets, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: masked_gather_nxv4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0, z0.s, sxtw #2]
; CHECK-NEXT:    ret
  %ptrs = getelementptr float, ptr %base, <vscale x 4 x i32> %offsets
  %vals = call <vscale x 4 x float> @llvm.masked.gather.nxv4f32(<vscale x 4 x ptr> %ptrs, i32 4, <vscale x 4 x i1> %mask, <vscale x 4 x float> undef)
  ret <vscale x 4 x float> %vals
}

define <vscale x 4 x i32> @masked_sgather_nxv4i16(ptr %base, <vscale x 4 x i32> %offsets, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: masked_sgather_nxv4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sh { z0.s }, p0/z, [x0, z0.s, sxtw #1]
; CHECK-NEXT:    ret
  %ptrs = getelementptr i16, ptr %base, <vscale x 4 x i32> %offsets
  %vals = call <vscale x 4 x i16> @llvm.masked.gather.nxv4i16(<vscale x 4 x ptr> %ptrs, i32 2, <vscale x 4 x i1> %mask, <vscale x 4 x i16> undef)
  %vals.sext = sext <vscale x 4 x i16> %vals to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %vals.sext
}

declare <vscale x 2 x i16> @llvm.masked.gather.nxv2i16(<vscale x 2 x ptr>, i32, <vscale x 2 x i1>, <vscale x 2 x i16>)
declare <vscale x 2 x i32> @llvm.masked.gather.nxv2i32(<vscale x 2 x ptr>, i32, <vscale x 2 x i1>, <vscale x 2 x i32>)
declare <vscale x 2 x i64> @llvm.masked.gather.nxv2i64(<vscale x 2 x ptr>, i32, <vscale x 2 x i1>, <vscale x 2 x i64>)
declare <vscale x 2 x half> @llvm.masked.gather.nxv2f16(<vscale x 2 x ptr>, i32, <vscale x 2 x i1>, <vscale x 2 x half>)
declare <vscale x 2 x bfloat> @llvm.masked.gather.nxv2bf16(<vscale x 2 x ptr>, i32, <vscale x 2 x i1>, <vscale x 2 x bfloat>)
declare <vscale x 2 x float> @llvm.masked.gather.nxv2f32(<vscale x 2 x ptr>, i32, <vscale x 2 x i1>, <vscale x 2 x float>)
declare <vscale x 2 x double> @llvm.masked.gather.nxv2f64(<vscale x 2 x ptr>, i32, <vscale x 2 x i1>, <vscale x 2 x double>)

declare <vscale x 4 x i16> @llvm.masked.gather.nxv4i16(<vscale x 4 x ptr>, i32, <vscale x 4 x i1>, <vscale x 4 x i16>)
declare <vscale x 4 x i32> @llvm.masked.gather.nxv4i32(<vscale x 4 x ptr>, i32, <vscale x 4 x i1>, <vscale x 4 x i32>)
declare <vscale x 4 x half> @llvm.masked.gather.nxv4f16(<vscale x 4 x ptr>, i32, <vscale x 4 x i1>, <vscale x 4 x half>)
declare <vscale x 4 x bfloat> @llvm.masked.gather.nxv4bf16(<vscale x 4 x ptr>, i32, <vscale x 4 x i1>, <vscale x 4 x bfloat>)
declare <vscale x 4 x float> @llvm.masked.gather.nxv4f32(<vscale x 4 x ptr>, i32, <vscale x 4 x i1>, <vscale x 4 x float>)
attributes #0 = { "target-features"="+sve,+bf16" }