; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -verify-machineinstrs < %s | FileCheck %s

;
; LDFF1B, LDFF1W, LDFF1H, LDFF1D: vector base + scalar offset (index)
;   e.g. ldff1b { z0.d }, p0/z, [x0, z0.d]
;

; LDFF1B
define <vscale x 4 x i32> @gldff1b_s_scalar_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
; CHECK-LABEL: gldff1b_s_scalar_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1b { z0.s }, p0/z, [x0, z0.s, uxtw]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> %pg,
                                                                                             <vscale x 4 x i32> %base,
                                                                                             i64 %offset)
  %res = zext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @gldff1b_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
; CHECK-LABEL: gldff1b_d_scalar_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1b { z0.d }, p0/z, [x0, z0.d]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                             <vscale x 2 x i64> %base,
                                                                                             i64 %offset)
  %res = zext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

; LDFF1H
define <vscale x 4 x i32> @gldff1h_s_scalar_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
; CHECK-LABEL: gldff1h_s_scalar_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1h { z0.s }, p0/z, [x0, z0.s, uxtw]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> %pg,
                                                                                               <vscale x 4 x i32> %base,
                                                                                               i64 %offset)
  %res = zext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @gldff1h_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
; CHECK-LABEL: gldff1h_d_scalar_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1h { z0.d }, p0/z, [x0, z0.d]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                               <vscale x 2 x i64> %base,
                                                                                               i64 %offset)
  %res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

; LDFF1W
define <vscale x 4 x i32> @gldff1w_s_scalar_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
; CHECK-LABEL: gldff1w_s_scalar_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1w { z0.s }, p0/z, [x0, z0.s, uxtw]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> %pg,
                                                                                               <vscale x 4 x i32> %base,
                                                                                               i64 %offset)
  ret <vscale x 4 x i32> %load
}

define <vscale x 2 x i64> @gldff1w_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
; CHECK-LABEL: gldff1w_d_scalar_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1w { z0.d }, p0/z, [x0, z0.d]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                               <vscale x 2 x i64> %base,
                                                                                               i64 %offset)
  %res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 4 x float> @gldff1w_s_scalar_offset_float(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
; CHECK-LABEL: gldff1w_s_scalar_offset_float:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1w { z0.s }, p0/z, [x0, z0.s, uxtw]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x float> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x i1> %pg,
                                                                                                 <vscale x 4 x i32> %base,
                                                                                                 i64 %offset)
  ret <vscale x 4 x float> %load
}

; LDFF1D
define <vscale x 2 x i64> @gldff1d_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
; CHECK-LABEL: gldff1d_d_scalar_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1d { z0.d }, p0/z, [x0, z0.d]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                               <vscale x 2 x i64> %base,
                                                                                               i64 %offset)
  ret <vscale x 2 x i64> %load
}

define <vscale x 2 x double> @gldff1d_d_scalar_offset_double(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
; CHECK-LABEL: gldff1d_d_scalar_offset_double:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1d { z0.d }, p0/z, [x0, z0.d]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x double> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                                  <vscale x 2 x i64> %base,
                                                                                                  i64 %offset)
  ret <vscale x 2 x double> %load
}

;
; LDFF1SB, LDFF1SW, LDFF1SH: vector base + scalar offset (index)
;   e.g. ldff1sb { z0.d }, p0/z, [x0, z0.d]
;

; LDFF1SB
define <vscale x 4 x i32> @gldff1sb_s_scalar_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
; CHECK-LABEL: gldff1sb_s_scalar_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1sb { z0.s }, p0/z, [x0, z0.s, uxtw]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> %pg,
                                                                                             <vscale x 4 x i32> %base,
                                                                                             i64 %offset)
  %res = sext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @gldff1sb_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
; CHECK-LABEL: gldff1sb_d_scalar_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1sb { z0.d }, p0/z, [x0, z0.d]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                             <vscale x 2 x i64> %base,
                                                                                             i64 %offset)
  %res = sext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

; LDFF1SH
define <vscale x 4 x i32> @gldff1sh_s_scalar_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
; CHECK-LABEL: gldff1sh_s_scalar_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1sh { z0.s }, p0/z, [x0, z0.s, uxtw]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> %pg,
                                                                                               <vscale x 4 x i32> %base,
                                                                                               i64 %offset)
  %res = sext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @gldff1sh_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
; CHECK-LABEL: gldff1sh_d_scalar_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1sh { z0.d }, p0/z, [x0, z0.d]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                               <vscale x 2 x i64> %base,
                                                                                               i64 %offset)
  %res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

; LDFF1SW
define <vscale x 2 x i64> @gldff1sw_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
; CHECK-LABEL: gldff1sw_d_scalar_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1sw { z0.d }, p0/z, [x0, z0.d]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                               <vscale x 2 x i64> %base,
                                                                                               i64 %offset)
  %res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

; LDFF1B/LDFF1SB
declare <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i64)
declare <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)

; LDFF1H/LDFF1SH
declare <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i64)
declare <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)

; LDFF1W/LDFF1SW
declare <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i64)
declare <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)

declare <vscale x 4 x float> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i64)

; LDFF1D
declare <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)

declare <vscale x 2 x double> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)
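
;
; Usage sketch (illustrative only, not one of the checked tests above): a
; first-faulting gather is normally bracketed by FFR accesses so the caller
; can tell which lanes actually loaded. This is a minimal sketch assuming the
; standard setffr/rdffr/convert.from.svbool intrinsics; the function name
; below is hypothetical.
;
define <vscale x 2 x i64> @gldff1d_ffr_usage_sketch(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
  ; Set all FFR elements to true before the first-faulting load.
  call void @llvm.aarch64.sve.setffr()
  %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                               <vscale x 2 x i64> %base,
                                                                                               i64 %offset)
  ; Read the FFR back: elements from the first faulting lane onward are false.
  %ffr = call <vscale x 16 x i1> @llvm.aarch64.sve.rdffr()
  %ok = call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> %ffr)
  ; Keep only the lanes that loaded successfully; zero the rest.
  %res = select <vscale x 2 x i1> %ok, <vscale x 2 x i64> %load, <vscale x 2 x i64> zeroinitializer
  ret <vscale x 2 x i64> %res
}

declare void @llvm.aarch64.sve.setffr()
declare <vscale x 16 x i1> @llvm.aarch64.sve.rdffr()
declare <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1>)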