; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s

;
; LD1H, LD1W, LD1D: base + 64-bit scaled offset
; e.g. ld1h z0.d, p0/z, [x0, z0.d, lsl #1]
;

define <vscale x 2 x i64> @gld1h_index(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1h_index:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0, z0.d, lsl #1]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b)
  %res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @gld1w_index(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1w_index:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0, z0.d, lsl #2]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b)
  %res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @gld1d_index(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1d_index:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, z0.d, lsl #3]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.index.nxv2i64(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b)
  ret <vscale x 2 x i64> %load
}

define <vscale x 2 x double> @gld1d_index_double(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1d_index_double:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, z0.d, lsl #3]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.index.nxv2f64(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b)
  ret <vscale x 2 x double> %load
}

;
; LD1SH, LD1SW: base + 64-bit scaled offset
; e.g. ld1sh z0.d, p0/z, [x0, z0.d, lsl #1]
;

define <vscale x 2 x i64> @gld1sh_index(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1sh_index:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sh { z0.d }, p0/z, [x0, z0.d, lsl #1]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b)
  %res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @gld1sw_index(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1sw_index:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sw { z0.d }, p0/z, [x0, z0.d, lsl #2]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b)
  %res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

;
; LD1H, LD1W, LD1D: base + 64-bit sxtw'd scaled offset
; e.g. ld1h z0.d, p0/z, [x0, z0.d, sxtw #1]
;

define <vscale x 2 x i64> @gld1h_index_sxtw(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1h_index_sxtw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0, z0.d, sxtw #1]
; CHECK-NEXT:    ret
  %sxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %sxtw)
  %res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @gld1w_index_sxtw(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1w_index_sxtw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0, z0.d, sxtw #2]
; CHECK-NEXT:    ret
  %sxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %sxtw)
  %res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @gld1d_index_sxtw(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1d_index_sxtw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, z0.d, sxtw #3]
; CHECK-NEXT:    ret
  %sxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
  %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.index.nxv2i64(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %sxtw)
  ret <vscale x 2 x i64> %load
}

define <vscale x 2 x double> @gld1d_index_double_sxtw(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1d_index_double_sxtw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, z0.d, sxtw #3]
; CHECK-NEXT:    ret
  %sxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
  %load = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.index.nxv2f64(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %sxtw)
  ret <vscale x 2 x double> %load
}

;
; LD1SH, LD1SW: base + 64-bit sxtw'd scaled offset
; e.g. ld1sh z0.d, p0/z, [x0, z0.d, sxtw #1]
;

define <vscale x 2 x i64> @gld1sh_index_sxtw(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1sh_index_sxtw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sh { z0.d }, p0/z, [x0, z0.d, sxtw #1]
; CHECK-NEXT:    ret
  %sxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %sxtw)
  %res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @gld1sw_index_sxtw(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1sw_index_sxtw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sw { z0.d }, p0/z, [x0, z0.d, sxtw #2]
; CHECK-NEXT:    ret
  %sxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %sxtw)
  %res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

;
; LD1H, LD1W, LD1D: base + 64-bit uxtw'd scaled offset
; e.g. ld1h z0.d, p0/z, [x0, z0.d, uxtw #1]
;

define <vscale x 2 x i64> @gld1h_index_uxtw(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1h_index_uxtw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0, z0.d, uxtw #1]
; CHECK-NEXT:    ret
  %uxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %uxtw)
  %res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @gld1w_index_uxtw(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1w_index_uxtw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0, z0.d, uxtw #2]
; CHECK-NEXT:    ret
  %uxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %uxtw)
  %res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @gld1d_index_uxtw(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1d_index_uxtw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, z0.d, uxtw #3]
; CHECK-NEXT:    ret
  %uxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
  %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.index.nxv2i64(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %uxtw)
  ret <vscale x 2 x i64> %load
}

define <vscale x 2 x double> @gld1d_index_double_uxtw(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1d_index_double_uxtw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, z0.d, uxtw #3]
; CHECK-NEXT:    ret
  %uxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
  %load = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.index.nxv2f64(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %uxtw)
  ret <vscale x 2 x double> %load
}

;
; LD1SH, LD1SW: base + 64-bit uxtw'd scaled offset
; e.g. ld1sh z0.d, p0/z, [x0, z0.d, uxtw #1]
;

define <vscale x 2 x i64> @gld1sh_index_uxtw(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1sh_index_uxtw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sh { z0.d }, p0/z, [x0, z0.d, uxtw #1]
; CHECK-NEXT:    ret
  %uxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %uxtw)
  %res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @gld1sw_index_uxtw(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %b) {
; CHECK-LABEL: gld1sw_index_uxtw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sw { z0.d }, p0/z, [x0, z0.d, uxtw #2]
; CHECK-NEXT:    ret
  %uxtw = call <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, <vscale x 2 x i64> %b)
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i64> %uxtw)
  %res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

declare <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.index.nxv2i16(<vscale x 2 x i1>, ptr, <vscale x 2 x i64>)
declare <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.index.nxv2i32(<vscale x 2 x i1>, ptr, <vscale x 2 x i64>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.index.nxv2i64(<vscale x 2 x i1>, ptr, <vscale x 2 x i64>)
declare <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.index.nxv2f64(<vscale x 2 x i1>, ptr, <vscale x 2 x i64>)

declare <vscale x 2 x i64> @llvm.aarch64.sve.sxtw.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.uxtw.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, <vscale x 2 x i64>)
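
;
; A minimal sketch (kept as a comment so it is not part of the FileCheck-verified
; test) of how the extending gathers exercised above are typically reached from C
; via the ACLE in arm_sve.h. The intrinsic spellings svld1uh_gather_index_u64 and
; svld1sw_gather_index_s64 are assumptions for illustration, not names exercised
; by this test:
;
;   #include <stdint.h>
;   #include <arm_sve.h>
;
;   // Zero-extending 16-bit gather: maps to the "ld1h ..., lsl #1" form
;   // checked in @gld1h_index above.
;   svuint64_t gather_u16(svbool_t pg, const uint16_t *base, svuint64_t idx) {
;     return svld1uh_gather_index_u64(pg, base, idx);
;   }
;
;   // Sign-extending 32-bit gather: maps to the "ld1sw ..., lsl #2" form
;   // checked in @gld1sw_index above.
;   svint64_t gather_s32(svbool_t pg, const int32_t *base, svuint64_t idx) {
;     return svld1sw_gather_index_s64(pg, base, idx);
;   }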