; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -verify-machineinstrs < %s | FileCheck %s

;
; LDFF1B
;

define <vscale x 16 x i8> @ldff1b(<vscale x 16 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldff1b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1b { z0.b }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 16 x i8> @llvm.aarch64.sve.ldff1.nxv16i8(<vscale x 16 x i1> %pg, ptr %a)
  ret <vscale x 16 x i8> %load
}

define <vscale x 16 x i8> @ldff1b_reg(<vscale x 16 x i1> %pg, ptr %a, i64 %offset) {
; CHECK-LABEL: ldff1b_reg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1b { z0.b }, p0/z, [x0, x1]
; CHECK-NEXT:    ret
  %base = getelementptr i8, ptr %a, i64 %offset
  %load = call <vscale x 16 x i8> @llvm.aarch64.sve.ldff1.nxv16i8(<vscale x 16 x i1> %pg, ptr %base)
  ret <vscale x 16 x i8> %load
}

define <vscale x 8 x i16> @ldff1b_h(<vscale x 8 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldff1b_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1b { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 8 x i8> @llvm.aarch64.sve.ldff1.nxv8i8(<vscale x 8 x i1> %pg, ptr %a)
  %res = zext <vscale x 8 x i8> %load to <vscale x 8 x i16>
  ret <vscale x 8 x i16> %res
}

define <vscale x 8 x i16> @ldff1b_h_reg(<vscale x 8 x i1> %pg, ptr %a, i64 %offset) {
; CHECK-LABEL: ldff1b_h_reg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1b { z0.h }, p0/z, [x0, x1]
; CHECK-NEXT:    ret
  %base = getelementptr i8, ptr %a, i64 %offset
  %load = call <vscale x 8 x i8> @llvm.aarch64.sve.ldff1.nxv8i8(<vscale x 8 x i1> %pg, ptr %base)
  %res = zext <vscale x 8 x i8> %load to <vscale x 8 x i16>
  ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @ldff1b_s(<vscale x 4 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldff1b_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1b { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.nxv4i8(<vscale x 4 x i1> %pg, ptr %a)
  %res = zext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @ldff1b_s_reg(<vscale x 4 x i1> %pg, ptr %a, i64 %offset) {
; CHECK-LABEL: ldff1b_s_reg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1b { z0.s }, p0/z, [x0, x1]
; CHECK-NEXT:    ret
  %base = getelementptr i8, ptr %a, i64 %offset
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.nxv4i8(<vscale x 4 x i1> %pg, ptr %base)
  %res = zext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @ldff1b_d(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldff1b_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1b { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.nxv2i8(<vscale x 2 x i1> %pg, ptr %a)
  %res = zext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ldff1b_d_reg(<vscale x 2 x i1> %pg, ptr %a, i64 %offset) {
; CHECK-LABEL: ldff1b_d_reg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1b { z0.d }, p0/z, [x0, x1]
; CHECK-NEXT:    ret
  %base = getelementptr i8, ptr %a, i64 %offset
  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.nxv2i8(<vscale x 2 x i1> %pg, ptr %base)
  %res = zext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

;
; LDFF1SB
;

define <vscale x 8 x i16> @ldff1sb_h(<vscale x 8 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldff1sb_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1sb { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 8 x i8> @llvm.aarch64.sve.ldff1.nxv8i8(<vscale x 8 x i1> %pg, ptr %a)
  %res = sext <vscale x 8 x i8> %load to <vscale x 8 x i16>
  ret <vscale x 8 x i16> %res
}

define <vscale x 8 x i16> @ldff1sb_h_reg(<vscale x 8 x i1> %pg, ptr %a, i64 %offset) {
; CHECK-LABEL: ldff1sb_h_reg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1sb { z0.h }, p0/z, [x0, x1]
; CHECK-NEXT:    ret
  %base = getelementptr i8, ptr %a, i64 %offset
  %load = call <vscale x 8 x i8> @llvm.aarch64.sve.ldff1.nxv8i8(<vscale x 8 x i1> %pg, ptr %base)
  %res = sext <vscale x 8 x i8> %load to <vscale x 8 x i16>
  ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @ldff1sb_s(<vscale x 4 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldff1sb_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1sb { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.nxv4i8(<vscale x 4 x i1> %pg, ptr %a)
  %res = sext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @ldff1sb_s_reg(<vscale x 4 x i1> %pg, ptr %a, i64 %offset) {
; CHECK-LABEL: ldff1sb_s_reg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1sb { z0.s }, p0/z, [x0, x1]
; CHECK-NEXT:    ret
  %base = getelementptr i8, ptr %a, i64 %offset
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.nxv4i8(<vscale x 4 x i1> %pg, ptr %base)
  %res = sext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @ldff1sb_d(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldff1sb_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1sb { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.nxv2i8(<vscale x 2 x i1> %pg, ptr %a)
  %res = sext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ldff1sb_d_reg(<vscale x 2 x i1> %pg, ptr %a, i64 %offset) {
; CHECK-LABEL: ldff1sb_d_reg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1sb { z0.d }, p0/z, [x0, x1]
; CHECK-NEXT:    ret
  %base = getelementptr i8, ptr %a, i64 %offset
  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.nxv2i8(<vscale x 2 x i1> %pg, ptr %base)
  %res = sext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

;
; LDFF1H
;

define <vscale x 8 x i16> @ldff1h(<vscale x 8 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldff1h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 8 x i16> @llvm.aarch64.sve.ldff1.nxv8i16(<vscale x 8 x i1> %pg, ptr %a)
  ret <vscale x 8 x i16> %load
}

define <vscale x 8 x i16> @ldff1h_reg(<vscale x 8 x i1> %pg, ptr %a, i64 %offset) {
; CHECK-LABEL: ldff1h_reg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1h { z0.h }, p0/z, [x0, x1, lsl #1]
; CHECK-NEXT:    ret
  %base = getelementptr i16, ptr %a, i64 %offset
  %load = call <vscale x 8 x i16> @llvm.aarch64.sve.ldff1.nxv8i16(<vscale x 8 x i1> %pg, ptr %base)
  ret <vscale x 8 x i16> %load
}

define <vscale x 4 x i32> @ldff1h_s(<vscale x 4 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldff1h_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1h { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.nxv4i16(<vscale x 4 x i1> %pg, ptr %a)
  %res = zext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @ldff1h_s_reg(<vscale x 4 x i1> %pg, ptr %a, i64 %offset) {
; CHECK-LABEL: ldff1h_s_reg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1h { z0.s }, p0/z, [x0, x1, lsl #1]
; CHECK-NEXT:    ret
  %base = getelementptr i16, ptr %a, i64 %offset
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.nxv4i16(<vscale x 4 x i1> %pg, ptr %base)
  %res = zext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @ldff1h_d(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldff1h_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1h { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.nxv2i16(<vscale x 2 x i1> %pg, ptr %a)
  %res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ldff1h_d_reg(<vscale x 2 x i1> %pg, ptr %a, i64 %offset) {
; CHECK-LABEL: ldff1h_d_reg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1h { z0.d }, p0/z, [x0, x1, lsl #1]
; CHECK-NEXT:    ret
  %base = getelementptr i16, ptr %a, i64 %offset
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.nxv2i16(<vscale x 2 x i1> %pg, ptr %base)
  %res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 8 x half> @ldff1h_f16(<vscale x 8 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldff1h_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 8 x half> @llvm.aarch64.sve.ldff1.nxv8f16(<vscale x 8 x i1> %pg, ptr %a)
  ret <vscale x 8 x half> %load
}

define <vscale x 8 x bfloat> @ldff1h_bf16(<vscale x 8 x i1> %pg, ptr %a) #0 {
; CHECK-LABEL: ldff1h_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ldff1.nxv8bf16(<vscale x 8 x i1> %pg, ptr %a)
  ret <vscale x 8 x bfloat> %load
}

define <vscale x 8 x half> @ldff1h_f16_reg(<vscale x 8 x i1> %pg, ptr %a, i64 %offset) {
; CHECK-LABEL: ldff1h_f16_reg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1h { z0.h }, p0/z, [x0, x1, lsl #1]
; CHECK-NEXT:    ret
  %base = getelementptr half, ptr %a, i64 %offset
  %load = call <vscale x 8 x half> @llvm.aarch64.sve.ldff1.nxv8f16(<vscale x 8 x i1> %pg, ptr %base)
  ret <vscale x 8 x half> %load
}

define <vscale x 8 x bfloat> @ldff1h_bf16_reg(<vscale x 8 x i1> %pg, ptr %a, i64 %offset) #0 {
; CHECK-LABEL: ldff1h_bf16_reg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1h { z0.h }, p0/z, [x0, x1, lsl #1]
; CHECK-NEXT:    ret
  %base = getelementptr bfloat, ptr %a, i64 %offset
  %load = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ldff1.nxv8bf16(<vscale x 8 x i1> %pg, ptr %base)
  ret <vscale x 8 x bfloat> %load
}

;
; LDFF1SH
;

define <vscale x 4 x i32> @ldff1sh_s(<vscale x 4 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldff1sh_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1sh { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.nxv4i16(<vscale x 4 x i1> %pg, ptr %a)
  %res = sext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @ldff1sh_s_reg(<vscale x 4 x i1> %pg, ptr %a, i64 %offset) {
; CHECK-LABEL: ldff1sh_s_reg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1sh { z0.s }, p0/z, [x0, x1, lsl #1]
; CHECK-NEXT:    ret
  %base = getelementptr i16, ptr %a, i64 %offset
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.nxv4i16(<vscale x 4 x i1> %pg, ptr %base)
  %res = sext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @ldff1sh_d(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldff1sh_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1sh { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.nxv2i16(<vscale x 2 x i1> %pg, ptr %a)
  %res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ldff1sh_d_reg(<vscale x 2 x i1> %pg, ptr %a, i64 %offset) {
; CHECK-LABEL: ldff1sh_d_reg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1sh { z0.d }, p0/z, [x0, x1, lsl #1]
; CHECK-NEXT:    ret
  %base = getelementptr i16, ptr %a, i64 %offset
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.nxv2i16(<vscale x 2 x i1> %pg, ptr %base)
  %res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

;
; LDFF1W
;

define <vscale x 4 x i32> @ldff1w(<vscale x 4 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldff1w:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.nxv4i32(<vscale x 4 x i1> %pg, ptr %a)
  ret <vscale x 4 x i32> %load
}

define <vscale x 4 x i32> @ldff1w_reg(<vscale x 4 x i1> %pg, ptr %a, i64 %offset) {
; CHECK-LABEL: ldff1w_reg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1w { z0.s }, p0/z, [x0, x1, lsl #2]
; CHECK-NEXT:    ret
  %base = getelementptr i32, ptr %a, i64 %offset
  %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.nxv4i32(<vscale x 4 x i1> %pg, ptr %base)
  ret <vscale x 4 x i32> %load
}

define <vscale x 2 x i64> @ldff1w_d(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldff1w_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1w { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.nxv2i32(<vscale x 2 x i1> %pg, ptr %a)
  %res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ldff1w_d_reg(<vscale x 2 x i1> %pg, ptr %a, i64 %offset) {
; CHECK-LABEL: ldff1w_d_reg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1w { z0.d }, p0/z, [x0, x1, lsl #2]
; CHECK-NEXT:    ret
  %base = getelementptr i32, ptr %a, i64 %offset
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.nxv2i32(<vscale x 2 x i1> %pg, ptr %base)
  %res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 4 x float> @ldff1w_f32(<vscale x 4 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldff1w_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x float> @llvm.aarch64.sve.ldff1.nxv4f32(<vscale x 4 x i1> %pg, ptr %a)
  ret <vscale x 4 x float> %load
}

define <vscale x 4 x float> @ldff1w_f32_reg(<vscale x 4 x i1> %pg, ptr %a, i64 %offset) {
; CHECK-LABEL: ldff1w_f32_reg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1w { z0.s }, p0/z, [x0, x1, lsl #2]
; CHECK-NEXT:    ret
  %base = getelementptr float, ptr %a, i64 %offset
  %load = call <vscale x 4 x float> @llvm.aarch64.sve.ldff1.nxv4f32(<vscale x 4 x i1> %pg, ptr %base)
  ret <vscale x 4 x float> %load
}

define <vscale x 2 x float> @ldff1w_2f32(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldff1w_2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1w { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x float> @llvm.aarch64.sve.ldff1.nxv2f32(<vscale x 2 x i1> %pg, ptr %a)
  ret <vscale x 2 x float> %load
}

define <vscale x 2 x float> @ldff1w_2f32_reg(<vscale x 2 x i1> %pg, ptr %a, i64 %offset) {
; CHECK-LABEL: ldff1w_2f32_reg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1w { z0.d }, p0/z, [x0, x1, lsl #2]
; CHECK-NEXT:    ret
  %base = getelementptr float, ptr %a, i64 %offset
  %load = call <vscale x 2 x float> @llvm.aarch64.sve.ldff1.nxv2f32(<vscale x 2 x i1> %pg, ptr %base)
  ret <vscale x 2 x float> %load
}

;
; LDFF1SW
;

define <vscale x 2 x i64> @ldff1sw_d(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldff1sw_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1sw { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.nxv2i32(<vscale x 2 x i1> %pg, ptr %a)
  %res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ldff1sw_d_reg(<vscale x 2 x i1> %pg, ptr %a, i64 %offset) {
; CHECK-LABEL: ldff1sw_d_reg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1sw { z0.d }, p0/z, [x0, x1, lsl #2]
; CHECK-NEXT:    ret
  %base = getelementptr i32, ptr %a, i64 %offset
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.nxv2i32(<vscale x 2 x i1> %pg, ptr %base)
  %res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

;
; LDFF1D
;

define <vscale x 2 x i64> @ldff1d(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldff1d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.nxv2i64(<vscale x 2 x i1> %pg, ptr %a)
  ret <vscale x 2 x i64> %load
}

define <vscale x 2 x i64> @ldff1d_reg(<vscale x 2 x i1> %pg, ptr %a, i64 %offset) {
; CHECK-LABEL: ldff1d_reg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1d { z0.d }, p0/z, [x0, x1, lsl #3]
; CHECK-NEXT:    ret
  %base = getelementptr i64, ptr %a, i64 %offset
  %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.nxv2i64(<vscale x 2 x i1> %pg, ptr %base)
  ret <vscale x 2 x i64> %load
}

define <vscale x 2 x double> @ldff1d_f64(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldff1d_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x double> @llvm.aarch64.sve.ldff1.nxv2f64(<vscale x 2 x i1> %pg, ptr %a)
  ret <vscale x 2 x double> %load
}

define <vscale x 2 x double> @ldff1d_f64_reg(<vscale x 2 x i1> %pg, ptr %a, i64 %offset) {
; CHECK-LABEL: ldff1d_f64_reg:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldff1d { z0.d }, p0/z, [x0, x1, lsl #3]
; CHECK-NEXT:    ret
  %base = getelementptr double, ptr %a, i64 %offset
  %load = call <vscale x 2 x double> @llvm.aarch64.sve.ldff1.nxv2f64(<vscale x 2 x i1> %pg, ptr %base)
  ret <vscale x 2 x double> %load
}

declare <vscale x 16 x i8> @llvm.aarch64.sve.ldff1.nxv16i8(<vscale x 16 x i1>, ptr)

declare <vscale x 8 x i8> @llvm.aarch64.sve.ldff1.nxv8i8(<vscale x 8 x i1>, ptr)
declare <vscale x 8 x i16> @llvm.aarch64.sve.ldff1.nxv8i16(<vscale x 8 x i1>, ptr)
declare <vscale x 8 x half> @llvm.aarch64.sve.ldff1.nxv8f16(<vscale x 8 x i1>, ptr)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.ldff1.nxv8bf16(<vscale x 8 x i1>, ptr)

declare <vscale x 4 x i8> @llvm.aarch64.sve.ldff1.nxv4i8(<vscale x 4 x i1>, ptr)
declare <vscale x 4 x i16> @llvm.aarch64.sve.ldff1.nxv4i16(<vscale x 4 x i1>, ptr)
declare <vscale x 4 x i32> @llvm.aarch64.sve.ldff1.nxv4i32(<vscale x 4 x i1>, ptr)
declare <vscale x 2 x float> @llvm.aarch64.sve.ldff1.nxv2f32(<vscale x 2 x i1>, ptr)
declare <vscale x 4 x float> @llvm.aarch64.sve.ldff1.nxv4f32(<vscale x 4 x i1>, ptr)

declare <vscale x 2 x i8> @llvm.aarch64.sve.ldff1.nxv2i8(<vscale x 2 x i1>, ptr)
declare <vscale x 2 x i16> @llvm.aarch64.sve.ldff1.nxv2i16(<vscale x 2 x i1>, ptr)
declare <vscale x 2 x i32> @llvm.aarch64.sve.ldff1.nxv2i32(<vscale x 2 x i1>, ptr)
declare <vscale x 2 x i64> @llvm.aarch64.sve.ldff1.nxv2i64(<vscale x 2 x i1>, ptr)
declare <vscale x 2 x double> @llvm.aarch64.sve.ldff1.nxv2f64(<vscale x 2 x i1>, ptr)

; +bf16 is required for the bfloat version.
attributes #0 = { "target-features"="+sve,+bf16" }