; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve -verify-machineinstrs < %s | FileCheck %s

; Range testing for the immediate in the reg+imm(mulvl) addressing
; mode is done only for one instruction. The rest of the instructions
; test only one in-bound immediate value.

define <vscale x 16 x i8> @ldnf1b(<vscale x 16 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1b:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1b { z0.b }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1> %pg, ptr %a)
  ret <vscale x 16 x i8> %load
}

define <vscale x 16 x i8> @ldnf1b_out_of_lower_bound(<vscale x 16 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1b_out_of_lower_bound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rdvl x8, #-9
; CHECK-NEXT:    add x8, x0, x8
; CHECK-NEXT:    ldnf1b { z0.b }, p0/z, [x8]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 16 x i8>, ptr %a, i64 -9
  %base_scalar = bitcast ptr %base to ptr
  %load = call <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1> %pg, ptr %base_scalar)
  ret <vscale x 16 x i8> %load
}

define <vscale x 16 x i8> @ldnf1b_lower_bound(<vscale x 16 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1b_lower_bound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1b { z0.b }, p0/z, [x0, #-8, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 16 x i8>, ptr %a, i64 -8
  %base_scalar = bitcast ptr %base to ptr
  %load = call <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1> %pg, ptr %base_scalar)
  ret <vscale x 16 x i8> %load
}

define <vscale x 16 x i8> @ldnf1b_inbound(<vscale x 16 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1b_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1b { z0.b }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 16 x i8>, ptr %a, i64 1
  %base_scalar = bitcast ptr %base to ptr
  %load = call <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1> %pg, ptr %base_scalar)
  ret <vscale x 16 x i8> %load
}

define <vscale x 16 x i8> @ldnf1b_upper_bound(<vscale x 16 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1b_upper_bound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1b { z0.b }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 16 x i8>, ptr %a, i64 7
  %base_scalar = bitcast ptr %base to ptr
  %load = call <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1> %pg, ptr %base_scalar)
  ret <vscale x 16 x i8> %load
}

define <vscale x 16 x i8> @ldnf1b_out_of_upper_bound(<vscale x 16 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1b_out_of_upper_bound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rdvl x8, #8
; CHECK-NEXT:    add x8, x0, x8
; CHECK-NEXT:    ldnf1b { z0.b }, p0/z, [x8]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 16 x i8>, ptr %a, i64 8
  %base_scalar = bitcast ptr %base to ptr
  %load = call <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1> %pg, ptr %base_scalar)
  ret <vscale x 16 x i8> %load
}
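
; The bounds tests above pin down the reg+imm(mulvl) form: the immediate is
; a signed 4-bit value, so offsets in [-8, 7] (in multiples of the transfer
; size) fold into the addressing mode, while -9 and 8 are instead
; materialized with rdvl + add.

; The extending loads below load an unpacked memory type; a zext of the
; loaded value selects the unsigned form (ldnf1b/ldnf1h/ldnf1w) and a sext
; selects the signed form (ldnf1sb/ldnf1sh/ldnf1sw).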
define <vscale x 8 x i16> @ldnf1b_h(<vscale x 8 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1b_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1b { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1> %pg, ptr %a)
  %res = zext <vscale x 8 x i8> %load to <vscale x 8 x i16>
  ret <vscale x 8 x i16> %res
}

define <vscale x 8 x i16> @ldnf1b_h_inbound(<vscale x 8 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1b_h_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1b { z0.h }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 8 x i8>, ptr %a, i64 7
  %base_scalar = bitcast ptr %base to ptr
  %load = call <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1> %pg, ptr %base_scalar)
  %res = zext <vscale x 8 x i8> %load to <vscale x 8 x i16>
  ret <vscale x 8 x i16> %res
}

define <vscale x 8 x i16> @ldnf1sb_h(<vscale x 8 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1sb_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1sb { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1> %pg, ptr %a)
  %res = sext <vscale x 8 x i8> %load to <vscale x 8 x i16>
  ret <vscale x 8 x i16> %res
}

define <vscale x 8 x i16> @ldnf1sb_h_inbound(<vscale x 8 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1sb_h_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1sb { z0.h }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 8 x i8>, ptr %a, i64 7
  %base_scalar = bitcast ptr %base to ptr
  %load = call <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1> %pg, ptr %base_scalar)
  %res = sext <vscale x 8 x i8> %load to <vscale x 8 x i16>
  ret <vscale x 8 x i16> %res
}

define <vscale x 8 x i16> @ldnf1h(<vscale x 8 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 8 x i16> @llvm.aarch64.sve.ldnf1.nxv8i16(<vscale x 8 x i1> %pg, ptr %a)
  ret <vscale x 8 x i16> %load
}

define <vscale x 8 x i16> @ldnf1h_inbound(<vscale x 8 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1h_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1h { z0.h }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 8 x i16>, ptr %a, i64 1
  %base_scalar = bitcast ptr %base to ptr
  %load = call <vscale x 8 x i16> @llvm.aarch64.sve.ldnf1.nxv8i16(<vscale x 8 x i1> %pg, ptr %base_scalar)
  ret <vscale x 8 x i16> %load
}

define <vscale x 8 x half> @ldnf1h_f16(<vscale x 8 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1h_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 8 x half> @llvm.aarch64.sve.ldnf1.nxv8f16(<vscale x 8 x i1> %pg, ptr %a)
  ret <vscale x 8 x half> %load
}

define <vscale x 8 x bfloat> @ldnf1h_bf16(<vscale x 8 x i1> %pg, ptr %a) #0 {
; CHECK-LABEL: ldnf1h_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ldnf1.nxv8bf16(<vscale x 8 x i1> %pg, ptr %a)
  ret <vscale x 8 x bfloat> %load
}

define <vscale x 8 x half> @ldnf1h_f16_inbound(<vscale x 8 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1h_f16_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1h { z0.h }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 8 x half>, ptr %a, i64 1
  %base_scalar = bitcast ptr %base to ptr
  %load = call <vscale x 8 x half> @llvm.aarch64.sve.ldnf1.nxv8f16(<vscale x 8 x i1> %pg, ptr %base_scalar)
  ret <vscale x 8 x half> %load
}

define <vscale x 8 x bfloat> @ldnf1h_bf16_inbound(<vscale x 8 x i1> %pg, ptr %a) #0 {
; CHECK-LABEL: ldnf1h_bf16_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1h { z0.h }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 8 x bfloat>, ptr %a, i64 1
  %base_scalar = bitcast ptr %base to ptr
  %load = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ldnf1.nxv8bf16(<vscale x 8 x i1> %pg, ptr %base_scalar)
  ret <vscale x 8 x bfloat> %load
}
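
; In the extending tests the offset is computed by a getelementptr over the
; unpacked memory type, so e.g. a step of 1 over <vscale x 4 x i8> is VL/4
; bytes, matching the immediate scaling of the .s-element form of ldnf1b.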
define <vscale x 4 x i32> @ldnf1b_s(<vscale x 4 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1b_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1b { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1> %pg, ptr %a)
  %res = zext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @ldnf1b_s_inbound(<vscale x 4 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1b_s_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1b { z0.s }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 4 x i8>, ptr %a, i64 7
  %base_scalar = bitcast ptr %base to ptr
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1> %pg, ptr %base_scalar)
  %res = zext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @ldnf1sb_s(<vscale x 4 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1sb_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1sb { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1> %pg, ptr %a)
  %res = sext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @ldnf1sb_s_inbound(<vscale x 4 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1sb_s_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1sb { z0.s }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 4 x i8>, ptr %a, i64 7
  %base_scalar = bitcast ptr %base to ptr
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1> %pg, ptr %base_scalar)
  %res = sext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @ldnf1h_s(<vscale x 4 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1h_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1h { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> %pg, ptr %a)
  %res = zext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @ldnf1h_s_inbound(<vscale x 4 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1h_s_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1h { z0.s }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 4 x i16>, ptr %a, i64 7
  %base_scalar = bitcast ptr %base to ptr
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> %pg, ptr %base_scalar)
  %res = zext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @ldnf1sh_s(<vscale x 4 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1sh_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1sh { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> %pg, ptr %a)
  %res = sext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @ldnf1sh_s_inbound(<vscale x 4 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1sh_s_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1sh { z0.s }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 4 x i16>, ptr %a, i64 7
  %base_scalar = bitcast ptr %base to ptr
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1> %pg, ptr %base_scalar)
  %res = sext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @ldnf1w(<vscale x 4 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1w:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ldnf1.nxv4i32(<vscale x 4 x i1> %pg, ptr %a)
  ret <vscale x 4 x i32> %load
}

define <vscale x 4 x i32> @ldnf1w_inbound(<vscale x 4 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1w_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1w { z0.s }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 4 x i32>, ptr %a, i64 7
  %base_scalar = bitcast ptr %base to ptr
  %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ldnf1.nxv4i32(<vscale x 4 x i1> %pg, ptr %base_scalar)
  ret <vscale x 4 x i32> %load
}

define <vscale x 4 x float> @ldnf1w_f32(<vscale x 4 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1w_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x float> @llvm.aarch64.sve.ldnf1.nxv4f32(<vscale x 4 x i1> %pg, ptr %a)
  ret <vscale x 4 x float> %load
}

define <vscale x 4 x float> @ldnf1w_f32_inbound(<vscale x 4 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1w_f32_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1w { z0.s }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 4 x float>, ptr %a, i64 7
  %base_scalar = bitcast ptr %base to ptr
  %load = call <vscale x 4 x float> @llvm.aarch64.sve.ldnf1.nxv4f32(<vscale x 4 x i1> %pg, ptr %base_scalar)
  ret <vscale x 4 x float> %load
}
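
; 64-bit element variants; ldnf1sw (sign-extend word) exists only for .d.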
define <vscale x 2 x i64> @ldnf1b_d(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1b_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1b { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1> %pg, ptr %a)
  %res = zext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ldnf1b_d_inbound(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1b_d_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1b { z0.d }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 2 x i8>, ptr %a, i64 7
  %base_scalar = bitcast ptr %base to ptr
  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1> %pg, ptr %base_scalar)
  %res = zext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ldnf1sb_d(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1sb_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1sb { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1> %pg, ptr %a)
  %res = sext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ldnf1sb_d_inbound(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1sb_d_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1sb { z0.d }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 2 x i8>, ptr %a, i64 7
  %base_scalar = bitcast ptr %base to ptr
  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1> %pg, ptr %base_scalar)
  %res = sext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ldnf1h_d(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1h_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1h { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> %pg, ptr %a)
  %res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ldnf1h_d_inbound(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1h_d_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1h { z0.d }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 2 x i16>, ptr %a, i64 7
  %base_scalar = bitcast ptr %base to ptr
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> %pg, ptr %base_scalar)
  %res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ldnf1sh_d(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1sh_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1sh { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> %pg, ptr %a)
  %res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ldnf1sh_d_inbound(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1sh_d_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1sh { z0.d }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 2 x i16>, ptr %a, i64 7
  %base_scalar = bitcast ptr %base to ptr
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1> %pg, ptr %base_scalar)
  %res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ldnf1w_d(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1w_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1w { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> %pg, ptr %a)
  %res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ldnf1w_d_inbound(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1w_d_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1w { z0.d }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 2 x i32>, ptr %a, i64 7
  %base_scalar = bitcast ptr %base to ptr
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> %pg, ptr %base_scalar)
  %res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ldnf1sw_d(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1sw_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1sw { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> %pg, ptr %a)
  %res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ldnf1sw_d_inbound(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1sw_d_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1sw { z0.d }, p0/z, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 2 x i32>, ptr %a, i64 7
  %base_scalar = bitcast ptr %base to ptr
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1> %pg, ptr %base_scalar)
  %res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ldnf1d(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ldnf1.nxv2i64(<vscale x 2 x i1> %pg, ptr %a)
  ret <vscale x 2 x i64> %load
}

define <vscale x 2 x i64> @ldnf1d_inbound(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1d_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1d { z0.d }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 2 x i64>, ptr %a, i64 1
  %base_scalar = bitcast ptr %base to ptr
  %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ldnf1.nxv2i64(<vscale x 2 x i1> %pg, ptr %base_scalar)
  ret <vscale x 2 x i64> %load
}

define <vscale x 2 x double> @ldnf1d_f64(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1d_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x double> @llvm.aarch64.sve.ldnf1.nxv2f64(<vscale x 2 x i1> %pg, ptr %a)
  ret <vscale x 2 x double> %load
}

define <vscale x 2 x double> @ldnf1d_f64_inbound(<vscale x 2 x i1> %pg, ptr %a) {
; CHECK-LABEL: ldnf1d_f64_inbound:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnf1d { z0.d }, p0/z, [x0, #1, mul vl]
; CHECK-NEXT:    ret
  %base = getelementptr <vscale x 2 x double>, ptr %a, i64 1
  %base_scalar = bitcast ptr %base to ptr
  %load = call <vscale x 2 x double> @llvm.aarch64.sve.ldnf1.nxv2f64(<vscale x 2 x i1> %pg, ptr %base_scalar)
  ret <vscale x 2 x double> %load
}
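
; Declarations of the overloaded llvm.aarch64.sve.ldnf1 intrinsic, one
; mangling per loaded vector type.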
declare <vscale x 16 x i8> @llvm.aarch64.sve.ldnf1.nxv16i8(<vscale x 16 x i1>, ptr)

declare <vscale x 8 x i8> @llvm.aarch64.sve.ldnf1.nxv8i8(<vscale x 8 x i1>, ptr)
declare <vscale x 8 x i16> @llvm.aarch64.sve.ldnf1.nxv8i16(<vscale x 8 x i1>, ptr)
declare <vscale x 8 x half> @llvm.aarch64.sve.ldnf1.nxv8f16(<vscale x 8 x i1>, ptr)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.ldnf1.nxv8bf16(<vscale x 8 x i1>, ptr)

declare <vscale x 4 x i8> @llvm.aarch64.sve.ldnf1.nxv4i8(<vscale x 4 x i1>, ptr)
declare <vscale x 4 x i16> @llvm.aarch64.sve.ldnf1.nxv4i16(<vscale x 4 x i1>, ptr)
declare <vscale x 4 x i32> @llvm.aarch64.sve.ldnf1.nxv4i32(<vscale x 4 x i1>, ptr)
declare <vscale x 4 x float> @llvm.aarch64.sve.ldnf1.nxv4f32(<vscale x 4 x i1>, ptr)

declare <vscale x 2 x i8> @llvm.aarch64.sve.ldnf1.nxv2i8(<vscale x 2 x i1>, ptr)
declare <vscale x 2 x i16> @llvm.aarch64.sve.ldnf1.nxv2i16(<vscale x 2 x i1>, ptr)
declare <vscale x 2 x i32> @llvm.aarch64.sve.ldnf1.nxv2i32(<vscale x 2 x i1>, ptr)
declare <vscale x 2 x i64> @llvm.aarch64.sve.ldnf1.nxv2i64(<vscale x 2 x i1>, ptr)
declare <vscale x 2 x double> @llvm.aarch64.sve.ldnf1.nxv2f64(<vscale x 2 x i1>, ptr)

; +bf16 is required for the bfloat version.
attributes #0 = { "target-features"="+sve,+bf16" }