; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s

;
; LDNT1B, LDNT1W, LDNT1H, LDNT1D: base + 32-bit unscaled offsets, zero (uxtw)
; extended to 64 bits.
;   e.g. ldnt1h { z0.s }, p0/z, [z0.s, x0]
;

; LDNT1B
define <vscale x 4 x i32> @gldnt1b_s_uxtw(<vscale x 4 x i1> %pg, ptr %base, <vscale x 4 x i32> %b) {
; CHECK-LABEL: gldnt1b_s_uxtw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnt1b { z0.s }, p0/z, [z0.s, x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i8(<vscale x 4 x i1> %pg,
                                                                            ptr %base,
                                                                            <vscale x 4 x i32> %b)
  %res = zext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

; LDNT1H
define <vscale x 4 x i32> @gldnt1h_s_uxtw(<vscale x 4 x i1> %pg, ptr %base, <vscale x 4 x i32> %b) {
; CHECK-LABEL: gldnt1h_s_uxtw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnt1h { z0.s }, p0/z, [z0.s, x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i16(<vscale x 4 x i1> %pg,
                                                                              ptr %base,
                                                                              <vscale x 4 x i32> %b)
  %res = zext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

; LDNT1W
define <vscale x 4 x i32> @gldnt1w_s_uxtw(<vscale x 4 x i1> %pg, ptr %base, <vscale x 4 x i32> %b) {
; CHECK-LABEL: gldnt1w_s_uxtw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnt1w { z0.s }, p0/z, [z0.s, x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i32(<vscale x 4 x i1> %pg,
                                                                              ptr %base,
                                                                              <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %load
}

define <vscale x 4 x float> @gldnt1w_s_uxtw_float(<vscale x 4 x i1> %pg, ptr %base, <vscale x 4 x i32> %b) {
; CHECK-LABEL: gldnt1w_s_uxtw_float:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnt1w { z0.s }, p0/z, [z0.s, x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4f32(<vscale x 4 x i1> %pg,
                                                                                ptr %base,
                                                                                <vscale x 4 x i32> %b)
  ret <vscale x 4 x float> %load
}

;
; LDNT1SB, LDNT1SW, LDNT1SH: base + 32-bit unscaled offsets, zero (uxtw)
; extended to 64 bits.
;   e.g. ldnt1sh { z0.s }, p0/z, [z0.s, x0]
;

; LDNT1SB
define <vscale x 4 x i32> @gldnt1sb_s_uxtw(<vscale x 4 x i1> %pg, ptr %base, <vscale x 4 x i32> %b) {
; CHECK-LABEL: gldnt1sb_s_uxtw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnt1sb { z0.s }, p0/z, [z0.s, x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i8(<vscale x 4 x i1> %pg,
                                                                            ptr %base,
                                                                            <vscale x 4 x i32> %b)
  %res = sext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

; LDNT1SH
define <vscale x 4 x i32> @gldnt1sh_s_uxtw(<vscale x 4 x i1> %pg, ptr %base, <vscale x 4 x i32> %b) {
; CHECK-LABEL: gldnt1sh_s_uxtw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnt1sh { z0.s }, p0/z, [z0.s, x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i16(<vscale x 4 x i1> %pg,
                                                                              ptr %base,
                                                                              <vscale x 4 x i32> %b)
  %res = sext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

; LDNT1B/LDNT1SB
declare <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i8(<vscale x 4 x i1>, ptr, <vscale x 4 x i32>)
declare <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.sxtw.nxv4i8(<vscale x 4 x i1>, ptr, <vscale x 4 x i32>)

; LDNT1H/LDNT1SH
declare <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.sxtw.nxv4i16(<vscale x 4 x i1>, ptr, <vscale x 4 x i32>)
declare <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i16(<vscale x 4 x i1>, ptr, <vscale x 4 x i32>)

; LDNT1W/LDNT1SW
declare <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.gather.sxtw.nxv4i32(<vscale x 4 x i1>, ptr, <vscale x 4 x i32>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i32(<vscale x 4 x i1>, ptr, <vscale x 4 x i32>)

declare <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.gather.sxtw.nxv4f32(<vscale x 4 x i1>, ptr, <vscale x 4 x i32>)
declare <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4f32(<vscale x 4 x i1>, ptr, <vscale x 4 x i32>)
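
; A hypothetical composite example (a sketch, not covered by the autogenerated
; CHECK lines above): it reuses the uxtw intrinsics already declared in this
; file to gather i8 and i16 elements with the same predicate and offsets, then
; combines the widened results. The function name, the second base pointer,
; and the final add are illustrative assumptions, not part of the tested API.
define <vscale x 4 x i32> @gldnt1bh_s_uxtw_sum(<vscale x 4 x i1> %pg, ptr %base8, ptr %base16, <vscale x 4 x i32> %b) {
  ; Gather one byte per active lane and zero-extend each to 32 bits.
  %load8 = call <vscale x 4 x i8> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i8(<vscale x 4 x i1> %pg,
                                                                             ptr %base8,
                                                                             <vscale x 4 x i32> %b)
  %ext8 = zext <vscale x 4 x i8> %load8 to <vscale x 4 x i32>
  ; Gather one halfword per active lane and zero-extend each to 32 bits.
  %load16 = call <vscale x 4 x i16> @llvm.aarch64.sve.ldnt1.gather.uxtw.nxv4i16(<vscale x 4 x i1> %pg,
                                                                                ptr %base16,
                                                                                <vscale x 4 x i32> %b)
  %ext16 = zext <vscale x 4 x i16> %load16 to <vscale x 4 x i32>
  ; Lane-wise sum of the two gathered, widened vectors.
  %sum = add <vscale x 4 x i32> %ext8, %ext16
  ret <vscale x 4 x i32> %sum
}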