; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
; RUN: llc -O0 -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s

;
; LD1B
;

define <vscale x 16 x i8> @ld1b_i8(<vscale x 16 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ld1b_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT:    ret
  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.ld1.nxv16i8(<vscale x 16 x i1> %pred, ptr %addr)
  ret <vscale x 16 x i8> %res
}

define <vscale x 8 x i16> @ld1b_h(<vscale x 8 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ld1b_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1b { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 8 x i8> @llvm.aarch64.sve.ld1.nxv8i8(<vscale x 8 x i1> %pred, ptr %addr)
  %res = zext <vscale x 8 x i8> %load to <vscale x 8 x i16>
  ret <vscale x 8 x i16> %res
}

define <vscale x 8 x i16> @ld1sb_h(<vscale x 8 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ld1sb_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sb { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 8 x i8> @llvm.aarch64.sve.ld1.nxv8i8(<vscale x 8 x i1> %pred, ptr %addr)
  %res = sext <vscale x 8 x i8> %load to <vscale x 8 x i16>
  ret <vscale x 8 x i16> %res
}

define <vscale x 4 x i32> @ld1b_s(<vscale x 4 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ld1b_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1b { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.nxv4i8(<vscale x 4 x i1> %pred, ptr %addr)
  %res = zext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @ld1sb_s(<vscale x 4 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ld1sb_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sb { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.nxv4i8(<vscale x 4 x i1> %pred, ptr %addr)
  %res = sext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @ld1b_d(<vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ld1b_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1b { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.nxv2i8(<vscale x 2 x i1> %pred, ptr %addr)
  %res = zext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ld1sb_d(<vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ld1sb_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sb { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.nxv2i8(<vscale x 2 x i1> %pred, ptr %addr)
  %res = sext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

;
; LD1H
;

define <vscale x 8 x i16> @ld1h_i16(<vscale x 8 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ld1h_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.ld1.nxv8i16(<vscale x 8 x i1> %pred, ptr %addr)
  ret <vscale x 8 x i16> %res
}

define <vscale x 8 x half> @ld1h_f16(<vscale x 8 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ld1h_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x half> @llvm.aarch64.sve.ld1.nxv8f16(<vscale x 8 x i1> %pred, ptr %addr)
  ret <vscale x 8 x half> %res
}
define <vscale x 8 x bfloat> @ld1h_bf16(<vscale x 8 x i1> %pred, ptr %addr) #0 {
; CHECK-LABEL: ld1h_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ret
  %res = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1.nxv8bf16(<vscale x 8 x i1> %pred, ptr %addr)
  ret <vscale x 8 x bfloat> %res
}

define <vscale x 4 x i32> @ld1h_s(<vscale x 4 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ld1h_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.nxv4i16(<vscale x 4 x i1> %pred, ptr %addr)
  %res = zext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @ld1sh_s(<vscale x 4 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ld1sh_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sh { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.nxv4i16(<vscale x 4 x i1> %pred, ptr %addr)
  %res = sext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @ld1h_d(<vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ld1h_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.nxv2i16(<vscale x 2 x i1> %pred, ptr %addr)
  %res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ld1sh_d(<vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ld1sh_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sh { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.nxv2i16(<vscale x 2 x i1> %pred, ptr %addr)
  %res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

;
; LD1W
;

define <vscale x 4 x i32> @ld1w_i32(<vscale x 4 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ld1w_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.nxv4i32(<vscale x 4 x i1> %pred, ptr %addr)
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x float> @ld1w_f32(<vscale x 4 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ld1w_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %res = call <vscale x 4 x float> @llvm.aarch64.sve.ld1.nxv4f32(<vscale x 4 x i1> %pred, ptr %addr)
  ret <vscale x 4 x float> %res
}

define <vscale x 2 x i64> @ld1w_d(<vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ld1w_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.nxv2i32(<vscale x 2 x i1> %pred, ptr %addr)
  %res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @ld1sw_d(<vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ld1sw_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sw { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.nxv2i32(<vscale x 2 x i1> %pred, ptr %addr)
  %res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

;
; LD1D
;

define <vscale x 2 x i64> @ld1d_i64(<vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ld1d_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.nxv2i64(<vscale x 2 x i1> %pred, ptr %addr)
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x double> @ld1d_f64(<vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: ld1d_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %res = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.nxv2f64(<vscale x 2 x i1> %pred, ptr %addr)
  ret <vscale x 2 x double> %res
}

declare <vscale x 16 x i8> @llvm.aarch64.sve.ld1.nxv16i8(<vscale x 16 x i1>, ptr)

declare <vscale x 8 x i8> @llvm.aarch64.sve.ld1.nxv8i8(<vscale x 8 x i1>, ptr)
declare <vscale x 8 x i16> @llvm.aarch64.sve.ld1.nxv8i16(<vscale x 8 x i1>, ptr)
declare <vscale x 8 x half> @llvm.aarch64.sve.ld1.nxv8f16(<vscale x 8 x i1>, ptr)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1.nxv8bf16(<vscale x 8 x i1>, ptr)

declare <vscale x 4 x i8> @llvm.aarch64.sve.ld1.nxv4i8(<vscale x 4 x i1>, ptr)
declare <vscale x 4 x i16> @llvm.aarch64.sve.ld1.nxv4i16(<vscale x 4 x i1>, ptr)
declare <vscale x 4 x i32> @llvm.aarch64.sve.ld1.nxv4i32(<vscale x 4 x i1>, ptr)
declare <vscale x 4 x float> @llvm.aarch64.sve.ld1.nxv4f32(<vscale x 4 x i1>, ptr)

declare <vscale x 2 x i8> @llvm.aarch64.sve.ld1.nxv2i8(<vscale x 2 x i1>, ptr)
declare <vscale x 2 x i16> @llvm.aarch64.sve.ld1.nxv2i16(<vscale x 2 x i1>, ptr)
declare <vscale x 2 x i32> @llvm.aarch64.sve.ld1.nxv2i32(<vscale x 2 x i1>, ptr)
declare <vscale x 2 x i64> @llvm.aarch64.sve.ld1.nxv2i64(<vscale x 2 x i1>, ptr)
declare <vscale x 2 x double> @llvm.aarch64.sve.ld1.nxv2f64(<vscale x 2 x i1>, ptr)

; +bf16 is required for the bfloat version.
attributes #0 = { "target-features"="+sve,+bf16" }
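
; Note: the ld1b/ld1sb, ld1h/ld1sh and ld1w/ld1sw tests above rely on the
; zext/sext of a narrower masked load (e.g. nxv8i8) being folded into a single
; extending load instruction with the wider element size. Both RUN lines share
; one FileCheck prefix, so this folding is expected to hold at -O0 as well.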