; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -O0 -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s
; RUN: llc -O0 -mtriple=aarch64-linux-gnu -mattr=+sme -force-streaming < %s | FileCheck %s

;
; ST1B
;

define void @st1b_i8(<vscale x 16 x i8> %data, <vscale x 16 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st1b_i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st1.nxv16i8(<vscale x 16 x i8> %data,
                                          <vscale x 16 x i1> %pred,
                                          ptr %addr)
  ret void
}

define void @st1b_h(<vscale x 8 x i16> %data, <vscale x 8 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st1b_h:
; CHECK:       // %bb.0:
; CHECK-NEXT:    st1b { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %trunc = trunc <vscale x 8 x i16> %data to <vscale x 8 x i8>
  call void @llvm.aarch64.sve.st1.nxv8i8(<vscale x 8 x i8> %trunc,
                                         <vscale x 8 x i1> %pred,
                                         ptr %addr)
  ret void
}

define void @st1b_s(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st1b_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    st1b { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %trunc = trunc <vscale x 4 x i32> %data to <vscale x 4 x i8>
  call void @llvm.aarch64.sve.st1.nxv4i8(<vscale x 4 x i8> %trunc,
                                         <vscale x 4 x i1> %pred,
                                         ptr %addr)
  ret void
}

define void @st1b_d(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st1b_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    st1b { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i8>
  call void @llvm.aarch64.sve.st1.nxv2i8(<vscale x 2 x i8> %trunc,
                                         <vscale x 2 x i1> %pred,
                                         ptr %addr)
  ret void
}

;
; ST1H
;

define void @st1h_i16(<vscale x 8 x i16> %data, <vscale x 8 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st1h_i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st1.nxv8i16(<vscale x 8 x i16> %data,
                                          <vscale x 8 x i1> %pred,
                                          ptr %addr)
  ret void
}

define void @st1h_f16(<vscale x 8 x half> %data, <vscale x 8 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st1h_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st1.nxv8f16(<vscale x 8 x half> %data,
                                          <vscale x 8 x i1> %pred,
                                          ptr %addr)
  ret void
}

define void @st1h_bf16(<vscale x 8 x bfloat> %data, <vscale x 8 x i1> %pred, ptr %addr) #0 {
; CHECK-LABEL: st1h_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st1.nxv8bf16(<vscale x 8 x bfloat> %data,
                                           <vscale x 8 x i1> %pred,
                                           ptr %addr)
  ret void
}

define void @st1h_s(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st1h_s:
; CHECK:       // %bb.0:
; CHECK-NEXT:    st1h { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %trunc = trunc <vscale x 4 x i32> %data to <vscale x 4 x i16>
  call void @llvm.aarch64.sve.st1.nxv4i16(<vscale x 4 x i16> %trunc,
                                          <vscale x 4 x i1> %pred,
                                          ptr %addr)
  ret void
}

define void @st1h_d(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st1h_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    st1h { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i16>
  call void @llvm.aarch64.sve.st1.nxv2i16(<vscale x 2 x i16> %trunc,
                                          <vscale x 2 x i1> %pred,
                                          ptr %addr)
  ret void
}

;
; ST1W
;

define void @st1w_i32(<vscale x 4 x i32> %data, <vscale x 4 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st1w_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st1.nxv4i32(<vscale x 4 x i32> %data,
                                          <vscale x 4 x i1> %pred,
                                          ptr %addr)
  ret void
}

define void @st1w_f32(<vscale x 4 x float> %data, <vscale x 4 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st1w_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st1.nxv4f32(<vscale x 4 x float> %data,
                                          <vscale x 4 x i1> %pred,
                                          ptr %addr)
  ret void
}

define void @st1w_d(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st1w_d:
; CHECK:       // %bb.0:
; CHECK-NEXT:    st1w { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %trunc = trunc <vscale x 2 x i64> %data to <vscale x 2 x i32>
  call void @llvm.aarch64.sve.st1.nxv2i32(<vscale x 2 x i32> %trunc,
                                          <vscale x 2 x i1> %pred,
                                          ptr %addr)
  ret void
}

;
; ST1D
;

define void @st1d_i64(<vscale x 2 x i64> %data, <vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st1d_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st1.nxv2i64(<vscale x 2 x i64> %data,
                                          <vscale x 2 x i1> %pred,
                                          ptr %addr)
  ret void
}

define void @st1d_f64(<vscale x 2 x double> %data, <vscale x 2 x i1> %pred, ptr %addr) {
; CHECK-LABEL: st1d_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  call void @llvm.aarch64.sve.st1.nxv2f64(<vscale x 2 x double> %data,
                                          <vscale x 2 x i1> %pred,
                                          ptr %addr)
  ret void
}

declare void @llvm.aarch64.sve.st1.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, ptr)

declare void @llvm.aarch64.sve.st1.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i1>, ptr)
declare void @llvm.aarch64.sve.st1.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, ptr)
declare void @llvm.aarch64.sve.st1.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, ptr)
declare void @llvm.aarch64.sve.st1.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i1>, ptr)

declare void @llvm.aarch64.sve.st1.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i1>, ptr)
declare void @llvm.aarch64.sve.st1.nxv4i16(<vscale x 4 x i16>, <vscale x 4 x i1>, ptr)
declare void @llvm.aarch64.sve.st1.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, ptr)
declare void @llvm.aarch64.sve.st1.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, ptr)

declare void @llvm.aarch64.sve.st1.nxv2i8(<vscale x 2 x i8>, <vscale x 2 x i1>, ptr)
declare void @llvm.aarch64.sve.st1.nxv2i16(<vscale x 2 x i16>, <vscale x 2 x i1>, ptr)
declare void @llvm.aarch64.sve.st1.nxv2i32(<vscale x 2 x i32>, <vscale x 2 x i1>, ptr)
declare void @llvm.aarch64.sve.st1.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, ptr)
declare void @llvm.aarch64.sve.st1.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, ptr)

; +bf16 is required for the bfloat version.
attributes #0 = { "target-features"="+bf16" }