; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s

; Range checks: for all the instructions tested in this file, the
; immediate must be within the range [-8, 7] (4-bit immediate). Out of
; range values are tested only in one case (the first function below).
; Valid values are tested throughout the rest of the file.

define void @imm_out_of_range(ptr %base, <vscale x 2 x i1> %mask) nounwind {
; CHECK-LABEL: imm_out_of_range:
; CHECK:       // %bb.0:
; CHECK-NEXT:    rdvl x8, #8
; CHECK-NEXT:    add x8, x0, x8
; CHECK-NEXT:    ldnt1d { z0.d }, p0/z, [x8]
; CHECK-NEXT:    rdvl x8, #-9
; CHECK-NEXT:    add x8, x0, x8
; CHECK-NEXT:    stnt1d { z0.d }, p0, [x8]
; CHECK-NEXT:    ret
  %base_load = getelementptr <vscale x 2 x i64>, ptr %base, i64 8
  %base_load_bc = bitcast ptr %base_load to ptr
  %data = call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.nxv2i64(<vscale x 2 x i1> %mask,
                                                                  ptr %base_load_bc)
  %base_store = getelementptr <vscale x 2 x i64>, ptr %base, i64 -9
  %base_store_bc = bitcast ptr %base_store to ptr
  call void @llvm.aarch64.sve.stnt1.nxv2i64(<vscale x 2 x i64> %data,
                                            <vscale x 2 x i1> %mask,
                                            ptr %base_store_bc)
  ret void
}

; 2-lane non-temporal load/stores.

define void @test_masked_ldst_sv2i64(ptr %base, <vscale x 2 x i1> %mask) nounwind {
; CHECK-LABEL: test_masked_ldst_sv2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnt1d { z0.d }, p0/z, [x0, #-8, mul vl]
; CHECK-NEXT:    stnt1d { z0.d }, p0, [x0, #-7, mul vl]
; CHECK-NEXT:    ret
  %base_load = getelementptr <vscale x 2 x i64>, ptr %base, i64 -8
  %base_load_bc = bitcast ptr %base_load to ptr
  %data = call <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.nxv2i64(<vscale x 2 x i1> %mask,
                                                                  ptr %base_load_bc)
  %base_store = getelementptr <vscale x 2 x i64>, ptr %base, i64 -7
  %base_store_bc = bitcast ptr %base_store to ptr
  call void @llvm.aarch64.sve.stnt1.nxv2i64(<vscale x 2 x i64> %data,
                                            <vscale x 2 x i1> %mask,
                                            ptr %base_store_bc)
  ret void
}

define void @test_masked_ldst_sv2f64(ptr %base, <vscale x 2 x i1> %mask) nounwind {
; CHECK-LABEL: test_masked_ldst_sv2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnt1d { z0.d }, p0/z, [x0, #-6, mul vl]
; CHECK-NEXT:    stnt1d { z0.d }, p0, [x0, #-5, mul vl]
; CHECK-NEXT:    ret
  %base_load = getelementptr <vscale x 2 x double>, ptr %base, i64 -6
  %base_load_bc = bitcast ptr %base_load to ptr
  %data = call <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.nxv2f64(<vscale x 2 x i1> %mask,
                                                                     ptr %base_load_bc)
  %base_store = getelementptr <vscale x 2 x double>, ptr %base, i64 -5
  %base_store_bc = bitcast ptr %base_store to ptr
  call void @llvm.aarch64.sve.stnt1.nxv2f64(<vscale x 2 x double> %data,
                                            <vscale x 2 x i1> %mask,
                                            ptr %base_store_bc)
  ret void
}

; 4-lane non-temporal load/stores.

define void @test_masked_ldst_sv4i32(ptr %base, <vscale x 4 x i1> %mask) nounwind {
; CHECK-LABEL: test_masked_ldst_sv4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnt1w { z0.s }, p0/z, [x0, #6, mul vl]
; CHECK-NEXT:    stnt1w { z0.s }, p0, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base_load = getelementptr <vscale x 4 x i32>, ptr %base, i64 6
  %base_load_bc = bitcast ptr %base_load to ptr
  %data = call <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.nxv4i32(<vscale x 4 x i1> %mask,
                                                                  ptr %base_load_bc)
  %base_store = getelementptr <vscale x 4 x i32>, ptr %base, i64 7
  %base_store_bc = bitcast ptr %base_store to ptr
  call void @llvm.aarch64.sve.stnt1.nxv4i32(<vscale x 4 x i32> %data,
                                            <vscale x 4 x i1> %mask,
                                            ptr %base_store_bc)
  ret void
}

define void @test_masked_ldst_sv4f32(ptr %base, <vscale x 4 x i1> %mask) nounwind {
; CHECK-LABEL: test_masked_ldst_sv4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnt1w { z0.s }, p0/z, [x0, #-1, mul vl]
; CHECK-NEXT:    stnt1w { z0.s }, p0, [x0, #2, mul vl]
; CHECK-NEXT:    ret
  %base_load = getelementptr <vscale x 4 x float>, ptr %base, i64 -1
  %base_load_bc = bitcast ptr %base_load to ptr
  %data = call <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.nxv4f32(<vscale x 4 x i1> %mask,
                                                                    ptr %base_load_bc)
  %base_store = getelementptr <vscale x 4 x float>, ptr %base, i64 2
  %base_store_bc = bitcast ptr %base_store to ptr
  call void @llvm.aarch64.sve.stnt1.nxv4f32(<vscale x 4 x float> %data,
                                            <vscale x 4 x i1> %mask,
                                            ptr %base_store_bc)
  ret void
}

; 8-lane non-temporal load/stores.

define void @test_masked_ldst_sv8i16(ptr %base, <vscale x 8 x i1> %mask) nounwind {
; CHECK-LABEL: test_masked_ldst_sv8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnt1h { z0.h }, p0/z, [x0, #6, mul vl]
; CHECK-NEXT:    stnt1h { z0.h }, p0, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base_load = getelementptr <vscale x 8 x i16>, ptr %base, i64 6
  %base_load_bc = bitcast ptr %base_load to ptr
  %data = call <vscale x 8 x i16> @llvm.aarch64.sve.ldnt1.nxv8i16(<vscale x 8 x i1> %mask,
                                                                  ptr %base_load_bc)
  %base_store = getelementptr <vscale x 8 x i16>, ptr %base, i64 7
  %base_store_bc = bitcast ptr %base_store to ptr
  call void @llvm.aarch64.sve.stnt1.nxv8i16(<vscale x 8 x i16> %data,
                                            <vscale x 8 x i1> %mask,
                                            ptr %base_store_bc)
  ret void
}

define void @test_masked_ldst_sv8f16(ptr %base, <vscale x 8 x i1> %mask) nounwind {
; CHECK-LABEL: test_masked_ldst_sv8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnt1h { z0.h }, p0/z, [x0, #-1, mul vl]
; CHECK-NEXT:    stnt1h { z0.h }, p0, [x0, #2, mul vl]
; CHECK-NEXT:    ret
  %base_load = getelementptr <vscale x 8 x half>, ptr %base, i64 -1
  %base_load_bc = bitcast ptr %base_load to ptr
  %data = call <vscale x 8 x half> @llvm.aarch64.sve.ldnt1.nxv8f16(<vscale x 8 x i1> %mask,
                                                                   ptr %base_load_bc)
  %base_store = getelementptr <vscale x 8 x half>, ptr %base, i64 2
  %base_store_bc = bitcast ptr %base_store to ptr
  call void @llvm.aarch64.sve.stnt1.nxv8f16(<vscale x 8 x half> %data,
                                            <vscale x 8 x i1> %mask,
                                            ptr %base_store_bc)
  ret void
}

define void @test_masked_ldst_sv8bf16(ptr %base, <vscale x 8 x i1> %mask) nounwind #0 {
; CHECK-LABEL: test_masked_ldst_sv8bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnt1h { z0.h }, p0/z, [x0, #-1, mul vl]
; CHECK-NEXT:    stnt1h { z0.h }, p0, [x0, #2, mul vl]
; CHECK-NEXT:    ret
  %base_load = getelementptr <vscale x 8 x bfloat>, ptr %base, i64 -1
  %base_load_bc = bitcast ptr %base_load to ptr
  %data = call <vscale x 8 x bfloat> @llvm.aarch64.sve.ldnt1.nxv8bf16(<vscale x 8 x i1> %mask,
                                                                      ptr %base_load_bc)
  %base_store = getelementptr <vscale x 8 x bfloat>, ptr %base, i64 2
  %base_store_bc = bitcast ptr %base_store to ptr
  call void @llvm.aarch64.sve.stnt1.nxv8bf16(<vscale x 8 x bfloat> %data,
                                             <vscale x 8 x i1> %mask,
                                             ptr %base_store_bc)
  ret void
}

; 16-lane non-temporal load/stores.

define void @test_masked_ldst_sv16i8(ptr %base, <vscale x 16 x i1> %mask) nounwind {
; CHECK-LABEL: test_masked_ldst_sv16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldnt1b { z0.b }, p0/z, [x0, #6, mul vl]
; CHECK-NEXT:    stnt1b { z0.b }, p0, [x0, #7, mul vl]
; CHECK-NEXT:    ret
  %base_load = getelementptr <vscale x 16 x i8>, ptr %base, i64 6
  %base_load_bc = bitcast ptr %base_load to ptr
  %data = call <vscale x 16 x i8> @llvm.aarch64.sve.ldnt1.nxv16i8(<vscale x 16 x i1> %mask,
                                                                  ptr %base_load_bc)
  %base_store = getelementptr <vscale x 16 x i8>, ptr %base, i64 7
  %base_store_bc = bitcast ptr %base_store to ptr
  call void @llvm.aarch64.sve.stnt1.nxv16i8(<vscale x 16 x i8> %data,
                                            <vscale x 16 x i1> %mask,
                                            ptr %base_store_bc)
  ret void
}

; 2-element non-temporal loads.
declare <vscale x 2 x i64> @llvm.aarch64.sve.ldnt1.nxv2i64(<vscale x 2 x i1>, ptr)
declare <vscale x 2 x double> @llvm.aarch64.sve.ldnt1.nxv2f64(<vscale x 2 x i1>, ptr)

; 4-element non-temporal loads.
declare <vscale x 4 x i32> @llvm.aarch64.sve.ldnt1.nxv4i32(<vscale x 4 x i1>, ptr)
declare <vscale x 4 x float> @llvm.aarch64.sve.ldnt1.nxv4f32(<vscale x 4 x i1>, ptr)

; 8-element non-temporal loads.
declare <vscale x 8 x i16> @llvm.aarch64.sve.ldnt1.nxv8i16(<vscale x 8 x i1>, ptr)
declare <vscale x 8 x half> @llvm.aarch64.sve.ldnt1.nxv8f16(<vscale x 8 x i1>, ptr)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.ldnt1.nxv8bf16(<vscale x 8 x i1>, ptr)

; 16-element non-temporal loads.
declare <vscale x 16 x i8> @llvm.aarch64.sve.ldnt1.nxv16i8(<vscale x 16 x i1>, ptr)

; 2-element non-temporal stores.
declare void @llvm.aarch64.sve.stnt1.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, ptr)
declare void @llvm.aarch64.sve.stnt1.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, ptr)

; 4-element non-temporal stores.
declare void @llvm.aarch64.sve.stnt1.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, ptr)
declare void @llvm.aarch64.sve.stnt1.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, ptr)

; 8-element non-temporal stores.
declare void @llvm.aarch64.sve.stnt1.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, ptr)
declare void @llvm.aarch64.sve.stnt1.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, ptr)
declare void @llvm.aarch64.sve.stnt1.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x i1>, ptr)

; 16-element non-temporal stores.
declare void @llvm.aarch64.sve.stnt1.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, ptr)

; +bf16 is required for the bfloat version.
attributes #0 = { "target-features"="+sve,+bf16" }
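
; For reference, the "#imm, mul vl" operand checked above is scaled by the
; vector register length in bytes (VL). As a hypothetical worked example, on a
; 256-bit SVE implementation (VL = 32 bytes), [x0, #-8, mul vl] addresses
; x0 - 8 * 32 = x0 - 256 bytes. Offsets outside [-8, 7], such as the 8 and -9
; used in @imm_out_of_range, do not fit in the 4-bit immediate field and are
; instead materialized with an rdvl/add sequence.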