; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s

;
; LD1B, LD1W, LD1H, LD1D: vector base + scalar offset (index)
;   e.g. ld1b { z0.d }, p0/z, [x0, z0.d]
;

; LD1B
define <vscale x 4 x i32> @gld1b_s_scalar_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
; CHECK-LABEL: gld1b_s_scalar_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1b { z0.s }, p0/z, [x0, z0.s, uxtw]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> %pg,
                                                                                           <vscale x 4 x i32> %base,
                                                                                           i64 %offset)
  %res = zext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @gld1b_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
; CHECK-LABEL: gld1b_d_scalar_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1b { z0.d }, p0/z, [x0, z0.d]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                           <vscale x 2 x i64> %base,
                                                                                           i64 %offset)
  %res = zext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

; LD1H
define <vscale x 4 x i32> @gld1h_s_scalar_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
; CHECK-LABEL: gld1h_s_scalar_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0, z0.s, uxtw]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> %pg,
                                                                                             <vscale x 4 x i32> %base,
                                                                                             i64 %offset)
  %res = zext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @gld1h_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
; CHECK-LABEL: gld1h_d_scalar_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0, z0.d]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                             <vscale x 2 x i64> %base,
                                                                                             i64 %offset)
  %res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

; LD1W
define <vscale x 4 x i32> @gld1w_s_scalar_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
; CHECK-LABEL: gld1w_s_scalar_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0, z0.s, uxtw]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1> %pg,
                                                                                             <vscale x 4 x i32> %base,
                                                                                             i64 %offset)
  ret <vscale x 4 x i32> %load
}

define <vscale x 2 x i64> @gld1w_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
; CHECK-LABEL: gld1w_d_scalar_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0, z0.d]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                             <vscale x 2 x i64> %base,
                                                                                             i64 %offset)
  %res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 4 x float> @gld1w_s_scalar_offset_float(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
; CHECK-LABEL: gld1w_s_scalar_offset_float:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0, z0.s, uxtw]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x i1> %pg,
                                                                                               <vscale x 4 x i32> %base,
                                                                                               i64 %offset)
  ret <vscale x 4 x float> %load
}
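
; Note (from the CHECK lines above): for the .s forms the scalar %offset is
; moved into the base register and the vector %base is used as an unsigned
; 32-bit index, giving the [x0, z0.s, uxtw] addressing mode; the .d forms use
; the plain [x0, z0.d] vector-plus-scalar form.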

; LD1D
define <vscale x 2 x i64> @gld1d_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
; CHECK-LABEL: gld1d_d_scalar_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, z0.d]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                             <vscale x 2 x i64> %base,
                                                                                             i64 %offset)
  ret <vscale x 2 x i64> %load
}

define <vscale x 2 x double> @gld1d_d_scalar_offset_double(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
; CHECK-LABEL: gld1d_d_scalar_offset_double:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, z0.d]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                                <vscale x 2 x i64> %base,
                                                                                                i64 %offset)
  ret <vscale x 2 x double> %load
}

;
; LD1SB, LD1SW, LD1SH: vector base + scalar offset (index)
;   e.g. ld1sb { z0.d }, p0/z, [x0, z0.d]
;
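; The signed forms below reuse the unsigned-load intrinsics declared at the
; end of this file; it is the sign extension (sext rather than zext) of the
; narrow result that selects ld1sb/ld1sh/ld1sw.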

; LD1SB
define <vscale x 4 x i32> @gld1sb_s_scalar_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
; CHECK-LABEL: gld1sb_s_scalar_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sb { z0.s }, p0/z, [x0, z0.s, uxtw]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1> %pg,
                                                                                           <vscale x 4 x i32> %base,
                                                                                           i64 %offset)
  %res = sext <vscale x 4 x i8> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @gld1sb_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
; CHECK-LABEL: gld1sb_d_scalar_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sb { z0.d }, p0/z, [x0, z0.d]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                           <vscale x 2 x i64> %base,
                                                                                           i64 %offset)
  %res = sext <vscale x 2 x i8> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

; LD1SH
define <vscale x 4 x i32> @gld1sh_s_scalar_offset(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %base, i64 %offset) {
; CHECK-LABEL: gld1sh_s_scalar_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sh { z0.s }, p0/z, [x0, z0.s, uxtw]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1> %pg,
                                                                                             <vscale x 4 x i32> %base,
                                                                                             i64 %offset)
  %res = sext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @gld1sh_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
; CHECK-LABEL: gld1sh_d_scalar_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sh { z0.d }, p0/z, [x0, z0.d]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                             <vscale x 2 x i64> %base,
                                                                                             i64 %offset)
  %res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

; LD1SW
define <vscale x 2 x i64> @gld1sw_d_scalar_offset(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %base, i64 %offset) {
; CHECK-LABEL: gld1sw_d_scalar_offset:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sw { z0.d }, p0/z, [x0, z0.d]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1> %pg,
                                                                                             <vscale x 2 x i64> %base,
                                                                                             i64 %offset)
  %res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

; LD1B/LD1SB
declare <vscale x 4 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i8.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i64)
declare <vscale x 2 x i8> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i8.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)

; LD1H/LD1SH
declare <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i16.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i64)
declare <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i16.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)

; LD1W/LD1SW
declare <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4i32.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i64)
declare <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i32.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)

declare <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv4f32.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, i64)

; LD1D
declare <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)

declare <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.scalar.offset.nxv2f64.nxv2i64(<vscale x 2 x i1>, <vscale x 2 x i64>, i64)
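
; The two type suffixes in each intrinsic name are the loaded (unextended)
; vector type and the vector base type, e.g. .nxv4i8.nxv4i32 gathers
; <vscale x 4 x i8> elements using a <vscale x 4 x i32> base.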