; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s

;
; LD1H, LD1W, LD1D: base + 32-bit scaled offset, sign (sxtw) or zero (uxtw)
; extended to 64 bits
; e.g. ld1h z0.d, p0/z, [x0, z0.d, uxtw #1]
;

; LD1H
define <vscale x 4 x i32> @gld1h_s_uxtw_index(<vscale x 4 x i1> %pg, ptr %base, <vscale x 4 x i32> %b) {
; CHECK-LABEL: gld1h_s_uxtw_index:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0, z0.s, uxtw #1]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i16(<vscale x 4 x i1> %pg, ptr %base, <vscale x 4 x i32> %b)
  %res = zext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @gld1h_s_sxtw_index(<vscale x 4 x i1> %pg, ptr %base, <vscale x 4 x i32> %b) {
; CHECK-LABEL: gld1h_s_sxtw_index:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.s }, p0/z, [x0, z0.s, sxtw #1]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i16(<vscale x 4 x i1> %pg, ptr %base, <vscale x 4 x i32> %b)
  %res = zext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @gld1h_d_uxtw_index(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gld1h_d_uxtw_index:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0, z0.d, uxtw #1]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i16(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i32> %b)
  %res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @gld1h_d_sxtw_index(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gld1h_d_sxtw_index:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [x0, z0.d, sxtw #1]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i16(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i32> %b)
  %res = zext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

; LD1W
define <vscale x 4 x i32> @gld1w_s_uxtw_index(<vscale x 4 x i1> %pg, ptr %base, <vscale x 4 x i32> %b) {
; CHECK-LABEL: gld1w_s_uxtw_index:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0, z0.s, uxtw #2]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i32(<vscale x 4 x i1> %pg, ptr %base, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %load
}

define <vscale x 4 x i32> @gld1w_s_sxtw_index(<vscale x 4 x i1> %pg, ptr %base, <vscale x 4 x i32> %b) {
; CHECK-LABEL: gld1w_s_sxtw_index:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0, z0.s, sxtw #2]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i32(<vscale x 4 x i1> %pg, ptr %base, <vscale x 4 x i32> %b)
  ret <vscale x 4 x i32> %load
}

define <vscale x 2 x i64> @gld1w_d_uxtw_index(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gld1w_d_uxtw_index:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0, z0.d, uxtw #2]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i32(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i32> %b)
  %res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @gld1w_d_sxtw_index(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gld1w_d_sxtw_index:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z0.d }, p0/z, [x0, z0.d, sxtw #2]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i32(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i32> %b)
  %res = zext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 4 x float> @gld1w_s_uxtw_index_float(<vscale x 4 x i1> %pg, ptr %base, <vscale x 4 x i32> %b) {
; CHECK-LABEL: gld1w_s_uxtw_index_float:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0, z0.s, uxtw #2]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4f32(<vscale x 4 x i1> %pg, ptr %base, <vscale x 4 x i32> %b)
  ret <vscale x 4 x float> %load
}

define <vscale x 4 x float> @gld1w_s_sxtw_index_float(<vscale x 4 x i1> %pg, ptr %base, <vscale x 4 x i32> %b) {
; CHECK-LABEL: gld1w_s_sxtw_index_float:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0, z0.s, sxtw #2]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4f32(<vscale x 4 x i1> %pg, ptr %base, <vscale x 4 x i32> %b)
  ret <vscale x 4 x float> %load
}

; LD1D
define <vscale x 2 x i64> @gld1d_s_uxtw_index(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gld1d_s_uxtw_index:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, z0.d, uxtw #3]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i64(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i32> %b)
  ret <vscale x 2 x i64> %load
}

define <vscale x 2 x i64> @gld1d_sxtw_index(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gld1d_sxtw_index:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, z0.d, sxtw #3]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i64(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i32> %b)
  ret <vscale x 2 x i64> %load
}

define <vscale x 2 x double> @gld1d_uxtw_index_double(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gld1d_uxtw_index_double:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, z0.d, uxtw #3]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2f64(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i32> %b)
  ret <vscale x 2 x double> %load
}

define <vscale x 2 x double> @gld1d_sxtw_index_double(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gld1d_sxtw_index_double:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0, z0.d, sxtw #3]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2f64(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i32> %b)
  ret <vscale x 2 x double> %load
}

;
; LD1SH, LD1SW, LD1SD: base + 32-bit scaled offset, sign (sxtw) or zero (uxtw)
; extended to 64 bits
; e.g. ld1sh z0.d, p0/z, [x0, z0.d, uxtw #1]
;

; LD1SH
define <vscale x 4 x i32> @gld1sh_s_uxtw_index(<vscale x 4 x i1> %pg, ptr %base, <vscale x 4 x i32> %b) {
; CHECK-LABEL: gld1sh_s_uxtw_index:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sh { z0.s }, p0/z, [x0, z0.s, uxtw #1]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i16(<vscale x 4 x i1> %pg, ptr %base, <vscale x 4 x i32> %b)
  %res = sext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 4 x i32> @gld1sh_s_sxtw_index(<vscale x 4 x i1> %pg, ptr %base, <vscale x 4 x i32> %b) {
; CHECK-LABEL: gld1sh_s_sxtw_index:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sh { z0.s }, p0/z, [x0, z0.s, sxtw #1]
; CHECK-NEXT:    ret
  %load = call <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i16(<vscale x 4 x i1> %pg, ptr %base, <vscale x 4 x i32> %b)
  %res = sext <vscale x 4 x i16> %load to <vscale x 4 x i32>
  ret <vscale x 4 x i32> %res
}

define <vscale x 2 x i64> @gld1sh_d_uxtw_index(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gld1sh_d_uxtw_index:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sh { z0.d }, p0/z, [x0, z0.d, uxtw #1]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i16(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i32> %b)
  %res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @gld1sh_d_sxtw_index(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gld1sh_d_sxtw_index:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sh { z0.d }, p0/z, [x0, z0.d, sxtw #1]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i16(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i32> %b)
  %res = sext <vscale x 2 x i16> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

; LD1SW
define <vscale x 2 x i64> @gld1sw_d_uxtw_index(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gld1sw_d_uxtw_index:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sw { z0.d }, p0/z, [x0, z0.d, uxtw #2]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i32(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i32> %b)
  %res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}

define <vscale x 2 x i64> @gld1sw_d_sxtw_index(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i32> %b) {
; CHECK-LABEL: gld1sw_d_sxtw_index:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1sw { z0.d }, p0/z, [x0, z0.d, sxtw #2]
; CHECK-NEXT:    ret
  %load = call <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i32(<vscale x 2 x i1> %pg, ptr %base, <vscale x 2 x i32> %b)
  %res = sext <vscale x 2 x i32> %load to <vscale x 2 x i64>
  ret <vscale x 2 x i64> %res
}


; LD1H/LD1SH
declare <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i16(<vscale x 4 x i1>, ptr, <vscale x 4 x i32>)
declare <vscale x 4 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i16(<vscale x 4 x i1>, ptr, <vscale x 4 x i32>)

declare <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i16(<vscale x 2 x i1>, ptr, <vscale x 2 x i32>)
declare <vscale x 2 x i16> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i16(<vscale x 2 x i1>, ptr, <vscale x 2 x i32>)

; LD1W/LD1SW
declare <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4i32(<vscale x 4 x i1>, ptr, <vscale x 4 x i32>)
declare <vscale x 4 x i32> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4i32(<vscale x 4 x i1>, ptr, <vscale x 4 x i32>)

declare <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i32(<vscale x 2 x i1>, ptr, <vscale x 2 x i32>)
declare <vscale x 2 x i32> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i32(<vscale x 2 x i1>, ptr, <vscale x 2 x i32>)

declare <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv4f32(<vscale x 4 x i1>, ptr, <vscale x 4 x i32>)
declare <vscale x 4 x float> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv4f32(<vscale x 4 x i1>, ptr, <vscale x 4 x i32>)

; LD1D
declare <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2i64(<vscale x 2 x i1>, ptr, <vscale x 2 x i32>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2i64(<vscale x 2 x i1>, ptr, <vscale x 2 x i32>)

declare <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.uxtw.index.nxv2f64(<vscale x 2 x i1>, ptr, <vscale x 2 x i32>)
declare <vscale x 2 x double> @llvm.aarch64.sve.ld1.gather.sxtw.index.nxv2f64(<vscale x 2 x i1>, ptr, <vscale x 2 x i32>)