; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p1,+bf16 < %s | FileCheck %s

; Test codegen for the SVE2.1 LD1Q quadword gather-load intrinsics in their
; two addressing forms: scalar base with a vector of indices
; (@llvm.aarch64.sve.ld1q.gather.index.*), and vector of bases with a scalar
; index that is scaled to a byte offset
; (@llvm.aarch64.sve.ld1q.gather.scalar.offset.*).

declare <vscale x 2 x i64> @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 1 x i1>, <vscale x 2 x i64>, i64)
declare <vscale x 4 x i32> @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv4i32.nxv2i64(<vscale x 1 x i1>, <vscale x 2 x i64>, i64)
declare <vscale x 8 x i16> @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv8i16.nxv2i64(<vscale x 1 x i1>, <vscale x 2 x i64>, i64)
declare <vscale x 16 x i8> @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv16i8.nxv2i64(<vscale x 1 x i1>, <vscale x 2 x i64>, i64)
declare <vscale x 2 x double> @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv2f64.nxv2i64(<vscale x 1 x i1>, <vscale x 2 x i64>, i64)
declare <vscale x 4 x float> @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv4f32.nxv2i64(<vscale x 1 x i1>, <vscale x 2 x i64>, i64)
declare <vscale x 8 x half> @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv8f16.nxv2i64(<vscale x 1 x i1>, <vscale x 2 x i64>, i64)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv8bf16.nxv2i64(<vscale x 1 x i1>, <vscale x 2 x i64>, i64)
declare <vscale x 4 x i32> @llvm.aarch64.sve.ld1q.gather.index.nxv4i32(<vscale x 1 x i1>, ptr, <vscale x 2 x i64>)
declare <vscale x 8 x i16> @llvm.aarch64.sve.ld1q.gather.index.nxv8i16(<vscale x 1 x i1>, ptr, <vscale x 2 x i64>)
declare <vscale x 2 x i64> @llvm.aarch64.sve.ld1q.gather.index.nxv2i64(<vscale x 1 x i1>, ptr, <vscale x 2 x i64>)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1q.gather.index.nxv8bf16(<vscale x 1 x i1>, ptr, <vscale x 2 x i64>)
declare <vscale x 8 x half> @llvm.aarch64.sve.ld1q.gather.index.nxv8f16(<vscale x 1 x i1>, ptr, <vscale x 2 x i64>)
declare <vscale x 4 x float> @llvm.aarch64.sve.ld1q.gather.index.nxv4f32(<vscale x 1 x i1>, ptr, <vscale x 2 x i64>)
declare <vscale x 2 x double> @llvm.aarch64.sve.ld1q.gather.index.nxv2f64(<vscale x 1 x i1>, ptr, <vscale x 2 x i64>)

define <vscale x 8 x i16> @test_svld1q_gather_u64index_s16(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %idx) {
; CHECK-LABEL: test_svld1q_gather_u64index_s16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    lsl z0.d, z0.d, #1
; CHECK-NEXT:    ld1q { z0.q }, p0/z, [z0.d, x0]
; CHECK-NEXT:    ret
entry:
  %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ld1q.gather.index.nxv8i16(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %idx)
  ret <vscale x 8 x i16> %0
}

define <vscale x 8 x i16> @test_svld1q_gather_u64index_u16(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %idx) {
; CHECK-LABEL: test_svld1q_gather_u64index_u16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    lsl z0.d, z0.d, #1
; CHECK-NEXT:    ld1q { z0.q }, p0/z, [z0.d, x0]
; CHECK-NEXT:    ret
entry:
  %0 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ld1q.gather.index.nxv8i16(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %idx)
  ret <vscale x 8 x i16> %0
}

define <vscale x 4 x i32> @test_svld1q_gather_u64index_s32(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %idx) {
; CHECK-LABEL: test_svld1q_gather_u64index_s32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    lsl z0.d, z0.d, #2
; CHECK-NEXT:    ld1q { z0.q }, p0/z, [z0.d, x0]
; CHECK-NEXT:    ret
entry:
  %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1q.gather.index.nxv4i32(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %idx)
  ret <vscale x 4 x i32> %0
}

define <vscale x 4 x i32> @test_svld1q_gather_u64index_u32(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %idx) {
; CHECK-LABEL: test_svld1q_gather_u64index_u32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    lsl z0.d, z0.d, #2
; CHECK-NEXT:    ld1q { z0.q }, p0/z, [z0.d, x0]
; CHECK-NEXT:    ret
entry:
  %0 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1q.gather.index.nxv4i32(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %idx)
  ret <vscale x 4 x i32> %0
}

define <vscale x 2 x i64> @test_svld1q_gather_u64index_s64(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %idx) {
; CHECK-LABEL: test_svld1q_gather_u64index_s64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    lsl z0.d, z0.d, #3
; CHECK-NEXT:    ld1q { z0.q }, p0/z, [z0.d, x0]
; CHECK-NEXT:    ret
entry:
  %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1q.gather.index.nxv2i64(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %idx)
  ret <vscale x 2 x i64> %0
}

define <vscale x 2 x i64> @test_svld1q_gather_u64index_u64(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %idx) {
; CHECK-LABEL: test_svld1q_gather_u64index_u64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    lsl z0.d, z0.d, #3
; CHECK-NEXT:    ld1q { z0.q }, p0/z, [z0.d, x0]
; CHECK-NEXT:    ret
entry:
  %0 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1q.gather.index.nxv2i64(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %idx)
  ret <vscale x 2 x i64> %0
}

define <vscale x 8 x bfloat> @test_svld1q_gather_u64index_bf16(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %idx) {
; CHECK-LABEL: test_svld1q_gather_u64index_bf16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    lsl z0.d, z0.d, #1
; CHECK-NEXT:    ld1q { z0.q }, p0/z, [z0.d, x0]
; CHECK-NEXT:    ret
entry:
  %0 = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1q.gather.index.nxv8bf16(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %idx)
  ret <vscale x 8 x bfloat> %0
}

define <vscale x 8 x half> @test_svld1q_gather_u64index_f16(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %idx) {
; CHECK-LABEL: test_svld1q_gather_u64index_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    lsl z0.d, z0.d, #1
; CHECK-NEXT:    ld1q { z0.q }, p0/z, [z0.d, x0]
; CHECK-NEXT:    ret
entry:
  %0 = tail call <vscale x 8 x half> @llvm.aarch64.sve.ld1q.gather.index.nxv8f16(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %idx)
  ret <vscale x 8 x half> %0
}

define <vscale x 4 x float> @test_svld1q_gather_u64index_f32(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %idx) {
; CHECK-LABEL: test_svld1q_gather_u64index_f32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    lsl z0.d, z0.d, #2
; CHECK-NEXT:    ld1q { z0.q }, p0/z, [z0.d, x0]
; CHECK-NEXT:    ret
entry:
  %0 = tail call <vscale x 4 x float> @llvm.aarch64.sve.ld1q.gather.index.nxv4f32(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %idx)
  ret <vscale x 4 x float> %0
}

define <vscale x 2 x double> @test_svld1q_gather_u64index_f64(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %idx) {
; CHECK-LABEL: test_svld1q_gather_u64index_f64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    lsl z0.d, z0.d, #3
; CHECK-NEXT:    ld1q { z0.q }, p0/z, [z0.d, x0]
; CHECK-NEXT:    ret
entry:
  %0 = tail call <vscale x 2 x double> @llvm.aarch64.sve.ld1q.gather.index.nxv2f64(<vscale x 1 x i1> %pg, ptr %base, <vscale x 2 x i64> %idx)
  ret <vscale x 2 x double> %0
}

define <vscale x 8 x i16> @test_svld1q_gather_u64base_index_s16(<vscale x 1 x i1> %pg, <vscale x 2 x i64> %base, i64 %idx) {
; CHECK-LABEL: test_svld1q_gather_u64base_index_s16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    lsl x8, x0, #1
; CHECK-NEXT:    ld1q { z0.q }, p0/z, [z0.d, x8]
; CHECK-NEXT:    ret
entry:
  %0 = shl i64 %idx, 1
  %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv8i16.nxv2i64(<vscale x 1 x i1> %pg, <vscale x 2 x i64> %base, i64 %0)
  ret <vscale x 8 x i16> %1
}

define <vscale x 8 x i16> @test_svld1q_gather_u64base_index_u16(<vscale x 1 x i1> %pg, <vscale x 2 x i64> %base, i64 %idx) {
; CHECK-LABEL: test_svld1q_gather_u64base_index_u16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    lsl x8, x0, #1
; CHECK-NEXT:    ld1q { z0.q }, p0/z, [z0.d, x8]
; CHECK-NEXT:    ret
entry:
  %0 = shl i64 %idx, 1
  %1 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv8i16.nxv2i64(<vscale x 1 x i1> %pg, <vscale x 2 x i64> %base, i64 %0)
  ret <vscale x 8 x i16> %1
}

define <vscale x 4 x i32> @test_svld1q_gather_u64base_index_s32(<vscale x 1 x i1> %pg, <vscale x 2 x i64> %base, i64 %idx) {
; CHECK-LABEL: test_svld1q_gather_u64base_index_s32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    lsl x8, x0, #2
; CHECK-NEXT:    ld1q { z0.q }, p0/z, [z0.d, x8]
; CHECK-NEXT:    ret
entry:
  %0 = shl i64 %idx, 2
  %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv4i32.nxv2i64(<vscale x 1 x i1> %pg, <vscale x 2 x i64> %base, i64 %0)
  ret <vscale x 4 x i32> %1
}

define <vscale x 4 x i32> @test_svld1q_gather_u64base_index_u32(<vscale x 1 x i1> %pg, <vscale x 2 x i64> %base, i64 %idx) {
; CHECK-LABEL: test_svld1q_gather_u64base_index_u32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    lsl x8, x0, #2
; CHECK-NEXT:    ld1q { z0.q }, p0/z, [z0.d, x8]
; CHECK-NEXT:    ret
entry:
  %0 = shl i64 %idx, 2
  %1 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv4i32.nxv2i64(<vscale x 1 x i1> %pg, <vscale x 2 x i64> %base, i64 %0)
  ret <vscale x 4 x i32> %1
}

define <vscale x 2 x i64> @test_svld1q_gather_u64base_index_s64(<vscale x 1 x i1> %pg, <vscale x 2 x i64> %base, i64 %idx) {
; CHECK-LABEL: test_svld1q_gather_u64base_index_s64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    lsl x8, x0, #3
; CHECK-NEXT:    ld1q { z0.q }, p0/z, [z0.d, x8]
; CHECK-NEXT:    ret
entry:
  %0 = shl i64 %idx, 3
  %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 1 x i1> %pg, <vscale x 2 x i64> %base, i64 %0)
  ret <vscale x 2 x i64> %1
}

define <vscale x 2 x i64> @test_svld1q_gather_u64base_index_u64(<vscale x 1 x i1> %pg, <vscale x 2 x i64> %base, i64 %idx) {
; CHECK-LABEL: test_svld1q_gather_u64base_index_u64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    lsl x8, x0, #3
; CHECK-NEXT:    ld1q { z0.q }, p0/z, [z0.d, x8]
; CHECK-NEXT:    ret
entry:
  %0 = shl i64 %idx, 3
  %1 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv2i64.nxv2i64(<vscale x 1 x i1> %pg, <vscale x 2 x i64> %base, i64 %0)
  ret <vscale x 2 x i64> %1
}

define <vscale x 8 x bfloat> @test_svld1q_gather_u64base_index_bf16(<vscale x 1 x i1> %pg, <vscale x 2 x i64> %base, i64 %idx) {
; CHECK-LABEL: test_svld1q_gather_u64base_index_bf16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    lsl x8, x0, #1
; CHECK-NEXT:    ld1q { z0.q }, p0/z, [z0.d, x8]
; CHECK-NEXT:    ret
entry:
  %0 = shl i64 %idx, 1
  %1 = tail call <vscale x 8 x bfloat> @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv8bf16.nxv2i64(<vscale x 1 x i1> %pg, <vscale x 2 x i64> %base, i64 %0)
  ret <vscale x 8 x bfloat> %1
}

define <vscale x 8 x half> @test_svld1q_gather_u64base_index_f16(<vscale x 1 x i1> %pg, <vscale x 2 x i64> %base, i64 %idx) {
; CHECK-LABEL: test_svld1q_gather_u64base_index_f16:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    lsl x8, x0, #1
; CHECK-NEXT:    ld1q { z0.q }, p0/z, [z0.d, x8]
; CHECK-NEXT:    ret
entry:
  %0 = shl i64 %idx, 1
  %1 = tail call <vscale x 8 x half> @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv8f16.nxv2i64(<vscale x 1 x i1> %pg, <vscale x 2 x i64> %base, i64 %0)
  ret <vscale x 8 x half> %1
}

define <vscale x 4 x float> @test_svld1q_gather_u64base_index_f32(<vscale x 1 x i1> %pg, <vscale x 2 x i64> %base, i64 %idx) {
; CHECK-LABEL: test_svld1q_gather_u64base_index_f32:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    lsl x8, x0, #2
; CHECK-NEXT:    ld1q { z0.q }, p0/z, [z0.d, x8]
; CHECK-NEXT:    ret
entry:
  %0 = shl i64 %idx, 2
  %1 = tail call <vscale x 4 x float> @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv4f32.nxv2i64(<vscale x 1 x i1> %pg, <vscale x 2 x i64> %base, i64 %0)
  ret <vscale x 4 x float> %1
}

define <vscale x 2 x double> @test_svld1q_gather_u64base_index_f64(<vscale x 1 x i1> %pg, <vscale x 2 x i64> %base, i64 %idx) {
; CHECK-LABEL: test_svld1q_gather_u64base_index_f64:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    lsl x8, x0, #3
; CHECK-NEXT:    ld1q { z0.q }, p0/z, [z0.d, x8]
; CHECK-NEXT:    ret
entry:
  %0 = shl i64 %idx, 3
  %1 = tail call <vscale x 2 x double> @llvm.aarch64.sve.ld1q.gather.scalar.offset.nxv2f64.nxv2i64(<vscale x 1 x i1> %pg, <vscale x 2 x i64> %base, i64 %0)
  ret <vscale x 2 x double> %1
}