; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512

target triple = "aarch64-unknown-linux-gnu"

;
; LD1B
;

define void @masked_gather_v2i8(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_gather_v2i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    ldr q0, [x1]
; CHECK-NEXT:    ld1b { z0.d }, p0/z, [z0.d]
; CHECK-NEXT:    ptrue p0.s, vl2
; CHECK-NEXT:    xtn v0.2s, v0.2d
; CHECK-NEXT:    st1b { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %ptrs = load <2 x ptr>, ptr %b
  %vals = call <2 x i8> @llvm.masked.gather.v2i8(<2 x ptr> %ptrs, i32 8, <2 x i1> <i1 true, i1 true>, <2 x i8> undef)
  store <2 x i8> %vals, ptr %a
  ret void
}

define void @masked_gather_v4i8(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_gather_v4i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x1]
; CHECK-NEXT:    ld1b { z0.d }, p0/z, [z0.d]
; CHECK-NEXT:    st1b { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %ptrs = load <4 x ptr>, ptr %b
  %vals = call <4 x i8> @llvm.masked.gather.v4i8(<4 x ptr> %ptrs, i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i8> undef)
  store <4 x i8> %vals, ptr %a
  ret void
}

define void @masked_gather_v8i8(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: masked_gather_v8i8:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x1]
; VBITS_GE_256-NEXT:    ld1b { z0.d }, p0/z, [z0.d]
; VBITS_GE_256-NEXT:    ld1b { z1.d }, p0/z, [z1.d]
; VBITS_GE_256-NEXT:    uzp1 z0.s, z0.s, z0.s
; VBITS_GE_256-NEXT:    uzp1 z0.h, z0.h, z0.h
; VBITS_GE_256-NEXT:    uzp1 z1.s, z1.s, z1.s
; VBITS_GE_256-NEXT:    uzp1 z1.h, z1.h, z1.h
; VBITS_GE_256-NEXT:    uzp1 v0.8b, v1.8b, v0.8b
; VBITS_GE_256-NEXT:    str d0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: masked_gather_v8i8:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x1]
; VBITS_GE_512-NEXT:    ld1b { z0.d }, p0/z, [z0.d]
; VBITS_GE_512-NEXT:    uzp1 z0.s, z0.s, z0.s
; VBITS_GE_512-NEXT:    uzp1 z0.h, z0.h, z0.h
; VBITS_GE_512-NEXT:    uzp1 z0.b, z0.b, z0.b
; VBITS_GE_512-NEXT:    str d0, [x0]
; VBITS_GE_512-NEXT:    ret
  %ptrs = load <8 x ptr>, ptr %b
  %vals = call <8 x i8> @llvm.masked.gather.v8i8(<8 x ptr> %ptrs, i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i8> undef)
  store <8 x i8> %vals, ptr %a
  ret void
}

define void @masked_gather_v16i8(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: masked_gather_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x1]
; CHECK-NEXT:    ld1b { z0.d }, p0/z, [z0.d]
; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT:    str q0, [x0]
; CHECK-NEXT:    ret
  %ptrs = load <16 x ptr>, ptr %b
  %vals = call <16 x i8> @llvm.masked.gather.v16i8(<16 x ptr> %ptrs, i32 8, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
                                                                                      i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i8> undef)
  store <16 x i8> %vals, ptr %a
  ret void
}

define void @masked_gather_v32i8(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_gather_v32i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x1]
; CHECK-NEXT:    ld1b { z0.d }, p0/z, [z0.d]
; CHECK-NEXT:    st1b { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %ptrs = load <32 x ptr>, ptr %b
  %vals = call <32 x i8> @llvm.masked.gather.v32i8(<32 x ptr> %ptrs, i32 8, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
                                                                                      i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
                                                                                      i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
                                                                                      i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <32 x i8> undef)
  store <32 x i8> %vals, ptr %a
  ret void
}

;
; LD1H
;

define void @masked_gather_v2i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_gather_v2i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    ldr q0, [x1]
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [z0.d]
; CHECK-NEXT:    ptrue p0.s, vl2
; CHECK-NEXT:    xtn v0.2s, v0.2d
; CHECK-NEXT:    st1h { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %ptrs = load <2 x ptr>, ptr %b
  %vals = call <2 x i16> @llvm.masked.gather.v2i16(<2 x ptr> %ptrs, i32 8, <2 x i1> <i1 true, i1 true>, <2 x i16> undef)
  store <2 x i16> %vals, ptr %a
  ret void
}

define void @masked_gather_v4i16(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_gather_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x1]
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [z0.d]
; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT:    str d0, [x0]
; CHECK-NEXT:    ret
  %ptrs = load <4 x ptr>, ptr %b
  %vals = call <4 x i16> @llvm.masked.gather.v4i16(<4 x ptr> %ptrs, i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i16> undef)
  store <4 x i16> %vals, ptr %a
  ret void
}

define void @masked_gather_v8i16(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: masked_gather_v8i16:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x1]
; VBITS_GE_256-NEXT:    ld1h { z0.d }, p0/z, [z0.d]
; VBITS_GE_256-NEXT:    ld1h { z1.d }, p0/z, [z1.d]
; VBITS_GE_256-NEXT:    uzp1 z0.s, z0.s, z0.s
; VBITS_GE_256-NEXT:    uzp1 z0.h, z0.h, z0.h
; VBITS_GE_256-NEXT:    uzp1 z1.s, z1.s, z1.s
; VBITS_GE_256-NEXT:    uzp1 z1.h, z1.h, z1.h
; VBITS_GE_256-NEXT:    mov v1.d[1], v0.d[0]
; VBITS_GE_256-NEXT:    str q1, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: masked_gather_v8i16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x1]
; VBITS_GE_512-NEXT:    ld1h { z0.d }, p0/z, [z0.d]
; VBITS_GE_512-NEXT:    uzp1 z0.s, z0.s, z0.s
; VBITS_GE_512-NEXT:    uzp1 z0.h, z0.h, z0.h
; VBITS_GE_512-NEXT:    str q0, [x0]
; VBITS_GE_512-NEXT:    ret
  %ptrs = load <8 x ptr>, ptr %b
  %vals = call <8 x i16> @llvm.masked.gather.v8i16(<8 x ptr> %ptrs, i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i16> undef)
  store <8 x i16> %vals, ptr %a
  ret void
}

define void @masked_gather_v16i16(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: masked_gather_v16i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x1]
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [z0.d]
; CHECK-NEXT:    st1h { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %ptrs = load <16 x ptr>, ptr %b
  %vals = call <16 x i16> @llvm.masked.gather.v16i16(<16 x ptr> %ptrs, i32 8, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
                                                                                        i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i16> undef)
  store <16 x i16> %vals, ptr %a
  ret void
}

define void @masked_gather_v32i16(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_gather_v32i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x1]
; CHECK-NEXT:    ld1h { z0.d }, p0/z, [z0.d]
; CHECK-NEXT:    st1h { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %ptrs = load <32 x ptr>, ptr %b
  %vals = call <32 x i16> @llvm.masked.gather.v32i16(<32 x ptr> %ptrs, i32 8, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
                                                                                        i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
                                                                                        i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
                                                                                        i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <32 x i16> undef)
  store <32 x i16> %vals, ptr %a
  ret void
}

;
; LD1W
;

define void @masked_gather_v2i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_gather_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    ldr q0, [x1]
; CHECK-NEXT:    ld1w { z0.d }, p0/z, [z0.d]
; CHECK-NEXT:    xtn v0.2s, v0.2d
; CHECK-NEXT:    str d0, [x0]
; CHECK-NEXT:    ret
  %ptrs = load <2 x ptr>, ptr %b
  %vals = call <2 x i32> @llvm.masked.gather.v2i32(<2 x ptr> %ptrs, i32 8, <2 x i1> <i1 true, i1 true>, <2 x i32> undef)
  store <2 x i32> %vals, ptr %a
  ret void
}

define void @masked_gather_v4i32(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_gather_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x1]
; CHECK-NEXT:    ld1w { z0.d }, p0/z, [z0.d]
; CHECK-NEXT:    uzp1 z0.s, z0.s, z0.s
; CHECK-NEXT:    str q0, [x0]
; CHECK-NEXT:    ret
  %ptrs = load <4 x ptr>, ptr %b
  %vals = call <4 x i32> @llvm.masked.gather.v4i32(<4 x ptr> %ptrs, i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> undef)
  store <4 x i32> %vals, ptr %a
  ret void
}

define void @masked_gather_v8i32(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: masked_gather_v8i32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x1]
; VBITS_GE_256-NEXT:    ld1w { z0.d }, p0/z, [z0.d]
; VBITS_GE_256-NEXT:    ld1w { z1.d }, p0/z, [z1.d]
; VBITS_GE_256-NEXT:    uzp1 z0.s, z0.s, z0.s
; VBITS_GE_256-NEXT:    ptrue p0.s, vl4
; VBITS_GE_256-NEXT:    uzp1 z1.s, z1.s, z1.s
; VBITS_GE_256-NEXT:    splice z1.s, p0, z1.s, z0.s
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    st1w { z1.s }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: masked_gather_v8i32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x1]
; VBITS_GE_512-NEXT:    ld1w { z0.d }, p0/z, [z0.d]
; VBITS_GE_512-NEXT:    st1w { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %ptrs = load <8 x ptr>, ptr %b
  %vals = call <8 x i32> @llvm.masked.gather.v8i32(<8 x ptr> %ptrs, i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
  store <8 x i32> %vals, ptr %a
  ret void
}

define void @masked_gather_v16i32(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: masked_gather_v16i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x1]
; CHECK-NEXT:    ld1w { z0.d }, p0/z, [z0.d]
; CHECK-NEXT:    st1w { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %ptrs = load <16 x ptr>, ptr %b
  %vals = call <16 x i32> @llvm.masked.gather.v16i32(<16 x ptr> %ptrs, i32 8, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
                                                                                        i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i32> undef)
  store <16 x i32> %vals, ptr %a
  ret void
}

define void @masked_gather_v32i32(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_gather_v32i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x1]
; CHECK-NEXT:    ld1w { z0.d }, p0/z, [z0.d]
; CHECK-NEXT:    st1w { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %ptrs = load <32 x ptr>, ptr %b
  %vals = call <32 x i32> @llvm.masked.gather.v32i32(<32 x ptr> %ptrs, i32 8, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
                                                                                        i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
                                                                                        i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
                                                                                        i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <32 x i32> undef)
  store <32 x i32> %vals, ptr %a
  ret void
}

;
; LD1D
;

define void @masked_gather_v2i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_gather_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    ldr q0, [x1]
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [z0.d]
; CHECK-NEXT:    str q0, [x0]
; CHECK-NEXT:    ret
  %ptrs = load <2 x ptr>, ptr %b
  %vals = call <2 x i64> @llvm.masked.gather.v2i64(<2 x ptr> %ptrs, i32 8, <2 x i1> <i1 true, i1 true>, <2 x i64> undef)
  store <2 x i64> %vals, ptr %a
  ret void
}

define void @masked_gather_v4i64(ptr %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: masked_gather_v4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x1]
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [z0.d]
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %ptrs = load <4 x ptr>, ptr %b
  %vals = call <4 x i64> @llvm.masked.gather.v4i64(<4 x ptr> %ptrs, i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i64> undef)
  store <4 x i64> %vals, ptr %a
  ret void
}

define void @masked_gather_v8i64(ptr %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: masked_gather_v8i64:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [x1]
; VBITS_GE_256-NEXT:    ld1d { z0.d }, p0/z, [z0.d]
; VBITS_GE_256-NEXT:    ld1d { z1.d }, p0/z, [z1.d]
; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    st1d { z1.d }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: masked_gather_v8i64:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [x1]
; VBITS_GE_512-NEXT:    ld1d { z0.d }, p0/z, [z0.d]
; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %ptrs = load <8 x ptr>, ptr %b
  %vals = call <8 x i64> @llvm.masked.gather.v8i64(<8 x ptr> %ptrs, i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i64> undef)
  store <8 x i64> %vals, ptr %a
  ret void
}

define void @masked_gather_v16i64(ptr %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: masked_gather_v16i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x1]
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [z0.d]
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %ptrs = load <16 x ptr>, ptr %b
  %vals = call <16 x i64> @llvm.masked.gather.v16i64(<16 x ptr> %ptrs, i32 8, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
                                                                                        i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x i64> undef)
  store <16 x i64> %vals, ptr %a
  ret void
}

define void @masked_gather_v32i64(ptr %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: masked_gather_v32i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x1]
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [z0.d]
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %ptrs = load <32 x ptr>, ptr %b
  %vals = call <32 x i64> @llvm.masked.gather.v32i64(<32 x ptr> %ptrs, i32 8, <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
                                                                                        i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
                                                                                        i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true,
                                                                                        i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <32 x i64> undef)
  store <32 x i64> %vals, ptr %a
  ret void
}

declare <2 x i8> @llvm.masked.gather.v2i8(<2 x ptr>, i32, <2 x i1>, <2 x i8>)
declare <4 x i8> @llvm.masked.gather.v4i8(<4 x ptr>, i32, <4 x i1>, <4 x i8>)
declare <8 x i8> @llvm.masked.gather.v8i8(<8 x ptr>, i32, <8 x i1>, <8 x i8>)
declare <16 x i8> @llvm.masked.gather.v16i8(<16 x ptr>, i32, <16 x i1>, <16 x i8>)
declare <32 x i8> @llvm.masked.gather.v32i8(<32 x ptr>, i32, <32 x i1>, <32 x i8>)

declare <2 x i16> @llvm.masked.gather.v2i16(<2 x ptr>, i32, <2 x i1>, <2 x i16>)
declare <4 x i16> @llvm.masked.gather.v4i16(<4 x ptr>, i32, <4 x i1>, <4 x i16>)
declare <8 x i16> @llvm.masked.gather.v8i16(<8 x ptr>, i32, <8 x i1>, <8 x i16>)
declare <16 x i16> @llvm.masked.gather.v16i16(<16 x ptr>, i32, <16 x i1>, <16 x i16>)
declare <32 x i16> @llvm.masked.gather.v32i16(<32 x ptr>, i32, <32 x i1>, <32 x i16>)

declare <2 x i32> @llvm.masked.gather.v2i32(<2 x ptr>, i32, <2 x i1>, <2 x i32>)
declare <4 x i32> @llvm.masked.gather.v4i32(<4 x ptr>, i32, <4 x i1>, <4 x i32>)
declare <8 x i32> @llvm.masked.gather.v8i32(<8 x ptr>, i32, <8 x i1>, <8 x i32>)
declare <16 x i32> @llvm.masked.gather.v16i32(<16 x ptr>, i32, <16 x i1>, <16 x i32>)
declare <32 x i32> @llvm.masked.gather.v32i32(<32 x ptr>, i32, <32 x i1>, <32 x i32>)

declare <2 x i64> @llvm.masked.gather.v2i64(<2 x ptr>, i32, <2 x i1>, <2 x i64>)
declare <4 x i64> @llvm.masked.gather.v4i64(<4 x ptr>, i32, <4 x i1>, <4 x i64>)
declare <8 x i64> @llvm.masked.gather.v8i64(<8 x ptr>, i32, <8 x i1>, <8 x i64>)
declare <16 x i64> @llvm.masked.gather.v16i64(<16 x ptr>, i32, <16 x i1>, <16 x i64>)
declare <32 x i64> @llvm.masked.gather.v32i64(<32 x ptr>, i32, <32 x i1>, <32 x i64>)

attributes #0 = { "target-features"="+sve" }