; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s --check-prefixes=CHECK,CHECK-LD1R
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+no-sve-fp-ld1r < %s | FileCheck %s --check-prefixes=CHECK,CHECK-NO-LD1R
;
; Check that ld1r* instruction is generated to splat scalar during load,
; rather than mov from scalar to vector register (which would require the vector unit).
;
; one-off: ld1r_stack checks that ld1rb works with stack objects.
;
; Test axes:
; types = [i8, i16, i32, i64, half, float, double]
; methods = [direct load, gep upper bound - 1, gep out of range x {neg,pos}, sext..., zext..., unpacked_floats...]
;

@g8 = external global i8

; One-off test for splatted value coming from stack load.
define <vscale x 16 x i8> @ld1r_stack() {
; CHECK-LABEL: ld1r_stack:
; CHECK:       // %bb.0:
; CHECK-NEXT:    sub sp, sp, #16
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    adrp x8, :got:g8
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    ldr x8, [x8, :got_lo12:g8]
; CHECK-NEXT:    ldrb w8, [x8]
; CHECK-NEXT:    strb w8, [sp, #12]
; CHECK-NEXT:    ld1rb { z0.b }, p0/z, [sp, #14]
; CHECK-NEXT:    add sp, sp, #16
; CHECK-NEXT:    ret
  %valp = alloca i8
  %valp2 = load volatile i8, ptr @g8
  store volatile i8 %valp2, ptr %valp
  %valp3 = getelementptr i8, ptr %valp, i32 2
  %val = load i8, ptr %valp3
  %1 = insertelement <vscale x 16 x i8> undef, i8 %val, i32 0
  %2 = shufflevector <vscale x 16 x i8> %1, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  ret <vscale x 16 x i8> %2
}

define <vscale x 16 x i8> @ld1rb(ptr %valp) {
; CHECK-LABEL: ld1rb:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    ld1rb { z0.b }, p0/z, [x0]
; CHECK-NEXT:    ret
  %val = load i8, ptr %valp
  %ins = insertelement <vscale x 16 x i8> undef, i8 %val, i32 0
  %shf = shufflevector <vscale x 16 x i8> %ins, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  ret <vscale x 16 x i8> %shf
}

define <vscale x 16 x i8> @ld1rb_gep(ptr %valp) {
; CHECK-LABEL: ld1rb_gep:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    ld1rb { z0.b }, p0/z, [x0, #63]
; CHECK-NEXT:    ret
  %valp2 = getelementptr i8, ptr %valp, i32 63
  %val = load i8, ptr %valp2
  %ins = insertelement <vscale x 16 x i8> undef, i8 %val, i32 0
  %shf = shufflevector <vscale x 16 x i8> %ins, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  ret <vscale x 16 x i8> %shf
}

define <vscale x 16 x i8> @ld1rb_gep_out_of_range_up(ptr %valp) {
; CHECK-LABEL: ld1rb_gep_out_of_range_up:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    add x8, x0, #64
; CHECK-NEXT:    ld1rb { z0.b }, p0/z, [x8]
; CHECK-NEXT:    ret
  %valp2 = getelementptr i8, ptr %valp, i32 64
  %val = load i8, ptr %valp2
  %ins = insertelement <vscale x 16 x i8> undef, i8 %val, i32 0
  %shf = shufflevector <vscale x 16 x i8> %ins, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  ret <vscale x 16 x i8> %shf
}

define <vscale x 16 x i8> @ld1rb_gep_out_of_range_down(ptr %valp) {
; CHECK-LABEL: ld1rb_gep_out_of_range_down:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    sub x8, x0, #1
; CHECK-NEXT:    ld1rb { z0.b }, p0/z, [x8]
; CHECK-NEXT:    ret
  %valp2 = getelementptr i8, ptr %valp, i32 -1
  %val = load i8, ptr %valp2
  %ins = insertelement <vscale x 16 x i8> undef, i8 %val, i32 0
  %shf = shufflevector <vscale x 16 x i8> %ins, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  ret <vscale x 16 x i8> %shf
}

define <vscale x 8 x i16> @ld1rb_i8_i16_zext(ptr %valp) {
; CHECK-LABEL: ld1rb_i8_i16_zext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    ld1rb { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ret
  %val = load i8, ptr %valp
  %ext = zext i8 %val to i16
  %ins = insertelement <vscale x 8 x i16> undef, i16 %ext, i32 0
  %shf = shufflevector <vscale x 8 x i16> %ins, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  ret <vscale x 8 x i16> %shf
}

define <vscale x 8 x i16> @ld1rb_i8_i16_sext(ptr %valp) {
; CHECK-LABEL: ld1rb_i8_i16_sext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    ld1rsb { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ret
  %val = load i8, ptr %valp
  %ext = sext i8 %val to i16
  %ins = insertelement <vscale x 8 x i16> undef, i16 %ext, i32 0
  %shf = shufflevector <vscale x 8 x i16> %ins, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  ret <vscale x 8 x i16> %shf
}

define <vscale x 4 x i32> @ld1rb_i8_i32_zext(ptr %valp) {
; CHECK-LABEL: ld1rb_i8_i32_zext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ld1rb { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %val = load i8, ptr %valp
  %ext = zext i8 %val to i32
  %ins = insertelement <vscale x 4 x i32> undef, i32 %ext, i32 0
  %shf = shufflevector <vscale x 4 x i32> %ins, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  ret <vscale x 4 x i32> %shf
}

define <vscale x 4 x i32> @ld1rb_i8_i32_sext(ptr %valp) {
; CHECK-LABEL: ld1rb_i8_i32_sext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ld1rsb { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %val = load i8, ptr %valp
  %ext = sext i8 %val to i32
  %ins = insertelement <vscale x 4 x i32> undef, i32 %ext, i32 0
  %shf = shufflevector <vscale x 4 x i32> %ins, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  ret <vscale x 4 x i32> %shf
}

define <vscale x 2 x i64> @ld1rb_i8_i64_zext(ptr %valp) {
; CHECK-LABEL: ld1rb_i8_i64_zext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1rb { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %val = load i8, ptr %valp
  %ext = zext i8 %val to i64
  %ins = insertelement <vscale x 2 x i64> undef, i64 %ext, i32 0
  %shf = shufflevector <vscale x 2 x i64> %ins, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  ret <vscale x 2 x i64> %shf
}

define <vscale x 2 x i64> @ld1rb_i8_i64_sext(ptr %valp) {
; CHECK-LABEL: ld1rb_i8_i64_sext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1rsb { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %val = load i8, ptr %valp
  %ext = sext i8 %val to i64
  %ins = insertelement <vscale x 2 x i64> undef, i64 %ext, i32 0
  %shf = shufflevector <vscale x 2 x i64> %ins, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  ret <vscale x 2 x i64> %shf
}

define <vscale x 8 x i16> @ld1rh(ptr %valp) {
; CHECK-LABEL: ld1rh:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    ld1rh { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ret
  %val = load i16, ptr %valp
  %ins = insertelement <vscale x 8 x i16> undef, i16 %val, i32 0
  %shf = shufflevector <vscale x 8 x i16> %ins, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  ret <vscale x 8 x i16> %shf
}

define <vscale x 8 x i16> @ld1rh_gep(ptr %valp) {
; CHECK-LABEL: ld1rh_gep:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    ld1rh { z0.h }, p0/z, [x0, #126]
; CHECK-NEXT:    ret
  %valp2 = getelementptr i16, ptr %valp, i32 63
  %val = load i16, ptr %valp2
  %ins = insertelement <vscale x 8 x i16> undef, i16 %val, i32 0
  %shf = shufflevector <vscale x 8 x i16> %ins, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  ret <vscale x 8 x i16> %shf
}

define <vscale x 8 x i16> @ld1rh_gep_out_of_range_up(ptr %valp) {
; CHECK-LABEL: ld1rh_gep_out_of_range_up:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    add x8, x0, #128
; CHECK-NEXT:    ld1rh { z0.h }, p0/z, [x8]
; CHECK-NEXT:    ret
  %valp2 = getelementptr i16, ptr %valp, i32 64
  %val = load i16, ptr %valp2
  %ins = insertelement <vscale x 8 x i16> undef, i16 %val, i32 0
  %shf = shufflevector <vscale x 8 x i16> %ins, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  ret <vscale x 8 x i16> %shf
}

define <vscale x 8 x i16> @ld1rh_gep_out_of_range_down(ptr %valp) {
; CHECK-LABEL: ld1rh_gep_out_of_range_down:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    sub x8, x0, #2
; CHECK-NEXT:    ld1rh { z0.h }, p0/z, [x8]
; CHECK-NEXT:    ret
  %valp2 = getelementptr i16, ptr %valp, i32 -1
  %val = load i16, ptr %valp2
  %ins = insertelement <vscale x 8 x i16> undef, i16 %val, i32 0
  %shf = shufflevector <vscale x 8 x i16> %ins, <vscale x 8 x i16> undef, <vscale x 8 x i32> zeroinitializer
  ret <vscale x 8 x i16> %shf
}

define <vscale x 4 x i32> @ld1rh_i16_i32_zext(ptr %valp) {
; CHECK-LABEL: ld1rh_i16_i32_zext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ld1rh { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %val = load i16, ptr %valp
  %ext = zext i16 %val to i32
  %ins = insertelement <vscale x 4 x i32> undef, i32 %ext, i32 0
  %shf = shufflevector <vscale x 4 x i32> %ins, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  ret <vscale x 4 x i32> %shf
}

define <vscale x 4 x i32> @ld1rh_i16_i32_sext(ptr %valp) {
; CHECK-LABEL: ld1rh_i16_i32_sext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ld1rsh { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %val = load i16, ptr %valp
  %ext = sext i16 %val to i32
  %ins = insertelement <vscale x 4 x i32> undef, i32 %ext, i32 0
  %shf = shufflevector <vscale x 4 x i32> %ins, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  ret <vscale x 4 x i32> %shf
}

define <vscale x 2 x i64> @ld1rh_i16_i64_zext(ptr %valp) {
; CHECK-LABEL: ld1rh_i16_i64_zext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1rh { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %val = load i16, ptr %valp
  %ext = zext i16 %val to i64
  %ins = insertelement <vscale x 2 x i64> undef, i64 %ext, i32 0
  %shf = shufflevector <vscale x 2 x i64> %ins, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  ret <vscale x 2 x i64> %shf
}

define <vscale x 2 x i64> @ld1rh_i16_i64_sext(ptr %valp) {
; CHECK-LABEL: ld1rh_i16_i64_sext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1rsh { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %val = load i16, ptr %valp
  %ext = sext i16 %val to i64
  %ins = insertelement <vscale x 2 x i64> undef, i64 %ext, i32 0
  %shf = shufflevector <vscale x 2 x i64> %ins, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  ret <vscale x 2 x i64> %shf
}

define <vscale x 4 x i32> @ld1rw(ptr %valp) {
; CHECK-LABEL: ld1rw:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ld1rw { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %val = load i32, ptr %valp
  %ins = insertelement <vscale x 4 x i32> undef, i32 %val, i32 0
  %shf = shufflevector <vscale x 4 x i32> %ins, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  ret <vscale x 4 x i32> %shf
}

define <vscale x 4 x i32> @ld1rw_gep(ptr %valp) {
; CHECK-LABEL: ld1rw_gep:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ld1rw { z0.s }, p0/z, [x0, #252]
; CHECK-NEXT:    ret
  %valp2 = getelementptr i32, ptr %valp, i32 63
  %val = load i32, ptr %valp2
  %ins = insertelement <vscale x 4 x i32> undef, i32 %val, i32 0
  %shf = shufflevector <vscale x 4 x i32> %ins, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  ret <vscale x 4 x i32> %shf
}

define <vscale x 4 x i32> @ld1rw_gep_out_of_range_up(ptr %valp) {
; CHECK-LABEL: ld1rw_gep_out_of_range_up:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    add x8, x0, #256
; CHECK-NEXT:    ld1rw { z0.s }, p0/z, [x8]
; CHECK-NEXT:    ret
  %valp2 = getelementptr i32, ptr %valp, i32 64
  %val = load i32, ptr %valp2
  %ins = insertelement <vscale x 4 x i32> undef, i32 %val, i32 0
  %shf = shufflevector <vscale x 4 x i32> %ins, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  ret <vscale x 4 x i32> %shf
}

define <vscale x 4 x i32> @ld1rw_gep_out_of_range_down(ptr %valp) {
; CHECK-LABEL: ld1rw_gep_out_of_range_down:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    sub x8, x0, #4
; CHECK-NEXT:    ld1rw { z0.s }, p0/z, [x8]
; CHECK-NEXT:    ret
  %valp2 = getelementptr i32, ptr %valp, i32 -1
  %val = load i32, ptr %valp2
  %ins = insertelement <vscale x 4 x i32> undef, i32 %val, i32 0
  %shf = shufflevector <vscale x 4 x i32> %ins, <vscale x 4 x i32> undef, <vscale x 4 x i32> zeroinitializer
  ret <vscale x 4 x i32> %shf
}

define <vscale x 2 x i64> @ld1rw_i32_i64_zext(ptr %valp) {
; CHECK-LABEL: ld1rw_i32_i64_zext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1rw { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %val = load i32, ptr %valp
  %ext = zext i32 %val to i64
  %ins = insertelement <vscale x 2 x i64> undef, i64 %ext, i32 0
  %shf = shufflevector <vscale x 2 x i64> %ins, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  ret <vscale x 2 x i64> %shf
}

define <vscale x 2 x i64> @ld1rw_i32_i64_sext(ptr %valp) {
; CHECK-LABEL: ld1rw_i32_i64_sext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1rsw { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %val = load i32, ptr %valp
  %ext = sext i32 %val to i64
  %ins = insertelement <vscale x 2 x i64> undef, i64 %ext, i32 0
  %shf = shufflevector <vscale x 2 x i64> %ins, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  ret <vscale x 2 x i64> %shf
}

define <vscale x 2 x i64> @ld1rd(ptr %valp) {
; CHECK-LABEL: ld1rd:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1rd { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %val = load i64, ptr %valp
  %ins = insertelement <vscale x 2 x i64> undef, i64 %val, i32 0
  %shf = shufflevector <vscale x 2 x i64> %ins, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  ret <vscale x 2 x i64> %shf
}

define <vscale x 2 x i64> @ld1rd_gep(ptr %valp) {
; CHECK-LABEL: ld1rd_gep:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1rd { z0.d }, p0/z, [x0, #504]
; CHECK-NEXT:    ret
  %valp2 = getelementptr i64, ptr %valp, i32 63
  %val = load i64, ptr %valp2
  %ins = insertelement <vscale x 2 x i64> undef, i64 %val, i32 0
  %shf = shufflevector <vscale x 2 x i64> %ins, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  ret <vscale x 2 x i64> %shf
}

define <vscale x 2 x i64> @ld1rd_gep_out_of_range_up(ptr %valp) {
; CHECK-LABEL: ld1rd_gep_out_of_range_up:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    add x8, x0, #512
; CHECK-NEXT:    ld1rd { z0.d }, p0/z, [x8]
; CHECK-NEXT:    ret
  %valp2 = getelementptr i64, ptr %valp, i32 64
  %val = load i64, ptr %valp2
  %ins = insertelement <vscale x 2 x i64> undef, i64 %val, i32 0
  %shf = shufflevector <vscale x 2 x i64> %ins, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  ret <vscale x 2 x i64> %shf
}

define <vscale x 2 x i64> @ld1rd_gep_out_of_range_down(ptr %valp) {
; CHECK-LABEL: ld1rd_gep_out_of_range_down:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    sub x8, x0, #8
; CHECK-NEXT:    ld1rd { z0.d }, p0/z, [x8]
; CHECK-NEXT:    ret
  %valp2 = getelementptr i64, ptr %valp, i32 -1
  %val = load i64, ptr %valp2
  %ins = insertelement <vscale x 2 x i64> undef, i64 %val, i32 0
  %shf = shufflevector <vscale x 2 x i64> %ins, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  ret <vscale x 2 x i64> %shf
}

define <vscale x 8 x half> @ld1rh_half(ptr %valp) {
; CHECK-LD1R-LABEL: ld1rh_half:
; CHECK-LD1R:       // %bb.0:
; CHECK-LD1R-NEXT:    ptrue p0.h
; CHECK-LD1R-NEXT:    ld1rh { z0.h }, p0/z, [x0]
; CHECK-LD1R-NEXT:    ret
;
; CHECK-NO-LD1R-LABEL: ld1rh_half:
; CHECK-NO-LD1R:       // %bb.0:
; CHECK-NO-LD1R-NEXT:    ldr h0, [x0]
; CHECK-NO-LD1R-NEXT:    mov z0.h, h0
; CHECK-NO-LD1R-NEXT:    ret
  %val = load half, ptr %valp
  %ins = insertelement <vscale x 8 x half> undef, half %val, i32 0
  %shf = shufflevector <vscale x 8 x half> %ins, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
  ret <vscale x 8 x half> %shf
}

define <vscale x 8 x half> @ld1rh_half_neoverse(ptr %valp) #1 {
; CHECK-LABEL: ld1rh_half_neoverse:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr h0, [x0]
; CHECK-NEXT:    mov z0.h, h0
; CHECK-NEXT:    ret
  %val = load half, ptr %valp
  %ins = insertelement <vscale x 8 x half> undef, half %val, i32 0
  %shf = shufflevector <vscale x 8 x half> %ins, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
  ret <vscale x 8 x half> %shf
}

define <vscale x 8 x half> @ld1rh_half_gep(ptr %valp) {
; CHECK-LD1R-LABEL: ld1rh_half_gep:
; CHECK-LD1R:       // %bb.0:
; CHECK-LD1R-NEXT:    ptrue p0.h
; CHECK-LD1R-NEXT:    ld1rh { z0.h }, p0/z, [x0, #126]
; CHECK-LD1R-NEXT:    ret
;
; CHECK-NO-LD1R-LABEL: ld1rh_half_gep:
; CHECK-NO-LD1R:       // %bb.0:
; CHECK-NO-LD1R-NEXT:    ldr h0, [x0, #126]
; CHECK-NO-LD1R-NEXT:    mov z0.h, h0
; CHECK-NO-LD1R-NEXT:    ret
  %valp2 = getelementptr half, ptr %valp, i32 63
  %val = load half, ptr %valp2
  %ins = insertelement <vscale x 8 x half> undef, half %val, i32 0
  %shf = shufflevector <vscale x 8 x half> %ins, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
  ret <vscale x 8 x half> %shf
}

define <vscale x 8 x half> @ld1rh_half_gep_out_of_range_up(ptr %valp) {
; CHECK-LD1R-LABEL: ld1rh_half_gep_out_of_range_up:
; CHECK-LD1R:       // %bb.0:
; CHECK-LD1R-NEXT:    ptrue p0.h
; CHECK-LD1R-NEXT:    add x8, x0, #128
; CHECK-LD1R-NEXT:    ld1rh { z0.h }, p0/z, [x8]
; CHECK-LD1R-NEXT:    ret
;
; CHECK-NO-LD1R-LABEL: ld1rh_half_gep_out_of_range_up:
; CHECK-NO-LD1R:       // %bb.0:
; CHECK-NO-LD1R-NEXT:    ldr h0, [x0, #128]
; CHECK-NO-LD1R-NEXT:    mov z0.h, h0
; CHECK-NO-LD1R-NEXT:    ret
  %valp2 = getelementptr half, ptr %valp, i32 64
  %val = load half, ptr %valp2
  %ins = insertelement <vscale x 8 x half> undef, half %val, i32 0
  %shf = shufflevector <vscale x 8 x half> %ins, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
  ret <vscale x 8 x half> %shf
}

define <vscale x 8 x half> @ld1rh_half_gep_out_of_range_down(ptr %valp) {
; CHECK-LD1R-LABEL: ld1rh_half_gep_out_of_range_down:
; CHECK-LD1R:       // %bb.0:
; CHECK-LD1R-NEXT:    ptrue p0.h
; CHECK-LD1R-NEXT:    sub x8, x0, #2
; CHECK-LD1R-NEXT:    ld1rh { z0.h }, p0/z, [x8]
; CHECK-LD1R-NEXT:    ret
;
; CHECK-NO-LD1R-LABEL: ld1rh_half_gep_out_of_range_down:
; CHECK-NO-LD1R:       // %bb.0:
; CHECK-NO-LD1R-NEXT:    ldur h0, [x0, #-2]
; CHECK-NO-LD1R-NEXT:    mov z0.h, h0
; CHECK-NO-LD1R-NEXT:    ret
  %valp2 = getelementptr half, ptr %valp, i32 -1
  %val = load half, ptr %valp2
  %ins = insertelement <vscale x 8 x half> undef, half %val, i32 0
  %shf = shufflevector <vscale x 8 x half> %ins, <vscale x 8 x half> undef, <vscale x 8 x i32> zeroinitializer
  ret <vscale x 8 x half> %shf
}

define <vscale x 4 x half> @ld1rh_half_unpacked4(ptr %valp) {
; CHECK-LD1R-LABEL: ld1rh_half_unpacked4:
; CHECK-LD1R:       // %bb.0:
; CHECK-LD1R-NEXT:    ptrue p0.s
; CHECK-LD1R-NEXT:    ld1rh { z0.s }, p0/z, [x0]
; CHECK-LD1R-NEXT:    ret
;
; CHECK-NO-LD1R-LABEL: ld1rh_half_unpacked4:
; CHECK-NO-LD1R:       // %bb.0:
; CHECK-NO-LD1R-NEXT:    ldr h0, [x0]
; CHECK-NO-LD1R-NEXT:    mov z0.h, h0
; CHECK-NO-LD1R-NEXT:    ret
  %val = load half, ptr %valp
  %ins = insertelement <vscale x 4 x half> undef, half %val, i32 0
  %shf = shufflevector <vscale x 4 x half> %ins, <vscale x 4 x half> undef, <vscale x 4 x i32> zeroinitializer
  ret <vscale x 4 x half> %shf
}

define <vscale x 4 x half> @ld1rh_half_unpacked4_gep(ptr %valp) {
; CHECK-LD1R-LABEL: ld1rh_half_unpacked4_gep:
; CHECK-LD1R:       // %bb.0:
; CHECK-LD1R-NEXT:    ptrue p0.s
; CHECK-LD1R-NEXT:    ld1rh { z0.s }, p0/z, [x0, #126]
; CHECK-LD1R-NEXT:    ret
;
; CHECK-NO-LD1R-LABEL: ld1rh_half_unpacked4_gep:
; CHECK-NO-LD1R:       // %bb.0:
; CHECK-NO-LD1R-NEXT:    ldr h0, [x0, #126]
; CHECK-NO-LD1R-NEXT:    mov z0.h, h0
; CHECK-NO-LD1R-NEXT:    ret
  %valp2 = getelementptr half, ptr %valp, i32 63
  %val = load half, ptr %valp2
  %ins = insertelement <vscale x 4 x half> undef, half %val, i32 0
  %shf = shufflevector <vscale x 4 x half> %ins, <vscale x 4 x half> undef, <vscale x 4 x i32> zeroinitializer
  ret <vscale x 4 x half> %shf
}

define <vscale x 4 x half> @ld1rh_half_unpacked4_gep_out_of_range_up(ptr %valp) {
; CHECK-LD1R-LABEL: ld1rh_half_unpacked4_gep_out_of_range_up:
; CHECK-LD1R:       // %bb.0:
; CHECK-LD1R-NEXT:    ptrue p0.s
; CHECK-LD1R-NEXT:    add x8, x0, #128
; CHECK-LD1R-NEXT:    ld1rh { z0.s }, p0/z, [x8]
; CHECK-LD1R-NEXT:    ret
;
; CHECK-NO-LD1R-LABEL: ld1rh_half_unpacked4_gep_out_of_range_up:
; CHECK-NO-LD1R:       // %bb.0:
; CHECK-NO-LD1R-NEXT:    ldr h0, [x0, #128]
; CHECK-NO-LD1R-NEXT:    mov z0.h, h0
; CHECK-NO-LD1R-NEXT:    ret
  %valp2 = getelementptr half, ptr %valp, i32 64
  %val = load half, ptr %valp2
  %ins = insertelement <vscale x 4 x half> undef, half %val, i32 0
  %shf = shufflevector <vscale x 4 x half> %ins, <vscale x 4 x half> undef, <vscale x 4 x i32> zeroinitializer
  ret <vscale x 4 x half> %shf
}

define <vscale x 4 x half> @ld1rh_half_unpacked4_gep_out_of_range_down(ptr %valp) {
; CHECK-LD1R-LABEL: ld1rh_half_unpacked4_gep_out_of_range_down:
; CHECK-LD1R:       // %bb.0:
; CHECK-LD1R-NEXT:    ptrue p0.s
; CHECK-LD1R-NEXT:    sub x8, x0, #2
; CHECK-LD1R-NEXT:    ld1rh { z0.s }, p0/z, [x8]
; CHECK-LD1R-NEXT:    ret
;
; CHECK-NO-LD1R-LABEL: ld1rh_half_unpacked4_gep_out_of_range_down:
; CHECK-NO-LD1R:       // %bb.0:
; CHECK-NO-LD1R-NEXT:    ldur h0, [x0, #-2]
; CHECK-NO-LD1R-NEXT:    mov z0.h, h0
; CHECK-NO-LD1R-NEXT:    ret
  %valp2 = getelementptr half, ptr %valp, i32 -1
  %val = load half, ptr %valp2
  %ins = insertelement <vscale x 4 x half> undef, half %val, i32 0
  %shf = shufflevector <vscale x 4 x half> %ins, <vscale x 4 x half> undef, <vscale x 4 x i32> zeroinitializer
  ret <vscale x 4 x half> %shf
}

define <vscale x 2 x half> @ld1rh_half_unpacked2(ptr %valp) {
; CHECK-LD1R-LABEL: ld1rh_half_unpacked2:
; CHECK-LD1R:       // %bb.0:
; CHECK-LD1R-NEXT:    ptrue p0.d
; CHECK-LD1R-NEXT:    ld1rh { z0.d }, p0/z, [x0]
; CHECK-LD1R-NEXT:    ret
;
; CHECK-NO-LD1R-LABEL: ld1rh_half_unpacked2:
; CHECK-NO-LD1R:       // %bb.0:
; CHECK-NO-LD1R-NEXT:    ldr h0, [x0]
; CHECK-NO-LD1R-NEXT:    mov z0.h, h0
; CHECK-NO-LD1R-NEXT:    ret
  %val = load half, ptr %valp
  %ins = insertelement <vscale x 2 x half> undef, half %val, i32 0
  %shf = shufflevector <vscale x 2 x half> %ins, <vscale x 2 x half> undef, <vscale x 2 x i32> zeroinitializer
  ret <vscale x 2 x half> %shf
}

define <vscale x 2 x half> @ld1rh_half_unpacked2_gep(ptr %valp) {
; CHECK-LD1R-LABEL: ld1rh_half_unpacked2_gep:
; CHECK-LD1R:       // %bb.0:
; CHECK-LD1R-NEXT:    ptrue p0.d
; CHECK-LD1R-NEXT:    ld1rh { z0.d }, p0/z, [x0, #126]
; CHECK-LD1R-NEXT:    ret
;
; CHECK-NO-LD1R-LABEL: ld1rh_half_unpacked2_gep:
; CHECK-NO-LD1R:       // %bb.0:
; CHECK-NO-LD1R-NEXT:    ldr h0, [x0, #126]
; CHECK-NO-LD1R-NEXT:    mov z0.h, h0
; CHECK-NO-LD1R-NEXT:    ret
  %valp2 = getelementptr half, ptr %valp, i32 63
  %val = load half, ptr %valp2
  %ins = insertelement <vscale x 2 x half> undef, half %val, i32 0
  %shf = shufflevector <vscale x 2 x half> %ins, <vscale x 2 x half> undef, <vscale x 2 x i32> zeroinitializer
  ret <vscale x 2 x half> %shf
}

define <vscale x 2 x half> @ld1rh_half_unpacked2_gep_out_of_range_up(ptr %valp) {
; CHECK-LD1R-LABEL: ld1rh_half_unpacked2_gep_out_of_range_up:
; CHECK-LD1R:       // %bb.0:
; CHECK-LD1R-NEXT:    ptrue p0.d
; CHECK-LD1R-NEXT:    add x8, x0, #128
; CHECK-LD1R-NEXT:    ld1rh { z0.d }, p0/z, [x8]
; CHECK-LD1R-NEXT:    ret
;
; CHECK-NO-LD1R-LABEL: ld1rh_half_unpacked2_gep_out_of_range_up:
; CHECK-NO-LD1R:       // %bb.0:
; CHECK-NO-LD1R-NEXT:    ldr h0, [x0, #128]
; CHECK-NO-LD1R-NEXT:    mov z0.h, h0
; CHECK-NO-LD1R-NEXT:    ret
  %valp2 = getelementptr half, ptr %valp, i32 64
  %val = load half, ptr %valp2
  %ins = insertelement <vscale x 2 x half> undef, half %val, i32 0
  %shf = shufflevector <vscale x 2 x half> %ins, <vscale x 2 x half> undef, <vscale x 2 x i32> zeroinitializer
  ret <vscale x 2 x half> %shf
}

define <vscale x 2 x half> @ld1rh_half_unpacked2_gep_out_of_range_down(ptr %valp) {
; CHECK-LD1R-LABEL: ld1rh_half_unpacked2_gep_out_of_range_down:
; CHECK-LD1R:       // %bb.0:
; CHECK-LD1R-NEXT:    ptrue p0.d
; CHECK-LD1R-NEXT:    sub x8, x0, #2
; CHECK-LD1R-NEXT:    ld1rh { z0.d }, p0/z, [x8]
; CHECK-LD1R-NEXT:    ret
;
; CHECK-NO-LD1R-LABEL: ld1rh_half_unpacked2_gep_out_of_range_down:
; CHECK-NO-LD1R:       // %bb.0:
; CHECK-NO-LD1R-NEXT:    ldur h0, [x0, #-2]
; CHECK-NO-LD1R-NEXT:    mov z0.h, h0
; CHECK-NO-LD1R-NEXT:    ret
  %valp2 = getelementptr half, ptr %valp, i32 -1
  %val = load half, ptr %valp2
  %ins = insertelement <vscale x 2 x half> undef, half %val, i32 0
  %shf = shufflevector <vscale x 2 x half> %ins, <vscale x 2 x half> undef, <vscale x 2 x i32> zeroinitializer
  ret <vscale x 2 x half> %shf
}

define <vscale x 4 x float> @ld1rw_float(ptr %valp) {
; CHECK-LD1R-LABEL: ld1rw_float:
; CHECK-LD1R:       // %bb.0:
; CHECK-LD1R-NEXT:    ptrue p0.s
; CHECK-LD1R-NEXT:    ld1rw { z0.s }, p0/z, [x0]
; CHECK-LD1R-NEXT:    ret
;
; CHECK-NO-LD1R-LABEL: ld1rw_float:
; CHECK-NO-LD1R:       // %bb.0:
; CHECK-NO-LD1R-NEXT:    ldr s0, [x0]
; CHECK-NO-LD1R-NEXT:    mov z0.s, s0
; CHECK-NO-LD1R-NEXT:    ret
  %val = load float, ptr %valp
  %ins = insertelement <vscale x 4 x float> undef, float %val, i32 0
  %shf = shufflevector <vscale x 4 x float> %ins, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
  ret <vscale x 4 x float> %shf
}

define <vscale x 4 x float> @ld1rw_float_gep(ptr %valp) {
; CHECK-LD1R-LABEL: ld1rw_float_gep:
; CHECK-LD1R:       // %bb.0:
; CHECK-LD1R-NEXT:    ptrue p0.s
; CHECK-LD1R-NEXT:    ld1rw { z0.s }, p0/z, [x0, #252]
; CHECK-LD1R-NEXT:    ret
;
; CHECK-NO-LD1R-LABEL: ld1rw_float_gep:
; CHECK-NO-LD1R:       // %bb.0:
; CHECK-NO-LD1R-NEXT:    ldr s0, [x0, #252]
; CHECK-NO-LD1R-NEXT:    mov z0.s, s0
; CHECK-NO-LD1R-NEXT:    ret
  %valp2 = getelementptr float, ptr %valp, i32 63
  %val = load float, ptr %valp2
  %ins = insertelement <vscale x 4 x float> undef, float %val, i32 0
  %shf = shufflevector <vscale x 4 x float> %ins, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
  ret <vscale x 4 x float> %shf
}

define <vscale x 4 x float> @ld1rw_float_gep_out_of_range_up(ptr %valp) {
; CHECK-LD1R-LABEL: ld1rw_float_gep_out_of_range_up:
; CHECK-LD1R:       // %bb.0:
; CHECK-LD1R-NEXT:    ptrue p0.s
; CHECK-LD1R-NEXT:    add x8, x0, #256
; CHECK-LD1R-NEXT:    ld1rw { z0.s }, p0/z, [x8]
; CHECK-LD1R-NEXT:    ret
;
; CHECK-NO-LD1R-LABEL: ld1rw_float_gep_out_of_range_up:
; CHECK-NO-LD1R:       // %bb.0:
; CHECK-NO-LD1R-NEXT:    ldr s0, [x0, #256]
; CHECK-NO-LD1R-NEXT:    mov z0.s, s0
; CHECK-NO-LD1R-NEXT:    ret
  %valp2 = getelementptr float, ptr %valp, i32 64
  %val = load float, ptr %valp2
  %ins = insertelement <vscale x 4 x float> undef, float %val, i32 0
  %shf = shufflevector <vscale x 4 x float> %ins, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
  ret <vscale x 4 x float> %shf
}

define <vscale x 4 x float> @ld1rw_float_gep_out_of_range_down(ptr %valp) {
; CHECK-LD1R-LABEL: ld1rw_float_gep_out_of_range_down:
; CHECK-LD1R:       // %bb.0:
; CHECK-LD1R-NEXT:    ptrue p0.s
; CHECK-LD1R-NEXT:    sub x8, x0, #4
; CHECK-LD1R-NEXT:    ld1rw { z0.s }, p0/z, [x8]
; CHECK-LD1R-NEXT:    ret
;
; CHECK-NO-LD1R-LABEL: ld1rw_float_gep_out_of_range_down:
; CHECK-NO-LD1R:       // %bb.0:
; CHECK-NO-LD1R-NEXT:    ldur s0, [x0, #-4]
; CHECK-NO-LD1R-NEXT:    mov z0.s, s0
; CHECK-NO-LD1R-NEXT:    ret
  %valp2 = getelementptr float, ptr %valp, i32 -1
  %val = load float, ptr %valp2
  %ins = insertelement <vscale x 4 x float> undef, float %val, i32 0
  %shf = shufflevector <vscale x 4 x float> %ins, <vscale x 4 x float> undef, <vscale x 4 x i32> zeroinitializer
  ret <vscale x 4 x float> %shf
}

define <vscale x 2 x float> @ld1rw_float_unpacked2(ptr %valp) {
; CHECK-LD1R-LABEL: ld1rw_float_unpacked2:
; CHECK-LD1R:       // %bb.0:
; CHECK-LD1R-NEXT:    ptrue p0.d
; CHECK-LD1R-NEXT:    ld1rw { z0.d }, p0/z, [x0]
; CHECK-LD1R-NEXT:    ret
;
; CHECK-NO-LD1R-LABEL: ld1rw_float_unpacked2:
; CHECK-NO-LD1R:       // %bb.0:
; CHECK-NO-LD1R-NEXT:    ldr s0, [x0]
; CHECK-NO-LD1R-NEXT:    mov z0.s, s0
; CHECK-NO-LD1R-NEXT:    ret
  %val = load float, ptr %valp
  %ins = insertelement <vscale x 2 x float> undef, float %val, i32 0
  %shf = shufflevector <vscale x 2 x float> %ins, <vscale x 2 x float> undef, <vscale x 2 x i32> zeroinitializer
  ret <vscale x 2 x float> %shf
}

define <vscale x 2 x float> @ld1rw_float_unpacked2_gep(ptr %valp) {
; CHECK-LD1R-LABEL: ld1rw_float_unpacked2_gep:
; CHECK-LD1R:       // %bb.0:
; CHECK-LD1R-NEXT:    ptrue p0.d
; CHECK-LD1R-NEXT:    ld1rw { z0.d }, p0/z, [x0, #252]
; CHECK-LD1R-NEXT:    ret
;
; CHECK-NO-LD1R-LABEL: ld1rw_float_unpacked2_gep:
; CHECK-NO-LD1R:       // %bb.0:
; CHECK-NO-LD1R-NEXT:    ldr s0, [x0, #252]
; CHECK-NO-LD1R-NEXT:    mov z0.s, s0
; CHECK-NO-LD1R-NEXT:    ret
  %valp2 = getelementptr float, ptr %valp, i32 63
  %val = load float, ptr %valp2
  %ins = insertelement <vscale x 2 x float> undef, float %val, i32 0
  %shf = shufflevector <vscale x 2 x float> %ins, <vscale x 2 x float> undef, <vscale x 2 x i32> zeroinitializer
  ret <vscale x 2 x float> %shf
}

define <vscale x 2 x float> @ld1rw_float_unpacked2_gep_out_of_range_up(ptr %valp) {
; CHECK-LD1R-LABEL: ld1rw_float_unpacked2_gep_out_of_range_up:
; CHECK-LD1R:       // %bb.0:
; CHECK-LD1R-NEXT:    ptrue p0.d
; CHECK-LD1R-NEXT:    add x8, x0, #256
; CHECK-LD1R-NEXT:    ld1rw { z0.d }, p0/z, [x8]
; CHECK-LD1R-NEXT:    ret
;
; CHECK-NO-LD1R-LABEL: ld1rw_float_unpacked2_gep_out_of_range_up:
; CHECK-NO-LD1R:       // %bb.0:
; CHECK-NO-LD1R-NEXT:    ldr s0, [x0, #256]
; CHECK-NO-LD1R-NEXT:    mov z0.s, s0
; CHECK-NO-LD1R-NEXT:    ret
  %valp2 = getelementptr float, ptr %valp, i32 64
  %val = load float, ptr %valp2
  %ins = insertelement <vscale x 2 x float> undef, float %val, i32 0
  %shf = shufflevector <vscale x 2 x float> %ins, <vscale x 2 x float> undef, <vscale x 2 x i32> zeroinitializer
  ret <vscale x 2 x float> %shf
}

define <vscale x 2 x float> @ld1rw_float_unpacked2_gep_out_of_range_down(ptr %valp) {
; CHECK-LD1R-LABEL: ld1rw_float_unpacked2_gep_out_of_range_down:
; CHECK-LD1R:       // %bb.0:
; CHECK-LD1R-NEXT:    ptrue p0.d
; CHECK-LD1R-NEXT:    sub x8, x0, #4
; CHECK-LD1R-NEXT:    ld1rw { z0.d }, p0/z, [x8]
; CHECK-LD1R-NEXT:    ret
;
; CHECK-NO-LD1R-LABEL: ld1rw_float_unpacked2_gep_out_of_range_down:
; CHECK-NO-LD1R:       // %bb.0:
; CHECK-NO-LD1R-NEXT:    ldur s0, [x0, #-4]
; CHECK-NO-LD1R-NEXT:    mov z0.s, s0
; CHECK-NO-LD1R-NEXT:    ret
  %valp2 = getelementptr float, ptr %valp, i32 -1
  %val = load float, ptr %valp2
  %ins = insertelement <vscale x 2 x float> undef, float %val, i32 0
  %shf = shufflevector <vscale x 2 x float> %ins, <vscale x 2 x float> undef, <vscale x 2 x i32> zeroinitializer
  ret <vscale x 2 x float> %shf
}

define <vscale x 2 x double> @ld1rd_double(ptr %valp) {
; CHECK-LD1R-LABEL: ld1rd_double:
; CHECK-LD1R:       // %bb.0:
; CHECK-LD1R-NEXT:    ptrue p0.d
; CHECK-LD1R-NEXT:    ld1rd { z0.d }, p0/z, [x0]
; CHECK-LD1R-NEXT:    ret
;
; CHECK-NO-LD1R-LABEL: ld1rd_double:
; CHECK-NO-LD1R:       // %bb.0:
; CHECK-NO-LD1R-NEXT:    ldr d0, [x0]
; CHECK-NO-LD1R-NEXT:    mov z0.d, d0
; CHECK-NO-LD1R-NEXT:    ret
  %val = load double, ptr %valp
  %ins = insertelement <vscale x 2 x double> undef, double %val, i32 0
  %shf = shufflevector <vscale x 2 x double> %ins, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
  ret <vscale x 2 x double> %shf
}

define <vscale x 2 x double> @ld1rd_double_gep(ptr %valp) {
; CHECK-LD1R-LABEL: ld1rd_double_gep:
; CHECK-LD1R:       // %bb.0:
; CHECK-LD1R-NEXT:    ptrue p0.d
; CHECK-LD1R-NEXT:    ld1rd { z0.d }, p0/z, [x0, #504]
; CHECK-LD1R-NEXT:    ret
;
; CHECK-NO-LD1R-LABEL: ld1rd_double_gep:
; CHECK-NO-LD1R:       // %bb.0:
; CHECK-NO-LD1R-NEXT:    ldr d0, [x0, #504]
; CHECK-NO-LD1R-NEXT:    mov z0.d, d0
; CHECK-NO-LD1R-NEXT:    ret
  %valp2 = getelementptr double, ptr %valp, i32 63
  %val = load double, ptr %valp2
  %ins = insertelement <vscale x 2 x double> undef, double %val, i32 0
  %shf = shufflevector <vscale x 2 x double> %ins, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
  ret <vscale x 2 x double> %shf
}

define <vscale x 2 x double> @ld1rd_double_gep_out_of_range_up(ptr %valp) {
; CHECK-LD1R-LABEL: ld1rd_double_gep_out_of_range_up:
; CHECK-LD1R:       // %bb.0:
; CHECK-LD1R-NEXT:    ptrue p0.d
; CHECK-LD1R-NEXT:    add x8, x0, #512
; CHECK-LD1R-NEXT:    ld1rd { z0.d }, p0/z, [x8]
; CHECK-LD1R-NEXT:    ret
;
; CHECK-NO-LD1R-LABEL: ld1rd_double_gep_out_of_range_up:
; CHECK-NO-LD1R:       // %bb.0:
; CHECK-NO-LD1R-NEXT:    ldr d0, [x0, #512]
; CHECK-NO-LD1R-NEXT:    mov z0.d, d0
; CHECK-NO-LD1R-NEXT:    ret
  %valp2 = getelementptr double, ptr %valp, i32 64
  %val = load double, ptr %valp2
  %ins = insertelement <vscale x 2 x double> undef, double %val, i32 0
  %shf = shufflevector <vscale x 2 x double> %ins, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
  ret <vscale x 2 x double> %shf
}

define <vscale x 2 x double> @ld1rd_double_gep_out_of_range_down(ptr %valp) {
; CHECK-LD1R-LABEL: ld1rd_double_gep_out_of_range_down:
; CHECK-LD1R:       // %bb.0:
; CHECK-LD1R-NEXT:    ptrue p0.d
; CHECK-LD1R-NEXT:    sub x8, x0, #8
; CHECK-LD1R-NEXT:    ld1rd { z0.d }, p0/z, [x8]
; CHECK-LD1R-NEXT:    ret
;
; CHECK-NO-LD1R-LABEL: ld1rd_double_gep_out_of_range_down:
; CHECK-NO-LD1R:       // %bb.0:
; CHECK-NO-LD1R-NEXT:    ldur d0, [x0, #-8]
; CHECK-NO-LD1R-NEXT:    mov z0.d, d0
; CHECK-NO-LD1R-NEXT:    ret
  %valp2 = getelementptr double, ptr %valp, i32 -1
  %val = load double, ptr %valp2
  %ins = insertelement <vscale x 2 x double> undef, double %val, i32 0
  %shf = shufflevector <vscale x 2 x double> %ins, <vscale x 2 x double> undef, <vscale x 2 x i32> zeroinitializer
  ret <vscale x 2 x double> %shf
}

define <vscale x 2 x double> @dupq_ld1rqd_f64(ptr %a) {
; CHECK-LABEL: dupq_ld1rqd_f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1rqd { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %1 = load <2 x double>, ptr %a
  %2 = tail call fast <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v2f64(<vscale x 2 x double> undef, <2 x double> %1, i64 0)
  %3 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.dupq.lane.nxv2f64(<vscale x 2 x double> %2, i64 0)
  ret <vscale x 2 x double> %3
}

define <vscale x 4 x float> @dupq_ld1rqw_f32(ptr %a) {
; CHECK-LABEL: dupq_ld1rqw_f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ld1rqw { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %1 = load <4 x float>, ptr %a
  %2 = tail call fast <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float> undef, <4 x float> %1, i64 0)
  %3 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.dupq.lane.nxv4f32(<vscale x 4 x float> %2, i64 0)
  ret <vscale x 4 x float> %3
}

define <vscale x 8 x half> @dupq_ld1rqh_f16(ptr %a) {
; CHECK-LABEL: dupq_ld1rqh_f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    ld1rqh { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ret
  %1 = load <8 x half>, ptr %a
  %2 = tail call fast <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v8f16(<vscale x 8 x half> undef, <8 x half> %1, i64 0)
  %3 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.dupq.lane.nxv8f16(<vscale x 8 x half> %2, i64 0)
  ret <vscale x 8 x half> %3
}

define <vscale x 8 x bfloat> @dupq_ld1rqh_bf16(ptr %a) #0 {
; CHECK-LABEL: dupq_ld1rqh_bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    ld1rqh { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ret
  %1 = load <8 x bfloat>, ptr %a
  %2 = tail call fast <vscale x 8 x bfloat> @llvm.vector.insert.nxv8bf16.v8bf16(<vscale x 8 x bfloat> undef, <8 x bfloat> %1, i64 0)
  %3 = tail call fast <vscale x 8 x bfloat> @llvm.aarch64.sve.dupq.lane.nxv8bf16(<vscale x 8 x bfloat> %2, i64 0)
  ret <vscale x 8 x bfloat> %3
}

define <vscale x 2 x i64> @dupq_ld1rqd_i64(ptr %a) #0 {
; CHECK-LABEL: dupq_ld1rqd_i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1rqd { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %1 = load <2 x i64>, ptr %a
  %2 = tail call <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64> undef, <2 x i64> %1, i64 0)
  %3 = tail call <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64> %2, i64 0)
  ret <vscale x 2 x i64> %3
}

define <vscale x 4 x i32> @dupq_ld1rqw_i32(ptr %a) #0 {
; CHECK-LABEL: dupq_ld1rqw_i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ld1rqw { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %1 = load <4 x i32>, ptr %a
  %2 = tail call <vscale x 4 x i32>
@llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32> undef, <4 x i32> %1, i64 0) 951 %3 = tail call <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32> %2, i64 0) 952 ret <vscale x 4 x i32> %3 953} 954 955define <vscale x 8 x i16> @dupq_ld1rqw_i16(ptr %a) #0 { 956; CHECK-LABEL: dupq_ld1rqw_i16: 957; CHECK: // %bb.0: 958; CHECK-NEXT: ptrue p0.h 959; CHECK-NEXT: ld1rqh { z0.h }, p0/z, [x0] 960; CHECK-NEXT: ret 961 %1 = load <8 x i16>, ptr %a 962 %2 = tail call <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16> undef, <8 x i16> %1, i64 0) 963 %3 = tail call <vscale x 8 x i16> @llvm.aarch64.sve.dupq.lane.nxv8i16(<vscale x 8 x i16> %2, i64 0) 964 ret <vscale x 8 x i16> %3 965} 966 967define <vscale x 16 x i8> @dupq_ld1rqw_i8(ptr %a) #0 { 968; CHECK-LABEL: dupq_ld1rqw_i8: 969; CHECK: // %bb.0: 970; CHECK-NEXT: ptrue p0.b 971; CHECK-NEXT: ld1rqb { z0.b }, p0/z, [x0] 972; CHECK-NEXT: ret 973 %1 = load <16 x i8>, ptr %a 974 %2 = tail call <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8> undef, <16 x i8> %1, i64 0) 975 %3 = tail call <vscale x 16 x i8> @llvm.aarch64.sve.dupq.lane.nxv16i8(<vscale x 16 x i8> %2, i64 0) 976 ret <vscale x 16 x i8> %3 977} 978 979; 980; 981; Tests for dup: 982; 983; Positive tests: 984; * dup with passthru=undef or passthrue=zero. 985; * sign/zero extending. 986; * unpacked types. 987; 988; Negative tests: 989; * dup with passthru as a parameter. 
;
;

; dup intrinsic with an undef passthru: the predicated splat of a loaded
; scalar lowers to a single zeroing ld1r* replicating load; a sign/zero
; extend of the loaded value selects the ld1rs*/ld1r* element-size variant.
define <vscale x 16 x i8> @dup_ld1rb_i8_passthruundef_nxv16i8(<vscale x 16 x i1> %pg, ptr %addr) {
; CHECK-LABEL: dup_ld1rb_i8_passthruundef_nxv16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1rb { z0.b }, p0/z, [x0]
; CHECK-NEXT:    ret
  %ld = load i8, ptr %addr
  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.nxv16i8(<vscale x 16 x i8> undef, <vscale x 16 x i1> %pg, i8 %ld)
  ret <vscale x 16 x i8> %res
}
define <vscale x 8 x i16> @dup_ld1rh_i16_passthruundef_nxv8i16(<vscale x 8 x i1> %pg, ptr %addr) {
; CHECK-LABEL: dup_ld1rh_i16_passthruundef_nxv8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1rh { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ret
  %ld = load i16, ptr %addr
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, i16 %ld)
  ret <vscale x 8 x i16> %res
}
define <vscale x 8 x i16> @dup_ld1rh_i8_passthruundef_nxv8i16_sext(<vscale x 8 x i1> %pg, ptr %addr) {
; CHECK-LABEL: dup_ld1rh_i8_passthruundef_nxv8i16_sext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1rsb { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ret
  %ld = load i8, ptr %addr
  %ext = sext i8 %ld to i16
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, i16 %ext)
  ret <vscale x 8 x i16> %res
}
define <vscale x 8 x i16> @dup_ld1rh_i8_passthruundef_nxv8i16_zext(<vscale x 8 x i1> %pg, ptr %addr) {
; CHECK-LABEL: dup_ld1rh_i8_passthruundef_nxv8i16_zext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1rb { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ret
  %ld = load i8, ptr %addr
  %ext = zext i8 %ld to i16
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.nxv8i16(<vscale x 8 x i16> undef, <vscale x 8 x i1> %pg, i16 %ext)
  ret <vscale x 8 x i16> %res
}
define <vscale x 4 x i32> @dup_ld1rs_i32_passthruundef_nxv4i32(<vscale x 4 x i1> %pg, ptr %addr) {
; CHECK-LABEL: dup_ld1rs_i32_passthruundef_nxv4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1rw { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %ld = load i32, ptr %addr
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, i32 %ld)
  ret <vscale x 4 x i32> %res
}
define <vscale x 4 x i32> @dup_ld1rs_i8_passthruundef_nxv4i32_sext(<vscale x 4 x i1> %pg, ptr %addr) {
; CHECK-LABEL: dup_ld1rs_i8_passthruundef_nxv4i32_sext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1rsb { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %ld = load i8, ptr %addr
  %ext = sext i8 %ld to i32
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, i32 %ext)
  ret <vscale x 4 x i32> %res
}
define <vscale x 4 x i32> @dup_ld1rs_i8_passthruundef_nxv4i32_zext(<vscale x 4 x i1> %pg, ptr %addr) {
; CHECK-LABEL: dup_ld1rs_i8_passthruundef_nxv4i32_zext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1rb { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %ld = load i8, ptr %addr
  %ext = zext i8 %ld to i32
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, i32 %ext)
  ret <vscale x 4 x i32> %res
}
define <vscale x 4 x i32> @dup_ld1rs_i16_passthruundef_nxv4i32_sext(<vscale x 4 x i1> %pg, ptr %addr) {
; CHECK-LABEL: dup_ld1rs_i16_passthruundef_nxv4i32_sext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1rsh { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %ld = load i16, ptr %addr
  %ext = sext i16 %ld to i32
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, i32 %ext)
  ret <vscale x 4 x i32> %res
}
define <vscale x 4 x i32> @dup_ld1rs_i16_passthruundef_nxv4i32_zext(<vscale x 4 x i1> %pg, ptr %addr) {
; CHECK-LABEL: dup_ld1rs_i16_passthruundef_nxv4i32_zext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1rh { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %ld = load i16, ptr %addr
  %ext = zext i16 %ld to i32
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.nxv4i32(<vscale x 4 x i32> undef, <vscale x 4 x i1> %pg, i32 %ext)
  ret <vscale x 4 x i32> %res
}
define <vscale x 2 x i64> @dup_ld1rd_i64_passthruundef_nxv2i64(<vscale x 2 x i1> %pg, ptr %addr) {
; CHECK-LABEL: dup_ld1rd_i64_passthruundef_nxv2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1rd { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %ld = load i64, ptr %addr
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, i64 %ld)
  ret <vscale x 2 x i64> %res
}
define <vscale x 2 x i64> @dup_ld1rs_i8_passthruundef_nxv2i64_sext(<vscale x 2 x i1> %pg, ptr %addr) {
; CHECK-LABEL: dup_ld1rs_i8_passthruundef_nxv2i64_sext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1rsb { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %ld = load i8, ptr %addr
  %ext = sext i8 %ld to i64
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, i64 %ext)
  ret <vscale x 2 x i64> %res
}
define <vscale x 2 x i64> @dup_ld1rs_i8_passthruundef_nxv2i64_zext(<vscale x 2 x i1> %pg, ptr %addr) {
; CHECK-LABEL: dup_ld1rs_i8_passthruundef_nxv2i64_zext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1rb { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %ld = load i8, ptr %addr
  %ext = zext i8 %ld to i64
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, i64 %ext)
  ret <vscale x 2 x i64> %res
}
define <vscale x 2 x i64> @dup_ld1rs_i16_passthruundef_nxv2i64_sext(<vscale x 2 x i1> %pg, ptr %addr) {
; CHECK-LABEL: dup_ld1rs_i16_passthruundef_nxv2i64_sext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1rsh { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %ld = load i16, ptr %addr
  %ext = sext i16 %ld to i64
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, i64 %ext)
  ret <vscale x 2 x i64> %res
}
define <vscale x 2 x i64> @dup_ld1rs_i16_passthruundef_nxv2i64_zext(<vscale x 2 x i1> %pg, ptr %addr) {
; CHECK-LABEL: dup_ld1rs_i16_passthruundef_nxv2i64_zext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1rh { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %ld = load i16, ptr %addr
  %ext = zext i16 %ld to i64
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, i64 %ext)
  ret <vscale x 2 x i64> %res
}
define <vscale x 2 x i64> @dup_ld1rs_i32_passthruundef_nxv2i64_sext(<vscale x 2 x i1> %pg, ptr %addr) {
; CHECK-LABEL: dup_ld1rs_i32_passthruundef_nxv2i64_sext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1rsw { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %ld = load i32, ptr %addr
  %ext = sext i32 %ld to i64
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, i64 %ext)
  ret <vscale x 2 x i64> %res
}
define <vscale x 2 x i64> @dup_ld1rs_i32_passthruundef_nxv2i64_zext(<vscale x 2 x i1> %pg, ptr %addr) {
; CHECK-LABEL: dup_ld1rs_i32_passthruundef_nxv2i64_zext:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1rw { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %ld = load i32, ptr %addr
  %ext = zext i32 %ld to i64
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, i64 %ext)
  ret <vscale x 2 x i64> %res
}
; Floating-point dup with undef passthru: ld1r* when FP ld1r is allowed;
; with +no-sve-fp-ld1r a scalar FP load plus a merging predicated mov is
; used instead (see the two RUN lines at the top of the file).
define <vscale x 8 x half> @dup_ld1rh_half_passthruundef_nxv8f16(<vscale x 8 x i1> %pg, ptr %addr) {
; CHECK-LD1R-LABEL: dup_ld1rh_half_passthruundef_nxv8f16:
; CHECK-LD1R:       // %bb.0:
; CHECK-LD1R-NEXT:    ld1rh { z0.h }, p0/z, [x0]
; CHECK-LD1R-NEXT:    ret
;
; CHECK-NO-LD1R-LABEL: dup_ld1rh_half_passthruundef_nxv8f16:
; CHECK-NO-LD1R:       // %bb.0:
; CHECK-NO-LD1R-NEXT:    ldr h0, [x0]
; CHECK-NO-LD1R-NEXT:    mov z0.h, p0/m, h0
; CHECK-NO-LD1R-NEXT:    ret
  %ld = load half, ptr %addr
  %res = call <vscale x 8 x half> @llvm.aarch64.sve.dup.nxv8f16(<vscale x 8 x half> undef, <vscale x 8 x i1> %pg, half %ld)
  ret <vscale x 8 x half> %res
}
define <vscale x 4 x float> @dup_ld1rs_float_passthruundef_nxv4f32(<vscale x 4 x i1> %pg, ptr %addr) {
; CHECK-LD1R-LABEL: dup_ld1rs_float_passthruundef_nxv4f32:
; CHECK-LD1R:       // %bb.0:
; CHECK-LD1R-NEXT:    ld1rw { z0.s }, p0/z, [x0]
; CHECK-LD1R-NEXT:    ret
;
; CHECK-NO-LD1R-LABEL: dup_ld1rs_float_passthruundef_nxv4f32:
; CHECK-NO-LD1R:       // %bb.0:
; CHECK-NO-LD1R-NEXT:    ldr s0, [x0]
; CHECK-NO-LD1R-NEXT:    mov z0.s, p0/m, s0
; CHECK-NO-LD1R-NEXT:    ret
  %ld = load float, ptr %addr
  %res = call <vscale x 4 x float> @llvm.aarch64.sve.dup.nxv4f32(<vscale x 4 x float> undef, <vscale x 4 x i1> %pg, float %ld)
  ret <vscale x 4 x float> %res
}
define <vscale x 2 x double> @dup_ld1rd_double_passthruundef_nxv2f64(<vscale x 2 x i1> %pg, ptr %addr) {
; CHECK-LD1R-LABEL: dup_ld1rd_double_passthruundef_nxv2f64:
; CHECK-LD1R:       // %bb.0:
; CHECK-LD1R-NEXT:    ld1rd { z0.d }, p0/z, [x0]
; CHECK-LD1R-NEXT:    ret
;
; CHECK-NO-LD1R-LABEL: dup_ld1rd_double_passthruundef_nxv2f64:
; CHECK-NO-LD1R:       // %bb.0:
; CHECK-NO-LD1R-NEXT:    ldr d0, [x0]
; CHECK-NO-LD1R-NEXT:    mov z0.d, p0/m, d0
; CHECK-NO-LD1R-NEXT:    ret
  %ld = load double, ptr %addr
  %res = call <vscale x 2 x double> @llvm.aarch64.sve.dup.nxv2f64(<vscale x 2 x double> undef, <vscale x 2 x i1> %pg, double %ld)
  ret <vscale x 2 x double> %res
}
; Unpacked half (nxv4f16): the replicating load uses the wider .s container.
define <vscale x 4 x half> @dup_ld1rh_half_passthruundef_nxv4f16(<vscale x 4 x i1> %pg, ptr %addr) {
; CHECK-LD1R-LABEL: dup_ld1rh_half_passthruundef_nxv4f16:
; CHECK-LD1R:       // %bb.0:
; CHECK-LD1R-NEXT:    ld1rh { z0.s }, p0/z, [x0]
; CHECK-LD1R-NEXT:    ret
;
; CHECK-NO-LD1R-LABEL: dup_ld1rh_half_passthruundef_nxv4f16:
; CHECK-NO-LD1R:       // %bb.0:
; CHECK-NO-LD1R-NEXT:    ldr h0, [x0]
; CHECK-NO-LD1R-NEXT:    mov z0.h, p0/m, h0
; CHECK-NO-LD1R-NEXT:    ret
  %ld = load half, ptr %addr
  %res = call <vscale x 4 x half> @llvm.aarch64.sve.dup.nxv4f16(<vscale x 4 x half> undef, <vscale x 4 x i1> %pg, half %ld)
  ret <vscale x 4 x half> %res
}
; dup with a zeroinitializer passthru also folds to the zeroing ld1r* load:
; inactive lanes are zero either way.
define <vscale x 16 x i8> @dup_ld1rb_i8_passthruzero_nxv16i8(<vscale x 16 x i1> %pg, ptr %addr) {
; CHECK-LABEL: dup_ld1rb_i8_passthruzero_nxv16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1rb { z0.b }, p0/z, [x0]
; CHECK-NEXT:    ret
  %ld = load i8, ptr %addr
  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.nxv16i8(<vscale x 16 x i8> zeroinitializer, <vscale x 16 x i1> %pg, i8 %ld)
  ret <vscale x 16 x i8> %res
}
define <vscale x 8 x i16> @dup_ld1rh_i16_passthruzero_nxv8i16(<vscale x 8 x i1> %pg, ptr %addr) {
; CHECK-LABEL: dup_ld1rh_i16_passthruzero_nxv8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1rh { z0.h }, p0/z, [x0]
; CHECK-NEXT:    ret
  %ld = load i16, ptr %addr
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.nxv8i16(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x i1> %pg, i16 %ld)
  ret <vscale x 8 x i16> %res
}
define <vscale x 4 x i32> @dup_ld1rs_i32_passthruzero_nxv4i32(<vscale x 4 x i1> %pg, ptr %addr) {
; CHECK-LABEL: dup_ld1rs_i32_passthruzero_nxv4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1rw { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ret
  %ld = load i32, ptr %addr
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.nxv4i32(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x i1> %pg, i32 %ld)
  ret <vscale x 4 x i32> %res
}
define <vscale x 2 x i64> @dup_ld1rd_i64_passthruzero_nxv2i64(<vscale x 2 x i1> %pg, ptr %addr) {
; CHECK-LABEL: dup_ld1rd_i64_passthruzero_nxv2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1rd { z0.d }, p0/z, [x0]
; CHECK-NEXT:    ret
  %ld = load i64, ptr %addr
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> %pg, i64 %ld)
  ret <vscale x 2 x i64> %res
}
; FP dup with zero passthru: without FP ld1r the zero passthru must be
; materialized explicitly before the merging mov.
define <vscale x 8 x half> @dup_ld1rh_half_passthruzero_nxv8f16(<vscale x 8 x i1> %pg, ptr %addr) {
; CHECK-LD1R-LABEL: dup_ld1rh_half_passthruzero_nxv8f16:
; CHECK-LD1R:       // %bb.0:
; CHECK-LD1R-NEXT:    ld1rh { z0.h }, p0/z, [x0]
; CHECK-LD1R-NEXT:    ret
;
; CHECK-NO-LD1R-LABEL: dup_ld1rh_half_passthruzero_nxv8f16:
; CHECK-NO-LD1R:       // %bb.0:
; CHECK-NO-LD1R-NEXT:    mov z0.h, #0 // =0x0
; CHECK-NO-LD1R-NEXT:    ldr h1, [x0]
; CHECK-NO-LD1R-NEXT:    mov z0.h, p0/m, h1
; CHECK-NO-LD1R-NEXT:    ret
  %ld = load half, ptr %addr
  %res = call <vscale x 8 x half> @llvm.aarch64.sve.dup.nxv8f16(<vscale x 8 x half> zeroinitializer, <vscale x 8 x i1> %pg, half %ld)
  ret <vscale x 8 x half> %res
}
define <vscale x 4 x float> @dup_ld1rs_float_passthruzero_nxv4f32(<vscale x 4 x i1> %pg, ptr %addr) {
; CHECK-LD1R-LABEL: dup_ld1rs_float_passthruzero_nxv4f32:
; CHECK-LD1R:       // %bb.0:
; CHECK-LD1R-NEXT:    ld1rw { z0.s }, p0/z, [x0]
; CHECK-LD1R-NEXT:    ret
;
; CHECK-NO-LD1R-LABEL: dup_ld1rs_float_passthruzero_nxv4f32:
; CHECK-NO-LD1R:       // %bb.0:
; CHECK-NO-LD1R-NEXT:    mov z0.s, #0 // =0x0
; CHECK-NO-LD1R-NEXT:    ldr s1, [x0]
; CHECK-NO-LD1R-NEXT:    mov z0.s, p0/m, s1
; CHECK-NO-LD1R-NEXT:    ret
  %ld = load float, ptr %addr
  %res = call <vscale x 4 x float> @llvm.aarch64.sve.dup.nxv4f32(<vscale x 4 x float> zeroinitializer, <vscale x 4 x i1> %pg, float %ld)
  ret <vscale x 4 x float> %res
}
define <vscale x 2 x double> @dup_ld1rd_double_passthruzero_nxv2f64(<vscale x 2 x i1> %pg, ptr %addr) {
; CHECK-LD1R-LABEL: dup_ld1rd_double_passthruzero_nxv2f64:
; CHECK-LD1R:       // %bb.0:
; CHECK-LD1R-NEXT:    ld1rd { z0.d }, p0/z, [x0]
; CHECK-LD1R-NEXT:    ret
;
; CHECK-NO-LD1R-LABEL: dup_ld1rd_double_passthruzero_nxv2f64:
; CHECK-NO-LD1R:       // %bb.0:
; CHECK-NO-LD1R-NEXT:    mov z0.d, #0 // =0x0
; CHECK-NO-LD1R-NEXT:    ldr d1, [x0]
; CHECK-NO-LD1R-NEXT:    mov z0.d, p0/m, d1
; CHECK-NO-LD1R-NEXT:    ret
  %ld = load double, ptr %addr
  %res = call <vscale x 2 x double> @llvm.aarch64.sve.dup.nxv2f64(<vscale x 2 x double> zeroinitializer, <vscale x 2 x i1> %pg, double %ld)
  ret <vscale x 2 x double> %res
}
; Unpacked FP types with zero passthru: the ld1r* form uses the wider
; container element (.s for nxv4f16, .d for nxv2f16/nxv2f32).
define <vscale x 4 x half> @dup_ld1rh_half_passthruzero_nxv4f16(<vscale x 4 x i1> %pg, ptr %addr) {
; CHECK-LD1R-LABEL: dup_ld1rh_half_passthruzero_nxv4f16:
; CHECK-LD1R:       // %bb.0:
; CHECK-LD1R-NEXT:    ld1rh { z0.s }, p0/z, [x0]
; CHECK-LD1R-NEXT:    ret
;
; CHECK-NO-LD1R-LABEL: dup_ld1rh_half_passthruzero_nxv4f16:
; CHECK-NO-LD1R:       // %bb.0:
; CHECK-NO-LD1R-NEXT:    mov z0.h, #0 // =0x0
; CHECK-NO-LD1R-NEXT:    ldr h1, [x0]
; CHECK-NO-LD1R-NEXT:    mov z0.h, p0/m, h1
; CHECK-NO-LD1R-NEXT:    ret
  %ld = load half, ptr %addr
  %res = call <vscale x 4 x half> @llvm.aarch64.sve.dup.nxv4f16(<vscale x 4 x half> zeroinitializer, <vscale x 4 x i1> %pg, half %ld)
  ret <vscale x 4 x half> %res
}
define <vscale x 2 x half> @dup_ld1rh_half_passthruzero_nxv2f16(<vscale x 2 x i1> %pg, ptr %addr) {
; CHECK-LD1R-LABEL: dup_ld1rh_half_passthruzero_nxv2f16:
; CHECK-LD1R:       // %bb.0:
; CHECK-LD1R-NEXT:    ld1rh { z0.d }, p0/z, [x0]
; CHECK-LD1R-NEXT:    ret
;
; CHECK-NO-LD1R-LABEL: dup_ld1rh_half_passthruzero_nxv2f16:
; CHECK-NO-LD1R:       // %bb.0:
; CHECK-NO-LD1R-NEXT:    mov z0.h, #0 // =0x0
; CHECK-NO-LD1R-NEXT:    ldr h1, [x0]
; CHECK-NO-LD1R-NEXT:    mov z0.h, p0/m, h1
; CHECK-NO-LD1R-NEXT:    ret
  %ld = load half, ptr %addr
  %res = call <vscale x 2 x half> @llvm.aarch64.sve.dup.nxv2f16(<vscale x 2 x half> zeroinitializer, <vscale x 2 x i1> %pg, half %ld)
  ret <vscale x 2 x half> %res
}
define <vscale x 2 x float> @dup_ld1rs_float_passthruzero_nxv2f32(<vscale x 2 x i1> %pg, ptr %addr) {
; CHECK-LD1R-LABEL: dup_ld1rs_float_passthruzero_nxv2f32:
; CHECK-LD1R:       // %bb.0:
; CHECK-LD1R-NEXT:    ld1rw { z0.d }, p0/z, [x0]
; CHECK-LD1R-NEXT:    ret
;
; CHECK-NO-LD1R-LABEL: dup_ld1rs_float_passthruzero_nxv2f32:
; CHECK-NO-LD1R:       // %bb.0:
; CHECK-NO-LD1R-NEXT:    mov z0.s, #0 // =0x0
; CHECK-NO-LD1R-NEXT:    ldr s1, [x0]
; CHECK-NO-LD1R-NEXT:    mov z0.s, p0/m, s1
; CHECK-NO-LD1R-NEXT:    ret
  %ld = load float, ptr %addr
  %res = call <vscale x 2 x float> @llvm.aarch64.sve.dup.nxv2f32(<vscale x 2 x float> zeroinitializer, <vscale x 2 x i1> %pg, float %ld)
  ret <vscale x 2 x float> %res
}
; Negative tests: with an arbitrary (non-undef, non-zero) passthru the load
; cannot fold into a zeroing ld1r*, so a scalar load + merging mov is kept.
define <vscale x 16 x i8> @negtest_dup_ld1rb_i8_passthru_nxv16i8(<vscale x 16 x i8> %pt, <vscale x 16 x i1> %pg, ptr %addr) {
; CHECK-LABEL: negtest_dup_ld1rb_i8_passthru_nxv16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldrb w8, [x0]
; CHECK-NEXT:    mov z0.b, p0/m, w8
; CHECK-NEXT:    ret
  %ld = load i8, ptr %addr
  %res = call <vscale x 16 x i8> @llvm.aarch64.sve.dup.nxv16i8(<vscale x 16 x i8> %pt, <vscale x 16 x i1> %pg, i8 %ld)
  ret <vscale x 16 x i8> %res
}
define <vscale x 8 x i16> @negtest_dup_ld1rh_i16_passthru_nxv8i16(<vscale x 8 x i16> %pt, <vscale x 8 x i1> %pg, ptr %addr) {
; CHECK-LABEL: negtest_dup_ld1rh_i16_passthru_nxv8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldrh w8, [x0]
; CHECK-NEXT:    mov z0.h, p0/m, w8
; CHECK-NEXT:    ret
  %ld = load i16, ptr %addr
  %res = call <vscale x 8 x i16> @llvm.aarch64.sve.dup.nxv8i16(<vscale x 8 x i16> %pt, <vscale x 8 x i1> %pg, i16 %ld)
  ret <vscale x 8 x i16> %res
}
define <vscale x 4 x i32> @negtest_dup_ld1rs_i32_passthru_nxv4i32(<vscale x 4 x i32> %pt, <vscale x 4 x i1> %pg, ptr %addr) {
; CHECK-LABEL: negtest_dup_ld1rs_i32_passthru_nxv4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr w8, [x0]
; CHECK-NEXT:    mov z0.s, p0/m, w8
; CHECK-NEXT:    ret
  %ld = load i32, ptr %addr
  %res = call <vscale x 4 x i32> @llvm.aarch64.sve.dup.nxv4i32(<vscale x 4 x i32> %pt, <vscale x 4 x i1> %pg, i32 %ld)
  ret <vscale x 4 x i32> %res
}
define <vscale x 2 x i64> @negtest_dup_ld1rd_i64_passthru_nxv2i64(<vscale x 2 x i64> %pt, <vscale x 2 x i1> %pg, ptr %addr) {
; CHECK-LABEL: negtest_dup_ld1rd_i64_passthru_nxv2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr x8, [x0]
; CHECK-NEXT:    mov z0.d, p0/m, x8
; CHECK-NEXT:    ret
  %ld = load i64, ptr %addr
  %res = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.nxv2i64(<vscale x 2 x i64> %pt, <vscale x 2 x i1> %pg, i64 %ld)
  ret <vscale x 2 x i64> %res
}
define <vscale x 8 x half> @negtest_dup_ld1rh_half_passthru_nxv8f16(<vscale x 8 x half> %pt, <vscale x 8 x i1> %pg, ptr %addr) {
; CHECK-LABEL: negtest_dup_ld1rh_half_passthru_nxv8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr h1, [x0]
; CHECK-NEXT:    mov z0.h, p0/m, h1
; CHECK-NEXT:    ret
  %ld = load half, ptr %addr
  %res = call <vscale x 8 x half> @llvm.aarch64.sve.dup.nxv8f16(<vscale x 8 x half> %pt, <vscale x 8 x i1> %pg, half %ld)
  ret <vscale x 8 x half> %res
}
define <vscale x 4 x float> @negtest_dup_ld1rs_float_passthru_nxv4f32(<vscale x 4 x float> %pt, <vscale x 4 x i1> %pg, ptr %addr) {
; CHECK-LABEL: negtest_dup_ld1rs_float_passthru_nxv4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr s1, [x0]
; CHECK-NEXT:    mov z0.s, p0/m, s1
; CHECK-NEXT:    ret
  %ld = load float, ptr %addr
  %res = call <vscale x 4 x float> @llvm.aarch64.sve.dup.nxv4f32(<vscale x 4 x float> %pt, <vscale x 4 x i1> %pg, float %ld)
  ret <vscale x 4 x float> %res
}
define <vscale x 2 x double> @negtest_dup_ld1rd_double_passthru_nxv2f64(<vscale x 2 x double> %pt, <vscale x 2 x i1> %pg, ptr %addr) {
; CHECK-LABEL: negtest_dup_ld1rd_double_passthru_nxv2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr d1, [x0]
; CHECK-NEXT:    mov z0.d, p0/m, d1
; CHECK-NEXT:    ret
  %ld = load double, ptr %addr
  %res = call <vscale x 2 x double> @llvm.aarch64.sve.dup.nxv2f64(<vscale x 2 x double> %pt, <vscale x 2 x i1> %pg, double %ld)
  ret <vscale x 2 x double> %res
}


; Check that a load consumed by a scalable splat prefers a replicating load.
define ptr @avoid_preindex_load(ptr %src, ptr %out) {
; CHECK-LABEL: avoid_preindex_load:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1rsb { z0.d }, p0/z, [x0, #1]
; CHECK-NEXT:    add x0, x0, #1
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %ptr = getelementptr inbounds i8, ptr %src, i64 1
  %tmp = load i8, ptr %ptr, align 4
  %ext = sext i8 %tmp to i64
  %ins = insertelement <vscale x 2 x i64> undef, i64 %ext, i32 0
  %dup = shufflevector <vscale x 2 x i64> %ins, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  store <vscale x 2 x i64> %dup, ptr %out
  ret ptr %ptr
}

; Check that a load consumed by a scalable splat prefers a replicating
; load over a pre-indexed load.
define ptr @avoid_preindex_load_dup(ptr %src, <vscale x 2 x i1> %pg, ptr %out) {
; CHECK-LABEL: avoid_preindex_load_dup:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1rsb { z0.d }, p0/z, [x0, #1]
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    add x0, x0, #1
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %ptr = getelementptr inbounds i8, ptr %src, i64 1
  %tmp = load i8, ptr %ptr, align 4
  %ext = sext i8 %tmp to i64
  %dup = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.nxv2i64(<vscale x 2 x i64> undef, <vscale x 2 x i1> %pg, i64 %ext)
  store <vscale x 2 x i64> %dup, ptr %out
  ret ptr %ptr
}

; Same as avoid_preindex_load_dup, but with zero passthru.
define ptr @avoid_preindex_load_dup_passthru_zero(ptr %src, <vscale x 2 x i1> %pg, ptr %out) {
; CHECK-LABEL: avoid_preindex_load_dup_passthru_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1rsb { z0.d }, p0/z, [x0, #1]
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    add x0, x0, #1
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %ptr = getelementptr inbounds i8, ptr %src, i64 1
  %tmp = load i8, ptr %ptr, align 4
  %ext = sext i8 %tmp to i64
  %dup = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.nxv2i64(<vscale x 2 x i64> zeroinitializer, <vscale x 2 x i1> %pg, i64 %ext)
  store <vscale x 2 x i64> %dup, ptr %out
  ret ptr %ptr
}

; If a dup has a non-undef passthru, stick with the pre-indexed load.
define ptr @preindex_load_dup_passthru(<vscale x 2 x i64> %passthru, ptr %src, <vscale x 2 x i1> %pg, ptr %out) {
; CHECK-LABEL: preindex_load_dup_passthru:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldrsb x8, [x0, #1]!
; CHECK-NEXT:    mov z0.d, p0/m, x8
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %ptr = getelementptr inbounds i8, ptr %src, i64 1
  %tmp = load i8, ptr %ptr, align 4
  %ext = sext i8 %tmp to i64
  %dup = call <vscale x 2 x i64> @llvm.aarch64.sve.dup.nxv2i64(<vscale x 2 x i64> %passthru, <vscale x 2 x i1> %pg, i64 %ext)
  store <vscale x 2 x i64> %dup, ptr %out
  ret ptr %ptr
}

; Show that a second user of the load prevents the replicating load
; check which would ordinarily inhibit indexed loads from firing.
define ptr @preidx8sext64_instead_of_ld1r(ptr %src, ptr %out, ptr %dst) {
; CHECK-LABEL: preidx8sext64_instead_of_ld1r:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldrsb x8, [x0, #1]!
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov z0.d, x8
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    str x8, [x2]
; CHECK-NEXT:    ret
  %ptr = getelementptr inbounds i8, ptr %src, i64 1
  %tmp = load i8, ptr %ptr, align 4
  %ext = sext i8 %tmp to i64
  %ins = insertelement <vscale x 2 x i64> undef, i64 %ext, i32 0
  %dup = shufflevector <vscale x 2 x i64> %ins, <vscale x 2 x i64> undef, <vscale x 2 x i32> zeroinitializer
  store <vscale x 2 x i64> %dup, ptr %out
  store i64 %ext, ptr %dst
  ret ptr %ptr
}


; Intrinsic declarations used by the dupq.lane, vector.insert and dup tests.
declare <vscale x 16 x i8> @llvm.aarch64.sve.dupq.lane.nxv16i8(<vscale x 16 x i8>, i64)
declare <vscale x 8 x i16> @llvm.aarch64.sve.dupq.lane.nxv8i16(<vscale x 8 x i16>, i64)
declare <vscale x 4 x i32> @llvm.aarch64.sve.dupq.lane.nxv4i32(<vscale x 4 x i32>, i64)
declare <vscale x 2 x i64> @llvm.aarch64.sve.dupq.lane.nxv2i64(<vscale x 2 x i64>, i64)
declare <vscale x 8 x half> @llvm.aarch64.sve.dupq.lane.nxv8f16(<vscale x 8 x half>, i64)
declare <vscale x 8 x bfloat> @llvm.aarch64.sve.dupq.lane.nxv8bf16(<vscale x 8 x bfloat>, i64)
declare <vscale x 4 x float> @llvm.aarch64.sve.dupq.lane.nxv4f32(<vscale x 4 x float>, i64)
declare <vscale x 2 x double> @llvm.aarch64.sve.dupq.lane.nxv2f64(<vscale x 2 x double>, i64)

declare <vscale x 2 x double> @llvm.vector.insert.nxv2f64.v2f64(<vscale x 2 x double>, <2 x double>, i64)
declare <vscale x 4 x float> @llvm.vector.insert.nxv4f32.v4f32(<vscale x 4 x float>, <4 x float>, i64)
declare <vscale x 8 x half> @llvm.vector.insert.nxv8f16.v8f16(<vscale x 8 x half>, <8 x half>, i64)
declare <vscale x 2 x i64> @llvm.vector.insert.nxv2i64.v2i64(<vscale x 2 x i64>, <2 x i64>, i64)
declare <vscale x 4 x i32> @llvm.vector.insert.nxv4i32.v4i32(<vscale x 4 x i32>, <4 x i32>, i64)
declare <vscale x 8 x i16> @llvm.vector.insert.nxv8i16.v8i16(<vscale x 8 x i16>, <8 x i16>, i64)
declare <vscale x 16 x i8> @llvm.vector.insert.nxv16i8.v16i8(<vscale x 16 x i8>, <16 x i8>, i64)
declare <vscale x 8 x bfloat> @llvm.vector.insert.nxv8bf16.v8bf16(<vscale x 8 x bfloat>, <8 x bfloat>, i64)

declare <vscale x 16 x i8> @llvm.aarch64.sve.dup.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, i8)
declare <vscale x 8 x i16> @llvm.aarch64.sve.dup.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i16)
declare <vscale x 4 x i32> @llvm.aarch64.sve.dup.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32)
declare <vscale x 2 x i64> @llvm.aarch64.sve.dup.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i64)
declare <vscale x 8 x half> @llvm.aarch64.sve.dup.nxv8f16(<vscale x 8 x half>, <vscale x 8 x i1>, half)
declare <vscale x 4 x float> @llvm.aarch64.sve.dup.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, float)
declare <vscale x 2 x double> @llvm.aarch64.sve.dup.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, double)
declare <vscale x 4 x half> @llvm.aarch64.sve.dup.nxv4f16(<vscale x 4 x half>, <vscale x 4 x i1>, half)
declare <vscale x 2 x half> @llvm.aarch64.sve.dup.nxv2f16(<vscale x 2 x half>, <vscale x 2 x i1>, half)
declare <vscale x 2 x float> @llvm.aarch64.sve.dup.nxv2f32(<vscale x 2 x float>, <vscale x 2 x i1>, float)


attributes #0 = { "target-features"="+sve,+bf16" }
attributes #1 = { "target-cpu"="neoverse-v1" }