; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -aarch64-sve-vector-bits-min=256 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_256
; RUN: llc -aarch64-sve-vector-bits-min=512 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
; RUN: llc -aarch64-sve-vector-bits-min=2048 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512

; Codegen test for splatting a scalar across fixed-length vectors when SVE is
; enabled. Functions that carry a vscale_range attribute produce the same code
; for every llc invocation above and use the common prefix. Functions without
; one are checked per minimum register width (the 2048-bit run shares the
; 512-bit expectations).

target triple = "aarch64-unknown-linux-gnu"

;
; DUP (integer)
;

; Don't use SVE for 64-bit vectors.
define <8 x i8> @splat_v8i8(i8 %a) vscale_range(2,0) #0 {
; CHECK-LABEL: splat_v8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v0.8b, w0
; CHECK-NEXT:    ret
  %insert = insertelement <8 x i8> undef, i8 %a, i64 0
  %splat = shufflevector <8 x i8> %insert, <8 x i8> undef, <8 x i32> zeroinitializer
  ret <8 x i8> %splat
}

; Don't use SVE for 128-bit vectors.
define <16 x i8> @splat_v16i8(i8 %a) vscale_range(2,0) #0 {
; CHECK-LABEL: splat_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v0.16b, w0
; CHECK-NEXT:    ret
  %insert = insertelement <16 x i8> undef, i8 %a, i64 0
  %splat = shufflevector <16 x i8> %insert, <16 x i8> undef, <16 x i32> zeroinitializer
  ret <16 x i8> %splat
}

; 256-bit vector: fits one SVE register at the guaranteed minimum (vscale >= 2).
define void @splat_v32i8(i8 %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: splat_v32i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.b, w0
; CHECK-NEXT:    ptrue p0.b, vl32
; CHECK-NEXT:    st1b { z0.b }, p0, [x1]
; CHECK-NEXT:    ret
  %insert = insertelement <32 x i8> undef, i8 %a, i64 0
  %splat = shufflevector <32 x i8> %insert, <32 x i8> undef, <32 x i32> zeroinitializer
  store <32 x i8> %splat, ptr %b
  ret void
}

; 512-bit vector with no vscale_range: stored as two 256-bit halves when only
; 256 bits are guaranteed, and as a single store once 512 bits are guaranteed.
define void @splat_v64i8(i8 %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: splat_v64i8:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    mov z0.b, w0
; VBITS_GE_256-NEXT:    ptrue p0.b, vl32
; VBITS_GE_256-NEXT:    mov w8, #32 // =0x20
; VBITS_GE_256-NEXT:    st1b { z0.b }, p0, [x1, x8]
; VBITS_GE_256-NEXT:    st1b { z0.b }, p0, [x1]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: splat_v64i8:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    mov z0.b, w0
; VBITS_GE_512-NEXT:    ptrue p0.b, vl64
; VBITS_GE_512-NEXT:    st1b { z0.b }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %insert = insertelement <64 x i8> undef, i8 %a, i64 0
  %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
  store <64 x i8> %splat, ptr %b
  ret void
}

; 1024-bit vector; vscale_range(8,0) guarantees it fits one SVE register.
define void @splat_v128i8(i8 %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: splat_v128i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.b, w0
; CHECK-NEXT:    ptrue p0.b, vl128
; CHECK-NEXT:    st1b { z0.b }, p0, [x1]
; CHECK-NEXT:    ret
  %insert = insertelement <128 x i8> undef, i8 %a, i64 0
  %splat = shufflevector <128 x i8> %insert, <128 x i8> undef, <128 x i32> zeroinitializer
  store <128 x i8> %splat, ptr %b
  ret void
}

; 2048-bit vector; vscale_range(16,0) guarantees it fits one SVE register.
define void @splat_v256i8(i8 %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: splat_v256i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.b, w0
; CHECK-NEXT:    ptrue p0.b, vl256
; CHECK-NEXT:    st1b { z0.b }, p0, [x1]
; CHECK-NEXT:    ret
  %insert = insertelement <256 x i8> undef, i8 %a, i64 0
  %splat = shufflevector <256 x i8> %insert, <256 x i8> undef, <256 x i32> zeroinitializer
  store <256 x i8> %splat, ptr %b
  ret void
}

; Don't use SVE for 64-bit vectors.
define <4 x i16> @splat_v4i16(i16 %a) vscale_range(2,0) #0 {
; CHECK-LABEL: splat_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v0.4h, w0
; CHECK-NEXT:    ret
  %insert = insertelement <4 x i16> undef, i16 %a, i64 0
  %splat = shufflevector <4 x i16> %insert, <4 x i16> undef, <4 x i32> zeroinitializer
  ret <4 x i16> %splat
}

; Don't use SVE for 128-bit vectors.
define <8 x i16> @splat_v8i16(i16 %a) vscale_range(2,0) #0 {
; CHECK-LABEL: splat_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v0.8h, w0
; CHECK-NEXT:    ret
  %insert = insertelement <8 x i16> undef, i16 %a, i64 0
  %splat = shufflevector <8 x i16> %insert, <8 x i16> undef, <8 x i32> zeroinitializer
  ret <8 x i16> %splat
}

; 256-bit vector: fits one SVE register at the guaranteed minimum (vscale >= 2).
define void @splat_v16i16(i16 %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: splat_v16i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.h, w0
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    st1h { z0.h }, p0, [x1]
; CHECK-NEXT:    ret
  %insert = insertelement <16 x i16> undef, i16 %a, i64 0
  %splat = shufflevector <16 x i16> %insert, <16 x i16> undef, <16 x i32> zeroinitializer
  store <16 x i16> %splat, ptr %b
  ret void
}

; 512-bit vector with no vscale_range: stored as two 256-bit halves when only
; 256 bits are guaranteed, and as a single store once 512 bits are guaranteed.
define void @splat_v32i16(i16 %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: splat_v32i16:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    mov z0.h, w0
; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
; VBITS_GE_256-NEXT:    mov x8, #16 // =0x10
; VBITS_GE_256-NEXT:    st1h { z0.h }, p0, [x1, x8, lsl #1]
; VBITS_GE_256-NEXT:    st1h { z0.h }, p0, [x1]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: splat_v32i16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    mov z0.h, w0
; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
; VBITS_GE_512-NEXT:    st1h { z0.h }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %insert = insertelement <32 x i16> undef, i16 %a, i64 0
  %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
  store <32 x i16> %splat, ptr %b
  ret void
}

; 1024-bit vector; vscale_range(8,0) guarantees it fits one SVE register.
define void @splat_v64i16(i16 %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: splat_v64i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.h, w0
; CHECK-NEXT:    ptrue p0.h, vl64
; CHECK-NEXT:    st1h { z0.h }, p0, [x1]
; CHECK-NEXT:    ret
  %insert = insertelement <64 x i16> undef, i16 %a, i64 0
  %splat = shufflevector <64 x i16> %insert, <64 x i16> undef, <64 x i32> zeroinitializer
  store <64 x i16> %splat, ptr %b
  ret void
}

; 2048-bit vector; vscale_range(16,0) guarantees it fits one SVE register.
define void @splat_v128i16(i16 %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: splat_v128i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.h, w0
; CHECK-NEXT:    ptrue p0.h, vl128
; CHECK-NEXT:    st1h { z0.h }, p0, [x1]
; CHECK-NEXT:    ret
  %insert = insertelement <128 x i16> undef, i16 %a, i64 0
  %splat = shufflevector <128 x i16> %insert, <128 x i16> undef, <128 x i32> zeroinitializer
  store <128 x i16> %splat, ptr %b
  ret void
}

; Don't use SVE for 64-bit vectors.
define <2 x i32> @splat_v2i32(i32 %a) vscale_range(2,0) #0 {
; CHECK-LABEL: splat_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v0.2s, w0
; CHECK-NEXT:    ret
  %insert = insertelement <2 x i32> undef, i32 %a, i64 0
  %splat = shufflevector <2 x i32> %insert, <2 x i32> undef, <2 x i32> zeroinitializer
  ret <2 x i32> %splat
}

; Don't use SVE for 128-bit vectors.
define <4 x i32> @splat_v4i32(i32 %a) vscale_range(2,0) #0 {
; CHECK-LABEL: splat_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v0.4s, w0
; CHECK-NEXT:    ret
  %insert = insertelement <4 x i32> undef, i32 %a, i64 0
  %splat = shufflevector <4 x i32> %insert, <4 x i32> undef, <4 x i32> zeroinitializer
  ret <4 x i32> %splat
}

; 256-bit vector: fits one SVE register at the guaranteed minimum (vscale >= 2).
define void @splat_v8i32(i32 %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: splat_v8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.s, w0
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %insert = insertelement <8 x i32> undef, i32 %a, i64 0
  %splat = shufflevector <8 x i32> %insert, <8 x i32> undef, <8 x i32> zeroinitializer
  store <8 x i32> %splat, ptr %b
  ret void
}

; 512-bit vector with no vscale_range: stored as two 256-bit halves when only
; 256 bits are guaranteed, and as a single store once 512 bits are guaranteed.
define void @splat_v16i32(i32 %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: splat_v16i32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    mov z0.s, w0
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x1, x8, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x1]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: splat_v16i32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    mov z0.s, w0
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %insert = insertelement <16 x i32> undef, i32 %a, i64 0
  %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
  store <16 x i32> %splat, ptr %b
  ret void
}

; 1024-bit vector; vscale_range(8,0) guarantees it fits one SVE register.
define void @splat_v32i32(i32 %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: splat_v32i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.s, w0
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %insert = insertelement <32 x i32> undef, i32 %a, i64 0
  %splat = shufflevector <32 x i32> %insert, <32 x i32> undef, <32 x i32> zeroinitializer
  store <32 x i32> %splat, ptr %b
  ret void
}

; 2048-bit vector; vscale_range(16,0) guarantees it fits one SVE register.
define void @splat_v64i32(i32 %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: splat_v64i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.s, w0
; CHECK-NEXT:    ptrue p0.s, vl64
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %insert = insertelement <64 x i32> undef, i32 %a, i64 0
  %splat = shufflevector <64 x i32> %insert, <64 x i32> undef, <64 x i32> zeroinitializer
  store <64 x i32> %splat, ptr %b
  ret void
}

; Don't use SVE for 64-bit vectors.
define <1 x i64> @splat_v1i64(i64 %a) vscale_range(2,0) #0 {
; CHECK-LABEL: splat_v1i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov d0, x0
; CHECK-NEXT:    ret
  %insert = insertelement <1 x i64> undef, i64 %a, i64 0
  %splat = shufflevector <1 x i64> %insert, <1 x i64> undef, <1 x i32> zeroinitializer
  ret <1 x i64> %splat
}

; Don't use SVE for 128-bit vectors.
define <2 x i64> @splat_v2i64(i64 %a) vscale_range(2,0) #0 {
; CHECK-LABEL: splat_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup v0.2d, x0
; CHECK-NEXT:    ret
  %insert = insertelement <2 x i64> undef, i64 %a, i64 0
  %splat = shufflevector <2 x i64> %insert, <2 x i64> undef, <2 x i32> zeroinitializer
  ret <2 x i64> %splat
}

; 256-bit vector: fits one SVE register at the guaranteed minimum (vscale >= 2).
define void @splat_v4i64(i64 %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: splat_v4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.d, x0
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %insert = insertelement <4 x i64> undef, i64 %a, i64 0
  %splat = shufflevector <4 x i64> %insert, <4 x i64> undef, <4 x i32> zeroinitializer
  store <4 x i64> %splat, ptr %b
  ret void
}

; 512-bit vector with no vscale_range: stored as two 256-bit halves when only
; 256 bits are guaranteed, and as a single store once 512 bits are guaranteed.
define void @splat_v8i64(i64 %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: splat_v8i64:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    mov z0.d, x0
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x1, x8, lsl #3]
; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x1]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: splat_v8i64:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    mov z0.d, x0
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x1]
; VBITS_GE_512-NEXT:    ret
  %insert = insertelement <8 x i64> undef, i64 %a, i64 0
  %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
  store <8 x i64> %splat, ptr %b
  ret void
}

; 1024-bit vector; vscale_range(8,0) guarantees it fits one SVE register.
define void @splat_v16i64(i64 %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: splat_v16i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.d, x0
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %insert = insertelement <16 x i64> undef, i64 %a, i64 0
  %splat = shufflevector <16 x i64> %insert, <16 x i64> undef, <16 x i32> zeroinitializer
  store <16 x i64> %splat, ptr %b
  ret void
}

; 2048-bit vector; vscale_range(16,0) guarantees it fits one SVE register.
define void @splat_v32i64(i64 %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: splat_v32i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.d, x0
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %insert = insertelement <32 x i64> undef, i64 %a, i64 0
  %splat = shufflevector <32 x i64> %insert, <32 x i64> undef, <32 x i32> zeroinitializer
  store <32 x i64> %splat, ptr %b
  ret void
}

;
; DUP (floating-point)
;

; Don't use SVE for 64-bit vectors.
define <4 x half> @splat_v4f16(half %a) vscale_range(2,0) #0 {
; CHECK-LABEL: splat_v4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
; CHECK-NEXT:    dup v0.4h, v0.h[0]
; CHECK-NEXT:    ret
  %insert = insertelement <4 x half> undef, half %a, i64 0
  %splat = shufflevector <4 x half> %insert, <4 x half> undef, <4 x i32> zeroinitializer
  ret <4 x half> %splat
}

; Don't use SVE for 128-bit vectors.
define <8 x half> @splat_v8f16(half %a) vscale_range(2,0) #0 {
; CHECK-LABEL: splat_v8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
; CHECK-NEXT:    dup v0.8h, v0.h[0]
; CHECK-NEXT:    ret
  %insert = insertelement <8 x half> undef, half %a, i64 0
  %splat = shufflevector <8 x half> %insert, <8 x half> undef, <8 x i32> zeroinitializer
  ret <8 x half> %splat
}

; 256-bit vector: fits one SVE register at the guaranteed minimum (vscale >= 2).
; The scalar arrives in h0, so the destination pointer %b is in x0.
define void @splat_v16f16(half %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: splat_v16f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
; CHECK-NEXT:    ptrue p0.h, vl16
; CHECK-NEXT:    mov z0.h, h0
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %insert = insertelement <16 x half> undef, half %a, i64 0
  %splat = shufflevector <16 x half> %insert, <16 x half> undef, <16 x i32> zeroinitializer
  store <16 x half> %splat, ptr %b
  ret void
}

; 512-bit vector with no vscale_range: stored as two 256-bit halves when only
; 256 bits are guaranteed, and as a single store once 512 bits are guaranteed.
define void @splat_v32f16(half %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: splat_v32f16:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    // kill: def $h0 killed $h0 def $z0
; VBITS_GE_256-NEXT:    ptrue p0.h, vl16
; VBITS_GE_256-NEXT:    mov x8, #16 // =0x10
; VBITS_GE_256-NEXT:    mov z0.h, h0
; VBITS_GE_256-NEXT:    st1h { z0.h }, p0, [x0, x8, lsl #1]
; VBITS_GE_256-NEXT:    st1h { z0.h }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: splat_v32f16:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    // kill: def $h0 killed $h0 def $z0
; VBITS_GE_512-NEXT:    ptrue p0.h, vl32
; VBITS_GE_512-NEXT:    mov z0.h, h0
; VBITS_GE_512-NEXT:    st1h { z0.h }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %insert = insertelement <32 x half> undef, half %a, i64 0
  %splat = shufflevector <32 x half> %insert, <32 x half> undef, <32 x i32> zeroinitializer
  store <32 x half> %splat, ptr %b
  ret void
}

; 1024-bit vector; vscale_range(8,0) guarantees it fits one SVE register.
define void @splat_v64f16(half %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: splat_v64f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
; CHECK-NEXT:    ptrue p0.h, vl64
; CHECK-NEXT:    mov z0.h, h0
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %insert = insertelement <64 x half> undef, half %a, i64 0
  %splat = shufflevector <64 x half> %insert, <64 x half> undef, <64 x i32> zeroinitializer
  store <64 x half> %splat, ptr %b
  ret void
}

; 2048-bit vector; vscale_range(16,0) guarantees it fits one SVE register.
define void @splat_v128f16(half %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: splat_v128f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
; CHECK-NEXT:    ptrue p0.h, vl128
; CHECK-NEXT:    mov z0.h, h0
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %insert = insertelement <128 x half> undef, half %a, i64 0
  %splat = shufflevector <128 x half> %insert, <128 x half> undef, <128 x i32> zeroinitializer
  store <128 x half> %splat, ptr %b
  ret void
}

; Don't use SVE for 64-bit vectors.
; %op2 is not referenced by the body.
define <2 x float> @splat_v2f32(float %a, <2 x float> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: splat_v2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
; CHECK-NEXT:    dup v0.2s, v0.s[0]
; CHECK-NEXT:    ret
  %insert = insertelement <2 x float> undef, float %a, i64 0
  %splat = shufflevector <2 x float> %insert, <2 x float> undef, <2 x i32> zeroinitializer
  ret <2 x float> %splat
}

; Don't use SVE for 128-bit vectors.
define <4 x float> @splat_v4f32(float %a, <4 x float> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: splat_v4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
; CHECK-NEXT:    dup v0.4s, v0.s[0]
; CHECK-NEXT:    ret
  %insert = insertelement <4 x float> undef, float %a, i64 0
  %splat = shufflevector <4 x float> %insert, <4 x float> undef, <4 x i32> zeroinitializer
  ret <4 x float> %splat
}

; 256-bit vector: fits one SVE register at the guaranteed minimum (vscale >= 2).
; The scalar arrives in s0, so the destination pointer %b is in x0.
define void @splat_v8f32(float %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: splat_v8f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    mov z0.s, s0
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %insert = insertelement <8 x float> undef, float %a, i64 0
  %splat = shufflevector <8 x float> %insert, <8 x float> undef, <8 x i32> zeroinitializer
  store <8 x float> %splat, ptr %b
  ret void
}

; 512-bit vector with no vscale_range: stored as two 256-bit halves when only
; 256 bits are guaranteed, and as a single store once 512 bits are guaranteed.
define void @splat_v16f32(float %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: splat_v16f32:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    // kill: def $s0 killed $s0 def $z0
; VBITS_GE_256-NEXT:    ptrue p0.s, vl8
; VBITS_GE_256-NEXT:    mov x8, #8 // =0x8
; VBITS_GE_256-NEXT:    mov z0.s, s0
; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x0, x8, lsl #2]
; VBITS_GE_256-NEXT:    st1w { z0.s }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: splat_v16f32:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    // kill: def $s0 killed $s0 def $z0
; VBITS_GE_512-NEXT:    ptrue p0.s, vl16
; VBITS_GE_512-NEXT:    mov z0.s, s0
; VBITS_GE_512-NEXT:    st1w { z0.s }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %insert = insertelement <16 x float> undef, float %a, i64 0
  %splat = shufflevector <16 x float> %insert, <16 x float> undef, <16 x i32> zeroinitializer
  store <16 x float> %splat, ptr %b
  ret void
}

; 1024-bit vector; vscale_range(8,0) guarantees it fits one SVE register.
define void @splat_v32f32(float %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: splat_v32f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
; CHECK-NEXT:    ptrue p0.s, vl32
; CHECK-NEXT:    mov z0.s, s0
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %insert = insertelement <32 x float> undef, float %a, i64 0
  %splat = shufflevector <32 x float> %insert, <32 x float> undef, <32 x i32> zeroinitializer
  store <32 x float> %splat, ptr %b
  ret void
}

; 2048-bit vector; vscale_range(16,0) guarantees it fits one SVE register.
define void @splat_v64f32(float %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: splat_v64f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
; CHECK-NEXT:    ptrue p0.s, vl64
; CHECK-NEXT:    mov z0.s, s0
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %insert = insertelement <64 x float> undef, float %a, i64 0
  %splat = shufflevector <64 x float> %insert, <64 x float> undef, <64 x i32> zeroinitializer
  store <64 x float> %splat, ptr %b
  ret void
}

; Don't use SVE for 64-bit vectors.
; The expected output is a bare ret: no instruction is emitted for the
; single-element splat. %op2 is not referenced by the body.
define <1 x double> @splat_v1f64(double %a, <1 x double> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: splat_v1f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %insert = insertelement <1 x double> undef, double %a, i64 0
  %splat = shufflevector <1 x double> %insert, <1 x double> undef, <1 x i32> zeroinitializer
  ret <1 x double> %splat
}

; Don't use SVE for 128-bit vectors.
define <2 x double> @splat_v2f64(double %a, <2 x double> %op2) vscale_range(2,0) #0 {
; CHECK-LABEL: splat_v2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    dup v0.2d, v0.d[0]
; CHECK-NEXT:    ret
  %insert = insertelement <2 x double> undef, double %a, i64 0
  %splat = shufflevector <2 x double> %insert, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %splat
}

; 256-bit vector: fits one SVE register at the guaranteed minimum (vscale >= 2).
; The scalar arrives in d0, so the destination pointer %b is in x0.
define void @splat_v4f64(double %a, ptr %b) vscale_range(2,0) #0 {
; CHECK-LABEL: splat_v4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    ptrue p0.d, vl4
; CHECK-NEXT:    mov z0.d, d0
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %insert = insertelement <4 x double> undef, double %a, i64 0
  %splat = shufflevector <4 x double> %insert, <4 x double> undef, <4 x i32> zeroinitializer
  store <4 x double> %splat, ptr %b
  ret void
}

; 512-bit vector with no vscale_range: stored as two 256-bit halves when only
; 256 bits are guaranteed, and as a single store once 512 bits are guaranteed.
define void @splat_v8f64(double %a, ptr %b) #0 {
; VBITS_GE_256-LABEL: splat_v8f64:
; VBITS_GE_256:       // %bb.0:
; VBITS_GE_256-NEXT:    // kill: def $d0 killed $d0 def $z0
; VBITS_GE_256-NEXT:    ptrue p0.d, vl4
; VBITS_GE_256-NEXT:    mov x8, #4 // =0x4
; VBITS_GE_256-NEXT:    mov z0.d, d0
; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x0, x8, lsl #3]
; VBITS_GE_256-NEXT:    st1d { z0.d }, p0, [x0]
; VBITS_GE_256-NEXT:    ret
;
; VBITS_GE_512-LABEL: splat_v8f64:
; VBITS_GE_512:       // %bb.0:
; VBITS_GE_512-NEXT:    // kill: def $d0 killed $d0 def $z0
; VBITS_GE_512-NEXT:    ptrue p0.d, vl8
; VBITS_GE_512-NEXT:    mov z0.d, d0
; VBITS_GE_512-NEXT:    st1d { z0.d }, p0, [x0]
; VBITS_GE_512-NEXT:    ret
  %insert = insertelement <8 x double> undef, double %a, i64 0
  %splat = shufflevector <8 x double> %insert, <8 x double> undef, <8 x i32> zeroinitializer
  store <8 x double> %splat, ptr %b
  ret void
}

; 1024-bit vector; vscale_range(8,0) guarantees it fits one SVE register.
define void @splat_v16f64(double %a, ptr %b) vscale_range(8,0) #0 {
; CHECK-LABEL: splat_v16f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    ptrue p0.d, vl16
; CHECK-NEXT:    mov z0.d, d0
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %insert = insertelement <16 x double> undef, double %a, i64 0
  %splat = shufflevector <16 x double> %insert, <16 x double> undef, <16 x i32> zeroinitializer
  store <16 x double> %splat, ptr %b
  ret void
}

; 2048-bit vector; vscale_range(16,0) guarantees it fits one SVE register.
define void @splat_v32f64(double %a, ptr %b) vscale_range(16,0) #0 {
; CHECK-LABEL: splat_v32f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    ptrue p0.d, vl32
; CHECK-NEXT:    mov z0.d, d0
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %insert = insertelement <32 x double> undef, double %a, i64 0
  %splat = shufflevector <32 x double> %insert, <32 x double> undef, <32 x i32> zeroinitializer
  store <32 x double> %splat, ptr %b
  ret void
}

;
; DUP (integer immediate)
;

; Constant splats of 512-bit vectors; vscale_range(4,0) guarantees a single
; SVE register, so each is one immediate move plus one predicated store.
define void @splat_imm_v64i8(ptr %a) vscale_range(4,0) #0 {
; CHECK-LABEL: splat_imm_v64i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.b, #1 // =0x1
; CHECK-NEXT:    ptrue p0.b, vl64
; CHECK-NEXT:    st1b { z0.b }, p0, [x0]
; CHECK-NEXT:    ret
  %insert = insertelement <64 x i8> undef, i8 1, i64 0
  %splat = shufflevector <64 x i8> %insert, <64 x i8> undef, <64 x i32> zeroinitializer
  store <64 x i8> %splat, ptr %a
  ret void
}

define void @splat_imm_v32i16(ptr %a) vscale_range(4,0) #0 {
; CHECK-LABEL: splat_imm_v32i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.h, #2 // =0x2
; CHECK-NEXT:    ptrue p0.h, vl32
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %insert = insertelement <32 x i16> undef, i16 2, i64 0
  %splat = shufflevector <32 x i16> %insert, <32 x i16> undef, <32 x i32> zeroinitializer
  store <32 x i16> %splat, ptr %a
  ret void
}

define void @splat_imm_v16i32(ptr %a) vscale_range(4,0) #0 {
; CHECK-LABEL: splat_imm_v16i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.s, #3 // =0x3
; CHECK-NEXT:    ptrue p0.s, vl16
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %insert = insertelement <16 x i32> undef, i32 3, i64 0
  %splat = shufflevector <16 x i32> %insert, <16 x i32> undef, <16 x i32> zeroinitializer
  store <16 x i32> %splat, ptr %a
  ret void
}

define void @splat_imm_v8i64(ptr %a) vscale_range(4,0) #0 {
; CHECK-LABEL: splat_imm_v8i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.d, #4 // =0x4
; CHECK-NEXT:    ptrue p0.d, vl8
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %insert = insertelement <8 x i64> undef, i64 4, i64 0
  %splat = shufflevector <8 x i64> %insert, <8 x i64> undef, <8 x i32> zeroinitializer
  store <8 x i64> %splat, ptr %a
  ret void
}

;
; DUP (floating-point immediate)
;

; Same shape as the integer-immediate tests, using fmov with an FP immediate.
define void @splat_imm_v32f16(ptr %a) vscale_range(4,0) #0 {
; CHECK-LABEL: splat_imm_v32f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov z0.h, #5.00000000
; CHECK-NEXT:    ptrue p0.h, vl32
; CHECK-NEXT:    st1h { z0.h }, p0, [x0]
; CHECK-NEXT:    ret
  %insert = insertelement <32 x half> undef, half 5.0, i64 0
  %splat = shufflevector <32 x half> %insert, <32 x half> undef, <32 x i32> zeroinitializer
  store <32 x half> %splat, ptr %a
  ret void
}

define void @splat_imm_v16f32(ptr %a) vscale_range(4,0) #0 {
; CHECK-LABEL: splat_imm_v16f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov z0.s, #6.00000000
; CHECK-NEXT:    ptrue p0.s, vl16
; CHECK-NEXT:    st1w { z0.s }, p0, [x0]
; CHECK-NEXT:    ret
  %insert = insertelement <16 x float> undef, float 6.0, i64 0
  %splat = shufflevector <16 x float> %insert, <16 x float> undef, <16 x i32> zeroinitializer
  store <16 x float> %splat, ptr %a
  ret void
}

define void @splat_imm_v8f64(ptr %a) vscale_range(4,0) #0 {
; CHECK-LABEL: splat_imm_v8f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov z0.d, #7.00000000
; CHECK-NEXT:    ptrue p0.d, vl8
; CHECK-NEXT:    st1d { z0.d }, p0, [x0]
; CHECK-NEXT:    ret
  %insert = insertelement <8 x double> undef, double 7.0, i64 0
  %splat = shufflevector <8 x double> %insert, <8 x double> undef, <8 x i32> zeroinitializer
  store <8 x double> %splat, ptr %a
  ret void
}

;
; DUP (lane 0 of a loaded vector)
;

; vscale_range(2,2) pins the register width to exactly 256 bits, so these use
; an all-lanes predicate (no vlN pattern). Lane 0 of the loaded vector is
; broadcast with a scalar-register mov and stored to %b.
define void @load_splat_v8f32(ptr %a, ptr %b) vscale_range(2,2) #0 {
; CHECK-LABEL: load_splat_v8f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    mov z0.s, s0
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %v = load <8 x float>, ptr %a
  %splat = shufflevector <8 x float> %v, <8 x float> undef, <8 x i32> zeroinitializer
  store <8 x float> %splat, ptr %b
  ret void
}

define void @load_splat_v4f64(ptr %a, ptr %b) vscale_range(2,2) #0 {
; CHECK-LABEL: load_splat_v4f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    mov z0.d, d0
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %v = load <4 x double>, ptr %a
  %splat = shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> zeroinitializer
  store <4 x double> %splat, ptr %b
  ret void
}

define void @load_splat_v32i8(ptr %a, ptr %b) vscale_range(2,2) #0 {
; CHECK-LABEL: load_splat_v32i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    ld1b { z0.b }, p0/z, [x0]
; CHECK-NEXT:    mov z0.b, b0
; CHECK-NEXT:    st1b { z0.b }, p0, [x1]
; CHECK-NEXT:    ret
  %v = load <32 x i8>, ptr %a
  %splat = shufflevector <32 x i8> %v, <32 x i8> undef, <32 x i32> zeroinitializer
  store <32 x i8> %splat, ptr %b
  ret void
}

define void @load_splat_v16i16(ptr %a, ptr %b) vscale_range(2,2) #0 {
; CHECK-LABEL: load_splat_v16i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    ld1h { z0.h }, p0/z, [x0]
; CHECK-NEXT:    mov z0.h, h0
; CHECK-NEXT:    st1h { z0.h }, p0, [x1]
; CHECK-NEXT:    ret
  %v = load <16 x i16>, ptr %a
  %splat = shufflevector <16 x i16> %v, <16 x i16> undef, <16 x i32> zeroinitializer
  store <16 x i16> %splat, ptr %b
  ret void
}

define void @load_splat_v8i32(ptr %a, ptr %b) vscale_range(2,2) #0 {
; CHECK-LABEL: load_splat_v8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    mov z0.s, s0
; CHECK-NEXT:    st1w { z0.s }, p0, [x1]
; CHECK-NEXT:    ret
  %v = load <8 x i32>, ptr %a
  %splat = shufflevector <8 x i32> %v, <8 x i32> undef, <8 x i32> zeroinitializer
  store <8 x i32> %splat, ptr %b
  ret void
}

define void @load_splat_v4i64(ptr %a, ptr %b) vscale_range(2,2) #0 {
; CHECK-LABEL: load_splat_v4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    ld1d { z0.d }, p0/z, [x0]
; CHECK-NEXT:    mov z0.d, d0
; CHECK-NEXT:    st1d { z0.d }, p0, [x1]
; CHECK-NEXT:    ret
  %v = load <4 x i64>, ptr %a
  %splat = shufflevector <4 x i64> %v, <4 x i64> undef, <4 x i32> zeroinitializer
  store <4 x i64> %splat, ptr %b
  ret void
}

; Every function above is compiled with SVE enabled.
attributes #0 = { "target-features"="+sve" }