; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve,+bf16 < %s | FileCheck %s

; Codegen tests for insertelement on scalable (SVE) vector and predicate types.
; Each function performs one insertelement; the autogenerated check lines pin
; the instruction sequence llc is expected to emit for it. Do not edit the
; check lines by hand: regenerate them with utils/update_llc_test_checks.py.

;
; Lane 0 of a live vector.
; Integer and floating-point constants (and a bfloat argument) are merged into
; element 0 under a "ptrue ..., vl1" predicate.
;

define <vscale x 16 x i8> @test_lane0_16xi8(<vscale x 16 x i8> %a) {
; CHECK-LABEL: test_lane0_16xi8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl1
; CHECK-NEXT:    mov w8, #30 // =0x1e
; CHECK-NEXT:    mov z0.b, p0/m, w8
; CHECK-NEXT:    ret
  %b = insertelement <vscale x 16 x i8> %a, i8 30, i32 0
  ret <vscale x 16 x i8> %b
}

define <vscale x 8 x i16> @test_lane0_8xi16(<vscale x 8 x i16> %a) {
; CHECK-LABEL: test_lane0_8xi16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl1
; CHECK-NEXT:    mov w8, #30 // =0x1e
; CHECK-NEXT:    mov z0.h, p0/m, w8
; CHECK-NEXT:    ret
  %b = insertelement <vscale x 8 x i16> %a, i16 30, i32 0
  ret <vscale x 8 x i16> %b
}

define <vscale x 4 x i32> @test_lane0_4xi32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: test_lane0_4xi32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl1
; CHECK-NEXT:    mov w8, #30 // =0x1e
; CHECK-NEXT:    mov z0.s, p0/m, w8
; CHECK-NEXT:    ret
  %b = insertelement <vscale x 4 x i32> %a, i32 30, i32 0
  ret <vscale x 4 x i32> %b
}

define <vscale x 2 x i64> @test_lane0_2xi64(<vscale x 2 x i64> %a) {
; CHECK-LABEL: test_lane0_2xi64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl1
; CHECK-NEXT:    mov w8, #30 // =0x1e
; CHECK-NEXT:    mov z0.d, p0/m, x8
; CHECK-NEXT:    ret
  %b = insertelement <vscale x 2 x i64> %a, i64 30, i32 0
  ret <vscale x 2 x i64> %b
}

define <vscale x 2 x double> @test_lane0_2xf64(<vscale x 2 x double> %a) {
; CHECK-LABEL: test_lane0_2xf64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov d1, #1.00000000
; CHECK-NEXT:    ptrue p0.d, vl1
; CHECK-NEXT:    mov z0.d, p0/m, z1.d
; CHECK-NEXT:    ret
  %b = insertelement <vscale x 2 x double> %a, double 1.0, i32 0
  ret <vscale x 2 x double> %b
}

define <vscale x 4 x float> @test_lane0_4xf32(<vscale x 4 x float> %a) {
; CHECK-LABEL: test_lane0_4xf32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov s1, #1.00000000
; CHECK-NEXT:    ptrue p0.s, vl1
; CHECK-NEXT:    mov z0.s, p0/m, z1.s
; CHECK-NEXT:    ret
  %b = insertelement <vscale x 4 x float> %a, float 1.0, i32 0
  ret <vscale x 4 x float> %b
}

define <vscale x 8 x half> @test_lane0_8xf16(<vscale x 8 x half> %a) {
; CHECK-LABEL: test_lane0_8xf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov h1, #1.00000000
; CHECK-NEXT:    ptrue p0.h, vl1
; CHECK-NEXT:    mov z0.h, p0/m, z1.h
; CHECK-NEXT:    ret
  %b = insertelement <vscale x 8 x half> %a, half 1.0, i32 0
  ret <vscale x 8 x half> %b
}

define <vscale x 8 x bfloat> @test_lane0_8xbf16(<vscale x 8 x bfloat> %a, bfloat %x) {
; CHECK-LABEL: test_lane0_8xbf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl1
; CHECK-NEXT:    // kill: def $h1 killed $h1 def $z1
; CHECK-NEXT:    mov z0.h, p0/m, z1.h
; CHECK-NEXT:    ret
  %b = insertelement <vscale x 8 x bfloat> %a, bfloat %x, i32 0
  ret <vscale x 8 x bfloat> %b
}

;
; Constant non-zero lane of a live vector.
; The expected code builds the lane predicate by comparing an index vector
; (0, 1, 2, ...) against a splat of the lane number, then does a predicated mov.
;

; Lane 4 is past the end of <vscale x 2 x i64> unless vscale >= 3, so the
; insert is only well defined for sufficiently large run-time vectors.
define <vscale x 2 x i64> @test_lane4_2xi64(<vscale x 2 x i64> %a) {
; CHECK-LABEL: test_lane4_2xi64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #4 // =0x4
; CHECK-NEXT:    index z1.d, #0, #1
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    mov z2.d, x8
; CHECK-NEXT:    mov w8, #30 // =0x1e
; CHECK-NEXT:    cmpeq p0.d, p0/z, z1.d, z2.d
; CHECK-NEXT:    mov z0.d, p0/m, x8
; CHECK-NEXT:    ret
  %b = insertelement <vscale x 2 x i64> %a, i64 30, i32 4
  ret <vscale x 2 x i64> %b
}

; Lane 9 is past the end of <vscale x 8 x half> unless vscale >= 2.
define <vscale x 8 x half> @test_lane9_8xf16(<vscale x 8 x half> %a) {
; CHECK-LABEL: test_lane9_8xf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #9 // =0x9
; CHECK-NEXT:    index z1.h, #0, #1
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    mov z2.h, w8
; CHECK-NEXT:    cmpeq p0.h, p0/z, z1.h, z2.h
; CHECK-NEXT:    fmov h1, #1.00000000
; CHECK-NEXT:    mov z0.h, p0/m, h1
; CHECK-NEXT:    ret
  %b = insertelement <vscale x 8 x half> %a, half 1.0, i32 9
  ret <vscale x 8 x half> %b
}

; Same lane as above, with the bfloat element taken from an argument.
define <vscale x 8 x bfloat> @test_lane9_8xbf16(<vscale x 8 x bfloat> %a, bfloat %x) {
; CHECK-LABEL: test_lane9_8xbf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #9 // =0x9
; CHECK-NEXT:    index z2.h, #0, #1
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    mov z3.h, w8
; CHECK-NEXT:    cmpeq p0.h, p0/z, z2.h, z3.h
; CHECK-NEXT:    mov z0.h, p0/m, h1
; CHECK-NEXT:    ret
  %b = insertelement <vscale x 8 x bfloat> %a, bfloat %x, i32 9
  ret <vscale x 8 x bfloat> %b
}

define <vscale x 16 x i8> @test_lane1_16xi8(<vscale x 16 x i8> %a) {
; CHECK-LABEL: test_lane1_16xi8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #1 // =0x1
; CHECK-NEXT:    index z1.b, #0, #1
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    mov z2.b, w8
; CHECK-NEXT:    mov w8, #30 // =0x1e
; CHECK-NEXT:    cmpeq p0.b, p0/z, z1.b, z2.b
; CHECK-NEXT:    mov z0.b, p0/m, w8
; CHECK-NEXT:    ret
  %b = insertelement <vscale x 16 x i8> %a, i8 30, i32 1
  ret <vscale x 16 x i8> %b
}

; Lane number supplied at run time in %x.
define <vscale x 16 x i8> @test_lanex_16xi8(<vscale x 16 x i8> %a, i32 %x) {
; CHECK-LABEL: test_lanex_16xi8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    index z1.b, #0, #1
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    mov z2.b, w8
; CHECK-NEXT:    mov w8, #30 // =0x1e
; CHECK-NEXT:    cmpeq p0.b, p0/z, z1.b, z2.b
; CHECK-NEXT:    mov z0.b, p0/m, w8
; CHECK-NEXT:    ret
  %b = insertelement <vscale x 16 x i8> %a, i8 30, i32 %x
  ret <vscale x 16 x i8> %b
}

; Redundant lane insert: the element extracted from lane 2 of %a is written
; back to lane 2 of %a, so the expected code is a bare ret.
define <vscale x 4 x i32> @extract_insert_4xi32(<vscale x 4 x i32> %a) {
; CHECK-LABEL: extract_insert_4xi32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ret
  %b = extractelement <vscale x 4 x i32> %a, i32 2
  %c = insertelement <vscale x 4 x i32> %a, i32 %b, i32 2
  ret <vscale x 4 x i32> %c
}

;
; Integer inserts into an undef vector.
;

define <vscale x 8 x i16> @test_lane6_undef_8xi16(i16 %a) {
; CHECK-LABEL: test_lane6_undef_8xi16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #6 // =0x6
; CHECK-NEXT:    index z0.h, #0, #1
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    mov z1.h, w8
; CHECK-NEXT:    cmpeq p0.h, p0/z, z0.h, z1.h
; CHECK-NEXT:    mov z0.h, p0/m, w0
; CHECK-NEXT:    ret
  %b = insertelement <vscale x 8 x i16> undef, i16 %a, i32 6
  ret <vscale x 8 x i16> %b
}

; Lane 0 of undef needs no predicate: a single fmov from the GPR is expected.
define <vscale x 16 x i8> @test_lane0_undef_16xi8(i8 %a) {
; CHECK-LABEL: test_lane0_undef_16xi8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov s0, w0
; CHECK-NEXT:    ret
  %b = insertelement <vscale x 16 x i8> undef, i8 %a, i32 0
  ret <vscale x 16 x i8> %b
}

;
; Insert of an element extracted from a second vector.
;

define <vscale x 16 x i8> @test_insert0_of_extract0_16xi8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: test_insert0_of_extract0_16xi8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov w8, s1
; CHECK-NEXT:    ptrue p0.b, vl1
; CHECK-NEXT:    mov z0.b, p0/m, w8
; CHECK-NEXT:    ret
  %c = extractelement <vscale x 16 x i8> %b, i32 0
  %d = insertelement <vscale x 16 x i8> %a, i8 %c, i32 0
  ret <vscale x 16 x i8> %d
}

; Lane 64 is beyond the 16 lanes available at vscale = 1; the expected extract
; uses whilels + lastb rather than a fixed-lane umov.
define <vscale x 16 x i8> @test_insert64_of_extract64_16xi8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: test_insert64_of_extract64_16xi8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #64 // =0x40
; CHECK-NEXT:    whilels p0.b, xzr, x8
; CHECK-NEXT:    mov z2.b, w8
; CHECK-NEXT:    lastb w9, p0, z1.b
; CHECK-NEXT:    index z1.b, #0, #1
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    cmpeq p0.b, p0/z, z1.b, z2.b
; CHECK-NEXT:    mov z0.b, p0/m, w9
; CHECK-NEXT:    ret
  %c = extractelement <vscale x 16 x i8> %b, i32 64
  %d = insertelement <vscale x 16 x i8> %a, i8 %c, i32 64
  ret <vscale x 16 x i8> %d
}

; Extract lane and insert lane differ (1 -> 3).
define <vscale x 16 x i8> @test_insert3_of_extract1_16xi8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
; CHECK-LABEL: test_insert3_of_extract1_16xi8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #3 // =0x3
; CHECK-NEXT:    index z2.b, #0, #1
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    mov z3.b, w8
; CHECK-NEXT:    umov w8, v1.b[1]
; CHECK-NEXT:    cmpeq p0.b, p0/z, z2.b, z3.b
; CHECK-NEXT:    mov z0.b, p0/m, w8
; CHECK-NEXT:    ret
  %c = extractelement <vscale x 16 x i8> %b, i32 1
  %d = insertelement <vscale x 16 x i8> %a, i8 %c, i32 3
  ret <vscale x 16 x i8> %d
}

;
; Floating-point scalar into lane 0 of an undef vector.
; No instruction is expected beyond ret: the scalar argument register is simply
; reinterpreted as the low part of z0 (the "kill" annotation).
;

define <vscale x 8 x half> @test_insert_into_undef_nxv8f16(half %a) {
; CHECK-LABEL: test_insert_into_undef_nxv8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
; CHECK-NEXT:    ret
  %b = insertelement <vscale x 8 x half> undef, half %a, i32 0
  ret <vscale x 8 x half> %b
}

define <vscale x 4 x half> @test_insert_into_undef_nxv4f16(half %a) {
; CHECK-LABEL: test_insert_into_undef_nxv4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
; CHECK-NEXT:    ret
  %b = insertelement <vscale x 4 x half> undef, half %a, i32 0
  ret <vscale x 4 x half> %b
}

define <vscale x 2 x half> @test_insert_into_undef_nxv2f16(half %a) {
; CHECK-LABEL: test_insert_into_undef_nxv2f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
; CHECK-NEXT:    ret
  %b = insertelement <vscale x 2 x half> undef, half %a, i32 0
  ret <vscale x 2 x half> %b
}

define <vscale x 8 x bfloat> @test_insert_into_undef_nxv8bf16(bfloat %a) {
; CHECK-LABEL: test_insert_into_undef_nxv8bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
; CHECK-NEXT:    ret
  %b = insertelement <vscale x 8 x bfloat> undef, bfloat %a, i32 0
  ret <vscale x 8 x bfloat> %b
}

define <vscale x 4 x bfloat> @test_insert_into_undef_nxv4bf16(bfloat %a) {
; CHECK-LABEL: test_insert_into_undef_nxv4bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
; CHECK-NEXT:    ret
  %b = insertelement <vscale x 4 x bfloat> undef, bfloat %a, i32 0
  ret <vscale x 4 x bfloat> %b
}

define <vscale x 2 x bfloat> @test_insert_into_undef_nxv2bf16(bfloat %a) {
; CHECK-LABEL: test_insert_into_undef_nxv2bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $h0 killed $h0 def $z0
; CHECK-NEXT:    ret
  %b = insertelement <vscale x 2 x bfloat> undef, bfloat %a, i32 0
  ret <vscale x 2 x bfloat> %b
}

define <vscale x 4 x float> @test_insert_into_undef_nxv4f32(float %a) {
; CHECK-LABEL: test_insert_into_undef_nxv4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
; CHECK-NEXT:    ret
  %b = insertelement <vscale x 4 x float> undef, float %a, i32 0
  ret <vscale x 4 x float> %b
}

define <vscale x 2 x float> @test_insert_into_undef_nxv2f32(float %a) {
; CHECK-LABEL: test_insert_into_undef_nxv2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $s0 killed $s0 def $z0
; CHECK-NEXT:    ret
  %b = insertelement <vscale x 2 x float> undef, float %a, i32 0
  ret <vscale x 2 x float> %b
}

define <vscale x 2 x double> @test_insert_into_undef_nxv2f64(double %a) {
; CHECK-LABEL: test_insert_into_undef_nxv2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    ret
  %b = insertelement <vscale x 2 x double> undef, double %a, i32 0
  ret <vscale x 2 x double> %b
}

;
; Insert scalar at index: floating-point scalar into an undef vector at a
; run-time i64 lane number. For the unpacked types (nxv2f16, nxv4f16, nxv2bf16,
; nxv4bf16, nxv2f32) the lane compare is expected at the container element
; width (.d or .s) while the final mov uses the scalar's own width.
;

define <vscale x 2 x half> @test_insert_with_index_nxv2f16(half %h, i64 %idx) {
; CHECK-LABEL: test_insert_with_index_nxv2f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    index z1.d, #0, #1
; CHECK-NEXT:    mov z2.d, x0
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    cmpeq p0.d, p0/z, z1.d, z2.d
; CHECK-NEXT:    mov z0.h, p0/m, h0
; CHECK-NEXT:    ret
  %res = insertelement <vscale x 2 x half> undef, half %h, i64 %idx
  ret <vscale x 2 x half> %res
}

define <vscale x 4 x half> @test_insert_with_index_nxv4f16(half %h, i64 %idx) {
; CHECK-LABEL: test_insert_with_index_nxv4f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    index z1.s, #0, #1
; CHECK-NEXT:    mov z2.s, w0
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    cmpeq p0.s, p0/z, z1.s, z2.s
; CHECK-NEXT:    mov z0.h, p0/m, h0
; CHECK-NEXT:    ret
  %res = insertelement <vscale x 4 x half> undef, half %h, i64 %idx
  ret <vscale x 4 x half> %res
}

define <vscale x 8 x half> @test_insert_with_index_nxv8f16(half %h, i64 %idx) {
; CHECK-LABEL: test_insert_with_index_nxv8f16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    index z1.h, #0, #1
; CHECK-NEXT:    mov z2.h, w0
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    cmpeq p0.h, p0/z, z1.h, z2.h
; CHECK-NEXT:    mov z0.h, p0/m, h0
; CHECK-NEXT:    ret
  %res = insertelement <vscale x 8 x half> undef, half %h, i64 %idx
  ret <vscale x 8 x half> %res
}

define <vscale x 2 x bfloat> @test_insert_with_index_nxv2bf16(bfloat %h, i64 %idx) {
; CHECK-LABEL: test_insert_with_index_nxv2bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    index z1.d, #0, #1
; CHECK-NEXT:    mov z2.d, x0
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    cmpeq p0.d, p0/z, z1.d, z2.d
; CHECK-NEXT:    mov z0.h, p0/m, h0
; CHECK-NEXT:    ret
  %res = insertelement <vscale x 2 x bfloat> undef, bfloat %h, i64 %idx
  ret <vscale x 2 x bfloat> %res
}

define <vscale x 4 x bfloat> @test_insert_with_index_nxv4bf16(bfloat %h, i64 %idx) {
; CHECK-LABEL: test_insert_with_index_nxv4bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    index z1.s, #0, #1
; CHECK-NEXT:    mov z2.s, w0
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    cmpeq p0.s, p0/z, z1.s, z2.s
; CHECK-NEXT:    mov z0.h, p0/m, h0
; CHECK-NEXT:    ret
  %res = insertelement <vscale x 4 x bfloat> undef, bfloat %h, i64 %idx
  ret <vscale x 4 x bfloat> %res
}

define <vscale x 8 x bfloat> @test_insert_with_index_nxv8bf16(bfloat %h, i64 %idx) {
; CHECK-LABEL: test_insert_with_index_nxv8bf16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    index z1.h, #0, #1
; CHECK-NEXT:    mov z2.h, w0
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    cmpeq p0.h, p0/z, z1.h, z2.h
; CHECK-NEXT:    mov z0.h, p0/m, h0
; CHECK-NEXT:    ret
  %res = insertelement <vscale x 8 x bfloat> undef, bfloat %h, i64 %idx
  ret <vscale x 8 x bfloat> %res
}

define <vscale x 2 x float> @test_insert_with_index_nxv2f32(float %f, i64 %idx) {
; CHECK-LABEL: test_insert_with_index_nxv2f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    index z1.d, #0, #1
; CHECK-NEXT:    mov z2.d, x0
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    cmpeq p0.d, p0/z, z1.d, z2.d
; CHECK-NEXT:    mov z0.s, p0/m, s0
; CHECK-NEXT:    ret
  %res = insertelement <vscale x 2 x float> undef, float %f, i64 %idx
  ret <vscale x 2 x float> %res
}

define <vscale x 4 x float> @test_insert_with_index_nxv4f32(float %f, i64 %idx) {
; CHECK-LABEL: test_insert_with_index_nxv4f32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    index z1.s, #0, #1
; CHECK-NEXT:    mov z2.s, w0
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    cmpeq p0.s, p0/z, z1.s, z2.s
; CHECK-NEXT:    mov z0.s, p0/m, s0
; CHECK-NEXT:    ret
  %res = insertelement <vscale x 4 x float> undef, float %f, i64 %idx
  ret <vscale x 4 x float> %res
}

define <vscale x 2 x double> @test_insert_with_index_nxv2f64(double %d, i64 %idx) {
; CHECK-LABEL: test_insert_with_index_nxv2f64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    index z1.d, #0, #1
; CHECK-NEXT:    mov z2.d, x0
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    cmpeq p0.d, p0/z, z1.d, z2.d
; CHECK-NEXT:    mov z0.d, p0/m, d0
; CHECK-NEXT:    ret
  %res = insertelement <vscale x 2 x double> undef, double %d, i64 %idx
  ret <vscale x 2 x double> %res
}

;
; Predicate insert.
; The expected code expands the predicate to a vector of 0/1 values
; ("mov z, p/z, #1"), inserts the new element as for a data vector, masks each
; element with "and ..., #0x1" and rebuilds the predicate with cmpne against 0.
;
; In the "_immediate" variants the element, the lane number, or both are
; constants.
;

define <vscale x 2 x i1> @test_predicate_insert_2xi1_immediate (<vscale x 2 x i1> %val, i1 %elt) {
; CHECK-LABEL: test_predicate_insert_2xi1_immediate:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov z0.d, p0/z, #1 // =0x1
; CHECK-NEXT:    ptrue p0.d, vl1
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    mov z0.d, p0/m, x0
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    and z0.d, z0.d, #0x1
; CHECK-NEXT:    cmpne p0.d, p0/z, z0.d, #0
; CHECK-NEXT:    ret
  %res = insertelement <vscale x 2 x i1> %val, i1 %elt, i32 0
  ret <vscale x 2 x i1> %res
}

define <vscale x 4 x i1> @test_predicate_insert_4xi1_immediate (<vscale x 4 x i1> %val, i1 %elt) {
; CHECK-LABEL: test_predicate_insert_4xi1_immediate:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #2 // =0x2
; CHECK-NEXT:    index z0.s, #0, #1
; CHECK-NEXT:    ptrue p1.s
; CHECK-NEXT:    mov z1.s, w8
; CHECK-NEXT:    cmpeq p2.s, p1/z, z0.s, z1.s
; CHECK-NEXT:    mov z0.s, p0/z, #1 // =0x1
; CHECK-NEXT:    mov z0.s, p2/m, w0
; CHECK-NEXT:    and z0.s, z0.s, #0x1
; CHECK-NEXT:    cmpne p0.s, p1/z, z0.s, #0
; CHECK-NEXT:    ret
  %res = insertelement <vscale x 4 x i1> %val, i1 %elt, i32 2
  ret <vscale x 4 x i1> %res
}

define <vscale x 8 x i1> @test_predicate_insert_8xi1_immediate (<vscale x 8 x i1> %val, i32 %idx) {
; CHECK-LABEL: test_predicate_insert_8xi1_immediate:
; CHECK:       // %bb.0:
; CHECK-NEXT:    index z0.h, #0, #1
; CHECK-NEXT:    mov w8, w0
; CHECK-NEXT:    ptrue p1.h
; CHECK-NEXT:    mov z1.h, w8
; CHECK-NEXT:    mov w8, #1 // =0x1
; CHECK-NEXT:    cmpeq p2.h, p1/z, z0.h, z1.h
; CHECK-NEXT:    mov z0.h, p0/z, #1 // =0x1
; CHECK-NEXT:    mov z0.h, p2/m, w8
; CHECK-NEXT:    and z0.h, z0.h, #0x1
; CHECK-NEXT:    cmpne p0.h, p1/z, z0.h, #0
; CHECK-NEXT:    ret
  %res = insertelement <vscale x 8 x i1> %val, i1 1, i32 %idx
  ret <vscale x 8 x i1> %res
}

define <vscale x 16 x i1> @test_predicate_insert_16xi1_immediate (<vscale x 16 x i1> %val) {
; CHECK-LABEL: test_predicate_insert_16xi1_immediate:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #4 // =0x4
; CHECK-NEXT:    index z0.b, #0, #1
; CHECK-NEXT:    ptrue p1.b
; CHECK-NEXT:    mov z1.b, w8
; CHECK-NEXT:    mov w8, wzr
; CHECK-NEXT:    cmpeq p2.b, p1/z, z0.b, z1.b
; CHECK-NEXT:    mov z0.b, p0/z, #1 // =0x1
; CHECK-NEXT:    mov z0.b, p2/m, w8
; CHECK-NEXT:    and z0.b, z0.b, #0x1
; CHECK-NEXT:    cmpne p0.b, p1/z, z0.b, #0
; CHECK-NEXT:    ret
  %res = insertelement <vscale x 16 x i1> %val, i1 0, i32 4
  ret <vscale x 16 x i1> %res
}

; Element and lane number both supplied at run time.

define <vscale x 2 x i1> @test_predicate_insert_2xi1(<vscale x 2 x i1> %val, i1 %elt, i32 %idx) {
; CHECK-LABEL: test_predicate_insert_2xi1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    index z0.d, #0, #1
; CHECK-NEXT:    mov w8, w1
; CHECK-NEXT:    ptrue p1.d
; CHECK-NEXT:    mov z1.d, x8
; CHECK-NEXT:    // kill: def $w0 killed $w0 def $x0
; CHECK-NEXT:    cmpeq p2.d, p1/z, z0.d, z1.d
; CHECK-NEXT:    mov z0.d, p0/z, #1 // =0x1
; CHECK-NEXT:    mov z0.d, p2/m, x0
; CHECK-NEXT:    and z0.d, z0.d, #0x1
; CHECK-NEXT:    cmpne p0.d, p1/z, z0.d, #0
; CHECK-NEXT:    ret
  %res = insertelement <vscale x 2 x i1> %val, i1 %elt, i32 %idx
  ret <vscale x 2 x i1> %res
}

define <vscale x 4 x i1> @test_predicate_insert_4xi1(<vscale x 4 x i1> %val, i1 %elt, i32 %idx) {
; CHECK-LABEL: test_predicate_insert_4xi1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    index z0.s, #0, #1
; CHECK-NEXT:    mov w8, w1
; CHECK-NEXT:    ptrue p1.s
; CHECK-NEXT:    mov z1.s, w8
; CHECK-NEXT:    cmpeq p2.s, p1/z, z0.s, z1.s
; CHECK-NEXT:    mov z0.s, p0/z, #1 // =0x1
; CHECK-NEXT:    mov z0.s, p2/m, w0
; CHECK-NEXT:    and z0.s, z0.s, #0x1
; CHECK-NEXT:    cmpne p0.s, p1/z, z0.s, #0
; CHECK-NEXT:    ret
  %res = insertelement <vscale x 4 x i1> %val, i1 %elt, i32 %idx
  ret <vscale x 4 x i1> %res
}

define <vscale x 8 x i1> @test_predicate_insert_8xi1(<vscale x 8 x i1> %val, i1 %elt, i32 %idx) {
; CHECK-LABEL: test_predicate_insert_8xi1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    index z0.h, #0, #1
; CHECK-NEXT:    mov w8, w1
; CHECK-NEXT:    ptrue p1.h
; CHECK-NEXT:    mov z1.h, w8
; CHECK-NEXT:    cmpeq p2.h, p1/z, z0.h, z1.h
; CHECK-NEXT:    mov z0.h, p0/z, #1 // =0x1
; CHECK-NEXT:    mov z0.h, p2/m, w0
; CHECK-NEXT:    and z0.h, z0.h, #0x1
; CHECK-NEXT:    cmpne p0.h, p1/z, z0.h, #0
; CHECK-NEXT:    ret
  %res = insertelement <vscale x 8 x i1> %val, i1 %elt, i32 %idx
  ret <vscale x 8 x i1> %res
}

define <vscale x 16 x i1> @test_predicate_insert_16xi1(<vscale x 16 x i1> %val, i1 %elt, i32 %idx) {
; CHECK-LABEL: test_predicate_insert_16xi1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    index z0.b, #0, #1
; CHECK-NEXT:    mov w8, w1
; CHECK-NEXT:    ptrue p1.b
; CHECK-NEXT:    mov z1.b, w8
; CHECK-NEXT:    cmpeq p2.b, p1/z, z0.b, z1.b
; CHECK-NEXT:    mov z0.b, p0/z, #1 // =0x1
; CHECK-NEXT:    mov z0.b, p2/m, w0
; CHECK-NEXT:    and z0.b, z0.b, #0x1
; CHECK-NEXT:    cmpne p0.b, p1/z, z0.b, #0
; CHECK-NEXT:    ret
  %res = insertelement <vscale x 16 x i1> %val, i1 %elt, i32 %idx
  ret <vscale x 16 x i1> %res
}

; <vscale x 32 x i1> occupies two predicate registers (p0 and p1 here). The
; expected code goes through the stack: both halves are expanded to byte
; vectors and stored to a two-vector slot, the lane number is clamped to the
; last byte of that slot (rdvl/sub/cmp/csel), the element is stored with strb,
; and both halves are reloaded and converted back to predicates.
; The function is marked uwtable, and the expected output includes the
; .cfi_* directives for the frame setup and teardown.
define <vscale x 32 x i1> @test_predicate_insert_32xi1(<vscale x 32 x i1> %val, i1 %elt, i32 %idx) uwtable {
; CHECK-LABEL: test_predicate_insert_32xi1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    str x29, [sp, #-16]! // 8-byte Folded Spill
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    .cfi_offset w29, -16
; CHECK-NEXT:    addvl sp, sp, #-2
; CHECK-NEXT:    .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x10, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 16 * VG
; CHECK-NEXT:    rdvl x8, #2
; CHECK-NEXT:    mov z0.b, p1/z, #1 // =0x1
; CHECK-NEXT:    mov z1.b, p0/z, #1 // =0x1
; CHECK-NEXT:    sub x8, x8, #1
; CHECK-NEXT:    mov w9, w1
; CHECK-NEXT:    ptrue p1.b
; CHECK-NEXT:    cmp x9, x8
; CHECK-NEXT:    csel x8, x9, x8, lo
; CHECK-NEXT:    mov x9, sp
; CHECK-NEXT:    st1b { z0.b }, p1, [sp, #1, mul vl]
; CHECK-NEXT:    st1b { z1.b }, p1, [sp]
; CHECK-NEXT:    strb w0, [x9, x8]
; CHECK-NEXT:    ld1b { z0.b }, p1/z, [sp]
; CHECK-NEXT:    ld1b { z1.b }, p1/z, [sp, #1, mul vl]
; CHECK-NEXT:    and z0.b, z0.b, #0x1
; CHECK-NEXT:    and z1.b, z1.b, #0x1
; CHECK-NEXT:    cmpne p0.b, p1/z, z0.b, #0
; CHECK-NEXT:    cmpne p1.b, p1/z, z1.b, #0
; CHECK-NEXT:    addvl sp, sp, #2
; CHECK-NEXT:    .cfi_def_cfa wsp, 16
; CHECK-NEXT:    ldr x29, [sp], #16 // 8-byte Folded Reload
; CHECK-NEXT:    .cfi_def_cfa_offset 0
; CHECK-NEXT:    .cfi_restore w29
; CHECK-NEXT:    ret
  %res = insertelement <vscale x 32 x i1> %val, i1 %elt, i32 %idx
  ret <vscale x 32 x i1> %res
}