; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 3
; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,NEON-FIXED
; RUN: llc -mtriple=aarch64 -mattr=+sve -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,SVE-FIXED

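;; Tests for the @llvm.experimental.vector.extract.last.active intrinsic, which
;; returns the element of %data in the highest-numbered lane where the mask is
;; true, or %passthru when no lane is active. A rough scalar sketch of the
;; semantics (not the actual lowering):
;;
;;   idx = -1;
;;   for (i = 0; i < NumElts; ++i)
;;     if (mask[i]) idx = i;
;;   return idx < 0 ? passthru : data[idx];
;;
;; For the fixed-width tests below, the lowering spills the data vector to the
;; stack, finds the last active index by masking an index vector and taking an
;; unsigned max reduction, loads that element back, and uses csel/fcsel against
;; the passthru to cover the all-inactive case.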
define i8 @extract_last_i8(<16 x i8> %data, <16 x i8> %mask, i8 %passthru) {
; NEON-FIXED-LABEL: extract_last_i8:
; NEON-FIXED:       // %bb.0:
; NEON-FIXED-NEXT:    sub sp, sp, #16
; NEON-FIXED-NEXT:    .cfi_def_cfa_offset 16
; NEON-FIXED-NEXT:    cmeq v2.16b, v1.16b, #0
; NEON-FIXED-NEXT:    adrp x8, .LCPI0_0
; NEON-FIXED-NEXT:    cmtst v1.16b, v1.16b, v1.16b
; NEON-FIXED-NEXT:    ldr q3, [x8, :lo12:.LCPI0_0]
; NEON-FIXED-NEXT:    mov x9, sp
; NEON-FIXED-NEXT:    str q0, [sp]
; NEON-FIXED-NEXT:    bic v2.16b, v3.16b, v2.16b
; NEON-FIXED-NEXT:    umaxv b1, v1.16b
; NEON-FIXED-NEXT:    umaxv b2, v2.16b
; NEON-FIXED-NEXT:    fmov w8, s2
; NEON-FIXED-NEXT:    bfxil x9, x8, #0, #4
; NEON-FIXED-NEXT:    ldrb w8, [x9]
; NEON-FIXED-NEXT:    fmov w9, s1
; NEON-FIXED-NEXT:    tst w9, #0x1
; NEON-FIXED-NEXT:    csel w0, w8, w0, ne
; NEON-FIXED-NEXT:    add sp, sp, #16
; NEON-FIXED-NEXT:    ret
;
; SVE-FIXED-LABEL: extract_last_i8:
; SVE-FIXED:       // %bb.0:
; SVE-FIXED-NEXT:    sub sp, sp, #16
; SVE-FIXED-NEXT:    .cfi_def_cfa_offset 16
; SVE-FIXED-NEXT:    index z2.b, #0, #1
; SVE-FIXED-NEXT:    cmeq v3.16b, v1.16b, #0
; SVE-FIXED-NEXT:    cmtst v1.16b, v1.16b, v1.16b
; SVE-FIXED-NEXT:    mov x9, sp
; SVE-FIXED-NEXT:    str q0, [sp]
; SVE-FIXED-NEXT:    bic v2.16b, v2.16b, v3.16b
; SVE-FIXED-NEXT:    umaxv b1, v1.16b
; SVE-FIXED-NEXT:    umaxv b2, v2.16b
; SVE-FIXED-NEXT:    fmov w8, s2
; SVE-FIXED-NEXT:    bfxil x9, x8, #0, #4
; SVE-FIXED-NEXT:    ldrb w8, [x9]
; SVE-FIXED-NEXT:    fmov w9, s1
; SVE-FIXED-NEXT:    tst w9, #0x1
; SVE-FIXED-NEXT:    csel w0, w8, w0, ne
; SVE-FIXED-NEXT:    add sp, sp, #16
; SVE-FIXED-NEXT:    ret
  %notzero = icmp ne <16 x i8> %mask, zeroinitializer
  %res = call i8 @llvm.experimental.vector.extract.last.active.v16i8(<16 x i8> %data, <16 x i1> %notzero, i8 %passthru)
  ret i8 %res
}

define i16 @extract_last_i16(<8 x i16> %data, <8 x i16> %mask, i16 %passthru) {
; NEON-FIXED-LABEL: extract_last_i16:
; NEON-FIXED:       // %bb.0:
; NEON-FIXED-NEXT:    sub sp, sp, #16
; NEON-FIXED-NEXT:    .cfi_def_cfa_offset 16
; NEON-FIXED-NEXT:    cmtst v1.8h, v1.8h, v1.8h
; NEON-FIXED-NEXT:    adrp x8, .LCPI1_0
; NEON-FIXED-NEXT:    mov x9, sp
; NEON-FIXED-NEXT:    ldr d3, [x8, :lo12:.LCPI1_0]
; NEON-FIXED-NEXT:    str q0, [sp]
; NEON-FIXED-NEXT:    xtn v2.8b, v1.8h
; NEON-FIXED-NEXT:    umaxv h1, v1.8h
; NEON-FIXED-NEXT:    and v2.8b, v2.8b, v3.8b
; NEON-FIXED-NEXT:    umaxv b2, v2.8b
; NEON-FIXED-NEXT:    fmov w8, s2
; NEON-FIXED-NEXT:    bfi x9, x8, #1, #3
; NEON-FIXED-NEXT:    ldrh w8, [x9]
; NEON-FIXED-NEXT:    fmov w9, s1
; NEON-FIXED-NEXT:    tst w9, #0x1
; NEON-FIXED-NEXT:    csel w0, w8, w0, ne
; NEON-FIXED-NEXT:    add sp, sp, #16
; NEON-FIXED-NEXT:    ret
;
; SVE-FIXED-LABEL: extract_last_i16:
; SVE-FIXED:       // %bb.0:
; SVE-FIXED-NEXT:    sub sp, sp, #16
; SVE-FIXED-NEXT:    .cfi_def_cfa_offset 16
; SVE-FIXED-NEXT:    cmtst v1.8h, v1.8h, v1.8h
; SVE-FIXED-NEXT:    index z3.b, #0, #1
; SVE-FIXED-NEXT:    mov x9, sp
; SVE-FIXED-NEXT:    str q0, [sp]
; SVE-FIXED-NEXT:    xtn v2.8b, v1.8h
; SVE-FIXED-NEXT:    umaxv h1, v1.8h
; SVE-FIXED-NEXT:    and v2.8b, v2.8b, v3.8b
; SVE-FIXED-NEXT:    umaxv b2, v2.8b
; SVE-FIXED-NEXT:    fmov w8, s2
; SVE-FIXED-NEXT:    bfi x9, x8, #1, #3
; SVE-FIXED-NEXT:    ldrh w8, [x9]
; SVE-FIXED-NEXT:    fmov w9, s1
; SVE-FIXED-NEXT:    tst w9, #0x1
; SVE-FIXED-NEXT:    csel w0, w8, w0, ne
; SVE-FIXED-NEXT:    add sp, sp, #16
; SVE-FIXED-NEXT:    ret
  %notzero = icmp ne <8 x i16> %mask, zeroinitializer
  %res = call i16 @llvm.experimental.vector.extract.last.active.v8i16(<8 x i16> %data, <8 x i1> %notzero, i16 %passthru)
  ret i16 %res
}

define i32 @extract_last_i32(<4 x i32> %data, <4 x i32> %mask, i32 %passthru) {
; NEON-FIXED-LABEL: extract_last_i32:
; NEON-FIXED:       // %bb.0:
; NEON-FIXED-NEXT:    sub sp, sp, #16
; NEON-FIXED-NEXT:    .cfi_def_cfa_offset 16
; NEON-FIXED-NEXT:    cmtst v1.4s, v1.4s, v1.4s
; NEON-FIXED-NEXT:    adrp x8, .LCPI2_0
; NEON-FIXED-NEXT:    mov x9, sp
; NEON-FIXED-NEXT:    ldr d3, [x8, :lo12:.LCPI2_0]
; NEON-FIXED-NEXT:    str q0, [sp]
; NEON-FIXED-NEXT:    xtn v2.4h, v1.4s
; NEON-FIXED-NEXT:    umaxv s1, v1.4s
; NEON-FIXED-NEXT:    and v2.8b, v2.8b, v3.8b
; NEON-FIXED-NEXT:    umaxv h2, v2.4h
; NEON-FIXED-NEXT:    fmov w8, s2
; NEON-FIXED-NEXT:    bfi x9, x8, #2, #2
; NEON-FIXED-NEXT:    ldr w8, [x9]
; NEON-FIXED-NEXT:    fmov w9, s1
; NEON-FIXED-NEXT:    tst w9, #0x1
; NEON-FIXED-NEXT:    csel w0, w8, w0, ne
; NEON-FIXED-NEXT:    add sp, sp, #16
; NEON-FIXED-NEXT:    ret
;
; SVE-FIXED-LABEL: extract_last_i32:
; SVE-FIXED:       // %bb.0:
; SVE-FIXED-NEXT:    sub sp, sp, #16
; SVE-FIXED-NEXT:    .cfi_def_cfa_offset 16
; SVE-FIXED-NEXT:    cmtst v1.4s, v1.4s, v1.4s
; SVE-FIXED-NEXT:    index z3.h, #0, #1
; SVE-FIXED-NEXT:    mov x9, sp
; SVE-FIXED-NEXT:    str q0, [sp]
; SVE-FIXED-NEXT:    xtn v2.4h, v1.4s
; SVE-FIXED-NEXT:    umaxv s1, v1.4s
; SVE-FIXED-NEXT:    and v2.8b, v2.8b, v3.8b
; SVE-FIXED-NEXT:    umaxv h2, v2.4h
; SVE-FIXED-NEXT:    fmov w8, s2
; SVE-FIXED-NEXT:    bfi x9, x8, #2, #2
; SVE-FIXED-NEXT:    ldr w8, [x9]
; SVE-FIXED-NEXT:    fmov w9, s1
; SVE-FIXED-NEXT:    tst w9, #0x1
; SVE-FIXED-NEXT:    csel w0, w8, w0, ne
; SVE-FIXED-NEXT:    add sp, sp, #16
; SVE-FIXED-NEXT:    ret
  %notzero = icmp ne <4 x i32> %mask, zeroinitializer
  %res = call i32 @llvm.experimental.vector.extract.last.active.v4i32(<4 x i32> %data, <4 x i1> %notzero, i32 %passthru)
  ret i32 %res
}

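;; With only two elements (here and in extract_last_double below), the index
;; reduction is done with a pairwise umaxp rather than a umaxv.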
define i64 @extract_last_i64(<2 x i64> %data, <2 x i64> %mask, i64 %passthru) {
; NEON-FIXED-LABEL: extract_last_i64:
; NEON-FIXED:       // %bb.0:
; NEON-FIXED-NEXT:    sub sp, sp, #16
; NEON-FIXED-NEXT:    .cfi_def_cfa_offset 16
; NEON-FIXED-NEXT:    cmtst v1.2d, v1.2d, v1.2d
; NEON-FIXED-NEXT:    adrp x8, .LCPI3_0
; NEON-FIXED-NEXT:    mov x9, sp
; NEON-FIXED-NEXT:    ldr d3, [x8, :lo12:.LCPI3_0]
; NEON-FIXED-NEXT:    str q0, [sp]
; NEON-FIXED-NEXT:    xtn v2.2s, v1.2d
; NEON-FIXED-NEXT:    umaxv s1, v1.4s
; NEON-FIXED-NEXT:    and v2.8b, v2.8b, v3.8b
; NEON-FIXED-NEXT:    umaxp v2.2s, v2.2s, v2.2s
; NEON-FIXED-NEXT:    fmov w8, s2
; NEON-FIXED-NEXT:    bfi x9, x8, #3, #1
; NEON-FIXED-NEXT:    ldr x8, [x9]
; NEON-FIXED-NEXT:    fmov w9, s1
; NEON-FIXED-NEXT:    tst w9, #0x1
; NEON-FIXED-NEXT:    csel x0, x8, x0, ne
; NEON-FIXED-NEXT:    add sp, sp, #16
; NEON-FIXED-NEXT:    ret
;
; SVE-FIXED-LABEL: extract_last_i64:
; SVE-FIXED:       // %bb.0:
; SVE-FIXED-NEXT:    sub sp, sp, #16
; SVE-FIXED-NEXT:    .cfi_def_cfa_offset 16
; SVE-FIXED-NEXT:    cmtst v1.2d, v1.2d, v1.2d
; SVE-FIXED-NEXT:    index z3.s, #0, #1
; SVE-FIXED-NEXT:    mov x9, sp
; SVE-FIXED-NEXT:    str q0, [sp]
; SVE-FIXED-NEXT:    xtn v2.2s, v1.2d
; SVE-FIXED-NEXT:    umaxv s1, v1.4s
; SVE-FIXED-NEXT:    and v2.8b, v2.8b, v3.8b
; SVE-FIXED-NEXT:    umaxp v2.2s, v2.2s, v2.2s
; SVE-FIXED-NEXT:    fmov w8, s2
; SVE-FIXED-NEXT:    bfi x9, x8, #3, #1
; SVE-FIXED-NEXT:    ldr x8, [x9]
; SVE-FIXED-NEXT:    fmov w9, s1
; SVE-FIXED-NEXT:    tst w9, #0x1
; SVE-FIXED-NEXT:    csel x0, x8, x0, ne
; SVE-FIXED-NEXT:    add sp, sp, #16
; SVE-FIXED-NEXT:    ret
  %notzero = icmp ne <2 x i64> %mask, zeroinitializer
  %res = call i64 @llvm.experimental.vector.extract.last.active.v2i64(<2 x i64> %data, <2 x i1> %notzero, i64 %passthru)
  ret i64 %res
}

define float @extract_last_float(<4 x float> %data, <4 x i32> %mask, float %passthru) {
; NEON-FIXED-LABEL: extract_last_float:
; NEON-FIXED:       // %bb.0:
; NEON-FIXED-NEXT:    sub sp, sp, #16
; NEON-FIXED-NEXT:    .cfi_def_cfa_offset 16
; NEON-FIXED-NEXT:    cmtst v1.4s, v1.4s, v1.4s
; NEON-FIXED-NEXT:    adrp x8, .LCPI4_0
; NEON-FIXED-NEXT:    mov x9, sp
; NEON-FIXED-NEXT:    ldr d4, [x8, :lo12:.LCPI4_0]
; NEON-FIXED-NEXT:    str q0, [sp]
; NEON-FIXED-NEXT:    xtn v3.4h, v1.4s
; NEON-FIXED-NEXT:    umaxv s1, v1.4s
; NEON-FIXED-NEXT:    and v3.8b, v3.8b, v4.8b
; NEON-FIXED-NEXT:    umaxv h3, v3.4h
; NEON-FIXED-NEXT:    fmov w8, s3
; NEON-FIXED-NEXT:    bfi x9, x8, #2, #2
; NEON-FIXED-NEXT:    fmov w8, s1
; NEON-FIXED-NEXT:    ldr s0, [x9]
; NEON-FIXED-NEXT:    tst w8, #0x1
; NEON-FIXED-NEXT:    fcsel s0, s0, s2, ne
; NEON-FIXED-NEXT:    add sp, sp, #16
; NEON-FIXED-NEXT:    ret
;
; SVE-FIXED-LABEL: extract_last_float:
; SVE-FIXED:       // %bb.0:
; SVE-FIXED-NEXT:    sub sp, sp, #16
; SVE-FIXED-NEXT:    .cfi_def_cfa_offset 16
; SVE-FIXED-NEXT:    cmtst v1.4s, v1.4s, v1.4s
; SVE-FIXED-NEXT:    index z4.h, #0, #1
; SVE-FIXED-NEXT:    mov x9, sp
; SVE-FIXED-NEXT:    str q0, [sp]
; SVE-FIXED-NEXT:    xtn v3.4h, v1.4s
; SVE-FIXED-NEXT:    umaxv s1, v1.4s
; SVE-FIXED-NEXT:    and v3.8b, v3.8b, v4.8b
; SVE-FIXED-NEXT:    umaxv h3, v3.4h
; SVE-FIXED-NEXT:    fmov w8, s3
; SVE-FIXED-NEXT:    bfi x9, x8, #2, #2
; SVE-FIXED-NEXT:    fmov w8, s1
; SVE-FIXED-NEXT:    ldr s0, [x9]
; SVE-FIXED-NEXT:    tst w8, #0x1
; SVE-FIXED-NEXT:    fcsel s0, s0, s2, ne
; SVE-FIXED-NEXT:    add sp, sp, #16
; SVE-FIXED-NEXT:    ret
  %notzero = icmp ne <4 x i32> %mask, zeroinitializer
  %res = call float @llvm.experimental.vector.extract.last.active.v4f32(<4 x float> %data, <4 x i1> %notzero, float %passthru)
  ret float %res
}

define double @extract_last_double(<2 x double> %data, <2 x i64> %mask, double %passthru) {
; NEON-FIXED-LABEL: extract_last_double:
; NEON-FIXED:       // %bb.0:
; NEON-FIXED-NEXT:    sub sp, sp, #16
; NEON-FIXED-NEXT:    .cfi_def_cfa_offset 16
; NEON-FIXED-NEXT:    cmtst v1.2d, v1.2d, v1.2d
; NEON-FIXED-NEXT:    adrp x8, .LCPI5_0
; NEON-FIXED-NEXT:    mov x9, sp
; NEON-FIXED-NEXT:    ldr d4, [x8, :lo12:.LCPI5_0]
; NEON-FIXED-NEXT:    str q0, [sp]
; NEON-FIXED-NEXT:    xtn v3.2s, v1.2d
; NEON-FIXED-NEXT:    umaxv s1, v1.4s
; NEON-FIXED-NEXT:    and v3.8b, v3.8b, v4.8b
; NEON-FIXED-NEXT:    umaxp v3.2s, v3.2s, v3.2s
; NEON-FIXED-NEXT:    fmov w8, s3
; NEON-FIXED-NEXT:    bfi x9, x8, #3, #1
; NEON-FIXED-NEXT:    fmov w8, s1
; NEON-FIXED-NEXT:    ldr d0, [x9]
; NEON-FIXED-NEXT:    tst w8, #0x1
; NEON-FIXED-NEXT:    fcsel d0, d0, d2, ne
; NEON-FIXED-NEXT:    add sp, sp, #16
; NEON-FIXED-NEXT:    ret
;
; SVE-FIXED-LABEL: extract_last_double:
; SVE-FIXED:       // %bb.0:
; SVE-FIXED-NEXT:    sub sp, sp, #16
; SVE-FIXED-NEXT:    .cfi_def_cfa_offset 16
; SVE-FIXED-NEXT:    cmtst v1.2d, v1.2d, v1.2d
; SVE-FIXED-NEXT:    index z4.s, #0, #1
; SVE-FIXED-NEXT:    mov x9, sp
; SVE-FIXED-NEXT:    str q0, [sp]
; SVE-FIXED-NEXT:    xtn v3.2s, v1.2d
; SVE-FIXED-NEXT:    umaxv s1, v1.4s
; SVE-FIXED-NEXT:    and v3.8b, v3.8b, v4.8b
; SVE-FIXED-NEXT:    umaxp v3.2s, v3.2s, v3.2s
; SVE-FIXED-NEXT:    fmov w8, s3
; SVE-FIXED-NEXT:    bfi x9, x8, #3, #1
; SVE-FIXED-NEXT:    fmov w8, s1
; SVE-FIXED-NEXT:    ldr d0, [x9]
; SVE-FIXED-NEXT:    tst w8, #0x1
; SVE-FIXED-NEXT:    fcsel d0, d0, d2, ne
; SVE-FIXED-NEXT:    add sp, sp, #16
; SVE-FIXED-NEXT:    ret
  %notzero = icmp ne <2 x i64> %mask, zeroinitializer
  %res = call double @llvm.experimental.vector.extract.last.active.v2f64(<2 x double> %data, <2 x i1> %notzero, double %passthru)
  ret double %res
}

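;; For scalable vectors the lowering stays in SVE registers: inactive lanes of
;; an index vector are zeroed with sel, umaxv finds the last active index,
;; whilels builds a predicate covering lanes up to and including that index,
;; lastb extracts the element, and ptest plus csel/fcsel selects the passthru
;; when no lane is active.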
define i8 @extract_last_i8_scalable(<vscale x 16 x i8> %data, <vscale x 16 x i1> %mask, i8 %passthru) #0 {
; CHECK-LABEL: extract_last_i8_scalable:
; CHECK:       // %bb.0:
; CHECK-NEXT:    index z1.b, #0, #1
; CHECK-NEXT:    mov z2.b, #0 // =0x0
; CHECK-NEXT:    ptrue p1.b
; CHECK-NEXT:    sel z1.b, p0, z1.b, z2.b
; CHECK-NEXT:    umaxv b1, p1, z1.b
; CHECK-NEXT:    fmov w8, s1
; CHECK-NEXT:    and x8, x8, #0xff
; CHECK-NEXT:    whilels p1.b, xzr, x8
; CHECK-NEXT:    ptest p0, p0.b
; CHECK-NEXT:    lastb w8, p1, z0.b
; CHECK-NEXT:    csel w0, w8, w0, ne
; CHECK-NEXT:    ret
  %res = call i8 @llvm.experimental.vector.extract.last.active.nxv16i8(<vscale x 16 x i8> %data, <vscale x 16 x i1> %mask, i8 %passthru)
  ret i8 %res
}

define i16 @extract_last_i16_scalable(<vscale x 8 x i16> %data, <vscale x 8 x i1> %mask, i16 %passthru) #0 {
; CHECK-LABEL: extract_last_i16_scalable:
; CHECK:       // %bb.0:
; CHECK-NEXT:    index z1.h, #0, #1
; CHECK-NEXT:    mov z2.h, #0 // =0x0
; CHECK-NEXT:    ptrue p1.h
; CHECK-NEXT:    sel z1.h, p0, z1.h, z2.h
; CHECK-NEXT:    umaxv h1, p1, z1.h
; CHECK-NEXT:    fmov w8, s1
; CHECK-NEXT:    and x8, x8, #0xffff
; CHECK-NEXT:    whilels p2.h, xzr, x8
; CHECK-NEXT:    ptest p1, p0.b
; CHECK-NEXT:    lastb w8, p2, z0.h
; CHECK-NEXT:    csel w0, w8, w0, ne
; CHECK-NEXT:    ret
  %res = call i16 @llvm.experimental.vector.extract.last.active.nxv8i16(<vscale x 8 x i16> %data, <vscale x 8 x i1> %mask, i16 %passthru)
  ret i16 %res
}

define i32 @extract_last_i32_scalable(<vscale x 4 x i32> %data, <vscale x 4 x i1> %mask, i32 %passthru) #0 {
; CHECK-LABEL: extract_last_i32_scalable:
; CHECK:       // %bb.0:
; CHECK-NEXT:    index z1.s, #0, #1
; CHECK-NEXT:    mov z2.s, #0 // =0x0
; CHECK-NEXT:    ptrue p1.s
; CHECK-NEXT:    sel z1.s, p0, z1.s, z2.s
; CHECK-NEXT:    umaxv s1, p1, z1.s
; CHECK-NEXT:    fmov w8, s1
; CHECK-NEXT:    mov w8, w8
; CHECK-NEXT:    whilels p2.s, xzr, x8
; CHECK-NEXT:    ptest p1, p0.b
; CHECK-NEXT:    lastb w8, p2, z0.s
; CHECK-NEXT:    csel w0, w8, w0, ne
; CHECK-NEXT:    ret
  %res = call i32 @llvm.experimental.vector.extract.last.active.nxv4i32(<vscale x 4 x i32> %data, <vscale x 4 x i1> %mask, i32 %passthru)
  ret i32 %res
}

define i64 @extract_last_i64_scalable(<vscale x 2 x i64> %data, <vscale x 2 x i1> %mask, i64 %passthru) #0 {
; CHECK-LABEL: extract_last_i64_scalable:
; CHECK:       // %bb.0:
; CHECK-NEXT:    index z1.d, #0, #1
; CHECK-NEXT:    mov z2.d, #0 // =0x0
; CHECK-NEXT:    ptrue p1.d
; CHECK-NEXT:    sel z1.d, p0, z1.d, z2.d
; CHECK-NEXT:    umaxv d1, p1, z1.d
; CHECK-NEXT:    fmov x8, d1
; CHECK-NEXT:    whilels p2.d, xzr, x8
; CHECK-NEXT:    ptest p1, p0.b
; CHECK-NEXT:    lastb x8, p2, z0.d
; CHECK-NEXT:    csel x0, x8, x0, ne
; CHECK-NEXT:    ret
  %res = call i64 @llvm.experimental.vector.extract.last.active.nxv2i64(<vscale x 2 x i64> %data, <vscale x 2 x i1> %mask, i64 %passthru)
  ret i64 %res
}

define float @extract_last_float_scalable(<vscale x 4 x float> %data, <vscale x 4 x i1> %mask, float %passthru) #0 {
; CHECK-LABEL: extract_last_float_scalable:
; CHECK:       // %bb.0:
; CHECK-NEXT:    index z2.s, #0, #1
; CHECK-NEXT:    mov z3.s, #0 // =0x0
; CHECK-NEXT:    ptrue p1.s
; CHECK-NEXT:    sel z2.s, p0, z2.s, z3.s
; CHECK-NEXT:    umaxv s2, p1, z2.s
; CHECK-NEXT:    fmov w8, s2
; CHECK-NEXT:    mov w8, w8
; CHECK-NEXT:    whilels p2.s, xzr, x8
; CHECK-NEXT:    ptest p1, p0.b
; CHECK-NEXT:    lastb s0, p2, z0.s
; CHECK-NEXT:    fcsel s0, s0, s1, ne
; CHECK-NEXT:    ret
  %res = call float @llvm.experimental.vector.extract.last.active.nxv4f32(<vscale x 4 x float> %data, <vscale x 4 x i1> %mask, float %passthru)
  ret float %res
}

define double @extract_last_double_scalable(<vscale x 2 x double> %data, <vscale x 2 x i1> %mask, double %passthru) #0 {
; CHECK-LABEL: extract_last_double_scalable:
; CHECK:       // %bb.0:
; CHECK-NEXT:    index z2.d, #0, #1
; CHECK-NEXT:    mov z3.d, #0 // =0x0
; CHECK-NEXT:    ptrue p1.d
; CHECK-NEXT:    sel z2.d, p0, z2.d, z3.d
; CHECK-NEXT:    umaxv d2, p1, z2.d
; CHECK-NEXT:    fmov x8, d2
; CHECK-NEXT:    whilels p2.d, xzr, x8
; CHECK-NEXT:    ptest p1, p0.b
; CHECK-NEXT:    lastb d0, p2, z0.d
; CHECK-NEXT:    fcsel d0, d0, d1, ne
; CHECK-NEXT:    ret
  %res = call double @llvm.experimental.vector.extract.last.active.nxv2f64(<vscale x 2 x double> %data, <vscale x 2 x i1> %mask, double %passthru)
  ret double %res
}

;; If the passthru parameter is poison, we shouldn't see a select at the end.
define i8 @extract_last_i8_scalable_poison_passthru(<vscale x 16 x i8> %data, <vscale x 16 x i1> %mask) #0 {
; CHECK-LABEL: extract_last_i8_scalable_poison_passthru:
; CHECK:       // %bb.0:
; CHECK-NEXT:    index z1.b, #0, #1
; CHECK-NEXT:    mov z2.b, #0 // =0x0
; CHECK-NEXT:    sel z1.b, p0, z1.b, z2.b
; CHECK-NEXT:    ptrue p0.b
; CHECK-NEXT:    umaxv b1, p0, z1.b
; CHECK-NEXT:    fmov w8, s1
; CHECK-NEXT:    and x8, x8, #0xff
; CHECK-NEXT:    whilels p0.b, xzr, x8
; CHECK-NEXT:    lastb w0, p0, z0.b
; CHECK-NEXT:    ret
  %res = call i8 @llvm.experimental.vector.extract.last.active.nxv16i8(<vscale x 16 x i8> %data, <vscale x 16 x i1> %mask, i8 poison)
  ret i8 %res
}

declare i8 @llvm.experimental.vector.extract.last.active.v16i8(<16 x i8>, <16 x i1>, i8)
declare i16 @llvm.experimental.vector.extract.last.active.v8i16(<8 x i16>, <8 x i1>, i16)
declare i32 @llvm.experimental.vector.extract.last.active.v4i32(<4 x i32>, <4 x i1>, i32)
declare i64 @llvm.experimental.vector.extract.last.active.v2i64(<2 x i64>, <2 x i1>, i64)
declare float @llvm.experimental.vector.extract.last.active.v4f32(<4 x float>, <4 x i1>, float)
declare double @llvm.experimental.vector.extract.last.active.v2f64(<2 x double>, <2 x i1>, double)
declare i8 @llvm.experimental.vector.extract.last.active.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i1>, i8)
declare i16 @llvm.experimental.vector.extract.last.active.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i1>, i16)
declare i32 @llvm.experimental.vector.extract.last.active.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i1>, i32)
declare i64 @llvm.experimental.vector.extract.last.active.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i1>, i64)
declare float @llvm.experimental.vector.extract.last.active.nxv4f32(<vscale x 4 x float>, <vscale x 4 x i1>, float)
declare double @llvm.experimental.vector.extract.last.active.nxv2f64(<vscale x 2 x double>, <vscale x 2 x i1>, double)

attributes #0 = { "target-features"="+sve" vscale_range(1, 16) }