; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mattr=+sve -force-streaming-compatible < %s | FileCheck %s
; RUN: llc -mattr=+sme -force-streaming < %s | FileCheck %s
; RUN: llc -force-streaming-compatible < %s | FileCheck %s --check-prefix=NONEON-NOSVE

; Fixed-length integer vector reductions, compiled three ways (see the llc
; invocations above):
;   * +sve with -force-streaming-compatible   -> default CHECK prefix
;   * +sme with -force-streaming              -> default CHECK prefix
;   * -force-streaming-compatible, no -mattr  -> NONEON-NOSVE prefix
; The assertion lines are generated output. Regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.

target triple = "aarch64-unknown-linux-gnu"

;
; UADDV
;
; llvm.vector.reduce.add. 64- and 128-bit vectors arrive in registers, the
; 256-bit cases are loaded through a pointer. Under the CHECK prefix the
; register cases are a single predicated uaddv and the 256-bit cases add the
; two loaded halves first. Under the NONEON-NOSVE prefix the vector is stored
; to the stack and summed with scalar loads and adds.
;

define i8 @uaddv_v8i8(<8 x i8> %a) {
; CHECK-LABEL: uaddv_v8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl8
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    uaddv d0, p0, z0.b
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: uaddv_v8i8:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #16
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT:    str d0, [sp, #8]
; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #13]
; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #12]
; NONEON-NOSVE-NEXT:    ldrb w10, [sp, #11]
; NONEON-NOSVE-NEXT:    ldrb w11, [sp, #10]
; NONEON-NOSVE-NEXT:    ldrb w12, [sp, #9]
; NONEON-NOSVE-NEXT:    ldrb w13, [sp, #8]
; NONEON-NOSVE-NEXT:    ldrb w14, [sp, #14]
; NONEON-NOSVE-NEXT:    add w8, w9, w8
; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #15]
; NONEON-NOSVE-NEXT:    add w12, w13, w12
; NONEON-NOSVE-NEXT:    add w10, w11, w10
; NONEON-NOSVE-NEXT:    add w10, w12, w10
; NONEON-NOSVE-NEXT:    add w8, w8, w14
; NONEON-NOSVE-NEXT:    add w8, w10, w8
; NONEON-NOSVE-NEXT:    add w0, w8, w9
; NONEON-NOSVE-NEXT:    add sp, sp, #16
; NONEON-NOSVE-NEXT:    ret
  %res = call i8 @llvm.vector.reduce.add.v8i8(<8 x i8> %a)
  ret i8 %res
}

define i8 @uaddv_v16i8(<16 x i8> %a) {
; CHECK-LABEL: uaddv_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl16
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    uaddv d0, p0, z0.b
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: uaddv_v16i8:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    str q0, [sp, #-16]!
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #5]
; NONEON-NOSVE-NEXT:    ldrb w10, [sp, #1]
; NONEON-NOSVE-NEXT:    ldrb w11, [sp]
; NONEON-NOSVE-NEXT:    ldrb w12, [sp, #4]
; NONEON-NOSVE-NEXT:    ldrb w13, [sp, #3]
; NONEON-NOSVE-NEXT:    ldrb w14, [sp, #2]
; NONEON-NOSVE-NEXT:    add w10, w11, w10
; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #12]
; NONEON-NOSVE-NEXT:    ldrb w15, [sp, #8]
; NONEON-NOSVE-NEXT:    add w11, w14, w13
; NONEON-NOSVE-NEXT:    add w9, w12, w9
; NONEON-NOSVE-NEXT:    ldrb w16, [sp, #6]
; NONEON-NOSVE-NEXT:    ldrb w12, [sp, #7]
; NONEON-NOSVE-NEXT:    add w10, w10, w11
; NONEON-NOSVE-NEXT:    ldrb w13, [sp, #11]
; NONEON-NOSVE-NEXT:    ldrb w11, [sp, #13]
; NONEON-NOSVE-NEXT:    add w9, w9, w16
; NONEON-NOSVE-NEXT:    ldrb w14, [sp, #9]
; NONEON-NOSVE-NEXT:    add w12, w12, w15
; NONEON-NOSVE-NEXT:    ldrb w15, [sp, #14]
; NONEON-NOSVE-NEXT:    add w8, w13, w8
; NONEON-NOSVE-NEXT:    ldrb w16, [sp, #10]
; NONEON-NOSVE-NEXT:    ldrb w13, [sp, #15]
; NONEON-NOSVE-NEXT:    add w12, w12, w14
; NONEON-NOSVE-NEXT:    add w8, w8, w11
; NONEON-NOSVE-NEXT:    add w9, w10, w9
; NONEON-NOSVE-NEXT:    add w10, w12, w16
; NONEON-NOSVE-NEXT:    add w8, w8, w15
; NONEON-NOSVE-NEXT:    add w9, w9, w10
; NONEON-NOSVE-NEXT:    add w8, w8, w13
; NONEON-NOSVE-NEXT:    add w0, w9, w8
; NONEON-NOSVE-NEXT:    add sp, sp, #16
; NONEON-NOSVE-NEXT:    ret
  %res = call i8 @llvm.vector.reduce.add.v16i8(<16 x i8> %a)
  ret i8 %res
}

define i8 @uaddv_v32i8(ptr %a) {
; CHECK-LABEL: uaddv_v32i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldp q1, q0, [x0]
; CHECK-NEXT:    ptrue p0.b, vl16
; CHECK-NEXT:    add z0.b, z1.b, z0.b
; CHECK-NEXT:    uaddv d0, p0, z0.b
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: uaddv_v32i8:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #-32]!
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT:    ldrb w8, [sp, #17]
; NONEON-NOSVE-NEXT:    ldrb w9, [sp, #1]
; NONEON-NOSVE-NEXT:    ldrb w10, [sp, #16]
; NONEON-NOSVE-NEXT:    ldrb w11, [sp]
; NONEON-NOSVE-NEXT:    ldrb w12, [sp, #18]
; NONEON-NOSVE-NEXT:    ldrb w13, [sp, #2]
; NONEON-NOSVE-NEXT:    add w8, w9, w8
; NONEON-NOSVE-NEXT:    ldrb w14, [sp, #19]
; NONEON-NOSVE-NEXT:    ldrb w15, [sp, #3]
; NONEON-NOSVE-NEXT:    add w9, w11, w10
; NONEON-NOSVE-NEXT:    add w10, w13, w12
; NONEON-NOSVE-NEXT:    ldrb w12, [sp, #21]
; NONEON-NOSVE-NEXT:    ldrb w13, [sp, #5]
; NONEON-NOSVE-NEXT:    add w11, w15, w14
; NONEON-NOSVE-NEXT:    add w8, w9, w8
; NONEON-NOSVE-NEXT:    ldrb w16, [sp, #20]
; NONEON-NOSVE-NEXT:    ldrb w17, [sp, #4]
; NONEON-NOSVE-NEXT:    add w9, w10, w11
; NONEON-NOSVE-NEXT:    add w10, w13, w12
; NONEON-NOSVE-NEXT:    ldrb w11, [sp, #22]
; NONEON-NOSVE-NEXT:    ldrb w12, [sp, #6]
; NONEON-NOSVE-NEXT:    add w14, w17, w16
; NONEON-NOSVE-NEXT:    ldrb w13, [sp, #23]
; NONEON-NOSVE-NEXT:    ldrb w15, [sp, #8]
; NONEON-NOSVE-NEXT:    add w10, w14, w10
; NONEON-NOSVE-NEXT:    ldrb w14, [sp, #7]
; NONEON-NOSVE-NEXT:    add w11, w12, w11
; NONEON-NOSVE-NEXT:    ldrb w12, [sp, #24]
; NONEON-NOSVE-NEXT:    add w8, w8, w9
; NONEON-NOSVE-NEXT:    add w9, w10, w11
; NONEON-NOSVE-NEXT:    add w10, w14, w13
; NONEON-NOSVE-NEXT:    ldrb w13, [sp, #9]
; NONEON-NOSVE-NEXT:    add w8, w8, w9
; NONEON-NOSVE-NEXT:    add w11, w15, w12
; NONEON-NOSVE-NEXT:    ldrb w12, [sp, #25]
; NONEON-NOSVE-NEXT:    ldrb w14, [sp, #26]
; NONEON-NOSVE-NEXT:    add w9, w10, w11
; NONEON-NOSVE-NEXT:    ldrb w11, [sp, #27]
; NONEON-NOSVE-NEXT:    ldrb w15, [sp, #10]
; NONEON-NOSVE-NEXT:    add w10, w13, w12
; NONEON-NOSVE-NEXT:    ldrb w12, [sp, #11]
; NONEON-NOSVE-NEXT:    ldrb w13, [sp, #12]
; NONEON-NOSVE-NEXT:    add w9, w9, w10
; NONEON-NOSVE-NEXT:    ldrb w10, [sp, #28]
; NONEON-NOSVE-NEXT:    ldrb w16, [sp, #14]
; NONEON-NOSVE-NEXT:    add w11, w12, w11
; NONEON-NOSVE-NEXT:    ldrb w12, [sp, #29]
; NONEON-NOSVE-NEXT:    ldrb w17, [sp, #15]
; NONEON-NOSVE-NEXT:    add w10, w13, w10
; NONEON-NOSVE-NEXT:    ldrb w13, [sp, #13]
; NONEON-NOSVE-NEXT:    add w14, w15, w14
; NONEON-NOSVE-NEXT:    add w10, w11, w10
; NONEON-NOSVE-NEXT:    ldrb w11, [sp, #30]
; NONEON-NOSVE-NEXT:    add w9, w9, w14
; NONEON-NOSVE-NEXT:    add w12, w13, w12
; NONEON-NOSVE-NEXT:    ldrb w13, [sp, #31]
; NONEON-NOSVE-NEXT:    add w8, w8, w9
; NONEON-NOSVE-NEXT:    add w10, w10, w12
; NONEON-NOSVE-NEXT:    add w11, w16, w11
; NONEON-NOSVE-NEXT:    add w10, w10, w11
; NONEON-NOSVE-NEXT:    add w11, w17, w13
; NONEON-NOSVE-NEXT:    add w9, w10, w11
; NONEON-NOSVE-NEXT:    add w0, w8, w9
; NONEON-NOSVE-NEXT:    add sp, sp, #32
; NONEON-NOSVE-NEXT:    ret
  %op = load <32 x i8>, ptr %a
  %res = call i8 @llvm.vector.reduce.add.v32i8(<32 x i8> %op)
  ret i8 %res
}

define i16 @uaddv_v4i16(<4 x i16> %a) {
; CHECK-LABEL: uaddv_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl4
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    uaddv d0, p0, z0.h
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: uaddv_v4i16:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #16
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT:    str d0, [sp, #8]
; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #14]
; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #12]
; NONEON-NOSVE-NEXT:    ldrh w10, [sp, #10]
; NONEON-NOSVE-NEXT:    ldrh w11, [sp, #8]
; NONEON-NOSVE-NEXT:    add w8, w9, w8
; NONEON-NOSVE-NEXT:    add w10, w11, w10
; NONEON-NOSVE-NEXT:    add w0, w10, w8
; NONEON-NOSVE-NEXT:    add sp, sp, #16
; NONEON-NOSVE-NEXT:    ret
  %res = call i16 @llvm.vector.reduce.add.v4i16(<4 x i16> %a)
  ret i16 %res
}

define i16 @uaddv_v8i16(<8 x i16> %a) {
; CHECK-LABEL: uaddv_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl8
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    uaddv d0, p0, z0.h
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: uaddv_v8i16:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    str q0, [sp, #-16]!
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #10]
; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #8]
; NONEON-NOSVE-NEXT:    ldrh w10, [sp, #6]
; NONEON-NOSVE-NEXT:    ldrh w11, [sp, #4]
; NONEON-NOSVE-NEXT:    ldrh w12, [sp, #2]
; NONEON-NOSVE-NEXT:    ldrh w13, [sp]
; NONEON-NOSVE-NEXT:    add w8, w9, w8
; NONEON-NOSVE-NEXT:    ldrh w14, [sp, #12]
; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #14]
; NONEON-NOSVE-NEXT:    add w12, w13, w12
; NONEON-NOSVE-NEXT:    add w10, w11, w10
; NONEON-NOSVE-NEXT:    add w10, w12, w10
; NONEON-NOSVE-NEXT:    add w8, w8, w14
; NONEON-NOSVE-NEXT:    add w8, w10, w8
; NONEON-NOSVE-NEXT:    add w0, w8, w9
; NONEON-NOSVE-NEXT:    add sp, sp, #16
; NONEON-NOSVE-NEXT:    ret
  %res = call i16 @llvm.vector.reduce.add.v8i16(<8 x i16> %a)
  ret i16 %res
}

define i16 @uaddv_v16i16(ptr %a) {
; CHECK-LABEL: uaddv_v16i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldp q1, q0, [x0]
; CHECK-NEXT:    ptrue p0.h, vl8
; CHECK-NEXT:    add z0.h, z1.h, z0.h
; CHECK-NEXT:    uaddv d0, p0, z0.h
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: uaddv_v16i16:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #-32]!
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT:    ldrh w8, [sp, #18]
; NONEON-NOSVE-NEXT:    ldrh w9, [sp, #2]
; NONEON-NOSVE-NEXT:    ldrh w10, [sp, #16]
; NONEON-NOSVE-NEXT:    ldrh w11, [sp]
; NONEON-NOSVE-NEXT:    ldrh w12, [sp, #20]
; NONEON-NOSVE-NEXT:    ldrh w13, [sp, #4]
; NONEON-NOSVE-NEXT:    add w8, w9, w8
; NONEON-NOSVE-NEXT:    ldrh w14, [sp, #22]
; NONEON-NOSVE-NEXT:    ldrh w15, [sp, #6]
; NONEON-NOSVE-NEXT:    add w9, w11, w10
; NONEON-NOSVE-NEXT:    add w12, w13, w12
; NONEON-NOSVE-NEXT:    ldrh w10, [sp, #26]
; NONEON-NOSVE-NEXT:    ldrh w11, [sp, #10]
; NONEON-NOSVE-NEXT:    add w13, w15, w14
; NONEON-NOSVE-NEXT:    add w8, w9, w8
; NONEON-NOSVE-NEXT:    ldrh w16, [sp, #24]
; NONEON-NOSVE-NEXT:    ldrh w17, [sp, #8]
; NONEON-NOSVE-NEXT:    add w9, w12, w13
; NONEON-NOSVE-NEXT:    add w10, w11, w10
; NONEON-NOSVE-NEXT:    ldrh w11, [sp, #28]
; NONEON-NOSVE-NEXT:    ldrh w15, [sp, #12]
; NONEON-NOSVE-NEXT:    add w14, w17, w16
; NONEON-NOSVE-NEXT:    ldrh w12, [sp, #30]
; NONEON-NOSVE-NEXT:    ldrh w13, [sp, #14]
; NONEON-NOSVE-NEXT:    add w10, w14, w10
; NONEON-NOSVE-NEXT:    add w11, w15, w11
; NONEON-NOSVE-NEXT:    add w8, w8, w9
; NONEON-NOSVE-NEXT:    add w9, w10, w11
; NONEON-NOSVE-NEXT:    add w8, w8, w9
; NONEON-NOSVE-NEXT:    add w9, w13, w12
; NONEON-NOSVE-NEXT:    add w0, w8, w9
; NONEON-NOSVE-NEXT:    add sp, sp, #32
; NONEON-NOSVE-NEXT:    ret
  %op = load <16 x i16>, ptr %a
  %res = call i16 @llvm.vector.reduce.add.v16i16(<16 x i16> %op)
  ret i16 %res
}

define i32 @uaddv_v2i32(<2 x i32> %a) {
; CHECK-LABEL: uaddv_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl2
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    uaddv d0, p0, z0.s
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: uaddv_v2i32:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #16
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT:    str d0, [sp, #8]
; NONEON-NOSVE-NEXT:    ldp w9, w8, [sp, #8]
; NONEON-NOSVE-NEXT:    add w0, w9, w8
; NONEON-NOSVE-NEXT:    add sp, sp, #16
; NONEON-NOSVE-NEXT:    ret
  %res = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> %a)
  ret i32 %res
}

define i32 @uaddv_v4i32(<4 x i32> %a) {
; CHECK-LABEL: uaddv_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl4
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    uaddv d0, p0, z0.s
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: uaddv_v4i32:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    str q0, [sp, #-16]!
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT:    ldp w9, w8, [sp, #8]
; NONEON-NOSVE-NEXT:    ldp w11, w10, [sp], #16
; NONEON-NOSVE-NEXT:    add w10, w11, w10
; NONEON-NOSVE-NEXT:    add w8, w9, w8
; NONEON-NOSVE-NEXT:    add w0, w10, w8
; NONEON-NOSVE-NEXT:    ret
  %res = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %a)
  ret i32 %res
}

define i32 @uaddv_v8i32(ptr %a) {
; CHECK-LABEL: uaddv_v8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldp q1, q0, [x0]
; CHECK-NEXT:    ptrue p0.s, vl4
; CHECK-NEXT:    add z0.s, z1.s, z0.s
; CHECK-NEXT:    uaddv d0, p0, z0.s
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: uaddv_v8i32:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #-32]!
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT:    ldp w9, w8, [sp, #16]
; NONEON-NOSVE-NEXT:    ldp w11, w10, [sp]
; NONEON-NOSVE-NEXT:    ldp w12, w13, [sp, #24]
; NONEON-NOSVE-NEXT:    ldp w14, w15, [sp, #8]
; NONEON-NOSVE-NEXT:    add w8, w10, w8
; NONEON-NOSVE-NEXT:    add w9, w11, w9
; NONEON-NOSVE-NEXT:    add w8, w9, w8
; NONEON-NOSVE-NEXT:    add w10, w14, w12
; NONEON-NOSVE-NEXT:    add w11, w15, w13
; NONEON-NOSVE-NEXT:    add w9, w10, w11
; NONEON-NOSVE-NEXT:    add w0, w8, w9
; NONEON-NOSVE-NEXT:    add sp, sp, #32
; NONEON-NOSVE-NEXT:    ret
  %op = load <8 x i32>, ptr %a
  %res = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %op)
  ret i32 %res
}

define i64 @uaddv_v2i64(<2 x i64> %a) {
; CHECK-LABEL: uaddv_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    uaddv d0, p0, z0.d
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: uaddv_v2i64:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    str q0, [sp, #-16]!
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT:    ldp x9, x8, [sp], #16
; NONEON-NOSVE-NEXT:    add x0, x9, x8
; NONEON-NOSVE-NEXT:    ret
  %res = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> %a)
  ret i64 %res
}

define i64 @uaddv_v4i64(ptr %a) {
; CHECK-LABEL: uaddv_v4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldp q1, q0, [x0]
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    add z0.d, z1.d, z0.d
; CHECK-NEXT:    uaddv d0, p0, z0.d
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: uaddv_v4i64:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #-32]!
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT:    ldp x9, x8, [sp, #16]
; NONEON-NOSVE-NEXT:    ldp x11, x10, [sp], #32
; NONEON-NOSVE-NEXT:    add x8, x10, x8
; NONEON-NOSVE-NEXT:    add x9, x11, x9
; NONEON-NOSVE-NEXT:    add x0, x9, x8
; NONEON-NOSVE-NEXT:    ret
  %op = load <4 x i64>, ptr %a
  %res = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> %op)
  ret i64 %res
}

;
; SMAXV
;
; llvm.vector.reduce.smax. Under the CHECK prefix the reduction is smaxv, with
; a predicated smax combining the two loaded halves for the 256-bit cases.
; Under the NONEON-NOSVE prefix it is a cmp/csel (gt) chain over elements
; reloaded from the stack.
;

define i8 @smaxv_v8i8(<8 x i8> %a) {
; CHECK-LABEL: smaxv_v8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl8
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    smaxv b0, p0, z0.b
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: smaxv_v8i8:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #16
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT:    str d0, [sp, #8]
; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #9]
; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #8]
; NONEON-NOSVE-NEXT:    ldrsb w10, [sp, #10]
; NONEON-NOSVE-NEXT:    cmp w9, w8
; NONEON-NOSVE-NEXT:    csel w8, w9, w8, gt
; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #11]
; NONEON-NOSVE-NEXT:    cmp w8, w10
; NONEON-NOSVE-NEXT:    csel w8, w8, w10, gt
; NONEON-NOSVE-NEXT:    ldrsb w10, [sp, #12]
; NONEON-NOSVE-NEXT:    cmp w8, w9
; NONEON-NOSVE-NEXT:    csel w8, w8, w9, gt
; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #13]
; NONEON-NOSVE-NEXT:    cmp w8, w10
; NONEON-NOSVE-NEXT:    csel w8, w8, w10, gt
; NONEON-NOSVE-NEXT:    ldrsb w10, [sp, #14]
; NONEON-NOSVE-NEXT:    cmp w8, w9
; NONEON-NOSVE-NEXT:    csel w8, w8, w9, gt
; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #15]
; NONEON-NOSVE-NEXT:    cmp w8, w10
; NONEON-NOSVE-NEXT:    csel w8, w8, w10, gt
; NONEON-NOSVE-NEXT:    cmp w8, w9
; NONEON-NOSVE-NEXT:    csel w0, w8, w9, gt
; NONEON-NOSVE-NEXT:    add sp, sp, #16
; NONEON-NOSVE-NEXT:    ret
  %res = call i8 @llvm.vector.reduce.smax.v8i8(<8 x i8> %a)
  ret i8 %res
}

define i8 @smaxv_v16i8(<16 x i8> %a) {
; CHECK-LABEL: smaxv_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl16
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    smaxv b0, p0, z0.b
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: smaxv_v16i8:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    str q0, [sp, #-16]!
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #1]
; NONEON-NOSVE-NEXT:    ldrsb w9, [sp]
; NONEON-NOSVE-NEXT:    ldrsb w10, [sp, #2]
; NONEON-NOSVE-NEXT:    cmp w9, w8
; NONEON-NOSVE-NEXT:    csel w8, w9, w8, gt
; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #3]
; NONEON-NOSVE-NEXT:    cmp w8, w10
; NONEON-NOSVE-NEXT:    csel w8, w8, w10, gt
; NONEON-NOSVE-NEXT:    ldrsb w10, [sp, #4]
; NONEON-NOSVE-NEXT:    cmp w8, w9
; NONEON-NOSVE-NEXT:    csel w8, w8, w9, gt
; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #5]
; NONEON-NOSVE-NEXT:    cmp w8, w10
; NONEON-NOSVE-NEXT:    csel w8, w8, w10, gt
; NONEON-NOSVE-NEXT:    ldrsb w10, [sp, #6]
; NONEON-NOSVE-NEXT:    cmp w8, w9
; NONEON-NOSVE-NEXT:    csel w8, w8, w9, gt
; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #7]
; NONEON-NOSVE-NEXT:    cmp w8, w10
; NONEON-NOSVE-NEXT:    csel w8, w8, w10, gt
; NONEON-NOSVE-NEXT:    ldrsb w10, [sp, #8]
; NONEON-NOSVE-NEXT:    cmp w8, w9
; NONEON-NOSVE-NEXT:    csel w8, w8, w9, gt
; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #9]
; NONEON-NOSVE-NEXT:    cmp w8, w10
; NONEON-NOSVE-NEXT:    csel w8, w8, w10, gt
; NONEON-NOSVE-NEXT:    ldrsb w10, [sp, #10]
; NONEON-NOSVE-NEXT:    cmp w8, w9
; NONEON-NOSVE-NEXT:    csel w8, w8, w9, gt
; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #11]
; NONEON-NOSVE-NEXT:    cmp w8, w10
; NONEON-NOSVE-NEXT:    csel w8, w8, w10, gt
; NONEON-NOSVE-NEXT:    ldrsb w10, [sp, #12]
; NONEON-NOSVE-NEXT:    cmp w8, w9
; NONEON-NOSVE-NEXT:    csel w8, w8, w9, gt
; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #13]
; NONEON-NOSVE-NEXT:    cmp w8, w10
; NONEON-NOSVE-NEXT:    csel w8, w8, w10, gt
; NONEON-NOSVE-NEXT:    ldrsb w10, [sp, #14]
; NONEON-NOSVE-NEXT:    cmp w8, w9
; NONEON-NOSVE-NEXT:    csel w8, w8, w9, gt
; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #15]
; NONEON-NOSVE-NEXT:    cmp w8, w10
; NONEON-NOSVE-NEXT:    csel w8, w8, w10, gt
; NONEON-NOSVE-NEXT:    cmp w8, w9
; NONEON-NOSVE-NEXT:    csel w0, w8, w9, gt
; NONEON-NOSVE-NEXT:    add sp, sp, #16
; NONEON-NOSVE-NEXT:    ret
  %res = call i8 @llvm.vector.reduce.smax.v16i8(<16 x i8> %a)
  ret i8 %res
}

define i8 @smaxv_v32i8(ptr %a) {
; CHECK-LABEL: smaxv_v32i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldp q1, q0, [x0]
; CHECK-NEXT:    ptrue p0.b, vl16
; CHECK-NEXT:    smax z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT:    smaxv b0, p0, z0.b
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: smaxv_v32i8:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #-32]!
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #17]
; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #1]
; NONEON-NOSVE-NEXT:    ldrsb w10, [sp, #16]
; NONEON-NOSVE-NEXT:    ldrsb w11, [sp]
; NONEON-NOSVE-NEXT:    cmp w9, w8
; NONEON-NOSVE-NEXT:    csel w8, w9, w8, gt
; NONEON-NOSVE-NEXT:    cmp w11, w10
; NONEON-NOSVE-NEXT:    csel w9, w11, w10, gt
; NONEON-NOSVE-NEXT:    ldrsb w10, [sp, #18]
; NONEON-NOSVE-NEXT:    ldrsb w11, [sp, #2]
; NONEON-NOSVE-NEXT:    cmp w9, w8
; NONEON-NOSVE-NEXT:    csel w8, w9, w8, gt
; NONEON-NOSVE-NEXT:    cmp w11, w10
; NONEON-NOSVE-NEXT:    csel w9, w11, w10, gt
; NONEON-NOSVE-NEXT:    ldrsb w10, [sp, #19]
; NONEON-NOSVE-NEXT:    ldrsb w11, [sp, #3]
; NONEON-NOSVE-NEXT:    cmp w8, w9
; NONEON-NOSVE-NEXT:    csel w8, w8, w9, gt
; NONEON-NOSVE-NEXT:    cmp w11, w10
; NONEON-NOSVE-NEXT:    csel w9, w11, w10, gt
; NONEON-NOSVE-NEXT:    ldrsb w10, [sp, #20]
; NONEON-NOSVE-NEXT:    ldrsb w11, [sp, #4]
; NONEON-NOSVE-NEXT:    cmp w8, w9
; NONEON-NOSVE-NEXT:    csel w8, w8, w9, gt
; NONEON-NOSVE-NEXT:    cmp w11, w10
; NONEON-NOSVE-NEXT:    csel w9, w11, w10, gt
; NONEON-NOSVE-NEXT:    ldrsb w10, [sp, #21]
; NONEON-NOSVE-NEXT:    ldrsb w11, [sp, #5]
; NONEON-NOSVE-NEXT:    cmp w8, w9
; NONEON-NOSVE-NEXT:    csel w8, w8, w9, gt
; NONEON-NOSVE-NEXT:    cmp w11, w10
; NONEON-NOSVE-NEXT:    csel w9, w11, w10, gt
; NONEON-NOSVE-NEXT:    ldrsb w10, [sp, #22]
; NONEON-NOSVE-NEXT:    ldrsb w11, [sp, #6]
; NONEON-NOSVE-NEXT:    cmp w8, w9
; NONEON-NOSVE-NEXT:    csel w8, w8, w9, gt
; NONEON-NOSVE-NEXT:    cmp w11, w10
; NONEON-NOSVE-NEXT:    csel w9, w11, w10, gt
; NONEON-NOSVE-NEXT:    ldrsb w10, [sp, #23]
; NONEON-NOSVE-NEXT:    ldrsb w11, [sp, #7]
; NONEON-NOSVE-NEXT:    cmp w8, w9
; NONEON-NOSVE-NEXT:    csel w8, w8, w9, gt
; NONEON-NOSVE-NEXT:    cmp w11, w10
; NONEON-NOSVE-NEXT:    csel w9, w11, w10, gt
; NONEON-NOSVE-NEXT:    ldrsb w10, [sp, #24]
; NONEON-NOSVE-NEXT:    ldrsb w11, [sp, #8]
; NONEON-NOSVE-NEXT:    cmp w8, w9
; NONEON-NOSVE-NEXT:    csel w8, w8, w9, gt
; NONEON-NOSVE-NEXT:    cmp w11, w10
; NONEON-NOSVE-NEXT:    csel w9, w11, w10, gt
; NONEON-NOSVE-NEXT:    ldrsb w10, [sp, #25]
; NONEON-NOSVE-NEXT:    ldrsb w11, [sp, #9]
; NONEON-NOSVE-NEXT:    cmp w8, w9
; NONEON-NOSVE-NEXT:    csel w8, w8, w9, gt
; NONEON-NOSVE-NEXT:    cmp w11, w10
; NONEON-NOSVE-NEXT:    csel w9, w11, w10, gt
; NONEON-NOSVE-NEXT:    ldrsb w10, [sp, #26]
; NONEON-NOSVE-NEXT:    ldrsb w11, [sp, #10]
; NONEON-NOSVE-NEXT:    cmp w8, w9
; NONEON-NOSVE-NEXT:    csel w8, w8, w9, gt
; NONEON-NOSVE-NEXT:    cmp w11, w10
; NONEON-NOSVE-NEXT:    csel w9, w11, w10, gt
; NONEON-NOSVE-NEXT:    ldrsb w10, [sp, #27]
; NONEON-NOSVE-NEXT:    ldrsb w11, [sp, #11]
; NONEON-NOSVE-NEXT:    cmp w8, w9
; NONEON-NOSVE-NEXT:    csel w8, w8, w9, gt
; NONEON-NOSVE-NEXT:    cmp w11, w10
; NONEON-NOSVE-NEXT:    csel w9, w11, w10, gt
; NONEON-NOSVE-NEXT:    ldrsb w10, [sp, #28]
; NONEON-NOSVE-NEXT:    ldrsb w11, [sp, #12]
; NONEON-NOSVE-NEXT:    cmp w8, w9
; NONEON-NOSVE-NEXT:    csel w8, w8, w9, gt
; NONEON-NOSVE-NEXT:    cmp w11, w10
; NONEON-NOSVE-NEXT:    csel w9, w11, w10, gt
; NONEON-NOSVE-NEXT:    ldrsb w10, [sp, #29]
; NONEON-NOSVE-NEXT:    ldrsb w11, [sp, #13]
; NONEON-NOSVE-NEXT:    cmp w8, w9
; NONEON-NOSVE-NEXT:    csel w8, w8, w9, gt
; NONEON-NOSVE-NEXT:    cmp w11, w10
; NONEON-NOSVE-NEXT:    csel w9, w11, w10, gt
; NONEON-NOSVE-NEXT:    ldrsb w10, [sp, #30]
; NONEON-NOSVE-NEXT:    ldrsb w11, [sp, #14]
; NONEON-NOSVE-NEXT:    cmp w8, w9
; NONEON-NOSVE-NEXT:    csel w8, w8, w9, gt
; NONEON-NOSVE-NEXT:    cmp w11, w10
; NONEON-NOSVE-NEXT:    csel w9, w11, w10, gt
; NONEON-NOSVE-NEXT:    ldrsb w10, [sp, #31]
; NONEON-NOSVE-NEXT:    ldrsb w11, [sp, #15]
; NONEON-NOSVE-NEXT:    cmp w8, w9
; NONEON-NOSVE-NEXT:    csel w8, w8, w9, gt
; NONEON-NOSVE-NEXT:    cmp w11, w10
; NONEON-NOSVE-NEXT:    csel w9, w11, w10, gt
; NONEON-NOSVE-NEXT:    cmp w8, w9
; NONEON-NOSVE-NEXT:    csel w0, w8, w9, gt
; NONEON-NOSVE-NEXT:    add sp, sp, #32
; NONEON-NOSVE-NEXT:    ret
  %op = load <32 x i8>, ptr %a
  %res = call i8 @llvm.vector.reduce.smax.v32i8(<32 x i8> %op)
  ret i8 %res
}

define i16 @smaxv_v4i16(<4 x i16> %a) {
; CHECK-LABEL: smaxv_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl4
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    smaxv h0, p0, z0.h
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: smaxv_v4i16:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #16
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT:    str d0, [sp, #8]
; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #10]
; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #8]
; NONEON-NOSVE-NEXT:    ldrsh w10, [sp, #12]
; NONEON-NOSVE-NEXT:    cmp w9, w8
; NONEON-NOSVE-NEXT:    csel w8, w9, w8, gt
; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #14]
; NONEON-NOSVE-NEXT:    cmp w8, w10
; NONEON-NOSVE-NEXT:    csel w8, w8, w10, gt
; NONEON-NOSVE-NEXT:    cmp w8, w9
; NONEON-NOSVE-NEXT:    csel w0, w8, w9, gt
; NONEON-NOSVE-NEXT:    add sp, sp, #16
; NONEON-NOSVE-NEXT:    ret
  %res = call i16 @llvm.vector.reduce.smax.v4i16(<4 x i16> %a)
  ret i16 %res
}

define i16 @smaxv_v8i16(<8 x i16> %a) {
; CHECK-LABEL: smaxv_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h, vl8
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    smaxv h0, p0, z0.h
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: smaxv_v8i16:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    str q0, [sp, #-16]!
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #2]
; NONEON-NOSVE-NEXT:    ldrsh w9, [sp]
; NONEON-NOSVE-NEXT:    ldrsh w10, [sp, #4]
; NONEON-NOSVE-NEXT:    cmp w9, w8
; NONEON-NOSVE-NEXT:    csel w8, w9, w8, gt
; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #6]
; NONEON-NOSVE-NEXT:    cmp w8, w10
; NONEON-NOSVE-NEXT:    csel w8, w8, w10, gt
; NONEON-NOSVE-NEXT:    ldrsh w10, [sp, #8]
; NONEON-NOSVE-NEXT:    cmp w8, w9
; NONEON-NOSVE-NEXT:    csel w8, w8, w9, gt
; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #10]
; NONEON-NOSVE-NEXT:    cmp w8, w10
; NONEON-NOSVE-NEXT:    csel w8, w8, w10, gt
; NONEON-NOSVE-NEXT:    ldrsh w10, [sp, #12]
; NONEON-NOSVE-NEXT:    cmp w8, w9
; NONEON-NOSVE-NEXT:    csel w8, w8, w9, gt
; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #14]
; NONEON-NOSVE-NEXT:    cmp w8, w10
; NONEON-NOSVE-NEXT:    csel w8, w8, w10, gt
; NONEON-NOSVE-NEXT:    cmp w8, w9
; NONEON-NOSVE-NEXT:    csel w0, w8, w9, gt
; NONEON-NOSVE-NEXT:    add sp, sp, #16
; NONEON-NOSVE-NEXT:    ret
  %res = call i16 @llvm.vector.reduce.smax.v8i16(<8 x i16> %a)
  ret i16 %res
}

define i16 @smaxv_v16i16(ptr %a) {
; CHECK-LABEL: smaxv_v16i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldp q1, q0, [x0]
; CHECK-NEXT:    ptrue p0.h, vl8
; CHECK-NEXT:    smax z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    smaxv h0, p0, z0.h
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: smaxv_v16i16:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #-32]!
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT:    ldrsh w8, [sp, #18]
; NONEON-NOSVE-NEXT:    ldrsh w9, [sp, #2]
; NONEON-NOSVE-NEXT:    ldrsh w10, [sp, #16]
; NONEON-NOSVE-NEXT:    ldrsh w11, [sp]
; NONEON-NOSVE-NEXT:    cmp w9, w8
; NONEON-NOSVE-NEXT:    csel w8, w9, w8, gt
; NONEON-NOSVE-NEXT:    cmp w11, w10
; NONEON-NOSVE-NEXT:    csel w9, w11, w10, gt
; NONEON-NOSVE-NEXT:    ldrsh w10, [sp, #20]
; NONEON-NOSVE-NEXT:    ldrsh w11, [sp, #4]
; NONEON-NOSVE-NEXT:    cmp w9, w8
; NONEON-NOSVE-NEXT:    csel w8, w9, w8, gt
; NONEON-NOSVE-NEXT:    cmp w11, w10
; NONEON-NOSVE-NEXT:    csel w9, w11, w10, gt
; NONEON-NOSVE-NEXT:    ldrsh w10, [sp, #22]
; NONEON-NOSVE-NEXT:    ldrsh w11, [sp, #6]
; NONEON-NOSVE-NEXT:    cmp w8, w9
; NONEON-NOSVE-NEXT:    csel w8, w8, w9, gt
; NONEON-NOSVE-NEXT:    cmp w11, w10
; NONEON-NOSVE-NEXT:    csel w9, w11, w10, gt
; NONEON-NOSVE-NEXT:    ldrsh w10, [sp, #24]
; NONEON-NOSVE-NEXT:    ldrsh w11, [sp, #8]
; NONEON-NOSVE-NEXT:    cmp w8, w9
; NONEON-NOSVE-NEXT:    csel w8, w8, w9, gt
; NONEON-NOSVE-NEXT:    cmp w11, w10
; NONEON-NOSVE-NEXT:    csel w9, w11, w10, gt
; NONEON-NOSVE-NEXT:    ldrsh w10, [sp, #26]
; NONEON-NOSVE-NEXT:    ldrsh w11, [sp, #10]
; NONEON-NOSVE-NEXT:    cmp w8, w9
; NONEON-NOSVE-NEXT:    csel w8, w8, w9, gt
; NONEON-NOSVE-NEXT:    cmp w11, w10
; NONEON-NOSVE-NEXT:    csel w9, w11, w10, gt
; NONEON-NOSVE-NEXT:    ldrsh w10, [sp, #28]
; NONEON-NOSVE-NEXT:    ldrsh w11, [sp, #12]
; NONEON-NOSVE-NEXT:    cmp w8, w9
; NONEON-NOSVE-NEXT:    csel w8, w8, w9, gt
; NONEON-NOSVE-NEXT:    cmp w11, w10
; NONEON-NOSVE-NEXT:    csel w9, w11, w10, gt
; NONEON-NOSVE-NEXT:    ldrsh w10, [sp, #30]
; NONEON-NOSVE-NEXT:    ldrsh w11, [sp, #14]
; NONEON-NOSVE-NEXT:    cmp w8, w9
; NONEON-NOSVE-NEXT:    csel w8, w8, w9, gt
; NONEON-NOSVE-NEXT:    cmp w11, w10
; NONEON-NOSVE-NEXT:    csel w9, w11, w10, gt
; NONEON-NOSVE-NEXT:    cmp w8, w9
; NONEON-NOSVE-NEXT:    csel w0, w8, w9, gt
; NONEON-NOSVE-NEXT:    add sp, sp, #32
; NONEON-NOSVE-NEXT:    ret
  %op = load <16 x i16>, ptr %a
  %res = call i16 @llvm.vector.reduce.smax.v16i16(<16 x i16> %op)
  ret i16 %res
}

define i32 @smaxv_v2i32(<2 x i32> %a) {
; CHECK-LABEL: smaxv_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl2
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    smaxv s0, p0, z0.s
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: smaxv_v2i32:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #16
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT:    str d0, [sp, #8]
; NONEON-NOSVE-NEXT:    ldp w9, w8, [sp, #8]
; NONEON-NOSVE-NEXT:    cmp w9, w8
; NONEON-NOSVE-NEXT:    csel w0, w9, w8, gt
; NONEON-NOSVE-NEXT:    add sp, sp, #16
; NONEON-NOSVE-NEXT:    ret
  %res = call i32 @llvm.vector.reduce.smax.v2i32(<2 x i32> %a)
  ret i32 %res
}

define i32 @smaxv_v4i32(<4 x i32> %a) {
; CHECK-LABEL: smaxv_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl4
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    smaxv s0, p0, z0.s
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: smaxv_v4i32:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    str q0, [sp, #-16]!
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT:    ldp w9, w8, [sp]
; NONEON-NOSVE-NEXT:    cmp w9, w8
; NONEON-NOSVE-NEXT:    csel w8, w9, w8, gt
; NONEON-NOSVE-NEXT:    ldp w10, w9, [sp, #8]
; NONEON-NOSVE-NEXT:    cmp w8, w10
; NONEON-NOSVE-NEXT:    csel w8, w8, w10, gt
; NONEON-NOSVE-NEXT:    cmp w8, w9
; NONEON-NOSVE-NEXT:    csel w0, w8, w9, gt
; NONEON-NOSVE-NEXT:    add sp, sp, #16
; NONEON-NOSVE-NEXT:    ret
  %res = call i32 @llvm.vector.reduce.smax.v4i32(<4 x i32> %a)
  ret i32 %res
}

define i32 @smaxv_v8i32(ptr %a) {
; CHECK-LABEL: smaxv_v8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldp q1, q0, [x0]
; CHECK-NEXT:    ptrue p0.s, vl4
; CHECK-NEXT:    smax z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    smaxv s0, p0, z0.s
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: smaxv_v8i32:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #-32]!
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT:    ldp w11, w8, [sp]
; NONEON-NOSVE-NEXT:    ldp w10, w9, [sp, #16]
; NONEON-NOSVE-NEXT:    cmp w8, w9
; NONEON-NOSVE-NEXT:    csel w8, w8, w9, gt
; NONEON-NOSVE-NEXT:    cmp w11, w10
; NONEON-NOSVE-NEXT:    csel w9, w11, w10, gt
; NONEON-NOSVE-NEXT:    cmp w9, w8
; NONEON-NOSVE-NEXT:    ldp w10, w12, [sp, #8]
; NONEON-NOSVE-NEXT:    csel w8, w9, w8, gt
; NONEON-NOSVE-NEXT:    ldp w11, w9, [sp, #24]
; NONEON-NOSVE-NEXT:    cmp w10, w11
; NONEON-NOSVE-NEXT:    csel w10, w10, w11, gt
; NONEON-NOSVE-NEXT:    cmp w8, w10
; NONEON-NOSVE-NEXT:    csel w8, w8, w10, gt
; NONEON-NOSVE-NEXT:    cmp w12, w9
; NONEON-NOSVE-NEXT:    csel w9, w12, w9, gt
; NONEON-NOSVE-NEXT:    cmp w8, w9
; NONEON-NOSVE-NEXT:    csel w0, w8, w9, gt
; NONEON-NOSVE-NEXT:    add sp, sp, #32
; NONEON-NOSVE-NEXT:    ret
  %op = load <8 x i32>, ptr %a
  %res = call i32 @llvm.vector.reduce.smax.v8i32(<8 x i32> %op)
  ret i32 %res
}

; No NEON 64-bit vector SMAXV support. Use SVE.
define i64 @smaxv_v2i64(<2 x i64> %a) {
; CHECK-LABEL: smaxv_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT:    smaxv d0, p0, z0.d
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: smaxv_v2i64:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    str q0, [sp, #-16]!
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT:    ldp x9, x8, [sp], #16
; NONEON-NOSVE-NEXT:    cmp x9, x8
; NONEON-NOSVE-NEXT:    csel x0, x9, x8, gt
; NONEON-NOSVE-NEXT:    ret
  %res = call i64 @llvm.vector.reduce.smax.v2i64(<2 x i64> %a)
  ret i64 %res
}

define i64 @smaxv_v4i64(ptr %a) {
; CHECK-LABEL: smaxv_v4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldp q1, q0, [x0]
; CHECK-NEXT:    ptrue p0.d, vl2
; CHECK-NEXT:    smax z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    smaxv d0, p0, z0.d
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: smaxv_v4i64:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    ldp q1, q0, [x0]
; NONEON-NOSVE-NEXT:    stp q1, q0, [sp, #-32]!
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT:    ldp x8, x10, [sp, #8]
; NONEON-NOSVE-NEXT:    ldr x9, [sp, #24]
; NONEON-NOSVE-NEXT:    ldr x11, [sp], #32
; NONEON-NOSVE-NEXT:    cmp x8, x9
; NONEON-NOSVE-NEXT:    csel x8, x8, x9, gt
; NONEON-NOSVE-NEXT:    cmp x11, x10
; NONEON-NOSVE-NEXT:    csel x9, x11, x10, gt
; NONEON-NOSVE-NEXT:    cmp x9, x8
; NONEON-NOSVE-NEXT:    csel x0, x9, x8, gt
; NONEON-NOSVE-NEXT:    ret
  %op = load <4 x i64>, ptr %a
  %res = call i64 @llvm.vector.reduce.smax.v4i64(<4 x i64> %op)
  ret i64 %res
}

;
; SMINV
;

define i8 @sminv_v8i8(<8 x i8> %a) {
; CHECK-LABEL: sminv_v8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.b, vl8
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT:    sminv b0, p0, z0.b
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
;
; NONEON-NOSVE-LABEL: sminv_v8i8:
; NONEON-NOSVE:       // %bb.0:
; NONEON-NOSVE-NEXT:    sub sp, sp, #16
; NONEON-NOSVE-NEXT:    .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT:    str d0, [sp, #8]
; NONEON-NOSVE-NEXT:    ldrsb w8, [sp, #9]
; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #8]
; NONEON-NOSVE-NEXT:    ldrsb w10, [sp, #10]
; NONEON-NOSVE-NEXT:    cmp w9, w8
; NONEON-NOSVE-NEXT:    csel w8, w9, w8, lt
; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #11]
; NONEON-NOSVE-NEXT:    cmp w8, w10
; NONEON-NOSVE-NEXT:    csel w8, w8, w10, lt
; NONEON-NOSVE-NEXT:    ldrsb w10, [sp, #12]
; NONEON-NOSVE-NEXT:    cmp w8, w9
; NONEON-NOSVE-NEXT:    csel w8, w8, w9, lt
; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #13]
; NONEON-NOSVE-NEXT:    cmp w8, w10
; NONEON-NOSVE-NEXT:    csel w8, w8, w10, lt
; NONEON-NOSVE-NEXT:    ldrsb w10, [sp, #14]
; NONEON-NOSVE-NEXT:    cmp w8, w9
; NONEON-NOSVE-NEXT:    csel w8, w8, w9, lt
; NONEON-NOSVE-NEXT:    ldrsb w9, [sp, #15]
; NONEON-NOSVE-NEXT:    cmp w8, w10
; NONEON-NOSVE-NEXT:    csel w8, w8, w10, lt
; NONEON-NOSVE-NEXT:    cmp w8, w9
; NONEON-NOSVE-NEXT:    csel w0, w8, w9, lt
; NONEON-NOSVE-NEXT:    add
sp, sp, #16 969; NONEON-NOSVE-NEXT: ret 970 %res = call i8 @llvm.vector.reduce.smin.v8i8(<8 x i8> %a) 971 ret i8 %res 972} 973 974define i8 @sminv_v16i8(<16 x i8> %a) { 975; CHECK-LABEL: sminv_v16i8: 976; CHECK: // %bb.0: 977; CHECK-NEXT: ptrue p0.b, vl16 978; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 979; CHECK-NEXT: sminv b0, p0, z0.b 980; CHECK-NEXT: fmov w0, s0 981; CHECK-NEXT: ret 982; 983; NONEON-NOSVE-LABEL: sminv_v16i8: 984; NONEON-NOSVE: // %bb.0: 985; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 986; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 987; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #1] 988; NONEON-NOSVE-NEXT: ldrsb w9, [sp] 989; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #2] 990; NONEON-NOSVE-NEXT: cmp w9, w8 991; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt 992; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #3] 993; NONEON-NOSVE-NEXT: cmp w8, w10 994; NONEON-NOSVE-NEXT: csel w8, w8, w10, lt 995; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #4] 996; NONEON-NOSVE-NEXT: cmp w8, w9 997; NONEON-NOSVE-NEXT: csel w8, w8, w9, lt 998; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #5] 999; NONEON-NOSVE-NEXT: cmp w8, w10 1000; NONEON-NOSVE-NEXT: csel w8, w8, w10, lt 1001; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #6] 1002; NONEON-NOSVE-NEXT: cmp w8, w9 1003; NONEON-NOSVE-NEXT: csel w8, w8, w9, lt 1004; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #7] 1005; NONEON-NOSVE-NEXT: cmp w8, w10 1006; NONEON-NOSVE-NEXT: csel w8, w8, w10, lt 1007; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #8] 1008; NONEON-NOSVE-NEXT: cmp w8, w9 1009; NONEON-NOSVE-NEXT: csel w8, w8, w9, lt 1010; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #9] 1011; NONEON-NOSVE-NEXT: cmp w8, w10 1012; NONEON-NOSVE-NEXT: csel w8, w8, w10, lt 1013; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #10] 1014; NONEON-NOSVE-NEXT: cmp w8, w9 1015; NONEON-NOSVE-NEXT: csel w8, w8, w9, lt 1016; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #11] 1017; NONEON-NOSVE-NEXT: cmp w8, w10 1018; NONEON-NOSVE-NEXT: csel w8, w8, w10, lt 1019; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #12] 1020; NONEON-NOSVE-NEXT: cmp w8, w9 1021; 
NONEON-NOSVE-NEXT: csel w8, w8, w9, lt 1022; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #13] 1023; NONEON-NOSVE-NEXT: cmp w8, w10 1024; NONEON-NOSVE-NEXT: csel w8, w8, w10, lt 1025; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #14] 1026; NONEON-NOSVE-NEXT: cmp w8, w9 1027; NONEON-NOSVE-NEXT: csel w8, w8, w9, lt 1028; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #15] 1029; NONEON-NOSVE-NEXT: cmp w8, w10 1030; NONEON-NOSVE-NEXT: csel w8, w8, w10, lt 1031; NONEON-NOSVE-NEXT: cmp w8, w9 1032; NONEON-NOSVE-NEXT: csel w0, w8, w9, lt 1033; NONEON-NOSVE-NEXT: add sp, sp, #16 1034; NONEON-NOSVE-NEXT: ret 1035 %res = call i8 @llvm.vector.reduce.smin.v16i8(<16 x i8> %a) 1036 ret i8 %res 1037} 1038 1039define i8 @sminv_v32i8(ptr %a) { 1040; CHECK-LABEL: sminv_v32i8: 1041; CHECK: // %bb.0: 1042; CHECK-NEXT: ldp q1, q0, [x0] 1043; CHECK-NEXT: ptrue p0.b, vl16 1044; CHECK-NEXT: smin z0.b, p0/m, z0.b, z1.b 1045; CHECK-NEXT: sminv b0, p0, z0.b 1046; CHECK-NEXT: fmov w0, s0 1047; CHECK-NEXT: ret 1048; 1049; NONEON-NOSVE-LABEL: sminv_v32i8: 1050; NONEON-NOSVE: // %bb.0: 1051; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] 1052; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]! 
1053; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 1054; NONEON-NOSVE-NEXT: ldrsb w8, [sp, #17] 1055; NONEON-NOSVE-NEXT: ldrsb w9, [sp, #1] 1056; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #16] 1057; NONEON-NOSVE-NEXT: ldrsb w11, [sp] 1058; NONEON-NOSVE-NEXT: cmp w9, w8 1059; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt 1060; NONEON-NOSVE-NEXT: cmp w11, w10 1061; NONEON-NOSVE-NEXT: csel w9, w11, w10, lt 1062; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #18] 1063; NONEON-NOSVE-NEXT: ldrsb w11, [sp, #2] 1064; NONEON-NOSVE-NEXT: cmp w9, w8 1065; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt 1066; NONEON-NOSVE-NEXT: cmp w11, w10 1067; NONEON-NOSVE-NEXT: csel w9, w11, w10, lt 1068; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #19] 1069; NONEON-NOSVE-NEXT: ldrsb w11, [sp, #3] 1070; NONEON-NOSVE-NEXT: cmp w8, w9 1071; NONEON-NOSVE-NEXT: csel w8, w8, w9, lt 1072; NONEON-NOSVE-NEXT: cmp w11, w10 1073; NONEON-NOSVE-NEXT: csel w9, w11, w10, lt 1074; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #20] 1075; NONEON-NOSVE-NEXT: ldrsb w11, [sp, #4] 1076; NONEON-NOSVE-NEXT: cmp w8, w9 1077; NONEON-NOSVE-NEXT: csel w8, w8, w9, lt 1078; NONEON-NOSVE-NEXT: cmp w11, w10 1079; NONEON-NOSVE-NEXT: csel w9, w11, w10, lt 1080; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #21] 1081; NONEON-NOSVE-NEXT: ldrsb w11, [sp, #5] 1082; NONEON-NOSVE-NEXT: cmp w8, w9 1083; NONEON-NOSVE-NEXT: csel w8, w8, w9, lt 1084; NONEON-NOSVE-NEXT: cmp w11, w10 1085; NONEON-NOSVE-NEXT: csel w9, w11, w10, lt 1086; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #22] 1087; NONEON-NOSVE-NEXT: ldrsb w11, [sp, #6] 1088; NONEON-NOSVE-NEXT: cmp w8, w9 1089; NONEON-NOSVE-NEXT: csel w8, w8, w9, lt 1090; NONEON-NOSVE-NEXT: cmp w11, w10 1091; NONEON-NOSVE-NEXT: csel w9, w11, w10, lt 1092; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #23] 1093; NONEON-NOSVE-NEXT: ldrsb w11, [sp, #7] 1094; NONEON-NOSVE-NEXT: cmp w8, w9 1095; NONEON-NOSVE-NEXT: csel w8, w8, w9, lt 1096; NONEON-NOSVE-NEXT: cmp w11, w10 1097; NONEON-NOSVE-NEXT: csel w9, w11, w10, lt 1098; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #24] 1099; 
NONEON-NOSVE-NEXT: ldrsb w11, [sp, #8] 1100; NONEON-NOSVE-NEXT: cmp w8, w9 1101; NONEON-NOSVE-NEXT: csel w8, w8, w9, lt 1102; NONEON-NOSVE-NEXT: cmp w11, w10 1103; NONEON-NOSVE-NEXT: csel w9, w11, w10, lt 1104; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #25] 1105; NONEON-NOSVE-NEXT: ldrsb w11, [sp, #9] 1106; NONEON-NOSVE-NEXT: cmp w8, w9 1107; NONEON-NOSVE-NEXT: csel w8, w8, w9, lt 1108; NONEON-NOSVE-NEXT: cmp w11, w10 1109; NONEON-NOSVE-NEXT: csel w9, w11, w10, lt 1110; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #26] 1111; NONEON-NOSVE-NEXT: ldrsb w11, [sp, #10] 1112; NONEON-NOSVE-NEXT: cmp w8, w9 1113; NONEON-NOSVE-NEXT: csel w8, w8, w9, lt 1114; NONEON-NOSVE-NEXT: cmp w11, w10 1115; NONEON-NOSVE-NEXT: csel w9, w11, w10, lt 1116; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #27] 1117; NONEON-NOSVE-NEXT: ldrsb w11, [sp, #11] 1118; NONEON-NOSVE-NEXT: cmp w8, w9 1119; NONEON-NOSVE-NEXT: csel w8, w8, w9, lt 1120; NONEON-NOSVE-NEXT: cmp w11, w10 1121; NONEON-NOSVE-NEXT: csel w9, w11, w10, lt 1122; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #28] 1123; NONEON-NOSVE-NEXT: ldrsb w11, [sp, #12] 1124; NONEON-NOSVE-NEXT: cmp w8, w9 1125; NONEON-NOSVE-NEXT: csel w8, w8, w9, lt 1126; NONEON-NOSVE-NEXT: cmp w11, w10 1127; NONEON-NOSVE-NEXT: csel w9, w11, w10, lt 1128; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #29] 1129; NONEON-NOSVE-NEXT: ldrsb w11, [sp, #13] 1130; NONEON-NOSVE-NEXT: cmp w8, w9 1131; NONEON-NOSVE-NEXT: csel w8, w8, w9, lt 1132; NONEON-NOSVE-NEXT: cmp w11, w10 1133; NONEON-NOSVE-NEXT: csel w9, w11, w10, lt 1134; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #30] 1135; NONEON-NOSVE-NEXT: ldrsb w11, [sp, #14] 1136; NONEON-NOSVE-NEXT: cmp w8, w9 1137; NONEON-NOSVE-NEXT: csel w8, w8, w9, lt 1138; NONEON-NOSVE-NEXT: cmp w11, w10 1139; NONEON-NOSVE-NEXT: csel w9, w11, w10, lt 1140; NONEON-NOSVE-NEXT: ldrsb w10, [sp, #31] 1141; NONEON-NOSVE-NEXT: ldrsb w11, [sp, #15] 1142; NONEON-NOSVE-NEXT: cmp w8, w9 1143; NONEON-NOSVE-NEXT: csel w8, w8, w9, lt 1144; NONEON-NOSVE-NEXT: cmp w11, w10 1145; NONEON-NOSVE-NEXT: csel w9, 
w11, w10, lt 1146; NONEON-NOSVE-NEXT: cmp w8, w9 1147; NONEON-NOSVE-NEXT: csel w0, w8, w9, lt 1148; NONEON-NOSVE-NEXT: add sp, sp, #32 1149; NONEON-NOSVE-NEXT: ret 1150 %op = load <32 x i8>, ptr %a 1151 %res = call i8 @llvm.vector.reduce.smin.v32i8(<32 x i8> %op) 1152 ret i8 %res 1153} 1154 1155define i16 @sminv_v4i16(<4 x i16> %a) { 1156; CHECK-LABEL: sminv_v4i16: 1157; CHECK: // %bb.0: 1158; CHECK-NEXT: ptrue p0.h, vl4 1159; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 1160; CHECK-NEXT: sminv h0, p0, z0.h 1161; CHECK-NEXT: fmov w0, s0 1162; CHECK-NEXT: ret 1163; 1164; NONEON-NOSVE-LABEL: sminv_v4i16: 1165; NONEON-NOSVE: // %bb.0: 1166; NONEON-NOSVE-NEXT: sub sp, sp, #16 1167; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 1168; NONEON-NOSVE-NEXT: str d0, [sp, #8] 1169; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #10] 1170; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #8] 1171; NONEON-NOSVE-NEXT: ldrsh w10, [sp, #12] 1172; NONEON-NOSVE-NEXT: cmp w9, w8 1173; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt 1174; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #14] 1175; NONEON-NOSVE-NEXT: cmp w8, w10 1176; NONEON-NOSVE-NEXT: csel w8, w8, w10, lt 1177; NONEON-NOSVE-NEXT: cmp w8, w9 1178; NONEON-NOSVE-NEXT: csel w0, w8, w9, lt 1179; NONEON-NOSVE-NEXT: add sp, sp, #16 1180; NONEON-NOSVE-NEXT: ret 1181 %res = call i16 @llvm.vector.reduce.smin.v4i16(<4 x i16> %a) 1182 ret i16 %res 1183} 1184 1185define i16 @sminv_v8i16(<8 x i16> %a) { 1186; CHECK-LABEL: sminv_v8i16: 1187; CHECK: // %bb.0: 1188; CHECK-NEXT: ptrue p0.h, vl8 1189; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 1190; CHECK-NEXT: sminv h0, p0, z0.h 1191; CHECK-NEXT: fmov w0, s0 1192; CHECK-NEXT: ret 1193; 1194; NONEON-NOSVE-LABEL: sminv_v8i16: 1195; NONEON-NOSVE: // %bb.0: 1196; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
1197; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 1198; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #2] 1199; NONEON-NOSVE-NEXT: ldrsh w9, [sp] 1200; NONEON-NOSVE-NEXT: ldrsh w10, [sp, #4] 1201; NONEON-NOSVE-NEXT: cmp w9, w8 1202; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt 1203; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #6] 1204; NONEON-NOSVE-NEXT: cmp w8, w10 1205; NONEON-NOSVE-NEXT: csel w8, w8, w10, lt 1206; NONEON-NOSVE-NEXT: ldrsh w10, [sp, #8] 1207; NONEON-NOSVE-NEXT: cmp w8, w9 1208; NONEON-NOSVE-NEXT: csel w8, w8, w9, lt 1209; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #10] 1210; NONEON-NOSVE-NEXT: cmp w8, w10 1211; NONEON-NOSVE-NEXT: csel w8, w8, w10, lt 1212; NONEON-NOSVE-NEXT: ldrsh w10, [sp, #12] 1213; NONEON-NOSVE-NEXT: cmp w8, w9 1214; NONEON-NOSVE-NEXT: csel w8, w8, w9, lt 1215; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #14] 1216; NONEON-NOSVE-NEXT: cmp w8, w10 1217; NONEON-NOSVE-NEXT: csel w8, w8, w10, lt 1218; NONEON-NOSVE-NEXT: cmp w8, w9 1219; NONEON-NOSVE-NEXT: csel w0, w8, w9, lt 1220; NONEON-NOSVE-NEXT: add sp, sp, #16 1221; NONEON-NOSVE-NEXT: ret 1222 %res = call i16 @llvm.vector.reduce.smin.v8i16(<8 x i16> %a) 1223 ret i16 %res 1224} 1225 1226define i16 @sminv_v16i16(ptr %a) { 1227; CHECK-LABEL: sminv_v16i16: 1228; CHECK: // %bb.0: 1229; CHECK-NEXT: ldp q1, q0, [x0] 1230; CHECK-NEXT: ptrue p0.h, vl8 1231; CHECK-NEXT: smin z0.h, p0/m, z0.h, z1.h 1232; CHECK-NEXT: sminv h0, p0, z0.h 1233; CHECK-NEXT: fmov w0, s0 1234; CHECK-NEXT: ret 1235; 1236; NONEON-NOSVE-LABEL: sminv_v16i16: 1237; NONEON-NOSVE: // %bb.0: 1238; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] 1239; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]! 
1240; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 1241; NONEON-NOSVE-NEXT: ldrsh w8, [sp, #18] 1242; NONEON-NOSVE-NEXT: ldrsh w9, [sp, #2] 1243; NONEON-NOSVE-NEXT: ldrsh w10, [sp, #16] 1244; NONEON-NOSVE-NEXT: ldrsh w11, [sp] 1245; NONEON-NOSVE-NEXT: cmp w9, w8 1246; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt 1247; NONEON-NOSVE-NEXT: cmp w11, w10 1248; NONEON-NOSVE-NEXT: csel w9, w11, w10, lt 1249; NONEON-NOSVE-NEXT: ldrsh w10, [sp, #20] 1250; NONEON-NOSVE-NEXT: ldrsh w11, [sp, #4] 1251; NONEON-NOSVE-NEXT: cmp w9, w8 1252; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt 1253; NONEON-NOSVE-NEXT: cmp w11, w10 1254; NONEON-NOSVE-NEXT: csel w9, w11, w10, lt 1255; NONEON-NOSVE-NEXT: ldrsh w10, [sp, #22] 1256; NONEON-NOSVE-NEXT: ldrsh w11, [sp, #6] 1257; NONEON-NOSVE-NEXT: cmp w8, w9 1258; NONEON-NOSVE-NEXT: csel w8, w8, w9, lt 1259; NONEON-NOSVE-NEXT: cmp w11, w10 1260; NONEON-NOSVE-NEXT: csel w9, w11, w10, lt 1261; NONEON-NOSVE-NEXT: ldrsh w10, [sp, #24] 1262; NONEON-NOSVE-NEXT: ldrsh w11, [sp, #8] 1263; NONEON-NOSVE-NEXT: cmp w8, w9 1264; NONEON-NOSVE-NEXT: csel w8, w8, w9, lt 1265; NONEON-NOSVE-NEXT: cmp w11, w10 1266; NONEON-NOSVE-NEXT: csel w9, w11, w10, lt 1267; NONEON-NOSVE-NEXT: ldrsh w10, [sp, #26] 1268; NONEON-NOSVE-NEXT: ldrsh w11, [sp, #10] 1269; NONEON-NOSVE-NEXT: cmp w8, w9 1270; NONEON-NOSVE-NEXT: csel w8, w8, w9, lt 1271; NONEON-NOSVE-NEXT: cmp w11, w10 1272; NONEON-NOSVE-NEXT: csel w9, w11, w10, lt 1273; NONEON-NOSVE-NEXT: ldrsh w10, [sp, #28] 1274; NONEON-NOSVE-NEXT: ldrsh w11, [sp, #12] 1275; NONEON-NOSVE-NEXT: cmp w8, w9 1276; NONEON-NOSVE-NEXT: csel w8, w8, w9, lt 1277; NONEON-NOSVE-NEXT: cmp w11, w10 1278; NONEON-NOSVE-NEXT: csel w9, w11, w10, lt 1279; NONEON-NOSVE-NEXT: ldrsh w10, [sp, #30] 1280; NONEON-NOSVE-NEXT: ldrsh w11, [sp, #14] 1281; NONEON-NOSVE-NEXT: cmp w8, w9 1282; NONEON-NOSVE-NEXT: csel w8, w8, w9, lt 1283; NONEON-NOSVE-NEXT: cmp w11, w10 1284; NONEON-NOSVE-NEXT: csel w9, w11, w10, lt 1285; NONEON-NOSVE-NEXT: cmp w8, w9 1286; NONEON-NOSVE-NEXT: 
csel w0, w8, w9, lt 1287; NONEON-NOSVE-NEXT: add sp, sp, #32 1288; NONEON-NOSVE-NEXT: ret 1289 %op = load <16 x i16>, ptr %a 1290 %res = call i16 @llvm.vector.reduce.smin.v16i16(<16 x i16> %op) 1291 ret i16 %res 1292} 1293 1294define i32 @sminv_v2i32(<2 x i32> %a) { 1295; CHECK-LABEL: sminv_v2i32: 1296; CHECK: // %bb.0: 1297; CHECK-NEXT: ptrue p0.s, vl2 1298; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 1299; CHECK-NEXT: sminv s0, p0, z0.s 1300; CHECK-NEXT: fmov w0, s0 1301; CHECK-NEXT: ret 1302; 1303; NONEON-NOSVE-LABEL: sminv_v2i32: 1304; NONEON-NOSVE: // %bb.0: 1305; NONEON-NOSVE-NEXT: sub sp, sp, #16 1306; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 1307; NONEON-NOSVE-NEXT: str d0, [sp, #8] 1308; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #8] 1309; NONEON-NOSVE-NEXT: cmp w9, w8 1310; NONEON-NOSVE-NEXT: csel w0, w9, w8, lt 1311; NONEON-NOSVE-NEXT: add sp, sp, #16 1312; NONEON-NOSVE-NEXT: ret 1313 %res = call i32 @llvm.vector.reduce.smin.v2i32(<2 x i32> %a) 1314 ret i32 %res 1315} 1316 1317define i32 @sminv_v4i32(<4 x i32> %a) { 1318; CHECK-LABEL: sminv_v4i32: 1319; CHECK: // %bb.0: 1320; CHECK-NEXT: ptrue p0.s, vl4 1321; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 1322; CHECK-NEXT: sminv s0, p0, z0.s 1323; CHECK-NEXT: fmov w0, s0 1324; CHECK-NEXT: ret 1325; 1326; NONEON-NOSVE-LABEL: sminv_v4i32: 1327; NONEON-NOSVE: // %bb.0: 1328; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
1329; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 1330; NONEON-NOSVE-NEXT: ldp w9, w8, [sp] 1331; NONEON-NOSVE-NEXT: cmp w9, w8 1332; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt 1333; NONEON-NOSVE-NEXT: ldp w10, w9, [sp, #8] 1334; NONEON-NOSVE-NEXT: cmp w8, w10 1335; NONEON-NOSVE-NEXT: csel w8, w8, w10, lt 1336; NONEON-NOSVE-NEXT: cmp w8, w9 1337; NONEON-NOSVE-NEXT: csel w0, w8, w9, lt 1338; NONEON-NOSVE-NEXT: add sp, sp, #16 1339; NONEON-NOSVE-NEXT: ret 1340 %res = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> %a) 1341 ret i32 %res 1342} 1343 1344define i32 @sminv_v8i32(ptr %a) { 1345; CHECK-LABEL: sminv_v8i32: 1346; CHECK: // %bb.0: 1347; CHECK-NEXT: ldp q1, q0, [x0] 1348; CHECK-NEXT: ptrue p0.s, vl4 1349; CHECK-NEXT: smin z0.s, p0/m, z0.s, z1.s 1350; CHECK-NEXT: sminv s0, p0, z0.s 1351; CHECK-NEXT: fmov w0, s0 1352; CHECK-NEXT: ret 1353; 1354; NONEON-NOSVE-LABEL: sminv_v8i32: 1355; NONEON-NOSVE: // %bb.0: 1356; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] 1357; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]! 
1358; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 1359; NONEON-NOSVE-NEXT: ldp w11, w8, [sp] 1360; NONEON-NOSVE-NEXT: ldp w10, w9, [sp, #16] 1361; NONEON-NOSVE-NEXT: cmp w8, w9 1362; NONEON-NOSVE-NEXT: csel w8, w8, w9, lt 1363; NONEON-NOSVE-NEXT: cmp w11, w10 1364; NONEON-NOSVE-NEXT: csel w9, w11, w10, lt 1365; NONEON-NOSVE-NEXT: cmp w9, w8 1366; NONEON-NOSVE-NEXT: ldp w10, w12, [sp, #8] 1367; NONEON-NOSVE-NEXT: csel w8, w9, w8, lt 1368; NONEON-NOSVE-NEXT: ldp w11, w9, [sp, #24] 1369; NONEON-NOSVE-NEXT: cmp w10, w11 1370; NONEON-NOSVE-NEXT: csel w10, w10, w11, lt 1371; NONEON-NOSVE-NEXT: cmp w8, w10 1372; NONEON-NOSVE-NEXT: csel w8, w8, w10, lt 1373; NONEON-NOSVE-NEXT: cmp w12, w9 1374; NONEON-NOSVE-NEXT: csel w9, w12, w9, lt 1375; NONEON-NOSVE-NEXT: cmp w8, w9 1376; NONEON-NOSVE-NEXT: csel w0, w8, w9, lt 1377; NONEON-NOSVE-NEXT: add sp, sp, #32 1378; NONEON-NOSVE-NEXT: ret 1379 %op = load <8 x i32>, ptr %a 1380 %res = call i32 @llvm.vector.reduce.smin.v8i32(<8 x i32> %op) 1381 ret i32 %res 1382} 1383 1384; NEON has no SMINV for 64-bit elements, so the CHECK runs use the SVE SMINV; NONEON-NOSVE expands to scalar cmp/csel. 1385define i64 @sminv_v2i64(<2 x i64> %a) { 1386; CHECK-LABEL: sminv_v2i64: 1387; CHECK: // %bb.0: 1388; CHECK-NEXT: ptrue p0.d, vl2 1389; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 1390; CHECK-NEXT: sminv d0, p0, z0.d 1391; CHECK-NEXT: fmov x0, d0 1392; CHECK-NEXT: ret 1393; 1394; NONEON-NOSVE-LABEL: sminv_v2i64: 1395; NONEON-NOSVE: // %bb.0: 1396; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
1397; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 1398; NONEON-NOSVE-NEXT: ldp x9, x8, [sp], #16 1399; NONEON-NOSVE-NEXT: cmp x9, x8 1400; NONEON-NOSVE-NEXT: csel x0, x9, x8, lt 1401; NONEON-NOSVE-NEXT: ret 1402 %res = call i64 @llvm.vector.reduce.smin.v2i64(<2 x i64> %a) 1403 ret i64 %res 1404} 1405 1406define i64 @sminv_v4i64(ptr %a) { 1407; CHECK-LABEL: sminv_v4i64: 1408; CHECK: // %bb.0: 1409; CHECK-NEXT: ldp q1, q0, [x0] 1410; CHECK-NEXT: ptrue p0.d, vl2 1411; CHECK-NEXT: smin z0.d, p0/m, z0.d, z1.d 1412; CHECK-NEXT: sminv d0, p0, z0.d 1413; CHECK-NEXT: fmov x0, d0 1414; CHECK-NEXT: ret 1415; 1416; NONEON-NOSVE-LABEL: sminv_v4i64: 1417; NONEON-NOSVE: // %bb.0: 1418; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] 1419; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]! 1420; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 1421; NONEON-NOSVE-NEXT: ldp x8, x10, [sp, #8] 1422; NONEON-NOSVE-NEXT: ldr x9, [sp, #24] 1423; NONEON-NOSVE-NEXT: ldr x11, [sp], #32 1424; NONEON-NOSVE-NEXT: cmp x8, x9 1425; NONEON-NOSVE-NEXT: csel x8, x8, x9, lt 1426; NONEON-NOSVE-NEXT: cmp x11, x10 1427; NONEON-NOSVE-NEXT: csel x9, x11, x10, lt 1428; NONEON-NOSVE-NEXT: cmp x9, x8 1429; NONEON-NOSVE-NEXT: csel x0, x9, x8, lt 1430; NONEON-NOSVE-NEXT: ret 1431 %op = load <4 x i64>, ptr %a 1432 %res = call i64 @llvm.vector.reduce.smin.v4i64(<4 x i64> %op) 1433 ret i64 %res 1434} 1435 1436; 1437; UMAXV 1438; 1439 1440define i8 @umaxv_v8i8(<8 x i8> %a) { 1441; CHECK-LABEL: umaxv_v8i8: 1442; CHECK: // %bb.0: 1443; CHECK-NEXT: ptrue p0.b, vl8 1444; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 1445; CHECK-NEXT: umaxv b0, p0, z0.b 1446; CHECK-NEXT: fmov w0, s0 1447; CHECK-NEXT: ret 1448; 1449; NONEON-NOSVE-LABEL: umaxv_v8i8: 1450; NONEON-NOSVE: // %bb.0: 1451; NONEON-NOSVE-NEXT: sub sp, sp, #16 1452; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 1453; NONEON-NOSVE-NEXT: str d0, [sp, #8] 1454; NONEON-NOSVE-NEXT: ldrb w8, [sp, #9] 1455; NONEON-NOSVE-NEXT: ldrb w9, [sp, #8] 1456; NONEON-NOSVE-NEXT: ldrb w10, [sp, #10] 1457; 
NONEON-NOSVE-NEXT: cmp w9, w8 1458; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi 1459; NONEON-NOSVE-NEXT: ldrb w9, [sp, #11] 1460; NONEON-NOSVE-NEXT: cmp w8, w10 1461; NONEON-NOSVE-NEXT: csel w8, w8, w10, hi 1462; NONEON-NOSVE-NEXT: ldrb w10, [sp, #12] 1463; NONEON-NOSVE-NEXT: cmp w8, w9 1464; NONEON-NOSVE-NEXT: csel w8, w8, w9, hi 1465; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13] 1466; NONEON-NOSVE-NEXT: cmp w8, w10 1467; NONEON-NOSVE-NEXT: csel w8, w8, w10, hi 1468; NONEON-NOSVE-NEXT: ldrb w10, [sp, #14] 1469; NONEON-NOSVE-NEXT: cmp w8, w9 1470; NONEON-NOSVE-NEXT: csel w8, w8, w9, hi 1471; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15] 1472; NONEON-NOSVE-NEXT: cmp w8, w10 1473; NONEON-NOSVE-NEXT: csel w8, w8, w10, hi 1474; NONEON-NOSVE-NEXT: cmp w8, w9 1475; NONEON-NOSVE-NEXT: csel w0, w8, w9, hi 1476; NONEON-NOSVE-NEXT: add sp, sp, #16 1477; NONEON-NOSVE-NEXT: ret 1478 %res = call i8 @llvm.vector.reduce.umax.v8i8(<8 x i8> %a) 1479 ret i8 %res 1480} 1481 1482define i8 @umaxv_v16i8(<16 x i8> %a) { 1483; CHECK-LABEL: umaxv_v16i8: 1484; CHECK: // %bb.0: 1485; CHECK-NEXT: ptrue p0.b, vl16 1486; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 1487; CHECK-NEXT: umaxv b0, p0, z0.b 1488; CHECK-NEXT: fmov w0, s0 1489; CHECK-NEXT: ret 1490; 1491; NONEON-NOSVE-LABEL: umaxv_v16i8: 1492; NONEON-NOSVE: // %bb.0: 1493; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 
1494; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 1495; NONEON-NOSVE-NEXT: ldrb w8, [sp, #1] 1496; NONEON-NOSVE-NEXT: ldrb w9, [sp] 1497; NONEON-NOSVE-NEXT: ldrb w10, [sp, #2] 1498; NONEON-NOSVE-NEXT: cmp w9, w8 1499; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi 1500; NONEON-NOSVE-NEXT: ldrb w9, [sp, #3] 1501; NONEON-NOSVE-NEXT: cmp w8, w10 1502; NONEON-NOSVE-NEXT: csel w8, w8, w10, hi 1503; NONEON-NOSVE-NEXT: ldrb w10, [sp, #4] 1504; NONEON-NOSVE-NEXT: cmp w8, w9 1505; NONEON-NOSVE-NEXT: csel w8, w8, w9, hi 1506; NONEON-NOSVE-NEXT: ldrb w9, [sp, #5] 1507; NONEON-NOSVE-NEXT: cmp w8, w10 1508; NONEON-NOSVE-NEXT: csel w8, w8, w10, hi 1509; NONEON-NOSVE-NEXT: ldrb w10, [sp, #6] 1510; NONEON-NOSVE-NEXT: cmp w8, w9 1511; NONEON-NOSVE-NEXT: csel w8, w8, w9, hi 1512; NONEON-NOSVE-NEXT: ldrb w9, [sp, #7] 1513; NONEON-NOSVE-NEXT: cmp w8, w10 1514; NONEON-NOSVE-NEXT: csel w8, w8, w10, hi 1515; NONEON-NOSVE-NEXT: ldrb w10, [sp, #8] 1516; NONEON-NOSVE-NEXT: cmp w8, w9 1517; NONEON-NOSVE-NEXT: csel w8, w8, w9, hi 1518; NONEON-NOSVE-NEXT: ldrb w9, [sp, #9] 1519; NONEON-NOSVE-NEXT: cmp w8, w10 1520; NONEON-NOSVE-NEXT: csel w8, w8, w10, hi 1521; NONEON-NOSVE-NEXT: ldrb w10, [sp, #10] 1522; NONEON-NOSVE-NEXT: cmp w8, w9 1523; NONEON-NOSVE-NEXT: csel w8, w8, w9, hi 1524; NONEON-NOSVE-NEXT: ldrb w9, [sp, #11] 1525; NONEON-NOSVE-NEXT: cmp w8, w10 1526; NONEON-NOSVE-NEXT: csel w8, w8, w10, hi 1527; NONEON-NOSVE-NEXT: ldrb w10, [sp, #12] 1528; NONEON-NOSVE-NEXT: cmp w8, w9 1529; NONEON-NOSVE-NEXT: csel w8, w8, w9, hi 1530; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13] 1531; NONEON-NOSVE-NEXT: cmp w8, w10 1532; NONEON-NOSVE-NEXT: csel w8, w8, w10, hi 1533; NONEON-NOSVE-NEXT: ldrb w10, [sp, #14] 1534; NONEON-NOSVE-NEXT: cmp w8, w9 1535; NONEON-NOSVE-NEXT: csel w8, w8, w9, hi 1536; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15] 1537; NONEON-NOSVE-NEXT: cmp w8, w10 1538; NONEON-NOSVE-NEXT: csel w8, w8, w10, hi 1539; NONEON-NOSVE-NEXT: cmp w8, w9 1540; NONEON-NOSVE-NEXT: csel w0, w8, w9, hi 1541; NONEON-NOSVE-NEXT: 
add sp, sp, #16 1542; NONEON-NOSVE-NEXT: ret 1543 %res = call i8 @llvm.vector.reduce.umax.v16i8(<16 x i8> %a) 1544 ret i8 %res 1545} 1546 1547define i8 @umaxv_v32i8(ptr %a) { 1548; CHECK-LABEL: umaxv_v32i8: 1549; CHECK: // %bb.0: 1550; CHECK-NEXT: ldp q1, q0, [x0] 1551; CHECK-NEXT: ptrue p0.b, vl16 1552; CHECK-NEXT: umax z0.b, p0/m, z0.b, z1.b 1553; CHECK-NEXT: umaxv b0, p0, z0.b 1554; CHECK-NEXT: fmov w0, s0 1555; CHECK-NEXT: ret 1556; 1557; NONEON-NOSVE-LABEL: umaxv_v32i8: 1558; NONEON-NOSVE: // %bb.0: 1559; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] 1560; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]! 1561; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 1562; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17] 1563; NONEON-NOSVE-NEXT: ldrb w9, [sp, #1] 1564; NONEON-NOSVE-NEXT: ldrb w10, [sp, #16] 1565; NONEON-NOSVE-NEXT: ldrb w11, [sp] 1566; NONEON-NOSVE-NEXT: cmp w9, w8 1567; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi 1568; NONEON-NOSVE-NEXT: cmp w11, w10 1569; NONEON-NOSVE-NEXT: csel w9, w11, w10, hi 1570; NONEON-NOSVE-NEXT: ldrb w10, [sp, #18] 1571; NONEON-NOSVE-NEXT: ldrb w11, [sp, #2] 1572; NONEON-NOSVE-NEXT: cmp w9, w8 1573; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi 1574; NONEON-NOSVE-NEXT: cmp w11, w10 1575; NONEON-NOSVE-NEXT: csel w9, w11, w10, hi 1576; NONEON-NOSVE-NEXT: ldrb w10, [sp, #19] 1577; NONEON-NOSVE-NEXT: ldrb w11, [sp, #3] 1578; NONEON-NOSVE-NEXT: cmp w8, w9 1579; NONEON-NOSVE-NEXT: csel w8, w8, w9, hi 1580; NONEON-NOSVE-NEXT: cmp w11, w10 1581; NONEON-NOSVE-NEXT: csel w9, w11, w10, hi 1582; NONEON-NOSVE-NEXT: ldrb w10, [sp, #20] 1583; NONEON-NOSVE-NEXT: ldrb w11, [sp, #4] 1584; NONEON-NOSVE-NEXT: cmp w8, w9 1585; NONEON-NOSVE-NEXT: csel w8, w8, w9, hi 1586; NONEON-NOSVE-NEXT: cmp w11, w10 1587; NONEON-NOSVE-NEXT: csel w9, w11, w10, hi 1588; NONEON-NOSVE-NEXT: ldrb w10, [sp, #21] 1589; NONEON-NOSVE-NEXT: ldrb w11, [sp, #5] 1590; NONEON-NOSVE-NEXT: cmp w8, w9 1591; NONEON-NOSVE-NEXT: csel w8, w8, w9, hi 1592; NONEON-NOSVE-NEXT: cmp w11, w10 1593; NONEON-NOSVE-NEXT: csel w9, w11, 
w10, hi 1594; NONEON-NOSVE-NEXT: ldrb w10, [sp, #22] 1595; NONEON-NOSVE-NEXT: ldrb w11, [sp, #6] 1596; NONEON-NOSVE-NEXT: cmp w8, w9 1597; NONEON-NOSVE-NEXT: csel w8, w8, w9, hi 1598; NONEON-NOSVE-NEXT: cmp w11, w10 1599; NONEON-NOSVE-NEXT: csel w9, w11, w10, hi 1600; NONEON-NOSVE-NEXT: ldrb w10, [sp, #23] 1601; NONEON-NOSVE-NEXT: ldrb w11, [sp, #7] 1602; NONEON-NOSVE-NEXT: cmp w8, w9 1603; NONEON-NOSVE-NEXT: csel w8, w8, w9, hi 1604; NONEON-NOSVE-NEXT: cmp w11, w10 1605; NONEON-NOSVE-NEXT: csel w9, w11, w10, hi 1606; NONEON-NOSVE-NEXT: ldrb w10, [sp, #24] 1607; NONEON-NOSVE-NEXT: ldrb w11, [sp, #8] 1608; NONEON-NOSVE-NEXT: cmp w8, w9 1609; NONEON-NOSVE-NEXT: csel w8, w8, w9, hi 1610; NONEON-NOSVE-NEXT: cmp w11, w10 1611; NONEON-NOSVE-NEXT: csel w9, w11, w10, hi 1612; NONEON-NOSVE-NEXT: ldrb w10, [sp, #25] 1613; NONEON-NOSVE-NEXT: ldrb w11, [sp, #9] 1614; NONEON-NOSVE-NEXT: cmp w8, w9 1615; NONEON-NOSVE-NEXT: csel w8, w8, w9, hi 1616; NONEON-NOSVE-NEXT: cmp w11, w10 1617; NONEON-NOSVE-NEXT: csel w9, w11, w10, hi 1618; NONEON-NOSVE-NEXT: ldrb w10, [sp, #26] 1619; NONEON-NOSVE-NEXT: ldrb w11, [sp, #10] 1620; NONEON-NOSVE-NEXT: cmp w8, w9 1621; NONEON-NOSVE-NEXT: csel w8, w8, w9, hi 1622; NONEON-NOSVE-NEXT: cmp w11, w10 1623; NONEON-NOSVE-NEXT: csel w9, w11, w10, hi 1624; NONEON-NOSVE-NEXT: ldrb w10, [sp, #27] 1625; NONEON-NOSVE-NEXT: ldrb w11, [sp, #11] 1626; NONEON-NOSVE-NEXT: cmp w8, w9 1627; NONEON-NOSVE-NEXT: csel w8, w8, w9, hi 1628; NONEON-NOSVE-NEXT: cmp w11, w10 1629; NONEON-NOSVE-NEXT: csel w9, w11, w10, hi 1630; NONEON-NOSVE-NEXT: ldrb w10, [sp, #28] 1631; NONEON-NOSVE-NEXT: ldrb w11, [sp, #12] 1632; NONEON-NOSVE-NEXT: cmp w8, w9 1633; NONEON-NOSVE-NEXT: csel w8, w8, w9, hi 1634; NONEON-NOSVE-NEXT: cmp w11, w10 1635; NONEON-NOSVE-NEXT: csel w9, w11, w10, hi 1636; NONEON-NOSVE-NEXT: ldrb w10, [sp, #29] 1637; NONEON-NOSVE-NEXT: ldrb w11, [sp, #13] 1638; NONEON-NOSVE-NEXT: cmp w8, w9 1639; NONEON-NOSVE-NEXT: csel w8, w8, w9, hi 1640; NONEON-NOSVE-NEXT: cmp 
w11, w10 1641; NONEON-NOSVE-NEXT: csel w9, w11, w10, hi 1642; NONEON-NOSVE-NEXT: ldrb w10, [sp, #30] 1643; NONEON-NOSVE-NEXT: ldrb w11, [sp, #14] 1644; NONEON-NOSVE-NEXT: cmp w8, w9 1645; NONEON-NOSVE-NEXT: csel w8, w8, w9, hi 1646; NONEON-NOSVE-NEXT: cmp w11, w10 1647; NONEON-NOSVE-NEXT: csel w9, w11, w10, hi 1648; NONEON-NOSVE-NEXT: ldrb w10, [sp, #31] 1649; NONEON-NOSVE-NEXT: ldrb w11, [sp, #15] 1650; NONEON-NOSVE-NEXT: cmp w8, w9 1651; NONEON-NOSVE-NEXT: csel w8, w8, w9, hi 1652; NONEON-NOSVE-NEXT: cmp w11, w10 1653; NONEON-NOSVE-NEXT: csel w9, w11, w10, hi 1654; NONEON-NOSVE-NEXT: cmp w8, w9 1655; NONEON-NOSVE-NEXT: csel w0, w8, w9, hi 1656; NONEON-NOSVE-NEXT: add sp, sp, #32 1657; NONEON-NOSVE-NEXT: ret 1658 %op = load <32 x i8>, ptr %a 1659 %res = call i8 @llvm.vector.reduce.umax.v32i8(<32 x i8> %op) 1660 ret i8 %res 1661} 1662 1663define i16 @umaxv_v4i16(<4 x i16> %a) { 1664; CHECK-LABEL: umaxv_v4i16: 1665; CHECK: // %bb.0: 1666; CHECK-NEXT: ptrue p0.h, vl4 1667; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0 1668; CHECK-NEXT: umaxv h0, p0, z0.h 1669; CHECK-NEXT: fmov w0, s0 1670; CHECK-NEXT: ret 1671; 1672; NONEON-NOSVE-LABEL: umaxv_v4i16: 1673; NONEON-NOSVE: // %bb.0: 1674; NONEON-NOSVE-NEXT: sub sp, sp, #16 1675; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 1676; NONEON-NOSVE-NEXT: str d0, [sp, #8] 1677; NONEON-NOSVE-NEXT: ldrh w8, [sp, #10] 1678; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8] 1679; NONEON-NOSVE-NEXT: ldrh w10, [sp, #12] 1680; NONEON-NOSVE-NEXT: cmp w9, w8 1681; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi 1682; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14] 1683; NONEON-NOSVE-NEXT: cmp w8, w10 1684; NONEON-NOSVE-NEXT: csel w8, w8, w10, hi 1685; NONEON-NOSVE-NEXT: cmp w8, w9 1686; NONEON-NOSVE-NEXT: csel w0, w8, w9, hi 1687; NONEON-NOSVE-NEXT: add sp, sp, #16 1688; NONEON-NOSVE-NEXT: ret 1689 %res = call i16 @llvm.vector.reduce.umax.v4i16(<4 x i16> %a) 1690 ret i16 %res 1691} 1692 1693define i16 @umaxv_v8i16(<8 x i16> %a) { 1694; CHECK-LABEL: umaxv_v8i16: 1695; 
CHECK: // %bb.0: 1696; CHECK-NEXT: ptrue p0.h, vl8 1697; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 1698; CHECK-NEXT: umaxv h0, p0, z0.h 1699; CHECK-NEXT: fmov w0, s0 1700; CHECK-NEXT: ret 1701; 1702; NONEON-NOSVE-LABEL: umaxv_v8i16: 1703; NONEON-NOSVE: // %bb.0: 1704; NONEON-NOSVE-NEXT: str q0, [sp, #-16]! 1705; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16 1706; NONEON-NOSVE-NEXT: ldrh w8, [sp, #2] 1707; NONEON-NOSVE-NEXT: ldrh w9, [sp] 1708; NONEON-NOSVE-NEXT: ldrh w10, [sp, #4] 1709; NONEON-NOSVE-NEXT: cmp w9, w8 1710; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi 1711; NONEON-NOSVE-NEXT: ldrh w9, [sp, #6] 1712; NONEON-NOSVE-NEXT: cmp w8, w10 1713; NONEON-NOSVE-NEXT: csel w8, w8, w10, hi 1714; NONEON-NOSVE-NEXT: ldrh w10, [sp, #8] 1715; NONEON-NOSVE-NEXT: cmp w8, w9 1716; NONEON-NOSVE-NEXT: csel w8, w8, w9, hi 1717; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10] 1718; NONEON-NOSVE-NEXT: cmp w8, w10 1719; NONEON-NOSVE-NEXT: csel w8, w8, w10, hi 1720; NONEON-NOSVE-NEXT: ldrh w10, [sp, #12] 1721; NONEON-NOSVE-NEXT: cmp w8, w9 1722; NONEON-NOSVE-NEXT: csel w8, w8, w9, hi 1723; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14] 1724; NONEON-NOSVE-NEXT: cmp w8, w10 1725; NONEON-NOSVE-NEXT: csel w8, w8, w10, hi 1726; NONEON-NOSVE-NEXT: cmp w8, w9 1727; NONEON-NOSVE-NEXT: csel w0, w8, w9, hi 1728; NONEON-NOSVE-NEXT: add sp, sp, #16 1729; NONEON-NOSVE-NEXT: ret 1730 %res = call i16 @llvm.vector.reduce.umax.v8i16(<8 x i16> %a) 1731 ret i16 %res 1732} 1733 1734define i16 @umaxv_v16i16(ptr %a) { 1735; CHECK-LABEL: umaxv_v16i16: 1736; CHECK: // %bb.0: 1737; CHECK-NEXT: ldp q1, q0, [x0] 1738; CHECK-NEXT: ptrue p0.h, vl8 1739; CHECK-NEXT: umax z0.h, p0/m, z0.h, z1.h 1740; CHECK-NEXT: umaxv h0, p0, z0.h 1741; CHECK-NEXT: fmov w0, s0 1742; CHECK-NEXT: ret 1743; 1744; NONEON-NOSVE-LABEL: umaxv_v16i16: 1745; NONEON-NOSVE: // %bb.0: 1746; NONEON-NOSVE-NEXT: ldp q1, q0, [x0] 1747; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]! 
1748; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32 1749; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18] 1750; NONEON-NOSVE-NEXT: ldrh w9, [sp, #2] 1751; NONEON-NOSVE-NEXT: ldrh w10, [sp, #16] 1752; NONEON-NOSVE-NEXT: ldrh w11, [sp] 1753; NONEON-NOSVE-NEXT: cmp w9, w8 1754; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi 1755; NONEON-NOSVE-NEXT: cmp w11, w10 1756; NONEON-NOSVE-NEXT: csel w9, w11, w10, hi 1757; NONEON-NOSVE-NEXT: ldrh w10, [sp, #20] 1758; NONEON-NOSVE-NEXT: ldrh w11, [sp, #4] 1759; NONEON-NOSVE-NEXT: cmp w9, w8 1760; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi 1761; NONEON-NOSVE-NEXT: cmp w11, w10 1762; NONEON-NOSVE-NEXT: csel w9, w11, w10, hi 1763; NONEON-NOSVE-NEXT: ldrh w10, [sp, #22] 1764; NONEON-NOSVE-NEXT: ldrh w11, [sp, #6] 1765; NONEON-NOSVE-NEXT: cmp w8, w9 1766; NONEON-NOSVE-NEXT: csel w8, w8, w9, hi 1767; NONEON-NOSVE-NEXT: cmp w11, w10 1768; NONEON-NOSVE-NEXT: csel w9, w11, w10, hi 1769; NONEON-NOSVE-NEXT: ldrh w10, [sp, #24] 1770; NONEON-NOSVE-NEXT: ldrh w11, [sp, #8] 1771; NONEON-NOSVE-NEXT: cmp w8, w9 1772; NONEON-NOSVE-NEXT: csel w8, w8, w9, hi 1773; NONEON-NOSVE-NEXT: cmp w11, w10 1774; NONEON-NOSVE-NEXT: csel w9, w11, w10, hi 1775; NONEON-NOSVE-NEXT: ldrh w10, [sp, #26] 1776; NONEON-NOSVE-NEXT: ldrh w11, [sp, #10] 1777; NONEON-NOSVE-NEXT: cmp w8, w9 1778; NONEON-NOSVE-NEXT: csel w8, w8, w9, hi 1779; NONEON-NOSVE-NEXT: cmp w11, w10 1780; NONEON-NOSVE-NEXT: csel w9, w11, w10, hi 1781; NONEON-NOSVE-NEXT: ldrh w10, [sp, #28] 1782; NONEON-NOSVE-NEXT: ldrh w11, [sp, #12] 1783; NONEON-NOSVE-NEXT: cmp w8, w9 1784; NONEON-NOSVE-NEXT: csel w8, w8, w9, hi 1785; NONEON-NOSVE-NEXT: cmp w11, w10 1786; NONEON-NOSVE-NEXT: csel w9, w11, w10, hi 1787; NONEON-NOSVE-NEXT: ldrh w10, [sp, #30] 1788; NONEON-NOSVE-NEXT: ldrh w11, [sp, #14] 1789; NONEON-NOSVE-NEXT: cmp w8, w9 1790; NONEON-NOSVE-NEXT: csel w8, w8, w9, hi 1791; NONEON-NOSVE-NEXT: cmp w11, w10 1792; NONEON-NOSVE-NEXT: csel w9, w11, w10, hi 1793; NONEON-NOSVE-NEXT: cmp w8, w9 1794; NONEON-NOSVE-NEXT: csel w0, w8, w9, hi 
; NONEON-NOSVE-NEXT: add sp, sp, #32
; NONEON-NOSVE-NEXT: ret
  %op = load <16 x i16>, ptr %a
  %res = call i16 @llvm.vector.reduce.umax.v16i16(<16 x i16> %op)
  ret i16 %res
}

; umax reduction of a <2 x i32> register argument. With +sve/+sme the expected
; code is a ptrue (vl2) feeding umaxv; the run line without either feature
; spills the vector to the stack and reduces it with scalar cmp/csel (hi).
define i32 @umaxv_v2i32(<2 x i32> %a) {
; CHECK-LABEL: umaxv_v2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl2
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: umaxv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: umaxv_v2i32:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #16
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: str d0, [sp, #8]
; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #8]
; NONEON-NOSVE-NEXT: cmp w9, w8
; NONEON-NOSVE-NEXT: csel w0, w9, w8, hi
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
  %res = call i32 @llvm.vector.reduce.umax.v2i32(<2 x i32> %a)
  ret i32 %res
}

; umax reduction of a <4 x i32> register argument: ptrue (vl4) + umaxv with
; +sve/+sme, stack spill plus a three-step scalar cmp/csel (hi) chain otherwise.
define i32 @umaxv_v4i32(<4 x i32> %a) {
; CHECK-LABEL: umaxv_v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: umaxv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: umaxv_v4i32:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: ldp w9, w8, [sp]
; NONEON-NOSVE-NEXT: cmp w9, w8
; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi
; NONEON-NOSVE-NEXT: ldp w10, w9, [sp, #8]
; NONEON-NOSVE-NEXT: cmp w8, w10
; NONEON-NOSVE-NEXT: csel w8, w8, w10, hi
; NONEON-NOSVE-NEXT: cmp w8, w9
; NONEON-NOSVE-NEXT: csel w0, w8, w9, hi
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
  %res = call i32 @llvm.vector.reduce.umax.v4i32(<4 x i32> %a)
  ret i32 %res
}

; umax reduction of a <8 x i32> loaded from %a. With +sve/+sme the two q-register
; halves are merged by a predicated umax before the umaxv; otherwise both halves
; are copied to the stack and reduced with scalar cmp/csel (hi).
define i32 @umaxv_v8i32(ptr %a) {
; CHECK-LABEL: umaxv_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q1, q0, [x0]
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: umax z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: umaxv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: umaxv_v8i32:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT: ldp w11, w8, [sp]
; NONEON-NOSVE-NEXT: ldp w10, w9, [sp, #16]
; NONEON-NOSVE-NEXT: cmp w8, w9
; NONEON-NOSVE-NEXT: csel w8, w8, w9, hi
; NONEON-NOSVE-NEXT: cmp w11, w10
; NONEON-NOSVE-NEXT: csel w9, w11, w10, hi
; NONEON-NOSVE-NEXT: cmp w9, w8
; NONEON-NOSVE-NEXT: ldp w10, w12, [sp, #8]
; NONEON-NOSVE-NEXT: csel w8, w9, w8, hi
; NONEON-NOSVE-NEXT: ldp w11, w9, [sp, #24]
; NONEON-NOSVE-NEXT: cmp w10, w11
; NONEON-NOSVE-NEXT: csel w10, w10, w11, hi
; NONEON-NOSVE-NEXT: cmp w8, w10
; NONEON-NOSVE-NEXT: csel w8, w8, w10, hi
; NONEON-NOSVE-NEXT: cmp w12, w9
; NONEON-NOSVE-NEXT: csel w9, w12, w9, hi
; NONEON-NOSVE-NEXT: cmp w8, w9
; NONEON-NOSVE-NEXT: csel w0, w8, w9, hi
; NONEON-NOSVE-NEXT: add sp, sp, #32
; NONEON-NOSVE-NEXT: ret
  %op = load <8 x i32>, ptr %a
  %res = call i32 @llvm.vector.reduce.umax.v8i32(<8 x i32> %op)
  ret i32 %res
}

; No NEON 64-bit vector UMAXV support. Use SVE.
; umax reduction of a <2 x i64> register argument; the result comes back in x0.
define i64 @umaxv_v2i64(<2 x i64> %a) {
; CHECK-LABEL: umaxv_v2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: umaxv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: umaxv_v2i64:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: ldp x9, x8, [sp], #16
; NONEON-NOSVE-NEXT: cmp x9, x8
; NONEON-NOSVE-NEXT: csel x0, x9, x8, hi
; NONEON-NOSVE-NEXT: ret
  %res = call i64 @llvm.vector.reduce.umax.v2i64(<2 x i64> %a)
  ret i64 %res
}

; umax reduction of a <4 x i64> loaded from %a: predicated umax of the two
; q-register halves followed by umaxv with +sve/+sme, scalar 64-bit cmp/csel (hi)
; on the stack copy otherwise.
define i64 @umaxv_v4i64(ptr %a) {
; CHECK-LABEL: umaxv_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q1, q0, [x0]
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: umax z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: umaxv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: umaxv_v4i64:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT: ldp x8, x10, [sp, #8]
; NONEON-NOSVE-NEXT: ldr x9, [sp, #24]
; NONEON-NOSVE-NEXT: ldr x11, [sp], #32
; NONEON-NOSVE-NEXT: cmp x8, x9
; NONEON-NOSVE-NEXT: csel x8, x8, x9, hi
; NONEON-NOSVE-NEXT: cmp x11, x10
; NONEON-NOSVE-NEXT: csel x9, x11, x10, hi
; NONEON-NOSVE-NEXT: cmp x9, x8
; NONEON-NOSVE-NEXT: csel x0, x9, x8, hi
; NONEON-NOSVE-NEXT: ret
  %op = load <4 x i64>, ptr %a
  %res = call i64 @llvm.vector.reduce.umax.v4i64(<4 x i64> %op)
  ret i64 %res
}

;
; UMINV
;

; umin reduction of a <8 x i8> register argument: ptrue (vl8) + uminv with
; +sve/+sme, stack spill plus a byte-wise scalar cmp/csel (lo) chain otherwise.
define i8 @uminv_v8i8(<8 x i8> %a) {
; CHECK-LABEL: uminv_v8i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl8
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: uminv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: uminv_v8i8:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #16
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: str d0, [sp, #8]
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #9]
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #8]
; NONEON-NOSVE-NEXT: ldrb w10, [sp, #10]
; NONEON-NOSVE-NEXT: cmp w9, w8
; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #11]
; NONEON-NOSVE-NEXT: cmp w8, w10
; NONEON-NOSVE-NEXT: csel w8, w8, w10, lo
; NONEON-NOSVE-NEXT: ldrb w10, [sp, #12]
; NONEON-NOSVE-NEXT: cmp w8, w9
; NONEON-NOSVE-NEXT: csel w8, w8, w9, lo
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13]
; NONEON-NOSVE-NEXT: cmp w8, w10
; NONEON-NOSVE-NEXT: csel w8, w8, w10, lo
; NONEON-NOSVE-NEXT: ldrb w10, [sp, #14]
; NONEON-NOSVE-NEXT: cmp w8, w9
; NONEON-NOSVE-NEXT: csel w8, w8, w9, lo
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15]
; NONEON-NOSVE-NEXT: cmp w8, w10
; NONEON-NOSVE-NEXT: csel w8, w8, w10, lo
; NONEON-NOSVE-NEXT: cmp w8, w9
; NONEON-NOSVE-NEXT: csel w0, w8, w9, lo
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
  %res = call i8 @llvm.vector.reduce.umin.v8i8(<8 x i8> %a)
  ret i8 %res
}

; umin reduction of a <16 x i8> register argument: ptrue (vl16) + uminv with
; +sve/+sme, stack spill plus a byte-wise scalar cmp/csel (lo) chain otherwise.
define i8 @uminv_v16i8(<16 x i8> %a) {
; CHECK-LABEL: uminv_v16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.b, vl16
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: uminv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: uminv_v16i8:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #1]
; NONEON-NOSVE-NEXT: ldrb w9, [sp]
; NONEON-NOSVE-NEXT: ldrb w10, [sp, #2]
; NONEON-NOSVE-NEXT: cmp w9, w8
; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #3]
; NONEON-NOSVE-NEXT: cmp w8, w10
; NONEON-NOSVE-NEXT: csel w8, w8, w10, lo
; NONEON-NOSVE-NEXT: ldrb w10, [sp, #4]
; NONEON-NOSVE-NEXT: cmp w8, w9
; NONEON-NOSVE-NEXT: csel w8, w8, w9, lo
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #5]
; NONEON-NOSVE-NEXT: cmp w8, w10
; NONEON-NOSVE-NEXT: csel w8, w8, w10, lo
; NONEON-NOSVE-NEXT: ldrb w10, [sp, #6]
; NONEON-NOSVE-NEXT: cmp w8, w9
; NONEON-NOSVE-NEXT: csel w8, w8, w9, lo
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #7]
; NONEON-NOSVE-NEXT: cmp w8, w10
; NONEON-NOSVE-NEXT: csel w8, w8, w10, lo
; NONEON-NOSVE-NEXT: ldrb w10, [sp, #8]
; NONEON-NOSVE-NEXT: cmp w8, w9
; NONEON-NOSVE-NEXT: csel w8, w8, w9, lo
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #9]
; NONEON-NOSVE-NEXT: cmp w8, w10
; NONEON-NOSVE-NEXT: csel w8, w8, w10, lo
; NONEON-NOSVE-NEXT: ldrb w10, [sp, #10]
; NONEON-NOSVE-NEXT: cmp w8, w9
; NONEON-NOSVE-NEXT: csel w8, w8, w9, lo
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #11]
; NONEON-NOSVE-NEXT: cmp w8, w10
; NONEON-NOSVE-NEXT: csel w8, w8, w10, lo
; NONEON-NOSVE-NEXT: ldrb w10, [sp, #12]
; NONEON-NOSVE-NEXT: cmp w8, w9
; NONEON-NOSVE-NEXT: csel w8, w8, w9, lo
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #13]
; NONEON-NOSVE-NEXT: cmp w8, w10
; NONEON-NOSVE-NEXT: csel w8, w8, w10, lo
; NONEON-NOSVE-NEXT: ldrb w10, [sp, #14]
; NONEON-NOSVE-NEXT: cmp w8, w9
; NONEON-NOSVE-NEXT: csel w8, w8, w9, lo
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #15]
; NONEON-NOSVE-NEXT: cmp w8, w10
; NONEON-NOSVE-NEXT: csel w8, w8, w10, lo
; NONEON-NOSVE-NEXT: cmp w8, w9
; NONEON-NOSVE-NEXT: csel w0, w8, w9, lo
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
  %res = call i8 @llvm.vector.reduce.umin.v16i8(<16 x i8> %a)
  ret i8 %res
}

; umin reduction of a <32 x i8> loaded from %a. With +sve/+sme the two q-register
; halves are merged by a predicated umin before the uminv; otherwise both halves
; are copied to the stack and reduced byte by byte with scalar cmp/csel (lo).
define i8 @uminv_v32i8(ptr %a) {
; CHECK-LABEL: uminv_v32i8:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q1, q0, [x0]
; CHECK-NEXT: ptrue p0.b, vl16
; CHECK-NEXT: umin z0.b, p0/m, z0.b, z1.b
; CHECK-NEXT: uminv b0, p0, z0.b
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: uminv_v32i8:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT: ldrb w8, [sp, #17]
; NONEON-NOSVE-NEXT: ldrb w9, [sp, #1]
; NONEON-NOSVE-NEXT: ldrb w10, [sp, #16]
; NONEON-NOSVE-NEXT: ldrb w11, [sp]
; NONEON-NOSVE-NEXT: cmp w9, w8
; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo
; NONEON-NOSVE-NEXT: cmp w11, w10
; NONEON-NOSVE-NEXT: csel w9, w11, w10, lo
; NONEON-NOSVE-NEXT: ldrb w10, [sp, #18]
; NONEON-NOSVE-NEXT: ldrb w11, [sp, #2]
; NONEON-NOSVE-NEXT: cmp w9, w8
; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo
; NONEON-NOSVE-NEXT: cmp w11, w10
; NONEON-NOSVE-NEXT: csel w9, w11, w10, lo
; NONEON-NOSVE-NEXT: ldrb w10, [sp, #19]
; NONEON-NOSVE-NEXT: ldrb w11, [sp, #3]
; NONEON-NOSVE-NEXT: cmp w8, w9
; NONEON-NOSVE-NEXT: csel w8, w8, w9, lo
; NONEON-NOSVE-NEXT: cmp w11, w10
; NONEON-NOSVE-NEXT: csel w9, w11, w10, lo
; NONEON-NOSVE-NEXT: ldrb w10, [sp, #20]
; NONEON-NOSVE-NEXT: ldrb w11, [sp, #4]
; NONEON-NOSVE-NEXT: cmp w8, w9
; NONEON-NOSVE-NEXT: csel w8, w8, w9, lo
; NONEON-NOSVE-NEXT: cmp w11, w10
; NONEON-NOSVE-NEXT: csel w9, w11, w10, lo
; NONEON-NOSVE-NEXT: ldrb w10, [sp, #21]
; NONEON-NOSVE-NEXT: ldrb w11, [sp, #5]
; NONEON-NOSVE-NEXT: cmp w8, w9
; NONEON-NOSVE-NEXT: csel w8, w8, w9, lo
; NONEON-NOSVE-NEXT: cmp w11, w10
; NONEON-NOSVE-NEXT: csel w9, w11, w10, lo
; NONEON-NOSVE-NEXT: ldrb w10, [sp, #22]
; NONEON-NOSVE-NEXT: ldrb w11, [sp, #6]
; NONEON-NOSVE-NEXT: cmp w8, w9
; NONEON-NOSVE-NEXT: csel w8, w8, w9, lo
; NONEON-NOSVE-NEXT: cmp w11, w10
; NONEON-NOSVE-NEXT: csel w9, w11, w10, lo
; NONEON-NOSVE-NEXT: ldrb w10, [sp, #23]
; NONEON-NOSVE-NEXT: ldrb w11, [sp, #7]
; NONEON-NOSVE-NEXT: cmp w8, w9
; NONEON-NOSVE-NEXT: csel w8, w8, w9, lo
; NONEON-NOSVE-NEXT: cmp w11, w10
; NONEON-NOSVE-NEXT: csel w9, w11, w10, lo
; NONEON-NOSVE-NEXT: ldrb w10, [sp, #24]
; NONEON-NOSVE-NEXT: ldrb w11, [sp, #8]
; NONEON-NOSVE-NEXT: cmp w8, w9
; NONEON-NOSVE-NEXT: csel w8, w8, w9, lo
; NONEON-NOSVE-NEXT: cmp w11, w10
; NONEON-NOSVE-NEXT: csel w9, w11, w10, lo
; NONEON-NOSVE-NEXT: ldrb w10, [sp, #25]
; NONEON-NOSVE-NEXT: ldrb w11, [sp, #9]
; NONEON-NOSVE-NEXT: cmp w8, w9
; NONEON-NOSVE-NEXT: csel w8, w8, w9, lo
; NONEON-NOSVE-NEXT: cmp w11, w10
; NONEON-NOSVE-NEXT: csel w9, w11, w10, lo
; NONEON-NOSVE-NEXT: ldrb w10, [sp, #26]
; NONEON-NOSVE-NEXT: ldrb w11, [sp, #10]
; NONEON-NOSVE-NEXT: cmp w8, w9
; NONEON-NOSVE-NEXT: csel w8, w8, w9, lo
; NONEON-NOSVE-NEXT: cmp w11, w10
; NONEON-NOSVE-NEXT: csel w9, w11, w10, lo
; NONEON-NOSVE-NEXT: ldrb w10, [sp, #27]
; NONEON-NOSVE-NEXT: ldrb w11, [sp, #11]
; NONEON-NOSVE-NEXT: cmp w8, w9
; NONEON-NOSVE-NEXT: csel w8, w8, w9, lo
; NONEON-NOSVE-NEXT: cmp w11, w10
; NONEON-NOSVE-NEXT: csel w9, w11, w10, lo
; NONEON-NOSVE-NEXT: ldrb w10, [sp, #28]
; NONEON-NOSVE-NEXT: ldrb w11, [sp, #12]
; NONEON-NOSVE-NEXT: cmp w8, w9
; NONEON-NOSVE-NEXT: csel w8, w8, w9, lo
; NONEON-NOSVE-NEXT: cmp w11, w10
; NONEON-NOSVE-NEXT: csel w9, w11, w10, lo
; NONEON-NOSVE-NEXT: ldrb w10, [sp, #29]
; NONEON-NOSVE-NEXT: ldrb w11, [sp, #13]
; NONEON-NOSVE-NEXT: cmp w8, w9
; NONEON-NOSVE-NEXT: csel w8, w8, w9, lo
; NONEON-NOSVE-NEXT: cmp w11, w10
; NONEON-NOSVE-NEXT: csel w9, w11, w10, lo
; NONEON-NOSVE-NEXT: ldrb w10, [sp, #30]
; NONEON-NOSVE-NEXT: ldrb w11, [sp, #14]
; NONEON-NOSVE-NEXT: cmp w8, w9
; NONEON-NOSVE-NEXT: csel w8, w8, w9, lo
; NONEON-NOSVE-NEXT: cmp w11, w10
; NONEON-NOSVE-NEXT: csel w9, w11, w10, lo
; NONEON-NOSVE-NEXT: ldrb w10, [sp, #31]
; NONEON-NOSVE-NEXT: ldrb w11, [sp, #15]
; NONEON-NOSVE-NEXT: cmp w8, w9
; NONEON-NOSVE-NEXT: csel w8, w8, w9, lo
; NONEON-NOSVE-NEXT: cmp w11, w10
; NONEON-NOSVE-NEXT: csel w9, w11, w10, lo
; NONEON-NOSVE-NEXT: cmp w8, w9
; NONEON-NOSVE-NEXT: csel w0, w8, w9, lo
; NONEON-NOSVE-NEXT: add sp, sp, #32
; NONEON-NOSVE-NEXT: ret
  %op = load <32 x i8>, ptr %a
  %res = call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> %op)
  ret i8 %res
}

; umin reduction of a <4 x i16> register argument: ptrue (vl4) + uminv with
; +sve/+sme, stack spill plus a halfword scalar cmp/csel (lo) chain otherwise.
define i16 @uminv_v4i16(<4 x i16> %a) {
; CHECK-LABEL: uminv_v4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl4
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: uminv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: uminv_v4i16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #16
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: str d0, [sp, #8]
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #10]
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #8]
; NONEON-NOSVE-NEXT: ldrh w10, [sp, #12]
; NONEON-NOSVE-NEXT: cmp w9, w8
; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14]
; NONEON-NOSVE-NEXT: cmp w8, w10
; NONEON-NOSVE-NEXT: csel w8, w8, w10, lo
; NONEON-NOSVE-NEXT: cmp w8, w9
; NONEON-NOSVE-NEXT: csel w0, w8, w9, lo
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
  %res = call i16 @llvm.vector.reduce.umin.v4i16(<4 x i16> %a)
  ret i16 %res
}

; umin reduction of a <8 x i16> register argument: ptrue (vl8) + uminv with
; +sve/+sme, stack spill plus a halfword scalar cmp/csel (lo) chain otherwise.
define i16 @uminv_v8i16(<8 x i16> %a) {
; CHECK-LABEL: uminv_v8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.h, vl8
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: uminv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: uminv_v8i16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #2]
; NONEON-NOSVE-NEXT: ldrh w9, [sp]
; NONEON-NOSVE-NEXT: ldrh w10, [sp, #4]
; NONEON-NOSVE-NEXT: cmp w9, w8
; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #6]
; NONEON-NOSVE-NEXT: cmp w8, w10
; NONEON-NOSVE-NEXT: csel w8, w8, w10, lo
; NONEON-NOSVE-NEXT: ldrh w10, [sp, #8]
; NONEON-NOSVE-NEXT: cmp w8, w9
; NONEON-NOSVE-NEXT: csel w8, w8, w9, lo
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #10]
; NONEON-NOSVE-NEXT: cmp w8, w10
; NONEON-NOSVE-NEXT: csel w8, w8, w10, lo
; NONEON-NOSVE-NEXT: ldrh w10, [sp, #12]
; NONEON-NOSVE-NEXT: cmp w8, w9
; NONEON-NOSVE-NEXT: csel w8, w8, w9, lo
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #14]
; NONEON-NOSVE-NEXT: cmp w8, w10
; NONEON-NOSVE-NEXT: csel w8, w8, w10, lo
; NONEON-NOSVE-NEXT: cmp w8, w9
; NONEON-NOSVE-NEXT: csel w0, w8, w9, lo
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
  %res = call i16 @llvm.vector.reduce.umin.v8i16(<8 x i16> %a)
  ret i16 %res
}

; umin reduction of a <16 x i16> loaded from %a. With +sve/+sme the two
; q-register halves are merged by a predicated umin before the uminv; otherwise
; both halves are copied to the stack and reduced with scalar cmp/csel (lo).
define i16 @uminv_v16i16(ptr %a) {
; CHECK-LABEL: uminv_v16i16:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q1, q0, [x0]
; CHECK-NEXT: ptrue p0.h, vl8
; CHECK-NEXT: umin z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT: uminv h0, p0, z0.h
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: uminv_v16i16:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT: ldrh w8, [sp, #18]
; NONEON-NOSVE-NEXT: ldrh w9, [sp, #2]
; NONEON-NOSVE-NEXT: ldrh w10, [sp, #16]
; NONEON-NOSVE-NEXT: ldrh w11, [sp]
; NONEON-NOSVE-NEXT: cmp w9, w8
; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo
; NONEON-NOSVE-NEXT: cmp w11, w10
; NONEON-NOSVE-NEXT: csel w9, w11, w10, lo
; NONEON-NOSVE-NEXT: ldrh w10, [sp, #20]
; NONEON-NOSVE-NEXT: ldrh w11, [sp, #4]
; NONEON-NOSVE-NEXT: cmp w9, w8
; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo
; NONEON-NOSVE-NEXT: cmp w11, w10
; NONEON-NOSVE-NEXT: csel w9, w11, w10, lo
; NONEON-NOSVE-NEXT: ldrh w10, [sp, #22]
; NONEON-NOSVE-NEXT: ldrh w11, [sp, #6]
; NONEON-NOSVE-NEXT: cmp w8, w9
; NONEON-NOSVE-NEXT: csel w8, w8, w9, lo
; NONEON-NOSVE-NEXT: cmp w11, w10
; NONEON-NOSVE-NEXT: csel w9, w11, w10, lo
; NONEON-NOSVE-NEXT: ldrh w10, [sp, #24]
; NONEON-NOSVE-NEXT: ldrh w11, [sp, #8]
; NONEON-NOSVE-NEXT: cmp w8, w9
; NONEON-NOSVE-NEXT: csel w8, w8, w9, lo
; NONEON-NOSVE-NEXT: cmp w11, w10
; NONEON-NOSVE-NEXT: csel w9, w11, w10, lo
; NONEON-NOSVE-NEXT: ldrh w10, [sp, #26]
; NONEON-NOSVE-NEXT: ldrh w11, [sp, #10]
; NONEON-NOSVE-NEXT: cmp w8, w9
; NONEON-NOSVE-NEXT: csel w8, w8, w9, lo
; NONEON-NOSVE-NEXT: cmp w11, w10
; NONEON-NOSVE-NEXT: csel w9, w11, w10, lo
; NONEON-NOSVE-NEXT: ldrh w10, [sp, #28]
; NONEON-NOSVE-NEXT: ldrh w11, [sp, #12]
; NONEON-NOSVE-NEXT: cmp w8, w9
; NONEON-NOSVE-NEXT: csel w8, w8, w9, lo
; NONEON-NOSVE-NEXT: cmp w11, w10
; NONEON-NOSVE-NEXT: csel w9, w11, w10, lo
; NONEON-NOSVE-NEXT: ldrh w10, [sp, #30]
; NONEON-NOSVE-NEXT: ldrh w11, [sp, #14]
; NONEON-NOSVE-NEXT: cmp w8, w9
; NONEON-NOSVE-NEXT: csel w8, w8, w9, lo
; NONEON-NOSVE-NEXT: cmp w11, w10
; NONEON-NOSVE-NEXT: csel w9, w11, w10, lo
; NONEON-NOSVE-NEXT: cmp w8, w9
; NONEON-NOSVE-NEXT: csel w0, w8, w9, lo
; NONEON-NOSVE-NEXT: add sp, sp, #32
; NONEON-NOSVE-NEXT: ret
  %op = load <16 x i16>, ptr %a
  %res = call i16 @llvm.vector.reduce.umin.v16i16(<16 x i16> %op)
  ret i16 %res
}

; umin reduction of a <2 x i32> register argument: ptrue (vl2) + uminv with
; +sve/+sme, stack spill plus a single scalar cmp/csel (lo) otherwise.
define i32 @uminv_v2i32(<2 x i32> %a) {
; CHECK-LABEL: uminv_v2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl2
; CHECK-NEXT: // kill: def $d0 killed $d0 def $z0
; CHECK-NEXT: uminv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: uminv_v2i32:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: sub sp, sp, #16
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: str d0, [sp, #8]
; NONEON-NOSVE-NEXT: ldp w9, w8, [sp, #8]
; NONEON-NOSVE-NEXT: cmp w9, w8
; NONEON-NOSVE-NEXT: csel w0, w9, w8, lo
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
  %res = call i32 @llvm.vector.reduce.umin.v2i32(<2 x i32> %a)
  ret i32 %res
}

; umin reduction of a <4 x i32> register argument: ptrue (vl4) + uminv with
; +sve/+sme, stack spill plus a three-step scalar cmp/csel (lo) chain otherwise.
define i32 @uminv_v4i32(<4 x i32> %a) {
; CHECK-LABEL: uminv_v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: uminv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: uminv_v4i32:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: ldp w9, w8, [sp]
; NONEON-NOSVE-NEXT: cmp w9, w8
; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo
; NONEON-NOSVE-NEXT: ldp w10, w9, [sp, #8]
; NONEON-NOSVE-NEXT: cmp w8, w10
; NONEON-NOSVE-NEXT: csel w8, w8, w10, lo
; NONEON-NOSVE-NEXT: cmp w8, w9
; NONEON-NOSVE-NEXT: csel w0, w8, w9, lo
; NONEON-NOSVE-NEXT: add sp, sp, #16
; NONEON-NOSVE-NEXT: ret
  %res = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> %a)
  ret i32 %res
}

; umin reduction of a <8 x i32> loaded from %a. With +sve/+sme the two q-register
; halves are merged by a predicated umin before the uminv; otherwise both halves
; are copied to the stack and reduced with scalar cmp/csel (lo).
define i32 @uminv_v8i32(ptr %a) {
; CHECK-LABEL: uminv_v8i32:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q1, q0, [x0]
; CHECK-NEXT: ptrue p0.s, vl4
; CHECK-NEXT: umin z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT: uminv s0, p0, z0.s
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: uminv_v8i32:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT: ldp w11, w8, [sp]
; NONEON-NOSVE-NEXT: ldp w10, w9, [sp, #16]
; NONEON-NOSVE-NEXT: cmp w8, w9
; NONEON-NOSVE-NEXT: csel w8, w8, w9, lo
; NONEON-NOSVE-NEXT: cmp w11, w10
; NONEON-NOSVE-NEXT: csel w9, w11, w10, lo
; NONEON-NOSVE-NEXT: cmp w9, w8
; NONEON-NOSVE-NEXT: ldp w10, w12, [sp, #8]
; NONEON-NOSVE-NEXT: csel w8, w9, w8, lo
; NONEON-NOSVE-NEXT: ldp w11, w9, [sp, #24]
; NONEON-NOSVE-NEXT: cmp w10, w11
; NONEON-NOSVE-NEXT: csel w10, w10, w11, lo
; NONEON-NOSVE-NEXT: cmp w8, w10
; NONEON-NOSVE-NEXT: csel w8, w8, w10, lo
; NONEON-NOSVE-NEXT: cmp w12, w9
; NONEON-NOSVE-NEXT: csel w9, w12, w9, lo
; NONEON-NOSVE-NEXT: cmp w8, w9
; NONEON-NOSVE-NEXT: csel w0, w8, w9, lo
; NONEON-NOSVE-NEXT: add sp, sp, #32
; NONEON-NOSVE-NEXT: ret
  %op = load <8 x i32>, ptr %a
  %res = call i32 @llvm.vector.reduce.umin.v8i32(<8 x i32> %op)
  ret i32 %res
}

; No NEON 64-bit vector UMINV support. Use SVE.
; umin reduction of a <2 x i64> register argument; the result comes back in x0.
define i64 @uminv_v2i64(<2 x i64> %a) {
; CHECK-LABEL: uminv_v2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0
; CHECK-NEXT: uminv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: uminv_v2i64:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: str q0, [sp, #-16]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 16
; NONEON-NOSVE-NEXT: ldp x9, x8, [sp], #16
; NONEON-NOSVE-NEXT: cmp x9, x8
; NONEON-NOSVE-NEXT: csel x0, x9, x8, lo
; NONEON-NOSVE-NEXT: ret
  %res = call i64 @llvm.vector.reduce.umin.v2i64(<2 x i64> %a)
  ret i64 %res
}

; umin reduction of a <4 x i64> loaded from %a: predicated umin of the two
; q-register halves followed by uminv with +sve/+sme, scalar 64-bit cmp/csel (lo)
; on the stack copy otherwise.
define i64 @uminv_v4i64(ptr %a) {
; CHECK-LABEL: uminv_v4i64:
; CHECK: // %bb.0:
; CHECK-NEXT: ldp q1, q0, [x0]
; CHECK-NEXT: ptrue p0.d, vl2
; CHECK-NEXT: umin z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT: uminv d0, p0, z0.d
; CHECK-NEXT: fmov x0, d0
; CHECK-NEXT: ret
;
; NONEON-NOSVE-LABEL: uminv_v4i64:
; NONEON-NOSVE: // %bb.0:
; NONEON-NOSVE-NEXT: ldp q1, q0, [x0]
; NONEON-NOSVE-NEXT: stp q1, q0, [sp, #-32]!
; NONEON-NOSVE-NEXT: .cfi_def_cfa_offset 32
; NONEON-NOSVE-NEXT: ldp x8, x10, [sp, #8]
; NONEON-NOSVE-NEXT: ldr x9, [sp, #24]
; NONEON-NOSVE-NEXT: ldr x11, [sp], #32
; NONEON-NOSVE-NEXT: cmp x8, x9
; NONEON-NOSVE-NEXT: csel x8, x8, x9, lo
; NONEON-NOSVE-NEXT: cmp x11, x10
; NONEON-NOSVE-NEXT: csel x9, x11, x10, lo
; NONEON-NOSVE-NEXT: cmp x9, x8
; NONEON-NOSVE-NEXT: csel x0, x9, x8, lo
; NONEON-NOSVE-NEXT: ret
  %op = load <4 x i64>, ptr %a
  %res = call i64 @llvm.vector.reduce.umin.v4i64(<4 x i64> %op)
  ret i64 %res
}

; Declarations of the integer add-reduction intrinsics, grouped by element width.
declare i8 @llvm.vector.reduce.add.v8i8(<8 x i8>)
declare i8 @llvm.vector.reduce.add.v16i8(<16 x i8>)
declare i8 @llvm.vector.reduce.add.v32i8(<32 x i8>)

declare i16 @llvm.vector.reduce.add.v4i16(<4 x i16>)
declare i16 @llvm.vector.reduce.add.v8i16(<8 x i16>)
declare i16 @llvm.vector.reduce.add.v16i16(<16 x i16>)

declare i32 @llvm.vector.reduce.add.v2i32(<2 x i32>)
declare i32 @llvm.vector.reduce.add.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.add.v8i32(<8 x i32>)

declare i64 @llvm.vector.reduce.add.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.add.v4i64(<4 x i64>)

; Declarations of the smax reduction intrinsics, grouped by element width.
declare i8 @llvm.vector.reduce.smax.v8i8(<8 x i8>)
declare i8 @llvm.vector.reduce.smax.v16i8(<16 x i8>)
declare i8 @llvm.vector.reduce.smax.v32i8(<32 x i8>)

declare i16 @llvm.vector.reduce.smax.v4i16(<4 x i16>)
declare i16 @llvm.vector.reduce.smax.v8i16(<8 x i16>)
declare i16 @llvm.vector.reduce.smax.v16i16(<16 x i16>)

declare i32 @llvm.vector.reduce.smax.v2i32(<2 x i32>)
declare i32 @llvm.vector.reduce.smax.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.smax.v8i32(<8 x i32>)

declare i64 @llvm.vector.reduce.smax.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.smax.v4i64(<4 x i64>)

; Declarations of the smin reduction intrinsics, grouped by element width.
declare i8 @llvm.vector.reduce.smin.v8i8(<8 x i8>)
declare i8 @llvm.vector.reduce.smin.v16i8(<16 x i8>)
declare i8 @llvm.vector.reduce.smin.v32i8(<32 x i8>)

declare i16 @llvm.vector.reduce.smin.v4i16(<4 x i16>)
declare i16 @llvm.vector.reduce.smin.v8i16(<8 x i16>)
declare i16 @llvm.vector.reduce.smin.v16i16(<16 x i16>)

declare i32 @llvm.vector.reduce.smin.v2i32(<2 x i32>)
declare i32 @llvm.vector.reduce.smin.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.smin.v8i32(<8 x i32>)

declare i64 @llvm.vector.reduce.smin.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.smin.v4i64(<4 x i64>)

; Declarations of the umax reduction intrinsics, grouped by element width.
declare i8 @llvm.vector.reduce.umax.v8i8(<8 x i8>)
declare i8 @llvm.vector.reduce.umax.v16i8(<16 x i8>)
declare i8 @llvm.vector.reduce.umax.v32i8(<32 x i8>)

declare i16 @llvm.vector.reduce.umax.v4i16(<4 x i16>)
declare i16 @llvm.vector.reduce.umax.v8i16(<8 x i16>)
declare i16 @llvm.vector.reduce.umax.v16i16(<16 x i16>)

declare i32 @llvm.vector.reduce.umax.v2i32(<2 x i32>)
declare i32 @llvm.vector.reduce.umax.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.umax.v8i32(<8 x i32>)

declare i64 @llvm.vector.reduce.umax.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.umax.v4i64(<4 x i64>)

; Declarations of the umin reduction intrinsics, grouped by element width.
declare i8 @llvm.vector.reduce.umin.v8i8(<8 x i8>)
declare i8 @llvm.vector.reduce.umin.v16i8(<16 x i8>)
declare i8 @llvm.vector.reduce.umin.v32i8(<32 x i8>)

declare i16 @llvm.vector.reduce.umin.v4i16(<4 x i16>)
declare i16 @llvm.vector.reduce.umin.v8i16(<8 x i16>)
declare i16 @llvm.vector.reduce.umin.v16i16(<16 x i16>)

declare i32 @llvm.vector.reduce.umin.v2i32(<2 x i32>)
declare i32 @llvm.vector.reduce.umin.v4i32(<4 x i32>)
declare i32 @llvm.vector.reduce.umin.v8i32(<8 x i32>)

declare i64 @llvm.vector.reduce.umin.v2i64(<2 x i64>)
declare i64 @llvm.vector.reduce.umin.v4i64(<4 x i64>)