; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK

; Every test below follows the same shape:
;   1. compare each lane of %a0 against zero with a signed less-than,
;   2. fold the resulting i1 vector with an and/or/xor reduction intrinsic,
;   3. use the folded bit to pick between %a1 and %a2.
; Functions are grouped by reduction kind, then by element type and lane count.

; Reduction intrinsics used by the tests (one per lane count and operation).
declare i1 @llvm.vector.reduce.and.v1i1(<1 x i1>)
declare i1 @llvm.vector.reduce.and.v2i1(<2 x i1>)
declare i1 @llvm.vector.reduce.and.v4i1(<4 x i1>)
declare i1 @llvm.vector.reduce.and.v8i1(<8 x i1>)
declare i1 @llvm.vector.reduce.and.v16i1(<16 x i1>)
declare i1 @llvm.vector.reduce.and.v32i1(<32 x i1>)

declare i1 @llvm.vector.reduce.or.v1i1(<1 x i1>)
declare i1 @llvm.vector.reduce.or.v2i1(<2 x i1>)
declare i1 @llvm.vector.reduce.or.v4i1(<4 x i1>)
declare i1 @llvm.vector.reduce.or.v8i1(<8 x i1>)
declare i1 @llvm.vector.reduce.or.v16i1(<16 x i1>)
declare i1 @llvm.vector.reduce.or.v32i1(<32 x i1>)

declare i1 @llvm.vector.reduce.xor.v1i1(<1 x i1>)
declare i1 @llvm.vector.reduce.xor.v2i1(<2 x i1>)
declare i1 @llvm.vector.reduce.xor.v4i1(<4 x i1>)
declare i1 @llvm.vector.reduce.xor.v8i1(<8 x i1>)
declare i1 @llvm.vector.reduce.xor.v16i1(<16 x i1>)
declare i1 @llvm.vector.reduce.xor.v32i1(<32 x i1>)

;
; AND reductions: the select takes %a1 only when every lane is negative.
;

define i32 @reduce_and_v1i8(<1 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_and_v1i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    smov w8, v0.b[0]
; CHECK-NEXT:    cmp w8, #0
; CHECK-NEXT:    csel w0, w0, w1, lt
; CHECK-NEXT:    ret
  %cmp = icmp slt <1 x i8> %a0, zeroinitializer
  %red = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> %cmp)
  %sel = select i1 %red, i32 %a1, i32 %a2
  ret i32 %sel
}

define i32 @reduce_and_v2i8(<2 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_and_v2i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shl v0.2s, v0.2s, #24
; CHECK-NEXT:    sshr v0.2s, v0.2s, #24
; CHECK-NEXT:    cmlt v0.2s, v0.2s, #0
; CHECK-NEXT:    uminp v0.2s, v0.2s, v0.2s
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %cmp = icmp slt <2 x i8> %a0, zeroinitializer
  %red = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> %cmp)
  %sel = select i1 %red, i32 %a1, i32 %a2
  ret i32 %sel
}

define i32 @reduce_and_v4i8(<4 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_and_v4i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shl v0.4h, v0.4h, #8
; CHECK-NEXT:    sshr v0.4h, v0.4h, #8
; CHECK-NEXT:    cmlt v0.4h, v0.4h, #0
; CHECK-NEXT:    uminv h0, v0.4h
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %cmp = icmp slt <4 x i8> %a0, zeroinitializer
  %red = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %cmp)
  %sel = select i1 %red, i32 %a1, i32 %a2
  ret i32 %sel
}

define i32 @reduce_and_v8i8(<8 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_and_v8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v0.8b, v0.8b, #0
; CHECK-NEXT:    uminv b0, v0.8b
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %cmp = icmp slt <8 x i8> %a0, zeroinitializer
  %red = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %cmp)
  %sel = select i1 %red, i32 %a1, i32 %a2
  ret i32 %sel
}

define i32 @reduce_and_v16i8(<16 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_and_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v0.16b, v0.16b, #0
; CHECK-NEXT:    uminv b0, v0.16b
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %cmp = icmp slt <16 x i8> %a0, zeroinitializer
  %red = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %cmp)
  %sel = select i1 %red, i32 %a1, i32 %a2
  ret i32 %sel
}

define i32 @reduce_and_v32i8(<32 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_and_v32i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    cmlt v0.16b, v0.16b, #0
; CHECK-NEXT:    uminv b0, v0.16b
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %cmp = icmp slt <32 x i8> %a0, zeroinitializer
  %red = call i1 @llvm.vector.reduce.and.v32i1(<32 x i1> %cmp)
  %sel = select i1 %red, i32 %a1, i32 %a2
  ret i32 %sel
}

define i32 @reduce_and_v1i16(<1 x i16> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_and_v1i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    smov w8, v0.h[0]
; CHECK-NEXT:    cmp w8, #0
; CHECK-NEXT:    csel w0, w0, w1, lt
; CHECK-NEXT:    ret
  %cmp = icmp slt <1 x i16> %a0, zeroinitializer
  %red = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> %cmp)
  %sel = select i1 %red, i32 %a1, i32 %a2
  ret i32 %sel
}

define i32 @reduce_and_v2i16(<2 x i16> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_and_v2i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shl v0.2s, v0.2s, #16
; CHECK-NEXT:    sshr v0.2s, v0.2s, #16
; CHECK-NEXT:    cmlt v0.2s, v0.2s, #0
; CHECK-NEXT:    uminp v0.2s, v0.2s, v0.2s
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %cmp = icmp slt <2 x i16> %a0, zeroinitializer
  %red = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> %cmp)
  %sel = select i1 %red, i32 %a1, i32 %a2
  ret i32 %sel
}

define i32 @reduce_and_v4i16(<4 x i16> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_and_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v0.4h, v0.4h, #0
; CHECK-NEXT:    uminv h0, v0.4h
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %cmp = icmp slt <4 x i16> %a0, zeroinitializer
  %red = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %cmp)
  %sel = select i1 %red, i32 %a1, i32 %a2
  ret i32 %sel
}

define i32 @reduce_and_v8i16(<8 x i16> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_and_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v0.8h, v0.8h, #0
; CHECK-NEXT:    uminv h0, v0.8h
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %cmp = icmp slt <8 x i16> %a0, zeroinitializer
  %red = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %cmp)
  %sel = select i1 %red, i32 %a1, i32 %a2
  ret i32 %sel
}

define i32 @reduce_and_v16i16(<16 x i16> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_and_v16i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v1.8h, v1.8h, #0
; CHECK-NEXT:    cmlt v0.8h, v0.8h, #0
; CHECK-NEXT:    uzp1 v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    uminv b0, v0.16b
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %cmp = icmp slt <16 x i16> %a0, zeroinitializer
  %red = call i1 @llvm.vector.reduce.and.v16i1(<16 x i1> %cmp)
  %sel = select i1 %red, i32 %a1, i32 %a2
  ret i32 %sel
}

define i32 @reduce_and_v1i32(<1 x i32> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_and_v1i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    cmp w8, #0
; CHECK-NEXT:    csel w0, w0, w1, lt
; CHECK-NEXT:    ret
  %cmp = icmp slt <1 x i32> %a0, zeroinitializer
  %red = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> %cmp)
  %sel = select i1 %red, i32 %a1, i32 %a2
  ret i32 %sel
}

define i32 @reduce_and_v2i32(<2 x i32> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_and_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v0.2s, v0.2s, #0
; CHECK-NEXT:    uminp v0.2s, v0.2s, v0.2s
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %cmp = icmp slt <2 x i32> %a0, zeroinitializer
  %red = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> %cmp)
  %sel = select i1 %red, i32 %a1, i32 %a2
  ret i32 %sel
}

define i32 @reduce_and_v4i32(<4 x i32> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_and_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v0.4s, v0.4s, #0
; CHECK-NEXT:    uminv s0, v0.4s
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %cmp = icmp slt <4 x i32> %a0, zeroinitializer
  %red = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %cmp)
  %sel = select i1 %red, i32 %a1, i32 %a2
  ret i32 %sel
}

define i32 @reduce_and_v8i32(<8 x i32> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_and_v8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v1.4s, v1.4s, #0
; CHECK-NEXT:    cmlt v0.4s, v0.4s, #0
; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    uminv h0, v0.8h
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %cmp = icmp slt <8 x i32> %a0, zeroinitializer
  %red = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> %cmp)
  %sel = select i1 %red, i32 %a1, i32 %a2
  ret i32 %sel
}

define i32 @reduce_and_v1i64(<1 x i64> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_and_v1i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    fmov x8, d0
; CHECK-NEXT:    cmp x8, #0
; CHECK-NEXT:    csel w0, w0, w1, lt
; CHECK-NEXT:    ret
  %cmp = icmp slt <1 x i64> %a0, zeroinitializer
  %red = call i1 @llvm.vector.reduce.and.v1i1(<1 x i1> %cmp)
  %sel = select i1 %red, i32 %a1, i32 %a2
  ret i32 %sel
}

define i32 @reduce_and_v2i64(<2 x i64> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_and_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v0.2d, v0.2d, #0
; CHECK-NEXT:    uminv s0, v0.4s
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %cmp = icmp slt <2 x i64> %a0, zeroinitializer
  %red = call i1 @llvm.vector.reduce.and.v2i1(<2 x i1> %cmp)
  %sel = select i1 %red, i32 %a1, i32 %a2
  ret i32 %sel
}

define i32 @reduce_and_v4i64(<4 x i64> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_and_v4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v1.2d, v1.2d, #0
; CHECK-NEXT:    cmlt v0.2d, v0.2d, #0
; CHECK-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    uminv s0, v0.4s
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %cmp = icmp slt <4 x i64> %a0, zeroinitializer
  %red = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> %cmp)
  %sel = select i1 %red, i32 %a1, i32 %a2
  ret i32 %sel
}

;
; OR reductions: the select takes %a1 when at least one lane is negative.
;

define i32 @reduce_or_v1i8(<1 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_or_v1i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    smov w8, v0.b[0]
; CHECK-NEXT:    cmp w8, #0
; CHECK-NEXT:    csel w0, w0, w1, lt
; CHECK-NEXT:    ret
  %cmp = icmp slt <1 x i8> %a0, zeroinitializer
  %red = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> %cmp)
  %sel = select i1 %red, i32 %a1, i32 %a2
  ret i32 %sel
}

define i32 @reduce_or_v2i8(<2 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_or_v2i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shl v0.2s, v0.2s, #24
; CHECK-NEXT:    sshr v0.2s, v0.2s, #24
; CHECK-NEXT:    cmlt v0.2s, v0.2s, #0
; CHECK-NEXT:    umaxp v0.2s, v0.2s, v0.2s
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %cmp = icmp slt <2 x i8> %a0, zeroinitializer
  %red = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> %cmp)
  %sel = select i1 %red, i32 %a1, i32 %a2
  ret i32 %sel
}

define i32 @reduce_or_v4i8(<4 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_or_v4i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shl v0.4h, v0.4h, #8
; CHECK-NEXT:    sshr v0.4h, v0.4h, #8
; CHECK-NEXT:    cmlt v0.4h, v0.4h, #0
; CHECK-NEXT:    umaxv h0, v0.4h
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %cmp = icmp slt <4 x i8> %a0, zeroinitializer
  %red = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> %cmp)
  %sel = select i1 %red, i32 %a1, i32 %a2
  ret i32 %sel
}

define i32 @reduce_or_v8i8(<8 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_or_v8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v0.8b, v0.8b, #0
; CHECK-NEXT:    umaxv b0, v0.8b
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %cmp = icmp slt <8 x i8> %a0, zeroinitializer
  %red = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> %cmp)
  %sel = select i1 %red, i32 %a1, i32 %a2
  ret i32 %sel
}

define i32 @reduce_or_v16i8(<16 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_or_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v0.16b, v0.16b, #0
; CHECK-NEXT:    umaxv b0, v0.16b
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %cmp = icmp slt <16 x i8> %a0, zeroinitializer
  %red = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> %cmp)
  %sel = select i1 %red, i32 %a1, i32 %a2
  ret i32 %sel
}

define i32 @reduce_or_v32i8(<32 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_or_v32i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    cmlt v0.16b, v0.16b, #0
; CHECK-NEXT:    umaxv b0, v0.16b
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %cmp = icmp slt <32 x i8> %a0, zeroinitializer
  %red = call i1 @llvm.vector.reduce.or.v32i1(<32 x i1> %cmp)
  %sel = select i1 %red, i32 %a1, i32 %a2
  ret i32 %sel
}

define i32 @reduce_or_v1i16(<1 x i16> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_or_v1i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    smov w8, v0.h[0]
; CHECK-NEXT:    cmp w8, #0
; CHECK-NEXT:    csel w0, w0, w1, lt
; CHECK-NEXT:    ret
  %cmp = icmp slt <1 x i16> %a0, zeroinitializer
  %red = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> %cmp)
  %sel = select i1 %red, i32 %a1, i32 %a2
  ret i32 %sel
}

define i32 @reduce_or_v2i16(<2 x i16> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_or_v2i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shl v0.2s, v0.2s, #16
; CHECK-NEXT:    sshr v0.2s, v0.2s, #16
; CHECK-NEXT:    cmlt v0.2s, v0.2s, #0
; CHECK-NEXT:    umaxp v0.2s, v0.2s, v0.2s
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %cmp = icmp slt <2 x i16> %a0, zeroinitializer
  %red = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> %cmp)
  %sel = select i1 %red, i32 %a1, i32 %a2
  ret i32 %sel
}

define i32 @reduce_or_v4i16(<4 x i16> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_or_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v0.4h, v0.4h, #0
; CHECK-NEXT:    umaxv h0, v0.4h
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %cmp = icmp slt <4 x i16> %a0, zeroinitializer
  %red = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> %cmp)
  %sel = select i1 %red, i32 %a1, i32 %a2
  ret i32 %sel
}

define i32 @reduce_or_v8i16(<8 x i16> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_or_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v0.8h, v0.8h, #0
; CHECK-NEXT:    umaxv h0, v0.8h
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %cmp = icmp slt <8 x i16> %a0, zeroinitializer
  %red = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> %cmp)
  %sel = select i1 %red, i32 %a1, i32 %a2
  ret i32 %sel
}

define i32 @reduce_or_v16i16(<16 x i16> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_or_v16i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v1.8h, v1.8h, #0
; CHECK-NEXT:    cmlt v0.8h, v0.8h, #0
; CHECK-NEXT:    uzp1 v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    umaxv b0, v0.16b
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %cmp = icmp slt <16 x i16> %a0, zeroinitializer
  %red = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> %cmp)
  %sel = select i1 %red, i32 %a1, i32 %a2
  ret i32 %sel
}

define i32 @reduce_or_v1i32(<1 x i32> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_or_v1i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    cmp w8, #0
; CHECK-NEXT:    csel w0, w0, w1, lt
; CHECK-NEXT:    ret
  %cmp = icmp slt <1 x i32> %a0, zeroinitializer
  %red = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> %cmp)
  %sel = select i1 %red, i32 %a1, i32 %a2
  ret i32 %sel
}

define i32 @reduce_or_v2i32(<2 x i32> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_or_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v0.2s, v0.2s, #0
; CHECK-NEXT:    umaxp v0.2s, v0.2s, v0.2s
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %cmp = icmp slt <2 x i32> %a0, zeroinitializer
  %red = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> %cmp)
  %sel = select i1 %red, i32 %a1, i32 %a2
  ret i32 %sel
}

define i32 @reduce_or_v4i32(<4 x i32> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_or_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v0.4s, v0.4s, #0
; CHECK-NEXT:    umaxv s0, v0.4s
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %cmp = icmp slt <4 x i32> %a0, zeroinitializer
  %red = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> %cmp)
  %sel = select i1 %red, i32 %a1, i32 %a2
  ret i32 %sel
}

define i32 @reduce_or_v8i32(<8 x i32> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_or_v8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v1.4s, v1.4s, #0
; CHECK-NEXT:    cmlt v0.4s, v0.4s, #0
; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    umaxv h0, v0.8h
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %cmp = icmp slt <8 x i32> %a0, zeroinitializer
  %red = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> %cmp)
  %sel = select i1 %red, i32 %a1, i32 %a2
  ret i32 %sel
}

define i32 @reduce_or_v1i64(<1 x i64> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_or_v1i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    fmov x8, d0
; CHECK-NEXT:    cmp x8, #0
; CHECK-NEXT:    csel w0, w0, w1, lt
; CHECK-NEXT:    ret
  %cmp = icmp slt <1 x i64> %a0, zeroinitializer
  %red = call i1 @llvm.vector.reduce.or.v1i1(<1 x i1> %cmp)
  %sel = select i1 %red, i32 %a1, i32 %a2
  ret i32 %sel
}

define i32 @reduce_or_v2i64(<2 x i64> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_or_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v0.2d, v0.2d, #0
; CHECK-NEXT:    umaxv s0, v0.4s
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %cmp = icmp slt <2 x i64> %a0, zeroinitializer
  %red = call i1 @llvm.vector.reduce.or.v2i1(<2 x i1> %cmp)
  %sel = select i1 %red, i32 %a1, i32 %a2
  ret i32 %sel
}

define i32 @reduce_or_v4i64(<4 x i64> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_or_v4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v1.2d, v1.2d, #0
; CHECK-NEXT:    cmlt v0.2d, v0.2d, #0
; CHECK-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    umaxv s0, v0.4s
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %cmp = icmp slt <4 x i64> %a0, zeroinitializer
  %red = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> %cmp)
  %sel = select i1 %red, i32 %a1, i32 %a2
  ret i32 %sel
}

;
; XOR reductions: the select takes %a1 when an odd number of lanes is negative.
;

define i32 @reduce_xor_v1i8(<1 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_xor_v1i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    smov w8, v0.b[0]
; CHECK-NEXT:    cmp w8, #0
; CHECK-NEXT:    csel w0, w0, w1, lt
; CHECK-NEXT:    ret
  %cmp = icmp slt <1 x i8> %a0, zeroinitializer
  %red = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> %cmp)
  %sel = select i1 %red, i32 %a1, i32 %a2
  ret i32 %sel
}

define i32 @reduce_xor_v2i8(<2 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_xor_v2i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shl v0.2s, v0.2s, #24
; CHECK-NEXT:    sshr v0.2s, v0.2s, #24
; CHECK-NEXT:    cmlt v0.2s, v0.2s, #0
; CHECK-NEXT:    addp v0.2s, v0.2s, v0.2s
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %cmp = icmp slt <2 x i8> %a0, zeroinitializer
  %red = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> %cmp)
  %sel = select i1 %red, i32 %a1, i32 %a2
  ret i32 %sel
}

define i32 @reduce_xor_v4i8(<4 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_xor_v4i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shl v0.4h, v0.4h, #8
; CHECK-NEXT:    sshr v0.4h, v0.4h, #8
; CHECK-NEXT:    cmlt v0.4h, v0.4h, #0
; CHECK-NEXT:    addv h0, v0.4h
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %cmp = icmp slt <4 x i8> %a0, zeroinitializer
  %red = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> %cmp)
  %sel = select i1 %red, i32 %a1, i32 %a2
  ret i32 %sel
}

define i32 @reduce_xor_v8i8(<8 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_xor_v8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v0.8b, v0.8b, #0
; CHECK-NEXT:    addv b0, v0.8b
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %cmp = icmp slt <8 x i8> %a0, zeroinitializer
  %red = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> %cmp)
  %sel = select i1 %red, i32 %a1, i32 %a2
  ret i32 %sel
}

define i32 @reduce_xor_v16i8(<16 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_xor_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v0.16b, v0.16b, #0
; CHECK-NEXT:    addv b0, v0.16b
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %cmp = icmp slt <16 x i8> %a0, zeroinitializer
  %red = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> %cmp)
  %sel = select i1 %red, i32 %a1, i32 %a2
  ret i32 %sel
}

define i32 @reduce_xor_v32i8(<32 x i8> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_xor_v32i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v1.16b, v1.16b, #0
; CHECK-NEXT:    cmlt v0.16b, v0.16b, #0
; CHECK-NEXT:    eor v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    addv b0, v0.16b
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %cmp = icmp slt <32 x i8> %a0, zeroinitializer
  %red = call i1 @llvm.vector.reduce.xor.v32i1(<32 x i1> %cmp)
  %sel = select i1 %red, i32 %a1, i32 %a2
  ret i32 %sel
}

define i32 @reduce_xor_v1i16(<1 x i16> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_xor_v1i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    smov w8, v0.h[0]
; CHECK-NEXT:    cmp w8, #0
; CHECK-NEXT:    csel w0, w0, w1, lt
; CHECK-NEXT:    ret
  %cmp = icmp slt <1 x i16> %a0, zeroinitializer
  %red = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> %cmp)
  %sel = select i1 %red, i32 %a1, i32 %a2
  ret i32 %sel
}

define i32 @reduce_xor_v2i16(<2 x i16> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_xor_v2i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    shl v0.2s, v0.2s, #16
; CHECK-NEXT:    sshr v0.2s, v0.2s, #16
; CHECK-NEXT:    cmlt v0.2s, v0.2s, #0
; CHECK-NEXT:    addp v0.2s, v0.2s, v0.2s
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %cmp = icmp slt <2 x i16> %a0, zeroinitializer
  %red = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> %cmp)
  %sel = select i1 %red, i32 %a1, i32 %a2
  ret i32 %sel
}

define i32 @reduce_xor_v4i16(<4 x i16> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_xor_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v0.4h, v0.4h, #0
; CHECK-NEXT:    addv h0, v0.4h
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %cmp = icmp slt <4 x i16> %a0, zeroinitializer
  %red = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> %cmp)
  %sel = select i1 %red, i32 %a1, i32 %a2
  ret i32 %sel
}

define i32 @reduce_xor_v8i16(<8 x i16> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_xor_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v0.8h, v0.8h, #0
; CHECK-NEXT:    addv h0, v0.8h
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %cmp = icmp slt <8 x i16> %a0, zeroinitializer
  %red = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> %cmp)
  %sel = select i1 %red, i32 %a1, i32 %a2
  ret i32 %sel
}

define i32 @reduce_xor_v16i16(<16 x i16> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_xor_v16i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v1.8h, v1.8h, #0
; CHECK-NEXT:    cmlt v0.8h, v0.8h, #0
; CHECK-NEXT:    uzp1 v0.16b, v0.16b, v1.16b
; CHECK-NEXT:    addv b0, v0.16b
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %cmp = icmp slt <16 x i16> %a0, zeroinitializer
  %red = call i1 @llvm.vector.reduce.xor.v16i1(<16 x i1> %cmp)
  %sel = select i1 %red, i32 %a1, i32 %a2
  ret i32 %sel
}

define i32 @reduce_xor_v1i32(<1 x i32> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_xor_v1i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    cmp w8, #0
; CHECK-NEXT:    csel w0, w0, w1, lt
; CHECK-NEXT:    ret
  %cmp = icmp slt <1 x i32> %a0, zeroinitializer
  %red = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> %cmp)
  %sel = select i1 %red, i32 %a1, i32 %a2
  ret i32 %sel
}

define i32 @reduce_xor_v2i32(<2 x i32> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_xor_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v0.2s, v0.2s, #0
; CHECK-NEXT:    addp v0.2s, v0.2s, v0.2s
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %cmp = icmp slt <2 x i32> %a0, zeroinitializer
  %red = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> %cmp)
  %sel = select i1 %red, i32 %a1, i32 %a2
  ret i32 %sel
}

define i32 @reduce_xor_v4i32(<4 x i32> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_xor_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v0.4s, v0.4s, #0
; CHECK-NEXT:    addv s0, v0.4s
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %cmp = icmp slt <4 x i32> %a0, zeroinitializer
  %red = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> %cmp)
  %sel = select i1 %red, i32 %a1, i32 %a2
  ret i32 %sel
}

define i32 @reduce_xor_v8i32(<8 x i32> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_xor_v8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v1.4s, v1.4s, #0
; CHECK-NEXT:    cmlt v0.4s, v0.4s, #0
; CHECK-NEXT:    uzp1 v0.8h, v0.8h, v1.8h
; CHECK-NEXT:    addv h0, v0.8h
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %cmp = icmp slt <8 x i32> %a0, zeroinitializer
  %red = call i1 @llvm.vector.reduce.xor.v8i1(<8 x i1> %cmp)
  %sel = select i1 %red, i32 %a1, i32 %a2
  ret i32 %sel
}

define i32 @reduce_xor_v1i64(<1 x i64> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_xor_v1i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    fmov x8, d0
; CHECK-NEXT:    cmp x8, #0
; CHECK-NEXT:    csel w0, w0, w1, lt
; CHECK-NEXT:    ret
  %cmp = icmp slt <1 x i64> %a0, zeroinitializer
  %red = call i1 @llvm.vector.reduce.xor.v1i1(<1 x i1> %cmp)
  %sel = select i1 %red, i32 %a1, i32 %a2
  ret i32 %sel
}

define i32 @reduce_xor_v2i64(<2 x i64> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_xor_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v0.2d, v0.2d, #0
; CHECK-NEXT:    addp d0, v0.2d
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %cmp = icmp slt <2 x i64> %a0, zeroinitializer
  %red = call i1 @llvm.vector.reduce.xor.v2i1(<2 x i1> %cmp)
  %sel = select i1 %red, i32 %a1, i32 %a2
  ret i32 %sel
}

define i32 @reduce_xor_v4i64(<4 x i64> %a0, i32 %a1, i32 %a2) nounwind {
; CHECK-LABEL: reduce_xor_v4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    cmlt v1.2d, v1.2d, #0
; CHECK-NEXT:    cmlt v0.2d, v0.2d, #0
; CHECK-NEXT:    uzp1 v0.4s, v0.4s, v1.4s
; CHECK-NEXT:    addv s0, v0.4s
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    tst w8, #0x1
; CHECK-NEXT:    csel w0, w0, w1, ne
; CHECK-NEXT:    ret
  %cmp = icmp slt <4 x i64> %a0, zeroinitializer
  %red = call i1 @llvm.vector.reduce.xor.v4i1(<4 x i1> %cmp)
  %sel = select i1 %red, i32 %a1, i32 %a2
  ret i32 %sel
}