; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64 -mattr=+sve < %s | FileCheck %s

;
; OR reduction.
;

; With a minimum vector length of 256 bits (vscale_range(2, 0)), the 512-bit
; <16 x float> operation is split across two registers.
define i1 @ptest_v16i1_256bit_min_sve(ptr %a, ptr %b) vscale_range(2, 0) {
; CHECK-LABEL: ptest_v16i1_256bit_min_sve:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl8
; CHECK-NEXT:    mov x8, #8 // =0x8
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0, x8, lsl #2]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x0]
; CHECK-NEXT:    fcmne p1.s, p0/z, z0.s, #0.0
; CHECK-NEXT:    fcmne p0.s, p0/z, z1.s, #0.0
; CHECK-NEXT:    mov z0.s, p1/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    mov z1.s, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT:    uzp1 z1.h, z1.h, z1.h
; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT:    uzp1 z1.b, z1.b, z1.b
; CHECK-NEXT:    mov v1.d[1], v0.d[0]
; CHECK-NEXT:    umaxv b0, v1.16b
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    and w0, w8, #0x1
; CHECK-NEXT:    ret
  %v1 = load <16 x float>, ptr %a, align 4
  %v2 = fcmp une <16 x float> %v1, zeroinitializer
  %v3 = call i1 @llvm.vector.reduce.or.i1.v16i1 (<16 x i1> %v2)
  ret i1 %v3
}

; A 512-bit minimum vector length fits the whole operation in one register.
define i1 @ptest_v16i1_512bit_min_sve(ptr %a, ptr %b) vscale_range(4, 0) {
; CHECK-LABEL: ptest_v16i1_512bit_min_sve:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl16
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    fcmne p0.s, p0/z, z0.s, #0.0
; CHECK-NEXT:    mov z0.s, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT:    umaxv b0, v0.16b
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    and w0, w8, #0x1
; CHECK-NEXT:    ret
  %v1 = load <16 x float>, ptr %a, align 4
  %v2 = fcmp une <16 x float> %v1, zeroinitializer
  %v3 = call i1 @llvm.vector.reduce.or.i1.v16i1 (<16 x i1> %v2)
  ret i1 %v3
}

; An exactly 512-bit vector length needs no vl constraint on the ptrue.
define i1 @ptest_v16i1_512bit_sve(ptr %a, ptr %b) vscale_range(4, 4) {
; CHECK-LABEL: ptest_v16i1_512bit_sve:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    fcmne p0.s, p0/z, z0.s, #0.0
; CHECK-NEXT:    mov z0.s, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT:    umaxv b0, v0.16b
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    and w0, w8, #0x1
; CHECK-NEXT:    ret
  %v1 = load <16 x float>, ptr %a, align 4
  %v2 = fcmp une <16 x float> %v1, zeroinitializer
  %v3 = call i1 @llvm.vector.reduce.or.i1.v16i1 (<16 x i1> %v2)
  ret i1 %v3
}

; The two compare results are ORed before the reduction.
define i1 @ptest_or_v16i1_512bit_min_sve(ptr %a, ptr %b) vscale_range(4, 0) {
; CHECK-LABEL: ptest_or_v16i1_512bit_min_sve:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl16
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    fcmne p1.s, p0/z, z0.s, #0.0
; CHECK-NEXT:    fcmne p0.s, p0/z, z1.s, #0.0
; CHECK-NEXT:    mov p0.b, p1/m, p1.b
; CHECK-NEXT:    mov z0.s, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT:    umaxv b0, v0.16b
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    and w0, w8, #0x1
; CHECK-NEXT:    ret
  %v1 = load <16 x float>, ptr %a, align 4
  %v2 = fcmp une <16 x float> %v1, zeroinitializer
  %v4 = load <16 x float>, ptr %b, align 4
  %v5 = fcmp une <16 x float> %v4, zeroinitializer
  %v6 = or <16 x i1> %v2, %v5
  %v7 = call i1 @llvm.vector.reduce.or.i1.v16i1 (<16 x i1> %v6)
  ret i1 %v7
}

declare i1 @llvm.vector.reduce.or.i1.v16i1(<16 x i1>)

;
; AND reduction.
;

; The AND is folded into the governing predicate of the second compare.
define i1 @ptest_and_v16i1_512bit_sve(ptr %a, ptr %b) vscale_range(4, 4) {
; CHECK-LABEL: ptest_and_v16i1_512bit_sve:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    fcmne p1.s, p0/z, z0.s, #0.0
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x1]
; CHECK-NEXT:    fcmne p0.s, p1/z, z0.s, #0.0
; CHECK-NEXT:    mov z0.s, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT:    uminv b0, v0.16b
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    and w0, w8, #0x1
; CHECK-NEXT:    ret
  %v1 = load <16 x float>, ptr %a, align 4
  %v2 = fcmp une <16 x float> %v1, zeroinitializer
  %v4 = load <16 x float>, ptr %b, align 4
  %v5 = fcmp une <16 x float> %v4, zeroinitializer
  %v6 = and <16 x i1> %v2, %v5
  %v7 = call i1 @llvm.vector.reduce.and.i1.v16i1 (<16 x i1> %v6)
  ret i1 %v7
}

; Here the AND of the two compare results stays a separate predicate AND.
define i1 @ptest_and_v16i1_512bit_min_sve(ptr %a, ptr %b) vscale_range(4, 0) {
; CHECK-LABEL: ptest_and_v16i1_512bit_min_sve:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s, vl16
; CHECK-NEXT:    ld1w { z0.s }, p0/z, [x0]
; CHECK-NEXT:    ld1w { z1.s }, p0/z, [x1]
; CHECK-NEXT:    fcmne p1.s, p0/z, z0.s, #0.0
; CHECK-NEXT:    fcmne p0.s, p0/z, z1.s, #0.0
; CHECK-NEXT:    and p0.b, p1/z, p1.b, p0.b
; CHECK-NEXT:    mov z0.s, p0/z, #-1 // =0xffffffffffffffff
; CHECK-NEXT:    uzp1 z0.h, z0.h, z0.h
; CHECK-NEXT:    uzp1 z0.b, z0.b, z0.b
; CHECK-NEXT:    uminv b0, v0.16b
; CHECK-NEXT:    fmov w8, s0
; CHECK-NEXT:    and w0, w8, #0x1
; CHECK-NEXT:    ret
  %v1 = load <16 x float>, ptr %a, align 4
  %v2 = fcmp une <16 x float> %v1, zeroinitializer
  %v4 = load <16 x float>, ptr %b, align 4
  %v5 = fcmp une <16 x float> %v4, zeroinitializer
  %v6 = and <16 x i1> %v2, %v5
  %v7 = call i1 @llvm.vector.reduce.and.i1.v16i1 (<16 x i1> %v6)
  ret i1 %v7
}

declare i1 @llvm.vector.reduce.and.i1.v16i1(<16 x i1>)