; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s

; ANDV

define i8 @andv_nxv8i8(<vscale x 8 x i8> %a) {
; CHECK-LABEL: andv_nxv8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    andv h0, p0, z0.h
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %res = call i8 @llvm.vector.reduce.and.nxv8i8(<vscale x 8 x i8> %a)
  ret i8 %res
}

define i32 @andv_nxv8i32(<vscale x 8 x i32> %a) {
; CHECK-LABEL: andv_nxv8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and z0.d, z0.d, z1.d
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    andv s0, p0, z0.s
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %res = call i32 @llvm.vector.reduce.and.nxv8i32(<vscale x 8 x i32> %a)
  ret i32 %res
}

; ORV

define i32 @orv_nxv2i32(<vscale x 2 x i32> %a) {
; CHECK-LABEL: orv_nxv2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    orv d0, p0, z0.d
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %res = call i32 @llvm.vector.reduce.or.nxv2i32(<vscale x 2 x i32> %a)
  ret i32 %res
}

define i64 @orv_nxv8i64(<vscale x 8 x i64> %a) {
; CHECK-LABEL: orv_nxv8i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    orr z1.d, z1.d, z3.d
; CHECK-NEXT:    orr z0.d, z0.d, z2.d
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    orr z0.d, z0.d, z1.d
; CHECK-NEXT:    orv d0, p0, z0.d
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %res = call i64 @llvm.vector.reduce.or.nxv8i64(<vscale x 8 x i64> %a)
  ret i64 %res
}

; XORV

define i16 @xorv_nxv2i16(<vscale x 2 x i16> %a) {
; CHECK-LABEL: xorv_nxv2i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    eorv d0, p0, z0.d
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %res = call i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16> %a)
  ret i16 %res
}

define i32 @xorv_nxv8i32(<vscale x 8 x i32> %a) {
; CHECK-LABEL: xorv_nxv8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    eor z0.d, z0.d, z1.d
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    eorv s0, p0, z0.s
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %res = call i32 @llvm.vector.reduce.xor.nxv8i32(<vscale x 8 x i32> %a)
  ret i32 %res
}

; UADDV

define i16 @uaddv_nxv4i16(<vscale x 4 x i16> %a) {
; CHECK-LABEL: uaddv_nxv4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    uaddv d0, p0, z0.s
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %res = call i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16> %a)
  ret i16 %res
}

define i16 @uaddv_nxv16i16(<vscale x 16 x i16> %a) {
; CHECK-LABEL: uaddv_nxv16i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add z0.h, z0.h, z1.h
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    uaddv d0, p0, z0.h
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %res = call i16 @llvm.vector.reduce.add.nxv16i16(<vscale x 16 x i16> %a)
  ret i16 %res
}

define i32 @uaddv_nxv16i32(<vscale x 16 x i32> %a) {
; CHECK-LABEL: uaddv_nxv16i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    add z1.s, z1.s, z3.s
; CHECK-NEXT:    add z0.s, z0.s, z2.s
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    add z0.s, z0.s, z1.s
; CHECK-NEXT:    uaddv d0, p0, z0.s
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %res = call i32 @llvm.vector.reduce.add.nxv16i32(<vscale x 16 x i32> %a)
  ret i32 %res
}

; UMINV

define i32 @umin_nxv2i32(<vscale x 2 x i32> %a) {
; CHECK-LABEL: umin_nxv2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    and z0.d, z0.d, #0xffffffff
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    uminv d0, p0, z0.d
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %res = call i32 @llvm.vector.reduce.umin.nxv2i32(<vscale x 2 x i32> %a)
  ret i32 %res
}

define i64 @umin_nxv4i64(<vscale x 4 x i64> %a) {
; CHECK-LABEL: umin_nxv4i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    umin z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    uminv d0, p0, z0.d
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %res = call i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64> %a)
  ret i64 %res
}

; SMINV

define i8 @smin_nxv4i8(<vscale x 4 x i8> %a) {
; CHECK-LABEL: smin_nxv4i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    sxtb z0.s, p0/m, z0.s
; CHECK-NEXT:    sminv s0, p0, z0.s
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %res = call i8 @llvm.vector.reduce.smin.nxv4i8(<vscale x 4 x i8> %a)
  ret i8 %res
}

define i32 @smin_nxv8i32(<vscale x 8 x i32> %a) {
; CHECK-LABEL: smin_nxv8i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.s
; CHECK-NEXT:    smin z0.s, p0/m, z0.s, z1.s
; CHECK-NEXT:    sminv s0, p0, z0.s
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %res = call i32 @llvm.vector.reduce.smin.nxv8i32(<vscale x 8 x i32> %a)
  ret i32 %res
}

; UMAXV

define i16 @umax_nxv16i16(<vscale x 16 x i16> %a) {
; CHECK-LABEL: umax_nxv16i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.h
; CHECK-NEXT:    umax z0.h, p0/m, z0.h, z1.h
; CHECK-NEXT:    umaxv h0, p0, z0.h
; CHECK-NEXT:    fmov w0, s0
; CHECK-NEXT:    ret
  %res = call i16 @llvm.vector.reduce.umax.nxv16i16(<vscale x 16 x i16> %a)
  ret i16 %res
}

; SMAXV

define i64 @smax_nxv8i64(<vscale x 8 x i64> %a) {
; CHECK-LABEL: smax_nxv8i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ptrue p0.d
; CHECK-NEXT:    smax z1.d, p0/m, z1.d, z3.d
; CHECK-NEXT:    smax z0.d, p0/m, z0.d, z2.d
; CHECK-NEXT:    smax z0.d, p0/m, z0.d, z1.d
; CHECK-NEXT:    smaxv d0, p0, z0.d
; CHECK-NEXT:    fmov x0, d0
; CHECK-NEXT:    ret
  %res = call i64 @llvm.vector.reduce.smax.nxv8i64(<vscale x 8 x i64> %a)
  ret i64 %res
}

declare i8 @llvm.vector.reduce.and.nxv8i8(<vscale x 8 x i8>)
declare i32 @llvm.vector.reduce.and.nxv8i32(<vscale x 8 x i32>)

declare i32 @llvm.vector.reduce.or.nxv2i32(<vscale x 2 x i32>)
declare i64 @llvm.vector.reduce.or.nxv8i64(<vscale x 8 x i64>)

declare i16 @llvm.vector.reduce.xor.nxv2i16(<vscale x 2 x i16>)
declare i32 @llvm.vector.reduce.xor.nxv8i32(<vscale x 8 x i32>)

declare i16 @llvm.vector.reduce.add.nxv4i16(<vscale x 4 x i16>)
declare i16 @llvm.vector.reduce.add.nxv16i16(<vscale x 16 x i16>)
declare i32 @llvm.vector.reduce.add.nxv16i32(<vscale x 16 x i32>)

declare i32 @llvm.vector.reduce.umin.nxv2i32(<vscale x 2 x i32>)
declare i64 @llvm.vector.reduce.umin.nxv4i64(<vscale x 4 x i64>)

declare i8 @llvm.vector.reduce.smin.nxv4i8(<vscale x 4 x i8>)
declare i32 @llvm.vector.reduce.smin.nxv8i32(<vscale x 8 x i32>)

declare i16 @llvm.vector.reduce.umax.nxv16i16(<vscale x 16 x i16>)

declare i64 @llvm.vector.reduce.smax.nxv8i64(<vscale x 8 x i64>)