; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK

; Codegen test for count-trailing-zeros on NEON vector types.
;
; Every function below calls the llvm.cttz intrinsic with its second operand
; set to `i1 true` (per the LLVM LangRef, a zero input element then yields
; poison, so no special handling of zero is required in the expansion).
;
; The expected output first isolates the trailing-zero bits of each element
; as a mask, (x - 1) & ~x, using sub + bic, and then counts them:
;   * i8  elements: cnt (per-byte population count) on the mask.
;   * i16 / i32 elements: element-width minus clz of the mask.
;   * i64 elements: cnt followed by three widening pairwise adds (uaddlp).

; Intrinsic declarations: 64-bit (D-register) vectors.
declare <8 x i8> @llvm.cttz.v8i8(<8 x i8>, i1)
declare <4 x i16> @llvm.cttz.v4i16(<4 x i16>, i1)
declare <2 x i32> @llvm.cttz.v2i32(<2 x i32>, i1)
declare <1 x i64> @llvm.cttz.v1i64(<1 x i64>, i1)

; Intrinsic declarations: 128-bit (Q-register) vectors.
declare <16 x i8> @llvm.cttz.v16i8(<16 x i8>, i1)
declare <8 x i16> @llvm.cttz.v8i16(<8 x i16>, i1)
declare <4 x i32> @llvm.cttz.v4i32(<4 x i32>, i1)
declare <2 x i64> @llvm.cttz.v2i64(<2 x i64>, i1)

; 8 x i8: mask, then per-byte popcount.
define <8 x i8> @cttz_v8i8(<8 x i8> %a) nounwind {
; CHECK-LABEL: cttz_v8i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.8b, #1
; CHECK-NEXT:    sub v1.8b, v0.8b, v1.8b
; CHECK-NEXT:    bic v0.8b, v1.8b, v0.8b
; CHECK-NEXT:    cnt v0.8b, v0.8b
; CHECK-NEXT:    ret
  %tz = call <8 x i8> @llvm.cttz.v8i8(<8 x i8> %a, i1 true)
  ret <8 x i8> %tz
}

; 4 x i16: mask, then 16 - clz(mask).
define <4 x i16> @cttz_v4i16(<4 x i16> %a) nounwind {
; CHECK-LABEL: cttz_v4i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.4h, #1
; CHECK-NEXT:    sub v1.4h, v0.4h, v1.4h
; CHECK-NEXT:    bic v0.8b, v1.8b, v0.8b
; CHECK-NEXT:    movi v1.4h, #16
; CHECK-NEXT:    clz v0.4h, v0.4h
; CHECK-NEXT:    sub v0.4h, v1.4h, v0.4h
; CHECK-NEXT:    ret
  %tz = call <4 x i16> @llvm.cttz.v4i16(<4 x i16> %a, i1 true)
  ret <4 x i16> %tz
}

; 2 x i32: mask, then 32 - clz(mask).
define <2 x i32> @cttz_v2i32(<2 x i32> %a) nounwind {
; CHECK-LABEL: cttz_v2i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.2s, #1
; CHECK-NEXT:    sub v1.2s, v0.2s, v1.2s
; CHECK-NEXT:    bic v0.8b, v1.8b, v0.8b
; CHECK-NEXT:    movi v1.2s, #32
; CHECK-NEXT:    clz v0.2s, v0.2s
; CHECK-NEXT:    sub v0.2s, v1.2s, v0.2s
; CHECK-NEXT:    ret
  %tz = call <2 x i32> @llvm.cttz.v2i32(<2 x i32> %a, i1 true)
  ret <2 x i32> %tz
}

; 1 x i64: the constant 1 is materialised through a GPR (mov + fmov), then
; mask, per-byte popcount, and pairwise widening adds up to a 64-bit lane.
define <1 x i64> @cttz_v1i64(<1 x i64> %a) nounwind {
; CHECK-LABEL: cttz_v1i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #1 // =0x1
; CHECK-NEXT:    fmov d1, x8
; CHECK-NEXT:    sub d1, d0, d1
; CHECK-NEXT:    bic v0.8b, v1.8b, v0.8b
; CHECK-NEXT:    cnt v0.8b, v0.8b
; CHECK-NEXT:    uaddlp v0.4h, v0.8b
; CHECK-NEXT:    uaddlp v0.2s, v0.4h
; CHECK-NEXT:    uaddlp v0.1d, v0.2s
; CHECK-NEXT:    ret
  %tz = call <1 x i64> @llvm.cttz.v1i64(<1 x i64> %a, i1 true)
  ret <1 x i64> %tz
}

; 16 x i8: 128-bit counterpart of cttz_v8i8.
define <16 x i8> @cttz_v16i8(<16 x i8> %a) nounwind {
; CHECK-LABEL: cttz_v16i8:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.16b, #1
; CHECK-NEXT:    sub v1.16b, v0.16b, v1.16b
; CHECK-NEXT:    bic v0.16b, v1.16b, v0.16b
; CHECK-NEXT:    cnt v0.16b, v0.16b
; CHECK-NEXT:    ret
  %tz = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
  ret <16 x i8> %tz
}

; 8 x i16: 128-bit counterpart of cttz_v4i16.
define <8 x i16> @cttz_v8i16(<8 x i16> %a) nounwind {
; CHECK-LABEL: cttz_v8i16:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.8h, #1
; CHECK-NEXT:    sub v1.8h, v0.8h, v1.8h
; CHECK-NEXT:    bic v0.16b, v1.16b, v0.16b
; CHECK-NEXT:    movi v1.8h, #16
; CHECK-NEXT:    clz v0.8h, v0.8h
; CHECK-NEXT:    sub v0.8h, v1.8h, v0.8h
; CHECK-NEXT:    ret
  %tz = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
  ret <8 x i16> %tz
}

; 4 x i32: 128-bit counterpart of cttz_v2i32.
define <4 x i32> @cttz_v4i32(<4 x i32> %a) nounwind {
; CHECK-LABEL: cttz_v4i32:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi v1.4s, #1
; CHECK-NEXT:    sub v1.4s, v0.4s, v1.4s
; CHECK-NEXT:    bic v0.16b, v1.16b, v0.16b
; CHECK-NEXT:    movi v1.4s, #32
; CHECK-NEXT:    clz v0.4s, v0.4s
; CHECK-NEXT:    sub v0.4s, v1.4s, v0.4s
; CHECK-NEXT:    ret
  %tz = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
  ret <4 x i32> %tz
}

; 2 x i64: 128-bit counterpart of cttz_v1i64; the constant 1 is broadcast
; from a GPR with dup instead of fmov.
define <2 x i64> @cttz_v2i64(<2 x i64> %a) nounwind {
; CHECK-LABEL: cttz_v2i64:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov w8, #1 // =0x1
; CHECK-NEXT:    dup v1.2d, x8
; CHECK-NEXT:    sub v1.2d, v0.2d, v1.2d
; CHECK-NEXT:    bic v0.16b, v1.16b, v0.16b
; CHECK-NEXT:    cnt v0.16b, v0.16b
; CHECK-NEXT:    uaddlp v0.8h, v0.16b
; CHECK-NEXT:    uaddlp v0.4s, v0.8h
; CHECK-NEXT:    uaddlp v0.2d, v0.4s
; CHECK-NEXT:    ret
  %tz = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
  ret <2 x i64> %tz
}