; RUN: llc -mtriple=aarch64-linux-gnu < %s | FileCheck %s

; Code generation test for the llvm.experimental.cttz.elts intrinsic on
; fixed-width vectors, compiled for aarch64-linux-gnu.
;
; Every function below expects the same overall shape of output:
;   * a constant-pool table holding the lane count down to 1,
;   * the input turned into a per-lane all-ones / all-zeros mask,
;   * the mask ANDed with that table and reduced with umaxv,
;   * the reduced value subtracted from the lane count.
; The constant-pool labels (.LCPIn_0) are numbered in function order, so the
; functions must stay in this order for the expectations to match.

; FIXED WIDTH

; <8 x i1> input, i8 result, second intrinsic operand false.
define i8 @ctz_v8i1(<8 x i1> %a) {
; CHECK-LABEL: .LCPI0_0:
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 6
; CHECK-NEXT: .byte 5
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 3
; CHECK-NEXT: .byte 2
; CHECK-NEXT: .byte 1
; CHECK-LABEL: ctz_v8i1:
; CHECK: // %bb.0:
; CHECK-NEXT: shl v0.8b, v0.8b, #7
; CHECK-NEXT: adrp x8, .LCPI0_0
; CHECK-NEXT: mov w9, #8 // =0x8
; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI0_0]
; CHECK-NEXT: cmlt v0.8b, v0.8b, #0
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-NEXT: umaxv b0, v0.8b
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: sub w0, w9, w8
; CHECK-NEXT: ret
  %count = call i8 @llvm.experimental.cttz.elts.i8.v8i1(<8 x i1> %a, i1 false)
  ret i8 %count
}

; <16 x i1> input with an i32 result. Compared with the i8 case above, the
; expected output ends with an extra "and ..., #0xff" on the subtraction result.
define i32 @ctz_v16i1(<16 x i1> %a) {
; CHECK-LABEL: .LCPI1_0:
; CHECK-NEXT: .byte 16
; CHECK-NEXT: .byte 15
; CHECK-NEXT: .byte 14
; CHECK-NEXT: .byte 13
; CHECK-NEXT: .byte 12
; CHECK-NEXT: .byte 11
; CHECK-NEXT: .byte 10
; CHECK-NEXT: .byte 9
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 6
; CHECK-NEXT: .byte 5
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 3
; CHECK-NEXT: .byte 2
; CHECK-NEXT: .byte 1
; CHECK-LABEL: ctz_v16i1:
; CHECK: // %bb.0:
; CHECK-NEXT: shl v0.16b, v0.16b, #7
; CHECK-NEXT: adrp x8, .LCPI1_0
; CHECK-NEXT: mov w9, #16 // =0x10
; CHECK-NEXT: ldr q1, [x8, :lo12:.LCPI1_0]
; CHECK-NEXT: cmlt v0.16b, v0.16b, #0
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: umaxv b0, v0.16b
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: sub w8, w9, w8
; CHECK-NEXT: and w0, w8, #0xff
; CHECK-NEXT: ret
  %count = call i32 @llvm.experimental.cttz.elts.i32.v16i1(<16 x i1> %a, i1 false)
  ret i32 %count
}

; <4 x i32> input (not a vector of i1) with an i16 result. The expected output
; builds the lane mask with cmtst, narrows it with xtn, and uses a halfword
; table and a halfword umaxv.
define i16 @ctz_v4i32(<4 x i32> %a) {
; CHECK-LABEL: .LCPI2_0:
; CHECK-NEXT: .hword 4
; CHECK-NEXT: .hword 3
; CHECK-NEXT: .hword 2
; CHECK-NEXT: .hword 1
; CHECK-LABEL: ctz_v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: cmtst v0.4s, v0.4s, v0.4s
; CHECK-NEXT: adrp x8, .LCPI2_0
; CHECK-NEXT: mov w9, #4 // =0x4
; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI2_0]
; CHECK-NEXT: xtn v0.4h, v0.4s
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-NEXT: umaxv h0, v0.4h
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: sub w8, w9, w8
; CHECK-NEXT: and w0, w8, #0xff
; CHECK-NEXT: ret
  %count = call i16 @llvm.experimental.cttz.elts.i16.v4i32(<4 x i32> %a, i1 false)
  ret i16 %count
}

; <8 x i1> input with a non-power-of-two i7 result type. The expected
; instruction sequence is the same as for the i8 variant ctz_v8i1.
define i7 @ctz_i7_v8i1(<8 x i1> %a) {
; CHECK-LABEL: .LCPI3_0:
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 6
; CHECK-NEXT: .byte 5
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 3
; CHECK-NEXT: .byte 2
; CHECK-NEXT: .byte 1
; CHECK-LABEL: ctz_i7_v8i1:
; CHECK: // %bb.0:
; CHECK-NEXT: shl v0.8b, v0.8b, #7
; CHECK-NEXT: adrp x8, .LCPI3_0
; CHECK-NEXT: mov w9, #8 // =0x8
; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI3_0]
; CHECK-NEXT: cmlt v0.8b, v0.8b, #0
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-NEXT: umaxv b0, v0.8b
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: sub w0, w9, w8
; CHECK-NEXT: ret
  %count = call i7 @llvm.experimental.cttz.elts.i7.v8i1(<8 x i1> %a, i1 false)
  ret i7 %count
}

; ZERO IS POISON

; Same types as ctz_v8i1, but the second intrinsic operand is true. The
; expected instruction sequence is the same as in the non-poison variant.
define i8 @ctz_v8i1_poison(<8 x i1> %a) {
; CHECK-LABEL: .LCPI4_0:
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 6
; CHECK-NEXT: .byte 5
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 3
; CHECK-NEXT: .byte 2
; CHECK-NEXT: .byte 1
; CHECK-LABEL: ctz_v8i1_poison:
; CHECK: // %bb.0:
; CHECK-NEXT: shl v0.8b, v0.8b, #7
; CHECK-NEXT: adrp x8, .LCPI4_0
; CHECK-NEXT: mov w9, #8 // =0x8
; CHECK-NEXT: ldr d1, [x8, :lo12:.LCPI4_0]
; CHECK-NEXT: cmlt v0.8b, v0.8b, #0
; CHECK-NEXT: and v0.8b, v0.8b, v1.8b
; CHECK-NEXT: umaxv b0, v0.8b
; CHECK-NEXT: fmov w8, s0
; CHECK-NEXT: sub w0, w9, w8
; CHECK-NEXT: ret
  %count = call i8 @llvm.experimental.cttz.elts.i8.v8i1(<8 x i1> %a, i1 true)
  ret i8 %count
}

; Declarations of the intrinsic overloads called above.
declare i8 @llvm.experimental.cttz.elts.i8.v8i1(<8 x i1>, i1)
declare i7 @llvm.experimental.cttz.elts.i7.v8i1(<8 x i1>, i1)
declare i32 @llvm.experimental.cttz.elts.i32.v16i1(<16 x i1>, i1)
declare i16 @llvm.experimental.cttz.elts.i16.v4i32(<4 x i32>, i1)