; RUN: llc -mtriple=x86_64-unknown-unknown < %s | FileCheck %s

; Codegen test for the llvm.experimental.cttz.elts intrinsic on the generic
; x86-64 triple used by the llc invocation above. Each function below calls
; the intrinsic on a fixed-width vector and pins the exact instruction
; sequence llc is expected to print, plus the constant-pool bytes emitted
; ahead of the function body.
;
; NOTE(review): the expected assembly looks tool-generated, but the file
; carries no generator banner - confirm before regenerating, because the
; constant-pool label checks may have been added by hand.

; <8 x i16> input, i8 result, second intrinsic operand i1 0.
; Expected output: a constant pool holding the bytes 8 down to 1, a
; compare-against-zero mask (pxor + pcmpeqw) narrowed to bytes with packsswb,
; a pandn with a constant-pool operand (presumably .LCPI0_0), a spill to the
; stack, a cmpb/cmov chain over the spilled bytes, and finally 8 minus the
; selected byte (movb $8 / subb).
define i8 @ctz_v8i16(<8 x i16> %a) {
; CHECK-LABEL: .LCPI0_0:
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 6
; CHECK-NEXT: .byte 5
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 3
; CHECK-NEXT: .byte 2
; CHECK-NEXT: .byte 1
; CHECK-LABEL: ctz_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: pxor %xmm1, %xmm1
; CHECK-NEXT: pcmpeqw %xmm0, %xmm1
; CHECK-NEXT: packsswb %xmm1, %xmm1
; CHECK-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; CHECK-NEXT: movl -{{[0-9]+}}(%rsp), %eax
; CHECK-NEXT: movl -{{[0-9]+}}(%rsp), %edx
; CHECK-NEXT: cmpb %cl, %al
; CHECK-NEXT: cmoval %eax, %ecx
; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; CHECK-NEXT: cmpb %al, %cl
; CHECK-NEXT: cmovbel %eax, %ecx
; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; CHECK-NEXT: cmpb %al, %cl
; CHECK-NEXT: cmovbel %eax, %ecx
; CHECK-NEXT: cmpb %dl, %cl
; CHECK-NEXT: cmovbel %edx, %ecx
; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; CHECK-NEXT: cmpb %al, %cl
; CHECK-NEXT: cmovbel %eax, %ecx
; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; CHECK-NEXT: cmpb %al, %cl
; CHECK-NEXT: cmovbel %eax, %ecx
; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; CHECK-NEXT: cmpb %al, %cl
; CHECK-NEXT: cmovbel %eax, %ecx
; CHECK-NEXT: movb $8, %al
; CHECK-NEXT: subb %cl, %al
; CHECK-NEXT: retq
  %res = call i8 @llvm.experimental.cttz.elts.i8.v8i16(<8 x i16> %a, i1 0)
  ret i8 %res
}

; <4 x i32> input, i16 result, second intrinsic operand i1 0.
; Expected output: a 4-byte constant pool (4 down to 1), a mask that is
; narrowed twice (packssdw, then packsswb) and moved to a GPR with movd, so
; the byte-wise cmpb/cmov chain works on shifted copies of %eax instead of
; stack reloads. The result is 4 minus the selected byte, zero-extended with
; movzbl before the i16 return.
define i16 @ctz_v4i32(<4 x i32> %a) {
; CHECK-LABEL: .LCPI1_0:
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 3
; CHECK-NEXT: .byte 2
; CHECK-NEXT: .byte 1
; CHECK-LABEL: ctz_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: pxor %xmm1, %xmm1
; CHECK-NEXT: pcmpeqd %xmm0, %xmm1
; CHECK-NEXT: packssdw %xmm1, %xmm1
; CHECK-NEXT: pcmpeqd %xmm0, %xmm0
; CHECK-NEXT: pxor %xmm1, %xmm0
; CHECK-NEXT: packsswb %xmm0, %xmm0
; CHECK-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-NEXT: movd %xmm0, %eax
; CHECK-NEXT: movl %eax, %ecx
; CHECK-NEXT: shrl $8, %ecx
; CHECK-NEXT: cmpb %cl, %al
; CHECK-NEXT: cmoval %eax, %ecx
; CHECK-NEXT: movl %eax, %edx
; CHECK-NEXT: shrl $16, %edx
; CHECK-NEXT: cmpb %dl, %cl
; CHECK-NEXT: cmoval %ecx, %edx
; CHECK-NEXT: shrl $24, %eax
; CHECK-NEXT: cmpb %al, %dl
; CHECK-NEXT: cmoval %edx, %eax
; CHECK-NEXT: movb $4, %cl
; CHECK-NEXT: subb %al, %cl
; CHECK-NEXT: movzbl %cl, %eax
; CHECK-NEXT: # kill: def $ax killed $ax killed $eax
; CHECK-NEXT: retq
  %res = call i16 @llvm.experimental.cttz.elts.i16.v4i32(<4 x i32> %a, i1 0)
  ret i16 %res
}

; ZERO IS POISON

; Same <8 x i16> -> i8 call as ctz_v8i16, but with the second intrinsic
; operand set to i1 1. The expected instruction sequence is line-for-line the
; same as for ctz_v8i16 (only the constant-pool and function labels differ),
; i.e. this test records that the flag does not change the generated code.
; NOTE(review): per the LLVM LangRef the flag marks an all-zero input as
; yielding poison - confirm against the LangRef entry for this intrinsic.
define i8 @ctz_v8i16_poison(<8 x i16> %a) {
; CHECK-LABEL: .LCPI2_0:
; CHECK-NEXT: .byte 8
; CHECK-NEXT: .byte 7
; CHECK-NEXT: .byte 6
; CHECK-NEXT: .byte 5
; CHECK-NEXT: .byte 4
; CHECK-NEXT: .byte 3
; CHECK-NEXT: .byte 2
; CHECK-NEXT: .byte 1
; CHECK-LABEL: ctz_v8i16_poison:
; CHECK: # %bb.0:
; CHECK-NEXT: pxor %xmm1, %xmm1
; CHECK-NEXT: pcmpeqw %xmm0, %xmm1
; CHECK-NEXT: packsswb %xmm1, %xmm1
; CHECK-NEXT: pandn {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-NEXT: movdqa %xmm1, -{{[0-9]+}}(%rsp)
; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %ecx
; CHECK-NEXT: movl -{{[0-9]+}}(%rsp), %eax
; CHECK-NEXT: movl -{{[0-9]+}}(%rsp), %edx
; CHECK-NEXT: cmpb %cl, %al
; CHECK-NEXT: cmoval %eax, %ecx
; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; CHECK-NEXT: cmpb %al, %cl
; CHECK-NEXT: cmovbel %eax, %ecx
; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; CHECK-NEXT: cmpb %al, %cl
; CHECK-NEXT: cmovbel %eax, %ecx
; CHECK-NEXT: cmpb %dl, %cl
; CHECK-NEXT: cmovbel %edx, %ecx
; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; CHECK-NEXT: cmpb %al, %cl
; CHECK-NEXT: cmovbel %eax, %ecx
; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; CHECK-NEXT: cmpb %al, %cl
; CHECK-NEXT: cmovbel %eax, %ecx
; CHECK-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; CHECK-NEXT: cmpb %al, %cl
; CHECK-NEXT: cmovbel %eax, %ecx
; CHECK-NEXT: movb $8, %al
; CHECK-NEXT: subb %cl, %al
; CHECK-NEXT: retq
  %res = call i8 @llvm.experimental.cttz.elts.i8.v8i16(<8 x i16> %a, i1 1)
  ret i8 %res
}

; Declarations of the two intrinsic overloads called above; the suffixes
; encode the result type and the vector operand type.
declare i8 @llvm.experimental.cttz.elts.i8.v8i16(<8 x i16>, i1)
declare i16 @llvm.experimental.cttz.elts.i16.v4i32(<4 x i32>, i1)