; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-- -mattr=+sse2 | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.2 | FileCheck %s --check-prefixes=CHECK,X64

; Tests that a freeze between two identical/composable unary ops does not
; block the expected fold: sext(freeze(sext x)) -> one extend,
; zext(freeze(zext x)) -> one extend, bswap(freeze(bswap x)) -> x, etc.

; sext(freeze(sext i8 -> i16)) -> single sign extend to i32.
define i32 @freeze_sext(i8 %a0) nounwind {
; X86-LABEL: freeze_sext:
; X86:       # %bb.0:
; X86-NEXT:    movsbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: freeze_sext:
; X64:       # %bb.0:
; X64-NEXT:    movsbl %dil, %eax
; X64-NEXT:    retq
  %x = sext i8 %a0 to i16
  %y = freeze i16 %x
  %z = sext i16 %y to i32
  ret i32 %z
}

; Vector variant: still a single widening sequence (pmovsxbd on SSE4.2).
define <4 x i32> @freeze_sext_vec(<4 x i8> %a0) nounwind {
; X86-LABEL: freeze_sext_vec:
; X86:       # %bb.0:
; X86-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; X86-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; X86-NEXT:    psrad $24, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: freeze_sext_vec:
; X64:       # %bb.0:
; X64-NEXT:    pmovsxbd %xmm0, %xmm0
; X64-NEXT:    retq
  %x = sext <4 x i8> %a0 to <4 x i16>
  %y = freeze <4 x i16> %x
  %z = sext <4 x i16> %y to <4 x i32>
  ret <4 x i32> %z
}

; zext(freeze(zext i8 -> i16)) -> single zero extend to i32.
define i32 @freeze_zext(i8 %a0) nounwind {
; X86-LABEL: freeze_zext:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: freeze_zext:
; X64:       # %bb.0:
; X64-NEXT:    movzbl %dil, %eax
; X64-NEXT:    retq
  %x = zext i8 %a0 to i16
  %y = freeze i16 %x
  %z = zext i16 %y to i32
  ret i32 %z
}

; Vector variant: single widening sequence (pmovzxwq on SSE4.2).
define <2 x i64> @freeze_zext_vec(<2 x i16> %a0) nounwind {
; X86-LABEL: freeze_zext_vec:
; X86:       # %bb.0:
; X86-NEXT:    pxor %xmm1, %xmm1
; X86-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X86-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-NEXT:    retl
;
; X64-LABEL: freeze_zext_vec:
; X64:       # %bb.0:
; X64-NEXT:    pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; X64-NEXT:    retq
  %x = zext <2 x i16> %a0 to <2 x i32>
  %y = freeze <2 x i32> %x
  %z = zext <2 x i32> %y to <2 x i64>
  ret <2 x i64> %z
}

; bswap(freeze(bswap x)) == x: both bswaps cancel through the freeze.
define i32 @freeze_bswap(i32 %a0) nounwind {
; X86-LABEL: freeze_bswap:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: freeze_bswap:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    retq
  %x = call i32 @llvm.bswap.i32(i32 %a0)
  %y = freeze i32 %x
  %z = call i32 @llvm.bswap.i32(i32 %y)
  ret i32 %z
}
declare i32 @llvm.bswap.i32(i32)

; Vector variant: the pair of bswaps cancels, leaving only the return.
define <4 x i32> @freeze_bswap_vec(<4 x i32> %a0) nounwind {
; CHECK-LABEL: freeze_bswap_vec:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}}
  %x = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %a0)
  %y = freeze <4 x i32> %x
  %z = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> %y)
  ret <4 x i32> %z
}
declare <4 x i32> @llvm.bswap.v4i32(<4 x i32>)

; bitreverse(freeze(bitreverse x)) == x: both reversals cancel.
define i32 @freeze_bitreverse(i32 %a0) nounwind {
; X86-LABEL: freeze_bitreverse:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    retl
;
; X64-LABEL: freeze_bitreverse:
; X64:       # %bb.0:
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    retq
  %x = call i32 @llvm.bitreverse.i32(i32 %a0)
  %y = freeze i32 %x
  %z = call i32 @llvm.bitreverse.i32(i32 %y)
  ret i32 %z
}
declare i32 @llvm.bitreverse.i32(i32)

; Vector variant: the pair of bitreverses cancels, leaving only the return.
define <4 x i32> @freeze_bitreverse_vec(<4 x i32> %a0) nounwind {
; CHECK-LABEL: freeze_bitreverse_vec:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}}
  %x = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> %a0)
  %y = freeze <4 x i32> %x
  %z = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> %y)
  ret <4 x i32> %z
}
declare <4 x i32> @llvm.bitreverse.v4i32(<4 x i32>)

; split parity pattern
; freeze(ctpop x) & 1 should still be recognized as a parity test (setnp)
; in the scalar case.
define i8 @freeze_ctpop(i8 %a0) nounwind {
; X86-LABEL: freeze_ctpop:
; X86:       # %bb.0:
; X86-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
; X86-NEXT:    setnp %al
; X86-NEXT:    retl
;
; X64-LABEL: freeze_ctpop:
; X64:       # %bb.0:
; X64-NEXT:    testb %dil, %dil
; X64-NEXT:    setnp %al
; X64-NEXT:    retq
  %x = call i8 @llvm.ctpop.i8(i8 %a0)
  %y = freeze i8 %x
  %z = and i8 %y, 1
  ret i8 %z
}
declare i8 @llvm.ctpop.i8(i8)

; Vector variant currently expands the full popcount sequence before masking;
; the CHECK lines document that (no vector parity shortcut exists).
define <16 x i8> @freeze_ctpop_vec(<16 x i8> %a0) nounwind {
; X86-LABEL: freeze_ctpop_vec:
; X86:       # %bb.0:
; X86-NEXT:    movdqa %xmm0, %xmm1
; X86-NEXT:    psrlw $1, %xmm1
; X86-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-NEXT:    psubb %xmm1, %xmm0
; X86-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; X86-NEXT:    movdqa %xmm0, %xmm2
; X86-NEXT:    pand %xmm1, %xmm2
; X86-NEXT:    psrlw $2, %xmm0
; X86-NEXT:    pand %xmm1, %xmm0
; X86-NEXT:    paddb %xmm2, %xmm0
; X86-NEXT:    movdqa %xmm0, %xmm1
; X86-NEXT:    psrlw $4, %xmm1
; X86-NEXT:    paddb %xmm1, %xmm0
; X86-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: freeze_ctpop_vec:
; X64:       # %bb.0:
; X64-NEXT:    movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; X64-NEXT:    movdqa %xmm0, %xmm3
; X64-NEXT:    pand %xmm2, %xmm3
; X64-NEXT:    movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; X64-NEXT:    movdqa %xmm1, %xmm4
; X64-NEXT:    pshufb %xmm3, %xmm4
; X64-NEXT:    psrlw $4, %xmm0
; X64-NEXT:    pand %xmm2, %xmm0
; X64-NEXT:    pshufb %xmm0, %xmm1
; X64-NEXT:    paddb %xmm4, %xmm1
; X64-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; X64-NEXT:    movdqa %xmm1, %xmm0
; X64-NEXT:    retq
  %x = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a0)
  %y = freeze <16 x i8> %x
  %z = and <16 x i8> %y, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %z
}
declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8>)

; parity knownbits pattern
; The second `and %z, 1` after the freeze is redundant by known bits
; (only bit 0 of `%y` can be set) and must fold away.
define i8 @freeze_parity(i8 %a0) nounwind {
; X86-LABEL: freeze_parity:
; X86:       # %bb.0:
; X86-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
; X86-NEXT:    setnp %al
; X86-NEXT:    retl
;
; X64-LABEL: freeze_parity:
; X64:       # %bb.0:
; X64-NEXT:    testb %dil, %dil
; X64-NEXT:    setnp %al
; X64-NEXT:    retq
  %x = call i8 @llvm.ctpop.i8(i8 %a0)
  %y = and i8 %x, 1
  %z = freeze i8 %y
  %w = and i8 %z, 1
  ret i8 %w
}

; Vector variant of the knownbits pattern. NOTE(review): the original returned
; %z, leaving %w dead and defeating the point of the test; return %w so the
; redundant post-freeze AND is actually exercised. %w == %z by known bits, so
; the autogenerated CHECK lines are unchanged.
define <16 x i8> @freeze_parity_vec(<16 x i8> %a0) nounwind {
; X86-LABEL: freeze_parity_vec:
; X86:       # %bb.0:
; X86-NEXT:    movdqa %xmm0, %xmm1
; X86-NEXT:    psrlw $1, %xmm1
; X86-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
; X86-NEXT:    psubb %xmm1, %xmm0
; X86-NEXT:    movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51]
; X86-NEXT:    movdqa %xmm0, %xmm2
; X86-NEXT:    pand %xmm1, %xmm2
; X86-NEXT:    psrlw $2, %xmm0
; X86-NEXT:    pand %xmm1, %xmm0
; X86-NEXT:    paddb %xmm2, %xmm0
; X86-NEXT:    movdqa %xmm0, %xmm1
; X86-NEXT:    psrlw $4, %xmm1
; X86-NEXT:    paddb %xmm1, %xmm0
; X86-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: freeze_parity_vec:
; X64:       # %bb.0:
; X64-NEXT:    movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; X64-NEXT:    movdqa %xmm0, %xmm3
; X64-NEXT:    pand %xmm2, %xmm3
; X64-NEXT:    movdqa {{.*#+}} xmm1 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4]
; X64-NEXT:    movdqa %xmm1, %xmm4
; X64-NEXT:    pshufb %xmm3, %xmm4
; X64-NEXT:    psrlw $4, %xmm0
; X64-NEXT:    pand %xmm2, %xmm0
; X64-NEXT:    pshufb %xmm0, %xmm1
; X64-NEXT:    paddb %xmm4, %xmm1
; X64-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; X64-NEXT:    movdqa %xmm1, %xmm0
; X64-NEXT:    retq
  %x = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a0)
  %y = and <16 x i8> %x, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  %z = freeze <16 x i8> %y
  %w = and <16 x i8> %z, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %w
}