; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=X64

; Codegen checks for the SSE4.2 string-compare intrinsics (pcmpestri/pcmpestrm
; and pcmpistri) on an i686 and an x86_64 triple.
; Most functions are named <instruction>_<reg|mem>_<eq|idx|diff>_<i8|i16>:
;   reg / mem - vector operands are function arguments / are loaded (align 1)
;               through pointer arguments.
;   eq        - the *c128 intrinsic result is compared against 0.
;   idx       - the index intrinsic result is returned directly.
;   diff      - the index selects one element from each operand; the
;               zero-extended difference is returned, or 0 when the index
;               equals the sentinel tested in the entry block.
;   i8 / i16  - the IR-level element type; i16 vectors are bitcast to
;               <16 x i8> before the call.

declare i32 @llvm.x86.sse42.pcmpestric128(<16 x i8> %lhs, i32, <16 x i8>, i32, i8)
declare i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %lhs, i32, <16 x i8>, i32, i8)
declare <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %lhs, i32, <16 x i8>, i32, i8)
declare i32 @llvm.x86.sse42.pcmpistric128(<16 x i8> %lhs, <16 x i8>, i8)
declare i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %lhs, <16 x i8>, i8)
declare <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %lhs, <16 x i8>, i8)

; Register operands; pcmpestric128 result tested against 0. The checks expect
; one pcmpestri followed by setae.
define i1 @pcmpestri_reg_eq_i8(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len) nounwind {
; X86-LABEL: pcmpestri_reg_eq_i8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    pcmpestri $24, %xmm1, %xmm0
; X86-NEXT:    setae %al
; X86-NEXT:    retl
;
; X64-LABEL: pcmpestri_reg_eq_i8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movl %esi, %edx
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    pcmpestri $24, %xmm1, %xmm0
; X64-NEXT:    setae %al
; X64-NEXT:    retq
entry:
  %c = call i32 @llvm.x86.sse42.pcmpestric128(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len, i8 24)
  %result = icmp eq i32 %c, 0
  ret i1 %result
}

; Register operands; the pcmpestri128 result is returned. The checks expect it
; to be copied from %ecx into %eax.
define i32 @pcmpestri_reg_idx_i8(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len) nounwind {
; X86-LABEL: pcmpestri_reg_idx_i8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    pcmpestri $24, %xmm1, %xmm0
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: pcmpestri_reg_idx_i8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movl %esi, %edx
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    pcmpestri $24, %xmm1, %xmm0
; X64-NEXT:    movl %ecx, %eax
; X64-NEXT:    retq
entry:
  %idx = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len, i8 24)
  ret i32 %idx
}

; Register operands; returns 0 when the index is 16, otherwise the
; zero-extended i8 difference of the elements at that index. The checks expect
; the variable-index extracts to read stack copies of both vectors.
define i32 @pcmpestri_reg_diff_i8(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len) nounwind {
; X86-LABEL: pcmpestri_reg_diff_i8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-16, %esp
; X86-NEXT:    subl $48, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %edx
; X86-NEXT:    pcmpestri $24, %xmm1, %xmm0
; X86-NEXT:    cmpl $16, %ecx
; X86-NEXT:    jne .LBB2_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    xorl %eax, %eax
; X86-NEXT:    jmp .LBB2_3
; X86-NEXT:  .LBB2_2: # %compare
; X86-NEXT:    movdqa %xmm0, (%esp)
; X86-NEXT:    andl $15, %ecx
; X86-NEXT:    movzbl (%esp,%ecx), %eax
; X86-NEXT:    movdqa %xmm1, {{[0-9]+}}(%esp)
; X86-NEXT:    subb 16(%esp,%ecx), %al
; X86-NEXT:  .LBB2_3: # %exit
; X86-NEXT:    movzbl %al, %eax
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: pcmpestri_reg_diff_i8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movl %esi, %edx
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    pcmpestri $24, %xmm1, %xmm0
; X64-NEXT:    # kill: def $ecx killed $ecx def $rcx
; X64-NEXT:    cmpl $16, %ecx
; X64-NEXT:    jne .LBB2_2
; X64-NEXT:  # %bb.1:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    movzbl %al, %eax
; X64-NEXT:    retq
; X64-NEXT:  .LBB2_2: # %compare
; X64-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    andl $15, %ecx
; X64-NEXT:    movzbl -24(%rsp,%rcx), %eax
; X64-NEXT:    movdqa %xmm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    subb -40(%rsp,%rcx), %al
; X64-NEXT:    movzbl %al, %eax
; X64-NEXT:    retq
entry:
  %idx = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len, i8 24)
  %eq = icmp eq i32 %idx, 16
  br i1 %eq, label %exit, label %compare

compare:
  %lhs_c = extractelement <16 x i8> %lhs, i32 %idx
  %rhs_c = extractelement <16 x i8> %rhs, i32 %idx
  %sub = sub i8 %lhs_c, %rhs_c
  br label %exit

exit:
  %result = phi i8 [ 0, %entry ], [ %sub, %compare ]
  %result_ext = zext i8 %result to i32
  ret i32 %result_ext
}

; Memory operands (align 1 loads); pcmpestric128 result tested against 0. The
; checks expect the rhs load to appear as pcmpestri's memory operand.
define i1 @pcmpestri_mem_eq_i8(ptr %lhs_ptr, i32 %lhs_len, ptr %rhs_ptr, i32 %rhs_len) nounwind {
; X86-LABEL: pcmpestri_mem_eq_i8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movdqu (%esi), %xmm0
; X86-NEXT:    pcmpestri $24, (%ecx), %xmm0
; X86-NEXT:    setae %al
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
;
; X64-LABEL: pcmpestri_mem_eq_i8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdx, %r8
; X64-NEXT:    movl %esi, %eax
; X64-NEXT:    movdqu (%rdi), %xmm0
; X64-NEXT:    movl %ecx, %edx
; X64-NEXT:    pcmpestri $24, (%r8), %xmm0
; X64-NEXT:    setae %al
; X64-NEXT:    retq
entry:
  %lhs = load <16 x i8>, ptr %lhs_ptr, align 1
  %rhs = load <16 x i8>, ptr %rhs_ptr, align 1
  %c = call i32 @llvm.x86.sse42.pcmpestric128(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len, i8 24)
  %result = icmp eq i32 %c, 0
  ret i1 %result
}

; Memory operands (align 1 loads); the pcmpestri128 result is returned. The
; checks expect the rhs load to appear as pcmpestri's memory operand.
define i32 @pcmpestri_mem_idx_i8(ptr %lhs_ptr, i32 %lhs_len, ptr %rhs_ptr, i32 %rhs_len) nounwind {
; X86-LABEL: pcmpestri_mem_idx_i8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movdqu (%esi), %xmm0
; X86-NEXT:    pcmpestri $24, (%ecx), %xmm0
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
;
; X64-LABEL: pcmpestri_mem_idx_i8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdx, %r8
; X64-NEXT:    movl %esi, %eax
; X64-NEXT:    movdqu (%rdi), %xmm0
; X64-NEXT:    movl %ecx, %edx
; X64-NEXT:    pcmpestri $24, (%r8), %xmm0
; X64-NEXT:    movl %ecx, %eax
; X64-NEXT:    retq
entry:
  %lhs = load <16 x i8>, ptr %lhs_ptr, align 1
  %rhs = load <16 x i8>, ptr %rhs_ptr, align 1
  %idx = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len, i8 24)
  ret i32 %idx
}

; Memory operands (align 1 loads); same index/extract/subtract flow as
; pcmpestri_reg_diff_i8. The checks expect both vectors to be loaded with
; movdqu ahead of a register-register pcmpestri.
define i32 @pcmpestri_mem_diff_i8(ptr %lhs_ptr, i32 %lhs_len, ptr %rhs_ptr, i32 %rhs_len) nounwind {
; X86-LABEL: pcmpestri_mem_diff_i8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    pushl %esi
; X86-NEXT:    andl $-16, %esp
; X86-NEXT:    subl $48, %esp
; X86-NEXT:    movl 12(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %edx
; X86-NEXT:    movl 16(%ebp), %ecx
; X86-NEXT:    movl 8(%ebp), %esi
; X86-NEXT:    movdqu (%esi), %xmm1
; X86-NEXT:    movdqu (%ecx), %xmm0
; X86-NEXT:    pcmpestri $24, %xmm0, %xmm1
; X86-NEXT:    cmpl $16, %ecx
; X86-NEXT:    jne .LBB5_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    xorl %eax, %eax
; X86-NEXT:    jmp .LBB5_3
; X86-NEXT:  .LBB5_2: # %compare
; X86-NEXT:    movdqa %xmm1, (%esp)
; X86-NEXT:    andl $15, %ecx
; X86-NEXT:    movzbl (%esp,%ecx), %eax
; X86-NEXT:    movdqa %xmm0, {{[0-9]+}}(%esp)
; X86-NEXT:    subb 16(%esp,%ecx), %al
; X86-NEXT:  .LBB5_3: # %exit
; X86-NEXT:    movzbl %al, %eax
; X86-NEXT:    leal -4(%ebp), %esp
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: pcmpestri_mem_diff_i8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movl %esi, %eax
; X64-NEXT:    movdqu (%rdi), %xmm1
; X64-NEXT:    movdqu (%rdx), %xmm0
; X64-NEXT:    movl %ecx, %edx
; X64-NEXT:    pcmpestri $24, %xmm0, %xmm1
; X64-NEXT:    # kill: def $ecx killed $ecx def $rcx
; X64-NEXT:    cmpl $16, %ecx
; X64-NEXT:    jne .LBB5_2
; X64-NEXT:  # %bb.1:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    movzbl %al, %eax
; X64-NEXT:    retq
; X64-NEXT:  .LBB5_2: # %compare
; X64-NEXT:    movdqa %xmm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    andl $15, %ecx
; X64-NEXT:    movzbl -24(%rsp,%rcx), %eax
; X64-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    subb -40(%rsp,%rcx), %al
; X64-NEXT:    movzbl %al, %eax
; X64-NEXT:    retq
entry:
  %lhs = load <16 x i8>, ptr %lhs_ptr, align 1
  %rhs = load <16 x i8>, ptr %rhs_ptr, align 1
  %idx = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len, i8 24)
  %eq = icmp eq i32 %idx, 16
  br i1 %eq, label %exit, label %compare

compare:
  %lhs_c = extractelement <16 x i8> %lhs, i32 %idx
  %rhs_c = extractelement <16 x i8> %rhs, i32 %idx
  %sub = sub i8 %lhs_c, %rhs_c
  br label %exit

exit:
  %result = phi i8 [ 0, %entry ], [ %sub, %compare ]
  %result_ext = zext i8 %result to i32
  ret i32 %result_ext
}

; <8 x i16> register operands, bitcast to <16 x i8>; pcmpestric128 result
; tested against 0. The immediate stays 24 here.
define i1 @pcmpestri_reg_eq_i16(<8 x i16> %lhs, i32 %lhs_len, <8 x i16> %rhs, i32 %rhs_len) nounwind {
; X86-LABEL: pcmpestri_reg_eq_i16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    pcmpestri $24, %xmm1, %xmm0
; X86-NEXT:    setae %al
; X86-NEXT:    retl
;
; X64-LABEL: pcmpestri_reg_eq_i16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movl %esi, %edx
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    pcmpestri $24, %xmm1, %xmm0
; X64-NEXT:    setae %al
; X64-NEXT:    retq
entry:
  %lhs_cast = bitcast <8 x i16> %lhs to <16 x i8>
  %rhs_cast = bitcast <8 x i16> %rhs to <16 x i8>
  %c = call i32 @llvm.x86.sse42.pcmpestric128(<16 x i8> %lhs_cast, i32 %lhs_len, <16 x i8> %rhs_cast, i32 %rhs_len, i8 24)
  %result = icmp eq i32 %c, 0
  ret i1 %result
}

; <8 x i16> register operands, bitcast to <16 x i8>; the pcmpestri128 result
; is returned. The immediate stays 24 here.
define i32 @pcmpestri_reg_idx_i16(<8 x i16> %lhs, i32 %lhs_len, <8 x i16> %rhs, i32 %rhs_len) nounwind {
; X86-LABEL: pcmpestri_reg_idx_i16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    pcmpestri $24, %xmm1, %xmm0
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: pcmpestri_reg_idx_i16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movl %esi, %edx
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    pcmpestri $24, %xmm1, %xmm0
; X64-NEXT:    movl %ecx, %eax
; X64-NEXT:    retq
entry:
  %lhs_cast = bitcast <8 x i16> %lhs to <16 x i8>
  %rhs_cast = bitcast <8 x i16> %rhs to <16 x i8>
  %idx = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %lhs_cast, i32 %lhs_len, <16 x i8> %rhs_cast, i32 %rhs_len, i8 24)
  ret i32 %idx
}

; <8 x i16> register operands; the index is used to extract i16 elements from
; the original (un-cast) vectors.
; NOTE(review): this variant passes imm8 24 and tests the index against 16,
; whereas pcmpestri_mem_diff_i16 passes 25 and tests against 8 - confirm the
; difference is intentional before regenerating the assertions.
define i32 @pcmpestri_reg_diff_i16(<8 x i16> %lhs, i32 %lhs_len, <8 x i16> %rhs, i32 %rhs_len) nounwind {
; X86-LABEL: pcmpestri_reg_diff_i16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-16, %esp
; X86-NEXT:    subl $48, %esp
; X86-NEXT:    movl 8(%ebp), %eax
; X86-NEXT:    movl 12(%ebp), %edx
; X86-NEXT:    pcmpestri $24, %xmm1, %xmm0
; X86-NEXT:    cmpl $16, %ecx
; X86-NEXT:    jne .LBB8_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    xorl %eax, %eax
; X86-NEXT:    jmp .LBB8_3
; X86-NEXT:  .LBB8_2: # %compare
; X86-NEXT:    movdqa %xmm0, (%esp)
; X86-NEXT:    addl %ecx, %ecx
; X86-NEXT:    andl $14, %ecx
; X86-NEXT:    movzwl (%esp,%ecx), %eax
; X86-NEXT:    movdqa %xmm1, {{[0-9]+}}(%esp)
; X86-NEXT:    subw 16(%esp,%ecx), %ax
; X86-NEXT:  .LBB8_3: # %exit
; X86-NEXT:    movzwl %ax, %eax
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: pcmpestri_reg_diff_i16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movl %esi, %edx
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    pcmpestri $24, %xmm1, %xmm0
; X64-NEXT:    # kill: def $ecx killed $ecx def $rcx
; X64-NEXT:    cmpl $16, %ecx
; X64-NEXT:    jne .LBB8_2
; X64-NEXT:  # %bb.1:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    movzwl %ax, %eax
; X64-NEXT:    retq
; X64-NEXT:  .LBB8_2: # %compare
; X64-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    andl $7, %ecx
; X64-NEXT:    addl %ecx, %ecx
; X64-NEXT:    movzwl -24(%rsp,%rcx), %eax
; X64-NEXT:    movdqa %xmm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    subw -40(%rsp,%rcx), %ax
; X64-NEXT:    movzwl %ax, %eax
; X64-NEXT:    retq
entry:
  %lhs_cast = bitcast <8 x i16> %lhs to <16 x i8>
  %rhs_cast = bitcast <8 x i16> %rhs to <16 x i8>
  %idx = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %lhs_cast, i32 %lhs_len, <16 x i8> %rhs_cast, i32 %rhs_len, i8 24)
  %eq = icmp eq i32 %idx, 16
  br i1 %eq, label %exit, label %compare

compare:
  %lhs_c = extractelement <8 x i16> %lhs, i32 %idx
  %rhs_c = extractelement <8 x i16> %rhs, i32 %idx
  %sub = sub i16 %lhs_c, %rhs_c
  br label %exit

exit:
  %result = phi i16 [ 0, %entry ], [ %sub, %compare ]
  %result_ext = zext i16 %result to i32
  ret i32 %result_ext
}

; <8 x i16> memory operands (align 1 loads), bitcast to <16 x i8>; immediate
; is 25. pcmpestric128 result tested against 0.
define i1 @pcmpestri_mem_eq_i16(ptr %lhs_ptr, i32 %lhs_len, ptr %rhs_ptr, i32 %rhs_len) nounwind {
; X86-LABEL: pcmpestri_mem_eq_i16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movdqu (%esi), %xmm0
; X86-NEXT:    pcmpestri $25, (%ecx), %xmm0
; X86-NEXT:    setae %al
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
;
; X64-LABEL: pcmpestri_mem_eq_i16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdx, %r8
; X64-NEXT:    movl %esi, %eax
; X64-NEXT:    movdqu (%rdi), %xmm0
; X64-NEXT:    movl %ecx, %edx
; X64-NEXT:    pcmpestri $25, (%r8), %xmm0
; X64-NEXT:    setae %al
; X64-NEXT:    retq
entry:
  %lhs = load <8 x i16>, ptr %lhs_ptr, align 1
  %rhs = load <8 x i16>, ptr %rhs_ptr, align 1
  %lhs_cast = bitcast <8 x i16> %lhs to <16 x i8>
  %rhs_cast = bitcast <8 x i16> %rhs to <16 x i8>
  %c = call i32 @llvm.x86.sse42.pcmpestric128(<16 x i8> %lhs_cast, i32 %lhs_len, <16 x i8> %rhs_cast, i32 %rhs_len, i8 25)
  %result = icmp eq i32 %c, 0
  ret i1 %result
}

; <8 x i16> memory operands (align 1 loads), bitcast to <16 x i8>; immediate
; is 25. The pcmpestri128 result is returned.
define i32 @pcmpestri_mem_idx_i16(ptr %lhs_ptr, i32 %lhs_len, ptr %rhs_ptr, i32 %rhs_len) nounwind {
; X86-LABEL: pcmpestri_mem_idx_i16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movdqu (%esi), %xmm0
; X86-NEXT:    pcmpestri $25, (%ecx), %xmm0
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    popl %esi
; X86-NEXT:    retl
;
; X64-LABEL: pcmpestri_mem_idx_i16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdx, %r8
; X64-NEXT:    movl %esi, %eax
; X64-NEXT:    movdqu (%rdi), %xmm0
; X64-NEXT:    movl %ecx, %edx
; X64-NEXT:    pcmpestri $25, (%r8), %xmm0
; X64-NEXT:    movl %ecx, %eax
; X64-NEXT:    retq
entry:
  %lhs = load <8 x i16>, ptr %lhs_ptr, align 1
  %rhs = load <8 x i16>, ptr %rhs_ptr, align 1
  %lhs_cast = bitcast <8 x i16> %lhs to <16 x i8>
  %rhs_cast = bitcast <8 x i16> %rhs to <16 x i8>
  %idx = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %lhs_cast, i32 %lhs_len, <16 x i8> %rhs_cast, i32 %rhs_len, i8 25)
  ret i32 %idx
}

; <8 x i16> memory operands; immediate is 25 and the entry block tests the
; index against 8 before extracting and subtracting i16 elements.
define i32 @pcmpestri_mem_diff_i16(ptr %lhs_ptr, i32 %lhs_len, ptr %rhs_ptr, i32 %rhs_len) nounwind {
; X86-LABEL: pcmpestri_mem_diff_i16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    pushl %esi
; X86-NEXT:    andl $-16, %esp
; X86-NEXT:    subl $48, %esp
; X86-NEXT:    movl 12(%ebp), %eax
; X86-NEXT:    movl 20(%ebp), %edx
; X86-NEXT:    movl 16(%ebp), %ecx
; X86-NEXT:    movl 8(%ebp), %esi
; X86-NEXT:    movdqu (%esi), %xmm1
; X86-NEXT:    movdqu (%ecx), %xmm0
; X86-NEXT:    pcmpestri $25, %xmm0, %xmm1
; X86-NEXT:    cmpl $8, %ecx
; X86-NEXT:    jne .LBB11_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    xorl %eax, %eax
; X86-NEXT:    jmp .LBB11_3
; X86-NEXT:  .LBB11_2: # %compare
; X86-NEXT:    movdqa %xmm1, (%esp)
; X86-NEXT:    addl %ecx, %ecx
; X86-NEXT:    andl $14, %ecx
; X86-NEXT:    movzwl (%esp,%ecx), %eax
; X86-NEXT:    movdqa %xmm0, {{[0-9]+}}(%esp)
; X86-NEXT:    subw 16(%esp,%ecx), %ax
; X86-NEXT:  .LBB11_3: # %exit
; X86-NEXT:    movzwl %ax, %eax
; X86-NEXT:    leal -4(%ebp), %esp
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: pcmpestri_mem_diff_i16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movl %esi, %eax
; X64-NEXT:    movdqu (%rdi), %xmm1
; X64-NEXT:    movdqu (%rdx), %xmm0
; X64-NEXT:    movl %ecx, %edx
; X64-NEXT:    pcmpestri $25, %xmm0, %xmm1
; X64-NEXT:    # kill: def $ecx killed $ecx def $rcx
; X64-NEXT:    cmpl $8, %ecx
; X64-NEXT:    jne .LBB11_2
; X64-NEXT:  # %bb.1:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    movzwl %ax, %eax
; X64-NEXT:    retq
; X64-NEXT:  .LBB11_2: # %compare
; X64-NEXT:    movdqa %xmm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    andl $7, %ecx
; X64-NEXT:    addl %ecx, %ecx
; X64-NEXT:    movzwl -24(%rsp,%rcx), %eax
; X64-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    subw -40(%rsp,%rcx), %ax
; X64-NEXT:    movzwl %ax, %eax
; X64-NEXT:    retq
entry:
  %lhs = load <8 x i16>, ptr %lhs_ptr, align 1
  %rhs = load <8 x i16>, ptr %rhs_ptr, align 1
  %lhs_cast = bitcast <8 x i16> %lhs to <16 x i8>
  %rhs_cast = bitcast <8 x i16> %rhs to <16 x i8>
  %idx = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %lhs_cast, i32 %lhs_len, <16 x i8> %rhs_cast, i32 %rhs_len, i8 25)
  %eq = icmp eq i32 %idx, 8
  br i1 %eq, label %exit, label %compare

compare:
  %lhs_c = extractelement <8 x i16> %lhs, i32 %idx
  %rhs_c = extractelement <8 x i16> %rhs, i32 %idx
  %sub = sub i16 %lhs_c, %rhs_c
  br label %exit

exit:
  %result = phi i16 [ 0, %entry ], [ %sub, %compare ]
  %result_ext = zext i16 %result to i32
  ret i32 %result_ext
}

; pcmpistri counterpart of pcmpestri_reg_eq_i8 (no length arguments). The
; checks expect only pcmpistri, setae and the return.
define i1 @pcmpistri_reg_eq_i8(<16 x i8> %lhs, <16 x i8> %rhs) nounwind {
; X86-LABEL: pcmpistri_reg_eq_i8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pcmpistri $24, %xmm1, %xmm0
; X86-NEXT:    setae %al
; X86-NEXT:    retl
;
; X64-LABEL: pcmpistri_reg_eq_i8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    pcmpistri $24, %xmm1, %xmm0
; X64-NEXT:    setae %al
; X64-NEXT:    retq
entry:
  %c = call i32 @llvm.x86.sse42.pcmpistric128(<16 x i8> %lhs, <16 x i8> %rhs, i8 24)
  %result = icmp eq i32 %c, 0
  ret i1 %result
}

; pcmpistri counterpart of pcmpestri_reg_idx_i8 (no length arguments).
define i32 @pcmpistri_reg_idx_i8(<16 x i8> %lhs, <16 x i8> %rhs) nounwind {
; X86-LABEL: pcmpistri_reg_idx_i8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pcmpistri $24, %xmm1, %xmm0
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: pcmpistri_reg_idx_i8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    pcmpistri $24, %xmm1, %xmm0
; X64-NEXT:    movl %ecx, %eax
; X64-NEXT:    retq
entry:
  %idx = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %lhs, <16 x i8> %rhs, i8 24)
  ret i32 %idx
}

; pcmpistri counterpart of pcmpestri_reg_diff_i8. On the 32-bit triple the
; checks expect the aligned stack frame to be set up only in the %compare
; block.
define i32 @pcmpistri_reg_diff_i8(<16 x i8> %lhs, <16 x i8> %rhs) nounwind {
; X86-LABEL: pcmpistri_reg_diff_i8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pcmpistri $24, %xmm1, %xmm0
; X86-NEXT:    cmpl $16, %ecx
; X86-NEXT:    jne .LBB14_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    xorl %eax, %eax
; X86-NEXT:    movzbl %al, %eax
; X86-NEXT:    retl
; X86-NEXT:  .LBB14_2: # %compare
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-16, %esp
; X86-NEXT:    subl $48, %esp
; X86-NEXT:    movdqa %xmm0, (%esp)
; X86-NEXT:    andl $15, %ecx
; X86-NEXT:    movzbl (%esp,%ecx), %eax
; X86-NEXT:    movdqa %xmm1, {{[0-9]+}}(%esp)
; X86-NEXT:    subb 16(%esp,%ecx), %al
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    movzbl %al, %eax
; X86-NEXT:    retl
;
; X64-LABEL: pcmpistri_reg_diff_i8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    pcmpistri $24, %xmm1, %xmm0
; X64-NEXT:    # kill: def $ecx killed $ecx def $rcx
; X64-NEXT:    cmpl $16, %ecx
; X64-NEXT:    jne .LBB14_2
; X64-NEXT:  # %bb.1:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    movzbl %al, %eax
; X64-NEXT:    retq
; X64-NEXT:  .LBB14_2: # %compare
; X64-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    andl $15, %ecx
; X64-NEXT:    movzbl -24(%rsp,%rcx), %eax
; X64-NEXT:    movdqa %xmm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    subb -40(%rsp,%rcx), %al
; X64-NEXT:    movzbl %al, %eax
; X64-NEXT:    retq
entry:
  %idx = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %lhs, <16 x i8> %rhs, i8 24)
  %eq = icmp eq i32 %idx, 16
  br i1 %eq, label %exit, label %compare

compare:
  %lhs_c = extractelement <16 x i8> %lhs, i32 %idx
  %rhs_c = extractelement <16 x i8> %rhs, i32 %idx
  %sub = sub i8 %lhs_c, %rhs_c
  br label %exit

exit:
  %result = phi i8 [ 0, %entry ], [ %sub, %compare ]
  %result_ext = zext i8 %result to i32
  ret i32 %result_ext
}

; pcmpistri counterpart of pcmpestri_mem_eq_i8; the checks expect the rhs load
; to appear as pcmpistri's memory operand.
define i1 @pcmpistri_mem_eq_i8(ptr %lhs_ptr, ptr %rhs_ptr) nounwind {
; X86-LABEL: pcmpistri_mem_eq_i8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movdqu (%ecx), %xmm0
; X86-NEXT:    pcmpistri $24, (%eax), %xmm0
; X86-NEXT:    setae %al
; X86-NEXT:    retl
;
; X64-LABEL: pcmpistri_mem_eq_i8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdqu (%rdi), %xmm0
; X64-NEXT:    pcmpistri $24, (%rsi), %xmm0
; X64-NEXT:    setae %al
; X64-NEXT:    retq
entry:
  %lhs = load <16 x i8>, ptr %lhs_ptr, align 1
  %rhs = load <16 x i8>, ptr %rhs_ptr, align 1
  %c = call i32 @llvm.x86.sse42.pcmpistric128(<16 x i8> %lhs, <16 x i8> %rhs, i8 24)
  %result = icmp eq i32 %c, 0
  ret i1 %result
}

; pcmpistri counterpart of pcmpestri_mem_idx_i8.
define i32 @pcmpistri_mem_idx_i8(ptr %lhs_ptr, ptr %rhs_ptr) nounwind {
; X86-LABEL: pcmpistri_mem_idx_i8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movdqu (%ecx), %xmm0
; X86-NEXT:    pcmpistri $24, (%eax), %xmm0
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: pcmpistri_mem_idx_i8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdqu (%rdi), %xmm0
; X64-NEXT:    pcmpistri $24, (%rsi), %xmm0
; X64-NEXT:    movl %ecx, %eax
; X64-NEXT:    retq
entry:
  %lhs = load <16 x i8>, ptr %lhs_ptr, align 1
  %rhs = load <16 x i8>, ptr %rhs_ptr, align 1
  %idx = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %lhs, <16 x i8> %rhs, i8 24)
  ret i32 %idx
}

; pcmpistri counterpart of pcmpestri_mem_diff_i8; the checks expect both
; vectors to be loaded with movdqu ahead of a register-register pcmpistri.
define i32 @pcmpistri_mem_diff_i8(ptr %lhs_ptr, ptr %rhs_ptr) nounwind {
; X86-LABEL: pcmpistri_mem_diff_i8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-16, %esp
; X86-NEXT:    subl $48, %esp
; X86-NEXT:    movl 12(%ebp), %eax
; X86-NEXT:    movl 8(%ebp), %ecx
; X86-NEXT:    movdqu (%ecx), %xmm1
; X86-NEXT:    movdqu (%eax), %xmm0
; X86-NEXT:    pcmpistri $24, %xmm0, %xmm1
; X86-NEXT:    cmpl $16, %ecx
; X86-NEXT:    jne .LBB17_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    xorl %eax, %eax
; X86-NEXT:    jmp .LBB17_3
; X86-NEXT:  .LBB17_2: # %compare
; X86-NEXT:    movdqa %xmm1, (%esp)
; X86-NEXT:    andl $15, %ecx
; X86-NEXT:    movzbl (%esp,%ecx), %eax
; X86-NEXT:    movdqa %xmm0, {{[0-9]+}}(%esp)
; X86-NEXT:    subb 16(%esp,%ecx), %al
; X86-NEXT:  .LBB17_3: # %exit
; X86-NEXT:    movzbl %al, %eax
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: pcmpistri_mem_diff_i8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdqu (%rdi), %xmm1
; X64-NEXT:    movdqu (%rsi), %xmm0
; X64-NEXT:    pcmpistri $24, %xmm0, %xmm1
; X64-NEXT:    # kill: def $ecx killed $ecx def $rcx
; X64-NEXT:    cmpl $16, %ecx
; X64-NEXT:    jne .LBB17_2
; X64-NEXT:  # %bb.1:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    movzbl %al, %eax
; X64-NEXT:    retq
; X64-NEXT:  .LBB17_2: # %compare
; X64-NEXT:    movdqa %xmm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    andl $15, %ecx
; X64-NEXT:    movzbl -24(%rsp,%rcx), %eax
; X64-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    subb -40(%rsp,%rcx), %al
; X64-NEXT:    movzbl %al, %eax
; X64-NEXT:    retq
entry:
  %lhs = load <16 x i8>, ptr %lhs_ptr, align 1
  %rhs = load <16 x i8>, ptr %rhs_ptr, align 1
  %idx = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %lhs, <16 x i8> %rhs, i8 24)
  %eq = icmp eq i32 %idx, 16
  br i1 %eq, label %exit, label %compare

compare:
  %lhs_c = extractelement <16 x i8> %lhs, i32 %idx
  %rhs_c = extractelement <16 x i8> %rhs, i32 %idx
  %sub = sub i8 %lhs_c, %rhs_c
  br label %exit

exit:
  %result = phi i8 [ 0, %entry ], [ %sub, %compare ]
  %result_ext = zext i8 %result to i32
  ret i32 %result_ext
}

; <8 x i16> register operands, bitcast to <16 x i8>; pcmpistric128 result
; tested against 0. The immediate stays 24 here.
define i1 @pcmpistri_reg_eq_i16(<8 x i16> %lhs, <8 x i16> %rhs) nounwind {
; X86-LABEL: pcmpistri_reg_eq_i16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pcmpistri $24, %xmm1, %xmm0
; X86-NEXT:    setae %al
; X86-NEXT:    retl
;
; X64-LABEL: pcmpistri_reg_eq_i16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    pcmpistri $24, %xmm1, %xmm0
; X64-NEXT:    setae %al
; X64-NEXT:    retq
entry:
  %lhs_cast = bitcast <8 x i16> %lhs to <16 x i8>
  %rhs_cast = bitcast <8 x i16> %rhs to <16 x i8>
  %c = call i32 @llvm.x86.sse42.pcmpistric128(<16 x i8> %lhs_cast, <16 x i8> %rhs_cast, i8 24)
  %result = icmp eq i32 %c, 0
  ret i1 %result
}

; <8 x i16> register operands, bitcast to <16 x i8>; the pcmpistri128 result
; is returned. The immediate stays 24 here.
define i32 @pcmpistri_reg_idx_i16(<8 x i16> %lhs, <8 x i16> %rhs) nounwind {
; X86-LABEL: pcmpistri_reg_idx_i16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pcmpistri $24, %xmm1, %xmm0
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: pcmpistri_reg_idx_i16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    pcmpistri $24, %xmm1, %xmm0
; X64-NEXT:    movl %ecx, %eax
; X64-NEXT:    retq
entry:
  %lhs_cast = bitcast <8 x i16> %lhs to <16 x i8>
  %rhs_cast = bitcast <8 x i16> %rhs to <16 x i8>
  %idx = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %lhs_cast, <16 x i8> %rhs_cast, i8 24)
  ret i32 %idx
}

; <8 x i16> register operands; the index is used to extract i16 elements from
; the original (un-cast) vectors.
; NOTE(review): this variant passes imm8 24 and tests the index against 16,
; whereas pcmpistri_mem_diff_i16 passes 25 and tests against 8 - confirm the
; difference is intentional before regenerating the assertions.
define i32 @pcmpistri_reg_diff_i16(<8 x i16> %lhs, <8 x i16> %rhs) nounwind {
; X86-LABEL: pcmpistri_reg_diff_i16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pcmpistri $24, %xmm1, %xmm0
; X86-NEXT:    cmpl $16, %ecx
; X86-NEXT:    jne .LBB20_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    xorl %eax, %eax
; X86-NEXT:    movzwl %ax, %eax
; X86-NEXT:    retl
; X86-NEXT:  .LBB20_2: # %compare
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-16, %esp
; X86-NEXT:    subl $48, %esp
; X86-NEXT:    movdqa %xmm0, (%esp)
; X86-NEXT:    addl %ecx, %ecx
; X86-NEXT:    andl $14, %ecx
; X86-NEXT:    movzwl (%esp,%ecx), %eax
; X86-NEXT:    movdqa %xmm1, {{[0-9]+}}(%esp)
; X86-NEXT:    subw 16(%esp,%ecx), %ax
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    movzwl %ax, %eax
; X86-NEXT:    retl
;
; X64-LABEL: pcmpistri_reg_diff_i16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    pcmpistri $24, %xmm1, %xmm0
; X64-NEXT:    # kill: def $ecx killed $ecx def $rcx
; X64-NEXT:    cmpl $16, %ecx
; X64-NEXT:    jne .LBB20_2
; X64-NEXT:  # %bb.1:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    movzwl %ax, %eax
; X64-NEXT:    retq
; X64-NEXT:  .LBB20_2: # %compare
; X64-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    andl $7, %ecx
; X64-NEXT:    addl %ecx, %ecx
; X64-NEXT:    movzwl -24(%rsp,%rcx), %eax
; X64-NEXT:    movdqa %xmm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    subw -40(%rsp,%rcx), %ax
; X64-NEXT:    movzwl %ax, %eax
; X64-NEXT:    retq
entry:
  %lhs_cast = bitcast <8 x i16> %lhs to <16 x i8>
  %rhs_cast = bitcast <8 x i16> %rhs to <16 x i8>
  %idx = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %lhs_cast, <16 x i8> %rhs_cast, i8 24)
  %eq = icmp eq i32 %idx, 16
  br i1 %eq, label %exit, label %compare

compare:
  %lhs_c = extractelement <8 x i16> %lhs, i32 %idx
  %rhs_c = extractelement <8 x i16> %rhs, i32 %idx
  %sub = sub i16 %lhs_c, %rhs_c
  br label %exit

exit:
  %result = phi i16 [ 0, %entry ], [ %sub, %compare ]
  %result_ext = zext i16 %result to i32
  ret i32 %result_ext
}

; <8 x i16> memory operands (align 1 loads), bitcast to <16 x i8>; immediate
; is 25. pcmpistric128 result tested against 0.
define i1 @pcmpistri_mem_eq_i16(ptr %lhs_ptr, ptr %rhs_ptr) nounwind {
; X86-LABEL: pcmpistri_mem_eq_i16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movdqu (%ecx), %xmm0
; X86-NEXT:    pcmpistri $25, (%eax), %xmm0
; X86-NEXT:    setae %al
; X86-NEXT:    retl
;
; X64-LABEL: pcmpistri_mem_eq_i16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdqu (%rdi), %xmm0
; X64-NEXT:    pcmpistri $25, (%rsi), %xmm0
; X64-NEXT:    setae %al
; X64-NEXT:    retq
entry:
  %lhs = load <8 x i16>, ptr %lhs_ptr, align 1
  %rhs = load <8 x i16>, ptr %rhs_ptr, align 1
  %lhs_cast = bitcast <8 x i16> %lhs to <16 x i8>
  %rhs_cast = bitcast <8 x i16> %rhs to <16 x i8>
  %c = call i32 @llvm.x86.sse42.pcmpistric128(<16 x i8> %lhs_cast, <16 x i8> %rhs_cast, i8 25)
  %result = icmp eq i32 %c, 0
  ret i1 %result
}

; <8 x i16> memory operands (align 1 loads), bitcast to <16 x i8>; immediate
; is 25. The pcmpistri128 result is returned.
define i32 @pcmpistri_mem_idx_i16(ptr %lhs_ptr, ptr %rhs_ptr) nounwind {
; X86-LABEL: pcmpistri_mem_idx_i16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movdqu (%ecx), %xmm0
; X86-NEXT:    pcmpistri $25, (%eax), %xmm0
; X86-NEXT:    movl %ecx, %eax
; X86-NEXT:    retl
;
; X64-LABEL: pcmpistri_mem_idx_i16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdqu (%rdi), %xmm0
; X64-NEXT:    pcmpistri $25, (%rsi), %xmm0
; X64-NEXT:    movl %ecx, %eax
; X64-NEXT:    retq
entry:
  %lhs = load <8 x i16>, ptr %lhs_ptr, align 1
  %rhs = load <8 x i16>, ptr %rhs_ptr, align 1
  %lhs_cast = bitcast <8 x i16> %lhs to <16 x i8>
  %rhs_cast = bitcast <8 x i16> %rhs to <16 x i8>
  %idx = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %lhs_cast, <16 x i8> %rhs_cast, i8 25)
  ret i32 %idx
}

; <8 x i16> memory operands; immediate is 25 and the entry block tests the
; index against 8 before extracting and subtracting i16 elements.
define i32 @pcmpistri_mem_diff_i16(ptr %lhs_ptr, ptr %rhs_ptr) nounwind {
; X86-LABEL: pcmpistri_mem_diff_i16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    movl %esp, %ebp
; X86-NEXT:    andl $-16, %esp
; X86-NEXT:    subl $48, %esp
; X86-NEXT:    movl 12(%ebp), %eax
; X86-NEXT:    movl 8(%ebp), %ecx
; X86-NEXT:    movdqu (%ecx), %xmm1
; X86-NEXT:    movdqu (%eax), %xmm0
; X86-NEXT:    pcmpistri $25, %xmm0, %xmm1
; X86-NEXT:    cmpl $8, %ecx
; X86-NEXT:    jne .LBB23_2
; X86-NEXT:  # %bb.1:
; X86-NEXT:    xorl %eax, %eax
; X86-NEXT:    jmp .LBB23_3
; X86-NEXT:  .LBB23_2: # %compare
; X86-NEXT:    movdqa %xmm1, (%esp)
; X86-NEXT:    addl %ecx, %ecx
; X86-NEXT:    andl $14, %ecx
; X86-NEXT:    movzwl (%esp,%ecx), %eax
; X86-NEXT:    movdqa %xmm0, {{[0-9]+}}(%esp)
; X86-NEXT:    subw 16(%esp,%ecx), %ax
; X86-NEXT:  .LBB23_3: # %exit
; X86-NEXT:    movzwl %ax, %eax
; X86-NEXT:    movl %ebp, %esp
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: pcmpistri_mem_diff_i16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movdqu (%rdi), %xmm1
; X64-NEXT:    movdqu (%rsi), %xmm0
; X64-NEXT:    pcmpistri $25, %xmm0, %xmm1
; X64-NEXT:    # kill: def $ecx killed $ecx def $rcx
; X64-NEXT:    cmpl $8, %ecx
; X64-NEXT:    jne .LBB23_2
; X64-NEXT:  # %bb.1:
; X64-NEXT:    xorl %eax, %eax
; X64-NEXT:    movzwl %ax, %eax
; X64-NEXT:    retq
; X64-NEXT:  .LBB23_2: # %compare
; X64-NEXT:    movdqa %xmm1, -{{[0-9]+}}(%rsp)
; X64-NEXT:    andl $7, %ecx
; X64-NEXT:    addl %ecx, %ecx
; X64-NEXT:    movzwl -24(%rsp,%rcx), %eax
; X64-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
; X64-NEXT:    subw -40(%rsp,%rcx), %ax
; X64-NEXT:    movzwl %ax, %eax
; X64-NEXT:    retq
entry:
  %lhs = load <8 x i16>, ptr %lhs_ptr, align 1
  %rhs = load <8 x i16>, ptr %rhs_ptr, align 1
  %lhs_cast = bitcast <8 x i16> %lhs to <16 x i8>
  %rhs_cast = bitcast <8 x i16> %rhs to <16 x i8>
  %idx = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %lhs_cast, <16 x i8> %rhs_cast, i8 25)
  %eq = icmp eq i32 %idx, 8
  br i1 %eq, label %exit, label %compare

compare:
  %lhs_c = extractelement <8 x i16> %lhs, i32 %idx
  %rhs_c = extractelement <8 x i16> %rhs, i32 %idx
  %sub = sub i16 %lhs_c, %rhs_c
  br label %exit

exit:
  %result = phi i16 [ 0, %entry ], [ %sub, %compare ]
  %result_ext = zext i16 %result to i32
  ret i32 %result_ext
}

; Calls the flag (pcmpestric128) and index (pcmpestri128) intrinsics with
; identical operands and stores both results. The checks expect a single
; pcmpestri whose carry (setb) and %ecx outputs feed the two stores.
define void @pcmpestr_index_flag(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len, ptr %iptr, ptr %fptr) nounwind {
; X86-LABEL: pcmpestr_index_flag:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    xorl %ebx, %ebx
; X86-NEXT:    pcmpestri $24, %xmm1, %xmm0
; X86-NEXT:    setb %bl
; X86-NEXT:    movl %ecx, (%edi)
; X86-NEXT:    movl %ebx, (%esi)
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    retl
;
; X64-LABEL: pcmpestr_index_flag:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rcx, %r8
; X64-NEXT:    movq %rdx, %r9
; X64-NEXT:    movl %esi, %edx
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    xorl %esi, %esi
; X64-NEXT:    pcmpestri $24, %xmm1, %xmm0
; X64-NEXT:    setb %sil
; X64-NEXT:    movl %ecx, (%r9)
; X64-NEXT:    movl %esi, (%r8)
; X64-NEXT:    retq
entry:
  %flag = call i32 @llvm.x86.sse42.pcmpestric128(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len, i8 24)
  %index = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len, i8 24)
  store i32 %index, ptr %iptr
  store i32 %flag, ptr %fptr
  ret void
}

; Calls the flag (pcmpestric128) and mask (pcmpestrm128) intrinsics with
; identical operands and stores both results. The checks expect a single
; pcmpestrm whose carry (setb) and %xmm0 outputs feed the two stores.
define void @pcmpestr_mask_flag(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len, ptr %mptr, ptr %fptr) nounwind {
; X86-LABEL: pcmpestr_mask_flag:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    xorl %ebx, %ebx
; X86-NEXT:    pcmpestrm $24, %xmm1, %xmm0
; X86-NEXT:    setb %bl
; X86-NEXT:    movdqa %xmm0, (%esi)
; X86-NEXT:    movl %ebx, (%ecx)
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %ebx
; X86-NEXT:    retl
;
; X64-LABEL: pcmpestr_mask_flag:
; X64:       # %bb.0: # %entry
; X64-NEXT:    movq %rdx, %r8
; X64-NEXT:    movl %esi, %edx
; X64-NEXT:    movl %edi, %eax
; X64-NEXT:    xorl %esi, %esi
; X64-NEXT:    pcmpestrm $24, %xmm1, %xmm0
; X64-NEXT:    setb %sil
; X64-NEXT:    movdqa %xmm0, (%r8)
; X64-NEXT:    movl %esi, (%rcx)
; X64-NEXT:    retq
entry:
  %flag = call i32 @llvm.x86.sse42.pcmpestric128(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len, i8 24)
  %mask = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len, i8 24)
  store <16 x i8> %mask, ptr %mptr
  store i32 %flag, ptr %fptr
  ret void
}

define void @pcmpestr_mask_index(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len, ptr %mptr, ptr %iptr) nounwind {
; X86-LABEL: pcmpestr_mask_index:
; X86:       # %bb.0: # %entry
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movdqa %xmm0, %xmm2
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-NEXT:    pcmpestrm $24, %xmm1, %xmm0
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    pcmpestri $24, %xmm1, %xmm2
; X86-NEXT:    movdqa %xmm0, (%edi)
; X86-NEXT:    movl %ecx, (%esi)
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    retl
;
; X64-LABEL: pcmpestr_mask_index:
; X64:
# %bb.0: # %entry 1045; X64-NEXT: movq %rcx, %r8 1046; X64-NEXT: movq %rdx, %r9 1047; X64-NEXT: movl %esi, %edx 1048; X64-NEXT: movl %edi, %eax 1049; X64-NEXT: movdqa %xmm0, %xmm2 1050; X64-NEXT: pcmpestrm $24, %xmm1, %xmm0 1051; X64-NEXT: pcmpestri $24, %xmm1, %xmm2 1052; X64-NEXT: movdqa %xmm0, (%r9) 1053; X64-NEXT: movl %ecx, (%r8) 1054; X64-NEXT: retq 1055entry: 1056 %index = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len, i8 24) 1057 %mask = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len, i8 24) 1058 store <16 x i8> %mask, ptr %mptr 1059 store i32 %index, ptr %iptr 1060 ret void 1061} 1062 1063define void @pcmpestr_mask_index_flag(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len, ptr %mptr, ptr %iptr, ptr %fptr) nounwind { 1064; X86-LABEL: pcmpestr_mask_index_flag: 1065; X86: # %bb.0: # %entry 1066; X86-NEXT: pushl %ebp 1067; X86-NEXT: pushl %ebx 1068; X86-NEXT: pushl %edi 1069; X86-NEXT: pushl %esi 1070; X86-NEXT: movdqa %xmm0, %xmm2 1071; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 1072; X86-NEXT: movl {{[0-9]+}}(%esp), %edx 1073; X86-NEXT: pcmpestrm $24, %xmm1, %xmm0 1074; X86-NEXT: movl {{[0-9]+}}(%esp), %esi 1075; X86-NEXT: movl {{[0-9]+}}(%esp), %edi 1076; X86-NEXT: movl {{[0-9]+}}(%esp), %ebp 1077; X86-NEXT: xorl %ebx, %ebx 1078; X86-NEXT: pcmpestri $24, %xmm1, %xmm2 1079; X86-NEXT: setb %bl 1080; X86-NEXT: movdqa %xmm0, (%ebp) 1081; X86-NEXT: movl %ecx, (%edi) 1082; X86-NEXT: movl %ebx, (%esi) 1083; X86-NEXT: popl %esi 1084; X86-NEXT: popl %edi 1085; X86-NEXT: popl %ebx 1086; X86-NEXT: popl %ebp 1087; X86-NEXT: retl 1088; 1089; X64-LABEL: pcmpestr_mask_index_flag: 1090; X64: # %bb.0: # %entry 1091; X64-NEXT: movq %rcx, %r9 1092; X64-NEXT: movq %rdx, %r10 1093; X64-NEXT: movl %esi, %edx 1094; X64-NEXT: movl %edi, %eax 1095; X64-NEXT: movdqa %xmm0, %xmm2 1096; X64-NEXT: pcmpestrm $24, %xmm1, %xmm0 1097; X64-NEXT: xorl %esi, %esi 1098; X64-NEXT: 
pcmpestri $24, %xmm1, %xmm2 1099; X64-NEXT: setb %sil 1100; X64-NEXT: movdqa %xmm0, (%r10) 1101; X64-NEXT: movl %ecx, (%r9) 1102; X64-NEXT: movl %esi, (%r8) 1103; X64-NEXT: retq 1104entry: 1105 %index = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len, i8 24) 1106 %mask = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len, i8 24) 1107 %flag = call i32 @llvm.x86.sse42.pcmpestric128(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len, i8 24) 1108 store <16 x i8> %mask, ptr %mptr 1109 store i32 %index, ptr %iptr 1110 store i32 %flag, ptr %fptr 1111 ret void 1112} 1113 1114define void @pcmpistr_index_flag(<16 x i8> %lhs, <16 x i8> %rhs, ptr %iptr, ptr %fptr) nounwind { 1115; X86-LABEL: pcmpistr_index_flag: 1116; X86: # %bb.0: # %entry 1117; X86-NEXT: pushl %esi 1118; X86-NEXT: movl {{[0-9]+}}(%esp), %edx 1119; X86-NEXT: movl {{[0-9]+}}(%esp), %esi 1120; X86-NEXT: xorl %eax, %eax 1121; X86-NEXT: pcmpistri $24, %xmm1, %xmm0 1122; X86-NEXT: setb %al 1123; X86-NEXT: movl %ecx, (%esi) 1124; X86-NEXT: movl %eax, (%edx) 1125; X86-NEXT: popl %esi 1126; X86-NEXT: retl 1127; 1128; X64-LABEL: pcmpistr_index_flag: 1129; X64: # %bb.0: # %entry 1130; X64-NEXT: xorl %eax, %eax 1131; X64-NEXT: pcmpistri $24, %xmm1, %xmm0 1132; X64-NEXT: setb %al 1133; X64-NEXT: movl %ecx, (%rdi) 1134; X64-NEXT: movl %eax, (%rsi) 1135; X64-NEXT: retq 1136entry: 1137 %flag = call i32 @llvm.x86.sse42.pcmpistric128(<16 x i8> %lhs, <16 x i8> %rhs, i8 24) 1138 %index = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %lhs, <16 x i8> %rhs, i8 24) 1139 store i32 %index, ptr %iptr 1140 store i32 %flag, ptr %fptr 1141 ret void 1142} 1143 1144define void @pcmpistr_mask_flag(<16 x i8> %lhs, <16 x i8> %rhs, ptr %mptr, ptr %fptr) nounwind { 1145; X86-LABEL: pcmpistr_mask_flag: 1146; X86: # %bb.0: # %entry 1147; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx 1148; X86-NEXT: movl {{[0-9]+}}(%esp), %edx 1149; X86-NEXT: 
xorl %eax, %eax 1150; X86-NEXT: pcmpistrm $24, %xmm1, %xmm0 1151; X86-NEXT: setb %al 1152; X86-NEXT: movdqa %xmm0, (%edx) 1153; X86-NEXT: movl %eax, (%ecx) 1154; X86-NEXT: retl 1155; 1156; X64-LABEL: pcmpistr_mask_flag: 1157; X64: # %bb.0: # %entry 1158; X64-NEXT: xorl %eax, %eax 1159; X64-NEXT: pcmpistrm $24, %xmm1, %xmm0 1160; X64-NEXT: setb %al 1161; X64-NEXT: movdqa %xmm0, (%rdi) 1162; X64-NEXT: movl %eax, (%rsi) 1163; X64-NEXT: retq 1164entry: 1165 %flag = call i32 @llvm.x86.sse42.pcmpistric128(<16 x i8> %lhs, <16 x i8> %rhs, i8 24) 1166 %mask = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %lhs, <16 x i8> %rhs, i8 24) 1167 store <16 x i8> %mask, ptr %mptr 1168 store i32 %flag, ptr %fptr 1169 ret void 1170} 1171 1172define void @pcmpistr_mask_index(<16 x i8> %lhs, <16 x i8> %rhs, ptr %mptr, ptr %iptr) nounwind { 1173; X86-LABEL: pcmpistr_mask_index: 1174; X86: # %bb.0: # %entry 1175; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 1176; X86-NEXT: movl {{[0-9]+}}(%esp), %edx 1177; X86-NEXT: pcmpistri $24, %xmm1, %xmm0 1178; X86-NEXT: pcmpistrm $24, %xmm1, %xmm0 1179; X86-NEXT: movdqa %xmm0, (%edx) 1180; X86-NEXT: movl %ecx, (%eax) 1181; X86-NEXT: retl 1182; 1183; X64-LABEL: pcmpistr_mask_index: 1184; X64: # %bb.0: # %entry 1185; X64-NEXT: pcmpistri $24, %xmm1, %xmm0 1186; X64-NEXT: pcmpistrm $24, %xmm1, %xmm0 1187; X64-NEXT: movdqa %xmm0, (%rdi) 1188; X64-NEXT: movl %ecx, (%rsi) 1189; X64-NEXT: retq 1190entry: 1191 %index = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %lhs, <16 x i8> %rhs, i8 24) 1192 %mask = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %lhs, <16 x i8> %rhs, i8 24) 1193 store <16 x i8> %mask, ptr %mptr 1194 store i32 %index, ptr %iptr 1195 ret void 1196} 1197 1198define void @pcmpistr_mask_index_flag(<16 x i8> %lhs, <16 x i8> %rhs, ptr %mptr, ptr %iptr, ptr %fptr) nounwind { 1199; X86-LABEL: pcmpistr_mask_index_flag: 1200; X86: # %bb.0: # %entry 1201; X86-NEXT: pushl %ebx 1202; X86-NEXT: pushl %esi 1203; X86-NEXT: movdqa %xmm0, %xmm2 
1204; X86-NEXT: movl {{[0-9]+}}(%esp), %eax 1205; X86-NEXT: movl {{[0-9]+}}(%esp), %edx 1206; X86-NEXT: movl {{[0-9]+}}(%esp), %esi 1207; X86-NEXT: pcmpistrm $24, %xmm1, %xmm0 1208; X86-NEXT: xorl %ebx, %ebx 1209; X86-NEXT: pcmpistri $24, %xmm1, %xmm2 1210; X86-NEXT: setb %bl 1211; X86-NEXT: movdqa %xmm0, (%esi) 1212; X86-NEXT: movl %ecx, (%edx) 1213; X86-NEXT: movl %ebx, (%eax) 1214; X86-NEXT: popl %esi 1215; X86-NEXT: popl %ebx 1216; X86-NEXT: retl 1217; 1218; X64-LABEL: pcmpistr_mask_index_flag: 1219; X64: # %bb.0: # %entry 1220; X64-NEXT: movdqa %xmm0, %xmm2 1221; X64-NEXT: pcmpistrm $24, %xmm1, %xmm0 1222; X64-NEXT: xorl %eax, %eax 1223; X64-NEXT: pcmpistri $24, %xmm1, %xmm2 1224; X64-NEXT: setb %al 1225; X64-NEXT: movdqa %xmm0, (%rdi) 1226; X64-NEXT: movl %ecx, (%rsi) 1227; X64-NEXT: movl %eax, (%rdx) 1228; X64-NEXT: retq 1229entry: 1230 %index = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %lhs, <16 x i8> %rhs, i8 24) 1231 %mask = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %lhs, <16 x i8> %rhs, i8 24) 1232 %flag = call i32 @llvm.x86.sse42.pcmpistric128(<16 x i8> %lhs, <16 x i8> %rhs, i8 24) 1233 store <16 x i8> %mask, ptr %mptr 1234 store i32 %index, ptr %iptr 1235 store i32 %flag, ptr %fptr 1236 ret void 1237} 1238 1239; Make sure we don't fold loads when we need to emit pcmpistrm and pcmpistri. 
; Same three-result request as @pcmpistr_mask_index_flag, but the second
; operand is loaded from %rhsptr with align 1 instead of arriving in a
; register. The checks expect a standalone movdqu into %xmm2, with both
; pcmpistrm and pcmpistri then taking %xmm2 as a register operand rather than
; a memory operand.
define void @pcmpistr_mask_index_flag_load(<16 x i8> %lhs, ptr %rhsptr, ptr %mptr, ptr %iptr, ptr %fptr) nounwind {
; X86-LABEL: pcmpistr_mask_index_flag_load:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %ebx
; X86-NEXT: pushl %esi
; X86-NEXT: movdqa %xmm0, %xmm1
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movdqu (%ecx), %xmm2
; X86-NEXT: pcmpistrm $24, %xmm2, %xmm0
; X86-NEXT: xorl %ebx, %ebx
; X86-NEXT: pcmpistri $24, %xmm2, %xmm1
; X86-NEXT: setb %bl
; X86-NEXT: movdqa %xmm0, (%esi)
; X86-NEXT: movl %ecx, (%edx)
; X86-NEXT: movl %ebx, (%eax)
; X86-NEXT: popl %esi
; X86-NEXT: popl %ebx
; X86-NEXT: retl
;
; X64-LABEL: pcmpistr_mask_index_flag_load:
; X64: # %bb.0: # %entry
; X64-NEXT: movq %rcx, %rax
; X64-NEXT: movdqa %xmm0, %xmm1
; X64-NEXT: movdqu (%rdi), %xmm2
; X64-NEXT: pcmpistrm $24, %xmm2, %xmm0
; X64-NEXT: xorl %edi, %edi
; X64-NEXT: pcmpistri $24, %xmm2, %xmm1
; X64-NEXT: setb %dil
; X64-NEXT: movdqa %xmm0, (%rsi)
; X64-NEXT: movl %ecx, (%rdx)
; X64-NEXT: movl %edi, (%rax)
; X64-NEXT: retq
entry:
  %rhs = load <16 x i8>, ptr %rhsptr, align 1
  %index = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %lhs, <16 x i8> %rhs, i8 24)
  %mask = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %lhs, <16 x i8> %rhs, i8 24)
  %flag = call i32 @llvm.x86.sse42.pcmpistric128(<16 x i8> %lhs, <16 x i8> %rhs, i8 24)
  store <16 x i8> %mask, ptr %mptr
  store i32 %index, ptr %iptr
  store i32 %flag, ptr %fptr
  ret void
}

; Make sure we don't fold nontemporal loads.
; Loads the second operand from %rhsptr with align 16 and a !nontemporal
; attachment, passes it to the flag intrinsic and returns the flag. The checks
; expect the load to stay a separate movntdqa into %xmm1, followed by a
; register-operand pcmpestri and a setb.
; NOTE(review): the function is named after pcmpestri, but the IR calls
; pcmpestric128 (the flag form); the instruction checked is still pcmpestri.
define i32 @pcmpestri_nontemporal(<16 x i8> %lhs, i32 %lhs_len, ptr %rhsptr, i32 %rhs_len) nounwind {
; X86-LABEL: pcmpestri_nontemporal:
; X86: # %bb.0: # %entry
; X86-NEXT: pushl %ebx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movntdqa (%ecx), %xmm1
; X86-NEXT: xorl %ebx, %ebx
; X86-NEXT: pcmpestri $24, %xmm1, %xmm0
; X86-NEXT: setb %bl
; X86-NEXT: movl %ebx, %eax
; X86-NEXT: popl %ebx
; X86-NEXT: retl
;
; X64-LABEL: pcmpestri_nontemporal:
; X64: # %bb.0: # %entry
; X64-NEXT: movl %edi, %eax
; X64-NEXT: movntdqa (%rsi), %xmm1
; X64-NEXT: xorl %esi, %esi
; X64-NEXT: pcmpestri $24, %xmm1, %xmm0
; X64-NEXT: setb %sil
; X64-NEXT: movl %esi, %eax
; X64-NEXT: retq
entry:
  %rhs = load <16 x i8>, ptr %rhsptr, align 16, !nontemporal !0
  %flag = call i32 @llvm.x86.sse42.pcmpestric128(<16 x i8> %lhs, i32 %lhs_len, <16 x i8> %rhs, i32 %rhs_len, i8 24)
  ret i32 %flag
}

; Metadata node referenced by the !nontemporal attachment on the load in
; @pcmpestri_nontemporal.
!0 = !{ i32 1 }