; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=-sse,-sse2 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-BASELINE
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse,-sse2 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SSE1
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse,+sse2 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-SSE2
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+xop < %s | FileCheck %s --check-prefixes=CHECK,CHECK-XOP

; https://bugs.llvm.org/show_bug.cgi?id=37104

; All the advanced stuff (negative tests, commutativity) is handled in the
; scalar version of the test only.

; ============================================================================ ;
; 8-bit vector width
; ============================================================================ ;

; <1 x i8>: (%x & %mask) | (%y & ~%mask).
define <1 x i8> @out_v1i8(<1 x i8> %x, <1 x i8> %y, <1 x i8> %mask) nounwind {
; CHECK-LABEL: out_v1i8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edx, %eax
; CHECK-NEXT:    andl %edx, %edi
; CHECK-NEXT:    notb %al
; CHECK-NEXT:    andb %sil, %al
; CHECK-NEXT:    orb %dil, %al
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    retq
  %mx = and <1 x i8> %x, %mask
  %notmask = xor <1 x i8> %mask, <i8 -1>
  %my = and <1 x i8> %y, %notmask
  %r = or <1 x i8> %mx, %my
  ret <1 x i8> %r
}

; ============================================================================ ;
; 16-bit vector width
; ============================================================================ ;

; <2 x i8>: (%x & %mask) | (%y & ~%mask).
define <2 x i8> @out_v2i8(<2 x i8> %x, <2 x i8> %y, <2 x i8> %mask) nounwind {
; CHECK-BASELINE-LABEL: out_v2i8:
; CHECK-BASELINE:       # %bb.0:
; CHECK-BASELINE-NEXT:    movl %r8d, %eax
; CHECK-BASELINE-NEXT:    andl %r9d, %esi
; CHECK-BASELINE-NEXT:    andl %r8d, %edi
; CHECK-BASELINE-NEXT:    notb %al
; CHECK-BASELINE-NEXT:    notb %r9b
; CHECK-BASELINE-NEXT:    andb %cl, %r9b
; CHECK-BASELINE-NEXT:    andb %dl, %al
; CHECK-BASELINE-NEXT:    orb %dil, %al
; CHECK-BASELINE-NEXT:    orb %sil, %r9b
; CHECK-BASELINE-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-BASELINE-NEXT:    movl %r9d, %edx
; CHECK-BASELINE-NEXT:    retq
;
; CHECK-SSE1-LABEL: out_v2i8:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movl %r8d, %eax
; CHECK-SSE1-NEXT:    andl %r9d, %esi
; CHECK-SSE1-NEXT:    andl %r8d, %edi
; CHECK-SSE1-NEXT:    notb %al
; CHECK-SSE1-NEXT:    notb %r9b
; CHECK-SSE1-NEXT:    andb %cl, %r9b
; CHECK-SSE1-NEXT:    andb %dl, %al
; CHECK-SSE1-NEXT:    orb %dil, %al
; CHECK-SSE1-NEXT:    orb %sil, %r9b
; CHECK-SSE1-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-SSE1-NEXT:    movl %r9d, %edx
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: out_v2i8:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    andps %xmm2, %xmm0
; CHECK-SSE2-NEXT:    andnps %xmm1, %xmm2
; CHECK-SSE2-NEXT:    orps %xmm2, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: out_v2i8:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vpcmov %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %mx = and <2 x i8> %x, %mask
  %notmask = xor <2 x i8> %mask, <i8 -1, i8 -1>
  %my = and <2 x i8> %y, %notmask
  %r = or <2 x i8> %mx, %my
  ret <2 x i8> %r
}

; <1 x i16>: (%x & %mask) | (%y & ~%mask).
define <1 x i16> @out_v1i16(<1 x i16> %x, <1 x i16> %y, <1 x i16> %mask) nounwind {
; CHECK-LABEL: out_v1i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movl %edx, %eax
; CHECK-NEXT:    andl %edx, %edi
; CHECK-NEXT:    notl %eax
; CHECK-NEXT:    andl %esi, %eax
; CHECK-NEXT:    orl %edi, %eax
; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
; CHECK-NEXT:    retq
  %mx = and <1 x i16> %x, %mask
  %notmask = xor <1 x i16> %mask, <i16 -1>
  %my = and <1 x i16> %y, %notmask
  %r = or <1 x i16> %mx, %my
  ret <1 x i16> %r
}

; ============================================================================ ;
; 32-bit vector width
; ============================================================================ ;

107define <4 x i8> @out_v4i8(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind { 108; CHECK-BASELINE-LABEL: out_v4i8: 109; CHECK-BASELINE: # %bb.0: 110; CHECK-BASELINE-NEXT: movq %rdi, %rax 111; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %edi 112; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d 113; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d 114; CHECK-BASELINE-NEXT: xorl %r9d, %esi 115; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %sil 116; CHECK-BASELINE-NEXT: xorb %r9b, %sil 117; CHECK-BASELINE-NEXT: xorb %r11b, %dl 118; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %dl 119; CHECK-BASELINE-NEXT: xorb %r11b, %dl 120; CHECK-BASELINE-NEXT: xorb %r10b, %cl 121; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %cl 122; CHECK-BASELINE-NEXT: xorb %r10b, %cl 123; CHECK-BASELINE-NEXT: xorb %dil, %r8b 124; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r8b 125; CHECK-BASELINE-NEXT: xorb %dil, %r8b 126; CHECK-BASELINE-NEXT: movb %r8b, 3(%rax) 127; CHECK-BASELINE-NEXT: movb %cl, 2(%rax) 128; CHECK-BASELINE-NEXT: movb %dl, 1(%rax) 129; CHECK-BASELINE-NEXT: movb %sil, (%rax) 130; CHECK-BASELINE-NEXT: retq 131; 132; CHECK-SSE1-LABEL: out_v4i8: 133; CHECK-SSE1: # %bb.0: 134; CHECK-SSE1-NEXT: movq %rdi, %rax 135; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %edi 136; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d 137; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d 138; CHECK-SSE1-NEXT: xorl %r9d, %esi 139; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %sil 140; CHECK-SSE1-NEXT: xorb %r9b, %sil 141; CHECK-SSE1-NEXT: xorb %r11b, %dl 142; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %dl 143; CHECK-SSE1-NEXT: xorb %r11b, %dl 144; CHECK-SSE1-NEXT: xorb %r10b, %cl 145; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %cl 146; CHECK-SSE1-NEXT: xorb %r10b, %cl 147; CHECK-SSE1-NEXT: xorb %dil, %r8b 148; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r8b 149; CHECK-SSE1-NEXT: xorb %dil, %r8b 150; CHECK-SSE1-NEXT: movb %r8b, 3(%rax) 151; CHECK-SSE1-NEXT: movb %cl, 2(%rax) 152; CHECK-SSE1-NEXT: movb %dl, 
1(%rax) 153; CHECK-SSE1-NEXT: movb %sil, (%rax) 154; CHECK-SSE1-NEXT: retq 155; 156; CHECK-SSE2-LABEL: out_v4i8: 157; CHECK-SSE2: # %bb.0: 158; CHECK-SSE2-NEXT: andps %xmm2, %xmm0 159; CHECK-SSE2-NEXT: andnps %xmm1, %xmm2 160; CHECK-SSE2-NEXT: orps %xmm2, %xmm0 161; CHECK-SSE2-NEXT: retq 162; 163; CHECK-XOP-LABEL: out_v4i8: 164; CHECK-XOP: # %bb.0: 165; CHECK-XOP-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0 166; CHECK-XOP-NEXT: retq 167 %mx = and <4 x i8> %x, %mask 168 %notmask = xor <4 x i8> %mask, <i8 -1, i8 -1, i8 -1, i8 -1> 169 %my = and <4 x i8> %y, %notmask 170 %r = or <4 x i8> %mx, %my 171 ret <4 x i8> %r 172} 173 174define <4 x i8> @out_v4i8_undef(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind { 175; CHECK-BASELINE-LABEL: out_v4i8_undef: 176; CHECK-BASELINE: # %bb.0: 177; CHECK-BASELINE-NEXT: movq %rdi, %rax 178; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %edi 179; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d 180; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %cl 181; CHECK-BASELINE-NEXT: xorl %r9d, %esi 182; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %sil 183; CHECK-BASELINE-NEXT: xorb %r9b, %sil 184; CHECK-BASELINE-NEXT: xorb %r10b, %dl 185; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %dl 186; CHECK-BASELINE-NEXT: xorb %r10b, %dl 187; CHECK-BASELINE-NEXT: xorb %dil, %r8b 188; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r8b 189; CHECK-BASELINE-NEXT: xorb %dil, %r8b 190; CHECK-BASELINE-NEXT: movb %cl, 2(%rax) 191; CHECK-BASELINE-NEXT: movb %r8b, 3(%rax) 192; CHECK-BASELINE-NEXT: movb %dl, 1(%rax) 193; CHECK-BASELINE-NEXT: movb %sil, (%rax) 194; CHECK-BASELINE-NEXT: retq 195; 196; CHECK-SSE1-LABEL: out_v4i8_undef: 197; CHECK-SSE1: # %bb.0: 198; CHECK-SSE1-NEXT: movq %rdi, %rax 199; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %edi 200; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d 201; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %cl 202; CHECK-SSE1-NEXT: xorl %r9d, %esi 203; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %sil 204; CHECK-SSE1-NEXT: xorb %r9b, 
%sil 205; CHECK-SSE1-NEXT: xorb %r10b, %dl 206; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %dl 207; CHECK-SSE1-NEXT: xorb %r10b, %dl 208; CHECK-SSE1-NEXT: xorb %dil, %r8b 209; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r8b 210; CHECK-SSE1-NEXT: xorb %dil, %r8b 211; CHECK-SSE1-NEXT: movb %cl, 2(%rax) 212; CHECK-SSE1-NEXT: movb %r8b, 3(%rax) 213; CHECK-SSE1-NEXT: movb %dl, 1(%rax) 214; CHECK-SSE1-NEXT: movb %sil, (%rax) 215; CHECK-SSE1-NEXT: retq 216; 217; CHECK-SSE2-LABEL: out_v4i8_undef: 218; CHECK-SSE2: # %bb.0: 219; CHECK-SSE2-NEXT: andps %xmm2, %xmm0 220; CHECK-SSE2-NEXT: andnps %xmm1, %xmm2 221; CHECK-SSE2-NEXT: orps %xmm2, %xmm0 222; CHECK-SSE2-NEXT: retq 223; 224; CHECK-XOP-LABEL: out_v4i8_undef: 225; CHECK-XOP: # %bb.0: 226; CHECK-XOP-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0 227; CHECK-XOP-NEXT: retq 228 %mx = and <4 x i8> %x, %mask 229 %notmask = xor <4 x i8> %mask, <i8 -1, i8 -1, i8 undef, i8 -1> 230 %my = and <4 x i8> %y, %notmask 231 %r = or <4 x i8> %mx, %my 232 ret <4 x i8> %r 233} 234 235define <2 x i16> @out_v2i16(<2 x i16> %x, <2 x i16> %y, <2 x i16> %mask) nounwind { 236; CHECK-BASELINE-LABEL: out_v2i16: 237; CHECK-BASELINE: # %bb.0: 238; CHECK-BASELINE-NEXT: movl %r8d, %eax 239; CHECK-BASELINE-NEXT: andl %r9d, %esi 240; CHECK-BASELINE-NEXT: andl %r8d, %edi 241; CHECK-BASELINE-NEXT: notl %eax 242; CHECK-BASELINE-NEXT: notl %r9d 243; CHECK-BASELINE-NEXT: andl %ecx, %r9d 244; CHECK-BASELINE-NEXT: orl %esi, %r9d 245; CHECK-BASELINE-NEXT: andl %edx, %eax 246; CHECK-BASELINE-NEXT: orl %edi, %eax 247; CHECK-BASELINE-NEXT: # kill: def $ax killed $ax killed $eax 248; CHECK-BASELINE-NEXT: movl %r9d, %edx 249; CHECK-BASELINE-NEXT: retq 250; 251; CHECK-SSE1-LABEL: out_v2i16: 252; CHECK-SSE1: # %bb.0: 253; CHECK-SSE1-NEXT: movl %r8d, %eax 254; CHECK-SSE1-NEXT: andl %r9d, %esi 255; CHECK-SSE1-NEXT: andl %r8d, %edi 256; CHECK-SSE1-NEXT: notl %eax 257; CHECK-SSE1-NEXT: notl %r9d 258; CHECK-SSE1-NEXT: andl %ecx, %r9d 259; CHECK-SSE1-NEXT: orl %esi, %r9d 260; 
CHECK-SSE1-NEXT: andl %edx, %eax 261; CHECK-SSE1-NEXT: orl %edi, %eax 262; CHECK-SSE1-NEXT: # kill: def $ax killed $ax killed $eax 263; CHECK-SSE1-NEXT: movl %r9d, %edx 264; CHECK-SSE1-NEXT: retq 265; 266; CHECK-SSE2-LABEL: out_v2i16: 267; CHECK-SSE2: # %bb.0: 268; CHECK-SSE2-NEXT: andps %xmm2, %xmm0 269; CHECK-SSE2-NEXT: andnps %xmm1, %xmm2 270; CHECK-SSE2-NEXT: orps %xmm2, %xmm0 271; CHECK-SSE2-NEXT: retq 272; 273; CHECK-XOP-LABEL: out_v2i16: 274; CHECK-XOP: # %bb.0: 275; CHECK-XOP-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0 276; CHECK-XOP-NEXT: retq 277 %mx = and <2 x i16> %x, %mask 278 %notmask = xor <2 x i16> %mask, <i16 -1, i16 -1> 279 %my = and <2 x i16> %y, %notmask 280 %r = or <2 x i16> %mx, %my 281 ret <2 x i16> %r 282} 283 284define <1 x i32> @out_v1i32(<1 x i32> %x, <1 x i32> %y, <1 x i32> %mask) nounwind { 285; CHECK-LABEL: out_v1i32: 286; CHECK: # %bb.0: 287; CHECK-NEXT: movl %edi, %eax 288; CHECK-NEXT: xorl %esi, %eax 289; CHECK-NEXT: andl %edx, %eax 290; CHECK-NEXT: xorl %esi, %eax 291; CHECK-NEXT: retq 292 %mx = and <1 x i32> %x, %mask 293 %notmask = xor <1 x i32> %mask, <i32 -1> 294 %my = and <1 x i32> %y, %notmask 295 %r = or <1 x i32> %mx, %my 296 ret <1 x i32> %r 297} 298 299; ============================================================================ ; 300; 64-bit vector width 301; ============================================================================ ; 302 303define <8 x i8> @out_v8i8(<8 x i8> %x, <8 x i8> %y, <8 x i8> %mask) nounwind { 304; CHECK-BASELINE-LABEL: out_v8i8: 305; CHECK-BASELINE: # %bb.0: 306; CHECK-BASELINE-NEXT: pushq %rbp 307; CHECK-BASELINE-NEXT: pushq %r15 308; CHECK-BASELINE-NEXT: pushq %r14 309; CHECK-BASELINE-NEXT: pushq %r12 310; CHECK-BASELINE-NEXT: pushq %rbx 311; CHECK-BASELINE-NEXT: movq %rdi, %rax 312; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %edi 313; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d 314; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d 315; CHECK-BASELINE-NEXT: movzbl 
{{[0-9]+}}(%rsp), %ebx 316; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %ebp 317; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r14d 318; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r15d 319; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r12d 320; CHECK-BASELINE-NEXT: xorb %r12b, %sil 321; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %sil 322; CHECK-BASELINE-NEXT: xorb %r12b, %sil 323; CHECK-BASELINE-NEXT: xorb %r15b, %dl 324; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %dl 325; CHECK-BASELINE-NEXT: xorb %r15b, %dl 326; CHECK-BASELINE-NEXT: xorb %r14b, %cl 327; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %cl 328; CHECK-BASELINE-NEXT: xorb %r14b, %cl 329; CHECK-BASELINE-NEXT: xorb %bpl, %r8b 330; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r8b 331; CHECK-BASELINE-NEXT: xorb %bpl, %r8b 332; CHECK-BASELINE-NEXT: xorb %bl, %r9b 333; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r9b 334; CHECK-BASELINE-NEXT: xorb %bl, %r9b 335; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx 336; CHECK-BASELINE-NEXT: xorb %r11b, %bl 337; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %bl 338; CHECK-BASELINE-NEXT: xorb %r11b, %bl 339; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d 340; CHECK-BASELINE-NEXT: xorb %r10b, %r11b 341; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r11b 342; CHECK-BASELINE-NEXT: xorb %r10b, %r11b 343; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d 344; CHECK-BASELINE-NEXT: xorb %dil, %r10b 345; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r10b 346; CHECK-BASELINE-NEXT: xorb %dil, %r10b 347; CHECK-BASELINE-NEXT: movb %r10b, 7(%rax) 348; CHECK-BASELINE-NEXT: movb %r11b, 6(%rax) 349; CHECK-BASELINE-NEXT: movb %bl, 5(%rax) 350; CHECK-BASELINE-NEXT: movb %r9b, 4(%rax) 351; CHECK-BASELINE-NEXT: movb %r8b, 3(%rax) 352; CHECK-BASELINE-NEXT: movb %cl, 2(%rax) 353; CHECK-BASELINE-NEXT: movb %dl, 1(%rax) 354; CHECK-BASELINE-NEXT: movb %sil, (%rax) 355; CHECK-BASELINE-NEXT: popq %rbx 356; CHECK-BASELINE-NEXT: popq %r12 357; CHECK-BASELINE-NEXT: popq %r14 358; 
CHECK-BASELINE-NEXT: popq %r15 359; CHECK-BASELINE-NEXT: popq %rbp 360; CHECK-BASELINE-NEXT: retq 361; 362; CHECK-SSE1-LABEL: out_v8i8: 363; CHECK-SSE1: # %bb.0: 364; CHECK-SSE1-NEXT: pushq %rbp 365; CHECK-SSE1-NEXT: pushq %r15 366; CHECK-SSE1-NEXT: pushq %r14 367; CHECK-SSE1-NEXT: pushq %r12 368; CHECK-SSE1-NEXT: pushq %rbx 369; CHECK-SSE1-NEXT: movq %rdi, %rax 370; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %edi 371; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d 372; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d 373; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx 374; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %ebp 375; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r14d 376; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r15d 377; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r12d 378; CHECK-SSE1-NEXT: xorb %r12b, %sil 379; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %sil 380; CHECK-SSE1-NEXT: xorb %r12b, %sil 381; CHECK-SSE1-NEXT: xorb %r15b, %dl 382; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %dl 383; CHECK-SSE1-NEXT: xorb %r15b, %dl 384; CHECK-SSE1-NEXT: xorb %r14b, %cl 385; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %cl 386; CHECK-SSE1-NEXT: xorb %r14b, %cl 387; CHECK-SSE1-NEXT: xorb %bpl, %r8b 388; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r8b 389; CHECK-SSE1-NEXT: xorb %bpl, %r8b 390; CHECK-SSE1-NEXT: xorb %bl, %r9b 391; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r9b 392; CHECK-SSE1-NEXT: xorb %bl, %r9b 393; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx 394; CHECK-SSE1-NEXT: xorb %r11b, %bl 395; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %bl 396; CHECK-SSE1-NEXT: xorb %r11b, %bl 397; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d 398; CHECK-SSE1-NEXT: xorb %r10b, %r11b 399; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r11b 400; CHECK-SSE1-NEXT: xorb %r10b, %r11b 401; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d 402; CHECK-SSE1-NEXT: xorb %dil, %r10b 403; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r10b 404; CHECK-SSE1-NEXT: xorb %dil, %r10b 405; CHECK-SSE1-NEXT: movb %r10b, 7(%rax) 
406; CHECK-SSE1-NEXT: movb %r11b, 6(%rax) 407; CHECK-SSE1-NEXT: movb %bl, 5(%rax) 408; CHECK-SSE1-NEXT: movb %r9b, 4(%rax) 409; CHECK-SSE1-NEXT: movb %r8b, 3(%rax) 410; CHECK-SSE1-NEXT: movb %cl, 2(%rax) 411; CHECK-SSE1-NEXT: movb %dl, 1(%rax) 412; CHECK-SSE1-NEXT: movb %sil, (%rax) 413; CHECK-SSE1-NEXT: popq %rbx 414; CHECK-SSE1-NEXT: popq %r12 415; CHECK-SSE1-NEXT: popq %r14 416; CHECK-SSE1-NEXT: popq %r15 417; CHECK-SSE1-NEXT: popq %rbp 418; CHECK-SSE1-NEXT: retq 419; 420; CHECK-SSE2-LABEL: out_v8i8: 421; CHECK-SSE2: # %bb.0: 422; CHECK-SSE2-NEXT: andps %xmm2, %xmm0 423; CHECK-SSE2-NEXT: andnps %xmm1, %xmm2 424; CHECK-SSE2-NEXT: orps %xmm2, %xmm0 425; CHECK-SSE2-NEXT: retq 426; 427; CHECK-XOP-LABEL: out_v8i8: 428; CHECK-XOP: # %bb.0: 429; CHECK-XOP-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0 430; CHECK-XOP-NEXT: retq 431 %mx = and <8 x i8> %x, %mask 432 %notmask = xor <8 x i8> %mask, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> 433 %my = and <8 x i8> %y, %notmask 434 %r = or <8 x i8> %mx, %my 435 ret <8 x i8> %r 436} 437 438define <4 x i16> @out_v4i16(<4 x i16> %x, <4 x i16> %y, <4 x i16> %mask) nounwind { 439; CHECK-BASELINE-LABEL: out_v4i16: 440; CHECK-BASELINE: # %bb.0: 441; CHECK-BASELINE-NEXT: movq %rdi, %rax 442; CHECK-BASELINE-NEXT: movzwl {{[0-9]+}}(%rsp), %edi 443; CHECK-BASELINE-NEXT: movzwl {{[0-9]+}}(%rsp), %r10d 444; CHECK-BASELINE-NEXT: movzwl {{[0-9]+}}(%rsp), %r11d 445; CHECK-BASELINE-NEXT: xorl %r11d, %edx 446; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %dx 447; CHECK-BASELINE-NEXT: xorl %r11d, %edx 448; CHECK-BASELINE-NEXT: xorl %r10d, %ecx 449; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %cx 450; CHECK-BASELINE-NEXT: xorl %r10d, %ecx 451; CHECK-BASELINE-NEXT: xorl %edi, %r8d 452; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %r8w 453; CHECK-BASELINE-NEXT: xorl %edi, %r8d 454; CHECK-BASELINE-NEXT: xorl %r9d, %esi 455; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %si 456; CHECK-BASELINE-NEXT: xorl %r9d, %esi 457; CHECK-BASELINE-NEXT: movw 
%si, (%rax) 458; CHECK-BASELINE-NEXT: movw %r8w, 6(%rax) 459; CHECK-BASELINE-NEXT: movw %cx, 4(%rax) 460; CHECK-BASELINE-NEXT: movw %dx, 2(%rax) 461; CHECK-BASELINE-NEXT: retq 462; 463; CHECK-SSE1-LABEL: out_v4i16: 464; CHECK-SSE1: # %bb.0: 465; CHECK-SSE1-NEXT: movq %rdi, %rax 466; CHECK-SSE1-NEXT: movzwl {{[0-9]+}}(%rsp), %edi 467; CHECK-SSE1-NEXT: movzwl {{[0-9]+}}(%rsp), %r10d 468; CHECK-SSE1-NEXT: movzwl {{[0-9]+}}(%rsp), %r11d 469; CHECK-SSE1-NEXT: xorl %r11d, %edx 470; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %dx 471; CHECK-SSE1-NEXT: xorl %r11d, %edx 472; CHECK-SSE1-NEXT: xorl %r10d, %ecx 473; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %cx 474; CHECK-SSE1-NEXT: xorl %r10d, %ecx 475; CHECK-SSE1-NEXT: xorl %edi, %r8d 476; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %r8w 477; CHECK-SSE1-NEXT: xorl %edi, %r8d 478; CHECK-SSE1-NEXT: xorl %r9d, %esi 479; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %si 480; CHECK-SSE1-NEXT: xorl %r9d, %esi 481; CHECK-SSE1-NEXT: movw %si, (%rax) 482; CHECK-SSE1-NEXT: movw %r8w, 6(%rax) 483; CHECK-SSE1-NEXT: movw %cx, 4(%rax) 484; CHECK-SSE1-NEXT: movw %dx, 2(%rax) 485; CHECK-SSE1-NEXT: retq 486; 487; CHECK-SSE2-LABEL: out_v4i16: 488; CHECK-SSE2: # %bb.0: 489; CHECK-SSE2-NEXT: andps %xmm2, %xmm0 490; CHECK-SSE2-NEXT: andnps %xmm1, %xmm2 491; CHECK-SSE2-NEXT: orps %xmm2, %xmm0 492; CHECK-SSE2-NEXT: retq 493; 494; CHECK-XOP-LABEL: out_v4i16: 495; CHECK-XOP: # %bb.0: 496; CHECK-XOP-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0 497; CHECK-XOP-NEXT: retq 498 %mx = and <4 x i16> %x, %mask 499 %notmask = xor <4 x i16> %mask, <i16 -1, i16 -1, i16 -1, i16 -1> 500 %my = and <4 x i16> %y, %notmask 501 %r = or <4 x i16> %mx, %my 502 ret <4 x i16> %r 503} 504 505define <4 x i16> @out_v4i16_undef(<4 x i16> %x, <4 x i16> %y, <4 x i16> %mask) nounwind { 506; CHECK-BASELINE-LABEL: out_v4i16_undef: 507; CHECK-BASELINE: # %bb.0: 508; CHECK-BASELINE-NEXT: movq %rdi, %rax 509; CHECK-BASELINE-NEXT: movzwl {{[0-9]+}}(%rsp), %edi 510; CHECK-BASELINE-NEXT: movzwl 
{{[0-9]+}}(%rsp), %r10d 511; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %cx 512; CHECK-BASELINE-NEXT: xorl %r10d, %edx 513; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %dx 514; CHECK-BASELINE-NEXT: xorl %r10d, %edx 515; CHECK-BASELINE-NEXT: xorl %edi, %r8d 516; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %r8w 517; CHECK-BASELINE-NEXT: xorl %edi, %r8d 518; CHECK-BASELINE-NEXT: xorl %r9d, %esi 519; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %si 520; CHECK-BASELINE-NEXT: xorl %r9d, %esi 521; CHECK-BASELINE-NEXT: movw %cx, 4(%rax) 522; CHECK-BASELINE-NEXT: movw %si, (%rax) 523; CHECK-BASELINE-NEXT: movw %r8w, 6(%rax) 524; CHECK-BASELINE-NEXT: movw %dx, 2(%rax) 525; CHECK-BASELINE-NEXT: retq 526; 527; CHECK-SSE1-LABEL: out_v4i16_undef: 528; CHECK-SSE1: # %bb.0: 529; CHECK-SSE1-NEXT: movq %rdi, %rax 530; CHECK-SSE1-NEXT: movzwl {{[0-9]+}}(%rsp), %edi 531; CHECK-SSE1-NEXT: movzwl {{[0-9]+}}(%rsp), %r10d 532; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %cx 533; CHECK-SSE1-NEXT: xorl %r10d, %edx 534; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %dx 535; CHECK-SSE1-NEXT: xorl %r10d, %edx 536; CHECK-SSE1-NEXT: xorl %edi, %r8d 537; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %r8w 538; CHECK-SSE1-NEXT: xorl %edi, %r8d 539; CHECK-SSE1-NEXT: xorl %r9d, %esi 540; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %si 541; CHECK-SSE1-NEXT: xorl %r9d, %esi 542; CHECK-SSE1-NEXT: movw %cx, 4(%rax) 543; CHECK-SSE1-NEXT: movw %si, (%rax) 544; CHECK-SSE1-NEXT: movw %r8w, 6(%rax) 545; CHECK-SSE1-NEXT: movw %dx, 2(%rax) 546; CHECK-SSE1-NEXT: retq 547; 548; CHECK-SSE2-LABEL: out_v4i16_undef: 549; CHECK-SSE2: # %bb.0: 550; CHECK-SSE2-NEXT: andps %xmm2, %xmm0 551; CHECK-SSE2-NEXT: andnps %xmm1, %xmm2 552; CHECK-SSE2-NEXT: orps %xmm2, %xmm0 553; CHECK-SSE2-NEXT: retq 554; 555; CHECK-XOP-LABEL: out_v4i16_undef: 556; CHECK-XOP: # %bb.0: 557; CHECK-XOP-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0 558; CHECK-XOP-NEXT: retq 559 %mx = and <4 x i16> %x, %mask 560 %notmask = xor <4 x i16> %mask, <i16 -1, i16 -1, i16 undef, i16 
-1> 561 %my = and <4 x i16> %y, %notmask 562 %r = or <4 x i16> %mx, %my 563 ret <4 x i16> %r 564} 565 566define <2 x i32> @out_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %mask) nounwind { 567; CHECK-BASELINE-LABEL: out_v2i32: 568; CHECK-BASELINE: # %bb.0: 569; CHECK-BASELINE-NEXT: movl %edi, %eax 570; CHECK-BASELINE-NEXT: xorl %edx, %eax 571; CHECK-BASELINE-NEXT: andl %r8d, %eax 572; CHECK-BASELINE-NEXT: xorl %edx, %eax 573; CHECK-BASELINE-NEXT: xorl %ecx, %esi 574; CHECK-BASELINE-NEXT: andl %r9d, %esi 575; CHECK-BASELINE-NEXT: xorl %ecx, %esi 576; CHECK-BASELINE-NEXT: movl %esi, %edx 577; CHECK-BASELINE-NEXT: retq 578; 579; CHECK-SSE1-LABEL: out_v2i32: 580; CHECK-SSE1: # %bb.0: 581; CHECK-SSE1-NEXT: movl %edi, %eax 582; CHECK-SSE1-NEXT: xorl %edx, %eax 583; CHECK-SSE1-NEXT: andl %r8d, %eax 584; CHECK-SSE1-NEXT: xorl %edx, %eax 585; CHECK-SSE1-NEXT: xorl %ecx, %esi 586; CHECK-SSE1-NEXT: andl %r9d, %esi 587; CHECK-SSE1-NEXT: xorl %ecx, %esi 588; CHECK-SSE1-NEXT: movl %esi, %edx 589; CHECK-SSE1-NEXT: retq 590; 591; CHECK-SSE2-LABEL: out_v2i32: 592; CHECK-SSE2: # %bb.0: 593; CHECK-SSE2-NEXT: andps %xmm2, %xmm0 594; CHECK-SSE2-NEXT: andnps %xmm1, %xmm2 595; CHECK-SSE2-NEXT: orps %xmm2, %xmm0 596; CHECK-SSE2-NEXT: retq 597; 598; CHECK-XOP-LABEL: out_v2i32: 599; CHECK-XOP: # %bb.0: 600; CHECK-XOP-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0 601; CHECK-XOP-NEXT: retq 602 %mx = and <2 x i32> %x, %mask 603 %notmask = xor <2 x i32> %mask, <i32 -1, i32 -1> 604 %my = and <2 x i32> %y, %notmask 605 %r = or <2 x i32> %mx, %my 606 ret <2 x i32> %r 607} 608 609define <1 x i64> @out_v1i64(<1 x i64> %x, <1 x i64> %y, <1 x i64> %mask) nounwind { 610; CHECK-LABEL: out_v1i64: 611; CHECK: # %bb.0: 612; CHECK-NEXT: movq %rdi, %rax 613; CHECK-NEXT: xorq %rsi, %rax 614; CHECK-NEXT: andq %rdx, %rax 615; CHECK-NEXT: xorq %rsi, %rax 616; CHECK-NEXT: retq 617 %mx = and <1 x i64> %x, %mask 618 %notmask = xor <1 x i64> %mask, <i64 -1> 619 %my = and <1 x i64> %y, %notmask 620 %r = or <1 x i64> %mx, 
%my 621 ret <1 x i64> %r 622} 623 624; ============================================================================ ; 625; 128-bit vector width 626; ============================================================================ ; 627 628define <16 x i8> @out_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %mask) nounwind { 629; CHECK-BASELINE-LABEL: out_v16i8: 630; CHECK-BASELINE: # %bb.0: 631; CHECK-BASELINE-NEXT: pushq %rbp 632; CHECK-BASELINE-NEXT: pushq %r15 633; CHECK-BASELINE-NEXT: pushq %r14 634; CHECK-BASELINE-NEXT: pushq %r13 635; CHECK-BASELINE-NEXT: pushq %r12 636; CHECK-BASELINE-NEXT: pushq %rbx 637; CHECK-BASELINE-NEXT: movl %edx, %r11d 638; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r13d 639; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r15d 640; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %ebp 641; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r12d 642; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r14d 643; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx 644; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 645; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %edx 646; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d 647; CHECK-BASELINE-NEXT: xorb %r10b, %sil 648; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %sil 649; CHECK-BASELINE-NEXT: xorb %r10b, %sil 650; CHECK-BASELINE-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 651; CHECK-BASELINE-NEXT: xorb %dl, %r11b 652; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r11b 653; CHECK-BASELINE-NEXT: xorb %dl, %r11b 654; CHECK-BASELINE-NEXT: xorb %al, %cl 655; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %cl 656; CHECK-BASELINE-NEXT: xorb %al, %cl 657; CHECK-BASELINE-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 658; CHECK-BASELINE-NEXT: xorb %bl, %r8b 659; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r8b 660; CHECK-BASELINE-NEXT: xorb %bl, %r8b 661; CHECK-BASELINE-NEXT: movl %r8d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 662; CHECK-BASELINE-NEXT: xorb %r14b, %r9b 663; 
CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r9b 664; CHECK-BASELINE-NEXT: xorb %r14b, %r9b 665; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r14d 666; CHECK-BASELINE-NEXT: xorb %r12b, %r14b 667; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r14b 668; CHECK-BASELINE-NEXT: xorb %r12b, %r14b 669; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r12d 670; CHECK-BASELINE-NEXT: xorb %bpl, %r12b 671; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r12b 672; CHECK-BASELINE-NEXT: xorb %bpl, %r12b 673; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %esi 674; CHECK-BASELINE-NEXT: xorb %r15b, %sil 675; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %sil 676; CHECK-BASELINE-NEXT: xorb %r15b, %sil 677; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %edx 678; CHECK-BASELINE-NEXT: xorb %r13b, %dl 679; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %dl 680; CHECK-BASELINE-NEXT: xorb %r13b, %dl 681; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx 682; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 683; CHECK-BASELINE-NEXT: xorb %al, %cl 684; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %cl 685; CHECK-BASELINE-NEXT: xorb %al, %cl 686; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r13d 687; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 688; CHECK-BASELINE-NEXT: xorb %al, %r13b 689; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r13b 690; CHECK-BASELINE-NEXT: xorb %al, %r13b 691; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r15d 692; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 693; CHECK-BASELINE-NEXT: xorb %al, %r15b 694; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r15b 695; CHECK-BASELINE-NEXT: xorb %al, %r15b 696; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %ebp 697; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 698; CHECK-BASELINE-NEXT: xorb %al, %bpl 699; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %bpl 700; CHECK-BASELINE-NEXT: xorb %al, %bpl 701; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx 702; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 703; 
CHECK-BASELINE-NEXT: xorb %al, %bl 704; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %bl 705; CHECK-BASELINE-NEXT: xorb %al, %bl 706; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 707; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r8d 708; CHECK-BASELINE-NEXT: xorb %r8b, %al 709; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %al 710; CHECK-BASELINE-NEXT: xorb %r8b, %al 711; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d 712; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r8d 713; CHECK-BASELINE-NEXT: xorb %r8b, %r10b 714; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r10b 715; CHECK-BASELINE-NEXT: xorb %r8b, %r10b 716; CHECK-BASELINE-NEXT: movb %r10b, 15(%rdi) 717; CHECK-BASELINE-NEXT: movb %al, 14(%rdi) 718; CHECK-BASELINE-NEXT: movb %bl, 13(%rdi) 719; CHECK-BASELINE-NEXT: movb %bpl, 12(%rdi) 720; CHECK-BASELINE-NEXT: movb %r15b, 11(%rdi) 721; CHECK-BASELINE-NEXT: movb %r13b, 10(%rdi) 722; CHECK-BASELINE-NEXT: movb %cl, 9(%rdi) 723; CHECK-BASELINE-NEXT: movb %dl, 8(%rdi) 724; CHECK-BASELINE-NEXT: movb %sil, 7(%rdi) 725; CHECK-BASELINE-NEXT: movb %r12b, 6(%rdi) 726; CHECK-BASELINE-NEXT: movb %r14b, 5(%rdi) 727; CHECK-BASELINE-NEXT: movb %r9b, 4(%rdi) 728; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload 729; CHECK-BASELINE-NEXT: movb %al, 3(%rdi) 730; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload 731; CHECK-BASELINE-NEXT: movb %al, 2(%rdi) 732; CHECK-BASELINE-NEXT: movb %r11b, 1(%rdi) 733; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload 734; CHECK-BASELINE-NEXT: movb %al, (%rdi) 735; CHECK-BASELINE-NEXT: movq %rdi, %rax 736; CHECK-BASELINE-NEXT: popq %rbx 737; CHECK-BASELINE-NEXT: popq %r12 738; CHECK-BASELINE-NEXT: popq %r13 739; CHECK-BASELINE-NEXT: popq %r14 740; CHECK-BASELINE-NEXT: popq %r15 741; CHECK-BASELINE-NEXT: popq %rbp 742; CHECK-BASELINE-NEXT: retq 743; 744; CHECK-SSE1-LABEL: out_v16i8: 745; CHECK-SSE1: # %bb.0: 746; CHECK-SSE1-NEXT: pushq %rbp 747; CHECK-SSE1-NEXT: 
pushq %r15 748; CHECK-SSE1-NEXT: pushq %r14 749; CHECK-SSE1-NEXT: pushq %r13 750; CHECK-SSE1-NEXT: pushq %r12 751; CHECK-SSE1-NEXT: pushq %rbx 752; CHECK-SSE1-NEXT: movl %edx, %r11d 753; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r13d 754; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r15d 755; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %ebp 756; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r12d 757; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r14d 758; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx 759; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 760; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %edx 761; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d 762; CHECK-SSE1-NEXT: xorb %r10b, %sil 763; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %sil 764; CHECK-SSE1-NEXT: xorb %r10b, %sil 765; CHECK-SSE1-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 766; CHECK-SSE1-NEXT: xorb %dl, %r11b 767; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r11b 768; CHECK-SSE1-NEXT: xorb %dl, %r11b 769; CHECK-SSE1-NEXT: xorb %al, %cl 770; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %cl 771; CHECK-SSE1-NEXT: xorb %al, %cl 772; CHECK-SSE1-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 773; CHECK-SSE1-NEXT: xorb %bl, %r8b 774; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r8b 775; CHECK-SSE1-NEXT: xorb %bl, %r8b 776; CHECK-SSE1-NEXT: movl %r8d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 777; CHECK-SSE1-NEXT: xorb %r14b, %r9b 778; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r9b 779; CHECK-SSE1-NEXT: xorb %r14b, %r9b 780; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r14d 781; CHECK-SSE1-NEXT: xorb %r12b, %r14b 782; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r14b 783; CHECK-SSE1-NEXT: xorb %r12b, %r14b 784; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r12d 785; CHECK-SSE1-NEXT: xorb %bpl, %r12b 786; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r12b 787; CHECK-SSE1-NEXT: xorb %bpl, %r12b 788; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %esi 789; CHECK-SSE1-NEXT: xorb %r15b, %sil 790; CHECK-SSE1-NEXT: andb 
{{[0-9]+}}(%rsp), %sil 791; CHECK-SSE1-NEXT: xorb %r15b, %sil 792; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %edx 793; CHECK-SSE1-NEXT: xorb %r13b, %dl 794; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %dl 795; CHECK-SSE1-NEXT: xorb %r13b, %dl 796; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx 797; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 798; CHECK-SSE1-NEXT: xorb %al, %cl 799; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %cl 800; CHECK-SSE1-NEXT: xorb %al, %cl 801; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r13d 802; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 803; CHECK-SSE1-NEXT: xorb %al, %r13b 804; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r13b 805; CHECK-SSE1-NEXT: xorb %al, %r13b 806; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r15d 807; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 808; CHECK-SSE1-NEXT: xorb %al, %r15b 809; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r15b 810; CHECK-SSE1-NEXT: xorb %al, %r15b 811; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %ebp 812; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 813; CHECK-SSE1-NEXT: xorb %al, %bpl 814; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %bpl 815; CHECK-SSE1-NEXT: xorb %al, %bpl 816; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx 817; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 818; CHECK-SSE1-NEXT: xorb %al, %bl 819; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %bl 820; CHECK-SSE1-NEXT: xorb %al, %bl 821; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax 822; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r8d 823; CHECK-SSE1-NEXT: xorb %r8b, %al 824; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %al 825; CHECK-SSE1-NEXT: xorb %r8b, %al 826; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d 827; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r8d 828; CHECK-SSE1-NEXT: xorb %r8b, %r10b 829; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r10b 830; CHECK-SSE1-NEXT: xorb %r8b, %r10b 831; CHECK-SSE1-NEXT: movb %r10b, 15(%rdi) 832; CHECK-SSE1-NEXT: movb %al, 14(%rdi) 833; CHECK-SSE1-NEXT: movb %bl, 13(%rdi) 834; CHECK-SSE1-NEXT: movb %bpl, 
12(%rdi) 835; CHECK-SSE1-NEXT: movb %r15b, 11(%rdi) 836; CHECK-SSE1-NEXT: movb %r13b, 10(%rdi) 837; CHECK-SSE1-NEXT: movb %cl, 9(%rdi) 838; CHECK-SSE1-NEXT: movb %dl, 8(%rdi) 839; CHECK-SSE1-NEXT: movb %sil, 7(%rdi) 840; CHECK-SSE1-NEXT: movb %r12b, 6(%rdi) 841; CHECK-SSE1-NEXT: movb %r14b, 5(%rdi) 842; CHECK-SSE1-NEXT: movb %r9b, 4(%rdi) 843; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload 844; CHECK-SSE1-NEXT: movb %al, 3(%rdi) 845; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload 846; CHECK-SSE1-NEXT: movb %al, 2(%rdi) 847; CHECK-SSE1-NEXT: movb %r11b, 1(%rdi) 848; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload 849; CHECK-SSE1-NEXT: movb %al, (%rdi) 850; CHECK-SSE1-NEXT: movq %rdi, %rax 851; CHECK-SSE1-NEXT: popq %rbx 852; CHECK-SSE1-NEXT: popq %r12 853; CHECK-SSE1-NEXT: popq %r13 854; CHECK-SSE1-NEXT: popq %r14 855; CHECK-SSE1-NEXT: popq %r15 856; CHECK-SSE1-NEXT: popq %rbp 857; CHECK-SSE1-NEXT: retq 858; 859; CHECK-SSE2-LABEL: out_v16i8: 860; CHECK-SSE2: # %bb.0: 861; CHECK-SSE2-NEXT: andps %xmm2, %xmm0 862; CHECK-SSE2-NEXT: andnps %xmm1, %xmm2 863; CHECK-SSE2-NEXT: orps %xmm2, %xmm0 864; CHECK-SSE2-NEXT: retq 865; 866; CHECK-XOP-LABEL: out_v16i8: 867; CHECK-XOP: # %bb.0: 868; CHECK-XOP-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0 869; CHECK-XOP-NEXT: retq 870 %mx = and <16 x i8> %x, %mask 871 %notmask = xor <16 x i8> %mask, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> 872 %my = and <16 x i8> %y, %notmask 873 %r = or <16 x i8> %mx, %my 874 ret <16 x i8> %r 875} 876
; out_v8i16: masked merge of two <8 x i16> vectors passed by value,
;   r = (x & mask) | (y & ~mask).
; Recorded expectations: without SSE2 (baseline and SSE1-only) each 16-bit lane
; is computed in general-purpose registers with an xor/and/xor sequence and
; stored through the pointer returned in %rax; with SSE2 the whole vector is
; handled by andps/andnps/orps; with XOP it is a single vpcmov.
877define <8 x i16> @out_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %mask) nounwind { 878; CHECK-BASELINE-LABEL: out_v8i16: 879; CHECK-BASELINE: # %bb.0: 880; CHECK-BASELINE-NEXT: pushq %rbp 881; CHECK-BASELINE-NEXT: pushq %r15 882; CHECK-BASELINE-NEXT: pushq %r14 883; CHECK-BASELINE-NEXT: pushq %r12 884; CHECK-BASELINE-NEXT: pushq %rbx 885; CHECK-BASELINE-NEXT: movq %rdi, 
%rax 886; CHECK-BASELINE-NEXT: movzwl {{[0-9]+}}(%rsp), %edi 887; CHECK-BASELINE-NEXT: movzwl {{[0-9]+}}(%rsp), %r10d 888; CHECK-BASELINE-NEXT: movzwl {{[0-9]+}}(%rsp), %r11d 889; CHECK-BASELINE-NEXT: movzwl {{[0-9]+}}(%rsp), %ebx 890; CHECK-BASELINE-NEXT: movzwl {{[0-9]+}}(%rsp), %ebp 891; CHECK-BASELINE-NEXT: movzwl {{[0-9]+}}(%rsp), %r14d 892; CHECK-BASELINE-NEXT: movzwl {{[0-9]+}}(%rsp), %r15d 893; CHECK-BASELINE-NEXT: movzwl {{[0-9]+}}(%rsp), %r12d 894; CHECK-BASELINE-NEXT: xorl %r12d, %esi 895; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %si 896; CHECK-BASELINE-NEXT: xorl %r12d, %esi 897; CHECK-BASELINE-NEXT: xorl %r15d, %edx 898; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %dx 899; CHECK-BASELINE-NEXT: xorl %r15d, %edx 900; CHECK-BASELINE-NEXT: xorl %r14d, %ecx 901; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %cx 902; CHECK-BASELINE-NEXT: xorl %r14d, %ecx 903; CHECK-BASELINE-NEXT: xorl %ebp, %r8d 904; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %r8w 905; CHECK-BASELINE-NEXT: xorl %ebp, %r8d 906; CHECK-BASELINE-NEXT: xorl %ebx, %r9d 907; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %r9w 908; CHECK-BASELINE-NEXT: xorl %ebx, %r9d 909; CHECK-BASELINE-NEXT: movl %r11d, %ebx 910; CHECK-BASELINE-NEXT: xorw {{[0-9]+}}(%rsp), %bx 911; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %bx 912; CHECK-BASELINE-NEXT: xorl %r11d, %ebx 913; CHECK-BASELINE-NEXT: movl %r10d, %r11d 914; CHECK-BASELINE-NEXT: xorw {{[0-9]+}}(%rsp), %r11w 915; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %r11w 916; CHECK-BASELINE-NEXT: xorl %r10d, %r11d 917; CHECK-BASELINE-NEXT: movl %edi, %r10d 918; CHECK-BASELINE-NEXT: xorw {{[0-9]+}}(%rsp), %r10w 919; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %r10w 920; CHECK-BASELINE-NEXT: xorl %edi, %r10d 921; CHECK-BASELINE-NEXT: movw %r10w, 14(%rax) 922; CHECK-BASELINE-NEXT: movw %r11w, 12(%rax) 923; CHECK-BASELINE-NEXT: movw %bx, 10(%rax) 924; CHECK-BASELINE-NEXT: movw %r9w, 8(%rax) 925; CHECK-BASELINE-NEXT: movw %r8w, 6(%rax) 926; CHECK-BASELINE-NEXT: movw 
%cx, 4(%rax) 927; CHECK-BASELINE-NEXT: movw %dx, 2(%rax) 928; CHECK-BASELINE-NEXT: movw %si, (%rax) 929; CHECK-BASELINE-NEXT: popq %rbx 930; CHECK-BASELINE-NEXT: popq %r12 931; CHECK-BASELINE-NEXT: popq %r14 932; CHECK-BASELINE-NEXT: popq %r15 933; CHECK-BASELINE-NEXT: popq %rbp 934; CHECK-BASELINE-NEXT: retq 935; 936; CHECK-SSE1-LABEL: out_v8i16: 937; CHECK-SSE1: # %bb.0: 938; CHECK-SSE1-NEXT: pushq %rbp 939; CHECK-SSE1-NEXT: pushq %r15 940; CHECK-SSE1-NEXT: pushq %r14 941; CHECK-SSE1-NEXT: pushq %r12 942; CHECK-SSE1-NEXT: pushq %rbx 943; CHECK-SSE1-NEXT: movq %rdi, %rax 944; CHECK-SSE1-NEXT: movzwl {{[0-9]+}}(%rsp), %edi 945; CHECK-SSE1-NEXT: movzwl {{[0-9]+}}(%rsp), %r10d 946; CHECK-SSE1-NEXT: movzwl {{[0-9]+}}(%rsp), %r11d 947; CHECK-SSE1-NEXT: movzwl {{[0-9]+}}(%rsp), %ebx 948; CHECK-SSE1-NEXT: movzwl {{[0-9]+}}(%rsp), %ebp 949; CHECK-SSE1-NEXT: movzwl {{[0-9]+}}(%rsp), %r14d 950; CHECK-SSE1-NEXT: movzwl {{[0-9]+}}(%rsp), %r15d 951; CHECK-SSE1-NEXT: movzwl {{[0-9]+}}(%rsp), %r12d 952; CHECK-SSE1-NEXT: xorl %r12d, %esi 953; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %si 954; CHECK-SSE1-NEXT: xorl %r12d, %esi 955; CHECK-SSE1-NEXT: xorl %r15d, %edx 956; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %dx 957; CHECK-SSE1-NEXT: xorl %r15d, %edx 958; CHECK-SSE1-NEXT: xorl %r14d, %ecx 959; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %cx 960; CHECK-SSE1-NEXT: xorl %r14d, %ecx 961; CHECK-SSE1-NEXT: xorl %ebp, %r8d 962; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %r8w 963; CHECK-SSE1-NEXT: xorl %ebp, %r8d 964; CHECK-SSE1-NEXT: xorl %ebx, %r9d 965; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %r9w 966; CHECK-SSE1-NEXT: xorl %ebx, %r9d 967; CHECK-SSE1-NEXT: movl %r11d, %ebx 968; CHECK-SSE1-NEXT: xorw {{[0-9]+}}(%rsp), %bx 969; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %bx 970; CHECK-SSE1-NEXT: xorl %r11d, %ebx 971; CHECK-SSE1-NEXT: movl %r10d, %r11d 972; CHECK-SSE1-NEXT: xorw {{[0-9]+}}(%rsp), %r11w 973; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %r11w 974; CHECK-SSE1-NEXT: xorl %r10d, %r11d 975; 
CHECK-SSE1-NEXT: movl %edi, %r10d 976; CHECK-SSE1-NEXT: xorw {{[0-9]+}}(%rsp), %r10w 977; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %r10w 978; CHECK-SSE1-NEXT: xorl %edi, %r10d 979; CHECK-SSE1-NEXT: movw %r10w, 14(%rax) 980; CHECK-SSE1-NEXT: movw %r11w, 12(%rax) 981; CHECK-SSE1-NEXT: movw %bx, 10(%rax) 982; CHECK-SSE1-NEXT: movw %r9w, 8(%rax) 983; CHECK-SSE1-NEXT: movw %r8w, 6(%rax) 984; CHECK-SSE1-NEXT: movw %cx, 4(%rax) 985; CHECK-SSE1-NEXT: movw %dx, 2(%rax) 986; CHECK-SSE1-NEXT: movw %si, (%rax) 987; CHECK-SSE1-NEXT: popq %rbx 988; CHECK-SSE1-NEXT: popq %r12 989; CHECK-SSE1-NEXT: popq %r14 990; CHECK-SSE1-NEXT: popq %r15 991; CHECK-SSE1-NEXT: popq %rbp 992; CHECK-SSE1-NEXT: retq 993; 994; CHECK-SSE2-LABEL: out_v8i16: 995; CHECK-SSE2: # %bb.0: 996; CHECK-SSE2-NEXT: andps %xmm2, %xmm0 997; CHECK-SSE2-NEXT: andnps %xmm1, %xmm2 998; CHECK-SSE2-NEXT: orps %xmm2, %xmm0 999; CHECK-SSE2-NEXT: retq 1000; 1001; CHECK-XOP-LABEL: out_v8i16: 1002; CHECK-XOP: # %bb.0: 1003; CHECK-XOP-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0 1004; CHECK-XOP-NEXT: retq 1005 %mx = and <8 x i16> %x, %mask 1006 %notmask = xor <8 x i16> %mask, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> 1007 %my = and <8 x i16> %y, %notmask 1008 %r = or <8 x i16> %mx, %my 1009 ret <8 x i16> %r 1010} 1011
; out_v4i32: masked merge, r = (x & mask) | (y & ~mask), on <4 x i32>, with
; x, y and mask loaded from the three pointer arguments (align 16) instead of
; being passed by value.
; Recorded expectations: the baseline run uses a 32-bit xor/and/xor sequence per
; lane; the SSE1 and SSE2 runs use andps/andnps/orps (SSE1 stores the result
; through %rdi, SSE2 leaves it in %xmm0); the XOP run uses vpcmov with a memory
; operand.
1012define <4 x i32> @out_v4i32(ptr%px, ptr%py, ptr%pmask) nounwind { 1013; CHECK-BASELINE-LABEL: out_v4i32: 1014; CHECK-BASELINE: # %bb.0: 1015; CHECK-BASELINE-NEXT: movq %rdi, %rax 1016; CHECK-BASELINE-NEXT: movl 12(%rdx), %edi 1017; CHECK-BASELINE-NEXT: movl 8(%rdx), %r8d 1018; CHECK-BASELINE-NEXT: movl (%rdx), %r9d 1019; CHECK-BASELINE-NEXT: movl 4(%rdx), %r10d 1020; CHECK-BASELINE-NEXT: movl (%rsi), %edx 1021; CHECK-BASELINE-NEXT: xorl %r9d, %edx 1022; CHECK-BASELINE-NEXT: andl (%rcx), %edx 1023; CHECK-BASELINE-NEXT: xorl %r9d, %edx 1024; CHECK-BASELINE-NEXT: movl 4(%rsi), %r9d 1025; CHECK-BASELINE-NEXT: xorl %r10d, %r9d 1026; CHECK-BASELINE-NEXT: andl 4(%rcx), %r9d 1027; 
CHECK-BASELINE-NEXT: xorl %r10d, %r9d 1028; CHECK-BASELINE-NEXT: movl 8(%rsi), %r10d 1029; CHECK-BASELINE-NEXT: xorl %r8d, %r10d 1030; CHECK-BASELINE-NEXT: andl 8(%rcx), %r10d 1031; CHECK-BASELINE-NEXT: xorl %r8d, %r10d 1032; CHECK-BASELINE-NEXT: movl 12(%rsi), %esi 1033; CHECK-BASELINE-NEXT: xorl %edi, %esi 1034; CHECK-BASELINE-NEXT: andl 12(%rcx), %esi 1035; CHECK-BASELINE-NEXT: xorl %edi, %esi 1036; CHECK-BASELINE-NEXT: movl %esi, 12(%rax) 1037; CHECK-BASELINE-NEXT: movl %r10d, 8(%rax) 1038; CHECK-BASELINE-NEXT: movl %r9d, 4(%rax) 1039; CHECK-BASELINE-NEXT: movl %edx, (%rax) 1040; CHECK-BASELINE-NEXT: retq 1041; 1042; CHECK-SSE1-LABEL: out_v4i32: 1043; CHECK-SSE1: # %bb.0: 1044; CHECK-SSE1-NEXT: movq %rdi, %rax 1045; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0 1046; CHECK-SSE1-NEXT: movaps (%rsi), %xmm1 1047; CHECK-SSE1-NEXT: andps %xmm0, %xmm1 1048; CHECK-SSE1-NEXT: andnps (%rdx), %xmm0 1049; CHECK-SSE1-NEXT: orps %xmm1, %xmm0 1050; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi) 1051; CHECK-SSE1-NEXT: retq 1052; 1053; CHECK-SSE2-LABEL: out_v4i32: 1054; CHECK-SSE2: # %bb.0: 1055; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0 1056; CHECK-SSE2-NEXT: movaps (%rdi), %xmm1 1057; CHECK-SSE2-NEXT: andps %xmm0, %xmm1 1058; CHECK-SSE2-NEXT: andnps (%rsi), %xmm0 1059; CHECK-SSE2-NEXT: orps %xmm1, %xmm0 1060; CHECK-SSE2-NEXT: retq 1061; 1062; CHECK-XOP-LABEL: out_v4i32: 1063; CHECK-XOP: # %bb.0: 1064; CHECK-XOP-NEXT: vmovdqa (%rdi), %xmm0 1065; CHECK-XOP-NEXT: vmovdqa (%rdx), %xmm1 1066; CHECK-XOP-NEXT: vpcmov %xmm1, (%rsi), %xmm0, %xmm0 1067; CHECK-XOP-NEXT: retq 1068 %x = load <4 x i32>, ptr%px, align 16 1069 %y = load <4 x i32>, ptr%py, align 16 1070 %mask = load <4 x i32>, ptr%pmask, align 16 1071 %mx = and <4 x i32> %x, %mask 1072 %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1> 1073 %my = and <4 x i32> %y, %notmask 1074 %r = or <4 x i32> %mx, %my 1075 ret <4 x i32> %r 1076} 1077
; out_v4i32_undef: out_v4i32 with lane 2 of the constant that inverts the mask
; replaced by undef, i.e. the xor constant is <-1, -1, undef, -1>.
; Recorded expectations: the SSE1, SSE2 and XOP sequences match those of
; out_v4i32; in the baseline run lane 2 (byte offset 8) is only and-ed with the
; mask, while lanes 0, 1 and 3 keep the xor/and/xor sequence.
1078define <4 x i32> @out_v4i32_undef(ptr%px, ptr%py, ptr%pmask) nounwind { 1079; 
CHECK-BASELINE-LABEL: out_v4i32_undef: 1080; CHECK-BASELINE: # %bb.0: 1081; CHECK-BASELINE-NEXT: movq %rdi, %rax 1082; CHECK-BASELINE-NEXT: movl 8(%rsi), %edi 1083; CHECK-BASELINE-NEXT: movl 12(%rdx), %r8d 1084; CHECK-BASELINE-NEXT: movl (%rdx), %r9d 1085; CHECK-BASELINE-NEXT: movl 4(%rdx), %edx 1086; CHECK-BASELINE-NEXT: andl 8(%rcx), %edi 1087; CHECK-BASELINE-NEXT: movl (%rsi), %r10d 1088; CHECK-BASELINE-NEXT: xorl %r9d, %r10d 1089; CHECK-BASELINE-NEXT: andl (%rcx), %r10d 1090; CHECK-BASELINE-NEXT: xorl %r9d, %r10d 1091; CHECK-BASELINE-NEXT: movl 4(%rsi), %r9d 1092; CHECK-BASELINE-NEXT: xorl %edx, %r9d 1093; CHECK-BASELINE-NEXT: andl 4(%rcx), %r9d 1094; CHECK-BASELINE-NEXT: xorl %edx, %r9d 1095; CHECK-BASELINE-NEXT: movl 12(%rsi), %edx 1096; CHECK-BASELINE-NEXT: xorl %r8d, %edx 1097; CHECK-BASELINE-NEXT: andl 12(%rcx), %edx 1098; CHECK-BASELINE-NEXT: xorl %r8d, %edx 1099; CHECK-BASELINE-NEXT: movl %edi, 8(%rax) 1100; CHECK-BASELINE-NEXT: movl %edx, 12(%rax) 1101; CHECK-BASELINE-NEXT: movl %r9d, 4(%rax) 1102; CHECK-BASELINE-NEXT: movl %r10d, (%rax) 1103; CHECK-BASELINE-NEXT: retq 1104; 1105; CHECK-SSE1-LABEL: out_v4i32_undef: 1106; CHECK-SSE1: # %bb.0: 1107; CHECK-SSE1-NEXT: movq %rdi, %rax 1108; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0 1109; CHECK-SSE1-NEXT: movaps (%rsi), %xmm1 1110; CHECK-SSE1-NEXT: andps %xmm0, %xmm1 1111; CHECK-SSE1-NEXT: andnps (%rdx), %xmm0 1112; CHECK-SSE1-NEXT: orps %xmm1, %xmm0 1113; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi) 1114; CHECK-SSE1-NEXT: retq 1115; 1116; CHECK-SSE2-LABEL: out_v4i32_undef: 1117; CHECK-SSE2: # %bb.0: 1118; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0 1119; CHECK-SSE2-NEXT: movaps (%rdi), %xmm1 1120; CHECK-SSE2-NEXT: andps %xmm0, %xmm1 1121; CHECK-SSE2-NEXT: andnps (%rsi), %xmm0 1122; CHECK-SSE2-NEXT: orps %xmm1, %xmm0 1123; CHECK-SSE2-NEXT: retq 1124; 1125; CHECK-XOP-LABEL: out_v4i32_undef: 1126; CHECK-XOP: # %bb.0: 1127; CHECK-XOP-NEXT: vmovdqa (%rdi), %xmm0 1128; CHECK-XOP-NEXT: vmovdqa (%rdx), %xmm1 1129; CHECK-XOP-NEXT: 
vpcmov %xmm1, (%rsi), %xmm0, %xmm0 1130; CHECK-XOP-NEXT: retq 1131 %x = load <4 x i32>, ptr%px, align 16 1132 %y = load <4 x i32>, ptr%py, align 16 1133 %mask = load <4 x i32>, ptr%pmask, align 16 1134 %mx = and <4 x i32> %x, %mask 1135 %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 undef, i32 -1> 1136 %my = and <4 x i32> %y, %notmask 1137 %r = or <4 x i32> %mx, %my 1138 ret <4 x i32> %r 1139} 1140
; out_v2i64: masked merge, r = (x & mask) | (y & ~mask), on <2 x i64> with all
; three operands passed by value.
; Recorded expectations: the baseline and SSE1 runs compute each 64-bit lane in
; general-purpose registers (xorq/andq/xorq) and leave the two lanes in %rax
; and %rdx; the SSE2 run uses andps/andnps/orps; the XOP run uses one vpcmov.
1141define <2 x i64> @out_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %mask) nounwind { 1142; CHECK-BASELINE-LABEL: out_v2i64: 1143; CHECK-BASELINE: # %bb.0: 1144; CHECK-BASELINE-NEXT: movq %rdi, %rax 1145; CHECK-BASELINE-NEXT: xorq %rdx, %rax 1146; CHECK-BASELINE-NEXT: andq %r8, %rax 1147; CHECK-BASELINE-NEXT: xorq %rdx, %rax 1148; CHECK-BASELINE-NEXT: xorq %rcx, %rsi 1149; CHECK-BASELINE-NEXT: andq %r9, %rsi 1150; CHECK-BASELINE-NEXT: xorq %rcx, %rsi 1151; CHECK-BASELINE-NEXT: movq %rsi, %rdx 1152; CHECK-BASELINE-NEXT: retq 1153; 1154; CHECK-SSE1-LABEL: out_v2i64: 1155; CHECK-SSE1: # %bb.0: 1156; CHECK-SSE1-NEXT: movq %rdi, %rax 1157; CHECK-SSE1-NEXT: xorq %rdx, %rax 1158; CHECK-SSE1-NEXT: andq %r8, %rax 1159; CHECK-SSE1-NEXT: xorq %rdx, %rax 1160; CHECK-SSE1-NEXT: xorq %rcx, %rsi 1161; CHECK-SSE1-NEXT: andq %r9, %rsi 1162; CHECK-SSE1-NEXT: xorq %rcx, %rsi 1163; CHECK-SSE1-NEXT: movq %rsi, %rdx 1164; CHECK-SSE1-NEXT: retq 1165; 1166; CHECK-SSE2-LABEL: out_v2i64: 1167; CHECK-SSE2: # %bb.0: 1168; CHECK-SSE2-NEXT: andps %xmm2, %xmm0 1169; CHECK-SSE2-NEXT: andnps %xmm1, %xmm2 1170; CHECK-SSE2-NEXT: orps %xmm2, %xmm0 1171; CHECK-SSE2-NEXT: retq 1172; 1173; CHECK-XOP-LABEL: out_v2i64: 1174; CHECK-XOP: # %bb.0: 1175; CHECK-XOP-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0 1176; CHECK-XOP-NEXT: retq 1177 %mx = and <2 x i64> %x, %mask 1178 %notmask = xor <2 x i64> %mask, <i64 -1, i64 -1> 1179 %my = and <2 x i64> %y, %notmask 1180 %r = or <2 x i64> %mx, %my 1181 ret <2 x i64> %r 1182} 1183 1184; ============================================================================ ; 1185; 
256-bit vector width 1186; ============================================================================ ; 1187 1188define <32 x i8> @out_v32i8(ptr%px, ptr%py, ptr%pmask) nounwind { 1189; CHECK-BASELINE-LABEL: out_v32i8: 1190; CHECK-BASELINE: # %bb.0: 1191; CHECK-BASELINE-NEXT: pushq %rbp 1192; CHECK-BASELINE-NEXT: pushq %r15 1193; CHECK-BASELINE-NEXT: pushq %r14 1194; CHECK-BASELINE-NEXT: pushq %r13 1195; CHECK-BASELINE-NEXT: pushq %r12 1196; CHECK-BASELINE-NEXT: pushq %rbx 1197; CHECK-BASELINE-NEXT: movq %rcx, %r10 1198; CHECK-BASELINE-NEXT: movq %rdx, %r8 1199; CHECK-BASELINE-NEXT: movq %rsi, %r9 1200; CHECK-BASELINE-NEXT: movq %rdi, %r11 1201; CHECK-BASELINE-NEXT: movzbl 15(%rdx), %eax 1202; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1203; CHECK-BASELINE-NEXT: movzbl 14(%rdx), %eax 1204; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1205; CHECK-BASELINE-NEXT: movzbl 13(%rdx), %eax 1206; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1207; CHECK-BASELINE-NEXT: movzbl 12(%rdx), %eax 1208; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1209; CHECK-BASELINE-NEXT: movzbl 11(%rdx), %eax 1210; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1211; CHECK-BASELINE-NEXT: movzbl 10(%rdx), %eax 1212; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1213; CHECK-BASELINE-NEXT: movzbl 9(%rdx), %ebp 1214; CHECK-BASELINE-NEXT: movzbl 8(%rdx), %r14d 1215; CHECK-BASELINE-NEXT: movzbl 7(%rdx), %r15d 1216; CHECK-BASELINE-NEXT: movzbl 6(%rdx), %r12d 1217; CHECK-BASELINE-NEXT: movzbl 5(%rdx), %r13d 1218; CHECK-BASELINE-NEXT: movzbl 4(%rdx), %esi 1219; CHECK-BASELINE-NEXT: movzbl 3(%rdx), %edx 1220; CHECK-BASELINE-NEXT: movzbl 2(%r8), %edi 1221; CHECK-BASELINE-NEXT: movzbl (%r8), %eax 1222; CHECK-BASELINE-NEXT: movzbl 1(%r8), %ecx 1223; CHECK-BASELINE-NEXT: movzbl (%r9), %ebx 1224; CHECK-BASELINE-NEXT: xorb %al, %bl 1225; CHECK-BASELINE-NEXT: 
andb (%r10), %bl 1226; CHECK-BASELINE-NEXT: xorb %al, %bl 1227; CHECK-BASELINE-NEXT: movb %bl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1228; CHECK-BASELINE-NEXT: movzbl 1(%r9), %eax 1229; CHECK-BASELINE-NEXT: xorb %cl, %al 1230; CHECK-BASELINE-NEXT: andb 1(%r10), %al 1231; CHECK-BASELINE-NEXT: xorb %cl, %al 1232; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1233; CHECK-BASELINE-NEXT: movzbl 2(%r9), %eax 1234; CHECK-BASELINE-NEXT: xorb %dil, %al 1235; CHECK-BASELINE-NEXT: andb 2(%r10), %al 1236; CHECK-BASELINE-NEXT: xorb %dil, %al 1237; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1238; CHECK-BASELINE-NEXT: movzbl 3(%r9), %eax 1239; CHECK-BASELINE-NEXT: xorb %dl, %al 1240; CHECK-BASELINE-NEXT: andb 3(%r10), %al 1241; CHECK-BASELINE-NEXT: xorb %dl, %al 1242; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1243; CHECK-BASELINE-NEXT: movzbl 4(%r9), %eax 1244; CHECK-BASELINE-NEXT: xorb %sil, %al 1245; CHECK-BASELINE-NEXT: andb 4(%r10), %al 1246; CHECK-BASELINE-NEXT: xorb %sil, %al 1247; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1248; CHECK-BASELINE-NEXT: movzbl 5(%r9), %eax 1249; CHECK-BASELINE-NEXT: xorb %r13b, %al 1250; CHECK-BASELINE-NEXT: andb 5(%r10), %al 1251; CHECK-BASELINE-NEXT: xorb %r13b, %al 1252; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1253; CHECK-BASELINE-NEXT: movzbl 6(%r9), %eax 1254; CHECK-BASELINE-NEXT: xorb %r12b, %al 1255; CHECK-BASELINE-NEXT: andb 6(%r10), %al 1256; CHECK-BASELINE-NEXT: xorb %r12b, %al 1257; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1258; CHECK-BASELINE-NEXT: movzbl 7(%r9), %eax 1259; CHECK-BASELINE-NEXT: xorb %r15b, %al 1260; CHECK-BASELINE-NEXT: andb 7(%r10), %al 1261; CHECK-BASELINE-NEXT: xorb %r15b, %al 1262; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1263; CHECK-BASELINE-NEXT: movzbl 8(%r9), %eax 1264; CHECK-BASELINE-NEXT: xorb %r14b, %al 
1265; CHECK-BASELINE-NEXT: andb 8(%r10), %al 1266; CHECK-BASELINE-NEXT: xorb %r14b, %al 1267; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1268; CHECK-BASELINE-NEXT: movzbl 9(%r9), %eax 1269; CHECK-BASELINE-NEXT: xorb %bpl, %al 1270; CHECK-BASELINE-NEXT: andb 9(%r10), %al 1271; CHECK-BASELINE-NEXT: xorb %bpl, %al 1272; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1273; CHECK-BASELINE-NEXT: movzbl 10(%r9), %eax 1274; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload 1275; CHECK-BASELINE-NEXT: xorb %cl, %al 1276; CHECK-BASELINE-NEXT: andb 10(%r10), %al 1277; CHECK-BASELINE-NEXT: xorb %cl, %al 1278; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1279; CHECK-BASELINE-NEXT: movzbl 11(%r9), %eax 1280; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload 1281; CHECK-BASELINE-NEXT: xorb %cl, %al 1282; CHECK-BASELINE-NEXT: andb 11(%r10), %al 1283; CHECK-BASELINE-NEXT: xorb %cl, %al 1284; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1285; CHECK-BASELINE-NEXT: movzbl 12(%r9), %eax 1286; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload 1287; CHECK-BASELINE-NEXT: xorb %cl, %al 1288; CHECK-BASELINE-NEXT: andb 12(%r10), %al 1289; CHECK-BASELINE-NEXT: xorb %cl, %al 1290; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1291; CHECK-BASELINE-NEXT: movzbl 13(%r9), %eax 1292; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload 1293; CHECK-BASELINE-NEXT: xorb %cl, %al 1294; CHECK-BASELINE-NEXT: andb 13(%r10), %al 1295; CHECK-BASELINE-NEXT: xorb %cl, %al 1296; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1297; CHECK-BASELINE-NEXT: movzbl 14(%r9), %eax 1298; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload 1299; CHECK-BASELINE-NEXT: xorb %cl, %al 1300; CHECK-BASELINE-NEXT: andb 
14(%r10), %al 1301; CHECK-BASELINE-NEXT: xorb %cl, %al 1302; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1303; CHECK-BASELINE-NEXT: movzbl 15(%r9), %eax 1304; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload 1305; CHECK-BASELINE-NEXT: xorb %cl, %al 1306; CHECK-BASELINE-NEXT: andb 15(%r10), %al 1307; CHECK-BASELINE-NEXT: xorb %cl, %al 1308; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1309; CHECK-BASELINE-NEXT: movzbl 16(%r8), %eax 1310; CHECK-BASELINE-NEXT: movzbl 16(%r9), %ecx 1311; CHECK-BASELINE-NEXT: xorb %al, %cl 1312; CHECK-BASELINE-NEXT: andb 16(%r10), %cl 1313; CHECK-BASELINE-NEXT: xorb %al, %cl 1314; CHECK-BASELINE-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1315; CHECK-BASELINE-NEXT: movzbl 17(%r8), %eax 1316; CHECK-BASELINE-NEXT: movzbl 17(%r9), %ecx 1317; CHECK-BASELINE-NEXT: xorb %al, %cl 1318; CHECK-BASELINE-NEXT: andb 17(%r10), %cl 1319; CHECK-BASELINE-NEXT: xorb %al, %cl 1320; CHECK-BASELINE-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1321; CHECK-BASELINE-NEXT: movzbl 18(%r8), %eax 1322; CHECK-BASELINE-NEXT: movzbl 18(%r9), %ecx 1323; CHECK-BASELINE-NEXT: xorb %al, %cl 1324; CHECK-BASELINE-NEXT: andb 18(%r10), %cl 1325; CHECK-BASELINE-NEXT: xorb %al, %cl 1326; CHECK-BASELINE-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1327; CHECK-BASELINE-NEXT: movzbl 19(%r8), %eax 1328; CHECK-BASELINE-NEXT: movzbl 19(%r9), %ecx 1329; CHECK-BASELINE-NEXT: xorb %al, %cl 1330; CHECK-BASELINE-NEXT: andb 19(%r10), %cl 1331; CHECK-BASELINE-NEXT: xorb %al, %cl 1332; CHECK-BASELINE-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1333; CHECK-BASELINE-NEXT: movzbl 20(%r8), %eax 1334; CHECK-BASELINE-NEXT: movzbl 20(%r9), %ecx 1335; CHECK-BASELINE-NEXT: xorb %al, %cl 1336; CHECK-BASELINE-NEXT: andb 20(%r10), %cl 1337; CHECK-BASELINE-NEXT: xorb %al, %cl 1338; CHECK-BASELINE-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1339; 
CHECK-BASELINE-NEXT: movzbl 21(%r8), %eax 1340; CHECK-BASELINE-NEXT: movzbl 21(%r9), %r13d 1341; CHECK-BASELINE-NEXT: xorb %al, %r13b 1342; CHECK-BASELINE-NEXT: andb 21(%r10), %r13b 1343; CHECK-BASELINE-NEXT: xorb %al, %r13b 1344; CHECK-BASELINE-NEXT: movzbl 22(%r8), %eax 1345; CHECK-BASELINE-NEXT: movzbl 22(%r9), %r12d 1346; CHECK-BASELINE-NEXT: xorb %al, %r12b 1347; CHECK-BASELINE-NEXT: andb 22(%r10), %r12b 1348; CHECK-BASELINE-NEXT: xorb %al, %r12b 1349; CHECK-BASELINE-NEXT: movzbl 23(%r8), %eax 1350; CHECK-BASELINE-NEXT: movzbl 23(%r9), %r15d 1351; CHECK-BASELINE-NEXT: xorb %al, %r15b 1352; CHECK-BASELINE-NEXT: andb 23(%r10), %r15b 1353; CHECK-BASELINE-NEXT: xorb %al, %r15b 1354; CHECK-BASELINE-NEXT: movzbl 24(%r8), %eax 1355; CHECK-BASELINE-NEXT: movzbl 24(%r9), %r14d 1356; CHECK-BASELINE-NEXT: xorb %al, %r14b 1357; CHECK-BASELINE-NEXT: andb 24(%r10), %r14b 1358; CHECK-BASELINE-NEXT: xorb %al, %r14b 1359; CHECK-BASELINE-NEXT: movzbl 25(%r8), %eax 1360; CHECK-BASELINE-NEXT: movzbl 25(%r9), %ebp 1361; CHECK-BASELINE-NEXT: xorb %al, %bpl 1362; CHECK-BASELINE-NEXT: andb 25(%r10), %bpl 1363; CHECK-BASELINE-NEXT: xorb %al, %bpl 1364; CHECK-BASELINE-NEXT: movzbl 26(%r8), %eax 1365; CHECK-BASELINE-NEXT: movzbl 26(%r9), %edi 1366; CHECK-BASELINE-NEXT: xorb %al, %dil 1367; CHECK-BASELINE-NEXT: andb 26(%r10), %dil 1368; CHECK-BASELINE-NEXT: xorb %al, %dil 1369; CHECK-BASELINE-NEXT: movzbl 27(%r8), %eax 1370; CHECK-BASELINE-NEXT: movzbl 27(%r9), %esi 1371; CHECK-BASELINE-NEXT: xorb %al, %sil 1372; CHECK-BASELINE-NEXT: andb 27(%r10), %sil 1373; CHECK-BASELINE-NEXT: xorb %al, %sil 1374; CHECK-BASELINE-NEXT: movzbl 28(%r8), %eax 1375; CHECK-BASELINE-NEXT: movzbl 28(%r9), %edx 1376; CHECK-BASELINE-NEXT: xorb %al, %dl 1377; CHECK-BASELINE-NEXT: andb 28(%r10), %dl 1378; CHECK-BASELINE-NEXT: xorb %al, %dl 1379; CHECK-BASELINE-NEXT: movzbl 29(%r8), %eax 1380; CHECK-BASELINE-NEXT: movzbl 29(%r9), %ecx 1381; CHECK-BASELINE-NEXT: xorb %al, %cl 1382; CHECK-BASELINE-NEXT: andb 
29(%r10), %cl 1383; CHECK-BASELINE-NEXT: xorb %al, %cl 1384; CHECK-BASELINE-NEXT: movzbl 30(%r8), %ebx 1385; CHECK-BASELINE-NEXT: movzbl 30(%r9), %eax 1386; CHECK-BASELINE-NEXT: xorb %bl, %al 1387; CHECK-BASELINE-NEXT: andb 30(%r10), %al 1388; CHECK-BASELINE-NEXT: xorb %bl, %al 1389; CHECK-BASELINE-NEXT: movzbl 31(%r8), %r8d 1390; CHECK-BASELINE-NEXT: movzbl 31(%r9), %r9d 1391; CHECK-BASELINE-NEXT: xorb %r8b, %r9b 1392; CHECK-BASELINE-NEXT: andb 31(%r10), %r9b 1393; CHECK-BASELINE-NEXT: xorb %r8b, %r9b 1394; CHECK-BASELINE-NEXT: movb %r9b, 31(%r11) 1395; CHECK-BASELINE-NEXT: movb %al, 30(%r11) 1396; CHECK-BASELINE-NEXT: movb %cl, 29(%r11) 1397; CHECK-BASELINE-NEXT: movb %dl, 28(%r11) 1398; CHECK-BASELINE-NEXT: movb %sil, 27(%r11) 1399; CHECK-BASELINE-NEXT: movb %dil, 26(%r11) 1400; CHECK-BASELINE-NEXT: movb %bpl, 25(%r11) 1401; CHECK-BASELINE-NEXT: movb %r14b, 24(%r11) 1402; CHECK-BASELINE-NEXT: movb %r15b, 23(%r11) 1403; CHECK-BASELINE-NEXT: movb %r12b, 22(%r11) 1404; CHECK-BASELINE-NEXT: movb %r13b, 21(%r11) 1405; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 1406; CHECK-BASELINE-NEXT: movb %al, 20(%r11) 1407; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 1408; CHECK-BASELINE-NEXT: movb %al, 19(%r11) 1409; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 1410; CHECK-BASELINE-NEXT: movb %al, 18(%r11) 1411; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 1412; CHECK-BASELINE-NEXT: movb %al, 17(%r11) 1413; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 1414; CHECK-BASELINE-NEXT: movb %al, 16(%r11) 1415; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 1416; CHECK-BASELINE-NEXT: movb %al, 15(%r11) 1417; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 1418; CHECK-BASELINE-NEXT: movb %al, 14(%r11) 1419; CHECK-BASELINE-NEXT: movzbl 
{{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 1420; CHECK-BASELINE-NEXT: movb %al, 13(%r11) 1421; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 1422; CHECK-BASELINE-NEXT: movb %al, 12(%r11) 1423; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 1424; CHECK-BASELINE-NEXT: movb %al, 11(%r11) 1425; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 1426; CHECK-BASELINE-NEXT: movb %al, 10(%r11) 1427; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 1428; CHECK-BASELINE-NEXT: movb %al, 9(%r11) 1429; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 1430; CHECK-BASELINE-NEXT: movb %al, 8(%r11) 1431; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 1432; CHECK-BASELINE-NEXT: movb %al, 7(%r11) 1433; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 1434; CHECK-BASELINE-NEXT: movb %al, 6(%r11) 1435; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 1436; CHECK-BASELINE-NEXT: movb %al, 5(%r11) 1437; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 1438; CHECK-BASELINE-NEXT: movb %al, 4(%r11) 1439; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 1440; CHECK-BASELINE-NEXT: movb %al, 3(%r11) 1441; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 1442; CHECK-BASELINE-NEXT: movb %al, 2(%r11) 1443; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 1444; CHECK-BASELINE-NEXT: movb %al, 1(%r11) 1445; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 1446; CHECK-BASELINE-NEXT: movb %al, (%r11) 1447; CHECK-BASELINE-NEXT: movq %r11, %rax 1448; CHECK-BASELINE-NEXT: popq %rbx 1449; CHECK-BASELINE-NEXT: popq %r12 1450; CHECK-BASELINE-NEXT: popq %r13 1451; 
CHECK-BASELINE-NEXT: popq %r14 1452; CHECK-BASELINE-NEXT: popq %r15 1453; CHECK-BASELINE-NEXT: popq %rbp 1454; CHECK-BASELINE-NEXT: retq 1455; 1456; CHECK-SSE1-LABEL: out_v32i8: 1457; CHECK-SSE1: # %bb.0: 1458; CHECK-SSE1-NEXT: pushq %rbp 1459; CHECK-SSE1-NEXT: pushq %r15 1460; CHECK-SSE1-NEXT: pushq %r14 1461; CHECK-SSE1-NEXT: pushq %r13 1462; CHECK-SSE1-NEXT: pushq %r12 1463; CHECK-SSE1-NEXT: pushq %rbx 1464; CHECK-SSE1-NEXT: movq %rcx, %r10 1465; CHECK-SSE1-NEXT: movq %rdx, %r8 1466; CHECK-SSE1-NEXT: movq %rsi, %r9 1467; CHECK-SSE1-NEXT: movq %rdi, %r11 1468; CHECK-SSE1-NEXT: movzbl 15(%rdx), %eax 1469; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1470; CHECK-SSE1-NEXT: movzbl 14(%rdx), %eax 1471; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1472; CHECK-SSE1-NEXT: movzbl 13(%rdx), %eax 1473; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1474; CHECK-SSE1-NEXT: movzbl 12(%rdx), %eax 1475; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1476; CHECK-SSE1-NEXT: movzbl 11(%rdx), %eax 1477; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1478; CHECK-SSE1-NEXT: movzbl 10(%rdx), %eax 1479; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1480; CHECK-SSE1-NEXT: movzbl 9(%rdx), %ebp 1481; CHECK-SSE1-NEXT: movzbl 8(%rdx), %r14d 1482; CHECK-SSE1-NEXT: movzbl 7(%rdx), %r15d 1483; CHECK-SSE1-NEXT: movzbl 6(%rdx), %r12d 1484; CHECK-SSE1-NEXT: movzbl 5(%rdx), %r13d 1485; CHECK-SSE1-NEXT: movzbl 4(%rdx), %esi 1486; CHECK-SSE1-NEXT: movzbl 3(%rdx), %edx 1487; CHECK-SSE1-NEXT: movzbl 2(%r8), %edi 1488; CHECK-SSE1-NEXT: movzbl (%r8), %eax 1489; CHECK-SSE1-NEXT: movzbl 1(%r8), %ecx 1490; CHECK-SSE1-NEXT: movzbl (%r9), %ebx 1491; CHECK-SSE1-NEXT: xorb %al, %bl 1492; CHECK-SSE1-NEXT: andb (%r10), %bl 1493; CHECK-SSE1-NEXT: xorb %al, %bl 1494; CHECK-SSE1-NEXT: movb %bl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1495; CHECK-SSE1-NEXT: movzbl 1(%r9), %eax 1496; 
CHECK-SSE1-NEXT: xorb %cl, %al 1497; CHECK-SSE1-NEXT: andb 1(%r10), %al 1498; CHECK-SSE1-NEXT: xorb %cl, %al 1499; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1500; CHECK-SSE1-NEXT: movzbl 2(%r9), %eax 1501; CHECK-SSE1-NEXT: xorb %dil, %al 1502; CHECK-SSE1-NEXT: andb 2(%r10), %al 1503; CHECK-SSE1-NEXT: xorb %dil, %al 1504; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1505; CHECK-SSE1-NEXT: movzbl 3(%r9), %eax 1506; CHECK-SSE1-NEXT: xorb %dl, %al 1507; CHECK-SSE1-NEXT: andb 3(%r10), %al 1508; CHECK-SSE1-NEXT: xorb %dl, %al 1509; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1510; CHECK-SSE1-NEXT: movzbl 4(%r9), %eax 1511; CHECK-SSE1-NEXT: xorb %sil, %al 1512; CHECK-SSE1-NEXT: andb 4(%r10), %al 1513; CHECK-SSE1-NEXT: xorb %sil, %al 1514; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1515; CHECK-SSE1-NEXT: movzbl 5(%r9), %eax 1516; CHECK-SSE1-NEXT: xorb %r13b, %al 1517; CHECK-SSE1-NEXT: andb 5(%r10), %al 1518; CHECK-SSE1-NEXT: xorb %r13b, %al 1519; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1520; CHECK-SSE1-NEXT: movzbl 6(%r9), %eax 1521; CHECK-SSE1-NEXT: xorb %r12b, %al 1522; CHECK-SSE1-NEXT: andb 6(%r10), %al 1523; CHECK-SSE1-NEXT: xorb %r12b, %al 1524; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1525; CHECK-SSE1-NEXT: movzbl 7(%r9), %eax 1526; CHECK-SSE1-NEXT: xorb %r15b, %al 1527; CHECK-SSE1-NEXT: andb 7(%r10), %al 1528; CHECK-SSE1-NEXT: xorb %r15b, %al 1529; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1530; CHECK-SSE1-NEXT: movzbl 8(%r9), %eax 1531; CHECK-SSE1-NEXT: xorb %r14b, %al 1532; CHECK-SSE1-NEXT: andb 8(%r10), %al 1533; CHECK-SSE1-NEXT: xorb %r14b, %al 1534; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1535; CHECK-SSE1-NEXT: movzbl 9(%r9), %eax 1536; CHECK-SSE1-NEXT: xorb %bpl, %al 1537; CHECK-SSE1-NEXT: andb 9(%r10), %al 1538; CHECK-SSE1-NEXT: xorb %bpl, %al 1539; 
CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1540; CHECK-SSE1-NEXT: movzbl 10(%r9), %eax 1541; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload 1542; CHECK-SSE1-NEXT: xorb %cl, %al 1543; CHECK-SSE1-NEXT: andb 10(%r10), %al 1544; CHECK-SSE1-NEXT: xorb %cl, %al 1545; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1546; CHECK-SSE1-NEXT: movzbl 11(%r9), %eax 1547; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload 1548; CHECK-SSE1-NEXT: xorb %cl, %al 1549; CHECK-SSE1-NEXT: andb 11(%r10), %al 1550; CHECK-SSE1-NEXT: xorb %cl, %al 1551; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1552; CHECK-SSE1-NEXT: movzbl 12(%r9), %eax 1553; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload 1554; CHECK-SSE1-NEXT: xorb %cl, %al 1555; CHECK-SSE1-NEXT: andb 12(%r10), %al 1556; CHECK-SSE1-NEXT: xorb %cl, %al 1557; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1558; CHECK-SSE1-NEXT: movzbl 13(%r9), %eax 1559; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload 1560; CHECK-SSE1-NEXT: xorb %cl, %al 1561; CHECK-SSE1-NEXT: andb 13(%r10), %al 1562; CHECK-SSE1-NEXT: xorb %cl, %al 1563; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1564; CHECK-SSE1-NEXT: movzbl 14(%r9), %eax 1565; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload 1566; CHECK-SSE1-NEXT: xorb %cl, %al 1567; CHECK-SSE1-NEXT: andb 14(%r10), %al 1568; CHECK-SSE1-NEXT: xorb %cl, %al 1569; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1570; CHECK-SSE1-NEXT: movzbl 15(%r9), %eax 1571; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 1-byte Folded Reload 1572; CHECK-SSE1-NEXT: xorb %cl, %al 1573; CHECK-SSE1-NEXT: andb 15(%r10), %al 1574; CHECK-SSE1-NEXT: xorb %cl, %al 1575; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1576; CHECK-SSE1-NEXT: movzbl 
16(%r8), %eax 1577; CHECK-SSE1-NEXT: movzbl 16(%r9), %ecx 1578; CHECK-SSE1-NEXT: xorb %al, %cl 1579; CHECK-SSE1-NEXT: andb 16(%r10), %cl 1580; CHECK-SSE1-NEXT: xorb %al, %cl 1581; CHECK-SSE1-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1582; CHECK-SSE1-NEXT: movzbl 17(%r8), %eax 1583; CHECK-SSE1-NEXT: movzbl 17(%r9), %ecx 1584; CHECK-SSE1-NEXT: xorb %al, %cl 1585; CHECK-SSE1-NEXT: andb 17(%r10), %cl 1586; CHECK-SSE1-NEXT: xorb %al, %cl 1587; CHECK-SSE1-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1588; CHECK-SSE1-NEXT: movzbl 18(%r8), %eax 1589; CHECK-SSE1-NEXT: movzbl 18(%r9), %ecx 1590; CHECK-SSE1-NEXT: xorb %al, %cl 1591; CHECK-SSE1-NEXT: andb 18(%r10), %cl 1592; CHECK-SSE1-NEXT: xorb %al, %cl 1593; CHECK-SSE1-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1594; CHECK-SSE1-NEXT: movzbl 19(%r8), %eax 1595; CHECK-SSE1-NEXT: movzbl 19(%r9), %ecx 1596; CHECK-SSE1-NEXT: xorb %al, %cl 1597; CHECK-SSE1-NEXT: andb 19(%r10), %cl 1598; CHECK-SSE1-NEXT: xorb %al, %cl 1599; CHECK-SSE1-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1600; CHECK-SSE1-NEXT: movzbl 20(%r8), %eax 1601; CHECK-SSE1-NEXT: movzbl 20(%r9), %ecx 1602; CHECK-SSE1-NEXT: xorb %al, %cl 1603; CHECK-SSE1-NEXT: andb 20(%r10), %cl 1604; CHECK-SSE1-NEXT: xorb %al, %cl 1605; CHECK-SSE1-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 1606; CHECK-SSE1-NEXT: movzbl 21(%r8), %eax 1607; CHECK-SSE1-NEXT: movzbl 21(%r9), %r13d 1608; CHECK-SSE1-NEXT: xorb %al, %r13b 1609; CHECK-SSE1-NEXT: andb 21(%r10), %r13b 1610; CHECK-SSE1-NEXT: xorb %al, %r13b 1611; CHECK-SSE1-NEXT: movzbl 22(%r8), %eax 1612; CHECK-SSE1-NEXT: movzbl 22(%r9), %r12d 1613; CHECK-SSE1-NEXT: xorb %al, %r12b 1614; CHECK-SSE1-NEXT: andb 22(%r10), %r12b 1615; CHECK-SSE1-NEXT: xorb %al, %r12b 1616; CHECK-SSE1-NEXT: movzbl 23(%r8), %eax 1617; CHECK-SSE1-NEXT: movzbl 23(%r9), %r15d 1618; CHECK-SSE1-NEXT: xorb %al, %r15b 1619; CHECK-SSE1-NEXT: andb 23(%r10), %r15b 1620; CHECK-SSE1-NEXT: xorb %al, %r15b 1621; 
CHECK-SSE1-NEXT: movzbl 24(%r8), %eax 1622; CHECK-SSE1-NEXT: movzbl 24(%r9), %r14d 1623; CHECK-SSE1-NEXT: xorb %al, %r14b 1624; CHECK-SSE1-NEXT: andb 24(%r10), %r14b 1625; CHECK-SSE1-NEXT: xorb %al, %r14b 1626; CHECK-SSE1-NEXT: movzbl 25(%r8), %eax 1627; CHECK-SSE1-NEXT: movzbl 25(%r9), %ebp 1628; CHECK-SSE1-NEXT: xorb %al, %bpl 1629; CHECK-SSE1-NEXT: andb 25(%r10), %bpl 1630; CHECK-SSE1-NEXT: xorb %al, %bpl 1631; CHECK-SSE1-NEXT: movzbl 26(%r8), %eax 1632; CHECK-SSE1-NEXT: movzbl 26(%r9), %edi 1633; CHECK-SSE1-NEXT: xorb %al, %dil 1634; CHECK-SSE1-NEXT: andb 26(%r10), %dil 1635; CHECK-SSE1-NEXT: xorb %al, %dil 1636; CHECK-SSE1-NEXT: movzbl 27(%r8), %eax 1637; CHECK-SSE1-NEXT: movzbl 27(%r9), %esi 1638; CHECK-SSE1-NEXT: xorb %al, %sil 1639; CHECK-SSE1-NEXT: andb 27(%r10), %sil 1640; CHECK-SSE1-NEXT: xorb %al, %sil 1641; CHECK-SSE1-NEXT: movzbl 28(%r8), %eax 1642; CHECK-SSE1-NEXT: movzbl 28(%r9), %edx 1643; CHECK-SSE1-NEXT: xorb %al, %dl 1644; CHECK-SSE1-NEXT: andb 28(%r10), %dl 1645; CHECK-SSE1-NEXT: xorb %al, %dl 1646; CHECK-SSE1-NEXT: movzbl 29(%r8), %eax 1647; CHECK-SSE1-NEXT: movzbl 29(%r9), %ecx 1648; CHECK-SSE1-NEXT: xorb %al, %cl 1649; CHECK-SSE1-NEXT: andb 29(%r10), %cl 1650; CHECK-SSE1-NEXT: xorb %al, %cl 1651; CHECK-SSE1-NEXT: movzbl 30(%r8), %ebx 1652; CHECK-SSE1-NEXT: movzbl 30(%r9), %eax 1653; CHECK-SSE1-NEXT: xorb %bl, %al 1654; CHECK-SSE1-NEXT: andb 30(%r10), %al 1655; CHECK-SSE1-NEXT: xorb %bl, %al 1656; CHECK-SSE1-NEXT: movzbl 31(%r8), %r8d 1657; CHECK-SSE1-NEXT: movzbl 31(%r9), %r9d 1658; CHECK-SSE1-NEXT: xorb %r8b, %r9b 1659; CHECK-SSE1-NEXT: andb 31(%r10), %r9b 1660; CHECK-SSE1-NEXT: xorb %r8b, %r9b 1661; CHECK-SSE1-NEXT: movb %r9b, 31(%r11) 1662; CHECK-SSE1-NEXT: movb %al, 30(%r11) 1663; CHECK-SSE1-NEXT: movb %cl, 29(%r11) 1664; CHECK-SSE1-NEXT: movb %dl, 28(%r11) 1665; CHECK-SSE1-NEXT: movb %sil, 27(%r11) 1666; CHECK-SSE1-NEXT: movb %dil, 26(%r11) 1667; CHECK-SSE1-NEXT: movb %bpl, 25(%r11) 1668; CHECK-SSE1-NEXT: movb %r14b, 24(%r11) 1669; 
CHECK-SSE1-NEXT: movb %r15b, 23(%r11) 1670; CHECK-SSE1-NEXT: movb %r12b, 22(%r11) 1671; CHECK-SSE1-NEXT: movb %r13b, 21(%r11) 1672; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 1673; CHECK-SSE1-NEXT: movb %al, 20(%r11) 1674; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 1675; CHECK-SSE1-NEXT: movb %al, 19(%r11) 1676; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 1677; CHECK-SSE1-NEXT: movb %al, 18(%r11) 1678; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 1679; CHECK-SSE1-NEXT: movb %al, 17(%r11) 1680; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 1681; CHECK-SSE1-NEXT: movb %al, 16(%r11) 1682; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 1683; CHECK-SSE1-NEXT: movb %al, 15(%r11) 1684; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 1685; CHECK-SSE1-NEXT: movb %al, 14(%r11) 1686; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 1687; CHECK-SSE1-NEXT: movb %al, 13(%r11) 1688; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 1689; CHECK-SSE1-NEXT: movb %al, 12(%r11) 1690; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 1691; CHECK-SSE1-NEXT: movb %al, 11(%r11) 1692; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 1693; CHECK-SSE1-NEXT: movb %al, 10(%r11) 1694; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 1695; CHECK-SSE1-NEXT: movb %al, 9(%r11) 1696; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 1697; CHECK-SSE1-NEXT: movb %al, 8(%r11) 1698; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 1699; CHECK-SSE1-NEXT: movb %al, 7(%r11) 1700; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 1701; CHECK-SSE1-NEXT: movb 
%al, 6(%r11) 1702; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 1703; CHECK-SSE1-NEXT: movb %al, 5(%r11) 1704; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 1705; CHECK-SSE1-NEXT: movb %al, 4(%r11) 1706; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 1707; CHECK-SSE1-NEXT: movb %al, 3(%r11) 1708; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 1709; CHECK-SSE1-NEXT: movb %al, 2(%r11) 1710; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 1711; CHECK-SSE1-NEXT: movb %al, 1(%r11) 1712; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 1713; CHECK-SSE1-NEXT: movb %al, (%r11) 1714; CHECK-SSE1-NEXT: movq %r11, %rax 1715; CHECK-SSE1-NEXT: popq %rbx 1716; CHECK-SSE1-NEXT: popq %r12 1717; CHECK-SSE1-NEXT: popq %r13 1718; CHECK-SSE1-NEXT: popq %r14 1719; CHECK-SSE1-NEXT: popq %r15 1720; CHECK-SSE1-NEXT: popq %rbp 1721; CHECK-SSE1-NEXT: retq 1722; 1723; CHECK-SSE2-LABEL: out_v32i8: 1724; CHECK-SSE2: # %bb.0: 1725; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0 1726; CHECK-SSE2-NEXT: movaps 16(%rdx), %xmm1 1727; CHECK-SSE2-NEXT: movaps 16(%rdi), %xmm2 1728; CHECK-SSE2-NEXT: andps %xmm1, %xmm2 1729; CHECK-SSE2-NEXT: movaps (%rdi), %xmm3 1730; CHECK-SSE2-NEXT: andps %xmm0, %xmm3 1731; CHECK-SSE2-NEXT: andnps 16(%rsi), %xmm1 1732; CHECK-SSE2-NEXT: orps %xmm2, %xmm1 1733; CHECK-SSE2-NEXT: andnps (%rsi), %xmm0 1734; CHECK-SSE2-NEXT: orps %xmm3, %xmm0 1735; CHECK-SSE2-NEXT: retq 1736; 1737; CHECK-XOP-LABEL: out_v32i8: 1738; CHECK-XOP: # %bb.0: 1739; CHECK-XOP-NEXT: vmovdqa (%rdi), %ymm0 1740; CHECK-XOP-NEXT: vmovdqa (%rdx), %ymm1 1741; CHECK-XOP-NEXT: vpcmov %ymm1, (%rsi), %ymm0, %ymm0 1742; CHECK-XOP-NEXT: retq 1743 %x = load <32 x i8>, ptr%px, align 32 1744 %y = load <32 x i8>, ptr%py, align 32 1745 %mask = load <32 x i8>, ptr%pmask, align 32 1746 %mx = and <32 x i8> %x, %mask 1747 %notmask = xor <32 x i8> 
%mask, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1> 1748 %my = and <32 x i8> %y, %notmask 1749 %r = or <32 x i8> %mx, %my 1750 ret <32 x i8> %r 1751} 1752 1753define <16 x i16> @out_v16i16(ptr%px, ptr%py, ptr%pmask) nounwind { 1754; CHECK-BASELINE-LABEL: out_v16i16: 1755; CHECK-BASELINE: # %bb.0: 1756; CHECK-BASELINE-NEXT: pushq %rbp 1757; CHECK-BASELINE-NEXT: pushq %r15 1758; CHECK-BASELINE-NEXT: pushq %r14 1759; CHECK-BASELINE-NEXT: pushq %r13 1760; CHECK-BASELINE-NEXT: pushq %r12 1761; CHECK-BASELINE-NEXT: pushq %rbx 1762; CHECK-BASELINE-NEXT: movzwl 18(%rdx), %r15d 1763; CHECK-BASELINE-NEXT: movzwl 16(%rdx), %r14d 1764; CHECK-BASELINE-NEXT: movzwl 14(%rdx), %ebp 1765; CHECK-BASELINE-NEXT: movzwl 12(%rdx), %ebx 1766; CHECK-BASELINE-NEXT: movzwl 10(%rdx), %r13d 1767; CHECK-BASELINE-NEXT: movzwl 8(%rdx), %r11d 1768; CHECK-BASELINE-NEXT: movzwl 6(%rdx), %r10d 1769; CHECK-BASELINE-NEXT: movzwl 4(%rdx), %r9d 1770; CHECK-BASELINE-NEXT: movzwl (%rdx), %r8d 1771; CHECK-BASELINE-NEXT: movzwl 2(%rdx), %r12d 1772; CHECK-BASELINE-NEXT: movzwl (%rsi), %eax 1773; CHECK-BASELINE-NEXT: xorw %r8w, %ax 1774; CHECK-BASELINE-NEXT: andw (%rcx), %ax 1775; CHECK-BASELINE-NEXT: xorl %eax, %r8d 1776; CHECK-BASELINE-NEXT: movl %r8d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 1777; CHECK-BASELINE-NEXT: movzwl 2(%rsi), %eax 1778; CHECK-BASELINE-NEXT: xorw %r12w, %ax 1779; CHECK-BASELINE-NEXT: andw 2(%rcx), %ax 1780; CHECK-BASELINE-NEXT: xorl %eax, %r12d 1781; CHECK-BASELINE-NEXT: movzwl 4(%rsi), %eax 1782; CHECK-BASELINE-NEXT: xorw %r9w, %ax 1783; CHECK-BASELINE-NEXT: andw 4(%rcx), %ax 1784; CHECK-BASELINE-NEXT: xorl %eax, %r9d 1785; CHECK-BASELINE-NEXT: movl %r9d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 1786; CHECK-BASELINE-NEXT: movzwl 6(%rsi), %eax 1787; CHECK-BASELINE-NEXT: xorw %r10w, %ax 1788; 
CHECK-BASELINE-NEXT: andw 6(%rcx), %ax 1789; CHECK-BASELINE-NEXT: xorl %eax, %r10d 1790; CHECK-BASELINE-NEXT: movl %r10d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 1791; CHECK-BASELINE-NEXT: movzwl 8(%rsi), %eax 1792; CHECK-BASELINE-NEXT: xorw %r11w, %ax 1793; CHECK-BASELINE-NEXT: andw 8(%rcx), %ax 1794; CHECK-BASELINE-NEXT: xorl %eax, %r11d 1795; CHECK-BASELINE-NEXT: movl %r11d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 1796; CHECK-BASELINE-NEXT: movzwl 10(%rsi), %eax 1797; CHECK-BASELINE-NEXT: xorw %r13w, %ax 1798; CHECK-BASELINE-NEXT: andw 10(%rcx), %ax 1799; CHECK-BASELINE-NEXT: xorl %eax, %r13d 1800; CHECK-BASELINE-NEXT: movl %r13d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 1801; CHECK-BASELINE-NEXT: movzwl 12(%rsi), %eax 1802; CHECK-BASELINE-NEXT: xorw %bx, %ax 1803; CHECK-BASELINE-NEXT: andw 12(%rcx), %ax 1804; CHECK-BASELINE-NEXT: xorl %eax, %ebx 1805; CHECK-BASELINE-NEXT: movzwl 14(%rsi), %eax 1806; CHECK-BASELINE-NEXT: xorw %bp, %ax 1807; CHECK-BASELINE-NEXT: andw 14(%rcx), %ax 1808; CHECK-BASELINE-NEXT: xorl %eax, %ebp 1809; CHECK-BASELINE-NEXT: movzwl 16(%rsi), %eax 1810; CHECK-BASELINE-NEXT: xorw %r14w, %ax 1811; CHECK-BASELINE-NEXT: andw 16(%rcx), %ax 1812; CHECK-BASELINE-NEXT: xorl %eax, %r14d 1813; CHECK-BASELINE-NEXT: movzwl 18(%rsi), %eax 1814; CHECK-BASELINE-NEXT: xorw %r15w, %ax 1815; CHECK-BASELINE-NEXT: andw 18(%rcx), %ax 1816; CHECK-BASELINE-NEXT: xorl %eax, %r15d 1817; CHECK-BASELINE-NEXT: movzwl 20(%rdx), %r13d 1818; CHECK-BASELINE-NEXT: movzwl 20(%rsi), %eax 1819; CHECK-BASELINE-NEXT: xorw %r13w, %ax 1820; CHECK-BASELINE-NEXT: andw 20(%rcx), %ax 1821; CHECK-BASELINE-NEXT: xorl %eax, %r13d 1822; CHECK-BASELINE-NEXT: movzwl 22(%rdx), %r9d 1823; CHECK-BASELINE-NEXT: movzwl 22(%rsi), %eax 1824; CHECK-BASELINE-NEXT: xorw %r9w, %ax 1825; CHECK-BASELINE-NEXT: andw 22(%rcx), %ax 1826; CHECK-BASELINE-NEXT: xorl %eax, %r9d 1827; CHECK-BASELINE-NEXT: movzwl 24(%rdx), %r8d 1828; CHECK-BASELINE-NEXT: movzwl 24(%rsi), %eax 1829; CHECK-BASELINE-NEXT: xorw 
%r8w, %ax 1830; CHECK-BASELINE-NEXT: andw 24(%rcx), %ax 1831; CHECK-BASELINE-NEXT: xorl %eax, %r8d 1832; CHECK-BASELINE-NEXT: movzwl 26(%rdx), %eax 1833; CHECK-BASELINE-NEXT: movzwl 26(%rsi), %r10d 1834; CHECK-BASELINE-NEXT: xorw %ax, %r10w 1835; CHECK-BASELINE-NEXT: andw 26(%rcx), %r10w 1836; CHECK-BASELINE-NEXT: xorl %r10d, %eax 1837; CHECK-BASELINE-NEXT: movzwl 28(%rdx), %r10d 1838; CHECK-BASELINE-NEXT: movzwl 28(%rsi), %r11d 1839; CHECK-BASELINE-NEXT: xorw %r10w, %r11w 1840; CHECK-BASELINE-NEXT: andw 28(%rcx), %r11w 1841; CHECK-BASELINE-NEXT: xorl %r11d, %r10d 1842; CHECK-BASELINE-NEXT: movzwl 30(%rdx), %edx 1843; CHECK-BASELINE-NEXT: movzwl 30(%rsi), %esi 1844; CHECK-BASELINE-NEXT: xorw %dx, %si 1845; CHECK-BASELINE-NEXT: andw 30(%rcx), %si 1846; CHECK-BASELINE-NEXT: xorl %esi, %edx 1847; CHECK-BASELINE-NEXT: movw %dx, 30(%rdi) 1848; CHECK-BASELINE-NEXT: movw %r10w, 28(%rdi) 1849; CHECK-BASELINE-NEXT: movw %ax, 26(%rdi) 1850; CHECK-BASELINE-NEXT: movw %r8w, 24(%rdi) 1851; CHECK-BASELINE-NEXT: movw %r9w, 22(%rdi) 1852; CHECK-BASELINE-NEXT: movw %r13w, 20(%rdi) 1853; CHECK-BASELINE-NEXT: movw %r15w, 18(%rdi) 1854; CHECK-BASELINE-NEXT: movw %r14w, 16(%rdi) 1855; CHECK-BASELINE-NEXT: movw %bp, 14(%rdi) 1856; CHECK-BASELINE-NEXT: movw %bx, 12(%rdi) 1857; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload 1858; CHECK-BASELINE-NEXT: movw %ax, 10(%rdi) 1859; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload 1860; CHECK-BASELINE-NEXT: movw %ax, 8(%rdi) 1861; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload 1862; CHECK-BASELINE-NEXT: movw %ax, 6(%rdi) 1863; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload 1864; CHECK-BASELINE-NEXT: movw %ax, 4(%rdi) 1865; CHECK-BASELINE-NEXT: movw %r12w, 2(%rdi) 1866; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload 1867; CHECK-BASELINE-NEXT: movw %ax, (%rdi) 1868; CHECK-BASELINE-NEXT: movq %rdi, %rax 1869; 
CHECK-BASELINE-NEXT: popq %rbx 1870; CHECK-BASELINE-NEXT: popq %r12 1871; CHECK-BASELINE-NEXT: popq %r13 1872; CHECK-BASELINE-NEXT: popq %r14 1873; CHECK-BASELINE-NEXT: popq %r15 1874; CHECK-BASELINE-NEXT: popq %rbp 1875; CHECK-BASELINE-NEXT: retq 1876; 1877; CHECK-SSE1-LABEL: out_v16i16: 1878; CHECK-SSE1: # %bb.0: 1879; CHECK-SSE1-NEXT: pushq %rbp 1880; CHECK-SSE1-NEXT: pushq %r15 1881; CHECK-SSE1-NEXT: pushq %r14 1882; CHECK-SSE1-NEXT: pushq %r13 1883; CHECK-SSE1-NEXT: pushq %r12 1884; CHECK-SSE1-NEXT: pushq %rbx 1885; CHECK-SSE1-NEXT: movzwl 18(%rdx), %r15d 1886; CHECK-SSE1-NEXT: movzwl 16(%rdx), %r14d 1887; CHECK-SSE1-NEXT: movzwl 14(%rdx), %ebp 1888; CHECK-SSE1-NEXT: movzwl 12(%rdx), %ebx 1889; CHECK-SSE1-NEXT: movzwl 10(%rdx), %r13d 1890; CHECK-SSE1-NEXT: movzwl 8(%rdx), %r11d 1891; CHECK-SSE1-NEXT: movzwl 6(%rdx), %r10d 1892; CHECK-SSE1-NEXT: movzwl 4(%rdx), %r9d 1893; CHECK-SSE1-NEXT: movzwl (%rdx), %r8d 1894; CHECK-SSE1-NEXT: movzwl 2(%rdx), %r12d 1895; CHECK-SSE1-NEXT: movzwl (%rsi), %eax 1896; CHECK-SSE1-NEXT: xorw %r8w, %ax 1897; CHECK-SSE1-NEXT: andw (%rcx), %ax 1898; CHECK-SSE1-NEXT: xorl %eax, %r8d 1899; CHECK-SSE1-NEXT: movl %r8d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 1900; CHECK-SSE1-NEXT: movzwl 2(%rsi), %eax 1901; CHECK-SSE1-NEXT: xorw %r12w, %ax 1902; CHECK-SSE1-NEXT: andw 2(%rcx), %ax 1903; CHECK-SSE1-NEXT: xorl %eax, %r12d 1904; CHECK-SSE1-NEXT: movzwl 4(%rsi), %eax 1905; CHECK-SSE1-NEXT: xorw %r9w, %ax 1906; CHECK-SSE1-NEXT: andw 4(%rcx), %ax 1907; CHECK-SSE1-NEXT: xorl %eax, %r9d 1908; CHECK-SSE1-NEXT: movl %r9d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 1909; CHECK-SSE1-NEXT: movzwl 6(%rsi), %eax 1910; CHECK-SSE1-NEXT: xorw %r10w, %ax 1911; CHECK-SSE1-NEXT: andw 6(%rcx), %ax 1912; CHECK-SSE1-NEXT: xorl %eax, %r10d 1913; CHECK-SSE1-NEXT: movl %r10d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 1914; CHECK-SSE1-NEXT: movzwl 8(%rsi), %eax 1915; CHECK-SSE1-NEXT: xorw %r11w, %ax 1916; CHECK-SSE1-NEXT: andw 8(%rcx), %ax 1917; CHECK-SSE1-NEXT: xorl 
%eax, %r11d 1918; CHECK-SSE1-NEXT: movl %r11d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 1919; CHECK-SSE1-NEXT: movzwl 10(%rsi), %eax 1920; CHECK-SSE1-NEXT: xorw %r13w, %ax 1921; CHECK-SSE1-NEXT: andw 10(%rcx), %ax 1922; CHECK-SSE1-NEXT: xorl %eax, %r13d 1923; CHECK-SSE1-NEXT: movl %r13d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 1924; CHECK-SSE1-NEXT: movzwl 12(%rsi), %eax 1925; CHECK-SSE1-NEXT: xorw %bx, %ax 1926; CHECK-SSE1-NEXT: andw 12(%rcx), %ax 1927; CHECK-SSE1-NEXT: xorl %eax, %ebx 1928; CHECK-SSE1-NEXT: movzwl 14(%rsi), %eax 1929; CHECK-SSE1-NEXT: xorw %bp, %ax 1930; CHECK-SSE1-NEXT: andw 14(%rcx), %ax 1931; CHECK-SSE1-NEXT: xorl %eax, %ebp 1932; CHECK-SSE1-NEXT: movzwl 16(%rsi), %eax 1933; CHECK-SSE1-NEXT: xorw %r14w, %ax 1934; CHECK-SSE1-NEXT: andw 16(%rcx), %ax 1935; CHECK-SSE1-NEXT: xorl %eax, %r14d 1936; CHECK-SSE1-NEXT: movzwl 18(%rsi), %eax 1937; CHECK-SSE1-NEXT: xorw %r15w, %ax 1938; CHECK-SSE1-NEXT: andw 18(%rcx), %ax 1939; CHECK-SSE1-NEXT: xorl %eax, %r15d 1940; CHECK-SSE1-NEXT: movzwl 20(%rdx), %r13d 1941; CHECK-SSE1-NEXT: movzwl 20(%rsi), %eax 1942; CHECK-SSE1-NEXT: xorw %r13w, %ax 1943; CHECK-SSE1-NEXT: andw 20(%rcx), %ax 1944; CHECK-SSE1-NEXT: xorl %eax, %r13d 1945; CHECK-SSE1-NEXT: movzwl 22(%rdx), %r9d 1946; CHECK-SSE1-NEXT: movzwl 22(%rsi), %eax 1947; CHECK-SSE1-NEXT: xorw %r9w, %ax 1948; CHECK-SSE1-NEXT: andw 22(%rcx), %ax 1949; CHECK-SSE1-NEXT: xorl %eax, %r9d 1950; CHECK-SSE1-NEXT: movzwl 24(%rdx), %r8d 1951; CHECK-SSE1-NEXT: movzwl 24(%rsi), %eax 1952; CHECK-SSE1-NEXT: xorw %r8w, %ax 1953; CHECK-SSE1-NEXT: andw 24(%rcx), %ax 1954; CHECK-SSE1-NEXT: xorl %eax, %r8d 1955; CHECK-SSE1-NEXT: movzwl 26(%rdx), %eax 1956; CHECK-SSE1-NEXT: movzwl 26(%rsi), %r10d 1957; CHECK-SSE1-NEXT: xorw %ax, %r10w 1958; CHECK-SSE1-NEXT: andw 26(%rcx), %r10w 1959; CHECK-SSE1-NEXT: xorl %r10d, %eax 1960; CHECK-SSE1-NEXT: movzwl 28(%rdx), %r10d 1961; CHECK-SSE1-NEXT: movzwl 28(%rsi), %r11d 1962; CHECK-SSE1-NEXT: xorw %r10w, %r11w 1963; CHECK-SSE1-NEXT: andw 
28(%rcx), %r11w 1964; CHECK-SSE1-NEXT: xorl %r11d, %r10d 1965; CHECK-SSE1-NEXT: movzwl 30(%rdx), %edx 1966; CHECK-SSE1-NEXT: movzwl 30(%rsi), %esi 1967; CHECK-SSE1-NEXT: xorw %dx, %si 1968; CHECK-SSE1-NEXT: andw 30(%rcx), %si 1969; CHECK-SSE1-NEXT: xorl %esi, %edx 1970; CHECK-SSE1-NEXT: movw %dx, 30(%rdi) 1971; CHECK-SSE1-NEXT: movw %r10w, 28(%rdi) 1972; CHECK-SSE1-NEXT: movw %ax, 26(%rdi) 1973; CHECK-SSE1-NEXT: movw %r8w, 24(%rdi) 1974; CHECK-SSE1-NEXT: movw %r9w, 22(%rdi) 1975; CHECK-SSE1-NEXT: movw %r13w, 20(%rdi) 1976; CHECK-SSE1-NEXT: movw %r15w, 18(%rdi) 1977; CHECK-SSE1-NEXT: movw %r14w, 16(%rdi) 1978; CHECK-SSE1-NEXT: movw %bp, 14(%rdi) 1979; CHECK-SSE1-NEXT: movw %bx, 12(%rdi) 1980; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload 1981; CHECK-SSE1-NEXT: movw %ax, 10(%rdi) 1982; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload 1983; CHECK-SSE1-NEXT: movw %ax, 8(%rdi) 1984; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload 1985; CHECK-SSE1-NEXT: movw %ax, 6(%rdi) 1986; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload 1987; CHECK-SSE1-NEXT: movw %ax, 4(%rdi) 1988; CHECK-SSE1-NEXT: movw %r12w, 2(%rdi) 1989; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload 1990; CHECK-SSE1-NEXT: movw %ax, (%rdi) 1991; CHECK-SSE1-NEXT: movq %rdi, %rax 1992; CHECK-SSE1-NEXT: popq %rbx 1993; CHECK-SSE1-NEXT: popq %r12 1994; CHECK-SSE1-NEXT: popq %r13 1995; CHECK-SSE1-NEXT: popq %r14 1996; CHECK-SSE1-NEXT: popq %r15 1997; CHECK-SSE1-NEXT: popq %rbp 1998; CHECK-SSE1-NEXT: retq 1999; 2000; CHECK-SSE2-LABEL: out_v16i16: 2001; CHECK-SSE2: # %bb.0: 2002; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0 2003; CHECK-SSE2-NEXT: movaps 16(%rdx), %xmm1 2004; CHECK-SSE2-NEXT: movaps 16(%rdi), %xmm2 2005; CHECK-SSE2-NEXT: andps %xmm1, %xmm2 2006; CHECK-SSE2-NEXT: movaps (%rdi), %xmm3 2007; CHECK-SSE2-NEXT: andps %xmm0, %xmm3 2008; CHECK-SSE2-NEXT: andnps 16(%rsi), %xmm1 2009; CHECK-SSE2-NEXT: orps %xmm2, 
%xmm1 2010; CHECK-SSE2-NEXT: andnps (%rsi), %xmm0 2011; CHECK-SSE2-NEXT: orps %xmm3, %xmm0 2012; CHECK-SSE2-NEXT: retq 2013; 2014; CHECK-XOP-LABEL: out_v16i16: 2015; CHECK-XOP: # %bb.0: 2016; CHECK-XOP-NEXT: vmovdqa (%rdi), %ymm0 2017; CHECK-XOP-NEXT: vmovdqa (%rdx), %ymm1 2018; CHECK-XOP-NEXT: vpcmov %ymm1, (%rsi), %ymm0, %ymm0 2019; CHECK-XOP-NEXT: retq 2020 %x = load <16 x i16>, ptr%px, align 32 2021 %y = load <16 x i16>, ptr%py, align 32 2022 %mask = load <16 x i16>, ptr%pmask, align 32 2023 %mx = and <16 x i16> %x, %mask 2024 %notmask = xor <16 x i16> %mask, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1> 2025 %my = and <16 x i16> %y, %notmask 2026 %r = or <16 x i16> %mx, %my 2027 ret <16 x i16> %r 2028} 2029 2030define <8 x i32> @out_v8i32(ptr%px, ptr%py, ptr%pmask) nounwind { 2031; CHECK-BASELINE-LABEL: out_v8i32: 2032; CHECK-BASELINE: # %bb.0: 2033; CHECK-BASELINE-NEXT: pushq %rbp 2034; CHECK-BASELINE-NEXT: pushq %r14 2035; CHECK-BASELINE-NEXT: pushq %rbx 2036; CHECK-BASELINE-NEXT: movq %rdi, %rax 2037; CHECK-BASELINE-NEXT: movl 28(%rdx), %edi 2038; CHECK-BASELINE-NEXT: movl 24(%rdx), %r8d 2039; CHECK-BASELINE-NEXT: movl 20(%rdx), %r10d 2040; CHECK-BASELINE-NEXT: movl 16(%rdx), %ebx 2041; CHECK-BASELINE-NEXT: movl 12(%rdx), %r14d 2042; CHECK-BASELINE-NEXT: movl 8(%rdx), %ebp 2043; CHECK-BASELINE-NEXT: movl (%rdx), %r9d 2044; CHECK-BASELINE-NEXT: movl 4(%rdx), %r11d 2045; CHECK-BASELINE-NEXT: movl (%rsi), %edx 2046; CHECK-BASELINE-NEXT: xorl %r9d, %edx 2047; CHECK-BASELINE-NEXT: andl (%rcx), %edx 2048; CHECK-BASELINE-NEXT: xorl %r9d, %edx 2049; CHECK-BASELINE-NEXT: movl 4(%rsi), %r9d 2050; CHECK-BASELINE-NEXT: xorl %r11d, %r9d 2051; CHECK-BASELINE-NEXT: andl 4(%rcx), %r9d 2052; CHECK-BASELINE-NEXT: xorl %r11d, %r9d 2053; CHECK-BASELINE-NEXT: movl 8(%rsi), %r11d 2054; CHECK-BASELINE-NEXT: xorl %ebp, %r11d 2055; CHECK-BASELINE-NEXT: andl 8(%rcx), %r11d 2056; CHECK-BASELINE-NEXT: 
xorl %ebp, %r11d 2057; CHECK-BASELINE-NEXT: movl 12(%rsi), %ebp 2058; CHECK-BASELINE-NEXT: xorl %r14d, %ebp 2059; CHECK-BASELINE-NEXT: andl 12(%rcx), %ebp 2060; CHECK-BASELINE-NEXT: xorl %r14d, %ebp 2061; CHECK-BASELINE-NEXT: movl 16(%rsi), %r14d 2062; CHECK-BASELINE-NEXT: xorl %ebx, %r14d 2063; CHECK-BASELINE-NEXT: andl 16(%rcx), %r14d 2064; CHECK-BASELINE-NEXT: xorl %ebx, %r14d 2065; CHECK-BASELINE-NEXT: movl 20(%rsi), %ebx 2066; CHECK-BASELINE-NEXT: xorl %r10d, %ebx 2067; CHECK-BASELINE-NEXT: andl 20(%rcx), %ebx 2068; CHECK-BASELINE-NEXT: xorl %r10d, %ebx 2069; CHECK-BASELINE-NEXT: movl 24(%rsi), %r10d 2070; CHECK-BASELINE-NEXT: xorl %r8d, %r10d 2071; CHECK-BASELINE-NEXT: andl 24(%rcx), %r10d 2072; CHECK-BASELINE-NEXT: xorl %r8d, %r10d 2073; CHECK-BASELINE-NEXT: movl 28(%rsi), %esi 2074; CHECK-BASELINE-NEXT: xorl %edi, %esi 2075; CHECK-BASELINE-NEXT: andl 28(%rcx), %esi 2076; CHECK-BASELINE-NEXT: xorl %edi, %esi 2077; CHECK-BASELINE-NEXT: movl %esi, 28(%rax) 2078; CHECK-BASELINE-NEXT: movl %r10d, 24(%rax) 2079; CHECK-BASELINE-NEXT: movl %ebx, 20(%rax) 2080; CHECK-BASELINE-NEXT: movl %r14d, 16(%rax) 2081; CHECK-BASELINE-NEXT: movl %ebp, 12(%rax) 2082; CHECK-BASELINE-NEXT: movl %r11d, 8(%rax) 2083; CHECK-BASELINE-NEXT: movl %r9d, 4(%rax) 2084; CHECK-BASELINE-NEXT: movl %edx, (%rax) 2085; CHECK-BASELINE-NEXT: popq %rbx 2086; CHECK-BASELINE-NEXT: popq %r14 2087; CHECK-BASELINE-NEXT: popq %rbp 2088; CHECK-BASELINE-NEXT: retq 2089; 2090; CHECK-SSE1-LABEL: out_v8i32: 2091; CHECK-SSE1: # %bb.0: 2092; CHECK-SSE1-NEXT: pushq %rbp 2093; CHECK-SSE1-NEXT: pushq %r14 2094; CHECK-SSE1-NEXT: pushq %rbx 2095; CHECK-SSE1-NEXT: movq %rdi, %rax 2096; CHECK-SSE1-NEXT: movl 28(%rdx), %edi 2097; CHECK-SSE1-NEXT: movl 24(%rdx), %r8d 2098; CHECK-SSE1-NEXT: movl 20(%rdx), %r10d 2099; CHECK-SSE1-NEXT: movl 16(%rdx), %ebx 2100; CHECK-SSE1-NEXT: movl 12(%rdx), %r14d 2101; CHECK-SSE1-NEXT: movl 8(%rdx), %ebp 2102; CHECK-SSE1-NEXT: movl (%rdx), %r9d 2103; CHECK-SSE1-NEXT: movl 4(%rdx), %r11d 
2104; CHECK-SSE1-NEXT: movl (%rsi), %edx 2105; CHECK-SSE1-NEXT: xorl %r9d, %edx 2106; CHECK-SSE1-NEXT: andl (%rcx), %edx 2107; CHECK-SSE1-NEXT: xorl %r9d, %edx 2108; CHECK-SSE1-NEXT: movl 4(%rsi), %r9d 2109; CHECK-SSE1-NEXT: xorl %r11d, %r9d 2110; CHECK-SSE1-NEXT: andl 4(%rcx), %r9d 2111; CHECK-SSE1-NEXT: xorl %r11d, %r9d 2112; CHECK-SSE1-NEXT: movl 8(%rsi), %r11d 2113; CHECK-SSE1-NEXT: xorl %ebp, %r11d 2114; CHECK-SSE1-NEXT: andl 8(%rcx), %r11d 2115; CHECK-SSE1-NEXT: xorl %ebp, %r11d 2116; CHECK-SSE1-NEXT: movl 12(%rsi), %ebp 2117; CHECK-SSE1-NEXT: xorl %r14d, %ebp 2118; CHECK-SSE1-NEXT: andl 12(%rcx), %ebp 2119; CHECK-SSE1-NEXT: xorl %r14d, %ebp 2120; CHECK-SSE1-NEXT: movl 16(%rsi), %r14d 2121; CHECK-SSE1-NEXT: xorl %ebx, %r14d 2122; CHECK-SSE1-NEXT: andl 16(%rcx), %r14d 2123; CHECK-SSE1-NEXT: xorl %ebx, %r14d 2124; CHECK-SSE1-NEXT: movl 20(%rsi), %ebx 2125; CHECK-SSE1-NEXT: xorl %r10d, %ebx 2126; CHECK-SSE1-NEXT: andl 20(%rcx), %ebx 2127; CHECK-SSE1-NEXT: xorl %r10d, %ebx 2128; CHECK-SSE1-NEXT: movl 24(%rsi), %r10d 2129; CHECK-SSE1-NEXT: xorl %r8d, %r10d 2130; CHECK-SSE1-NEXT: andl 24(%rcx), %r10d 2131; CHECK-SSE1-NEXT: xorl %r8d, %r10d 2132; CHECK-SSE1-NEXT: movl 28(%rsi), %esi 2133; CHECK-SSE1-NEXT: xorl %edi, %esi 2134; CHECK-SSE1-NEXT: andl 28(%rcx), %esi 2135; CHECK-SSE1-NEXT: xorl %edi, %esi 2136; CHECK-SSE1-NEXT: movl %esi, 28(%rax) 2137; CHECK-SSE1-NEXT: movl %r10d, 24(%rax) 2138; CHECK-SSE1-NEXT: movl %ebx, 20(%rax) 2139; CHECK-SSE1-NEXT: movl %r14d, 16(%rax) 2140; CHECK-SSE1-NEXT: movl %ebp, 12(%rax) 2141; CHECK-SSE1-NEXT: movl %r11d, 8(%rax) 2142; CHECK-SSE1-NEXT: movl %r9d, 4(%rax) 2143; CHECK-SSE1-NEXT: movl %edx, (%rax) 2144; CHECK-SSE1-NEXT: popq %rbx 2145; CHECK-SSE1-NEXT: popq %r14 2146; CHECK-SSE1-NEXT: popq %rbp 2147; CHECK-SSE1-NEXT: retq 2148; 2149; CHECK-SSE2-LABEL: out_v8i32: 2150; CHECK-SSE2: # %bb.0: 2151; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0 2152; CHECK-SSE2-NEXT: movaps 16(%rdx), %xmm1 2153; CHECK-SSE2-NEXT: movaps 16(%rdi), %xmm2 
2154; CHECK-SSE2-NEXT: andps %xmm1, %xmm2 2155; CHECK-SSE2-NEXT: movaps (%rdi), %xmm3 2156; CHECK-SSE2-NEXT: andps %xmm0, %xmm3 2157; CHECK-SSE2-NEXT: andnps 16(%rsi), %xmm1 2158; CHECK-SSE2-NEXT: orps %xmm2, %xmm1 2159; CHECK-SSE2-NEXT: andnps (%rsi), %xmm0 2160; CHECK-SSE2-NEXT: orps %xmm3, %xmm0 2161; CHECK-SSE2-NEXT: retq 2162; 2163; CHECK-XOP-LABEL: out_v8i32: 2164; CHECK-XOP: # %bb.0: 2165; CHECK-XOP-NEXT: vmovdqa (%rdi), %ymm0 2166; CHECK-XOP-NEXT: vmovdqa (%rdx), %ymm1 2167; CHECK-XOP-NEXT: vpcmov %ymm1, (%rsi), %ymm0, %ymm0 2168; CHECK-XOP-NEXT: retq 2169 %x = load <8 x i32>, ptr%px, align 32 2170 %y = load <8 x i32>, ptr%py, align 32 2171 %mask = load <8 x i32>, ptr%pmask, align 32 2172 %mx = and <8 x i32> %x, %mask 2173 %notmask = xor <8 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1, i32 -1> 2174 %my = and <8 x i32> %y, %notmask 2175 %r = or <8 x i32> %mx, %my 2176 ret <8 x i32> %r 2177} 2178 2179define <4 x i64> @out_v4i64(ptr%px, ptr%py, ptr%pmask) nounwind { 2180; CHECK-BASELINE-LABEL: out_v4i64: 2181; CHECK-BASELINE: # %bb.0: 2182; CHECK-BASELINE-NEXT: movq %rdi, %rax 2183; CHECK-BASELINE-NEXT: movq 24(%rdx), %rdi 2184; CHECK-BASELINE-NEXT: movq 16(%rdx), %r8 2185; CHECK-BASELINE-NEXT: movq (%rdx), %r9 2186; CHECK-BASELINE-NEXT: movq 8(%rdx), %r10 2187; CHECK-BASELINE-NEXT: movq (%rsi), %rdx 2188; CHECK-BASELINE-NEXT: xorq %r9, %rdx 2189; CHECK-BASELINE-NEXT: andq (%rcx), %rdx 2190; CHECK-BASELINE-NEXT: xorq %r9, %rdx 2191; CHECK-BASELINE-NEXT: movq 8(%rsi), %r9 2192; CHECK-BASELINE-NEXT: xorq %r10, %r9 2193; CHECK-BASELINE-NEXT: andq 8(%rcx), %r9 2194; CHECK-BASELINE-NEXT: xorq %r10, %r9 2195; CHECK-BASELINE-NEXT: movq 16(%rsi), %r10 2196; CHECK-BASELINE-NEXT: xorq %r8, %r10 2197; CHECK-BASELINE-NEXT: andq 16(%rcx), %r10 2198; CHECK-BASELINE-NEXT: xorq %r8, %r10 2199; CHECK-BASELINE-NEXT: movq 24(%rsi), %rsi 2200; CHECK-BASELINE-NEXT: xorq %rdi, %rsi 2201; CHECK-BASELINE-NEXT: andq 24(%rcx), %rsi 2202; CHECK-BASELINE-NEXT: 
xorq %rdi, %rsi 2203; CHECK-BASELINE-NEXT: movq %rsi, 24(%rax) 2204; CHECK-BASELINE-NEXT: movq %r10, 16(%rax) 2205; CHECK-BASELINE-NEXT: movq %r9, 8(%rax) 2206; CHECK-BASELINE-NEXT: movq %rdx, (%rax) 2207; CHECK-BASELINE-NEXT: retq 2208; 2209; CHECK-SSE1-LABEL: out_v4i64: 2210; CHECK-SSE1: # %bb.0: 2211; CHECK-SSE1-NEXT: movq %rdi, %rax 2212; CHECK-SSE1-NEXT: movq 24(%rdx), %rdi 2213; CHECK-SSE1-NEXT: movq 16(%rdx), %r8 2214; CHECK-SSE1-NEXT: movq (%rdx), %r9 2215; CHECK-SSE1-NEXT: movq 8(%rdx), %r10 2216; CHECK-SSE1-NEXT: movq (%rsi), %rdx 2217; CHECK-SSE1-NEXT: xorq %r9, %rdx 2218; CHECK-SSE1-NEXT: andq (%rcx), %rdx 2219; CHECK-SSE1-NEXT: xorq %r9, %rdx 2220; CHECK-SSE1-NEXT: movq 8(%rsi), %r9 2221; CHECK-SSE1-NEXT: xorq %r10, %r9 2222; CHECK-SSE1-NEXT: andq 8(%rcx), %r9 2223; CHECK-SSE1-NEXT: xorq %r10, %r9 2224; CHECK-SSE1-NEXT: movq 16(%rsi), %r10 2225; CHECK-SSE1-NEXT: xorq %r8, %r10 2226; CHECK-SSE1-NEXT: andq 16(%rcx), %r10 2227; CHECK-SSE1-NEXT: xorq %r8, %r10 2228; CHECK-SSE1-NEXT: movq 24(%rsi), %rsi 2229; CHECK-SSE1-NEXT: xorq %rdi, %rsi 2230; CHECK-SSE1-NEXT: andq 24(%rcx), %rsi 2231; CHECK-SSE1-NEXT: xorq %rdi, %rsi 2232; CHECK-SSE1-NEXT: movq %rsi, 24(%rax) 2233; CHECK-SSE1-NEXT: movq %r10, 16(%rax) 2234; CHECK-SSE1-NEXT: movq %r9, 8(%rax) 2235; CHECK-SSE1-NEXT: movq %rdx, (%rax) 2236; CHECK-SSE1-NEXT: retq 2237; 2238; CHECK-SSE2-LABEL: out_v4i64: 2239; CHECK-SSE2: # %bb.0: 2240; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0 2241; CHECK-SSE2-NEXT: movaps 16(%rdx), %xmm1 2242; CHECK-SSE2-NEXT: movaps 16(%rdi), %xmm2 2243; CHECK-SSE2-NEXT: andps %xmm1, %xmm2 2244; CHECK-SSE2-NEXT: movaps (%rdi), %xmm3 2245; CHECK-SSE2-NEXT: andps %xmm0, %xmm3 2246; CHECK-SSE2-NEXT: andnps 16(%rsi), %xmm1 2247; CHECK-SSE2-NEXT: orps %xmm2, %xmm1 2248; CHECK-SSE2-NEXT: andnps (%rsi), %xmm0 2249; CHECK-SSE2-NEXT: orps %xmm3, %xmm0 2250; CHECK-SSE2-NEXT: retq 2251; 2252; CHECK-XOP-LABEL: out_v4i64: 2253; CHECK-XOP: # %bb.0: 2254; CHECK-XOP-NEXT: vmovdqa (%rdi), %ymm0 2255; 
CHECK-XOP-NEXT: vmovdqa (%rdx), %ymm1 2256; CHECK-XOP-NEXT: vpcmov %ymm1, (%rsi), %ymm0, %ymm0 2257; CHECK-XOP-NEXT: retq 2258 %x = load <4 x i64>, ptr%px, align 32 2259 %y = load <4 x i64>, ptr%py, align 32 2260 %mask = load <4 x i64>, ptr%pmask, align 32 2261 %mx = and <4 x i64> %x, %mask 2262 %notmask = xor <4 x i64> %mask, <i64 -1, i64 -1, i64 -1, i64 -1> 2263 %my = and <4 x i64> %y, %notmask 2264 %r = or <4 x i64> %mx, %my 2265 ret <4 x i64> %r 2266} 2267 2268;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 2269; Should be the same as the previous (out_*) tests, with the merge written as ((x ^ y) & mask) ^ y instead of (x & mask) | (y & ~mask). 2270;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 2271 2272; ============================================================================ ; 2273; 8-bit vector width 2274; ============================================================================ ; 2275; in_v1i8: single-element i8 masked merge, r = ((x ^ y) & mask) ^ y. One CHECK prefix covers every RUN line. The CHECK lines are generated by utils/update_llc_test_checks.py - regenerate them instead of editing by hand. 2276define <1 x i8> @in_v1i8(<1 x i8> %x, <1 x i8> %y, <1 x i8> %mask) nounwind { 2277; CHECK-LABEL: in_v1i8: 2278; CHECK: # %bb.0: 2279; CHECK-NEXT: movl %edi, %eax 2280; CHECK-NEXT: xorl %esi, %eax 2281; CHECK-NEXT: andl %edx, %eax 2282; CHECK-NEXT: xorl %esi, %eax 2283; CHECK-NEXT: # kill: def $al killed $al killed $eax 2284; CHECK-NEXT: retq 2285 %n0 = xor <1 x i8> %x, %y 2286 %n1 = and <1 x i8> %n0, %mask 2287 %r = xor <1 x i8> %n1, %y 2288 ret <1 x i8> %r 2289} 2290 2291; ============================================================================ ; 2292; 16-bit vector width 2293; ============================================================================ ; 2294 2295define <2 x i8> @in_v2i8(<2 x i8> %x, <2 x i8> %y, <2 x i8> %mask) nounwind { 2296; CHECK-BASELINE-LABEL: in_v2i8: 2297; CHECK-BASELINE: # %bb.0: 2298; CHECK-BASELINE-NEXT: movl %edi, %eax 2299; CHECK-BASELINE-NEXT: xorl %edx, %eax 2300; CHECK-BASELINE-NEXT: xorl %ecx, %esi 2301; CHECK-BASELINE-NEXT: andl %r9d, %esi 2302; CHECK-BASELINE-NEXT: andl %r8d, %eax 2303; CHECK-BASELINE-NEXT: xorl %edx, %eax 2304; CHECK-BASELINE-NEXT: 
xorl %ecx, %esi 2305; CHECK-BASELINE-NEXT: # kill: def $al killed $al killed $eax 2306; CHECK-BASELINE-NEXT: movl %esi, %edx 2307; CHECK-BASELINE-NEXT: retq 2308; 2309; CHECK-SSE1-LABEL: in_v2i8: 2310; CHECK-SSE1: # %bb.0: 2311; CHECK-SSE1-NEXT: movl %edi, %eax 2312; CHECK-SSE1-NEXT: xorl %edx, %eax 2313; CHECK-SSE1-NEXT: xorl %ecx, %esi 2314; CHECK-SSE1-NEXT: andl %r9d, %esi 2315; CHECK-SSE1-NEXT: andl %r8d, %eax 2316; CHECK-SSE1-NEXT: xorl %edx, %eax 2317; CHECK-SSE1-NEXT: xorl %ecx, %esi 2318; CHECK-SSE1-NEXT: # kill: def $al killed $al killed $eax 2319; CHECK-SSE1-NEXT: movl %esi, %edx 2320; CHECK-SSE1-NEXT: retq 2321; 2322; CHECK-SSE2-LABEL: in_v2i8: 2323; CHECK-SSE2: # %bb.0: 2324; CHECK-SSE2-NEXT: andps %xmm2, %xmm0 2325; CHECK-SSE2-NEXT: andnps %xmm1, %xmm2 2326; CHECK-SSE2-NEXT: orps %xmm2, %xmm0 2327; CHECK-SSE2-NEXT: retq 2328; 2329; CHECK-XOP-LABEL: in_v2i8: 2330; CHECK-XOP: # %bb.0: 2331; CHECK-XOP-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0 2332; CHECK-XOP-NEXT: retq 2333 %n0 = xor <2 x i8> %x, %y 2334 %n1 = and <2 x i8> %n0, %mask 2335 %r = xor <2 x i8> %n1, %y 2336 ret <2 x i8> %r 2337} 2338; in_v1i16: single-element i16 masked merge, r = ((x ^ y) & mask) ^ y. Checked under the shared CHECK prefix, so every RUN line expects the same xorl/andl/xorl sequence. 2339define <1 x i16> @in_v1i16(<1 x i16> %x, <1 x i16> %y, <1 x i16> %mask) nounwind { 2340; CHECK-LABEL: in_v1i16: 2341; CHECK: # %bb.0: 2342; CHECK-NEXT: movl %edi, %eax 2343; CHECK-NEXT: xorl %esi, %eax 2344; CHECK-NEXT: andl %edx, %eax 2345; CHECK-NEXT: xorl %esi, %eax 2346; CHECK-NEXT: # kill: def $ax killed $ax killed $eax 2347; CHECK-NEXT: retq 2348 %n0 = xor <1 x i16> %x, %y 2349 %n1 = and <1 x i16> %n0, %mask 2350 %r = xor <1 x i16> %n1, %y 2351 ret <1 x i16> %r 2352} 2353 2354; ============================================================================ ; 2355; 32-bit vector width 2356; ============================================================================ ; 2357 2358define <4 x i8> @in_v4i8(<4 x i8> %x, <4 x i8> %y, <4 x i8> %mask) nounwind { 2359; CHECK-BASELINE-LABEL: in_v4i8: 2360; CHECK-BASELINE: # %bb.0: 2361; CHECK-BASELINE-NEXT: movq %rdi, 
%rax 2362; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %edi 2363; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d 2364; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d 2365; CHECK-BASELINE-NEXT: xorl %r9d, %esi 2366; CHECK-BASELINE-NEXT: xorb %r11b, %dl 2367; CHECK-BASELINE-NEXT: xorb %r10b, %cl 2368; CHECK-BASELINE-NEXT: xorb %dil, %r8b 2369; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r8b 2370; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %cl 2371; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %dl 2372; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %sil 2373; CHECK-BASELINE-NEXT: xorb %r9b, %sil 2374; CHECK-BASELINE-NEXT: xorb %r11b, %dl 2375; CHECK-BASELINE-NEXT: xorb %r10b, %cl 2376; CHECK-BASELINE-NEXT: xorb %dil, %r8b 2377; CHECK-BASELINE-NEXT: movb %r8b, 3(%rax) 2378; CHECK-BASELINE-NEXT: movb %cl, 2(%rax) 2379; CHECK-BASELINE-NEXT: movb %dl, 1(%rax) 2380; CHECK-BASELINE-NEXT: movb %sil, (%rax) 2381; CHECK-BASELINE-NEXT: retq 2382; 2383; CHECK-SSE1-LABEL: in_v4i8: 2384; CHECK-SSE1: # %bb.0: 2385; CHECK-SSE1-NEXT: movq %rdi, %rax 2386; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %edi 2387; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d 2388; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d 2389; CHECK-SSE1-NEXT: xorl %r9d, %esi 2390; CHECK-SSE1-NEXT: xorb %r11b, %dl 2391; CHECK-SSE1-NEXT: xorb %r10b, %cl 2392; CHECK-SSE1-NEXT: xorb %dil, %r8b 2393; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r8b 2394; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %cl 2395; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %dl 2396; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %sil 2397; CHECK-SSE1-NEXT: xorb %r9b, %sil 2398; CHECK-SSE1-NEXT: xorb %r11b, %dl 2399; CHECK-SSE1-NEXT: xorb %r10b, %cl 2400; CHECK-SSE1-NEXT: xorb %dil, %r8b 2401; CHECK-SSE1-NEXT: movb %r8b, 3(%rax) 2402; CHECK-SSE1-NEXT: movb %cl, 2(%rax) 2403; CHECK-SSE1-NEXT: movb %dl, 1(%rax) 2404; CHECK-SSE1-NEXT: movb %sil, (%rax) 2405; CHECK-SSE1-NEXT: retq 2406; 2407; CHECK-SSE2-LABEL: in_v4i8: 2408; CHECK-SSE2: # %bb.0: 2409; 
CHECK-SSE2-NEXT: andps %xmm2, %xmm0 2410; CHECK-SSE2-NEXT: andnps %xmm1, %xmm2 2411; CHECK-SSE2-NEXT: orps %xmm2, %xmm0 2412; CHECK-SSE2-NEXT: retq 2413; 2414; CHECK-XOP-LABEL: in_v4i8: 2415; CHECK-XOP: # %bb.0: 2416; CHECK-XOP-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0 2417; CHECK-XOP-NEXT: retq 2418 %n0 = xor <4 x i8> %x, %y 2419 %n1 = and <4 x i8> %n0, %mask 2420 %r = xor <4 x i8> %n1, %y 2421 ret <4 x i8> %r 2422} 2423 2424define <2 x i16> @in_v2i16(<2 x i16> %x, <2 x i16> %y, <2 x i16> %mask) nounwind { 2425; CHECK-BASELINE-LABEL: in_v2i16: 2426; CHECK-BASELINE: # %bb.0: 2427; CHECK-BASELINE-NEXT: movl %edi, %eax 2428; CHECK-BASELINE-NEXT: xorl %edx, %eax 2429; CHECK-BASELINE-NEXT: xorl %ecx, %esi 2430; CHECK-BASELINE-NEXT: andl %r9d, %esi 2431; CHECK-BASELINE-NEXT: andl %r8d, %eax 2432; CHECK-BASELINE-NEXT: xorl %edx, %eax 2433; CHECK-BASELINE-NEXT: xorl %ecx, %esi 2434; CHECK-BASELINE-NEXT: # kill: def $ax killed $ax killed $eax 2435; CHECK-BASELINE-NEXT: movl %esi, %edx 2436; CHECK-BASELINE-NEXT: retq 2437; 2438; CHECK-SSE1-LABEL: in_v2i16: 2439; CHECK-SSE1: # %bb.0: 2440; CHECK-SSE1-NEXT: movl %edi, %eax 2441; CHECK-SSE1-NEXT: xorl %edx, %eax 2442; CHECK-SSE1-NEXT: xorl %ecx, %esi 2443; CHECK-SSE1-NEXT: andl %r9d, %esi 2444; CHECK-SSE1-NEXT: andl %r8d, %eax 2445; CHECK-SSE1-NEXT: xorl %edx, %eax 2446; CHECK-SSE1-NEXT: xorl %ecx, %esi 2447; CHECK-SSE1-NEXT: # kill: def $ax killed $ax killed $eax 2448; CHECK-SSE1-NEXT: movl %esi, %edx 2449; CHECK-SSE1-NEXT: retq 2450; 2451; CHECK-SSE2-LABEL: in_v2i16: 2452; CHECK-SSE2: # %bb.0: 2453; CHECK-SSE2-NEXT: andps %xmm2, %xmm0 2454; CHECK-SSE2-NEXT: andnps %xmm1, %xmm2 2455; CHECK-SSE2-NEXT: orps %xmm2, %xmm0 2456; CHECK-SSE2-NEXT: retq 2457; 2458; CHECK-XOP-LABEL: in_v2i16: 2459; CHECK-XOP: # %bb.0: 2460; CHECK-XOP-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0 2461; CHECK-XOP-NEXT: retq 2462 %n0 = xor <2 x i16> %x, %y 2463 %n1 = and <2 x i16> %n0, %mask 2464 %r = xor <2 x i16> %n1, %y 2465 ret <2 x i16> %r 2466} 2467 
; Masked merge of two <1 x i32> vectors in xor/and/xor form:
;   r = ((x ^ y) & mask) ^ y
; Each result bit is taken from %x where the mask bit is set and from %y
; where it is clear. All four configurations share one set of expectations.
define <1 x i32> @in_v1i32(<1 x i32> %x, <1 x i32> %y, <1 x i32> %mask) nounwind {
; CHECK-LABEL: in_v1i32:
; CHECK: # %bb.0:
; CHECK-NEXT: movl %edi, %eax
; CHECK-NEXT: xorl %esi, %eax
; CHECK-NEXT: andl %edx, %eax
; CHECK-NEXT: xorl %esi, %eax
; CHECK-NEXT: retq
  %n0 = xor <1 x i32> %x, %y ; bits in which x and y differ
  %n1 = and <1 x i32> %n0, %mask ; keep only the differing bits selected by mask
  %r = xor <1 x i32> %n1, %y ; flip those bits in y, yielding x's bits there
  ret <1 x i32> %r
}

; ============================================================================ ;
; 64-bit vector width
; ============================================================================ ;

; Same xor/and/xor merge, r = ((x ^ y) & mask) ^ y, on <8 x i8>.
; In the two non-SSE2 configurations the expected code works one byte at a
; time, saves and restores six callee-saved registers, and stores the result
; bytes through the pointer in %rdi.
define <8 x i8> @in_v8i8(<8 x i8> %x, <8 x i8> %y, <8 x i8> %mask) nounwind {
; CHECK-BASELINE-LABEL: in_v8i8:
; CHECK-BASELINE: # %bb.0:
; CHECK-BASELINE-NEXT: pushq %rbp
; CHECK-BASELINE-NEXT: pushq %r15
; CHECK-BASELINE-NEXT: pushq %r14
; CHECK-BASELINE-NEXT: pushq %r13
; CHECK-BASELINE-NEXT: pushq %r12
; CHECK-BASELINE-NEXT: pushq %rbx
; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d
; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx
; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %ebp
; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r14d
; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r12d
; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d
; CHECK-BASELINE-NEXT: xorb %r11b, %sil
; CHECK-BASELINE-NEXT: xorb %r12b, %dl
; CHECK-BASELINE-NEXT: xorb %r14b, %cl
; CHECK-BASELINE-NEXT: xorb %bpl, %r8b
; CHECK-BASELINE-NEXT: xorb %bl, %r9b
; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r15d
; CHECK-BASELINE-NEXT: xorb {{[0-9]+}}(%rsp), %r15b
; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r13d
; CHECK-BASELINE-NEXT: xorb {{[0-9]+}}(%rsp), %r13b
; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-BASELINE-NEXT: xorb %r10b, %al
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r9b
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r8b
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %cl
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %dl
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %sil
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %al
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r13b
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r15b
; CHECK-BASELINE-NEXT: xorb %r11b, %sil
; CHECK-BASELINE-NEXT: xorb %r12b, %dl
; CHECK-BASELINE-NEXT: xorb %r14b, %cl
; CHECK-BASELINE-NEXT: xorb %bpl, %r8b
; CHECK-BASELINE-NEXT: xorb %bl, %r9b
; CHECK-BASELINE-NEXT: xorb {{[0-9]+}}(%rsp), %r15b
; CHECK-BASELINE-NEXT: xorb {{[0-9]+}}(%rsp), %r13b
; CHECK-BASELINE-NEXT: xorb %r10b, %al
; CHECK-BASELINE-NEXT: movb %al, 7(%rdi)
; CHECK-BASELINE-NEXT: movb %r13b, 6(%rdi)
; CHECK-BASELINE-NEXT: movb %r15b, 5(%rdi)
; CHECK-BASELINE-NEXT: movb %r9b, 4(%rdi)
; CHECK-BASELINE-NEXT: movb %r8b, 3(%rdi)
; CHECK-BASELINE-NEXT: movb %cl, 2(%rdi)
; CHECK-BASELINE-NEXT: movb %dl, 1(%rdi)
; CHECK-BASELINE-NEXT: movb %sil, (%rdi)
; CHECK-BASELINE-NEXT: movq %rdi, %rax
; CHECK-BASELINE-NEXT: popq %rbx
; CHECK-BASELINE-NEXT: popq %r12
; CHECK-BASELINE-NEXT: popq %r13
; CHECK-BASELINE-NEXT: popq %r14
; CHECK-BASELINE-NEXT: popq %r15
; CHECK-BASELINE-NEXT: popq %rbp
; CHECK-BASELINE-NEXT: retq
;
; CHECK-SSE1-LABEL: in_v8i8:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: pushq %rbp
; CHECK-SSE1-NEXT: pushq %r15
; CHECK-SSE1-NEXT: pushq %r14
; CHECK-SSE1-NEXT: pushq %r13
; CHECK-SSE1-NEXT: pushq %r12
; CHECK-SSE1-NEXT: pushq %rbx
; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d
; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx
; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %ebp
; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r14d
; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r12d
; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d
; CHECK-SSE1-NEXT: xorb %r11b, %sil
; CHECK-SSE1-NEXT: xorb %r12b, %dl
; CHECK-SSE1-NEXT: xorb %r14b, %cl
; CHECK-SSE1-NEXT: xorb %bpl, %r8b
; CHECK-SSE1-NEXT: xorb %bl, %r9b
; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r15d
; CHECK-SSE1-NEXT: xorb {{[0-9]+}}(%rsp), %r15b
; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r13d
; CHECK-SSE1-NEXT: xorb {{[0-9]+}}(%rsp), %r13b
; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-SSE1-NEXT: xorb %r10b, %al
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r9b
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r8b
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %cl
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %dl
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %sil
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %al
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r13b
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r15b
; CHECK-SSE1-NEXT: xorb %r11b, %sil
; CHECK-SSE1-NEXT: xorb %r12b, %dl
; CHECK-SSE1-NEXT: xorb %r14b, %cl
; CHECK-SSE1-NEXT: xorb %bpl, %r8b
; CHECK-SSE1-NEXT: xorb %bl, %r9b
; CHECK-SSE1-NEXT: xorb {{[0-9]+}}(%rsp), %r15b
; CHECK-SSE1-NEXT: xorb {{[0-9]+}}(%rsp), %r13b
; CHECK-SSE1-NEXT: xorb %r10b, %al
; CHECK-SSE1-NEXT: movb %al, 7(%rdi)
; CHECK-SSE1-NEXT: movb %r13b, 6(%rdi)
; CHECK-SSE1-NEXT: movb %r15b, 5(%rdi)
; CHECK-SSE1-NEXT: movb %r9b, 4(%rdi)
; CHECK-SSE1-NEXT: movb %r8b, 3(%rdi)
; CHECK-SSE1-NEXT: movb %cl, 2(%rdi)
; CHECK-SSE1-NEXT: movb %dl, 1(%rdi)
; CHECK-SSE1-NEXT: movb %sil, (%rdi)
; CHECK-SSE1-NEXT: movq %rdi, %rax
; CHECK-SSE1-NEXT: popq %rbx
; CHECK-SSE1-NEXT: popq %r12
; CHECK-SSE1-NEXT: popq %r13
; CHECK-SSE1-NEXT: popq %r14
; CHECK-SSE1-NEXT: popq %r15
; CHECK-SSE1-NEXT: popq %rbp
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: in_v8i8:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: andps %xmm2, %xmm0
; CHECK-SSE2-NEXT: andnps %xmm1, %xmm2
; CHECK-SSE2-NEXT: orps %xmm2, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: in_v8i8:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT: retq
  %n0 = xor <8 x i8> %x, %y ; bits in which x and y differ
  %n1 = and <8 x i8> %n0, %mask ; keep only the differing bits selected by mask
  %r = xor <8 x i8> %n1, %y ; flip those bits in y, yielding x's bits there
  ret <8 x i8> %r
}

; Same xor/and/xor merge, r = ((x ^ y) & mask) ^ y, on <4 x i16>.
define <4 x i16> @in_v4i16(<4 x i16> %x, <4 x i16> %y, <4 x i16> %mask) nounwind {
; CHECK-BASELINE-LABEL: in_v4i16:
; CHECK-BASELINE: # %bb.0:
; CHECK-BASELINE-NEXT: movq %rdi, %rax
; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %edi
; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %r10d
; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %r11d
; CHECK-BASELINE-NEXT: xorl %r9d, %esi
; CHECK-BASELINE-NEXT: xorl %r11d, %edx
; CHECK-BASELINE-NEXT: xorl %r10d, %ecx
; CHECK-BASELINE-NEXT: xorl %edi, %r8d
; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %r8w
; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %cx
; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %dx
; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %si
; CHECK-BASELINE-NEXT: xorl %r9d, %esi
; CHECK-BASELINE-NEXT: xorl %r11d, %edx
; CHECK-BASELINE-NEXT: xorl %r10d, %ecx
; CHECK-BASELINE-NEXT: xorl %edi, %r8d
; CHECK-BASELINE-NEXT: movw %r8w, 6(%rax)
; CHECK-BASELINE-NEXT: movw %cx, 4(%rax)
; CHECK-BASELINE-NEXT: movw %dx, 2(%rax)
; CHECK-BASELINE-NEXT: movw %si, (%rax)
; CHECK-BASELINE-NEXT: retq
;
; CHECK-SSE1-LABEL: in_v4i16:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: movq %rdi, %rax
; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %edi
; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %r10d
; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %r11d
; CHECK-SSE1-NEXT: xorl %r9d, %esi
; CHECK-SSE1-NEXT: xorl %r11d, %edx
; CHECK-SSE1-NEXT: xorl %r10d, %ecx
; CHECK-SSE1-NEXT: xorl %edi, %r8d
; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %r8w
; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %cx
; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %dx
; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %si
; CHECK-SSE1-NEXT: xorl %r9d, %esi
; CHECK-SSE1-NEXT: xorl %r11d, %edx
; CHECK-SSE1-NEXT: xorl %r10d, %ecx
; CHECK-SSE1-NEXT: xorl %edi, %r8d
; CHECK-SSE1-NEXT: movw %r8w, 6(%rax)
; CHECK-SSE1-NEXT: movw %cx, 4(%rax)
; CHECK-SSE1-NEXT: movw %dx, 2(%rax)
; CHECK-SSE1-NEXT: movw %si, (%rax)
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: in_v4i16:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: andps %xmm2, %xmm0
; CHECK-SSE2-NEXT: andnps %xmm1, %xmm2
; CHECK-SSE2-NEXT: orps %xmm2, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: in_v4i16:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT: retq
  %n0 = xor <4 x i16> %x, %y ; bits in which x and y differ
  %n1 = and <4 x i16> %n0, %mask ; keep only the differing bits selected by mask
  %r = xor <4 x i16> %n1, %y ; flip those bits in y, yielding x's bits there
  ret <4 x i16> %r
}

; Same xor/and/xor merge, r = ((x ^ y) & mask) ^ y, on <2 x i32>.
define <2 x i32> @in_v2i32(<2 x i32> %x, <2 x i32> %y, <2 x i32> %mask) nounwind {
; CHECK-BASELINE-LABEL: in_v2i32:
; CHECK-BASELINE: # %bb.0:
; CHECK-BASELINE-NEXT: movl %edi, %eax
; CHECK-BASELINE-NEXT: xorl %edx, %eax
; CHECK-BASELINE-NEXT: xorl %ecx, %esi
; CHECK-BASELINE-NEXT: andl %r9d, %esi
; CHECK-BASELINE-NEXT: andl %r8d, %eax
; CHECK-BASELINE-NEXT: xorl %edx, %eax
; CHECK-BASELINE-NEXT: xorl %ecx, %esi
; CHECK-BASELINE-NEXT: movl %esi, %edx
; CHECK-BASELINE-NEXT: retq
;
; CHECK-SSE1-LABEL: in_v2i32:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: movl %edi, %eax
; CHECK-SSE1-NEXT: xorl %edx, %eax
; CHECK-SSE1-NEXT: xorl %ecx, %esi
; CHECK-SSE1-NEXT: andl %r9d, %esi
; CHECK-SSE1-NEXT: andl %r8d, %eax
; CHECK-SSE1-NEXT: xorl %edx, %eax
; CHECK-SSE1-NEXT: xorl %ecx, %esi
; CHECK-SSE1-NEXT: movl %esi, %edx
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: in_v2i32:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: andps %xmm2, %xmm0
; CHECK-SSE2-NEXT: andnps %xmm1, %xmm2
; CHECK-SSE2-NEXT: orps %xmm2, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: in_v2i32:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT: retq
  %n0 = xor <2 x i32> %x, %y ; bits in which x and y differ
  %n1 = and <2 x i32> %n0, %mask ; keep only the differing bits selected by mask
  %r = xor <2 x i32> %n1, %y ; flip those bits in y, yielding x's bits there
  ret <2 x i32> %r
}

; Same xor/and/xor merge, r = ((x ^ y) & mask) ^ y, on <1 x i64>.
; All four configurations share one set of expectations.
define <1 x i64> @in_v1i64(<1 x i64> %x, <1 x i64> %y, <1 x i64> %mask) nounwind {
; CHECK-LABEL: in_v1i64:
; CHECK: # %bb.0:
; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: xorq %rsi, %rax
; CHECK-NEXT: andq %rdx, %rax
; CHECK-NEXT: xorq %rsi, %rax
; CHECK-NEXT: retq
  %n0 = xor <1 x i64> %x, %y ; bits in which x and y differ
  %n1 = and <1 x i64> %n0, %mask ; keep only the differing bits selected by mask
  %r = xor <1 x i64> %n1, %y ; flip those bits in y, yielding x's bits there
  ret <1 x i64> %r
}

; ============================================================================ ;
; 128-bit vector width
; ============================================================================ ;

; Same xor/and/xor merge, r = ((x ^ y) & mask) ^ y, on <16 x i8>.
; In the two non-SSE2 configurations the expected code handles each byte with
; its own xor/and/xor triple and spills three incoming 32-bit registers
; (%ecx, %edx, %esi) to the stack, reloading them near the end.
define <16 x i8> @in_v16i8(<16 x i8> %x, <16 x i8> %y, <16 x i8> %mask) nounwind {
; CHECK-BASELINE-LABEL: in_v16i8:
; CHECK-BASELINE: # %bb.0:
; CHECK-BASELINE-NEXT: pushq %rbp
; CHECK-BASELINE-NEXT: pushq %r15
; CHECK-BASELINE-NEXT: pushq %r14
; CHECK-BASELINE-NEXT: pushq %r13
; CHECK-BASELINE-NEXT: pushq %r12
; CHECK-BASELINE-NEXT: pushq %rbx
; CHECK-BASELINE-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-BASELINE-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-BASELINE-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-BASELINE-NEXT: movq %rdi, %rdx
; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %esi
; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx
; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %ebp
; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r14d
; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r15d
; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r12d
; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r13d
; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d
; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d
; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; CHECK-BASELINE-NEXT: xorb %dil, %r9b
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r9b
; CHECK-BASELINE-NEXT: xorb %dil, %r9b
; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; CHECK-BASELINE-NEXT: xorb %r10b, %dil
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %dil
; CHECK-BASELINE-NEXT: xorb %r10b, %dil
; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d
; CHECK-BASELINE-NEXT: xorb %r11b, %r10b
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r10b
; CHECK-BASELINE-NEXT: xorb %r11b, %r10b
; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d
; CHECK-BASELINE-NEXT: xorb %r13b, %r11b
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r11b
; CHECK-BASELINE-NEXT: xorb %r13b, %r11b
; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r13d
; CHECK-BASELINE-NEXT: xorb %r12b, %r13b
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r13b
; CHECK-BASELINE-NEXT: xorb %r12b, %r13b
; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r12d
; CHECK-BASELINE-NEXT: xorb %r15b, %r12b
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r12b
; CHECK-BASELINE-NEXT: xorb %r15b, %r12b
; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r15d
; CHECK-BASELINE-NEXT: xorb %r14b, %r15b
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r15b
; CHECK-BASELINE-NEXT: xorb %r14b, %r15b
; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %r14d
; CHECK-BASELINE-NEXT: xorb %bpl, %r14b
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r14b
; CHECK-BASELINE-NEXT: xorb %bpl, %r14b
; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %ebp
; CHECK-BASELINE-NEXT: xorb %bl, %bpl
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %bpl
; CHECK-BASELINE-NEXT: xorb %bl, %bpl
; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx
; CHECK-BASELINE-NEXT: xorb %al, %bl
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %bl
; CHECK-BASELINE-NEXT: xorb %al, %bl
; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-BASELINE-NEXT: xorb %cl, %al
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %al
; CHECK-BASELINE-NEXT: xorb %cl, %al
; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; CHECK-BASELINE-NEXT: xorb %sil, %cl
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %cl
; CHECK-BASELINE-NEXT: xorb %sil, %cl
; CHECK-BASELINE-NEXT: movb %cl, 15(%rdx)
; CHECK-BASELINE-NEXT: movb %al, 14(%rdx)
; CHECK-BASELINE-NEXT: movb %bl, 13(%rdx)
; CHECK-BASELINE-NEXT: movb %bpl, 12(%rdx)
; CHECK-BASELINE-NEXT: movb %r14b, 11(%rdx)
; CHECK-BASELINE-NEXT: movb %r15b, 10(%rdx)
; CHECK-BASELINE-NEXT: movb %r12b, 9(%rdx)
; CHECK-BASELINE-NEXT: movb %r13b, 8(%rdx)
; CHECK-BASELINE-NEXT: movb %r11b, 7(%rdx)
; CHECK-BASELINE-NEXT: movb %r10b, 6(%rdx)
; CHECK-BASELINE-NEXT: movb %dil, 5(%rdx)
; CHECK-BASELINE-NEXT: movb %r9b, 4(%rdx)
; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-BASELINE-NEXT: xorb %al, %r8b
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %r8b
; CHECK-BASELINE-NEXT: xorb %al, %r8b
; CHECK-BASELINE-NEXT: movb %r8b, 3(%rdx)
; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-BASELINE-NEXT: xorb %al, %cl
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %cl
; CHECK-BASELINE-NEXT: xorb %al, %cl
; CHECK-BASELINE-NEXT: movb %cl, 2(%rdx)
; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-BASELINE-NEXT: xorb %al, %cl
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %cl
; CHECK-BASELINE-NEXT: xorb %al, %cl
; CHECK-BASELINE-NEXT: movb %cl, 1(%rdx)
; CHECK-BASELINE-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-BASELINE-NEXT: xorb %al, %cl
; CHECK-BASELINE-NEXT: andb {{[0-9]+}}(%rsp), %cl
; CHECK-BASELINE-NEXT: xorb %al, %cl
; CHECK-BASELINE-NEXT: movb %cl, (%rdx)
; CHECK-BASELINE-NEXT: movq %rdx, %rax
; CHECK-BASELINE-NEXT: popq %rbx
; CHECK-BASELINE-NEXT: popq %r12
; CHECK-BASELINE-NEXT: popq %r13
; CHECK-BASELINE-NEXT: popq %r14
; CHECK-BASELINE-NEXT: popq %r15
; CHECK-BASELINE-NEXT: popq %rbp
; CHECK-BASELINE-NEXT: retq
;
; CHECK-SSE1-LABEL: in_v16i8:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: pushq %rbp
; CHECK-SSE1-NEXT: pushq %r15
; CHECK-SSE1-NEXT: pushq %r14
; CHECK-SSE1-NEXT: pushq %r13
; CHECK-SSE1-NEXT: pushq %r12
; CHECK-SSE1-NEXT: pushq %rbx
; CHECK-SSE1-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-SSE1-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-SSE1-NEXT: movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-SSE1-NEXT: movq %rdi, %rdx
; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %esi
; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx
; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %ebp
; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r14d
; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r15d
; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r12d
; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r13d
; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d
; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d
; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; CHECK-SSE1-NEXT: xorb %dil, %r9b
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r9b
; CHECK-SSE1-NEXT: xorb %dil, %r9b
; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %edi
; CHECK-SSE1-NEXT: xorb %r10b, %dil
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %dil
; CHECK-SSE1-NEXT: xorb %r10b, %dil
; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r10d
; CHECK-SSE1-NEXT: xorb %r11b, %r10b
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r10b
; CHECK-SSE1-NEXT: xorb %r11b, %r10b
; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r11d
; CHECK-SSE1-NEXT: xorb %r13b, %r11b
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r11b
; CHECK-SSE1-NEXT: xorb %r13b, %r11b
; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r13d
; CHECK-SSE1-NEXT: xorb %r12b, %r13b
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r13b
; CHECK-SSE1-NEXT: xorb %r12b, %r13b
; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r12d
; CHECK-SSE1-NEXT: xorb %r15b, %r12b
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r12b
; CHECK-SSE1-NEXT: xorb %r15b, %r12b
; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r15d
; CHECK-SSE1-NEXT: xorb %r14b, %r15b
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r15b
; CHECK-SSE1-NEXT: xorb %r14b, %r15b
; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %r14d
; CHECK-SSE1-NEXT: xorb %bpl, %r14b
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r14b
; CHECK-SSE1-NEXT: xorb %bpl, %r14b
; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %ebp
; CHECK-SSE1-NEXT: xorb %bl, %bpl
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %bpl
; CHECK-SSE1-NEXT: xorb %bl, %bpl
; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %ebx
; CHECK-SSE1-NEXT: xorb %al, %bl
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %bl
; CHECK-SSE1-NEXT: xorb %al, %bl
; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-SSE1-NEXT: xorb %cl, %al
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %al
; CHECK-SSE1-NEXT: xorb %cl, %al
; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %ecx
; CHECK-SSE1-NEXT: xorb %sil, %cl
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %cl
; CHECK-SSE1-NEXT: xorb %sil, %cl
; CHECK-SSE1-NEXT: movb %cl, 15(%rdx)
; CHECK-SSE1-NEXT: movb %al, 14(%rdx)
; CHECK-SSE1-NEXT: movb %bl, 13(%rdx)
; CHECK-SSE1-NEXT: movb %bpl, 12(%rdx)
; CHECK-SSE1-NEXT: movb %r14b, 11(%rdx)
; CHECK-SSE1-NEXT: movb %r15b, 10(%rdx)
; CHECK-SSE1-NEXT: movb %r12b, 9(%rdx)
; CHECK-SSE1-NEXT: movb %r13b, 8(%rdx)
; CHECK-SSE1-NEXT: movb %r11b, 7(%rdx)
; CHECK-SSE1-NEXT: movb %r10b, 6(%rdx)
; CHECK-SSE1-NEXT: movb %dil, 5(%rdx)
; CHECK-SSE1-NEXT: movb %r9b, 4(%rdx)
; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-SSE1-NEXT: xorb %al, %r8b
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %r8b
; CHECK-SSE1-NEXT: xorb %al, %r8b
; CHECK-SSE1-NEXT: movb %r8b, 3(%rdx)
; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-SSE1-NEXT: xorb %al, %cl
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %cl
; CHECK-SSE1-NEXT: xorb %al, %cl
; CHECK-SSE1-NEXT: movb %cl, 2(%rdx)
; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-SSE1-NEXT: xorb %al, %cl
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %cl
; CHECK-SSE1-NEXT: xorb %al, %cl
; CHECK-SSE1-NEXT: movb %cl, 1(%rdx)
; CHECK-SSE1-NEXT: movzbl {{[0-9]+}}(%rsp), %eax
; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload
; CHECK-SSE1-NEXT: xorb %al, %cl
; CHECK-SSE1-NEXT: andb {{[0-9]+}}(%rsp), %cl
; CHECK-SSE1-NEXT: xorb %al, %cl
; CHECK-SSE1-NEXT: movb %cl, (%rdx)
; CHECK-SSE1-NEXT: movq %rdx, %rax
; CHECK-SSE1-NEXT: popq %rbx
; CHECK-SSE1-NEXT: popq %r12
; CHECK-SSE1-NEXT: popq %r13
; CHECK-SSE1-NEXT: popq %r14
; CHECK-SSE1-NEXT: popq %r15
; CHECK-SSE1-NEXT: popq %rbp
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: in_v16i8:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: andps %xmm2, %xmm0
; CHECK-SSE2-NEXT: andnps %xmm1, %xmm2
; CHECK-SSE2-NEXT: orps %xmm2, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: in_v16i8:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT: retq
  %n0 = xor <16 x i8> %x, %y ; bits in which x and y differ
  %n1 = and <16 x i8> %n0, %mask ; keep only the differing bits selected by mask
  %r = xor <16 x i8> %n1, %y ; flip those bits in y, yielding x's bits there
  ret <16 x i8> %r
}

; Same xor/and/xor merge, r = ((x ^ y) & mask) ^ y, on <8 x i16>.
define <8 x i16> @in_v8i16(<8 x i16> %x, <8 x i16> %y, <8 x i16> %mask) nounwind {
; CHECK-BASELINE-LABEL: in_v8i16:
; CHECK-BASELINE: # %bb.0:
; CHECK-BASELINE-NEXT: pushq %rbx
; CHECK-BASELINE-NEXT: movq %rdi, %rax
; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %edi
; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %r10d
; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %r11d
; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %ebx
; CHECK-BASELINE-NEXT: xorl %ebx, %esi
; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %si
; CHECK-BASELINE-NEXT: xorl %ebx, %esi
; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %ebx
; CHECK-BASELINE-NEXT: xorl %ebx, %edx
; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %dx
; CHECK-BASELINE-NEXT: xorl %ebx, %edx
; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %ebx
; CHECK-BASELINE-NEXT: xorl %ebx, %ecx
; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %cx
; CHECK-BASELINE-NEXT: xorl %ebx, %ecx
; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %ebx
; CHECK-BASELINE-NEXT: xorl %ebx, %r8d
; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %r8w
; CHECK-BASELINE-NEXT: xorl %ebx, %r8d
; CHECK-BASELINE-NEXT: movl {{[0-9]+}}(%rsp), %ebx
; CHECK-BASELINE-NEXT: xorl %ebx, %r9d
; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %r9w
; CHECK-BASELINE-NEXT: xorl %ebx, %r9d
; CHECK-BASELINE-NEXT: movzwl {{[0-9]+}}(%rsp), %ebx
; CHECK-BASELINE-NEXT: xorw %r11w, %bx
; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %bx
; CHECK-BASELINE-NEXT: xorl %r11d, %ebx
; CHECK-BASELINE-NEXT: movzwl {{[0-9]+}}(%rsp), %r11d
; CHECK-BASELINE-NEXT: xorw %r10w, %r11w
; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %r11w
; CHECK-BASELINE-NEXT: xorl %r10d, %r11d
; CHECK-BASELINE-NEXT: movzwl {{[0-9]+}}(%rsp), %r10d
; CHECK-BASELINE-NEXT: xorw %di, %r10w
; CHECK-BASELINE-NEXT: andw {{[0-9]+}}(%rsp), %r10w
; CHECK-BASELINE-NEXT: xorl %edi, %r10d
; CHECK-BASELINE-NEXT: movw %r10w, 14(%rax)
; CHECK-BASELINE-NEXT: movw %r11w, 12(%rax)
; CHECK-BASELINE-NEXT: movw %bx, 10(%rax)
; CHECK-BASELINE-NEXT: movw %r9w, 8(%rax)
; CHECK-BASELINE-NEXT: movw %r8w, 6(%rax)
; CHECK-BASELINE-NEXT: movw %cx, 4(%rax)
; CHECK-BASELINE-NEXT: movw %dx, 2(%rax)
; CHECK-BASELINE-NEXT: movw %si, (%rax)
; CHECK-BASELINE-NEXT: popq %rbx
; CHECK-BASELINE-NEXT: retq
;
; CHECK-SSE1-LABEL: in_v8i16:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: pushq %rbx
; CHECK-SSE1-NEXT: movq %rdi, %rax
; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %edi
; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %r10d
; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %r11d
; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %ebx
; CHECK-SSE1-NEXT: xorl %ebx, %esi
; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %si
; CHECK-SSE1-NEXT: xorl %ebx, %esi
; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %ebx
; CHECK-SSE1-NEXT: xorl %ebx, %edx
; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %dx
; CHECK-SSE1-NEXT: xorl %ebx, %edx
; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %ebx
; CHECK-SSE1-NEXT: xorl %ebx, %ecx
; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %cx
; CHECK-SSE1-NEXT: xorl %ebx, %ecx
; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %ebx
; CHECK-SSE1-NEXT: xorl %ebx, %r8d
; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %r8w
; CHECK-SSE1-NEXT: xorl %ebx, %r8d
; CHECK-SSE1-NEXT: movl {{[0-9]+}}(%rsp), %ebx
; CHECK-SSE1-NEXT: xorl %ebx, %r9d
; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %r9w
; CHECK-SSE1-NEXT: xorl %ebx, %r9d
; CHECK-SSE1-NEXT: movzwl {{[0-9]+}}(%rsp), %ebx
; CHECK-SSE1-NEXT: xorw %r11w, %bx
; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %bx
; CHECK-SSE1-NEXT: xorl %r11d, %ebx
; CHECK-SSE1-NEXT: movzwl {{[0-9]+}}(%rsp), %r11d
; CHECK-SSE1-NEXT: xorw %r10w, %r11w
; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %r11w
; CHECK-SSE1-NEXT: xorl %r10d, %r11d
; CHECK-SSE1-NEXT: movzwl {{[0-9]+}}(%rsp), %r10d
; CHECK-SSE1-NEXT: xorw %di, %r10w
; CHECK-SSE1-NEXT: andw {{[0-9]+}}(%rsp), %r10w
; CHECK-SSE1-NEXT: xorl %edi, %r10d
; CHECK-SSE1-NEXT: movw %r10w, 14(%rax)
; CHECK-SSE1-NEXT: movw %r11w, 12(%rax)
; CHECK-SSE1-NEXT: movw %bx, 10(%rax)
; CHECK-SSE1-NEXT: movw %r9w, 8(%rax)
; CHECK-SSE1-NEXT: movw %r8w, 6(%rax)
; CHECK-SSE1-NEXT: movw %cx, 4(%rax)
; CHECK-SSE1-NEXT: movw %dx, 2(%rax)
; CHECK-SSE1-NEXT: movw %si, (%rax)
; CHECK-SSE1-NEXT: popq %rbx
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: in_v8i16:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: andps %xmm2, %xmm0
; CHECK-SSE2-NEXT: andnps %xmm1, %xmm2
; CHECK-SSE2-NEXT: orps %xmm2, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: in_v8i16:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT: retq
  %n0 = xor <8 x i16> %x, %y ; bits in which x and y differ
  %n1 = and <8 x i16> %n0, %mask ; keep only the differing bits selected by mask
  %r = xor <8 x i16> %n1, %y ; flip those bits in y, yielding x's bits there
  ret <8 x i16> %r
}

; Same xor/and/xor merge, r = ((x ^ y) & mask) ^ y, on <4 x i32>.
; Unlike the by-value functions around it, this one takes three pointers and
; loads %x, %y and %mask from them with 16-byte alignment.
define <4 x i32> @in_v4i32(ptr%px, ptr%py, ptr%pmask) nounwind {
; CHECK-BASELINE-LABEL: in_v4i32:
; CHECK-BASELINE: # %bb.0:
; CHECK-BASELINE-NEXT: pushq %rbx
; CHECK-BASELINE-NEXT: movq %rdi, %rax
; CHECK-BASELINE-NEXT: movl 12(%rdx), %edi
; CHECK-BASELINE-NEXT: movl 8(%rdx), %r8d
; CHECK-BASELINE-NEXT: movl (%rdx), %r9d
; CHECK-BASELINE-NEXT: movl 4(%rdx), %r10d
; CHECK-BASELINE-NEXT: movl (%rsi), %edx
; CHECK-BASELINE-NEXT: xorl %r9d, %edx
; CHECK-BASELINE-NEXT: movl 4(%rsi), %r11d
; CHECK-BASELINE-NEXT: xorl %r10d, %r11d
; CHECK-BASELINE-NEXT: movl 8(%rsi), %ebx
; CHECK-BASELINE-NEXT: xorl %r8d, %ebx
; CHECK-BASELINE-NEXT: movl 12(%rsi), %esi
; CHECK-BASELINE-NEXT: xorl %edi, %esi
; CHECK-BASELINE-NEXT: andl 12(%rcx), %esi
; CHECK-BASELINE-NEXT: andl 8(%rcx), %ebx
; CHECK-BASELINE-NEXT: andl 4(%rcx), %r11d
; CHECK-BASELINE-NEXT: andl (%rcx), %edx
; CHECK-BASELINE-NEXT: xorl %r9d, %edx
; CHECK-BASELINE-NEXT: xorl %r10d, %r11d
; CHECK-BASELINE-NEXT: xorl %r8d, %ebx
; CHECK-BASELINE-NEXT: xorl %edi, %esi
; CHECK-BASELINE-NEXT: movl %esi, 12(%rax)
; CHECK-BASELINE-NEXT: movl %ebx, 8(%rax)
; CHECK-BASELINE-NEXT: movl %r11d, 4(%rax)
; CHECK-BASELINE-NEXT: movl %edx, (%rax)
; CHECK-BASELINE-NEXT: popq %rbx
; CHECK-BASELINE-NEXT: retq
;
; CHECK-SSE1-LABEL: in_v4i32:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: movq %rdi, %rax
; CHECK-SSE1-NEXT: movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT: movaps %xmm0, %xmm1
; CHECK-SSE1-NEXT: andnps (%rdx), %xmm1
; CHECK-SSE1-NEXT: andps (%rsi), %xmm0
; CHECK-SSE1-NEXT: orps %xmm1, %xmm0
; CHECK-SSE1-NEXT: movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: in_v4i32:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT: movaps %xmm0, %xmm1
; CHECK-SSE2-NEXT: andnps (%rsi), %xmm1
; CHECK-SSE2-NEXT: andps (%rdi), %xmm0
; CHECK-SSE2-NEXT: orps %xmm1, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: in_v4i32:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vmovdqa (%rdi), %xmm0
; CHECK-XOP-NEXT: vmovdqa (%rdx), %xmm1
; CHECK-XOP-NEXT: vpcmov %xmm1, (%rsi), %xmm0, %xmm0
; CHECK-XOP-NEXT: retq
  %x = load <4 x i32>, ptr%px, align 16
  %y = load <4 x i32>, ptr%py, align 16
  %mask = load <4 x i32>, ptr%pmask, align 16
  %n0 = xor <4 x i32> %x, %y ; bits in which x and y differ
  %n1 = and <4 x i32> %n0, %mask ; keep only the differing bits selected by mask
  %r = xor <4 x i32> %n1, %y ; flip those bits in y, yielding x's bits there
  ret <4 x i32> %r
}

; Same xor/and/xor merge, r = ((x ^ y) & mask) ^ y, on <2 x i64>.
define <2 x i64> @in_v2i64(<2 x i64> %x, <2 x i64> %y, <2 x i64> %mask) nounwind {
; CHECK-BASELINE-LABEL: in_v2i64:
; CHECK-BASELINE: # %bb.0:
; CHECK-BASELINE-NEXT: movq %rdi, %rax
; CHECK-BASELINE-NEXT: xorq %rdx, %rax
; CHECK-BASELINE-NEXT: xorq %rcx, %rsi
; CHECK-BASELINE-NEXT: andq %r9, %rsi
; CHECK-BASELINE-NEXT: andq %r8, %rax
; CHECK-BASELINE-NEXT: xorq %rdx, %rax
; CHECK-BASELINE-NEXT: xorq %rcx, %rsi
; CHECK-BASELINE-NEXT: movq %rsi, %rdx
; CHECK-BASELINE-NEXT: retq
;
; CHECK-SSE1-LABEL: in_v2i64:
; CHECK-SSE1: # %bb.0:
; CHECK-SSE1-NEXT: movq %rdi, %rax
; CHECK-SSE1-NEXT: xorq %rdx, %rax
; CHECK-SSE1-NEXT: xorq %rcx, %rsi
; CHECK-SSE1-NEXT: andq %r9, %rsi
; CHECK-SSE1-NEXT: andq %r8, %rax
; CHECK-SSE1-NEXT: xorq %rdx, %rax
; CHECK-SSE1-NEXT: xorq %rcx, %rsi
; CHECK-SSE1-NEXT: movq %rsi, %rdx
; CHECK-SSE1-NEXT: retq
;
; CHECK-SSE2-LABEL: in_v2i64:
; CHECK-SSE2: # %bb.0:
; CHECK-SSE2-NEXT: andps %xmm2, %xmm0
; CHECK-SSE2-NEXT: andnps %xmm1, %xmm2
; CHECK-SSE2-NEXT: orps %xmm2, %xmm0
; CHECK-SSE2-NEXT: retq
;
; CHECK-XOP-LABEL: in_v2i64:
; CHECK-XOP: # %bb.0:
; CHECK-XOP-NEXT: vpcmov %xmm2, %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT: retq
  %n0 = xor <2 x i64> %x, %y ; bits in which x and y differ
  %n1 = and <2 x i64> %n0, %mask ; keep only the differing bits selected by mask
  %r = xor <2 x i64> %n1, %y ; flip those bits in y, yielding x's bits there
  ret <2 x i64> %r
}

; ============================================================================ ;
; 256-bit vector width
; ============================================================================ ;

define <32 x i8> @in_v32i8(ptr%px, ptr%py, ptr%pmask) nounwind {
; CHECK-BASELINE-LABEL: in_v32i8:
; CHECK-BASELINE: # %bb.0:
; CHECK-BASELINE-NEXT: pushq %rbp
;
CHECK-BASELINE-NEXT: pushq %r15 3230; CHECK-BASELINE-NEXT: pushq %r14 3231; CHECK-BASELINE-NEXT: pushq %r13 3232; CHECK-BASELINE-NEXT: pushq %r12 3233; CHECK-BASELINE-NEXT: pushq %rbx 3234; CHECK-BASELINE-NEXT: movq %rcx, %r12 3235; CHECK-BASELINE-NEXT: movq %rdx, %r15 3236; CHECK-BASELINE-NEXT: movq %rsi, %r14 3237; CHECK-BASELINE-NEXT: movq %rdi, %r13 3238; CHECK-BASELINE-NEXT: movzbl 15(%rdx), %eax 3239; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3240; CHECK-BASELINE-NEXT: movzbl 14(%rdx), %eax 3241; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3242; CHECK-BASELINE-NEXT: movzbl 13(%rdx), %eax 3243; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3244; CHECK-BASELINE-NEXT: movzbl 12(%rdx), %eax 3245; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3246; CHECK-BASELINE-NEXT: movzbl 11(%rdx), %eax 3247; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3248; CHECK-BASELINE-NEXT: movzbl 10(%rdx), %eax 3249; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3250; CHECK-BASELINE-NEXT: movzbl 9(%rdx), %r8d 3251; CHECK-BASELINE-NEXT: movzbl 8(%rdx), %r9d 3252; CHECK-BASELINE-NEXT: movzbl 7(%rdx), %r10d 3253; CHECK-BASELINE-NEXT: movzbl 6(%rdx), %ebp 3254; CHECK-BASELINE-NEXT: movzbl 5(%rdx), %edi 3255; CHECK-BASELINE-NEXT: movzbl 4(%rdx), %esi 3256; CHECK-BASELINE-NEXT: movzbl 3(%rdx), %eax 3257; CHECK-BASELINE-NEXT: movzbl 2(%rdx), %ecx 3258; CHECK-BASELINE-NEXT: movzbl (%rdx), %r11d 3259; CHECK-BASELINE-NEXT: movzbl 1(%rdx), %edx 3260; CHECK-BASELINE-NEXT: movzbl (%r14), %ebx 3261; CHECK-BASELINE-NEXT: xorb %r11b, %bl 3262; CHECK-BASELINE-NEXT: andb (%r12), %bl 3263; CHECK-BASELINE-NEXT: xorb %r11b, %bl 3264; CHECK-BASELINE-NEXT: movb %bl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3265; CHECK-BASELINE-NEXT: movzbl 1(%r14), %r11d 3266; CHECK-BASELINE-NEXT: xorb %dl, %r11b 3267; CHECK-BASELINE-NEXT: andb 1(%r12), %r11b 3268; 
CHECK-BASELINE-NEXT: xorb %dl, %r11b 3269; CHECK-BASELINE-NEXT: movb %r11b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3270; CHECK-BASELINE-NEXT: movzbl 2(%r14), %edx 3271; CHECK-BASELINE-NEXT: xorb %cl, %dl 3272; CHECK-BASELINE-NEXT: andb 2(%r12), %dl 3273; CHECK-BASELINE-NEXT: xorb %cl, %dl 3274; CHECK-BASELINE-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3275; CHECK-BASELINE-NEXT: movzbl 3(%r14), %ecx 3276; CHECK-BASELINE-NEXT: xorb %al, %cl 3277; CHECK-BASELINE-NEXT: andb 3(%r12), %cl 3278; CHECK-BASELINE-NEXT: xorb %al, %cl 3279; CHECK-BASELINE-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3280; CHECK-BASELINE-NEXT: movzbl 4(%r14), %eax 3281; CHECK-BASELINE-NEXT: xorb %sil, %al 3282; CHECK-BASELINE-NEXT: andb 4(%r12), %al 3283; CHECK-BASELINE-NEXT: xorb %sil, %al 3284; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3285; CHECK-BASELINE-NEXT: movzbl 5(%r14), %eax 3286; CHECK-BASELINE-NEXT: xorb %dil, %al 3287; CHECK-BASELINE-NEXT: andb 5(%r12), %al 3288; CHECK-BASELINE-NEXT: xorb %dil, %al 3289; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3290; CHECK-BASELINE-NEXT: movzbl 6(%r14), %eax 3291; CHECK-BASELINE-NEXT: xorb %bpl, %al 3292; CHECK-BASELINE-NEXT: andb 6(%r12), %al 3293; CHECK-BASELINE-NEXT: xorb %bpl, %al 3294; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3295; CHECK-BASELINE-NEXT: movzbl 7(%r14), %eax 3296; CHECK-BASELINE-NEXT: xorb %r10b, %al 3297; CHECK-BASELINE-NEXT: andb 7(%r12), %al 3298; CHECK-BASELINE-NEXT: xorb %r10b, %al 3299; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3300; CHECK-BASELINE-NEXT: movzbl 8(%r14), %eax 3301; CHECK-BASELINE-NEXT: xorb %r9b, %al 3302; CHECK-BASELINE-NEXT: andb 8(%r12), %al 3303; CHECK-BASELINE-NEXT: xorb %r9b, %al 3304; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3305; CHECK-BASELINE-NEXT: movzbl 9(%r14), %eax 3306; CHECK-BASELINE-NEXT: xorb %r8b, %al 3307; 
CHECK-BASELINE-NEXT: andb 9(%r12), %al 3308; CHECK-BASELINE-NEXT: xorb %r8b, %al 3309; CHECK-BASELINE-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3310; CHECK-BASELINE-NEXT: movzbl 10(%r14), %ecx 3311; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 3312; CHECK-BASELINE-NEXT: xorb %al, %cl 3313; CHECK-BASELINE-NEXT: andb 10(%r12), %cl 3314; CHECK-BASELINE-NEXT: xorb %al, %cl 3315; CHECK-BASELINE-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3316; CHECK-BASELINE-NEXT: movzbl 11(%r14), %ecx 3317; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 3318; CHECK-BASELINE-NEXT: xorb %al, %cl 3319; CHECK-BASELINE-NEXT: andb 11(%r12), %cl 3320; CHECK-BASELINE-NEXT: xorb %al, %cl 3321; CHECK-BASELINE-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3322; CHECK-BASELINE-NEXT: movzbl 12(%r14), %ecx 3323; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 3324; CHECK-BASELINE-NEXT: xorb %al, %cl 3325; CHECK-BASELINE-NEXT: andb 12(%r12), %cl 3326; CHECK-BASELINE-NEXT: xorb %al, %cl 3327; CHECK-BASELINE-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3328; CHECK-BASELINE-NEXT: movzbl 13(%r14), %ecx 3329; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 3330; CHECK-BASELINE-NEXT: xorb %al, %cl 3331; CHECK-BASELINE-NEXT: andb 13(%r12), %cl 3332; CHECK-BASELINE-NEXT: xorb %al, %cl 3333; CHECK-BASELINE-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3334; CHECK-BASELINE-NEXT: movzbl 14(%r14), %ecx 3335; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 3336; CHECK-BASELINE-NEXT: xorb %al, %cl 3337; CHECK-BASELINE-NEXT: andb 14(%r12), %cl 3338; CHECK-BASELINE-NEXT: xorb %al, %cl 3339; CHECK-BASELINE-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3340; CHECK-BASELINE-NEXT: movzbl 15(%r14), %ecx 3341; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded 
Reload 3342; CHECK-BASELINE-NEXT: xorb %al, %cl 3343; CHECK-BASELINE-NEXT: andb 15(%r12), %cl 3344; CHECK-BASELINE-NEXT: xorb %al, %cl 3345; CHECK-BASELINE-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3346; CHECK-BASELINE-NEXT: movzbl 16(%r15), %eax 3347; CHECK-BASELINE-NEXT: movzbl 16(%r14), %ecx 3348; CHECK-BASELINE-NEXT: xorb %al, %cl 3349; CHECK-BASELINE-NEXT: andb 16(%r12), %cl 3350; CHECK-BASELINE-NEXT: xorb %al, %cl 3351; CHECK-BASELINE-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3352; CHECK-BASELINE-NEXT: movzbl 17(%r15), %eax 3353; CHECK-BASELINE-NEXT: movzbl 17(%r14), %ecx 3354; CHECK-BASELINE-NEXT: xorb %al, %cl 3355; CHECK-BASELINE-NEXT: andb 17(%r12), %cl 3356; CHECK-BASELINE-NEXT: xorb %al, %cl 3357; CHECK-BASELINE-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3358; CHECK-BASELINE-NEXT: movzbl 18(%r15), %eax 3359; CHECK-BASELINE-NEXT: movzbl 18(%r14), %ecx 3360; CHECK-BASELINE-NEXT: xorb %al, %cl 3361; CHECK-BASELINE-NEXT: andb 18(%r12), %cl 3362; CHECK-BASELINE-NEXT: xorb %al, %cl 3363; CHECK-BASELINE-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3364; CHECK-BASELINE-NEXT: movzbl 19(%r15), %eax 3365; CHECK-BASELINE-NEXT: movzbl 19(%r14), %ecx 3366; CHECK-BASELINE-NEXT: xorb %al, %cl 3367; CHECK-BASELINE-NEXT: andb 19(%r12), %cl 3368; CHECK-BASELINE-NEXT: xorb %al, %cl 3369; CHECK-BASELINE-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3370; CHECK-BASELINE-NEXT: movzbl 20(%r15), %eax 3371; CHECK-BASELINE-NEXT: movzbl 20(%r14), %ecx 3372; CHECK-BASELINE-NEXT: xorb %al, %cl 3373; CHECK-BASELINE-NEXT: andb 20(%r12), %cl 3374; CHECK-BASELINE-NEXT: xorb %al, %cl 3375; CHECK-BASELINE-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3376; CHECK-BASELINE-NEXT: movzbl 21(%r15), %eax 3377; CHECK-BASELINE-NEXT: movzbl 21(%r14), %ebp 3378; CHECK-BASELINE-NEXT: xorb %al, %bpl 3379; CHECK-BASELINE-NEXT: andb 21(%r12), %bpl 3380; CHECK-BASELINE-NEXT: xorb %al, %bpl 3381; CHECK-BASELINE-NEXT: movzbl 22(%r15), 
%eax 3382; CHECK-BASELINE-NEXT: movzbl 22(%r14), %ebx 3383; CHECK-BASELINE-NEXT: xorb %al, %bl 3384; CHECK-BASELINE-NEXT: andb 22(%r12), %bl 3385; CHECK-BASELINE-NEXT: xorb %al, %bl 3386; CHECK-BASELINE-NEXT: movzbl 23(%r15), %eax 3387; CHECK-BASELINE-NEXT: movzbl 23(%r14), %r11d 3388; CHECK-BASELINE-NEXT: xorb %al, %r11b 3389; CHECK-BASELINE-NEXT: andb 23(%r12), %r11b 3390; CHECK-BASELINE-NEXT: xorb %al, %r11b 3391; CHECK-BASELINE-NEXT: movzbl 24(%r15), %eax 3392; CHECK-BASELINE-NEXT: movzbl 24(%r14), %r9d 3393; CHECK-BASELINE-NEXT: xorb %al, %r9b 3394; CHECK-BASELINE-NEXT: andb 24(%r12), %r9b 3395; CHECK-BASELINE-NEXT: xorb %al, %r9b 3396; CHECK-BASELINE-NEXT: movzbl 25(%r15), %eax 3397; CHECK-BASELINE-NEXT: movzbl 25(%r14), %r8d 3398; CHECK-BASELINE-NEXT: xorb %al, %r8b 3399; CHECK-BASELINE-NEXT: andb 25(%r12), %r8b 3400; CHECK-BASELINE-NEXT: xorb %al, %r8b 3401; CHECK-BASELINE-NEXT: movzbl 26(%r15), %eax 3402; CHECK-BASELINE-NEXT: movzbl 26(%r14), %edi 3403; CHECK-BASELINE-NEXT: xorb %al, %dil 3404; CHECK-BASELINE-NEXT: andb 26(%r12), %dil 3405; CHECK-BASELINE-NEXT: xorb %al, %dil 3406; CHECK-BASELINE-NEXT: movzbl 27(%r15), %eax 3407; CHECK-BASELINE-NEXT: movzbl 27(%r14), %esi 3408; CHECK-BASELINE-NEXT: xorb %al, %sil 3409; CHECK-BASELINE-NEXT: andb 27(%r12), %sil 3410; CHECK-BASELINE-NEXT: xorb %al, %sil 3411; CHECK-BASELINE-NEXT: movzbl 28(%r15), %eax 3412; CHECK-BASELINE-NEXT: movzbl 28(%r14), %edx 3413; CHECK-BASELINE-NEXT: xorb %al, %dl 3414; CHECK-BASELINE-NEXT: andb 28(%r12), %dl 3415; CHECK-BASELINE-NEXT: xorb %al, %dl 3416; CHECK-BASELINE-NEXT: movzbl 29(%r15), %eax 3417; CHECK-BASELINE-NEXT: movzbl 29(%r14), %ecx 3418; CHECK-BASELINE-NEXT: xorb %al, %cl 3419; CHECK-BASELINE-NEXT: andb 29(%r12), %cl 3420; CHECK-BASELINE-NEXT: xorb %al, %cl 3421; CHECK-BASELINE-NEXT: movzbl 30(%r15), %r10d 3422; CHECK-BASELINE-NEXT: movzbl 30(%r14), %eax 3423; CHECK-BASELINE-NEXT: xorb %r10b, %al 3424; CHECK-BASELINE-NEXT: andb 30(%r12), %al 3425; CHECK-BASELINE-NEXT: 
xorb %r10b, %al 3426; CHECK-BASELINE-NEXT: movzbl 31(%r15), %r10d 3427; CHECK-BASELINE-NEXT: movzbl 31(%r14), %r14d 3428; CHECK-BASELINE-NEXT: xorb %r10b, %r14b 3429; CHECK-BASELINE-NEXT: andb 31(%r12), %r14b 3430; CHECK-BASELINE-NEXT: xorb %r10b, %r14b 3431; CHECK-BASELINE-NEXT: movb %r14b, 31(%r13) 3432; CHECK-BASELINE-NEXT: movb %al, 30(%r13) 3433; CHECK-BASELINE-NEXT: movb %cl, 29(%r13) 3434; CHECK-BASELINE-NEXT: movb %dl, 28(%r13) 3435; CHECK-BASELINE-NEXT: movb %sil, 27(%r13) 3436; CHECK-BASELINE-NEXT: movb %dil, 26(%r13) 3437; CHECK-BASELINE-NEXT: movb %r8b, 25(%r13) 3438; CHECK-BASELINE-NEXT: movb %r9b, 24(%r13) 3439; CHECK-BASELINE-NEXT: movb %r11b, 23(%r13) 3440; CHECK-BASELINE-NEXT: movb %bl, 22(%r13) 3441; CHECK-BASELINE-NEXT: movb %bpl, 21(%r13) 3442; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 3443; CHECK-BASELINE-NEXT: movb %al, 20(%r13) 3444; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 3445; CHECK-BASELINE-NEXT: movb %al, 19(%r13) 3446; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 3447; CHECK-BASELINE-NEXT: movb %al, 18(%r13) 3448; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 3449; CHECK-BASELINE-NEXT: movb %al, 17(%r13) 3450; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 3451; CHECK-BASELINE-NEXT: movb %al, 16(%r13) 3452; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 3453; CHECK-BASELINE-NEXT: movb %al, 15(%r13) 3454; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 3455; CHECK-BASELINE-NEXT: movb %al, 14(%r13) 3456; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 3457; CHECK-BASELINE-NEXT: movb %al, 13(%r13) 3458; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 3459; CHECK-BASELINE-NEXT: movb %al, 12(%r13) 3460; 
CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 3461; CHECK-BASELINE-NEXT: movb %al, 11(%r13) 3462; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 3463; CHECK-BASELINE-NEXT: movb %al, 10(%r13) 3464; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 3465; CHECK-BASELINE-NEXT: movb %al, 9(%r13) 3466; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 3467; CHECK-BASELINE-NEXT: movb %al, 8(%r13) 3468; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 3469; CHECK-BASELINE-NEXT: movb %al, 7(%r13) 3470; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 3471; CHECK-BASELINE-NEXT: movb %al, 6(%r13) 3472; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 3473; CHECK-BASELINE-NEXT: movb %al, 5(%r13) 3474; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 3475; CHECK-BASELINE-NEXT: movb %al, 4(%r13) 3476; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 3477; CHECK-BASELINE-NEXT: movb %al, 3(%r13) 3478; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 3479; CHECK-BASELINE-NEXT: movb %al, 2(%r13) 3480; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 3481; CHECK-BASELINE-NEXT: movb %al, 1(%r13) 3482; CHECK-BASELINE-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 3483; CHECK-BASELINE-NEXT: movb %al, (%r13) 3484; CHECK-BASELINE-NEXT: movq %r13, %rax 3485; CHECK-BASELINE-NEXT: popq %rbx 3486; CHECK-BASELINE-NEXT: popq %r12 3487; CHECK-BASELINE-NEXT: popq %r13 3488; CHECK-BASELINE-NEXT: popq %r14 3489; CHECK-BASELINE-NEXT: popq %r15 3490; CHECK-BASELINE-NEXT: popq %rbp 3491; CHECK-BASELINE-NEXT: retq 3492; 3493; CHECK-SSE1-LABEL: in_v32i8: 3494; CHECK-SSE1: # %bb.0: 3495; CHECK-SSE1-NEXT: pushq %rbp 3496; 
CHECK-SSE1-NEXT: pushq %r15 3497; CHECK-SSE1-NEXT: pushq %r14 3498; CHECK-SSE1-NEXT: pushq %r13 3499; CHECK-SSE1-NEXT: pushq %r12 3500; CHECK-SSE1-NEXT: pushq %rbx 3501; CHECK-SSE1-NEXT: movq %rcx, %r12 3502; CHECK-SSE1-NEXT: movq %rdx, %r15 3503; CHECK-SSE1-NEXT: movq %rsi, %r14 3504; CHECK-SSE1-NEXT: movq %rdi, %r13 3505; CHECK-SSE1-NEXT: movzbl 15(%rdx), %eax 3506; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3507; CHECK-SSE1-NEXT: movzbl 14(%rdx), %eax 3508; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3509; CHECK-SSE1-NEXT: movzbl 13(%rdx), %eax 3510; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3511; CHECK-SSE1-NEXT: movzbl 12(%rdx), %eax 3512; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3513; CHECK-SSE1-NEXT: movzbl 11(%rdx), %eax 3514; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3515; CHECK-SSE1-NEXT: movzbl 10(%rdx), %eax 3516; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3517; CHECK-SSE1-NEXT: movzbl 9(%rdx), %r8d 3518; CHECK-SSE1-NEXT: movzbl 8(%rdx), %r9d 3519; CHECK-SSE1-NEXT: movzbl 7(%rdx), %r10d 3520; CHECK-SSE1-NEXT: movzbl 6(%rdx), %ebp 3521; CHECK-SSE1-NEXT: movzbl 5(%rdx), %edi 3522; CHECK-SSE1-NEXT: movzbl 4(%rdx), %esi 3523; CHECK-SSE1-NEXT: movzbl 3(%rdx), %eax 3524; CHECK-SSE1-NEXT: movzbl 2(%rdx), %ecx 3525; CHECK-SSE1-NEXT: movzbl (%rdx), %r11d 3526; CHECK-SSE1-NEXT: movzbl 1(%rdx), %edx 3527; CHECK-SSE1-NEXT: movzbl (%r14), %ebx 3528; CHECK-SSE1-NEXT: xorb %r11b, %bl 3529; CHECK-SSE1-NEXT: andb (%r12), %bl 3530; CHECK-SSE1-NEXT: xorb %r11b, %bl 3531; CHECK-SSE1-NEXT: movb %bl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3532; CHECK-SSE1-NEXT: movzbl 1(%r14), %r11d 3533; CHECK-SSE1-NEXT: xorb %dl, %r11b 3534; CHECK-SSE1-NEXT: andb 1(%r12), %r11b 3535; CHECK-SSE1-NEXT: xorb %dl, %r11b 3536; CHECK-SSE1-NEXT: movb %r11b, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3537; CHECK-SSE1-NEXT: movzbl 2(%r14), %edx 3538; 
CHECK-SSE1-NEXT: xorb %cl, %dl 3539; CHECK-SSE1-NEXT: andb 2(%r12), %dl 3540; CHECK-SSE1-NEXT: xorb %cl, %dl 3541; CHECK-SSE1-NEXT: movb %dl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3542; CHECK-SSE1-NEXT: movzbl 3(%r14), %ecx 3543; CHECK-SSE1-NEXT: xorb %al, %cl 3544; CHECK-SSE1-NEXT: andb 3(%r12), %cl 3545; CHECK-SSE1-NEXT: xorb %al, %cl 3546; CHECK-SSE1-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3547; CHECK-SSE1-NEXT: movzbl 4(%r14), %eax 3548; CHECK-SSE1-NEXT: xorb %sil, %al 3549; CHECK-SSE1-NEXT: andb 4(%r12), %al 3550; CHECK-SSE1-NEXT: xorb %sil, %al 3551; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3552; CHECK-SSE1-NEXT: movzbl 5(%r14), %eax 3553; CHECK-SSE1-NEXT: xorb %dil, %al 3554; CHECK-SSE1-NEXT: andb 5(%r12), %al 3555; CHECK-SSE1-NEXT: xorb %dil, %al 3556; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3557; CHECK-SSE1-NEXT: movzbl 6(%r14), %eax 3558; CHECK-SSE1-NEXT: xorb %bpl, %al 3559; CHECK-SSE1-NEXT: andb 6(%r12), %al 3560; CHECK-SSE1-NEXT: xorb %bpl, %al 3561; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3562; CHECK-SSE1-NEXT: movzbl 7(%r14), %eax 3563; CHECK-SSE1-NEXT: xorb %r10b, %al 3564; CHECK-SSE1-NEXT: andb 7(%r12), %al 3565; CHECK-SSE1-NEXT: xorb %r10b, %al 3566; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3567; CHECK-SSE1-NEXT: movzbl 8(%r14), %eax 3568; CHECK-SSE1-NEXT: xorb %r9b, %al 3569; CHECK-SSE1-NEXT: andb 8(%r12), %al 3570; CHECK-SSE1-NEXT: xorb %r9b, %al 3571; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3572; CHECK-SSE1-NEXT: movzbl 9(%r14), %eax 3573; CHECK-SSE1-NEXT: xorb %r8b, %al 3574; CHECK-SSE1-NEXT: andb 9(%r12), %al 3575; CHECK-SSE1-NEXT: xorb %r8b, %al 3576; CHECK-SSE1-NEXT: movb %al, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3577; CHECK-SSE1-NEXT: movzbl 10(%r14), %ecx 3578; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 3579; CHECK-SSE1-NEXT: xorb %al, %cl 3580; 
CHECK-SSE1-NEXT: andb 10(%r12), %cl 3581; CHECK-SSE1-NEXT: xorb %al, %cl 3582; CHECK-SSE1-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3583; CHECK-SSE1-NEXT: movzbl 11(%r14), %ecx 3584; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 3585; CHECK-SSE1-NEXT: xorb %al, %cl 3586; CHECK-SSE1-NEXT: andb 11(%r12), %cl 3587; CHECK-SSE1-NEXT: xorb %al, %cl 3588; CHECK-SSE1-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3589; CHECK-SSE1-NEXT: movzbl 12(%r14), %ecx 3590; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 3591; CHECK-SSE1-NEXT: xorb %al, %cl 3592; CHECK-SSE1-NEXT: andb 12(%r12), %cl 3593; CHECK-SSE1-NEXT: xorb %al, %cl 3594; CHECK-SSE1-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3595; CHECK-SSE1-NEXT: movzbl 13(%r14), %ecx 3596; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 3597; CHECK-SSE1-NEXT: xorb %al, %cl 3598; CHECK-SSE1-NEXT: andb 13(%r12), %cl 3599; CHECK-SSE1-NEXT: xorb %al, %cl 3600; CHECK-SSE1-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3601; CHECK-SSE1-NEXT: movzbl 14(%r14), %ecx 3602; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 3603; CHECK-SSE1-NEXT: xorb %al, %cl 3604; CHECK-SSE1-NEXT: andb 14(%r12), %cl 3605; CHECK-SSE1-NEXT: xorb %al, %cl 3606; CHECK-SSE1-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3607; CHECK-SSE1-NEXT: movzbl 15(%r14), %ecx 3608; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 3609; CHECK-SSE1-NEXT: xorb %al, %cl 3610; CHECK-SSE1-NEXT: andb 15(%r12), %cl 3611; CHECK-SSE1-NEXT: xorb %al, %cl 3612; CHECK-SSE1-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3613; CHECK-SSE1-NEXT: movzbl 16(%r15), %eax 3614; CHECK-SSE1-NEXT: movzbl 16(%r14), %ecx 3615; CHECK-SSE1-NEXT: xorb %al, %cl 3616; CHECK-SSE1-NEXT: andb 16(%r12), %cl 3617; CHECK-SSE1-NEXT: xorb %al, %cl 3618; CHECK-SSE1-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) 
# 1-byte Spill 3619; CHECK-SSE1-NEXT: movzbl 17(%r15), %eax 3620; CHECK-SSE1-NEXT: movzbl 17(%r14), %ecx 3621; CHECK-SSE1-NEXT: xorb %al, %cl 3622; CHECK-SSE1-NEXT: andb 17(%r12), %cl 3623; CHECK-SSE1-NEXT: xorb %al, %cl 3624; CHECK-SSE1-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3625; CHECK-SSE1-NEXT: movzbl 18(%r15), %eax 3626; CHECK-SSE1-NEXT: movzbl 18(%r14), %ecx 3627; CHECK-SSE1-NEXT: xorb %al, %cl 3628; CHECK-SSE1-NEXT: andb 18(%r12), %cl 3629; CHECK-SSE1-NEXT: xorb %al, %cl 3630; CHECK-SSE1-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3631; CHECK-SSE1-NEXT: movzbl 19(%r15), %eax 3632; CHECK-SSE1-NEXT: movzbl 19(%r14), %ecx 3633; CHECK-SSE1-NEXT: xorb %al, %cl 3634; CHECK-SSE1-NEXT: andb 19(%r12), %cl 3635; CHECK-SSE1-NEXT: xorb %al, %cl 3636; CHECK-SSE1-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3637; CHECK-SSE1-NEXT: movzbl 20(%r15), %eax 3638; CHECK-SSE1-NEXT: movzbl 20(%r14), %ecx 3639; CHECK-SSE1-NEXT: xorb %al, %cl 3640; CHECK-SSE1-NEXT: andb 20(%r12), %cl 3641; CHECK-SSE1-NEXT: xorb %al, %cl 3642; CHECK-SSE1-NEXT: movb %cl, {{[-0-9]+}}(%r{{[sb]}}p) # 1-byte Spill 3643; CHECK-SSE1-NEXT: movzbl 21(%r15), %eax 3644; CHECK-SSE1-NEXT: movzbl 21(%r14), %ebp 3645; CHECK-SSE1-NEXT: xorb %al, %bpl 3646; CHECK-SSE1-NEXT: andb 21(%r12), %bpl 3647; CHECK-SSE1-NEXT: xorb %al, %bpl 3648; CHECK-SSE1-NEXT: movzbl 22(%r15), %eax 3649; CHECK-SSE1-NEXT: movzbl 22(%r14), %ebx 3650; CHECK-SSE1-NEXT: xorb %al, %bl 3651; CHECK-SSE1-NEXT: andb 22(%r12), %bl 3652; CHECK-SSE1-NEXT: xorb %al, %bl 3653; CHECK-SSE1-NEXT: movzbl 23(%r15), %eax 3654; CHECK-SSE1-NEXT: movzbl 23(%r14), %r11d 3655; CHECK-SSE1-NEXT: xorb %al, %r11b 3656; CHECK-SSE1-NEXT: andb 23(%r12), %r11b 3657; CHECK-SSE1-NEXT: xorb %al, %r11b 3658; CHECK-SSE1-NEXT: movzbl 24(%r15), %eax 3659; CHECK-SSE1-NEXT: movzbl 24(%r14), %r9d 3660; CHECK-SSE1-NEXT: xorb %al, %r9b 3661; CHECK-SSE1-NEXT: andb 24(%r12), %r9b 3662; CHECK-SSE1-NEXT: xorb %al, %r9b 3663; CHECK-SSE1-NEXT: movzbl 
25(%r15), %eax 3664; CHECK-SSE1-NEXT: movzbl 25(%r14), %r8d 3665; CHECK-SSE1-NEXT: xorb %al, %r8b 3666; CHECK-SSE1-NEXT: andb 25(%r12), %r8b 3667; CHECK-SSE1-NEXT: xorb %al, %r8b 3668; CHECK-SSE1-NEXT: movzbl 26(%r15), %eax 3669; CHECK-SSE1-NEXT: movzbl 26(%r14), %edi 3670; CHECK-SSE1-NEXT: xorb %al, %dil 3671; CHECK-SSE1-NEXT: andb 26(%r12), %dil 3672; CHECK-SSE1-NEXT: xorb %al, %dil 3673; CHECK-SSE1-NEXT: movzbl 27(%r15), %eax 3674; CHECK-SSE1-NEXT: movzbl 27(%r14), %esi 3675; CHECK-SSE1-NEXT: xorb %al, %sil 3676; CHECK-SSE1-NEXT: andb 27(%r12), %sil 3677; CHECK-SSE1-NEXT: xorb %al, %sil 3678; CHECK-SSE1-NEXT: movzbl 28(%r15), %eax 3679; CHECK-SSE1-NEXT: movzbl 28(%r14), %edx 3680; CHECK-SSE1-NEXT: xorb %al, %dl 3681; CHECK-SSE1-NEXT: andb 28(%r12), %dl 3682; CHECK-SSE1-NEXT: xorb %al, %dl 3683; CHECK-SSE1-NEXT: movzbl 29(%r15), %eax 3684; CHECK-SSE1-NEXT: movzbl 29(%r14), %ecx 3685; CHECK-SSE1-NEXT: xorb %al, %cl 3686; CHECK-SSE1-NEXT: andb 29(%r12), %cl 3687; CHECK-SSE1-NEXT: xorb %al, %cl 3688; CHECK-SSE1-NEXT: movzbl 30(%r15), %r10d 3689; CHECK-SSE1-NEXT: movzbl 30(%r14), %eax 3690; CHECK-SSE1-NEXT: xorb %r10b, %al 3691; CHECK-SSE1-NEXT: andb 30(%r12), %al 3692; CHECK-SSE1-NEXT: xorb %r10b, %al 3693; CHECK-SSE1-NEXT: movzbl 31(%r15), %r10d 3694; CHECK-SSE1-NEXT: movzbl 31(%r14), %r14d 3695; CHECK-SSE1-NEXT: xorb %r10b, %r14b 3696; CHECK-SSE1-NEXT: andb 31(%r12), %r14b 3697; CHECK-SSE1-NEXT: xorb %r10b, %r14b 3698; CHECK-SSE1-NEXT: movb %r14b, 31(%r13) 3699; CHECK-SSE1-NEXT: movb %al, 30(%r13) 3700; CHECK-SSE1-NEXT: movb %cl, 29(%r13) 3701; CHECK-SSE1-NEXT: movb %dl, 28(%r13) 3702; CHECK-SSE1-NEXT: movb %sil, 27(%r13) 3703; CHECK-SSE1-NEXT: movb %dil, 26(%r13) 3704; CHECK-SSE1-NEXT: movb %r8b, 25(%r13) 3705; CHECK-SSE1-NEXT: movb %r9b, 24(%r13) 3706; CHECK-SSE1-NEXT: movb %r11b, 23(%r13) 3707; CHECK-SSE1-NEXT: movb %bl, 22(%r13) 3708; CHECK-SSE1-NEXT: movb %bpl, 21(%r13) 3709; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 3710; 
CHECK-SSE1-NEXT: movb %al, 20(%r13) 3711; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 3712; CHECK-SSE1-NEXT: movb %al, 19(%r13) 3713; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 3714; CHECK-SSE1-NEXT: movb %al, 18(%r13) 3715; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 3716; CHECK-SSE1-NEXT: movb %al, 17(%r13) 3717; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 3718; CHECK-SSE1-NEXT: movb %al, 16(%r13) 3719; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 3720; CHECK-SSE1-NEXT: movb %al, 15(%r13) 3721; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 3722; CHECK-SSE1-NEXT: movb %al, 14(%r13) 3723; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 3724; CHECK-SSE1-NEXT: movb %al, 13(%r13) 3725; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 3726; CHECK-SSE1-NEXT: movb %al, 12(%r13) 3727; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 3728; CHECK-SSE1-NEXT: movb %al, 11(%r13) 3729; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 3730; CHECK-SSE1-NEXT: movb %al, 10(%r13) 3731; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 3732; CHECK-SSE1-NEXT: movb %al, 9(%r13) 3733; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 3734; CHECK-SSE1-NEXT: movb %al, 8(%r13) 3735; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 3736; CHECK-SSE1-NEXT: movb %al, 7(%r13) 3737; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 3738; CHECK-SSE1-NEXT: movb %al, 6(%r13) 3739; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 3740; CHECK-SSE1-NEXT: movb %al, 5(%r13) 3741; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded 
Reload 3742; CHECK-SSE1-NEXT: movb %al, 4(%r13) 3743; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 3744; CHECK-SSE1-NEXT: movb %al, 3(%r13) 3745; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 3746; CHECK-SSE1-NEXT: movb %al, 2(%r13) 3747; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 3748; CHECK-SSE1-NEXT: movb %al, 1(%r13) 3749; CHECK-SSE1-NEXT: movzbl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload 3750; CHECK-SSE1-NEXT: movb %al, (%r13) 3751; CHECK-SSE1-NEXT: movq %r13, %rax 3752; CHECK-SSE1-NEXT: popq %rbx 3753; CHECK-SSE1-NEXT: popq %r12 3754; CHECK-SSE1-NEXT: popq %r13 3755; CHECK-SSE1-NEXT: popq %r14 3756; CHECK-SSE1-NEXT: popq %r15 3757; CHECK-SSE1-NEXT: popq %rbp 3758; CHECK-SSE1-NEXT: retq 3759; 3760; CHECK-SSE2-LABEL: in_v32i8: 3761; CHECK-SSE2: # %bb.0: 3762; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0 3763; CHECK-SSE2-NEXT: movaps 16(%rdx), %xmm1 3764; CHECK-SSE2-NEXT: movaps %xmm0, %xmm2 3765; CHECK-SSE2-NEXT: andnps (%rsi), %xmm2 3766; CHECK-SSE2-NEXT: andps (%rdi), %xmm0 3767; CHECK-SSE2-NEXT: orps %xmm2, %xmm0 3768; CHECK-SSE2-NEXT: movaps %xmm1, %xmm2 3769; CHECK-SSE2-NEXT: andnps 16(%rsi), %xmm2 3770; CHECK-SSE2-NEXT: andps 16(%rdi), %xmm1 3771; CHECK-SSE2-NEXT: orps %xmm2, %xmm1 3772; CHECK-SSE2-NEXT: retq 3773; 3774; CHECK-XOP-LABEL: in_v32i8: 3775; CHECK-XOP: # %bb.0: 3776; CHECK-XOP-NEXT: vmovdqa (%rdi), %ymm0 3777; CHECK-XOP-NEXT: vmovdqa (%rdx), %ymm1 3778; CHECK-XOP-NEXT: vpcmov %ymm1, (%rsi), %ymm0, %ymm0 3779; CHECK-XOP-NEXT: retq 3780 %x = load <32 x i8>, ptr%px, align 32 3781 %y = load <32 x i8>, ptr%py, align 32 3782 %mask = load <32 x i8>, ptr%pmask, align 32 3783 %n0 = xor <32 x i8> %x, %y 3784 %n1 = and <32 x i8> %n0, %mask 3785 %r = xor <32 x i8> %n1, %y 3786 ret <32 x i8> %r 3787} 3788 3789define <16 x i16> @in_v16i16(ptr%px, ptr%py, ptr%pmask) nounwind { 3790; CHECK-BASELINE-LABEL: in_v16i16: 3791; CHECK-BASELINE: # %bb.0: 3792; 
CHECK-BASELINE-NEXT: pushq %rbp 3793; CHECK-BASELINE-NEXT: pushq %r15 3794; CHECK-BASELINE-NEXT: pushq %r14 3795; CHECK-BASELINE-NEXT: pushq %r13 3796; CHECK-BASELINE-NEXT: pushq %r12 3797; CHECK-BASELINE-NEXT: pushq %rbx 3798; CHECK-BASELINE-NEXT: movq %rcx, %r9 3799; CHECK-BASELINE-NEXT: movq %rdi, %r10 3800; CHECK-BASELINE-NEXT: movzwl 30(%rdx), %edi 3801; CHECK-BASELINE-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 3802; CHECK-BASELINE-NEXT: movl 28(%rdx), %edi 3803; CHECK-BASELINE-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 3804; CHECK-BASELINE-NEXT: movzwl 26(%rdx), %edi 3805; CHECK-BASELINE-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 3806; CHECK-BASELINE-NEXT: movl 24(%rdx), %eax 3807; CHECK-BASELINE-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 3808; CHECK-BASELINE-NEXT: movzwl 22(%rdx), %eax 3809; CHECK-BASELINE-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 3810; CHECK-BASELINE-NEXT: movl 20(%rdx), %r8d 3811; CHECK-BASELINE-NEXT: movl %r8d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 3812; CHECK-BASELINE-NEXT: movzwl 18(%rdx), %r11d 3813; CHECK-BASELINE-NEXT: movl %r11d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 3814; CHECK-BASELINE-NEXT: movl 16(%rdx), %ebx 3815; CHECK-BASELINE-NEXT: movl %ebx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 3816; CHECK-BASELINE-NEXT: movzwl 14(%rdx), %ebp 3817; CHECK-BASELINE-NEXT: movl %ebp, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 3818; CHECK-BASELINE-NEXT: movl 12(%rdx), %r14d 3819; CHECK-BASELINE-NEXT: movl %r14d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 3820; CHECK-BASELINE-NEXT: movzwl 10(%rdx), %r15d 3821; CHECK-BASELINE-NEXT: movl %r15d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 3822; CHECK-BASELINE-NEXT: movl 8(%rdx), %r12d 3823; CHECK-BASELINE-NEXT: movl %r12d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 3824; CHECK-BASELINE-NEXT: movzwl 6(%rdx), %r13d 3825; CHECK-BASELINE-NEXT: movl %r13d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 3826; CHECK-BASELINE-NEXT: movl (%rdx), %ecx 
3827; CHECK-BASELINE-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 3828; CHECK-BASELINE-NEXT: movl 4(%rdx), %edi 3829; CHECK-BASELINE-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 3830; CHECK-BASELINE-NEXT: movzwl 2(%rdx), %eax 3831; CHECK-BASELINE-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 3832; CHECK-BASELINE-NEXT: movzwl (%rsi), %edx 3833; CHECK-BASELINE-NEXT: xorw %cx, %dx 3834; CHECK-BASELINE-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 3835; CHECK-BASELINE-NEXT: movzwl 2(%rsi), %ecx 3836; CHECK-BASELINE-NEXT: xorw %ax, %cx 3837; CHECK-BASELINE-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 3838; CHECK-BASELINE-NEXT: movzwl 4(%rsi), %eax 3839; CHECK-BASELINE-NEXT: xorw %di, %ax 3840; CHECK-BASELINE-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 3841; CHECK-BASELINE-NEXT: movzwl 6(%rsi), %ecx 3842; CHECK-BASELINE-NEXT: xorw %r13w, %cx 3843; CHECK-BASELINE-NEXT: movzwl 8(%rsi), %eax 3844; CHECK-BASELINE-NEXT: xorw %r12w, %ax 3845; CHECK-BASELINE-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 3846; CHECK-BASELINE-NEXT: movzwl 10(%rsi), %eax 3847; CHECK-BASELINE-NEXT: xorw %r15w, %ax 3848; CHECK-BASELINE-NEXT: movzwl 12(%rsi), %edx 3849; CHECK-BASELINE-NEXT: xorw %r14w, %dx 3850; CHECK-BASELINE-NEXT: movzwl 14(%rsi), %r13d 3851; CHECK-BASELINE-NEXT: xorw %bp, %r13w 3852; CHECK-BASELINE-NEXT: movzwl 16(%rsi), %r12d 3853; CHECK-BASELINE-NEXT: xorw %bx, %r12w 3854; CHECK-BASELINE-NEXT: movzwl 18(%rsi), %r15d 3855; CHECK-BASELINE-NEXT: xorw %r11w, %r15w 3856; CHECK-BASELINE-NEXT: movzwl 20(%rsi), %r14d 3857; CHECK-BASELINE-NEXT: xorw %r8w, %r14w 3858; CHECK-BASELINE-NEXT: movzwl 22(%rsi), %ebp 3859; CHECK-BASELINE-NEXT: xorw {{[-0-9]+}}(%r{{[sb]}}p), %bp # 2-byte Folded Reload 3860; CHECK-BASELINE-NEXT: movzwl 24(%rsi), %ebx 3861; CHECK-BASELINE-NEXT: xorw {{[-0-9]+}}(%r{{[sb]}}p), %bx # 2-byte Folded Reload 3862; CHECK-BASELINE-NEXT: movzwl 26(%rsi), %r11d 3863; CHECK-BASELINE-NEXT: xorw 
{{[-0-9]+}}(%r{{[sb]}}p), %r11w # 2-byte Folded Reload 3864; CHECK-BASELINE-NEXT: movzwl 28(%rsi), %edi 3865; CHECK-BASELINE-NEXT: xorw {{[-0-9]+}}(%r{{[sb]}}p), %di # 2-byte Folded Reload 3866; CHECK-BASELINE-NEXT: movzwl 30(%rsi), %esi 3867; CHECK-BASELINE-NEXT: xorw {{[-0-9]+}}(%r{{[sb]}}p), %si # 2-byte Folded Reload 3868; CHECK-BASELINE-NEXT: andw 30(%r9), %si 3869; CHECK-BASELINE-NEXT: andw 28(%r9), %di 3870; CHECK-BASELINE-NEXT: andw 26(%r9), %r11w 3871; CHECK-BASELINE-NEXT: andw 24(%r9), %bx 3872; CHECK-BASELINE-NEXT: andw 22(%r9), %bp 3873; CHECK-BASELINE-NEXT: andw 20(%r9), %r14w 3874; CHECK-BASELINE-NEXT: andw 18(%r9), %r15w 3875; CHECK-BASELINE-NEXT: andw 16(%r9), %r12w 3876; CHECK-BASELINE-NEXT: andw 14(%r9), %r13w 3877; CHECK-BASELINE-NEXT: andw 12(%r9), %dx 3878; CHECK-BASELINE-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 3879; CHECK-BASELINE-NEXT: andw 10(%r9), %ax 3880; CHECK-BASELINE-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 3881; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload 3882; CHECK-BASELINE-NEXT: andw 8(%r9), %dx 3883; CHECK-BASELINE-NEXT: andw 6(%r9), %cx 3884; CHECK-BASELINE-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 3885; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %r8d # 4-byte Reload 3886; CHECK-BASELINE-NEXT: andw 4(%r9), %r8w 3887; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload 3888; CHECK-BASELINE-NEXT: andw 2(%r9), %ax 3889; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload 3890; CHECK-BASELINE-NEXT: andw (%r9), %cx 3891; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Folded Reload 3892; CHECK-BASELINE-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 3893; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload 3894; CHECK-BASELINE-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 3895; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r8d # 4-byte 
Folded Reload 3896; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %r9d # 4-byte Reload 3897; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r9d # 4-byte Folded Reload 3898; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Folded Reload 3899; CHECK-BASELINE-NEXT: movl %edx, %ecx 3900; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload 3901; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Folded Reload 3902; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload 3903; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload 3904; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r13d # 4-byte Folded Reload 3905; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r12d # 4-byte Folded Reload 3906; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r15d # 4-byte Folded Reload 3907; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r14d # 4-byte Folded Reload 3908; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %ebp # 4-byte Folded Reload 3909; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %ebx # 4-byte Folded Reload 3910; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r11d # 4-byte Folded Reload 3911; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Folded Reload 3912; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 4-byte Folded Reload 3913; CHECK-BASELINE-NEXT: movw %si, 30(%r10) 3914; CHECK-BASELINE-NEXT: movw %di, 28(%r10) 3915; CHECK-BASELINE-NEXT: movw %r11w, 26(%r10) 3916; CHECK-BASELINE-NEXT: movw %bx, 24(%r10) 3917; CHECK-BASELINE-NEXT: movw %bp, 22(%r10) 3918; CHECK-BASELINE-NEXT: movw %r14w, 20(%r10) 3919; CHECK-BASELINE-NEXT: movw %r15w, 18(%r10) 3920; CHECK-BASELINE-NEXT: movw %r12w, 16(%r10) 3921; CHECK-BASELINE-NEXT: movw %r13w, 14(%r10) 3922; CHECK-BASELINE-NEXT: movw %ax, 12(%r10) 3923; CHECK-BASELINE-NEXT: movw %dx, 10(%r10) 3924; CHECK-BASELINE-NEXT: movw %cx, 8(%r10) 3925; 
CHECK-BASELINE-NEXT: movw %r9w, 6(%r10) 3926; CHECK-BASELINE-NEXT: movw %r8w, 4(%r10) 3927; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload 3928; CHECK-BASELINE-NEXT: movw %ax, 2(%r10) 3929; CHECK-BASELINE-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload 3930; CHECK-BASELINE-NEXT: movw %ax, (%r10) 3931; CHECK-BASELINE-NEXT: movq %r10, %rax 3932; CHECK-BASELINE-NEXT: popq %rbx 3933; CHECK-BASELINE-NEXT: popq %r12 3934; CHECK-BASELINE-NEXT: popq %r13 3935; CHECK-BASELINE-NEXT: popq %r14 3936; CHECK-BASELINE-NEXT: popq %r15 3937; CHECK-BASELINE-NEXT: popq %rbp 3938; CHECK-BASELINE-NEXT: retq 3939; 3940; CHECK-SSE1-LABEL: in_v16i16: 3941; CHECK-SSE1: # %bb.0: 3942; CHECK-SSE1-NEXT: pushq %rbp 3943; CHECK-SSE1-NEXT: pushq %r15 3944; CHECK-SSE1-NEXT: pushq %r14 3945; CHECK-SSE1-NEXT: pushq %r13 3946; CHECK-SSE1-NEXT: pushq %r12 3947; CHECK-SSE1-NEXT: pushq %rbx 3948; CHECK-SSE1-NEXT: movq %rcx, %r9 3949; CHECK-SSE1-NEXT: movq %rdi, %r10 3950; CHECK-SSE1-NEXT: movzwl 30(%rdx), %edi 3951; CHECK-SSE1-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 3952; CHECK-SSE1-NEXT: movl 28(%rdx), %edi 3953; CHECK-SSE1-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 3954; CHECK-SSE1-NEXT: movzwl 26(%rdx), %edi 3955; CHECK-SSE1-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 3956; CHECK-SSE1-NEXT: movl 24(%rdx), %eax 3957; CHECK-SSE1-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 3958; CHECK-SSE1-NEXT: movzwl 22(%rdx), %eax 3959; CHECK-SSE1-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 3960; CHECK-SSE1-NEXT: movl 20(%rdx), %r8d 3961; CHECK-SSE1-NEXT: movl %r8d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 3962; CHECK-SSE1-NEXT: movzwl 18(%rdx), %r11d 3963; CHECK-SSE1-NEXT: movl %r11d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 3964; CHECK-SSE1-NEXT: movl 16(%rdx), %ebx 3965; CHECK-SSE1-NEXT: movl %ebx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 3966; CHECK-SSE1-NEXT: movzwl 14(%rdx), %ebp 3967; CHECK-SSE1-NEXT: movl 
%ebp, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 3968; CHECK-SSE1-NEXT: movl 12(%rdx), %r14d 3969; CHECK-SSE1-NEXT: movl %r14d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 3970; CHECK-SSE1-NEXT: movzwl 10(%rdx), %r15d 3971; CHECK-SSE1-NEXT: movl %r15d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 3972; CHECK-SSE1-NEXT: movl 8(%rdx), %r12d 3973; CHECK-SSE1-NEXT: movl %r12d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 3974; CHECK-SSE1-NEXT: movzwl 6(%rdx), %r13d 3975; CHECK-SSE1-NEXT: movl %r13d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 3976; CHECK-SSE1-NEXT: movl (%rdx), %ecx 3977; CHECK-SSE1-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 3978; CHECK-SSE1-NEXT: movl 4(%rdx), %edi 3979; CHECK-SSE1-NEXT: movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 3980; CHECK-SSE1-NEXT: movzwl 2(%rdx), %eax 3981; CHECK-SSE1-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 3982; CHECK-SSE1-NEXT: movzwl (%rsi), %edx 3983; CHECK-SSE1-NEXT: xorw %cx, %dx 3984; CHECK-SSE1-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 3985; CHECK-SSE1-NEXT: movzwl 2(%rsi), %ecx 3986; CHECK-SSE1-NEXT: xorw %ax, %cx 3987; CHECK-SSE1-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 3988; CHECK-SSE1-NEXT: movzwl 4(%rsi), %eax 3989; CHECK-SSE1-NEXT: xorw %di, %ax 3990; CHECK-SSE1-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 3991; CHECK-SSE1-NEXT: movzwl 6(%rsi), %ecx 3992; CHECK-SSE1-NEXT: xorw %r13w, %cx 3993; CHECK-SSE1-NEXT: movzwl 8(%rsi), %eax 3994; CHECK-SSE1-NEXT: xorw %r12w, %ax 3995; CHECK-SSE1-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 3996; CHECK-SSE1-NEXT: movzwl 10(%rsi), %eax 3997; CHECK-SSE1-NEXT: xorw %r15w, %ax 3998; CHECK-SSE1-NEXT: movzwl 12(%rsi), %edx 3999; CHECK-SSE1-NEXT: xorw %r14w, %dx 4000; CHECK-SSE1-NEXT: movzwl 14(%rsi), %r13d 4001; CHECK-SSE1-NEXT: xorw %bp, %r13w 4002; CHECK-SSE1-NEXT: movzwl 16(%rsi), %r12d 4003; CHECK-SSE1-NEXT: xorw %bx, %r12w 4004; CHECK-SSE1-NEXT: movzwl 18(%rsi), %r15d 4005; CHECK-SSE1-NEXT: xorw %r11w, 
%r15w 4006; CHECK-SSE1-NEXT: movzwl 20(%rsi), %r14d 4007; CHECK-SSE1-NEXT: xorw %r8w, %r14w 4008; CHECK-SSE1-NEXT: movzwl 22(%rsi), %ebp 4009; CHECK-SSE1-NEXT: xorw {{[-0-9]+}}(%r{{[sb]}}p), %bp # 2-byte Folded Reload 4010; CHECK-SSE1-NEXT: movzwl 24(%rsi), %ebx 4011; CHECK-SSE1-NEXT: xorw {{[-0-9]+}}(%r{{[sb]}}p), %bx # 2-byte Folded Reload 4012; CHECK-SSE1-NEXT: movzwl 26(%rsi), %r11d 4013; CHECK-SSE1-NEXT: xorw {{[-0-9]+}}(%r{{[sb]}}p), %r11w # 2-byte Folded Reload 4014; CHECK-SSE1-NEXT: movzwl 28(%rsi), %edi 4015; CHECK-SSE1-NEXT: xorw {{[-0-9]+}}(%r{{[sb]}}p), %di # 2-byte Folded Reload 4016; CHECK-SSE1-NEXT: movzwl 30(%rsi), %esi 4017; CHECK-SSE1-NEXT: xorw {{[-0-9]+}}(%r{{[sb]}}p), %si # 2-byte Folded Reload 4018; CHECK-SSE1-NEXT: andw 30(%r9), %si 4019; CHECK-SSE1-NEXT: andw 28(%r9), %di 4020; CHECK-SSE1-NEXT: andw 26(%r9), %r11w 4021; CHECK-SSE1-NEXT: andw 24(%r9), %bx 4022; CHECK-SSE1-NEXT: andw 22(%r9), %bp 4023; CHECK-SSE1-NEXT: andw 20(%r9), %r14w 4024; CHECK-SSE1-NEXT: andw 18(%r9), %r15w 4025; CHECK-SSE1-NEXT: andw 16(%r9), %r12w 4026; CHECK-SSE1-NEXT: andw 14(%r9), %r13w 4027; CHECK-SSE1-NEXT: andw 12(%r9), %dx 4028; CHECK-SSE1-NEXT: movl %edx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 4029; CHECK-SSE1-NEXT: andw 10(%r9), %ax 4030; CHECK-SSE1-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 4031; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload 4032; CHECK-SSE1-NEXT: andw 8(%r9), %dx 4033; CHECK-SSE1-NEXT: andw 6(%r9), %cx 4034; CHECK-SSE1-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 4035; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %r8d # 4-byte Reload 4036; CHECK-SSE1-NEXT: andw 4(%r9), %r8w 4037; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload 4038; CHECK-SSE1-NEXT: andw 2(%r9), %ax 4039; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Reload 4040; CHECK-SSE1-NEXT: andw (%r9), %cx 4041; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %ecx # 4-byte Folded Reload 4042; 
CHECK-SSE1-NEXT: movl %ecx, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 4043; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload 4044; CHECK-SSE1-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 4045; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r8d # 4-byte Folded Reload 4046; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %r9d # 4-byte Reload 4047; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r9d # 4-byte Folded Reload 4048; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Folded Reload 4049; CHECK-SSE1-NEXT: movl %edx, %ecx 4050; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Reload 4051; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %edx # 4-byte Folded Reload 4052; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload 4053; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload 4054; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r13d # 4-byte Folded Reload 4055; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r12d # 4-byte Folded Reload 4056; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r15d # 4-byte Folded Reload 4057; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r14d # 4-byte Folded Reload 4058; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %ebp # 4-byte Folded Reload 4059; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %ebx # 4-byte Folded Reload 4060; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r11d # 4-byte Folded Reload 4061; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %edi # 4-byte Folded Reload 4062; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %esi # 4-byte Folded Reload 4063; CHECK-SSE1-NEXT: movw %si, 30(%r10) 4064; CHECK-SSE1-NEXT: movw %di, 28(%r10) 4065; CHECK-SSE1-NEXT: movw %r11w, 26(%r10) 4066; CHECK-SSE1-NEXT: movw %bx, 24(%r10) 4067; CHECK-SSE1-NEXT: movw %bp, 22(%r10) 4068; CHECK-SSE1-NEXT: movw %r14w, 20(%r10) 4069; CHECK-SSE1-NEXT: movw %r15w, 18(%r10) 4070; CHECK-SSE1-NEXT: movw %r12w, 16(%r10) 4071; 
CHECK-SSE1-NEXT: movw %r13w, 14(%r10) 4072; CHECK-SSE1-NEXT: movw %ax, 12(%r10) 4073; CHECK-SSE1-NEXT: movw %dx, 10(%r10) 4074; CHECK-SSE1-NEXT: movw %cx, 8(%r10) 4075; CHECK-SSE1-NEXT: movw %r9w, 6(%r10) 4076; CHECK-SSE1-NEXT: movw %r8w, 4(%r10) 4077; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload 4078; CHECK-SSE1-NEXT: movw %ax, 2(%r10) 4079; CHECK-SSE1-NEXT: movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload 4080; CHECK-SSE1-NEXT: movw %ax, (%r10) 4081; CHECK-SSE1-NEXT: movq %r10, %rax 4082; CHECK-SSE1-NEXT: popq %rbx 4083; CHECK-SSE1-NEXT: popq %r12 4084; CHECK-SSE1-NEXT: popq %r13 4085; CHECK-SSE1-NEXT: popq %r14 4086; CHECK-SSE1-NEXT: popq %r15 4087; CHECK-SSE1-NEXT: popq %rbp 4088; CHECK-SSE1-NEXT: retq 4089; 4090; CHECK-SSE2-LABEL: in_v16i16: 4091; CHECK-SSE2: # %bb.0: 4092; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0 4093; CHECK-SSE2-NEXT: movaps 16(%rdx), %xmm1 4094; CHECK-SSE2-NEXT: movaps %xmm0, %xmm2 4095; CHECK-SSE2-NEXT: andnps (%rsi), %xmm2 4096; CHECK-SSE2-NEXT: andps (%rdi), %xmm0 4097; CHECK-SSE2-NEXT: orps %xmm2, %xmm0 4098; CHECK-SSE2-NEXT: movaps %xmm1, %xmm2 4099; CHECK-SSE2-NEXT: andnps 16(%rsi), %xmm2 4100; CHECK-SSE2-NEXT: andps 16(%rdi), %xmm1 4101; CHECK-SSE2-NEXT: orps %xmm2, %xmm1 4102; CHECK-SSE2-NEXT: retq 4103; 4104; CHECK-XOP-LABEL: in_v16i16: 4105; CHECK-XOP: # %bb.0: 4106; CHECK-XOP-NEXT: vmovdqa (%rdi), %ymm0 4107; CHECK-XOP-NEXT: vmovdqa (%rdx), %ymm1 4108; CHECK-XOP-NEXT: vpcmov %ymm1, (%rsi), %ymm0, %ymm0 4109; CHECK-XOP-NEXT: retq 4110 %x = load <16 x i16>, ptr%px, align 32 4111 %y = load <16 x i16>, ptr%py, align 32 4112 %mask = load <16 x i16>, ptr%pmask, align 32 4113 %n0 = xor <16 x i16> %x, %y 4114 %n1 = and <16 x i16> %n0, %mask 4115 %r = xor <16 x i16> %n1, %y 4116 ret <16 x i16> %r 4117} 4118; in_v8i32: masked merge in its "in" (xor/and/xor) form on <8 x i32> values loaded with align 32 from %px, %py and %pmask: r = ((x ^ y) & mask) ^ y, i.e. x's bits where mask is set and y's bits elsewhere. Expected lowering below: the baseline and SSE1 configurations scalarize into eight 32-bit xor/and/xor chains and store the result through the pointer in %rdi; SSE2 uses andnps/andps/orps on each 16-byte half; XOP uses a single 256-bit vpcmov. 4119define <8 x i32> @in_v8i32(ptr%px, ptr%py, ptr%pmask) nounwind { 4120; CHECK-BASELINE-LABEL: in_v8i32: 4121; CHECK-BASELINE: # %bb.0: 4122; CHECK-BASELINE-NEXT: pushq %rbp 4123; CHECK-BASELINE-NEXT: pushq
%r15 4124; CHECK-BASELINE-NEXT: pushq %r14 4125; CHECK-BASELINE-NEXT: pushq %r13 4126; CHECK-BASELINE-NEXT: pushq %r12 4127; CHECK-BASELINE-NEXT: pushq %rbx 4128; CHECK-BASELINE-NEXT: movl 28(%rdx), %ebp 4129; CHECK-BASELINE-NEXT: movl 24(%rdx), %ebx 4130; CHECK-BASELINE-NEXT: movl 20(%rdx), %r10d 4131; CHECK-BASELINE-NEXT: movl 16(%rdx), %eax 4132; CHECK-BASELINE-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 4133; CHECK-BASELINE-NEXT: movl 12(%rdx), %r12d 4134; CHECK-BASELINE-NEXT: movl %r12d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 4135; CHECK-BASELINE-NEXT: movl 8(%rdx), %r14d 4136; CHECK-BASELINE-NEXT: movl %r14d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 4137; CHECK-BASELINE-NEXT: movl (%rdx), %r15d 4138; CHECK-BASELINE-NEXT: movl 4(%rdx), %r13d 4139; CHECK-BASELINE-NEXT: movl (%rsi), %r8d 4140; CHECK-BASELINE-NEXT: xorl %r15d, %r8d 4141; CHECK-BASELINE-NEXT: movl 4(%rsi), %r9d 4142; CHECK-BASELINE-NEXT: xorl %r13d, %r9d 4143; CHECK-BASELINE-NEXT: movl 8(%rsi), %r11d 4144; CHECK-BASELINE-NEXT: xorl %r14d, %r11d 4145; CHECK-BASELINE-NEXT: movl 12(%rsi), %r14d 4146; CHECK-BASELINE-NEXT: xorl %r12d, %r14d 4147; CHECK-BASELINE-NEXT: movl 16(%rsi), %r12d 4148; CHECK-BASELINE-NEXT: xorl %eax, %r12d 4149; CHECK-BASELINE-NEXT: movl 20(%rsi), %edx 4150; CHECK-BASELINE-NEXT: xorl %r10d, %edx 4151; CHECK-BASELINE-NEXT: movl 24(%rsi), %eax 4152; CHECK-BASELINE-NEXT: xorl %ebx, %eax 4153; CHECK-BASELINE-NEXT: movl 28(%rsi), %esi 4154; CHECK-BASELINE-NEXT: xorl %ebp, %esi 4155; CHECK-BASELINE-NEXT: andl 28(%rcx), %esi 4156; CHECK-BASELINE-NEXT: andl 24(%rcx), %eax 4157; CHECK-BASELINE-NEXT: andl 20(%rcx), %edx 4158; CHECK-BASELINE-NEXT: andl 16(%rcx), %r12d 4159; CHECK-BASELINE-NEXT: andl 12(%rcx), %r14d 4160; CHECK-BASELINE-NEXT: andl 8(%rcx), %r11d 4161; CHECK-BASELINE-NEXT: andl 4(%rcx), %r9d 4162; CHECK-BASELINE-NEXT: andl (%rcx), %r8d 4163; CHECK-BASELINE-NEXT: xorl %r15d, %r8d 4164; CHECK-BASELINE-NEXT: xorl %r13d, %r9d 4165; CHECK-BASELINE-NEXT: xorl
{{[-0-9]+}}(%r{{[sb]}}p), %r11d # 4-byte Folded Reload 4166; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r14d # 4-byte Folded Reload 4167; CHECK-BASELINE-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r12d # 4-byte Folded Reload 4168; CHECK-BASELINE-NEXT: xorl %r10d, %edx 4169; CHECK-BASELINE-NEXT: xorl %ebx, %eax 4170; CHECK-BASELINE-NEXT: xorl %ebp, %esi 4171; CHECK-BASELINE-NEXT: movl %esi, 28(%rdi) 4172; CHECK-BASELINE-NEXT: movl %eax, 24(%rdi) 4173; CHECK-BASELINE-NEXT: movl %edx, 20(%rdi) 4174; CHECK-BASELINE-NEXT: movl %r12d, 16(%rdi) 4175; CHECK-BASELINE-NEXT: movl %r14d, 12(%rdi) 4176; CHECK-BASELINE-NEXT: movl %r11d, 8(%rdi) 4177; CHECK-BASELINE-NEXT: movl %r9d, 4(%rdi) 4178; CHECK-BASELINE-NEXT: movl %r8d, (%rdi) 4179; CHECK-BASELINE-NEXT: movq %rdi, %rax 4180; CHECK-BASELINE-NEXT: popq %rbx 4181; CHECK-BASELINE-NEXT: popq %r12 4182; CHECK-BASELINE-NEXT: popq %r13 4183; CHECK-BASELINE-NEXT: popq %r14 4184; CHECK-BASELINE-NEXT: popq %r15 4185; CHECK-BASELINE-NEXT: popq %rbp 4186; CHECK-BASELINE-NEXT: retq 4187; 4188; CHECK-SSE1-LABEL: in_v8i32: 4189; CHECK-SSE1: # %bb.0: 4190; CHECK-SSE1-NEXT: pushq %rbp 4191; CHECK-SSE1-NEXT: pushq %r15 4192; CHECK-SSE1-NEXT: pushq %r14 4193; CHECK-SSE1-NEXT: pushq %r13 4194; CHECK-SSE1-NEXT: pushq %r12 4195; CHECK-SSE1-NEXT: pushq %rbx 4196; CHECK-SSE1-NEXT: movl 28(%rdx), %ebp 4197; CHECK-SSE1-NEXT: movl 24(%rdx), %ebx 4198; CHECK-SSE1-NEXT: movl 20(%rdx), %r10d 4199; CHECK-SSE1-NEXT: movl 16(%rdx), %eax 4200; CHECK-SSE1-NEXT: movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 4201; CHECK-SSE1-NEXT: movl 12(%rdx), %r12d 4202; CHECK-SSE1-NEXT: movl %r12d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 4203; CHECK-SSE1-NEXT: movl 8(%rdx), %r14d 4204; CHECK-SSE1-NEXT: movl %r14d, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill 4205; CHECK-SSE1-NEXT: movl (%rdx), %r15d 4206; CHECK-SSE1-NEXT: movl 4(%rdx), %r13d 4207; CHECK-SSE1-NEXT: movl (%rsi), %r8d 4208; CHECK-SSE1-NEXT: xorl %r15d, %r8d 4209; CHECK-SSE1-NEXT: movl 4(%rsi), %r9d 4210;
CHECK-SSE1-NEXT: xorl %r13d, %r9d 4211; CHECK-SSE1-NEXT: movl 8(%rsi), %r11d 4212; CHECK-SSE1-NEXT: xorl %r14d, %r11d 4213; CHECK-SSE1-NEXT: movl 12(%rsi), %r14d 4214; CHECK-SSE1-NEXT: xorl %r12d, %r14d 4215; CHECK-SSE1-NEXT: movl 16(%rsi), %r12d 4216; CHECK-SSE1-NEXT: xorl %eax, %r12d 4217; CHECK-SSE1-NEXT: movl 20(%rsi), %edx 4218; CHECK-SSE1-NEXT: xorl %r10d, %edx 4219; CHECK-SSE1-NEXT: movl 24(%rsi), %eax 4220; CHECK-SSE1-NEXT: xorl %ebx, %eax 4221; CHECK-SSE1-NEXT: movl 28(%rsi), %esi 4222; CHECK-SSE1-NEXT: xorl %ebp, %esi 4223; CHECK-SSE1-NEXT: andl 28(%rcx), %esi 4224; CHECK-SSE1-NEXT: andl 24(%rcx), %eax 4225; CHECK-SSE1-NEXT: andl 20(%rcx), %edx 4226; CHECK-SSE1-NEXT: andl 16(%rcx), %r12d 4227; CHECK-SSE1-NEXT: andl 12(%rcx), %r14d 4228; CHECK-SSE1-NEXT: andl 8(%rcx), %r11d 4229; CHECK-SSE1-NEXT: andl 4(%rcx), %r9d 4230; CHECK-SSE1-NEXT: andl (%rcx), %r8d 4231; CHECK-SSE1-NEXT: xorl %r15d, %r8d 4232; CHECK-SSE1-NEXT: xorl %r13d, %r9d 4233; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r11d # 4-byte Folded Reload 4234; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r14d # 4-byte Folded Reload 4235; CHECK-SSE1-NEXT: xorl {{[-0-9]+}}(%r{{[sb]}}p), %r12d # 4-byte Folded Reload 4236; CHECK-SSE1-NEXT: xorl %r10d, %edx 4237; CHECK-SSE1-NEXT: xorl %ebx, %eax 4238; CHECK-SSE1-NEXT: xorl %ebp, %esi 4239; CHECK-SSE1-NEXT: movl %esi, 28(%rdi) 4240; CHECK-SSE1-NEXT: movl %eax, 24(%rdi) 4241; CHECK-SSE1-NEXT: movl %edx, 20(%rdi) 4242; CHECK-SSE1-NEXT: movl %r12d, 16(%rdi) 4243; CHECK-SSE1-NEXT: movl %r14d, 12(%rdi) 4244; CHECK-SSE1-NEXT: movl %r11d, 8(%rdi) 4245; CHECK-SSE1-NEXT: movl %r9d, 4(%rdi) 4246; CHECK-SSE1-NEXT: movl %r8d, (%rdi) 4247; CHECK-SSE1-NEXT: movq %rdi, %rax 4248; CHECK-SSE1-NEXT: popq %rbx 4249; CHECK-SSE1-NEXT: popq %r12 4250; CHECK-SSE1-NEXT: popq %r13 4251; CHECK-SSE1-NEXT: popq %r14 4252; CHECK-SSE1-NEXT: popq %r15 4253; CHECK-SSE1-NEXT: popq %rbp 4254; CHECK-SSE1-NEXT: retq 4255; 4256; CHECK-SSE2-LABEL: in_v8i32: 4257; CHECK-SSE2: # %bb.0:
4258; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0 4259; CHECK-SSE2-NEXT: movaps 16(%rdx), %xmm1 4260; CHECK-SSE2-NEXT: movaps %xmm0, %xmm2 4261; CHECK-SSE2-NEXT: andnps (%rsi), %xmm2 4262; CHECK-SSE2-NEXT: andps (%rdi), %xmm0 4263; CHECK-SSE2-NEXT: orps %xmm2, %xmm0 4264; CHECK-SSE2-NEXT: movaps %xmm1, %xmm2 4265; CHECK-SSE2-NEXT: andnps 16(%rsi), %xmm2 4266; CHECK-SSE2-NEXT: andps 16(%rdi), %xmm1 4267; CHECK-SSE2-NEXT: orps %xmm2, %xmm1 4268; CHECK-SSE2-NEXT: retq 4269; 4270; CHECK-XOP-LABEL: in_v8i32: 4271; CHECK-XOP: # %bb.0: 4272; CHECK-XOP-NEXT: vmovdqa (%rdi), %ymm0 4273; CHECK-XOP-NEXT: vmovdqa (%rdx), %ymm1 4274; CHECK-XOP-NEXT: vpcmov %ymm1, (%rsi), %ymm0, %ymm0 4275; CHECK-XOP-NEXT: retq 4276 %x = load <8 x i32>, ptr%px, align 32 4277 %y = load <8 x i32>, ptr%py, align 32 4278 %mask = load <8 x i32>, ptr%pmask, align 32 4279 %n0 = xor <8 x i32> %x, %y 4280 %n1 = and <8 x i32> %n0, %mask 4281 %r = xor <8 x i32> %n1, %y 4282 ret <8 x i32> %r 4283} 4284; in_v4i64: masked merge in its "in" (xor/and/xor) form on <4 x i64> values loaded with align 32 from %px, %py and %pmask: r = ((x ^ y) & mask) ^ y, i.e. x's bits where mask is set and y's bits elsewhere. Expected lowering below: the baseline and SSE1 configurations scalarize into four 64-bit xorq/andq/xorq chains and store the result through the pointer copied from %rdi into %rax; SSE2 uses andnps/andps/orps on each 16-byte half; XOP uses a single 256-bit vpcmov. 4285define <4 x i64> @in_v4i64(ptr%px, ptr%py, ptr%pmask) nounwind { 4286; CHECK-BASELINE-LABEL: in_v4i64: 4287; CHECK-BASELINE: # %bb.0: 4288; CHECK-BASELINE-NEXT: pushq %rbx 4289; CHECK-BASELINE-NEXT: movq %rdi, %rax 4290; CHECK-BASELINE-NEXT: movq 24(%rdx), %rdi 4291; CHECK-BASELINE-NEXT: movq 16(%rdx), %r8 4292; CHECK-BASELINE-NEXT: movq (%rdx), %r9 4293; CHECK-BASELINE-NEXT: movq 8(%rdx), %r10 4294; CHECK-BASELINE-NEXT: movq (%rsi), %rdx 4295; CHECK-BASELINE-NEXT: xorq %r9, %rdx 4296; CHECK-BASELINE-NEXT: movq 8(%rsi), %r11 4297; CHECK-BASELINE-NEXT: xorq %r10, %r11 4298; CHECK-BASELINE-NEXT: movq 16(%rsi), %rbx 4299; CHECK-BASELINE-NEXT: xorq %r8, %rbx 4300; CHECK-BASELINE-NEXT: movq 24(%rsi), %rsi 4301; CHECK-BASELINE-NEXT: xorq %rdi, %rsi 4302; CHECK-BASELINE-NEXT: andq 24(%rcx), %rsi 4303; CHECK-BASELINE-NEXT: andq 16(%rcx), %rbx 4304; CHECK-BASELINE-NEXT: andq 8(%rcx), %r11 4305; CHECK-BASELINE-NEXT: andq (%rcx), %rdx 4306; CHECK-BASELINE-NEXT: xorq %r9, %rdx 4307; CHECK-BASELINE-NEXT: xorq %r10, %r11
4308; CHECK-BASELINE-NEXT: xorq %r8, %rbx 4309; CHECK-BASELINE-NEXT: xorq %rdi, %rsi 4310; CHECK-BASELINE-NEXT: movq %rsi, 24(%rax) 4311; CHECK-BASELINE-NEXT: movq %rbx, 16(%rax) 4312; CHECK-BASELINE-NEXT: movq %r11, 8(%rax) 4313; CHECK-BASELINE-NEXT: movq %rdx, (%rax) 4314; CHECK-BASELINE-NEXT: popq %rbx 4315; CHECK-BASELINE-NEXT: retq 4316; 4317; CHECK-SSE1-LABEL: in_v4i64: 4318; CHECK-SSE1: # %bb.0: 4319; CHECK-SSE1-NEXT: pushq %rbx 4320; CHECK-SSE1-NEXT: movq %rdi, %rax 4321; CHECK-SSE1-NEXT: movq 24(%rdx), %rdi 4322; CHECK-SSE1-NEXT: movq 16(%rdx), %r8 4323; CHECK-SSE1-NEXT: movq (%rdx), %r9 4324; CHECK-SSE1-NEXT: movq 8(%rdx), %r10 4325; CHECK-SSE1-NEXT: movq (%rsi), %rdx 4326; CHECK-SSE1-NEXT: xorq %r9, %rdx 4327; CHECK-SSE1-NEXT: movq 8(%rsi), %r11 4328; CHECK-SSE1-NEXT: xorq %r10, %r11 4329; CHECK-SSE1-NEXT: movq 16(%rsi), %rbx 4330; CHECK-SSE1-NEXT: xorq %r8, %rbx 4331; CHECK-SSE1-NEXT: movq 24(%rsi), %rsi 4332; CHECK-SSE1-NEXT: xorq %rdi, %rsi 4333; CHECK-SSE1-NEXT: andq 24(%rcx), %rsi 4334; CHECK-SSE1-NEXT: andq 16(%rcx), %rbx 4335; CHECK-SSE1-NEXT: andq 8(%rcx), %r11 4336; CHECK-SSE1-NEXT: andq (%rcx), %rdx 4337; CHECK-SSE1-NEXT: xorq %r9, %rdx 4338; CHECK-SSE1-NEXT: xorq %r10, %r11 4339; CHECK-SSE1-NEXT: xorq %r8, %rbx 4340; CHECK-SSE1-NEXT: xorq %rdi, %rsi 4341; CHECK-SSE1-NEXT: movq %rsi, 24(%rax) 4342; CHECK-SSE1-NEXT: movq %rbx, 16(%rax) 4343; CHECK-SSE1-NEXT: movq %r11, 8(%rax) 4344; CHECK-SSE1-NEXT: movq %rdx, (%rax) 4345; CHECK-SSE1-NEXT: popq %rbx 4346; CHECK-SSE1-NEXT: retq 4347; 4348; CHECK-SSE2-LABEL: in_v4i64: 4349; CHECK-SSE2: # %bb.0: 4350; CHECK-SSE2-NEXT: movaps (%rdx), %xmm0 4351; CHECK-SSE2-NEXT: movaps 16(%rdx), %xmm1 4352; CHECK-SSE2-NEXT: movaps %xmm0, %xmm2 4353; CHECK-SSE2-NEXT: andnps (%rsi), %xmm2 4354; CHECK-SSE2-NEXT: andps (%rdi), %xmm0 4355; CHECK-SSE2-NEXT: orps %xmm2, %xmm0 4356; CHECK-SSE2-NEXT: movaps %xmm1, %xmm2 4357; CHECK-SSE2-NEXT: andnps 16(%rsi), %xmm2 4358; CHECK-SSE2-NEXT: andps 16(%rdi), %xmm1 4359;
CHECK-SSE2-NEXT: orps %xmm2, %xmm1 4360; CHECK-SSE2-NEXT: retq 4361; 4362; CHECK-XOP-LABEL: in_v4i64: 4363; CHECK-XOP: # %bb.0: 4364; CHECK-XOP-NEXT: vmovdqa (%rdi), %ymm0 4365; CHECK-XOP-NEXT: vmovdqa (%rdx), %ymm1 4366; CHECK-XOP-NEXT: vpcmov %ymm1, (%rsi), %ymm0, %ymm0 4367; CHECK-XOP-NEXT: retq 4368 %x = load <4 x i64>, ptr%px, align 32 4369 %y = load <4 x i64>, ptr%py, align 32 4370 %mask = load <4 x i64>, ptr%pmask, align 32 4371 %n0 = xor <4 x i64> %x, %y 4372 %n1 = and <4 x i64> %n0, %mask 4373 %r = xor <4 x i64> %n1, %y 4374 ret <4 x i64> %r 4375} 4376