; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse,-sse2 < %s | FileCheck %s --check-prefix=CHECK-SSE1
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+sse,+sse2 < %s | FileCheck %s --check-prefix=CHECK-SSE2
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+xop < %s | FileCheck %s --check-prefix=CHECK-XOP

; ============================================================================ ;
; Various cases with %x and/or %y being a constant
; ============================================================================ ;

; Masked merge, "out" form: (x & mask) | (-1 & ~mask), with y unused.
define <4 x i32> @out_constant_varx_mone(ptr%px, ptr%py, ptr%pmask) {
; CHECK-SSE1-LABEL: out_constant_varx_mone:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
; CHECK-SSE1-NEXT:    xorps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andps (%rsi), %xmm0
; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: out_constant_varx_mone:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movdqa (%rdx), %xmm0
; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
; CHECK-SSE2-NEXT:    pxor %xmm0, %xmm1
; CHECK-SSE2-NEXT:    pand (%rdi), %xmm0
; CHECK-SSE2-NEXT:    por %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: out_constant_varx_mone:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm0
; CHECK-XOP-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-XOP-NEXT:    vpxor %xmm1, %xmm0, %xmm1
; CHECK-XOP-NEXT:    vpand (%rdi), %xmm0, %xmm0
; CHECK-XOP-NEXT:    vpor %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, ptr%px, align 16
  %y = load <4 x i32>, ptr%py, align 16
  %mask = load <4 x i32>, ptr%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %mask, %x
  %my = and <4 x i32> %notmask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}

; Masked merge, "in" form: ((x ^ -1) & mask) ^ -1, with y unused.
define <4 x i32> @in_constant_varx_mone(ptr%px, ptr%py, ptr%pmask) {
; CHECK-SSE1-LABEL: in_constant_varx_mone:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movaps (%rsi), %xmm0
; CHECK-SSE1-NEXT:    andnps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: in_constant_varx_mone:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movdqa (%rdi), %xmm0
; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
; CHECK-SSE2-NEXT:    pandn (%rdx), %xmm0
; CHECK-SSE2-NEXT:    pxor %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: in_constant_varx_mone:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rdi), %xmm0
; CHECK-XOP-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-XOP-NEXT:    vpandn (%rdx), %xmm0, %xmm0
; CHECK-XOP-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, ptr%px, align 16
  %y = load <4 x i32>, ptr%py, align 16
  %mask = load <4 x i32>, ptr%pmask, align 16
  %n0 = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> ; %x
  %n1 = and <4 x i32> %n0, %mask
  %r = xor <4 x i32> %n1, <i32 -1, i32 -1, i32 -1, i32 -1>
  ret <4 x i32> %r
}

; This is not a canonical form. Testing for completeness only.
; "out" form with the mask inverted: (~mask & x) | (mask & -1).
define <4 x i32> @out_constant_varx_mone_invmask(ptr%px, ptr%py, ptr%pmask) {
; CHECK-SSE1-LABEL: out_constant_varx_mone_invmask:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movaps (%rsi), %xmm0
; CHECK-SSE1-NEXT:    orps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: out_constant_varx_mone_invmask:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rdi), %xmm0
; CHECK-SSE2-NEXT:    orps (%rdx), %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: out_constant_varx_mone_invmask:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovaps (%rdi), %xmm0
; CHECK-XOP-NEXT:    vorps (%rdx), %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, ptr%px, align 16
  %y = load <4 x i32>, ptr%py, align 16
  %mask = load <4 x i32>, ptr%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %notmask, %x
  %my = and <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}

; This is not a canonical form. Testing for completeness only.
; "in" form with the mask inverted: ((x ^ -1) & ~mask) ^ -1.
define <4 x i32> @in_constant_varx_mone_invmask(ptr%px, ptr%py, ptr%pmask) {
; CHECK-SSE1-LABEL: in_constant_varx_mone_invmask:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movaps (%rsi), %xmm0
; CHECK-SSE1-NEXT:    movaps {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm2
; CHECK-SSE1-NEXT:    xorps %xmm1, %xmm2
; CHECK-SSE1-NEXT:    andnps %xmm2, %xmm0
; CHECK-SSE1-NEXT:    xorps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: in_constant_varx_mone_invmask:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rdi), %xmm0
; CHECK-SSE2-NEXT:    orps (%rdx), %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: in_constant_varx_mone_invmask:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovaps (%rdi), %xmm0
; CHECK-XOP-NEXT:    vorps (%rdx), %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, ptr%px, align 16
  %y = load <4 x i32>, ptr%py, align 16
  %mask = load <4 x i32>, ptr%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %n0 = xor <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1> ; %x
  %n1 = and <4 x i32> %n0, %notmask
  %r = xor <4 x i32> %n1, <i32 -1, i32 -1, i32 -1, i32 -1>
  ret <4 x i32> %r
}

; "out" form with constant y = 42: (mask & x) | (~mask & 42).
define <4 x i32> @out_constant_varx_42(ptr%px, ptr%py, ptr%pmask) {
; CHECK-SSE1-LABEL: out_constant_varx_42:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps (%rsi), %xmm1
; CHECK-SSE1-NEXT:    andps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andnps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: out_constant_varx_42:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT:    movaps (%rdi), %xmm1
; CHECK-SSE2-NEXT:    andps %xmm0, %xmm1
; CHECK-SSE2-NEXT:    andnps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: out_constant_varx_42:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rdi), %xmm0
; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm1
; CHECK-XOP-NEXT:    vpcmov %xmm1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, ptr%px, align 16
  %y = load <4 x i32>, ptr%py, align 16
  %mask = load <4 x i32>, ptr%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %mask, %x
  %my = and <4 x i32> %notmask, <i32 42, i32 42, i32 42, i32 42>
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}

; "in" form with constant y = 42: ((x ^ 42) & mask) ^ 42.
define <4 x i32> @in_constant_varx_42(ptr%px, ptr%py, ptr%pmask) {
; CHECK-SSE1-LABEL: in_constant_varx_42:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps (%rsi), %xmm1
; CHECK-SSE1-NEXT:    andps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andnps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: in_constant_varx_42:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT:    movaps (%rdi), %xmm1
; CHECK-SSE2-NEXT:    andps %xmm0, %xmm1
; CHECK-SSE2-NEXT:    andnps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: in_constant_varx_42:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rdi), %xmm0
; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm1
; CHECK-XOP-NEXT:    vpcmov %xmm1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, ptr%px, align 16
  %y = load <4 x i32>, ptr%py, align 16
  %mask = load <4 x i32>, ptr%pmask, align 16
  %n0 = xor <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42> ; %x
  %n1 = and <4 x i32> %n0, %mask
  %r = xor <4 x i32> %n1, <i32 42, i32 42, i32 42, i32 42>
  ret <4 x i32> %r
}

; This is not a canonical form. Testing for completeness only.
; "out" form, inverted mask, constant y = 42: (~mask & x) | (mask & 42).
define <4 x i32> @out_constant_varx_42_invmask(ptr%px, ptr%py, ptr%pmask) {
; CHECK-SSE1-LABEL: out_constant_varx_42_invmask:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andnps (%rsi), %xmm1
; CHECK-SSE1-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: out_constant_varx_42_invmask:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT:    movaps %xmm0, %xmm1
; CHECK-SSE2-NEXT:    andnps (%rdi), %xmm1
; CHECK-SSE2-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: out_constant_varx_42_invmask:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm0
; CHECK-XOP-NEXT:    vbroadcastss {{.*#+}} xmm1 = [42,42,42,42]
; CHECK-XOP-NEXT:    vpcmov %xmm0, (%rdi), %xmm1, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, ptr%px, align 16
  %y = load <4 x i32>, ptr%py, align 16
  %mask = load <4 x i32>, ptr%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %notmask, %x
  %my = and <4 x i32> %mask, <i32 42, i32 42, i32 42, i32 42>
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}

; This is not a canonical form. Testing for completeness only.
; "in" form, inverted mask, constant y = 42: ((x ^ 42) & ~mask) ^ 42.
define <4 x i32> @in_constant_varx_42_invmask(ptr%px, ptr%py, ptr%pmask) {
; CHECK-SSE1-LABEL: in_constant_varx_42_invmask:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andnps (%rsi), %xmm1
; CHECK-SSE1-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: in_constant_varx_42_invmask:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT:    movaps %xmm0, %xmm1
; CHECK-SSE2-NEXT:    andnps (%rdi), %xmm1
; CHECK-SSE2-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: in_constant_varx_42_invmask:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm0
; CHECK-XOP-NEXT:    vbroadcastss {{.*#+}} xmm1 = [42,42,42,42]
; CHECK-XOP-NEXT:    vpcmov %xmm0, (%rdi), %xmm1, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, ptr%px, align 16
  %y = load <4 x i32>, ptr%py, align 16
  %mask = load <4 x i32>, ptr%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %n0 = xor <4 x i32> %x, <i32 42, i32 42, i32 42, i32 42> ; %x
  %n1 = and <4 x i32> %n0, %notmask
  %r = xor <4 x i32> %n1, <i32 42, i32 42, i32 42, i32 42>
  ret <4 x i32> %r
}

; "out" form with constant x = -1: (mask & -1) | (~mask & y).
define <4 x i32> @out_constant_mone_vary(ptr%px, ptr%py, ptr%pmask) {
; CHECK-SSE1-LABEL: out_constant_mone_vary:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movaps (%rdx), %xmm0
; CHECK-SSE1-NEXT:    orps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: out_constant_mone_vary:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rsi), %xmm0
; CHECK-SSE2-NEXT:    orps (%rdx), %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: out_constant_mone_vary:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovaps (%rsi), %xmm0
; CHECK-XOP-NEXT:    vorps (%rdx), %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, ptr%px, align 16
  %y = load <4 x i32>, ptr%py, align 16
  %mask = load <4 x i32>, ptr%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %my = and <4 x i32> %notmask, %y
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}

; "in" form with constant x = -1: ((-1 ^ y) & mask) ^ y.
define <4 x i32> @in_constant_mone_vary(ptr%px, ptr%py, ptr%pmask) {
; CHECK-SSE1-LABEL: in_constant_mone_vary:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    orps (%rdx), %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: in_constant_mone_vary:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT:    orps (%rsi), %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: in_constant_mone_vary:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovaps (%rdx), %xmm0
; CHECK-XOP-NEXT:    vorps (%rsi), %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, ptr%px, align 16
  %y = load <4 x i32>, ptr%py, align 16
  %mask = load <4 x i32>, ptr%pmask, align 16
  %n0 = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %y ; %x
  %n1 = and <4 x i32> %n0, %mask
  %r = xor <4 x i32> %n1, %y
  ret <4 x i32> %r
}

; This is not a canonical form. Testing for completeness only.
; "out" form, inverted mask, constant x = -1: (~mask & -1) | (mask & y).
define <4 x i32> @out_constant_mone_vary_invmask(ptr%px, ptr%py, ptr%pmask) {
; CHECK-SSE1-LABEL: out_constant_mone_vary_invmask:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps {{.*#+}} xmm1 = [NaN,NaN,NaN,NaN]
; CHECK-SSE1-NEXT:    xorps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andps (%rdx), %xmm0
; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: out_constant_mone_vary_invmask:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movdqa (%rdx), %xmm0
; CHECK-SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
; CHECK-SSE2-NEXT:    pxor %xmm0, %xmm1
; CHECK-SSE2-NEXT:    pand (%rsi), %xmm0
; CHECK-SSE2-NEXT:    por %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: out_constant_mone_vary_invmask:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm0
; CHECK-XOP-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1
; CHECK-XOP-NEXT:    vpxor %xmm1, %xmm0, %xmm1
; CHECK-XOP-NEXT:    vpand (%rsi), %xmm0, %xmm0
; CHECK-XOP-NEXT:    vpor %xmm0, %xmm1, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, ptr%px, align 16
  %y = load <4 x i32>, ptr%py, align 16
  %mask = load <4 x i32>, ptr%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %notmask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %my = and <4 x i32> %mask, %y
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}

; This is not a canonical form. Testing for completeness only.
; "in" form, inverted mask, constant x = -1: ((-1 ^ y) & ~mask) ^ y.
define <4 x i32> @in_constant_mone_vary_invmask(ptr%px, ptr%py, ptr%pmask) {
; CHECK-SSE1-LABEL: in_constant_mone_vary_invmask:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE1-NEXT:    orps (%rdx), %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: in_constant_mone_vary_invmask:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    pcmpeqd %xmm0, %xmm0
; CHECK-SSE2-NEXT:    pxor (%rdx), %xmm0
; CHECK-SSE2-NEXT:    por (%rsi), %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: in_constant_mone_vary_invmask:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vpcmpeqd %xmm0, %xmm0, %xmm0
; CHECK-XOP-NEXT:    vpxor (%rdx), %xmm0, %xmm0
; CHECK-XOP-NEXT:    vpor (%rsi), %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, ptr%px, align 16
  %y = load <4 x i32>, ptr%py, align 16
  %mask = load <4 x i32>, ptr%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %n0 = xor <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, %y ; %x
  %n1 = and <4 x i32> %n0, %notmask
  %r = xor <4 x i32> %n1, %y
  ret <4 x i32> %r
}

; "out" form with constant x = 42: (mask & 42) | (~mask & y).
define <4 x i32> @out_constant_42_vary(ptr%px, ptr%py, ptr%pmask) {
; CHECK-SSE1-LABEL: out_constant_42_vary:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps {{.*#+}} xmm1 = [5.88545355E-44,5.88545355E-44,5.88545355E-44,5.88545355E-44]
; CHECK-SSE1-NEXT:    andps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andnps (%rdx), %xmm0
; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: out_constant_42_vary:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT:    movaps {{.*#+}} xmm1 = [42,42,42,42]
; CHECK-SSE2-NEXT:    andps %xmm0, %xmm1
; CHECK-SSE2-NEXT:    andnps (%rsi), %xmm0
; CHECK-SSE2-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: out_constant_42_vary:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm0
; CHECK-XOP-NEXT:    vbroadcastss {{.*#+}} xmm1 = [42,42,42,42]
; CHECK-XOP-NEXT:    vpcmov %xmm0, (%rsi), %xmm1, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, ptr%px, align 16
  %y = load <4 x i32>, ptr%py, align 16
  %mask = load <4 x i32>, ptr%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %mask, <i32 42, i32 42, i32 42, i32 42>
  %my = and <4 x i32> %notmask, %y
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}

; "in" form with constant x = 42: ((42 ^ y) & mask) ^ y.
define <4 x i32> @in_constant_42_vary(ptr%px, ptr%py, ptr%pmask) {
; CHECK-SSE1-LABEL: in_constant_42_vary:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andnps (%rdx), %xmm1
; CHECK-SSE1-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: in_constant_42_vary:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT:    movaps %xmm0, %xmm1
; CHECK-SSE2-NEXT:    andnps (%rsi), %xmm1
; CHECK-SSE2-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: in_constant_42_vary:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm0
; CHECK-XOP-NEXT:    vbroadcastss {{.*#+}} xmm1 = [42,42,42,42]
; CHECK-XOP-NEXT:    vpcmov %xmm0, (%rsi), %xmm1, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, ptr%px, align 16
  %y = load <4 x i32>, ptr%py, align 16
  %mask = load <4 x i32>, ptr%pmask, align 16
  %n0 = xor <4 x i32> <i32 42, i32 42, i32 42, i32 42>, %y ; %x
  %n1 = and <4 x i32> %n0, %mask
  %r = xor <4 x i32> %n1, %y
  ret <4 x i32> %r
}

; This is not a canonical form. Testing for completeness only.
; "out" form, inverted mask, constant x = 42: (~mask & 42) | (mask & y).
define <4 x i32> @out_constant_42_vary_invmask(ptr%px, ptr%py, ptr%pmask) {
; CHECK-SSE1-LABEL: out_constant_42_vary_invmask:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andnps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-SSE1-NEXT:    andps (%rdx), %xmm0
; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: out_constant_42_vary_invmask:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT:    movaps %xmm0, %xmm1
; CHECK-SSE2-NEXT:    andnps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; CHECK-SSE2-NEXT:    andps (%rsi), %xmm0
; CHECK-SSE2-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: out_constant_42_vary_invmask:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rsi), %xmm0
; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm1
; CHECK-XOP-NEXT:    vpcmov %xmm1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, ptr%px, align 16
  %y = load <4 x i32>, ptr%py, align 16
  %mask = load <4 x i32>, ptr%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %mx = and <4 x i32> %notmask, <i32 42, i32 42, i32 42, i32 42>
  %my = and <4 x i32> %mask, %y
  %r = or <4 x i32> %mx, %my
  ret <4 x i32> %r
}

; This is not a canonical form. Testing for completeness only.
; "in" form, inverted mask, constant x = 42: ((42 ^ y) & ~mask) ^ y.
define <4 x i32> @in_constant_42_vary_invmask(ptr%px, ptr%py, ptr%pmask) {
; CHECK-SSE1-LABEL: in_constant_42_vary_invmask:
; CHECK-SSE1:       # %bb.0:
; CHECK-SSE1-NEXT:    movq %rdi, %rax
; CHECK-SSE1-NEXT:    movaps (%rcx), %xmm0
; CHECK-SSE1-NEXT:    movaps (%rdx), %xmm1
; CHECK-SSE1-NEXT:    andps %xmm0, %xmm1
; CHECK-SSE1-NEXT:    andnps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE1-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE1-NEXT:    movaps %xmm0, (%rdi)
; CHECK-SSE1-NEXT:    retq
;
; CHECK-SSE2-LABEL: in_constant_42_vary_invmask:
; CHECK-SSE2:       # %bb.0:
; CHECK-SSE2-NEXT:    movaps (%rdx), %xmm0
; CHECK-SSE2-NEXT:    movaps (%rsi), %xmm1
; CHECK-SSE2-NEXT:    andps %xmm0, %xmm1
; CHECK-SSE2-NEXT:    andnps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-SSE2-NEXT:    orps %xmm1, %xmm0
; CHECK-SSE2-NEXT:    retq
;
; CHECK-XOP-LABEL: in_constant_42_vary_invmask:
; CHECK-XOP:       # %bb.0:
; CHECK-XOP-NEXT:    vmovdqa (%rsi), %xmm0
; CHECK-XOP-NEXT:    vmovdqa (%rdx), %xmm1
; CHECK-XOP-NEXT:    vpcmov %xmm1, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; CHECK-XOP-NEXT:    retq
  %x = load <4 x i32>, ptr%px, align 16
  %y = load <4 x i32>, ptr%py, align 16
  %mask = load <4 x i32>, ptr%pmask, align 16
  %notmask = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %n0 = xor <4 x i32> <i32 42, i32 42, i32 42, i32 42>, %y ; %x
  %n1 = and <4 x i32> %n0, %notmask
  %r = xor <4 x i32> %n1, %y
  ret <4 x i32> %r
}