; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefix=AVX2

define <8 x i32> @zext_and_v8i32(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: zext_and_v8i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pand %xmm0, %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    retq
;
; AVX2-LABEL: zext_and_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    retq
  %xz = zext <8 x i16> %x to <8 x i32>
  %yz = zext <8 x i16> %y to <8 x i32>
  %r = and <8 x i32> %xz, %yz
  ret <8 x i32> %r
}

define <8 x i32> @zext_or_v8i32(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: zext_or_v8i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    por %xmm0, %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    retq
;
; AVX2-LABEL: zext_or_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    retq
  %xz = zext <8 x i16> %x to <8 x i32>
  %yz = zext <8 x i16> %y to <8 x i32>
  %r = or <8 x i32> %xz, %yz
  ret <8 x i32> %r
}

define <8 x i32> @zext_xor_v8i32(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: zext_xor_v8i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm0, %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    retq
;
; AVX2-LABEL: zext_xor_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    retq
  %xz = zext <8 x i16> %x to <8 x i32>
  %yz = zext <8 x i16> %y to <8 x i32>
  %r = xor <8 x i32> %xz, %yz
  ret <8 x i32> %r
}

define <8 x i32> @sext_and_v8i32(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: sext_and_v8i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT:    psrad $16, %xmm2
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-NEXT:    psrad $16, %xmm1
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: sext_and_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovsxwd %xmm0, %ymm0
; AVX2-NEXT:    retq
  %xs = sext <8 x i16> %x to <8 x i32>
  %ys = sext <8 x i16> %y to <8 x i32>
  %r = and <8 x i32> %xs, %ys
  ret <8 x i32> %r
}

define <8 x i32> @sext_or_v8i32(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: sext_or_v8i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT:    psrad $16, %xmm2
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-NEXT:    psrad $16, %xmm1
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: sext_or_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovsxwd %xmm0, %ymm0
; AVX2-NEXT:    retq
  %xs = sext <8 x i16> %x to <8 x i32>
  %ys = sext <8 x i16> %y to <8 x i32>
  %r = or <8 x i32> %xs, %ys
  ret <8 x i32> %r
}

define <8 x i32> @sext_xor_v8i32(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: sext_xor_v8i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT:    psrad $16, %xmm2
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-NEXT:    psrad $16, %xmm1
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: sext_xor_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovsxwd %xmm0, %ymm0
; AVX2-NEXT:    retq
  %xs = sext <8 x i16> %x to <8 x i32>
  %ys = sext <8 x i16> %y to <8 x i32>
  %r = xor <8 x i32> %xs, %ys
  ret <8 x i32> %r
}

define <8 x i16> @zext_and_v8i16(<8 x i8> %x, <8 x i8> %y) {
; SSE2-LABEL: zext_and_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: zext_and_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %xz = zext <8 x i8> %x to <8 x i16>
  %yz = zext <8 x i8> %y to <8 x i16>
  %r = and <8 x i16> %xz, %yz
  ret <8 x i16> %r
}

define <8 x i16> @zext_or_v8i16(<8 x i8> %x, <8 x i8> %y) {
; SSE2-LABEL: zext_or_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT:    retq
;
; AVX2-LABEL: zext_or_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    retq
  %xz = zext <8 x i8> %x to <8 x i16>
  %yz = zext <8 x i8> %y to <8 x i16>
  %r = or <8 x i16> %xz, %yz
  ret <8 x i16> %r
}

define <8 x i16> @zext_xor_v8i16(<8 x i8> %x, <8 x i8> %y) {
; SSE2-LABEL: zext_xor_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm1, %xmm0
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT:    retq
;
; AVX2-LABEL: zext_xor_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    retq
  %xz = zext <8 x i8> %x to <8 x i16>
  %yz = zext <8 x i8> %y to <8 x i16>
  %r = xor <8 x i16> %xz, %yz
  ret <8 x i16> %r
}

define <8 x i16> @sext_and_v8i16(<8 x i8> %x, <8 x i8> %y) {
; SSE2-LABEL: sext_and_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    psraw $8, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: sext_and_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovsxbw %xmm0, %xmm0
; AVX2-NEXT:    retq
  %xs = sext <8 x i8> %x to <8 x i16>
  %ys = sext <8 x i8> %y to <8 x i16>
  %r = and <8 x i16> %xs, %ys
  ret <8 x i16> %r
}

define <8 x i16> @sext_or_v8i16(<8 x i8> %x, <8 x i8> %y) {
; SSE2-LABEL: sext_or_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    psraw $8, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: sext_or_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovsxbw %xmm0, %xmm0
; AVX2-NEXT:    retq
  %xs = sext <8 x i8> %x to <8 x i16>
  %ys = sext <8 x i8> %y to <8 x i16>
  %r = or <8 x i16> %xs, %ys
  ret <8 x i16> %r
}

define <8 x i16> @sext_xor_v8i16(<8 x i8> %x, <8 x i8> %y) {
; SSE2-LABEL: sext_xor_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm1, %xmm0
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    psraw $8, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: sext_xor_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovsxbw %xmm0, %xmm0
; AVX2-NEXT:    retq
  %xs = sext <8 x i8> %x to <8 x i16>
  %ys = sext <8 x i8> %y to <8 x i16>
  %r = xor <8 x i16> %xs, %ys
  ret <8 x i16> %r
}

define <8 x i32> @bool_zext_and(<8 x i1> %x, <8 x i1> %y) {
; SSE2-LABEL: bool_zext_and:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm3
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm3 = xmm3[4,4,5,5,6,6,7,7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm4, %xmm4
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
; SSE2-NEXT:    pand %xmm3, %xmm2
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    movdqa %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; AVX2-LABEL: bool_zext_and:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %xz = zext <8 x i1> %x to <8 x i32>
  %yz = zext <8 x i1> %y to <8 x i32>
  %r = and <8 x i32> %xz, %yz
  ret <8 x i32> %r
}

define <8 x i32> @bool_zext_or(<8 x i1> %x, <8 x i1> %y) {
; SSE2-LABEL: bool_zext_or:
; SSE2:       # %bb.0:
; SSE2-NEXT:    por %xmm0, %xmm1
; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    retq
;
; AVX2-LABEL: bool_zext_or:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    retq
  %xz = zext <8 x i1> %x to <8 x i32>
  %yz = zext <8 x i1> %y to <8 x i32>
  %r = or <8 x i32> %xz, %yz
  ret <8 x i32> %r
}

define <8 x i32> @bool_zext_xor(<8 x i1> %x, <8 x i1> %y) {
; SSE2-LABEL: bool_zext_xor:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm0, %xmm1
; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    retq
;
; AVX2-LABEL: bool_zext_xor:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    retq
  %xz = zext <8 x i1> %x to <8 x i32>
  %yz = zext <8 x i1> %y to <8 x i32>
  %r = xor <8 x i32> %xz, %yz
  ret <8 x i32> %r
}

define <8 x i32> @bool_sext_and(<8 x i1> %x, <8 x i1> %y) {
; SSE2-LABEL: bool_sext_and:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pand %xmm0, %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT:    pslld $31, %xmm0
; SSE2-NEXT:    psrad $31, %xmm0
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pslld $31, %xmm1
; SSE2-NEXT:    psrad $31, %xmm1
; SSE2-NEXT:    retq
;
; AVX2-LABEL: bool_sext_and:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %xs = sext <8 x i1> %x to <8 x i32>
  %ys = sext <8 x i1> %y to <8 x i32>
  %r = and <8 x i32> %xs, %ys
  ret <8 x i32> %r
}

define <8 x i32> @bool_sext_or(<8 x i1> %x, <8 x i1> %y) {
; SSE2-LABEL: bool_sext_or:
; SSE2:       # %bb.0:
; SSE2-NEXT:    por %xmm0, %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT:    pslld $31, %xmm0
; SSE2-NEXT:    psrad $31, %xmm0
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pslld $31, %xmm1
; SSE2-NEXT:    psrad $31, %xmm1
; SSE2-NEXT:    retq
;
; AVX2-LABEL: bool_sext_or:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %xs = sext <8 x i1> %x to <8 x i32>
  %ys = sext <8 x i1> %y to <8 x i32>
  %r = or <8 x i32> %xs, %ys
  ret <8 x i32> %r
}

define <8 x i32> @bool_sext_xor(<8 x i1> %x, <8 x i1> %y) {
; SSE2-LABEL: bool_sext_xor:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm0, %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT:    pslld $31, %xmm0
; SSE2-NEXT:    psrad $31, %xmm0
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pslld $31, %xmm1
; SSE2-NEXT:    psrad $31, %xmm1
; SSE2-NEXT:    retq
;
; AVX2-LABEL: bool_sext_xor:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %xs = sext <8 x i1> %x to <8 x i32>
  %ys = sext <8 x i1> %y to <8 x i32>
  %r = xor <8 x i32> %xs, %ys
  ret <8 x i32> %r
}