; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefix=SSE
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefix=AVX

; These tests feed vector binary operators with operands whose lanes are
; partially undef (either via a widening shuffle with undef mask elements or
; via a constant vector with undef elements) and check the resulting codegen
; for an sse2 target and an avx2 target.

; xor undef, undef --> 0 because it's not worth fighting to make that return undef?
; Both operands are widened with the mask <0, 1, undef, undef>, so only the low
; two lanes of the 4-lane xor have defined inputs; the upper two lanes are
; 'undef ^ undef'.

define <4 x i64> @xor_insert_insert(<2 x i64> %x, <2 x i64> %y) {
; SSE-LABEL: xor_insert_insert:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm1, %xmm0
; SSE-NEXT:    xorps %xmm1, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: xor_insert_insert:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %xw = shufflevector <2 x i64> %x, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %yw = shufflevector <2 x i64> %y, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %r = xor <4 x i64> %xw, %yw
  ret <4 x i64> %r
}

; Same pattern as xor_insert_insert, but the widening mask is
; <undef, undef, 0, 1>: the defined inputs land in the upper two lanes and the
; lower two lanes are 'undef ^ undef'.

define <4 x i64> @xor_insert_insert_high_half(<2 x i64> %x, <2 x i64> %y) {
; SSE-LABEL: xor_insert_insert_high_half:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm1
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: xor_insert_insert_high_half:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT:    retq
  %xw = shufflevector <2 x i64> %x, <2 x i64> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
  %yw = shufflevector <2 x i64> %y, <2 x i64> undef, <4 x i32> <i32 undef, i32 undef, i32 0, i32 1>
  %r = xor <4 x i64> %xw, %yw
  ret <4 x i64> %r
}

; All elements of the add are undefined:
;   x[0] , x[1] , x[2] , x[3] ,  u  ,  u  ,  u  ,  u
; +  u   ,  u   ,  u   ,  u   ,  42 ,  43 ,  44 ,  12
; Every lane has an undef operand on one side, and the final shuffle only
; permutes lanes of that add, so both runs are expected to emit nothing but
; the return.

define <8 x i32> @add_undef_elts(<4 x i32> %x) {
; SSE-LABEL: add_undef_elts:
; SSE:       # %bb.0:
; SSE-NEXT:    retq
;
; AVX-LABEL: add_undef_elts:
; AVX:       # %bb.0:
; AVX-NEXT:    retq
  %extend = shufflevector <4 x i32> %x, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  %bogus_bo = add <8 x i32> %extend, <i32 undef, i32 undef, i32 undef, i32 undef, i32 42, i32 43, i32 44, i32 12>
  %arbitrary_shuf = shufflevector <8 x i32> %bogus_bo, <8 x i32> undef, <8 x i32> <i32 6, i32 0, i32 5, i32 4, i32 3, i32 2, i32 1, i32 7>
  ret <8 x i32> %arbitrary_shuf
}

; Verify that constant operand 0 for a sub works too.
; Same lane layout as add_undef_elts, but the partially-undef constant is the
; first operand of the (non-commutative) sub and the widened %x is the second.

define <8 x i32> @sub_undef_elts(<4 x i32> %x) {
; SSE-LABEL: sub_undef_elts:
; SSE:       # %bb.0:
; SSE-NEXT:    retq
;
; AVX-LABEL: sub_undef_elts:
; AVX:       # %bb.0:
; AVX-NEXT:    retq
  %extend = shufflevector <4 x i32> %x, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  %bogus_bo = sub <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 42, i32 43, i32 44, i32 12>, %extend
  %arbitrary_shuf = shufflevector <8 x i32> %bogus_bo, <8 x i32> undef, <8 x i32> <i32 1, i32 0, i32 5, i32 4, i32 3, i32 2, i32 6, i32 7>
  ret <8 x i32> %arbitrary_shuf
}

; and undef, C --> 0, so this tests that we are tracking known zero lanes.
; Lanes 0-1 are 'x & undef' and lanes 2-3 are 'undef & C'; both runs are
; expected to return an all-zero vector.

define <4 x i64> @and_undef_elts(<2 x i64> %x) {
; SSE-LABEL: and_undef_elts:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    xorps %xmm1, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: and_undef_elts:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
  %extend = shufflevector <2 x i64> %x, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %bogus_bo = and <4 x i64> %extend, <i64 undef, i64 undef, i64 42, i64 43>
  %arbitrary_shuf = shufflevector <4 x i64> %bogus_bo, <4 x i64> undef, <4 x i32> <i32 3, i32 0, i32 1, i32 2>
  ret <4 x i64> %arbitrary_shuf
}

; or undef, C --> -1, so this tests that we are tracking known all-ones lanes.
; Lanes 0-1 are 'x | undef' and lanes 2-3 are 'undef | C'.
; NOTE(review): the sse2 assertions materialize all-ones with pcmpeqd, but the
; avx2 assertions still contain a permute plus an OR with a constant-pool load.
; That may be stale or a genuinely missed fold - regenerate the assertions with
; utils/update_llc_test_checks.py to confirm before relying on them.

define <4 x i64> @or_undef_elts(<2 x i64> %x) {
; SSE-LABEL: or_undef_elts:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqd %xmm0, %xmm0
; SSE-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: or_undef_elts:
; AVX:       # %bb.0:
; AVX-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,0,1,2]
; AVX-NEXT:    vorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX-NEXT:    retq
  %extend = shufflevector <2 x i64> %x, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %bogus_bo = or <4 x i64> %extend, <i64 undef, i64 undef, i64 42, i64 43>
  %arbitrary_shuf = shufflevector <4 x i64> %bogus_bo, <4 x i64> undef, <4 x i32> <i32 3, i32 0, i32 1, i32 2>
  ret <4 x i64> %arbitrary_shuf
}

; Verify that this isn't limited to high/low halves.
; Here the defined elements of %x are placed in the middle four lanes (mask
; <undef, undef, 1, 3, 0, 2, undef, undef>) and the constant is defined only in
; the outer four lanes, so every lane of the xor still has an undef operand.

define <8 x i32> @xor_undef_elts(<4 x i32> %x) {
; SSE-LABEL: xor_undef_elts:
; SSE:       # %bb.0:
; SSE-NEXT:    retq
;
; AVX-LABEL: xor_undef_elts:
; AVX:       # %bb.0:
; AVX-NEXT:    retq
  %extend = shufflevector <4 x i32> %x, <4 x i32> undef, <8 x i32> <i32 undef, i32 undef, i32 1, i32 3, i32 0, i32 2, i32 undef, i32 undef>
  %bogus_bo = xor <8 x i32> %extend, <i32 42, i32 43, i32 undef, i32 undef, i32 undef, i32 undef, i32 44, i32 12>
  %arbitrary_shuf = shufflevector <8 x i32> %bogus_bo, <8 x i32> undef, <8 x i32> <i32 6, i32 1, i32 5, i32 4, i32 3, i32 2, i32 0, i32 7>
  ret <8 x i32> %arbitrary_shuf
}

; Verify that this isn't limited to high/low halves.
; Special case: the undef-ness of the 1st shuffle may be lost if we turn that into vector concat.
; The only difference from xor_undef_elts is the first shuffle mask, which is
; <undef, undef, 2, 3, 0, 1, undef, undef> here; the assertions below record
; that the operation is currently not folded away for this variant.

define <8 x i32> @xor_undef_elts_alt(<4 x i32> %x) {
; SSE-LABEL: xor_undef_elts_alt:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps %xmm0, %xmm1
; SSE-NEXT:    movaps {{.*#+}} xmm2 = [u,u,44,12]
; SSE-NEXT:    xorps %xmm0, %xmm2
; SSE-NEXT:    xorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,0],xmm2[2,0]
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm2[1,0]
; SSE-NEXT:    shufps {{.*#+}} xmm2 = xmm2[3,0],xmm1[0,0]
; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[3,2],xmm2[2,0]
; SSE-NEXT:    retq
;
; AVX-LABEL: xor_undef_elts_alt:
; AVX:       # %bb.0:
; AVX-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX-NEXT:    vxorps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX-NEXT:    vmovaps {{.*#+}} ymm1 = [6,1,5,4,3,2,0,7]
; AVX-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; AVX-NEXT:    retq
  %extend = shufflevector <4 x i32> %x, <4 x i32> undef, <8 x i32> <i32 undef, i32 undef, i32 2, i32 3, i32 0, i32 1, i32 undef, i32 undef>
  %bogus_bo = xor <8 x i32> %extend, <i32 42, i32 43, i32 undef, i32 undef, i32 undef, i32 undef, i32 44, i32 12>
  %arbitrary_shuf = shufflevector <8 x i32> %bogus_bo, <8 x i32> undef, <8 x i32> <i32 6, i32 1, i32 5, i32 4, i32 3, i32 2, i32 0, i32 7>
  ret <8 x i32> %arbitrary_shuf
}
