; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,X86,X86-SLOW
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=CHECK,X86,X86-FAST-ALL
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=CHECK,X86,X86-FAST-PERLANE
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,X64,X64-SLOW
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=CHECK,X64,X64-FAST-ALL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=CHECK,X64,X64-FAST-PERLANE

define <4 x i32> @trunc4(<4 x i64> %A) nounwind {
; X86-SLOW-LABEL: trunc4:
; X86-SLOW:       # %bb.0:
; X86-SLOW-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X86-SLOW-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X86-SLOW-NEXT:    vzeroupper
; X86-SLOW-NEXT:    retl
;
; X86-FAST-ALL-LABEL: trunc4:
; X86-FAST-ALL:       # %bb.0:
; X86-FAST-ALL-NEXT:    vbroadcastf128 {{.*#+}} ymm1 = [0,2,4,6,0,2,4,6]
; X86-FAST-ALL-NEXT:    # ymm1 = mem[0,1,0,1]
; X86-FAST-ALL-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; X86-FAST-ALL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; X86-FAST-ALL-NEXT:    vzeroupper
; X86-FAST-ALL-NEXT:    retl
;
; X86-FAST-PERLANE-LABEL: trunc4:
; X86-FAST-PERLANE:       # %bb.0:
; X86-FAST-PERLANE-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X86-FAST-PERLANE-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X86-FAST-PERLANE-NEXT:    vzeroupper
; X86-FAST-PERLANE-NEXT:    retl
;
; X64-SLOW-LABEL: trunc4:
; X64-SLOW:       # %bb.0:
; X64-SLOW-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X64-SLOW-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X64-SLOW-NEXT:    vzeroupper
; X64-SLOW-NEXT:    retq
;
; X64-FAST-ALL-LABEL: trunc4:
; X64-FAST-ALL:       # %bb.0:
; X64-FAST-ALL-NEXT:    vbroadcastf128 {{.*#+}} ymm1 = [0,2,4,6,0,2,4,6]
; X64-FAST-ALL-NEXT:    # ymm1 = mem[0,1,0,1]
; X64-FAST-ALL-NEXT:    vpermps %ymm0, %ymm1, %ymm0
; X64-FAST-ALL-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; X64-FAST-ALL-NEXT:    vzeroupper
; X64-FAST-ALL-NEXT:    retq
;
; X64-FAST-PERLANE-LABEL: trunc4:
; X64-FAST-PERLANE:       # %bb.0:
; X64-FAST-PERLANE-NEXT:    vextractf128 $1, %ymm0, %xmm1
; X64-FAST-PERLANE-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2]
; X64-FAST-PERLANE-NEXT:    vzeroupper
; X64-FAST-PERLANE-NEXT:    retq
  %B = trunc <4 x i64> %A to <4 x i32>
  ret <4 x i32>%B
}

define <8 x i16> @trunc8(<8 x i32> %A) nounwind {
; CHECK-LABEL: trunc8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u]
; CHECK-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3]
; CHECK-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    ret{{[l|q]}}
  %B = trunc <8 x i32> %A to <8 x i16>
  ret <8 x i16>%B
}

define <4 x i64> @sext4(<4 x i32> %A) nounwind {
; CHECK-LABEL: sext4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovsxdq %xmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %B = sext <4 x i32> %A to <4 x i64>
  ret <4 x i64>%B
}

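; sext <8 x i16> to <8 x i32> selects a single vpmovsxwd on every RUN configuration.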
define <8 x i32> @sext8(<8 x i16> %A) nounwind {
; CHECK-LABEL: sext8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovsxwd %xmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %B = sext <8 x i16> %A to <8 x i32>
  ret <8 x i32>%B
}

define <4 x i64> @zext4(<4 x i32> %A) nounwind {
; CHECK-LABEL: zext4:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; CHECK-NEXT:    ret{{[l|q]}}
  %B = zext <4 x i32> %A to <4 x i64>
  ret <4 x i64>%B
}

define <8 x i32> @zext8(<8 x i16> %A) nounwind {
; CHECK-LABEL: zext8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; CHECK-NEXT:    ret{{[l|q]}}
  %B = zext <8 x i16> %A to <8 x i32>
  ret <8 x i32>%B
}

define <8 x i32> @zext_8i8_8i32(<8 x i8> %A) nounwind {
; CHECK-LABEL: zext_8i8_8i32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; CHECK-NEXT:    ret{{[l|q]}}
  %B = zext <8 x i8> %A to <8 x i32>
  ret <8 x i32>%B
}

define <16 x i16> @zext_16i8_16i16(<16 x i8> %z) {
; CHECK-LABEL: zext_16i8_16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
; CHECK-NEXT:    ret{{[l|q]}}
  %t = zext <16 x i8> %z to <16 x i16>
  ret <16 x i16> %t
}

define <16 x i16> @sext_16i8_16i16(<16 x i8> %z) {
; CHECK-LABEL: sext_16i8_16i16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmovsxbw %xmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %t = sext <16 x i8> %z to <16 x i16>
  ret <16 x i16> %t
}

define <16 x i8> @trunc_16i16_16i8(<16 x i16> %z) {
; X86-LABEL: trunc_16i16_16i8:
; X86:       # %bb.0:
; X86-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
; X86-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X86-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: trunc_16i16_16i8:
; X64:       # %bb.0:
; X64-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; X64-NEXT:    vextracti128 $1, %ymm0, %xmm1
; X64-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
  %t = trunc <16 x i16> %z to <16 x i8>
  ret <16 x i8> %t
}

define <4 x i64> @load_sext_test1(ptr%ptr) {
; X86-LABEL: load_sext_test1:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpmovsxdq (%eax), %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: load_sext_test1:
; X64:       # %bb.0:
; X64-NEXT:    vpmovsxdq (%rdi), %ymm0
; X64-NEXT:    retq
  %X = load <4 x i32>, ptr %ptr
  %Y = sext <4 x i32> %X to <4 x i64>
  ret <4 x i64>%Y
}

define <4 x i64> @load_sext_test2(ptr%ptr) {
; X86-LABEL: load_sext_test2:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpmovsxbq (%eax), %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: load_sext_test2:
; X64:       # %bb.0:
; X64-NEXT:    vpmovsxbq (%rdi), %ymm0
; X64-NEXT:    retq
  %X = load <4 x i8>, ptr %ptr
  %Y = sext <4 x i8> %X to <4 x i64>
  ret <4 x i64>%Y
}

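; The sign-extending <4 x i16> load folds into vpmovsxwq with a memory operand.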
define <4 x i64> @load_sext_test3(ptr%ptr) {
; X86-LABEL: load_sext_test3:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpmovsxwq (%eax), %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: load_sext_test3:
; X64:       # %bb.0:
; X64-NEXT:    vpmovsxwq (%rdi), %ymm0
; X64-NEXT:    retq
  %X = load <4 x i16>, ptr %ptr
  %Y = sext <4 x i16> %X to <4 x i64>
  ret <4 x i64>%Y
}

define <8 x i32> @load_sext_test4(ptr%ptr) {
; X86-LABEL: load_sext_test4:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpmovsxwd (%eax), %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: load_sext_test4:
; X64:       # %bb.0:
; X64-NEXT:    vpmovsxwd (%rdi), %ymm0
; X64-NEXT:    retq
  %X = load <8 x i16>, ptr %ptr
  %Y = sext <8 x i16> %X to <8 x i32>
  ret <8 x i32>%Y
}

define <8 x i32> @load_sext_test5(ptr%ptr) {
; X86-LABEL: load_sext_test5:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpmovsxbd (%eax), %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: load_sext_test5:
; X64:       # %bb.0:
; X64-NEXT:    vpmovsxbd (%rdi), %ymm0
; X64-NEXT:    retq
  %X = load <8 x i8>, ptr %ptr
  %Y = sext <8 x i8> %X to <8 x i32>
  ret <8 x i32>%Y
}