; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skylake | FileCheck %s --check-prefixes=CHECK,CHECK-AVX2,CHECK-SKL
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,CHECK-AVX2,CHECK-V3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=icelake-server | FileCheck %s --check-prefixes=CHECK,CHECK-ICX,CHECK-ICX-NO-BYPASS-DELAY
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=icelake-server -mattr=-no-bypass-delay-shuffle | FileCheck %s --check-prefixes=CHECK,CHECK-ICX,CHECK-ICX-BYPASS-DELAY
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge -mattr=+no-bypass-delay-shuffle | FileCheck %s --check-prefixes=CHECK,CHECK-SNB,CHECK-SNB-NO-BYPASS-DELAY
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=sandybridge -mattr=-no-bypass-delay-shuffle | FileCheck %s --check-prefixes=CHECK,CHECK-SNB,CHECK-SNB-BYPASS-DELAY

; Overview: every function below performs a single shufflevector on
; <8 x float> or <4 x float> operands whose mask moves adjacent pairs of
; float elements (64 bits at a time). The file is compiled for skylake,
; x86-64-v3, icelake-server (default and with -mattr=-no-bypass-delay-shuffle)
; and sandybridge (with -mattr=+no-bypass-delay-shuffle and with
; -mattr=-no-bypass-delay-shuffle). The assertions inside each function record
; the single shuffle instruction expected for each of those configurations.

; 256-bit, both operands in registers.
; Mask <0,1,8,9,4,5,12,13> yields a[0],a[1],b[0],b[1],a[4],a[5],b[4],b[5].
define <8 x float> @transform_VUNPCKLPDYrr(<8 x float> %a, <8 x float> %b) nounwind {
; CHECK-AVX2-LABEL: transform_VUNPCKLPDYrr:
; CHECK-AVX2: # %bb.0:
; CHECK-AVX2-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; CHECK-AVX2-NEXT: retq
;
; CHECK-ICX-NO-BYPASS-DELAY-LABEL: transform_VUNPCKLPDYrr:
; CHECK-ICX-NO-BYPASS-DELAY: # %bb.0:
; CHECK-ICX-NO-BYPASS-DELAY-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; CHECK-ICX-NO-BYPASS-DELAY-NEXT: retq
;
; CHECK-ICX-BYPASS-DELAY-LABEL: transform_VUNPCKLPDYrr:
; CHECK-ICX-BYPASS-DELAY: # %bb.0:
; CHECK-ICX-BYPASS-DELAY-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; CHECK-ICX-BYPASS-DELAY-NEXT: retq
;
; CHECK-SNB-LABEL: transform_VUNPCKLPDYrr:
; CHECK-SNB: # %bb.0:
; CHECK-SNB-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; CHECK-SNB-NEXT: retq
  %shufp = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 12, i32 13>
  ret <8 x float> %shufp
}

; 256-bit, both operands in registers.
; Mask <2,3,10,11,6,7,14,15> yields a[2],a[3],b[2],b[3],a[6],a[7],b[6],b[7].
define <8 x float> @transform_VUNPCKHPDYrr(<8 x float> %a, <8 x float> %b) nounwind {
; CHECK-AVX2-LABEL: transform_VUNPCKHPDYrr:
; CHECK-AVX2: # %bb.0:
; CHECK-AVX2-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; CHECK-AVX2-NEXT: retq
;
; CHECK-ICX-NO-BYPASS-DELAY-LABEL: transform_VUNPCKHPDYrr:
; CHECK-ICX-NO-BYPASS-DELAY: # %bb.0:
; CHECK-ICX-NO-BYPASS-DELAY-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; CHECK-ICX-NO-BYPASS-DELAY-NEXT: retq
;
; CHECK-ICX-BYPASS-DELAY-LABEL: transform_VUNPCKHPDYrr:
; CHECK-ICX-BYPASS-DELAY: # %bb.0:
; CHECK-ICX-BYPASS-DELAY-NEXT: vshufpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; CHECK-ICX-BYPASS-DELAY-NEXT: retq
;
; CHECK-SNB-LABEL: transform_VUNPCKHPDYrr:
; CHECK-SNB: # %bb.0:
; CHECK-SNB-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; CHECK-SNB-NEXT: retq
  %shufp = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 3, i32 10, i32 11, i32 6, i32 7, i32 14, i32 15>
  ret <8 x float> %shufp
}

; 128-bit, both operands in registers.
; Mask <0,1,4,5> yields a[0],a[1],b[0],b[1].
define <4 x float> @transform_VUNPCKLPDrr(<4 x float> %a, <4 x float> %b) nounwind {
; CHECK-AVX2-LABEL: transform_VUNPCKLPDrr:
; CHECK-AVX2: # %bb.0:
; CHECK-AVX2-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-AVX2-NEXT: retq
;
; CHECK-ICX-NO-BYPASS-DELAY-LABEL: transform_VUNPCKLPDrr:
; CHECK-ICX-NO-BYPASS-DELAY: # %bb.0:
; CHECK-ICX-NO-BYPASS-DELAY-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-ICX-NO-BYPASS-DELAY-NEXT: retq
;
; CHECK-ICX-BYPASS-DELAY-LABEL: transform_VUNPCKLPDrr:
; CHECK-ICX-BYPASS-DELAY: # %bb.0:
; CHECK-ICX-BYPASS-DELAY-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-ICX-BYPASS-DELAY-NEXT: retq
;
; CHECK-SNB-NO-BYPASS-DELAY-LABEL: transform_VUNPCKLPDrr:
; CHECK-SNB-NO-BYPASS-DELAY: # %bb.0:
; CHECK-SNB-NO-BYPASS-DELAY-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-SNB-NO-BYPASS-DELAY-NEXT: retq
;
; CHECK-SNB-BYPASS-DELAY-LABEL: transform_VUNPCKLPDrr:
; CHECK-SNB-BYPASS-DELAY: # %bb.0:
; CHECK-SNB-BYPASS-DELAY-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-SNB-BYPASS-DELAY-NEXT: retq
  %shufp = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x float> %shufp
}

; 128-bit, both operands in registers.
; Mask <2,3,6,7> yields a[2],a[3],b[2],b[3].
define <4 x float> @transform_VUNPCKHPDrr(<4 x float> %a, <4 x float> %b) nounwind {
; CHECK-AVX2-LABEL: transform_VUNPCKHPDrr:
; CHECK-AVX2: # %bb.0:
; CHECK-AVX2-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; CHECK-AVX2-NEXT: retq
;
; CHECK-ICX-NO-BYPASS-DELAY-LABEL: transform_VUNPCKHPDrr:
; CHECK-ICX-NO-BYPASS-DELAY: # %bb.0:
; CHECK-ICX-NO-BYPASS-DELAY-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; CHECK-ICX-NO-BYPASS-DELAY-NEXT: retq
;
; CHECK-ICX-BYPASS-DELAY-LABEL: transform_VUNPCKHPDrr:
; CHECK-ICX-BYPASS-DELAY: # %bb.0:
; CHECK-ICX-BYPASS-DELAY-NEXT: vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; CHECK-ICX-BYPASS-DELAY-NEXT: retq
;
; CHECK-SNB-NO-BYPASS-DELAY-LABEL: transform_VUNPCKHPDrr:
; CHECK-SNB-NO-BYPASS-DELAY: # %bb.0:
; CHECK-SNB-NO-BYPASS-DELAY-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; CHECK-SNB-NO-BYPASS-DELAY-NEXT: retq
;
; CHECK-SNB-BYPASS-DELAY-LABEL: transform_VUNPCKHPDrr:
; CHECK-SNB-BYPASS-DELAY: # %bb.0:
; CHECK-SNB-BYPASS-DELAY-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; CHECK-SNB-BYPASS-DELAY-NEXT: retq
  %shufp = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
  ret <4 x float> %shufp
}

; 256-bit, second operand loaded from %pb (shown as "mem" in the assertions).
; Mask <0,1,8,9,4,5,12,13> yields a[0],a[1],b[0],b[1],a[4],a[5],b[4],b[5].
define <8 x float> @transform_VUNPCKLPDYrm(<8 x float> %a, ptr %pb) nounwind {
; CHECK-AVX2-LABEL: transform_VUNPCKLPDYrm:
; CHECK-AVX2: # %bb.0:
; CHECK-AVX2-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2]
; CHECK-AVX2-NEXT: retq
;
; CHECK-ICX-NO-BYPASS-DELAY-LABEL: transform_VUNPCKLPDYrm:
; CHECK-ICX-NO-BYPASS-DELAY: # %bb.0:
; CHECK-ICX-NO-BYPASS-DELAY-NEXT: vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2]
; CHECK-ICX-NO-BYPASS-DELAY-NEXT: retq
;
; CHECK-ICX-BYPASS-DELAY-LABEL: transform_VUNPCKLPDYrm:
; CHECK-ICX-BYPASS-DELAY: # %bb.0:
; CHECK-ICX-BYPASS-DELAY-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2]
; CHECK-ICX-BYPASS-DELAY-NEXT: retq
;
; CHECK-SNB-LABEL: transform_VUNPCKLPDYrm:
; CHECK-SNB: # %bb.0:
; CHECK-SNB-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2]
; CHECK-SNB-NEXT: retq
  %b = load <8 x float>, ptr %pb
  %shufp = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 4, i32 5, i32 12, i32 13>
  ret <8 x float> %shufp
}

; 256-bit, second operand loaded from %pb (shown as "mem" in the assertions).
; Mask <2,3,10,11,6,7,14,15> yields a[2],a[3],b[2],b[3],a[6],a[7],b[6],b[7].
define <8 x float> @transform_VUNPCKHPDYrm(<8 x float> %a, ptr %pb) nounwind {
; CHECK-AVX2-LABEL: transform_VUNPCKHPDYrm:
; CHECK-AVX2: # %bb.0:
; CHECK-AVX2-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3]
; CHECK-AVX2-NEXT: retq
;
; CHECK-ICX-NO-BYPASS-DELAY-LABEL: transform_VUNPCKHPDYrm:
; CHECK-ICX-NO-BYPASS-DELAY: # %bb.0:
; CHECK-ICX-NO-BYPASS-DELAY-NEXT: vpunpckhqdq {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3]
; CHECK-ICX-NO-BYPASS-DELAY-NEXT: retq
;
; CHECK-ICX-BYPASS-DELAY-LABEL: transform_VUNPCKHPDYrm:
; CHECK-ICX-BYPASS-DELAY: # %bb.0:
; CHECK-ICX-BYPASS-DELAY-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3]
; CHECK-ICX-BYPASS-DELAY-NEXT: retq
;
; CHECK-SNB-LABEL: transform_VUNPCKHPDYrm:
; CHECK-SNB: # %bb.0:
; CHECK-SNB-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3]
; CHECK-SNB-NEXT: retq
  %b = load <8 x float>, ptr %pb
  %shufp = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 3, i32 10, i32 11, i32 6, i32 7, i32 14, i32 15>
  ret <8 x float> %shufp
}

; 128-bit, second operand loaded from %pb (shown as "mem" in the assertions).
; Mask <0,1,4,5> yields a[0],a[1],b[0],b[1].
define <4 x float> @transform_VUNPCKLPDrm(<4 x float> %a, ptr %pb) nounwind {
; CHECK-AVX2-LABEL: transform_VUNPCKLPDrm:
; CHECK-AVX2: # %bb.0:
; CHECK-AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; CHECK-AVX2-NEXT: retq
;
; CHECK-ICX-NO-BYPASS-DELAY-LABEL: transform_VUNPCKLPDrm:
; CHECK-ICX-NO-BYPASS-DELAY: # %bb.0:
; CHECK-ICX-NO-BYPASS-DELAY-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],mem[0]
; CHECK-ICX-NO-BYPASS-DELAY-NEXT: retq
;
; CHECK-ICX-BYPASS-DELAY-LABEL: transform_VUNPCKLPDrm:
; CHECK-ICX-BYPASS-DELAY: # %bb.0:
; CHECK-ICX-BYPASS-DELAY-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; CHECK-ICX-BYPASS-DELAY-NEXT: retq
;
; CHECK-SNB-NO-BYPASS-DELAY-LABEL: transform_VUNPCKLPDrm:
; CHECK-SNB-NO-BYPASS-DELAY: # %bb.0:
; CHECK-SNB-NO-BYPASS-DELAY-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],mem[0]
; CHECK-SNB-NO-BYPASS-DELAY-NEXT: retq
;
; CHECK-SNB-BYPASS-DELAY-LABEL: transform_VUNPCKLPDrm:
; CHECK-SNB-BYPASS-DELAY: # %bb.0:
; CHECK-SNB-BYPASS-DELAY-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; CHECK-SNB-BYPASS-DELAY-NEXT: retq
  %b = load <4 x float>, ptr %pb
  %shufp = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x float> %shufp
}

; 128-bit, second operand loaded from %pb (shown as "mem" in the assertions).
; Mask <2,3,6,7> yields a[2],a[3],b[2],b[3].
define <4 x float> @transform_VUNPCKHPDrm(<4 x float> %a, ptr %pb) nounwind {
; CHECK-AVX2-LABEL: transform_VUNPCKHPDrm:
; CHECK-AVX2: # %bb.0:
; CHECK-AVX2-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],mem[1]
; CHECK-AVX2-NEXT: retq
;
; CHECK-ICX-NO-BYPASS-DELAY-LABEL: transform_VUNPCKHPDrm:
; CHECK-ICX-NO-BYPASS-DELAY: # %bb.0:
; CHECK-ICX-NO-BYPASS-DELAY-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],mem[1]
; CHECK-ICX-NO-BYPASS-DELAY-NEXT: retq
;
; CHECK-ICX-BYPASS-DELAY-LABEL: transform_VUNPCKHPDrm:
; CHECK-ICX-BYPASS-DELAY: # %bb.0:
; CHECK-ICX-BYPASS-DELAY-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],mem[1]
; CHECK-ICX-BYPASS-DELAY-NEXT: retq
;
; CHECK-SNB-NO-BYPASS-DELAY-LABEL: transform_VUNPCKHPDrm:
; CHECK-SNB-NO-BYPASS-DELAY: # %bb.0:
; CHECK-SNB-NO-BYPASS-DELAY-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],mem[1]
; CHECK-SNB-NO-BYPASS-DELAY-NEXT: retq
;
; CHECK-SNB-BYPASS-DELAY-LABEL: transform_VUNPCKHPDrm:
; CHECK-SNB-BYPASS-DELAY: # %bb.0:
; CHECK-SNB-BYPASS-DELAY-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],mem[1]
; CHECK-SNB-BYPASS-DELAY-NEXT: retq
  %b = load <4 x float>, ptr %pb
  %shufp = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
  ret <4 x float> %shufp
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; CHECK: {{.*}}
; CHECK-ICX: {{.*}}
; CHECK-SKL: {{.*}}
; CHECK-V3: {{.*}}