; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse3 | FileCheck %s --check-prefixes=SSE,SSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE,SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2

define <2 x double> @insert_v2f64_z1(<2 x double> %a) {
; SSE2-LABEL: insert_v2f64_z1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    xorpd %xmm1, %xmm1
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v2f64_z1:
; SSE3:       # %bb.0:
; SSE3-NEXT:    xorpd %xmm1, %xmm1
; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v2f64_z1:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    xorpd %xmm1, %xmm1
; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v2f64_z1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    xorps %xmm1, %xmm1
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v2f64_z1:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX-NEXT:    retq
  %1 = insertelement <2 x double> %a, double 0.0, i32 0
  ret <2 x double> %1
}

define <4 x double> @insert_v4f64_0zz3(<4 x double> %a) {
; SSE2-LABEL: insert_v4f64_0zz3:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE2-NEXT:    xorpd %xmm2, %xmm2
; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v4f64_0zz3:
; SSE3:       # %bb.0:
; SSE3-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE3-NEXT:    xorpd %xmm2, %xmm2
; SSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v4f64_0zz3:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
; SSSE3-NEXT:    xorpd %xmm2, %xmm2
; SSSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v4f64_0zz3:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE41-NEXT:    xorps %xmm2, %xmm2
; SSE41-NEXT:    blendps {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v4f64_0zz3:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5],ymm0[6,7]
; AVX-NEXT:    retq
  %1 = insertelement <4 x double> %a, double 0.0, i32 1
  %2 = insertelement <4 x double> %1, double 0.0, i32 2
  ret <4 x double> %2
}

define <2 x i64> @insert_v2i64_z1(<2 x i64> %a) {
; SSE2-LABEL: insert_v2i64_z1:
; SSE2:       # %bb.0:
; SSE2-NEXT:    xorpd %xmm1, %xmm1
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v2i64_z1:
; SSE3:       # %bb.0:
; SSE3-NEXT:    xorpd %xmm1, %xmm1
; SSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v2i64_z1:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    xorpd %xmm1, %xmm1
; SSSE3-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v2i64_z1:
; SSE41:       # %bb.0:
; SSE41-NEXT:    xorps %xmm1, %xmm1
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v2i64_z1:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX-NEXT:    retq
  %1 = insertelement <2 x i64> %a, i64 0, i32 0
  ret <2 x i64> %1
}
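
; Inserting zero into element 2 of a <4 x i64> only touches the upper 128-bit
; half of the vector, so the SSE lowerings below are expected to operate on
; %xmm1 alone, while AVX blends within the full ymm register.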
define <4 x i64> @insert_v4i64_01z3(<4 x i64> %a) {
; SSE2-LABEL: insert_v4i64_01z3:
; SSE2:       # %bb.0:
; SSE2-NEXT:    xorpd %xmm2, %xmm2
; SSE2-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v4i64_01z3:
; SSE3:       # %bb.0:
; SSE3-NEXT:    xorpd %xmm2, %xmm2
; SSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v4i64_01z3:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    xorpd %xmm2, %xmm2
; SSSE3-NEXT:    movsd {{.*#+}} xmm1 = xmm2[0],xmm1[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v4i64_01z3:
; SSE41:       # %bb.0:
; SSE41-NEXT:    xorps %xmm2, %xmm2
; SSE41-NEXT:    blendps {{.*#+}} xmm1 = xmm2[0,1],xmm1[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v4i64_01z3:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
; AVX-NEXT:    retq
  %1 = insertelement <4 x i64> %a, i64 0, i32 2
  ret <4 x i64> %1
}

define <4 x float> @insert_v4f32_01z3(<4 x float> %a) {
; SSE2-LABEL: insert_v4f32_01z3:
; SSE2:       # %bb.0:
; SSE2-NEXT:    xorps %xmm1, %xmm1
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v4f32_01z3:
; SSE3:       # %bb.0:
; SSE3-NEXT:    xorps %xmm1, %xmm1
; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v4f32_01z3:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    xorps %xmm1, %xmm1
; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v4f32_01z3:
; SSE41:       # %bb.0:
; SSE41-NEXT:    xorps %xmm1, %xmm1
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v4f32_01z3:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
; AVX-NEXT:    retq
  %1 = insertelement <4 x float> %a, float 0.0, i32 2
  ret <4 x float> %1
}

define <8 x float> @insert_v8f32_z12345z7(<8 x float> %a) {
; SSE2-LABEL: insert_v8f32_z12345z7:
; SSE2:       # %bb.0:
; SSE2-NEXT:    xorps %xmm2, %xmm2
; SSE2-NEXT:    movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSE2-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v8f32_z12345z7:
; SSE3:       # %bb.0:
; SSE3-NEXT:    xorps %xmm2, %xmm2
; SSE3-NEXT:    movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v8f32_z12345z7:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    xorps %xmm2, %xmm2
; SSSE3-NEXT:    movss {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSSE3-NEXT:    shufps {{.*#+}} xmm2 = xmm2[0,0],xmm1[3,0]
; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm2[0,2]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v8f32_z12345z7:
; SSE41:       # %bb.0:
; SSE41-NEXT:    xorps %xmm2, %xmm2
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSE41-NEXT:    blendps {{.*#+}} xmm1 = xmm1[0,1],xmm2[2],xmm1[3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v8f32_z12345z7:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5],ymm1[6],ymm0[7]
; AVX-NEXT:    retq
  %1 = insertelement <8 x float> %a, float 0.0, i32 0
  %2 = insertelement <8 x float> %1, float 0.0, i32 6
  ret <8 x float> %2
}
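
; The <4 x i32> and <8 x i32> variants are expected to lower like the float
; versions above, except that pre-SSE4.1 targets mask the <8 x i32> case with
; constant-pool loads (andps) instead of shuffling.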
define <4 x i32> @insert_v4i32_01z3(<4 x i32> %a) {
; SSE2-LABEL: insert_v4i32_01z3:
; SSE2:       # %bb.0:
; SSE2-NEXT:    xorps %xmm1, %xmm1
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v4i32_01z3:
; SSE3:       # %bb.0:
; SSE3-NEXT:    xorps %xmm1, %xmm1
; SSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; SSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v4i32_01z3:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    xorps %xmm1, %xmm1
; SSSE3-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[3,0]
; SSSE3-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,2]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v4i32_01z3:
; SSE41:       # %bb.0:
; SSE41-NEXT:    xorps %xmm1, %xmm1
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v4i32_01z3:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2],xmm0[3]
; AVX-NEXT:    retq
  %1 = insertelement <4 x i32> %a, i32 0, i32 2
  ret <4 x i32> %1
}

define <8 x i32> @insert_v8i32_z12345z7(<8 x i32> %a) {
; SSE2-LABEL: insert_v8i32_z12345z7:
; SSE2:       # %bb.0:
; SSE2-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v8i32_z12345z7:
; SSE3:       # %bb.0:
; SSE3-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE3-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v8i32_z12345z7:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSSE3-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v8i32_z12345z7:
; SSE41:       # %bb.0:
; SSE41-NEXT:    xorps %xmm2, %xmm2
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3]
; SSE41-NEXT:    blendps {{.*#+}} xmm1 = xmm1[0,1],xmm2[2],xmm1[3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v8i32_z12345z7:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5],ymm1[6],ymm0[7]
; AVX-NEXT:    retq
  %1 = insertelement <8 x i32> %a, i32 0, i32 0
  %2 = insertelement <8 x i32> %1, i32 0, i32 6
  ret <8 x i32> %2
}

define <8 x i16> @insert_v8i16_z12345z7(<8 x i16> %a) {
; SSE2-LABEL: insert_v8i16_z12345z7:
; SSE2:       # %bb.0:
; SSE2-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v8i16_z12345z7:
; SSE3:       # %bb.0:
; SSE3-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v8i16_z12345z7:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v8i16_z12345z7:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pxor %xmm1, %xmm1
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5],xmm1[6],xmm0[7]
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v8i16_z12345z7:
; AVX:       # %bb.0:
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3,4,5],xmm1[6],xmm0[7]
; AVX-NEXT:    retq
  %1 = insertelement <8 x i16> %a, i16 0, i32 0
  %2 = insertelement <8 x i16> %1, i16 0, i32 6
  ret <8 x i16> %2
}
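
; With zeroed lanes spread across both 128-bit halves, masking with a
; constant-pool load becomes the preferred lowering: plain SSE uses andps per
; half, SSE4.1 uses pblendw where the zero lanes fit a word blend and falls
; back to andps otherwise, and AVX folds everything into one vandps on ymm.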
define <16 x i16> @insert_v16i16_z12345z789ABCDEz(<16 x i16> %a) {
; SSE2-LABEL: insert_v16i16_z12345z789ABCDEz:
; SSE2:       # %bb.0:
; SSE2-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v16i16_z12345z789ABCDEz:
; SSE3:       # %bb.0:
; SSE3-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE3-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v16i16_z12345z789ABCDEz:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSSE3-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v16i16_z12345z789ABCDEz:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pxor %xmm2, %xmm2
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm2[0],xmm0[1,2,3,4,5],xmm2[6],xmm0[7]
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,6],xmm2[7]
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v16i16_z12345z789ABCDEz:
; AVX:       # %bb.0:
; AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX-NEXT:    retq
  %1 = insertelement <16 x i16> %a, i16 0, i32 0
  %2 = insertelement <16 x i16> %1, i16 0, i32 6
  %3 = insertelement <16 x i16> %2, i16 0, i32 15
  ret <16 x i16> %3
}

define <16 x i8> @insert_v16i8_z123456789ABCDEz(<16 x i8> %a) {
; SSE-LABEL: insert_v16i8_z123456789ABCDEz:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_v16i8_z123456789ABCDEz:
; AVX:       # %bb.0:
; AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = insertelement <16 x i8> %a, i8 0, i32 0
  %2 = insertelement <16 x i8> %1, i8 0, i32 15
  ret <16 x i8> %2
}

define <32 x i8> @insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz(<32 x i8> %a) {
; SSE2-LABEL: insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz:
; SSE2:       # %bb.0:
; SSE2-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE2-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    retq
;
; SSE3-LABEL: insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz:
; SSE3:       # %bb.0:
; SSE3-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE3-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSSE3-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pxor %xmm2, %xmm2
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1,2,3,4,5,6],xmm2[7]
; SSE41-NEXT:    andps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_v32i8_z123456789ABCDEzGHIJKLMNOPQRSTzz:
; AVX:       # %bb.0:
; AVX-NEXT:    vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
; AVX-NEXT:    retq
  %1 = insertelement <32 x i8> %a, i8 0, i32 0
  %2 = insertelement <32 x i8> %1, i8 0, i32 15
  %3 = insertelement <32 x i8> %2, i8 0, i32 30
  %4 = insertelement <32 x i8> %3, i8 0, i32 31
  ret <32 x i8> %4
}
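
; PR41512: building a vector from two scalar inserts plus a shuffle. For GPR
; sources, movd/movq already zero-extend into the full xmm register, so the
; i32/i64 cases below should need no explicit zero vector; the float case
; still has to materialize zeros with xorps/vxorps.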
define <4 x i32> @PR41512(i32 %x, i32 %y) {
; SSE-LABEL: PR41512:
; SSE:       # %bb.0:
; SSE-NEXT:    movd %edi, %xmm0
; SSE-NEXT:    movd %esi, %xmm1
; SSE-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT:    retq
;
; AVX-LABEL: PR41512:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovd %edi, %xmm0
; AVX-NEXT:    vmovd %esi, %xmm1
; AVX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT:    retq
  %ins1 = insertelement <4 x i32> <i32 undef, i32 0, i32 undef, i32 undef>, i32 %x, i32 0
  %ins2 = insertelement <4 x i32> <i32 undef, i32 0, i32 undef, i32 undef>, i32 %y, i32 0
  %r = shufflevector <4 x i32> %ins1, <4 x i32> %ins2, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x i32> %r
}

define <4 x i64> @PR41512_v4i64(i64 %x, i64 %y) {
; SSE-LABEL: PR41512_v4i64:
; SSE:       # %bb.0:
; SSE-NEXT:    movq %rdi, %xmm0
; SSE-NEXT:    movq %rsi, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: PR41512_v4i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovq %rdi, %xmm0
; AVX1-NEXT:    vmovq %rsi, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: PR41512_v4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovq %rdi, %xmm0
; AVX2-NEXT:    vmovq %rsi, %xmm1
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %ins1 = insertelement <4 x i64> <i64 undef, i64 0, i64 undef, i64 undef>, i64 %x, i32 0
  %ins2 = insertelement <4 x i64> <i64 undef, i64 0, i64 undef, i64 undef>, i64 %y, i32 0
  %r = shufflevector <4 x i64> %ins1, <4 x i64> %ins2, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x i64> %r
}

define <8 x float> @PR41512_v8f32(float %x, float %y) {
; SSE2-LABEL: PR41512_v8f32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    xorps %xmm2, %xmm2
; SSE2-NEXT:    xorps %xmm3, %xmm3
; SSE2-NEXT:    movss {{.*#+}} xmm3 = xmm0[0],xmm3[1,2,3]
; SSE2-NEXT:    movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
; SSE2-NEXT:    movaps %xmm3, %xmm0
; SSE2-NEXT:    movaps %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; SSE3-LABEL: PR41512_v8f32:
; SSE3:       # %bb.0:
; SSE3-NEXT:    xorps %xmm2, %xmm2
; SSE3-NEXT:    xorps %xmm3, %xmm3
; SSE3-NEXT:    movss {{.*#+}} xmm3 = xmm0[0],xmm3[1,2,3]
; SSE3-NEXT:    movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
; SSE3-NEXT:    movaps %xmm3, %xmm0
; SSE3-NEXT:    movaps %xmm2, %xmm1
; SSE3-NEXT:    retq
;
; SSSE3-LABEL: PR41512_v8f32:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    xorps %xmm2, %xmm2
; SSSE3-NEXT:    xorps %xmm3, %xmm3
; SSSE3-NEXT:    movss {{.*#+}} xmm3 = xmm0[0],xmm3[1,2,3]
; SSSE3-NEXT:    movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
; SSSE3-NEXT:    movaps %xmm3, %xmm0
; SSSE3-NEXT:    movaps %xmm2, %xmm1
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: PR41512_v8f32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    xorps %xmm2, %xmm2
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
; SSE41-NEXT:    blendps {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: PR41512_v8f32:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm2, %xmm2, %xmm2
; AVX-NEXT:    vblendps {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3]
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm2[1,2,3]
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %ins1 = insertelement <8 x float> zeroinitializer, float %x, i32 0
  %ins2 = insertelement <8 x float> zeroinitializer, float %y, i32 0
  %r = shufflevector <8 x float> %ins1, <8 x float> %ins2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
  ret <8 x float> %r
}
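
; The same pattern with loaded scalars: movss from memory zero-extends the
; loaded element (mem[0],zero,zero,zero), so the two vectors combine with a
; single (v)movlhps.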
define <4 x i32> @PR41512_loads(ptr %p1, ptr %p2) {
; SSE-LABEL: PR41512_loads:
; SSE:       # %bb.0:
; SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT:    retq
;
; AVX-LABEL: PR41512_loads:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; AVX-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT:    retq
  %x = load i32, ptr %p1
  %y = load i32, ptr %p2
  %ins1 = insertelement <4 x i32> <i32 undef, i32 0, i32 undef, i32 undef>, i32 %x, i32 0
  %ins2 = insertelement <4 x i32> <i32 undef, i32 0, i32 undef, i32 undef>, i32 %y, i32 0
  %r = shufflevector <4 x i32> %ins1, <4 x i32> %ins2, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x i32> %r
}