; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X86-SSE,X86-SSE2
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X64-SSE,X64-SSE2
; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=X86-SSE,X86-SSE4
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=X64-SSE,X64-SSE4
; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=X86-AVX,X86-AVX1
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=X64-AVX,X64-AVX1
; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=X86-AVX,X86-AVX2
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=X64-AVX,X64-AVX2
; RUN: llc < %s -disable-peephole -mtriple=i686-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=X86-AVX,X86-AVX512F
; RUN: llc < %s -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=X64-AVX,X64-AVX512F

; Codegen tests for inserting a variable scalar into a build vector whose
; remaining lanes are constants, across vector widths (128/256/512-bit),
; element types (i8..i64, float, double) and feature levels (SSE2, SSE4.1,
; AVX, AVX2, AVX-512F), on both 32-bit and 64-bit x86 targets.
; The check lines below are generated; regenerate them with
; utils/update_llc_test_checks.py rather than editing by hand.

; Insert i8 %x into lane 0 of a constant <16 x i8> (lane 0's constant, 42,
; is overwritten and may be rendered as undef/'u' in the emitted constant).
define <16 x i8> @elt0_v16i8(i8 %x) {
; X86-SSE2-LABEL: elt0_v16i8:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movd %eax, %xmm0
; X86-SSE2-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; X86-SSE2-NEXT:    retl
;
; X64-SSE2-LABEL: elt0_v16i8:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    movzbl %dil, %eax
; X64-SSE2-NEXT:    movd %eax, %xmm0
; X64-SSE2-NEXT:    por {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; X64-SSE2-NEXT:    retq
;
; X86-SSE4-LABEL: elt0_v16i8:
; X86-SSE4:       # %bb.0:
; X86-SSE4-NEXT:    movdqa {{.*#+}} xmm0 = [u,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; X86-SSE4-NEXT:    pinsrb $0, {{[0-9]+}}(%esp), %xmm0
; X86-SSE4-NEXT:    retl
;
; X64-SSE4-LABEL: elt0_v16i8:
; X64-SSE4:       # %bb.0:
; X64-SSE4-NEXT:    movdqa {{.*#+}} xmm0 = [u,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; X64-SSE4-NEXT:    pinsrb $0, %edi, %xmm0
; X64-SSE4-NEXT:    retq
;
; X86-AVX-LABEL: elt0_v16i8:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vmovdqa {{.*#+}} xmm0 = [u,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; X86-AVX-NEXT:    vpinsrb $0, {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-AVX-LABEL: elt0_v16i8:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovdqa {{.*#+}} xmm0 = [u,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; X64-AVX-NEXT:    vpinsrb $0, %edi, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %ins = insertelement <16 x i8> <i8 42, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, i8 %x, i32 0
  ret <16 x i8> %ins
}

; Insert i16 %x into lane 5 of a constant <8 x i16>.
define <8 x i16> @elt5_v8i16(i16 %x) {
; X86-SSE2-LABEL: elt5_v8i16:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [42,1,2,3,4,u,6,7]
; X86-SSE2-NEXT:    pinsrw $5, {{[0-9]+}}(%esp), %xmm0
; X86-SSE2-NEXT:    retl
;
; X64-SSE2-LABEL: elt5_v8i16:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    movdqa {{.*#+}} xmm0 = [42,1,2,3,4,u,6,7]
; X64-SSE2-NEXT:    pinsrw $5, %edi, %xmm0
; X64-SSE2-NEXT:    retq
;
; X86-SSE4-LABEL: elt5_v8i16:
; X86-SSE4:       # %bb.0:
; X86-SSE4-NEXT:    pmovsxbw {{.*#+}} xmm0 = [42,1,2,3,4,0,6,7]
; X86-SSE4-NEXT:    pinsrw $5, {{[0-9]+}}(%esp), %xmm0
; X86-SSE4-NEXT:    retl
;
; X64-SSE4-LABEL: elt5_v8i16:
; X64-SSE4:       # %bb.0:
; X64-SSE4-NEXT:    pmovsxbw {{.*#+}} xmm0 = [42,1,2,3,4,0,6,7]
; X64-SSE4-NEXT:    pinsrw $5, %edi, %xmm0
; X64-SSE4-NEXT:    retq
;
; X86-AVX-LABEL: elt5_v8i16:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpmovsxbw {{.*#+}} xmm0 = [42,1,2,3,4,0,6,7]
; X86-AVX-NEXT:    vpinsrw $5, {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-AVX-LABEL: elt5_v8i16:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpmovsxbw {{.*#+}} xmm0 = [42,1,2,3,4,0,6,7]
; X64-AVX-NEXT:    vpinsrw $5, %edi, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %ins = insertelement <8 x i16> <i16 42, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, i16 %x, i32 5
  ret <8 x i16> %ins
}

; Insert i32 %x into lane 3 of a constant <4 x i32>; SSE2 has no pinsrd,
; so the scalar is merged with shufps instead.
define <4 x i32> @elt3_v4i32(i32 %x) {
; X86-SSE2-LABEL: elt3_v4i32:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X86-SSE2-NEXT:    movaps {{.*#+}} xmm0 = [42,1,2,u]
; X86-SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3]
; X86-SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
; X86-SSE2-NEXT:    retl
;
; X64-SSE2-LABEL: elt3_v4i32:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    movd %edi, %xmm1
; X64-SSE2-NEXT:    movaps {{.*#+}} xmm0 = [42,1,2,u]
; X64-SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,3]
; X64-SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,0]
; X64-SSE2-NEXT:    retq
;
; X86-SSE4-LABEL: elt3_v4i32:
; X86-SSE4:       # %bb.0:
; X86-SSE4-NEXT:    pmovsxbd {{.*#+}} xmm0 = [42,1,2,0]
; X86-SSE4-NEXT:    pinsrd $3, {{[0-9]+}}(%esp), %xmm0
; X86-SSE4-NEXT:    retl
;
; X64-SSE4-LABEL: elt3_v4i32:
; X64-SSE4:       # %bb.0:
; X64-SSE4-NEXT:    pmovsxbd {{.*#+}} xmm0 = [42,1,2,0]
; X64-SSE4-NEXT:    pinsrd $3, %edi, %xmm0
; X64-SSE4-NEXT:    retq
;
; X86-AVX-LABEL: elt3_v4i32:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vpmovsxbd {{.*#+}} xmm0 = [42,1,2,0]
; X86-AVX-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
; X86-AVX-NEXT:    retl
;
; X64-AVX-LABEL: elt3_v4i32:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpmovsxbd {{.*#+}} xmm0 = [42,1,2,0]
; X64-AVX-NEXT:    vpinsrd $3, %edi, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %ins = insertelement <4 x i32> <i32 42, i32 1, i32 2, i32 3>, i32 %x, i32 3
  ret <4 x i32> %ins
}

; Insert i64 %x into lane 0 of a constant <2 x i64>; on 32-bit targets the
; i64 argument arrives on the stack and is loaded/merged with FP shuffles.
define <2 x i64> @elt0_v2i64(i64 %x) {
; X86-SSE-LABEL: elt0_v2i64:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT:    unpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; X86-SSE-NEXT:    retl
;
; X64-SSE2-LABEL: elt0_v2i64:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    movq %rdi, %xmm1
; X64-SSE2-NEXT:    movapd {{.*#+}} xmm0 = [u,1]
; X64-SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X64-SSE2-NEXT:    retq
;
; X64-SSE4-LABEL: elt0_v2i64:
; X64-SSE4:       # %bb.0:
; X64-SSE4-NEXT:    pmovsxbq {{.*#+}} xmm0 = [1,1]
; X64-SSE4-NEXT:    pinsrq $0, %rdi, %xmm0
; X64-SSE4-NEXT:    retq
;
; X86-AVX-LABEL: elt0_v2i64:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; X86-AVX-NEXT:    retl
;
; X64-AVX-LABEL: elt0_v2i64:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vpmovsxbq {{.*#+}} xmm0 = [1,1]
; X64-AVX-NEXT:    vpinsrq $0, %rdi, %xmm0, %xmm0
; X64-AVX-NEXT:    retq
  %ins = insertelement <2 x i64> <i64 42, i64 1>, i64 %x, i32 0
  ret <2 x i64> %ins
}

; Insert float %x into lane 1 of a constant <4 x float> (insertps on
; SSE4.1+, shufps sequences on plain SSE2).
define <4 x float> @elt1_v4f32(float %x) {
; X86-SSE2-LABEL: elt1_v4f32:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movaps {{.*#+}} xmm1 = [4.2E+1,u,2.0E+0,3.0E+0]
; X86-SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X86-SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
; X86-SSE2-NEXT:    retl
;
; X64-SSE2-LABEL: elt1_v4f32:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    movaps {{.*#+}} xmm1 = [4.2E+1,u,2.0E+0,3.0E+0]
; X64-SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X64-SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[2,3]
; X64-SSE2-NEXT:    retq
;
; X86-SSE4-LABEL: elt1_v4f32:
; X86-SSE4:       # %bb.0:
; X86-SSE4-NEXT:    movaps {{.*#+}} xmm0 = [4.2E+1,u,2.0E+0,3.0E+0]
; X86-SSE4-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
; X86-SSE4-NEXT:    retl
;
; X64-SSE4-LABEL: elt1_v4f32:
; X64-SSE4:       # %bb.0:
; X64-SSE4-NEXT:    movaps {{.*#+}} xmm1 = [4.2E+1,u,2.0E+0,3.0E+0]
; X64-SSE4-NEXT:    insertps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[2,3]
; X64-SSE4-NEXT:    movaps %xmm1, %xmm0
; X64-SSE4-NEXT:    retq
;
; X86-AVX-LABEL: elt1_v4f32:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vmovaps {{.*#+}} xmm0 = [4.2E+1,u,2.0E+0,3.0E+0]
; X86-AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
; X86-AVX-NEXT:    retl
;
; X64-AVX-LABEL: elt1_v4f32:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovaps {{.*#+}} xmm1 = [4.2E+1,u,2.0E+0,3.0E+0]
; X64-AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
; X64-AVX-NEXT:    retq
  %ins = insertelement <4 x float> <float 42.0, float 1.0, float 2.0, float 3.0>, float %x, i32 1
  ret <4 x float> %ins
}

; Insert double %x into lane 1 (the high half) of a constant <2 x double>.
define <2 x double> @elt1_v2f64(double %x) {
; X86-SSE-LABEL: elt1_v2f64:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movaps {{.*#+}} xmm0 = [4.2E+1,u]
; X86-SSE-NEXT:    movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; X86-SSE-NEXT:    retl
;
; X64-SSE-LABEL: elt1_v2f64:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movaps {{.*#+}} xmm1 = [4.2E+1,u]
; X64-SSE-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; X64-SSE-NEXT:    movaps %xmm1, %xmm0
; X64-SSE-NEXT:    retq
;
; X86-AVX-LABEL: elt1_v2f64:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vmovddup {{.*#+}} xmm0 = [4.2E+1,4.2E+1]
; X86-AVX-NEXT:    # xmm0 = mem[0,0]
; X86-AVX-NEXT:    vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; X86-AVX-NEXT:    retl
;
; X64-AVX-LABEL: elt1_v2f64:
; X64-AVX:       # %bb.0:
; X64-AVX-NEXT:    vmovddup {{.*#+}} xmm1 = [4.2E+1,4.2E+1]
; X64-AVX-NEXT:    # xmm1 = mem[0,0]
; X64-AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; X64-AVX-NEXT:    retq
  %ins = insertelement <2 x double> <double 42.0, double 1.0>, double %x, i32 1
  ret <2 x double> %ins
}

; 256-bit case: insert i32 %x into lane 7 of a constant <8 x i32>. SSE
; targets split the vector into two xmm halves and only touch the upper
; half; AVX targets broadcast the scalar and blend it into lane 7.
define <8 x i32> @elt7_v8i32(i32 %x) {
; X86-SSE2-LABEL: elt7_v8i32:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE2-NEXT:    movaps {{.*#+}} xmm1 = [4,5,6,u]
; X86-SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; X86-SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
; X86-SSE2-NEXT:    movaps {{.*#+}} xmm0 = [42,1,2,3]
; X86-SSE2-NEXT:    retl
;
; X64-SSE2-LABEL: elt7_v8i32:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    movd %edi, %xmm0
; X64-SSE2-NEXT:    movaps {{.*#+}} xmm1 = [4,5,6,u]
; X64-SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
; X64-SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
; X64-SSE2-NEXT:    movaps {{.*#+}} xmm0 = [42,1,2,3]
; X64-SSE2-NEXT:    retq
;
; X86-SSE4-LABEL: elt7_v8i32:
; X86-SSE4:       # %bb.0:
; X86-SSE4-NEXT:    pmovsxbd {{.*#+}} xmm1 = [4,5,6,0]
; X86-SSE4-NEXT:    pinsrd $3, {{[0-9]+}}(%esp), %xmm1
; X86-SSE4-NEXT:    movaps {{.*#+}} xmm0 = [42,1,2,3]
; X86-SSE4-NEXT:    retl
;
; X64-SSE4-LABEL: elt7_v8i32:
; X64-SSE4:       # %bb.0:
; X64-SSE4-NEXT:    pmovsxbd {{.*#+}} xmm1 = [4,5,6,0]
; X64-SSE4-NEXT:    pinsrd $3, %edi, %xmm1
; X64-SSE4-NEXT:    movaps {{.*#+}} xmm0 = [42,1,2,3]
; X64-SSE4-NEXT:    retq
;
; X86-AVX-LABEL: elt7_v8i32:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %ymm0
; X86-AVX-NEXT:    vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4,5,6],ymm0[7]
; X86-AVX-NEXT:    retl
;
; X64-AVX1-LABEL: elt7_v8i32:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vmovd %edi, %xmm0
; X64-AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; X64-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-AVX1-NEXT:    vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4,5,6],ymm0[7]
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: elt7_v8i32:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vmovd %edi, %xmm0
; X64-AVX2-NEXT:    vpbroadcastd %xmm0, %ymm0
; X64-AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = mem[0,1,2,3,4,5,6],ymm0[7]
; X64-AVX2-NEXT:    retq
;
; X64-AVX512F-LABEL: elt7_v8i32:
; X64-AVX512F:       # %bb.0:
; X64-AVX512F-NEXT:    vmovd %edi, %xmm0
; X64-AVX512F-NEXT:    vpbroadcastd %xmm0, %ymm0
; X64-AVX512F-NEXT:    vpblendd {{.*#+}} ymm0 = mem[0,1,2,3,4,5,6],ymm0[7]
; X64-AVX512F-NEXT:    retq
  %ins = insertelement <8 x i32> <i32 42, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, i32 %x, i32 7
  ret <8 x i32> %ins
}

; 256-bit FP case: insert float %x into lane 6 of a constant <8 x float>.
define <8 x float> @elt6_v8f32(float %x) {
; X86-SSE2-LABEL: elt6_v8f32:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movaps {{.*#+}} xmm1 = [4.0E+0,5.0E+0,u,7.0E+0]
; X86-SSE2-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
; X86-SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,2]
; X86-SSE2-NEXT:    movaps {{.*#+}} xmm0 = [4.2E+1,1.0E+0,2.0E+0,3.0E+0]
; X86-SSE2-NEXT:    retl
;
; X64-SSE2-LABEL: elt6_v8f32:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    movaps {{.*#+}} xmm1 = [4.0E+0,5.0E+0,u,7.0E+0]
; X64-SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[3,0]
; X64-SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,2]
; X64-SSE2-NEXT:    movaps {{.*#+}} xmm0 = [4.2E+1,1.0E+0,2.0E+0,3.0E+0]
; X64-SSE2-NEXT:    retq
;
; X86-SSE4-LABEL: elt6_v8f32:
; X86-SSE4:       # %bb.0:
; X86-SSE4-NEXT:    movaps {{.*#+}} xmm1 = [4.0E+0,5.0E+0,u,7.0E+0]
; X86-SSE4-NEXT:    insertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3]
; X86-SSE4-NEXT:    movaps {{.*#+}} xmm0 = [4.2E+1,1.0E+0,2.0E+0,3.0E+0]
; X86-SSE4-NEXT:    retl
;
; X64-SSE4-LABEL: elt6_v8f32:
; X64-SSE4:       # %bb.0:
; X64-SSE4-NEXT:    movaps {{.*#+}} xmm1 = [4.0E+0,5.0E+0,u,7.0E+0]
; X64-SSE4-NEXT:    insertps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0],xmm1[3]
; X64-SSE4-NEXT:    movaps {{.*#+}} xmm0 = [4.2E+1,1.0E+0,2.0E+0,3.0E+0]
; X64-SSE4-NEXT:    retq
;
; X86-AVX-LABEL: elt6_v8f32:
; X86-AVX:       # %bb.0:
; X86-AVX-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %ymm0
; X86-AVX-NEXT:    vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4,5],ymm0[6],mem[7]
; X86-AVX-NEXT:    retl
;
; X64-AVX1-LABEL: elt6_v8f32:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; X64-AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; X64-AVX1-NEXT:    vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4,5],ymm0[6],mem[7]
; X64-AVX1-NEXT:    retq
;
; X64-AVX2-LABEL: elt6_v8f32:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vbroadcastss %xmm0, %ymm0
; X64-AVX2-NEXT:    vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4,5],ymm0[6],mem[7]
; X64-AVX2-NEXT:    retq
;
; X64-AVX512F-LABEL: elt6_v8f32:
; X64-AVX512F:       # %bb.0:
; X64-AVX512F-NEXT:    vbroadcastss %xmm0, %ymm0
; X64-AVX512F-NEXT:    vblendps {{.*#+}} ymm0 = mem[0,1,2,3,4,5],ymm0[6],mem[7]
; X64-AVX512F-NEXT:    retq
  %ins = insertelement <8 x float> <float 42.0, float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0>, float %x, i32 6
  ret <8 x float> %ins
}

; 512-bit case: insert i64 %x into lane 5 of a constant <8 x i64>. SSE
; targets return in four xmm registers (only xmm2 involves %x); AVX-512F
; can do the whole insert with a single vpermi2q using an index vector.
define <8 x i64> @elt5_v8i64(i64 %x) {
; X86-SSE-LABEL: elt5_v8i64:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
; X86-SSE-NEXT:    movss {{.*#+}} xmm2 = [4,0,0,0]
; X86-SSE-NEXT:    movlhps {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; X86-SSE-NEXT:    movaps {{.*#+}} xmm0 = [42,0,1,0]
; X86-SSE-NEXT:    movaps {{.*#+}} xmm1 = [2,0,3,0]
; X86-SSE-NEXT:    movaps {{.*#+}} xmm3 = [6,0,7,0]
; X86-SSE-NEXT:    retl
;
; X64-SSE2-LABEL: elt5_v8i64:
; X64-SSE2:       # %bb.0:
; X64-SSE2-NEXT:    movq %rdi, %xmm0
; X64-SSE2-NEXT:    movdqa {{.*#+}} xmm2 = [4,u]
; X64-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm0[0]
; X64-SSE2-NEXT:    movaps {{.*#+}} xmm0 = [42,1]
; X64-SSE2-NEXT:    movaps {{.*#+}} xmm1 = [2,3]
; X64-SSE2-NEXT:    movaps {{.*#+}} xmm3 = [6,7]
; X64-SSE2-NEXT:    retq
;
; X64-SSE4-LABEL: elt5_v8i64:
; X64-SSE4:       # %bb.0:
; X64-SSE4-NEXT:    pmovsxbq {{.*#+}} xmm2 = [4,4]
; X64-SSE4-NEXT:    pinsrq $1, %rdi, %xmm2
; X64-SSE4-NEXT:    movaps {{.*#+}} xmm0 = [42,1]
; X64-SSE4-NEXT:    movaps {{.*#+}} xmm1 = [2,3]
; X64-SSE4-NEXT:    movaps {{.*#+}} xmm3 = [6,7]
; X64-SSE4-NEXT:    retq
;
; X86-AVX1-LABEL: elt5_v8i64:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX1-NEXT:    vmovss {{.*#+}} xmm1 = [4,0,0,0]
; X86-AVX1-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; X86-AVX1-NEXT:    vinsertf128 $1, {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm1
; X86-AVX1-NEXT:    vmovaps {{.*#+}} ymm0 = [42,0,1,0,2,0,3,0]
; X86-AVX1-NEXT:    retl
;
; X64-AVX1-LABEL: elt5_v8i64:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vmovdqa {{.*#+}} ymm0 = [4,u,6,7]
; X64-AVX1-NEXT:    vpinsrq $1, %rdi, %xmm0, %xmm1
; X64-AVX1-NEXT:    vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; X64-AVX1-NEXT:    vmovaps {{.*#+}} ymm0 = [42,1,2,3]
; X64-AVX1-NEXT:    retq
;
; X86-AVX2-LABEL: elt5_v8i64:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; X86-AVX2-NEXT:    vmovss {{.*#+}} xmm1 = [4,0,0,0]
; X86-AVX2-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; X86-AVX2-NEXT:    vinsertf128 $1, {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm1
; X86-AVX2-NEXT:    vmovaps {{.*#+}} ymm0 = [42,0,1,0,2,0,3,0]
; X86-AVX2-NEXT:    retl
;
; X64-AVX2-LABEL: elt5_v8i64:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vpmovsxbq {{.*#+}} ymm0 = [4,0,6,7]
; X64-AVX2-NEXT:    vpinsrq $1, %rdi, %xmm0, %xmm1
; X64-AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; X64-AVX2-NEXT:    vmovaps {{.*#+}} ymm0 = [42,1,2,3]
; X64-AVX2-NEXT:    retq
;
; X86-AVX512F-LABEL: elt5_v8i64:
; X86-AVX512F:       # %bb.0:
; X86-AVX512F-NEXT:    vpmovsxbq {{.*#+}} ymm0 = [42,1,2,3]
; X86-AVX512F-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; X86-AVX512F-NEXT:    vpmovsxbq {{.*#+}} xmm2 = [4,0]
; X86-AVX512F-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; X86-AVX512F-NEXT:    vinsertf128 $1, {{\.?LCPI[0-9]+_[0-9]+}}, %ymm1, %ymm1
; X86-AVX512F-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; X86-AVX512F-NEXT:    retl
;
; X64-AVX512F-LABEL: elt5_v8i64:
; X64-AVX512F:       # %bb.0:
; X64-AVX512F-NEXT:    vmovq %rdi, %xmm1
; X64-AVX512F-NEXT:    vpmovsxbq {{.*#+}} zmm0 = [8,9,10,11,12,0,14,15]
; X64-AVX512F-NEXT:    vpermi2q {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0
; X64-AVX512F-NEXT:    retq
  %ins = insertelement <8 x i64> <i64 42, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, i64 %x, i32 5
  ret <8 x i64> %ins
}

; 512-bit FP case: insert double %x into lane 1 of a constant <8 x double>;
; AVX-512F patches only the low 128-bit quarter with vinsertf32x4.
define <8 x double> @elt1_v8f64(double %x) {
; X86-SSE-LABEL: elt1_v8f64:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    movaps {{.*#+}} xmm0 = [4.2E+1,u]
; X86-SSE-NEXT:    movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; X86-SSE-NEXT:    movaps {{.*#+}} xmm1 = [2.0E+0,3.0E+0]
; X86-SSE-NEXT:    movaps {{.*#+}} xmm2 = [4.0E+0,5.0E+0]
; X86-SSE-NEXT:    movaps {{.*#+}} xmm3 = [6.0E+0,7.0E+0]
; X86-SSE-NEXT:    retl
;
; X64-SSE-LABEL: elt1_v8f64:
; X64-SSE:       # %bb.0:
; X64-SSE-NEXT:    movaps {{.*#+}} xmm4 = [4.2E+1,u]
; X64-SSE-NEXT:    movlhps {{.*#+}} xmm4 = xmm4[0],xmm0[0]
; X64-SSE-NEXT:    movaps {{.*#+}} xmm1 = [2.0E+0,3.0E+0]
; X64-SSE-NEXT:    movaps {{.*#+}} xmm2 = [4.0E+0,5.0E+0]
; X64-SSE-NEXT:    movaps {{.*#+}} xmm3 = [6.0E+0,7.0E+0]
; X64-SSE-NEXT:    movaps %xmm4, %xmm0
; X64-SSE-NEXT:    retq
;
; X86-AVX1-LABEL: elt1_v8f64:
; X86-AVX1:       # %bb.0:
; X86-AVX1-NEXT:    vmovaps {{.*#+}} ymm0 = [4.2E+1,u,2.0E+0,3.0E+0]
; X86-AVX1-NEXT:    vmovhps {{.*#+}} xmm1 = xmm0[0,1],mem[0,1]
; X86-AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; X86-AVX1-NEXT:    vmovaps {{.*#+}} ymm1 = [4.0E+0,5.0E+0,6.0E+0,7.0E+0]
; X86-AVX1-NEXT:    retl
;
; X64-AVX1-LABEL: elt1_v8f64:
; X64-AVX1:       # %bb.0:
; X64-AVX1-NEXT:    vmovaps {{.*#+}} ymm1 = [4.2E+1,u,2.0E+0,3.0E+0]
; X64-AVX1-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; X64-AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; X64-AVX1-NEXT:    vmovaps {{.*#+}} ymm1 = [4.0E+0,5.0E+0,6.0E+0,7.0E+0]
; X64-AVX1-NEXT:    retq
;
; X86-AVX2-LABEL: elt1_v8f64:
; X86-AVX2:       # %bb.0:
; X86-AVX2-NEXT:    vmovaps {{.*#+}} ymm0 = [4.2E+1,u,2.0E+0,3.0E+0]
; X86-AVX2-NEXT:    vmovhps {{.*#+}} xmm1 = xmm0[0,1],mem[0,1]
; X86-AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; X86-AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [4.0E+0,5.0E+0,6.0E+0,7.0E+0]
; X86-AVX2-NEXT:    retl
;
; X64-AVX2-LABEL: elt1_v8f64:
; X64-AVX2:       # %bb.0:
; X64-AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [4.2E+1,u,2.0E+0,3.0E+0]
; X64-AVX2-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; X64-AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; X64-AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [4.0E+0,5.0E+0,6.0E+0,7.0E+0]
; X64-AVX2-NEXT:    retq
;
; X86-AVX512F-LABEL: elt1_v8f64:
; X86-AVX512F:       # %bb.0:
; X86-AVX512F-NEXT:    vmovaps {{.*#+}} zmm0 = [4.2E+1,u,2.0E+0,3.0E+0,4.0E+0,5.0E+0,6.0E+0,7.0E+0]
; X86-AVX512F-NEXT:    vmovhps {{.*#+}} xmm1 = xmm0[0,1],mem[0,1]
; X86-AVX512F-NEXT:    vinsertf32x4 $0, %xmm1, %zmm0, %zmm0
; X86-AVX512F-NEXT:    retl
;
; X64-AVX512F-LABEL: elt1_v8f64:
; X64-AVX512F:       # %bb.0:
; X64-AVX512F-NEXT:    vmovaps {{.*#+}} zmm1 = [4.2E+1,u,2.0E+0,3.0E+0,4.0E+0,5.0E+0,6.0E+0,7.0E+0]
; X64-AVX512F-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; X64-AVX512F-NEXT:    vinsertf32x4 $0, %xmm0, %zmm1, %zmm0
; X64-AVX512F-NEXT:    retq
  %ins = insertelement <8 x double> <double 42.0, double 1.0, double 2.0, double 3.0, double 4.0, double 5.0, double 6.0, double 7.0>, double %x, i32 1
  ret <8 x double> %ins
}