; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=ALL,AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=ALL,AVX2

; Prefix layout: checks under the ALL prefix are verified by both RUN lines;
; checks under the AVX prefix only by the +avx run, and checks under the AVX2
; prefix only by the +avx2 run.
; Do not hand-edit the assertion lines; regenerate them with the script named
; in the NOTE line above.

; 0'th element insertion into an AVX register.
; Each function in this group writes the scalar %s to index 0 of a 256-bit
; vector argument %x and returns the result; only the element type differs.

; <8 x float>, index 0.
define <8 x float> @insert_f32_firstelt_of_low_subvector(<8 x float> %x, float %s) {
; ALL-LABEL: insert_f32_firstelt_of_low_subvector:
; ALL:       # %bb.0:
; ALL-NEXT:    # kill: def $xmm1 killed $xmm1 def $ymm1
; ALL-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6,7]
; ALL-NEXT:    retq
  %i0 = insertelement <8 x float> %x, float %s, i32 0
  ret <8 x float> %i0
}

; <4 x double>, index 0.
define <4 x double> @insert_f64_firstelt_of_low_subvector(<4 x double> %x, double %s) {
; ALL-LABEL: insert_f64_firstelt_of_low_subvector:
; ALL:       # %bb.0:
; ALL-NEXT:    # kill: def $xmm1 killed $xmm1 def $ymm1
; ALL-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]
; ALL-NEXT:    retq
  %i0 = insertelement <4 x double> %x, double %s, i32 0
  ret <4 x double> %i0
}

; <32 x i8>, index 0.
define <32 x i8> @insert_i8_firstelt_of_low_subvector(<32 x i8> %x, i8 %s) {
; AVX-LABEL: insert_i8_firstelt_of_low_subvector:
; AVX:       # %bb.0:
; AVX-NEXT:    vpinsrb $0, %edi, %xmm0, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX-NEXT:    retq
;
; AVX2-LABEL: insert_i8_firstelt_of_low_subvector:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpinsrb $0, %edi, %xmm0, %xmm1
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX2-NEXT:    retq
  %i0 = insertelement <32 x i8> %x, i8 %s, i32 0
  ret <32 x i8> %i0
}

; <16 x i16>, index 0.
define <16 x i16> @insert_i16_firstelt_of_low_subvector(<16 x i16> %x, i16 %s) {
; AVX-LABEL: insert_i16_firstelt_of_low_subvector:
; AVX:       # %bb.0:
; AVX-NEXT:    vpinsrw $0, %edi, %xmm0, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX-NEXT:    retq
;
; AVX2-LABEL: insert_i16_firstelt_of_low_subvector:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpinsrw $0, %edi, %xmm0, %xmm1
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX2-NEXT:    retq
  %i0 = insertelement <16 x i16> %x, i16 %s, i32 0
  ret <16 x i16> %i0
}

; <8 x i32>, index 0.
define <8 x i32> @insert_i32_firstelt_of_low_subvector(<8 x i32> %x, i32 %s) {
; AVX-LABEL: insert_i32_firstelt_of_low_subvector:
; AVX:       # %bb.0:
; AVX-NEXT:    vpinsrd $0, %edi, %xmm0, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX-NEXT:    retq
;
; AVX2-LABEL: insert_i32_firstelt_of_low_subvector:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm1
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6,7]
; AVX2-NEXT:    retq
  %i0 = insertelement <8 x i32> %x, i32 %s, i32 0
  ret <8 x i32> %i0
}

; <4 x i64>, index 0.
define <4 x i64> @insert_i64_firstelt_of_low_subvector(<4 x i64> %x, i64 %s) {
; AVX-LABEL: insert_i64_firstelt_of_low_subvector:
; AVX:       # %bb.0:
; AVX-NEXT:    vpinsrq $0, %rdi, %xmm0, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX-NEXT:    retq
;
; AVX2-LABEL: insert_i64_firstelt_of_low_subvector:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovq %rdi, %xmm1
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]
; AVX2-NEXT:    retq
  %i0 = insertelement <4 x i64> %x, i64 %s, i32 0
  ret <4 x i64> %i0
}

; 0'th element of high subvector insertion into an AVX register.
; The six functions that follow each write the scalar %s to the first index of
; the upper 128 bits of a 256-bit vector %x (index = element count / 2) and
; return the result. The assertion lines inside every function are
; autogenerated; regenerate them with the update script instead of editing
; them by hand.

; <8 x float>, index 4.
define <8 x float> @insert_f32_firstelt_of_high_subvector(<8 x float> %x, float %s) {
; AVX-LABEL: insert_f32_firstelt_of_high_subvector:
; AVX:       # %bb.0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX-NEXT:    vblendps {{.*#+}} xmm1 = xmm1[0],xmm2[1,2,3]
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: insert_f32_firstelt_of_high_subvector:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vbroadcastss %xmm1, %ymm1
; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4],ymm0[5,6,7]
; AVX2-NEXT:    retq
  %i0 = insertelement <8 x float> %x, float %s, i32 4
  ret <8 x float> %i0
}

; <4 x double>, index 2.
define <4 x double> @insert_f64_firstelt_of_high_subvector(<4 x double> %x, double %s) {
; AVX-LABEL: insert_f64_firstelt_of_high_subvector:
; AVX:       # %bb.0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX-NEXT:    vblendps {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3]
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: insert_f64_firstelt_of_high_subvector:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vbroadcastsd %xmm1, %ymm1
; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
; AVX2-NEXT:    retq
  %i0 = insertelement <4 x double> %x, double %s, i32 2
  ret <4 x double> %i0
}

; <32 x i8>, index 16.
define <32 x i8> @insert_i8_firstelt_of_high_subvector(<32 x i8> %x, i8 %s) {
; AVX-LABEL: insert_i8_firstelt_of_high_subvector:
; AVX:       # %bb.0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vpinsrb $0, %edi, %xmm1, %xmm1
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: insert_i8_firstelt_of_high_subvector:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpinsrb $0, %edi, %xmm1, %xmm1
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %i0 = insertelement <32 x i8> %x, i8 %s, i32 16
  ret <32 x i8> %i0
}

; <16 x i16>, index 8.
define <16 x i16> @insert_i16_firstelt_of_high_subvector(<16 x i16> %x, i16 %s) {
; AVX-LABEL: insert_i16_firstelt_of_high_subvector:
; AVX:       # %bb.0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vpinsrw $0, %edi, %xmm1, %xmm1
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: insert_i16_firstelt_of_high_subvector:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm1
; AVX2-NEXT:    vpbroadcastw %xmm1, %ymm1
; AVX2-NEXT:    vpblendw {{.*#+}} ymm1 = ymm1[0],ymm0[1,2,3,4,5,6,7],ymm1[8],ymm0[9,10,11,12,13,14,15]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT:    retq
  %i0 = insertelement <16 x i16> %x, i16 %s, i32 8
  ret <16 x i16> %i0
}

; <8 x i32>, index 4.
define <8 x i32> @insert_i32_firstelt_of_high_subvector(<8 x i32> %x, i32 %s) {
; AVX-LABEL: insert_i32_firstelt_of_high_subvector:
; AVX:       # %bb.0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vpinsrd $0, %edi, %xmm1, %xmm1
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: insert_i32_firstelt_of_high_subvector:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm1
; AVX2-NEXT:    vpbroadcastd %xmm1, %ymm1
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4],ymm0[5,6,7]
; AVX2-NEXT:    retq
  %i0 = insertelement <8 x i32> %x, i32 %s, i32 4
  ret <8 x i32> %i0
}

; <4 x i64>, index 2.
define <4 x i64> @insert_i64_firstelt_of_high_subvector(<4 x i64> %x, i64 %s) {
; AVX-LABEL: insert_i64_firstelt_of_high_subvector:
; AVX:       # %bb.0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vpinsrq $0, %rdi, %xmm1, %xmm1
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: insert_i64_firstelt_of_high_subvector:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovq %rdi, %xmm1
; AVX2-NEXT:    vpbroadcastq %xmm1, %ymm1
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
; AVX2-NEXT:    retq
  %i0 = insertelement <4 x i64> %x, i64 %s, i32 2
  ret <4 x i64> %i0
}

; element insertion into 0'th element of both subvectors
; Each function chains two insertelement instructions with the same scalar %s:
; the first writes index 0, the second writes the first index of the upper
; 128 bits of the result of the first.

; <8 x float>, indices 0 and 4.
define <8 x float> @insert_f32_firstelts(<8 x float> %x, float %s) {
; AVX-LABEL: insert_f32_firstelts:
; AVX:       # %bb.0:
; AVX-NEXT:    vblendps {{.*#+}} xmm2 = xmm1[0],xmm0[1,2,3]
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: insert_f32_firstelts:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vbroadcastss %xmm1, %ymm1
; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3],ymm1[4],ymm0[5,6,7]
; AVX2-NEXT:    retq
  %i0 = insertelement <8 x float> %x, float %s, i32 0
  %i1 = insertelement <8 x float> %i0, float %s, i32 4
  ret <8 x float> %i1
}

; <4 x double>, indices 0 and 2.
define <4 x double> @insert_f64_firstelts(<4 x double> %x, double %s) {
; AVX-LABEL: insert_f64_firstelts:
; AVX:       # %bb.0:
; AVX-NEXT:    vblendps {{.*#+}} xmm2 = xmm1[0,1],xmm0[2,3]
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: insert_f64_firstelts:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vbroadcastsd %xmm1, %ymm1
; AVX2-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[3],ymm0[3]
; AVX2-NEXT:    retq
  %i0 = insertelement <4 x double> %x, double %s, i32 0
  %i1 = insertelement <4 x double> %i0, double %s, i32 2
  ret <4 x double> %i1
}

; <32 x i8>, indices 0 and 16.
define <32 x i8> @insert_i8_firstelts(<32 x i8> %x, i8 %s) {
; AVX-LABEL: insert_i8_firstelts:
; AVX:       # %bb.0:
; AVX-NEXT:    vpinsrb $0, %edi, %xmm0, %xmm1
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX-NEXT:    vpinsrb $0, %edi, %xmm0, %xmm0
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: insert_i8_firstelts:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpinsrb $0, %edi, %xmm0, %xmm1
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
; AVX2-NEXT:    vpinsrb $0, %edi, %xmm0, %xmm0
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
  %i0 = insertelement <32 x i8> %x, i8 %s, i32 0
  %i1 = insertelement <32 x i8> %i0, i8 %s, i32 16
  ret <32 x i8> %i1
}

; <16 x i16>, indices 0 and 8.
define <16 x i16> @insert_i16_firstelts(<16 x i16> %x, i16 %s) {
; AVX-LABEL: insert_i16_firstelts:
; AVX:       # %bb.0:
; AVX-NEXT:    vpinsrw $0, %edi, %xmm0, %xmm1
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX-NEXT:    vpinsrw $0, %edi, %xmm0, %xmm0
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: insert_i16_firstelts:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpinsrw $0, %edi, %xmm0, %xmm1
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX2-NEXT:    vmovd %edi, %xmm1
; AVX2-NEXT:    vpbroadcastw %xmm1, %ymm1
; AVX2-NEXT:    vpblendw {{.*#+}} ymm1 = ymm1[0],ymm0[1,2,3,4,5,6,7],ymm1[8],ymm0[9,10,11,12,13,14,15]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT:    retq
  %i0 = insertelement <16 x i16> %x, i16 %s, i32 0
  %i1 = insertelement <16 x i16> %i0, i16 %s, i32 8
  ret <16 x i16> %i1
}

; <8 x i32>, indices 0 and 4.
define <8 x i32> @insert_i32_firstelts(<8 x i32> %x, i32 %s) {
; AVX-LABEL: insert_i32_firstelts:
; AVX:       # %bb.0:
; AVX-NEXT:    vpinsrd $0, %edi, %xmm0, %xmm1
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX-NEXT:    vpinsrd $0, %edi, %xmm0, %xmm0
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: insert_i32_firstelts:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm1
; AVX2-NEXT:    vpbroadcastd %xmm1, %ymm1
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3],ymm1[4],ymm0[5,6,7]
; AVX2-NEXT:    retq
  %i0 = insertelement <8 x i32> %x, i32 %s, i32 0
  %i1 = insertelement <8 x i32> %i0, i32 %s, i32 4
  ret <8 x i32> %i1
}

; <4 x i64>, indices 0 and 2.
define <4 x i64> @insert_i64_firstelts(<4 x i64> %x, i64 %s) {
; AVX-LABEL: insert_i64_firstelts:
; AVX:       # %bb.0:
; AVX-NEXT:    vpinsrq $0, %rdi, %xmm0, %xmm1
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX-NEXT:    vpinsrq $0, %rdi, %xmm0, %xmm0
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: insert_i64_firstelts:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovq %rdi, %xmm1
; AVX2-NEXT:    vpbroadcastq %xmm1, %ymm1
; AVX2-NEXT:    vpunpckhqdq {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[3],ymm0[3]
; AVX2-NEXT:    retq
  %i0 = insertelement <4 x i64> %x, i64 %s, i32 0
  %i1 = insertelement <4 x i64> %i0, i64 %s, i32 2
  ret <4 x i64> %i1
}

; element insertion into two elements of high subvector
; Each function writes the same scalar %s to the first two indices of the
; upper 128 bits of %x, using two chained insertelement instructions.

; <8 x float>, indices 4 and 5.
define <8 x float> @insert_f32_two_elts_of_high_subvector(<8 x float> %x, float %s) {
; AVX-LABEL: insert_f32_two_elts_of_high_subvector:
; AVX:       # %bb.0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[0,0],xmm2[2,3]
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: insert_f32_two_elts_of_high_subvector:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vbroadcastss %xmm1, %ymm1
; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
; AVX2-NEXT:    retq
  %i0 = insertelement <8 x float> %x, float %s, i32 4
  %i1 = insertelement <8 x float> %i0, float %s, i32 5
  ret <8 x float> %i1
}

; <4 x double>, indices 2 and 3 (every element of the upper 128 bits).
define <4 x double> @insert_f64_two_elts_of_high_subvector(<4 x double> %x, double %s) {
; AVX-LABEL: insert_f64_two_elts_of_high_subvector:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovddup {{.*#+}} xmm1 = xmm1[0,0]
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: insert_f64_two_elts_of_high_subvector:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vbroadcastsd %xmm1, %ymm1
; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT:    retq
  %i0 = insertelement <4 x double> %x, double %s, i32 2
  %i1 = insertelement <4 x double> %i0, double %s, i32 3
  ret <4 x double> %i1
}

; <32 x i8>, indices 16 and 17.
define <32 x i8> @insert_i8_two_elts_of_high_subvector(<32 x i8> %x, i8 %s) {
; AVX-LABEL: insert_i8_two_elts_of_high_subvector:
; AVX:       # %bb.0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vpinsrb $0, %edi, %xmm1, %xmm1
; AVX-NEXT:    vpinsrb $1, %edi, %xmm1, %xmm1
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: insert_i8_two_elts_of_high_subvector:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpinsrb $0, %edi, %xmm1, %xmm1
; AVX2-NEXT:    vpinsrb $1, %edi, %xmm1, %xmm1
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %i0 = insertelement <32 x i8> %x, i8 %s, i32 16
  %i1 = insertelement <32 x i8> %i0, i8 %s, i32 17
  ret <32 x i8> %i1
}

; <16 x i16>, indices 8 and 9.
define <16 x i16> @insert_i16_two_elts_of_high_subvector(<16 x i16> %x, i16 %s) {
; AVX-LABEL: insert_i16_two_elts_of_high_subvector:
; AVX:       # %bb.0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vpinsrw $0, %edi, %xmm1, %xmm1
; AVX-NEXT:    vpinsrw $1, %edi, %xmm1, %xmm1
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: insert_i16_two_elts_of_high_subvector:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm1
; AVX2-NEXT:    vpbroadcastw %xmm1, %ymm1
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4],ymm0[5,6,7]
; AVX2-NEXT:    retq
  %i0 = insertelement <16 x i16> %x, i16 %s, i32 8
  %i1 = insertelement <16 x i16> %i0, i16 %s, i32 9
  ret <16 x i16> %i1
}

; <8 x i32>, indices 4 and 5.
define <8 x i32> @insert_i32_two_elts_of_high_subvector(<8 x i32> %x, i32 %s) {
; AVX-LABEL: insert_i32_two_elts_of_high_subvector:
; AVX:       # %bb.0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vpinsrd $0, %edi, %xmm1, %xmm1
; AVX-NEXT:    vpinsrd $1, %edi, %xmm1, %xmm1
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: insert_i32_two_elts_of_high_subvector:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm1
; AVX2-NEXT:    vpbroadcastd %xmm1, %ymm1
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
; AVX2-NEXT:    retq
  %i0 = insertelement <8 x i32> %x, i32 %s, i32 4
  %i1 = insertelement <8 x i32> %i0, i32 %s, i32 5
  ret <8 x i32> %i1
}

; <4 x i64>, indices 2 and 3 (every element of the upper 128 bits).
define <4 x i64> @insert_i64_two_elts_of_high_subvector(<4 x i64> %x, i64 %s) {
; AVX-LABEL: insert_i64_two_elts_of_high_subvector:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovq %rdi, %xmm1
; AVX-NEXT:    vpinsrq $1, %rdi, %xmm1, %xmm1
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: insert_i64_two_elts_of_high_subvector:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovq %rdi, %xmm1
; AVX2-NEXT:    vpbroadcastq %xmm1, %ymm1
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT:    retq
  %i0 = insertelement <4 x i64> %x, i64 %s, i32 2
  %i1 = insertelement <4 x i64> %i0, i64 %s, i32 3
  ret <4 x i64> %i1
}

; element insertion into two elements of low subvector
; Each function writes the same scalar %s to indices 0 and 1 of %x, using two
; chained insertelement instructions.

; <8 x float>, indices 0 and 1.
define <8 x float> @insert_f32_two_elts_of_low_subvector(<8 x float> %x, float %s) {
; ALL-LABEL: insert_f32_two_elts_of_low_subvector:
; ALL:       # %bb.0:
; ALL-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,3]
; ALL-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; ALL-NEXT:    retq
  %i0 = insertelement <8 x float> %x, float %s, i32 0
  %i1 = insertelement <8 x float> %i0, float %s, i32 1
  ret <8 x float> %i1
}

; <4 x double>, indices 0 and 1 (every element of the lower 128 bits).
define <4 x double> @insert_f64_two_elts_of_low_subvector(<4 x double> %x, double %s) {
; ALL-LABEL: insert_f64_two_elts_of_low_subvector:
; ALL:       # %bb.0:
; ALL-NEXT:    vmovddup {{.*#+}} xmm1 = xmm1[0,0]
; ALL-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; ALL-NEXT:    retq
  %i0 = insertelement <4 x double> %x, double %s, i32 0
  %i1 = insertelement <4 x double> %i0, double %s, i32 1
  ret <4 x double> %i1
}

; <32 x i8>, indices 0 and 1.
define <32 x i8> @insert_i8_two_elts_of_low_subvector(<32 x i8> %x, i8 %s) {
; AVX-LABEL: insert_i8_two_elts_of_low_subvector:
; AVX:       # %bb.0:
; AVX-NEXT:    vpinsrb $0, %edi, %xmm0, %xmm1
; AVX-NEXT:    vpinsrb $1, %edi, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX-NEXT:    retq
;
; AVX2-LABEL: insert_i8_two_elts_of_low_subvector:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpinsrb $0, %edi, %xmm0, %xmm1
; AVX2-NEXT:    vpinsrb $1, %edi, %xmm1, %xmm1
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX2-NEXT:    retq
  %i0 = insertelement <32 x i8> %x, i8 %s, i32 0
  %i1 = insertelement <32 x i8> %i0, i8 %s, i32 1
  ret <32 x i8> %i1
}

; <16 x i16>, indices 0 and 1.
define <16 x i16> @insert_i16_two_elts_of_low_subvector(<16 x i16> %x, i16 %s) {
; AVX-LABEL: insert_i16_two_elts_of_low_subvector:
; AVX:       # %bb.0:
; AVX-NEXT:    vpinsrw $0, %edi, %xmm0, %xmm1
; AVX-NEXT:    vpinsrw $1, %edi, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX-NEXT:    retq
;
; AVX2-LABEL: insert_i16_two_elts_of_low_subvector:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpinsrw $0, %edi, %xmm0, %xmm1
; AVX2-NEXT:    vpinsrw $1, %edi, %xmm1, %xmm1
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX2-NEXT:    retq
  %i0 = insertelement <16 x i16> %x, i16 %s, i32 0
  %i1 = insertelement <16 x i16> %i0, i16 %s, i32 1
  ret <16 x i16> %i1
}

; <8 x i32>, indices 0 and 1.
define <8 x i32> @insert_i32_two_elts_of_low_subvector(<8 x i32> %x, i32 %s) {
; AVX-LABEL: insert_i32_two_elts_of_low_subvector:
; AVX:       # %bb.0:
; AVX-NEXT:    vpinsrd $0, %edi, %xmm0, %xmm1
; AVX-NEXT:    vpinsrd $1, %edi, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX-NEXT:    retq
;
; AVX2-LABEL: insert_i32_two_elts_of_low_subvector:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovd %edi, %xmm1
; AVX2-NEXT:    vpblendd {{.*#+}} xmm1 = xmm1[0],xmm0[1,2,3]
; AVX2-NEXT:    vpinsrd $1, %edi, %xmm1, %xmm1
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX2-NEXT:    retq
  %i0 = insertelement <8 x i32> %x, i32 %s, i32 0
  %i1 = insertelement <8 x i32> %i0, i32 %s, i32 1
  ret <8 x i32> %i1
}

; <4 x i64>, indices 0 and 1 (every element of the lower 128 bits).
define <4 x i64> @insert_i64_two_elts_of_low_subvector(<4 x i64> %x, i64 %s) {
; AVX-LABEL: insert_i64_two_elts_of_low_subvector:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovq %rdi, %xmm1
; AVX-NEXT:    vpinsrq $1, %rdi, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX-NEXT:    retq
;
; AVX2-LABEL: insert_i64_two_elts_of_low_subvector:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vmovq %rdi, %xmm1
; AVX2-NEXT:    vpinsrq $1, %rdi, %xmm1, %xmm1
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX2-NEXT:    retq
  %i0 = insertelement <4 x i64> %x, i64 %s, i32 0
  %i1 = insertelement <4 x i64> %i0, i64 %s, i32 1
  ret <4 x i64> %i1
}