; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX

; Each function below inserts a scalar argument %s into a 128-bit vector
; argument %x with insertelement and returns the result. The assertions pin
; the x86-64 code expected from the four llc invocations above; the +avx and
; +avx2 invocations share one check prefix.

; 0'th element insertion into an SSE register.

; Replaces element 0 of <4 x float> %x with %s. Expected lowering is a single
; movss for the SSE2 run and a (v)blendps for the SSE4.1 and AVX runs.
define <4 x float> @insert_f32_firstelt(<4 x float> %x, float %s) {
; SSE2-LABEL: insert_f32_firstelt:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: insert_f32_firstelt:
; SSE41:       # %bb.0:
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_f32_firstelt:
; AVX:       # %bb.0:
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; AVX-NEXT:    retq
  %i0 = insertelement <4 x float> %x, float %s, i32 0
  ret <4 x float> %i0
}

; Replaces element 0 of <2 x double> %x with %s. Expected lowering is movsd for
; the SSE2 run and a (v)blendps covering the low two 32-bit lanes otherwise.
define <2 x double> @insert_f64_firstelt(<2 x double> %x, double %s) {
; SSE2-LABEL: insert_f64_firstelt:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: insert_f64_firstelt:
; SSE41:       # %bb.0:
; SSE41-NEXT:    blendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_f64_firstelt:
; AVX:       # %bb.0:
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm1[0,1],xmm0[2,3]
; AVX-NEXT:    retq
  %i0 = insertelement <2 x double> %x, double %s, i32 0
  ret <2 x double> %i0
}

; Replaces element 0 of <16 x i8> %x with %s. The SSE2 run is expected to use a
; constant byte mask with pand/pandn/por; the SSE4.1 and AVX runs are expected
; to use a single (v)pinsrb.
define <16 x i8> @insert_i8_firstelt(<16 x i8> %x, i8 %s) {
; SSE2-LABEL: insert_i8_firstelt:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    movd %edi, %xmm2
; SSE2-NEXT:    pandn %xmm2, %xmm1
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: insert_i8_firstelt:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pinsrb $0, %edi, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_i8_firstelt:
; AVX:       # %bb.0:
; AVX-NEXT:    vpinsrb $0, %edi, %xmm0, %xmm0
; AVX-NEXT:    retq
  %i0 = insertelement <16 x i8> %x, i8 %s, i32 0
  ret <16 x i8> %i0
}

; Replaces element 0 of <8 x i16> %x with %s. All runs are expected to use a
; single (v)pinsrw, so both SSE runs share the common SSE prefix here.
define <8 x i16> @insert_i16_firstelt(<8 x i16> %x, i16 %s) {
; SSE-LABEL: insert_i16_firstelt:
; SSE:       # %bb.0:
; SSE-NEXT:    pinsrw $0, %edi, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_i16_firstelt:
; AVX:       # %bb.0:
; AVX-NEXT:    vpinsrw $0, %edi, %xmm0, %xmm0
; AVX-NEXT:    retq
  %i0 = insertelement <8 x i16> %x, i16 %s, i32 0
  ret <8 x i16> %i0
}

; Replaces element 0 of <4 x i32> %x with %s. The SSE2 run is expected to move
; the scalar into a vector register (movd) and merge with movss; the SSE4.1 and
; AVX runs are expected to use a single (v)pinsrd.
define <4 x i32> @insert_i32_firstelt(<4 x i32> %x, i32 %s) {
; SSE2-LABEL: insert_i32_firstelt:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movd %edi, %xmm1
; SSE2-NEXT:    movss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: insert_i32_firstelt:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pinsrd $0, %edi, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_i32_firstelt:
; AVX:       # %bb.0:
; AVX-NEXT:    vpinsrd $0, %edi, %xmm0, %xmm0
; AVX-NEXT:    retq
  %i0 = insertelement <4 x i32> %x, i32 %s, i32 0
  ret <4 x i32> %i0
}

; Replaces element 0 of <2 x i64> %x with %s. The SSE2 run is expected to use
; movq followed by movsd; the SSE4.1 and AVX runs are expected to use a single
; (v)pinsrq.
define <2 x i64> @insert_i64_firstelt(<2 x i64> %x, i64 %s) {
; SSE2-LABEL: insert_i64_firstelt:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movq %rdi, %xmm1
; SSE2-NEXT:    movsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: insert_i64_firstelt:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pinsrq $0, %rdi, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_i64_firstelt:
; AVX:       # %bb.0:
; AVX-NEXT:    vpinsrq $0, %rdi, %xmm0, %xmm0
; AVX-NEXT:    retq
  %i0 = insertelement <2 x i64> %x, i64 %s, i32 0
  ret <2 x i64> %i0
}

; 1'th element insertion.
; The functions in this group replace element 1 (the second lane) of %x.

; Replaces element 1 of <4 x float> %x with %s. The SSE2 run is expected to
; need a movlhps + shufps + movaps sequence; the SSE4.1 and AVX runs are
; expected to use a single (v)insertps.
define <4 x float> @insert_f32_secondelt(<4 x float> %x, float %s) {
; SSE2-LABEL: insert_f32_secondelt:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3]
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: insert_f32_secondelt:
; SSE41:       # %bb.0:
; SSE41-NEXT:    insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_f32_secondelt:
; AVX:       # %bb.0:
; AVX-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3]
; AVX-NEXT:    retq
  %i0 = insertelement <4 x float> %x, float %s, i32 1
  ret <4 x float> %i0
}

; Replaces element 1 of <2 x double> %x with %s. All runs are expected to use a
; single (v)movlhps.
define <2 x double> @insert_f64_secondelt(<2 x double> %x, double %s) {
; SSE-LABEL: insert_f64_secondelt:
; SSE:       # %bb.0:
; SSE-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_f64_secondelt:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX-NEXT:    retq
  %i0 = insertelement <2 x double> %x, double %s, i32 1
  ret <2 x double> %i0
}

; Replaces element 1 of <16 x i8> %x with %s. The SSE2 run is expected to shift
; the scalar into byte 1 (psllw $8) and merge it with a pand/pandn/por mask
; sequence; the SSE4.1 and AVX runs are expected to use a single (v)pinsrb.
define <16 x i8> @insert_i8_secondelt(<16 x i8> %x, i8 %s) {
; SSE2-LABEL: insert_i8_secondelt:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    movd %edi, %xmm2
; SSE2-NEXT:    psllw $8, %xmm2
; SSE2-NEXT:    pandn %xmm2, %xmm1
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: insert_i8_secondelt:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pinsrb $1, %edi, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_i8_secondelt:
; AVX:       # %bb.0:
; AVX-NEXT:    vpinsrb $1, %edi, %xmm0, %xmm0
; AVX-NEXT:    retq
  %i0 = insertelement <16 x i8> %x, i8 %s, i32 1
  ret <16 x i8> %i0
}

; Replaces element 1 of <8 x i16> %x with %s. All runs are expected to use a
; single (v)pinsrw.
define <8 x i16> @insert_i16_secondelt(<8 x i16> %x, i16 %s) {
; SSE-LABEL: insert_i16_secondelt:
; SSE:       # %bb.0:
; SSE-NEXT:    pinsrw $1, %edi, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_i16_secondelt:
; AVX:       # %bb.0:
; AVX-NEXT:    vpinsrw $1, %edi, %xmm0, %xmm0
; AVX-NEXT:    retq
  %i0 = insertelement <8 x i16> %x, i16 %s, i32 1
  ret <8 x i16> %i0
}

; Replaces element 1 of <4 x i32> %x with %s. The SSE2 run is expected to need
; movd + punpcklqdq + shufps + movaps; the SSE4.1 and AVX runs are expected to
; use a single (v)pinsrd.
define <4 x i32> @insert_i32_secondelt(<4 x i32> %x, i32 %s) {
; SSE2-LABEL: insert_i32_secondelt:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movd %edi, %xmm1
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3]
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: insert_i32_secondelt:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pinsrd $1, %edi, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_i32_secondelt:
; AVX:       # %bb.0:
; AVX-NEXT:    vpinsrd $1, %edi, %xmm0, %xmm0
; AVX-NEXT:    retq
  %i0 = insertelement <4 x i32> %x, i32 %s, i32 1
  ret <4 x i32> %i0
}

; Replaces element 1 of <2 x i64> %x with %s. The SSE2 run is expected to use
; movq followed by punpcklqdq; the SSE4.1 and AVX runs are expected to use a
; single (v)pinsrq.
define <2 x i64> @insert_i64_secondelt(<2 x i64> %x, i64 %s) {
; SSE2-LABEL: insert_i64_secondelt:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movq %rdi, %xmm1
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: insert_i64_secondelt:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pinsrq $1, %rdi, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_i64_secondelt:
; AVX:       # %bb.0:
; AVX-NEXT:    vpinsrq $1, %rdi, %xmm0, %xmm0
; AVX-NEXT:    retq
  %i0 = insertelement <2 x i64> %x, i64 %s, i32 1
  ret <2 x i64> %i0
}

; Insertion of the same scalar into two elements: each function below writes
; %s into element 0 and then into element 1 of %x.

; Writes %s into elements 0 and 1 of <4 x float> %x. All runs are expected to
; use one (v)shufps; the SSE runs need an extra movaps to return in xmm0.
define <4 x float> @insert_f32_two_elts(<4 x float> %x, float %s) {
; SSE-LABEL: insert_f32_two_elts:
; SSE:       # %bb.0:
; SSE-NEXT:    shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[2,3]
; SSE-NEXT:    movaps %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_f32_two_elts:
; AVX:       # %bb.0:
; AVX-NEXT:    vshufps {{.*#+}} xmm0 = xmm1[0,0],xmm0[2,3]
; AVX-NEXT:    retq
  %i0 = insertelement <4 x float> %x, float %s, i32 0
  %i1 = insertelement <4 x float> %i0, float %s, i32 1
  ret <4 x float> %i1
}

; Writes %s into both elements of <2 x double> %x, so no lane of %x survives.
; The SSE2 run is expected to use movaps + movlhps; the SSE4.1 and AVX runs are
; expected to use a single (v)movddup.
define <2 x double> @insert_f64_two_elts(<2 x double> %x, double %s) {
; SSE2-LABEL: insert_f64_two_elts:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT:    retq
;
; SSE41-LABEL: insert_f64_two_elts:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movddup {{.*#+}} xmm0 = xmm1[0,0]
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_f64_two_elts:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = xmm1[0,0]
; AVX-NEXT:    retq
  %i0 = insertelement <2 x double> %x, double %s, i32 0
  %i1 = insertelement <2 x double> %i0, double %s, i32 1
  ret <2 x double> %i1
}

; Writes %s into elements 0 and 1 of <16 x i8> %x. The SSE2 run is expected to
; repeat the pand/pandn/por mask sequence once per byte; the SSE4.1 and AVX
; runs are expected to use two (v)pinsrb instructions.
define <16 x i8> @insert_i8_two_elts(<16 x i8> %x, i8 %s) {
; SSE2-LABEL: insert_i8_two_elts:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [0,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    movd %edi, %xmm2
; SSE2-NEXT:    pandn %xmm2, %xmm1
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    movdqa {{.*#+}} xmm1 = [255,0,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    psllw $8, %xmm2
; SSE2-NEXT:    pandn %xmm2, %xmm1
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: insert_i8_two_elts:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pinsrb $0, %edi, %xmm0
; SSE41-NEXT:    pinsrb $1, %edi, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_i8_two_elts:
; AVX:       # %bb.0:
; AVX-NEXT:    vpinsrb $0, %edi, %xmm0, %xmm0
; AVX-NEXT:    vpinsrb $1, %edi, %xmm0, %xmm0
; AVX-NEXT:    retq
  %i0 = insertelement <16 x i8> %x, i8 %s, i32 0
  %i1 = insertelement <16 x i8> %i0, i8 %s, i32 1
  ret <16 x i8> %i1
}

; Writes %s into elements 0 and 1 of <8 x i16> %x. All runs are expected to use
; two (v)pinsrw instructions.
define <8 x i16> @insert_i16_two_elts(<8 x i16> %x, i16 %s) {
; SSE-LABEL: insert_i16_two_elts:
; SSE:       # %bb.0:
; SSE-NEXT:    pinsrw $0, %edi, %xmm0
; SSE-NEXT:    pinsrw $1, %edi, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_i16_two_elts:
; AVX:       # %bb.0:
; AVX-NEXT:    vpinsrw $0, %edi, %xmm0, %xmm0
; AVX-NEXT:    vpinsrw $1, %edi, %xmm0, %xmm0
; AVX-NEXT:    retq
  %i0 = insertelement <8 x i16> %x, i16 %s, i32 0
  %i1 = insertelement <8 x i16> %i0, i16 %s, i32 1
  ret <8 x i16> %i1
}

; Writes %s into elements 0 and 1 of <4 x i32> %x. The SSE2 run is expected to
; materialize the scalar twice (two movd) and combine with punpcklqdq + shufps;
; the SSE4.1 and AVX runs are expected to use two (v)pinsrd instructions.
define <4 x i32> @insert_i32_two_elts(<4 x i32> %x, i32 %s) {
; SSE2-LABEL: insert_i32_two_elts:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movd %edi, %xmm2
; SSE2-NEXT:    movd %edi, %xmm1
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
; SSE2-NEXT:    shufps {{.*#+}} xmm1 = xmm1[2,0],xmm0[2,3]
; SSE2-NEXT:    movaps %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE41-LABEL: insert_i32_two_elts:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pinsrd $0, %edi, %xmm0
; SSE41-NEXT:    pinsrd $1, %edi, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: insert_i32_two_elts:
; AVX:       # %bb.0:
; AVX-NEXT:    vpinsrd $0, %edi, %xmm0, %xmm0
; AVX-NEXT:    vpinsrd $1, %edi, %xmm0, %xmm0
; AVX-NEXT:    retq
  %i0 = insertelement <4 x i32> %x, i32 %s, i32 0
  %i1 = insertelement <4 x i32> %i0, i32 %s, i32 1
  ret <4 x i32> %i1
}

; Writes %s into both elements of <2 x i64> %x, so no lane of %x survives. Both
; SSE runs are expected to use movq followed by a pshufd that repeats the low
; 64 bits.
; NOTE(review): this function has no assertions for the +avx / +avx2
; invocations. Presumably the two invocations emit different code under their
; shared check prefix, so the generator dropped them -- confirm. Giving each of
; those invocations its own additional prefix and regenerating the assertions
; would restore coverage.
define <2 x i64> @insert_i64_two_elts(<2 x i64> %x, i64 %s) {
; SSE-LABEL: insert_i64_two_elts:
; SSE:       # %bb.0:
; SSE-NEXT:    movq %rdi, %xmm0
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE-NEXT:    retq
  %i0 = insertelement <2 x i64> %x, i64 %s, i32 0
  %i1 = insertelement <2 x i64> %i0, i64 %s, i32 1
  ret <2 x i64> %i1
}