1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefixes=ALL,SSE,SSE2 3; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse4.1 | FileCheck %s --check-prefixes=ALL,SSE,SSE41 4; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefixes=ALL,AVX,AVX1OR2,AVX1 5; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=ALL,AVX,AVX1OR2,AVX2 6; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefixes=ALL,AVX,AVX512,AVX512F 7; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512bw,+avx512vl | FileCheck %s --check-prefixes=ALL,AVX,AVX512,AVX512BW 8; RUN: llc < %s -mtriple=i686-- -mattr=+avx2 | FileCheck %s --check-prefixes=ALL,X86AVX2 9 10define <16 x i8> @undef_index(i8 %x) nounwind { 11; ALL-LABEL: undef_index: 12; ALL: # %bb.0: 13; ALL-NEXT: ret{{[l|q]}} 14 %ins = insertelement <16 x i8> undef, i8 %x, i64 undef 15 ret <16 x i8> %ins 16} 17 18define <16 x i8> @undef_scalar(<16 x i8> %x, i32 %index) nounwind { 19; ALL-LABEL: undef_scalar: 20; ALL: # %bb.0: 21; ALL-NEXT: ret{{[l|q]}} 22 %ins = insertelement <16 x i8> %x, i8 undef, i32 %index 23 ret <16 x i8> %ins 24} 25 26; 27; Insertion into undef vectors 28; 29 30define <16 x i8> @arg_i8_v16i8_undef(i8 %x, i32 %y) nounwind { 31; SSE2-LABEL: arg_i8_v16i8_undef: 32; SSE2: # %bb.0: 33; SSE2-NEXT: movd %edi, %xmm0 34; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 35; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 36; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 37; SSE2-NEXT: retq 38; 39; SSE41-LABEL: arg_i8_v16i8_undef: 40; SSE41: # %bb.0: 41; SSE41-NEXT: movd %edi, %xmm0 42; SSE41-NEXT: pxor %xmm1, %xmm1 43; SSE41-NEXT: pshufb %xmm1, %xmm0 44; SSE41-NEXT: retq 45; 46; AVX1-LABEL: arg_i8_v16i8_undef: 47; AVX1: # %bb.0: 48; AVX1-NEXT: vmovd %edi, %xmm0 49; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 50; AVX1-NEXT: vpshufb %xmm1, %xmm0, 
%xmm0 51; AVX1-NEXT: retq 52; 53; AVX2-LABEL: arg_i8_v16i8_undef: 54; AVX2: # %bb.0: 55; AVX2-NEXT: vmovd %edi, %xmm0 56; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0 57; AVX2-NEXT: retq 58; 59; AVX512F-LABEL: arg_i8_v16i8_undef: 60; AVX512F: # %bb.0: 61; AVX512F-NEXT: vmovd %edi, %xmm0 62; AVX512F-NEXT: vpbroadcastb %xmm0, %xmm0 63; AVX512F-NEXT: retq 64; 65; AVX512BW-LABEL: arg_i8_v16i8_undef: 66; AVX512BW: # %bb.0: 67; AVX512BW-NEXT: vpbroadcastb %edi, %xmm0 68; AVX512BW-NEXT: retq 69; 70; X86AVX2-LABEL: arg_i8_v16i8_undef: 71; X86AVX2: # %bb.0: 72; X86AVX2-NEXT: vpbroadcastb {{[0-9]+}}(%esp), %xmm0 73; X86AVX2-NEXT: retl 74 %ins = insertelement <16 x i8> undef, i8 %x, i32 %y 75 ret <16 x i8> %ins 76} 77 78define <8 x i16> @arg_i16_v8i16_undef(i16 %x, i32 %y) nounwind { 79; SSE-LABEL: arg_i16_v8i16_undef: 80; SSE: # %bb.0: 81; SSE-NEXT: movd %edi, %xmm0 82; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 83; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 84; SSE-NEXT: retq 85; 86; AVX1-LABEL: arg_i16_v8i16_undef: 87; AVX1: # %bb.0: 88; AVX1-NEXT: vmovd %edi, %xmm0 89; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 90; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 91; AVX1-NEXT: retq 92; 93; AVX2-LABEL: arg_i16_v8i16_undef: 94; AVX2: # %bb.0: 95; AVX2-NEXT: vmovd %edi, %xmm0 96; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0 97; AVX2-NEXT: retq 98; 99; AVX512F-LABEL: arg_i16_v8i16_undef: 100; AVX512F: # %bb.0: 101; AVX512F-NEXT: vmovd %edi, %xmm0 102; AVX512F-NEXT: vpbroadcastw %xmm0, %xmm0 103; AVX512F-NEXT: retq 104; 105; AVX512BW-LABEL: arg_i16_v8i16_undef: 106; AVX512BW: # %bb.0: 107; AVX512BW-NEXT: vpbroadcastw %edi, %xmm0 108; AVX512BW-NEXT: retq 109; 110; X86AVX2-LABEL: arg_i16_v8i16_undef: 111; X86AVX2: # %bb.0: 112; X86AVX2-NEXT: vpbroadcastw {{[0-9]+}}(%esp), %xmm0 113; X86AVX2-NEXT: retl 114 %ins = insertelement <8 x i16> undef, i16 %x, i32 %y 115 ret <8 x i16> %ins 116} 117 118define <4 x i32> @arg_i32_v4i32_undef(i32 %x, i32 %y) nounwind { 119; 
SSE-LABEL: arg_i32_v4i32_undef: 120; SSE: # %bb.0: 121; SSE-NEXT: movd %edi, %xmm0 122; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 123; SSE-NEXT: retq 124; 125; AVX1-LABEL: arg_i32_v4i32_undef: 126; AVX1: # %bb.0: 127; AVX1-NEXT: vmovd %edi, %xmm0 128; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 129; AVX1-NEXT: retq 130; 131; AVX2-LABEL: arg_i32_v4i32_undef: 132; AVX2: # %bb.0: 133; AVX2-NEXT: vmovd %edi, %xmm0 134; AVX2-NEXT: vpbroadcastd %xmm0, %xmm0 135; AVX2-NEXT: retq 136; 137; AVX512-LABEL: arg_i32_v4i32_undef: 138; AVX512: # %bb.0: 139; AVX512-NEXT: vpbroadcastd %edi, %xmm0 140; AVX512-NEXT: retq 141; 142; X86AVX2-LABEL: arg_i32_v4i32_undef: 143; X86AVX2: # %bb.0: 144; X86AVX2-NEXT: vbroadcastss {{[0-9]+}}(%esp), %xmm0 145; X86AVX2-NEXT: retl 146 %ins = insertelement <4 x i32> undef, i32 %x, i32 %y 147 ret <4 x i32> %ins 148} 149 150define <2 x i64> @arg_i64_v2i64_undef(i64 %x, i32 %y) nounwind { 151; SSE-LABEL: arg_i64_v2i64_undef: 152; SSE: # %bb.0: 153; SSE-NEXT: movq %rdi, %xmm0 154; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] 155; SSE-NEXT: retq 156; 157; AVX1-LABEL: arg_i64_v2i64_undef: 158; AVX1: # %bb.0: 159; AVX1-NEXT: vmovq %rdi, %xmm0 160; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] 161; AVX1-NEXT: retq 162; 163; AVX2-LABEL: arg_i64_v2i64_undef: 164; AVX2: # %bb.0: 165; AVX2-NEXT: vmovq %rdi, %xmm0 166; AVX2-NEXT: vpbroadcastq %xmm0, %xmm0 167; AVX2-NEXT: retq 168; 169; AVX512-LABEL: arg_i64_v2i64_undef: 170; AVX512: # %bb.0: 171; AVX512-NEXT: vpbroadcastq %rdi, %xmm0 172; AVX512-NEXT: retq 173; 174; X86AVX2-LABEL: arg_i64_v2i64_undef: 175; X86AVX2: # %bb.0: 176; X86AVX2-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0] 177; X86AVX2-NEXT: retl 178 %ins = insertelement <2 x i64> undef, i64 %x, i32 %y 179 ret <2 x i64> %ins 180} 181 182define <4 x float> @arg_f32_v4f32_undef(float %x, i32 %y) nounwind { 183; SSE-LABEL: arg_f32_v4f32_undef: 184; SSE: # %bb.0: 185; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,0] 186; SSE-NEXT: retq 187; 188; 
AVX1-LABEL: arg_f32_v4f32_undef: 189; AVX1: # %bb.0: 190; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,0,0] 191; AVX1-NEXT: retq 192; 193; AVX2-LABEL: arg_f32_v4f32_undef: 194; AVX2: # %bb.0: 195; AVX2-NEXT: vbroadcastss %xmm0, %xmm0 196; AVX2-NEXT: retq 197; 198; AVX512-LABEL: arg_f32_v4f32_undef: 199; AVX512: # %bb.0: 200; AVX512-NEXT: vbroadcastss %xmm0, %xmm0 201; AVX512-NEXT: retq 202; 203; X86AVX2-LABEL: arg_f32_v4f32_undef: 204; X86AVX2: # %bb.0: 205; X86AVX2-NEXT: vbroadcastss {{[0-9]+}}(%esp), %xmm0 206; X86AVX2-NEXT: retl 207 %ins = insertelement <4 x float> undef, float %x, i32 %y 208 ret <4 x float> %ins 209} 210 211define <2 x double> @arg_f64_v2f64_undef(double %x, i32 %y) nounwind { 212; SSE2-LABEL: arg_f64_v2f64_undef: 213; SSE2: # %bb.0: 214; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0] 215; SSE2-NEXT: retq 216; 217; SSE41-LABEL: arg_f64_v2f64_undef: 218; SSE41: # %bb.0: 219; SSE41-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0] 220; SSE41-NEXT: retq 221; 222; AVX-LABEL: arg_f64_v2f64_undef: 223; AVX: # %bb.0: 224; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] 225; AVX-NEXT: retq 226; 227; X86AVX2-LABEL: arg_f64_v2f64_undef: 228; X86AVX2: # %bb.0: 229; X86AVX2-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0] 230; X86AVX2-NEXT: retl 231 %ins = insertelement <2 x double> undef, double %x, i32 %y 232 ret <2 x double> %ins 233} 234 235define <16 x i8> @load_i8_v16i8_undef(ptr %p, i32 %y) nounwind { 236; SSE2-LABEL: load_i8_v16i8_undef: 237; SSE2: # %bb.0: 238; SSE2-NEXT: movzbl (%rdi), %eax 239; SSE2-NEXT: movd %eax, %xmm0 240; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 241; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 242; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 243; SSE2-NEXT: retq 244; 245; SSE41-LABEL: load_i8_v16i8_undef: 246; SSE41: # %bb.0: 247; SSE41-NEXT: movzbl (%rdi), %eax 248; SSE41-NEXT: movd %eax, %xmm0 249; SSE41-NEXT: pxor %xmm1, %xmm1 250; SSE41-NEXT: pshufb %xmm1, %xmm0 251; SSE41-NEXT: retq 252; 253; 
AVX1-LABEL: load_i8_v16i8_undef: 254; AVX1: # %bb.0: 255; AVX1-NEXT: movzbl (%rdi), %eax 256; AVX1-NEXT: vmovd %eax, %xmm0 257; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 258; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0 259; AVX1-NEXT: retq 260; 261; AVX2-LABEL: load_i8_v16i8_undef: 262; AVX2: # %bb.0: 263; AVX2-NEXT: vpbroadcastb (%rdi), %xmm0 264; AVX2-NEXT: retq 265; 266; AVX512-LABEL: load_i8_v16i8_undef: 267; AVX512: # %bb.0: 268; AVX512-NEXT: vpbroadcastb (%rdi), %xmm0 269; AVX512-NEXT: retq 270; 271; X86AVX2-LABEL: load_i8_v16i8_undef: 272; X86AVX2: # %bb.0: 273; X86AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax 274; X86AVX2-NEXT: vpbroadcastb (%eax), %xmm0 275; X86AVX2-NEXT: retl 276 %x = load i8, ptr %p 277 %ins = insertelement <16 x i8> undef, i8 %x, i32 %y 278 ret <16 x i8> %ins 279} 280 281define <8 x i16> @load_i16_v8i16_undef(ptr %p, i32 %y) nounwind { 282; SSE-LABEL: load_i16_v8i16_undef: 283; SSE: # %bb.0: 284; SSE-NEXT: movzwl (%rdi), %eax 285; SSE-NEXT: movd %eax, %xmm0 286; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 287; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 288; SSE-NEXT: retq 289; 290; AVX1-LABEL: load_i16_v8i16_undef: 291; AVX1: # %bb.0: 292; AVX1-NEXT: movzwl (%rdi), %eax 293; AVX1-NEXT: vmovd %eax, %xmm0 294; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 295; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 296; AVX1-NEXT: retq 297; 298; AVX2-LABEL: load_i16_v8i16_undef: 299; AVX2: # %bb.0: 300; AVX2-NEXT: vpbroadcastw (%rdi), %xmm0 301; AVX2-NEXT: retq 302; 303; AVX512-LABEL: load_i16_v8i16_undef: 304; AVX512: # %bb.0: 305; AVX512-NEXT: vpbroadcastw (%rdi), %xmm0 306; AVX512-NEXT: retq 307; 308; X86AVX2-LABEL: load_i16_v8i16_undef: 309; X86AVX2: # %bb.0: 310; X86AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax 311; X86AVX2-NEXT: vpbroadcastw (%eax), %xmm0 312; X86AVX2-NEXT: retl 313 %x = load i16, ptr %p 314 %ins = insertelement <8 x i16> undef, i16 %x, i32 %y 315 ret <8 x i16> %ins 316} 317 318define <4 x i32> 
@load_i32_v4i32_undef(ptr %p, i32 %y) nounwind { 319; SSE-LABEL: load_i32_v4i32_undef: 320; SSE: # %bb.0: 321; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 322; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 323; SSE-NEXT: retq 324; 325; AVX-LABEL: load_i32_v4i32_undef: 326; AVX: # %bb.0: 327; AVX-NEXT: vbroadcastss (%rdi), %xmm0 328; AVX-NEXT: retq 329; 330; X86AVX2-LABEL: load_i32_v4i32_undef: 331; X86AVX2: # %bb.0: 332; X86AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax 333; X86AVX2-NEXT: vbroadcastss (%eax), %xmm0 334; X86AVX2-NEXT: retl 335 %x = load i32, ptr %p 336 %ins = insertelement <4 x i32> undef, i32 %x, i32 %y 337 ret <4 x i32> %ins 338} 339 340define <2 x i64> @load_i64_v2i64_undef(ptr %p, i32 %y) nounwind { 341; SSE-LABEL: load_i64_v2i64_undef: 342; SSE: # %bb.0: 343; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 344; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] 345; SSE-NEXT: retq 346; 347; AVX-LABEL: load_i64_v2i64_undef: 348; AVX: # %bb.0: 349; AVX-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0] 350; AVX-NEXT: retq 351; 352; X86AVX2-LABEL: load_i64_v2i64_undef: 353; X86AVX2: # %bb.0: 354; X86AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax 355; X86AVX2-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0] 356; X86AVX2-NEXT: retl 357 %x = load i64, ptr %p 358 %ins = insertelement <2 x i64> undef, i64 %x, i32 %y 359 ret <2 x i64> %ins 360} 361 362define <4 x float> @load_f32_v4f32_undef(ptr %p, i32 %y) nounwind { 363; SSE-LABEL: load_f32_v4f32_undef: 364; SSE: # %bb.0: 365; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 366; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,0] 367; SSE-NEXT: retq 368; 369; AVX-LABEL: load_f32_v4f32_undef: 370; AVX: # %bb.0: 371; AVX-NEXT: vbroadcastss (%rdi), %xmm0 372; AVX-NEXT: retq 373; 374; X86AVX2-LABEL: load_f32_v4f32_undef: 375; X86AVX2: # %bb.0: 376; X86AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax 377; X86AVX2-NEXT: vbroadcastss (%eax), %xmm0 378; X86AVX2-NEXT: retl 379 %x = load float, ptr %p 380 %ins = insertelement <4 x float> undef, float %x, 
i32 %y 381 ret <4 x float> %ins 382} 383 384define <2 x double> @load_f64_v2f64_undef(ptr %p, i32 %y) nounwind { 385; SSE2-LABEL: load_f64_v2f64_undef: 386; SSE2: # %bb.0: 387; SSE2-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 388; SSE2-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0,0] 389; SSE2-NEXT: retq 390; 391; SSE41-LABEL: load_f64_v2f64_undef: 392; SSE41: # %bb.0: 393; SSE41-NEXT: movddup {{.*#+}} xmm0 = mem[0,0] 394; SSE41-NEXT: retq 395; 396; AVX-LABEL: load_f64_v2f64_undef: 397; AVX: # %bb.0: 398; AVX-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0] 399; AVX-NEXT: retq 400; 401; X86AVX2-LABEL: load_f64_v2f64_undef: 402; X86AVX2: # %bb.0: 403; X86AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax 404; X86AVX2-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0] 405; X86AVX2-NEXT: retl 406 %x = load double, ptr %p 407 %ins = insertelement <2 x double> undef, double %x, i32 %y 408 ret <2 x double> %ins 409} 410 411define <32 x i8> @arg_i8_v32i8_undef(i8 %x, i32 %y) nounwind { 412; SSE-LABEL: arg_i8_v32i8_undef: 413; SSE: # %bb.0: 414; SSE-NEXT: # kill: def $esi killed $esi def $rsi 415; SSE-NEXT: andl $31, %esi 416; SSE-NEXT: movb %dil, -40(%rsp,%rsi) 417; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 418; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 419; SSE-NEXT: retq 420; 421; AVX1-LABEL: arg_i8_v32i8_undef: 422; AVX1: # %bb.0: 423; AVX1-NEXT: vmovd %edi, %xmm0 424; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 425; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0 426; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 427; AVX1-NEXT: retq 428; 429; AVX2-LABEL: arg_i8_v32i8_undef: 430; AVX2: # %bb.0: 431; AVX2-NEXT: vmovd %edi, %xmm0 432; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0 433; AVX2-NEXT: retq 434; 435; AVX512F-LABEL: arg_i8_v32i8_undef: 436; AVX512F: # %bb.0: 437; AVX512F-NEXT: vmovd %edi, %xmm0 438; AVX512F-NEXT: vpbroadcastb %xmm0, %ymm0 439; AVX512F-NEXT: retq 440; 441; AVX512BW-LABEL: arg_i8_v32i8_undef: 442; AVX512BW: # %bb.0: 443; AVX512BW-NEXT: vpbroadcastb %edi, %ymm0 444; AVX512BW-NEXT: retq 445; 446; X86AVX2-LABEL: 
arg_i8_v32i8_undef: 447; X86AVX2: # %bb.0: 448; X86AVX2-NEXT: vpbroadcastb {{[0-9]+}}(%esp), %ymm0 449; X86AVX2-NEXT: retl 450 %ins = insertelement <32 x i8> undef, i8 %x, i32 %y 451 ret <32 x i8> %ins 452} 453 454define <16 x i16> @arg_i16_v16i16_undef(i16 %x, i32 %y) nounwind { 455; SSE-LABEL: arg_i16_v16i16_undef: 456; SSE: # %bb.0: 457; SSE-NEXT: # kill: def $esi killed $esi def $rsi 458; SSE-NEXT: andl $15, %esi 459; SSE-NEXT: movw %di, -40(%rsp,%rsi,2) 460; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 461; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 462; SSE-NEXT: retq 463; 464; AVX1-LABEL: arg_i16_v16i16_undef: 465; AVX1: # %bb.0: 466; AVX1-NEXT: vmovd %edi, %xmm0 467; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 468; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 469; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 470; AVX1-NEXT: retq 471; 472; AVX2-LABEL: arg_i16_v16i16_undef: 473; AVX2: # %bb.0: 474; AVX2-NEXT: vmovd %edi, %xmm0 475; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0 476; AVX2-NEXT: retq 477; 478; AVX512F-LABEL: arg_i16_v16i16_undef: 479; AVX512F: # %bb.0: 480; AVX512F-NEXT: vmovd %edi, %xmm0 481; AVX512F-NEXT: vpbroadcastw %xmm0, %ymm0 482; AVX512F-NEXT: retq 483; 484; AVX512BW-LABEL: arg_i16_v16i16_undef: 485; AVX512BW: # %bb.0: 486; AVX512BW-NEXT: vpbroadcastw %edi, %ymm0 487; AVX512BW-NEXT: retq 488; 489; X86AVX2-LABEL: arg_i16_v16i16_undef: 490; X86AVX2: # %bb.0: 491; X86AVX2-NEXT: vpbroadcastw {{[0-9]+}}(%esp), %ymm0 492; X86AVX2-NEXT: retl 493 %ins = insertelement <16 x i16> undef, i16 %x, i32 %y 494 ret <16 x i16> %ins 495} 496 497define <8 x i32> @arg_i32_v8i32_undef(i32 %x, i32 %y) nounwind { 498; SSE-LABEL: arg_i32_v8i32_undef: 499; SSE: # %bb.0: 500; SSE-NEXT: # kill: def $esi killed $esi def $rsi 501; SSE-NEXT: andl $7, %esi 502; SSE-NEXT: movl %edi, -40(%rsp,%rsi,4) 503; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 504; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 505; SSE-NEXT: retq 506; 507; AVX1-LABEL: arg_i32_v8i32_undef: 508; 
AVX1: # %bb.0: 509; AVX1-NEXT: vmovd %edi, %xmm0 510; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 511; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 512; AVX1-NEXT: retq 513; 514; AVX2-LABEL: arg_i32_v8i32_undef: 515; AVX2: # %bb.0: 516; AVX2-NEXT: vmovd %edi, %xmm0 517; AVX2-NEXT: vpbroadcastd %xmm0, %ymm0 518; AVX2-NEXT: retq 519; 520; AVX512-LABEL: arg_i32_v8i32_undef: 521; AVX512: # %bb.0: 522; AVX512-NEXT: vpbroadcastd %edi, %ymm0 523; AVX512-NEXT: retq 524; 525; X86AVX2-LABEL: arg_i32_v8i32_undef: 526; X86AVX2: # %bb.0: 527; X86AVX2-NEXT: vbroadcastss {{[0-9]+}}(%esp), %ymm0 528; X86AVX2-NEXT: retl 529 %ins = insertelement <8 x i32> undef, i32 %x, i32 %y 530 ret <8 x i32> %ins 531} 532 533define <4 x i64> @arg_i64_v4i64_undef(i64 %x, i32 %y) nounwind { 534; SSE-LABEL: arg_i64_v4i64_undef: 535; SSE: # %bb.0: 536; SSE-NEXT: # kill: def $esi killed $esi def $rsi 537; SSE-NEXT: andl $3, %esi 538; SSE-NEXT: movq %rdi, -40(%rsp,%rsi,8) 539; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 540; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 541; SSE-NEXT: retq 542; 543; AVX1-LABEL: arg_i64_v4i64_undef: 544; AVX1: # %bb.0: 545; AVX1-NEXT: vmovq %rdi, %xmm0 546; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] 547; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 548; AVX1-NEXT: retq 549; 550; AVX2-LABEL: arg_i64_v4i64_undef: 551; AVX2: # %bb.0: 552; AVX2-NEXT: vmovq %rdi, %xmm0 553; AVX2-NEXT: vpbroadcastq %xmm0, %ymm0 554; AVX2-NEXT: retq 555; 556; AVX512-LABEL: arg_i64_v4i64_undef: 557; AVX512: # %bb.0: 558; AVX512-NEXT: vpbroadcastq %rdi, %ymm0 559; AVX512-NEXT: retq 560; 561; X86AVX2-LABEL: arg_i64_v4i64_undef: 562; X86AVX2: # %bb.0: 563; X86AVX2-NEXT: vbroadcastsd {{[0-9]+}}(%esp), %ymm0 564; X86AVX2-NEXT: retl 565 %ins = insertelement <4 x i64> undef, i64 %x, i32 %y 566 ret <4 x i64> %ins 567} 568 569define <8 x float> @arg_f32_v8f32_undef(float %x, i32 %y) nounwind { 570; SSE-LABEL: arg_f32_v8f32_undef: 571; SSE: # %bb.0: 572; SSE-NEXT: # kill: def $edi killed $edi 
def $rdi 573; SSE-NEXT: andl $7, %edi 574; SSE-NEXT: movss %xmm0, -40(%rsp,%rdi,4) 575; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 576; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 577; SSE-NEXT: retq 578; 579; AVX1-LABEL: arg_f32_v8f32_undef: 580; AVX1: # %bb.0: 581; AVX1-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,0,0,0] 582; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 583; AVX1-NEXT: retq 584; 585; AVX2-LABEL: arg_f32_v8f32_undef: 586; AVX2: # %bb.0: 587; AVX2-NEXT: vbroadcastss %xmm0, %ymm0 588; AVX2-NEXT: retq 589; 590; AVX512-LABEL: arg_f32_v8f32_undef: 591; AVX512: # %bb.0: 592; AVX512-NEXT: vbroadcastss %xmm0, %ymm0 593; AVX512-NEXT: retq 594; 595; X86AVX2-LABEL: arg_f32_v8f32_undef: 596; X86AVX2: # %bb.0: 597; X86AVX2-NEXT: vbroadcastss {{[0-9]+}}(%esp), %ymm0 598; X86AVX2-NEXT: retl 599 %ins = insertelement <8 x float> undef, float %x, i32 %y 600 ret <8 x float> %ins 601} 602 603define <4 x double> @arg_f64_v4f64_undef(double %x, i32 %y) nounwind { 604; SSE-LABEL: arg_f64_v4f64_undef: 605; SSE: # %bb.0: 606; SSE-NEXT: # kill: def $edi killed $edi def $rdi 607; SSE-NEXT: andl $3, %edi 608; SSE-NEXT: movsd %xmm0, -40(%rsp,%rdi,8) 609; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 610; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 611; SSE-NEXT: retq 612; 613; AVX1-LABEL: arg_f64_v4f64_undef: 614; AVX1: # %bb.0: 615; AVX1-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0] 616; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 617; AVX1-NEXT: retq 618; 619; AVX2-LABEL: arg_f64_v4f64_undef: 620; AVX2: # %bb.0: 621; AVX2-NEXT: vbroadcastsd %xmm0, %ymm0 622; AVX2-NEXT: retq 623; 624; AVX512-LABEL: arg_f64_v4f64_undef: 625; AVX512: # %bb.0: 626; AVX512-NEXT: vbroadcastsd %xmm0, %ymm0 627; AVX512-NEXT: retq 628; 629; X86AVX2-LABEL: arg_f64_v4f64_undef: 630; X86AVX2: # %bb.0: 631; X86AVX2-NEXT: vbroadcastsd {{[0-9]+}}(%esp), %ymm0 632; X86AVX2-NEXT: retl 633 %ins = insertelement <4 x double> undef, double %x, i32 %y 634 ret <4 x double> %ins 635} 636 637define <32 x i8> 
@load_i8_v32i8_undef(ptr %p, i32 %y) nounwind { 638; SSE-LABEL: load_i8_v32i8_undef: 639; SSE: # %bb.0: 640; SSE-NEXT: # kill: def $esi killed $esi def $rsi 641; SSE-NEXT: movzbl (%rdi), %eax 642; SSE-NEXT: andl $31, %esi 643; SSE-NEXT: movb %al, -40(%rsp,%rsi) 644; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 645; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 646; SSE-NEXT: retq 647; 648; AVX1-LABEL: load_i8_v32i8_undef: 649; AVX1: # %bb.0: 650; AVX1-NEXT: movzbl (%rdi), %eax 651; AVX1-NEXT: vmovd %eax, %xmm0 652; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 653; AVX1-NEXT: vpshufb %xmm1, %xmm0, %xmm0 654; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 655; AVX1-NEXT: retq 656; 657; AVX2-LABEL: load_i8_v32i8_undef: 658; AVX2: # %bb.0: 659; AVX2-NEXT: vpbroadcastb (%rdi), %ymm0 660; AVX2-NEXT: retq 661; 662; AVX512-LABEL: load_i8_v32i8_undef: 663; AVX512: # %bb.0: 664; AVX512-NEXT: vpbroadcastb (%rdi), %ymm0 665; AVX512-NEXT: retq 666; 667; X86AVX2-LABEL: load_i8_v32i8_undef: 668; X86AVX2: # %bb.0: 669; X86AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax 670; X86AVX2-NEXT: vpbroadcastb (%eax), %ymm0 671; X86AVX2-NEXT: retl 672 %x = load i8, ptr %p 673 %ins = insertelement <32 x i8> undef, i8 %x, i32 %y 674 ret <32 x i8> %ins 675} 676 677define <16 x i16> @load_i16_v16i16_undef(ptr %p, i32 %y) nounwind { 678; SSE-LABEL: load_i16_v16i16_undef: 679; SSE: # %bb.0: 680; SSE-NEXT: # kill: def $esi killed $esi def $rsi 681; SSE-NEXT: movzwl (%rdi), %eax 682; SSE-NEXT: andl $15, %esi 683; SSE-NEXT: movw %ax, -40(%rsp,%rsi,2) 684; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 685; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 686; SSE-NEXT: retq 687; 688; AVX1-LABEL: load_i16_v16i16_undef: 689; AVX1: # %bb.0: 690; AVX1-NEXT: movzwl (%rdi), %eax 691; AVX1-NEXT: vmovd %eax, %xmm0 692; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 693; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 694; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 695; AVX1-NEXT: retq 696; 697; AVX2-LABEL: 
load_i16_v16i16_undef: 698; AVX2: # %bb.0: 699; AVX2-NEXT: vpbroadcastw (%rdi), %ymm0 700; AVX2-NEXT: retq 701; 702; AVX512-LABEL: load_i16_v16i16_undef: 703; AVX512: # %bb.0: 704; AVX512-NEXT: vpbroadcastw (%rdi), %ymm0 705; AVX512-NEXT: retq 706; 707; X86AVX2-LABEL: load_i16_v16i16_undef: 708; X86AVX2: # %bb.0: 709; X86AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax 710; X86AVX2-NEXT: vpbroadcastw (%eax), %ymm0 711; X86AVX2-NEXT: retl 712 %x = load i16, ptr %p 713 %ins = insertelement <16 x i16> undef, i16 %x, i32 %y 714 ret <16 x i16> %ins 715} 716 717define <8 x i32> @load_i32_v8i32_undef(ptr %p, i32 %y) nounwind { 718; SSE-LABEL: load_i32_v8i32_undef: 719; SSE: # %bb.0: 720; SSE-NEXT: # kill: def $esi killed $esi def $rsi 721; SSE-NEXT: movl (%rdi), %eax 722; SSE-NEXT: andl $7, %esi 723; SSE-NEXT: movl %eax, -40(%rsp,%rsi,4) 724; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 725; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 726; SSE-NEXT: retq 727; 728; AVX-LABEL: load_i32_v8i32_undef: 729; AVX: # %bb.0: 730; AVX-NEXT: vbroadcastss (%rdi), %ymm0 731; AVX-NEXT: retq 732; 733; X86AVX2-LABEL: load_i32_v8i32_undef: 734; X86AVX2: # %bb.0: 735; X86AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax 736; X86AVX2-NEXT: vbroadcastss (%eax), %ymm0 737; X86AVX2-NEXT: retl 738 %x = load i32, ptr %p 739 %ins = insertelement <8 x i32> undef, i32 %x, i32 %y 740 ret <8 x i32> %ins 741} 742 743define <4 x i64> @load_i64_v4i64_undef(ptr %p, i32 %y) nounwind { 744; SSE-LABEL: load_i64_v4i64_undef: 745; SSE: # %bb.0: 746; SSE-NEXT: # kill: def $esi killed $esi def $rsi 747; SSE-NEXT: movq (%rdi), %rax 748; SSE-NEXT: andl $3, %esi 749; SSE-NEXT: movq %rax, -40(%rsp,%rsi,8) 750; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 751; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 752; SSE-NEXT: retq 753; 754; AVX-LABEL: load_i64_v4i64_undef: 755; AVX: # %bb.0: 756; AVX-NEXT: vbroadcastsd (%rdi), %ymm0 757; AVX-NEXT: retq 758; 759; X86AVX2-LABEL: load_i64_v4i64_undef: 760; X86AVX2: # %bb.0: 761; X86AVX2-NEXT: movl 
{{[0-9]+}}(%esp), %eax 762; X86AVX2-NEXT: vbroadcastsd (%eax), %ymm0 763; X86AVX2-NEXT: retl 764 %x = load i64, ptr %p 765 %ins = insertelement <4 x i64> undef, i64 %x, i32 %y 766 ret <4 x i64> %ins 767} 768 769define <8 x float> @load_f32_v8f32_undef(ptr %p, i32 %y) nounwind { 770; SSE-LABEL: load_f32_v8f32_undef: 771; SSE: # %bb.0: 772; SSE-NEXT: # kill: def $esi killed $esi def $rsi 773; SSE-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 774; SSE-NEXT: andl $7, %esi 775; SSE-NEXT: movss %xmm0, -40(%rsp,%rsi,4) 776; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 777; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 778; SSE-NEXT: retq 779; 780; AVX-LABEL: load_f32_v8f32_undef: 781; AVX: # %bb.0: 782; AVX-NEXT: vbroadcastss (%rdi), %ymm0 783; AVX-NEXT: retq 784; 785; X86AVX2-LABEL: load_f32_v8f32_undef: 786; X86AVX2: # %bb.0: 787; X86AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax 788; X86AVX2-NEXT: vbroadcastss (%eax), %ymm0 789; X86AVX2-NEXT: retl 790 %x = load float, ptr %p 791 %ins = insertelement <8 x float> undef, float %x, i32 %y 792 ret <8 x float> %ins 793} 794 795define <4 x double> @load_f64_v4f64_undef(ptr %p, i32 %y) nounwind { 796; SSE-LABEL: load_f64_v4f64_undef: 797; SSE: # %bb.0: 798; SSE-NEXT: # kill: def $esi killed $esi def $rsi 799; SSE-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 800; SSE-NEXT: andl $3, %esi 801; SSE-NEXT: movsd %xmm0, -40(%rsp,%rsi,8) 802; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 803; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 804; SSE-NEXT: retq 805; 806; AVX-LABEL: load_f64_v4f64_undef: 807; AVX: # %bb.0: 808; AVX-NEXT: vbroadcastsd (%rdi), %ymm0 809; AVX-NEXT: retq 810; 811; X86AVX2-LABEL: load_f64_v4f64_undef: 812; X86AVX2: # %bb.0: 813; X86AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax 814; X86AVX2-NEXT: vbroadcastsd (%eax), %ymm0 815; X86AVX2-NEXT: retl 816 %x = load double, ptr %p 817 %ins = insertelement <4 x double> undef, double %x, i32 %y 818 ret <4 x double> %ins 819} 820 821; 822; Insertion into arg vectors 823; 824 825define <16 x i8> 
@arg_i8_v16i8(<16 x i8> %v, i8 %x, i32 %y) nounwind { 826; SSE-LABEL: arg_i8_v16i8: 827; SSE: # %bb.0: 828; SSE-NEXT: # kill: def $esi killed $esi def $rsi 829; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) 830; SSE-NEXT: andl $15, %esi 831; SSE-NEXT: movb %dil, -24(%rsp,%rsi) 832; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 833; SSE-NEXT: retq 834; 835; AVX1OR2-LABEL: arg_i8_v16i8: 836; AVX1OR2: # %bb.0: 837; AVX1OR2-NEXT: # kill: def $esi killed $esi def $rsi 838; AVX1OR2-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) 839; AVX1OR2-NEXT: andl $15, %esi 840; AVX1OR2-NEXT: movb %dil, -24(%rsp,%rsi) 841; AVX1OR2-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0 842; AVX1OR2-NEXT: retq 843; 844; AVX512F-LABEL: arg_i8_v16i8: 845; AVX512F: # %bb.0: 846; AVX512F-NEXT: # kill: def $esi killed $esi def $rsi 847; AVX512F-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) 848; AVX512F-NEXT: andl $15, %esi 849; AVX512F-NEXT: movb %dil, -24(%rsp,%rsi) 850; AVX512F-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0 851; AVX512F-NEXT: retq 852; 853; AVX512BW-LABEL: arg_i8_v16i8: 854; AVX512BW: # %bb.0: 855; AVX512BW-NEXT: vpbroadcastb %esi, %xmm1 856; AVX512BW-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1 857; AVX512BW-NEXT: vpbroadcastb %edi, %xmm0 {%k1} 858; AVX512BW-NEXT: retq 859; 860; X86AVX2-LABEL: arg_i8_v16i8: 861; X86AVX2: # %bb.0: 862; X86AVX2-NEXT: pushl %ebp 863; X86AVX2-NEXT: movl %esp, %ebp 864; X86AVX2-NEXT: andl $-16, %esp 865; X86AVX2-NEXT: subl $32, %esp 866; X86AVX2-NEXT: movl 12(%ebp), %eax 867; X86AVX2-NEXT: andl $15, %eax 868; X86AVX2-NEXT: movzbl 8(%ebp), %ecx 869; X86AVX2-NEXT: vmovaps %xmm0, (%esp) 870; X86AVX2-NEXT: movb %cl, (%esp,%eax) 871; X86AVX2-NEXT: vmovaps (%esp), %xmm0 872; X86AVX2-NEXT: movl %ebp, %esp 873; X86AVX2-NEXT: popl %ebp 874; X86AVX2-NEXT: retl 875 %ins = insertelement <16 x i8> %v, i8 %x, i32 %y 876 ret <16 x i8> %ins 877} 878 879define <8 x i16> @arg_i16_v8i16(<8 x i16> %v, i16 %x, i32 %y) nounwind { 880; SSE-LABEL: arg_i16_v8i16: 881; SSE: # %bb.0: 882; SSE-NEXT: # 
kill: def $esi killed $esi def $rsi 883; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) 884; SSE-NEXT: andl $7, %esi 885; SSE-NEXT: movw %di, -24(%rsp,%rsi,2) 886; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 887; SSE-NEXT: retq 888; 889; AVX1OR2-LABEL: arg_i16_v8i16: 890; AVX1OR2: # %bb.0: 891; AVX1OR2-NEXT: # kill: def $esi killed $esi def $rsi 892; AVX1OR2-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) 893; AVX1OR2-NEXT: andl $7, %esi 894; AVX1OR2-NEXT: movw %di, -24(%rsp,%rsi,2) 895; AVX1OR2-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0 896; AVX1OR2-NEXT: retq 897; 898; AVX512F-LABEL: arg_i16_v8i16: 899; AVX512F: # %bb.0: 900; AVX512F-NEXT: # kill: def $esi killed $esi def $rsi 901; AVX512F-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) 902; AVX512F-NEXT: andl $7, %esi 903; AVX512F-NEXT: movw %di, -24(%rsp,%rsi,2) 904; AVX512F-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0 905; AVX512F-NEXT: retq 906; 907; AVX512BW-LABEL: arg_i16_v8i16: 908; AVX512BW: # %bb.0: 909; AVX512BW-NEXT: vpbroadcastw %esi, %xmm1 910; AVX512BW-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1 911; AVX512BW-NEXT: vpbroadcastw %edi, %xmm0 {%k1} 912; AVX512BW-NEXT: retq 913; 914; X86AVX2-LABEL: arg_i16_v8i16: 915; X86AVX2: # %bb.0: 916; X86AVX2-NEXT: pushl %ebp 917; X86AVX2-NEXT: movl %esp, %ebp 918; X86AVX2-NEXT: andl $-16, %esp 919; X86AVX2-NEXT: subl $32, %esp 920; X86AVX2-NEXT: movl 12(%ebp), %eax 921; X86AVX2-NEXT: andl $7, %eax 922; X86AVX2-NEXT: movzwl 8(%ebp), %ecx 923; X86AVX2-NEXT: vmovaps %xmm0, (%esp) 924; X86AVX2-NEXT: movw %cx, (%esp,%eax,2) 925; X86AVX2-NEXT: vmovaps (%esp), %xmm0 926; X86AVX2-NEXT: movl %ebp, %esp 927; X86AVX2-NEXT: popl %ebp 928; X86AVX2-NEXT: retl 929 %ins = insertelement <8 x i16> %v, i16 %x, i32 %y 930 ret <8 x i16> %ins 931} 932 933define <4 x i32> @arg_i32_v4i32(<4 x i32> %v, i32 %x, i32 %y) nounwind { 934; SSE-LABEL: arg_i32_v4i32: 935; SSE: # %bb.0: 936; SSE-NEXT: # kill: def $esi killed $esi def $rsi 937; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) 938; SSE-NEXT: andl $3, %esi 
939; SSE-NEXT: movl %edi, -24(%rsp,%rsi,4) 940; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 941; SSE-NEXT: retq 942; 943; AVX1OR2-LABEL: arg_i32_v4i32: 944; AVX1OR2: # %bb.0: 945; AVX1OR2-NEXT: # kill: def $esi killed $esi def $rsi 946; AVX1OR2-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) 947; AVX1OR2-NEXT: andl $3, %esi 948; AVX1OR2-NEXT: movl %edi, -24(%rsp,%rsi,4) 949; AVX1OR2-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0 950; AVX1OR2-NEXT: retq 951; 952; AVX512-LABEL: arg_i32_v4i32: 953; AVX512: # %bb.0: 954; AVX512-NEXT: vpbroadcastd %esi, %xmm1 955; AVX512-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1 956; AVX512-NEXT: vpbroadcastd %edi, %xmm0 {%k1} 957; AVX512-NEXT: retq 958; 959; X86AVX2-LABEL: arg_i32_v4i32: 960; X86AVX2: # %bb.0: 961; X86AVX2-NEXT: pushl %ebp 962; X86AVX2-NEXT: movl %esp, %ebp 963; X86AVX2-NEXT: andl $-16, %esp 964; X86AVX2-NEXT: subl $32, %esp 965; X86AVX2-NEXT: movl 12(%ebp), %eax 966; X86AVX2-NEXT: andl $3, %eax 967; X86AVX2-NEXT: movl 8(%ebp), %ecx 968; X86AVX2-NEXT: vmovaps %xmm0, (%esp) 969; X86AVX2-NEXT: movl %ecx, (%esp,%eax,4) 970; X86AVX2-NEXT: vmovaps (%esp), %xmm0 971; X86AVX2-NEXT: movl %ebp, %esp 972; X86AVX2-NEXT: popl %ebp 973; X86AVX2-NEXT: retl 974 %ins = insertelement <4 x i32> %v, i32 %x, i32 %y 975 ret <4 x i32> %ins 976} 977 978define <2 x i64> @arg_i64_v2i64(<2 x i64> %v, i64 %x, i32 %y) nounwind { 979; SSE-LABEL: arg_i64_v2i64: 980; SSE: # %bb.0: 981; SSE-NEXT: # kill: def $esi killed $esi def $rsi 982; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) 983; SSE-NEXT: andl $1, %esi 984; SSE-NEXT: movq %rdi, -24(%rsp,%rsi,8) 985; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 986; SSE-NEXT: retq 987; 988; AVX1OR2-LABEL: arg_i64_v2i64: 989; AVX1OR2: # %bb.0: 990; AVX1OR2-NEXT: # kill: def $esi killed $esi def $rsi 991; AVX1OR2-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) 992; AVX1OR2-NEXT: andl $1, %esi 993; AVX1OR2-NEXT: movq %rdi, -24(%rsp,%rsi,8) 994; AVX1OR2-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0 995; AVX1OR2-NEXT: retq 996; 997; 
AVX512-LABEL: arg_i64_v2i64: 998; AVX512: # %bb.0: 999; AVX512-NEXT: movl %esi, %eax 1000; AVX512-NEXT: vpbroadcastq %rax, %xmm1 1001; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1 1002; AVX512-NEXT: vpbroadcastq %rdi, %xmm0 {%k1} 1003; AVX512-NEXT: retq 1004; 1005; X86AVX2-LABEL: arg_i64_v2i64: 1006; X86AVX2: # %bb.0: 1007; X86AVX2-NEXT: pushl %ebp 1008; X86AVX2-NEXT: movl %esp, %ebp 1009; X86AVX2-NEXT: pushl %esi 1010; X86AVX2-NEXT: andl $-16, %esp 1011; X86AVX2-NEXT: subl $48, %esp 1012; X86AVX2-NEXT: movl 8(%ebp), %edx 1013; X86AVX2-NEXT: movl 12(%ebp), %eax 1014; X86AVX2-NEXT: movl 16(%ebp), %ecx 1015; X86AVX2-NEXT: vmovaps %xmm0, (%esp) 1016; X86AVX2-NEXT: addl %ecx, %ecx 1017; X86AVX2-NEXT: movl %ecx, %esi 1018; X86AVX2-NEXT: andl $3, %esi 1019; X86AVX2-NEXT: movl %edx, (%esp,%esi,4) 1020; X86AVX2-NEXT: vmovaps (%esp), %xmm0 1021; X86AVX2-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp) 1022; X86AVX2-NEXT: incl %ecx 1023; X86AVX2-NEXT: andl $3, %ecx 1024; X86AVX2-NEXT: movl %eax, 16(%esp,%ecx,4) 1025; X86AVX2-NEXT: vmovaps {{[0-9]+}}(%esp), %xmm0 1026; X86AVX2-NEXT: leal -4(%ebp), %esp 1027; X86AVX2-NEXT: popl %esi 1028; X86AVX2-NEXT: popl %ebp 1029; X86AVX2-NEXT: retl 1030 %ins = insertelement <2 x i64> %v, i64 %x, i32 %y 1031 ret <2 x i64> %ins 1032} 1033 1034define <4 x float> @arg_f32_v4f32(<4 x float> %v, float %x, i32 %y) nounwind { 1035; SSE2-LABEL: arg_f32_v4f32: 1036; SSE2: # %bb.0: 1037; SSE2-NEXT: # kill: def $edi killed $edi def $rdi 1038; SSE2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) 1039; SSE2-NEXT: andl $3, %edi 1040; SSE2-NEXT: movss %xmm1, -24(%rsp,%rdi,4) 1041; SSE2-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 1042; SSE2-NEXT: retq 1043; 1044; SSE41-LABEL: arg_f32_v4f32: 1045; SSE41: # %bb.0: 1046; SSE41-NEXT: movaps %xmm0, %xmm2 1047; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0,0,0] 1048; SSE41-NEXT: movd %edi, %xmm0 1049; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 1050; SSE41-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1051; 
SSE41-NEXT: blendvps %xmm0, %xmm1, %xmm2 1052; SSE41-NEXT: movaps %xmm2, %xmm0 1053; SSE41-NEXT: retq 1054; 1055; AVX1-LABEL: arg_f32_v4f32: 1056; AVX1: # %bb.0: 1057; AVX1-NEXT: vshufps {{.*#+}} xmm1 = xmm1[0,0,0,0] 1058; AVX1-NEXT: vmovd %edi, %xmm2 1059; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] 1060; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 1061; AVX1-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 1062; AVX1-NEXT: retq 1063; 1064; AVX2-LABEL: arg_f32_v4f32: 1065; AVX2: # %bb.0: 1066; AVX2-NEXT: vbroadcastss %xmm1, %xmm1 1067; AVX2-NEXT: vmovd %edi, %xmm2 1068; AVX2-NEXT: vpbroadcastd %xmm2, %xmm2 1069; AVX2-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 1070; AVX2-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 1071; AVX2-NEXT: retq 1072; 1073; AVX512-LABEL: arg_f32_v4f32: 1074; AVX512: # %bb.0: 1075; AVX512-NEXT: vpbroadcastd %edi, %xmm2 1076; AVX512-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %k1 1077; AVX512-NEXT: vbroadcastss %xmm1, %xmm0 {%k1} 1078; AVX512-NEXT: retq 1079; 1080; X86AVX2-LABEL: arg_f32_v4f32: 1081; X86AVX2: # %bb.0: 1082; X86AVX2-NEXT: vpbroadcastd {{[0-9]+}}(%esp), %xmm1 1083; X86AVX2-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1 1084; X86AVX2-NEXT: vbroadcastss {{[0-9]+}}(%esp), %xmm2 1085; X86AVX2-NEXT: vblendvps %xmm1, %xmm2, %xmm0, %xmm0 1086; X86AVX2-NEXT: retl 1087 %ins = insertelement <4 x float> %v, float %x, i32 %y 1088 ret <4 x float> %ins 1089} 1090 1091define <2 x double> @arg_f64_v2f64(<2 x double> %v, double %x, i32 %y) nounwind { 1092; SSE2-LABEL: arg_f64_v2f64: 1093; SSE2: # %bb.0: 1094; SSE2-NEXT: # kill: def $edi killed $edi def $rdi 1095; SSE2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) 1096; SSE2-NEXT: andl $1, %edi 1097; SSE2-NEXT: movsd %xmm1, -24(%rsp,%rdi,8) 1098; SSE2-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 1099; SSE2-NEXT: retq 1100; 1101; SSE41-LABEL: arg_f64_v2f64: 1102; SSE41: # %bb.0: 1103; SSE41-NEXT: movapd %xmm0, %xmm2 1104; SSE41-NEXT: movddup {{.*#+}} xmm1 = 
xmm1[0,0] 1105; SSE41-NEXT: movd %edi, %xmm0 1106; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] 1107; SSE41-NEXT: pcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1108; SSE41-NEXT: blendvpd %xmm0, %xmm1, %xmm2 1109; SSE41-NEXT: movapd %xmm2, %xmm0 1110; SSE41-NEXT: retq 1111; 1112; AVX1-LABEL: arg_f64_v2f64: 1113; AVX1: # %bb.0: 1114; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0] 1115; AVX1-NEXT: vmovd %edi, %xmm2 1116; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1] 1117; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 1118; AVX1-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 1119; AVX1-NEXT: retq 1120; 1121; AVX2-LABEL: arg_f64_v2f64: 1122; AVX2: # %bb.0: 1123; AVX2-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0] 1124; AVX2-NEXT: movl %edi, %eax 1125; AVX2-NEXT: vmovq %rax, %xmm2 1126; AVX2-NEXT: vpbroadcastq %xmm2, %xmm2 1127; AVX2-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 1128; AVX2-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 1129; AVX2-NEXT: retq 1130; 1131; AVX512-LABEL: arg_f64_v2f64: 1132; AVX512: # %bb.0: 1133; AVX512-NEXT: movl %edi, %eax 1134; AVX512-NEXT: vpbroadcastq %rax, %xmm2 1135; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %k1 1136; AVX512-NEXT: vmovddup {{.*#+}} xmm0 {%k1} = xmm1[0,0] 1137; AVX512-NEXT: retq 1138; 1139; X86AVX2-LABEL: arg_f64_v2f64: 1140; X86AVX2: # %bb.0: 1141; X86AVX2-NEXT: pushl %ebp 1142; X86AVX2-NEXT: movl %esp, %ebp 1143; X86AVX2-NEXT: andl $-16, %esp 1144; X86AVX2-NEXT: subl $32, %esp 1145; X86AVX2-NEXT: movl 16(%ebp), %eax 1146; X86AVX2-NEXT: andl $1, %eax 1147; X86AVX2-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero 1148; X86AVX2-NEXT: vmovaps %xmm0, (%esp) 1149; X86AVX2-NEXT: vmovsd %xmm1, (%esp,%eax,8) 1150; X86AVX2-NEXT: vmovaps (%esp), %xmm0 1151; X86AVX2-NEXT: movl %ebp, %esp 1152; X86AVX2-NEXT: popl %ebp 1153; X86AVX2-NEXT: retl 1154 %ins = insertelement <2 x double> %v, double %x, i32 %y 1155 ret <2 x double> %ins 1156} 1157 1158define <16 x i8> @load_i8_v16i8(<16 x i8> %v, ptr 
%p, i32 %y) nounwind { 1159; SSE-LABEL: load_i8_v16i8: 1160; SSE: # %bb.0: 1161; SSE-NEXT: # kill: def $esi killed $esi def $rsi 1162; SSE-NEXT: movzbl (%rdi), %eax 1163; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) 1164; SSE-NEXT: andl $15, %esi 1165; SSE-NEXT: movb %al, -24(%rsp,%rsi) 1166; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 1167; SSE-NEXT: retq 1168; 1169; AVX1OR2-LABEL: load_i8_v16i8: 1170; AVX1OR2: # %bb.0: 1171; AVX1OR2-NEXT: # kill: def $esi killed $esi def $rsi 1172; AVX1OR2-NEXT: movzbl (%rdi), %eax 1173; AVX1OR2-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) 1174; AVX1OR2-NEXT: andl $15, %esi 1175; AVX1OR2-NEXT: movb %al, -24(%rsp,%rsi) 1176; AVX1OR2-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0 1177; AVX1OR2-NEXT: retq 1178; 1179; AVX512F-LABEL: load_i8_v16i8: 1180; AVX512F: # %bb.0: 1181; AVX512F-NEXT: # kill: def $esi killed $esi def $rsi 1182; AVX512F-NEXT: movzbl (%rdi), %eax 1183; AVX512F-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) 1184; AVX512F-NEXT: andl $15, %esi 1185; AVX512F-NEXT: movb %al, -24(%rsp,%rsi) 1186; AVX512F-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0 1187; AVX512F-NEXT: retq 1188; 1189; AVX512BW-LABEL: load_i8_v16i8: 1190; AVX512BW: # %bb.0: 1191; AVX512BW-NEXT: vpbroadcastb %esi, %xmm1 1192; AVX512BW-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1 1193; AVX512BW-NEXT: vpbroadcastb (%rdi), %xmm0 {%k1} 1194; AVX512BW-NEXT: retq 1195; 1196; X86AVX2-LABEL: load_i8_v16i8: 1197; X86AVX2: # %bb.0: 1198; X86AVX2-NEXT: pushl %ebp 1199; X86AVX2-NEXT: movl %esp, %ebp 1200; X86AVX2-NEXT: andl $-16, %esp 1201; X86AVX2-NEXT: subl $32, %esp 1202; X86AVX2-NEXT: movl 12(%ebp), %eax 1203; X86AVX2-NEXT: andl $15, %eax 1204; X86AVX2-NEXT: movl 8(%ebp), %ecx 1205; X86AVX2-NEXT: movzbl (%ecx), %ecx 1206; X86AVX2-NEXT: vmovaps %xmm0, (%esp) 1207; X86AVX2-NEXT: movb %cl, (%esp,%eax) 1208; X86AVX2-NEXT: vmovaps (%esp), %xmm0 1209; X86AVX2-NEXT: movl %ebp, %esp 1210; X86AVX2-NEXT: popl %ebp 1211; X86AVX2-NEXT: retl 1212 %x = load i8, ptr %p 1213 %ins = insertelement 
<16 x i8> %v, i8 %x, i32 %y 1214 ret <16 x i8> %ins 1215} 1216 1217define <8 x i16> @load_i16_v8i16(<8 x i16> %v, ptr %p, i32 %y) nounwind { 1218; SSE-LABEL: load_i16_v8i16: 1219; SSE: # %bb.0: 1220; SSE-NEXT: # kill: def $esi killed $esi def $rsi 1221; SSE-NEXT: movzwl (%rdi), %eax 1222; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) 1223; SSE-NEXT: andl $7, %esi 1224; SSE-NEXT: movw %ax, -24(%rsp,%rsi,2) 1225; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 1226; SSE-NEXT: retq 1227; 1228; AVX1OR2-LABEL: load_i16_v8i16: 1229; AVX1OR2: # %bb.0: 1230; AVX1OR2-NEXT: # kill: def $esi killed $esi def $rsi 1231; AVX1OR2-NEXT: movzwl (%rdi), %eax 1232; AVX1OR2-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) 1233; AVX1OR2-NEXT: andl $7, %esi 1234; AVX1OR2-NEXT: movw %ax, -24(%rsp,%rsi,2) 1235; AVX1OR2-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0 1236; AVX1OR2-NEXT: retq 1237; 1238; AVX512F-LABEL: load_i16_v8i16: 1239; AVX512F: # %bb.0: 1240; AVX512F-NEXT: # kill: def $esi killed $esi def $rsi 1241; AVX512F-NEXT: movzwl (%rdi), %eax 1242; AVX512F-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) 1243; AVX512F-NEXT: andl $7, %esi 1244; AVX512F-NEXT: movw %ax, -24(%rsp,%rsi,2) 1245; AVX512F-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0 1246; AVX512F-NEXT: retq 1247; 1248; AVX512BW-LABEL: load_i16_v8i16: 1249; AVX512BW: # %bb.0: 1250; AVX512BW-NEXT: vpbroadcastw %esi, %xmm1 1251; AVX512BW-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1 1252; AVX512BW-NEXT: vpbroadcastw (%rdi), %xmm0 {%k1} 1253; AVX512BW-NEXT: retq 1254; 1255; X86AVX2-LABEL: load_i16_v8i16: 1256; X86AVX2: # %bb.0: 1257; X86AVX2-NEXT: pushl %ebp 1258; X86AVX2-NEXT: movl %esp, %ebp 1259; X86AVX2-NEXT: andl $-16, %esp 1260; X86AVX2-NEXT: subl $32, %esp 1261; X86AVX2-NEXT: movl 12(%ebp), %eax 1262; X86AVX2-NEXT: andl $7, %eax 1263; X86AVX2-NEXT: movl 8(%ebp), %ecx 1264; X86AVX2-NEXT: movzwl (%ecx), %ecx 1265; X86AVX2-NEXT: vmovaps %xmm0, (%esp) 1266; X86AVX2-NEXT: movw %cx, (%esp,%eax,2) 1267; X86AVX2-NEXT: vmovaps (%esp), %xmm0 1268; 
X86AVX2-NEXT: movl %ebp, %esp 1269; X86AVX2-NEXT: popl %ebp 1270; X86AVX2-NEXT: retl 1271 %x = load i16, ptr %p 1272 %ins = insertelement <8 x i16> %v, i16 %x, i32 %y 1273 ret <8 x i16> %ins 1274} 1275 1276define <4 x i32> @load_i32_v4i32(<4 x i32> %v, ptr %p, i32 %y) nounwind { 1277; SSE-LABEL: load_i32_v4i32: 1278; SSE: # %bb.0: 1279; SSE-NEXT: # kill: def $esi killed $esi def $rsi 1280; SSE-NEXT: movl (%rdi), %eax 1281; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) 1282; SSE-NEXT: andl $3, %esi 1283; SSE-NEXT: movl %eax, -24(%rsp,%rsi,4) 1284; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 1285; SSE-NEXT: retq 1286; 1287; AVX1OR2-LABEL: load_i32_v4i32: 1288; AVX1OR2: # %bb.0: 1289; AVX1OR2-NEXT: # kill: def $esi killed $esi def $rsi 1290; AVX1OR2-NEXT: movl (%rdi), %eax 1291; AVX1OR2-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) 1292; AVX1OR2-NEXT: andl $3, %esi 1293; AVX1OR2-NEXT: movl %eax, -24(%rsp,%rsi,4) 1294; AVX1OR2-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0 1295; AVX1OR2-NEXT: retq 1296; 1297; AVX512-LABEL: load_i32_v4i32: 1298; AVX512: # %bb.0: 1299; AVX512-NEXT: vpbroadcastd %esi, %xmm1 1300; AVX512-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1 1301; AVX512-NEXT: vpbroadcastd (%rdi), %xmm0 {%k1} 1302; AVX512-NEXT: retq 1303; 1304; X86AVX2-LABEL: load_i32_v4i32: 1305; X86AVX2: # %bb.0: 1306; X86AVX2-NEXT: pushl %ebp 1307; X86AVX2-NEXT: movl %esp, %ebp 1308; X86AVX2-NEXT: andl $-16, %esp 1309; X86AVX2-NEXT: subl $32, %esp 1310; X86AVX2-NEXT: movl 12(%ebp), %eax 1311; X86AVX2-NEXT: andl $3, %eax 1312; X86AVX2-NEXT: movl 8(%ebp), %ecx 1313; X86AVX2-NEXT: movl (%ecx), %ecx 1314; X86AVX2-NEXT: vmovaps %xmm0, (%esp) 1315; X86AVX2-NEXT: movl %ecx, (%esp,%eax,4) 1316; X86AVX2-NEXT: vmovaps (%esp), %xmm0 1317; X86AVX2-NEXT: movl %ebp, %esp 1318; X86AVX2-NEXT: popl %ebp 1319; X86AVX2-NEXT: retl 1320 %x = load i32, ptr %p 1321 %ins = insertelement <4 x i32> %v, i32 %x, i32 %y 1322 ret <4 x i32> %ins 1323} 1324 1325define <2 x i64> @load_i64_v2i64(<2 x i64> %v, ptr %p, i32 
%y) nounwind { 1326; SSE-LABEL: load_i64_v2i64: 1327; SSE: # %bb.0: 1328; SSE-NEXT: # kill: def $esi killed $esi def $rsi 1329; SSE-NEXT: movq (%rdi), %rax 1330; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) 1331; SSE-NEXT: andl $1, %esi 1332; SSE-NEXT: movq %rax, -24(%rsp,%rsi,8) 1333; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 1334; SSE-NEXT: retq 1335; 1336; AVX1OR2-LABEL: load_i64_v2i64: 1337; AVX1OR2: # %bb.0: 1338; AVX1OR2-NEXT: # kill: def $esi killed $esi def $rsi 1339; AVX1OR2-NEXT: movq (%rdi), %rax 1340; AVX1OR2-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) 1341; AVX1OR2-NEXT: andl $1, %esi 1342; AVX1OR2-NEXT: movq %rax, -24(%rsp,%rsi,8) 1343; AVX1OR2-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0 1344; AVX1OR2-NEXT: retq 1345; 1346; AVX512-LABEL: load_i64_v2i64: 1347; AVX512: # %bb.0: 1348; AVX512-NEXT: movl %esi, %eax 1349; AVX512-NEXT: vpbroadcastq %rax, %xmm1 1350; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1 1351; AVX512-NEXT: vpbroadcastq (%rdi), %xmm0 {%k1} 1352; AVX512-NEXT: retq 1353; 1354; X86AVX2-LABEL: load_i64_v2i64: 1355; X86AVX2: # %bb.0: 1356; X86AVX2-NEXT: pushl %ebp 1357; X86AVX2-NEXT: movl %esp, %ebp 1358; X86AVX2-NEXT: pushl %esi 1359; X86AVX2-NEXT: andl $-16, %esp 1360; X86AVX2-NEXT: subl $48, %esp 1361; X86AVX2-NEXT: movl 12(%ebp), %eax 1362; X86AVX2-NEXT: movl 8(%ebp), %ecx 1363; X86AVX2-NEXT: movl (%ecx), %edx 1364; X86AVX2-NEXT: movl 4(%ecx), %ecx 1365; X86AVX2-NEXT: vmovaps %xmm0, (%esp) 1366; X86AVX2-NEXT: addl %eax, %eax 1367; X86AVX2-NEXT: movl %eax, %esi 1368; X86AVX2-NEXT: andl $3, %esi 1369; X86AVX2-NEXT: movl %edx, (%esp,%esi,4) 1370; X86AVX2-NEXT: vmovaps (%esp), %xmm0 1371; X86AVX2-NEXT: vmovaps %xmm0, {{[0-9]+}}(%esp) 1372; X86AVX2-NEXT: incl %eax 1373; X86AVX2-NEXT: andl $3, %eax 1374; X86AVX2-NEXT: movl %ecx, 16(%esp,%eax,4) 1375; X86AVX2-NEXT: vmovaps {{[0-9]+}}(%esp), %xmm0 1376; X86AVX2-NEXT: leal -4(%ebp), %esp 1377; X86AVX2-NEXT: popl %esi 1378; X86AVX2-NEXT: popl %ebp 1379; X86AVX2-NEXT: retl 1380 %x = load 
i64, ptr %p 1381 %ins = insertelement <2 x i64> %v, i64 %x, i32 %y 1382 ret <2 x i64> %ins 1383} 1384 1385define <4 x float> @load_f32_v4f32(<4 x float> %v, ptr %p, i32 %y) nounwind { 1386; SSE2-LABEL: load_f32_v4f32: 1387; SSE2: # %bb.0: 1388; SSE2-NEXT: # kill: def $esi killed $esi def $rsi 1389; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero 1390; SSE2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) 1391; SSE2-NEXT: andl $3, %esi 1392; SSE2-NEXT: movss %xmm1, -24(%rsp,%rsi,4) 1393; SSE2-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 1394; SSE2-NEXT: retq 1395; 1396; SSE41-LABEL: load_f32_v4f32: 1397; SSE41: # %bb.0: 1398; SSE41-NEXT: movaps %xmm0, %xmm1 1399; SSE41-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero 1400; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,0,0,0] 1401; SSE41-NEXT: movd %esi, %xmm0 1402; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 1403; SSE41-NEXT: pcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1404; SSE41-NEXT: blendvps %xmm0, %xmm2, %xmm1 1405; SSE41-NEXT: movaps %xmm1, %xmm0 1406; SSE41-NEXT: retq 1407; 1408; AVX1-LABEL: load_f32_v4f32: 1409; AVX1: # %bb.0: 1410; AVX1-NEXT: vbroadcastss (%rdi), %xmm1 1411; AVX1-NEXT: vmovd %esi, %xmm2 1412; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0] 1413; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 1414; AVX1-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 1415; AVX1-NEXT: retq 1416; 1417; AVX2-LABEL: load_f32_v4f32: 1418; AVX2: # %bb.0: 1419; AVX2-NEXT: vbroadcastss (%rdi), %xmm1 1420; AVX2-NEXT: vmovd %esi, %xmm2 1421; AVX2-NEXT: vpbroadcastd %xmm2, %xmm2 1422; AVX2-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 1423; AVX2-NEXT: vblendvps %xmm2, %xmm1, %xmm0, %xmm0 1424; AVX2-NEXT: retq 1425; 1426; AVX512-LABEL: load_f32_v4f32: 1427; AVX512: # %bb.0: 1428; AVX512-NEXT: vpbroadcastd %esi, %xmm1 1429; AVX512-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1 1430; AVX512-NEXT: vbroadcastss (%rdi), %xmm0 {%k1} 1431; AVX512-NEXT: retq 1432; 1433; X86AVX2-LABEL: 
load_f32_v4f32: 1434; X86AVX2: # %bb.0: 1435; X86AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax 1436; X86AVX2-NEXT: vpbroadcastd {{[0-9]+}}(%esp), %xmm1 1437; X86AVX2-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm1 1438; X86AVX2-NEXT: vbroadcastss (%eax), %xmm2 1439; X86AVX2-NEXT: vblendvps %xmm1, %xmm2, %xmm0, %xmm0 1440; X86AVX2-NEXT: retl 1441 %x = load float, ptr %p 1442 %ins = insertelement <4 x float> %v, float %x, i32 %y 1443 ret <4 x float> %ins 1444} 1445 1446define <2 x double> @load_f64_v2f64(<2 x double> %v, ptr %p, i32 %y) nounwind { 1447; SSE2-LABEL: load_f64_v2f64: 1448; SSE2: # %bb.0: 1449; SSE2-NEXT: # kill: def $esi killed $esi def $rsi 1450; SSE2-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero 1451; SSE2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) 1452; SSE2-NEXT: andl $1, %esi 1453; SSE2-NEXT: movsd %xmm1, -24(%rsp,%rsi,8) 1454; SSE2-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 1455; SSE2-NEXT: retq 1456; 1457; SSE41-LABEL: load_f64_v2f64: 1458; SSE41: # %bb.0: 1459; SSE41-NEXT: movapd %xmm0, %xmm1 1460; SSE41-NEXT: movddup {{.*#+}} xmm2 = mem[0,0] 1461; SSE41-NEXT: movd %esi, %xmm0 1462; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1] 1463; SSE41-NEXT: pcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 1464; SSE41-NEXT: blendvpd %xmm0, %xmm2, %xmm1 1465; SSE41-NEXT: movapd %xmm1, %xmm0 1466; SSE41-NEXT: retq 1467; 1468; AVX1-LABEL: load_f64_v2f64: 1469; AVX1: # %bb.0: 1470; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] 1471; AVX1-NEXT: vmovd %esi, %xmm2 1472; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1] 1473; AVX1-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 1474; AVX1-NEXT: vblendvpd %xmm2, %xmm1, %xmm0, %xmm0 1475; AVX1-NEXT: retq 1476; 1477; AVX2-LABEL: load_f64_v2f64: 1478; AVX2: # %bb.0: 1479; AVX2-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] 1480; AVX2-NEXT: movl %esi, %eax 1481; AVX2-NEXT: vmovq %rax, %xmm2 1482; AVX2-NEXT: vpbroadcastq %xmm2, %xmm2 1483; AVX2-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2 1484; AVX2-NEXT: vblendvpd 
%xmm2, %xmm1, %xmm0, %xmm0 1485; AVX2-NEXT: retq 1486; 1487; AVX512-LABEL: load_f64_v2f64: 1488; AVX512: # %bb.0: 1489; AVX512-NEXT: movl %esi, %eax 1490; AVX512-NEXT: vpbroadcastq %rax, %xmm1 1491; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %k1 1492; AVX512-NEXT: vmovddup {{.*#+}} xmm0 {%k1} = mem[0,0] 1493; AVX512-NEXT: retq 1494; 1495; X86AVX2-LABEL: load_f64_v2f64: 1496; X86AVX2: # %bb.0: 1497; X86AVX2-NEXT: pushl %ebp 1498; X86AVX2-NEXT: movl %esp, %ebp 1499; X86AVX2-NEXT: andl $-16, %esp 1500; X86AVX2-NEXT: subl $32, %esp 1501; X86AVX2-NEXT: movl 12(%ebp), %eax 1502; X86AVX2-NEXT: andl $1, %eax 1503; X86AVX2-NEXT: movl 8(%ebp), %ecx 1504; X86AVX2-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero 1505; X86AVX2-NEXT: vmovaps %xmm0, (%esp) 1506; X86AVX2-NEXT: vmovsd %xmm1, (%esp,%eax,8) 1507; X86AVX2-NEXT: vmovaps (%esp), %xmm0 1508; X86AVX2-NEXT: movl %ebp, %esp 1509; X86AVX2-NEXT: popl %ebp 1510; X86AVX2-NEXT: retl 1511 %x = load double, ptr %p 1512 %ins = insertelement <2 x double> %v, double %x, i32 %y 1513 ret <2 x double> %ins 1514} 1515 1516define <32 x i8> @arg_i8_v32i8(<32 x i8> %v, i8 %x, i32 %y) nounwind { 1517; SSE-LABEL: arg_i8_v32i8: 1518; SSE: # %bb.0: 1519; SSE-NEXT: # kill: def $esi killed $esi def $rsi 1520; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) 1521; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) 1522; SSE-NEXT: andl $31, %esi 1523; SSE-NEXT: movb %dil, -40(%rsp,%rsi) 1524; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 1525; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 1526; SSE-NEXT: retq 1527; 1528; AVX1OR2-LABEL: arg_i8_v32i8: 1529; AVX1OR2: # %bb.0: 1530; AVX1OR2-NEXT: pushq %rbp 1531; AVX1OR2-NEXT: movq %rsp, %rbp 1532; AVX1OR2-NEXT: andq $-32, %rsp 1533; AVX1OR2-NEXT: subq $64, %rsp 1534; AVX1OR2-NEXT: # kill: def $esi killed $esi def $rsi 1535; AVX1OR2-NEXT: vmovaps %ymm0, (%rsp) 1536; AVX1OR2-NEXT: andl $31, %esi 1537; AVX1OR2-NEXT: movb %dil, (%rsp,%rsi) 1538; AVX1OR2-NEXT: vmovaps (%rsp), %ymm0 1539; AVX1OR2-NEXT: movq %rbp, %rsp 
1540; AVX1OR2-NEXT: popq %rbp 1541; AVX1OR2-NEXT: retq 1542; 1543; AVX512F-LABEL: arg_i8_v32i8: 1544; AVX512F: # %bb.0: 1545; AVX512F-NEXT: pushq %rbp 1546; AVX512F-NEXT: movq %rsp, %rbp 1547; AVX512F-NEXT: andq $-32, %rsp 1548; AVX512F-NEXT: subq $64, %rsp 1549; AVX512F-NEXT: # kill: def $esi killed $esi def $rsi 1550; AVX512F-NEXT: vmovaps %ymm0, (%rsp) 1551; AVX512F-NEXT: andl $31, %esi 1552; AVX512F-NEXT: movb %dil, (%rsp,%rsi) 1553; AVX512F-NEXT: vmovaps (%rsp), %ymm0 1554; AVX512F-NEXT: movq %rbp, %rsp 1555; AVX512F-NEXT: popq %rbp 1556; AVX512F-NEXT: retq 1557; 1558; AVX512BW-LABEL: arg_i8_v32i8: 1559; AVX512BW: # %bb.0: 1560; AVX512BW-NEXT: vpbroadcastb %esi, %ymm1 1561; AVX512BW-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %k1 1562; AVX512BW-NEXT: vpbroadcastb %edi, %ymm0 {%k1} 1563; AVX512BW-NEXT: retq 1564; 1565; X86AVX2-LABEL: arg_i8_v32i8: 1566; X86AVX2: # %bb.0: 1567; X86AVX2-NEXT: pushl %ebp 1568; X86AVX2-NEXT: movl %esp, %ebp 1569; X86AVX2-NEXT: andl $-32, %esp 1570; X86AVX2-NEXT: subl $64, %esp 1571; X86AVX2-NEXT: movl 12(%ebp), %eax 1572; X86AVX2-NEXT: andl $31, %eax 1573; X86AVX2-NEXT: movzbl 8(%ebp), %ecx 1574; X86AVX2-NEXT: vmovaps %ymm0, (%esp) 1575; X86AVX2-NEXT: movb %cl, (%esp,%eax) 1576; X86AVX2-NEXT: vmovaps (%esp), %ymm0 1577; X86AVX2-NEXT: movl %ebp, %esp 1578; X86AVX2-NEXT: popl %ebp 1579; X86AVX2-NEXT: retl 1580 %ins = insertelement <32 x i8> %v, i8 %x, i32 %y 1581 ret <32 x i8> %ins 1582} 1583 1584define <16 x i16> @arg_i16_v16i16(<16 x i16> %v, i16 %x, i32 %y) nounwind { 1585; SSE-LABEL: arg_i16_v16i16: 1586; SSE: # %bb.0: 1587; SSE-NEXT: # kill: def $esi killed $esi def $rsi 1588; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) 1589; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) 1590; SSE-NEXT: andl $15, %esi 1591; SSE-NEXT: movw %di, -40(%rsp,%rsi,2) 1592; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 1593; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 1594; SSE-NEXT: retq 1595; 1596; AVX1OR2-LABEL: arg_i16_v16i16: 1597; AVX1OR2: # %bb.0: 
1598; AVX1OR2-NEXT: pushq %rbp 1599; AVX1OR2-NEXT: movq %rsp, %rbp 1600; AVX1OR2-NEXT: andq $-32, %rsp 1601; AVX1OR2-NEXT: subq $64, %rsp 1602; AVX1OR2-NEXT: # kill: def $esi killed $esi def $rsi 1603; AVX1OR2-NEXT: vmovaps %ymm0, (%rsp) 1604; AVX1OR2-NEXT: andl $15, %esi 1605; AVX1OR2-NEXT: movw %di, (%rsp,%rsi,2) 1606; AVX1OR2-NEXT: vmovaps (%rsp), %ymm0 1607; AVX1OR2-NEXT: movq %rbp, %rsp 1608; AVX1OR2-NEXT: popq %rbp 1609; AVX1OR2-NEXT: retq 1610; 1611; AVX512F-LABEL: arg_i16_v16i16: 1612; AVX512F: # %bb.0: 1613; AVX512F-NEXT: pushq %rbp 1614; AVX512F-NEXT: movq %rsp, %rbp 1615; AVX512F-NEXT: andq $-32, %rsp 1616; AVX512F-NEXT: subq $64, %rsp 1617; AVX512F-NEXT: # kill: def $esi killed $esi def $rsi 1618; AVX512F-NEXT: vmovaps %ymm0, (%rsp) 1619; AVX512F-NEXT: andl $15, %esi 1620; AVX512F-NEXT: movw %di, (%rsp,%rsi,2) 1621; AVX512F-NEXT: vmovaps (%rsp), %ymm0 1622; AVX512F-NEXT: movq %rbp, %rsp 1623; AVX512F-NEXT: popq %rbp 1624; AVX512F-NEXT: retq 1625; 1626; AVX512BW-LABEL: arg_i16_v16i16: 1627; AVX512BW: # %bb.0: 1628; AVX512BW-NEXT: vpbroadcastw %esi, %ymm1 1629; AVX512BW-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %k1 1630; AVX512BW-NEXT: vpbroadcastw %edi, %ymm0 {%k1} 1631; AVX512BW-NEXT: retq 1632; 1633; X86AVX2-LABEL: arg_i16_v16i16: 1634; X86AVX2: # %bb.0: 1635; X86AVX2-NEXT: pushl %ebp 1636; X86AVX2-NEXT: movl %esp, %ebp 1637; X86AVX2-NEXT: andl $-32, %esp 1638; X86AVX2-NEXT: subl $64, %esp 1639; X86AVX2-NEXT: movl 12(%ebp), %eax 1640; X86AVX2-NEXT: andl $15, %eax 1641; X86AVX2-NEXT: movzwl 8(%ebp), %ecx 1642; X86AVX2-NEXT: vmovaps %ymm0, (%esp) 1643; X86AVX2-NEXT: movw %cx, (%esp,%eax,2) 1644; X86AVX2-NEXT: vmovaps (%esp), %ymm0 1645; X86AVX2-NEXT: movl %ebp, %esp 1646; X86AVX2-NEXT: popl %ebp 1647; X86AVX2-NEXT: retl 1648 %ins = insertelement <16 x i16> %v, i16 %x, i32 %y 1649 ret <16 x i16> %ins 1650} 1651 1652define <8 x i32> @arg_i32_v8i32(<8 x i32> %v, i32 %x, i32 %y) nounwind { 1653; SSE-LABEL: arg_i32_v8i32: 1654; SSE: # %bb.0: 1655; 
SSE-NEXT: # kill: def $esi killed $esi def $rsi 1656; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) 1657; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) 1658; SSE-NEXT: andl $7, %esi 1659; SSE-NEXT: movl %edi, -40(%rsp,%rsi,4) 1660; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 1661; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 1662; SSE-NEXT: retq 1663; 1664; AVX1OR2-LABEL: arg_i32_v8i32: 1665; AVX1OR2: # %bb.0: 1666; AVX1OR2-NEXT: pushq %rbp 1667; AVX1OR2-NEXT: movq %rsp, %rbp 1668; AVX1OR2-NEXT: andq $-32, %rsp 1669; AVX1OR2-NEXT: subq $64, %rsp 1670; AVX1OR2-NEXT: # kill: def $esi killed $esi def $rsi 1671; AVX1OR2-NEXT: vmovaps %ymm0, (%rsp) 1672; AVX1OR2-NEXT: andl $7, %esi 1673; AVX1OR2-NEXT: movl %edi, (%rsp,%rsi,4) 1674; AVX1OR2-NEXT: vmovaps (%rsp), %ymm0 1675; AVX1OR2-NEXT: movq %rbp, %rsp 1676; AVX1OR2-NEXT: popq %rbp 1677; AVX1OR2-NEXT: retq 1678; 1679; AVX512-LABEL: arg_i32_v8i32: 1680; AVX512: # %bb.0: 1681; AVX512-NEXT: vpbroadcastd %esi, %ymm1 1682; AVX512-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %k1 1683; AVX512-NEXT: vpbroadcastd %edi, %ymm0 {%k1} 1684; AVX512-NEXT: retq 1685; 1686; X86AVX2-LABEL: arg_i32_v8i32: 1687; X86AVX2: # %bb.0: 1688; X86AVX2-NEXT: pushl %ebp 1689; X86AVX2-NEXT: movl %esp, %ebp 1690; X86AVX2-NEXT: andl $-32, %esp 1691; X86AVX2-NEXT: subl $64, %esp 1692; X86AVX2-NEXT: movl 12(%ebp), %eax 1693; X86AVX2-NEXT: andl $7, %eax 1694; X86AVX2-NEXT: movl 8(%ebp), %ecx 1695; X86AVX2-NEXT: vmovaps %ymm0, (%esp) 1696; X86AVX2-NEXT: movl %ecx, (%esp,%eax,4) 1697; X86AVX2-NEXT: vmovaps (%esp), %ymm0 1698; X86AVX2-NEXT: movl %ebp, %esp 1699; X86AVX2-NEXT: popl %ebp 1700; X86AVX2-NEXT: retl 1701 %ins = insertelement <8 x i32> %v, i32 %x, i32 %y 1702 ret <8 x i32> %ins 1703} 1704 1705define <4 x i64> @arg_i64_v4i64(<4 x i64> %v, i64 %x, i32 %y) nounwind { 1706; SSE-LABEL: arg_i64_v4i64: 1707; SSE: # %bb.0: 1708; SSE-NEXT: # kill: def $esi killed $esi def $rsi 1709; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) 1710; SSE-NEXT: movaps %xmm0, 
-{{[0-9]+}}(%rsp) 1711; SSE-NEXT: andl $3, %esi 1712; SSE-NEXT: movq %rdi, -40(%rsp,%rsi,8) 1713; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm0 1714; SSE-NEXT: movaps -{{[0-9]+}}(%rsp), %xmm1 1715; SSE-NEXT: retq 1716; 1717; AVX1OR2-LABEL: arg_i64_v4i64: 1718; AVX1OR2: # %bb.0: 1719; AVX1OR2-NEXT: pushq %rbp 1720; AVX1OR2-NEXT: movq %rsp, %rbp 1721; AVX1OR2-NEXT: andq $-32, %rsp 1722; AVX1OR2-NEXT: subq $64, %rsp 1723; AVX1OR2-NEXT: # kill: def $esi killed $esi def $rsi 1724; AVX1OR2-NEXT: vmovaps %ymm0, (%rsp) 1725; AVX1OR2-NEXT: andl $3, %esi 1726; AVX1OR2-NEXT: movq %rdi, (%rsp,%rsi,8) 1727; AVX1OR2-NEXT: vmovaps (%rsp), %ymm0 1728; AVX1OR2-NEXT: movq %rbp, %rsp 1729; AVX1OR2-NEXT: popq %rbp 1730; AVX1OR2-NEXT: retq 1731; 1732; AVX512-LABEL: arg_i64_v4i64: 1733; AVX512: # %bb.0: 1734; AVX512-NEXT: movl %esi, %eax 1735; AVX512-NEXT: vpbroadcastq %rax, %ymm1 1736; AVX512-NEXT: vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %k1 1737; AVX512-NEXT: vpbroadcastq %rdi, %ymm0 {%k1} 1738; AVX512-NEXT: retq 1739; 1740; X86AVX2-LABEL: arg_i64_v4i64: 1741; X86AVX2: # %bb.0: 1742; X86AVX2-NEXT: pushl %ebp 1743; X86AVX2-NEXT: movl %esp, %ebp 1744; X86AVX2-NEXT: pushl %esi 1745; X86AVX2-NEXT: andl $-32, %esp 1746; X86AVX2-NEXT: subl $96, %esp 1747; X86AVX2-NEXT: movl 8(%ebp), %edx 1748; X86AVX2-NEXT: movl 12(%ebp), %eax 1749; X86AVX2-NEXT: movl 16(%ebp), %ecx 1750; X86AVX2-NEXT: vmovaps %ymm0, (%esp) 1751; X86AVX2-NEXT: addl %ecx, %ecx 1752; X86AVX2-NEXT: movl %ecx, %esi 1753; X86AVX2-NEXT: andl $7, %esi 1754; X86AVX2-NEXT: movl %edx, (%esp,%esi,4) 1755; X86AVX2-NEXT: vmovaps (%esp), %ymm0 1756; X86AVX2-NEXT: vmovaps %ymm0, {{[0-9]+}}(%esp) 1757; X86AVX2-NEXT: incl %ecx 1758; X86AVX2-NEXT: andl $7, %ecx 1759; X86AVX2-NEXT: movl %eax, 32(%esp,%ecx,4) 1760; X86AVX2-NEXT: vmovaps {{[0-9]+}}(%esp), %ymm0 1761; X86AVX2-NEXT: leal -4(%ebp), %esp 1762; X86AVX2-NEXT: popl %esi 1763; X86AVX2-NEXT: popl %ebp 1764; X86AVX2-NEXT: retl 1765 %ins = insertelement <4 x i64> %v, i64 %x, i32 %y 
  ret <4 x i64> %ins
}

; NOTE(review): all CHECK lines in this section are autogenerated by
; utils/update_llc_test_checks.py (see file header). Regenerate them with that
; script rather than hand-editing when codegen changes.

; Insert a scalar float argument into <8 x float> at a variable index %y.
define <8 x float> @arg_f32_v8f32(<8 x float> %v, float %x, i32 %y) nounwind {
; SSE-LABEL: arg_f32_v8f32:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $edi killed $edi def $rdi
; SSE-NEXT:    movaps %xmm1, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    andl $7, %edi
; SSE-NEXT:    movss %xmm2, -40(%rsp,%rdi,4)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: arg_f32_v8f32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
; AVX1-NEXT:    vmovd %edi, %xmm2
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,0,0,0]
; AVX1-NEXT:    vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
; AVX1-NEXT:    vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm2, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT:    vblendvps %ymm2, %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: arg_f32_v8f32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vbroadcastss %xmm1, %ymm1
; AVX2-NEXT:    vmovd %edi, %xmm2
; AVX2-NEXT:    vpbroadcastd %xmm2, %ymm2
; AVX2-NEXT:    vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
; AVX2-NEXT:    vblendvps %ymm2, %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: arg_f32_v8f32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpbroadcastd %edi, %ymm2
; AVX512-NEXT:    vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %k1
; AVX512-NEXT:    vbroadcastss %xmm1, %ymm0 {%k1}
; AVX512-NEXT:    retq
;
; X86AVX2-LABEL: arg_f32_v8f32:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    vpbroadcastd {{[0-9]+}}(%esp), %ymm1
; X86AVX2-NEXT:    vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm1, %ymm1
; X86AVX2-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %ymm2
; X86AVX2-NEXT:    vblendvps %ymm1, %ymm2, %ymm0, %ymm0
; X86AVX2-NEXT:    retl
  %ins = insertelement <8 x float> %v, float %x, i32 %y
  ret <8 x float> %ins
}

; Insert a scalar double argument into <4 x double> at a variable index %y.
define <4 x double> @arg_f64_v4f64(<4 x double> %v, double %x, i32 %y) nounwind {
; SSE-LABEL: arg_f64_v4f64:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $edi killed $edi def $rdi
; SSE-NEXT:    movaps %xmm1, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    andl $3, %edi
; SSE-NEXT:    movsd %xmm2, -40(%rsp,%rdi,8)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: arg_f64_v4f64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = xmm1[0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
; AVX1-NEXT:    movl %edi, %eax
; AVX1-NEXT:    vmovq %rax, %xmm2
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm2[0,1,0,1]
; AVX1-NEXT:    vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm3
; AVX1-NEXT:    vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm2, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm3, %ymm2
; AVX1-NEXT:    vblendvpd %ymm2, %ymm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: arg_f64_v4f64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vbroadcastsd %xmm1, %ymm1
; AVX2-NEXT:    movl %edi, %eax
; AVX2-NEXT:    vmovq %rax, %xmm2
; AVX2-NEXT:    vpbroadcastq %xmm2, %ymm2
; AVX2-NEXT:    vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
; AVX2-NEXT:    vblendvpd %ymm2, %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: arg_f64_v4f64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    movl %edi, %eax
; AVX512-NEXT:    vpbroadcastq %rax, %ymm2
; AVX512-NEXT:    vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %k1
; AVX512-NEXT:    vbroadcastsd %xmm1, %ymm0 {%k1}
; AVX512-NEXT:    retq
;
; X86AVX2-LABEL: arg_f64_v4f64:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    pushl %ebp
; X86AVX2-NEXT:    movl %esp, %ebp
; X86AVX2-NEXT:    andl $-32, %esp
; X86AVX2-NEXT:    subl $64, %esp
; X86AVX2-NEXT:    movl 16(%ebp), %eax
; X86AVX2-NEXT:    andl $3, %eax
; X86AVX2-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; X86AVX2-NEXT:    vmovaps %ymm0, (%esp)
; X86AVX2-NEXT:    vmovsd %xmm1, (%esp,%eax,8)
; X86AVX2-NEXT:    vmovaps (%esp), %ymm0
; X86AVX2-NEXT:    movl %ebp, %esp
; X86AVX2-NEXT:    popl %ebp
; X86AVX2-NEXT:    retl
  %ins = insertelement <4 x double> %v, double %x, i32 %y
  ret <4 x double> %ins
}

;
; Variable-index insertion of a scalar loaded from memory.
;

define <32 x i8> @load_i8_v32i8(<32 x i8> %v, ptr %p, i32 %y) nounwind {
; SSE-LABEL: load_i8_v32i8:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    movzbl (%rdi), %eax
; SSE-NEXT:    movaps %xmm1, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    andl $31, %esi
; SSE-NEXT:    movb %al, -40(%rsp,%rsi)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    retq
;
; AVX1OR2-LABEL: load_i8_v32i8:
; AVX1OR2:       # %bb.0:
; AVX1OR2-NEXT:    pushq %rbp
; AVX1OR2-NEXT:    movq %rsp, %rbp
; AVX1OR2-NEXT:    andq $-32, %rsp
; AVX1OR2-NEXT:    subq $64, %rsp
; AVX1OR2-NEXT:    # kill: def $esi killed $esi def $rsi
; AVX1OR2-NEXT:    movzbl (%rdi), %eax
; AVX1OR2-NEXT:    vmovaps %ymm0, (%rsp)
; AVX1OR2-NEXT:    andl $31, %esi
; AVX1OR2-NEXT:    movb %al, (%rsp,%rsi)
; AVX1OR2-NEXT:    vmovaps (%rsp), %ymm0
; AVX1OR2-NEXT:    movq %rbp, %rsp
; AVX1OR2-NEXT:    popq %rbp
; AVX1OR2-NEXT:    retq
;
; AVX512F-LABEL: load_i8_v32i8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    pushq %rbp
; AVX512F-NEXT:    movq %rsp, %rbp
; AVX512F-NEXT:    andq $-32, %rsp
; AVX512F-NEXT:    subq $64, %rsp
; AVX512F-NEXT:    # kill: def $esi killed $esi def $rsi
; AVX512F-NEXT:    movzbl (%rdi), %eax
; AVX512F-NEXT:    vmovaps %ymm0, (%rsp)
; AVX512F-NEXT:    andl $31, %esi
; AVX512F-NEXT:    movb %al, (%rsp,%rsi)
; AVX512F-NEXT:    vmovaps (%rsp), %ymm0
; AVX512F-NEXT:    movq %rbp, %rsp
; AVX512F-NEXT:    popq %rbp
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: load_i8_v32i8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpbroadcastb %esi, %ymm1
; AVX512BW-NEXT:    vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %k1
; AVX512BW-NEXT:    vpbroadcastb (%rdi), %ymm0 {%k1}
; AVX512BW-NEXT:    retq
;
; X86AVX2-LABEL: load_i8_v32i8:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    pushl %ebp
; X86AVX2-NEXT:    movl %esp, %ebp
; X86AVX2-NEXT:    andl $-32, %esp
; X86AVX2-NEXT:    subl $64, %esp
; X86AVX2-NEXT:    movl 12(%ebp), %eax
; X86AVX2-NEXT:    andl $31, %eax
; X86AVX2-NEXT:    movl 8(%ebp), %ecx
; X86AVX2-NEXT:    movzbl (%ecx), %ecx
; X86AVX2-NEXT:    vmovaps %ymm0, (%esp)
; X86AVX2-NEXT:    movb %cl, (%esp,%eax)
; X86AVX2-NEXT:    vmovaps (%esp), %ymm0
; X86AVX2-NEXT:    movl %ebp, %esp
; X86AVX2-NEXT:    popl %ebp
; X86AVX2-NEXT:    retl
  %x = load i8, ptr %p
  %ins = insertelement <32 x i8> %v, i8 %x, i32 %y
  ret <32 x i8> %ins
}

define <16 x i16> @load_i16_v16i16(<16 x i16> %v, ptr %p, i32 %y) nounwind {
; SSE-LABEL: load_i16_v16i16:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    movzwl (%rdi), %eax
; SSE-NEXT:    movaps %xmm1, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    andl $15, %esi
; SSE-NEXT:    movw %ax, -40(%rsp,%rsi,2)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    retq
;
; AVX1OR2-LABEL: load_i16_v16i16:
; AVX1OR2:       # %bb.0:
; AVX1OR2-NEXT:    pushq %rbp
; AVX1OR2-NEXT:    movq %rsp, %rbp
; AVX1OR2-NEXT:    andq $-32, %rsp
; AVX1OR2-NEXT:    subq $64, %rsp
; AVX1OR2-NEXT:    # kill: def $esi killed $esi def $rsi
; AVX1OR2-NEXT:    movzwl (%rdi), %eax
; AVX1OR2-NEXT:    vmovaps %ymm0, (%rsp)
; AVX1OR2-NEXT:    andl $15, %esi
; AVX1OR2-NEXT:    movw %ax, (%rsp,%rsi,2)
; AVX1OR2-NEXT:    vmovaps (%rsp), %ymm0
; AVX1OR2-NEXT:    movq %rbp, %rsp
; AVX1OR2-NEXT:    popq %rbp
; AVX1OR2-NEXT:    retq
;
; AVX512F-LABEL: load_i16_v16i16:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    pushq %rbp
; AVX512F-NEXT:    movq %rsp, %rbp
; AVX512F-NEXT:    andq $-32, %rsp
; AVX512F-NEXT:    subq $64, %rsp
; AVX512F-NEXT:    # kill: def $esi killed $esi def $rsi
; AVX512F-NEXT:    movzwl (%rdi), %eax
; AVX512F-NEXT:    vmovaps %ymm0, (%rsp)
; AVX512F-NEXT:    andl $15, %esi
; AVX512F-NEXT:    movw %ax, (%rsp,%rsi,2)
; AVX512F-NEXT:    vmovaps (%rsp), %ymm0
; AVX512F-NEXT:    movq %rbp, %rsp
; AVX512F-NEXT:    popq %rbp
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: load_i16_v16i16:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpbroadcastw %esi, %ymm1
; AVX512BW-NEXT:    vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %k1
; AVX512BW-NEXT:    vpbroadcastw (%rdi), %ymm0 {%k1}
; AVX512BW-NEXT:    retq
;
; X86AVX2-LABEL: load_i16_v16i16:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    pushl %ebp
; X86AVX2-NEXT:    movl %esp, %ebp
; X86AVX2-NEXT:    andl $-32, %esp
; X86AVX2-NEXT:    subl $64, %esp
; X86AVX2-NEXT:    movl 12(%ebp), %eax
; X86AVX2-NEXT:    andl $15, %eax
; X86AVX2-NEXT:    movl 8(%ebp), %ecx
; X86AVX2-NEXT:    movzwl (%ecx), %ecx
; X86AVX2-NEXT:    vmovaps %ymm0, (%esp)
; X86AVX2-NEXT:    movw %cx, (%esp,%eax,2)
; X86AVX2-NEXT:    vmovaps (%esp), %ymm0
; X86AVX2-NEXT:    movl %ebp, %esp
; X86AVX2-NEXT:    popl %ebp
; X86AVX2-NEXT:    retl
  %x = load i16, ptr %p
  %ins = insertelement <16 x i16> %v, i16 %x, i32 %y
  ret <16 x i16> %ins
}

define <8 x i32> @load_i32_v8i32(<8 x i32> %v, ptr %p, i32 %y) nounwind {
; SSE-LABEL: load_i32_v8i32:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    movl (%rdi), %eax
; SSE-NEXT:    movaps %xmm1, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    andl $7, %esi
; SSE-NEXT:    movl %eax, -40(%rsp,%rsi,4)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    retq
;
; AVX1OR2-LABEL: load_i32_v8i32:
; AVX1OR2:       # %bb.0:
; AVX1OR2-NEXT:    pushq %rbp
; AVX1OR2-NEXT:    movq %rsp, %rbp
; AVX1OR2-NEXT:    andq $-32, %rsp
; AVX1OR2-NEXT:    subq $64, %rsp
; AVX1OR2-NEXT:    # kill: def $esi killed $esi def $rsi
; AVX1OR2-NEXT:    movl (%rdi), %eax
; AVX1OR2-NEXT:    vmovaps %ymm0, (%rsp)
; AVX1OR2-NEXT:    andl $7, %esi
; AVX1OR2-NEXT:    movl %eax, (%rsp,%rsi,4)
; AVX1OR2-NEXT:    vmovaps (%rsp), %ymm0
; AVX1OR2-NEXT:    movq %rbp, %rsp
; AVX1OR2-NEXT:    popq %rbp
; AVX1OR2-NEXT:    retq
;
; AVX512-LABEL: load_i32_v8i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpbroadcastd %esi, %ymm1
; AVX512-NEXT:    vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %k1
; AVX512-NEXT:    vpbroadcastd (%rdi), %ymm0 {%k1}
; AVX512-NEXT:    retq
;
; X86AVX2-LABEL: load_i32_v8i32:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    pushl %ebp
; X86AVX2-NEXT:    movl %esp, %ebp
; X86AVX2-NEXT:    andl $-32, %esp
; X86AVX2-NEXT:    subl $64, %esp
; X86AVX2-NEXT:    movl 12(%ebp), %eax
; X86AVX2-NEXT:    andl $7, %eax
; X86AVX2-NEXT:    movl 8(%ebp), %ecx
; X86AVX2-NEXT:    movl (%ecx), %ecx
; X86AVX2-NEXT:    vmovaps %ymm0, (%esp)
; X86AVX2-NEXT:    movl %ecx, (%esp,%eax,4)
; X86AVX2-NEXT:    vmovaps (%esp), %ymm0
; X86AVX2-NEXT:    movl %ebp, %esp
; X86AVX2-NEXT:    popl %ebp
; X86AVX2-NEXT:    retl
  %x = load i32, ptr %p
  %ins = insertelement <8 x i32> %v, i32 %x, i32 %y
  ret <8 x i32> %ins
}

define <4 x i64> @load_i64_v4i64(<4 x i64> %v, ptr %p, i32 %y) nounwind {
; SSE-LABEL: load_i64_v4i64:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    movq (%rdi), %rax
; SSE-NEXT:    movaps %xmm1, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    andl $3, %esi
; SSE-NEXT:    movq %rax, -40(%rsp,%rsi,8)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    retq
;
; AVX1OR2-LABEL: load_i64_v4i64:
; AVX1OR2:       # %bb.0:
; AVX1OR2-NEXT:    pushq %rbp
; AVX1OR2-NEXT:    movq %rsp, %rbp
; AVX1OR2-NEXT:    andq $-32, %rsp
; AVX1OR2-NEXT:    subq $64, %rsp
; AVX1OR2-NEXT:    # kill: def $esi killed $esi def $rsi
; AVX1OR2-NEXT:    movq (%rdi), %rax
; AVX1OR2-NEXT:    vmovaps %ymm0, (%rsp)
; AVX1OR2-NEXT:    andl $3, %esi
; AVX1OR2-NEXT:    movq %rax, (%rsp,%rsi,8)
; AVX1OR2-NEXT:    vmovaps (%rsp), %ymm0
; AVX1OR2-NEXT:    movq %rbp, %rsp
; AVX1OR2-NEXT:    popq %rbp
; AVX1OR2-NEXT:    retq
;
; AVX512-LABEL: load_i64_v4i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    movl %esi, %eax
; AVX512-NEXT:    vpbroadcastq %rax, %ymm1
; AVX512-NEXT:    vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %k1
; AVX512-NEXT:    vpbroadcastq (%rdi), %ymm0 {%k1}
; AVX512-NEXT:    retq
;
; X86AVX2-LABEL: load_i64_v4i64:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    pushl %ebp
; X86AVX2-NEXT:    movl %esp, %ebp
; X86AVX2-NEXT:    pushl %esi
; X86AVX2-NEXT:    andl $-32, %esp
; X86AVX2-NEXT:    subl $96, %esp
; X86AVX2-NEXT:    movl 12(%ebp), %eax
; X86AVX2-NEXT:    movl 8(%ebp), %ecx
; X86AVX2-NEXT:    movl (%ecx), %edx
; X86AVX2-NEXT:    movl 4(%ecx), %ecx
; X86AVX2-NEXT:    vmovaps %ymm0, (%esp)
; X86AVX2-NEXT:    addl %eax, %eax
; X86AVX2-NEXT:    movl %eax, %esi
; X86AVX2-NEXT:    andl $7, %esi
; X86AVX2-NEXT:    movl %edx, (%esp,%esi,4)
; X86AVX2-NEXT:    vmovaps (%esp), %ymm0
; X86AVX2-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%esp)
; X86AVX2-NEXT:    incl %eax
; X86AVX2-NEXT:    andl $7, %eax
; X86AVX2-NEXT:    movl %ecx, 32(%esp,%eax,4)
; X86AVX2-NEXT:    vmovaps {{[0-9]+}}(%esp), %ymm0
; X86AVX2-NEXT:    leal -4(%ebp), %esp
; X86AVX2-NEXT:    popl %esi
; X86AVX2-NEXT:    popl %ebp
; X86AVX2-NEXT:    retl
  %x = load i64, ptr %p
  %ins = insertelement <4 x i64> %v, i64 %x, i32 %y
  ret <4 x i64> %ins
}

define <8 x float> @load_f32_v8f32(<8 x float> %v, ptr %p, i32 %y) nounwind {
; SSE-LABEL: load_f32_v8f32:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE-NEXT:    movaps %xmm1, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    andl $7, %esi
; SSE-NEXT:    movss %xmm2, -40(%rsp,%rsi,4)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: load_f32_v8f32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vmovd %esi, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,0,0,0]
; AVX1-NEXT:    vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm2
; AVX1-NEXT:    vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT:    vbroadcastss (%rdi), %ymm2
; AVX1-NEXT:    vblendvps %ymm1, %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_f32_v8f32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vbroadcastss (%rdi), %ymm1
; AVX2-NEXT:    vmovd %esi, %xmm2
; AVX2-NEXT:    vpbroadcastd %xmm2, %ymm2
; AVX2-NEXT:    vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
; AVX2-NEXT:    vblendvps %ymm2, %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_f32_v8f32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpbroadcastd %esi, %ymm1
; AVX512-NEXT:    vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %k1
; AVX512-NEXT:    vbroadcastss (%rdi), %ymm0 {%k1}
; AVX512-NEXT:    retq
;
; X86AVX2-LABEL: load_f32_v8f32:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86AVX2-NEXT:    vpbroadcastd {{[0-9]+}}(%esp), %ymm1
; X86AVX2-NEXT:    vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm1, %ymm1
; X86AVX2-NEXT:    vbroadcastss (%eax), %ymm2
; X86AVX2-NEXT:    vblendvps %ymm1, %ymm2, %ymm0, %ymm0
; X86AVX2-NEXT:    retl
  %x = load float, ptr %p
  %ins = insertelement <8 x float> %v, float %x, i32 %y
  ret <8 x float> %ins
}

define <4 x double> @load_f64_v4f64(<4 x double> %v, ptr %p, i32 %y) nounwind {
; SSE-LABEL: load_f64_v4f64:
; SSE:       # %bb.0:
; SSE-NEXT:    # kill: def $esi killed $esi def $rsi
; SSE-NEXT:    movsd {{.*#+}} xmm2 = mem[0],zero
; SSE-NEXT:    movaps %xmm1, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT:    andl $3, %esi
; SSE-NEXT:    movsd %xmm2, -40(%rsp,%rsi,8)
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT:    movaps -{{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: load_f64_v4f64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    movl %esi, %eax
; AVX1-NEXT:    vmovq %rax, %xmm1
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
; AVX1-NEXT:    vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm2
; AVX1-NEXT:    vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}+16(%rip), %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT:    vbroadcastsd (%rdi), %ymm2
; AVX1-NEXT:    vblendvpd %ymm1, %ymm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_f64_v4f64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vbroadcastsd (%rdi), %ymm1
; AVX2-NEXT:    movl %esi, %eax
; AVX2-NEXT:    vmovq %rax, %xmm2
; AVX2-NEXT:    vpbroadcastq %xmm2, %ymm2
; AVX2-NEXT:    vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm2, %ymm2
; AVX2-NEXT:    vblendvpd %ymm2, %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_f64_v4f64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    movl %esi, %eax
; AVX512-NEXT:    vpbroadcastq %rax, %ymm1
; AVX512-NEXT:    vpcmpeqq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %k1
; AVX512-NEXT:    vbroadcastsd (%rdi), %ymm0 {%k1}
; AVX512-NEXT:    retq
;
; X86AVX2-LABEL: load_f64_v4f64:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    pushl %ebp
; X86AVX2-NEXT:    movl %esp, %ebp
; X86AVX2-NEXT:    andl $-32, %esp
; X86AVX2-NEXT:    subl $64, %esp
; X86AVX2-NEXT:    movl 12(%ebp), %eax
; X86AVX2-NEXT:    andl $3, %eax
; X86AVX2-NEXT:    movl 8(%ebp), %ecx
; X86AVX2-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; X86AVX2-NEXT:    vmovaps %ymm0, (%esp)
; X86AVX2-NEXT:    vmovsd %xmm1, (%esp,%eax,8)
; X86AVX2-NEXT:    vmovaps (%esp), %ymm0
; X86AVX2-NEXT:    movl %ebp, %esp
; X86AVX2-NEXT:    popl %ebp
; X86AVX2-NEXT:    retl
  %x = load double, ptr %p
  %ins = insertelement <4 x double> %v, double %x, i32 %y
  ret <4 x double> %ins
}

; Don't die trying to insert to an invalid index.
; NOTE(review): the insert index below is an i1 (%C3), and %C3 is a constant
; true comparison (INT64_MAX > INT64_MIN), so the effective index is 1.

define i32 @PR44139(ptr %p) {
; SSE-LABEL: PR44139:
; SSE:       # %bb.0:
; SSE-NEXT:    movl (%rdi), %eax
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[0,1,0,1]
; SSE-NEXT:    movdqa %xmm0, 96(%rdi)
; SSE-NEXT:    movdqa %xmm0, 112(%rdi)
; SSE-NEXT:    movdqa %xmm0, 64(%rdi)
; SSE-NEXT:    movdqa %xmm0, 80(%rdi)
; SSE-NEXT:    movdqa %xmm0, 32(%rdi)
; SSE-NEXT:    movdqa %xmm0, 48(%rdi)
; SSE-NEXT:    movdqa %xmm0, (%rdi)
; SSE-NEXT:    movdqa %xmm0, 16(%rdi)
; SSE-NEXT:    leal 2147483647(%rax), %ecx
; SSE-NEXT:    testl %eax, %eax
; SSE-NEXT:    cmovnsl %eax, %ecx
; SSE-NEXT:    andl $-2147483648, %ecx # imm = 0x80000000
; SSE-NEXT:    addl %eax, %ecx
; SSE-NEXT:    # kill: def $eax killed $eax killed $rax
; SSE-NEXT:    xorl %edx, %edx
; SSE-NEXT:    divl %ecx
; SSE-NEXT:    retq
;
; AVX1-LABEL: PR44139:
; AVX1:       # %bb.0:
; AVX1-NEXT:    movq (%rdi), %rax
; AVX1-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX1-NEXT:    vpinsrq $1, %rax, %xmm0, %xmm1
; AVX1-NEXT:    vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX1-NEXT:    vmovaps %ymm0, 64(%rdi)
; AVX1-NEXT:    vmovaps %ymm0, 96(%rdi)
; AVX1-NEXT:    vmovaps %ymm0, 32(%rdi)
; AVX1-NEXT:    vmovaps %ymm1, (%rdi)
; AVX1-NEXT:    leal 2147483647(%rax), %ecx
; AVX1-NEXT:    testl %eax, %eax
; AVX1-NEXT:    cmovnsl %eax, %ecx
; AVX1-NEXT:    andl $-2147483648, %ecx # imm = 0x80000000
; AVX1-NEXT:    addl %eax, %ecx
; AVX1-NEXT:    # kill: def $eax killed $eax killed $rax
; AVX1-NEXT:    xorl %edx, %edx
; AVX1-NEXT:    divl %ecx
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: PR44139:
; AVX2:       # %bb.0:
; AVX2-NEXT:    movq (%rdi), %rax
; AVX2-NEXT:    vpbroadcastq (%rdi), %ymm0
; AVX2-NEXT:    vpinsrq $1, %rax, %xmm0, %xmm1
; AVX2-NEXT:    vpblendd {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; AVX2-NEXT:    vmovdqa %ymm0, 64(%rdi)
; AVX2-NEXT:    vmovdqa %ymm0, 96(%rdi)
; AVX2-NEXT:    vmovdqa %ymm0, 32(%rdi)
; AVX2-NEXT:    vmovdqa %ymm1, (%rdi)
; AVX2-NEXT:    leal 2147483647(%rax), %ecx
; AVX2-NEXT:    testl %eax, %eax
; AVX2-NEXT:    cmovnsl %eax, %ecx
; AVX2-NEXT:    andl $-2147483648, %ecx # imm = 0x80000000
; AVX2-NEXT:    addl %eax, %ecx
; AVX2-NEXT:    # kill: def $eax killed $eax killed $rax
; AVX2-NEXT:    xorl %edx, %edx
; AVX2-NEXT:    divl %ecx
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: PR44139:
; AVX512:       # %bb.0:
; AVX512-NEXT:    movq (%rdi), %rax
; AVX512-NEXT:    vpbroadcastq (%rdi), %zmm0
; AVX512-NEXT:    vpinsrq $1, %rax, %xmm0, %xmm1
; AVX512-NEXT:    vinserti32x4 $0, %xmm1, %zmm0, %zmm1
; AVX512-NEXT:    vmovdqa64 %zmm0, 64(%rdi)
; AVX512-NEXT:    vmovdqa64 %zmm1, (%rdi)
; AVX512-NEXT:    leal 2147483647(%rax), %ecx
; AVX512-NEXT:    testl %eax, %eax
; AVX512-NEXT:    cmovnsl %eax, %ecx
; AVX512-NEXT:    andl $-2147483648, %ecx # imm = 0x80000000
; AVX512-NEXT:    addl %eax, %ecx
; AVX512-NEXT:    # kill: def $eax killed $eax killed $rax
; AVX512-NEXT:    xorl %edx, %edx
; AVX512-NEXT:    divl %ecx
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
;
; X86AVX2-LABEL: PR44139:
; X86AVX2:       # %bb.0:
; X86AVX2-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86AVX2-NEXT:    vbroadcastsd (%ecx), %ymm0
; X86AVX2-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm0[0],mem[0]
; X86AVX2-NEXT:    vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; X86AVX2-NEXT:    vmovaps %ymm0, 64(%ecx)
; X86AVX2-NEXT:    vmovaps %ymm0, 96(%ecx)
; X86AVX2-NEXT:    vmovaps %ymm0, 32(%ecx)
; X86AVX2-NEXT:    movl (%ecx), %eax
; X86AVX2-NEXT:    vmovaps %ymm1, (%ecx)
; X86AVX2-NEXT:    leal 2147483647(%eax), %ecx
; X86AVX2-NEXT:    testl %eax, %eax
; X86AVX2-NEXT:    cmovnsl %eax, %ecx
; X86AVX2-NEXT:    andl $-2147483648, %ecx # imm = 0x80000000
; X86AVX2-NEXT:    addl %eax, %ecx
; X86AVX2-NEXT:    xorl %edx, %edx
; X86AVX2-NEXT:    divl %ecx
; X86AVX2-NEXT:    vzeroupper
; X86AVX2-NEXT:    retl
  %L = load <16 x i64>, ptr %p
  %E1 = extractelement <16 x i64> %L, i64 0
  %tempvector = insertelement <16 x i64> undef, i64 %E1, i32 0
  %vector = shufflevector <16 x i64> %tempvector, <16 x i64> undef, <16 x i32> zeroinitializer
  %C3 = icmp sgt i64 9223372036854775807, -9223372036854775808
  %t0 = trunc <16 x i64> %vector to <16 x i32>
  %I4 = insertelement <16 x i64> %vector, i64 %E1, i1 %C3
  store <16 x i64> %I4, ptr %p
  %elt = extractelement <16 x i32> %t0, i32 0
  %B = srem i32 %elt, -2147483648
  %B9 = udiv i32 %elt, %B
  ret i32 %B9
}