; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,SSE42
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512

; Widened shuffle broadcast loads
;
; Every function below loads a vector and applies a single-input shuffle whose
; mask repeats a leading group of elements (16, 32 or 64 bits wide in total)
; across the whole result. The function names encode
; <result type>_<loaded type>_<shuffle mask>.
;
; The second shuffle operand is never selected by any mask, so it is spelled
; `poison` rather than the deprecated `undef`.
;
; NOTE(review): most functions carry `readnone` although they load through
; their pointer argument. The attribute is left untouched so the generated
; assertions are not perturbed - confirm whether `readonly` was intended.

; <4 x float> load, elements 0,1 repeated twice.
define <4 x float> @load_splat_4f32_4f32_0101(ptr %ptr) nounwind uwtable readnone ssp {
; SSE2-LABEL: load_splat_4f32_4f32_0101:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movaps (%rdi), %xmm0
; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
; SSE2-NEXT:    retq
;
; SSE42-LABEL: load_splat_4f32_4f32_0101:
; SSE42:       # %bb.0: # %entry
; SSE42-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
; SSE42-NEXT:    retq
;
; AVX-LABEL: load_splat_4f32_4f32_0101:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; AVX-NEXT:    retq
entry:
  %ld = load <4 x float>, ptr %ptr
  %ret = shufflevector <4 x float> %ld, <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  ret <4 x float> %ret
}

; <4 x float> load, elements 0,1 repeated four times into a <8 x float>.
define <8 x float> @load_splat_8f32_4f32_01010101(ptr %ptr) nounwind uwtable readnone ssp {
; SSE2-LABEL: load_splat_8f32_4f32_01010101:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movaps (%rdi), %xmm0
; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    retq
;
; SSE42-LABEL: load_splat_8f32_4f32_01010101:
; SSE42:       # %bb.0: # %entry
; SSE42-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
; SSE42-NEXT:    movapd %xmm0, %xmm1
; SSE42-NEXT:    retq
;
; AVX-LABEL: load_splat_8f32_4f32_01010101:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %ld = load <4 x float>, ptr %ptr
  %ret = shufflevector <4 x float> %ld, <4 x float> poison, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <8 x float> %ret
}

; <8 x float> load, elements 0,1 repeated four times.
define <8 x float> @load_splat_8f32_8f32_01010101(ptr %ptr) nounwind uwtable readnone ssp {
; SSE2-LABEL: load_splat_8f32_8f32_01010101:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movaps (%rdi), %xmm0
; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    retq
;
; SSE42-LABEL: load_splat_8f32_8f32_01010101:
; SSE42:       # %bb.0: # %entry
; SSE42-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
; SSE42-NEXT:    movapd %xmm0, %xmm1
; SSE42-NEXT:    retq
;
; AVX-LABEL: load_splat_8f32_8f32_01010101:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %ld = load <8 x float>, ptr %ptr
  %ret = shufflevector <8 x float> %ld, <8 x float> poison, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <8 x float> %ret
}

; <4 x i32> load, elements 0,1 repeated twice.
define <4 x i32> @load_splat_4i32_4i32_0101(ptr %ptr) nounwind uwtable readnone ssp {
; SSE-LABEL: load_splat_4i32_4i32_0101:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[0,1,0,1]
; SSE-NEXT:    retq
;
; AVX1-LABEL: load_splat_4i32_4i32_0101:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = mem[0,1,0,1]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_splat_4i32_4i32_0101:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_splat_4i32_4i32_0101:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; AVX512-NEXT:    retq
entry:
  %ld = load <4 x i32>, ptr %ptr
  %ret = shufflevector <4 x i32> %ld, <4 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  ret <4 x i32> %ret
}

; <4 x i32> load, elements 0,1 repeated four times into a <8 x i32>.
define <8 x i32> @load_splat_8i32_4i32_01010101(ptr %ptr) nounwind uwtable readnone ssp {
; SSE-LABEL: load_splat_8i32_4i32_01010101:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[0,1,0,1]
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: load_splat_8i32_4i32_01010101:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %ld = load <4 x i32>, ptr %ptr
  %ret = shufflevector <4 x i32> %ld, <4 x i32> poison, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <8 x i32> %ret
}

; <8 x i32> load, elements 0,1 repeated four times.
define <8 x i32> @load_splat_8i32_8i32_01010101(ptr %ptr) nounwind uwtable readnone ssp {
; SSE-LABEL: load_splat_8i32_8i32_01010101:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[0,1,0,1]
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: load_splat_8i32_8i32_01010101:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %ld = load <8 x i32>, ptr %ptr
  %ret = shufflevector <8 x i32> %ld, <8 x i32> poison, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <8 x i32> %ret
}

; <8 x i16> load, elements 0,1 (32 bits) repeated four times.
define <8 x i16> @load_splat_8i16_8i16_01010101(ptr %ptr) nounwind uwtable readnone ssp {
; SSE-LABEL: load_splat_8i16_8i16_01010101:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[0,0,0,0]
; SSE-NEXT:    retq
;
; AVX1-LABEL: load_splat_8i16_8i16_01010101:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = mem[0,0,0,0]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_splat_8i16_8i16_01010101:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vbroadcastss (%rdi), %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_splat_8i16_8i16_01010101:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vbroadcastss (%rdi), %xmm0
; AVX512-NEXT:    retq
entry:
  %ld = load <8 x i16>, ptr %ptr
  %ret = shufflevector <8 x i16> %ld, <8 x i16> poison, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <8 x i16> %ret
}

; <8 x i16> load, elements 0..3 (64 bits) repeated twice.
define <8 x i16> @load_splat_8i16_8i16_01230123(ptr %ptr) nounwind uwtable readnone ssp {
; SSE-LABEL: load_splat_8i16_8i16_01230123:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[0,1,0,1]
; SSE-NEXT:    retq
;
; AVX1-LABEL: load_splat_8i16_8i16_01230123:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = mem[0,1,0,1]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_splat_8i16_8i16_01230123:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_splat_8i16_8i16_01230123:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; AVX512-NEXT:    retq
entry:
  %ld = load <8 x i16>, ptr %ptr
  %ret = shufflevector <8 x i16> %ld, <8 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i16> %ret
}

; <8 x i16> load, elements 0,1 repeated eight times into a <16 x i16>.
define <16 x i16> @load_splat_16i16_8i16_0101010101010101(ptr %ptr) nounwind uwtable readnone ssp {
; SSE-LABEL: load_splat_16i16_8i16_0101010101010101:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[0,0,0,0]
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: load_splat_16i16_8i16_0101010101010101:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vbroadcastss (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %ld = load <8 x i16>, ptr %ptr
  %ret = shufflevector <8 x i16> %ld, <8 x i16> poison, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <16 x i16> %ret
}

; <8 x i16> load, elements 0..3 repeated four times into a <16 x i16>.
define <16 x i16> @load_splat_16i16_8i16_0123012301230123(ptr %ptr) nounwind uwtable readnone ssp {
; SSE-LABEL: load_splat_16i16_8i16_0123012301230123:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[0,1,0,1]
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: load_splat_16i16_8i16_0123012301230123:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %ld = load <8 x i16>, ptr %ptr
  %ret = shufflevector <8 x i16> %ld, <8 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <16 x i16> %ret
}

; <16 x i16> load, elements 0,1 repeated eight times.
define <16 x i16> @load_splat_16i16_16i16_0101010101010101(ptr %ptr) nounwind uwtable readnone ssp {
; SSE-LABEL: load_splat_16i16_16i16_0101010101010101:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[0,0,0,0]
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: load_splat_16i16_16i16_0101010101010101:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vbroadcastss (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %ld = load <16 x i16>, ptr %ptr
  %ret = shufflevector <16 x i16> %ld, <16 x i16> poison, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <16 x i16> %ret
}

; <16 x i16> load, elements 0..3 repeated four times.
define <16 x i16> @load_splat_16i16_16i16_0123012301230123(ptr %ptr) nounwind uwtable readnone ssp {
; SSE-LABEL: load_splat_16i16_16i16_0123012301230123:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[0,1,0,1]
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: load_splat_16i16_16i16_0123012301230123:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %ld = load <16 x i16>, ptr %ptr
  %ret = shufflevector <16 x i16> %ld, <16 x i16> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <16 x i16> %ret
}

; <16 x i8> load, elements 0,1 (16 bits) repeated eight times.
define <16 x i8> @load_splat_16i8_16i8_0101010101010101(ptr %ptr) nounwind uwtable readnone ssp {
; SSE-LABEL: load_splat_16i8_16i8_0101010101010101:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = mem[0,0,0,0,4,5,6,7]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    retq
;
; AVX1-LABEL: load_splat_16i8_16i8_0101010101010101:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = mem[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_splat_16i8_16i8_0101010101010101:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpbroadcastw (%rdi), %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_splat_16i8_16i8_0101010101010101:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpbroadcastw (%rdi), %xmm0
; AVX512-NEXT:    retq
entry:
  %ld = load <16 x i8>, ptr %ptr
  %ret = shufflevector <16 x i8> %ld, <16 x i8> poison, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <16 x i8> %ret
}

; <16 x i8> load, elements 0..3 (32 bits) repeated four times.
define <16 x i8> @load_splat_16i8_16i8_0123012301230123(ptr %ptr) nounwind uwtable readnone ssp {
; SSE-LABEL: load_splat_16i8_16i8_0123012301230123:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[0,0,0,0]
; SSE-NEXT:    retq
;
; AVX1-LABEL: load_splat_16i8_16i8_0123012301230123:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = mem[0,0,0,0]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_splat_16i8_16i8_0123012301230123:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vbroadcastss (%rdi), %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_splat_16i8_16i8_0123012301230123:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vbroadcastss (%rdi), %xmm0
; AVX512-NEXT:    retq
entry:
  %ld = load <16 x i8>, ptr %ptr
  %ret = shufflevector <16 x i8> %ld, <16 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <16 x i8> %ret
}

; <16 x i8> load, elements 0..7 (64 bits) repeated twice.
define <16 x i8> @load_splat_16i8_16i8_0123456701234567(ptr %ptr) nounwind uwtable readnone ssp {
; SSE-LABEL: load_splat_16i8_16i8_0123456701234567:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[0,1,0,1]
; SSE-NEXT:    retq
;
; AVX1-LABEL: load_splat_16i8_16i8_0123456701234567:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = mem[0,1,0,1]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_splat_16i8_16i8_0123456701234567:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_splat_16i8_16i8_0123456701234567:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; AVX512-NEXT:    retq
entry:
  %ld = load <16 x i8>, ptr %ptr
  %ret = shufflevector <16 x i8> %ld, <16 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <16 x i8> %ret
}

; <16 x i8> load, elements 0,1 repeated sixteen times into a <32 x i8>.
define <32 x i8> @load_splat_32i8_16i8_01010101010101010101010101010101(ptr %ptr) nounwind uwtable readnone ssp {
; SSE-LABEL: load_splat_32i8_16i8_01010101010101010101010101010101:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = mem[0,0,0,0,4,5,6,7]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: load_splat_32i8_16i8_01010101010101010101010101010101:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = mem[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_splat_32i8_16i8_01010101010101010101010101010101:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpbroadcastw (%rdi), %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_splat_32i8_16i8_01010101010101010101010101010101:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpbroadcastw (%rdi), %ymm0
; AVX512-NEXT:    retq
entry:
  %ld = load <16 x i8>, ptr %ptr
  %ret = shufflevector <16 x i8> %ld, <16 x i8> poison, <32 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <32 x i8> %ret
}

; <16 x i8> load, elements 0..3 repeated eight times into a <32 x i8>.
define <32 x i8> @load_splat_32i8_16i8_01230123012301230123012301230123(ptr %ptr) nounwind uwtable readnone ssp {
; SSE-LABEL: load_splat_32i8_16i8_01230123012301230123012301230123:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[0,0,0,0]
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: load_splat_32i8_16i8_01230123012301230123012301230123:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vbroadcastss (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %ld = load <16 x i8>, ptr %ptr
  %ret = shufflevector <16 x i8> %ld, <16 x i8> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <32 x i8> %ret
}

; <16 x i8> load, elements 0..7 repeated four times into a <32 x i8>.
define <32 x i8> @load_splat_32i8_16i8_01234567012345670123456701234567(ptr %ptr) nounwind uwtable readnone ssp {
; SSE-LABEL: load_splat_32i8_16i8_01234567012345670123456701234567:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[0,1,0,1]
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: load_splat_32i8_16i8_01234567012345670123456701234567:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %ld = load <16 x i8>, ptr %ptr
  %ret = shufflevector <16 x i8> %ld, <16 x i8> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <32 x i8> %ret
}

; <32 x i8> load, elements 0,1 repeated sixteen times.
define <32 x i8> @load_splat_32i8_32i8_01010101010101010101010101010101(ptr %ptr) nounwind uwtable readnone ssp {
; SSE-LABEL: load_splat_32i8_32i8_01010101010101010101010101010101:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshuflw {{.*#+}} xmm0 = mem[0,0,0,0,4,5,6,7]
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: load_splat_32i8_32i8_01010101010101010101010101010101:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = mem[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_splat_32i8_32i8_01010101010101010101010101010101:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vpbroadcastw (%rdi), %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_splat_32i8_32i8_01010101010101010101010101010101:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vpbroadcastw (%rdi), %ymm0
; AVX512-NEXT:    retq
entry:
  %ld = load <32 x i8>, ptr %ptr
  %ret = shufflevector <32 x i8> %ld, <32 x i8> poison, <32 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <32 x i8> %ret
}

; <32 x i8> load, elements 0..3 repeated eight times.
define <32 x i8> @load_splat_32i8_32i8_01230123012301230123012301230123(ptr %ptr) nounwind uwtable readnone ssp {
; SSE-LABEL: load_splat_32i8_32i8_01230123012301230123012301230123:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[0,0,0,0]
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: load_splat_32i8_32i8_01230123012301230123012301230123:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vbroadcastss (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %ld = load <32 x i8>, ptr %ptr
  %ret = shufflevector <32 x i8> %ld, <32 x i8> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
  ret <32 x i8> %ret
}

; <32 x i8> load, elements 0..7 repeated four times.
define <32 x i8> @load_splat_32i8_32i8_01234567012345670123456701234567(ptr %ptr) nounwind uwtable readnone ssp {
; SSE-LABEL: load_splat_32i8_32i8_01234567012345670123456701234567:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = mem[0,1,0,1]
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: load_splat_32i8_32i8_01234567012345670123456701234567:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %ld = load <32 x i8>, ptr %ptr
  %ret = shufflevector <32 x i8> %ld, <32 x i8> poison, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <32 x i8> %ret
}

; <8 x float> load, element 0 splatted into a narrower <4 x float> result.
define <4 x float> @load_splat_4f32_8f32_0000(ptr %ptr) nounwind uwtable readnone ssp {
; SSE-LABEL: load_splat_4f32_8f32_0000:
; SSE:       # %bb.0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT:    retq
;
; AVX-LABEL: load_splat_4f32_8f32_0000:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vbroadcastss (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %ld = load <8 x float>, ptr %ptr
  %ret = shufflevector <8 x float> %ld, <8 x float> poison, <4 x i32> zeroinitializer
  ret <4 x float> %ret
}

; <16 x float> load, elements 8,9 (byte offset 32) repeated four times into a
; narrower <8 x float> result.
define <8 x float> @load_splat_8f32_16f32_89898989(ptr %ptr) nounwind uwtable readnone ssp {
; SSE2-LABEL: load_splat_8f32_16f32_89898989:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movaps 32(%rdi), %xmm0
; SSE2-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0,0]
; SSE2-NEXT:    movaps %xmm0, %xmm1
; SSE2-NEXT:    retq
;
; SSE42-LABEL: load_splat_8f32_16f32_89898989:
; SSE42:       # %bb.0: # %entry
; SSE42-NEXT:    movddup {{.*#+}} xmm0 = mem[0,0]
; SSE42-NEXT:    movapd %xmm0, %xmm1
; SSE42-NEXT:    retq
;
; AVX-LABEL: load_splat_8f32_16f32_89898989:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vbroadcastsd 32(%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %ld = load <16 x float>, ptr %ptr
  %ret = shufflevector <16 x float> %ld, <16 x float> poison, <8 x i32> <i32 8, i32 9, i32 8, i32 9, i32 8, i32 9, i32 8, i32 9>
  ret <8 x float> %ret
}

; PR34394
; The remaining functions load a 64-bit <2 x i32> and repeat it across
; progressively wider results.

; <2 x i32> load repeated twice into a <4 x i32>.
define <4 x i32> @load_splat_4i32_2i32_0101(ptr %vp) {
; SSE-LABEL: load_splat_4i32_2i32_0101:
; SSE:       # %bb.0:
; SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE-NEXT:    retq
;
; AVX-LABEL: load_splat_4i32_2i32_0101:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; AVX-NEXT:    retq
  %vec = load <2 x i32>, ptr %vp
  %res = shufflevector <2 x i32> %vec, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  ret <4 x i32> %res
}

; <2 x i32> load repeated four times into a <8 x i32>.
define <8 x i32> @load_splat_8i32_2i32_0101(ptr %vp) {
; SSE-LABEL: load_splat_8i32_2i32_0101:
; SSE:       # %bb.0:
; SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: load_splat_8i32_2i32_0101:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX-NEXT:    retq
  %vec = load <2 x i32>, ptr %vp
  %res = shufflevector <2 x i32> %vec, <2 x i32> poison, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <8 x i32> %res
}

; <2 x i32> load repeated eight times into a <16 x i32>.
define <16 x i32> @load_splat_16i32_2i32_0101(ptr %vp) {
; SSE-LABEL: load_splat_16i32_2i32_0101:
; SSE:       # %bb.0:
; SSE-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
; SSE-NEXT:    movdqa %xmm0, %xmm1
; SSE-NEXT:    movdqa %xmm0, %xmm2
; SSE-NEXT:    movdqa %xmm0, %xmm3
; SSE-NEXT:    retq
;
; AVX1-LABEL: load_splat_16i32_2i32_0101:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX1-NEXT:    vmovaps %ymm0, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: load_splat_16i32_2i32_0101:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX2-NEXT:    vmovaps %ymm0, %ymm1
; AVX2-NEXT:    retq
;
; AVX512-LABEL: load_splat_16i32_2i32_0101:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vbroadcastsd (%rdi), %zmm0
; AVX512-NEXT:    retq
  %vec = load <2 x i32>, ptr %vp
  %res = shufflevector <2 x i32> %vec, <2 x i32> poison, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <16 x i32> %res
}