; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -mtriple=x86_64-unknown-unknown -mattr=+sse2 < %s | FileCheck %s --check-prefix=SSE
; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -mtriple=x86_64-unknown-unknown -mattr=+avx < %s | FileCheck %s --check-prefix=AVX --check-prefix=AVXONLY
; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -mtriple=x86_64-unknown-unknown -mattr=+avx512f < %s | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=KNL
; RUN: llc -O0 -fast-isel -fast-isel-abort=1 -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512bw,+avx512vl < %s | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=SKX

; Verify that fast-isel knows how to select aligned/unaligned vector loads.
; Also verify that the selected load instruction is in the correct domain.

define <16 x i8> @test_v16i8(<16 x i8>* %V) {
; SSE-LABEL: test_v16i8:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movdqa (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v16i8:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vmovdqa (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <16 x i8>, <16 x i8>* %V, align 16
  ret <16 x i8> %0
}

define <8 x i16> @test_v8i16(<8 x i16>* %V) {
; SSE-LABEL: test_v8i16:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movdqa (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v8i16:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vmovdqa (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <8 x i16>, <8 x i16>* %V, align 16
  ret <8 x i16> %0
}

define <4 x i32> @test_v4i32(<4 x i32>* %V) {
; SSE-LABEL: test_v4i32:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movdqa (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4i32:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vmovdqa (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <4 x i32>, <4 x i32>* %V, align 16
  ret <4 x i32> %0
}

define <2 x i64> @test_v2i64(<2 x i64>* %V) {
; SSE-LABEL: test_v2i64:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movdqa (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v2i64:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vmovdqa (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <2 x i64>, <2 x i64>* %V, align 16
  ret <2 x i64> %0
}

define <16 x i8> @test_v16i8_unaligned(<16 x i8>* %V) {
; SSE-LABEL: test_v16i8_unaligned:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movdqu (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v16i8_unaligned:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vmovdqu (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <16 x i8>, <16 x i8>* %V, align 4
  ret <16 x i8> %0
}

define <8 x i16> @test_v8i16_unaligned(<8 x i16>* %V) {
; SSE-LABEL: test_v8i16_unaligned:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movdqu (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v8i16_unaligned:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vmovdqu (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <8 x i16>, <8 x i16>* %V, align 4
  ret <8 x i16> %0
}

define <4 x i32> @test_v4i32_unaligned(<4 x i32>* %V) {
; SSE-LABEL: test_v4i32_unaligned:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movdqu (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4i32_unaligned:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vmovdqu (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <4 x i32>, <4 x i32>* %V, align 4
  ret <4 x i32> %0
}

define <2 x i64> @test_v2i64_unaligned(<2 x i64>* %V) {
; SSE-LABEL: test_v2i64_unaligned:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movdqu (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v2i64_unaligned:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vmovdqu (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <2 x i64>, <2 x i64>* %V, align 4
  ret <2 x i64> %0
}

define <4 x float> @test_v4f32(<4 x float>* %V) {
; SSE-LABEL: test_v4f32:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4f32:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vmovaps (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <4 x float>, <4 x float>* %V, align 16
  ret <4 x float> %0
}

define <2 x double> @test_v2f64(<2 x double>* %V) {
; SSE-LABEL: test_v2f64:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movapd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v2f64:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vmovapd (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <2 x double>, <2 x double>* %V, align 16
  ret <2 x double> %0
}

define <4 x float> @test_v4f32_unaligned(<4 x float>* %V) {
; SSE-LABEL: test_v4f32_unaligned:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4f32_unaligned:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vmovups (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <4 x float>, <4 x float>* %V, align 4
  ret <4 x float> %0
}

define <2 x double> @test_v2f64_unaligned(<2 x double>* %V) {
; SSE-LABEL: test_v2f64_unaligned:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movupd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v2f64_unaligned:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vmovupd (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <2 x double>, <2 x double>* %V, align 4
  ret <2 x double> %0
}

define <16 x i8> @test_v16i8_abi_alignment(<16 x i8>* %V) {
; SSE-LABEL: test_v16i8_abi_alignment:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movdqa (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v16i8_abi_alignment:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vmovdqa (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <16 x i8>, <16 x i8>* %V
  ret <16 x i8> %0
}

define <8 x i16> @test_v8i16_abi_alignment(<8 x i16>* %V) {
; SSE-LABEL: test_v8i16_abi_alignment:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movdqa (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v8i16_abi_alignment:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vmovdqa (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <8 x i16>, <8 x i16>* %V
  ret <8 x i16> %0
}

define <4 x i32> @test_v4i32_abi_alignment(<4 x i32>* %V) {
; SSE-LABEL: test_v4i32_abi_alignment:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movdqa (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4i32_abi_alignment:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vmovdqa (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <4 x i32>, <4 x i32>* %V
  ret <4 x i32> %0
}

define <2 x i64> @test_v2i64_abi_alignment(<2 x i64>* %V) {
; SSE-LABEL: test_v2i64_abi_alignment:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movdqa (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v2i64_abi_alignment:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vmovdqa (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <2 x i64>, <2 x i64>* %V
  ret <2 x i64> %0
}

define <4 x float> @test_v4f32_abi_alignment(<4 x float>* %V) {
; SSE-LABEL: test_v4f32_abi_alignment:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4f32_abi_alignment:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vmovaps (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <4 x float>, <4 x float>* %V
  ret <4 x float> %0
}

define <2 x double> @test_v2f64_abi_alignment(<2 x double>* %V) {
; SSE-LABEL: test_v2f64_abi_alignment:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movapd (%rdi), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v2f64_abi_alignment:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vmovapd (%rdi), %xmm0
; AVX-NEXT:    retq
entry:
  %0 = load <2 x double>, <2 x double>* %V
  ret <2 x double> %0
}

define <32 x i8> @test_v32i8(<32 x i8>* %V) {
; SSE-LABEL: test_v32i8:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v32i8:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vmovdqa (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %0 = load <32 x i8>, <32 x i8>* %V, align 32
  ret <32 x i8> %0
}

define <16 x i16> @test_v16i16(<16 x i16>* %V) {
; SSE-LABEL: test_v16i16:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v16i16:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vmovdqa (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %0 = load <16 x i16>, <16 x i16>* %V, align 32
  ret <16 x i16> %0
}

define <8 x i32> @test_v8i32(<8 x i32>* %V) {
; SSE-LABEL: test_v8i32:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v8i32:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vmovdqu (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %0 = load <8 x i32>, <8 x i32>* %V, align 16
  ret <8 x i32> %0
}

define <4 x i64> @test_v4i64(<4 x i64>* %V) {
; SSE-LABEL: test_v4i64:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4i64:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vmovdqa (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %0 = load <4 x i64>, <4 x i64>* %V, align 32
  ret <4 x i64> %0
}

define <32 x i8> @test_v32i8_unaligned(<32 x i8>* %V) {
; SSE-LABEL: test_v32i8_unaligned:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v32i8_unaligned:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vmovdqu (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %0 = load <32 x i8>, <32 x i8>* %V, align 4
  ret <32 x i8> %0
}

define <16 x i16> @test_v16i16_unaligned(<16 x i16>* %V) {
; SSE-LABEL: test_v16i16_unaligned:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v16i16_unaligned:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vmovdqu (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %0 = load <16 x i16>, <16 x i16>* %V, align 4
  ret <16 x i16> %0
}

define <8 x i32> @test_v8i32_unaligned(<8 x i32>* %V) {
; SSE-LABEL: test_v8i32_unaligned:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v8i32_unaligned:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vmovdqu (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %0 = load <8 x i32>, <8 x i32>* %V, align 4
  ret <8 x i32> %0
}

define <4 x i64> @test_v4i64_unaligned(<4 x i64>* %V) {
; SSE-LABEL: test_v4i64_unaligned:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4i64_unaligned:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vmovdqu (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %0 = load <4 x i64>, <4 x i64>* %V, align 4
  ret <4 x i64> %0
}

define <8 x float> @test_v8f32(<8 x float>* %V) {
; SSE-LABEL: test_v8f32:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v8f32:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vmovups (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %0 = load <8 x float>, <8 x float>* %V, align 16
  ret <8 x float> %0
}

define <4 x double> @test_v4f64(<4 x double>* %V) {
; SSE-LABEL: test_v4f64:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movapd (%rdi), %xmm0
; SSE-NEXT:    movapd 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4f64:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vmovupd (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %0 = load <4 x double>, <4 x double>* %V, align 16
  ret <4 x double> %0
}

define <8 x float> @test_v8f32_unaligned(<8 x float>* %V) {
; SSE-LABEL: test_v8f32_unaligned:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v8f32_unaligned:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vmovups (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %0 = load <8 x float>, <8 x float>* %V, align 4
  ret <8 x float> %0
}

define <4 x double> @test_v4f64_unaligned(<4 x double>* %V) {
; SSE-LABEL: test_v4f64_unaligned:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movupd (%rdi), %xmm0
; SSE-NEXT:    movupd 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v4f64_unaligned:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vmovupd (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %0 = load <4 x double>, <4 x double>* %V, align 4
  ret <4 x double> %0
}

define <64 x i8> @test_v64i8(<64 x i8>* %V) {
; SSE-LABEL: test_v64i8:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    movaps 32(%rdi), %xmm2
; SSE-NEXT:    movaps 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v64i8:
; AVXONLY:       # BB#0: # %entry
; AVXONLY-NEXT:    vmovaps (%rdi), %ymm0
; AVXONLY-NEXT:    vmovaps 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v64i8:
; KNL:       # BB#0: # %entry
; KNL-NEXT:    vmovaps (%rdi), %ymm0
; KNL-NEXT:    vmovaps 32(%rdi), %ymm1
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v64i8:
; SKX:       # BB#0: # %entry
; SKX-NEXT:    vmovdqu64 (%rdi), %zmm0
; SKX-NEXT:    retq
entry:
  %0 = load <64 x i8>, <64 x i8>* %V, align 32
  ret <64 x i8> %0
}

define <32 x i16> @test_v32i16(<32 x i16>* %V) {
; SSE-LABEL: test_v32i16:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    movaps 32(%rdi), %xmm2
; SSE-NEXT:    movaps 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v32i16:
; AVXONLY:       # BB#0: # %entry
; AVXONLY-NEXT:    vmovaps (%rdi), %ymm0
; AVXONLY-NEXT:    vmovaps 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v32i16:
; KNL:       # BB#0: # %entry
; KNL-NEXT:    vmovaps (%rdi), %ymm0
; KNL-NEXT:    vmovaps 32(%rdi), %ymm1
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v32i16:
; SKX:       # BB#0: # %entry
; SKX-NEXT:    vmovdqu64 (%rdi), %zmm0
; SKX-NEXT:    retq
entry:
  %0 = load <32 x i16>, <32 x i16>* %V, align 32
  ret <32 x i16> %0
}

define <16 x i32> @test_v16i32(<16 x i32>* %V) {
; SSE-LABEL: test_v16i32:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    movaps 32(%rdi), %xmm2
; SSE-NEXT:    movaps 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v16i32:
; AVXONLY:       # BB#0: # %entry
; AVXONLY-NEXT:    vmovups (%rdi), %ymm0
; AVXONLY-NEXT:    vmovups 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; AVX512-LABEL: test_v16i32:
; AVX512:       # BB#0: # %entry
; AVX512-NEXT:    vmovdqu64 (%rdi), %zmm0
; AVX512-NEXT:    retq
entry:
  %0 = load <16 x i32>, <16 x i32>* %V, align 16
  ret <16 x i32> %0
}

define <8 x i64> @test_v8i64(<8 x i64>* %V) {
; SSE-LABEL: test_v8i64:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    movaps 32(%rdi), %xmm2
; SSE-NEXT:    movaps 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v8i64:
; AVXONLY:       # BB#0: # %entry
; AVXONLY-NEXT:    vmovaps (%rdi), %ymm0
; AVXONLY-NEXT:    vmovaps 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; AVX512-LABEL: test_v8i64:
; AVX512:       # BB#0: # %entry
; AVX512-NEXT:    vmovdqu64 (%rdi), %zmm0
; AVX512-NEXT:    retq
entry:
  %0 = load <8 x i64>, <8 x i64>* %V, align 32
  ret <8 x i64> %0
}

define <64 x i8> @test_v64i8_unaligned(<64 x i8>* %V) {
; SSE-LABEL: test_v64i8_unaligned:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    movups 32(%rdi), %xmm2
; SSE-NEXT:    movups 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v64i8_unaligned:
; AVXONLY:       # BB#0: # %entry
; AVXONLY-NEXT:    vmovups (%rdi), %ymm0
; AVXONLY-NEXT:    vmovups 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v64i8_unaligned:
; KNL:       # BB#0: # %entry
; KNL-NEXT:    vmovups (%rdi), %ymm0
; KNL-NEXT:    vmovups 32(%rdi), %ymm1
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v64i8_unaligned:
; SKX:       # BB#0: # %entry
; SKX-NEXT:    vmovdqu64 (%rdi), %zmm0
; SKX-NEXT:    retq
entry:
  %0 = load <64 x i8>, <64 x i8>* %V, align 4
  ret <64 x i8> %0
}

define <32 x i16> @test_v32i16_unaligned(<32 x i16>* %V) {
; SSE-LABEL: test_v32i16_unaligned:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    movups 32(%rdi), %xmm2
; SSE-NEXT:    movups 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v32i16_unaligned:
; AVXONLY:       # BB#0: # %entry
; AVXONLY-NEXT:    vmovups (%rdi), %ymm0
; AVXONLY-NEXT:    vmovups 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; KNL-LABEL: test_v32i16_unaligned:
; KNL:       # BB#0: # %entry
; KNL-NEXT:    vmovups (%rdi), %ymm0
; KNL-NEXT:    vmovups 32(%rdi), %ymm1
; KNL-NEXT:    retq
;
; SKX-LABEL: test_v32i16_unaligned:
; SKX:       # BB#0: # %entry
; SKX-NEXT:    vmovdqu64 (%rdi), %zmm0
; SKX-NEXT:    retq
entry:
  %0 = load <32 x i16>, <32 x i16>* %V, align 4
  ret <32 x i16> %0
}

define <16 x i32> @test_v16i32_unaligned(<16 x i32>* %V) {
; SSE-LABEL: test_v16i32_unaligned:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    movups 32(%rdi), %xmm2
; SSE-NEXT:    movups 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v16i32_unaligned:
; AVXONLY:       # BB#0: # %entry
; AVXONLY-NEXT:    vmovups (%rdi), %ymm0
; AVXONLY-NEXT:    vmovups 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; AVX512-LABEL: test_v16i32_unaligned:
; AVX512:       # BB#0: # %entry
; AVX512-NEXT:    vmovdqu64 (%rdi), %zmm0
; AVX512-NEXT:    retq
entry:
  %0 = load <16 x i32>, <16 x i32>* %V, align 4
  ret <16 x i32> %0
}

define <8 x i64> @test_v8i64_unaligned(<8 x i64>* %V) {
; SSE-LABEL: test_v8i64_unaligned:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    movups 32(%rdi), %xmm2
; SSE-NEXT:    movups 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v8i64_unaligned:
; AVXONLY:       # BB#0: # %entry
; AVXONLY-NEXT:    vmovups (%rdi), %ymm0
; AVXONLY-NEXT:    vmovups 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; AVX512-LABEL: test_v8i64_unaligned:
; AVX512:       # BB#0: # %entry
; AVX512-NEXT:    vmovdqu64 (%rdi), %zmm0
; AVX512-NEXT:    retq
entry:
  %0 = load <8 x i64>, <8 x i64>* %V, align 4
  ret <8 x i64> %0
}

define <8 x float> @test_v16f32(<8 x float>* %V) {
; SSE-LABEL: test_v16f32:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: test_v16f32:
; AVX:       # BB#0: # %entry
; AVX-NEXT:    vmovups (%rdi), %ymm0
; AVX-NEXT:    retq
entry:
  %0 = load <8 x float>, <8 x float>* %V, align 16
  ret <8 x float> %0
}

define <8 x double> @test_v8f64(<8 x double>* %V) {
; SSE-LABEL: test_v8f64:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movapd (%rdi), %xmm0
; SSE-NEXT:    movapd 16(%rdi), %xmm1
; SSE-NEXT:    movapd 32(%rdi), %xmm2
; SSE-NEXT:    movapd 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v8f64:
; AVXONLY:       # BB#0: # %entry
; AVXONLY-NEXT:    vmovupd (%rdi), %ymm0
; AVXONLY-NEXT:    vmovupd 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; AVX512-LABEL: test_v8f64:
; AVX512:       # BB#0: # %entry
; AVX512-NEXT:    vmovupd (%rdi), %zmm0
; AVX512-NEXT:    retq
entry:
  %0 = load <8 x double>, <8 x double>* %V, align 16
  ret <8 x double> %0
}

define <16 x float> @test_v16f32_unaligned(<16 x float>* %V) {
; SSE-LABEL: test_v16f32_unaligned:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movups (%rdi), %xmm0
; SSE-NEXT:    movups 16(%rdi), %xmm1
; SSE-NEXT:    movups 32(%rdi), %xmm2
; SSE-NEXT:    movups 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v16f32_unaligned:
; AVXONLY:       # BB#0: # %entry
; AVXONLY-NEXT:    vmovups (%rdi), %ymm0
; AVXONLY-NEXT:    vmovups 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; AVX512-LABEL: test_v16f32_unaligned:
; AVX512:       # BB#0: # %entry
; AVX512-NEXT:    vmovups (%rdi), %zmm0
; AVX512-NEXT:    retq
entry:
  %0 = load <16 x float>, <16 x float>* %V, align 4
  ret <16 x float> %0
}

define <8 x double> @test_v8f64_unaligned(<8 x double>* %V) {
; SSE-LABEL: test_v8f64_unaligned:
; SSE:       # BB#0: # %entry
; SSE-NEXT:    movupd (%rdi), %xmm0
; SSE-NEXT:    movupd 16(%rdi), %xmm1
; SSE-NEXT:    movupd 32(%rdi), %xmm2
; SSE-NEXT:    movupd 48(%rdi), %xmm3
; SSE-NEXT:    retq
;
; AVXONLY-LABEL: test_v8f64_unaligned:
; AVXONLY:       # BB#0: # %entry
; AVXONLY-NEXT:    vmovupd (%rdi), %ymm0
; AVXONLY-NEXT:    vmovupd 32(%rdi), %ymm1
; AVXONLY-NEXT:    retq
;
; AVX512-LABEL: test_v8f64_unaligned:
; AVX512:       # BB#0: # %entry
; AVX512-NEXT:    vmovupd (%rdi), %zmm0
; AVX512-NEXT:    retq
entry:
  %0 = load <8 x double>, <8 x double>* %V, align 4
  ret <8 x double> %0
}