; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=CHECK,SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4a | FileCheck %s --check-prefixes=CHECK,SSE,SSE4A
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=CHECK,SSE,SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=CHECK,AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl | FileCheck %s --check-prefixes=CHECK,AVX512

; Test codegen for under-aligned nontemporal vector stores.
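; Nontemporal vector stores such as MOVNTPS require the address to be aligned
; to the full vector width, so when the IR alignment is too small the backend
; must split the store: into narrower, sufficiently aligned nontemporal vector
; stores where possible, otherwise all the way down to 8-byte MOVNTI stores,
; which have no alignment requirement.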

; XMM versions.

define void @test_zero_v2f64_align1(ptr %dst) nounwind {
; CHECK-LABEL: test_zero_v2f64_align1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    movntiq %rax, 8(%rdi)
; CHECK-NEXT:    movntiq %rax, (%rdi)
; CHECK-NEXT:    retq
  store <2 x double> zeroinitializer, ptr %dst, align 1, !nontemporal !1
  ret void
}

define void @test_zero_v4f32_align1(ptr %dst) nounwind {
; CHECK-LABEL: test_zero_v4f32_align1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    movntiq %rax, 8(%rdi)
; CHECK-NEXT:    movntiq %rax, (%rdi)
; CHECK-NEXT:    retq
  store <4 x float> zeroinitializer, ptr %dst, align 1, !nontemporal !1
  ret void
}

define void @test_zero_v2i64_align1(ptr %dst) nounwind {
; CHECK-LABEL: test_zero_v2i64_align1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    movntiq %rax, 8(%rdi)
; CHECK-NEXT:    movntiq %rax, (%rdi)
; CHECK-NEXT:    retq
  store <2 x i64> zeroinitializer, ptr %dst, align 1, !nontemporal !1
  ret void
}

define void @test_zero_v4i32_align1(ptr %dst) nounwind {
; CHECK-LABEL: test_zero_v4i32_align1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    movntiq %rax, 8(%rdi)
; CHECK-NEXT:    movntiq %rax, (%rdi)
; CHECK-NEXT:    retq
  store <4 x i32> zeroinitializer, ptr %dst, align 1, !nontemporal !1
  ret void
}

define void @test_zero_v8i16_align1(ptr %dst) nounwind {
; CHECK-LABEL: test_zero_v8i16_align1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    movntiq %rax, 8(%rdi)
; CHECK-NEXT:    movntiq %rax, (%rdi)
; CHECK-NEXT:    retq
  store <8 x i16> zeroinitializer, ptr %dst, align 1, !nontemporal !1
  ret void
}

define void @test_zero_v16i8_align1(ptr %dst) nounwind {
; CHECK-LABEL: test_zero_v16i8_align1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    movntiq %rax, 8(%rdi)
; CHECK-NEXT:    movntiq %rax, (%rdi)
; CHECK-NEXT:    retq
  store <16 x i8> zeroinitializer, ptr %dst, align 1, !nontemporal !1
  ret void
}

; YMM versions.
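; A 32-byte store with align < 16 is scalarized to four MOVNTIs on every
; subtarget; with align 16 it is split into two 16-byte MOVNTPS stores.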

define void @test_zero_v4f64_align1(ptr %dst) nounwind {
; CHECK-LABEL: test_zero_v4f64_align1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    movntiq %rax, 8(%rdi)
; CHECK-NEXT:    movntiq %rax, (%rdi)
; CHECK-NEXT:    movntiq %rax, 24(%rdi)
; CHECK-NEXT:    movntiq %rax, 16(%rdi)
; CHECK-NEXT:    retq
  store <4 x double> zeroinitializer, ptr %dst, align 1, !nontemporal !1
  ret void
}

define void @test_zero_v8f32_align1(ptr %dst) nounwind {
; CHECK-LABEL: test_zero_v8f32_align1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    movntiq %rax, 8(%rdi)
; CHECK-NEXT:    movntiq %rax, (%rdi)
; CHECK-NEXT:    movntiq %rax, 24(%rdi)
; CHECK-NEXT:    movntiq %rax, 16(%rdi)
; CHECK-NEXT:    retq
  store <8 x float> zeroinitializer, ptr %dst, align 1, !nontemporal !1
  ret void
}

define void @test_zero_v4i64_align1(ptr %dst) nounwind {
; CHECK-LABEL: test_zero_v4i64_align1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    movntiq %rax, 8(%rdi)
; CHECK-NEXT:    movntiq %rax, (%rdi)
; CHECK-NEXT:    movntiq %rax, 24(%rdi)
; CHECK-NEXT:    movntiq %rax, 16(%rdi)
; CHECK-NEXT:    retq
  store <4 x i64> zeroinitializer, ptr %dst, align 1, !nontemporal !1
  ret void
}

define void @test_zero_v8i32_align1(ptr %dst) nounwind {
; CHECK-LABEL: test_zero_v8i32_align1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    movntiq %rax, 8(%rdi)
; CHECK-NEXT:    movntiq %rax, (%rdi)
; CHECK-NEXT:    movntiq %rax, 24(%rdi)
; CHECK-NEXT:    movntiq %rax, 16(%rdi)
; CHECK-NEXT:    retq
  store <8 x i32> zeroinitializer, ptr %dst, align 1, !nontemporal !1
  ret void
}

define void @test_zero_v16i16_align1(ptr %dst) nounwind {
; CHECK-LABEL: test_zero_v16i16_align1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    movntiq %rax, 8(%rdi)
; CHECK-NEXT:    movntiq %rax, (%rdi)
; CHECK-NEXT:    movntiq %rax, 24(%rdi)
; CHECK-NEXT:    movntiq %rax, 16(%rdi)
; CHECK-NEXT:    retq
  store <16 x i16> zeroinitializer, ptr %dst, align 1, !nontemporal !1
  ret void
}

define void @test_zero_v32i8_align1(ptr %dst) nounwind {
; CHECK-LABEL: test_zero_v32i8_align1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    movntiq %rax, 8(%rdi)
; CHECK-NEXT:    movntiq %rax, (%rdi)
; CHECK-NEXT:    movntiq %rax, 24(%rdi)
; CHECK-NEXT:    movntiq %rax, 16(%rdi)
; CHECK-NEXT:    retq
  store <32 x i8> zeroinitializer, ptr %dst, align 1, !nontemporal !1
  ret void
}

define void @test_zero_v4f64_align16(ptr %dst) nounwind {
; SSE-LABEL: test_zero_v4f64_align16:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    movntps %xmm0, 16(%rdi)
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_zero_v4f64_align16:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX-NEXT:    vmovntps %xmm0, (%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_zero_v4f64_align16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX512-NEXT:    vmovntps %xmm0, (%rdi)
; AVX512-NEXT:    retq
  store <4 x double> zeroinitializer, ptr %dst, align 16, !nontemporal !1
  ret void
}

define void @test_zero_v8f32_align16(ptr %dst) nounwind {
; SSE-LABEL: test_zero_v8f32_align16:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    movntps %xmm0, 16(%rdi)
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_zero_v8f32_align16:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX-NEXT:    vmovntps %xmm0, (%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_zero_v8f32_align16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX512-NEXT:    vmovntps %xmm0, (%rdi)
; AVX512-NEXT:    retq
  store <8 x float> zeroinitializer, ptr %dst, align 16, !nontemporal !1
  ret void
}

define void @test_zero_v4i64_align16(ptr %dst) nounwind {
; SSE-LABEL: test_zero_v4i64_align16:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    movntps %xmm0, 16(%rdi)
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_zero_v4i64_align16:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX-NEXT:    vmovntps %xmm0, (%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_zero_v4i64_align16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX512-NEXT:    vmovntps %xmm0, (%rdi)
; AVX512-NEXT:    retq
  store <4 x i64> zeroinitializer, ptr %dst, align 16, !nontemporal !1
  ret void
}

define void @test_zero_v8i32_align16(ptr %dst) nounwind {
; SSE-LABEL: test_zero_v8i32_align16:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    movntps %xmm0, 16(%rdi)
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_zero_v8i32_align16:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX-NEXT:    vmovntps %xmm0, (%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_zero_v8i32_align16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX512-NEXT:    vmovntps %xmm0, (%rdi)
; AVX512-NEXT:    retq
  store <8 x i32> zeroinitializer, ptr %dst, align 16, !nontemporal !1
  ret void
}

define void @test_zero_v16i16_align16(ptr %dst) nounwind {
; SSE-LABEL: test_zero_v16i16_align16:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    movntps %xmm0, 16(%rdi)
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_zero_v16i16_align16:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX-NEXT:    vmovntps %xmm0, (%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_zero_v16i16_align16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX512-NEXT:    vmovntps %xmm0, (%rdi)
; AVX512-NEXT:    retq
  store <16 x i16> zeroinitializer, ptr %dst, align 16, !nontemporal !1
  ret void
}

define void @test_zero_v32i8_align16(ptr %dst) nounwind {
; SSE-LABEL: test_zero_v32i8_align16:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    movntps %xmm0, 16(%rdi)
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_zero_v32i8_align16:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX-NEXT:    vmovntps %xmm0, (%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_zero_v32i8_align16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX512-NEXT:    vmovntps %xmm0, (%rdi)
; AVX512-NEXT:    retq
  store <32 x i8> zeroinitializer, ptr %dst, align 16, !nontemporal !1
  ret void
}

; ZMM versions.
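; A 64-byte store follows the same pattern: align < 16 scalarizes to eight
; MOVNTIs, while align 16 splits into four 16-byte MOVNTPS stores.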

define void @test_zero_v8f64_align1(ptr %dst) nounwind {
; CHECK-LABEL: test_zero_v8f64_align1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    movntiq %rax, 8(%rdi)
; CHECK-NEXT:    movntiq %rax, (%rdi)
; CHECK-NEXT:    movntiq %rax, 24(%rdi)
; CHECK-NEXT:    movntiq %rax, 16(%rdi)
; CHECK-NEXT:    movntiq %rax, 40(%rdi)
; CHECK-NEXT:    movntiq %rax, 32(%rdi)
; CHECK-NEXT:    movntiq %rax, 56(%rdi)
; CHECK-NEXT:    movntiq %rax, 48(%rdi)
; CHECK-NEXT:    retq
  store <8 x double> zeroinitializer, ptr %dst, align 1, !nontemporal !1
  ret void
}

define void @test_zero_v16f32_align1(ptr %dst) nounwind {
; CHECK-LABEL: test_zero_v16f32_align1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    movntiq %rax, 8(%rdi)
; CHECK-NEXT:    movntiq %rax, (%rdi)
; CHECK-NEXT:    movntiq %rax, 24(%rdi)
; CHECK-NEXT:    movntiq %rax, 16(%rdi)
; CHECK-NEXT:    movntiq %rax, 40(%rdi)
; CHECK-NEXT:    movntiq %rax, 32(%rdi)
; CHECK-NEXT:    movntiq %rax, 56(%rdi)
; CHECK-NEXT:    movntiq %rax, 48(%rdi)
; CHECK-NEXT:    retq
  store <16 x float> zeroinitializer, ptr %dst, align 1, !nontemporal !1
  ret void
}

define void @test_zero_v8i64_align1(ptr %dst) nounwind {
; CHECK-LABEL: test_zero_v8i64_align1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    movntiq %rax, 8(%rdi)
; CHECK-NEXT:    movntiq %rax, (%rdi)
; CHECK-NEXT:    movntiq %rax, 24(%rdi)
; CHECK-NEXT:    movntiq %rax, 16(%rdi)
; CHECK-NEXT:    movntiq %rax, 40(%rdi)
; CHECK-NEXT:    movntiq %rax, 32(%rdi)
; CHECK-NEXT:    movntiq %rax, 56(%rdi)
; CHECK-NEXT:    movntiq %rax, 48(%rdi)
; CHECK-NEXT:    retq
  store <8 x i64> zeroinitializer, ptr %dst, align 1, !nontemporal !1
  ret void
}

define void @test_zero_v16i32_align1(ptr %dst) nounwind {
; CHECK-LABEL: test_zero_v16i32_align1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    movntiq %rax, 8(%rdi)
; CHECK-NEXT:    movntiq %rax, (%rdi)
; CHECK-NEXT:    movntiq %rax, 24(%rdi)
; CHECK-NEXT:    movntiq %rax, 16(%rdi)
; CHECK-NEXT:    movntiq %rax, 40(%rdi)
; CHECK-NEXT:    movntiq %rax, 32(%rdi)
; CHECK-NEXT:    movntiq %rax, 56(%rdi)
; CHECK-NEXT:    movntiq %rax, 48(%rdi)
; CHECK-NEXT:    retq
  store <16 x i32> zeroinitializer, ptr %dst, align 1, !nontemporal !1
  ret void
}

define void @test_zero_v32i16_align1(ptr %dst) nounwind {
; CHECK-LABEL: test_zero_v32i16_align1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    movntiq %rax, 8(%rdi)
; CHECK-NEXT:    movntiq %rax, (%rdi)
; CHECK-NEXT:    movntiq %rax, 24(%rdi)
; CHECK-NEXT:    movntiq %rax, 16(%rdi)
; CHECK-NEXT:    movntiq %rax, 40(%rdi)
; CHECK-NEXT:    movntiq %rax, 32(%rdi)
; CHECK-NEXT:    movntiq %rax, 56(%rdi)
; CHECK-NEXT:    movntiq %rax, 48(%rdi)
; CHECK-NEXT:    retq
  store <32 x i16> zeroinitializer, ptr %dst, align 1, !nontemporal !1
  ret void
}

define void @test_zero_v64i8_align1(ptr %dst) nounwind {
; CHECK-LABEL: test_zero_v64i8_align1:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    movntiq %rax, 8(%rdi)
; CHECK-NEXT:    movntiq %rax, (%rdi)
; CHECK-NEXT:    movntiq %rax, 24(%rdi)
; CHECK-NEXT:    movntiq %rax, 16(%rdi)
; CHECK-NEXT:    movntiq %rax, 40(%rdi)
; CHECK-NEXT:    movntiq %rax, 32(%rdi)
; CHECK-NEXT:    movntiq %rax, 56(%rdi)
; CHECK-NEXT:    movntiq %rax, 48(%rdi)
; CHECK-NEXT:    retq
  store <64 x i8> zeroinitializer, ptr %dst, align 1, !nontemporal !1
  ret void
}

define void @test_zero_v8f64_align16(ptr %dst) nounwind {
; SSE-LABEL: test_zero_v8f64_align16:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    movntps %xmm0, 16(%rdi)
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    movntps %xmm0, 48(%rdi)
; SSE-NEXT:    movntps %xmm0, 32(%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_zero_v8f64_align16:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX-NEXT:    vmovntps %xmm0, (%rdi)
; AVX-NEXT:    vmovntps %xmm0, 48(%rdi)
; AVX-NEXT:    vmovntps %xmm0, 32(%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_zero_v8f64_align16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX512-NEXT:    vmovntps %xmm0, (%rdi)
; AVX512-NEXT:    vmovntps %xmm0, 48(%rdi)
; AVX512-NEXT:    vmovntps %xmm0, 32(%rdi)
; AVX512-NEXT:    retq
  store <8 x double> zeroinitializer, ptr %dst, align 16, !nontemporal !1
  ret void
}

define void @test_zero_v16f32_align16(ptr %dst) nounwind {
; SSE-LABEL: test_zero_v16f32_align16:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    movntps %xmm0, 16(%rdi)
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    movntps %xmm0, 48(%rdi)
; SSE-NEXT:    movntps %xmm0, 32(%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_zero_v16f32_align16:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX-NEXT:    vmovntps %xmm0, (%rdi)
; AVX-NEXT:    vmovntps %xmm0, 48(%rdi)
; AVX-NEXT:    vmovntps %xmm0, 32(%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_zero_v16f32_align16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX512-NEXT:    vmovntps %xmm0, (%rdi)
; AVX512-NEXT:    vmovntps %xmm0, 48(%rdi)
; AVX512-NEXT:    vmovntps %xmm0, 32(%rdi)
; AVX512-NEXT:    retq
  store <16 x float> zeroinitializer, ptr %dst, align 16, !nontemporal !1
  ret void
}

define void @test_zero_v8i64_align16(ptr %dst) nounwind {
; SSE-LABEL: test_zero_v8i64_align16:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    movntps %xmm0, 16(%rdi)
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    movntps %xmm0, 48(%rdi)
; SSE-NEXT:    movntps %xmm0, 32(%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_zero_v8i64_align16:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX-NEXT:    vmovntps %xmm0, (%rdi)
; AVX-NEXT:    vmovntps %xmm0, 48(%rdi)
; AVX-NEXT:    vmovntps %xmm0, 32(%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_zero_v8i64_align16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX512-NEXT:    vmovntps %xmm0, (%rdi)
; AVX512-NEXT:    vmovntps %xmm0, 48(%rdi)
; AVX512-NEXT:    vmovntps %xmm0, 32(%rdi)
; AVX512-NEXT:    retq
  store <8 x i64> zeroinitializer, ptr %dst, align 16, !nontemporal !1
  ret void
}

define void @test_zero_v16i32_align16(ptr %dst) nounwind {
; SSE-LABEL: test_zero_v16i32_align16:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    movntps %xmm0, 16(%rdi)
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    movntps %xmm0, 48(%rdi)
; SSE-NEXT:    movntps %xmm0, 32(%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_zero_v16i32_align16:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX-NEXT:    vmovntps %xmm0, (%rdi)
; AVX-NEXT:    vmovntps %xmm0, 48(%rdi)
; AVX-NEXT:    vmovntps %xmm0, 32(%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_zero_v16i32_align16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX512-NEXT:    vmovntps %xmm0, (%rdi)
; AVX512-NEXT:    vmovntps %xmm0, 48(%rdi)
; AVX512-NEXT:    vmovntps %xmm0, 32(%rdi)
; AVX512-NEXT:    retq
  store <16 x i32> zeroinitializer, ptr %dst, align 16, !nontemporal !1
  ret void
}
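
; With 32-byte alignment, SSE targets still split the 64-byte store into four
; 16-byte MOVNTPS stores, while AVX and AVX512 targets use two 32-byte YMM
; VMOVNTPS stores, followed by VZEROUPPER to avoid AVX/SSE transition
; penalties.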

define void @test_zero_v32i16_align16(ptr %dst) nounwind {
; SSE-LABEL: test_zero_v32i16_align16:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    movntps %xmm0, 16(%rdi)
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    movntps %xmm0, 48(%rdi)
; SSE-NEXT:    movntps %xmm0, 32(%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_zero_v32i16_align16:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX-NEXT:    vmovntps %xmm0, (%rdi)
; AVX-NEXT:    vmovntps %xmm0, 48(%rdi)
; AVX-NEXT:    vmovntps %xmm0, 32(%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_zero_v32i16_align16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX512-NEXT:    vmovntps %xmm0, (%rdi)
; AVX512-NEXT:    vmovntps %xmm0, 48(%rdi)
; AVX512-NEXT:    vmovntps %xmm0, 32(%rdi)
; AVX512-NEXT:    retq
  store <32 x i16> zeroinitializer, ptr %dst, align 16, !nontemporal !1
  ret void
}

define void @test_zero_v64i8_align16(ptr %dst) nounwind {
; SSE-LABEL: test_zero_v64i8_align16:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    movntps %xmm0, 16(%rdi)
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    movntps %xmm0, 48(%rdi)
; SSE-NEXT:    movntps %xmm0, 32(%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_zero_v64i8_align16:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX-NEXT:    vmovntps %xmm0, (%rdi)
; AVX-NEXT:    vmovntps %xmm0, 48(%rdi)
; AVX-NEXT:    vmovntps %xmm0, 32(%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_zero_v64i8_align16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovntps %xmm0, 16(%rdi)
; AVX512-NEXT:    vmovntps %xmm0, (%rdi)
; AVX512-NEXT:    vmovntps %xmm0, 48(%rdi)
; AVX512-NEXT:    vmovntps %xmm0, 32(%rdi)
; AVX512-NEXT:    retq
  store <64 x i8> zeroinitializer, ptr %dst, align 16, !nontemporal !1
  ret void
}

define void @test_zero_v8f64_align32(ptr %dst) nounwind {
; SSE-LABEL: test_zero_v8f64_align32:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    movntps %xmm0, 48(%rdi)
; SSE-NEXT:    movntps %xmm0, 32(%rdi)
; SSE-NEXT:    movntps %xmm0, 16(%rdi)
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_zero_v8f64_align32:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovntps %ymm0, 32(%rdi)
; AVX-NEXT:    vmovntps %ymm0, (%rdi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_zero_v8f64_align32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovntps %ymm0, 32(%rdi)
; AVX512-NEXT:    vmovntps %ymm0, (%rdi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  store <8 x double> zeroinitializer, ptr %dst, align 32, !nontemporal !1
  ret void
}

define void @test_zero_v16f32_align32(ptr %dst) nounwind {
; SSE-LABEL: test_zero_v16f32_align32:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    movntps %xmm0, 48(%rdi)
; SSE-NEXT:    movntps %xmm0, 32(%rdi)
; SSE-NEXT:    movntps %xmm0, 16(%rdi)
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_zero_v16f32_align32:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovntps %ymm0, 32(%rdi)
; AVX-NEXT:    vmovntps %ymm0, (%rdi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_zero_v16f32_align32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovntps %ymm0, 32(%rdi)
; AVX512-NEXT:    vmovntps %ymm0, (%rdi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  store <16 x float> zeroinitializer, ptr %dst, align 32, !nontemporal !1
  ret void
}

define void @test_zero_v8i64_align32(ptr %dst) nounwind {
; SSE-LABEL: test_zero_v8i64_align32:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    movntps %xmm0, 48(%rdi)
; SSE-NEXT:    movntps %xmm0, 32(%rdi)
; SSE-NEXT:    movntps %xmm0, 16(%rdi)
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_zero_v8i64_align32:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovntps %ymm0, 32(%rdi)
; AVX-NEXT:    vmovntps %ymm0, (%rdi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_zero_v8i64_align32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovntps %ymm0, 32(%rdi)
; AVX512-NEXT:    vmovntps %ymm0, (%rdi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  store <8 x i64> zeroinitializer, ptr %dst, align 32, !nontemporal !1
  ret void
}

define void @test_zero_v16i32_align32(ptr %dst) nounwind {
; SSE-LABEL: test_zero_v16i32_align32:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    movntps %xmm0, 48(%rdi)
; SSE-NEXT:    movntps %xmm0, 32(%rdi)
; SSE-NEXT:    movntps %xmm0, 16(%rdi)
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_zero_v16i32_align32:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovntps %ymm0, 32(%rdi)
; AVX-NEXT:    vmovntps %ymm0, (%rdi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_zero_v16i32_align32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovntps %ymm0, 32(%rdi)
; AVX512-NEXT:    vmovntps %ymm0, (%rdi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  store <16 x i32> zeroinitializer, ptr %dst, align 32, !nontemporal !1
  ret void
}

define void @test_zero_v32i16_align32(ptr %dst) nounwind {
; SSE-LABEL: test_zero_v32i16_align32:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    movntps %xmm0, 48(%rdi)
; SSE-NEXT:    movntps %xmm0, 32(%rdi)
; SSE-NEXT:    movntps %xmm0, 16(%rdi)
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_zero_v32i16_align32:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovntps %ymm0, 32(%rdi)
; AVX-NEXT:    vmovntps %ymm0, (%rdi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_zero_v32i16_align32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovntps %ymm0, 32(%rdi)
; AVX512-NEXT:    vmovntps %ymm0, (%rdi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  store <32 x i16> zeroinitializer, ptr %dst, align 32, !nontemporal !1
  ret void
}
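
; A store is nontemporal when tagged with !nontemporal metadata referencing a
; node that holds the single constant i32 1, as defined below.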

define void @test_zero_v64i8_align32(ptr %dst) nounwind {
; SSE-LABEL: test_zero_v64i8_align32:
; SSE:       # %bb.0:
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    movntps %xmm0, 48(%rdi)
; SSE-NEXT:    movntps %xmm0, 32(%rdi)
; SSE-NEXT:    movntps %xmm0, 16(%rdi)
; SSE-NEXT:    movntps %xmm0, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: test_zero_v64i8_align32:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovntps %ymm0, 32(%rdi)
; AVX-NEXT:    vmovntps %ymm0, (%rdi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: test_zero_v64i8_align32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovntps %ymm0, 32(%rdi)
; AVX512-NEXT:    vmovntps %ymm0, (%rdi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  store <64 x i8> zeroinitializer, ptr %dst, align 32, !nontemporal !1
  ret void
}

!1 = !{i32 1}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; SSE2: {{.*}}
; SSE41: {{.*}}
; SSE4A: {{.*}}