; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=sse2,-sse4.2 | FileCheck %s --check-prefixes=GPR,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=sse4.2,-avx | FileCheck %s --check-prefixes=GPR,SSE4
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=avx,-avx512f | FileCheck %s --check-prefixes=GPR,AVX
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=avx512f | FileCheck %s --check-prefixes=GPR,AVX512

declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1) nounwind
declare void @llvm.memset.inline.p0.i64(ptr nocapture, i8, i64, i1) nounwind

; /////////////////////////////////////////////////////////////////////////////

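; llvm.memset.inline is always expanded inline (it is never turned into a
; memset libcall), so every size below must lower to plain stores. This first
; group uses a variable byte value with no alignment information on the
; destination.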
define void @memset_1(ptr %a, i8 %value) nounwind {
; GPR-LABEL: memset_1:
; GPR: # %bb.0:
; GPR-NEXT: movb %sil, (%rdi)
; GPR-NEXT: retq
  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 %value, i64 1, i1 0)
  ret void
}

define void @memset_2(ptr %a, i8 %value) nounwind {
; GPR-LABEL: memset_2:
; GPR: # %bb.0:
; GPR-NEXT: movzbl %sil, %eax
; GPR-NEXT: shll $8, %esi
; GPR-NEXT: orl %esi, %eax
; GPR-NEXT: movw %ax, (%rdi)
; GPR-NEXT: retq
  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 %value, i64 2, i1 0)
  ret void
}

define void @memset_4(ptr %a, i8 %value) nounwind {
; GPR-LABEL: memset_4:
; GPR: # %bb.0:
; GPR-NEXT: movzbl %sil, %eax
; GPR-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101
; GPR-NEXT: movl %eax, (%rdi)
; GPR-NEXT: retq
  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 %value, i64 4, i1 0)
  ret void
}

define void @memset_8(ptr %a, i8 %value) nounwind {
; GPR-LABEL: memset_8:
; GPR: # %bb.0:
; GPR-NEXT: movzbl %sil, %eax
; GPR-NEXT: movabsq $72340172838076673, %rcx # imm = 0x101010101010101
; GPR-NEXT: imulq %rax, %rcx
; GPR-NEXT: movq %rcx, (%rdi)
; GPR-NEXT: retq
  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 %value, i64 8, i1 0)
  ret void
}

define void @memset_16(ptr %a, i8 %value) nounwind {
; SSE2-LABEL: memset_16:
; SSE2: # %bb.0:
; SSE2-NEXT: movzbl %sil, %eax
; SSE2-NEXT: movabsq $72340172838076673, %rcx # imm = 0x101010101010101
; SSE2-NEXT: imulq %rax, %rcx
; SSE2-NEXT: movq %rcx, 8(%rdi)
; SSE2-NEXT: movq %rcx, (%rdi)
; SSE2-NEXT: retq
;
; SSE4-LABEL: memset_16:
; SSE4: # %bb.0:
; SSE4-NEXT: movd %esi, %xmm0
; SSE4-NEXT: pxor %xmm1, %xmm1
; SSE4-NEXT: pshufb %xmm1, %xmm0
; SSE4-NEXT: movdqu %xmm0, (%rdi)
; SSE4-NEXT: retq
;
; AVX-LABEL: memset_16:
; AVX: # %bb.0:
; AVX-NEXT: vmovd %esi, %xmm0
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpshufb %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovdqu %xmm0, (%rdi)
; AVX-NEXT: retq
;
; AVX512-LABEL: memset_16:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovd %esi, %xmm0
; AVX512-NEXT: vpbroadcastb %xmm0, %xmm0
; AVX512-NEXT: vmovdqu %xmm0, (%rdi)
; AVX512-NEXT: retq
  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 %value, i64 16, i1 0)
  ret void
}

define void @memset_32(ptr %a, i8 %value) nounwind {
; SSE2-LABEL: memset_32:
; SSE2: # %bb.0:
; SSE2-NEXT: movzbl %sil, %eax
; SSE2-NEXT: movabsq $72340172838076673, %rcx # imm = 0x101010101010101
; SSE2-NEXT: imulq %rax, %rcx
; SSE2-NEXT: movq %rcx, 24(%rdi)
; SSE2-NEXT: movq %rcx, 16(%rdi)
; SSE2-NEXT: movq %rcx, 8(%rdi)
; SSE2-NEXT: movq %rcx, (%rdi)
; SSE2-NEXT: retq
;
; SSE4-LABEL: memset_32:
; SSE4: # %bb.0:
; SSE4-NEXT: movd %esi, %xmm0
; SSE4-NEXT: pxor %xmm1, %xmm1
; SSE4-NEXT: pshufb %xmm1, %xmm0
; SSE4-NEXT: movdqu %xmm0, 16(%rdi)
; SSE4-NEXT: movdqu %xmm0, (%rdi)
; SSE4-NEXT: retq
;
; AVX-LABEL: memset_32:
; AVX: # %bb.0:
; AVX-NEXT: vmovd %esi, %xmm0
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpshufb %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovdqu %xmm0, 16(%rdi)
; AVX-NEXT: vmovdqu %xmm0, (%rdi)
; AVX-NEXT: retq
;
; AVX512-LABEL: memset_32:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovd %esi, %xmm0
; AVX512-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX512-NEXT: vmovdqu %ymm0, (%rdi)
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 %value, i64 32, i1 0)
  ret void
}

define void @memset_64(ptr %a, i8 %value) nounwind {
; SSE2-LABEL: memset_64:
; SSE2: # %bb.0:
; SSE2-NEXT: movzbl %sil, %eax
; SSE2-NEXT: movabsq $72340172838076673, %rcx # imm = 0x101010101010101
; SSE2-NEXT: imulq %rax, %rcx
; SSE2-NEXT: movq %rcx, 56(%rdi)
; SSE2-NEXT: movq %rcx, 48(%rdi)
; SSE2-NEXT: movq %rcx, 40(%rdi)
; SSE2-NEXT: movq %rcx, 32(%rdi)
; SSE2-NEXT: movq %rcx, 24(%rdi)
; SSE2-NEXT: movq %rcx, 16(%rdi)
; SSE2-NEXT: movq %rcx, 8(%rdi)
; SSE2-NEXT: movq %rcx, (%rdi)
; SSE2-NEXT: retq
;
; SSE4-LABEL: memset_64:
; SSE4: # %bb.0:
; SSE4-NEXT: movd %esi, %xmm0
; SSE4-NEXT: pxor %xmm1, %xmm1
; SSE4-NEXT: pshufb %xmm1, %xmm0
; SSE4-NEXT: movdqu %xmm0, 48(%rdi)
; SSE4-NEXT: movdqu %xmm0, 32(%rdi)
; SSE4-NEXT: movdqu %xmm0, 16(%rdi)
; SSE4-NEXT: movdqu %xmm0, (%rdi)
; SSE4-NEXT: retq
;
; AVX-LABEL: memset_64:
; AVX: # %bb.0:
; AVX-NEXT: vmovd %esi, %xmm0
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpshufb %xmm1, %xmm0, %xmm0
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX-NEXT: vmovups %ymm0, 32(%rdi)
; AVX-NEXT: vmovups %ymm0, (%rdi)
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX512-LABEL: memset_64:
; AVX512: # %bb.0:
; AVX512-NEXT: movzbl %sil, %eax
; AVX512-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101
; AVX512-NEXT: vpbroadcastd %eax, %zmm0
; AVX512-NEXT: vmovdqu64 %zmm0, (%rdi)
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 %value, i64 64, i1 0)
  ret void
}

; /////////////////////////////////////////////////////////////////////////////

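; As above, but the destination pointer carries an alignment, so the aligned
; store forms (movdqa / vmovdqa / vmovaps) can be used.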
define void @aligned_memset_16(ptr align 16 %a, i8 %value) nounwind {
; SSE2-LABEL: aligned_memset_16:
; SSE2: # %bb.0:
; SSE2-NEXT: movd %esi, %xmm0
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-NEXT: movdqa %xmm0, (%rdi)
; SSE2-NEXT: retq
;
; SSE4-LABEL: aligned_memset_16:
; SSE4: # %bb.0:
; SSE4-NEXT: movd %esi, %xmm0
; SSE4-NEXT: pxor %xmm1, %xmm1
; SSE4-NEXT: pshufb %xmm1, %xmm0
; SSE4-NEXT: movdqa %xmm0, (%rdi)
; SSE4-NEXT: retq
;
; AVX-LABEL: aligned_memset_16:
; AVX: # %bb.0:
; AVX-NEXT: vmovd %esi, %xmm0
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpshufb %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovdqa %xmm0, (%rdi)
; AVX-NEXT: retq
;
; AVX512-LABEL: aligned_memset_16:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovd %esi, %xmm0
; AVX512-NEXT: vpbroadcastb %xmm0, %xmm0
; AVX512-NEXT: vmovdqa %xmm0, (%rdi)
; AVX512-NEXT: retq
  tail call void @llvm.memset.inline.p0.i64(ptr align 16 %a, i8 %value, i64 16, i1 0)
  ret void
}

define void @aligned_memset_32(ptr align 32 %a, i8 %value) nounwind {
; SSE2-LABEL: aligned_memset_32:
; SSE2: # %bb.0:
; SSE2-NEXT: movd %esi, %xmm0
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-NEXT: movdqa %xmm0, 16(%rdi)
; SSE2-NEXT: movdqa %xmm0, (%rdi)
; SSE2-NEXT: retq
;
; SSE4-LABEL: aligned_memset_32:
; SSE4: # %bb.0:
; SSE4-NEXT: movd %esi, %xmm0
; SSE4-NEXT: pxor %xmm1, %xmm1
; SSE4-NEXT: pshufb %xmm1, %xmm0
; SSE4-NEXT: movdqa %xmm0, 16(%rdi)
; SSE4-NEXT: movdqa %xmm0, (%rdi)
; SSE4-NEXT: retq
;
; AVX-LABEL: aligned_memset_32:
; AVX: # %bb.0:
; AVX-NEXT: vmovd %esi, %xmm0
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpshufb %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovdqa %xmm0, 16(%rdi)
; AVX-NEXT: vmovdqa %xmm0, (%rdi)
; AVX-NEXT: retq
;
; AVX512-LABEL: aligned_memset_32:
; AVX512: # %bb.0:
; AVX512-NEXT: vmovd %esi, %xmm0
; AVX512-NEXT: vpbroadcastb %xmm0, %ymm0
; AVX512-NEXT: vmovdqa %ymm0, (%rdi)
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  tail call void @llvm.memset.inline.p0.i64(ptr align 32 %a, i8 %value, i64 32, i1 0)
  ret void
}

define void @aligned_memset_64(ptr align 64 %a, i8 %value) nounwind {
; SSE2-LABEL: aligned_memset_64:
; SSE2: # %bb.0:
; SSE2-NEXT: movd %esi, %xmm0
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-NEXT: movdqa %xmm0, 48(%rdi)
; SSE2-NEXT: movdqa %xmm0, 32(%rdi)
; SSE2-NEXT: movdqa %xmm0, 16(%rdi)
; SSE2-NEXT: movdqa %xmm0, (%rdi)
; SSE2-NEXT: retq
;
; SSE4-LABEL: aligned_memset_64:
; SSE4: # %bb.0:
; SSE4-NEXT: movd %esi, %xmm0
; SSE4-NEXT: pxor %xmm1, %xmm1
; SSE4-NEXT: pshufb %xmm1, %xmm0
; SSE4-NEXT: movdqa %xmm0, 48(%rdi)
; SSE4-NEXT: movdqa %xmm0, 32(%rdi)
; SSE4-NEXT: movdqa %xmm0, 16(%rdi)
; SSE4-NEXT: movdqa %xmm0, (%rdi)
; SSE4-NEXT: retq
;
; AVX-LABEL: aligned_memset_64:
; AVX: # %bb.0:
; AVX-NEXT: vmovd %esi, %xmm0
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpshufb %xmm1, %xmm0, %xmm0
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX-NEXT: vmovaps %ymm0, 32(%rdi)
; AVX-NEXT: vmovaps %ymm0, (%rdi)
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX512-LABEL: aligned_memset_64:
; AVX512: # %bb.0:
; AVX512-NEXT: movzbl %sil, %eax
; AVX512-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101
; AVX512-NEXT: vpbroadcastd %eax, %zmm0
; AVX512-NEXT: vmovdqa64 %zmm0, (%rdi)
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  tail call void @llvm.memset.inline.p0.i64(ptr align 64 %a, i8 %value, i64 64, i1 0)
  ret void
}

; /////////////////////////////////////////////////////////////////////////////

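; Memset to a constant zero: the zero pattern is stored directly, either as
; immediate stores or from a register zeroed with (v)xorps.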
define void @bzero_1(ptr %a) nounwind {
; GPR-LABEL: bzero_1:
; GPR: # %bb.0:
; GPR-NEXT: movb $0, (%rdi)
; GPR-NEXT: retq
  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 0, i64 1, i1 0)
  ret void
}

define void @bzero_2(ptr %a) nounwind {
; GPR-LABEL: bzero_2:
; GPR: # %bb.0:
; GPR-NEXT: movw $0, (%rdi)
; GPR-NEXT: retq
  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 0, i64 2, i1 0)
  ret void
}

define void @bzero_4(ptr %a) nounwind {
; GPR-LABEL: bzero_4:
; GPR: # %bb.0:
; GPR-NEXT: movl $0, (%rdi)
; GPR-NEXT: retq
  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 0, i64 4, i1 0)
  ret void
}

define void @bzero_8(ptr %a) nounwind {
; GPR-LABEL: bzero_8:
; GPR: # %bb.0:
; GPR-NEXT: movq $0, (%rdi)
; GPR-NEXT: retq
  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 0, i64 8, i1 0)
  ret void
}

define void @bzero_16(ptr %a) nounwind {
; SSE2-LABEL: bzero_16:
; SSE2: # %bb.0:
; SSE2-NEXT: movq $0, 8(%rdi)
; SSE2-NEXT: movq $0, (%rdi)
; SSE2-NEXT: retq
;
; SSE4-LABEL: bzero_16:
; SSE4: # %bb.0:
; SSE4-NEXT: xorps %xmm0, %xmm0
; SSE4-NEXT: movups %xmm0, (%rdi)
; SSE4-NEXT: retq
;
; AVX-LABEL: bzero_16:
; AVX: # %bb.0:
; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT: vmovups %xmm0, (%rdi)
; AVX-NEXT: retq
;
; AVX512-LABEL: bzero_16:
; AVX512: # %bb.0:
; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovups %xmm0, (%rdi)
; AVX512-NEXT: retq
  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 0, i64 16, i1 0)
  ret void
}

define void @bzero_32(ptr %a) nounwind {
; SSE2-LABEL: bzero_32:
; SSE2: # %bb.0:
; SSE2-NEXT: movq $0, 24(%rdi)
; SSE2-NEXT: movq $0, 16(%rdi)
; SSE2-NEXT: movq $0, 8(%rdi)
; SSE2-NEXT: movq $0, (%rdi)
; SSE2-NEXT: retq
;
; SSE4-LABEL: bzero_32:
; SSE4: # %bb.0:
; SSE4-NEXT: xorps %xmm0, %xmm0
; SSE4-NEXT: movups %xmm0, 16(%rdi)
; SSE4-NEXT: movups %xmm0, (%rdi)
; SSE4-NEXT: retq
;
; AVX-LABEL: bzero_32:
; AVX: # %bb.0:
; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT: vmovups %ymm0, (%rdi)
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX512-LABEL: bzero_32:
; AVX512: # %bb.0:
; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovups %ymm0, (%rdi)
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 0, i64 32, i1 0)
  ret void
}

define void @bzero_64(ptr %a) nounwind {
; SSE2-LABEL: bzero_64:
; SSE2: # %bb.0:
; SSE2-NEXT: movq $0, 56(%rdi)
; SSE2-NEXT: movq $0, 48(%rdi)
; SSE2-NEXT: movq $0, 40(%rdi)
; SSE2-NEXT: movq $0, 32(%rdi)
; SSE2-NEXT: movq $0, 24(%rdi)
; SSE2-NEXT: movq $0, 16(%rdi)
; SSE2-NEXT: movq $0, 8(%rdi)
; SSE2-NEXT: movq $0, (%rdi)
; SSE2-NEXT: retq
;
; SSE4-LABEL: bzero_64:
; SSE4: # %bb.0:
; SSE4-NEXT: xorps %xmm0, %xmm0
; SSE4-NEXT: movups %xmm0, 48(%rdi)
; SSE4-NEXT: movups %xmm0, 32(%rdi)
; SSE4-NEXT: movups %xmm0, 16(%rdi)
; SSE4-NEXT: movups %xmm0, (%rdi)
; SSE4-NEXT: retq
;
; AVX-LABEL: bzero_64:
; AVX: # %bb.0:
; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT: vmovups %ymm0, 32(%rdi)
; AVX-NEXT: vmovups %ymm0, (%rdi)
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX512-LABEL: bzero_64:
; AVX512: # %bb.0:
; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovups %zmm0, (%rdi)
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 0, i64 64, i1 0)
  ret void
}

; /////////////////////////////////////////////////////////////////////////////

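; Memset to zero with a known destination alignment, so aligned stores
; ((v)movaps) can be used.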
define void @aligned_bzero_16(ptr %a) nounwind {
; SSE2-LABEL: aligned_bzero_16:
; SSE2: # %bb.0:
; SSE2-NEXT: xorps %xmm0, %xmm0
; SSE2-NEXT: movaps %xmm0, (%rdi)
; SSE2-NEXT: retq
;
; SSE4-LABEL: aligned_bzero_16:
; SSE4: # %bb.0:
; SSE4-NEXT: xorps %xmm0, %xmm0
; SSE4-NEXT: movaps %xmm0, (%rdi)
; SSE4-NEXT: retq
;
; AVX-LABEL: aligned_bzero_16:
; AVX: # %bb.0:
; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT: vmovaps %xmm0, (%rdi)
; AVX-NEXT: retq
;
; AVX512-LABEL: aligned_bzero_16:
; AVX512: # %bb.0:
; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovaps %xmm0, (%rdi)
; AVX512-NEXT: retq
  tail call void @llvm.memset.inline.p0.i64(ptr align 16 %a, i8 0, i64 16, i1 0)
  ret void
}

define void @aligned_bzero_32(ptr %a) nounwind {
; SSE2-LABEL: aligned_bzero_32:
; SSE2: # %bb.0:
; SSE2-NEXT: xorps %xmm0, %xmm0
; SSE2-NEXT: movaps %xmm0, 16(%rdi)
; SSE2-NEXT: movaps %xmm0, (%rdi)
; SSE2-NEXT: retq
;
; SSE4-LABEL: aligned_bzero_32:
; SSE4: # %bb.0:
; SSE4-NEXT: xorps %xmm0, %xmm0
; SSE4-NEXT: movaps %xmm0, 16(%rdi)
; SSE4-NEXT: movaps %xmm0, (%rdi)
; SSE4-NEXT: retq
;
; AVX-LABEL: aligned_bzero_32:
; AVX: # %bb.0:
; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT: vmovaps %ymm0, (%rdi)
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX512-LABEL: aligned_bzero_32:
; AVX512: # %bb.0:
; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovaps %ymm0, (%rdi)
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  tail call void @llvm.memset.inline.p0.i64(ptr align 32 %a, i8 0, i64 32, i1 0)
  ret void
}

define void @aligned_bzero_64(ptr %a) nounwind {
; SSE2-LABEL: aligned_bzero_64:
; SSE2: # %bb.0:
; SSE2-NEXT: xorps %xmm0, %xmm0
; SSE2-NEXT: movaps %xmm0, 48(%rdi)
; SSE2-NEXT: movaps %xmm0, 32(%rdi)
; SSE2-NEXT: movaps %xmm0, 16(%rdi)
; SSE2-NEXT: movaps %xmm0, (%rdi)
; SSE2-NEXT: retq
;
; SSE4-LABEL: aligned_bzero_64:
; SSE4: # %bb.0:
; SSE4-NEXT: xorps %xmm0, %xmm0
; SSE4-NEXT: movaps %xmm0, 48(%rdi)
; SSE4-NEXT: movaps %xmm0, 32(%rdi)
; SSE4-NEXT: movaps %xmm0, 16(%rdi)
; SSE4-NEXT: movaps %xmm0, (%rdi)
; SSE4-NEXT: retq
;
; AVX-LABEL: aligned_bzero_64:
; AVX: # %bb.0:
; AVX-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT: vmovaps %ymm0, 32(%rdi)
; AVX-NEXT: vmovaps %ymm0, (%rdi)
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX512-LABEL: aligned_bzero_64:
; AVX512: # %bb.0:
; AVX512-NEXT: vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovaps %zmm0, (%rdi)
; AVX512-NEXT: vzeroupper
; AVX512-NEXT: retq
  tail call void @llvm.memset.inline.p0.i64(ptr align 64 %a, i8 0, i64 64, i1 0)
  ret void
}