; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=sse2,-sse4.2 | FileCheck %s --check-prefixes=GPR,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=sse4.2,-avx  | FileCheck %s --check-prefixes=GPR,SSE4
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=avx,-avx512f | FileCheck %s --check-prefixes=GPR,AVX
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=avx512f      | FileCheck %s --check-prefixes=GPR,AVX512

declare void @llvm.memset.p0.i64(ptr nocapture, i8, i64, i1) nounwind
declare void @llvm.memset.inline.p0.i64(ptr nocapture, i8, i64, i1) nounwind

; /////////////////////////////////////////////////////////////////////////////

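; Inline memsets of a variable byte value, sizes 1 through 64 bytes, with no
; known destination alignment. The byte is splatted either in a GPR (shift/or
; or multiply by 0x01...01) or by a vector broadcast, then written with
; unaligned stores.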
define void @memset_1(ptr %a, i8 %value) nounwind {
; GPR-LABEL: memset_1:
; GPR:       # %bb.0:
; GPR-NEXT:    movb %sil, (%rdi)
; GPR-NEXT:    retq
  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 %value, i64 1, i1 0)
  ret void
}

define void @memset_2(ptr %a, i8 %value) nounwind {
; GPR-LABEL: memset_2:
; GPR:       # %bb.0:
; GPR-NEXT:    movzbl %sil, %eax
; GPR-NEXT:    shll $8, %esi
; GPR-NEXT:    orl %esi, %eax
; GPR-NEXT:    movw %ax, (%rdi)
; GPR-NEXT:    retq
  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 %value, i64 2, i1 0)
  ret void
}

define void @memset_4(ptr %a, i8 %value) nounwind {
; GPR-LABEL: memset_4:
; GPR:       # %bb.0:
; GPR-NEXT:    movzbl %sil, %eax
; GPR-NEXT:    imull $16843009, %eax, %eax # imm = 0x1010101
; GPR-NEXT:    movl %eax, (%rdi)
; GPR-NEXT:    retq
  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 %value, i64 4, i1 0)
  ret void
}

define void @memset_8(ptr %a, i8 %value) nounwind {
; GPR-LABEL: memset_8:
; GPR:       # %bb.0:
; GPR-NEXT:    movzbl %sil, %eax
; GPR-NEXT:    movabsq $72340172838076673, %rcx # imm = 0x101010101010101
; GPR-NEXT:    imulq %rax, %rcx
; GPR-NEXT:    movq %rcx, (%rdi)
; GPR-NEXT:    retq
  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 %value, i64 8, i1 0)
  ret void
}

define void @memset_16(ptr %a, i8 %value) nounwind {
; SSE2-LABEL: memset_16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movzbl %sil, %eax
; SSE2-NEXT:    movabsq $72340172838076673, %rcx # imm = 0x101010101010101
; SSE2-NEXT:    imulq %rax, %rcx
; SSE2-NEXT:    movq %rcx, 8(%rdi)
; SSE2-NEXT:    movq %rcx, (%rdi)
; SSE2-NEXT:    retq
;
; SSE4-LABEL: memset_16:
; SSE4:       # %bb.0:
; SSE4-NEXT:    movd %esi, %xmm0
; SSE4-NEXT:    pxor %xmm1, %xmm1
; SSE4-NEXT:    pshufb %xmm1, %xmm0
; SSE4-NEXT:    movdqu %xmm0, (%rdi)
; SSE4-NEXT:    retq
;
; AVX-LABEL: memset_16:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovd %esi, %xmm0
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovdqu %xmm0, (%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: memset_16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovd %esi, %xmm0
; AVX512-NEXT:    vpbroadcastb %xmm0, %xmm0
; AVX512-NEXT:    vmovdqu %xmm0, (%rdi)
; AVX512-NEXT:    retq
  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 %value, i64 16, i1 0)
  ret void
}

define void @memset_32(ptr %a, i8 %value) nounwind {
; SSE2-LABEL: memset_32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movzbl %sil, %eax
; SSE2-NEXT:    movabsq $72340172838076673, %rcx # imm = 0x101010101010101
; SSE2-NEXT:    imulq %rax, %rcx
; SSE2-NEXT:    movq %rcx, 24(%rdi)
; SSE2-NEXT:    movq %rcx, 16(%rdi)
; SSE2-NEXT:    movq %rcx, 8(%rdi)
; SSE2-NEXT:    movq %rcx, (%rdi)
; SSE2-NEXT:    retq
;
; SSE4-LABEL: memset_32:
; SSE4:       # %bb.0:
; SSE4-NEXT:    movd %esi, %xmm0
; SSE4-NEXT:    pxor %xmm1, %xmm1
; SSE4-NEXT:    pshufb %xmm1, %xmm0
; SSE4-NEXT:    movdqu %xmm0, 16(%rdi)
; SSE4-NEXT:    movdqu %xmm0, (%rdi)
; SSE4-NEXT:    retq
;
; AVX-LABEL: memset_32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovd %esi, %xmm0
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovdqu %xmm0, 16(%rdi)
; AVX-NEXT:    vmovdqu %xmm0, (%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: memset_32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovd %esi, %xmm0
; AVX512-NEXT:    vpbroadcastb %xmm0, %ymm0
; AVX512-NEXT:    vmovdqu %ymm0, (%rdi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 %value, i64 32, i1 0)
  ret void
}

define void @memset_64(ptr %a, i8 %value) nounwind {
; SSE2-LABEL: memset_64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movzbl %sil, %eax
; SSE2-NEXT:    movabsq $72340172838076673, %rcx # imm = 0x101010101010101
; SSE2-NEXT:    imulq %rax, %rcx
; SSE2-NEXT:    movq %rcx, 56(%rdi)
; SSE2-NEXT:    movq %rcx, 48(%rdi)
; SSE2-NEXT:    movq %rcx, 40(%rdi)
; SSE2-NEXT:    movq %rcx, 32(%rdi)
; SSE2-NEXT:    movq %rcx, 24(%rdi)
; SSE2-NEXT:    movq %rcx, 16(%rdi)
; SSE2-NEXT:    movq %rcx, 8(%rdi)
; SSE2-NEXT:    movq %rcx, (%rdi)
; SSE2-NEXT:    retq
;
; SSE4-LABEL: memset_64:
; SSE4:       # %bb.0:
; SSE4-NEXT:    movd %esi, %xmm0
; SSE4-NEXT:    pxor %xmm1, %xmm1
; SSE4-NEXT:    pshufb %xmm1, %xmm0
; SSE4-NEXT:    movdqu %xmm0, 48(%rdi)
; SSE4-NEXT:    movdqu %xmm0, 32(%rdi)
; SSE4-NEXT:    movdqu %xmm0, 16(%rdi)
; SSE4-NEXT:    movdqu %xmm0, (%rdi)
; SSE4-NEXT:    retq
;
; AVX-LABEL: memset_64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovd %esi, %xmm0
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX-NEXT:    vmovups %ymm0, 32(%rdi)
; AVX-NEXT:    vmovups %ymm0, (%rdi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: memset_64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    movzbl %sil, %eax
; AVX512-NEXT:    imull $16843009, %eax, %eax # imm = 0x1010101
; AVX512-NEXT:    vpbroadcastd %eax, %zmm0
; AVX512-NEXT:    vmovdqu64 %zmm0, (%rdi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 %value, i64 64, i1 0)
  ret void
}

; /////////////////////////////////////////////////////////////////////////////

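; The same variable-value memsets, but with the destination alignment given on
; the pointer, so aligned vector stores are expected instead of the unaligned
; forms above.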
define void @aligned_memset_16(ptr align 16 %a, i8 %value) nounwind {
; SSE2-LABEL: aligned_memset_16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movd %esi, %xmm0
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-NEXT:    movdqa %xmm0, (%rdi)
; SSE2-NEXT:    retq
;
; SSE4-LABEL: aligned_memset_16:
; SSE4:       # %bb.0:
; SSE4-NEXT:    movd %esi, %xmm0
; SSE4-NEXT:    pxor %xmm1, %xmm1
; SSE4-NEXT:    pshufb %xmm1, %xmm0
; SSE4-NEXT:    movdqa %xmm0, (%rdi)
; SSE4-NEXT:    retq
;
; AVX-LABEL: aligned_memset_16:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovd %esi, %xmm0
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovdqa %xmm0, (%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: aligned_memset_16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovd %esi, %xmm0
; AVX512-NEXT:    vpbroadcastb %xmm0, %xmm0
; AVX512-NEXT:    vmovdqa %xmm0, (%rdi)
; AVX512-NEXT:    retq
  tail call void @llvm.memset.inline.p0.i64(ptr align 16 %a, i8 %value, i64 16, i1 0)
  ret void
}

define void @aligned_memset_32(ptr align 32 %a, i8 %value) nounwind {
; SSE2-LABEL: aligned_memset_32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movd %esi, %xmm0
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-NEXT:    movdqa %xmm0, 16(%rdi)
; SSE2-NEXT:    movdqa %xmm0, (%rdi)
; SSE2-NEXT:    retq
;
; SSE4-LABEL: aligned_memset_32:
; SSE4:       # %bb.0:
; SSE4-NEXT:    movd %esi, %xmm0
; SSE4-NEXT:    pxor %xmm1, %xmm1
; SSE4-NEXT:    pshufb %xmm1, %xmm0
; SSE4-NEXT:    movdqa %xmm0, 16(%rdi)
; SSE4-NEXT:    movdqa %xmm0, (%rdi)
; SSE4-NEXT:    retq
;
; AVX-LABEL: aligned_memset_32:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovd %esi, %xmm0
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmovdqa %xmm0, 16(%rdi)
; AVX-NEXT:    vmovdqa %xmm0, (%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: aligned_memset_32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovd %esi, %xmm0
; AVX512-NEXT:    vpbroadcastb %xmm0, %ymm0
; AVX512-NEXT:    vmovdqa %ymm0, (%rdi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  tail call void @llvm.memset.inline.p0.i64(ptr align 32 %a, i8 %value, i64 32, i1 0)
  ret void
}

define void @aligned_memset_64(ptr align 64 %a, i8 %value) nounwind {
; SSE2-LABEL: aligned_memset_64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movd %esi, %xmm0
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE2-NEXT:    movdqa %xmm0, 48(%rdi)
; SSE2-NEXT:    movdqa %xmm0, 32(%rdi)
; SSE2-NEXT:    movdqa %xmm0, 16(%rdi)
; SSE2-NEXT:    movdqa %xmm0, (%rdi)
; SSE2-NEXT:    retq
;
; SSE4-LABEL: aligned_memset_64:
; SSE4:       # %bb.0:
; SSE4-NEXT:    movd %esi, %xmm0
; SSE4-NEXT:    pxor %xmm1, %xmm1
; SSE4-NEXT:    pshufb %xmm1, %xmm0
; SSE4-NEXT:    movdqa %xmm0, 48(%rdi)
; SSE4-NEXT:    movdqa %xmm0, 32(%rdi)
; SSE4-NEXT:    movdqa %xmm0, 16(%rdi)
; SSE4-NEXT:    movdqa %xmm0, (%rdi)
; SSE4-NEXT:    retq
;
; AVX-LABEL: aligned_memset_64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovd %esi, %xmm0
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpshufb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX-NEXT:    vmovaps %ymm0, 32(%rdi)
; AVX-NEXT:    vmovaps %ymm0, (%rdi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: aligned_memset_64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    movzbl %sil, %eax
; AVX512-NEXT:    imull $16843009, %eax, %eax # imm = 0x1010101
; AVX512-NEXT:    vpbroadcastd %eax, %zmm0
; AVX512-NEXT:    vmovdqa64 %zmm0, (%rdi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  tail call void @llvm.memset.inline.p0.i64(ptr align 64 %a, i8 %value, i64 64, i1 0)
  ret void
}

; /////////////////////////////////////////////////////////////////////////////

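; Zero memsets (bzero-style) with no known destination alignment: depending on
; size and subtarget, zero is written via immediate stores or by storing a
; zeroed vector register with unaligned stores.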
define void @bzero_1(ptr %a) nounwind {
; GPR-LABEL: bzero_1:
; GPR:       # %bb.0:
; GPR-NEXT:    movb $0, (%rdi)
; GPR-NEXT:    retq
  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 0, i64 1, i1 0)
  ret void
}

define void @bzero_2(ptr %a) nounwind {
; GPR-LABEL: bzero_2:
; GPR:       # %bb.0:
; GPR-NEXT:    movw $0, (%rdi)
; GPR-NEXT:    retq
  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 0, i64 2, i1 0)
  ret void
}

define void @bzero_4(ptr %a) nounwind {
; GPR-LABEL: bzero_4:
; GPR:       # %bb.0:
; GPR-NEXT:    movl $0, (%rdi)
; GPR-NEXT:    retq
  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 0, i64 4, i1 0)
  ret void
}

define void @bzero_8(ptr %a) nounwind {
; GPR-LABEL: bzero_8:
; GPR:       # %bb.0:
; GPR-NEXT:    movq $0, (%rdi)
; GPR-NEXT:    retq
  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 0, i64 8, i1 0)
  ret void
}

define void @bzero_16(ptr %a) nounwind {
; SSE2-LABEL: bzero_16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movq $0, 8(%rdi)
; SSE2-NEXT:    movq $0, (%rdi)
; SSE2-NEXT:    retq
;
; SSE4-LABEL: bzero_16:
; SSE4:       # %bb.0:
; SSE4-NEXT:    xorps %xmm0, %xmm0
; SSE4-NEXT:    movups %xmm0, (%rdi)
; SSE4-NEXT:    retq
;
; AVX-LABEL: bzero_16:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovups %xmm0, (%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: bzero_16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovups %xmm0, (%rdi)
; AVX512-NEXT:    retq
  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 0, i64 16, i1 0)
  ret void
}

define void @bzero_32(ptr %a) nounwind {
; SSE2-LABEL: bzero_32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movq $0, 24(%rdi)
; SSE2-NEXT:    movq $0, 16(%rdi)
; SSE2-NEXT:    movq $0, 8(%rdi)
; SSE2-NEXT:    movq $0, (%rdi)
; SSE2-NEXT:    retq
;
; SSE4-LABEL: bzero_32:
; SSE4:       # %bb.0:
; SSE4-NEXT:    xorps %xmm0, %xmm0
; SSE4-NEXT:    movups %xmm0, 16(%rdi)
; SSE4-NEXT:    movups %xmm0, (%rdi)
; SSE4-NEXT:    retq
;
; AVX-LABEL: bzero_32:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovups %ymm0, (%rdi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: bzero_32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovups %ymm0, (%rdi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 0, i64 32, i1 0)
  ret void
}

define void @bzero_64(ptr %a) nounwind {
; SSE2-LABEL: bzero_64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movq $0, 56(%rdi)
; SSE2-NEXT:    movq $0, 48(%rdi)
; SSE2-NEXT:    movq $0, 40(%rdi)
; SSE2-NEXT:    movq $0, 32(%rdi)
; SSE2-NEXT:    movq $0, 24(%rdi)
; SSE2-NEXT:    movq $0, 16(%rdi)
; SSE2-NEXT:    movq $0, 8(%rdi)
; SSE2-NEXT:    movq $0, (%rdi)
; SSE2-NEXT:    retq
;
; SSE4-LABEL: bzero_64:
; SSE4:       # %bb.0:
; SSE4-NEXT:    xorps %xmm0, %xmm0
; SSE4-NEXT:    movups %xmm0, 48(%rdi)
; SSE4-NEXT:    movups %xmm0, 32(%rdi)
; SSE4-NEXT:    movups %xmm0, 16(%rdi)
; SSE4-NEXT:    movups %xmm0, (%rdi)
; SSE4-NEXT:    retq
;
; AVX-LABEL: bzero_64:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovups %ymm0, 32(%rdi)
; AVX-NEXT:    vmovups %ymm0, (%rdi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: bzero_64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovups %zmm0, (%rdi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  tail call void @llvm.memset.inline.p0.i64(ptr %a, i8 0, i64 64, i1 0)
  ret void
}

; /////////////////////////////////////////////////////////////////////////////

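; Zero memsets with the destination alignment given on the call, so aligned
; stores of a zeroed vector register are expected.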
define void @aligned_bzero_16(ptr %a) nounwind {
; SSE2-LABEL: aligned_bzero_16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    xorps %xmm0, %xmm0
; SSE2-NEXT:    movaps %xmm0, (%rdi)
; SSE2-NEXT:    retq
;
; SSE4-LABEL: aligned_bzero_16:
; SSE4:       # %bb.0:
; SSE4-NEXT:    xorps %xmm0, %xmm0
; SSE4-NEXT:    movaps %xmm0, (%rdi)
; SSE4-NEXT:    retq
;
; AVX-LABEL: aligned_bzero_16:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovaps %xmm0, (%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: aligned_bzero_16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovaps %xmm0, (%rdi)
; AVX512-NEXT:    retq
  tail call void @llvm.memset.inline.p0.i64(ptr align 16 %a, i8 0, i64 16, i1 0)
  ret void
}

define void @aligned_bzero_32(ptr %a) nounwind {
; SSE2-LABEL: aligned_bzero_32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    xorps %xmm0, %xmm0
; SSE2-NEXT:    movaps %xmm0, 16(%rdi)
; SSE2-NEXT:    movaps %xmm0, (%rdi)
; SSE2-NEXT:    retq
;
; SSE4-LABEL: aligned_bzero_32:
; SSE4:       # %bb.0:
; SSE4-NEXT:    xorps %xmm0, %xmm0
; SSE4-NEXT:    movaps %xmm0, 16(%rdi)
; SSE4-NEXT:    movaps %xmm0, (%rdi)
; SSE4-NEXT:    retq
;
; AVX-LABEL: aligned_bzero_32:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovaps %ymm0, (%rdi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: aligned_bzero_32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovaps %ymm0, (%rdi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  tail call void @llvm.memset.inline.p0.i64(ptr align 32 %a, i8 0, i64 32, i1 0)
  ret void
}

define void @aligned_bzero_64(ptr %a) nounwind {
; SSE2-LABEL: aligned_bzero_64:
; SSE2:       # %bb.0:
; SSE2-NEXT:    xorps %xmm0, %xmm0
; SSE2-NEXT:    movaps %xmm0, 48(%rdi)
; SSE2-NEXT:    movaps %xmm0, 32(%rdi)
; SSE2-NEXT:    movaps %xmm0, 16(%rdi)
; SSE2-NEXT:    movaps %xmm0, (%rdi)
; SSE2-NEXT:    retq
;
; SSE4-LABEL: aligned_bzero_64:
; SSE4:       # %bb.0:
; SSE4-NEXT:    xorps %xmm0, %xmm0
; SSE4-NEXT:    movaps %xmm0, 48(%rdi)
; SSE4-NEXT:    movaps %xmm0, 32(%rdi)
; SSE4-NEXT:    movaps %xmm0, 16(%rdi)
; SSE4-NEXT:    movaps %xmm0, (%rdi)
; SSE4-NEXT:    retq
;
; AVX-LABEL: aligned_bzero_64:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    vmovaps %ymm0, 32(%rdi)
; AVX-NEXT:    vmovaps %ymm0, (%rdi)
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
;
; AVX512-LABEL: aligned_bzero_64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX512-NEXT:    vmovaps %zmm0, (%rdi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  tail call void @llvm.memset.inline.p0.i64(ptr align 64 %a, i8 0, i64 64, i1 0)
  ret void
}