xref: /llvm-project/llvm/test/CodeGen/X86/avoid-sfb-overlaps.ll (revision f0dd12ec5c0169ba5b4363b62d59511181cf954a)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-linux -mcpu=x86-64 -verify-machineinstrs | FileCheck %s -check-prefix=CHECK
3; RUN: llc < %s -mtriple=x86_64-linux -mcpu=x86-64 --x86-disable-avoid-SFB -verify-machineinstrs | FileCheck %s --check-prefix=DISABLED
4; RUN: llc < %s -mtriple=x86_64-linux -mcpu=core-avx2 -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-AVX2
5; RUN: llc < %s -mtriple=x86_64-linux -mcpu=skx -verify-machineinstrs | FileCheck %s -check-prefix=CHECK-AVX512
;
; Four llc configurations are exercised, each verified under its own FileCheck
; prefix (all use -mtriple=x86_64-linux and -verify-machineinstrs):
;   * CHECK prefix        -> -mcpu=x86-64, default options
;   * DISABLED prefix     -> -mcpu=x86-64 plus --x86-disable-avoid-SFB
;   * CHECK-AVX2 prefix   -> -mcpu=core-avx2, default options
;   * CHECK-AVX512 prefix -> -mcpu=skx, default options
; Only the DISABLED configuration passes --x86-disable-avoid-SFB, so it serves
; as the baseline the other three are compared against.
6
7; ModuleID = '../testSFB/testOverlapBlocks.c'
8source_filename = "../testSFB/testOverlapBlocks.c"
9target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
10target triple = "x86_64-unknown-linux-gnu"
11
; test_overlap_1: a 4-byte store to A-8 is followed by a 16-byte copy from
; A-16 to A; then two 8-byte stores (A-9 and A-16) and a 1-byte store (A-1)
; are followed by a second 16-byte copy from A-16 to A+16. Both copies thus
; read bytes that were just written by narrower, partly overlapping stores.
;
; Expected code per the assertions below:
;   * DISABLED prefix: each copy is one 16-byte movups load/store pair.
;   * Other three prefixes: the first copy is emitted as 8+4+4-byte pieces and
;     the second as 8+4+2+1+1-byte pieces (source offsets -16/-8/-4 and
;     -16/-8/-4/-2/-1 respectively).
12; Function Attrs: nounwind uwtable
13define dso_local void @test_overlap_1(ptr nocapture %A, i32 %x) local_unnamed_addr #0 {
14; CHECK-LABEL: test_overlap_1:
15; CHECK:       # %bb.0: # %entry
16; CHECK-NEXT:    movl $7, -8(%rdi)
17; CHECK-NEXT:    movq -16(%rdi), %rax
18; CHECK-NEXT:    movq %rax, (%rdi)
19; CHECK-NEXT:    movl -8(%rdi), %eax
20; CHECK-NEXT:    movl %eax, 8(%rdi)
21; CHECK-NEXT:    movl -4(%rdi), %eax
22; CHECK-NEXT:    movl %eax, 12(%rdi)
23; CHECK-NEXT:    movslq %esi, %rax
24; CHECK-NEXT:    movq %rax, -9(%rdi)
25; CHECK-NEXT:    movq %rax, -16(%rdi)
26; CHECK-NEXT:    movb $0, -1(%rdi)
27; CHECK-NEXT:    movq -16(%rdi), %rax
28; CHECK-NEXT:    movq %rax, 16(%rdi)
29; CHECK-NEXT:    movl -8(%rdi), %eax
30; CHECK-NEXT:    movl %eax, 24(%rdi)
31; CHECK-NEXT:    movzwl -4(%rdi), %eax
32; CHECK-NEXT:    movw %ax, 28(%rdi)
33; CHECK-NEXT:    movzbl -2(%rdi), %eax
34; CHECK-NEXT:    movb %al, 30(%rdi)
35; CHECK-NEXT:    movzbl -1(%rdi), %eax
36; CHECK-NEXT:    movb %al, 31(%rdi)
37; CHECK-NEXT:    retq
38;
39; DISABLED-LABEL: test_overlap_1:
40; DISABLED:       # %bb.0: # %entry
41; DISABLED-NEXT:    movl $7, -8(%rdi)
42; DISABLED-NEXT:    movups -16(%rdi), %xmm0
43; DISABLED-NEXT:    movups %xmm0, (%rdi)
44; DISABLED-NEXT:    movslq %esi, %rax
45; DISABLED-NEXT:    movq %rax, -9(%rdi)
46; DISABLED-NEXT:    movq %rax, -16(%rdi)
47; DISABLED-NEXT:    movb $0, -1(%rdi)
48; DISABLED-NEXT:    movups -16(%rdi), %xmm0
49; DISABLED-NEXT:    movups %xmm0, 16(%rdi)
50; DISABLED-NEXT:    retq
51;
52; CHECK-AVX2-LABEL: test_overlap_1:
53; CHECK-AVX2:       # %bb.0: # %entry
54; CHECK-AVX2-NEXT:    movl $7, -8(%rdi)
55; CHECK-AVX2-NEXT:    movq -16(%rdi), %rax
56; CHECK-AVX2-NEXT:    movq %rax, (%rdi)
57; CHECK-AVX2-NEXT:    movl -8(%rdi), %eax
58; CHECK-AVX2-NEXT:    movl %eax, 8(%rdi)
59; CHECK-AVX2-NEXT:    movl -4(%rdi), %eax
60; CHECK-AVX2-NEXT:    movl %eax, 12(%rdi)
61; CHECK-AVX2-NEXT:    movslq %esi, %rax
62; CHECK-AVX2-NEXT:    movq %rax, -9(%rdi)
63; CHECK-AVX2-NEXT:    movq %rax, -16(%rdi)
64; CHECK-AVX2-NEXT:    movb $0, -1(%rdi)
65; CHECK-AVX2-NEXT:    movq -16(%rdi), %rax
66; CHECK-AVX2-NEXT:    movq %rax, 16(%rdi)
67; CHECK-AVX2-NEXT:    movl -8(%rdi), %eax
68; CHECK-AVX2-NEXT:    movl %eax, 24(%rdi)
69; CHECK-AVX2-NEXT:    movzwl -4(%rdi), %eax
70; CHECK-AVX2-NEXT:    movw %ax, 28(%rdi)
71; CHECK-AVX2-NEXT:    movzbl -2(%rdi), %eax
72; CHECK-AVX2-NEXT:    movb %al, 30(%rdi)
73; CHECK-AVX2-NEXT:    movzbl -1(%rdi), %eax
74; CHECK-AVX2-NEXT:    movb %al, 31(%rdi)
75; CHECK-AVX2-NEXT:    retq
76;
77; CHECK-AVX512-LABEL: test_overlap_1:
78; CHECK-AVX512:       # %bb.0: # %entry
79; CHECK-AVX512-NEXT:    movl $7, -8(%rdi)
80; CHECK-AVX512-NEXT:    movq -16(%rdi), %rax
81; CHECK-AVX512-NEXT:    movq %rax, (%rdi)
82; CHECK-AVX512-NEXT:    movl -8(%rdi), %eax
83; CHECK-AVX512-NEXT:    movl %eax, 8(%rdi)
84; CHECK-AVX512-NEXT:    movl -4(%rdi), %eax
85; CHECK-AVX512-NEXT:    movl %eax, 12(%rdi)
86; CHECK-AVX512-NEXT:    movslq %esi, %rax
87; CHECK-AVX512-NEXT:    movq %rax, -9(%rdi)
88; CHECK-AVX512-NEXT:    movq %rax, -16(%rdi)
89; CHECK-AVX512-NEXT:    movb $0, -1(%rdi)
90; CHECK-AVX512-NEXT:    movq -16(%rdi), %rax
91; CHECK-AVX512-NEXT:    movq %rax, 16(%rdi)
92; CHECK-AVX512-NEXT:    movl -8(%rdi), %eax
93; CHECK-AVX512-NEXT:    movl %eax, 24(%rdi)
94; CHECK-AVX512-NEXT:    movzwl -4(%rdi), %eax
95; CHECK-AVX512-NEXT:    movw %ax, 28(%rdi)
96; CHECK-AVX512-NEXT:    movzbl -2(%rdi), %eax
97; CHECK-AVX512-NEXT:    movb %al, 30(%rdi)
98; CHECK-AVX512-NEXT:    movzbl -1(%rdi), %eax
99; CHECK-AVX512-NEXT:    movb %al, 31(%rdi)
100; CHECK-AVX512-NEXT:    retq
101entry:
  ; %add.ptr (A-16) is the source of both 16-byte copies.
102  %add.ptr = getelementptr inbounds i8, ptr %A, i64 -16
  ; 4-byte store covering A-8..A-5, inside the range the first copy reads.
103  %add.ptr1 = getelementptr inbounds i8, ptr %A, i64 -8
104  store i32 7, ptr %add.ptr1, align 4
  ; First copy: A-16..A-1 -> A..A+15.
105  tail call void @llvm.memcpy.p0.p0.i64(ptr align 4 %A, ptr nonnull align 4 %add.ptr, i64 16, i1 false)
  ; Three overlapping stores into the source range: 8 bytes at A-9, 8 bytes
  ; at A-16, and 1 byte at A-1.
106  %conv = sext i32 %x to i64
107  %add.ptr2 = getelementptr inbounds i8, ptr %A, i64 -9
108  store i64 %conv, ptr %add.ptr2, align 8
109  store i64 %conv, ptr %add.ptr, align 8
110  %add.ptr5 = getelementptr inbounds i8, ptr %A, i64 -1
111  store i8 0, ptr %add.ptr5, align 1
  ; Second copy: A-16..A-1 -> A+16..A+31.
112  %add.ptr6 = getelementptr inbounds i8, ptr %A, i64 16
113  tail call void @llvm.memcpy.p0.p0.i64(ptr nonnull align 4 %add.ptr6, ptr nonnull align 4 %add.ptr, i64 16, i1 false)
114  ret void
115}
116
; Declaration of the memcpy intrinsic used by every test function in this
; file. Operands are (destination, source, i64 byte count, i1 flag); all call
; sites here pass a count of 16 and `false` for the trailing i1 (the
; is-volatile flag per the LLVM LangRef).
117; Function Attrs: argmemonly nounwind
118declare void @llvm.memcpy.p0.p0.i64(ptr nocapture writeonly, ptr nocapture readonly, i64, i1) #1
119
; test_overlap_2: an 8-byte store to A-16 is followed by a 16-byte copy from
; A-16 to A; then an 8-byte store to A-8 and a 4-byte store to A-12 are
; followed by a second 16-byte copy from A-16 to A+16.
;
; Expected code per the assertions below:
;   * DISABLED prefix: each copy is one 16-byte movups load/store pair.
;   * Other three prefixes: the first copy is emitted as 8+8-byte pieces and
;     the second as 4+4+8-byte pieces (source offsets -16/-8 and -16/-12/-8).
120; Function Attrs: nounwind uwtable
121define dso_local void @test_overlap_2(ptr nocapture %A, i32 %x) local_unnamed_addr #0 {
122; CHECK-LABEL: test_overlap_2:
123; CHECK:       # %bb.0: # %entry
124; CHECK-NEXT:    movslq %esi, %rax
125; CHECK-NEXT:    movq %rax, -16(%rdi)
126; CHECK-NEXT:    movq -16(%rdi), %rcx
127; CHECK-NEXT:    movq %rcx, (%rdi)
128; CHECK-NEXT:    movq -8(%rdi), %rcx
129; CHECK-NEXT:    movq %rcx, 8(%rdi)
130; CHECK-NEXT:    movq %rax, -8(%rdi)
131; CHECK-NEXT:    movl $7, -12(%rdi)
132; CHECK-NEXT:    movl -16(%rdi), %eax
133; CHECK-NEXT:    movl %eax, 16(%rdi)
134; CHECK-NEXT:    movl -12(%rdi), %eax
135; CHECK-NEXT:    movl %eax, 20(%rdi)
136; CHECK-NEXT:    movq -8(%rdi), %rax
137; CHECK-NEXT:    movq %rax, 24(%rdi)
138; CHECK-NEXT:    retq
139;
140; DISABLED-LABEL: test_overlap_2:
141; DISABLED:       # %bb.0: # %entry
142; DISABLED-NEXT:    movslq %esi, %rax
143; DISABLED-NEXT:    movq %rax, -16(%rdi)
144; DISABLED-NEXT:    movups -16(%rdi), %xmm0
145; DISABLED-NEXT:    movups %xmm0, (%rdi)
146; DISABLED-NEXT:    movq %rax, -8(%rdi)
147; DISABLED-NEXT:    movl $7, -12(%rdi)
148; DISABLED-NEXT:    movups -16(%rdi), %xmm0
149; DISABLED-NEXT:    movups %xmm0, 16(%rdi)
150; DISABLED-NEXT:    retq
151;
152; CHECK-AVX2-LABEL: test_overlap_2:
153; CHECK-AVX2:       # %bb.0: # %entry
154; CHECK-AVX2-NEXT:    movslq %esi, %rax
155; CHECK-AVX2-NEXT:    movq %rax, -16(%rdi)
156; CHECK-AVX2-NEXT:    movq -16(%rdi), %rcx
157; CHECK-AVX2-NEXT:    movq %rcx, (%rdi)
158; CHECK-AVX2-NEXT:    movq -8(%rdi), %rcx
159; CHECK-AVX2-NEXT:    movq %rcx, 8(%rdi)
160; CHECK-AVX2-NEXT:    movq %rax, -8(%rdi)
161; CHECK-AVX2-NEXT:    movl $7, -12(%rdi)
162; CHECK-AVX2-NEXT:    movl -16(%rdi), %eax
163; CHECK-AVX2-NEXT:    movl %eax, 16(%rdi)
164; CHECK-AVX2-NEXT:    movl -12(%rdi), %eax
165; CHECK-AVX2-NEXT:    movl %eax, 20(%rdi)
166; CHECK-AVX2-NEXT:    movq -8(%rdi), %rax
167; CHECK-AVX2-NEXT:    movq %rax, 24(%rdi)
168; CHECK-AVX2-NEXT:    retq
169;
170; CHECK-AVX512-LABEL: test_overlap_2:
171; CHECK-AVX512:       # %bb.0: # %entry
172; CHECK-AVX512-NEXT:    movslq %esi, %rax
173; CHECK-AVX512-NEXT:    movq %rax, -16(%rdi)
174; CHECK-AVX512-NEXT:    movq -16(%rdi), %rcx
175; CHECK-AVX512-NEXT:    movq %rcx, (%rdi)
176; CHECK-AVX512-NEXT:    movq -8(%rdi), %rcx
177; CHECK-AVX512-NEXT:    movq %rcx, 8(%rdi)
178; CHECK-AVX512-NEXT:    movq %rax, -8(%rdi)
179; CHECK-AVX512-NEXT:    movl $7, -12(%rdi)
180; CHECK-AVX512-NEXT:    movl -16(%rdi), %eax
181; CHECK-AVX512-NEXT:    movl %eax, 16(%rdi)
182; CHECK-AVX512-NEXT:    movl -12(%rdi), %eax
183; CHECK-AVX512-NEXT:    movl %eax, 20(%rdi)
184; CHECK-AVX512-NEXT:    movq -8(%rdi), %rax
185; CHECK-AVX512-NEXT:    movq %rax, 24(%rdi)
186; CHECK-AVX512-NEXT:    retq
187entry:
  ; %add.ptr (A-16) is the source of both 16-byte copies; the sign-extended
  ; %x is stored into its first 8 bytes before the first copy.
188  %add.ptr = getelementptr inbounds i8, ptr %A, i64 -16
189  %conv = sext i32 %x to i64
190  store i64 %conv, ptr %add.ptr, align 8
  ; First copy: A-16..A-1 -> A..A+15.
191  tail call void @llvm.memcpy.p0.p0.i64(ptr align 4 %A, ptr nonnull align 4 %add.ptr, i64 16, i1 false)
  ; Two more stores into the source range: 8 bytes at A-8, 4 bytes at A-12.
192  %add.ptr3 = getelementptr inbounds i8, ptr %A, i64 -8
193  store i64 %conv, ptr %add.ptr3, align 8
194  %add.ptr4 = getelementptr inbounds i8, ptr %A, i64 -12
195  store i32 7, ptr %add.ptr4, align 4
  ; Second copy: A-16..A-1 -> A+16..A+31.
196  %add.ptr5 = getelementptr inbounds i8, ptr %A, i64 16
197  tail call void @llvm.memcpy.p0.p0.i64(ptr nonnull align 4 %add.ptr5, ptr nonnull align 4 %add.ptr, i64 16, i1 false)
198  ret void
199}
200
; test_overlap_3: same shape as test_overlap_1, except the initial 4-byte
; store lands at A-10 instead of A-8. It is followed by a 16-byte copy from
; A-16 to A; then 8-byte stores at A-9 and A-16 and a 1-byte store at A-1 are
; followed by a second 16-byte copy from A-16 to A+16.
;
; Expected code per the assertions below:
;   * DISABLED prefix: each copy is one 16-byte movups load/store pair.
;   * Other three prefixes: the first copy is emitted as 4+2+4+4+2-byte pieces
;     (source offsets -16/-12/-10/-6/-2) and the second as 8+2+4+1+1-byte
;     pieces (source offsets -16/-8/-6/-2/-1).
201; Function Attrs: nounwind uwtable
202define dso_local void @test_overlap_3(ptr nocapture %A, i32 %x) local_unnamed_addr #0 {
203; CHECK-LABEL: test_overlap_3:
204; CHECK:       # %bb.0: # %entry
205; CHECK-NEXT:    movl $7, -10(%rdi)
206; CHECK-NEXT:    movl -16(%rdi), %eax
207; CHECK-NEXT:    movl %eax, (%rdi)
208; CHECK-NEXT:    movzwl -12(%rdi), %eax
209; CHECK-NEXT:    movw %ax, 4(%rdi)
210; CHECK-NEXT:    movl -10(%rdi), %eax
211; CHECK-NEXT:    movl %eax, 6(%rdi)
212; CHECK-NEXT:    movl -6(%rdi), %eax
213; CHECK-NEXT:    movl %eax, 10(%rdi)
214; CHECK-NEXT:    movzwl -2(%rdi), %eax
215; CHECK-NEXT:    movw %ax, 14(%rdi)
216; CHECK-NEXT:    movslq %esi, %rax
217; CHECK-NEXT:    movq %rax, -9(%rdi)
218; CHECK-NEXT:    movq %rax, -16(%rdi)
219; CHECK-NEXT:    movb $0, -1(%rdi)
220; CHECK-NEXT:    movq -16(%rdi), %rax
221; CHECK-NEXT:    movq %rax, 16(%rdi)
222; CHECK-NEXT:    movzwl -8(%rdi), %eax
223; CHECK-NEXT:    movw %ax, 24(%rdi)
224; CHECK-NEXT:    movl -6(%rdi), %eax
225; CHECK-NEXT:    movl %eax, 26(%rdi)
226; CHECK-NEXT:    movzbl -2(%rdi), %eax
227; CHECK-NEXT:    movb %al, 30(%rdi)
228; CHECK-NEXT:    movzbl -1(%rdi), %eax
229; CHECK-NEXT:    movb %al, 31(%rdi)
230; CHECK-NEXT:    retq
231;
232; DISABLED-LABEL: test_overlap_3:
233; DISABLED:       # %bb.0: # %entry
234; DISABLED-NEXT:    movl $7, -10(%rdi)
235; DISABLED-NEXT:    movups -16(%rdi), %xmm0
236; DISABLED-NEXT:    movups %xmm0, (%rdi)
237; DISABLED-NEXT:    movslq %esi, %rax
238; DISABLED-NEXT:    movq %rax, -9(%rdi)
239; DISABLED-NEXT:    movq %rax, -16(%rdi)
240; DISABLED-NEXT:    movb $0, -1(%rdi)
241; DISABLED-NEXT:    movups -16(%rdi), %xmm0
242; DISABLED-NEXT:    movups %xmm0, 16(%rdi)
243; DISABLED-NEXT:    retq
244;
245; CHECK-AVX2-LABEL: test_overlap_3:
246; CHECK-AVX2:       # %bb.0: # %entry
247; CHECK-AVX2-NEXT:    movl $7, -10(%rdi)
248; CHECK-AVX2-NEXT:    movl -16(%rdi), %eax
249; CHECK-AVX2-NEXT:    movl %eax, (%rdi)
250; CHECK-AVX2-NEXT:    movzwl -12(%rdi), %eax
251; CHECK-AVX2-NEXT:    movw %ax, 4(%rdi)
252; CHECK-AVX2-NEXT:    movl -10(%rdi), %eax
253; CHECK-AVX2-NEXT:    movl %eax, 6(%rdi)
254; CHECK-AVX2-NEXT:    movl -6(%rdi), %eax
255; CHECK-AVX2-NEXT:    movl %eax, 10(%rdi)
256; CHECK-AVX2-NEXT:    movzwl -2(%rdi), %eax
257; CHECK-AVX2-NEXT:    movw %ax, 14(%rdi)
258; CHECK-AVX2-NEXT:    movslq %esi, %rax
259; CHECK-AVX2-NEXT:    movq %rax, -9(%rdi)
260; CHECK-AVX2-NEXT:    movq %rax, -16(%rdi)
261; CHECK-AVX2-NEXT:    movb $0, -1(%rdi)
262; CHECK-AVX2-NEXT:    movq -16(%rdi), %rax
263; CHECK-AVX2-NEXT:    movq %rax, 16(%rdi)
264; CHECK-AVX2-NEXT:    movzwl -8(%rdi), %eax
265; CHECK-AVX2-NEXT:    movw %ax, 24(%rdi)
266; CHECK-AVX2-NEXT:    movl -6(%rdi), %eax
267; CHECK-AVX2-NEXT:    movl %eax, 26(%rdi)
268; CHECK-AVX2-NEXT:    movzbl -2(%rdi), %eax
269; CHECK-AVX2-NEXT:    movb %al, 30(%rdi)
270; CHECK-AVX2-NEXT:    movzbl -1(%rdi), %eax
271; CHECK-AVX2-NEXT:    movb %al, 31(%rdi)
272; CHECK-AVX2-NEXT:    retq
273;
274; CHECK-AVX512-LABEL: test_overlap_3:
275; CHECK-AVX512:       # %bb.0: # %entry
276; CHECK-AVX512-NEXT:    movl $7, -10(%rdi)
277; CHECK-AVX512-NEXT:    movl -16(%rdi), %eax
278; CHECK-AVX512-NEXT:    movl %eax, (%rdi)
279; CHECK-AVX512-NEXT:    movzwl -12(%rdi), %eax
280; CHECK-AVX512-NEXT:    movw %ax, 4(%rdi)
281; CHECK-AVX512-NEXT:    movl -10(%rdi), %eax
282; CHECK-AVX512-NEXT:    movl %eax, 6(%rdi)
283; CHECK-AVX512-NEXT:    movl -6(%rdi), %eax
284; CHECK-AVX512-NEXT:    movl %eax, 10(%rdi)
285; CHECK-AVX512-NEXT:    movzwl -2(%rdi), %eax
286; CHECK-AVX512-NEXT:    movw %ax, 14(%rdi)
287; CHECK-AVX512-NEXT:    movslq %esi, %rax
288; CHECK-AVX512-NEXT:    movq %rax, -9(%rdi)
289; CHECK-AVX512-NEXT:    movq %rax, -16(%rdi)
290; CHECK-AVX512-NEXT:    movb $0, -1(%rdi)
291; CHECK-AVX512-NEXT:    movq -16(%rdi), %rax
292; CHECK-AVX512-NEXT:    movq %rax, 16(%rdi)
293; CHECK-AVX512-NEXT:    movzwl -8(%rdi), %eax
294; CHECK-AVX512-NEXT:    movw %ax, 24(%rdi)
295; CHECK-AVX512-NEXT:    movl -6(%rdi), %eax
296; CHECK-AVX512-NEXT:    movl %eax, 26(%rdi)
297; CHECK-AVX512-NEXT:    movzbl -2(%rdi), %eax
298; CHECK-AVX512-NEXT:    movb %al, 30(%rdi)
299; CHECK-AVX512-NEXT:    movzbl -1(%rdi), %eax
300; CHECK-AVX512-NEXT:    movb %al, 31(%rdi)
301; CHECK-AVX512-NEXT:    retq
302entry:
  ; %add.ptr (A-16) is the source of both 16-byte copies.
303  %add.ptr = getelementptr inbounds i8, ptr %A, i64 -16
  ; 4-byte store covering A-10..A-7, inside the range the first copy reads.
304  %add.ptr1 = getelementptr inbounds i8, ptr %A, i64 -10
305  store i32 7, ptr %add.ptr1, align 4
  ; First copy: A-16..A-1 -> A..A+15.
306  tail call void @llvm.memcpy.p0.p0.i64(ptr align 4 %A, ptr nonnull align 4 %add.ptr, i64 16, i1 false)
  ; Three overlapping stores into the source range: 8 bytes at A-9, 8 bytes
  ; at A-16, and 1 byte at A-1.
307  %conv = sext i32 %x to i64
308  %add.ptr2 = getelementptr inbounds i8, ptr %A, i64 -9
309  store i64 %conv, ptr %add.ptr2, align 8
310  store i64 %conv, ptr %add.ptr, align 8
311  %add.ptr5 = getelementptr inbounds i8, ptr %A, i64 -1
312  store i8 0, ptr %add.ptr5, align 1
  ; Second copy: A-16..A-1 -> A+16..A+31.
313  %add.ptr6 = getelementptr inbounds i8, ptr %A, i64 16
314  tail call void @llvm.memcpy.p0.p0.i64(ptr nonnull align 4 %add.ptr6, ptr nonnull align 4 %add.ptr, i64 16, i1 false)
315  ret void
316}
317
; test_overlap_4: the first 16-byte copy (A-16 to A) has no store before it in
; this function. Afterwards an 8-byte store to A-8, a 4-byte store to A-16 and
; a 4-byte store to A-11 precede a second 16-byte copy from A-16 to A+16.
;
; Expected code per the assertions below:
;   * First copy: one 16-byte vector load/store pair under every prefix
;     (movups, or vmovups for the AVX2/AVX512 prefixes).
;   * Second copy, DISABLED prefix: one 16-byte movups load/store pair.
;   * Second copy, other three prefixes: 4+1+4+4+2+1-byte pieces
;     (source offsets -16/-12/-11/-7/-3/-1).
318; Function Attrs: nounwind uwtable
319define dso_local void @test_overlap_4(ptr nocapture %A, i32 %x) local_unnamed_addr #0 {
320; CHECK-LABEL: test_overlap_4:
321; CHECK:       # %bb.0: # %entry
322; CHECK-NEXT:    movups -16(%rdi), %xmm0
323; CHECK-NEXT:    movups %xmm0, (%rdi)
324; CHECK-NEXT:    movslq %esi, %rax
325; CHECK-NEXT:    movq %rax, -8(%rdi)
326; CHECK-NEXT:    movl %eax, -16(%rdi)
327; CHECK-NEXT:    movl $0, -11(%rdi)
328; CHECK-NEXT:    movl -16(%rdi), %eax
329; CHECK-NEXT:    movl %eax, 16(%rdi)
330; CHECK-NEXT:    movzbl -12(%rdi), %eax
331; CHECK-NEXT:    movb %al, 20(%rdi)
332; CHECK-NEXT:    movl -11(%rdi), %eax
333; CHECK-NEXT:    movl %eax, 21(%rdi)
334; CHECK-NEXT:    movl -7(%rdi), %eax
335; CHECK-NEXT:    movl %eax, 25(%rdi)
336; CHECK-NEXT:    movzwl -3(%rdi), %eax
337; CHECK-NEXT:    movw %ax, 29(%rdi)
338; CHECK-NEXT:    movzbl -1(%rdi), %eax
339; CHECK-NEXT:    movb %al, 31(%rdi)
340; CHECK-NEXT:    retq
341;
342; DISABLED-LABEL: test_overlap_4:
343; DISABLED:       # %bb.0: # %entry
344; DISABLED-NEXT:    movups -16(%rdi), %xmm0
345; DISABLED-NEXT:    movups %xmm0, (%rdi)
346; DISABLED-NEXT:    movslq %esi, %rax
347; DISABLED-NEXT:    movq %rax, -8(%rdi)
348; DISABLED-NEXT:    movl %eax, -16(%rdi)
349; DISABLED-NEXT:    movl $0, -11(%rdi)
350; DISABLED-NEXT:    movups -16(%rdi), %xmm0
351; DISABLED-NEXT:    movups %xmm0, 16(%rdi)
352; DISABLED-NEXT:    retq
353;
354; CHECK-AVX2-LABEL: test_overlap_4:
355; CHECK-AVX2:       # %bb.0: # %entry
356; CHECK-AVX2-NEXT:    vmovups -16(%rdi), %xmm0
357; CHECK-AVX2-NEXT:    vmovups %xmm0, (%rdi)
358; CHECK-AVX2-NEXT:    movslq %esi, %rax
359; CHECK-AVX2-NEXT:    movq %rax, -8(%rdi)
360; CHECK-AVX2-NEXT:    movl %eax, -16(%rdi)
361; CHECK-AVX2-NEXT:    movl $0, -11(%rdi)
362; CHECK-AVX2-NEXT:    movl -16(%rdi), %eax
363; CHECK-AVX2-NEXT:    movl %eax, 16(%rdi)
364; CHECK-AVX2-NEXT:    movzbl -12(%rdi), %eax
365; CHECK-AVX2-NEXT:    movb %al, 20(%rdi)
366; CHECK-AVX2-NEXT:    movl -11(%rdi), %eax
367; CHECK-AVX2-NEXT:    movl %eax, 21(%rdi)
368; CHECK-AVX2-NEXT:    movl -7(%rdi), %eax
369; CHECK-AVX2-NEXT:    movl %eax, 25(%rdi)
370; CHECK-AVX2-NEXT:    movzwl -3(%rdi), %eax
371; CHECK-AVX2-NEXT:    movw %ax, 29(%rdi)
372; CHECK-AVX2-NEXT:    movzbl -1(%rdi), %eax
373; CHECK-AVX2-NEXT:    movb %al, 31(%rdi)
374; CHECK-AVX2-NEXT:    retq
375;
376; CHECK-AVX512-LABEL: test_overlap_4:
377; CHECK-AVX512:       # %bb.0: # %entry
378; CHECK-AVX512-NEXT:    vmovups -16(%rdi), %xmm0
379; CHECK-AVX512-NEXT:    vmovups %xmm0, (%rdi)
380; CHECK-AVX512-NEXT:    movslq %esi, %rax
381; CHECK-AVX512-NEXT:    movq %rax, -8(%rdi)
382; CHECK-AVX512-NEXT:    movl %eax, -16(%rdi)
383; CHECK-AVX512-NEXT:    movl $0, -11(%rdi)
384; CHECK-AVX512-NEXT:    movl -16(%rdi), %eax
385; CHECK-AVX512-NEXT:    movl %eax, 16(%rdi)
386; CHECK-AVX512-NEXT:    movzbl -12(%rdi), %eax
387; CHECK-AVX512-NEXT:    movb %al, 20(%rdi)
388; CHECK-AVX512-NEXT:    movl -11(%rdi), %eax
389; CHECK-AVX512-NEXT:    movl %eax, 21(%rdi)
390; CHECK-AVX512-NEXT:    movl -7(%rdi), %eax
391; CHECK-AVX512-NEXT:    movl %eax, 25(%rdi)
392; CHECK-AVX512-NEXT:    movzwl -3(%rdi), %eax
393; CHECK-AVX512-NEXT:    movw %ax, 29(%rdi)
394; CHECK-AVX512-NEXT:    movzbl -1(%rdi), %eax
395; CHECK-AVX512-NEXT:    movb %al, 31(%rdi)
396; CHECK-AVX512-NEXT:    retq
397entry:
  ; First copy: A-16..A-1 -> A..A+15, with no earlier store in this function.
398  %add.ptr = getelementptr inbounds i8, ptr %A, i64 -16
399  tail call void @llvm.memcpy.p0.p0.i64(ptr align 4 %A, ptr nonnull align 4 %add.ptr, i64 16, i1 false)
  ; Three stores into the source range: 8 bytes at A-8, 4 bytes at A-16, and
  ; 4 bytes at A-11 (which overlaps the end of the A-16 store's neighbourhood
  ; and the start of the A-8 store).
400  %conv = sext i32 %x to i64
401  %add.ptr1 = getelementptr inbounds i8, ptr %A, i64 -8
402  store i64 %conv, ptr %add.ptr1, align 8
403  store i32 %x, ptr %add.ptr, align 4
404  %add.ptr3 = getelementptr inbounds i8, ptr %A, i64 -11
405  store i32 0, ptr %add.ptr3, align 4
  ; Second copy: A-16..A-1 -> A+16..A+31.
406  %add.ptr4 = getelementptr inbounds i8, ptr %A, i64 16
407  tail call void @llvm.memcpy.p0.p0.i64(ptr nonnull align 4 %add.ptr4, ptr nonnull align 4 %add.ptr, i64 16, i1 false)
408  ret void
409}
410
; test_overlap_5: the first 16-byte copy (A-16 to A) has no store before it in
; this function. Afterwards an 8-byte store to A-16 and two 1-byte stores
; (A-14 and A-11), both inside that 8-byte range, precede a second 16-byte
; copy from A-16 to A+16.
;
; Expected code per the assertions below:
;   * First copy: one 16-byte vector load/store pair under every prefix
;     (movups, or vmovups for the AVX2/AVX512 prefixes).
;   * Second copy, DISABLED prefix: one 16-byte movups load/store pair.
;   * Second copy, other three prefixes: 2+1+2+1+8+2-byte pieces
;     (source offsets -16/-14/-13/-11/-10/-2).
411; Function Attrs: nounwind uwtable
412define dso_local void @test_overlap_5(ptr nocapture %A, i32 %x) local_unnamed_addr #0 {
413; CHECK-LABEL: test_overlap_5:
414; CHECK:       # %bb.0: # %entry
415; CHECK-NEXT:    movups -16(%rdi), %xmm0
416; CHECK-NEXT:    movups %xmm0, (%rdi)
417; CHECK-NEXT:    movslq %esi, %rax
418; CHECK-NEXT:    movq %rax, -16(%rdi)
419; CHECK-NEXT:    movb %al, -14(%rdi)
420; CHECK-NEXT:    movb $0, -11(%rdi)
421; CHECK-NEXT:    movzwl -16(%rdi), %eax
422; CHECK-NEXT:    movw %ax, 16(%rdi)
423; CHECK-NEXT:    movzbl -14(%rdi), %eax
424; CHECK-NEXT:    movb %al, 18(%rdi)
425; CHECK-NEXT:    movzwl -13(%rdi), %eax
426; CHECK-NEXT:    movw %ax, 19(%rdi)
427; CHECK-NEXT:    movzbl -11(%rdi), %eax
428; CHECK-NEXT:    movb %al, 21(%rdi)
429; CHECK-NEXT:    movq -10(%rdi), %rax
430; CHECK-NEXT:    movq %rax, 22(%rdi)
431; CHECK-NEXT:    movzwl -2(%rdi), %eax
432; CHECK-NEXT:    movw %ax, 30(%rdi)
433; CHECK-NEXT:    retq
434;
435; DISABLED-LABEL: test_overlap_5:
436; DISABLED:       # %bb.0: # %entry
437; DISABLED-NEXT:    movups -16(%rdi), %xmm0
438; DISABLED-NEXT:    movups %xmm0, (%rdi)
439; DISABLED-NEXT:    movslq %esi, %rax
440; DISABLED-NEXT:    movq %rax, -16(%rdi)
441; DISABLED-NEXT:    movb %al, -14(%rdi)
442; DISABLED-NEXT:    movb $0, -11(%rdi)
443; DISABLED-NEXT:    movups -16(%rdi), %xmm0
444; DISABLED-NEXT:    movups %xmm0, 16(%rdi)
445; DISABLED-NEXT:    retq
446;
447; CHECK-AVX2-LABEL: test_overlap_5:
448; CHECK-AVX2:       # %bb.0: # %entry
449; CHECK-AVX2-NEXT:    vmovups -16(%rdi), %xmm0
450; CHECK-AVX2-NEXT:    vmovups %xmm0, (%rdi)
451; CHECK-AVX2-NEXT:    movslq %esi, %rax
452; CHECK-AVX2-NEXT:    movq %rax, -16(%rdi)
453; CHECK-AVX2-NEXT:    movb %al, -14(%rdi)
454; CHECK-AVX2-NEXT:    movb $0, -11(%rdi)
455; CHECK-AVX2-NEXT:    movzwl -16(%rdi), %eax
456; CHECK-AVX2-NEXT:    movw %ax, 16(%rdi)
457; CHECK-AVX2-NEXT:    movzbl -14(%rdi), %eax
458; CHECK-AVX2-NEXT:    movb %al, 18(%rdi)
459; CHECK-AVX2-NEXT:    movzwl -13(%rdi), %eax
460; CHECK-AVX2-NEXT:    movw %ax, 19(%rdi)
461; CHECK-AVX2-NEXT:    movzbl -11(%rdi), %eax
462; CHECK-AVX2-NEXT:    movb %al, 21(%rdi)
463; CHECK-AVX2-NEXT:    movq -10(%rdi), %rax
464; CHECK-AVX2-NEXT:    movq %rax, 22(%rdi)
465; CHECK-AVX2-NEXT:    movzwl -2(%rdi), %eax
466; CHECK-AVX2-NEXT:    movw %ax, 30(%rdi)
467; CHECK-AVX2-NEXT:    retq
468;
469; CHECK-AVX512-LABEL: test_overlap_5:
470; CHECK-AVX512:       # %bb.0: # %entry
471; CHECK-AVX512-NEXT:    vmovups -16(%rdi), %xmm0
472; CHECK-AVX512-NEXT:    vmovups %xmm0, (%rdi)
473; CHECK-AVX512-NEXT:    movslq %esi, %rax
474; CHECK-AVX512-NEXT:    movq %rax, -16(%rdi)
475; CHECK-AVX512-NEXT:    movb %al, -14(%rdi)
476; CHECK-AVX512-NEXT:    movb $0, -11(%rdi)
477; CHECK-AVX512-NEXT:    movzwl -16(%rdi), %eax
478; CHECK-AVX512-NEXT:    movw %ax, 16(%rdi)
479; CHECK-AVX512-NEXT:    movzbl -14(%rdi), %eax
480; CHECK-AVX512-NEXT:    movb %al, 18(%rdi)
481; CHECK-AVX512-NEXT:    movzwl -13(%rdi), %eax
482; CHECK-AVX512-NEXT:    movw %ax, 19(%rdi)
483; CHECK-AVX512-NEXT:    movzbl -11(%rdi), %eax
484; CHECK-AVX512-NEXT:    movb %al, 21(%rdi)
485; CHECK-AVX512-NEXT:    movq -10(%rdi), %rax
486; CHECK-AVX512-NEXT:    movq %rax, 22(%rdi)
487; CHECK-AVX512-NEXT:    movzwl -2(%rdi), %eax
488; CHECK-AVX512-NEXT:    movw %ax, 30(%rdi)
489; CHECK-AVX512-NEXT:    retq
490entry:
  ; First copy: A-16..A-1 -> A..A+15, with no earlier store in this function.
491  %add.ptr = getelementptr inbounds i8, ptr %A, i64 -16
492  tail call void @llvm.memcpy.p0.p0.i64(ptr align 4 %A, ptr nonnull align 4 %add.ptr, i64 16, i1 false)
  ; 8-byte store at A-16, then two single-byte stores (A-14 and A-11) that
  ; overwrite bytes inside the range just written.
493  %conv = sext i32 %x to i64
494  store i64 %conv, ptr %add.ptr, align 8
495  %conv2 = trunc i32 %x to i8
496  %add.ptr3 = getelementptr inbounds i8, ptr %A, i64 -14
497  store i8 %conv2, ptr %add.ptr3, align 1
498  %add.ptr4 = getelementptr inbounds i8, ptr %A, i64 -11
499  store i8 0, ptr %add.ptr4, align 1
  ; Second copy: A-16..A-1 -> A+16..A+31.
500  %add.ptr5 = getelementptr inbounds i8, ptr %A, i64 16
501  tail call void @llvm.memcpy.p0.p0.i64(ptr nonnull align 4 %add.ptr5, ptr nonnull align 4 %add.ptr, i64 16, i1 false)
502  ret void
503}
504
; Attribute group #0 is attached to all five test_overlap_* definitions in
; this file.
505attributes #0 = { nounwind uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "frame-pointer"="none" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
506
; Attribute group #1 is attached to the llvm.memcpy declaration.
507attributes #1 = { argmemonly nounwind }
508