xref: /llvm-project/llvm/test/CodeGen/X86/avoid-sfb.ll (revision 189900eb149bb55ae3787346f57c1ccbdc50fb3c)
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-linux -mcpu=x86-64 -verify-machineinstrs | FileCheck %s -check-prefixes=SSE,CHECK
3; RUN: llc < %s -mtriple=x86_64-linux -mcpu=x86-64 --x86-disable-avoid-SFB -verify-machineinstrs | FileCheck %s --check-prefixes=SSE,DISABLED
4; RUN: llc < %s -mtriple=x86_64-linux -mcpu=core-avx2 -verify-machineinstrs | FileCheck %s -check-prefixes=AVX
5; RUN: llc < %s -mtriple=x86_64-linux -mcpu=skx -verify-machineinstrs | FileCheck %s -check-prefixes=AVX
6
7target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
8target triple = "x86_64-unknown-linux-gnu"
9
; Sixteen-byte aggregate of four i32 fields (byte offsets 0, 4, 8 and 12).
10%struct.S = type { i32, i32, i32, i32 }
11
; test_conditional_block: a 4-byte store to field 1 of %s1 (byte offset 4) is
; made only on the if.then path; if.end then performs two 16-byte copies,
; %s4 -> %s3 and %s1 -> %s2.
; Expected output, per the assertions below:
;  - default and AVX runs: the %s1 -> %s2 copy is emitted as 4-byte, 4-byte and
;    8-byte scalar moves at offsets 0, 4 and 8, so the bytes written by the
;    conditional store are reloaded by a load of the same width;
;  - run with --x86-disable-avoid-SFB: the copy stays one 16-byte movups pair.
; The %s4 -> %s3 copy remains a single vector copy in every run.
; NOTE(review): "SFB" presumably stands for store-forwarding block - confirm
; against the pass this test exercises.
12; Function Attrs: nounwind uwtable
13define void @test_conditional_block(ptr nocapture noalias %s1 , ptr nocapture noalias %s2, i32 %x, ptr nocapture noalias  %s3, ptr nocapture noalias readonly %s4) local_unnamed_addr #0 {
14; CHECK-LABEL: test_conditional_block:
15; CHECK:       # %bb.0: # %entry
16; CHECK-NEXT:    cmpl $18, %edx
17; CHECK-NEXT:    jl .LBB0_2
18; CHECK-NEXT:  # %bb.1: # %if.then
19; CHECK-NEXT:    movl %edx, 4(%rdi)
20; CHECK-NEXT:  .LBB0_2: # %if.end
21; CHECK-NEXT:    movups (%r8), %xmm0
22; CHECK-NEXT:    movups %xmm0, (%rcx)
23; CHECK-NEXT:    movl (%rdi), %eax
24; CHECK-NEXT:    movl %eax, (%rsi)
25; CHECK-NEXT:    movl 4(%rdi), %eax
26; CHECK-NEXT:    movl %eax, 4(%rsi)
27; CHECK-NEXT:    movq 8(%rdi), %rax
28; CHECK-NEXT:    movq %rax, 8(%rsi)
29; CHECK-NEXT:    retq
30;
31; DISABLED-LABEL: test_conditional_block:
32; DISABLED:       # %bb.0: # %entry
33; DISABLED-NEXT:    cmpl $18, %edx
34; DISABLED-NEXT:    jl .LBB0_2
35; DISABLED-NEXT:  # %bb.1: # %if.then
36; DISABLED-NEXT:    movl %edx, 4(%rdi)
37; DISABLED-NEXT:  .LBB0_2: # %if.end
38; DISABLED-NEXT:    movups (%r8), %xmm0
39; DISABLED-NEXT:    movups %xmm0, (%rcx)
40; DISABLED-NEXT:    movups (%rdi), %xmm0
41; DISABLED-NEXT:    movups %xmm0, (%rsi)
42; DISABLED-NEXT:    retq
43;
44; AVX-LABEL: test_conditional_block:
45; AVX:       # %bb.0: # %entry
46; AVX-NEXT:    cmpl $18, %edx
47; AVX-NEXT:    jl .LBB0_2
48; AVX-NEXT:  # %bb.1: # %if.then
49; AVX-NEXT:    movl %edx, 4(%rdi)
50; AVX-NEXT:  .LBB0_2: # %if.end
51; AVX-NEXT:    vmovups (%r8), %xmm0
52; AVX-NEXT:    vmovups %xmm0, (%rcx)
53; AVX-NEXT:    movl (%rdi), %eax
54; AVX-NEXT:    movl %eax, (%rsi)
55; AVX-NEXT:    movl 4(%rdi), %eax
56; AVX-NEXT:    movl %eax, 4(%rsi)
57; AVX-NEXT:    movq 8(%rdi), %rax
58; AVX-NEXT:    movq %rax, 8(%rsi)
59; AVX-NEXT:    retq
60entry:
61  %cmp = icmp sgt i32 %x, 17
62  br i1 %cmp, label %if.then, label %if.end
63
; Narrow store into %s1 that the later 16-byte copy reads back.
64if.then:                                          ; preds = %entry
65  %b = getelementptr inbounds %struct.S, ptr %s1, i64 0, i32 1
66  store i32 %x, ptr %b, align 4
67  br label %if.end
68
; The second copy below reads all 16 bytes of %s1, including field 1.
69if.end:                                           ; preds = %if.then, %entry
70  tail call void @llvm.memcpy.p0.p0.i64(ptr %s3, ptr %s4, i64 16, i32 4, i1 false)
71  tail call void @llvm.memcpy.p0.p0.i64(ptr %s2, ptr %s1, i64 16, i32 4, i1 false)
72  ret void
73}
74
; test_imm_store: single-block case where the narrow stores write immediates.
; The constant 0 is stored to the first 4 bytes of %s1 and the constant 1 to
; %s3, then 16 bytes are copied %s1 -> %s2.
; Expected output, per the assertions below:
;  - default and AVX runs: the copy is emitted as a 4-byte move at offset 0, an
;    8-byte move at offset 4 and a 4-byte move at offset 12;
;  - run with --x86-disable-avoid-SFB: the copy is one 16-byte movups pair.
75; Function Attrs: nounwind uwtable
76define void @test_imm_store(ptr nocapture noalias %s1, ptr nocapture %s2, i32 %x, ptr nocapture %s3) local_unnamed_addr #0 {
77; CHECK-LABEL: test_imm_store:
78; CHECK:       # %bb.0: # %entry
79; CHECK-NEXT:    movl $0, (%rdi)
80; CHECK-NEXT:    movl $1, (%rcx)
81; CHECK-NEXT:    movl (%rdi), %eax
82; CHECK-NEXT:    movl %eax, (%rsi)
83; CHECK-NEXT:    movq 4(%rdi), %rax
84; CHECK-NEXT:    movq %rax, 4(%rsi)
85; CHECK-NEXT:    movl 12(%rdi), %eax
86; CHECK-NEXT:    movl %eax, 12(%rsi)
87; CHECK-NEXT:    retq
88;
89; DISABLED-LABEL: test_imm_store:
90; DISABLED:       # %bb.0: # %entry
91; DISABLED-NEXT:    movl $0, (%rdi)
92; DISABLED-NEXT:    movl $1, (%rcx)
93; DISABLED-NEXT:    movups (%rdi), %xmm0
94; DISABLED-NEXT:    movups %xmm0, (%rsi)
95; DISABLED-NEXT:    retq
96;
97; AVX-LABEL: test_imm_store:
98; AVX:       # %bb.0: # %entry
99; AVX-NEXT:    movl $0, (%rdi)
100; AVX-NEXT:    movl $1, (%rcx)
101; AVX-NEXT:    movl (%rdi), %eax
102; AVX-NEXT:    movl %eax, (%rsi)
103; AVX-NEXT:    movq 4(%rdi), %rax
104; AVX-NEXT:    movq %rax, 4(%rsi)
105; AVX-NEXT:    movl 12(%rdi), %eax
106; AVX-NEXT:    movl %eax, 12(%rsi)
107; AVX-NEXT:    retq
108entry:
; Only the first store targets %s1, the source of the copy; the second store
; goes to %s3 and sits between that store and the copy.
109  store i32 0, ptr %s1, align 4
110  store i32 1, ptr %s3, align 4
111  tail call void @llvm.memcpy.p0.p0.i64(ptr %s2, ptr %s1, i64 16, i32 4, i1 false)
112  ret void
113}
114
; test_nondirect_br: two conditional 4-byte stores into %s1 precede the copy.
; if.then writes byte offset 4 and if.then2 writes byte offset 12; the 16-byte
; copies (%s4 -> %s3 and %s1 -> %s2) are in if.end3.
; Expected output, per the assertions below:
;  - default and AVX runs: the %s1 -> %s2 copy is emitted as an 8-byte move at
;    offset 0 followed by 4-byte moves at offsets 8 and 12. Only the store at
;    offset 12, made in if.then2 (a direct predecessor of if.end3), gets a
;    matching-width load; the offset-4 store made in if.then, which is not a
;    direct predecessor of if.end3, does not cause a split at offset 4;
;  - run with --x86-disable-avoid-SFB: the copy is one 16-byte movups pair.
115; Function Attrs: nounwind uwtable
116define void @test_nondirect_br(ptr nocapture noalias %s1, ptr nocapture %s2, i32 %x, ptr nocapture %s3, ptr nocapture readonly %s4, i32 %x2) local_unnamed_addr #0 {
117; CHECK-LABEL: test_nondirect_br:
118; CHECK:       # %bb.0: # %entry
119; CHECK-NEXT:    cmpl $18, %edx
120; CHECK-NEXT:    jl .LBB2_2
121; CHECK-NEXT:  # %bb.1: # %if.then
122; CHECK-NEXT:    movl %edx, 4(%rdi)
123; CHECK-NEXT:  .LBB2_2: # %if.end
124; CHECK-NEXT:    cmpl $14, %r9d
125; CHECK-NEXT:    jl .LBB2_4
126; CHECK-NEXT:  # %bb.3: # %if.then2
127; CHECK-NEXT:    movl %r9d, 12(%rdi)
128; CHECK-NEXT:  .LBB2_4: # %if.end3
129; CHECK-NEXT:    movups (%r8), %xmm0
130; CHECK-NEXT:    movups %xmm0, (%rcx)
131; CHECK-NEXT:    movq (%rdi), %rax
132; CHECK-NEXT:    movq %rax, (%rsi)
133; CHECK-NEXT:    movl 8(%rdi), %eax
134; CHECK-NEXT:    movl %eax, 8(%rsi)
135; CHECK-NEXT:    movl 12(%rdi), %eax
136; CHECK-NEXT:    movl %eax, 12(%rsi)
137; CHECK-NEXT:    retq
138;
139; DISABLED-LABEL: test_nondirect_br:
140; DISABLED:       # %bb.0: # %entry
141; DISABLED-NEXT:    cmpl $18, %edx
142; DISABLED-NEXT:    jl .LBB2_2
143; DISABLED-NEXT:  # %bb.1: # %if.then
144; DISABLED-NEXT:    movl %edx, 4(%rdi)
145; DISABLED-NEXT:  .LBB2_2: # %if.end
146; DISABLED-NEXT:    cmpl $14, %r9d
147; DISABLED-NEXT:    jl .LBB2_4
148; DISABLED-NEXT:  # %bb.3: # %if.then2
149; DISABLED-NEXT:    movl %r9d, 12(%rdi)
150; DISABLED-NEXT:  .LBB2_4: # %if.end3
151; DISABLED-NEXT:    movups (%r8), %xmm0
152; DISABLED-NEXT:    movups %xmm0, (%rcx)
153; DISABLED-NEXT:    movups (%rdi), %xmm0
154; DISABLED-NEXT:    movups %xmm0, (%rsi)
155; DISABLED-NEXT:    retq
156;
157; AVX-LABEL: test_nondirect_br:
158; AVX:       # %bb.0: # %entry
159; AVX-NEXT:    cmpl $18, %edx
160; AVX-NEXT:    jl .LBB2_2
161; AVX-NEXT:  # %bb.1: # %if.then
162; AVX-NEXT:    movl %edx, 4(%rdi)
163; AVX-NEXT:  .LBB2_2: # %if.end
164; AVX-NEXT:    cmpl $14, %r9d
165; AVX-NEXT:    jl .LBB2_4
166; AVX-NEXT:  # %bb.3: # %if.then2
167; AVX-NEXT:    movl %r9d, 12(%rdi)
168; AVX-NEXT:  .LBB2_4: # %if.end3
169; AVX-NEXT:    vmovups (%r8), %xmm0
170; AVX-NEXT:    vmovups %xmm0, (%rcx)
171; AVX-NEXT:    movq (%rdi), %rax
172; AVX-NEXT:    movq %rax, (%rsi)
173; AVX-NEXT:    movl 8(%rdi), %eax
174; AVX-NEXT:    movl %eax, 8(%rsi)
175; AVX-NEXT:    movl 12(%rdi), %eax
176; AVX-NEXT:    movl %eax, 12(%rsi)
177; AVX-NEXT:    retq
178entry:
179  %cmp = icmp sgt i32 %x, 17
180  br i1 %cmp, label %if.then, label %if.end
181
; Store to field 1 (offset 4); this block reaches if.end3 only through if.end.
182if.then:                                          ; preds = %entry
183  %b = getelementptr inbounds %struct.S, ptr %s1, i64 0, i32 1
184  store i32 %x, ptr %b, align 4
185  br label %if.end
186
187if.end:                                           ; preds = %if.then, %entry
188  %cmp1 = icmp sgt i32 %x2, 13
189  br i1 %cmp1, label %if.then2, label %if.end3
190
; Store to field 3 (offset 12); this block branches straight to if.end3.
191if.then2:                                         ; preds = %if.end
192  %d = getelementptr inbounds %struct.S, ptr %s1, i64 0, i32 3
193  store i32 %x2, ptr %d, align 4
194  br label %if.end3
195
196if.end3:                                          ; preds = %if.then2, %if.end
197  tail call void @llvm.memcpy.p0.p0.i64(ptr %s3, ptr %s4, i64 16, i32 4, i1 false)
198  tail call void @llvm.memcpy.p0.p0.i64(ptr %s2, ptr %s1, i64 16, i32 4, i1 false)
199  ret void
200}
201
; test_2preds_block: both predecessors of the copying block contribute a store.
; entry unconditionally writes 4 bytes at offset 12 of %s1 and if.then
; conditionally writes 4 bytes at offset 4; if.end (preds: if.then, entry)
; copies 16 bytes %s4 -> %s3 and %s1 -> %s2.
; Expected output, per the assertions below:
;  - default and AVX runs: the %s1 -> %s2 copy is emitted as four 4-byte moves
;    at offsets 0, 4, 8 and 12, i.e. split around both stored fields;
;  - run with --x86-disable-avoid-SFB: the copy is one 16-byte movups pair.
202; Function Attrs: nounwind uwtable
203define void @test_2preds_block(ptr nocapture noalias %s1, ptr nocapture %s2, i32 %x, ptr nocapture %s3, ptr nocapture readonly %s4, i32 %x2) local_unnamed_addr #0 {
204; CHECK-LABEL: test_2preds_block:
205; CHECK:       # %bb.0: # %entry
206; CHECK-NEXT:    movl %r9d, 12(%rdi)
207; CHECK-NEXT:    cmpl $18, %edx
208; CHECK-NEXT:    jl .LBB3_2
209; CHECK-NEXT:  # %bb.1: # %if.then
210; CHECK-NEXT:    movl %edx, 4(%rdi)
211; CHECK-NEXT:  .LBB3_2: # %if.end
212; CHECK-NEXT:    movups (%r8), %xmm0
213; CHECK-NEXT:    movups %xmm0, (%rcx)
214; CHECK-NEXT:    movl (%rdi), %eax
215; CHECK-NEXT:    movl %eax, (%rsi)
216; CHECK-NEXT:    movl 4(%rdi), %eax
217; CHECK-NEXT:    movl %eax, 4(%rsi)
218; CHECK-NEXT:    movl 8(%rdi), %eax
219; CHECK-NEXT:    movl %eax, 8(%rsi)
220; CHECK-NEXT:    movl 12(%rdi), %eax
221; CHECK-NEXT:    movl %eax, 12(%rsi)
222; CHECK-NEXT:    retq
223;
224; DISABLED-LABEL: test_2preds_block:
225; DISABLED:       # %bb.0: # %entry
226; DISABLED-NEXT:    movl %r9d, 12(%rdi)
227; DISABLED-NEXT:    cmpl $18, %edx
228; DISABLED-NEXT:    jl .LBB3_2
229; DISABLED-NEXT:  # %bb.1: # %if.then
230; DISABLED-NEXT:    movl %edx, 4(%rdi)
231; DISABLED-NEXT:  .LBB3_2: # %if.end
232; DISABLED-NEXT:    movups (%r8), %xmm0
233; DISABLED-NEXT:    movups %xmm0, (%rcx)
234; DISABLED-NEXT:    movups (%rdi), %xmm0
235; DISABLED-NEXT:    movups %xmm0, (%rsi)
236; DISABLED-NEXT:    retq
237;
238; AVX-LABEL: test_2preds_block:
239; AVX:       # %bb.0: # %entry
240; AVX-NEXT:    movl %r9d, 12(%rdi)
241; AVX-NEXT:    cmpl $18, %edx
242; AVX-NEXT:    jl .LBB3_2
243; AVX-NEXT:  # %bb.1: # %if.then
244; AVX-NEXT:    movl %edx, 4(%rdi)
245; AVX-NEXT:  .LBB3_2: # %if.end
246; AVX-NEXT:    vmovups (%r8), %xmm0
247; AVX-NEXT:    vmovups %xmm0, (%rcx)
248; AVX-NEXT:    movl (%rdi), %eax
249; AVX-NEXT:    movl %eax, (%rsi)
250; AVX-NEXT:    movl 4(%rdi), %eax
251; AVX-NEXT:    movl %eax, 4(%rsi)
252; AVX-NEXT:    movl 8(%rdi), %eax
253; AVX-NEXT:    movl %eax, 8(%rsi)
254; AVX-NEXT:    movl 12(%rdi), %eax
255; AVX-NEXT:    movl %eax, 12(%rsi)
256; AVX-NEXT:    retq
; Unconditional store to field 3 (offset 12) in the first predecessor.
257entry:
258  %d = getelementptr inbounds %struct.S, ptr %s1, i64 0, i32 3
259  store i32 %x2, ptr %d, align 4
260  %cmp = icmp sgt i32 %x, 17
261  br i1 %cmp, label %if.then, label %if.end
262
; Conditional store to field 1 (offset 4) in the second predecessor.
263if.then:                                          ; preds = %entry
264  %b = getelementptr inbounds %struct.S, ptr %s1, i64 0, i32 1
265  store i32 %x, ptr %b, align 4
266  br label %if.end
267
268if.end:                                           ; preds = %if.then, %entry
269  tail call void @llvm.memcpy.p0.p0.i64(ptr %s3, ptr %s4, i64 16, i32 4, i1 false)
270  tail call void @llvm.memcpy.p0.p0.i64(ptr %s2, ptr %s1, i64 16, i32 4, i1 false)
271  ret void
272}
; Sixteen-byte aggregate of two i64 fields (byte offsets 0 and 8).
273%struct.S2 = type { i64, i64 }
274
; test_type64: same shape as the conditional-store test, but the narrow store
; is 8 bytes wide. if.then sign-extends %x and stores it to field 1 of %s1
; (offset 8); if.end copies 16 bytes %s4 -> %s3 and %s1 -> %s2.
; Expected output, per the assertions below:
;  - default and AVX runs: the %s1 -> %s2 copy is emitted as two 8-byte moves
;    at offsets 0 and 8;
;  - run with --x86-disable-avoid-SFB: the copy is one 16-byte movups pair.
275; Function Attrs: nounwind uwtable
276define void @test_type64(ptr nocapture noalias %s1, ptr nocapture %s2, i32 %x, ptr nocapture %s3, ptr nocapture readonly %s4) local_unnamed_addr #0 {
277; CHECK-LABEL: test_type64:
278; CHECK:       # %bb.0: # %entry
279; CHECK-NEXT:    cmpl $18, %edx
280; CHECK-NEXT:    jl .LBB4_2
281; CHECK-NEXT:  # %bb.1: # %if.then
282; CHECK-NEXT:    movslq %edx, %rax
283; CHECK-NEXT:    movq %rax, 8(%rdi)
284; CHECK-NEXT:  .LBB4_2: # %if.end
285; CHECK-NEXT:    movups (%r8), %xmm0
286; CHECK-NEXT:    movups %xmm0, (%rcx)
287; CHECK-NEXT:    movq (%rdi), %rax
288; CHECK-NEXT:    movq %rax, (%rsi)
289; CHECK-NEXT:    movq 8(%rdi), %rax
290; CHECK-NEXT:    movq %rax, 8(%rsi)
291; CHECK-NEXT:    retq
292;
293; DISABLED-LABEL: test_type64:
294; DISABLED:       # %bb.0: # %entry
295; DISABLED-NEXT:    cmpl $18, %edx
296; DISABLED-NEXT:    jl .LBB4_2
297; DISABLED-NEXT:  # %bb.1: # %if.then
298; DISABLED-NEXT:    movslq %edx, %rax
299; DISABLED-NEXT:    movq %rax, 8(%rdi)
300; DISABLED-NEXT:  .LBB4_2: # %if.end
301; DISABLED-NEXT:    movups (%r8), %xmm0
302; DISABLED-NEXT:    movups %xmm0, (%rcx)
303; DISABLED-NEXT:    movups (%rdi), %xmm0
304; DISABLED-NEXT:    movups %xmm0, (%rsi)
305; DISABLED-NEXT:    retq
306;
307; AVX-LABEL: test_type64:
308; AVX:       # %bb.0: # %entry
309; AVX-NEXT:    cmpl $18, %edx
310; AVX-NEXT:    jl .LBB4_2
311; AVX-NEXT:  # %bb.1: # %if.then
312; AVX-NEXT:    movslq %edx, %rax
313; AVX-NEXT:    movq %rax, 8(%rdi)
314; AVX-NEXT:  .LBB4_2: # %if.end
315; AVX-NEXT:    vmovups (%r8), %xmm0
316; AVX-NEXT:    vmovups %xmm0, (%rcx)
317; AVX-NEXT:    movq (%rdi), %rax
318; AVX-NEXT:    movq %rax, (%rsi)
319; AVX-NEXT:    movq 8(%rdi), %rax
320; AVX-NEXT:    movq %rax, 8(%rsi)
321; AVX-NEXT:    retq
322entry:
323  %cmp = icmp sgt i32 %x, 17
324  br i1 %cmp, label %if.then, label %if.end
325
; 8-byte store into the upper half of %s1.
326if.then:                                          ; preds = %entry
327  %conv = sext i32 %x to i64
328  %b = getelementptr inbounds %struct.S2, ptr %s1, i64 0, i32 1
329  store i64 %conv, ptr %b, align 8
330  br label %if.end
331
332if.end:                                           ; preds = %if.then, %entry
333  tail call void @llvm.memcpy.p0.p0.i64(ptr %s3, ptr %s4, i64 16, i32 8, i1 false)
334  tail call void @llvm.memcpy.p0.p0.i64(ptr %s2, ptr %s1, i64 16, i32 8, i1 false)
335  ret void
336}
; Sixteen-byte aggregate with mixed field widths: i64, i8, i8, i16, i32.
337%struct.S3 = type { i64, i8, i8, i16, i32 }
338
; test_mixed_type: stores of two different widths precede the copy. if.then
; writes 8 bytes at offset 0 of %s1 and 1 byte at offset 8 (field 1 of
; %struct.S3); if.end copies 16 bytes %s1 -> %s2. %s3 and %s4 are not used in
; the body.
; Expected output, per the assertions below:
;  - default and AVX runs: the copy is emitted as an 8-byte move at offset 0, a
;    1-byte move at offset 8, then 4-, 2- and 1-byte moves at offsets 9, 13 and
;    15 covering the remaining seven bytes;
;  - run with --x86-disable-avoid-SFB: the copy is one 16-byte movups pair.
339; Function Attrs: noinline nounwind uwtable
340define void @test_mixed_type(ptr nocapture noalias %s1, ptr nocapture %s2, i32 %x, ptr nocapture readnone %s3, ptr nocapture readnone %s4) local_unnamed_addr #0 {
341; CHECK-LABEL: test_mixed_type:
342; CHECK:       # %bb.0: # %entry
343; CHECK-NEXT:    cmpl $18, %edx
344; CHECK-NEXT:    jl .LBB5_2
345; CHECK-NEXT:  # %bb.1: # %if.then
346; CHECK-NEXT:    movslq %edx, %rax
347; CHECK-NEXT:    movq %rax, (%rdi)
348; CHECK-NEXT:    movb %dl, 8(%rdi)
349; CHECK-NEXT:  .LBB5_2: # %if.end
350; CHECK-NEXT:    movq (%rdi), %rax
351; CHECK-NEXT:    movq %rax, (%rsi)
352; CHECK-NEXT:    movzbl 8(%rdi), %eax
353; CHECK-NEXT:    movb %al, 8(%rsi)
354; CHECK-NEXT:    movl 9(%rdi), %eax
355; CHECK-NEXT:    movl %eax, 9(%rsi)
356; CHECK-NEXT:    movzwl 13(%rdi), %eax
357; CHECK-NEXT:    movw %ax, 13(%rsi)
358; CHECK-NEXT:    movzbl 15(%rdi), %eax
359; CHECK-NEXT:    movb %al, 15(%rsi)
360; CHECK-NEXT:    retq
361;
362; DISABLED-LABEL: test_mixed_type:
363; DISABLED:       # %bb.0: # %entry
364; DISABLED-NEXT:    cmpl $18, %edx
365; DISABLED-NEXT:    jl .LBB5_2
366; DISABLED-NEXT:  # %bb.1: # %if.then
367; DISABLED-NEXT:    movslq %edx, %rax
368; DISABLED-NEXT:    movq %rax, (%rdi)
369; DISABLED-NEXT:    movb %dl, 8(%rdi)
370; DISABLED-NEXT:  .LBB5_2: # %if.end
371; DISABLED-NEXT:    movups (%rdi), %xmm0
372; DISABLED-NEXT:    movups %xmm0, (%rsi)
373; DISABLED-NEXT:    retq
374;
375; AVX-LABEL: test_mixed_type:
376; AVX:       # %bb.0: # %entry
377; AVX-NEXT:    cmpl $18, %edx
378; AVX-NEXT:    jl .LBB5_2
379; AVX-NEXT:  # %bb.1: # %if.then
380; AVX-NEXT:    movslq %edx, %rax
381; AVX-NEXT:    movq %rax, (%rdi)
382; AVX-NEXT:    movb %dl, 8(%rdi)
383; AVX-NEXT:  .LBB5_2: # %if.end
384; AVX-NEXT:    movq (%rdi), %rax
385; AVX-NEXT:    movq %rax, (%rsi)
386; AVX-NEXT:    movzbl 8(%rdi), %eax
387; AVX-NEXT:    movb %al, 8(%rsi)
388; AVX-NEXT:    movl 9(%rdi), %eax
389; AVX-NEXT:    movl %eax, 9(%rsi)
390; AVX-NEXT:    movzwl 13(%rdi), %eax
391; AVX-NEXT:    movw %ax, 13(%rsi)
392; AVX-NEXT:    movzbl 15(%rdi), %eax
393; AVX-NEXT:    movb %al, 15(%rsi)
394; AVX-NEXT:    retq
395entry:
396  %cmp = icmp sgt i32 %x, 17
397  br i1 %cmp, label %if.then, label %if.end
398
; Two stores of different widths: i64 at offset 0 and i8 at offset 8.
399if.then:                                          ; preds = %entry
400  %conv = sext i32 %x to i64
401  store i64 %conv, ptr %s1, align 8
402  %conv1 = trunc i32 %x to i8
403  %b = getelementptr inbounds %struct.S3, ptr %s1, i64 0, i32 1
404  store i8 %conv1, ptr %b, align 8
405  br label %if.end
406
407if.end:                                           ; preds = %if.then, %entry
408  tail call void @llvm.memcpy.p0.p0.i64(ptr %s2, ptr %s1, i64 16, i32 8, i1 false)
409  ret void
410}
; Forty-eight-byte aggregate of twelve i32 fields (field k is at offset 4*k).
411%struct.S4 = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
412
; test_multiple_blocks: one 48-byte copy that is lowered to several vector
; load/store pairs, two of which overlap an earlier narrow store. Zero is
; stored to field 1 (offset 4) and field 9 (offset 36) of %s1, then 48 bytes
; are copied %s1 -> %s2.
; Expected output, per the assertions below:
;  - default run: the 16-byte piece at offset 16 stays a movups pair; the piece
;    at offset 32 becomes 4-, 4- and 8-byte moves at 32, 36 and 40; the piece
;    at offset 0 becomes 4-, 4- and 8-byte moves at 0, 4 and 8;
;  - AVX runs: a 16-byte vmovups pair at offset 16, scalar moves at 32, 36 and
;    40, scalar moves at 0 and 4, a 16-byte vmovups pair at offset 8 and an
;    8-byte move at offset 24;
;  - run with --x86-disable-avoid-SFB: three 16-byte movups pairs at offsets
;    16, 32 and 0.
413; Function Attrs: nounwind uwtable
414define void @test_multiple_blocks(ptr nocapture noalias %s1, ptr nocapture %s2) local_unnamed_addr #0 {
415; CHECK-LABEL: test_multiple_blocks:
416; CHECK:       # %bb.0: # %entry
417; CHECK-NEXT:    movl $0, 4(%rdi)
418; CHECK-NEXT:    movl $0, 36(%rdi)
419; CHECK-NEXT:    movups 16(%rdi), %xmm0
420; CHECK-NEXT:    movups %xmm0, 16(%rsi)
421; CHECK-NEXT:    movl 32(%rdi), %eax
422; CHECK-NEXT:    movl %eax, 32(%rsi)
423; CHECK-NEXT:    movl 36(%rdi), %eax
424; CHECK-NEXT:    movl %eax, 36(%rsi)
425; CHECK-NEXT:    movq 40(%rdi), %rax
426; CHECK-NEXT:    movq %rax, 40(%rsi)
427; CHECK-NEXT:    movl (%rdi), %eax
428; CHECK-NEXT:    movl %eax, (%rsi)
429; CHECK-NEXT:    movl 4(%rdi), %eax
430; CHECK-NEXT:    movl %eax, 4(%rsi)
431; CHECK-NEXT:    movq 8(%rdi), %rax
432; CHECK-NEXT:    movq %rax, 8(%rsi)
433; CHECK-NEXT:    retq
434;
435; DISABLED-LABEL: test_multiple_blocks:
436; DISABLED:       # %bb.0: # %entry
437; DISABLED-NEXT:    movl $0, 4(%rdi)
438; DISABLED-NEXT:    movl $0, 36(%rdi)
439; DISABLED-NEXT:    movups 16(%rdi), %xmm0
440; DISABLED-NEXT:    movups %xmm0, 16(%rsi)
441; DISABLED-NEXT:    movups 32(%rdi), %xmm0
442; DISABLED-NEXT:    movups %xmm0, 32(%rsi)
443; DISABLED-NEXT:    movups (%rdi), %xmm0
444; DISABLED-NEXT:    movups %xmm0, (%rsi)
445; DISABLED-NEXT:    retq
446;
447; AVX-LABEL: test_multiple_blocks:
448; AVX:       # %bb.0: # %entry
449; AVX-NEXT:    movl $0, 4(%rdi)
450; AVX-NEXT:    movl $0, 36(%rdi)
451; AVX-NEXT:    vmovups 16(%rdi), %xmm0
452; AVX-NEXT:    vmovups %xmm0, 16(%rsi)
453; AVX-NEXT:    movl 32(%rdi), %eax
454; AVX-NEXT:    movl %eax, 32(%rsi)
455; AVX-NEXT:    movl 36(%rdi), %eax
456; AVX-NEXT:    movl %eax, 36(%rsi)
457; AVX-NEXT:    movq 40(%rdi), %rax
458; AVX-NEXT:    movq %rax, 40(%rsi)
459; AVX-NEXT:    movl (%rdi), %eax
460; AVX-NEXT:    movl %eax, (%rsi)
461; AVX-NEXT:    movl 4(%rdi), %eax
462; AVX-NEXT:    movl %eax, 4(%rsi)
463; AVX-NEXT:    vmovups 8(%rdi), %xmm0
464; AVX-NEXT:    vmovups %xmm0, 8(%rsi)
465; AVX-NEXT:    movq 24(%rdi), %rax
466; AVX-NEXT:    movq %rax, 24(%rsi)
467; AVX-NEXT:    retq
468entry:
; Two narrow stores, one inside each end of the 48-byte region copied below.
469  %b = getelementptr inbounds %struct.S4, ptr %s1, i64 0, i32 1
470  store i32 0, ptr %b, align 4
471  %b3 = getelementptr inbounds %struct.S4, ptr %s1, i64 0, i32 9
472  store i32 0, ptr %b3, align 4
473  tail call void @llvm.memcpy.p0.p0.i64(ptr %s2, ptr %s1, i64 48, i32 4, i1 false)
474  ret void
475}
; Sixteen-byte aggregate of eight i16 fields (field k is at offset 2*k).
476%struct.S5 = type { i16, i16, i16, i16, i16, i16, i16, i16 }
477
; test_type16: same shape as the conditional-store test, but the narrow store
; is 2 bytes wide. if.then truncates %x to i16 and stores it to field 1 of %s1
; (offset 2); if.end copies 16 bytes %s4 -> %s3 and %s1 -> %s2.
; Expected output, per the assertions below:
;  - default and AVX runs: the %s1 -> %s2 copy is emitted as 2-byte moves at
;    offsets 0 and 2, an 8-byte move at offset 4 and a 4-byte move at offset 12;
;  - run with --x86-disable-avoid-SFB: the copy is one 16-byte movups pair.
478; Function Attrs: nounwind uwtable
479define void @test_type16(ptr nocapture noalias %s1, ptr nocapture %s2, i32 %x, ptr nocapture %s3, ptr nocapture readonly %s4) local_unnamed_addr #0 {
480; CHECK-LABEL: test_type16:
481; CHECK:       # %bb.0: # %entry
482; CHECK-NEXT:    cmpl $18, %edx
483; CHECK-NEXT:    jl .LBB7_2
484; CHECK-NEXT:  # %bb.1: # %if.then
485; CHECK-NEXT:    movw %dx, 2(%rdi)
486; CHECK-NEXT:  .LBB7_2: # %if.end
487; CHECK-NEXT:    movups (%r8), %xmm0
488; CHECK-NEXT:    movups %xmm0, (%rcx)
489; CHECK-NEXT:    movzwl (%rdi), %eax
490; CHECK-NEXT:    movw %ax, (%rsi)
491; CHECK-NEXT:    movzwl 2(%rdi), %eax
492; CHECK-NEXT:    movw %ax, 2(%rsi)
493; CHECK-NEXT:    movq 4(%rdi), %rax
494; CHECK-NEXT:    movq %rax, 4(%rsi)
495; CHECK-NEXT:    movl 12(%rdi), %eax
496; CHECK-NEXT:    movl %eax, 12(%rsi)
497; CHECK-NEXT:    retq
498;
499; DISABLED-LABEL: test_type16:
500; DISABLED:       # %bb.0: # %entry
501; DISABLED-NEXT:    cmpl $18, %edx
502; DISABLED-NEXT:    jl .LBB7_2
503; DISABLED-NEXT:  # %bb.1: # %if.then
504; DISABLED-NEXT:    movw %dx, 2(%rdi)
505; DISABLED-NEXT:  .LBB7_2: # %if.end
506; DISABLED-NEXT:    movups (%r8), %xmm0
507; DISABLED-NEXT:    movups %xmm0, (%rcx)
508; DISABLED-NEXT:    movups (%rdi), %xmm0
509; DISABLED-NEXT:    movups %xmm0, (%rsi)
510; DISABLED-NEXT:    retq
511;
512; AVX-LABEL: test_type16:
513; AVX:       # %bb.0: # %entry
514; AVX-NEXT:    cmpl $18, %edx
515; AVX-NEXT:    jl .LBB7_2
516; AVX-NEXT:  # %bb.1: # %if.then
517; AVX-NEXT:    movw %dx, 2(%rdi)
518; AVX-NEXT:  .LBB7_2: # %if.end
519; AVX-NEXT:    vmovups (%r8), %xmm0
520; AVX-NEXT:    vmovups %xmm0, (%rcx)
521; AVX-NEXT:    movzwl (%rdi), %eax
522; AVX-NEXT:    movw %ax, (%rsi)
523; AVX-NEXT:    movzwl 2(%rdi), %eax
524; AVX-NEXT:    movw %ax, 2(%rsi)
525; AVX-NEXT:    movq 4(%rdi), %rax
526; AVX-NEXT:    movq %rax, 4(%rsi)
527; AVX-NEXT:    movl 12(%rdi), %eax
528; AVX-NEXT:    movl %eax, 12(%rsi)
529; AVX-NEXT:    retq
530entry:
531  %cmp = icmp sgt i32 %x, 17
532  br i1 %cmp, label %if.then, label %if.end
533
; 2-byte store into the second i16 field of %s1.
534if.then:                                          ; preds = %entry
535  %conv = trunc i32 %x to i16
536  %b = getelementptr inbounds %struct.S5, ptr %s1, i64 0, i32 1
537  store i16 %conv, ptr %b, align 2
538  br label %if.end
539
540if.end:                                           ; preds = %if.then, %entry
541  tail call void @llvm.memcpy.p0.p0.i64(ptr %s3, ptr %s4, i64 16, i32 2, i1 false)
542  tail call void @llvm.memcpy.p0.p0.i64(ptr %s2, ptr %s1, i64 16, i32 2, i1 false)
543  ret void
544}
545
; Thirty-two-byte aggregate: a [4 x i32] array (offsets 0-15) followed by four
; i32 fields at offsets 16, 20, 24 and 28.
546%struct.S6 = type { [4 x i32], i32, i32, i32, i32 }
547
; test_stack: the stored-to object is a byval argument, so the assertions
; address it relative to %rsp (offsets are matched with a numeric regex rather
; than pinned). %x is stored to field 3 of %s2 (byte offset 24), then 32 bytes
; of %s2 are copied to the sret result and again to the byval %s1.
; Expected output, per the assertions below:
;  - default and AVX runs: in each copy the first 16 bytes move as one vector
;    load/store, and the second 16 bytes are emitted as an 8-byte move followed
;    by two 4-byte moves (offsets 16, 24 and 28 in the sret copy);
;  - run with --x86-disable-avoid-SFB: each copy is two 16-byte vector moves.
548; Function Attrs: nounwind uwtable
549define void @test_stack(ptr noalias nocapture sret(%struct.S6) %agg.result, ptr byval(%struct.S6) nocapture readnone align 8 %s1, ptr byval(%struct.S6) nocapture align 8 %s2, i32 %x) local_unnamed_addr #0 {
550; CHECK-LABEL: test_stack:
551; CHECK:       # %bb.0: # %entry
552; CHECK-NEXT:    movq %rdi, %rax
553; CHECK-NEXT:    movl %esi, {{[0-9]+}}(%rsp)
554; CHECK-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
555; CHECK-NEXT:    movups %xmm0, (%rdi)
556; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
557; CHECK-NEXT:    movq %rcx, 16(%rdi)
558; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
559; CHECK-NEXT:    movl %ecx, 24(%rdi)
560; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
561; CHECK-NEXT:    movl %ecx, 28(%rdi)
562; CHECK-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
563; CHECK-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
564; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %edx
565; CHECK-NEXT:    movl {{[0-9]+}}(%rsp), %esi
566; CHECK-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
567; CHECK-NEXT:    movq %rcx, {{[0-9]+}}(%rsp)
568; CHECK-NEXT:    movl %edx, {{[0-9]+}}(%rsp)
569; CHECK-NEXT:    movl %esi, {{[0-9]+}}(%rsp)
570; CHECK-NEXT:    retq
571;
572; DISABLED-LABEL: test_stack:
573; DISABLED:       # %bb.0: # %entry
574; DISABLED-NEXT:    movq %rdi, %rax
575; DISABLED-NEXT:    movl %esi, {{[0-9]+}}(%rsp)
576; DISABLED-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
577; DISABLED-NEXT:    movups %xmm0, (%rdi)
578; DISABLED-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
579; DISABLED-NEXT:    movups %xmm0, 16(%rdi)
580; DISABLED-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
581; DISABLED-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm1
582; DISABLED-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
583; DISABLED-NEXT:    movaps %xmm1, {{[0-9]+}}(%rsp)
584; DISABLED-NEXT:    retq
585;
586; AVX-LABEL: test_stack:
587; AVX:       # %bb.0: # %entry
588; AVX-NEXT:    movq %rdi, %rax
589; AVX-NEXT:    movl %esi, {{[0-9]+}}(%rsp)
590; AVX-NEXT:    vmovups {{[0-9]+}}(%rsp), %xmm0
591; AVX-NEXT:    vmovups %xmm0, (%rdi)
592; AVX-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
593; AVX-NEXT:    movq %rcx, 16(%rdi)
594; AVX-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
595; AVX-NEXT:    movl %ecx, 24(%rdi)
596; AVX-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
597; AVX-NEXT:    movl %ecx, 28(%rdi)
598; AVX-NEXT:    vmovups {{[0-9]+}}(%rsp), %xmm0
599; AVX-NEXT:    vmovups %xmm0, {{[0-9]+}}(%rsp)
600; AVX-NEXT:    movq {{[0-9]+}}(%rsp), %rcx
601; AVX-NEXT:    movq %rcx, {{[0-9]+}}(%rsp)
602; AVX-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
603; AVX-NEXT:    movl %ecx, {{[0-9]+}}(%rsp)
604; AVX-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
605; AVX-NEXT:    movl %ecx, {{[0-9]+}}(%rsp)
606; AVX-NEXT:    retq
607entry:
; 4-byte store into the byval copy of %s2; both copies below read it back.
608  %s6.sroa.3.0..sroa_idx4 = getelementptr inbounds %struct.S6, ptr %s2, i64 0, i32 3
609  store i32 %x, ptr %s6.sroa.3.0..sroa_idx4, align 8
610  call void @llvm.memcpy.p0.p0.i64(ptr %agg.result, ptr nonnull %s2, i64 32, i32 4, i1 false)
611  call void @llvm.memcpy.p0.p0.i64(ptr nonnull %s1, ptr nonnull %s2, i64 32, i32 4, i1 false)
612
613  ret void
614}
615
; test_limit_all: negative case - no run is expected to split the copy.
; entry stores 4 bytes at offset 12 of %s1 and then calls @bar(%s1); if.then
; stores 4 bytes at offset 4 of %s1 and then calls @bar(%s1) again; if.end
; copies 16 bytes %s4 -> %s3 and %s1 -> %s2.
; Both non-AVX RUN configurations use the shared SSE prefix for this function,
; so the default and --x86-disable-avoid-SFB runs must produce the same code:
; the %s1 -> %s2 copy stays a single 16-byte movups pair. The AVX runs likewise
; keep a single 16-byte vmovups pair.
; NOTE(review): presumably the call to @bar that follows each store (or an
; instruction-inspection limit, as the function name suggests) is what keeps
; the stores from being considered - confirm against the pass implementation.
616; Function Attrs: nounwind uwtable
617define void @test_limit_all(ptr noalias  %s1, ptr nocapture %s2, i32 %x, ptr nocapture %s3, ptr nocapture readonly %s4, i32 %x2) local_unnamed_addr #0 {
618; SSE-LABEL: test_limit_all:
619; SSE:       # %bb.0: # %entry
620; SSE-NEXT:    pushq %rbp
621; SSE-NEXT:    .cfi_def_cfa_offset 16
622; SSE-NEXT:    pushq %r15
623; SSE-NEXT:    .cfi_def_cfa_offset 24
624; SSE-NEXT:    pushq %r14
625; SSE-NEXT:    .cfi_def_cfa_offset 32
626; SSE-NEXT:    pushq %r12
627; SSE-NEXT:    .cfi_def_cfa_offset 40
628; SSE-NEXT:    pushq %rbx
629; SSE-NEXT:    .cfi_def_cfa_offset 48
630; SSE-NEXT:    .cfi_offset %rbx, -48
631; SSE-NEXT:    .cfi_offset %r12, -40
632; SSE-NEXT:    .cfi_offset %r14, -32
633; SSE-NEXT:    .cfi_offset %r15, -24
634; SSE-NEXT:    .cfi_offset %rbp, -16
635; SSE-NEXT:    movq %r8, %r15
636; SSE-NEXT:    movq %rcx, %r14
637; SSE-NEXT:    movl %edx, %ebp
638; SSE-NEXT:    movq %rsi, %rbx
639; SSE-NEXT:    movq %rdi, %r12
640; SSE-NEXT:    movl %r9d, 12(%rdi)
641; SSE-NEXT:    callq bar@PLT
642; SSE-NEXT:    cmpl $18, %ebp
643; SSE-NEXT:    jl .LBB9_2
644; SSE-NEXT:  # %bb.1: # %if.then
645; SSE-NEXT:    movl %ebp, 4(%r12)
646; SSE-NEXT:    movq %r12, %rdi
647; SSE-NEXT:    callq bar@PLT
648; SSE-NEXT:  .LBB9_2: # %if.end
649; SSE-NEXT:    movups (%r15), %xmm0
650; SSE-NEXT:    movups %xmm0, (%r14)
651; SSE-NEXT:    movups (%r12), %xmm0
652; SSE-NEXT:    movups %xmm0, (%rbx)
653; SSE-NEXT:    popq %rbx
654; SSE-NEXT:    .cfi_def_cfa_offset 40
655; SSE-NEXT:    popq %r12
656; SSE-NEXT:    .cfi_def_cfa_offset 32
657; SSE-NEXT:    popq %r14
658; SSE-NEXT:    .cfi_def_cfa_offset 24
659; SSE-NEXT:    popq %r15
660; SSE-NEXT:    .cfi_def_cfa_offset 16
661; SSE-NEXT:    popq %rbp
662; SSE-NEXT:    .cfi_def_cfa_offset 8
663; SSE-NEXT:    retq
664;
665; AVX-LABEL: test_limit_all:
666; AVX:       # %bb.0: # %entry
667; AVX-NEXT:    pushq %rbp
668; AVX-NEXT:    .cfi_def_cfa_offset 16
669; AVX-NEXT:    pushq %r15
670; AVX-NEXT:    .cfi_def_cfa_offset 24
671; AVX-NEXT:    pushq %r14
672; AVX-NEXT:    .cfi_def_cfa_offset 32
673; AVX-NEXT:    pushq %r12
674; AVX-NEXT:    .cfi_def_cfa_offset 40
675; AVX-NEXT:    pushq %rbx
676; AVX-NEXT:    .cfi_def_cfa_offset 48
677; AVX-NEXT:    .cfi_offset %rbx, -48
678; AVX-NEXT:    .cfi_offset %r12, -40
679; AVX-NEXT:    .cfi_offset %r14, -32
680; AVX-NEXT:    .cfi_offset %r15, -24
681; AVX-NEXT:    .cfi_offset %rbp, -16
682; AVX-NEXT:    movq %r8, %r15
683; AVX-NEXT:    movq %rcx, %r14
684; AVX-NEXT:    movl %edx, %ebp
685; AVX-NEXT:    movq %rsi, %rbx
686; AVX-NEXT:    movq %rdi, %r12
687; AVX-NEXT:    movl %r9d, 12(%rdi)
688; AVX-NEXT:    callq bar@PLT
689; AVX-NEXT:    cmpl $18, %ebp
690; AVX-NEXT:    jl .LBB9_2
691; AVX-NEXT:  # %bb.1: # %if.then
692; AVX-NEXT:    movl %ebp, 4(%r12)
693; AVX-NEXT:    movq %r12, %rdi
694; AVX-NEXT:    callq bar@PLT
695; AVX-NEXT:  .LBB9_2: # %if.end
696; AVX-NEXT:    vmovups (%r15), %xmm0
697; AVX-NEXT:    vmovups %xmm0, (%r14)
698; AVX-NEXT:    vmovups (%r12), %xmm0
699; AVX-NEXT:    vmovups %xmm0, (%rbx)
700; AVX-NEXT:    popq %rbx
701; AVX-NEXT:    .cfi_def_cfa_offset 40
702; AVX-NEXT:    popq %r12
703; AVX-NEXT:    .cfi_def_cfa_offset 32
704; AVX-NEXT:    popq %r14
705; AVX-NEXT:    .cfi_def_cfa_offset 24
706; AVX-NEXT:    popq %r15
707; AVX-NEXT:    .cfi_def_cfa_offset 16
708; AVX-NEXT:    popq %rbp
709; AVX-NEXT:    .cfi_def_cfa_offset 8
710; AVX-NEXT:    retq
; Store to field 3 (offset 12), immediately followed by a call.
711entry:
712  %d = getelementptr inbounds %struct.S, ptr %s1, i64 0, i32 3
713  store i32 %x2, ptr %d, align 4
714  tail call void @bar(ptr %s1) #3
715  %cmp = icmp sgt i32 %x, 17
716  br i1 %cmp, label %if.then, label %if.end
717
; Store to field 1 (offset 4), again followed by a call before the copy.
718if.then:                                          ; preds = %entry
719  %b = getelementptr inbounds %struct.S, ptr %s1, i64 0, i32 1
720  store i32 %x, ptr %b, align 4
721  tail call void @bar(ptr nonnull %s1) #3
722  br label %if.end
723
724if.end:                                           ; preds = %if.then, %entry
725  tail call void @llvm.memcpy.p0.p0.i64(ptr %s3, ptr %s4, i64 16, i32 4, i1 false)
726  tail call void @llvm.memcpy.p0.p0.i64(ptr %s2, ptr %s1, i64 16, i32 4, i1 false)
727  ret void
728}
729
730; Function Attrs: nounwind uwtable
731define void @test_limit_one_pred(ptr noalias %s1, ptr nocapture %s2, i32 %x, ptr nocapture %s3, ptr nocapture readonly %s4, i32 %x2) local_unnamed_addr #0 {
732; CHECK-LABEL: test_limit_one_pred:
733; CHECK:       # %bb.0: # %entry
734; CHECK-NEXT:    pushq %r15
735; CHECK-NEXT:    .cfi_def_cfa_offset 16
736; CHECK-NEXT:    pushq %r14
737; CHECK-NEXT:    .cfi_def_cfa_offset 24
738; CHECK-NEXT:    pushq %r12
739; CHECK-NEXT:    .cfi_def_cfa_offset 32
740; CHECK-NEXT:    pushq %rbx
741; CHECK-NEXT:    .cfi_def_cfa_offset 40
742; CHECK-NEXT:    pushq %rax
743; CHECK-NEXT:    .cfi_def_cfa_offset 48
744; CHECK-NEXT:    .cfi_offset %rbx, -40
745; CHECK-NEXT:    .cfi_offset %r12, -32
746; CHECK-NEXT:    .cfi_offset %r14, -24
747; CHECK-NEXT:    .cfi_offset %r15, -16
748; CHECK-NEXT:    movq %r8, %r12
749; CHECK-NEXT:    movq %rcx, %r15
750; CHECK-NEXT:    movq %rsi, %rbx
751; CHECK-NEXT:    movq %rdi, %r14
752; CHECK-NEXT:    movl %r9d, 12(%rdi)
753; CHECK-NEXT:    cmpl $18, %edx
754; CHECK-NEXT:    jl .LBB10_2
755; CHECK-NEXT:  # %bb.1: # %if.then
756; CHECK-NEXT:    movl %edx, 4(%r14)
757; CHECK-NEXT:    movq %r14, %rdi
758; CHECK-NEXT:    callq bar@PLT
759; CHECK-NEXT:  .LBB10_2: # %if.end
760; CHECK-NEXT:    movups (%r12), %xmm0
761; CHECK-NEXT:    movups %xmm0, (%r15)
762; CHECK-NEXT:    movq (%r14), %rax
763; CHECK-NEXT:    movq %rax, (%rbx)
764; CHECK-NEXT:    movl 8(%r14), %eax
765; CHECK-NEXT:    movl %eax, 8(%rbx)
766; CHECK-NEXT:    movl 12(%r14), %eax
767; CHECK-NEXT:    movl %eax, 12(%rbx)
768; CHECK-NEXT:    addq $8, %rsp
769; CHECK-NEXT:    .cfi_def_cfa_offset 40
770; CHECK-NEXT:    popq %rbx
771; CHECK-NEXT:    .cfi_def_cfa_offset 32
772; CHECK-NEXT:    popq %r12
773; CHECK-NEXT:    .cfi_def_cfa_offset 24
774; CHECK-NEXT:    popq %r14
775; CHECK-NEXT:    .cfi_def_cfa_offset 16
776; CHECK-NEXT:    popq %r15
777; CHECK-NEXT:    .cfi_def_cfa_offset 8
778; CHECK-NEXT:    retq
779;
780; DISABLED-LABEL: test_limit_one_pred:
781; DISABLED:       # %bb.0: # %entry
782; DISABLED-NEXT:    pushq %r15
783; DISABLED-NEXT:    .cfi_def_cfa_offset 16
784; DISABLED-NEXT:    pushq %r14
785; DISABLED-NEXT:    .cfi_def_cfa_offset 24
786; DISABLED-NEXT:    pushq %r12
787; DISABLED-NEXT:    .cfi_def_cfa_offset 32
788; DISABLED-NEXT:    pushq %rbx
789; DISABLED-NEXT:    .cfi_def_cfa_offset 40
790; DISABLED-NEXT:    pushq %rax
791; DISABLED-NEXT:    .cfi_def_cfa_offset 48
792; DISABLED-NEXT:    .cfi_offset %rbx, -40
793; DISABLED-NEXT:    .cfi_offset %r12, -32
794; DISABLED-NEXT:    .cfi_offset %r14, -24
795; DISABLED-NEXT:    .cfi_offset %r15, -16
796; DISABLED-NEXT:    movq %r8, %r15
797; DISABLED-NEXT:    movq %rcx, %r14
798; DISABLED-NEXT:    movq %rsi, %rbx
799; DISABLED-NEXT:    movq %rdi, %r12
800; DISABLED-NEXT:    movl %r9d, 12(%rdi)
801; DISABLED-NEXT:    cmpl $18, %edx
802; DISABLED-NEXT:    jl .LBB10_2
803; DISABLED-NEXT:  # %bb.1: # %if.then
804; DISABLED-NEXT:    movl %edx, 4(%r12)
805; DISABLED-NEXT:    movq %r12, %rdi
806; DISABLED-NEXT:    callq bar@PLT
807; DISABLED-NEXT:  .LBB10_2: # %if.end
808; DISABLED-NEXT:    movups (%r15), %xmm0
809; DISABLED-NEXT:    movups %xmm0, (%r14)
810; DISABLED-NEXT:    movups (%r12), %xmm0
811; DISABLED-NEXT:    movups %xmm0, (%rbx)
812; DISABLED-NEXT:    addq $8, %rsp
813; DISABLED-NEXT:    .cfi_def_cfa_offset 40
814; DISABLED-NEXT:    popq %rbx
815; DISABLED-NEXT:    .cfi_def_cfa_offset 32
816; DISABLED-NEXT:    popq %r12
817; DISABLED-NEXT:    .cfi_def_cfa_offset 24
818; DISABLED-NEXT:    popq %r14
819; DISABLED-NEXT:    .cfi_def_cfa_offset 16
820; DISABLED-NEXT:    popq %r15
821; DISABLED-NEXT:    .cfi_def_cfa_offset 8
822; DISABLED-NEXT:    retq
823;
824; AVX-LABEL: test_limit_one_pred:
825; AVX:       # %bb.0: # %entry
826; AVX-NEXT:    pushq %r15
827; AVX-NEXT:    .cfi_def_cfa_offset 16
828; AVX-NEXT:    pushq %r14
829; AVX-NEXT:    .cfi_def_cfa_offset 24
830; AVX-NEXT:    pushq %r12
831; AVX-NEXT:    .cfi_def_cfa_offset 32
832; AVX-NEXT:    pushq %rbx
833; AVX-NEXT:    .cfi_def_cfa_offset 40
834; AVX-NEXT:    pushq %rax
835; AVX-NEXT:    .cfi_def_cfa_offset 48
836; AVX-NEXT:    .cfi_offset %rbx, -40
837; AVX-NEXT:    .cfi_offset %r12, -32
838; AVX-NEXT:    .cfi_offset %r14, -24
839; AVX-NEXT:    .cfi_offset %r15, -16
840; AVX-NEXT:    movq %r8, %r12
841; AVX-NEXT:    movq %rcx, %r15
842; AVX-NEXT:    movq %rsi, %rbx
843; AVX-NEXT:    movq %rdi, %r14
844; AVX-NEXT:    movl %r9d, 12(%rdi)
845; AVX-NEXT:    cmpl $18, %edx
846; AVX-NEXT:    jl .LBB10_2
847; AVX-NEXT:  # %bb.1: # %if.then
848; AVX-NEXT:    movl %edx, 4(%r14)
849; AVX-NEXT:    movq %r14, %rdi
850; AVX-NEXT:    callq bar@PLT
851; AVX-NEXT:  .LBB10_2: # %if.end
852; AVX-NEXT:    vmovups (%r12), %xmm0
853; AVX-NEXT:    vmovups %xmm0, (%r15)
854; AVX-NEXT:    movq (%r14), %rax
855; AVX-NEXT:    movq %rax, (%rbx)
856; AVX-NEXT:    movl 8(%r14), %eax
857; AVX-NEXT:    movl %eax, 8(%rbx)
858; AVX-NEXT:    movl 12(%r14), %eax
859; AVX-NEXT:    movl %eax, 12(%rbx)
860; AVX-NEXT:    addq $8, %rsp
861; AVX-NEXT:    .cfi_def_cfa_offset 40
862; AVX-NEXT:    popq %rbx
863; AVX-NEXT:    .cfi_def_cfa_offset 32
864; AVX-NEXT:    popq %r12
865; AVX-NEXT:    .cfi_def_cfa_offset 24
866; AVX-NEXT:    popq %r14
867; AVX-NEXT:    .cfi_def_cfa_offset 16
868; AVX-NEXT:    popq %r15
869; AVX-NEXT:    .cfi_def_cfa_offset 8
870; AVX-NEXT:    retq
871entry:
872  %d = getelementptr inbounds %struct.S, ptr %s1, i64 0, i32 3
873  store i32 %x2, ptr %d, align 4
874  %cmp = icmp sgt i32 %x, 17
875  br i1 %cmp, label %if.then, label %if.end
876
877if.then:                                          ; preds = %entry
878  %b = getelementptr inbounds %struct.S, ptr %s1, i64 0, i32 1
879  store i32 %x, ptr %b, align 4
880  tail call void @bar(ptr nonnull %s1) #3
881  br label %if.end
882
883if.end:                                           ; preds = %if.then, %entry
884  tail call void @llvm.memcpy.p0.p0.i64(ptr %s3, ptr %s4, i64 16, i32 4, i1 false)
885  tail call void @llvm.memcpy.p0.p0.i64(ptr %s2, ptr %s1, i64 16, i32 4, i1 false)
886  ret void
887}
888
889
; Declaration only (no body in the visible part of this file). @bar takes a
; single pointer and is called from the if.then block of test_limit_one_pred
; with %s1 as its argument.
890declare void @bar(ptr) local_unnamed_addr #1
891
892
; Aggregate of eight float fields. It is the GEP base type used by
; test_conditional_block_float to address field 1.
893%struct.S7 = type { float, float, float , float, float, float, float, float }
894
895; Function Attrs: nounwind uwtable
; Purpose: when %x > 17 (signed), store float 1.0 into field 1 of the
; %struct.S7 at %s1; then copy 32 bytes from %s4 to %s3 and 32 bytes from
; %s1 to %s2. The second copy reads the location that the conditional store
; may have just written.
;
; Codegen recorded by the autogenerated assertions below:
;   - default x86-64 run: the %s1 -> %s2 copy is emitted as 4-, 4-, 8- and
;     16-byte moves, so the 4 bytes at offset 4 are loaded on their own;
;   - run with --x86-disable-avoid-SFB: the same copy stays as two 16-byte
;     movups load/store pairs;
;   - AVX runs: the %s4 -> %s3 copy is a single 32-byte ymm move, while the
;     %s1 -> %s2 copy is split into 4-, 4-, 16- and 8-byte moves.
;
; %y is part of the signature but is not referenced in the body.
896define void @test_conditional_block_float(ptr nocapture noalias %s1, ptr nocapture %s2, i32 %x, ptr nocapture %s3, ptr nocapture readonly %s4, float %y) local_unnamed_addr #0 {
897; CHECK-LABEL: test_conditional_block_float:
898; CHECK:       # %bb.0: # %entry
899; CHECK-NEXT:    cmpl $18, %edx
900; CHECK-NEXT:    jl .LBB11_2
901; CHECK-NEXT:  # %bb.1: # %if.then
902; CHECK-NEXT:    movl $1065353216, 4(%rdi) # imm = 0x3F800000
903; CHECK-NEXT:  .LBB11_2: # %if.end
904; CHECK-NEXT:    movups (%r8), %xmm0
905; CHECK-NEXT:    movups 16(%r8), %xmm1
906; CHECK-NEXT:    movups %xmm1, 16(%rcx)
907; CHECK-NEXT:    movups %xmm0, (%rcx)
908; CHECK-NEXT:    movl (%rdi), %eax
909; CHECK-NEXT:    movl 4(%rdi), %ecx
910; CHECK-NEXT:    movq 8(%rdi), %rdx
911; CHECK-NEXT:    movups 16(%rdi), %xmm0
912; CHECK-NEXT:    movups %xmm0, 16(%rsi)
913; CHECK-NEXT:    movl %eax, (%rsi)
914; CHECK-NEXT:    movl %ecx, 4(%rsi)
915; CHECK-NEXT:    movq %rdx, 8(%rsi)
916; CHECK-NEXT:    retq
917;
918; DISABLED-LABEL: test_conditional_block_float:
919; DISABLED:       # %bb.0: # %entry
920; DISABLED-NEXT:    cmpl $18, %edx
921; DISABLED-NEXT:    jl .LBB11_2
922; DISABLED-NEXT:  # %bb.1: # %if.then
923; DISABLED-NEXT:    movl $1065353216, 4(%rdi) # imm = 0x3F800000
924; DISABLED-NEXT:  .LBB11_2: # %if.end
925; DISABLED-NEXT:    movups (%r8), %xmm0
926; DISABLED-NEXT:    movups 16(%r8), %xmm1
927; DISABLED-NEXT:    movups %xmm1, 16(%rcx)
928; DISABLED-NEXT:    movups %xmm0, (%rcx)
929; DISABLED-NEXT:    movups (%rdi), %xmm0
930; DISABLED-NEXT:    movups 16(%rdi), %xmm1
931; DISABLED-NEXT:    movups %xmm1, 16(%rsi)
932; DISABLED-NEXT:    movups %xmm0, (%rsi)
933; DISABLED-NEXT:    retq
934;
935; AVX-LABEL: test_conditional_block_float:
936; AVX:       # %bb.0: # %entry
937; AVX-NEXT:    cmpl $18, %edx
938; AVX-NEXT:    jl .LBB11_2
939; AVX-NEXT:  # %bb.1: # %if.then
940; AVX-NEXT:    movl $1065353216, 4(%rdi) # imm = 0x3F800000
941; AVX-NEXT:  .LBB11_2: # %if.end
942; AVX-NEXT:    vmovups (%r8), %ymm0
943; AVX-NEXT:    vmovups %ymm0, (%rcx)
944; AVX-NEXT:    movl (%rdi), %eax
945; AVX-NEXT:    movl %eax, (%rsi)
946; AVX-NEXT:    movl 4(%rdi), %eax
947; AVX-NEXT:    movl %eax, 4(%rsi)
948; AVX-NEXT:    vmovups 8(%rdi), %xmm0
949; AVX-NEXT:    vmovups %xmm0, 8(%rsi)
950; AVX-NEXT:    movq 24(%rdi), %rax
951; AVX-NEXT:    movq %rax, 24(%rsi)
952; AVX-NEXT:    vzeroupper
953; AVX-NEXT:    retq
954entry:
; Take the if.then path only when %x is greater than 17 (signed compare).
955  %cmp = icmp sgt i32 %x, 17
956  br i1 %cmp, label %if.then, label %if.end
957
958if.then:                                          ; preds = %entry
; Narrow (4-byte) store into field 1 of the struct at %s1; the assertions
; above show it landing at offset 4 from %rdi.
959  %b = getelementptr inbounds %struct.S7, ptr %s1, i64 0, i32 1
960  store float 1.0, ptr %b, align 4
961  br label %if.end
962
963if.end:                                           ; preds = %if.then, %entry
; Two 32-byte copies. The second one has %s1 as its source, i.e. the struct
; the store in if.then writes into.
964  tail call void @llvm.memcpy.p0.p0.i64(ptr %s3, ptr %s4, i64 32, i32 4, i1 false)
965  tail call void @llvm.memcpy.p0.p0.i64(ptr %s2, ptr %s1, i64 32, i32 4, i1 false)
966  ret void
967}
968
; Aggregate of six i64 fields. It is the GEP base type used by
; test_conditional_block_ymm to address field 1.
969%struct.S8 = type { i64, i64, i64, i64, i64, i64 }
970
971; Function Attrs: nounwind uwtable
; Purpose: when %x > 17 (signed), store the i64 constant 1 into field 1 of
; the %struct.S8 at %s1; then copy 32 bytes from %s4 to %s3 and 32 bytes from
; %s1 to %s2. The copy length is 32 although %struct.S8 declares six i64
; fields, so only the leading part of the struct is copied.
;
; Codegen recorded by the autogenerated assertions below:
;   - default x86-64 run: the %s1 -> %s2 copy is emitted as 8-, 8- and
;     16-byte moves, so the 8 bytes at offset 8 are loaded on their own;
;   - run with --x86-disable-avoid-SFB: the same copy stays as two 16-byte
;     movups load/store pairs;
;   - AVX runs: the %s4 -> %s3 copy is a single 32-byte ymm move, while the
;     %s1 -> %s2 copy is split into 8-, 8- and 16-byte moves.
972define void @test_conditional_block_ymm(ptr nocapture noalias %s1, ptr nocapture %s2, i32 %x, ptr nocapture %s3, ptr nocapture readonly %s4) local_unnamed_addr #0 {
973; CHECK-LABEL: test_conditional_block_ymm:
974; CHECK:       # %bb.0: # %entry
975; CHECK-NEXT:    cmpl $18, %edx
976; CHECK-NEXT:    jl .LBB12_2
977; CHECK-NEXT:  # %bb.1: # %if.then
978; CHECK-NEXT:    movq $1, 8(%rdi)
979; CHECK-NEXT:  .LBB12_2: # %if.end
980; CHECK-NEXT:    movups (%r8), %xmm0
981; CHECK-NEXT:    movups 16(%r8), %xmm1
982; CHECK-NEXT:    movups %xmm1, 16(%rcx)
983; CHECK-NEXT:    movups %xmm0, (%rcx)
984; CHECK-NEXT:    movq (%rdi), %rax
985; CHECK-NEXT:    movq 8(%rdi), %rcx
986; CHECK-NEXT:    movups 16(%rdi), %xmm0
987; CHECK-NEXT:    movups %xmm0, 16(%rsi)
988; CHECK-NEXT:    movq %rax, (%rsi)
989; CHECK-NEXT:    movq %rcx, 8(%rsi)
990; CHECK-NEXT:    retq
991;
992; DISABLED-LABEL: test_conditional_block_ymm:
993; DISABLED:       # %bb.0: # %entry
994; DISABLED-NEXT:    cmpl $18, %edx
995; DISABLED-NEXT:    jl .LBB12_2
996; DISABLED-NEXT:  # %bb.1: # %if.then
997; DISABLED-NEXT:    movq $1, 8(%rdi)
998; DISABLED-NEXT:  .LBB12_2: # %if.end
999; DISABLED-NEXT:    movups (%r8), %xmm0
1000; DISABLED-NEXT:    movups 16(%r8), %xmm1
1001; DISABLED-NEXT:    movups %xmm1, 16(%rcx)
1002; DISABLED-NEXT:    movups %xmm0, (%rcx)
1003; DISABLED-NEXT:    movups (%rdi), %xmm0
1004; DISABLED-NEXT:    movups 16(%rdi), %xmm1
1005; DISABLED-NEXT:    movups %xmm1, 16(%rsi)
1006; DISABLED-NEXT:    movups %xmm0, (%rsi)
1007; DISABLED-NEXT:    retq
1008;
1009; AVX-LABEL: test_conditional_block_ymm:
1010; AVX:       # %bb.0: # %entry
1011; AVX-NEXT:    cmpl $18, %edx
1012; AVX-NEXT:    jl .LBB12_2
1013; AVX-NEXT:  # %bb.1: # %if.then
1014; AVX-NEXT:    movq $1, 8(%rdi)
1015; AVX-NEXT:  .LBB12_2: # %if.end
1016; AVX-NEXT:    vmovups (%r8), %ymm0
1017; AVX-NEXT:    vmovups %ymm0, (%rcx)
1018; AVX-NEXT:    movq (%rdi), %rax
1019; AVX-NEXT:    movq %rax, (%rsi)
1020; AVX-NEXT:    movq 8(%rdi), %rax
1021; AVX-NEXT:    movq %rax, 8(%rsi)
1022; AVX-NEXT:    vmovups 16(%rdi), %xmm0
1023; AVX-NEXT:    vmovups %xmm0, 16(%rsi)
1024; AVX-NEXT:    vzeroupper
1025; AVX-NEXT:    retq
1026entry:
; Take the if.then path only when %x is greater than 17 (signed compare).
1027  %cmp = icmp sgt i32 %x, 17
1028  br i1 %cmp, label %if.then, label %if.end
1029
1030if.then:                                          ; preds = %entry
; 8-byte store into field 1 of the struct at %s1 (offset 8 from %rdi in the
; assertions above). Note the store is declared with align 4, not 8.
1031  %b = getelementptr inbounds %struct.S8, ptr %s1, i64 0, i32 1
1032  store i64 1, ptr %b, align 4
1033  br label %if.end
1034
1035if.end:                                           ; preds = %if.then, %entry
; Two 32-byte copies. The second one has %s1 as its source, i.e. the struct
; the store in if.then writes into.
1036  tail call void @llvm.memcpy.p0.p0.i64(ptr %s3, ptr %s4, i64 32, i32 4, i1 false)
1037  tail call void @llvm.memcpy.p0.p0.i64(ptr %s2, ptr %s1, i64 32, i32 4, i1 false)
1038  ret void
1039}
1040
; Purpose: store %x at %A, then copy 16 bytes from %A to %A + 4. The
; destination starts 4 bytes after the source, so the two 16-byte ranges
; overlap.
;
; Codegen recorded by the autogenerated assertions below: both the SSE group
; (the prefix shared by the default and the --x86-disable-avoid-SFB run lines
; at the top of the file) and the AVX group expect one 16-byte load followed
; by one 16-byte store, i.e. the copy is left unsplit in every configuration.
1041define dso_local void @test_alias(ptr nocapture %A, i32 %x) local_unnamed_addr #0 {
1042; SSE-LABEL: test_alias:
1043; SSE:       # %bb.0: # %entry
1044; SSE-NEXT:    movl %esi, (%rdi)
1045; SSE-NEXT:    movups (%rdi), %xmm0
1046; SSE-NEXT:    movups %xmm0, 4(%rdi)
1047; SSE-NEXT:    retq
1048;
1049; AVX-LABEL: test_alias:
1050; AVX:       # %bb.0: # %entry
1051; AVX-NEXT:    movl %esi, (%rdi)
1052; AVX-NEXT:    vmovups (%rdi), %xmm0
1053; AVX-NEXT:    vmovups %xmm0, 4(%rdi)
1054; AVX-NEXT:    retq
1055entry:
; 4-byte store to the first bytes of the region that the copy below reads.
1056  store i32 %x, ptr %A, align 4
; Destination pointer is the source advanced by 4 bytes.
1057  %add.ptr = getelementptr inbounds i8, ptr %A, i64 4
1058  tail call void @llvm.memcpy.p0.p0.i64(ptr nonnull align 4 %add.ptr, ptr align 4 %A, i64 16, i32 4, i1 false)
1059  ret void
1060}
1061
1062; Function Attrs: nounwind uwtable
; Purpose: store %x at %A, then copy 16 bytes from %A to %A + 20. The
; destination starts 20 bytes after the source, so the two 16-byte ranges do
; not overlap (contrast with test_alias, where the offset is 4).
;
; Codegen recorded by the autogenerated assertions below:
;   - default x86-64 run and AVX runs: the copy is emitted as 4-, 8- and
;     4-byte moves, the first of which reloads exactly the 4 bytes written by
;     the preceding store;
;   - run with --x86-disable-avoid-SFB: the copy stays as one 16-byte movups
;     load/store pair.
1063define dso_local void @test_noalias(ptr nocapture %A, i32 %x) local_unnamed_addr #0 {
1064; CHECK-LABEL: test_noalias:
1065; CHECK:       # %bb.0: # %entry
1066; CHECK-NEXT:    movl %esi, (%rdi)
1067; CHECK-NEXT:    movl (%rdi), %eax
1068; CHECK-NEXT:    movl %eax, 20(%rdi)
1069; CHECK-NEXT:    movq 4(%rdi), %rax
1070; CHECK-NEXT:    movq %rax, 24(%rdi)
1071; CHECK-NEXT:    movl 12(%rdi), %eax
1072; CHECK-NEXT:    movl %eax, 32(%rdi)
1073; CHECK-NEXT:    retq
1074;
1075; DISABLED-LABEL: test_noalias:
1076; DISABLED:       # %bb.0: # %entry
1077; DISABLED-NEXT:    movl %esi, (%rdi)
1078; DISABLED-NEXT:    movups (%rdi), %xmm0
1079; DISABLED-NEXT:    movups %xmm0, 20(%rdi)
1080; DISABLED-NEXT:    retq
1081;
1082; AVX-LABEL: test_noalias:
1083; AVX:       # %bb.0: # %entry
1084; AVX-NEXT:    movl %esi, (%rdi)
1085; AVX-NEXT:    movl (%rdi), %eax
1086; AVX-NEXT:    movl %eax, 20(%rdi)
1087; AVX-NEXT:    movq 4(%rdi), %rax
1088; AVX-NEXT:    movq %rax, 24(%rdi)
1089; AVX-NEXT:    movl 12(%rdi), %eax
1090; AVX-NEXT:    movl %eax, 32(%rdi)
1091; AVX-NEXT:    retq
1092entry:
; 4-byte store to the first bytes of the region that the copy below reads.
1093  store i32 %x, ptr %A, align 4
; Destination pointer is the source advanced by 20 bytes, past the end of the
; 16-byte source range.
1094  %add.ptr = getelementptr inbounds i8, ptr %A, i64 20
1095  tail call void @llvm.memcpy.p0.p0.i64(ptr nonnull align 4 %add.ptr, ptr align 4 %A, i64 16, i32 4, i1 false)
1096  ret void
1097}
1098
1099; Function Attrs: argmemonly nounwind
; Declaration of the memcpy intrinsic used by the tests, in a five-operand
; form: destination pointer, source pointer, i64 byte count, an i32 operand
; and an i1 operand. The call sites visible in this part of the file pass
; `i32 4` and `i1 false` for the last two.
; NOTE(review): the i32 is presumably the legacy alignment operand and the i1
; the volatile flag, relying on the IR auto-upgrader; confirm against the
; LangRef before changing the signature.
1100declare void @llvm.memcpy.p0.p0.i64(ptr nocapture writeonly, ptr nocapture readonly, i64, i32, i1) #1
1101
; Attribute group #0 (nounwind, uwtable), attached to the test function
; definitions via the trailing `#0` on their define lines.
; NOTE(review): groups #1 and #3 are referenced by the declarations and by
; the call to @bar, but no definition for them is visible in this part of the
; file; verify whether that is intentional.
1102attributes #0 = { nounwind uwtable }
1103