xref: /llvm-project/llvm/test/Transforms/LoopVectorize/X86/avx512.ll (revision e3cf80c5c1fe55efd8216575ccadea0ab087e79c)
1; RUN: opt -mattr=+avx512f -passes=loop-vectorize -S < %s | llc -mattr=+avx512f | FileCheck %s
2; RUN: opt -mattr=+avx512vl,+prefer-256-bit -passes=loop-vectorize -S < %s | llc -mattr=+avx512f | FileCheck %s --check-prefix=CHECK-PREFER-AVX256
3
4target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
5target triple = "x86_64-apple-macosx10.9.0"
6
7; Verify that we generate 512-bit wide vectors for a basic integer memset
8; loop.
9
10; CHECK-LABEL: _f:
11; CHECK: %vec.epilog.vector.body
12; CHECK: %ymm
13; CHECK: %vector.body
14; CHECK-NOT: %ymm
15; CHECK: vmovdqu64 %zmm{{.}},
16
17; Verify that we don't generate 512-bit wide vectors when subtarget feature says not to
18
19; CHECK-PREFER-AVX256-LABEL: f:
20; CHECK-PREFER-AVX256: vmovdqu %ymm{{.}},
21; CHECK-PREFER-AVX256-NOT: %zmm
22
23define void @f(ptr %a, i32 %n) {                  ; stores %n into a[0 .. n-1]; carries no function attributes
24entry:
25  %cmp4 = icmp sgt i32 %n, 0                      ; signed guard: the loop is skipped when n <= 0
26  br i1 %cmp4, label %for.body.preheader, label %for.end
27
28for.body.preheader:                               ; preds = %entry
29  br label %for.body
30
31for.body:                                         ; preds = %for.body.preheader, %for.body
32  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] ; i64 induction variable i, starting at 0
33  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv ; address of a[i], i32 elements
34  store i32 %n, ptr %arrayidx, align 4            ; a[i] = n (the stored value is loop-invariant)
35  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 ; i + 1
36  %lftr.wideiv = trunc i64 %indvars.iv.next to i32 ; narrow i + 1 to i32 so it can be compared with %n
37  %exitcond = icmp eq i32 %lftr.wideiv, %n        ; leave the loop once i + 1 == n
38  br i1 %exitcond, label %for.end.loopexit, label %for.body
39
40for.end.loopexit:                                 ; preds = %for.body
41  br label %for.end
42
43for.end:                                          ; preds = %for.end.loopexit, %entry
44  ret void
45}
46
47; Verify that the "prefer-vector-width=256" attribute prevents the use of 512-bit
48; vectors
49
50; CHECK-LABEL: _g:
51; CHECK: vmovdqu %ymm{{.}},
52; CHECK-NOT: %zmm
53
54; CHECK-PREFER-AVX256-LABEL: g:
55; CHECK-PREFER-AVX256: vmovdqu %ymm{{.}},
56; CHECK-PREFER-AVX256-NOT: %zmm
57
58define void @g(ptr %a, i32 %n) "prefer-vector-width"="256" { ; same store loop as @f; only the "prefer-vector-width"="256" attribute differs
59entry:
60  %cmp4 = icmp sgt i32 %n, 0                      ; signed guard: the loop is skipped when n <= 0
61  br i1 %cmp4, label %for.body.preheader, label %for.end
62
63for.body.preheader:                               ; preds = %entry
64  br label %for.body
65
66for.body:                                         ; preds = %for.body.preheader, %for.body
67  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] ; i64 induction variable i, starting at 0
68  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv ; address of a[i], i32 elements
69  store i32 %n, ptr %arrayidx, align 4            ; a[i] = n (the stored value is loop-invariant)
70  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 ; i + 1
71  %lftr.wideiv = trunc i64 %indvars.iv.next to i32 ; narrow i + 1 to i32 so it can be compared with %n
72  %exitcond = icmp eq i32 %lftr.wideiv, %n        ; leave the loop once i + 1 == n
73  br i1 %exitcond, label %for.end.loopexit, label %for.body
74
75for.end.loopexit:                                 ; preds = %for.body
76  br label %for.end
77
78for.end:                                          ; preds = %for.end.loopexit, %entry
79  ret void
80}
81
82; Verify that the "prefer-vector-width=512" attribute overrides the subtarget
83; feature's preference for 256-bit vectors
84
85; CHECK-LABEL: _h:
86; CHECK: %vec.epilog.vector.body
87; CHECK: %ymm
88; CHECK: %vector.body
89; CHECK: vmovdqu64 %zmm{{.}},
90; CHECK-NOT: %ymm
91
92; CHECK-PREFER-AVX256-LABEL: h:
93; CHECK-PREFER-AVX256: %vec.epilog.vector.body
94; CHECK-PREFER-AVX256: %ymm
95; CHECK-PREFER-AVX256: %vector.body
96; CHECK-PREFER-AVX256: vmovdqu64 %zmm{{.}},
97; CHECK-PREFER-AVX256-NOT: %ymm
98
99define void @h(ptr %a, i32 %n) "prefer-vector-width"="512" { ; same store loop as @f; only the "prefer-vector-width"="512" attribute differs
100entry:
101  %cmp4 = icmp sgt i32 %n, 0                      ; signed guard: the loop is skipped when n <= 0
102  br i1 %cmp4, label %for.body.preheader, label %for.end
103
104for.body.preheader:                               ; preds = %entry
105  br label %for.body
106
107for.body:                                         ; preds = %for.body.preheader, %for.body
108  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ] ; i64 induction variable i, starting at 0
109  %arrayidx = getelementptr inbounds i32, ptr %a, i64 %indvars.iv ; address of a[i], i32 elements
110  store i32 %n, ptr %arrayidx, align 4            ; a[i] = n (the stored value is loop-invariant)
111  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 ; i + 1
112  %lftr.wideiv = trunc i64 %indvars.iv.next to i32 ; narrow i + 1 to i32 so it can be compared with %n
113  %exitcond = icmp eq i32 %lftr.wideiv, %n        ; leave the loop once i + 1 == n
114  br i1 %exitcond, label %for.end.loopexit, label %for.body
115
116for.end.loopexit:                                 ; preds = %for.body
117  br label %for.end
118
119for.end:                                          ; preds = %for.end.loopexit, %entry
120  ret void
121}
122