; xref: /llvm-project/llvm/test/Transforms/LoadStoreVectorizer/NVPTX/4x2xhalf.ll (revision 2be0abb7fe72ed4537b3eabcd3102d48ea845717)
; RUN: opt -mtriple=nvptx64-nvidia-cuda -passes=load-store-vectorizer -S -o - %s | FileCheck %s

; @ldg_f16: four adjacent <2 x half> load/compare/select/store groups at
; half-element offsets 0, 2, 4 and 6 from %rd0, which is declared align 16.
; The FileCheck directives at the end of the body expect a single <8 x half>
; load, four shufflevector extracts covering lanes {0,1}, {2,3}, {4,5}, {6,7},
; and an <8 x half> store.
define void @ldg_f16(ptr nocapture align 16 %rd0) {
  ; Group 0 (half offset 0): the only accesses annotated with align 16.
  ; Lanes that compare ordered-greater-than zero are kept; the rest become 0.
  %load1 = load <2 x half>, ptr %rd0, align 16
  %p1 = fcmp ogt <2 x half> %load1, zeroinitializer
  %s1 = select <2 x i1> %p1, <2 x half> %load1, <2 x half> zeroinitializer
  store <2 x half> %s1, ptr %rd0, align 16
  ; Group 1 (half offset 2): same computation, accesses annotated align 4.
  %in2 = getelementptr half, ptr %rd0, i64 2
  %load2 = load <2 x half>, ptr %in2, align 4
  %p2 = fcmp ogt <2 x half> %load2, zeroinitializer
  %s2 = select <2 x i1> %p2, <2 x half> %load2, <2 x half> zeroinitializer
  store <2 x half> %s2, ptr %in2, align 4
  ; Group 2 (half offset 4).
  %in3 = getelementptr half, ptr %rd0, i64 4
  %load3 = load <2 x half>, ptr %in3, align 4
  %p3 = fcmp ogt <2 x half> %load3, zeroinitializer
  %s3 = select <2 x i1> %p3, <2 x half> %load3, <2 x half> zeroinitializer
  store <2 x half> %s3, ptr %in3, align 4
  ; Group 3 (half offset 6).
  %in4 = getelementptr half, ptr %rd0, i64 6
  %load4 = load <2 x half>, ptr %in4, align 4
  %p4 = fcmp ogt <2 x half> %load4, zeroinitializer
  %s4 = select <2 x i1> %p4, <2 x half> %load4, <2 x half> zeroinitializer
  store <2 x half> %s4, ptr %in4, align 4
  ret void

; Expected output: one wide load bound to LD, one two-lane extract per
; original load (in lane order), then one wide store.
; CHECK-LABEL: @ldg_f16
; CHECK: %[[LD:.*]] = load <8 x half>, ptr
; CHECK: shufflevector <8 x half> %[[LD]], <8 x half> poison, <2 x i32> <i32 0, i32 1>
; CHECK: shufflevector <8 x half> %[[LD]], <8 x half> poison, <2 x i32> <i32 2, i32 3>
; CHECK: shufflevector <8 x half> %[[LD]], <8 x half> poison, <2 x i32> <i32 4, i32 5>
; CHECK: shufflevector <8 x half> %[[LD]], <8 x half> poison, <2 x i32> <i32 6, i32 7>
; CHECK: store <8 x half>
}

; @no_nonpow2_vector: four <3 x half> load/compare/select/store groups at
; half-element offsets 0, 3, 6 and 9 from %rd0. Every access is annotated
; align 4. The negative directive at the end of the body requires that no
; shufflevector appears in the output after this function's label.
define void @no_nonpow2_vector(ptr nocapture align 16 %rd0) {
  ; Group 0 (half offset 0). Lanes that compare ordered-greater-than zero are
  ; kept; the rest become 0.
  %load1 = load <3 x half>, ptr %rd0, align 4
  %p1 = fcmp ogt <3 x half> %load1, zeroinitializer
  %s1 = select <3 x i1> %p1, <3 x half> %load1, <3 x half> zeroinitializer
  store <3 x half> %s1, ptr %rd0, align 4
  ; Group 1 (half offset 3).
  %in2 = getelementptr half, ptr %rd0, i64 3
  %load2 = load <3 x half>, ptr %in2, align 4
  %p2 = fcmp ogt <3 x half> %load2, zeroinitializer
  %s2 = select <3 x i1> %p2, <3 x half> %load2, <3 x half> zeroinitializer
  store <3 x half> %s2, ptr %in2, align 4
  ; Group 2 (half offset 6).
  %in3 = getelementptr half, ptr %rd0, i64 6
  %load3 = load <3 x half>, ptr %in3, align 4
  %p3 = fcmp ogt <3 x half> %load3, zeroinitializer
  %s3 = select <3 x i1> %p3, <3 x half> %load3, <3 x half> zeroinitializer
  store <3 x half> %s3, ptr %in3, align 4
  ; Group 3 (half offset 9).
  %in4 = getelementptr half, ptr %rd0, i64 9
  %load4 = load <3 x half>, ptr %in4, align 4
  %p4 = fcmp ogt <3 x half> %load4, zeroinitializer
  %s4 = select <3 x i1> %p4, <3 x half> %load4, <3 x half> zeroinitializer
  store <3 x half> %s4, ptr %in4, align 4
  ret void

; Negative test: the three-element vector accesses must not be rewritten into
; a form that needs shufflevector.
; CHECK-LABEL: @no_nonpow2_vector
; CHECK-NOT: shufflevector
}

; @no_pointer_vector: four <2 x ptr> load/compare/select/store groups at
; ptr-element offsets 0, 2, 4 and 6 from %rd0. Every access is annotated
; align 4. The negative directive at the end of the body requires that no
; shufflevector appears in the output after this function's label.
define void @no_pointer_vector(ptr nocapture align 16 %rd0) {
  ; Group 0 (ptr offset 0). Lanes that compare not-equal to the zero
  ; initializer are kept; the rest are replaced by it.
  %load1 = load <2 x ptr>, ptr %rd0, align 4
  %p1 = icmp ne <2 x ptr> %load1, zeroinitializer
  %s1 = select <2 x i1> %p1, <2 x ptr> %load1, <2 x ptr> zeroinitializer
  store <2 x ptr> %s1, ptr %rd0, align 4
  ; Group 1 (ptr offset 2).
  %in2 = getelementptr ptr, ptr %rd0, i64 2
  %load2 = load <2 x ptr>, ptr %in2, align 4
  %p2 = icmp ne <2 x ptr> %load2, zeroinitializer
  %s2 = select <2 x i1> %p2, <2 x ptr> %load2, <2 x ptr> zeroinitializer
  store <2 x ptr> %s2, ptr %in2, align 4
  ; Group 2 (ptr offset 4).
  %in3 = getelementptr ptr, ptr %rd0, i64 4
  %load3 = load <2 x ptr>, ptr %in3, align 4
  %p3 = icmp ne <2 x ptr> %load3, zeroinitializer
  %s3 = select <2 x i1> %p3, <2 x ptr> %load3, <2 x ptr> zeroinitializer
  store <2 x ptr> %s3, ptr %in3, align 4
  ; Group 3 (ptr offset 6).
  %in4 = getelementptr ptr, ptr %rd0, i64 6
  %load4 = load <2 x ptr>, ptr %in4, align 4
  %p4 = icmp ne <2 x ptr> %load4, zeroinitializer
  %s4 = select <2 x i1> %p4, <2 x ptr> %load4, <2 x ptr> zeroinitializer
  store <2 x ptr> %s4, ptr %in4, align 4
  ret void

; Negative test: the pointer-vector accesses must not be rewritten into a
; form that needs shufflevector.
; CHECK-LABEL: @no_pointer_vector
; CHECK-NOT: shufflevector
}
