xref: /llvm-project/llvm/test/Transforms/CodeGenPrepare/AArch64/zext-to-shuffle.ll (revision f1ec0d12bb0843f0deab83ef2b5cf1339cbc4f0b)
1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -passes='require<profile-summary>,function(codegenprepare)' -S %s | FileCheck %s
3
4target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
5target triple = "arm64-apple-ios"
6
7; It's profitable to convert the zext to a shuffle, which in turn will be
8; lowered to 4 tbl instructions. The masks are materialized outside the loop.
; Test shape: a single-block loop that loads <16 x i8> from %src + %iv,
; zero-extends it to <16 x i32> and stores it at %dst indexed by %iv (i32
; elements). %iv starts at 0, advances by 16 and the loop exits once the
; incremented value equals 128.
;
; Expected output: the zext is replaced by a shufflevector producing <64 x i8>
; followed by a bitcast to <16 x i32>. The second shuffle operand is a constant
; whose lane 0 is zero; every group of four mask entries selects one source
; byte (indices 0..15) followed by three copies of index 16, i.e. that zero
; lane. The input load/store carry no explicit alignment; the expected output
; spells out align 16 and align 64 respectively.
9define void @zext_v16i8_to_v16i32_in_loop(ptr %src, ptr %dst) {
10; CHECK-LABEL: @zext_v16i8_to_v16i32_in_loop(
11; CHECK-NEXT:  entry:
12; CHECK-NEXT:    br label [[LOOP:%.*]]
13; CHECK:       loop:
14; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
15; CHECK-NEXT:    [[SRC_GEP:%.*]] = getelementptr i8, ptr [[SRC:%.*]], i64 [[IV]]
16; CHECK-NEXT:    [[LOAD:%.*]] = load <16 x i8>, ptr [[SRC_GEP]], align 16
17; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <16 x i8> [[LOAD]], <16 x i8> <i8 0, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>, <64 x i32> <i32 0, i32 16, i32 16, i32 16, i32 1, i32 16, i32 16, i32 16, i32 2, i32 16, i32 16, i32 16, i32 3, i32 16, i32 16, i32 16, i32 4, i32 16, i32 16, i32 16, i32 5, i32 16, i32 16, i32 16, i32 6, i32 16, i32 16, i32 16, i32 7, i32 16, i32 16, i32 16, i32 8, i32 16, i32 16, i32 16, i32 9, i32 16, i32 16, i32 16, i32 10, i32 16, i32 16, i32 16, i32 11, i32 16, i32 16, i32 16, i32 12, i32 16, i32 16, i32 16, i32 13, i32 16, i32 16, i32 16, i32 14, i32 16, i32 16, i32 16, i32 15, i32 16, i32 16, i32 16>
18; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <64 x i8> [[TMP0]] to <16 x i32>
19; CHECK-NEXT:    [[DST_GEP:%.*]] = getelementptr i32, ptr [[DST:%.*]], i64 [[IV]]
20; CHECK-NEXT:    store <16 x i32> [[TMP1]], ptr [[DST_GEP]], align 64
21; CHECK-NEXT:    [[IV_NEXT]] = add nuw i64 [[IV]], 16
22; CHECK-NEXT:    [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 128
23; CHECK-NEXT:    br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP]]
24; CHECK:       exit:
25; CHECK-NEXT:    ret void
26;
; Input IR fed to the pass starts here; the assertions above describe the
; expected result.
27entry:
28  br label %loop
29
30loop:
31  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
32  %src.gep = getelementptr i8, ptr %src, i64 %iv
33  %load = load <16 x i8>, ptr %src.gep
; The zext under test: i8 -> i32 widening of a 16-lane vector inside the loop.
34  %ext = zext <16 x i8> %load to <16 x i32>
35  %dst.gep = getelementptr i32, ptr %dst, i64 %iv
36  store <16 x i32> %ext, ptr %dst.gep
37  %iv.next = add nuw i64 %iv, 16
38  %ec = icmp eq i64 %iv.next, 128
39  br i1 %ec, label %exit, label %loop
40
41exit:
42  ret void
43}
44
45; Not profitable to use shuffle/tbl, as 4 tbls + materializing the masks
46; require more instructions than lowering zext directly.
; Test shape: the same <16 x i8> -> <16 x i32> zext as the in-loop variant, but
; in a single straight-line entry block with no loop.
;
; Expected output: the zext instruction is still present; only explicit
; alignments (align 16 on the load, align 64 on the store) appear in addition
; to the input IR.
47define void @zext_v16i8_to_v16i32_no_loop(ptr %src, ptr %dst) {
48; CHECK-LABEL: @zext_v16i8_to_v16i32_no_loop(
49; CHECK-NEXT:  entry:
50; CHECK-NEXT:    [[LOAD:%.*]] = load <16 x i8>, ptr [[SRC:%.*]], align 16
51; CHECK-NEXT:    [[EXT:%.*]] = zext <16 x i8> [[LOAD]] to <16 x i32>
52; CHECK-NEXT:    store <16 x i32> [[EXT]], ptr [[DST:%.*]], align 64
53; CHECK-NEXT:    ret void
54;
; Input IR fed to the pass starts here.
55entry:
56  %load = load <16 x i8>, ptr %src
; The zext under test; expected to be left as-is outside of a loop.
57  %ext = zext <16 x i8> %load to <16 x i32>
58  store <16 x i32> %ext, ptr %dst
59  ret void
60}
61
; Test shape: a single-block loop that loads <16 x i8>, zero-extends it to
; <16 x i16> and stores it at %dst indexed by %iv (i16 elements). %iv starts at
; 0, advances by 16 and the loop exits once the incremented value equals 128.
;
; Expected output: the zext is kept unchanged (no shufflevector is introduced)
; even though it sits inside a loop.
; NOTE(review): presumably the i8 -> i16 widening is not considered profitable
; to lower via shuffle/tbl - confirm against the AArch64 lowering code.
62define void @zext_v16i8_to_v16i16_in_loop(ptr %src, ptr %dst) {
63; CHECK-LABEL: @zext_v16i8_to_v16i16_in_loop(
64; CHECK-NEXT:  entry:
65; CHECK-NEXT:    br label [[LOOP:%.*]]
66; CHECK:       loop:
67; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
68; CHECK-NEXT:    [[SRC_GEP:%.*]] = getelementptr i8, ptr [[SRC:%.*]], i64 [[IV]]
69; CHECK-NEXT:    [[LOAD:%.*]] = load <16 x i8>, ptr [[SRC_GEP]], align 16
70; CHECK-NEXT:    [[EXT:%.*]] = zext <16 x i8> [[LOAD]] to <16 x i16>
71; CHECK-NEXT:    [[DST_GEP:%.*]] = getelementptr i16, ptr [[DST:%.*]], i64 [[IV]]
72; CHECK-NEXT:    store <16 x i16> [[EXT]], ptr [[DST_GEP]], align 32
73; CHECK-NEXT:    [[IV_NEXT]] = add nuw i64 [[IV]], 16
74; CHECK-NEXT:    [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 128
75; CHECK-NEXT:    br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP]]
76; CHECK:       exit:
77; CHECK-NEXT:    ret void
78;
; Input IR fed to the pass starts here.
79entry:
80  br label %loop
81
82loop:
83  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
84  %src.gep = getelementptr i8, ptr %src, i64 %iv
85  %load = load <16 x i8>, ptr %src.gep
; The zext under test: i8 -> i16 widening of a 16-lane vector inside the loop.
86  %ext = zext <16 x i8> %load to <16 x i16>
87  %dst.gep = getelementptr i16, ptr %dst, i64 %iv
88  store <16 x i16> %ext, ptr %dst.gep
89  %iv.next = add nuw i64 %iv, 16
90  %ec = icmp eq i64 %iv.next, 128
91  br i1 %ec, label %exit, label %loop
92
93exit:
94  ret void
95}
96
; Test shape: a single-block loop that loads <8 x i8>, zero-extends it to
; <8 x i32> and stores it at %dst indexed by %iv (i32 elements).
;
; Expected output: the zext is replaced by a shufflevector producing <32 x i8>
; followed by a bitcast to <8 x i32>. The second shuffle operand is a constant
; whose lane 0 is zero; every group of four mask entries selects one source
; byte (indices 0..7) followed by three copies of index 8, i.e. that zero lane.
; NOTE(review): %iv advances by 16 per iteration although only 8 elements are
; loaded and stored; this does not affect the rewrite being checked, but
; confirm whether the stride is intentional.
97define void @zext_v8i8_to_v8i32_in_loop(ptr %src, ptr %dst) {
98; CHECK-LABEL: @zext_v8i8_to_v8i32_in_loop(
99; CHECK-NEXT:  entry:
100; CHECK-NEXT:    br label [[LOOP:%.*]]
101; CHECK:       loop:
102; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
103; CHECK-NEXT:    [[SRC_GEP:%.*]] = getelementptr i8, ptr [[SRC:%.*]], i64 [[IV]]
104; CHECK-NEXT:    [[LOAD:%.*]] = load <8 x i8>, ptr [[SRC_GEP]], align 8
105; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <8 x i8> [[LOAD]], <8 x i8> <i8 0, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison, i8 poison>, <32 x i32> <i32 0, i32 8, i32 8, i32 8, i32 1, i32 8, i32 8, i32 8, i32 2, i32 8, i32 8, i32 8, i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8, i32 5, i32 8, i32 8, i32 8, i32 6, i32 8, i32 8, i32 8, i32 7, i32 8, i32 8, i32 8>
106; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <32 x i8> [[TMP0]] to <8 x i32>
107; CHECK-NEXT:    [[DST_GEP:%.*]] = getelementptr i32, ptr [[DST:%.*]], i64 [[IV]]
108; CHECK-NEXT:    store <8 x i32> [[TMP1]], ptr [[DST_GEP]], align 32
109; CHECK-NEXT:    [[IV_NEXT]] = add nuw i64 [[IV]], 16
110; CHECK-NEXT:    [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 128
111; CHECK-NEXT:    br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP]]
112; CHECK:       exit:
113; CHECK-NEXT:    ret void
114;
; Input IR fed to the pass starts here.
115entry:
116  br label %loop
117
118loop:
119  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
120  %src.gep = getelementptr i8, ptr %src, i64 %iv
121  %load = load <8 x i8>, ptr %src.gep
; The zext under test: i8 -> i32 widening of an 8-lane vector inside the loop.
122  %ext = zext <8 x i8> %load to <8 x i32>
123  %dst.gep = getelementptr i32, ptr %dst, i64 %iv
124  store <8 x i32> %ext, ptr %dst.gep
125  %iv.next = add nuw i64 %iv, 16
126  %ec = icmp eq i64 %iv.next, 128
127  br i1 %ec, label %exit, label %loop
128
129exit:
130  ret void
131}
132
; Test shape: a single-block loop that loads <16 x i8>, zero-extends it to
; <16 x i64> and stores it at %dst indexed by %iv (i64 elements). %iv starts at
; 0, advances by 16 and the loop exits once the incremented value equals 128.
;
; Expected output: the zext is kept unchanged (no shufflevector is introduced)
; even though it sits inside a loop; the store is expected with align 128.
; NOTE(review): presumably the i8 -> i64 widening is not handled by the
; shuffle/tbl rewrite - confirm against the AArch64 lowering code.
133define void @zext_v16i8_to_v16i64_in_loop(ptr %src, ptr %dst) {
134; CHECK-LABEL: @zext_v16i8_to_v16i64_in_loop(
135; CHECK-NEXT:  entry:
136; CHECK-NEXT:    br label [[LOOP:%.*]]
137; CHECK:       loop:
138; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
139; CHECK-NEXT:    [[SRC_GEP:%.*]] = getelementptr i8, ptr [[SRC:%.*]], i64 [[IV]]
140; CHECK-NEXT:    [[LOAD:%.*]] = load <16 x i8>, ptr [[SRC_GEP]], align 16
141; CHECK-NEXT:    [[EXT:%.*]] = zext <16 x i8> [[LOAD]] to <16 x i64>
142; CHECK-NEXT:    [[DST_GEP:%.*]] = getelementptr i64, ptr [[DST:%.*]], i64 [[IV]]
143; CHECK-NEXT:    store <16 x i64> [[EXT]], ptr [[DST_GEP]], align 128
144; CHECK-NEXT:    [[IV_NEXT]] = add nuw i64 [[IV]], 16
145; CHECK-NEXT:    [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 128
146; CHECK-NEXT:    br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP]]
147; CHECK:       exit:
148; CHECK-NEXT:    ret void
149;
; Input IR fed to the pass starts here.
150entry:
151  br label %loop
152
153loop:
154  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
155  %src.gep = getelementptr i8, ptr %src, i64 %iv
156  %load = load <16 x i8>, ptr %src.gep
; The zext under test: i8 -> i64 widening of a 16-lane vector inside the loop.
157  %ext = zext <16 x i8> %load to <16 x i64>
158  %dst.gep = getelementptr i64, ptr %dst, i64 %iv
159  store <16 x i64> %ext, ptr %dst.gep
160  %iv.next = add nuw i64 %iv, 16
161  %ec = icmp eq i64 %iv.next, 128
162  br i1 %ec, label %exit, label %loop
163
164exit:
165  ret void
166}
167