; xref: /llvm-project/llvm/test/Analysis/CostModel/AArch64/sve-fixed-length.ll (revision d827865e9f778f5b27edb2afe003c2aa8474fd25)
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s -D#VBITS=128
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=128 | FileCheck %s -D#VBITS=128
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=256 | FileCheck %s -D#VBITS=256
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=384 | FileCheck %s -D#VBITS=256
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=512 | FileCheck %s -D#VBITS=512
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=640 | FileCheck %s -D#VBITS=512
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=768 | FileCheck %s -D#VBITS=512
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=896 | FileCheck %s -D#VBITS=512
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=1024 | FileCheck %s -D#VBITS=1024
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=1152 | FileCheck %s -D#VBITS=1024
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=1280 | FileCheck %s -D#VBITS=1024
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=1408 | FileCheck %s -D#VBITS=1024
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=1536 | FileCheck %s -D#VBITS=1024
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=1664 | FileCheck %s -D#VBITS=1024
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=1792 | FileCheck %s -D#VBITS=1024
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=1920 | FileCheck %s -D#VBITS=1024
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=2048 | FileCheck %s -D#VBITS=2048

; VBITS represents the useful bit size of a vector register from the code
; generator's point of view. It is clamped to power-of-2 values because
; only power-of-2 vector lengths are considered legal, regardless of the
; user specified vector length (e.g. a minimum of 384 bits is checked with
; VBITS=256 in the RUN lines above).

target triple = "aarch64-unknown-linux-gnu"
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"

; Ensure the cost of legalisation is removed as the vector length grows.
; NOTE: The checks below assume a base cost of 1 for both add and fadd, so the
; expected cost is simply the legalisation cost ((vec_len-1)/VBITS)+1, i.e. the
; number of VBITS-sized parts the vector is split into.
define void @add() #0 {
; CHECK-LABEL: function 'add'
; CHECK: cost of [[#div(127,VBITS)+1]] for instruction:   %add128 = add <4 x i32> undef, undef
; CHECK: cost of [[#div(255,VBITS)+1]] for instruction:   %add256 = add <8 x i32> undef, undef
; CHECK: cost of [[#div(511,VBITS)+1]] for instruction:   %add512 = add <16 x i32> undef, undef
; CHECK: cost of [[#div(1023,VBITS)+1]] for instruction:   %add1024 = add <32 x i32> undef, undef
; CHECK: cost of [[#div(2047,VBITS)+1]] for instruction:   %add2048 = add <64 x i32> undef, undef
  ; One i32 add per vector width from 128 to 2048 bits.
  %add128 = add <4 x i32> undef, undef
  %add256 = add <8 x i32> undef, undef
  %add512 = add <16 x i32> undef, undef
  %add1024 = add <32 x i32> undef, undef
  %add2048 = add <64 x i32> undef, undef

; Using a single vector length, ensure all element types are recognised.
; CHECK: cost of [[#div(511,VBITS)+1]] for instruction:   %add512.i8 = add <64 x i8> undef, undef
; CHECK: cost of [[#div(511,VBITS)+1]] for instruction:   %add512.i16 = add <32 x i16> undef, undef
; CHECK: cost of [[#div(511,VBITS)+1]] for instruction:   %add512.i32 = add <16 x i32> undef, undef
; CHECK: cost of [[#div(511,VBITS)+1]] for instruction:   %add512.i64 = add <8 x i64> undef, undef
; CHECK: cost of [[#div(511,VBITS)+1]] for instruction:   %add512.f16 = fadd <32 x half> undef, undef
; CHECK: cost of [[#div(511,VBITS)+1]] for instruction:   %add512.f32 = fadd <16 x float> undef, undef
; CHECK: cost of [[#div(511,VBITS)+1]] for instruction:   %add512.f64 = fadd <8 x double> undef, undef
  ; 512-bit vectors of every integer and floating-point element type.
  %add512.i8 = add <64 x i8> undef, undef
  %add512.i16 = add <32 x i16> undef, undef
  %add512.i32 = add <16 x i32> undef, undef
  %add512.i64 = add <8 x i64> undef, undef
  %add512.f16 = fadd <32 x half> undef, undef
  %add512.f32 = fadd <16 x float> undef, undef
  %add512.f64 = fadd <8 x double> undef, undef

  ret void
}

; Assuming base_cost = 2
; Assuming legalization_cost = ((vec_len-1)/VBITS)+1
; For fixed-length vectors >= 128, if element type is i8, multiply the cost by 8.
; For fixed-length vectors >= 128, if element type is i16, multiply the cost by 4.
; Vectors narrower than 128 bits are checked against fixed costs that do not
; depend on VBITS.
define void @sdiv() #0 {
; CHECK-LABEL: function 'sdiv'

; CHECK: cost of 5 for instruction:  %sdiv16.i8   = sdiv <2 x i8> undef, undef
  %sdiv16.i8   = sdiv <2 x i8> undef, undef

; CHECK: cost of 8 for instruction:  %sdiv32.i8   = sdiv <4 x i8> undef, undef
  %sdiv32.i8   = sdiv <4 x i8> undef, undef

; CHECK: cost of 5 for instruction:  %sdiv32.i16   = sdiv <2 x i16> undef, undef
  %sdiv32.i16  = sdiv <2 x i16> undef, undef

; CHECK: cost of 8 for instruction:  %sdiv64.i8   = sdiv <8 x i8> undef, undef
  %sdiv64.i8   = sdiv <8 x i8> undef, undef

; CHECK: cost of 5 for instruction:  %sdiv64.i16   = sdiv <4 x i16> undef, undef
  %sdiv64.i16  = sdiv <4 x i16> undef, undef

; CHECK: cost of 1 for instruction:  %sdiv64.i32   = sdiv <2 x i32> undef, undef
  %sdiv64.i32  = sdiv <2 x i32> undef, undef

; CHECK: cost of [[#mul(mul(div(128-1, VBITS)+1, 2), 8)]] for instruction:  %sdiv128.i8   = sdiv <16 x i8> undef, undef
  %sdiv128.i8 = sdiv <16 x i8> undef, undef

; CHECK: cost of [[#mul(mul(div(128-1, VBITS)+1, 2), 4)]] for instruction:  %sdiv128.i16   = sdiv <8 x i16> undef, undef
  %sdiv128.i16 = sdiv <8 x i16> undef, undef

; CHECK: cost of [[#mul(div(128-1, VBITS)+1, 2)]] for instruction:  %sdiv128.i64   = sdiv <2 x i64> undef, undef
  %sdiv128.i64 = sdiv <2 x i64> undef, undef

; CHECK: cost of [[#mul(mul(div(512-1, VBITS)+1, 2), 8)]] for instruction:  %sdiv512.i8   = sdiv <64 x i8> undef, undef
  %sdiv512.i8  = sdiv <64 x i8> undef, undef

; CHECK: cost of [[#mul(mul(div(512-1, VBITS)+1, 2), 4)]] for instruction:  %sdiv512.i16   = sdiv <32 x i16> undef, undef
  %sdiv512.i16 = sdiv <32 x i16> undef, undef

; CHECK: cost of [[#mul(div(512-1, VBITS)+1, 2)]] for instruction:  %sdiv512.i32   = sdiv <16 x i32> undef, undef
  %sdiv512.i32 = sdiv <16 x i32> undef, undef

; CHECK: cost of [[#mul(div(512-1, VBITS)+1, 2)]] for instruction:  %sdiv512.i64   = sdiv <8 x i64> undef, undef
  %sdiv512.i64 = sdiv <8 x i64> undef, undef

  ret void
}

; Assuming base_cost = 2
; Assuming legalization_cost = ((vec_len-1)/VBITS)+1
; For fixed-length vectors >= 128, if element type is i8, multiply the cost by 8.
; For fixed-length vectors >= 128, if element type is i16, multiply the cost by 4.
; Vectors narrower than 128 bits are checked against fixed costs that do not
; depend on VBITS.
define void @udiv() #0 {
; CHECK-LABEL: function 'udiv'

; CHECK: cost of 5 for instruction:  %udiv16.i8   = udiv <2 x i8> undef, undef
  %udiv16.i8   = udiv <2 x i8> undef, undef

; CHECK: cost of 8 for instruction:  %udiv32.i8   = udiv <4 x i8> undef, undef
  %udiv32.i8   = udiv <4 x i8> undef, undef

; CHECK: cost of 5 for instruction:  %udiv32.i16   = udiv <2 x i16> undef, undef
  %udiv32.i16  = udiv <2 x i16> undef, undef

; CHECK: cost of 8 for instruction:  %udiv64.i8   = udiv <8 x i8> undef, undef
  %udiv64.i8   = udiv <8 x i8> undef, undef

; CHECK: cost of 5 for instruction:  %udiv64.i16   = udiv <4 x i16> undef, undef
  %udiv64.i16  = udiv <4 x i16> undef, undef

; CHECK: cost of 1 for instruction:  %udiv64.i32   = udiv <2 x i32> undef, undef
  %udiv64.i32  = udiv <2 x i32> undef, undef

; CHECK: cost of [[#mul(mul(div(128-1, VBITS)+1, 2), 8)]] for instruction:  %udiv128.i8   = udiv <16 x i8> undef, undef
  %udiv128.i8 = udiv <16 x i8> undef, undef

; CHECK: cost of [[#mul(mul(div(128-1, VBITS)+1, 2), 4)]] for instruction:  %udiv128.i16   = udiv <8 x i16> undef, undef
  %udiv128.i16 = udiv <8 x i16> undef, undef

; CHECK: cost of [[#mul(div(128-1, VBITS)+1, 2)]] for instruction:  %udiv128.i64   = udiv <2 x i64> undef, undef
  %udiv128.i64 = udiv <2 x i64> undef, undef

; CHECK: cost of [[#mul(mul(div(512-1, VBITS)+1, 2), 8)]] for instruction:  %udiv512.i8   = udiv <64 x i8> undef, undef
  %udiv512.i8  = udiv <64 x i8> undef, undef

; CHECK: cost of [[#mul(mul(div(512-1, VBITS)+1, 2), 4)]] for instruction:  %udiv512.i16   = udiv <32 x i16> undef, undef
  %udiv512.i16 = udiv <32 x i16> undef, undef

; CHECK: cost of [[#mul(div(512-1, VBITS)+1, 2)]] for instruction:  %udiv512.i32   = udiv <16 x i32> undef, undef
  %udiv512.i32 = udiv <16 x i32> undef, undef

; CHECK: cost of [[#mul(div(512-1, VBITS)+1, 2)]] for instruction:  %udiv512.i64   = udiv <8 x i64> undef, undef
  %udiv512.i64 = udiv <8 x i64> undef, undef

  ret void
}

; The expected cost scales with VBITS rather than being hard-coded:
; Assuming base_cost = 1
; Assuming legalization_cost = ((vec_len-1)/VBITS)+1
define void @mul() #0 {
; CHECK-LABEL: function 'mul'

; CHECK: cost of [[#div(128-1, VBITS)+1]] for instruction:  %mul128.i64  = mul <2 x i64> undef, undef
  %mul128.i64 = mul <2 x i64> undef, undef

; CHECK: cost of [[#div(512-1, VBITS)+1]] for instruction:  %mul512.i64 = mul <8 x i64> undef, undef
  %mul512.i64 = mul <8 x i64> undef, undef

  ret void
}

; Attribute group #0, referenced by the test functions, enables SVE.
attributes #0 = { "target-features"="+sve" }
