; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output | FileCheck %s -D#VBITS=128
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=128 | FileCheck %s -D#VBITS=128
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=256 | FileCheck %s -D#VBITS=256
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=384 | FileCheck %s -D#VBITS=256
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=512 | FileCheck %s -D#VBITS=512
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=640 | FileCheck %s -D#VBITS=512
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=768 | FileCheck %s -D#VBITS=512
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=896 | FileCheck %s -D#VBITS=512
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=1024 | FileCheck %s -D#VBITS=1024
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=1152 | FileCheck %s -D#VBITS=1024
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=1280 | FileCheck %s -D#VBITS=1024
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=1408 | FileCheck %s -D#VBITS=1024
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=1536 | FileCheck %s -D#VBITS=1024
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=1664 | FileCheck %s -D#VBITS=1024
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=1792 | FileCheck %s -D#VBITS=1024
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=1920 | FileCheck %s -D#VBITS=1024
; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -aarch64-sve-vector-bits-min=2048 | FileCheck %s -D#VBITS=2048

; VBITS represents the useful bit size of a vector register from the code
; generator's point of view. It is clamped to power-of-2 values because
; only power-of-2 vector lengths are considered legal, regardless of the
; user specified vector length. The first invocation passes no minimum
; vector length and is checked with VBITS=128.

target triple = "aarch64-unknown-linux-gnu"
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"

; Ensure the cost of legalisation is removed as the vector length grows.
; NOTE: The expected costs below are the bare legalisation cost,
; ((vec_len-1)/VBITS)+1, for both add and fadd, i.e. they assume a base cost
; of 1 for each operation.
define void @add() #0 {
; CHECK-LABEL: function 'add'
; CHECK: cost of [[#div(127,VBITS)+1]] for instruction: %add128 = add <4 x i32> undef, undef
; CHECK: cost of [[#div(255,VBITS)+1]] for instruction: %add256 = add <8 x i32> undef, undef
; CHECK: cost of [[#div(511,VBITS)+1]] for instruction: %add512 = add <16 x i32> undef, undef
; CHECK: cost of [[#div(1023,VBITS)+1]] for instruction: %add1024 = add <32 x i32> undef, undef
; CHECK: cost of [[#div(2047,VBITS)+1]] for instruction: %add2048 = add <64 x i32> undef, undef
  %add128 = add <4 x i32> undef, undef
  %add256 = add <8 x i32> undef, undef
  %add512 = add <16 x i32> undef, undef
  %add1024 = add <32 x i32> undef, undef
  %add2048 = add <64 x i32> undef, undef

; Using a single vector length, ensure all element types are recognised.
; CHECK: cost of [[#div(511,VBITS)+1]] for instruction: %add512.i8 = add <64 x i8> undef, undef
; CHECK: cost of [[#div(511,VBITS)+1]] for instruction: %add512.i16 = add <32 x i16> undef, undef
; CHECK: cost of [[#div(511,VBITS)+1]] for instruction: %add512.i32 = add <16 x i32> undef, undef
; CHECK: cost of [[#div(511,VBITS)+1]] for instruction: %add512.i64 = add <8 x i64> undef, undef
; CHECK: cost of [[#div(511,VBITS)+1]] for instruction: %add512.f16 = fadd <32 x half> undef, undef
; CHECK: cost of [[#div(511,VBITS)+1]] for instruction: %add512.f32 = fadd <16 x float> undef, undef
; CHECK: cost of [[#div(511,VBITS)+1]] for instruction: %add512.f64 = fadd <8 x double> undef, undef
  %add512.i8 = add <64 x i8> undef, undef
  %add512.i16 = add <32 x i16> undef, undef
  %add512.i32 = add <16 x i32> undef, undef
  %add512.i64 = add <8 x i64> undef, undef
  %add512.f16 = fadd <32 x half> undef, undef
  %add512.f32 = fadd <16 x float> undef, undef
  %add512.f64 = fadd <8 x double> undef, undef

  ret void
}

; Assuming base_cost = 2
; Assuming legalization_cost = ((vec_len-1)/VBITS)+1
; For fixed-length vectors >= 128, if element type is i8, multiply the cost by 8.
; For fixed-length vectors >= 128, if element type is i16, multiply the cost by 4.
define void @sdiv() #0 {
; CHECK-LABEL: function 'sdiv'

; Vectors narrower than 128 bits have fixed expected costs that do not
; depend on VBITS.
; CHECK: cost of 5 for instruction: %sdiv16.i8 = sdiv <2 x i8> undef, undef
  %sdiv16.i8 = sdiv <2 x i8> undef, undef

; CHECK: cost of 8 for instruction: %sdiv32.i8 = sdiv <4 x i8> undef, undef
  %sdiv32.i8 = sdiv <4 x i8> undef, undef

; CHECK: cost of 5 for instruction: %sdiv32.i16 = sdiv <2 x i16> undef, undef
  %sdiv32.i16 = sdiv <2 x i16> undef, undef

; CHECK: cost of 8 for instruction: %sdiv64.i8 = sdiv <8 x i8> undef, undef
  %sdiv64.i8 = sdiv <8 x i8> undef, undef

; CHECK: cost of 5 for instruction: %sdiv64.i16 = sdiv <4 x i16> undef, undef
  %sdiv64.i16 = sdiv <4 x i16> undef, undef

; CHECK: cost of 1 for instruction: %sdiv64.i32 = sdiv <2 x i32> undef, undef
  %sdiv64.i32 = sdiv <2 x i32> undef, undef

; From 128 bits upwards the expected cost is
; legalization_cost * base_cost * element-type multiplier.
; CHECK: cost of [[#mul(mul(div(128-1, VBITS)+1, 2), 8)]] for instruction: %sdiv128.i8 = sdiv <16 x i8> undef, undef
  %sdiv128.i8 = sdiv <16 x i8> undef, undef

; CHECK: cost of [[#mul(mul(div(128-1, VBITS)+1, 2), 4)]] for instruction: %sdiv128.i16 = sdiv <8 x i16> undef, undef
  %sdiv128.i16 = sdiv <8 x i16> undef, undef

; CHECK: cost of [[#mul(div(128-1, VBITS)+1, 2)]] for instruction: %sdiv128.i64 = sdiv <2 x i64> undef, undef
  %sdiv128.i64 = sdiv <2 x i64> undef, undef

; CHECK: cost of [[#mul(mul(div(512-1, VBITS)+1, 2), 8)]] for instruction: %sdiv512.i8 = sdiv <64 x i8> undef, undef
  %sdiv512.i8 = sdiv <64 x i8> undef, undef

; CHECK: cost of [[#mul(mul(div(512-1, VBITS)+1, 2), 4)]] for instruction: %sdiv512.i16 = sdiv <32 x i16> undef, undef
  %sdiv512.i16 = sdiv <32 x i16> undef, undef

; CHECK: cost of [[#mul(div(512-1, VBITS)+1, 2)]] for instruction: %sdiv512.i32 = sdiv <16 x i32> undef, undef
  %sdiv512.i32 = sdiv <16 x i32> undef, undef

; CHECK: cost of [[#mul(div(512-1, VBITS)+1, 2)]] for instruction: %sdiv512.i64 = sdiv <8 x i64> undef, undef
  %sdiv512.i64 = sdiv <8 x i64> undef, undef

  ret void
}

; Assuming base_cost = 2
; Assuming legalization_cost = ((vec_len-1)/VBITS)+1
; For fixed-length vectors >= 128, if element type is i8, multiply the cost by 8.
; For fixed-length vectors >= 128, if element type is i16, multiply the cost by 4.
define void @udiv() #0 {
; CHECK-LABEL: function 'udiv'

; Vectors narrower than 128 bits have fixed expected costs that do not
; depend on VBITS.
; CHECK: cost of 5 for instruction: %udiv16.i8 = udiv <2 x i8> undef, undef
  %udiv16.i8 = udiv <2 x i8> undef, undef

; CHECK: cost of 8 for instruction: %udiv32.i8 = udiv <4 x i8> undef, undef
  %udiv32.i8 = udiv <4 x i8> undef, undef

; CHECK: cost of 5 for instruction: %udiv32.i16 = udiv <2 x i16> undef, undef
  %udiv32.i16 = udiv <2 x i16> undef, undef

; CHECK: cost of 8 for instruction: %udiv64.i8 = udiv <8 x i8> undef, undef
  %udiv64.i8 = udiv <8 x i8> undef, undef

; CHECK: cost of 5 for instruction: %udiv64.i16 = udiv <4 x i16> undef, undef
  %udiv64.i16 = udiv <4 x i16> undef, undef

; CHECK: cost of 1 for instruction: %udiv64.i32 = udiv <2 x i32> undef, undef
  %udiv64.i32 = udiv <2 x i32> undef, undef

; From 128 bits upwards the expected cost is
; legalization_cost * base_cost * element-type multiplier.
; CHECK: cost of [[#mul(mul(div(128-1, VBITS)+1, 2), 8)]] for instruction: %udiv128.i8 = udiv <16 x i8> undef, undef
  %udiv128.i8 = udiv <16 x i8> undef, undef

; CHECK: cost of [[#mul(mul(div(128-1, VBITS)+1, 2), 4)]] for instruction: %udiv128.i16 = udiv <8 x i16> undef, undef
  %udiv128.i16 = udiv <8 x i16> undef, undef

; CHECK: cost of [[#mul(div(128-1, VBITS)+1, 2)]] for instruction: %udiv128.i64 = udiv <2 x i64> undef, undef
  %udiv128.i64 = udiv <2 x i64> undef, undef

; CHECK: cost of [[#mul(mul(div(512-1, VBITS)+1, 2), 8)]] for instruction: %udiv512.i8 = udiv <64 x i8> undef, undef
  %udiv512.i8 = udiv <64 x i8> undef, undef

; CHECK: cost of [[#mul(mul(div(512-1, VBITS)+1, 2), 4)]] for instruction: %udiv512.i16 = udiv <32 x i16> undef, undef
  %udiv512.i16 = udiv <32 x i16> undef, undef

; CHECK: cost of [[#mul(div(512-1, VBITS)+1, 2)]] for instruction: %udiv512.i32 = udiv <16 x i32> undef, undef
  %udiv512.i32 = udiv <16 x i32> undef, undef

; CHECK: cost of [[#mul(div(512-1, VBITS)+1, 2)]] for instruction: %udiv512.i64 = udiv <8 x i64> undef, undef
  %udiv512.i64 = udiv <8 x i64> undef, undef

  ret void
}

; The expected cost of a 64-bit element multiply is the bare legalisation
; cost, ((vec_len-1)/VBITS)+1, so it shrinks as VBITS grows.
define void @mul() #0 {
; CHECK-LABEL: function 'mul'

; CHECK: cost of [[#div(128-1, VBITS)+1]] for instruction: %mul128.i64 = mul <2 x i64> undef, undef
  %mul128.i64 = mul <2 x i64> undef, undef

; CHECK: cost of [[#div(512-1, VBITS)+1]] for instruction: %mul512.i64 = mul <8 x i64> undef, undef
  %mul512.i64 = mul <8 x i64> undef, undef

  ret void
}

; Every function in this file is compiled with SVE enabled.
attributes #0 = { "target-features"="+sve" }