; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
; RUN: opt -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=throughput < %s | FileCheck %s --check-prefix=THRU
; RUN: opt -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency < %s | FileCheck %s --check-prefix=LATE
; RUN: opt -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=code-size < %s | FileCheck %s --check-prefix=SIZE
; RUN: opt -mtriple=x86_64-- -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency < %s | FileCheck %s --check-prefix=SIZE_LATE

; Test a cross-section of intrinsics for various cost-kinds.
; Other test files may check for accuracy of a particular intrinsic
; across subtargets or types. This is just a basic correctness check using the
; default x86 target and a legal scalar type (i32/float) and/or an
; illegal vector type (16 x i32/float).
;
; Each invocation above runs the cost-model printer with one cost kind
; (throughput, latency, code-size, size-latency) and verifies the printed
; per-instruction costs under its own check prefix. The expected values are
; autogenerated; regenerate them with the script named on the first line
; instead of editing them by hand.

; Integer arithmetic intrinsics (scalar i32 and <16 x i32> variants).
declare {i32, i1} @llvm.umul.with.overflow.i32(i32, i32)
declare {<16 x i32>, <16 x i1>} @llvm.umul.with.overflow.v16i32(<16 x i32>, <16 x i32>)

declare i32 @llvm.smax.i32(i32, i32)
declare <16 x i32> @llvm.smax.v16i32(<16 x i32>, <16 x i32>)

; Floating-point intrinsics (scalar float and <16 x float> variants).
declare float @llvm.copysign.f32(float, float)
declare <16 x float> @llvm.copysign.v16f32(<16 x float>, <16 x float>)

declare float @llvm.fmuladd.f32(float, float, float)
declare <16 x float> @llvm.fmuladd.v16f32(<16 x float>, <16 x float>, <16 x float>)

declare float @llvm.log2.f32(float)
declare <16 x float> @llvm.log2.v16f32(<16 x float>)

declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
declare <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float>, <16 x float>, metadata, metadata)

declare float @llvm.maximum.f32(float, float)
declare <16 x float> @llvm.maximum.v16f32(<16 x float>, <16 x float>)

; Bit-counting and funnel-shift intrinsics.
declare i32 @llvm.cttz.i32(i32, i1)
declare <16 x i32> @llvm.cttz.v16i32(<16 x i32>, i1)

declare i32 @llvm.ctlz.i32(i32, i1)
declare <16 x i32> @llvm.ctlz.v16i32(<16 x i32>, i1)

declare i32 @llvm.fshl.i32(i32, i32, i32)
declare <16 x i32> @llvm.fshl.v16i32(<16 x i32>, <16 x i32>, <16 x i32>)

; Masked memory and vector reduction intrinsics (vector-only).
declare <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x float>)
declare void @llvm.masked.scatter.v16f32.v16p0(<16 x float>, <16 x ptr>, i32, <16 x i1>)
declare float @llvm.vector.reduce.fmax.v16f32(<16 x float>)
declare float @llvm.vector.reduce.fmul.v16f32(float, <16 x float>)
declare float @llvm.vector.reduce.fadd.v16f32(float, <16 x float>)

; Memory intrinsic.
declare void @llvm.memcpy.p0.p0.i32(ptr, ptr, i32, i1)

; Unsigned multiply-with-overflow on i32 and on <16 x i32> operands.
define void @umul(i32 %a, i32 %b, <16 x i32> %va, <16 x i32> %vb) {
; THRU-LABEL: 'umul'
; THRU-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %s = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
; THRU-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %v = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> %va, <16 x i32> %vb)
; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; LATE-LABEL: 'umul'
; LATE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %s = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
; LATE-NEXT: Cost Model: Found an estimated cost of 88 for instruction: %v = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> %va, <16 x i32> %vb)
; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE-LABEL: 'umul'
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
; SIZE-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %v = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> %va, <16 x i32> %vb)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE_LATE-LABEL: 'umul'
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %s = call { i32, i1 } @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %v = call { <16 x i32>, <16 x i1> } @llvm.umul.with.overflow.v16i32(<16 x i32> %va, <16 x i32> %vb)
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  %s = call {i32, i1} @llvm.umul.with.overflow.i32(i32 %a, i32 %b)
  %v = call {<16 x i32>, <16 x i1>} @llvm.umul.with.overflow.v16i32(<16 x i32> %va, <16 x i32> %vb)
  ret void
}

; Signed integer maximum on i32 and on <16 x i32> operands.
define void @smax(i32 %a, i32 %b, <16 x i32> %va, <16 x i32> %vb) {
; THRU-LABEL: 'smax'
; THRU-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.smax.i32(i32 %a, i32 %b)
; THRU-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %va, <16 x i32> %vb)
; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; LATE-LABEL: 'smax'
; LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.smax.i32(i32 %a, i32 %b)
; LATE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %va, <16 x i32> %vb)
; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE-LABEL: 'smax'
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.smax.i32(i32 %a, i32 %b)
; SIZE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %va, <16 x i32> %vb)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE_LATE-LABEL: 'smax'
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %s = call i32 @llvm.smax.i32(i32 %a, i32 %b)
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %v = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %va, <16 x i32> %vb)
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  %s = call i32 @llvm.smax.i32(i32 %a, i32 %b)
  %v = call <16 x i32> @llvm.smax.v16i32(<16 x i32> %va, <16 x i32> %vb)
  ret void
}

; Floating-point copysign on float and on <16 x float> operands.
define void @fcopysign(float %a, float %b, <16 x float> %va, <16 x float> %vb) {
; THRU-LABEL: 'fcopysign'
; THRU-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s = call float @llvm.copysign.f32(float %a, float %b)
; THRU-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x float> @llvm.copysign.v16f32(<16 x float> %va, <16 x float> %vb)
; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; LATE-LABEL: 'fcopysign'
; LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s = call float @llvm.copysign.f32(float %a, float %b)
; LATE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x float> @llvm.copysign.v16f32(<16 x float> %va, <16 x float> %vb)
; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE-LABEL: 'fcopysign'
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s = call float @llvm.copysign.f32(float %a, float %b)
; SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x float> @llvm.copysign.v16f32(<16 x float> %va, <16 x float> %vb)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE_LATE-LABEL: 'fcopysign'
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s = call float @llvm.copysign.f32(float %a, float %b)
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x float> @llvm.copysign.v16f32(<16 x float> %va, <16 x float> %vb)
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  %s = call float @llvm.copysign.f32(float %a, float %b)
  %v = call <16 x float> @llvm.copysign.v16f32(<16 x float> %va, <16 x float> %vb)
  ret void
}

; Floating-point multiply-add (llvm.fmuladd) on float and on <16 x float> operands.
define void @fmuladd(float %a, float %b, float %c, <16 x float> %va, <16 x float> %vb, <16 x float> %vc) {
; THRU-LABEL: 'fmuladd'
; THRU-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
; THRU-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc)
; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; LATE-LABEL: 'fmuladd'
; LATE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %s = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
; LATE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc)
; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE-LABEL: 'fmuladd'
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
; SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE_LATE-LABEL: 'fmuladd'
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc)
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  %s = call float @llvm.fmuladd.f32(float %a, float %b, float %c)
  %v = call <16 x float> @llvm.fmuladd.v16f32(<16 x float> %va, <16 x float> %vb, <16 x float> %vc)
  ret void
}

; Base-2 logarithm on float and on <16 x float> operands.
define void @log2(float %a, <16 x float> %va) {
; THRU-LABEL: 'log2'
; THRU-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %s = call float @llvm.log2.f32(float %a)
; THRU-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %v = call <16 x float> @llvm.log2.v16f32(<16 x float> %va)
; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; LATE-LABEL: 'log2'
; LATE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %s = call float @llvm.log2.f32(float %a)
; LATE-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %v = call <16 x float> @llvm.log2.v16f32(<16 x float> %va)
; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE-LABEL: 'log2'
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call float @llvm.log2.f32(float %a)
; SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v = call <16 x float> @llvm.log2.v16f32(<16 x float> %va)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE_LATE-LABEL: 'log2'
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %s = call float @llvm.log2.f32(float %a)
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 184 for instruction: %v = call <16 x float> @llvm.log2.v16f32(<16 x float> %va)
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  %s = call float @llvm.log2.f32(float %a)
  %v = call <16 x float> @llvm.log2.v16f32(<16 x float> %va)
  ret void
}

; Constrained fadd in a strictfp function; each call adds its operand to
; itself with "round.dynamic" rounding and "fpexcept.ignore" exception
; behavior. Note the vector result is named %t here, not %v.
define void @constrained_fadd(float %a, <16 x float> %va) strictfp {
; THRU-LABEL: 'constrained_fadd'
; THRU-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.dynamic", metadata !"fpexcept.ignore")
; THRU-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t = call <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float> %va, <16 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.ignore")
; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; LATE-LABEL: 'constrained_fadd'
; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.dynamic", metadata !"fpexcept.ignore")
; LATE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t = call <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float> %va, <16 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.ignore")
; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE-LABEL: 'constrained_fadd'
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.dynamic", metadata !"fpexcept.ignore")
; SIZE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t = call <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float> %va, <16 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.ignore")
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE_LATE-LABEL: 'constrained_fadd'
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.dynamic", metadata !"fpexcept.ignore")
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %t = call <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float> %va, <16 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.ignore")
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  %s = call float @llvm.experimental.constrained.fadd.f32(float %a, float %a, metadata !"round.dynamic", metadata !"fpexcept.ignore")
  %t = call <16 x float> @llvm.experimental.constrained.fadd.v16f32(<16 x float> %va, <16 x float> %va, metadata !"round.dynamic", metadata !"fpexcept.ignore")
  ret void
}

; Floating-point llvm.maximum on float and on <16 x float> operands.
define void @fmaximum(float %a, float %b, <16 x float> %va, <16 x float> %vb) {
; THRU-LABEL: 'fmaximum'
; THRU-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s = call float @llvm.maximum.f32(float %a, float %b)
; THRU-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb)
; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; LATE-LABEL: 'fmaximum'
; LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s = call float @llvm.maximum.f32(float %a, float %b)
; LATE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb)
; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE-LABEL: 'fmaximum'
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s = call float @llvm.maximum.f32(float %a, float %b)
; SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE_LATE-LABEL: 'fmaximum'
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s = call float @llvm.maximum.f32(float %a, float %b)
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb)
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  %s = call float @llvm.maximum.f32(float %a, float %b)
  %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb)
  ret void
}

; Count trailing zeros on i32 and on <16 x i32>; the i1 flag operand is false.
define void @cttz(i32 %a, <16 x i32> %va) {
; THRU-LABEL: 'cttz'
; THRU-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false)
; THRU-NEXT: Cost Model: Found an estimated cost of 72 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false)
; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; LATE-LABEL: 'cttz'
; LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false)
; LATE-NEXT: Cost Model: Found an estimated cost of 124 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false)
; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE-LABEL: 'cttz'
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false)
; SIZE-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE_LATE-LABEL: 'cttz'
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.cttz.i32(i32 %a, i1 false)
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 104 for instruction: %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false)
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  %s = call i32 @llvm.cttz.i32(i32 %a, i1 false)
  %v = call <16 x i32> @llvm.cttz.v16i32(<16 x i32> %va, i1 false)
  ret void
}

; Count leading zeros on i32 and on <16 x i32>; the i1 flag operand is true
; (unlike the cttz test, which passes false).
define void @ctlz(i32 %a, <16 x i32> %va) {
; THRU-LABEL: 'ctlz'
; THRU-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true)
; THRU-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true)
; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; LATE-LABEL: 'ctlz'
; LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true)
; LATE-NEXT: Cost Model: Found an estimated cost of 180 for instruction: %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true)
; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE-LABEL: 'ctlz'
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true)
; SIZE-NEXT: Cost Model: Found an estimated cost of 152 for instruction: %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE_LATE-LABEL: 'ctlz'
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true)
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true)
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  %s = call i32 @llvm.ctlz.i32(i32 %a, i1 true)
  %v = call <16 x i32> @llvm.ctlz.v16i32(<16 x i32> %va, i1 true)
  ret void
}

; Funnel shift left with a variable shift amount on i32 and on <16 x i32>.
define void @fshl(i32 %a, i32 %b, i32 %c, <16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc) {
; THRU-LABEL: 'fshl'
; THRU-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
; THRU-NEXT: Cost Model: Found an estimated cost of 140 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; LATE-LABEL: 'fshl'
; LATE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
; LATE-NEXT: Cost Model: Found an estimated cost of 145 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE-LABEL: 'fshl'
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
; SIZE-NEXT: Cost Model: Found an estimated cost of 125 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE_LATE-LABEL: 'fshl'
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 149 for instruction: %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  %s = call i32 @llvm.fshl.i32(i32 %a, i32 %b, i32 %c)
  %v = call <16 x i32> @llvm.fshl.v16i32(<16 x i32> %va, <16 x i32> %vb, <16 x i32> %vc)
  ret void
}

; Masked gather of <16 x float> through a vector of pointers; the i32
; operand is 1, the mask is %vb and the fourth operand is %vc.
define void @maskedgather(<16 x ptr> %va, <16 x i1> %vb, <16 x float> %vc) {
; THRU-LABEL: 'maskedgather'
; THRU-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %va, i32 1, <16 x i1> %vb, <16 x float> %vc)
; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; LATE-LABEL: 'maskedgather'
; LATE-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %va, i32 1, <16 x i1> %vb, <16 x float> %vc)
; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE-LABEL: 'maskedgather'
; SIZE-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %va, i32 1, <16 x i1> %vb, <16 x float> %vc)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE_LATE-LABEL: 'maskedgather'
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %va, i32 1, <16 x i1> %vb, <16 x float> %vc)
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  %v = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %va, i32 1, <16 x i1> %vb, <16 x float> %vc)
  ret void
}

; Masked scatter of <16 x float> through a vector of pointers; the i32
; operand is 1 and the mask is %vc.
define void @maskedscatter(<16 x float> %va, <16 x ptr> %vb, <16 x i1> %vc) {
; THRU-LABEL: 'maskedscatter'
; THRU-NEXT: Cost Model: Found an estimated cost of 61 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> %va, <16 x ptr> %vb, i32 1, <16 x i1> %vc)
; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; LATE-LABEL: 'maskedscatter'
; LATE-NEXT: Cost Model: Found an estimated cost of 77 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> %va, <16 x ptr> %vb, i32 1, <16 x i1> %vc)
; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE-LABEL: 'maskedscatter'
; SIZE-NEXT: Cost Model: Found an estimated cost of 77 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> %va, <16 x ptr> %vb, i32 1, <16 x i1> %vc)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE_LATE-LABEL: 'maskedscatter'
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 77 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> %va, <16 x ptr> %vb, i32 1, <16 x i1> %vc)
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> %va, <16 x ptr> %vb, i32 1, <16 x i1> %vc)
  ret void
}

; Floating-point max reduction of a <16 x float> vector.
define void @reduce_fmax(<16 x float> %va) {
; THRU-LABEL: 'reduce_fmax'
; THRU-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va)
; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; LATE-LABEL: 'reduce_fmax'
; LATE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va)
; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE-LABEL: 'reduce_fmax'
; SIZE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE_LATE-LABEL: 'reduce_fmax'
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va)
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  %v = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %va)
  ret void
}

; Floating-point multiply reduction of a <16 x float> vector with start
; value 42.0; the call carries no fast-math flags.
define void @reduce_fmul(<16 x float> %va) {
; THRU-LABEL: 'reduce_fmul'
; THRU-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %v = call float @llvm.vector.reduce.fmul.v16f32(float 4.200000e+01, <16 x float> %va)
; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; LATE-LABEL: 'reduce_fmul'
; LATE-NEXT: Cost Model: Found an estimated cost of 92 for instruction: %v = call float @llvm.vector.reduce.fmul.v16f32(float 4.200000e+01, <16 x float> %va)
; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE-LABEL: 'reduce_fmul'
; SIZE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v = call float @llvm.vector.reduce.fmul.v16f32(float 4.200000e+01, <16 x float> %va)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE_LATE-LABEL: 'reduce_fmul'
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %v = call float @llvm.vector.reduce.fmul.v16f32(float 4.200000e+01, <16 x float> %va)
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  %v = call float @llvm.vector.reduce.fmul.v16f32(float 42.0, <16 x float> %va)
  ret void
}

; Floating-point add reduction of a <16 x float> vector with start value 0.0;
; the call carries the 'fast' flag.
define void @reduce_fadd_fast(<16 x float> %va) {
; THRU-LABEL: 'reduce_fadd_fast'
; THRU-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %v = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> %va)
; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; LATE-LABEL: 'reduce_fadd_fast'
; LATE-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %v = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> %va)
; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE-LABEL: 'reduce_fadd_fast'
; SIZE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> %va)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE_LATE-LABEL: 'reduce_fadd_fast'
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %v = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.000000e+00, <16 x float> %va)
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  %v = call fast float @llvm.vector.reduce.fadd.v16f32(float 0.0, <16 x float> %va)
  ret void
}

; memcpy between two align-1 pointers with a constant length of 32 and the
; i1 flag set to false. The %c parameter is not used by the body.
define void @memcpy(ptr %a, ptr %b, i32 %c) {
; THRU-LABEL: 'memcpy'
; THRU-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0.p0.i32(ptr align 1 %a, ptr align 1 %b, i32 32, i1 false)
; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
;
; LATE-LABEL: 'memcpy'
; LATE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0.p0.i32(ptr align 1 %a, ptr align 1 %b, i32 32, i1 false)
; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE-LABEL: 'memcpy'
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0.p0.i32(ptr align 1 %a, ptr align 1 %b, i32 32, i1 false)
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE_LATE-LABEL: 'memcpy'
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.memcpy.p0.p0.i32(ptr align 1 %a, ptr align 1 %b, i32 32, i1 false)
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  call void @llvm.memcpy.p0.p0.i32(ptr align 1 %a, ptr align 1 %b, i32 32, i1 false)
  ret void
}