1; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py 2; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2 3; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mtriple=x86_64-- -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,SSE42 4; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mtriple=x86_64-- -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1 5; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2 6; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mtriple=x86_64-- -mattr=+avx512f | FileCheck %s --check-prefixes=AVX512 7; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mtriple=x86_64-- -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX512 8; 9; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mtriple=x86_64-- -mcpu=slm | FileCheck %s --check-prefixes=SSE,SSE42 10; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mtriple=x86_64-- -mcpu=goldmont | FileCheck %s --check-prefixes=SSE,SSE42 11; RUN: opt < %s -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=latency -mtriple=x86_64-- -mcpu=btver2 | FileCheck %s --check-prefixes=AVX,AVX1 12 13define i32 @f32(i32 %arg) { 14; SSE2-LABEL: 'f32' 15; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %F32 = call float @llvm.maxnum.f32(float undef, float undef) 16; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F32 = call <2 x float> @llvm.maxnum.v2f32(<2 x float> undef, <2 x float> undef) 17; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F32 = call <4 x float> @llvm.maxnum.v4f32(<4 x float> undef, <4 x float> undef) 18; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F32 = call <8 x float> @llvm.maxnum.v8f32(<8 x float> undef, <8 x float> undef) 19; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V16F32 = call <16 x float> @llvm.maxnum.v16f32(<16 x float> undef, <16 x float> undef) 20; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef 21; 22; SSE42-LABEL: 'f32' 23; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %F32 = call float @llvm.maxnum.f32(float undef, float undef) 24; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2F32 = call <2 x float> @llvm.maxnum.v2f32(<2 x float> undef, <2 x float> undef) 25; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.maxnum.v4f32(<4 x float> undef, <4 x float> undef) 26; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.maxnum.v8f32(<8 x float> undef, <8 x float> undef) 27; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.maxnum.v16f32(<16 x float> undef, <16 x float> undef) 28; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef 29; 30; AVX1-LABEL: 'f32' 31; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %F32 = call float @llvm.maxnum.f32(float undef, float undef) 32; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2F32 = call <2 x float> @llvm.maxnum.v2f32(<2 x float> undef, <2 x float> undef) 33; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F32 = call <4 x float> @llvm.maxnum.v4f32(<4 x float> undef, <4 x float> undef) 34; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8F32 = call <8 x float> @llvm.maxnum.v8f32(<8 x float> undef, <8 x float> undef) 35; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16F32 = call <16 x float> @llvm.maxnum.v16f32(<16 x float> undef, <16 x float> undef) 36; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef 37; 38; AVX2-LABEL: 'f32' 39; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %F32 = call float @llvm.maxnum.f32(float undef, float undef) 40; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2F32 = call <2 x float> @llvm.maxnum.v2f32(<2 x float> undef, <2 x float> undef) 41; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.maxnum.v4f32(<4 x float> undef, <4 x float> undef) 42; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8F32 = call <8 x float> @llvm.maxnum.v8f32(<8 x float> undef, <8 x float> undef) 43; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V16F32 = call <16 x float> @llvm.maxnum.v16f32(<16 x float> undef, <16 x float> undef) 44; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef 45; 46; AVX512-LABEL: 'f32' 47; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.maxnum.f32(float undef, float undef) 48; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = call <2 x float> @llvm.maxnum.v2f32(<2 x float> undef, <2 x float> undef) 49; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.maxnum.v4f32(<4 x float> undef, <4 x float> undef) 50; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.maxnum.v8f32(<8 x float> undef, <8 x float> undef) 51; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.maxnum.v16f32(<16 x float> undef, <16 x float> undef) 52; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef 53; 54 %F32 = call float @llvm.maxnum.f32(float undef, float undef) 55 %V2F32 = call <2 x float> @llvm.maxnum.v2f32(<2 x float> undef, <2 x float> undef) 56 %V4F32 = call <4 x float> @llvm.maxnum.v4f32(<4 x float> undef, <4 x float> undef) 57 %V8F32 = call <8 x float> @llvm.maxnum.v8f32(<8 x float> undef, <8 x float> undef) 58 %V16F32 = call <16 x float> @llvm.maxnum.v16f32(<16 x float> undef, <16 x float> undef) 59 ret i32 undef 60} 61 62define i32 @f64(i32 %arg) { 63; SSE2-LABEL: 'f64' 64; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %f64 = call double @llvm.maxnum.f64(double undef, double undef) 65; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2f64 = call <2 x double> @llvm.maxnum.v2f64(<2 x double> undef, <2 x double> undef) 66; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4f64 = call <4 x double> @llvm.maxnum.v4f64(<4 x double> undef, <4 x double> undef) 67; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8f64 = call <8 x double> @llvm.maxnum.v8f64(<8 x double> undef, <8 x double> undef) 68; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16f64 = call <16 x double> @llvm.maxnum.v16f64(<16 x double> undef, <16 x double> undef) 69; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef 70; 71; SSE42-LABEL: 'f64' 72; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %f64 = call double @llvm.maxnum.f64(double undef, double undef) 73; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2f64 = call <2 x double> @llvm.maxnum.v2f64(<2 x double> undef, <2 x double> undef) 74; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4f64 = call <4 x double> @llvm.maxnum.v4f64(<4 x double> undef, <4 x double> undef) 75; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8f64 = call <8 x double> @llvm.maxnum.v8f64(<8 x double> undef, <8 x double> undef) 76; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16f64 = call <16 x double> @llvm.maxnum.v16f64(<16 x double> undef, <16 x double> undef) 77; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef 78; 79; AVX1-LABEL: 'f64' 80; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %f64 = call double @llvm.maxnum.f64(double undef, double undef) 81; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2f64 = call <2 x double> @llvm.maxnum.v2f64(<2 x double> undef, <2 x double> undef) 82; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4f64 = call <4 x double> @llvm.maxnum.v4f64(<4 x double> undef, <4 x double> undef) 83; AVX1-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8f64 = call <8 x double> @llvm.maxnum.v8f64(<8 x double> undef, <8 x double> undef) 84; AVX1-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16f64 = call <16 x double> @llvm.maxnum.v16f64(<16 x double> undef, <16 x double> undef) 85; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef 86; 87; AVX2-LABEL: 'f64' 88; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %f64 = call double @llvm.maxnum.f64(double undef, double undef) 89; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2f64 = call <2 x double> @llvm.maxnum.v2f64(<2 x double> undef, <2 x double> undef) 90; AVX2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4f64 = call <4 x double> @llvm.maxnum.v4f64(<4 x double> undef, <4 x double> undef) 91; AVX2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8f64 = call <8 x double> @llvm.maxnum.v8f64(<8 x double> undef, <8 x double> undef) 92; AVX2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16f64 = call <16 x double> @llvm.maxnum.v16f64(<16 x double> undef, <16 x double> undef) 93; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef 94; 95; AVX512-LABEL: 'f64' 96; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f64 = call double @llvm.maxnum.f64(double undef, double undef) 97; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2f64 = call <2 x double> @llvm.maxnum.v2f64(<2 x double> undef, <2 x double> undef) 98; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4f64 = call <4 x double> @llvm.maxnum.v4f64(<4 x double> undef, <4 x double> undef) 99; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V8f64 = call <8 x double> @llvm.maxnum.v8f64(<8 x double> undef, <8 x double> undef) 100; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16f64 = call <16 x double> @llvm.maxnum.v16f64(<16 x double> undef, <16 x double> undef) 101; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef 102; 103 %f64 = call double @llvm.maxnum.f64(double undef, double undef) 104 %V2f64 = call <2 x double> @llvm.maxnum.v2f64(<2 x double> undef, <2 x double> undef) 105 %V4f64 = call <4 x double> @llvm.maxnum.v4f64(<4 x double> undef, <4 x double> undef) 106 %V8f64 = call <8 x double> @llvm.maxnum.v8f64(<8 x double> undef, <8 x double> undef) 107 %V16f64 = call <16 x double> @llvm.maxnum.v16f64(<16 x double> undef, <16 x double> undef) 108 ret i32 undef 109} 110 111define i32 @f32_nnan(i32 %arg) { 112; SSE-LABEL: 'f32_nnan' 113; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call nnan float @llvm.maxnum.f32(float undef, float undef) 114; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = call nnan <2 x float> @llvm.maxnum.v2f32(<2 x float> undef, <2 x float> undef) 115; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call nnan <4 x float> @llvm.maxnum.v4f32(<4 x float> undef, <4 x float> undef) 116; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call nnan <8 x float> @llvm.maxnum.v8f32(<8 x float> undef, <8 x float> undef) 117; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call nnan <16 x float> @llvm.maxnum.v16f32(<16 x float> undef, <16 x float> undef) 118; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef 119; 120; AVX-LABEL: 'f32_nnan' 121; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call nnan float @llvm.maxnum.f32(float undef, float undef) 122; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = call nnan <2 x float> @llvm.maxnum.v2f32(<2 x float> undef, <2 x float> undef) 123; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call nnan <4 x float> @llvm.maxnum.v4f32(<4 x float> undef, <4 x float> undef) 124; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call nnan <8 x float> @llvm.maxnum.v8f32(<8 x float> undef, <8 x float> undef) 125; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16F32 = call nnan <16 x float> @llvm.maxnum.v16f32(<16 x float> undef, <16 x float> undef) 126; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef 127; 128; AVX512-LABEL: 'f32_nnan' 129; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %F32 = call nnan float @llvm.maxnum.f32(float undef, float undef) 130; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = call nnan <2 x float> @llvm.maxnum.v2f32(<2 x float> undef, <2 x float> undef) 131; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call nnan <4 x float> @llvm.maxnum.v4f32(<4 x float> undef, <4 x float> undef) 132; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call nnan <8 x float> @llvm.maxnum.v8f32(<8 x float> undef, <8 x float> undef) 133; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call nnan <16 x float> @llvm.maxnum.v16f32(<16 x float> undef, <16 x float> undef) 134; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef 135; 136 %F32 = call nnan float @llvm.maxnum.f32(float undef, float undef) 137 %V2F32 = call nnan <2 x float> @llvm.maxnum.v2f32(<2 x float> undef, <2 x float> undef) 138 %V4F32 = call nnan <4 x float> @llvm.maxnum.v4f32(<4 x float> undef, <4 x float> undef) 139 %V8F32 = call nnan <8 x float> @llvm.maxnum.v8f32(<8 x float> undef, <8 x float> undef) 140 %V16F32 = call nnan <16 x float> @llvm.maxnum.v16f32(<16 x float> undef, <16 x float> undef) 141 ret i32 undef 142} 143 144define i32 @f64_nnan(i32 %arg) { 145; SSE-LABEL: 'f64_nnan' 146; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f64 = call nnan double @llvm.maxnum.f64(double undef, double undef) 147; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2f64 = call nnan <2 x double> @llvm.maxnum.v2f64(<2 x double> undef, <2 x double> undef) 148; SSE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4f64 = call nnan <4 x double> @llvm.maxnum.v4f64(<4 x double> undef, <4 x double> undef) 149; SSE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8f64 = call nnan <8 x double> @llvm.maxnum.v8f64(<8 x double> undef, <8 x double> undef) 150; SSE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V16f64 = call nnan <16 x double> @llvm.maxnum.v16f64(<16 x double> undef, <16 x double> undef) 151; SSE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef 152; 153; AVX-LABEL: 'f64_nnan' 154; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f64 = call nnan double @llvm.maxnum.f64(double undef, double undef) 155; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2f64 = call nnan <2 x double> @llvm.maxnum.v2f64(<2 x double> undef, <2 x double> undef) 156; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4f64 = call nnan <4 x double> @llvm.maxnum.v4f64(<4 x double> undef, <4 x double> undef) 157; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8f64 = call nnan <8 x double> @llvm.maxnum.v8f64(<8 x double> undef, <8 x double> undef) 158; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16f64 = call nnan <16 x double> @llvm.maxnum.v16f64(<16 x double> undef, <16 x double> undef) 159; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef 160; 161; AVX512-LABEL: 'f64_nnan' 162; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f64 = call nnan double @llvm.maxnum.f64(double undef, double undef) 163; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2f64 = call nnan <2 x double> @llvm.maxnum.v2f64(<2 x double> undef, <2 x double> undef) 164; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4f64 = call nnan <4 x double> @llvm.maxnum.v4f64(<4 x double> undef, <4 x double> undef) 165; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8f64 = call nnan <8 x double> @llvm.maxnum.v8f64(<8 x double> undef, <8 x double> undef) 166; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V16f64 = call nnan <16 x double> @llvm.maxnum.v16f64(<16 x double> undef, <16 x double> undef) 167; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef 168; 169 %f64 = call nnan double @llvm.maxnum.f64(double undef, double undef) 170 %V2f64 = call nnan <2 x double> @llvm.maxnum.v2f64(<2 x double> undef, <2 x double> undef) 171 %V4f64 = call nnan <4 x double> @llvm.maxnum.v4f64(<4 x double> undef, <4 x double> undef) 172 %V8f64 = call nnan <8 x double> @llvm.maxnum.v8f64(<8 x double> undef, <8 x double> undef) 173 %V16f64 = call nnan <16 x double> @llvm.maxnum.v16f64(<16 x double> undef, <16 x double> undef) 174 ret i32 undef 175} 176 177declare float @llvm.maxnum.f32(float, float) 178declare <2 x float> @llvm.maxnum.v2f32(<2 x float>, <2 x float>) 179declare <4 x float> @llvm.maxnum.v4f32(<4 x float>, <4 x float>) 180declare <8 x float> @llvm.maxnum.v8f32(<8 x float>, <8 x float>) 181declare <16 x float> @llvm.maxnum.v16f32(<16 x float>, <16 x float>) 182 183declare double @llvm.maxnum.f64(double, double) 184declare <2 x double> @llvm.maxnum.v2f64(<2 x double>, <2 x double>) 185declare <4 x double> @llvm.maxnum.v4f64(<4 x double>, <4 x double>) 186declare <8 x double> @llvm.maxnum.v8f64(<8 x double>, <8 x double>) 187declare <16 x double> @llvm.maxnum.v16f64(<16 x double>, <16 x double>) 188