; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=GFX9,GFX90A-FASTF64 %s
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=GFX9,F32,FASTF64 %s
; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=F32,SLOW %s
; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=GFX9-SIZE,GFX90A-SIZE %s
; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=SIZE,GFX9-SIZE %s
; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=SIZE,SLOW-SIZE %s
; END.

; Kernel holding one fmul per tested float shape, a scalar float plus
; <N x float> vectors for N = 2, 3, 4, 5, 8 and 9. Every operand is undef and
; no result is used; the kernel exists only so the cost-model printer emits one
; line per instruction. The first two check groups below belong to the default
; cost-kind runs and the last two to the -cost-kind=code-size runs. In the
; recorded values the gfx90a groups give <2 x float> the same cost as the
; scalar, while the other groups give it twice the scalar cost.
define amdgpu_kernel void @fmul_f32() #0 {
; GFX90A-FASTF64-LABEL: 'fmul_f32'
; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f32 = fmul float undef, undef
; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32 = fmul <2 x float> undef, undef
; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f32 = fmul <3 x float> undef, undef
; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = fmul <4 x float> undef, undef
; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v5f32 = fmul <5 x float> undef, undef
; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f32 = fmul <8 x float> undef, undef
; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v9f32 = fmul <9 x float> undef, undef
; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; F32-LABEL: 'fmul_f32'
; F32-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f32 = fmul float undef, undef
; F32-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = fmul <2 x float> undef, undef
; F32-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3f32 = fmul <3 x float> undef, undef
; F32-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = fmul <4 x float> undef, undef
; F32-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5f32 = fmul <5 x float> undef, undef
; F32-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f32 = fmul <8 x float> undef, undef
; F32-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %v9f32 = fmul <9 x float> undef, undef
; F32-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; GFX90A-SIZE-LABEL: 'fmul_f32'
; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f32 = fmul float undef, undef
; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f32 = fmul <2 x float> undef, undef
; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f32 = fmul <3 x float> undef, undef
; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = fmul <4 x float> undef, undef
; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v5f32 = fmul <5 x float> undef, undef
; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v8f32 = fmul <8 x float> undef, undef
; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %v9f32 = fmul <9 x float> undef, undef
; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE-LABEL: 'fmul_f32'
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f32 = fmul float undef, undef
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = fmul <2 x float> undef, undef
; SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3f32 = fmul <3 x float> undef, undef
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = fmul <4 x float> undef, undef
; SIZE-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v5f32 = fmul <5 x float> undef, undef
; SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v8f32 = fmul <8 x float> undef, undef
; SIZE-NEXT: Cost Model: Found an estimated cost of 27 for instruction: %v9f32 = fmul <9 x float> undef, undef
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  %f32 = fmul float undef, undef
  %v2f32 = fmul <2 x float> undef, undef
  %v3f32 = fmul <3 x float> undef, undef
  %v4f32 = fmul <4 x float> undef, undef
  %v5f32 = fmul <5 x float> undef, undef
  %v8f32 = fmul <8 x float> undef, undef
  %v9f32 = fmul <9 x float> undef, undef
  ret void
}

; Same layout for double, a scalar plus <N x double> for N = 2, 3, 4 and 5.
; This is the only kernel in the file with three separate default cost-kind
; groups. The recorded scalar cost is 1 for the gfx90a run, 2 for the gfx900
; run with +half-rate-64-ops, and 4 for the run with -half-rate-64-ops. The
; code-size runs are split into two groups only, the gfx90a one and one shared
; by both remaining runs.
define amdgpu_kernel void @fmul_f64() #0 {
; GFX90A-FASTF64-LABEL: 'fmul_f64'
; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f64 = fmul double undef, undef
; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f64 = fmul <2 x double> undef, undef
; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3f64 = fmul <3 x double> undef, undef
; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f64 = fmul <4 x double> undef, undef
; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v5f64 = fmul <5 x double> undef, undef
; GFX90A-FASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; FASTF64-LABEL: 'fmul_f64'
; FASTF64-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f64 = fmul double undef, undef
; FASTF64-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f64 = fmul <2 x double> undef, undef
; FASTF64-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f64 = fmul <3 x double> undef, undef
; FASTF64-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f64 = fmul <4 x double> undef, undef
; FASTF64-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v5f64 = fmul <5 x double> undef, undef
; FASTF64-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; SLOW-LABEL: 'fmul_f64'
; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %f64 = fmul double undef, undef
; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v2f64 = fmul <2 x double> undef, undef
; SLOW-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %v3f64 = fmul <3 x double> undef, undef
; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v4f64 = fmul <4 x double> undef, undef
; SLOW-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %v5f64 = fmul <5 x double> undef, undef
; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; GFX90A-SIZE-LABEL: 'fmul_f64'
; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f64 = fmul double undef, undef
; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f64 = fmul <2 x double> undef, undef
; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %v3f64 = fmul <3 x double> undef, undef
; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f64 = fmul <4 x double> undef, undef
; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %v5f64 = fmul <5 x double> undef, undef
; GFX90A-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SIZE-LABEL: 'fmul_f64'
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %f64 = fmul double undef, undef
; SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v2f64 = fmul <2 x double> undef, undef
; SIZE-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %v3f64 = fmul <3 x double> undef, undef
; SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v4f64 = fmul <4 x double> undef, undef
; SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v5f64 = fmul <5 x double> undef, undef
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  %f64 = fmul double undef, undef
  %v2f64 = fmul <2 x double> undef, undef
  %v3f64 = fmul <3 x double> undef, undef
  %v4f64 = fmul <4 x double> undef, undef
  %v5f64 = fmul <5 x double> undef, undef
  ret void
}

; Same layout for half, a scalar plus <N x half> for N = 2, 3, 4, 5, 16 and 17.
; Both runs that pass -mcpu (gfx90a and gfx900) share a single check group per
; cost kind here, and the runs without -mcpu use the other group. In the
; recorded values the shared gfx9 groups give <2 x half> the same cost as the
; scalar.
define amdgpu_kernel void @fmul_f16() #0 {
; GFX9-LABEL: 'fmul_f16'
; GFX9-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f16 = fmul half undef, undef
; GFX9-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = fmul <2 x half> undef, undef
; GFX9-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = fmul <3 x half> undef, undef
; GFX9-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = fmul <4 x half> undef, undef
; GFX9-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v5f16 = fmul <5 x half> undef, undef
; GFX9-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16f16 = fmul <16 x half> undef, undef
; GFX9-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v17f16 = fmul <17 x half> undef, undef
; GFX9-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; SLOW-LABEL: 'fmul_f16'
; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f16 = fmul half undef, undef
; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = fmul <2 x half> undef, undef
; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3f16 = fmul <3 x half> undef, undef
; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f16 = fmul <4 x half> undef, undef
; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5f16 = fmul <5 x half> undef, undef
; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16f16 = fmul <16 x half> undef, undef
; SLOW-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v17f16 = fmul <17 x half> undef, undef
; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; GFX9-SIZE-LABEL: 'fmul_f16'
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f16 = fmul half undef, undef
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = fmul <2 x half> undef, undef
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = fmul <3 x half> undef, undef
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = fmul <4 x half> undef, undef
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v5f16 = fmul <5 x half> undef, undef
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v16f16 = fmul <16 x half> undef, undef
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %v17f16 = fmul <17 x half> undef, undef
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SLOW-SIZE-LABEL: 'fmul_f16'
; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %f16 = fmul half undef, undef
; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = fmul <2 x half> undef, undef
; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3f16 = fmul <3 x half> undef, undef
; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4f16 = fmul <4 x half> undef, undef
; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5f16 = fmul <5 x half> undef, undef
; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16f16 = fmul <16 x half> undef, undef
; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v17f16 = fmul <17 x half> undef, undef
; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  %f16 = fmul half undef, undef
  %v2f16 = fmul <2 x half> undef, undef
  %v3f16 = fmul <3 x half> undef, undef
  %v4f16 = fmul <4 x half> undef, undef
  %v5f16 = fmul <5 x half> undef, undef
  %v16f16 = fmul <16 x half> undef, undef
  %v17f16 = fmul <17 x half> undef, undef
  ret void
}

; Same layout for bfloat, using the vector widths of the half kernel (2, 3, 4,
; 5, 16 and 17) and the same four check groups. In the recorded values the
; code-size group shared by the -mcpu runs gives every instruction a cost of 1,
; regardless of vector width.
define amdgpu_kernel void @fmul_bf16() #0 {
; GFX9-LABEL: 'fmul_bf16'
; GFX9-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %bf16 = fmul bfloat undef, undef
; GFX9-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %v2bf16 = fmul <2 x bfloat> undef, undef
; GFX9-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v3bf16 = fmul <3 x bfloat> undef, undef
; GFX9-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %v4bf16 = fmul <4 x bfloat> undef, undef
; GFX9-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %v5bf16 = fmul <5 x bfloat> undef, undef
; GFX9-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %v16bf16 = fmul <16 x bfloat> undef, undef
; GFX9-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %v17bf16 = fmul <17 x bfloat> undef, undef
; GFX9-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; SLOW-LABEL: 'fmul_bf16'
; SLOW-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bf16 = fmul bfloat undef, undef
; SLOW-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2bf16 = fmul <2 x bfloat> undef, undef
; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3bf16 = fmul <3 x bfloat> undef, undef
; SLOW-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4bf16 = fmul <4 x bfloat> undef, undef
; SLOW-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5bf16 = fmul <5 x bfloat> undef, undef
; SLOW-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16bf16 = fmul <16 x bfloat> undef, undef
; SLOW-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v17bf16 = fmul <17 x bfloat> undef, undef
; SLOW-NEXT: Cost Model: Found an estimated cost of 10 for instruction: ret void
;
; GFX9-SIZE-LABEL: 'fmul_bf16'
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bf16 = fmul bfloat undef, undef
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v2bf16 = fmul <2 x bfloat> undef, undef
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v3bf16 = fmul <3 x bfloat> undef, undef
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v4bf16 = fmul <4 x bfloat> undef, undef
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v5bf16 = fmul <5 x bfloat> undef, undef
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v16bf16 = fmul <16 x bfloat> undef, undef
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %v17bf16 = fmul <17 x bfloat> undef, undef
; GFX9-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
; SLOW-SIZE-LABEL: 'fmul_bf16'
; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %bf16 = fmul bfloat undef, undef
; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %v2bf16 = fmul <2 x bfloat> undef, undef
; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v3bf16 = fmul <3 x bfloat> undef, undef
; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %v4bf16 = fmul <4 x bfloat> undef, undef
; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %v5bf16 = fmul <5 x bfloat> undef, undef
; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %v16bf16 = fmul <16 x bfloat> undef, undef
; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %v17bf16 = fmul <17 x bfloat> undef, undef
; SLOW-SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
;
  %bf16 = fmul bfloat undef, undef
  %v2bf16 = fmul <2 x bfloat> undef, undef
  %v3bf16 = fmul <3 x bfloat> undef, undef
  %v4bf16 = fmul <4 x bfloat> undef, undef
  %v5bf16 = fmul <5 x bfloat> undef, undef
  %v16bf16 = fmul <16 x bfloat> undef, undef
  %v17bf16 = fmul <17 x bfloat> undef, undef
  ret void
}

; Attribute group referenced as #0 by all four kernels above.
attributes #0 = { nounwind }