1; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -show-mc-encoding < %s | FileCheck -check-prefixes=GFX9,NOT-GFX12 %s 2; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -show-mc-encoding < %s | FileCheck -check-prefixes=GFX10,NOT-GFX12 %s 3; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -show-mc-encoding < %s | FileCheck -check-prefixes=GFX11,GFX1100,NOT-GFX12 %s 4; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -show-mc-encoding < %s | FileCheck -check-prefixes=GFX11,GFX1150,NOT-GFX12 %s 5; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -show-mc-encoding < %s | FileCheck -check-prefixes=GFX1200 %s 6 7declare float @llvm.fabs.f32(float) 8declare float @llvm.fma.f32(float, float, float) 9 10define float @v_mul_f32_vop2(float %x, float %y) { 11; GFX9-LABEL: v_mul_f32_vop2: 12; GFX9: ; %bb.0: 13; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] 14; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 ; encoding: [0x00,0x03,0x00,0x0a] 15; GFX9-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe] 16; 17; GFX10-LABEL: v_mul_f32_vop2: 18; GFX10: ; %bb.0: 19; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] 20; GFX10-NEXT: v_mul_f32_e32 v0, v0, v1 ; encoding: [0x00,0x03,0x00,0x10] 21; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe] 22; 23; GFX11-LABEL: v_mul_f32_vop2: 24; GFX11: ; %bb.0: 25; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf] 26; GFX11-NEXT: v_mul_f32_e32 v0, v0, v1 ; encoding: [0x00,0x03,0x00,0x10] 27; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] 28; 29; GFX1200-LABEL: v_mul_f32_vop2: 30; GFX1200: ; %bb.0: 31; GFX1200-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf] 32; GFX1200-NEXT: s_wait_expcnt 0x0 ; encoding: [0x00,0x00,0xc4,0xbf] 33; GFX1200-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf] 34; GFX1200-NEXT: s_wait_bvhcnt 0x0 ; encoding: [0x00,0x00,0xc3,0xbf] 35; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf] 36; GFX1200-NEXT: v_mul_f32_e32 v0, v0, v1 ; encoding: [0x00,0x03,0x00,0x10] 37; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] 38 %mul = fmul float %x, %y 39 ret float %mul 40} 41; NOT-GFX12: codeLenInByte = 12 42; GFX1200: codeLenInByte = 28 43 44define float @v_mul_f32_vop2_inline_imm(float %x) { 45; GFX9-LABEL: v_mul_f32_vop2_inline_imm: 46; GFX9: ; %bb.0: 47; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] 48; GFX9-NEXT: v_mul_f32_e32 v0, 4.0, v0 ; encoding: [0xf6,0x00,0x00,0x0a] 49; GFX9-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe] 50; 51; GFX10-LABEL: v_mul_f32_vop2_inline_imm: 52; GFX10: ; %bb.0: 53; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] 54; GFX10-NEXT: v_mul_f32_e32 v0, 4.0, v0 ; encoding: [0xf6,0x00,0x00,0x10] 55; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe] 56; 57; GFX11-LABEL: v_mul_f32_vop2_inline_imm: 58; GFX11: ; %bb.0: 59; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf] 60; GFX11-NEXT: v_mul_f32_e32 v0, 4.0, v0 ; encoding: [0xf6,0x00,0x00,0x10] 61; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] 62; 63; GFX1200-LABEL: v_mul_f32_vop2_inline_imm: 64; GFX1200: ; %bb.0: 65; GFX1200-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf] 66; GFX1200-NEXT: s_wait_expcnt 0x0 ; encoding: [0x00,0x00,0xc4,0xbf] 67; GFX1200-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf] 68; GFX1200-NEXT: s_wait_bvhcnt 0x0 ; encoding: [0x00,0x00,0xc3,0xbf] 69; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf] 70; GFX1200-NEXT: v_mul_f32_e32 v0, 4.0, v0 ; encoding: [0xf6,0x00,0x00,0x10] 71; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] 72 %mul = fmul float %x, 4.0 73 ret float %mul 74} 75; NOT-GFX12: codeLenInByte = 12 76; GFX1200: codeLenInByte = 28 77 78define float @v_mul_f32_vop2_literal(float %x) { 79; GFX9-LABEL: v_mul_f32_vop2_literal: 80; GFX9: ; %bb.0: 81; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] 82; GFX9-NEXT: v_mul_f32_e32 v0, 0x42f60000, v0 ; encoding: [0xff,0x00,0x00,0x0a,0x00,0x00,0xf6,0x42] 83; GFX9-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe] 84; 85; GFX10-LABEL: v_mul_f32_vop2_literal: 86; GFX10: ; %bb.0: 87; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] 88; GFX10-NEXT: v_mul_f32_e32 v0, 0x42f60000, v0 ; encoding: [0xff,0x00,0x00,0x10,0x00,0x00,0xf6,0x42] 89; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe] 90; 91; GFX11-LABEL: v_mul_f32_vop2_literal: 92; GFX11: ; %bb.0: 93; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf] 94; GFX11-NEXT: v_mul_f32_e32 v0, 0x42f60000, v0 ; encoding: [0xff,0x00,0x00,0x10,0x00,0x00,0xf6,0x42] 95; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] 96; 97; GFX1200-LABEL: v_mul_f32_vop2_literal: 98; GFX1200: ; %bb.0: 99; GFX1200-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf] 100; GFX1200-NEXT: s_wait_expcnt 0x0 ; encoding: [0x00,0x00,0xc4,0xbf] 101; GFX1200-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf] 102; GFX1200-NEXT: s_wait_bvhcnt 0x0 ; encoding: [0x00,0x00,0xc3,0xbf] 103; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf] 104; GFX1200-NEXT: v_mul_f32_e32 v0, 0x42f60000, v0 ; encoding: [0xff,0x00,0x00,0x10,0x00,0x00,0xf6,0x42] 105; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] 106 %mul = fmul float %x, 123.0 107 ret float %mul 108} 109; NOT-GFX12: codeLenInByte = 16 110; GFX1200: codeLenInByte = 32 111 112define float @v_mul_f32_vop3_src_mods(float %x, float %y) { 113; GFX9-LABEL: v_mul_f32_vop3_src_mods: 114; GFX9: ; %bb.0: 115; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] 116; GFX9-NEXT: v_mul_f32_e64 v0, |v0|, v1 ; encoding: [0x00,0x01,0x05,0xd1,0x00,0x03,0x02,0x00] 117; GFX9-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe] 118; 119; GFX10-LABEL: v_mul_f32_vop3_src_mods: 120; GFX10: ; %bb.0: 121; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] 122; GFX10-NEXT: v_mul_f32_e64 v0, |v0|, v1 ; encoding: [0x00,0x01,0x08,0xd5,0x00,0x03,0x02,0x00] 123; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe] 124; 125; GFX11-LABEL: v_mul_f32_vop3_src_mods: 126; GFX11: ; %bb.0: 127; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf] 128; GFX11-NEXT: v_mul_f32_e64 v0, |v0|, v1 ; encoding: [0x00,0x01,0x08,0xd5,0x00,0x03,0x02,0x00] 129; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] 130; 131; GFX1200-LABEL: v_mul_f32_vop3_src_mods: 132; GFX1200: ; %bb.0: 133; GFX1200-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf] 134; GFX1200-NEXT: s_wait_expcnt 0x0 ; encoding: [0x00,0x00,0xc4,0xbf] 135; GFX1200-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf] 136; GFX1200-NEXT: s_wait_bvhcnt 0x0 ; encoding: [0x00,0x00,0xc3,0xbf] 137; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf] 138; GFX1200-NEXT: v_mul_f32_e64 v0, |v0|, v1 ; encoding: [0x00,0x01,0x08,0xd5,0x00,0x03,0x02,0x00] 139; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] 140 %fabs.x = call float @llvm.fabs.f32(float %x) 141 %mul = fmul float %fabs.x, %y 142 ret float %mul 143} 144; NOT-GFX12: codeLenInByte = 16 145; GFX1200: codeLenInByte = 32 146 147define float @v_mul_f32_vop3_src_mods_inline_imm(float %x, float %y) { 148; GFX9-LABEL: v_mul_f32_vop3_src_mods_inline_imm: 149; GFX9: ; %bb.0: 150; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] 151; GFX9-NEXT: v_mul_f32_e64 v0, |v0|, 4.0 ; encoding: [0x00,0x01,0x05,0xd1,0x00,0xed,0x01,0x00] 152; GFX9-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe] 153; 154; GFX10-LABEL: v_mul_f32_vop3_src_mods_inline_imm: 155; GFX10: ; %bb.0: 156; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] 157; GFX10-NEXT: v_mul_f32_e64 v0, |v0|, 4.0 ; encoding: [0x00,0x01,0x08,0xd5,0x00,0xed,0x01,0x00] 158; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe] 159; 160; GFX11-LABEL: v_mul_f32_vop3_src_mods_inline_imm: 161; GFX11: ; %bb.0: 162; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf] 163; GFX11-NEXT: v_mul_f32_e64 v0, |v0|, 4.0 ; encoding: [0x00,0x01,0x08,0xd5,0x00,0xed,0x01,0x00] 164; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] 165; 166; GFX1200-LABEL: v_mul_f32_vop3_src_mods_inline_imm: 167; GFX1200: ; %bb.0: 168; GFX1200-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf] 169; GFX1200-NEXT: s_wait_expcnt 0x0 ; encoding: [0x00,0x00,0xc4,0xbf] 170; GFX1200-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf] 171; GFX1200-NEXT: s_wait_bvhcnt 0x0 ; encoding: [0x00,0x00,0xc3,0xbf] 172; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf] 173; GFX1200-NEXT: v_mul_f32_e64 v0, |v0|, 4.0 ; encoding: [0x00,0x01,0x08,0xd5,0x00,0xed,0x01,0x00] 174; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] 175 %fabs.x = call float @llvm.fabs.f32(float %x) 176 %mul = fmul float %fabs.x, 4.0 177 ret float %mul 178} 179 180; NOT-GFX12: codeLenInByte = 16 181; GFX1200: codeLenInByte = 32 182 183define float @v_mul_f32_vop3_src_mods_literal(float %x, float %y) { 184; GFX9-LABEL: v_mul_f32_vop3_src_mods_literal: 185; GFX9: ; %bb.0: 186; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] 187; GFX9-NEXT: s_mov_b32 s4, 0x42f60000 ; encoding: [0xff,0x00,0x84,0xbe,0x00,0x00,0xf6,0x42] 188; GFX9-NEXT: v_mul_f32_e64 v0, |v0|, s4 ; encoding: [0x00,0x01,0x05,0xd1,0x00,0x09,0x00,0x00] 189; GFX9-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe] 190; 191; GFX10-LABEL: v_mul_f32_vop3_src_mods_literal: 192; GFX10: ; %bb.0: 193; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] 194; GFX10-NEXT: v_mul_f32_e64 v0, 0x42f60000, |v0| ; encoding: [0x00,0x02,0x08,0xd5,0xff,0x00,0x02,0x00,0x00,0x00,0xf6,0x42] 195; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe] 196; 197; GFX11-LABEL: v_mul_f32_vop3_src_mods_literal: 198; GFX11: ; %bb.0: 199; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf] 200; GFX11-NEXT: v_mul_f32_e64 v0, 0x42f60000, |v0| ; encoding: [0x00,0x02,0x08,0xd5,0xff,0x00,0x02,0x00,0x00,0x00,0xf6,0x42] 201; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] 202; 203; GFX1200-LABEL: v_mul_f32_vop3_src_mods_literal: 204; GFX1200: ; %bb.0: 205; GFX1200-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf] 206; GFX1200-NEXT: s_wait_expcnt 0x0 ; encoding: [0x00,0x00,0xc4,0xbf] 207; GFX1200-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf] 208; GFX1200-NEXT: s_wait_bvhcnt 0x0 ; encoding: [0x00,0x00,0xc3,0xbf] 209; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf] 210; GFX1200-NEXT: v_mul_f32_e64 v0, 0x42f60000, |v0| ; encoding: [0x00,0x02,0x08,0xd5,0xff,0x00,0x02,0x00,0x00,0x00,0xf6,0x42] 211; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] 212 %fabs.x = call float @llvm.fabs.f32(float %x) 213 %mul = fmul float %fabs.x, 123.0 214 ret float %mul 215} 216 217; GFX9: codeLenInByte = 24 218; GFX10: codeLenInByte = 20 219; GFX11: codeLenInByte = 20 220; GFX1200: codeLenInByte = 36 221 222define float @v_mul_f32_vop2_frame_index(float %x) { 223; GFX9-LABEL: v_mul_f32_vop2_frame_index: 224; GFX9: ; %bb.0: 225; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] 226; GFX9-NEXT: v_lshrrev_b32_e64 v1, 6, s32 ; encoding: [0x01,0x00,0x10,0xd1,0x86,0x40,0x00,0x00] 227; GFX9-NEXT: v_mul_f32_e32 v0, v1, v0 ; encoding: [0x01,0x01,0x00,0x0a] 228; GFX9-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe] 229; 230; GFX10-LABEL: v_mul_f32_vop2_frame_index: 231; GFX10: ; %bb.0: 232; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] 233; GFX10-NEXT: v_lshrrev_b32_e64 v1, 5, s32 ; encoding: [0x01,0x00,0x16,0xd5,0x85,0x40,0x00,0x00] 234; GFX10-NEXT: v_mul_f32_e32 v0, v1, v0 ; encoding: [0x01,0x01,0x00,0x10] 235; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe] 236; 237; GFX11-LABEL: v_mul_f32_vop2_frame_index: 238; GFX11: ; %bb.0: 239; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf] 240; GFX11-NEXT: v_mul_f32_e32 v0, s32, v0 ; encoding: [0x20,0x00,0x00,0x10] 241; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] 242; 243; GFX1200-LABEL: v_mul_f32_vop2_frame_index: 244; GFX1200: ; %bb.0: 245; GFX1200-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf] 246; GFX1200-NEXT: s_wait_expcnt 0x0 ; encoding: [0x00,0x00,0xc4,0xbf] 247; GFX1200-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf] 248; GFX1200-NEXT: s_wait_bvhcnt 0x0 ; encoding: [0x00,0x00,0xc3,0xbf] 249; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf] 250; GFX1200-NEXT: v_mul_f32_e32 v0, s32, v0 ; encoding: [0x20,0x00,0x00,0x10] 251; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] 252 %alloca = alloca i32, addrspace(5) 253 %ptrtoint = ptrtoint ptr addrspace(5) %alloca to i32 254 %cast = bitcast i32 %ptrtoint to float 255 %mul = fmul float %x, %cast 256 ret float %mul 257} 258 259; GFX9: codeLenInByte = 20 260; GFX10: codeLenInByte = 20 261; GFX11: codeLenInByte = 12 262; GFX1200: codeLenInByte = 28 263 264define float @v_fma_f32(float %x, float %y, float %z) { 265; GFX9-LABEL: v_fma_f32: 266; GFX9: ; %bb.0: 267; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] 268; GFX9-NEXT: v_fma_f32 v0, v0, v1, v2 ; encoding: [0x00,0x00,0xcb,0xd1,0x00,0x03,0x0a,0x04] 269; GFX9-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe] 270; 271; GFX10-LABEL: v_fma_f32: 272; GFX10: ; %bb.0: 273; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] 274; GFX10-NEXT: v_fma_f32 v0, v0, v1, v2 ; encoding: [0x00,0x00,0x4b,0xd5,0x00,0x03,0x0a,0x04] 275; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe] 276; 277; GFX11-LABEL: v_fma_f32: 278; GFX11: ; %bb.0: 279; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf] 280; GFX11-NEXT: v_fma_f32 v0, v0, v1, v2 ; encoding: [0x00,0x00,0x13,0xd6,0x00,0x03,0x0a,0x04] 281; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] 282; 283; GFX1200-LABEL: v_fma_f32: 284; GFX1200: ; %bb.0: 285; GFX1200-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf] 286; GFX1200-NEXT: s_wait_expcnt 0x0 ; encoding: [0x00,0x00,0xc4,0xbf] 287; GFX1200-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf] 288; GFX1200-NEXT: s_wait_bvhcnt 0x0 ; encoding: [0x00,0x00,0xc3,0xbf] 289; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf] 290; GFX1200-NEXT: v_fma_f32 v0, v0, v1, v2 ; encoding: [0x00,0x00,0x13,0xd6,0x00,0x03,0x0a,0x04] 291; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] 292 %fma = call float @llvm.fma.f32(float %x, float %y, float %z) 293 ret float %fma 294} 295 296; NOT-GFX12: codeLenInByte = 16 297; GFX1200: codeLenInByte = 32 298 299define float @v_fma_f32_src_mods(float %x, float %y, float %z) { 300; GFX9-LABEL: v_fma_f32_src_mods: 301; GFX9: ; %bb.0: 302; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] 303; GFX9-NEXT: v_fma_f32 v0, |v0|, v1, v2 ; encoding: [0x00,0x01,0xcb,0xd1,0x00,0x03,0x0a,0x04] 304; GFX9-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe] 305; 306; GFX10-LABEL: v_fma_f32_src_mods: 307; GFX10: ; %bb.0: 308; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] 309; GFX10-NEXT: v_fma_f32 v0, |v0|, v1, v2 ; encoding: [0x00,0x01,0x4b,0xd5,0x00,0x03,0x0a,0x04] 310; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe] 311; 312; GFX11-LABEL: v_fma_f32_src_mods: 313; GFX11: ; %bb.0: 314; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf] 315; GFX11-NEXT: v_fma_f32 v0, |v0|, v1, v2 ; encoding: [0x00,0x01,0x13,0xd6,0x00,0x03,0x0a,0x04] 316; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] 317; 318; GFX1200-LABEL: v_fma_f32_src_mods: 319; GFX1200: ; %bb.0: 320; GFX1200-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf] 321; GFX1200-NEXT: s_wait_expcnt 0x0 ; encoding: [0x00,0x00,0xc4,0xbf] 322; GFX1200-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf] 323; GFX1200-NEXT: s_wait_bvhcnt 0x0 ; encoding: [0x00,0x00,0xc3,0xbf] 324; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf] 325; GFX1200-NEXT: v_fma_f32 v0, |v0|, v1, v2 ; encoding: [0x00,0x01,0x13,0xd6,0x00,0x03,0x0a,0x04] 326; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] 327 %fabs.x = call float @llvm.fabs.f32(float %x) 328 %fma = call float @llvm.fma.f32(float %fabs.x, float %y, float %z) 329 ret float %fma 330} 331 332; NOT-GFX12: codeLenInByte = 16 333; GFX1200: codeLenInByte = 32 334 335define float @v_fmac_f32(float %x, float %y) { 336; GFX9-LABEL: v_fmac_f32: 337; GFX9: ; %bb.0: 338; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] 339; GFX9-NEXT: v_fma_f32 v0, v0, v1, v0 ; encoding: [0x00,0x00,0xcb,0xd1,0x00,0x03,0x02,0x04] 340; GFX9-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe] 341; 342; GFX10-LABEL: v_fmac_f32: 343; GFX10: ; %bb.0: 344; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] 345; GFX10-NEXT: v_fmac_f32_e32 v0, v0, v1 ; encoding: [0x00,0x03,0x00,0x56] 346; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe] 347; 348; GFX11-LABEL: v_fmac_f32: 349; GFX11: ; %bb.0: 350; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf] 351; GFX11-NEXT: v_fmac_f32_e32 v0, v0, v1 ; encoding: [0x00,0x03,0x00,0x56] 352; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] 353; 354; GFX1200-LABEL: v_fmac_f32: 355; GFX1200: ; %bb.0: 356; GFX1200-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf] 357; GFX1200-NEXT: s_wait_expcnt 0x0 ; encoding: [0x00,0x00,0xc4,0xbf] 358; GFX1200-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf] 359; GFX1200-NEXT: s_wait_bvhcnt 0x0 ; encoding: [0x00,0x00,0xc3,0xbf] 360; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf] 361; GFX1200-NEXT: v_fmac_f32_e32 v0, v0, v1 ; encoding: [0x00,0x03,0x00,0x56] 362; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] 363 %fma = call float @llvm.fma.f32(float %x, float %y, float %x) 364 ret float %fma 365} 366 367; GFX9: codeLenInByte = 16 368; GFX10: codeLenInByte = 12 369; GFX11: codeLenInByte = 12 370; GFX1200: codeLenInByte = 28 371 372define float @v_fmaak_f32(float %x, float %y) { 373; GFX9-LABEL: v_fmaak_f32: 374; GFX9: ; %bb.0: 375; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] 376; GFX9-NEXT: s_mov_b32 s4, 0x43800000 ; encoding: [0xff,0x00,0x84,0xbe,0x00,0x00,0x80,0x43] 377; GFX9-NEXT: v_fma_f32 v0, v0, v1, s4 ; encoding: [0x00,0x00,0xcb,0xd1,0x00,0x03,0x12,0x00] 378; GFX9-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe] 379; 380; GFX10-LABEL: v_fmaak_f32: 381; GFX10: ; %bb.0: 382; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] 383; GFX10-NEXT: v_fmaak_f32 v0, v0, v1, 0x43800000 ; encoding: [0x00,0x03,0x00,0x5a,0x00,0x00,0x80,0x43] 384; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe] 385; 386; GFX11-LABEL: v_fmaak_f32: 387; GFX11: ; %bb.0: 388; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf] 389; GFX11-NEXT: v_fmaak_f32 v0, v0, v1, 0x43800000 ; encoding: [0x00,0x03,0x00,0x5a,0x00,0x00,0x80,0x43] 390; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] 391; 392; GFX1200-LABEL: v_fmaak_f32: 393; GFX1200: ; %bb.0: 394; GFX1200-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf] 395; GFX1200-NEXT: s_wait_expcnt 0x0 ; encoding: [0x00,0x00,0xc4,0xbf] 396; GFX1200-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf] 397; GFX1200-NEXT: s_wait_bvhcnt 0x0 ; encoding: [0x00,0x00,0xc3,0xbf] 398; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf] 399; GFX1200-NEXT: v_fmaak_f32 v0, v0, v1, 0x43800000 ; encoding: [0x00,0x03,0x00,0x5a,0x00,0x00,0x80,0x43] 400; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] 401 %fma = call float @llvm.fma.f32(float %x, float %y, float 256.0) 402 ret float %fma 403} 404 405; GFX9: codeLenInByte = 24 406; GFX10: codeLenInByte = 16 407; GFX11: codeLenInByte = 16 408; GFX1200: codeLenInByte = 32 409 410define float @v_fma_k_f32_src_mods(float %x, float %y) { 411; GFX9-LABEL: v_fma_k_f32_src_mods: 412; GFX9: ; %bb.0: 413; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] 414; GFX9-NEXT: s_mov_b32 s4, 0x43800000 ; encoding: [0xff,0x00,0x84,0xbe,0x00,0x00,0x80,0x43] 415; GFX9-NEXT: v_fma_f32 v0, |v0|, v1, s4 ; encoding: [0x00,0x01,0xcb,0xd1,0x00,0x03,0x12,0x00] 416; GFX9-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe] 417; 418; GFX10-LABEL: v_fma_k_f32_src_mods: 419; GFX10: ; %bb.0: 420; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] 421; GFX10-NEXT: v_fma_f32 v0, |v0|, v1, 0x43800000 ; encoding: [0x00,0x01,0x4b,0xd5,0x00,0x03,0xfe,0x03,0x00,0x00,0x80,0x43] 422; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe] 423; 424; GFX11-LABEL: v_fma_k_f32_src_mods: 425; GFX11: ; %bb.0: 426; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf] 427; GFX11-NEXT: v_fma_f32 v0, |v0|, v1, 0x43800000 ; encoding: [0x00,0x01,0x13,0xd6,0x00,0x03,0xfe,0x03,0x00,0x00,0x80,0x43] 428; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] 429; 430; GFX1200-LABEL: v_fma_k_f32_src_mods: 431; GFX1200: ; %bb.0: 432; GFX1200-NEXT: s_wait_loadcnt_dscnt 0x0 ; encoding: [0x00,0x00,0xc8,0xbf] 433; GFX1200-NEXT: s_wait_expcnt 0x0 ; encoding: [0x00,0x00,0xc4,0xbf] 434; GFX1200-NEXT: s_wait_samplecnt 0x0 ; encoding: [0x00,0x00,0xc2,0xbf] 435; GFX1200-NEXT: s_wait_bvhcnt 0x0 ; encoding: [0x00,0x00,0xc3,0xbf] 436; GFX1200-NEXT: s_wait_kmcnt 0x0 ; encoding: [0x00,0x00,0xc7,0xbf] 437; GFX1200-NEXT: v_fma_f32 v0, |v0|, v1, 0x43800000 ; encoding: [0x00,0x01,0x13,0xd6,0x00,0x03,0xfe,0x03,0x00,0x00,0x80,0x43] 438; GFX1200-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] 439 %fabs.x = call float @llvm.fabs.f32(float %x) 440 %fma = call float @llvm.fma.f32(float %fabs.x, float %y, float 256.0) 441 ret float %fma 442} 443 444; GFX9: codeLenInByte = 24 445; GFX10: codeLenInByte = 20 446; GFX11: codeLenInByte = 20 447; GFX1200: codeLenInByte = 36 448 449define amdgpu_ps float @s_fmaak_f32(float inreg %x, float inreg %y) { 450; GFX9-LABEL: s_fmaak_f32: 451; GFX9: ; %bb.0: 452; GFX9-NEXT: v_mov_b32_e32 v0, s1 ; encoding: [0x01,0x02,0x00,0x7e] 453; GFX9-NEXT: v_mov_b32_e32 v1, 0x43800000 ; encoding: [0xff,0x02,0x02,0x7e,0x00,0x00,0x80,0x43] 454; GFX9-NEXT: v_fma_f32 v0, s0, v0, v1 ; encoding: [0x00,0x00,0xcb,0xd1,0x00,0x00,0x06,0x04] 455; GFX9-NEXT: ; return to shader part epilog 456; 457; GFX10-LABEL: s_fmaak_f32: 458; GFX10: ; %bb.0: 459; GFX10-NEXT: v_mov_b32_e32 v0, s1 ; encoding: [0x01,0x02,0x00,0x7e] 460; GFX10-NEXT: v_fmaak_f32 v0, s0, v0, 0x43800000 ; encoding: [0x00,0x00,0x00,0x5a,0x00,0x00,0x80,0x43] 461; GFX10-NEXT: ; return to shader part epilog 462; 463; GFX1100-LABEL: s_fmaak_f32: 464; GFX1100: ; %bb.0: 465; GFX1100-NEXT: v_mov_b32_e32 v0, s1 ; encoding: [0x01,0x02,0x00,0x7e] 466; GFX1100-NEXT: s_delay_alu instid0(VALU_DEP_1) ; encoding: [0x01,0x00,0x87,0xbf] 467; GFX1100-NEXT: v_fmaak_f32 v0, s0, v0, 0x43800000 ; encoding: [0x00,0x00,0x00,0x5a,0x00,0x00,0x80,0x43] 468; GFX1100-NEXT: ; return to shader part epilog 469; 470; GFX1150-LABEL: s_fmaak_f32: 471; GFX1150: ; %bb.0: 472; GFX1150-NEXT: s_fmaak_f32 s0, s0, s1, 0x43800000 ; encoding: [0x00,0x01,0x80,0xa2,0x00,0x00,0x80,0x43] 473; GFX1150-NEXT: s_delay_alu instid0(SALU_CYCLE_3) ; encoding: [0x0b,0x00,0x87,0xbf] 474; GFX1150-NEXT: v_mov_b32_e32 v0, s0 ; encoding: [0x00,0x02,0x00,0x7e] 475; GFX1150-NEXT: ; return to shader part epilog 476; 477; GFX1200-LABEL: s_fmaak_f32: 478; GFX1200: ; %bb.0: 479; GFX1200-NEXT: s_fmaak_f32 s0, s0, s1, 0x43800000 ; encoding: [0x00,0x01,0x80,0xa2,0x00,0x00,0x80,0x43] 480; GFX1200-NEXT: s_delay_alu instid0(SALU_CYCLE_3) ; encoding: [0x0b,0x00,0x87,0xbf] 481; GFX1200-NEXT: v_mov_b32_e32 v0, s0 ; encoding: [0x00,0x02,0x00,0x7e] 482; GFX1200-NEXT: ; return to shader part epilog 483 %fma = call float @llvm.fma.f32(float %x, float %y, float 256.0) 484 ret float %fma 485} 486 487; GFX9: codeLenInByte = 20 488; GFX10: codeLenInByte = 12 489; GFX1100: codeLenInByte = 16 490; GFX1150: codeLenInByte = 16 491; GFX1200: codeLenInByte = 16 492