; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs -enable-misched=false < %s | FileCheck -check-prefixes=SI %s
; RUN: llc -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs -enable-misched=false < %s | FileCheck -check-prefixes=VI %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=+real-true16,-flat-for-global -verify-machineinstrs -enable-misched=false < %s | FileCheck -check-prefixes=GFX11-SDAG %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=+real-true16,-flat-for-global -verify-machineinstrs -enable-misched=false < %s | FileCheck -check-prefixes=GFX11-GISEL %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=-real-true16,-flat-for-global -verify-machineinstrs -enable-misched=false < %s | FileCheck -check-prefixes=GFX11-FAKE16-SDAG %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-real-true16,-flat-for-global -verify-machineinstrs -enable-misched=false < %s | FileCheck -check-prefixes=GFX11-FAKE16-GISEL %s

; Scalar half fadd: both operands come from volatile global loads of %a and %b,
; and the sum is stored to %r.
define amdgpu_kernel void @fadd_f16(
; SI-LABEL: fadd_f16:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0xd
; SI-NEXT:    s_mov_b32 s11, 0xf000
; SI-NEXT:    s_mov_b32 s10, -1
; SI-NEXT:    s_mov_b32 s6, s10
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s8, s0
; SI-NEXT:    s_mov_b32 s9, s1
; SI-NEXT:    s_mov_b32 s0, s2
; SI-NEXT:    s_mov_b32 s1, s3
; SI-NEXT:    s_mov_b32 s2, s10
; SI-NEXT:    s_mov_b32 s3, s11
; SI-NEXT:    s_mov_b32 s7, s11
; SI-NEXT:    buffer_load_ushort v0, off, s[0:3], 0 glc
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    buffer_load_ushort v1, off, s[4:7], 0 glc
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
; SI-NEXT:    v_add_f32_e32 v0, v0, v1
; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SI-NEXT:    buffer_store_short v0, off, s[8:11], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: fadd_f16:
; VI:       ; %bb.0: ; %entry
; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0x34
; VI-NEXT:    s_mov_b32 s11, 0xf000
; VI-NEXT:    s_mov_b32 s10, -1
; VI-NEXT:    s_mov_b32 s6, s10
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s8, s0
; VI-NEXT:    s_mov_b32 s9, s1
; VI-NEXT:    s_mov_b32 s0, s2
; VI-NEXT:    s_mov_b32 s1, s3
; VI-NEXT:    s_mov_b32 s2, s10
; VI-NEXT:    s_mov_b32 s3, s11
; VI-NEXT:    s_mov_b32 s7, s11
; VI-NEXT:    buffer_load_ushort v0, off, s[0:3], 0 glc
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    buffer_load_ushort v1, off, s[4:7], 0 glc
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_add_f16_e32 v0, v0, v1
; VI-NEXT:    buffer_store_short v0, off, s[8:11], 0
; VI-NEXT:    s_endpgm
;
; GFX11-SDAG-LABEL: fadd_f16:
; GFX11-SDAG:       ; %bb.0: ; %entry
; GFX11-SDAG-NEXT:    s_clause 0x1
; GFX11-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-SDAG-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
; GFX11-SDAG-NEXT:    s_mov_b32 s11, 0x31016000
; GFX11-SDAG-NEXT:    s_mov_b32 s10, -1
; GFX11-SDAG-NEXT:    s_mov_b32 s7, s11
; GFX11-SDAG-NEXT:    s_mov_b32 s6, s10
; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-SDAG-NEXT:    s_mov_b32 s8, s0
; GFX11-SDAG-NEXT:    s_mov_b32 s9, s1
; GFX11-SDAG-NEXT:    s_mov_b32 s0, s2
; GFX11-SDAG-NEXT:    s_mov_b32 s1, s3
; GFX11-SDAG-NEXT:    s_mov_b32 s2, s10
; GFX11-SDAG-NEXT:    s_mov_b32 s3, s11
; GFX11-SDAG-NEXT:    buffer_load_u16 v0, off, s[0:3], 0 glc dlc
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX11-SDAG-NEXT:    buffer_load_u16 v1, off, s[4:7], 0 glc dlc
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX11-SDAG-NEXT:    v_mov_b16_e32 v0.h, v1.l
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT:    v_add_f16_e32 v0.l, v0.l, v0.h
; GFX11-SDAG-NEXT:    buffer_store_b16 v0, off, s[8:11], 0
; GFX11-SDAG-NEXT:    s_endpgm
;
; GFX11-GISEL-LABEL: fadd_f16:
; GFX11-GISEL:       ; %bb.0: ; %entry
; GFX11-GISEL-NEXT:    s_clause 0x1
; GFX11-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-GISEL-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
; GFX11-GISEL-NEXT:    s_mov_b32 s10, -1
; GFX11-GISEL-NEXT:    s_mov_b32 s11, 0x31016000
; GFX11-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-GISEL-NEXT:    s_mov_b64 s[6:7], s[10:11]
; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT:    s_mov_b64 s[8:9], s[2:3]
; GFX11-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
; GFX11-GISEL-NEXT:    buffer_load_u16 v0, off, s[8:11], 0 glc dlc
; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT:    buffer_load_u16 v1, off, s[4:7], 0 glc dlc
; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT:    v_add_f16_e32 v0.l, v0.l, v1.l
; GFX11-GISEL-NEXT:    buffer_store_b16 v0, off, s[0:3], 0
; GFX11-GISEL-NEXT:    s_endpgm
;
; GFX11-FAKE16-SDAG-LABEL: fadd_f16:
; GFX11-FAKE16-SDAG:       ; %bb.0: ; %entry
; GFX11-FAKE16-SDAG-NEXT:    s_clause 0x1
; GFX11-FAKE16-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-FAKE16-SDAG-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
; GFX11-FAKE16-SDAG-NEXT:    s_mov_b32 s11, 0x31016000
; GFX11-FAKE16-SDAG-NEXT:    s_mov_b32 s10, -1
; GFX11-FAKE16-SDAG-NEXT:    s_mov_b32 s7, s11
; GFX11-FAKE16-SDAG-NEXT:    s_mov_b32 s6, s10
; GFX11-FAKE16-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-FAKE16-SDAG-NEXT:    s_mov_b32 s8, s0
; GFX11-FAKE16-SDAG-NEXT:    s_mov_b32 s9, s1
; GFX11-FAKE16-SDAG-NEXT:    s_mov_b32 s0, s2
; GFX11-FAKE16-SDAG-NEXT:    s_mov_b32 s1, s3
; GFX11-FAKE16-SDAG-NEXT:    s_mov_b32 s2, s10
; GFX11-FAKE16-SDAG-NEXT:    s_mov_b32 s3, s11
; GFX11-FAKE16-SDAG-NEXT:    buffer_load_u16 v0, off, s[0:3], 0 glc dlc
; GFX11-FAKE16-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX11-FAKE16-SDAG-NEXT:    buffer_load_u16 v1, off, s[4:7], 0 glc dlc
; GFX11-FAKE16-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX11-FAKE16-SDAG-NEXT:    v_add_f16_e32 v0, v0, v1
; GFX11-FAKE16-SDAG-NEXT:    buffer_store_b16 v0, off, s[8:11], 0
; GFX11-FAKE16-SDAG-NEXT:    s_endpgm
;
; GFX11-FAKE16-GISEL-LABEL: fadd_f16:
; GFX11-FAKE16-GISEL:       ; %bb.0: ; %entry
; GFX11-FAKE16-GISEL-NEXT:    s_clause 0x1
; GFX11-FAKE16-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-FAKE16-GISEL-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
; GFX11-FAKE16-GISEL-NEXT:    s_mov_b32 s10, -1
; GFX11-FAKE16-GISEL-NEXT:    s_mov_b32 s11, 0x31016000
; GFX11-FAKE16-GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; GFX11-FAKE16-GISEL-NEXT:    s_mov_b64 s[6:7], s[10:11]
; GFX11-FAKE16-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-FAKE16-GISEL-NEXT:    s_mov_b64 s[8:9], s[2:3]
; GFX11-FAKE16-GISEL-NEXT:    s_mov_b64 s[2:3], s[10:11]
; GFX11-FAKE16-GISEL-NEXT:    buffer_load_u16 v0, off, s[8:11], 0 glc dlc
; GFX11-FAKE16-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX11-FAKE16-GISEL-NEXT:    buffer_load_u16 v1, off, s[4:7], 0 glc dlc
; GFX11-FAKE16-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX11-FAKE16-GISEL-NEXT:    v_add_f16_e32 v0, v0, v1
; GFX11-FAKE16-GISEL-NEXT:    buffer_store_b16 v0, off, s[0:3], 0
; GFX11-FAKE16-GISEL-NEXT:    s_endpgm
    ptr addrspace(1) %r,
    ptr addrspace(1) %a,
    ptr addrspace(1) %b) {
entry:
  %a.val = load volatile half, ptr addrspace(1) %a
  %b.val = load volatile half, ptr addrspace(1) %b
  %r.val = fadd half %a.val, %b.val
  store half %r.val, ptr addrspace(1) %r
  ret void
}

; Scalar half fadd with the constant 1.0 as the first operand and a value
; loaded from %b as the second.
define amdgpu_kernel void @fadd_f16_imm_a(
; SI-LABEL: fadd_f16_imm_a:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_mov_b32 s0, s2
; SI-NEXT:    s_mov_b32 s1, s3
; SI-NEXT:    s_mov_b32 s2, s6
; SI-NEXT:    s_mov_b32 s3, s7
; SI-NEXT:    buffer_load_ushort v0, off, s[0:3], 0
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-NEXT:    v_add_f32_e32 v0, 1.0, v0
; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SI-NEXT:    buffer_store_short v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: fadd_f16_imm_a:
; VI:       ; %bb.0: ; %entry
; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s4, s0
; VI-NEXT:    s_mov_b32 s5, s1
; VI-NEXT:    s_mov_b32 s0, s2
; VI-NEXT:    s_mov_b32 s1, s3
; VI-NEXT:    s_mov_b32 s2, s6
; VI-NEXT:    s_mov_b32 s3, s7
; VI-NEXT:    buffer_load_ushort v0, off, s[0:3], 0
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_add_f16_e32 v0, 1.0, v0
; VI-NEXT:    buffer_store_short v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
;
; GFX11-SDAG-LABEL: fadd_f16_imm_a:
; GFX11-SDAG:       ; %bb.0: ; %entry
; GFX11-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-SDAG-NEXT:    s_mov_b32 s7, 0x31016000
; GFX11-SDAG-NEXT:    s_mov_b32 s6, -1
; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-SDAG-NEXT:    s_mov_b32 s4, s0
; GFX11-SDAG-NEXT:    s_mov_b32 s5, s1
; GFX11-SDAG-NEXT:    s_mov_b32 s0, s2
; GFX11-SDAG-NEXT:    s_mov_b32 s1, s3
; GFX11-SDAG-NEXT:    s_mov_b32 s2, s6
; GFX11-SDAG-NEXT:    s_mov_b32 s3, s7
; GFX11-SDAG-NEXT:    buffer_load_u16 v0, off, s[0:3], 0
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX11-SDAG-NEXT:    v_add_f16_e32 v0.l, 1.0, v0.l
; GFX11-SDAG-NEXT:    buffer_store_b16 v0, off, s[4:7], 0
; GFX11-SDAG-NEXT:    s_endpgm
;
; GFX11-GISEL-LABEL: fadd_f16_imm_a:
; GFX11-GISEL:       ; %bb.0: ; %entry
; GFX11-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-GISEL-NEXT:    s_mov_b32 s6, -1
; GFX11-GISEL-NEXT:    s_mov_b32 s7, 0x31016000
; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT:    s_mov_b64 s[4:5], s[2:3]
; GFX11-GISEL-NEXT:    s_mov_b64 s[2:3], s[6:7]
; GFX11-GISEL-NEXT:    buffer_load_u16 v0, off, s[4:7], 0
; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT:    v_add_f16_e32 v0.l, 1.0, v0.l
; GFX11-GISEL-NEXT:    buffer_store_b16 v0, off, s[0:3], 0
; GFX11-GISEL-NEXT:    s_endpgm
;
; GFX11-FAKE16-SDAG-LABEL: fadd_f16_imm_a:
; GFX11-FAKE16-SDAG:       ; %bb.0: ; %entry
; GFX11-FAKE16-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-FAKE16-SDAG-NEXT:    s_mov_b32 s7, 0x31016000
; GFX11-FAKE16-SDAG-NEXT:    s_mov_b32 s6, -1
; GFX11-FAKE16-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-FAKE16-SDAG-NEXT:    s_mov_b32 s4, s0
; GFX11-FAKE16-SDAG-NEXT:    s_mov_b32 s5, s1
; GFX11-FAKE16-SDAG-NEXT:    s_mov_b32 s0, s2
; GFX11-FAKE16-SDAG-NEXT:    s_mov_b32 s1, s3
; GFX11-FAKE16-SDAG-NEXT:    s_mov_b32 s2, s6
; GFX11-FAKE16-SDAG-NEXT:    s_mov_b32 s3, s7
; GFX11-FAKE16-SDAG-NEXT:    buffer_load_u16 v0, off, s[0:3], 0
; GFX11-FAKE16-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX11-FAKE16-SDAG-NEXT:    v_add_f16_e32 v0, 1.0, v0
; GFX11-FAKE16-SDAG-NEXT:    buffer_store_b16 v0, off, s[4:7], 0
; GFX11-FAKE16-SDAG-NEXT:    s_endpgm
;
; GFX11-FAKE16-GISEL-LABEL: fadd_f16_imm_a:
; GFX11-FAKE16-GISEL:       ; %bb.0: ; %entry
; GFX11-FAKE16-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-FAKE16-GISEL-NEXT:    s_mov_b32 s6, -1
; GFX11-FAKE16-GISEL-NEXT:    s_mov_b32 s7, 0x31016000
; GFX11-FAKE16-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-FAKE16-GISEL-NEXT:    s_mov_b64 s[4:5], s[2:3]
; GFX11-FAKE16-GISEL-NEXT:    s_mov_b64 s[2:3], s[6:7]
; GFX11-FAKE16-GISEL-NEXT:    buffer_load_u16 v0, off, s[4:7], 0
; GFX11-FAKE16-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX11-FAKE16-GISEL-NEXT:    v_add_f16_e32 v0, 1.0, v0
; GFX11-FAKE16-GISEL-NEXT:    buffer_store_b16 v0, off, s[0:3], 0
; GFX11-FAKE16-GISEL-NEXT:    s_endpgm
    ptr addrspace(1) %r,
    ptr addrspace(1) %b) {
entry:
  %b.val = load half, ptr addrspace(1) %b
  %r.val = fadd half 1.0, %b.val
  store half %r.val, ptr addrspace(1) %r
  ret void
}

; Scalar half fadd with a value loaded from %a as the first operand and the
; constant 2.0 as the second.
define amdgpu_kernel void @fadd_f16_imm_b(
; SI-LABEL: fadd_f16_imm_b:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_mov_b32 s0, s2
; SI-NEXT:    s_mov_b32 s1, s3
; SI-NEXT:    s_mov_b32 s2, s6
; SI-NEXT:    s_mov_b32 s3, s7
; SI-NEXT:    buffer_load_ushort v0, off, s[0:3], 0
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-NEXT:    v_add_f32_e32 v0, 2.0, v0
; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SI-NEXT:    buffer_store_short v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: fadd_f16_imm_b:
; VI:       ; %bb.0: ; %entry
; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    s_mov_b32 s4, s0
; VI-NEXT:    s_mov_b32 s5, s1
; VI-NEXT:    s_mov_b32 s0, s2
; VI-NEXT:    s_mov_b32 s1, s3
; VI-NEXT:    s_mov_b32 s2, s6
; VI-NEXT:    s_mov_b32 s3, s7
; VI-NEXT:    buffer_load_ushort v0, off, s[0:3], 0
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_add_f16_e32 v0, 2.0, v0
; VI-NEXT:    buffer_store_short v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
;
; GFX11-SDAG-LABEL: fadd_f16_imm_b:
; GFX11-SDAG:       ; %bb.0: ; %entry
; GFX11-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-SDAG-NEXT:    s_mov_b32 s7, 0x31016000
; GFX11-SDAG-NEXT:    s_mov_b32 s6, -1
; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-SDAG-NEXT:    s_mov_b32 s4, s0
; GFX11-SDAG-NEXT:    s_mov_b32 s5, s1
; GFX11-SDAG-NEXT:    s_mov_b32 s0, s2
; GFX11-SDAG-NEXT:    s_mov_b32 s1, s3
; GFX11-SDAG-NEXT:    s_mov_b32 s2, s6
; GFX11-SDAG-NEXT:    s_mov_b32 s3, s7
; GFX11-SDAG-NEXT:    buffer_load_u16 v0, off, s[0:3], 0
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX11-SDAG-NEXT:    v_add_f16_e32 v0.l, 2.0, v0.l
; GFX11-SDAG-NEXT:    buffer_store_b16 v0, off, s[4:7], 0
; GFX11-SDAG-NEXT:    s_endpgm
;
; GFX11-GISEL-LABEL: fadd_f16_imm_b:
; GFX11-GISEL:       ; %bb.0: ; %entry
; GFX11-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-GISEL-NEXT:    s_mov_b32 s6, -1
; GFX11-GISEL-NEXT:    s_mov_b32 s7, 0x31016000
; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT:    s_mov_b64 s[4:5], s[2:3]
; GFX11-GISEL-NEXT:    s_mov_b64 s[2:3], s[6:7]
; GFX11-GISEL-NEXT:    buffer_load_u16 v0, off, s[4:7], 0
; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT:    v_add_f16_e32 v0.l, 2.0, v0.l
; GFX11-GISEL-NEXT:    buffer_store_b16 v0, off, s[0:3], 0
; GFX11-GISEL-NEXT:    s_endpgm
;
; GFX11-FAKE16-SDAG-LABEL: fadd_f16_imm_b:
; GFX11-FAKE16-SDAG:       ; %bb.0: ; %entry
; GFX11-FAKE16-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-FAKE16-SDAG-NEXT:    s_mov_b32 s7, 0x31016000
; GFX11-FAKE16-SDAG-NEXT:    s_mov_b32 s6, -1
; GFX11-FAKE16-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-FAKE16-SDAG-NEXT:    s_mov_b32 s4, s0
; GFX11-FAKE16-SDAG-NEXT:    s_mov_b32 s5, s1
; GFX11-FAKE16-SDAG-NEXT:    s_mov_b32 s0, s2
; GFX11-FAKE16-SDAG-NEXT:    s_mov_b32 s1, s3
; GFX11-FAKE16-SDAG-NEXT:    s_mov_b32 s2, s6
; GFX11-FAKE16-SDAG-NEXT:    s_mov_b32 s3, s7
; GFX11-FAKE16-SDAG-NEXT:    buffer_load_u16 v0, off, s[0:3], 0
; GFX11-FAKE16-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX11-FAKE16-SDAG-NEXT:    v_add_f16_e32 v0, 2.0, v0
; GFX11-FAKE16-SDAG-NEXT:    buffer_store_b16 v0, off, s[4:7], 0
; GFX11-FAKE16-SDAG-NEXT:    s_endpgm
;
; GFX11-FAKE16-GISEL-LABEL: fadd_f16_imm_b:
; GFX11-FAKE16-GISEL:       ; %bb.0: ; %entry
; GFX11-FAKE16-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-FAKE16-GISEL-NEXT:    s_mov_b32 s6, -1
; GFX11-FAKE16-GISEL-NEXT:    s_mov_b32 s7, 0x31016000
; GFX11-FAKE16-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-FAKE16-GISEL-NEXT:    s_mov_b64 s[4:5], s[2:3]
; GFX11-FAKE16-GISEL-NEXT:    s_mov_b64 s[2:3], s[6:7]
; GFX11-FAKE16-GISEL-NEXT:    buffer_load_u16 v0, off, s[4:7], 0
; GFX11-FAKE16-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX11-FAKE16-GISEL-NEXT:    v_add_f16_e32 v0, 2.0, v0
; GFX11-FAKE16-GISEL-NEXT:    buffer_store_b16 v0, off, s[0:3], 0
; GFX11-FAKE16-GISEL-NEXT:    s_endpgm
    ptr addrspace(1) %r,
    ptr addrspace(1) %a) {
entry:
  %a.val = load half, ptr addrspace(1) %a
  %r.val = fadd half %a.val, 2.0
  store half %r.val, ptr addrspace(1) %r
  ret void
}

; <2 x half> fadd: each operand is loaded from %a / %b at an element offset
; given by llvm.amdgcn.workitem.id.x, and the sum is stored to %r.
define amdgpu_kernel void @fadd_v2f16(
; SI-LABEL: fadd_v2f16:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT:    s_load_dwordx2 s[4:5], s[4:5], 0xd
; SI-NEXT:    s_mov_b32 s11, 0xf000
; SI-NEXT:    s_mov_b32 s14, 0
; SI-NEXT:    s_mov_b32 s15, s11
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b64 s[12:13], s[2:3]
; SI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; SI-NEXT:    v_mov_b32_e32 v1, 0
; SI-NEXT:    s_mov_b64 s[6:7], s[14:15]
; SI-NEXT:    buffer_load_dword v2, v[0:1], s[12:15], 0 addr64
; SI-NEXT:    buffer_load_dword v0, v[0:1], s[4:7], 0 addr64
; SI-NEXT:    s_mov_b32 s10, -1
; SI-NEXT:    s_mov_b32 s8, s0
; SI-NEXT:    s_mov_b32 s9, s1
; SI-NEXT:    s_waitcnt vmcnt(1)
; SI-NEXT:    v_cvt_f32_f16_e32 v3, v2
; SI-NEXT:    v_lshrrev_b32_e32 v2, 16, v2
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_cvt_f32_f16_e32 v1, v0
; SI-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
; SI-NEXT:    v_cvt_f32_f16_e32 v2, v2
; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-NEXT:    v_add_f32_e32 v1, v3, v1
; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
; SI-NEXT:    v_add_f32_e32 v0, v2, v0
; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SI-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
; SI-NEXT:    v_or_b32_e32 v0, v1, v0
; SI-NEXT:    buffer_store_dword v0, off, s[8:11], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: fadd_v2f16:
; VI:       ; %bb.0: ; %entry
; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x34
; VI-NEXT:    v_lshlrev_b32_e32 v2, 2, v0
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v2
; VI-NEXT:    v_mov_b32_e32 v1, s3
; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-NEXT:    v_add_u32_e32 v2, vcc, s8, v2
; VI-NEXT:    v_mov_b32_e32 v3, s9
; VI-NEXT:    v_addc_u32_e32 v3, vcc, 0, v3, vcc
; VI-NEXT:    flat_load_dword v0, v[0:1]
; VI-NEXT:    flat_load_dword v1, v[2:3]
; VI-NEXT:    s_mov_b32 s4, s0
; VI-NEXT:    s_mov_b32 s5, s1
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_add_f16_sdwa v2, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; VI-NEXT:    v_add_f16_e32 v0, v0, v1
; VI-NEXT:    v_or_b32_e32 v0, v0, v2
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
;
; GFX11-SDAG-LABEL: fadd_v2f16:
; GFX11-SDAG:       ; %bb.0: ; %entry
; GFX11-SDAG-NEXT:    s_clause 0x1
; GFX11-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-SDAG-NEXT:    s_load_b64 s[8:9], s[4:5], 0x34
; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; GFX11-SDAG-NEXT:    s_mov_b32 s7, 0x31016000
; GFX11-SDAG-NEXT:    s_mov_b32 s6, -1
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-SDAG-NEXT:    s_clause 0x1
; GFX11-SDAG-NEXT:    global_load_b32 v1, v0, s[2:3]
; GFX11-SDAG-NEXT:    global_load_b32 v0, v0, s[8:9]
; GFX11-SDAG-NEXT:    s_mov_b32 s4, s0
; GFX11-SDAG-NEXT:    s_mov_b32 s5, s1
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX11-SDAG-NEXT:    v_pk_add_f16 v0, v1, v0
; GFX11-SDAG-NEXT:    buffer_store_b32 v0, off, s[4:7], 0
; GFX11-SDAG-NEXT:    s_endpgm
;
; GFX11-GISEL-LABEL: fadd_v2f16:
; GFX11-GISEL:       ; %bb.0: ; %entry
; GFX11-GISEL-NEXT:    s_clause 0x1
; GFX11-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-GISEL-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT:    s_clause 0x1
; GFX11-GISEL-NEXT:    global_load_b32 v1, v0, s[2:3]
; GFX11-GISEL-NEXT:    global_load_b32 v0, v0, s[4:5]
; GFX11-GISEL-NEXT:    s_mov_b32 s2, -1
; GFX11-GISEL-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT:    v_pk_add_f16 v0, v1, v0
; GFX11-GISEL-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
; GFX11-GISEL-NEXT:    s_endpgm
;
; GFX11-FAKE16-SDAG-LABEL: fadd_v2f16:
; GFX11-FAKE16-SDAG:       ; %bb.0: ; %entry
; GFX11-FAKE16-SDAG-NEXT:    s_clause 0x1
; GFX11-FAKE16-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-FAKE16-SDAG-NEXT:    s_load_b64 s[8:9], s[4:5], 0x34
; GFX11-FAKE16-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; GFX11-FAKE16-SDAG-NEXT:    s_mov_b32 s7, 0x31016000
; GFX11-FAKE16-SDAG-NEXT:    s_mov_b32 s6, -1
; GFX11-FAKE16-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-FAKE16-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX11-FAKE16-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-FAKE16-SDAG-NEXT:    s_clause 0x1
; GFX11-FAKE16-SDAG-NEXT:    global_load_b32 v1, v0, s[2:3]
; GFX11-FAKE16-SDAG-NEXT:    global_load_b32 v0, v0, s[8:9]
; GFX11-FAKE16-SDAG-NEXT:    s_mov_b32 s4, s0
; GFX11-FAKE16-SDAG-NEXT:    s_mov_b32 s5, s1
; GFX11-FAKE16-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX11-FAKE16-SDAG-NEXT:    v_pk_add_f16 v0, v1, v0
; GFX11-FAKE16-SDAG-NEXT:    buffer_store_b32 v0, off, s[4:7], 0
; GFX11-FAKE16-SDAG-NEXT:    s_endpgm
;
; GFX11-FAKE16-GISEL-LABEL: fadd_v2f16:
; GFX11-FAKE16-GISEL:       ; %bb.0: ; %entry
; GFX11-FAKE16-GISEL-NEXT:    s_clause 0x1
; GFX11-FAKE16-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-FAKE16-GISEL-NEXT:    s_load_b64 s[4:5], s[4:5], 0x34
; GFX11-FAKE16-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; GFX11-FAKE16-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-FAKE16-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX11-FAKE16-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-FAKE16-GISEL-NEXT:    s_clause 0x1
; GFX11-FAKE16-GISEL-NEXT:    global_load_b32 v1, v0, s[2:3]
; GFX11-FAKE16-GISEL-NEXT:    global_load_b32 v0, v0, s[4:5]
; GFX11-FAKE16-GISEL-NEXT:    s_mov_b32 s2, -1
; GFX11-FAKE16-GISEL-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-FAKE16-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX11-FAKE16-GISEL-NEXT:    v_pk_add_f16 v0, v1, v0
; GFX11-FAKE16-GISEL-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
; GFX11-FAKE16-GISEL-NEXT:    s_endpgm
    ptr addrspace(1) %r,
    ptr addrspace(1) %a,
    ptr addrspace(1) %b) {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.a = getelementptr inbounds <2 x half>, ptr addrspace(1) %a, i32 %tid
  %gep.b = getelementptr inbounds <2 x half>, ptr addrspace(1) %b, i32 %tid
  %a.val = load <2 x half>, ptr addrspace(1) %gep.a
  %b.val = load <2 x half>, ptr addrspace(1) %gep.b
  %r.val = fadd <2 x half> %a.val, %b.val
  store <2 x half> %r.val, ptr addrspace(1) %r
  ret void
}

; <2 x half> fadd with the constant vector <1.0, 2.0> as the first operand and
; a per-workitem element of %b as the second.
define amdgpu_kernel void @fadd_v2f16_imm_a(
; SI-LABEL: fadd_v2f16_imm_a:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s10, 0
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b64 s[8:9], s[2:3]
; SI-NEXT:    v_mov_b32_e32 v1, 0
; SI-NEXT:    buffer_load_dword v0, v[0:1], s[8:11], 0 addr64
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_cvt_f32_f16_e32 v1, v0
; SI-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-NEXT:    v_add_f32_e32 v1, 1.0, v1
; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
; SI-NEXT:    v_add_f32_e32 v0, 2.0, v0
; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SI-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
; SI-NEXT:    v_or_b32_e32 v0, v1, v0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: fadd_v2f16_imm_a:
; VI:       ; %bb.0: ; %entry
; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
; VI-NEXT:    v_mov_b32_e32 v1, s3
; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-NEXT:    flat_load_dword v0, v[0:1]
; VI-NEXT:    v_mov_b32_e32 v1, 0x4000
; VI-NEXT:    s_mov_b32 s4, s0
; VI-NEXT:    s_mov_b32 s5, s1
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_add_f16_sdwa v1, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-NEXT:    v_add_f16_e32 v0, 1.0, v0
; VI-NEXT:    v_or_b32_e32 v0, v0, v1
; VI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-NEXT:    s_endpgm
;
; GFX11-SDAG-LABEL: fadd_v2f16_imm_a:
; GFX11-SDAG:       ; %bb.0: ; %entry
; GFX11-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; GFX11-SDAG-NEXT:    s_mov_b32 s7, 0x31016000
; GFX11-SDAG-NEXT:    s_mov_b32 s6, -1
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-SDAG-NEXT:    global_load_b32 v0, v0, s[2:3]
; GFX11-SDAG-NEXT:    s_mov_b32 s4, s0
; GFX11-SDAG-NEXT:    s_mov_b32 s5, s1
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX11-SDAG-NEXT:    v_pk_add_f16 v0, 0x40003c00, v0
; GFX11-SDAG-NEXT:    buffer_store_b32 v0, off, s[4:7], 0
; GFX11-SDAG-NEXT:    s_endpgm
;
; GFX11-GISEL-LABEL: fadd_v2f16_imm_a:
; GFX11-GISEL:       ; %bb.0: ; %entry
; GFX11-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT:    global_load_b32 v0, v0, s[2:3]
; GFX11-GISEL-NEXT:    s_mov_b32 s2, -1
; GFX11-GISEL-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX11-GISEL-NEXT:    v_pk_add_f16 v0, 0x40003c00, v0
; GFX11-GISEL-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
; GFX11-GISEL-NEXT:    s_endpgm
;
; GFX11-FAKE16-SDAG-LABEL: fadd_v2f16_imm_a:
; GFX11-FAKE16-SDAG:       ; %bb.0: ; %entry
; GFX11-FAKE16-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-FAKE16-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; GFX11-FAKE16-SDAG-NEXT:    s_mov_b32 s7, 0x31016000
; GFX11-FAKE16-SDAG-NEXT:    s_mov_b32 s6, -1
; GFX11-FAKE16-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-FAKE16-SDAG-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX11-FAKE16-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-FAKE16-SDAG-NEXT:    global_load_b32 v0, v0, s[2:3]
; GFX11-FAKE16-SDAG-NEXT:    s_mov_b32 s4, s0
; GFX11-FAKE16-SDAG-NEXT:    s_mov_b32 s5, s1
; GFX11-FAKE16-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX11-FAKE16-SDAG-NEXT:    v_pk_add_f16 v0, 0x40003c00, v0
; GFX11-FAKE16-SDAG-NEXT:    buffer_store_b32 v0, off, s[4:7], 0
; GFX11-FAKE16-SDAG-NEXT:    s_endpgm
;
; GFX11-FAKE16-GISEL-LABEL: fadd_v2f16_imm_a:
; GFX11-FAKE16-GISEL:       ; %bb.0: ; %entry
; GFX11-FAKE16-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-FAKE16-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; GFX11-FAKE16-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-FAKE16-GISEL-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; GFX11-FAKE16-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-FAKE16-GISEL-NEXT:    global_load_b32 v0, v0, s[2:3]
; GFX11-FAKE16-GISEL-NEXT:    s_mov_b32 s2, -1
; GFX11-FAKE16-GISEL-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-FAKE16-GISEL-NEXT:    s_waitcnt vmcnt(0)
; GFX11-FAKE16-GISEL-NEXT:    v_pk_add_f16 v0, 0x40003c00, v0
; GFX11-FAKE16-GISEL-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
; GFX11-FAKE16-GISEL-NEXT:    s_endpgm
    ptr addrspace(1) %r,
    ptr addrspace(1) %b) {
entry:
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.b = getelementptr inbounds <2 x half>, ptr addrspace(1) %b, i32 %tid
  %b.val = load <2 x half>, ptr addrspace(1) %gep.b
  %r.val = fadd <2 x half> <half 1.0, half 2.0>, %b.val
  store <2 x half> %r.val, ptr addrspace(1) %r
  ret void
}

define amdgpu_kernel void @fadd_v2f16_imm_b(
; SI-LABEL: fadd_v2f16_imm_b:
; SI:       ; %bb.0: ; %entry
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s10, 0
; SI-NEXT:    s_mov_b32 s11, s7
; SI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b64 s[8:9], s[2:3]
; SI-NEXT:    v_mov_b32_e32 v1, 0
; SI-NEXT:    buffer_load_dword v0, v[0:1], s[8:11], 0 addr64
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    s_waitcnt vmcnt(0)
; SI-NEXT:    v_cvt_f32_f16_e32 v1, v0
; SI-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
; SI-NEXT:    v_add_f32_e32 v1, 2.0, v1
; SI-NEXT:    v_cvt_f16_f32_e32 v1, v1
; SI-NEXT:    v_add_f32_e32 v0, 1.0, v0
; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SI-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
; SI-NEXT:    v_or_b32_e32 v0, v1, v0
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: fadd_v2f16_imm_b:
; VI:       ; %bb.0: ; %entry
; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-NEXT:    v_lshlrev_b32_e32 v0, 2, v0
; VI-NEXT:    s_mov_b32 s7, 0xf000
; VI-NEXT:    s_mov_b32 s6, -1
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
; VI-NEXT:    v_mov_b32_e32 v1, s3
; VI-NEXT:    v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-NEXT:    flat_load_dword v0, v[0:1]
; VI-NEXT:    v_mov_b32_e32 v1, 0x3c00
; VI-NEXT:    s_mov_b32 s4, s0
; VI-NEXT:    s_mov_b32 s5, s1
; VI-NEXT:    s_waitcnt vmcnt(0)
; VI-NEXT:    v_add_f16_sdwa v1, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-NEXT:    v_add_f16_e32 v0, 2.0, v0
; VI-NEXT:    v_or_b32_e32 v0, v0, v1
; VI-NEXT:
buffer_store_dword v0, off, s[4:7], 0 809; VI-NEXT: s_endpgm 810; 811; GFX11-SDAG-LABEL: fadd_v2f16_imm_b: 812; GFX11-SDAG: ; %bb.0: ; %entry 813; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 814; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 815; GFX11-SDAG-NEXT: s_mov_b32 s7, 0x31016000 816; GFX11-SDAG-NEXT: s_mov_b32 s6, -1 817; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 818; GFX11-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 819; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 820; GFX11-SDAG-NEXT: global_load_b32 v0, v0, s[2:3] 821; GFX11-SDAG-NEXT: s_mov_b32 s4, s0 822; GFX11-SDAG-NEXT: s_mov_b32 s5, s1 823; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) 824; GFX11-SDAG-NEXT: v_pk_add_f16 v0, 0x3c004000, v0 825; GFX11-SDAG-NEXT: buffer_store_b32 v0, off, s[4:7], 0 826; GFX11-SDAG-NEXT: s_endpgm 827; 828; GFX11-GISEL-LABEL: fadd_v2f16_imm_b: 829; GFX11-GISEL: ; %bb.0: ; %entry 830; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 831; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 832; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 833; GFX11-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 834; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 835; GFX11-GISEL-NEXT: global_load_b32 v0, v0, s[2:3] 836; GFX11-GISEL-NEXT: s_mov_b32 s2, -1 837; GFX11-GISEL-NEXT: s_mov_b32 s3, 0x31016000 838; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) 839; GFX11-GISEL-NEXT: v_pk_add_f16 v0, 0x3c004000, v0 840; GFX11-GISEL-NEXT: buffer_store_b32 v0, off, s[0:3], 0 841; GFX11-GISEL-NEXT: s_endpgm 842; 843; GFX11-FAKE16-SDAG-LABEL: fadd_v2f16_imm_b: 844; GFX11-FAKE16-SDAG: ; %bb.0: ; %entry 845; GFX11-FAKE16-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 846; GFX11-FAKE16-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 847; GFX11-FAKE16-SDAG-NEXT: s_mov_b32 s7, 0x31016000 848; GFX11-FAKE16-SDAG-NEXT: s_mov_b32 s6, -1 849; GFX11-FAKE16-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 850; GFX11-FAKE16-SDAG-NEXT: v_lshlrev_b32_e32 v0, 2, v0 851; GFX11-FAKE16-SDAG-NEXT: s_waitcnt lgkmcnt(0) 852; GFX11-FAKE16-SDAG-NEXT: global_load_b32 v0, v0, 
s[2:3] 853; GFX11-FAKE16-SDAG-NEXT: s_mov_b32 s4, s0 854; GFX11-FAKE16-SDAG-NEXT: s_mov_b32 s5, s1 855; GFX11-FAKE16-SDAG-NEXT: s_waitcnt vmcnt(0) 856; GFX11-FAKE16-SDAG-NEXT: v_pk_add_f16 v0, 0x3c004000, v0 857; GFX11-FAKE16-SDAG-NEXT: buffer_store_b32 v0, off, s[4:7], 0 858; GFX11-FAKE16-SDAG-NEXT: s_endpgm 859; 860; GFX11-FAKE16-GISEL-LABEL: fadd_v2f16_imm_b: 861; GFX11-FAKE16-GISEL: ; %bb.0: ; %entry 862; GFX11-FAKE16-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 863; GFX11-FAKE16-GISEL-NEXT: v_and_b32_e32 v0, 0x3ff, v0 864; GFX11-FAKE16-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 865; GFX11-FAKE16-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0 866; GFX11-FAKE16-GISEL-NEXT: s_waitcnt lgkmcnt(0) 867; GFX11-FAKE16-GISEL-NEXT: global_load_b32 v0, v0, s[2:3] 868; GFX11-FAKE16-GISEL-NEXT: s_mov_b32 s2, -1 869; GFX11-FAKE16-GISEL-NEXT: s_mov_b32 s3, 0x31016000 870; GFX11-FAKE16-GISEL-NEXT: s_waitcnt vmcnt(0) 871; GFX11-FAKE16-GISEL-NEXT: v_pk_add_f16 v0, 0x3c004000, v0 872; GFX11-FAKE16-GISEL-NEXT: buffer_store_b32 v0, off, s[0:3], 0 873; GFX11-FAKE16-GISEL-NEXT: s_endpgm 874; GFX11-LABEL: fadd_v2f16_imm_b: 875; GFX11: ; %bb.0: ; %entry 876; GFX11-NEXT: s_load_b128 s[0:3], s[0:1], 0x24 877; GFX11-NEXT: v_lshlrev_b32_e32 v0, 2, v0 878; GFX11-NEXT: s_mov_b32 s7, 0x31016000 879; GFX11-NEXT: s_mov_b32 s6, -1 880; GFX11-NEXT: s_waitcnt lgkmcnt(0) 881; GFX11-NEXT: global_load_b32 v0, v0, s[2:3] 882; GFX11-NEXT: s_mov_b32 s4, s0 883; GFX11-NEXT: s_mov_b32 s5, s1 884; GFX11-NEXT: s_waitcnt vmcnt(0) 885; GFX11-NEXT: v_pk_add_f16 v0, 0x3c004000, v0 886; GFX11-NEXT: buffer_store_b32 v0, off, s[4:7], 0 887; GFX11-NEXT: s_nop 0 888; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) 889; GFX11-NEXT: s_endpgm 890 ptr addrspace(1) %r, 891 ptr addrspace(1) %a) { 892entry: 893 %tid = call i32 @llvm.amdgcn.workitem.id.x() 894 %gep.a = getelementptr inbounds <2 x half>, ptr addrspace(1) %a, i32 %tid 895 %a.val = load <2 x half>, ptr addrspace(1) %gep.a 896 %r.val = fadd <2 x half> %a.val, <half 
2.0, half 1.0> 897 store <2 x half> %r.val, ptr addrspace(1) %r 898 ret void 899} 900 901declare i32 @llvm.amdgcn.workitem.id.x() #1 902 903attributes #0 = { nounwind } 904attributes #1 = { nounwind readnone } 905