1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=-flat-for-global -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefix=GFX10 %s 3; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefix=GFX11 %s 4; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefix=VI %s 5; RUN: llc -mtriple=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s 6 7; FIXME: Merge into imm.ll 8 9define amdgpu_kernel void @store_inline_imm_neg_0.0_i16(ptr addrspace(1) %out) { 10; GFX10-LABEL: store_inline_imm_neg_0.0_i16: 11; GFX10: ; %bb.0: 12; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa] 13; GFX10-NEXT: v_mov_b32_e32 v0, 0xffff8000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x80,0xff,0xff] 14; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31] 15; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe] 16; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf] 17; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 18; GFX10-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0xfd,0xbb] 19; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 20; 21; GFX11-LABEL: store_inline_imm_neg_0.0_i16: 22; GFX11: ; %bb.0: 23; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8] 24; GFX11-NEXT: v_mov_b32_e32 v0, 0xffff8000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x80,0xff,0xff] 25; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31] 26; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: 
[0xc1,0x00,0x82,0xbe] 27; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf] 28; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 dlc ; encoding: [0x00,0x20,0x64,0xe0,0x00,0x00,0x00,0x80] 29; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; encoding: [0x00,0x00,0x7c,0xbc] 30; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] 31; 32; VI-LABEL: store_inline_imm_neg_0.0_i16: 33; VI: ; %bb.0: 34; VI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00] 35; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11] 36; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 37; VI-NEXT: v_mov_b32_e32 v0, 0xffff8000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x80,0xff,0xff] 38; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf] 39; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 40; VI-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf] 41; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 42; 43; SI-LABEL: store_inline_imm_neg_0.0_i16: 44; SI: ; %bb.0: 45; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 46; SI-NEXT: s_mov_b32 s3, 0xf000 47; SI-NEXT: s_mov_b32 s2, -1 48; SI-NEXT: v_mov_b32_e32 v0, 0x8000 49; SI-NEXT: s_waitcnt lgkmcnt(0) 50; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 51; SI-NEXT: s_waitcnt vmcnt(0) 52; SI-NEXT: s_endpgm 53 store volatile i16 -32768, ptr addrspace(1) %out 54 ret void 55} 56 57define amdgpu_kernel void @store_inline_imm_0.0_f16(ptr addrspace(1) %out) { 58; GFX10-LABEL: store_inline_imm_0.0_f16: 59; GFX10: ; %bb.0: 60; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa] 61; GFX10-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e] 62; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31] 63; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe] 64; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; 
encoding: [0x7f,0xc0,0x8c,0xbf] 65; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 66; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 67; 68; GFX11-LABEL: store_inline_imm_0.0_f16: 69; GFX11: ; %bb.0: 70; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8] 71; GFX11-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e] 72; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31] 73; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 74; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf] 75; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80] 76; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] 77; 78; VI-LABEL: store_inline_imm_0.0_f16: 79; VI: ; %bb.0: 80; VI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00] 81; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11] 82; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 83; VI-NEXT: v_mov_b32_e32 v0, 0 ; encoding: [0x80,0x02,0x00,0x7e] 84; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf] 85; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 86; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 87; 88; SI-LABEL: store_inline_imm_0.0_f16: 89; SI: ; %bb.0: 90; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 91; SI-NEXT: s_mov_b32 s3, 0xf000 92; SI-NEXT: s_mov_b32 s2, -1 93; SI-NEXT: v_mov_b32_e32 v0, 0 94; SI-NEXT: s_waitcnt lgkmcnt(0) 95; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 96; SI-NEXT: s_endpgm 97 store half 0.0, ptr addrspace(1) %out 98 ret void 99} 100 101define amdgpu_kernel void @store_imm_neg_0.0_f16(ptr addrspace(1) %out) { 102; GFX10-LABEL: store_imm_neg_0.0_f16: 103; GFX10: ; %bb.0: 104; GFX10-NEXT: s_load_dwordx2 
s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa] 105; GFX10-NEXT: v_mov_b32_e32 v0, 0xffff8000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x80,0xff,0xff] 106; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31] 107; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe] 108; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf] 109; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 110; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 111; 112; GFX11-LABEL: store_imm_neg_0.0_f16: 113; GFX11: ; %bb.0: 114; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8] 115; GFX11-NEXT: v_mov_b32_e32 v0, 0xffff8000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x80,0xff,0xff] 116; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31] 117; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 118; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf] 119; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80] 120; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] 121; 122; VI-LABEL: store_imm_neg_0.0_f16: 123; VI: ; %bb.0: 124; VI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00] 125; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11] 126; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 127; VI-NEXT: v_mov_b32_e32 v0, 0xffff8000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x80,0xff,0xff] 128; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf] 129; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 130; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 131; 132; SI-LABEL: store_imm_neg_0.0_f16: 133; SI: ; %bb.0: 134; SI-NEXT: s_load_dwordx2 s[0:1], 
s[4:5], 0x9 135; SI-NEXT: s_mov_b32 s3, 0xf000 136; SI-NEXT: s_mov_b32 s2, -1 137; SI-NEXT: v_mov_b32_e32 v0, 0x8000 138; SI-NEXT: s_waitcnt lgkmcnt(0) 139; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 140; SI-NEXT: s_endpgm 141 store half -0.0, ptr addrspace(1) %out 142 ret void 143} 144 145define amdgpu_kernel void @store_inline_imm_0.5_f16(ptr addrspace(1) %out) { 146; GFX10-LABEL: store_inline_imm_0.5_f16: 147; GFX10: ; %bb.0: 148; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa] 149; GFX10-NEXT: v_mov_b32_e32 v0, 0x3800 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x38,0x00,0x00] 150; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31] 151; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe] 152; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf] 153; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 154; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 155; 156; GFX11-LABEL: store_inline_imm_0.5_f16: 157; GFX11: ; %bb.0: 158; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8] 159; GFX11-NEXT: v_mov_b32_e32 v0, 0x3800 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x38,0x00,0x00] 160; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31] 161; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 162; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf] 163; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80] 164; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] 165; 166; VI-LABEL: store_inline_imm_0.5_f16: 167; VI: ; %bb.0: 168; VI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00] 169; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11] 170; VI-NEXT: s_mov_b32 
s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 171; VI-NEXT: v_mov_b32_e32 v0, 0x3800 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x38,0x00,0x00] 172; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf] 173; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 174; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 175; 176; SI-LABEL: store_inline_imm_0.5_f16: 177; SI: ; %bb.0: 178; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 179; SI-NEXT: s_mov_b32 s3, 0xf000 180; SI-NEXT: s_mov_b32 s2, -1 181; SI-NEXT: v_mov_b32_e32 v0, 0x3800 182; SI-NEXT: s_waitcnt lgkmcnt(0) 183; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 184; SI-NEXT: s_endpgm 185 store half 0.5, ptr addrspace(1) %out 186 ret void 187} 188 189define amdgpu_kernel void @store_inline_imm_m_0.5_f16(ptr addrspace(1) %out) { 190; GFX10-LABEL: store_inline_imm_m_0.5_f16: 191; GFX10: ; %bb.0: 192; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa] 193; GFX10-NEXT: v_mov_b32_e32 v0, 0xffffb800 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xb8,0xff,0xff] 194; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31] 195; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe] 196; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf] 197; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 198; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 199; 200; GFX11-LABEL: store_inline_imm_m_0.5_f16: 201; GFX11: ; %bb.0: 202; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8] 203; GFX11-NEXT: v_mov_b32_e32 v0, 0xffffb800 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xb8,0xff,0xff] 204; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31] 205; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 206; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; 
encoding: [0x07,0xfc,0x89,0xbf] 207; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80] 208; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] 209; 210; VI-LABEL: store_inline_imm_m_0.5_f16: 211; VI: ; %bb.0: 212; VI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00] 213; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11] 214; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 215; VI-NEXT: v_mov_b32_e32 v0, 0xffffb800 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xb8,0xff,0xff] 216; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf] 217; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 218; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 219; 220; SI-LABEL: store_inline_imm_m_0.5_f16: 221; SI: ; %bb.0: 222; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 223; SI-NEXT: s_mov_b32 s3, 0xf000 224; SI-NEXT: s_mov_b32 s2, -1 225; SI-NEXT: v_mov_b32_e32 v0, 0xb800 226; SI-NEXT: s_waitcnt lgkmcnt(0) 227; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 228; SI-NEXT: s_endpgm 229 store half -0.5, ptr addrspace(1) %out 230 ret void 231} 232 233define amdgpu_kernel void @store_inline_imm_1.0_f16(ptr addrspace(1) %out) { 234; GFX10-LABEL: store_inline_imm_1.0_f16: 235; GFX10: ; %bb.0: 236; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa] 237; GFX10-NEXT: v_mov_b32_e32 v0, 0x3c00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x3c,0x00,0x00] 238; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31] 239; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe] 240; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf] 241; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 242; GFX10-NEXT: s_endpgm ; encoding: 
[0x00,0x00,0x81,0xbf] 243; 244; GFX11-LABEL: store_inline_imm_1.0_f16: 245; GFX11: ; %bb.0: 246; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8] 247; GFX11-NEXT: v_mov_b32_e32 v0, 0x3c00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x3c,0x00,0x00] 248; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31] 249; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 250; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf] 251; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80] 252; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] 253; 254; VI-LABEL: store_inline_imm_1.0_f16: 255; VI: ; %bb.0: 256; VI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00] 257; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11] 258; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 259; VI-NEXT: v_mov_b32_e32 v0, 0x3c00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x3c,0x00,0x00] 260; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf] 261; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 262; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 263; 264; SI-LABEL: store_inline_imm_1.0_f16: 265; SI: ; %bb.0: 266; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 267; SI-NEXT: s_mov_b32 s3, 0xf000 268; SI-NEXT: s_mov_b32 s2, -1 269; SI-NEXT: v_mov_b32_e32 v0, 0x3c00 270; SI-NEXT: s_waitcnt lgkmcnt(0) 271; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 272; SI-NEXT: s_endpgm 273 store half 1.0, ptr addrspace(1) %out 274 ret void 275} 276 277define amdgpu_kernel void @store_inline_imm_m_1.0_f16(ptr addrspace(1) %out) { 278; GFX10-LABEL: store_inline_imm_m_1.0_f16: 279; GFX10: ; %bb.0: 280; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa] 281; 
GFX10-NEXT: v_mov_b32_e32 v0, 0xffffbc00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xbc,0xff,0xff] 282; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31] 283; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe] 284; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf] 285; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 286; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 287; 288; GFX11-LABEL: store_inline_imm_m_1.0_f16: 289; GFX11: ; %bb.0: 290; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8] 291; GFX11-NEXT: v_mov_b32_e32 v0, 0xffffbc00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xbc,0xff,0xff] 292; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31] 293; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 294; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf] 295; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80] 296; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] 297; 298; VI-LABEL: store_inline_imm_m_1.0_f16: 299; VI: ; %bb.0: 300; VI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00] 301; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11] 302; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 303; VI-NEXT: v_mov_b32_e32 v0, 0xffffbc00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xbc,0xff,0xff] 304; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf] 305; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 306; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 307; 308; SI-LABEL: store_inline_imm_m_1.0_f16: 309; SI: ; %bb.0: 310; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 311; SI-NEXT: s_mov_b32 s3, 0xf000 312; SI-NEXT: 
s_mov_b32 s2, -1 313; SI-NEXT: v_mov_b32_e32 v0, 0xbc00 314; SI-NEXT: s_waitcnt lgkmcnt(0) 315; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 316; SI-NEXT: s_endpgm 317 store half -1.0, ptr addrspace(1) %out 318 ret void 319} 320 321define amdgpu_kernel void @store_inline_imm_2.0_f16(ptr addrspace(1) %out) { 322; GFX10-LABEL: store_inline_imm_2.0_f16: 323; GFX10: ; %bb.0: 324; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa] 325; GFX10-NEXT: v_mov_b32_e32 v0, 0x4000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x40,0x00,0x00] 326; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31] 327; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe] 328; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf] 329; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 330; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 331; 332; GFX11-LABEL: store_inline_imm_2.0_f16: 333; GFX11: ; %bb.0: 334; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8] 335; GFX11-NEXT: v_mov_b32_e32 v0, 0x4000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x40,0x00,0x00] 336; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31] 337; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 338; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf] 339; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80] 340; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] 341; 342; VI-LABEL: store_inline_imm_2.0_f16: 343; VI: ; %bb.0: 344; VI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00] 345; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11] 346; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 347; VI-NEXT: 
v_mov_b32_e32 v0, 0x4000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x40,0x00,0x00] 348; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf] 349; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 350; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 351; 352; SI-LABEL: store_inline_imm_2.0_f16: 353; SI: ; %bb.0: 354; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 355; SI-NEXT: s_mov_b32 s3, 0xf000 356; SI-NEXT: s_mov_b32 s2, -1 357; SI-NEXT: v_mov_b32_e32 v0, 0x4000 358; SI-NEXT: s_waitcnt lgkmcnt(0) 359; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 360; SI-NEXT: s_endpgm 361 store half 2.0, ptr addrspace(1) %out 362 ret void 363} 364 365define amdgpu_kernel void @store_inline_imm_m_2.0_f16(ptr addrspace(1) %out) { 366; GFX10-LABEL: store_inline_imm_m_2.0_f16: 367; GFX10: ; %bb.0: 368; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa] 369; GFX10-NEXT: v_mov_b32_e32 v0, 0xffffc000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xc0,0xff,0xff] 370; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31] 371; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe] 372; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf] 373; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 374; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 375; 376; GFX11-LABEL: store_inline_imm_m_2.0_f16: 377; GFX11: ; %bb.0: 378; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8] 379; GFX11-NEXT: v_mov_b32_e32 v0, 0xffffc000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xc0,0xff,0xff] 380; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31] 381; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 382; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf] 383; GFX11-NEXT: 
buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80] 384; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] 385; 386; VI-LABEL: store_inline_imm_m_2.0_f16: 387; VI: ; %bb.0: 388; VI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00] 389; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11] 390; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 391; VI-NEXT: v_mov_b32_e32 v0, 0xffffc000 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xc0,0xff,0xff] 392; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf] 393; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 394; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 395; 396; SI-LABEL: store_inline_imm_m_2.0_f16: 397; SI: ; %bb.0: 398; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 399; SI-NEXT: s_mov_b32 s3, 0xf000 400; SI-NEXT: s_mov_b32 s2, -1 401; SI-NEXT: v_mov_b32_e32 v0, 0xc000 402; SI-NEXT: s_waitcnt lgkmcnt(0) 403; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 404; SI-NEXT: s_endpgm 405 store half -2.0, ptr addrspace(1) %out 406 ret void 407} 408 409define amdgpu_kernel void @store_inline_imm_4.0_f16(ptr addrspace(1) %out) { 410; GFX10-LABEL: store_inline_imm_4.0_f16: 411; GFX10: ; %bb.0: 412; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa] 413; GFX10-NEXT: v_mov_b32_e32 v0, 0x4400 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x44,0x00,0x00] 414; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31] 415; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe] 416; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf] 417; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 418; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 419; 420; GFX11-LABEL: 
store_inline_imm_4.0_f16: 421; GFX11: ; %bb.0: 422; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8] 423; GFX11-NEXT: v_mov_b32_e32 v0, 0x4400 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x44,0x00,0x00] 424; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31] 425; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 426; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf] 427; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80] 428; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] 429; 430; VI-LABEL: store_inline_imm_4.0_f16: 431; VI: ; %bb.0: 432; VI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00] 433; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11] 434; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 435; VI-NEXT: v_mov_b32_e32 v0, 0x4400 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x44,0x00,0x00] 436; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf] 437; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 438; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 439; 440; SI-LABEL: store_inline_imm_4.0_f16: 441; SI: ; %bb.0: 442; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 443; SI-NEXT: s_mov_b32 s3, 0xf000 444; SI-NEXT: s_mov_b32 s2, -1 445; SI-NEXT: v_mov_b32_e32 v0, 0x4400 446; SI-NEXT: s_waitcnt lgkmcnt(0) 447; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 448; SI-NEXT: s_endpgm 449 store half 4.0, ptr addrspace(1) %out 450 ret void 451} 452 453define amdgpu_kernel void @store_inline_imm_m_4.0_f16(ptr addrspace(1) %out) { 454; GFX10-LABEL: store_inline_imm_m_4.0_f16: 455; GFX10: ; %bb.0: 456; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa] 457; GFX10-NEXT: v_mov_b32_e32 v0, 0xffffc400 ; 
encoding: [0xff,0x02,0x00,0x7e,0x00,0xc4,0xff,0xff] 458; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31] 459; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe] 460; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf] 461; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 462; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 463; 464; GFX11-LABEL: store_inline_imm_m_4.0_f16: 465; GFX11: ; %bb.0: 466; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8] 467; GFX11-NEXT: v_mov_b32_e32 v0, 0xffffc400 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xc4,0xff,0xff] 468; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31] 469; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 470; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf] 471; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80] 472; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] 473; 474; VI-LABEL: store_inline_imm_m_4.0_f16: 475; VI: ; %bb.0: 476; VI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00] 477; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11] 478; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 479; VI-NEXT: v_mov_b32_e32 v0, 0xffffc400 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0xc4,0xff,0xff] 480; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf] 481; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 482; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 483; 484; SI-LABEL: store_inline_imm_m_4.0_f16: 485; SI: ; %bb.0: 486; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 487; SI-NEXT: s_mov_b32 s3, 0xf000 488; SI-NEXT: s_mov_b32 s2, -1 489; SI-NEXT: v_mov_b32_e32 
v0, 0xc400 490; SI-NEXT: s_waitcnt lgkmcnt(0) 491; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 492; SI-NEXT: s_endpgm 493 store half -4.0, ptr addrspace(1) %out 494 ret void 495} 496 497define amdgpu_kernel void @store_inline_imm_inv_2pi_f16(ptr addrspace(1) %out) { 498; GFX10-LABEL: store_inline_imm_inv_2pi_f16: 499; GFX10: ; %bb.0: 500; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa] 501; GFX10-NEXT: v_mov_b32_e32 v0, 0x3118 ; encoding: [0xff,0x02,0x00,0x7e,0x18,0x31,0x00,0x00] 502; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31] 503; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe] 504; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf] 505; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 506; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 507; 508; GFX11-LABEL: store_inline_imm_inv_2pi_f16: 509; GFX11: ; %bb.0: 510; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8] 511; GFX11-NEXT: v_mov_b32_e32 v0, 0x3118 ; encoding: [0xff,0x02,0x00,0x7e,0x18,0x31,0x00,0x00] 512; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31] 513; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 514; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf] 515; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80] 516; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] 517; 518; VI-LABEL: store_inline_imm_inv_2pi_f16: 519; VI: ; %bb.0: 520; VI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00] 521; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11] 522; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 523; VI-NEXT: v_mov_b32_e32 v0, 0x3118 ; 
encoding: [0xff,0x02,0x00,0x7e,0x18,0x31,0x00,0x00] 524; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf] 525; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 526; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 527; 528; SI-LABEL: store_inline_imm_inv_2pi_f16: 529; SI: ; %bb.0: 530; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 531; SI-NEXT: s_mov_b32 s3, 0xf000 532; SI-NEXT: s_mov_b32 s2, -1 533; SI-NEXT: v_mov_b32_e32 v0, 0x3118 534; SI-NEXT: s_waitcnt lgkmcnt(0) 535; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 536; SI-NEXT: s_endpgm 537 store half 0xH3118, ptr addrspace(1) %out 538 ret void 539} 540 541define amdgpu_kernel void @store_inline_imm_m_inv_2pi_f16(ptr addrspace(1) %out) { 542; GFX10-LABEL: store_inline_imm_m_inv_2pi_f16: 543; GFX10: ; %bb.0: 544; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa] 545; GFX10-NEXT: v_mov_b32_e32 v0, 0xffffb118 ; encoding: [0xff,0x02,0x00,0x7e,0x18,0xb1,0xff,0xff] 546; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31] 547; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe] 548; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf] 549; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 550; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 551; 552; GFX11-LABEL: store_inline_imm_m_inv_2pi_f16: 553; GFX11: ; %bb.0: 554; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8] 555; GFX11-NEXT: v_mov_b32_e32 v0, 0xffffb118 ; encoding: [0xff,0x02,0x00,0x7e,0x18,0xb1,0xff,0xff] 556; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31] 557; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 558; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf] 559; GFX11-NEXT: buffer_store_b16 
v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80] 560; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] 561; 562; VI-LABEL: store_inline_imm_m_inv_2pi_f16: 563; VI: ; %bb.0: 564; VI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00] 565; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11] 566; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 567; VI-NEXT: v_mov_b32_e32 v0, 0xffffb118 ; encoding: [0xff,0x02,0x00,0x7e,0x18,0xb1,0xff,0xff] 568; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf] 569; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 570; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 571; 572; SI-LABEL: store_inline_imm_m_inv_2pi_f16: 573; SI: ; %bb.0: 574; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 575; SI-NEXT: s_mov_b32 s3, 0xf000 576; SI-NEXT: s_mov_b32 s2, -1 577; SI-NEXT: v_mov_b32_e32 v0, 0xb118 578; SI-NEXT: s_waitcnt lgkmcnt(0) 579; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 580; SI-NEXT: s_endpgm 581 store half 0xHB118, ptr addrspace(1) %out 582 ret void 583} 584 585define amdgpu_kernel void @store_literal_imm_f16(ptr addrspace(1) %out) { 586; GFX10-LABEL: store_literal_imm_f16: 587; GFX10: ; %bb.0: 588; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa] 589; GFX10-NEXT: v_mov_b32_e32 v0, 0x6c00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x6c,0x00,0x00] 590; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31] 591; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe] 592; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf] 593; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 594; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 595; 596; GFX11-LABEL: store_literal_imm_f16: 597; GFX11: 
; %bb.0: 598; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8] 599; GFX11-NEXT: v_mov_b32_e32 v0, 0x6c00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x6c,0x00,0x00] 600; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31] 601; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 602; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf] 603; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80] 604; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] 605; 606; VI-LABEL: store_literal_imm_f16: 607; VI: ; %bb.0: 608; VI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00] 609; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11] 610; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 611; VI-NEXT: v_mov_b32_e32 v0, 0x6c00 ; encoding: [0xff,0x02,0x00,0x7e,0x00,0x6c,0x00,0x00] 612; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf] 613; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 614; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 615; 616; SI-LABEL: store_literal_imm_f16: 617; SI: ; %bb.0: 618; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 619; SI-NEXT: s_mov_b32 s3, 0xf000 620; SI-NEXT: s_mov_b32 s2, -1 621; SI-NEXT: v_mov_b32_e32 v0, 0x6c00 622; SI-NEXT: s_waitcnt lgkmcnt(0) 623; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 624; SI-NEXT: s_endpgm 625 store half 4096.0, ptr addrspace(1) %out 626 ret void 627} 628 629define amdgpu_kernel void @add_inline_imm_0.0_f16(ptr addrspace(1) %out, half %x) { 630; GFX10-LABEL: add_inline_imm_0.0_f16: 631; GFX10: ; %bb.0: 632; GFX10-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf] 633; GFX10-NEXT: s_load_dword s2, s[8:9], 0x8 ; encoding: [0x84,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa] 634; GFX10-NEXT: s_load_dwordx2 
s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa] 635; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31] 636; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf] 637; GFX10-NEXT: v_add_f16_e64 v0, s2, 0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x00,0x01,0x00] 638; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe] 639; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 640; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 641; 642; GFX11-LABEL: add_inline_imm_0.0_f16: 643; GFX11: ; %bb.0: 644; GFX11-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf] 645; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8] 646; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8] 647; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31] 648; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf] 649; GFX11-NEXT: v_add_f16_e64 v0, s2, 0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x00,0x01,0x00] 650; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 651; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80] 652; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] 653; 654; VI-LABEL: add_inline_imm_0.0_f16: 655; VI: ; %bb.0: 656; VI-NEXT: s_load_dword s4, s[8:9], 0x8 ; encoding: [0x04,0x01,0x02,0xc0,0x08,0x00,0x00,0x00] 657; VI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00] 658; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11] 659; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 660; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf] 661; VI-NEXT: v_add_f16_e64 v0, s4, 0 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0x00,0x01,0x00] 662; VI-NEXT: 
buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 663; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 664; 665; SI-LABEL: add_inline_imm_0.0_f16: 666; SI: ; %bb.0: 667; SI-NEXT: s_load_dword s0, s[4:5], 0xb 668; SI-NEXT: s_mov_b32 s3, 0xf000 669; SI-NEXT: s_mov_b32 s2, -1 670; SI-NEXT: s_waitcnt lgkmcnt(0) 671; SI-NEXT: v_cvt_f32_f16_e32 v0, s0 672; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 673; SI-NEXT: v_add_f32_e32 v0, 0, v0 674; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 675; SI-NEXT: s_waitcnt lgkmcnt(0) 676; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 677; SI-NEXT: s_endpgm 678 %y = fadd half %x, 0.0 679 store half %y, ptr addrspace(1) %out 680 ret void 681} 682 683define amdgpu_kernel void @add_inline_imm_0.5_f16(ptr addrspace(1) %out, half %x) { 684; GFX10-LABEL: add_inline_imm_0.5_f16: 685; GFX10: ; %bb.0: 686; GFX10-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf] 687; GFX10-NEXT: s_load_dword s2, s[8:9], 0x8 ; encoding: [0x84,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa] 688; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa] 689; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31] 690; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf] 691; GFX10-NEXT: v_add_f16_e64 v0, s2, 0.5 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe0,0x01,0x00] 692; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe] 693; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 694; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 695; 696; GFX11-LABEL: add_inline_imm_0.5_f16: 697; GFX11: ; %bb.0: 698; GFX11-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf] 699; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8] 700; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8] 701; GFX11-NEXT: 
s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31] 702; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf] 703; GFX11-NEXT: v_add_f16_e64 v0, s2, 0.5 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe0,0x01,0x00] 704; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 705; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80] 706; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] 707; 708; VI-LABEL: add_inline_imm_0.5_f16: 709; VI: ; %bb.0: 710; VI-NEXT: s_load_dword s4, s[8:9], 0x8 ; encoding: [0x04,0x01,0x02,0xc0,0x08,0x00,0x00,0x00] 711; VI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00] 712; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11] 713; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 714; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf] 715; VI-NEXT: v_add_f16_e64 v0, s4, 0.5 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0xe0,0x01,0x00] 716; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 717; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 718; 719; SI-LABEL: add_inline_imm_0.5_f16: 720; SI: ; %bb.0: 721; SI-NEXT: s_load_dword s0, s[4:5], 0xb 722; SI-NEXT: s_mov_b32 s3, 0xf000 723; SI-NEXT: s_mov_b32 s2, -1 724; SI-NEXT: s_waitcnt lgkmcnt(0) 725; SI-NEXT: v_cvt_f32_f16_e32 v0, s0 726; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 727; SI-NEXT: v_add_f32_e32 v0, 0.5, v0 728; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 729; SI-NEXT: s_waitcnt lgkmcnt(0) 730; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 731; SI-NEXT: s_endpgm 732 %y = fadd half %x, 0.5 733 store half %y, ptr addrspace(1) %out 734 ret void 735} 736 737define amdgpu_kernel void @add_inline_imm_neg_0.5_f16(ptr addrspace(1) %out, half %x) { 738; GFX10-LABEL: add_inline_imm_neg_0.5_f16: 739; GFX10: ; %bb.0: 740; GFX10-NEXT: s_clause 0x1 ; 
encoding: [0x01,0x00,0xa1,0xbf] 741; GFX10-NEXT: s_load_dword s2, s[8:9], 0x8 ; encoding: [0x84,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa] 742; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa] 743; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31] 744; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf] 745; GFX10-NEXT: v_add_f16_e64 v0, s2, -0.5 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe2,0x01,0x00] 746; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe] 747; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 748; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 749; 750; GFX11-LABEL: add_inline_imm_neg_0.5_f16: 751; GFX11: ; %bb.0: 752; GFX11-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf] 753; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8] 754; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8] 755; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31] 756; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf] 757; GFX11-NEXT: v_add_f16_e64 v0, s2, -0.5 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe2,0x01,0x00] 758; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 759; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80] 760; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] 761; 762; VI-LABEL: add_inline_imm_neg_0.5_f16: 763; VI: ; %bb.0: 764; VI-NEXT: s_load_dword s4, s[8:9], 0x8 ; encoding: [0x04,0x01,0x02,0xc0,0x08,0x00,0x00,0x00] 765; VI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00] 766; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11] 767; VI-NEXT: s_mov_b32 s2, -1 ; encoding: 
[0xc1,0x00,0x82,0xbe] 768; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf] 769; VI-NEXT: v_add_f16_e64 v0, s4, -0.5 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0xe2,0x01,0x00] 770; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 771; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 772; 773; SI-LABEL: add_inline_imm_neg_0.5_f16: 774; SI: ; %bb.0: 775; SI-NEXT: s_load_dword s0, s[4:5], 0xb 776; SI-NEXT: s_mov_b32 s3, 0xf000 777; SI-NEXT: s_mov_b32 s2, -1 778; SI-NEXT: s_waitcnt lgkmcnt(0) 779; SI-NEXT: v_cvt_f32_f16_e32 v0, s0 780; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 781; SI-NEXT: v_add_f32_e32 v0, -0.5, v0 782; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 783; SI-NEXT: s_waitcnt lgkmcnt(0) 784; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 785; SI-NEXT: s_endpgm 786 %y = fadd half %x, -0.5 787 store half %y, ptr addrspace(1) %out 788 ret void 789} 790 791define amdgpu_kernel void @add_inline_imm_1.0_f16(ptr addrspace(1) %out, half %x) { 792; GFX10-LABEL: add_inline_imm_1.0_f16: 793; GFX10: ; %bb.0: 794; GFX10-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf] 795; GFX10-NEXT: s_load_dword s2, s[8:9], 0x8 ; encoding: [0x84,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa] 796; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa] 797; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31] 798; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf] 799; GFX10-NEXT: v_add_f16_e64 v0, s2, 1.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe4,0x01,0x00] 800; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe] 801; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 802; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 803; 804; GFX11-LABEL: add_inline_imm_1.0_f16: 805; GFX11: ; %bb.0: 806; GFX11-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf] 807; GFX11-NEXT: 
s_load_b32 s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8] 808; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8] 809; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31] 810; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf] 811; GFX11-NEXT: v_add_f16_e64 v0, s2, 1.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe4,0x01,0x00] 812; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 813; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80] 814; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] 815; 816; VI-LABEL: add_inline_imm_1.0_f16: 817; VI: ; %bb.0: 818; VI-NEXT: s_load_dword s4, s[8:9], 0x8 ; encoding: [0x04,0x01,0x02,0xc0,0x08,0x00,0x00,0x00] 819; VI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00] 820; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11] 821; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 822; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf] 823; VI-NEXT: v_add_f16_e64 v0, s4, 1.0 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0xe4,0x01,0x00] 824; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 825; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 826; 827; SI-LABEL: add_inline_imm_1.0_f16: 828; SI: ; %bb.0: 829; SI-NEXT: s_load_dword s0, s[4:5], 0xb 830; SI-NEXT: s_mov_b32 s3, 0xf000 831; SI-NEXT: s_mov_b32 s2, -1 832; SI-NEXT: s_waitcnt lgkmcnt(0) 833; SI-NEXT: v_cvt_f32_f16_e32 v0, s0 834; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 835; SI-NEXT: v_add_f32_e32 v0, 1.0, v0 836; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 837; SI-NEXT: s_waitcnt lgkmcnt(0) 838; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 839; SI-NEXT: s_endpgm 840 %y = fadd half %x, 1.0 841 store half %y, ptr addrspace(1) %out 842 ret void 843} 
; add_inline_imm_neg_1.0_f16: adds the constant -1.0 to the half kernel
; argument %x with fadd and stores the sum through %out.
; The check lines for the GFX10, GFX11 and VI runs expect -1.0 to appear
; directly as a source operand of v_add_f16_e64, with no separate move of the
; constant. The check lines for the SI run instead expect a conversion to f32,
; a v_add_f32 with -1.0, and a conversion back to f16.
; The assertions are autogenerated (see the NOTE at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
844 845define amdgpu_kernel void @add_inline_imm_neg_1.0_f16(ptr addrspace(1) %out, half %x) { 846; GFX10-LABEL: add_inline_imm_neg_1.0_f16: 847; GFX10: ; %bb.0: 848; GFX10-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf] 849; GFX10-NEXT: s_load_dword s2, s[8:9], 0x8 ; encoding: [0x84,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa] 850; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa] 851; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31] 852; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf] 853; GFX10-NEXT: v_add_f16_e64 v0, s2, -1.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe6,0x01,0x00] 854; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe] 855; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 856; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 857; 858; GFX11-LABEL: add_inline_imm_neg_1.0_f16: 859; GFX11: ; %bb.0: 860; GFX11-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf] 861; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8] 862; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8] 863; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31] 864; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf] 865; GFX11-NEXT: v_add_f16_e64 v0, s2, -1.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe6,0x01,0x00] 866; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 867; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80] 868; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] 869; 870; VI-LABEL: add_inline_imm_neg_1.0_f16: 871; VI: ; %bb.0: 872; VI-NEXT: s_load_dword s4, s[8:9], 0x8 ; encoding: [0x04,0x01,0x02,0xc0,0x08,0x00,0x00,0x00] 873; VI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: 
[0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00] 874; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11] 875; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 876; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf] 877; VI-NEXT: v_add_f16_e64 v0, s4, -1.0 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0xe6,0x01,0x00] 878; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 879; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 880; 881; SI-LABEL: add_inline_imm_neg_1.0_f16: 882; SI: ; %bb.0: 883; SI-NEXT: s_load_dword s0, s[4:5], 0xb 884; SI-NEXT: s_mov_b32 s3, 0xf000 885; SI-NEXT: s_mov_b32 s2, -1 886; SI-NEXT: s_waitcnt lgkmcnt(0) 887; SI-NEXT: v_cvt_f32_f16_e32 v0, s0 888; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 889; SI-NEXT: v_add_f32_e32 v0, -1.0, v0 890; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 891; SI-NEXT: s_waitcnt lgkmcnt(0) 892; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 893; SI-NEXT: s_endpgm 894 %y = fadd half %x, -1.0 895 store half %y, ptr addrspace(1) %out 896 ret void 897} 898 899define amdgpu_kernel void @add_inline_imm_2.0_f16(ptr addrspace(1) %out, half %x) { 900; GFX10-LABEL: add_inline_imm_2.0_f16: 901; GFX10: ; %bb.0: 902; GFX10-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf] 903; GFX10-NEXT: s_load_dword s2, s[8:9], 0x8 ; encoding: [0x84,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa] 904; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa] 905; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31] 906; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf] 907; GFX10-NEXT: v_add_f16_e64 v0, s2, 2.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe8,0x01,0x00] 908; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe] 909; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 910; GFX10-NEXT: s_endpgm ; 
encoding: [0x00,0x00,0x81,0xbf] 911; 912; GFX11-LABEL: add_inline_imm_2.0_f16: 913; GFX11: ; %bb.0: 914; GFX11-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf] 915; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8] 916; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8] 917; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31] 918; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf] 919; GFX11-NEXT: v_add_f16_e64 v0, s2, 2.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xe8,0x01,0x00] 920; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 921; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80] 922; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] 923; 924; VI-LABEL: add_inline_imm_2.0_f16: 925; VI: ; %bb.0: 926; VI-NEXT: s_load_dword s4, s[8:9], 0x8 ; encoding: [0x04,0x01,0x02,0xc0,0x08,0x00,0x00,0x00] 927; VI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00] 928; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11] 929; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 930; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf] 931; VI-NEXT: v_add_f16_e64 v0, s4, 2.0 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0xe8,0x01,0x00] 932; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 933; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 934; 935; SI-LABEL: add_inline_imm_2.0_f16: 936; SI: ; %bb.0: 937; SI-NEXT: s_load_dword s0, s[4:5], 0xb 938; SI-NEXT: s_mov_b32 s3, 0xf000 939; SI-NEXT: s_mov_b32 s2, -1 940; SI-NEXT: s_waitcnt lgkmcnt(0) 941; SI-NEXT: v_cvt_f32_f16_e32 v0, s0 942; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 943; SI-NEXT: v_add_f32_e32 v0, 2.0, v0 944; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 945; SI-NEXT: 
s_waitcnt lgkmcnt(0) 946; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 947; SI-NEXT: s_endpgm 948 %y = fadd half %x, 2.0 949 store half %y, ptr addrspace(1) %out 950 ret void 951} 952 953define amdgpu_kernel void @add_inline_imm_neg_2.0_f16(ptr addrspace(1) %out, half %x) { 954; GFX10-LABEL: add_inline_imm_neg_2.0_f16: 955; GFX10: ; %bb.0: 956; GFX10-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf] 957; GFX10-NEXT: s_load_dword s2, s[8:9], 0x8 ; encoding: [0x84,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa] 958; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa] 959; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31] 960; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf] 961; GFX10-NEXT: v_add_f16_e64 v0, s2, -2.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xea,0x01,0x00] 962; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe] 963; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 964; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 965; 966; GFX11-LABEL: add_inline_imm_neg_2.0_f16: 967; GFX11: ; %bb.0: 968; GFX11-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf] 969; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8] 970; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8] 971; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31] 972; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf] 973; GFX11-NEXT: v_add_f16_e64 v0, s2, -2.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xea,0x01,0x00] 974; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 975; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80] 976; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] 977; 978; VI-LABEL: 
add_inline_imm_neg_2.0_f16: 979; VI: ; %bb.0: 980; VI-NEXT: s_load_dword s4, s[8:9], 0x8 ; encoding: [0x04,0x01,0x02,0xc0,0x08,0x00,0x00,0x00] 981; VI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00] 982; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11] 983; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 984; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf] 985; VI-NEXT: v_add_f16_e64 v0, s4, -2.0 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0xea,0x01,0x00] 986; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 987; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 988; 989; SI-LABEL: add_inline_imm_neg_2.0_f16: 990; SI: ; %bb.0: 991; SI-NEXT: s_load_dword s0, s[4:5], 0xb 992; SI-NEXT: s_mov_b32 s3, 0xf000 993; SI-NEXT: s_mov_b32 s2, -1 994; SI-NEXT: s_waitcnt lgkmcnt(0) 995; SI-NEXT: v_cvt_f32_f16_e32 v0, s0 996; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 997; SI-NEXT: v_add_f32_e32 v0, -2.0, v0 998; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 999; SI-NEXT: s_waitcnt lgkmcnt(0) 1000; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 1001; SI-NEXT: s_endpgm 1002 %y = fadd half %x, -2.0 1003 store half %y, ptr addrspace(1) %out 1004 ret void 1005} 1006 1007define amdgpu_kernel void @add_inline_imm_4.0_f16(ptr addrspace(1) %out, half %x) { 1008; GFX10-LABEL: add_inline_imm_4.0_f16: 1009; GFX10: ; %bb.0: 1010; GFX10-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf] 1011; GFX10-NEXT: s_load_dword s2, s[8:9], 0x8 ; encoding: [0x84,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa] 1012; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa] 1013; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31] 1014; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf] 1015; GFX10-NEXT: v_add_f16_e64 v0, s2, 4.0 ; encoding: 
[0x00,0x00,0x32,0xd5,0x02,0xec,0x01,0x00] 1016; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe] 1017; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 1018; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 1019; 1020; GFX11-LABEL: add_inline_imm_4.0_f16: 1021; GFX11: ; %bb.0: 1022; GFX11-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf] 1023; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8] 1024; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8] 1025; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31] 1026; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf] 1027; GFX11-NEXT: v_add_f16_e64 v0, s2, 4.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xec,0x01,0x00] 1028; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 1029; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80] 1030; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] 1031; 1032; VI-LABEL: add_inline_imm_4.0_f16: 1033; VI: ; %bb.0: 1034; VI-NEXT: s_load_dword s4, s[8:9], 0x8 ; encoding: [0x04,0x01,0x02,0xc0,0x08,0x00,0x00,0x00] 1035; VI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00] 1036; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11] 1037; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 1038; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf] 1039; VI-NEXT: v_add_f16_e64 v0, s4, 4.0 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0xec,0x01,0x00] 1040; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 1041; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 1042; 1043; SI-LABEL: add_inline_imm_4.0_f16: 1044; SI: ; %bb.0: 1045; SI-NEXT: s_load_dword s0, s[4:5], 0xb 1046; 
SI-NEXT: s_mov_b32 s3, 0xf000 1047; SI-NEXT: s_mov_b32 s2, -1 1048; SI-NEXT: s_waitcnt lgkmcnt(0) 1049; SI-NEXT: v_cvt_f32_f16_e32 v0, s0 1050; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 1051; SI-NEXT: v_add_f32_e32 v0, 4.0, v0 1052; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 1053; SI-NEXT: s_waitcnt lgkmcnt(0) 1054; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 1055; SI-NEXT: s_endpgm 1056 %y = fadd half %x, 4.0 1057 store half %y, ptr addrspace(1) %out 1058 ret void 1059} 1060 1061define amdgpu_kernel void @add_inline_imm_neg_4.0_f16(ptr addrspace(1) %out, half %x) { 1062; GFX10-LABEL: add_inline_imm_neg_4.0_f16: 1063; GFX10: ; %bb.0: 1064; GFX10-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf] 1065; GFX10-NEXT: s_load_dword s2, s[8:9], 0x8 ; encoding: [0x84,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa] 1066; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa] 1067; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31] 1068; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf] 1069; GFX10-NEXT: v_add_f16_e64 v0, s2, -4.0 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0xee,0x01,0x00] 1070; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe] 1071; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 1072; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 1073; 1074; GFX11-LABEL: add_inline_imm_neg_4.0_f16: 1075; GFX11: ; %bb.0: 1076; GFX11-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf] 1077; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8] 1078; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8] 1079; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31] 1080; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf] 1081; GFX11-NEXT: v_add_f16_e64 v0, s2, -4.0 ; encoding: 
[0x00,0x00,0x32,0xd5,0x02,0xee,0x01,0x00] 1082; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 1083; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80] 1084; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] 1085; 1086; VI-LABEL: add_inline_imm_neg_4.0_f16: 1087; VI: ; %bb.0: 1088; VI-NEXT: s_load_dword s4, s[8:9], 0x8 ; encoding: [0x04,0x01,0x02,0xc0,0x08,0x00,0x00,0x00] 1089; VI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00] 1090; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11] 1091; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 1092; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf] 1093; VI-NEXT: v_add_f16_e64 v0, s4, -4.0 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0xee,0x01,0x00] 1094; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 1095; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 1096; 1097; SI-LABEL: add_inline_imm_neg_4.0_f16: 1098; SI: ; %bb.0: 1099; SI-NEXT: s_load_dword s0, s[4:5], 0xb 1100; SI-NEXT: s_mov_b32 s3, 0xf000 1101; SI-NEXT: s_mov_b32 s2, -1 1102; SI-NEXT: s_waitcnt lgkmcnt(0) 1103; SI-NEXT: v_cvt_f32_f16_e32 v0, s0 1104; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 1105; SI-NEXT: v_add_f32_e32 v0, -4.0, v0 1106; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 1107; SI-NEXT: s_waitcnt lgkmcnt(0) 1108; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 1109; SI-NEXT: s_endpgm 1110 %y = fadd half %x, -4.0 1111 store half %y, ptr addrspace(1) %out 1112 ret void 1113} 1114 1115define amdgpu_kernel void @commute_add_inline_imm_0.5_f16(ptr addrspace(1) %out, ptr addrspace(1) %in) { 1116; GFX10-LABEL: commute_add_inline_imm_0.5_f16: 1117; GFX10: ; %bb.0: 1118; GFX10-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 ; encoding: [0x04,0x00,0x08,0xf4,0x00,0x00,0x00,0xfa] 1119; GFX10-NEXT: s_mov_b32 s6, -1 ; encoding: [0xc1,0x03,0x86,0xbe] 
1120; GFX10-NEXT: s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x03,0x87,0xbe,0x00,0x60,0x01,0x31] 1121; GFX10-NEXT: s_mov_b32 s10, s6 ; encoding: [0x06,0x03,0x8a,0xbe] 1122; GFX10-NEXT: s_mov_b32 s11, s7 ; encoding: [0x07,0x03,0x8b,0xbe] 1123; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf] 1124; GFX10-NEXT: s_mov_b32 s8, s2 ; encoding: [0x02,0x03,0x88,0xbe] 1125; GFX10-NEXT: s_mov_b32 s9, s3 ; encoding: [0x03,0x03,0x89,0xbe] 1126; GFX10-NEXT: s_mov_b32 s4, s0 ; encoding: [0x00,0x03,0x84,0xbe] 1127; GFX10-NEXT: buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x00,0x02,0x80] 1128; GFX10-NEXT: s_mov_b32 s5, s1 ; encoding: [0x01,0x03,0x85,0xbe] 1129; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] 1130; GFX10-NEXT: v_add_f16_e32 v0, 0.5, v0 ; encoding: [0xf0,0x00,0x00,0x64] 1131; GFX10-NEXT: buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80] 1132; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 1133; 1134; GFX11-LABEL: commute_add_inline_imm_0.5_f16: 1135; GFX11: ; %bb.0: 1136; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x08,0xf4,0x00,0x00,0x00,0xf8] 1137; GFX11-NEXT: s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe] 1138; GFX11-NEXT: s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x00,0x87,0xbe,0x00,0x60,0x01,0x31] 1139; GFX11-NEXT: s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe] 1140; GFX11-NEXT: s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe] 1141; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf] 1142; GFX11-NEXT: s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe] 1143; GFX11-NEXT: s_mov_b32 s9, s3 ; encoding: [0x03,0x00,0x89,0xbe] 1144; GFX11-NEXT: s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe] 1145; GFX11-NEXT: buffer_load_u16 v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80] 1146; GFX11-NEXT: s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe] 1147; GFX11-NEXT: s_waitcnt vmcnt(0) 
; encoding: [0xf7,0x03,0x89,0xbf] 1148; GFX11-NEXT: v_add_f16_e32 v0, 0.5, v0 ; encoding: [0xf0,0x00,0x00,0x64] 1149; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x01,0x80] 1150; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] 1151; 1152; VI-LABEL: commute_add_inline_imm_0.5_f16: 1153; VI: ; %bb.0: 1154; VI-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 ; encoding: [0x04,0x00,0x0a,0xc0,0x00,0x00,0x00,0x00] 1155; VI-NEXT: s_mov_b32 s7, 0x1100f000 ; encoding: [0xff,0x00,0x87,0xbe,0x00,0xf0,0x00,0x11] 1156; VI-NEXT: s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe] 1157; VI-NEXT: s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe] 1158; VI-NEXT: s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe] 1159; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf] 1160; VI-NEXT: s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe] 1161; VI-NEXT: s_mov_b32 s9, s3 ; encoding: [0x03,0x00,0x89,0xbe] 1162; VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80] 1163; VI-NEXT: s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe] 1164; VI-NEXT: s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe] 1165; VI-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf] 1166; VI-NEXT: v_add_f16_e32 v0, 0.5, v0 ; encoding: [0xf0,0x00,0x00,0x3e] 1167; VI-NEXT: buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80] 1168; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 1169; 1170; SI-LABEL: commute_add_inline_imm_0.5_f16: 1171; SI: ; %bb.0: 1172; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 1173; SI-NEXT: s_mov_b32 s7, 0xf000 1174; SI-NEXT: s_mov_b32 s6, -1 1175; SI-NEXT: s_mov_b32 s10, s6 1176; SI-NEXT: s_mov_b32 s11, s7 1177; SI-NEXT: s_waitcnt lgkmcnt(0) 1178; SI-NEXT: s_mov_b32 s8, s2 1179; SI-NEXT: s_mov_b32 s9, s3 1180; SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 1181; SI-NEXT: s_mov_b32 s4, s0 1182; SI-NEXT: s_mov_b32 s5, s1 1183; SI-NEXT: 
s_waitcnt vmcnt(0) 1184; SI-NEXT: v_cvt_f32_f16_e32 v0, v0 1185; SI-NEXT: v_add_f32_e32 v0, 0.5, v0 1186; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 1187; SI-NEXT: buffer_store_short v0, off, s[4:7], 0 1188; SI-NEXT: s_endpgm 1189 %x = load half, ptr addrspace(1) %in 1190 %y = fadd half %x, 0.5 1191 store half %y, ptr addrspace(1) %out 1192 ret void 1193} 1194 1195define amdgpu_kernel void @commute_add_literal_f16(ptr addrspace(1) %out, ptr addrspace(1) %in) { 1196; GFX10-LABEL: commute_add_literal_f16: 1197; GFX10: ; %bb.0: 1198; GFX10-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 ; encoding: [0x04,0x00,0x08,0xf4,0x00,0x00,0x00,0xfa] 1199; GFX10-NEXT: s_mov_b32 s6, -1 ; encoding: [0xc1,0x03,0x86,0xbe] 1200; GFX10-NEXT: s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x03,0x87,0xbe,0x00,0x60,0x01,0x31] 1201; GFX10-NEXT: s_mov_b32 s10, s6 ; encoding: [0x06,0x03,0x8a,0xbe] 1202; GFX10-NEXT: s_mov_b32 s11, s7 ; encoding: [0x07,0x03,0x8b,0xbe] 1203; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf] 1204; GFX10-NEXT: s_mov_b32 s8, s2 ; encoding: [0x02,0x03,0x88,0xbe] 1205; GFX10-NEXT: s_mov_b32 s9, s3 ; encoding: [0x03,0x03,0x89,0xbe] 1206; GFX10-NEXT: s_mov_b32 s4, s0 ; encoding: [0x00,0x03,0x84,0xbe] 1207; GFX10-NEXT: buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x00,0x02,0x80] 1208; GFX10-NEXT: s_mov_b32 s5, s1 ; encoding: [0x01,0x03,0x85,0xbe] 1209; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] 1210; GFX10-NEXT: v_add_f16_e32 v0, 0x6400, v0 ; encoding: [0xff,0x00,0x00,0x64,0x00,0x64,0x00,0x00] 1211; GFX10-NEXT: buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80] 1212; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 1213; 1214; GFX11-LABEL: commute_add_literal_f16: 1215; GFX11: ; %bb.0: 1216; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x08,0xf4,0x00,0x00,0x00,0xf8] 1217; GFX11-NEXT: s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe] 1218; 
GFX11-NEXT: s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x00,0x87,0xbe,0x00,0x60,0x01,0x31] 1219; GFX11-NEXT: s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe] 1220; GFX11-NEXT: s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe] 1221; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf] 1222; GFX11-NEXT: s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe] 1223; GFX11-NEXT: s_mov_b32 s9, s3 ; encoding: [0x03,0x00,0x89,0xbe] 1224; GFX11-NEXT: s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe] 1225; GFX11-NEXT: buffer_load_u16 v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80] 1226; GFX11-NEXT: s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe] 1227; GFX11-NEXT: s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf] 1228; GFX11-NEXT: v_add_f16_e32 v0, 0x6400, v0 ; encoding: [0xff,0x00,0x00,0x64,0x00,0x64,0x00,0x00] 1229; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x01,0x80] 1230; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] 1231; 1232; VI-LABEL: commute_add_literal_f16: 1233; VI: ; %bb.0: 1234; VI-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 ; encoding: [0x04,0x00,0x0a,0xc0,0x00,0x00,0x00,0x00] 1235; VI-NEXT: s_mov_b32 s7, 0x1100f000 ; encoding: [0xff,0x00,0x87,0xbe,0x00,0xf0,0x00,0x11] 1236; VI-NEXT: s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe] 1237; VI-NEXT: s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe] 1238; VI-NEXT: s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe] 1239; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf] 1240; VI-NEXT: s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe] 1241; VI-NEXT: s_mov_b32 s9, s3 ; encoding: [0x03,0x00,0x89,0xbe] 1242; VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80] 1243; VI-NEXT: s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe] 1244; VI-NEXT: s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe] 1245; VI-NEXT: s_waitcnt vmcnt(0) ; encoding: 
[0x70,0x0f,0x8c,0xbf] 1246; VI-NEXT: v_add_f16_e32 v0, 0x6400, v0 ; encoding: [0xff,0x00,0x00,0x3e,0x00,0x64,0x00,0x00] 1247; VI-NEXT: buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80] 1248; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 1249; 1250; SI-LABEL: commute_add_literal_f16: 1251; SI: ; %bb.0: 1252; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 1253; SI-NEXT: s_mov_b32 s7, 0xf000 1254; SI-NEXT: s_mov_b32 s6, -1 1255; SI-NEXT: s_mov_b32 s10, s6 1256; SI-NEXT: s_mov_b32 s11, s7 1257; SI-NEXT: s_waitcnt lgkmcnt(0) 1258; SI-NEXT: s_mov_b32 s8, s2 1259; SI-NEXT: s_mov_b32 s9, s3 1260; SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 1261; SI-NEXT: s_mov_b32 s4, s0 1262; SI-NEXT: s_mov_b32 s5, s1 1263; SI-NEXT: s_waitcnt vmcnt(0) 1264; SI-NEXT: v_cvt_f32_f16_e32 v0, v0 1265; SI-NEXT: v_add_f32_e32 v0, 0x44800000, v0 1266; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 1267; SI-NEXT: buffer_store_short v0, off, s[4:7], 0 1268; SI-NEXT: s_endpgm 1269 %x = load half, ptr addrspace(1) %in 1270 %y = fadd half %x, 1024.0 1271 store half %y, ptr addrspace(1) %out 1272 ret void 1273} 1274 1275define amdgpu_kernel void @add_inline_imm_1_f16(ptr addrspace(1) %out, half %x) { 1276; GFX10-LABEL: add_inline_imm_1_f16: 1277; GFX10: ; %bb.0: 1278; GFX10-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf] 1279; GFX10-NEXT: s_load_dword s2, s[8:9], 0x8 ; encoding: [0x84,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa] 1280; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa] 1281; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31] 1282; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf] 1283; GFX10-NEXT: v_add_f16_e64 v0, s2, 1 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x02,0x01,0x00] 1284; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe] 1285; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 
1286; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 1287; 1288; GFX11-LABEL: add_inline_imm_1_f16: 1289; GFX11: ; %bb.0: 1290; GFX11-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf] 1291; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8] 1292; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8] 1293; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31] 1294; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf] 1295; GFX11-NEXT: v_add_f16_e64 v0, s2, 1 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x02,0x01,0x00] 1296; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 1297; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80] 1298; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] 1299; 1300; VI-LABEL: add_inline_imm_1_f16: 1301; VI: ; %bb.0: 1302; VI-NEXT: s_load_dword s4, s[8:9], 0x8 ; encoding: [0x04,0x01,0x02,0xc0,0x08,0x00,0x00,0x00] 1303; VI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00] 1304; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11] 1305; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 1306; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf] 1307; VI-NEXT: v_add_f16_e64 v0, s4, 1 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0x02,0x01,0x00] 1308; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 1309; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 1310; 1311; SI-LABEL: add_inline_imm_1_f16: 1312; SI: ; %bb.0: 1313; SI-NEXT: s_load_dword s0, s[4:5], 0xb 1314; SI-NEXT: s_mov_b32 s3, 0xf000 1315; SI-NEXT: s_mov_b32 s2, -1 1316; SI-NEXT: s_waitcnt lgkmcnt(0) 1317; SI-NEXT: v_cvt_f32_f16_e32 v0, s0 1318; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 1319; SI-NEXT: v_add_f32_e32 v0, 0x33800000, 
v0 1320; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 1321; SI-NEXT: s_waitcnt lgkmcnt(0) 1322; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 1323; SI-NEXT: s_endpgm 1324 %y = fadd half %x, 0xH0001 1325 store half %y, ptr addrspace(1) %out 1326 ret void 1327} 1328 1329define amdgpu_kernel void @add_inline_imm_2_f16(ptr addrspace(1) %out, half %x) { 1330; GFX10-LABEL: add_inline_imm_2_f16: 1331; GFX10: ; %bb.0: 1332; GFX10-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf] 1333; GFX10-NEXT: s_load_dword s2, s[8:9], 0x8 ; encoding: [0x84,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa] 1334; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa] 1335; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31] 1336; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf] 1337; GFX10-NEXT: v_add_f16_e64 v0, s2, 2 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x04,0x01,0x00] 1338; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe] 1339; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 1340; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 1341; 1342; GFX11-LABEL: add_inline_imm_2_f16: 1343; GFX11: ; %bb.0: 1344; GFX11-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf] 1345; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8] 1346; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8] 1347; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31] 1348; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf] 1349; GFX11-NEXT: v_add_f16_e64 v0, s2, 2 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x04,0x01,0x00] 1350; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 1351; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80] 1352; GFX11-NEXT: s_endpgm ; 
encoding: [0x00,0x00,0xb0,0xbf] 1353; 1354; VI-LABEL: add_inline_imm_2_f16: 1355; VI: ; %bb.0: 1356; VI-NEXT: s_load_dword s4, s[8:9], 0x8 ; encoding: [0x04,0x01,0x02,0xc0,0x08,0x00,0x00,0x00] 1357; VI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00] 1358; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11] 1359; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 1360; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf] 1361; VI-NEXT: v_add_f16_e64 v0, s4, 2 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0x04,0x01,0x00] 1362; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 1363; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 1364; 1365; SI-LABEL: add_inline_imm_2_f16: 1366; SI: ; %bb.0: 1367; SI-NEXT: s_load_dword s0, s[4:5], 0xb 1368; SI-NEXT: s_mov_b32 s3, 0xf000 1369; SI-NEXT: s_mov_b32 s2, -1 1370; SI-NEXT: s_waitcnt lgkmcnt(0) 1371; SI-NEXT: v_cvt_f32_f16_e32 v0, s0 1372; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 1373; SI-NEXT: v_add_f32_e32 v0, 0x34000000, v0 1374; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 1375; SI-NEXT: s_waitcnt lgkmcnt(0) 1376; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 1377; SI-NEXT: s_endpgm 1378 %y = fadd half %x, 0xH0002 1379 store half %y, ptr addrspace(1) %out 1380 ret void 1381} 1382 1383define amdgpu_kernel void @add_inline_imm_16_f16(ptr addrspace(1) %out, half %x) { 1384; GFX10-LABEL: add_inline_imm_16_f16: 1385; GFX10: ; %bb.0: 1386; GFX10-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf] 1387; GFX10-NEXT: s_load_dword s2, s[8:9], 0x8 ; encoding: [0x84,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa] 1388; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa] 1389; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31] 1390; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf] 1391; GFX10-NEXT: 
v_add_f16_e64 v0, s2, 16 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x20,0x01,0x00] 1392; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe] 1393; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 1394; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 1395; 1396; GFX11-LABEL: add_inline_imm_16_f16: 1397; GFX11: ; %bb.0: 1398; GFX11-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf] 1399; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8] 1400; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8] 1401; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31] 1402; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf] 1403; GFX11-NEXT: v_add_f16_e64 v0, s2, 16 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x20,0x01,0x00] 1404; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 1405; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80] 1406; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] 1407; 1408; VI-LABEL: add_inline_imm_16_f16: 1409; VI: ; %bb.0: 1410; VI-NEXT: s_load_dword s4, s[8:9], 0x8 ; encoding: [0x04,0x01,0x02,0xc0,0x08,0x00,0x00,0x00] 1411; VI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00] 1412; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11] 1413; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 1414; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf] 1415; VI-NEXT: v_add_f16_e64 v0, s4, 16 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0x20,0x01,0x00] 1416; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 1417; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 1418; 1419; SI-LABEL: add_inline_imm_16_f16: 1420; SI: ; %bb.0: 1421; SI-NEXT: 
s_load_dword s0, s[4:5], 0xb 1422; SI-NEXT: s_mov_b32 s3, 0xf000 1423; SI-NEXT: s_mov_b32 s2, -1 1424; SI-NEXT: s_waitcnt lgkmcnt(0) 1425; SI-NEXT: v_cvt_f32_f16_e32 v0, s0 1426; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 1427; SI-NEXT: v_add_f32_e32 v0, 0x35800000, v0 1428; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 1429; SI-NEXT: s_waitcnt lgkmcnt(0) 1430; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 1431; SI-NEXT: s_endpgm 1432 %y = fadd half %x, 0xH0010 1433 store half %y, ptr addrspace(1) %out 1434 ret void 1435} 1436 1437define amdgpu_kernel void @add_inline_imm_neg_1_f16(ptr addrspace(1) %out, ptr addrspace(1) %in) { 1438; GFX10-LABEL: add_inline_imm_neg_1_f16: 1439; GFX10: ; %bb.0: 1440; GFX10-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 ; encoding: [0x04,0x00,0x08,0xf4,0x00,0x00,0x00,0xfa] 1441; GFX10-NEXT: s_mov_b32 s6, -1 ; encoding: [0xc1,0x03,0x86,0xbe] 1442; GFX10-NEXT: s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x03,0x87,0xbe,0x00,0x60,0x01,0x31] 1443; GFX10-NEXT: s_mov_b32 s10, s6 ; encoding: [0x06,0x03,0x8a,0xbe] 1444; GFX10-NEXT: s_mov_b32 s11, s7 ; encoding: [0x07,0x03,0x8b,0xbe] 1445; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf] 1446; GFX10-NEXT: s_mov_b32 s8, s2 ; encoding: [0x02,0x03,0x88,0xbe] 1447; GFX10-NEXT: s_mov_b32 s9, s3 ; encoding: [0x03,0x03,0x89,0xbe] 1448; GFX10-NEXT: s_mov_b32 s4, s0 ; encoding: [0x00,0x03,0x84,0xbe] 1449; GFX10-NEXT: buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x00,0x02,0x80] 1450; GFX10-NEXT: s_mov_b32 s5, s1 ; encoding: [0x01,0x03,0x85,0xbe] 1451; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] 1452; GFX10-NEXT: v_add_nc_u32_e32 v0, -1, v0 ; encoding: [0xc1,0x00,0x00,0x4a] 1453; GFX10-NEXT: buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80] 1454; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 1455; 1456; GFX11-LABEL: add_inline_imm_neg_1_f16: 1457; GFX11: ; %bb.0: 1458; GFX11-NEXT: s_load_b128 s[0:3], 
s[4:5], 0x0 ; encoding: [0x02,0x00,0x08,0xf4,0x00,0x00,0x00,0xf8] 1459; GFX11-NEXT: s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe] 1460; GFX11-NEXT: s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x00,0x87,0xbe,0x00,0x60,0x01,0x31] 1461; GFX11-NEXT: s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe] 1462; GFX11-NEXT: s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe] 1463; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf] 1464; GFX11-NEXT: s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe] 1465; GFX11-NEXT: s_mov_b32 s9, s3 ; encoding: [0x03,0x00,0x89,0xbe] 1466; GFX11-NEXT: s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe] 1467; GFX11-NEXT: buffer_load_u16 v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80] 1468; GFX11-NEXT: s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe] 1469; GFX11-NEXT: s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf] 1470; GFX11-NEXT: v_add_nc_u32_e32 v0, -1, v0 ; encoding: [0xc1,0x00,0x00,0x4a] 1471; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x01,0x80] 1472; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] 1473; 1474; VI-LABEL: add_inline_imm_neg_1_f16: 1475; VI: ; %bb.0: 1476; VI-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 ; encoding: [0x04,0x00,0x0a,0xc0,0x00,0x00,0x00,0x00] 1477; VI-NEXT: s_mov_b32 s7, 0x1100f000 ; encoding: [0xff,0x00,0x87,0xbe,0x00,0xf0,0x00,0x11] 1478; VI-NEXT: s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe] 1479; VI-NEXT: s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe] 1480; VI-NEXT: s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe] 1481; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf] 1482; VI-NEXT: s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe] 1483; VI-NEXT: s_mov_b32 s9, s3 ; encoding: [0x03,0x00,0x89,0xbe] 1484; VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80] 1485; VI-NEXT: s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe] 1486; 
VI-NEXT: s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe] 1487; VI-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf] 1488; VI-NEXT: v_add_u32_e32 v0, vcc, -1, v0 ; encoding: [0xc1,0x00,0x00,0x32] 1489; VI-NEXT: buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80] 1490; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 1491; 1492; SI-LABEL: add_inline_imm_neg_1_f16: 1493; SI: ; %bb.0: 1494; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 1495; SI-NEXT: s_mov_b32 s7, 0xf000 1496; SI-NEXT: s_mov_b32 s6, -1 1497; SI-NEXT: s_mov_b32 s10, s6 1498; SI-NEXT: s_mov_b32 s11, s7 1499; SI-NEXT: s_waitcnt lgkmcnt(0) 1500; SI-NEXT: s_mov_b32 s8, s2 1501; SI-NEXT: s_mov_b32 s9, s3 1502; SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 1503; SI-NEXT: s_mov_b32 s4, s0 1504; SI-NEXT: s_mov_b32 s5, s1 1505; SI-NEXT: s_waitcnt vmcnt(0) 1506; SI-NEXT: v_add_i32_e32 v0, vcc, -1, v0 1507; SI-NEXT: buffer_store_short v0, off, s[4:7], 0 1508; SI-NEXT: s_endpgm 1509 %x = load i16, ptr addrspace(1) %in 1510 %y = add i16 %x, -1 1511 %ybc = bitcast i16 %y to half 1512 store half %ybc, ptr addrspace(1) %out 1513 ret void 1514} 1515 1516define amdgpu_kernel void @add_inline_imm_neg_2_f16(ptr addrspace(1) %out, ptr addrspace(1) %in) { 1517; GFX10-LABEL: add_inline_imm_neg_2_f16: 1518; GFX10: ; %bb.0: 1519; GFX10-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 ; encoding: [0x04,0x00,0x08,0xf4,0x00,0x00,0x00,0xfa] 1520; GFX10-NEXT: s_mov_b32 s6, -1 ; encoding: [0xc1,0x03,0x86,0xbe] 1521; GFX10-NEXT: s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x03,0x87,0xbe,0x00,0x60,0x01,0x31] 1522; GFX10-NEXT: s_mov_b32 s10, s6 ; encoding: [0x06,0x03,0x8a,0xbe] 1523; GFX10-NEXT: s_mov_b32 s11, s7 ; encoding: [0x07,0x03,0x8b,0xbe] 1524; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf] 1525; GFX10-NEXT: s_mov_b32 s8, s2 ; encoding: [0x02,0x03,0x88,0xbe] 1526; GFX10-NEXT: s_mov_b32 s9, s3 ; encoding: [0x03,0x03,0x89,0xbe] 1527; GFX10-NEXT: s_mov_b32 s4, s0 ; 
encoding: [0x00,0x03,0x84,0xbe] 1528; GFX10-NEXT: buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x00,0x02,0x80] 1529; GFX10-NEXT: s_mov_b32 s5, s1 ; encoding: [0x01,0x03,0x85,0xbe] 1530; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] 1531; GFX10-NEXT: v_add_nc_u32_e32 v0, 0xfffe, v0 ; encoding: [0xff,0x00,0x00,0x4a,0xfe,0xff,0x00,0x00] 1532; GFX10-NEXT: buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80] 1533; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 1534; 1535; GFX11-LABEL: add_inline_imm_neg_2_f16: 1536; GFX11: ; %bb.0: 1537; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x08,0xf4,0x00,0x00,0x00,0xf8] 1538; GFX11-NEXT: s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe] 1539; GFX11-NEXT: s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x00,0x87,0xbe,0x00,0x60,0x01,0x31] 1540; GFX11-NEXT: s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe] 1541; GFX11-NEXT: s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe] 1542; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf] 1543; GFX11-NEXT: s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe] 1544; GFX11-NEXT: s_mov_b32 s9, s3 ; encoding: [0x03,0x00,0x89,0xbe] 1545; GFX11-NEXT: s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe] 1546; GFX11-NEXT: buffer_load_u16 v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80] 1547; GFX11-NEXT: s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe] 1548; GFX11-NEXT: s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf] 1549; GFX11-NEXT: v_add_nc_u32_e32 v0, 0xfffe, v0 ; encoding: [0xff,0x00,0x00,0x4a,0xfe,0xff,0x00,0x00] 1550; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x01,0x80] 1551; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] 1552; 1553; VI-LABEL: add_inline_imm_neg_2_f16: 1554; VI: ; %bb.0: 1555; VI-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 ; encoding: 
[0x04,0x00,0x0a,0xc0,0x00,0x00,0x00,0x00] 1556; VI-NEXT: s_mov_b32 s7, 0x1100f000 ; encoding: [0xff,0x00,0x87,0xbe,0x00,0xf0,0x00,0x11] 1557; VI-NEXT: s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe] 1558; VI-NEXT: s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe] 1559; VI-NEXT: s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe] 1560; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf] 1561; VI-NEXT: s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe] 1562; VI-NEXT: s_mov_b32 s9, s3 ; encoding: [0x03,0x00,0x89,0xbe] 1563; VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80] 1564; VI-NEXT: s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe] 1565; VI-NEXT: s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe] 1566; VI-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf] 1567; VI-NEXT: v_add_u32_e32 v0, vcc, 0xfffe, v0 ; encoding: [0xff,0x00,0x00,0x32,0xfe,0xff,0x00,0x00] 1568; VI-NEXT: buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80] 1569; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 1570; 1571; SI-LABEL: add_inline_imm_neg_2_f16: 1572; SI: ; %bb.0: 1573; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 1574; SI-NEXT: s_mov_b32 s7, 0xf000 1575; SI-NEXT: s_mov_b32 s6, -1 1576; SI-NEXT: s_mov_b32 s10, s6 1577; SI-NEXT: s_mov_b32 s11, s7 1578; SI-NEXT: s_waitcnt lgkmcnt(0) 1579; SI-NEXT: s_mov_b32 s8, s2 1580; SI-NEXT: s_mov_b32 s9, s3 1581; SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 1582; SI-NEXT: s_mov_b32 s4, s0 1583; SI-NEXT: s_mov_b32 s5, s1 1584; SI-NEXT: s_waitcnt vmcnt(0) 1585; SI-NEXT: v_add_i32_e32 v0, vcc, -2, v0 1586; SI-NEXT: buffer_store_short v0, off, s[4:7], 0 1587; SI-NEXT: s_endpgm 1588 %x = load i16, ptr addrspace(1) %in 1589 %y = add i16 %x, -2 1590 %ybc = bitcast i16 %y to half 1591 store half %ybc, ptr addrspace(1) %out 1592 ret void 1593} 1594 1595define amdgpu_kernel void @add_inline_imm_neg_16_f16(ptr addrspace(1) %out, ptr 
addrspace(1) %in) { 1596; GFX10-LABEL: add_inline_imm_neg_16_f16: 1597; GFX10: ; %bb.0: 1598; GFX10-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 ; encoding: [0x04,0x00,0x08,0xf4,0x00,0x00,0x00,0xfa] 1599; GFX10-NEXT: s_mov_b32 s6, -1 ; encoding: [0xc1,0x03,0x86,0xbe] 1600; GFX10-NEXT: s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x03,0x87,0xbe,0x00,0x60,0x01,0x31] 1601; GFX10-NEXT: s_mov_b32 s10, s6 ; encoding: [0x06,0x03,0x8a,0xbe] 1602; GFX10-NEXT: s_mov_b32 s11, s7 ; encoding: [0x07,0x03,0x8b,0xbe] 1603; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf] 1604; GFX10-NEXT: s_mov_b32 s8, s2 ; encoding: [0x02,0x03,0x88,0xbe] 1605; GFX10-NEXT: s_mov_b32 s9, s3 ; encoding: [0x03,0x03,0x89,0xbe] 1606; GFX10-NEXT: s_mov_b32 s4, s0 ; encoding: [0x00,0x03,0x84,0xbe] 1607; GFX10-NEXT: buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x28,0xe0,0x00,0x00,0x02,0x80] 1608; GFX10-NEXT: s_mov_b32 s5, s1 ; encoding: [0x01,0x03,0x85,0xbe] 1609; GFX10-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x3f,0x8c,0xbf] 1610; GFX10-NEXT: v_add_nc_u32_e32 v0, 0xfff0, v0 ; encoding: [0xff,0x00,0x00,0x4a,0xf0,0xff,0x00,0x00] 1611; GFX10-NEXT: buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80] 1612; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 1613; 1614; GFX11-LABEL: add_inline_imm_neg_16_f16: 1615; GFX11: ; %bb.0: 1616; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 ; encoding: [0x02,0x00,0x08,0xf4,0x00,0x00,0x00,0xf8] 1617; GFX11-NEXT: s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe] 1618; GFX11-NEXT: s_mov_b32 s7, 0x31016000 ; encoding: [0xff,0x00,0x87,0xbe,0x00,0x60,0x01,0x31] 1619; GFX11-NEXT: s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe] 1620; GFX11-NEXT: s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe] 1621; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf] 1622; GFX11-NEXT: s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe] 1623; GFX11-NEXT: s_mov_b32 s9, s3 ; encoding: 
[0x03,0x00,0x89,0xbe] 1624; GFX11-NEXT: s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe] 1625; GFX11-NEXT: buffer_load_u16 v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80] 1626; GFX11-NEXT: s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe] 1627; GFX11-NEXT: s_waitcnt vmcnt(0) ; encoding: [0xf7,0x03,0x89,0xbf] 1628; GFX11-NEXT: v_add_nc_u32_e32 v0, 0xfff0, v0 ; encoding: [0xff,0x00,0x00,0x4a,0xf0,0xff,0x00,0x00] 1629; GFX11-NEXT: buffer_store_b16 v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x01,0x80] 1630; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] 1631; 1632; VI-LABEL: add_inline_imm_neg_16_f16: 1633; VI: ; %bb.0: 1634; VI-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 ; encoding: [0x04,0x00,0x0a,0xc0,0x00,0x00,0x00,0x00] 1635; VI-NEXT: s_mov_b32 s7, 0x1100f000 ; encoding: [0xff,0x00,0x87,0xbe,0x00,0xf0,0x00,0x11] 1636; VI-NEXT: s_mov_b32 s6, -1 ; encoding: [0xc1,0x00,0x86,0xbe] 1637; VI-NEXT: s_mov_b32 s10, s6 ; encoding: [0x06,0x00,0x8a,0xbe] 1638; VI-NEXT: s_mov_b32 s11, s7 ; encoding: [0x07,0x00,0x8b,0xbe] 1639; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf] 1640; VI-NEXT: s_mov_b32 s8, s2 ; encoding: [0x02,0x00,0x88,0xbe] 1641; VI-NEXT: s_mov_b32 s9, s3 ; encoding: [0x03,0x00,0x89,0xbe] 1642; VI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 ; encoding: [0x00,0x00,0x48,0xe0,0x00,0x00,0x02,0x80] 1643; VI-NEXT: s_mov_b32 s4, s0 ; encoding: [0x00,0x00,0x84,0xbe] 1644; VI-NEXT: s_mov_b32 s5, s1 ; encoding: [0x01,0x00,0x85,0xbe] 1645; VI-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf] 1646; VI-NEXT: v_add_u32_e32 v0, vcc, 0xfff0, v0 ; encoding: [0xff,0x00,0x00,0x32,0xf0,0xff,0x00,0x00] 1647; VI-NEXT: buffer_store_short v0, off, s[4:7], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x01,0x80] 1648; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 1649; 1650; SI-LABEL: add_inline_imm_neg_16_f16: 1651; SI: ; %bb.0: 1652; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 1653; SI-NEXT: s_mov_b32 
s7, 0xf000 1654; SI-NEXT: s_mov_b32 s6, -1 1655; SI-NEXT: s_mov_b32 s10, s6 1656; SI-NEXT: s_mov_b32 s11, s7 1657; SI-NEXT: s_waitcnt lgkmcnt(0) 1658; SI-NEXT: s_mov_b32 s8, s2 1659; SI-NEXT: s_mov_b32 s9, s3 1660; SI-NEXT: buffer_load_ushort v0, off, s[8:11], 0 1661; SI-NEXT: s_mov_b32 s4, s0 1662; SI-NEXT: s_mov_b32 s5, s1 1663; SI-NEXT: s_waitcnt vmcnt(0) 1664; SI-NEXT: v_add_i32_e32 v0, vcc, -16, v0 1665; SI-NEXT: buffer_store_short v0, off, s[4:7], 0 1666; SI-NEXT: s_endpgm 1667 %x = load i16, ptr addrspace(1) %in 1668 %y = add i16 %x, -16 1669 %ybc = bitcast i16 %y to half 1670 store half %ybc, ptr addrspace(1) %out 1671 ret void 1672} 1673 1674define amdgpu_kernel void @add_inline_imm_63_f16(ptr addrspace(1) %out, half %x) { 1675; GFX10-LABEL: add_inline_imm_63_f16: 1676; GFX10: ; %bb.0: 1677; GFX10-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0xa1,0xbf] 1678; GFX10-NEXT: s_load_dword s2, s[8:9], 0x8 ; encoding: [0x84,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa] 1679; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa] 1680; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31] 1681; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf] 1682; GFX10-NEXT: v_add_f16_e64 v0, s2, 63 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x7e,0x01,0x00] 1683; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe] 1684; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 1685; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 1686; 1687; GFX11-LABEL: add_inline_imm_63_f16: 1688; GFX11: ; %bb.0: 1689; GFX11-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf] 1690; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8] 1691; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8] 1692; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: 
[0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31] 1693; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf] 1694; GFX11-NEXT: v_add_f16_e64 v0, s2, 63 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x7e,0x01,0x00] 1695; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 1696; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80] 1697; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] 1698; 1699; VI-LABEL: add_inline_imm_63_f16: 1700; VI: ; %bb.0: 1701; VI-NEXT: s_load_dword s4, s[8:9], 0x8 ; encoding: [0x04,0x01,0x02,0xc0,0x08,0x00,0x00,0x00] 1702; VI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00] 1703; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11] 1704; VI-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 1705; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf] 1706; VI-NEXT: v_add_f16_e64 v0, s4, 63 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0x7e,0x01,0x00] 1707; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 1708; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 1709; 1710; SI-LABEL: add_inline_imm_63_f16: 1711; SI: ; %bb.0: 1712; SI-NEXT: s_load_dword s0, s[4:5], 0xb 1713; SI-NEXT: s_mov_b32 s3, 0xf000 1714; SI-NEXT: s_mov_b32 s2, -1 1715; SI-NEXT: s_waitcnt lgkmcnt(0) 1716; SI-NEXT: v_cvt_f32_f16_e32 v0, s0 1717; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 1718; SI-NEXT: v_add_f32_e32 v0, 0x367c0000, v0 1719; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 1720; SI-NEXT: s_waitcnt lgkmcnt(0) 1721; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 1722; SI-NEXT: s_endpgm 1723 %y = fadd half %x, 0xH003F 1724 store half %y, ptr addrspace(1) %out 1725 ret void 1726} 1727 1728define amdgpu_kernel void @add_inline_imm_64_f16(ptr addrspace(1) %out, half %x) { 1729; GFX10-LABEL: add_inline_imm_64_f16: 1730; GFX10: ; %bb.0: 1731; GFX10-NEXT: s_clause 0x1 ; 
encoding: [0x01,0x00,0xa1,0xbf] 1732; GFX10-NEXT: s_load_dword s2, s[8:9], 0x8 ; encoding: [0x84,0x00,0x00,0xf4,0x08,0x00,0x00,0xfa] 1733; GFX10-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x04,0xf4,0x00,0x00,0x00,0xfa] 1734; GFX10-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x03,0x83,0xbe,0x00,0x60,0x01,0x31] 1735; GFX10-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0xc0,0x8c,0xbf] 1736; GFX10-NEXT: v_add_f16_e64 v0, s2, 64 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x80,0x01,0x00] 1737; GFX10-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x03,0x82,0xbe] 1738; GFX10-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 1739; GFX10-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 1740; 1741; GFX11-LABEL: add_inline_imm_64_f16: 1742; GFX11: ; %bb.0: 1743; GFX11-NEXT: s_clause 0x1 ; encoding: [0x01,0x00,0x85,0xbf] 1744; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x8 ; encoding: [0x82,0x00,0x00,0xf4,0x08,0x00,0x00,0xf8] 1745; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x0 ; encoding: [0x02,0x00,0x04,0xf4,0x00,0x00,0x00,0xf8] 1746; GFX11-NEXT: s_mov_b32 s3, 0x31016000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0x60,0x01,0x31] 1747; GFX11-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x07,0xfc,0x89,0xbf] 1748; GFX11-NEXT: v_add_f16_e64 v0, s2, 64 ; encoding: [0x00,0x00,0x32,0xd5,0x02,0x80,0x01,0x00] 1749; GFX11-NEXT: s_mov_b32 s2, -1 ; encoding: [0xc1,0x00,0x82,0xbe] 1750; GFX11-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x64,0xe0,0x00,0x00,0x00,0x80] 1751; GFX11-NEXT: s_endpgm ; encoding: [0x00,0x00,0xb0,0xbf] 1752; 1753; VI-LABEL: add_inline_imm_64_f16: 1754; VI: ; %bb.0: 1755; VI-NEXT: s_load_dword s4, s[8:9], 0x8 ; encoding: [0x04,0x01,0x02,0xc0,0x08,0x00,0x00,0x00] 1756; VI-NEXT: s_load_dwordx2 s[0:1], s[8:9], 0x0 ; encoding: [0x04,0x00,0x06,0xc0,0x00,0x00,0x00,0x00] 1757; VI-NEXT: s_mov_b32 s3, 0x1100f000 ; encoding: [0xff,0x00,0x83,0xbe,0x00,0xf0,0x00,0x11] 1758; VI-NEXT: s_mov_b32 s2, -1 ; encoding: 
[0xc1,0x00,0x82,0xbe] 1759; VI-NEXT: s_waitcnt lgkmcnt(0) ; encoding: [0x7f,0x00,0x8c,0xbf] 1760; VI-NEXT: v_add_f16_e64 v0, s4, 64 ; encoding: [0x00,0x00,0x1f,0xd1,0x04,0x80,0x01,0x00] 1761; VI-NEXT: buffer_store_short v0, off, s[0:3], 0 ; encoding: [0x00,0x00,0x68,0xe0,0x00,0x00,0x00,0x80] 1762; VI-NEXT: s_endpgm ; encoding: [0x00,0x00,0x81,0xbf] 1763; 1764; SI-LABEL: add_inline_imm_64_f16: 1765; SI: ; %bb.0: 1766; SI-NEXT: s_load_dword s0, s[4:5], 0xb 1767; SI-NEXT: s_mov_b32 s3, 0xf000 1768; SI-NEXT: s_mov_b32 s2, -1 1769; SI-NEXT: s_waitcnt lgkmcnt(0) 1770; SI-NEXT: v_cvt_f32_f16_e32 v0, s0 1771; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 1772; SI-NEXT: v_add_f32_e32 v0, 0x36800000, v0 1773; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 1774; SI-NEXT: s_waitcnt lgkmcnt(0) 1775; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 1776; SI-NEXT: s_endpgm 1777 %y = fadd half %x, 0xH0040 1778 store half %y, ptr addrspace(1) %out 1779 ret void 1780} 1781 1782; This needs to be emitted as a literal constant since the 16-bit 1783; float values do not work for 16-bit integer operations. 
; The functions from here through mul_inline_imm_inv2pi_i16 apply an i16 mul
; or shl to a constant written as `bitcast (half C to i16)`, for C in 0.5,
; -0.5, 1.0, -1.0, 2.0, -2.0, 4.0, -4.0 and 0xH3118, and store the result to
; %out. The checks expect the constant as a plain hex literal (for example
; 0x3800 for 0.5) in v_mul_lo_u16 / v_lshlrev_b16 on the GFX10, GFX11 and VI
; runs; the VI shl cases first move it into s4 with s_movk_i32. The SI run
; checks 32-bit forms instead (v_mul_u32_u24 after an and with 0xffff, or
; v_lshl_b32).
; The check lines are autogenerated (see the NOTE on the first line of this
; file); regenerate them with utils/update_llc_test_checks.py instead of
; editing them by hand.
1784define void @mul_inline_imm_0.5_i16(ptr addrspace(1) %out, i16 %x) { 1785; GFX10-LABEL: mul_inline_imm_0.5_i16: 1786; GFX10: ; %bb.0: 1787; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] 1788; GFX10-NEXT: v_mul_lo_u16 v2, 0x3800, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x38,0x00,0x00] 1789; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00] 1790; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe] 1791; 1792; GFX11-LABEL: mul_inline_imm_0.5_i16: 1793; GFX11: ; %bb.0: 1794; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf] 1795; GFX11-NEXT: v_mul_lo_u16 v2, 0x3800, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x38,0x00,0x00] 1796; GFX11-NEXT: global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00] 1797; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] 1798; 1799; VI-LABEL: mul_inline_imm_0.5_i16: 1800; VI: ; %bb.0: 1801; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] 1802; VI-NEXT: v_mul_lo_u16_e32 v2, 0x3800, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0x38,0x00,0x00] 1803; VI-NEXT: flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00] 1804; VI-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf] 1805; VI-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe] 1806; 1807; SI-LABEL: mul_inline_imm_0.5_i16: 1808; SI: ; %bb.0: 1809; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1810; SI-NEXT: s_mov_b32 s6, 0 1811; SI-NEXT: v_and_b32_e32 v2, 0xffff, v2 1812; SI-NEXT: s_mov_b32 s7, 0xf000 1813; SI-NEXT: s_mov_b32 s4, s6 1814; SI-NEXT: s_mov_b32 s5, s6 1815; SI-NEXT: v_mul_u32_u24_e32 v2, 0x3800, v2 1816; SI-NEXT: buffer_store_short v2, v[0:1], s[4:7], 0 addr64 1817; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) 1818; SI-NEXT: s_setpc_b64 s[30:31] 1819 %y = mul i16 %x, bitcast 
(half 0.5 to i16) 1820 store i16 %y, ptr addrspace(1) %out 1821 ret void 1822} 1823 1824define void @mul_inline_imm_neg_0.5_i16(ptr addrspace(1) %out, i16 %x) { 1825; GFX10-LABEL: mul_inline_imm_neg_0.5_i16: 1826; GFX10: ; %bb.0: 1827; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] 1828; GFX10-NEXT: v_mul_lo_u16 v2, 0xb800, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xb8,0xff,0xff] 1829; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00] 1830; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe] 1831; 1832; GFX11-LABEL: mul_inline_imm_neg_0.5_i16: 1833; GFX11: ; %bb.0: 1834; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf] 1835; GFX11-NEXT: v_mul_lo_u16 v2, 0xb800, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xb8,0xff,0xff] 1836; GFX11-NEXT: global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00] 1837; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] 1838; 1839; VI-LABEL: mul_inline_imm_neg_0.5_i16: 1840; VI: ; %bb.0: 1841; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] 1842; VI-NEXT: v_mul_lo_u16_e32 v2, 0xb800, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0xb8,0xff,0xff] 1843; VI-NEXT: flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00] 1844; VI-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf] 1845; VI-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe] 1846; 1847; SI-LABEL: mul_inline_imm_neg_0.5_i16: 1848; SI: ; %bb.0: 1849; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1850; SI-NEXT: s_mov_b32 s6, 0 1851; SI-NEXT: v_and_b32_e32 v2, 0xffff, v2 1852; SI-NEXT: s_mov_b32 s7, 0xf000 1853; SI-NEXT: s_mov_b32 s4, s6 1854; SI-NEXT: s_mov_b32 s5, s6 1855; SI-NEXT: v_mul_u32_u24_e32 v2, 0xb800, v2 1856; SI-NEXT: buffer_store_short v2, v[0:1], s[4:7], 0 addr64 1857; 
SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) 1858; SI-NEXT: s_setpc_b64 s[30:31] 1859 %y = mul i16 %x, bitcast (half -0.5 to i16) 1860 store i16 %y, ptr addrspace(1) %out 1861 ret void 1862} 1863 1864define void @mul_inline_imm_1.0_i16(ptr addrspace(1) %out, i16 %x) { 1865; GFX10-LABEL: mul_inline_imm_1.0_i16: 1866; GFX10: ; %bb.0: 1867; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] 1868; GFX10-NEXT: v_mul_lo_u16 v2, 0x3c00, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x3c,0x00,0x00] 1869; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00] 1870; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe] 1871; 1872; GFX11-LABEL: mul_inline_imm_1.0_i16: 1873; GFX11: ; %bb.0: 1874; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf] 1875; GFX11-NEXT: v_mul_lo_u16 v2, 0x3c00, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x3c,0x00,0x00] 1876; GFX11-NEXT: global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00] 1877; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] 1878; 1879; VI-LABEL: mul_inline_imm_1.0_i16: 1880; VI: ; %bb.0: 1881; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] 1882; VI-NEXT: v_mul_lo_u16_e32 v2, 0x3c00, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0x3c,0x00,0x00] 1883; VI-NEXT: flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00] 1884; VI-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf] 1885; VI-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe] 1886; 1887; SI-LABEL: mul_inline_imm_1.0_i16: 1888; SI: ; %bb.0: 1889; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1890; SI-NEXT: s_mov_b32 s6, 0 1891; SI-NEXT: v_and_b32_e32 v2, 0xffff, v2 1892; SI-NEXT: s_mov_b32 s7, 0xf000 1893; SI-NEXT: s_mov_b32 s4, s6 1894; SI-NEXT: s_mov_b32 s5, s6 1895; SI-NEXT: v_mul_u32_u24_e32 
v2, 0x3c00, v2 1896; SI-NEXT: buffer_store_short v2, v[0:1], s[4:7], 0 addr64 1897; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) 1898; SI-NEXT: s_setpc_b64 s[30:31] 1899 %y = mul i16 %x, bitcast (half 1.0 to i16) 1900 store i16 %y, ptr addrspace(1) %out 1901 ret void 1902} 1903 1904define void @mul_inline_imm_neg_1.0_i16(ptr addrspace(1) %out, i16 %x) { 1905; GFX10-LABEL: mul_inline_imm_neg_1.0_i16: 1906; GFX10: ; %bb.0: 1907; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] 1908; GFX10-NEXT: v_mul_lo_u16 v2, 0xbc00, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xbc,0xff,0xff] 1909; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00] 1910; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe] 1911; 1912; GFX11-LABEL: mul_inline_imm_neg_1.0_i16: 1913; GFX11: ; %bb.0: 1914; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf] 1915; GFX11-NEXT: v_mul_lo_u16 v2, 0xbc00, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xbc,0xff,0xff] 1916; GFX11-NEXT: global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00] 1917; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] 1918; 1919; VI-LABEL: mul_inline_imm_neg_1.0_i16: 1920; VI: ; %bb.0: 1921; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] 1922; VI-NEXT: v_mul_lo_u16_e32 v2, 0xbc00, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0xbc,0xff,0xff] 1923; VI-NEXT: flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00] 1924; VI-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf] 1925; VI-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe] 1926; 1927; SI-LABEL: mul_inline_imm_neg_1.0_i16: 1928; SI: ; %bb.0: 1929; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1930; SI-NEXT: s_mov_b32 s6, 0 1931; SI-NEXT: v_and_b32_e32 v2, 0xffff, v2 1932; SI-NEXT: s_mov_b32 s7, 
0xf000 1933; SI-NEXT: s_mov_b32 s4, s6 1934; SI-NEXT: s_mov_b32 s5, s6 1935; SI-NEXT: v_mul_u32_u24_e32 v2, 0xbc00, v2 1936; SI-NEXT: buffer_store_short v2, v[0:1], s[4:7], 0 addr64 1937; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) 1938; SI-NEXT: s_setpc_b64 s[30:31] 1939 %y = mul i16 %x, bitcast (half -1.0 to i16) 1940 store i16 %y, ptr addrspace(1) %out 1941 ret void 1942} 1943 1944define void @shl_inline_imm_2.0_i16(ptr addrspace(1) %out, i16 %x) { 1945; GFX10-LABEL: shl_inline_imm_2.0_i16: 1946; GFX10: ; %bb.0: 1947; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] 1948; GFX10-NEXT: v_lshlrev_b16 v2, v2, 0x4000 ; encoding: [0x02,0x00,0x14,0xd7,0x02,0xff,0x01,0x00,0x00,0x40,0x00,0x00] 1949; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00] 1950; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe] 1951; 1952; GFX11-LABEL: shl_inline_imm_2.0_i16: 1953; GFX11: ; %bb.0: 1954; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf] 1955; GFX11-NEXT: v_lshlrev_b16 v2, v2, 0x4000 ; encoding: [0x02,0x00,0x38,0xd7,0x02,0xff,0x01,0x00,0x00,0x40,0x00,0x00] 1956; GFX11-NEXT: global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00] 1957; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] 1958; 1959; VI-LABEL: shl_inline_imm_2.0_i16: 1960; VI: ; %bb.0: 1961; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] 1962; VI-NEXT: s_movk_i32 s4, 0x4000 ; encoding: [0x00,0x40,0x04,0xb0] 1963; VI-NEXT: v_lshlrev_b16_e64 v2, v2, s4 ; encoding: [0x02,0x00,0x2a,0xd1,0x02,0x09,0x00,0x00] 1964; VI-NEXT: flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00] 1965; VI-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf] 1966; VI-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe] 1967; 1968; SI-LABEL: shl_inline_imm_2.0_i16: 1969; SI: ; %bb.0: 1970; 
SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1971; SI-NEXT: s_mov_b32 s6, 0 1972; SI-NEXT: s_mov_b32 s7, 0xf000 1973; SI-NEXT: s_mov_b32 s4, s6 1974; SI-NEXT: s_mov_b32 s5, s6 1975; SI-NEXT: v_lshl_b32_e32 v2, 0x4000, v2 1976; SI-NEXT: buffer_store_short v2, v[0:1], s[4:7], 0 addr64 1977; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) 1978; SI-NEXT: s_setpc_b64 s[30:31] 1979 %y = shl i16 bitcast (half 2.0 to i16), %x 1980 store i16 %y, ptr addrspace(1) %out 1981 ret void 1982} 1983 1984define void @shl_inline_imm_neg_2.0_i16(ptr addrspace(1) %out, i16 %x) { 1985; GFX10-LABEL: shl_inline_imm_neg_2.0_i16: 1986; GFX10: ; %bb.0: 1987; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] 1988; GFX10-NEXT: v_lshlrev_b16 v2, v2, 0xc000 ; encoding: [0x02,0x00,0x14,0xd7,0x02,0xff,0x01,0x00,0x00,0xc0,0xff,0xff] 1989; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00] 1990; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe] 1991; 1992; GFX11-LABEL: shl_inline_imm_neg_2.0_i16: 1993; GFX11: ; %bb.0: 1994; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf] 1995; GFX11-NEXT: v_lshlrev_b16 v2, v2, 0xc000 ; encoding: [0x02,0x00,0x38,0xd7,0x02,0xff,0x01,0x00,0x00,0xc0,0xff,0xff] 1996; GFX11-NEXT: global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00] 1997; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] 1998; 1999; VI-LABEL: shl_inline_imm_neg_2.0_i16: 2000; VI: ; %bb.0: 2001; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] 2002; VI-NEXT: s_movk_i32 s4, 0xc000 ; encoding: [0x00,0xc0,0x04,0xb0] 2003; VI-NEXT: v_lshlrev_b16_e64 v2, v2, s4 ; encoding: [0x02,0x00,0x2a,0xd1,0x02,0x09,0x00,0x00] 2004; VI-NEXT: flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00] 2005; VI-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf] 2006; VI-NEXT: 
s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe] 2007; 2008; SI-LABEL: shl_inline_imm_neg_2.0_i16: 2009; SI: ; %bb.0: 2010; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2011; SI-NEXT: s_mov_b32 s6, 0 2012; SI-NEXT: s_mov_b32 s7, 0xf000 2013; SI-NEXT: s_mov_b32 s4, s6 2014; SI-NEXT: s_mov_b32 s5, s6 2015; SI-NEXT: v_lshl_b32_e32 v2, 0xffffc000, v2 2016; SI-NEXT: buffer_store_short v2, v[0:1], s[4:7], 0 addr64 2017; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) 2018; SI-NEXT: s_setpc_b64 s[30:31] 2019 %y = shl i16 bitcast (half -2.0 to i16), %x 2020 store i16 %y, ptr addrspace(1) %out 2021 ret void 2022} 2023 2024define void @mul_inline_imm_4.0_i16(ptr addrspace(1) %out, i16 %x) { 2025; GFX10-LABEL: mul_inline_imm_4.0_i16: 2026; GFX10: ; %bb.0: 2027; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] 2028; GFX10-NEXT: v_mul_lo_u16 v2, 0x4400, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x44,0x00,0x00] 2029; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00] 2030; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe] 2031; 2032; GFX11-LABEL: mul_inline_imm_4.0_i16: 2033; GFX11: ; %bb.0: 2034; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf] 2035; GFX11-NEXT: v_mul_lo_u16 v2, 0x4400, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0x44,0x00,0x00] 2036; GFX11-NEXT: global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00] 2037; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] 2038; 2039; VI-LABEL: mul_inline_imm_4.0_i16: 2040; VI: ; %bb.0: 2041; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] 2042; VI-NEXT: v_mul_lo_u16_e32 v2, 0x4400, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0x44,0x00,0x00] 2043; VI-NEXT: flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00] 2044; VI-NEXT: s_waitcnt vmcnt(0) ; 
encoding: [0x70,0x0f,0x8c,0xbf] 2045; VI-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe] 2046; 2047; SI-LABEL: mul_inline_imm_4.0_i16: 2048; SI: ; %bb.0: 2049; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2050; SI-NEXT: s_mov_b32 s6, 0 2051; SI-NEXT: v_and_b32_e32 v2, 0xffff, v2 2052; SI-NEXT: s_mov_b32 s7, 0xf000 2053; SI-NEXT: s_mov_b32 s4, s6 2054; SI-NEXT: s_mov_b32 s5, s6 2055; SI-NEXT: v_mul_u32_u24_e32 v2, 0x4400, v2 2056; SI-NEXT: buffer_store_short v2, v[0:1], s[4:7], 0 addr64 2057; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) 2058; SI-NEXT: s_setpc_b64 s[30:31] 2059 %y = mul i16 %x, bitcast (half 4.0 to i16) 2060 store i16 %y, ptr addrspace(1) %out 2061 ret void 2062} 2063 2064define void @mul_inline_imm_neg_4.0_i16(ptr addrspace(1) %out, i16 %x) { 2065; GFX10-LABEL: mul_inline_imm_neg_4.0_i16: 2066; GFX10: ; %bb.0: 2067; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] 2068; GFX10-NEXT: v_mul_lo_u16 v2, 0xc400, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xc4,0xff,0xff] 2069; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00] 2070; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe] 2071; 2072; GFX11-LABEL: mul_inline_imm_neg_4.0_i16: 2073; GFX11: ; %bb.0: 2074; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf] 2075; GFX11-NEXT: v_mul_lo_u16 v2, 0xc400, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x00,0xc4,0xff,0xff] 2076; GFX11-NEXT: global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00] 2077; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] 2078; 2079; VI-LABEL: mul_inline_imm_neg_4.0_i16: 2080; VI: ; %bb.0: 2081; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] 2082; VI-NEXT: v_mul_lo_u16_e32 v2, 0xc400, v2 ; encoding: [0xff,0x04,0x04,0x52,0x00,0xc4,0xff,0xff] 2083; VI-NEXT: flat_store_short 
v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00] 2084; VI-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf] 2085; VI-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe] 2086; 2087; SI-LABEL: mul_inline_imm_neg_4.0_i16: 2088; SI: ; %bb.0: 2089; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2090; SI-NEXT: s_mov_b32 s6, 0 2091; SI-NEXT: v_and_b32_e32 v2, 0xffff, v2 2092; SI-NEXT: s_mov_b32 s7, 0xf000 2093; SI-NEXT: s_mov_b32 s4, s6 2094; SI-NEXT: s_mov_b32 s5, s6 2095; SI-NEXT: v_mul_u32_u24_e32 v2, 0xc400, v2 2096; SI-NEXT: buffer_store_short v2, v[0:1], s[4:7], 0 addr64 2097; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) 2098; SI-NEXT: s_setpc_b64 s[30:31] 2099 %y = mul i16 %x, bitcast (half -4.0 to i16) 2100 store i16 %y, ptr addrspace(1) %out 2101 ret void 2102} 2103 2104define void @mul_inline_imm_inv2pi_i16(ptr addrspace(1) %out, i16 %x) { 2105; GFX10-LABEL: mul_inline_imm_inv2pi_i16: 2106; GFX10: ; %bb.0: 2107; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] 2108; GFX10-NEXT: v_mul_lo_u16 v2, 0x3118, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x18,0x31,0x00,0x00] 2109; GFX10-NEXT: global_store_short v[0:1], v2, off ; encoding: [0x00,0x80,0x68,0xdc,0x00,0x02,0x7d,0x00] 2110; GFX10-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x20,0x80,0xbe] 2111; 2112; GFX11-LABEL: mul_inline_imm_inv2pi_i16: 2113; GFX11: ; %bb.0: 2114; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x89,0xbf] 2115; GFX11-NEXT: v_mul_lo_u16 v2, 0x3118, v2 ; encoding: [0x02,0x00,0x05,0xd7,0xff,0x04,0x02,0x00,0x18,0x31,0x00,0x00] 2116; GFX11-NEXT: global_store_b16 v[0:1], v2, off ; encoding: [0x00,0x00,0x66,0xdc,0x00,0x02,0x7c,0x00] 2117; GFX11-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x48,0x80,0xbe] 2118; 2119; VI-LABEL: mul_inline_imm_inv2pi_i16: 2120; VI: ; %bb.0: 2121; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; encoding: [0x00,0x00,0x8c,0xbf] 2122; VI-NEXT: v_mul_lo_u16_e32 
v2, 0x3118, v2 ; encoding: [0xff,0x04,0x04,0x52,0x18,0x31,0x00,0x00] 2123; VI-NEXT: flat_store_short v[0:1], v2 ; encoding: [0x00,0x00,0x68,0xdc,0x00,0x02,0x00,0x00] 2124; VI-NEXT: s_waitcnt vmcnt(0) ; encoding: [0x70,0x0f,0x8c,0xbf] 2125; VI-NEXT: s_setpc_b64 s[30:31] ; encoding: [0x1e,0x1d,0x80,0xbe] 2126; 2127; SI-LABEL: mul_inline_imm_inv2pi_i16: 2128; SI: ; %bb.0: 2129; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2130; SI-NEXT: s_mov_b32 s6, 0 2131; SI-NEXT: v_and_b32_e32 v2, 0xffff, v2 2132; SI-NEXT: s_mov_b32 s7, 0xf000 2133; SI-NEXT: s_mov_b32 s4, s6 2134; SI-NEXT: s_mov_b32 s5, s6 2135; SI-NEXT: v_mul_u32_u24_e32 v2, 0x3118, v2 2136; SI-NEXT: buffer_store_short v2, v[0:1], s[4:7], 0 addr64 2137; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) 2138; SI-NEXT: s_setpc_b64 s[30:31] 2139 %y = mul i16 %x, bitcast (half 0xH3118 to i16) 2140 store i16 %y, ptr addrspace(1) %out 2141 ret void 2142} 2143