; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=gfx1100 -mattr=-flat-for-global -verify-machineinstrs -show-mc-encoding < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10 %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=gfx1010 -mattr=-flat-for-global,-xnack -verify-machineinstrs -show-mc-encoding < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX10 %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=gfx900 -mattr=-flat-for-global,-xnack -verify-machineinstrs -show-mc-encoding < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=fiji -mattr=-flat-for-global,-xnack -verify-machineinstrs -show-mc-encoding < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=kaveri -mattr=-flat-for-global -verify-machineinstrs -show-mc-encoding < %s | FileCheck -enable-var-scope -check-prefixes=GCN %s

; Checks how splat <2 x half> / <2 x i16> constants are emitted as operands
; (inline constant vs. 32-bit literal vs. materialized register) for stores,
; packed fadd, integer add on the bitcast value, packed mul and packed shl.
;
; FIXME: Merge into imm.ll

;===------------------------------------------------------------------------===
; Stores of splat constants: the packed value is materialized with a single
; v_mov_b32 and stored as one dword.
;===------------------------------------------------------------------------===

; GCN-LABEL: {{^}}store_inline_imm_neg_0.0_v2i16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80008000 ; encoding
; GCN: buffer_store_{{dword|b32}} [[REG]]
define amdgpu_kernel void @store_inline_imm_neg_0.0_v2i16(ptr addrspace(1) %out) #0 {
  store <2 x i16> <i16 -32768, i16 -32768>, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}store_inline_imm_0.0_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0 ; encoding
; GCN: buffer_store_{{dword|b32}} [[REG]]
define amdgpu_kernel void @store_inline_imm_0.0_v2f16(ptr addrspace(1) %out) #0 {
  store <2 x half> <half 0.0, half 0.0>, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}store_imm_neg_0.0_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x80008000 ; encoding
; GCN: buffer_store_{{dword|b32}} [[REG]]
define amdgpu_kernel void @store_imm_neg_0.0_v2f16(ptr addrspace(1) %out) #0 {
  store <2 x half> <half -0.0, half -0.0>, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}store_inline_imm_0.5_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x38003800 ; encoding
; GCN: buffer_store_{{dword|b32}} [[REG]]
define amdgpu_kernel void @store_inline_imm_0.5_v2f16(ptr addrspace(1) %out) #0 {
  store <2 x half> <half 0.5, half 0.5>, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}store_inline_imm_m_0.5_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xb800b800 ; encoding
; GCN: buffer_store_{{dword|b32}} [[REG]]
define amdgpu_kernel void @store_inline_imm_m_0.5_v2f16(ptr addrspace(1) %out) #0 {
  store <2 x half> <half -0.5, half -0.5>, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}store_inline_imm_1.0_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x3c003c00 ; encoding
; GCN: buffer_store_{{dword|b32}} [[REG]]
define amdgpu_kernel void @store_inline_imm_1.0_v2f16(ptr addrspace(1) %out) #0 {
  store <2 x half> <half 1.0, half 1.0>, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}store_inline_imm_m_1.0_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xbc00bc00 ; encoding
; GCN: buffer_store_{{dword|b32}} [[REG]]
define amdgpu_kernel void @store_inline_imm_m_1.0_v2f16(ptr addrspace(1) %out) #0 {
  store <2 x half> <half -1.0, half -1.0>, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}store_inline_imm_2.0_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x40004000 ; encoding
; GCN: buffer_store_{{dword|b32}} [[REG]]
define amdgpu_kernel void @store_inline_imm_2.0_v2f16(ptr addrspace(1) %out) #0 {
  store <2 x half> <half 2.0, half 2.0>, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}store_inline_imm_m_2.0_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xc000c000 ; encoding
; GCN: buffer_store_{{dword|b32}} [[REG]]
define amdgpu_kernel void @store_inline_imm_m_2.0_v2f16(ptr addrspace(1) %out) #0 {
  store <2 x half> <half -2.0, half -2.0>, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}store_inline_imm_4.0_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x44004400 ; encoding
; GCN: buffer_store_{{dword|b32}} [[REG]]
define amdgpu_kernel void @store_inline_imm_4.0_v2f16(ptr addrspace(1) %out) #0 {
  store <2 x half> <half 4.0, half 4.0>, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}store_inline_imm_m_4.0_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xc400c400 ; encoding
; GCN: buffer_store_{{dword|b32}} [[REG]]
define amdgpu_kernel void @store_inline_imm_m_4.0_v2f16(ptr addrspace(1) %out) #0 {
  store <2 x half> <half -4.0, half -4.0>, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}store_inline_imm_inv_2pi_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x31183118 ; encoding
; GCN: buffer_store_{{dword|b32}} [[REG]]
define amdgpu_kernel void @store_inline_imm_inv_2pi_v2f16(ptr addrspace(1) %out) #0 {
  store <2 x half> <half 0xH3118, half 0xH3118>, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}store_inline_imm_m_inv_2pi_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0xb118b118 ; encoding
; GCN: buffer_store_{{dword|b32}} [[REG]]
define amdgpu_kernel void @store_inline_imm_m_inv_2pi_v2f16(ptr addrspace(1) %out) #0 {
  store <2 x half> <half 0xHB118, half 0xHB118>, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}store_literal_imm_v2f16:
; GCN: v_mov_b32_e32 [[REG:v[0-9]+]], 0x6c006c00
; GCN: buffer_store_{{dword|b32}} [[REG]]
define amdgpu_kernel void @store_literal_imm_v2f16(ptr addrspace(1) %out) #0 {
  store <2 x half> <half 4096.0, half 4096.0>, ptr addrspace(1) %out
  ret void
}

;===------------------------------------------------------------------------===
; Packed fadd with a splat floating-point constant operand.
;===------------------------------------------------------------------------===

; NOTE: this function has no GFX10 check lines, unlike its siblings below.
; GCN-LABEL: {{^}}add_inline_imm_0.0_v2f16:
; GFX9: s_load_dword [[VAL:s[0-9]+]]
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 0 ; encoding
; GFX9: buffer_store_dword [[REG]]

; FIXME: Shouldn't need right shift and SDWA, also extra copy
; VI-DAG: s_load_dword [[VAL:s[0-9]+]]
; VI-DAG: v_mov_b32_e32 [[CONST0:v[0-9]+]], 0
; VI-DAG: s_lshr_b32 [[SHR:s[0-9]+]], [[VAL]], 16
; VI-DAG: v_mov_b32_e32 [[V_SHR:v[0-9]+]], [[SHR]]

; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_SHR]], [[CONST0]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], 0
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_0.0_v2f16(ptr addrspace(1) %out, <2 x half> %x) #0 {
  %y = fadd <2 x half> %x, <half 0.0, half 0.0>
  store <2 x half> %y, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_0.5_v2f16:
; GFX10: s_load_{{dword|b32}} [[VAL:s[0-9]+]]
; GFX10: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 0.5 op_sel_hi:[1,0] ; encoding: [0x00,0x40,0x0f,0xcc,0x02,0xe0,0x01,0x08]
; GFX10: buffer_store_{{dword|b32}} [[REG]]

; GFX9: s_load_dword [[VAL:s[0-9]+]]
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 0.5 op_sel_hi:[1,0] ; encoding: [0x00,0x40,0x8f,0xd3,0x06,0xe0,0x01,0x08]
; GFX9: buffer_store_dword [[REG]]

; FIXME: Shouldn't need right shift and SDWA, also extra copy
; VI-DAG: s_load_dword [[VAL:s[0-9]+]]
; VI-DAG: v_mov_b32_e32 [[CONST05:v[0-9]+]], 0x3800
; VI-DAG: s_lshr_b32 [[SHR:s[0-9]+]], [[VAL]], 16
; VI-DAG: v_mov_b32_e32 [[V_SHR:v[0-9]+]], [[SHR]]

; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_SHR]], [[CONST05]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], 0.5
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_0.5_v2f16(ptr addrspace(1) %out, <2 x half> %x) #0 {
  %y = fadd <2 x half> %x, <half 0.5, half 0.5>
  store <2 x half> %y, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_neg_0.5_v2f16:
; GFX10: s_load_{{dword|b32}} [[VAL:s[0-9]+]]
; GFX10: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -0.5 op_sel_hi:[1,0] ; encoding: [0x00,0x40,0x0f,0xcc,0x02,0xe2,0x01,0x08]
; GFX10: buffer_store_{{dword|b32}} [[REG]]

; GFX9: s_load_dword [[VAL:s[0-9]+]]
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -0.5 op_sel_hi:[1,0] ; encoding: [0x00,0x40,0x8f,0xd3,0x06,0xe2,0x01,0x08]
; GFX9: buffer_store_dword [[REG]]

; FIXME: Shouldn't need right shift and SDWA, also extra copy
; VI-DAG: s_load_dword [[VAL:s[0-9]+]]
; VI-DAG: v_mov_b32_e32 [[CONSTM05:v[0-9]+]], 0xb800
; VI-DAG: s_lshr_b32 [[SHR:s[0-9]+]], [[VAL]], 16
; VI-DAG: v_mov_b32_e32 [[V_SHR:v[0-9]+]], [[SHR]]

; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_SHR]], [[CONSTM05]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], -0.5
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_neg_0.5_v2f16(ptr addrspace(1) %out, <2 x half> %x) #0 {
  %y = fadd <2 x half> %x, <half -0.5, half -0.5>
  store <2 x half> %y, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_1.0_v2f16:
; GFX10: s_load_{{dword|b32}} [[VAL:s[0-9]+]]
; GFX10: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 1.0 op_sel_hi:[1,0] ; encoding:
; GFX10: buffer_store_{{dword|b32}} [[REG]]

; GFX9: s_load_dword [[VAL:s[0-9]+]]
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 1.0 op_sel_hi:[1,0] ; encoding
; GFX9: buffer_store_dword [[REG]]

; FIXME: Shouldn't need right shift and SDWA, also extra copy
; VI-DAG: s_load_dword [[VAL:s[0-9]+]]
; VI-DAG: v_mov_b32_e32 [[CONST1:v[0-9]+]], 0x3c00
; VI-DAG: s_lshr_b32 [[SHR:s[0-9]+]], [[VAL]], 16
; VI-DAG: v_mov_b32_e32 [[V_SHR:v[0-9]+]], [[SHR]]

; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_SHR]], [[CONST1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], 1.0
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_1.0_v2f16(ptr addrspace(1) %out, <2 x half> %x) #0 {
  %y = fadd <2 x half> %x, <half 1.0, half 1.0>
  store <2 x half> %y, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_neg_1.0_v2f16:
; GFX10: s_load_{{dword|b32}} [[VAL:s[0-9]+]]
; GFX10: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -1.0 op_sel_hi:[1,0] ; encoding
; GFX10: buffer_store_{{dword|b32}} [[REG]]

; GFX9: s_load_dword [[VAL:s[0-9]+]]
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -1.0 op_sel_hi:[1,0] ; encoding
; GFX9: buffer_store_dword [[REG]]

; FIXME: Shouldn't need right shift and SDWA, also extra copy
; VI-DAG: s_load_dword [[VAL:s[0-9]+]]
; VI-DAG: v_mov_b32_e32 [[CONSTM1:v[0-9]+]], 0xbc00
; VI-DAG: s_lshr_b32 [[SHR:s[0-9]+]], [[VAL]], 16
; VI-DAG: v_mov_b32_e32 [[V_SHR:v[0-9]+]], [[SHR]]

; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_SHR]], [[CONSTM1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], -1.0
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_neg_1.0_v2f16(ptr addrspace(1) %out, <2 x half> %x) #0 {
  %y = fadd <2 x half> %x, <half -1.0, half -1.0>
  store <2 x half> %y, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_2.0_v2f16:
; GFX10: s_load_{{dword|b32}} [[VAL:s[0-9]+]]
; GFX10: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 2.0 op_sel_hi:[1,0] ; encoding
; GFX10: buffer_store_{{dword|b32}} [[REG]]

; GFX9: s_load_dword [[VAL:s[0-9]+]]
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 2.0 op_sel_hi:[1,0] ; encoding
; GFX9: buffer_store_dword [[REG]]

; FIXME: Shouldn't need right shift and SDWA, also extra copy
; VI-DAG: s_load_dword [[VAL:s[0-9]+]]
; VI-DAG: v_mov_b32_e32 [[CONST2:v[0-9]+]], 0x4000
; VI-DAG: s_lshr_b32 [[SHR:s[0-9]+]], [[VAL]], 16
; VI-DAG: v_mov_b32_e32 [[V_SHR:v[0-9]+]], [[SHR]]

; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_SHR]], [[CONST2]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], 2.0
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_2.0_v2f16(ptr addrspace(1) %out, <2 x half> %x) #0 {
  %y = fadd <2 x half> %x, <half 2.0, half 2.0>
  store <2 x half> %y, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_neg_2.0_v2f16:
; GFX10: s_load_{{dword|b32}} [[VAL:s[0-9]+]]
; GFX10: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -2.0 op_sel_hi:[1,0] ; encoding
; GFX10: buffer_store_{{dword|b32}} [[REG]]

; GFX9: s_load_dword [[VAL:s[0-9]+]]
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -2.0 op_sel_hi:[1,0] ; encoding
; GFX9: buffer_store_dword [[REG]]

; FIXME: Shouldn't need right shift and SDWA, also extra copy
; VI-DAG: s_load_dword [[VAL:s[0-9]+]]
; VI-DAG: v_mov_b32_e32 [[CONSTM2:v[0-9]+]], 0xc000
; VI-DAG: s_lshr_b32 [[SHR:s[0-9]+]], [[VAL]], 16
; VI-DAG: v_mov_b32_e32 [[V_SHR:v[0-9]+]], [[SHR]]

; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_SHR]], [[CONSTM2]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], -2.0
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_neg_2.0_v2f16(ptr addrspace(1) %out, <2 x half> %x) #0 {
  %y = fadd <2 x half> %x, <half -2.0, half -2.0>
  store <2 x half> %y, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_4.0_v2f16:
; GFX10: s_load_{{dword|b32}} [[VAL:s[0-9]+]]
; GFX10: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 4.0 op_sel_hi:[1,0] ; encoding
; GFX10: buffer_store_{{dword|b32}} [[REG]]

; GFX9: s_load_dword [[VAL:s[0-9]+]]
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 4.0 op_sel_hi:[1,0] ; encoding
; GFX9: buffer_store_dword [[REG]]

; FIXME: Shouldn't need right shift and SDWA, also extra copy
; VI-DAG: s_load_dword [[VAL:s[0-9]+]]
; VI-DAG: v_mov_b32_e32 [[CONST4:v[0-9]+]], 0x4400
; VI-DAG: s_lshr_b32 [[SHR:s[0-9]+]], [[VAL]], 16
; VI-DAG: v_mov_b32_e32 [[V_SHR:v[0-9]+]], [[SHR]]

; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_SHR]], [[CONST4]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], 4.0
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_4.0_v2f16(ptr addrspace(1) %out, <2 x half> %x) #0 {
  %y = fadd <2 x half> %x, <half 4.0, half 4.0>
  store <2 x half> %y, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_neg_4.0_v2f16:
; GFX10: s_load_{{dword|b32}} [[VAL:s[0-9]+]]
; GFX10: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -4.0 op_sel_hi:[1,0] ; encoding
; GFX10: buffer_store_{{dword|b32}} [[REG]]

; GFX9: s_load_dword [[VAL:s[0-9]+]]
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], -4.0 op_sel_hi:[1,0] ; encoding
; GFX9: buffer_store_dword [[REG]]

; FIXME: Shouldn't need right shift and SDWA, also extra copy
; VI-DAG: s_load_dword [[VAL:s[0-9]+]]
; VI-DAG: v_mov_b32_e32 [[CONSTM4:v[0-9]+]], 0xc400
; VI-DAG: s_lshr_b32 [[SHR:s[0-9]+]], [[VAL]], 16
; VI-DAG: v_mov_b32_e32 [[V_SHR:v[0-9]+]], [[SHR]]

; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_SHR]], [[CONSTM4]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], -4.0
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_neg_4.0_v2f16(ptr addrspace(1) %out, <2 x half> %x) #0 {
  %y = fadd <2 x half> %x, <half -4.0, half -4.0>
  store <2 x half> %y, ptr addrspace(1) %out
  ret void
}

;===------------------------------------------------------------------------===
; Packed fadd where the non-constant operand is loaded from memory (VGPR).
;===------------------------------------------------------------------------===

; GCN-LABEL: {{^}}commute_add_inline_imm_0.5_v2f16:
; GFX10: buffer_load_{{dword|b32}} [[VAL:v[0-9]+]]
; GFX10: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 0.5
; GFX10: buffer_store_{{dword|b32}} [[REG]]

; GFX9: buffer_load_dword [[VAL:v[0-9]+]]
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 0.5
; GFX9: buffer_store_dword [[REG]]

; VI-DAG: v_mov_b32_e32 [[CONST05:v[0-9]+]], 0x3800
; VI-DAG: buffer_load_dword
; VI-NOT: and
; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[CONST05]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 0.5, v{{[0-9]+}}
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @commute_add_inline_imm_0.5_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
  %x = load <2 x half>, ptr addrspace(1) %in
  %y = fadd <2 x half> %x, <half 0.5, half 0.5>
  store <2 x half> %y, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}commute_add_literal_v2f16:
; GFX10: v_pk_add_f16 v0, 0x6400, v0 op_sel_hi:[0,1] ; encoding: [0x00,0x40,0x0f,0xcc,0xff,0x00,0x02,0x10,0x00,0x64,0x00,0x00]

; GFX9-DAG: buffer_load_dword [[VAL:v[0-9]+]]
; GFX9-DAG: s_movk_i32 [[K:s[0-9]+]], 0x6400 ; encoding
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], [[K]] op_sel_hi:[1,0] ; encoding: [0x00,0x40,0x8f,0xd3,0x00,0x01,0x00,0x08]
; GFX9: buffer_store_dword [[REG]]

; VI-DAG: buffer_load_dword
; VI-NOT: and
; VI-DAG: v_add_f16_e32 v{{[0-9]+}}, 0x6400, v{{[0-9]+}}
; gfx8 does not support sreg or imm in sdwa - this will be a move then
; VI-DAG: v_mov_b32_e32 [[VK:v[0-9]+]], 0x6400
; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, v{{[0-9]+}}, [[VK]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; VI: v_or_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}
; VI: buffer_store_dword
define amdgpu_kernel void @commute_add_literal_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
  %x = load <2 x half>, ptr addrspace(1) %in
  %y = fadd <2 x half> %x, <half 1024.0, half 1024.0>
  store <2 x half> %y, ptr addrspace(1) %out
  ret void
}

;===------------------------------------------------------------------------===
; Packed fadd with half constants whose bit patterns are small integers.
;===------------------------------------------------------------------------===

; GCN-LABEL: {{^}}add_inline_imm_1_v2f16:
; GFX10: s_load_{{dword|b32}} [[VAL:s[0-9]+]]
; GFX10: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 1 op_sel_hi:[1,0] ; encoding
; GFX10: buffer_store_{{dword|b32}} [[REG]]

; GFX9: s_load_dword [[VAL:s[0-9]+]]
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 1 op_sel_hi:[1,0] ; encoding
; GFX9: buffer_store_dword [[REG]]

; FIXME: Shouldn't need right shift and SDWA, also extra copy
; VI-DAG: s_load_dword [[VAL:s[0-9]+]]
; VI-DAG: v_mov_b32_e32 [[CONST1:v[0-9]+]], 1 ; encoding
; VI-DAG: s_lshr_b32 [[SHR:s[0-9]+]], [[VAL]], 16
; VI-DAG: v_mov_b32_e32 [[V_SHR:v[0-9]+]], [[SHR]]

; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_SHR]], [[CONST1]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], 1 ; encoding
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_1_v2f16(ptr addrspace(1) %out, <2 x half> %x) #0 {
  %y = fadd <2 x half> %x, <half 0xH0001, half 0xH0001>
  store <2 x half> %y, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_2_v2f16:
; GFX10: s_load_{{dword|b32}} [[VAL:s[0-9]+]]
; GFX10: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 2 op_sel_hi:[1,0] ; encoding
; GFX10: buffer_store_{{dword|b32}} [[REG]]

; GFX9: s_load_dword [[VAL:s[0-9]+]]
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 2 op_sel_hi:[1,0] ; encoding
; GFX9: buffer_store_dword [[REG]]

; FIXME: Shouldn't need right shift and SDWA, also extra copy
; VI-DAG: s_load_dword [[VAL:s[0-9]+]]
; VI-DAG: v_mov_b32_e32 [[CONST2:v[0-9]+]], 2 ; encoding
; VI-DAG: s_lshr_b32 [[SHR:s[0-9]+]], [[VAL]], 16
; VI-DAG: v_mov_b32_e32 [[V_SHR:v[0-9]+]], [[SHR]]

; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_SHR]], [[CONST2]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], 2 ; encoding
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_2_v2f16(ptr addrspace(1) %out, <2 x half> %x) #0 {
  %y = fadd <2 x half> %x, <half 0xH0002, half 0xH0002>
  store <2 x half> %y, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_16_v2f16:
; GFX10: s_load_{{dword|b32}} [[VAL:s[0-9]+]]
; GFX10: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 16 op_sel_hi:[1,0] ; encoding
; GFX10: buffer_store_{{dword|b32}} [[REG]]

; GFX9: s_load_dword [[VAL:s[0-9]+]]
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 16 op_sel_hi:[1,0] ; encoding
; GFX9: buffer_store_dword [[REG]]

; FIXME: Shouldn't need right shift and SDWA, also extra copy
; VI-DAG: s_load_dword [[VAL:s[0-9]+]]
; VI-DAG: v_mov_b32_e32 [[CONST16:v[0-9]+]], 16 ; encoding
; VI-DAG: s_lshr_b32 [[SHR:s[0-9]+]], [[VAL]], 16
; VI-DAG: v_mov_b32_e32 [[V_SHR:v[0-9]+]], [[SHR]]

; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_SHR]], [[CONST16]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], 16 ; encoding
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_16_v2f16(ptr addrspace(1) %out, <2 x half> %x) #0 {
  %y = fadd <2 x half> %x, <half 0xH0010, half 0xH0010>
  store <2 x half> %y, ptr addrspace(1) %out
  ret void
}

;===------------------------------------------------------------------------===
; Integer add on the bitcast <2 x half> value; selected as a scalar s_add_i32.
; The source register is matched with a regex so the checks do not depend on
; register allocation.
;===------------------------------------------------------------------------===

; GCN-LABEL: {{^}}add_inline_imm_neg_1_v2f16:
; GFX10: s_add_i32 [[VAL:s[0-9]+]], s{{[0-9]+}}, -1
; GFX10: v_mov_b32_e32 [[REG:v[0-9]+]], [[VAL]]
; GFX10: buffer_store_{{dword|b32}} [[REG]]

; GFX9: s_add_i32 [[VAL:s[0-9]+]], s{{[0-9]+}}, -1
; GFX9: v_mov_b32_e32 [[REG:v[0-9]+]], [[VAL]]
; GFX9: buffer_store_dword [[REG]]

; VI: s_load_dword [[VAL:s[0-9]+]]
; VI: s_add_i32 [[ADD:s[0-9]+]], [[VAL]], -1 ; encoding
; VI: v_mov_b32_e32 [[REG:v[0-9]+]], [[ADD]]
; VI: buffer_store_dword [[REG]]
define amdgpu_kernel void @add_inline_imm_neg_1_v2f16(ptr addrspace(1) %out, <2 x half> %x) #0 {
  %xbc = bitcast <2 x half> %x to i32
  %y = add i32 %xbc, -1
  %ybc = bitcast i32 %y to <2 x half>
  store <2 x half> %ybc, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_neg_2_v2f16:
; GFX10: s_add_i32 [[VAL:s[0-9]+]], s{{[0-9]+}}, 0xfffefffe
; GFX10: v_mov_b32_e32 [[REG:v[0-9]+]], [[VAL]]
; GFX10: buffer_store_{{dword|b32}} [[REG]]

; GFX9: s_add_i32 [[VAL:s[0-9]+]], s{{[0-9]+}}, 0xfffefffe
; GFX9: v_mov_b32_e32 [[REG:v[0-9]+]], [[VAL]]
; GFX9: buffer_store_dword [[REG]]

; VI: s_load_dword [[VAL:s[0-9]+]]
; VI: s_add_i32 [[ADD:s[0-9]+]], [[VAL]], 0xfffefffe ; encoding
; VI: v_mov_b32_e32 [[REG:v[0-9]+]], [[ADD]]
; VI: buffer_store_dword [[REG]]
define amdgpu_kernel void @add_inline_imm_neg_2_v2f16(ptr addrspace(1) %out, <2 x half> %x) #0 {
  %xbc = bitcast <2 x half> %x to i32
  %y = add i32 %xbc, 4294901758 ; 0xfffefffe
  %ybc = bitcast i32 %y to <2 x half>
  store <2 x half> %ybc, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_neg_16_v2f16:
; GFX10: s_add_i32 [[VAL:s[0-9]+]], s{{[0-9]+}}, 0xfff0fff0
; GFX10: v_mov_b32_e32 [[REG:v[0-9]+]], [[VAL]]
; GFX10: buffer_store_{{dword|b32}} [[REG]]

; GFX9: s_add_i32 [[VAL:s[0-9]+]], s{{[0-9]+}}, 0xfff0fff0
; GFX9: v_mov_b32_e32 [[REG:v[0-9]+]], [[VAL]]
; GFX9: buffer_store_dword [[REG]]

; VI: s_load_dword [[VAL:s[0-9]+]]
; VI: s_add_i32 [[ADD:s[0-9]+]], [[VAL]], 0xfff0fff0 ; encoding
; VI: v_mov_b32_e32 [[REG:v[0-9]+]], [[ADD]]
; VI: buffer_store_dword [[REG]]
define amdgpu_kernel void @add_inline_imm_neg_16_v2f16(ptr addrspace(1) %out, <2 x half> %x) #0 {
  %xbc = bitcast <2 x half> %x to i32
  %y = add i32 %xbc, 4293984240 ; 0xfff0fff0
  %ybc = bitcast i32 %y to <2 x half>
  store <2 x half> %ybc, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_63_v2f16:
; GFX10: s_load_{{dword|b32}} [[VAL:s[0-9]+]]
; GFX10: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 63
; GFX10: buffer_store_{{dword|b32}} [[REG]]

; GFX9: s_load_dword [[VAL:s[0-9]+]]
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 63
; GFX9: buffer_store_dword [[REG]]

; FIXME: Shouldn't need right shift and SDWA, also extra copy
; VI-DAG: s_load_dword [[VAL:s[0-9]+]]
; VI-DAG: v_mov_b32_e32 [[CONST63:v[0-9]+]], 63
; VI-DAG: s_lshr_b32 [[SHR:s[0-9]+]], [[VAL]], 16
; VI-DAG: v_mov_b32_e32 [[V_SHR:v[0-9]+]], [[SHR]]

; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_SHR]], [[CONST63]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], 63
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_63_v2f16(ptr addrspace(1) %out, <2 x half> %x) #0 {
  %y = fadd <2 x half> %x, <half 0xH003F, half 0xH003F>
  store <2 x half> %y, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}add_inline_imm_64_v2f16:
; GFX10: s_load_{{dword|b32}} [[VAL:s[0-9]+]]
; GFX10: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 64
; GFX10: buffer_store_{{dword|b32}} [[REG]]

; GFX9: s_load_dword [[VAL:s[0-9]+]]
; GFX9: v_pk_add_f16 [[REG:v[0-9]+]], [[VAL]], 64
; GFX9: buffer_store_dword [[REG]]

; FIXME: Shouldn't need right shift and SDWA, also extra copy
; VI-DAG: s_load_dword [[VAL:s[0-9]+]]
; VI-DAG: v_mov_b32_e32 [[CONST64:v[0-9]+]], 64
; VI-DAG: s_lshr_b32 [[SHR:s[0-9]+]], [[VAL]], 16
; VI-DAG: v_mov_b32_e32 [[V_SHR:v[0-9]+]], [[SHR]]

; VI-DAG: v_add_f16_sdwa v{{[0-9]+}}, [[V_SHR]], [[CONST64]] dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
; VI-DAG: v_add_f16_e64 v{{[0-9]+}}, [[VAL]], 64
; VI: v_or_b32
; VI: buffer_store_dword
define amdgpu_kernel void @add_inline_imm_64_v2f16(ptr addrspace(1) %out, <2 x half> %x) #0 {
  %y = fadd <2 x half> %x, <half 0xH0040, half 0xH0040>
  store <2 x half> %y, ptr addrspace(1) %out
  ret void
}

;===------------------------------------------------------------------------===
; Packed integer mul / shl whose constant operand has the bit pattern of a
; half-precision floating-point constant.
;===------------------------------------------------------------------------===

; GCN-LABEL: {{^}}mul_inline_imm_0.5_v2i16:
; GFX9: s_movk_i32 [[K:s[0-9]+]], 0x3800
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]

; GFX10: v_pk_mul_lo_u16 v0, 0x3800, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x38,0x00,0x00]
define <2 x i16> @mul_inline_imm_0.5_v2i16(<2 x i16> %x) {
  %y = mul <2 x i16> %x, bitcast (<2 x half> <half 0.5, half 0.5> to <2 x i16>)
  ret <2 x i16> %y
}

; GCN-LABEL: {{^}}mul_inline_imm_neg_0.5_v2i16:
; GFX9: s_movk_i32 [[K:s[0-9]+]], 0xb800
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]

; GFX10: v_pk_mul_lo_u16 v0, 0xffffb800, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xb8,0xff,0xff]
define <2 x i16> @mul_inline_imm_neg_0.5_v2i16(<2 x i16> %x) {
  %y = mul <2 x i16> %x, bitcast (<2 x half> <half -0.5, half -0.5> to <2 x i16>)
  ret <2 x i16> %y
}

; GCN-LABEL: {{^}}mul_inline_imm_1.0_v2i16:
; GFX9: s_movk_i32 [[K:s[0-9]+]], 0x3c00
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]

; GFX10: v_pk_mul_lo_u16 v0, 0x3c00, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x3c,0x00,0x00]
define <2 x i16> @mul_inline_imm_1.0_v2i16(<2 x i16> %x) {
  %y = mul <2 x i16> %x, bitcast (<2 x half> <half 1.0, half 1.0> to <2 x i16>)
  ret <2 x i16> %y
}

; GCN-LABEL: {{^}}mul_inline_imm_neg_1.0_v2i16:
; GFX9: s_movk_i32 [[K:s[0-9]+]], 0xbc00
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]

; GFX10: v_pk_mul_lo_u16 v0, 0xffffbc00, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xbc,0xff,0xff]
define <2 x i16> @mul_inline_imm_neg_1.0_v2i16(<2 x i16> %x) {
  %y = mul <2 x i16> %x, bitcast (<2 x half> <half -1.0, half -1.0> to <2 x i16>)
  ret <2 x i16> %y
}

; GCN-LABEL: {{^}}shl_inline_imm_2.0_v2i16:
; GFX9: v_pk_lshlrev_b16 v0, v0, 2.0 op_sel:[0,1]

; GFX10: v_pk_lshlrev_b16 v0, v0, 2.0 op_sel:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xe9,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}}]
define <2 x i16> @shl_inline_imm_2.0_v2i16(<2 x i16> %x) {
  %y = shl <2 x i16> bitcast (<2 x half> <half 2.0, half 2.0> to <2 x i16>), %x
  ret <2 x i16> %y
}

; GCN-LABEL: {{^}}shl_inline_imm_neg_2.0_v2i16:
; GFX9: v_pk_lshlrev_b16 v0, v0, -2.0 op_sel:[0,1]

; GFX10: v_pk_lshlrev_b16 v0, v0, -2.0 op_sel:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xeb,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}}]
define <2 x i16> @shl_inline_imm_neg_2.0_v2i16(<2 x i16> %x) {
  %y = shl <2 x i16> bitcast (<2 x half> <half -2.0, half -2.0> to <2 x i16>), %x
  ret <2 x i16> %y
}

; GCN-LABEL: {{^}}mul_inline_imm_4.0_v2i16:
; GFX9: s_movk_i32 [[K:s[0-9]+]], 0x4400
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]

; GFX10: v_pk_mul_lo_u16 v0, 0x4400, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0x44,0x00,0x00]
define <2 x i16> @mul_inline_imm_4.0_v2i16(<2 x i16> %x) {
  %y = mul <2 x i16> %x, bitcast (<2 x half> <half 4.0, half 4.0> to <2 x i16>)
  ret <2 x i16> %y
}

; GCN-LABEL: {{^}}mul_inline_imm_neg_4.0_v2i16:
; GFX9: s_movk_i32 [[K:s[0-9]+]], 0xc400
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]

; GFX10: v_pk_mul_lo_u16 v0, 0xffffc400, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x00,0xc4,0xff,0xff]
define <2 x i16> @mul_inline_imm_neg_4.0_v2i16(<2 x i16> %x) {
  %y = mul <2 x i16> %x, bitcast (<2 x half> <half -4.0, half -4.0> to <2 x i16>)
  ret <2 x i16> %y
}

; GCN-LABEL: {{^}}mul_inline_imm_inv2pi_v2i16:
; GFX9: s_movk_i32 [[K:s[0-9]+]], 0x3118
; GFX9: v_pk_mul_lo_u16 v0, v0, [[K]] op_sel_hi:[1,0]

; GFX10: v_pk_mul_lo_u16 v0, 0x3118, v0 op_sel_hi:[0,1] ; encoding: [0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0xff,0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x{{[0-9a-f]+}},0x18,0x31,0x00,0x00]
define <2 x i16> @mul_inline_imm_inv2pi_v2i16(<2 x i16> %x) {
  %y = mul <2 x i16> %x, bitcast (<2 x half> <half 0xH3118, half 0xH3118> to <2 x i16>)
  ret <2 x i16> %y
}

attributes #0 = { nounwind "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" }