1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 2; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -global-isel=0 -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=SI-SDAG %s 3; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -global-isel=1 -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=SI-GISEL %s 4; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=fiji -global-isel=0 -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=VI-SDAG %s 5; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=fiji -global-isel=1 -mattr=-flat-for-global -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=VI-GISEL %s 6; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx900 -global-isel=0 -mattr=-flat-for-global -denormal-fp-math=preserve-sign -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=GFX9-SDAG %s 7; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx900 -global-isel=1 -mattr=-flat-for-global -denormal-fp-math=preserve-sign -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=GFX9-GISEL %s 8; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx950 -global-isel=0 -mattr=-flat-for-global -denormal-fp-math=preserve-sign -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=GFX950-SDAG %s 9; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx950 -global-isel=1 -mattr=-flat-for-global -denormal-fp-math=preserve-sign -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=GFX950-GISEL %s 10; RUN: llc -amdgpu-scalarize-global-loads=false 
-mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=-flat-for-global -denormal-fp-math=preserve-sign -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-SDAG %s 11; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global -denormal-fp-math=preserve-sign -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-GISEL %s 12 13define amdgpu_kernel void @fptrunc_f32_to_f16( 14; SI-SDAG-LABEL: fptrunc_f32_to_f16: 15; SI-SDAG: ; %bb.0: ; %entry 16; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 17; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 18; SI-SDAG-NEXT: s_mov_b32 s6, -1 19; SI-SDAG-NEXT: s_mov_b32 s10, s6 20; SI-SDAG-NEXT: s_mov_b32 s11, s7 21; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 22; SI-SDAG-NEXT: s_mov_b32 s8, s2 23; SI-SDAG-NEXT: s_mov_b32 s9, s3 24; SI-SDAG-NEXT: buffer_load_dword v0, off, s[8:11], 0 25; SI-SDAG-NEXT: s_mov_b32 s4, s0 26; SI-SDAG-NEXT: s_mov_b32 s5, s1 27; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 28; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 29; SI-SDAG-NEXT: buffer_store_short v0, off, s[4:7], 0 30; SI-SDAG-NEXT: s_endpgm 31; 32; SI-GISEL-LABEL: fptrunc_f32_to_f16: 33; SI-GISEL: ; %bb.0: ; %entry 34; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 35; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 36; SI-GISEL-NEXT: s_load_dword s3, s[2:3], 0x0 37; SI-GISEL-NEXT: s_mov_b32 s2, -1 38; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 39; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, s3 40; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000 41; SI-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0 42; SI-GISEL-NEXT: s_endpgm 43; 44; VI-SDAG-LABEL: fptrunc_f32_to_f16: 45; VI-SDAG: ; %bb.0: ; %entry 46; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 47; VI-SDAG-NEXT: s_mov_b32 s7, 0xf000 48; VI-SDAG-NEXT: s_mov_b32 s6, -1 49; VI-SDAG-NEXT: s_mov_b32 s10, s6 50; VI-SDAG-NEXT: s_mov_b32 s11, s7 51; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 52; VI-SDAG-NEXT: s_mov_b32 s8, 
s2 53; VI-SDAG-NEXT: s_mov_b32 s9, s3 54; VI-SDAG-NEXT: buffer_load_dword v0, off, s[8:11], 0 55; VI-SDAG-NEXT: s_mov_b32 s4, s0 56; VI-SDAG-NEXT: s_mov_b32 s5, s1 57; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 58; VI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 59; VI-SDAG-NEXT: buffer_store_short v0, off, s[4:7], 0 60; VI-SDAG-NEXT: s_endpgm 61; 62; VI-GISEL-LABEL: fptrunc_f32_to_f16: 63; VI-GISEL: ; %bb.0: ; %entry 64; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 65; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 66; VI-GISEL-NEXT: s_load_dword s2, s[2:3], 0x0 67; VI-GISEL-NEXT: s_mov_b32 s3, 0xf000 68; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 69; VI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, s2 70; VI-GISEL-NEXT: s_mov_b32 s2, -1 71; VI-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0 72; VI-GISEL-NEXT: s_endpgm 73; 74; GFX9-SDAG-LABEL: fptrunc_f32_to_f16: 75; GFX9-SDAG: ; %bb.0: ; %entry 76; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 77; GFX9-SDAG-NEXT: s_mov_b32 s7, 0xf000 78; GFX9-SDAG-NEXT: s_mov_b32 s6, -1 79; GFX9-SDAG-NEXT: s_mov_b32 s10, s6 80; GFX9-SDAG-NEXT: s_mov_b32 s11, s7 81; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) 82; GFX9-SDAG-NEXT: s_mov_b32 s8, s2 83; GFX9-SDAG-NEXT: s_mov_b32 s9, s3 84; GFX9-SDAG-NEXT: buffer_load_dword v0, off, s[8:11], 0 85; GFX9-SDAG-NEXT: s_mov_b32 s4, s0 86; GFX9-SDAG-NEXT: s_mov_b32 s5, s1 87; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 88; GFX9-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 89; GFX9-SDAG-NEXT: buffer_store_short v0, off, s[4:7], 0 90; GFX9-SDAG-NEXT: s_endpgm 91; 92; GFX9-GISEL-LABEL: fptrunc_f32_to_f16: 93; GFX9-GISEL: ; %bb.0: ; %entry 94; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 95; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) 96; GFX9-GISEL-NEXT: s_load_dword s2, s[2:3], 0x0 97; GFX9-GISEL-NEXT: s_mov_b32 s3, 0xf000 98; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) 99; GFX9-GISEL-NEXT: v_cvt_f16_f32_e32 v0, s2 100; GFX9-GISEL-NEXT: s_mov_b32 s2, -1 101; GFX9-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0 102; GFX9-GISEL-NEXT: s_endpgm 103; 104; 
GFX950-SDAG-LABEL: fptrunc_f32_to_f16: 105; GFX950-SDAG: ; %bb.0: ; %entry 106; GFX950-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 107; GFX950-SDAG-NEXT: s_mov_b32 s7, 0xf000 108; GFX950-SDAG-NEXT: s_mov_b32 s6, -1 109; GFX950-SDAG-NEXT: s_mov_b32 s10, s6 110; GFX950-SDAG-NEXT: s_mov_b32 s11, s7 111; GFX950-SDAG-NEXT: s_waitcnt lgkmcnt(0) 112; GFX950-SDAG-NEXT: s_mov_b32 s8, s2 113; GFX950-SDAG-NEXT: s_mov_b32 s9, s3 114; GFX950-SDAG-NEXT: buffer_load_dword v0, off, s[8:11], 0 115; GFX950-SDAG-NEXT: s_mov_b32 s4, s0 116; GFX950-SDAG-NEXT: s_mov_b32 s5, s1 117; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) 118; GFX950-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 119; GFX950-SDAG-NEXT: buffer_store_short v0, off, s[4:7], 0 120; GFX950-SDAG-NEXT: s_endpgm 121; 122; GFX950-GISEL-LABEL: fptrunc_f32_to_f16: 123; GFX950-GISEL: ; %bb.0: ; %entry 124; GFX950-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 125; GFX950-GISEL-NEXT: s_waitcnt lgkmcnt(0) 126; GFX950-GISEL-NEXT: s_load_dword s2, s[2:3], 0x0 127; GFX950-GISEL-NEXT: s_mov_b32 s3, 0xf000 128; GFX950-GISEL-NEXT: s_waitcnt lgkmcnt(0) 129; GFX950-GISEL-NEXT: v_cvt_f16_f32_e32 v0, s2 130; GFX950-GISEL-NEXT: s_mov_b32 s2, -1 131; GFX950-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0 132; GFX950-GISEL-NEXT: s_endpgm 133; 134; GFX11-SDAG-LABEL: fptrunc_f32_to_f16: 135; GFX11-SDAG: ; %bb.0: ; %entry 136; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 137; GFX11-SDAG-NEXT: s_mov_b32 s6, -1 138; GFX11-SDAG-NEXT: s_mov_b32 s7, 0x31016000 139; GFX11-SDAG-NEXT: s_mov_b32 s10, s6 140; GFX11-SDAG-NEXT: s_mov_b32 s11, s7 141; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 142; GFX11-SDAG-NEXT: s_mov_b32 s8, s2 143; GFX11-SDAG-NEXT: s_mov_b32 s9, s3 144; GFX11-SDAG-NEXT: s_mov_b32 s4, s0 145; GFX11-SDAG-NEXT: buffer_load_b32 v0, off, s[8:11], 0 146; GFX11-SDAG-NEXT: s_mov_b32 s5, s1 147; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) 148; GFX11-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 149; GFX11-SDAG-NEXT: buffer_store_b16 v0, off, s[4:7], 0 150; GFX11-SDAG-NEXT: 
s_endpgm 151; 152; GFX11-GISEL-LABEL: fptrunc_f32_to_f16: 153; GFX11-GISEL: ; %bb.0: ; %entry 154; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 155; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 156; GFX11-GISEL-NEXT: s_load_b32 s2, s[2:3], 0x0 157; GFX11-GISEL-NEXT: s_mov_b32 s3, 0x31016000 158; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 159; GFX11-GISEL-NEXT: v_cvt_f16_f32_e32 v0, s2 160; GFX11-GISEL-NEXT: s_mov_b32 s2, -1 161; GFX11-GISEL-NEXT: buffer_store_b16 v0, off, s[0:3], 0 162; GFX11-GISEL-NEXT: s_endpgm 163 ptr addrspace(1) %r, 164 ptr addrspace(1) %a) { 165entry: 166 %a.val = load float, ptr addrspace(1) %a 167 %r.val = fptrunc float %a.val to half 168 store half %r.val, ptr addrspace(1) %r 169 ret void 170} 171 172define amdgpu_kernel void @fptrunc_f64_to_f16( 173; SI-SDAG-LABEL: fptrunc_f64_to_f16: 174; SI-SDAG: ; %bb.0: ; %entry 175; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 176; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 177; SI-SDAG-NEXT: s_mov_b32 s6, -1 178; SI-SDAG-NEXT: s_mov_b32 s10, s6 179; SI-SDAG-NEXT: s_mov_b32 s11, s7 180; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 181; SI-SDAG-NEXT: s_mov_b32 s8, s2 182; SI-SDAG-NEXT: s_mov_b32 s9, s3 183; SI-SDAG-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0 184; SI-SDAG-NEXT: s_mov_b32 s4, s0 185; SI-SDAG-NEXT: s_mov_b32 s5, s1 186; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 187; SI-SDAG-NEXT: v_cvt_f32_f64_e32 v0, v[0:1] 188; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 189; SI-SDAG-NEXT: buffer_store_short v0, off, s[4:7], 0 190; SI-SDAG-NEXT: s_endpgm 191; 192; SI-GISEL-LABEL: fptrunc_f64_to_f16: 193; SI-GISEL: ; %bb.0: ; %entry 194; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 195; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 196; SI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 197; SI-GISEL-NEXT: s_mov_b32 s2, -1 198; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 199; SI-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[4:5] 200; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 201; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000 202; SI-GISEL-NEXT: buffer_store_short 
v0, off, s[0:3], 0 203; SI-GISEL-NEXT: s_endpgm 204; 205; VI-SDAG-LABEL: fptrunc_f64_to_f16: 206; VI-SDAG: ; %bb.0: ; %entry 207; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 208; VI-SDAG-NEXT: s_mov_b32 s7, 0xf000 209; VI-SDAG-NEXT: s_mov_b32 s6, -1 210; VI-SDAG-NEXT: s_mov_b32 s10, s6 211; VI-SDAG-NEXT: s_mov_b32 s11, s7 212; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 213; VI-SDAG-NEXT: s_mov_b32 s8, s2 214; VI-SDAG-NEXT: s_mov_b32 s9, s3 215; VI-SDAG-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0 216; VI-SDAG-NEXT: s_mov_b32 s4, s0 217; VI-SDAG-NEXT: s_mov_b32 s5, s1 218; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 219; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v0, v[0:1] 220; VI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 221; VI-SDAG-NEXT: buffer_store_short v0, off, s[4:7], 0 222; VI-SDAG-NEXT: s_endpgm 223; 224; VI-GISEL-LABEL: fptrunc_f64_to_f16: 225; VI-GISEL: ; %bb.0: ; %entry 226; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 227; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 228; VI-GISEL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 229; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 230; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[2:3] 231; VI-GISEL-NEXT: s_mov_b32 s2, -1 232; VI-GISEL-NEXT: s_mov_b32 s3, 0xf000 233; VI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 234; VI-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0 235; VI-GISEL-NEXT: s_endpgm 236; 237; GFX9-SDAG-LABEL: fptrunc_f64_to_f16: 238; GFX9-SDAG: ; %bb.0: ; %entry 239; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 240; GFX9-SDAG-NEXT: s_mov_b32 s7, 0xf000 241; GFX9-SDAG-NEXT: s_mov_b32 s6, -1 242; GFX9-SDAG-NEXT: s_mov_b32 s10, s6 243; GFX9-SDAG-NEXT: s_mov_b32 s11, s7 244; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) 245; GFX9-SDAG-NEXT: s_mov_b32 s8, s2 246; GFX9-SDAG-NEXT: s_mov_b32 s9, s3 247; GFX9-SDAG-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0 248; GFX9-SDAG-NEXT: s_mov_b32 s4, s0 249; GFX9-SDAG-NEXT: s_mov_b32 s5, s1 250; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 251; GFX9-SDAG-NEXT: v_cvt_f32_f64_e32 v0, v[0:1] 252; GFX9-SDAG-NEXT: 
v_cvt_f16_f32_e32 v0, v0 253; GFX9-SDAG-NEXT: buffer_store_short v0, off, s[4:7], 0 254; GFX9-SDAG-NEXT: s_endpgm 255; 256; GFX9-GISEL-LABEL: fptrunc_f64_to_f16: 257; GFX9-GISEL: ; %bb.0: ; %entry 258; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 259; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) 260; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 261; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) 262; GFX9-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[2:3] 263; GFX9-GISEL-NEXT: s_mov_b32 s2, -1 264; GFX9-GISEL-NEXT: s_mov_b32 s3, 0xf000 265; GFX9-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 266; GFX9-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0 267; GFX9-GISEL-NEXT: s_endpgm 268; 269; GFX950-SDAG-LABEL: fptrunc_f64_to_f16: 270; GFX950-SDAG: ; %bb.0: ; %entry 271; GFX950-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 272; GFX950-SDAG-NEXT: s_mov_b32 s7, 0xf000 273; GFX950-SDAG-NEXT: s_mov_b32 s6, -1 274; GFX950-SDAG-NEXT: s_mov_b32 s10, s6 275; GFX950-SDAG-NEXT: s_mov_b32 s11, s7 276; GFX950-SDAG-NEXT: s_waitcnt lgkmcnt(0) 277; GFX950-SDAG-NEXT: s_mov_b32 s8, s2 278; GFX950-SDAG-NEXT: s_mov_b32 s9, s3 279; GFX950-SDAG-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0 280; GFX950-SDAG-NEXT: s_mov_b32 s4, s0 281; GFX950-SDAG-NEXT: s_mov_b32 s5, s1 282; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) 283; GFX950-SDAG-NEXT: v_cvt_f32_f64_e32 v0, v[0:1] 284; GFX950-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 285; GFX950-SDAG-NEXT: buffer_store_short v0, off, s[4:7], 0 286; GFX950-SDAG-NEXT: s_endpgm 287; 288; GFX950-GISEL-LABEL: fptrunc_f64_to_f16: 289; GFX950-GISEL: ; %bb.0: ; %entry 290; GFX950-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 291; GFX950-GISEL-NEXT: s_waitcnt lgkmcnt(0) 292; GFX950-GISEL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 293; GFX950-GISEL-NEXT: s_waitcnt lgkmcnt(0) 294; GFX950-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[2:3] 295; GFX950-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 296; GFX950-GISEL-NEXT: s_mov_b32 s2, -1 297; GFX950-GISEL-NEXT: s_mov_b32 s3, 0xf000 298; GFX950-GISEL-NEXT: 
buffer_store_short v0, off, s[0:3], 0 299; GFX950-GISEL-NEXT: s_endpgm 300; 301; GFX11-SDAG-LABEL: fptrunc_f64_to_f16: 302; GFX11-SDAG: ; %bb.0: ; %entry 303; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 304; GFX11-SDAG-NEXT: s_mov_b32 s6, -1 305; GFX11-SDAG-NEXT: s_mov_b32 s7, 0x31016000 306; GFX11-SDAG-NEXT: s_mov_b32 s10, s6 307; GFX11-SDAG-NEXT: s_mov_b32 s11, s7 308; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 309; GFX11-SDAG-NEXT: s_mov_b32 s8, s2 310; GFX11-SDAG-NEXT: s_mov_b32 s9, s3 311; GFX11-SDAG-NEXT: s_mov_b32 s4, s0 312; GFX11-SDAG-NEXT: buffer_load_b64 v[0:1], off, s[8:11], 0 313; GFX11-SDAG-NEXT: s_mov_b32 s5, s1 314; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) 315; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v0, v[0:1] 316; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 317; GFX11-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 318; GFX11-SDAG-NEXT: buffer_store_b16 v0, off, s[4:7], 0 319; GFX11-SDAG-NEXT: s_endpgm 320; 321; GFX11-GISEL-LABEL: fptrunc_f64_to_f16: 322; GFX11-GISEL: ; %bb.0: ; %entry 323; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 324; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 325; GFX11-GISEL-NEXT: s_load_b64 s[2:3], s[2:3], 0x0 326; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 327; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[2:3] 328; GFX11-GISEL-NEXT: s_mov_b32 s2, -1 329; GFX11-GISEL-NEXT: s_mov_b32 s3, 0x31016000 330; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 331; GFX11-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 332; GFX11-GISEL-NEXT: buffer_store_b16 v0, off, s[0:3], 0 333; GFX11-GISEL-NEXT: s_endpgm 334 ptr addrspace(1) %r, 335 ptr addrspace(1) %a) { 336entry: 337 %a.val = load double, ptr addrspace(1) %a 338 %r.val = fptrunc double %a.val to half 339 store half %r.val, ptr addrspace(1) %r 340 ret void 341} 342 343define amdgpu_kernel void @fptrunc_v2f32_to_v2f16( 344; SI-SDAG-LABEL: fptrunc_v2f32_to_v2f16: 345; SI-SDAG: ; %bb.0: ; %entry 346; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 347; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 348; SI-SDAG-NEXT: 
s_mov_b32 s6, -1 349; SI-SDAG-NEXT: s_mov_b32 s10, s6 350; SI-SDAG-NEXT: s_mov_b32 s11, s7 351; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 352; SI-SDAG-NEXT: s_mov_b32 s8, s2 353; SI-SDAG-NEXT: s_mov_b32 s9, s3 354; SI-SDAG-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0 355; SI-SDAG-NEXT: s_mov_b32 s4, s0 356; SI-SDAG-NEXT: s_mov_b32 s5, s1 357; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 358; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 359; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1 360; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 361; SI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 362; SI-SDAG-NEXT: buffer_store_dword v0, off, s[4:7], 0 363; SI-SDAG-NEXT: s_endpgm 364; 365; SI-GISEL-LABEL: fptrunc_v2f32_to_v2f16: 366; SI-GISEL: ; %bb.0: ; %entry 367; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 368; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 369; SI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 370; SI-GISEL-NEXT: s_mov_b32 s2, -1 371; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 372; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, s4 373; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, s5 374; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 375; SI-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 376; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000 377; SI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0 378; SI-GISEL-NEXT: s_endpgm 379; 380; VI-SDAG-LABEL: fptrunc_v2f32_to_v2f16: 381; VI-SDAG: ; %bb.0: ; %entry 382; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 383; VI-SDAG-NEXT: s_mov_b32 s7, 0xf000 384; VI-SDAG-NEXT: s_mov_b32 s6, -1 385; VI-SDAG-NEXT: s_mov_b32 s10, s6 386; VI-SDAG-NEXT: s_mov_b32 s11, s7 387; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 388; VI-SDAG-NEXT: s_mov_b32 s8, s2 389; VI-SDAG-NEXT: s_mov_b32 s9, s3 390; VI-SDAG-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0 391; VI-SDAG-NEXT: s_mov_b32 s4, s0 392; VI-SDAG-NEXT: s_mov_b32 s5, s1 393; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 394; VI-SDAG-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD 395; VI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 396; VI-SDAG-NEXT: 
v_or_b32_e32 v0, v0, v1 397; VI-SDAG-NEXT: buffer_store_dword v0, off, s[4:7], 0 398; VI-SDAG-NEXT: s_endpgm 399; 400; VI-GISEL-LABEL: fptrunc_v2f32_to_v2f16: 401; VI-GISEL: ; %bb.0: ; %entry 402; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 403; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 404; VI-GISEL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 405; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 406; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 407; VI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, s2 408; VI-GISEL-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD 409; VI-GISEL-NEXT: s_mov_b32 s2, -1 410; VI-GISEL-NEXT: s_mov_b32 s3, 0xf000 411; VI-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 412; VI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0 413; VI-GISEL-NEXT: s_endpgm 414; 415; GFX9-SDAG-LABEL: fptrunc_v2f32_to_v2f16: 416; GFX9-SDAG: ; %bb.0: ; %entry 417; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 418; GFX9-SDAG-NEXT: s_mov_b32 s7, 0xf000 419; GFX9-SDAG-NEXT: s_mov_b32 s6, -1 420; GFX9-SDAG-NEXT: s_mov_b32 s10, s6 421; GFX9-SDAG-NEXT: s_mov_b32 s11, s7 422; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) 423; GFX9-SDAG-NEXT: s_mov_b32 s8, s2 424; GFX9-SDAG-NEXT: s_mov_b32 s9, s3 425; GFX9-SDAG-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0 426; GFX9-SDAG-NEXT: s_mov_b32 s4, s0 427; GFX9-SDAG-NEXT: s_mov_b32 s5, s1 428; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 429; GFX9-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 430; GFX9-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 431; GFX9-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1 432; GFX9-SDAG-NEXT: buffer_store_dword v0, off, s[4:7], 0 433; GFX9-SDAG-NEXT: s_endpgm 434; 435; GFX9-GISEL-LABEL: fptrunc_v2f32_to_v2f16: 436; GFX9-GISEL: ; %bb.0: ; %entry 437; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 438; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) 439; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 440; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) 441; GFX9-GISEL-NEXT: v_cvt_f16_f32_e32 v0, s2 442; GFX9-GISEL-NEXT: v_cvt_f16_f32_e32 v1, s3 443; 
GFX9-GISEL-NEXT: s_mov_b32 s2, -1 444; GFX9-GISEL-NEXT: s_mov_b32 s3, 0xf000 445; GFX9-GISEL-NEXT: v_pack_b32_f16 v0, v0, v1 446; GFX9-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0 447; GFX9-GISEL-NEXT: s_endpgm 448; 449; GFX950-SDAG-LABEL: fptrunc_v2f32_to_v2f16: 450; GFX950-SDAG: ; %bb.0: ; %entry 451; GFX950-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 452; GFX950-SDAG-NEXT: s_mov_b32 s7, 0xf000 453; GFX950-SDAG-NEXT: s_mov_b32 s6, -1 454; GFX950-SDAG-NEXT: s_mov_b32 s10, s6 455; GFX950-SDAG-NEXT: s_mov_b32 s11, s7 456; GFX950-SDAG-NEXT: s_waitcnt lgkmcnt(0) 457; GFX950-SDAG-NEXT: s_mov_b32 s8, s2 458; GFX950-SDAG-NEXT: s_mov_b32 s9, s3 459; GFX950-SDAG-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0 460; GFX950-SDAG-NEXT: s_mov_b32 s4, s0 461; GFX950-SDAG-NEXT: s_mov_b32 s5, s1 462; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) 463; GFX950-SDAG-NEXT: v_cvt_pk_f16_f32 v0, v0, v1 464; GFX950-SDAG-NEXT: buffer_store_dword v0, off, s[4:7], 0 465; GFX950-SDAG-NEXT: s_endpgm 466; 467; GFX950-GISEL-LABEL: fptrunc_v2f32_to_v2f16: 468; GFX950-GISEL: ; %bb.0: ; %entry 469; GFX950-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 470; GFX950-GISEL-NEXT: s_waitcnt lgkmcnt(0) 471; GFX950-GISEL-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 472; GFX950-GISEL-NEXT: s_mov_b32 s2, -1 473; GFX950-GISEL-NEXT: s_mov_b32 s3, 0xf000 474; GFX950-GISEL-NEXT: s_waitcnt lgkmcnt(0) 475; GFX950-GISEL-NEXT: v_mov_b64_e32 v[0:1], s[4:5] 476; GFX950-GISEL-NEXT: v_cvt_pk_f16_f32 v0, v0, v1 477; GFX950-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0 478; GFX950-GISEL-NEXT: s_endpgm 479; 480; GFX11-SDAG-LABEL: fptrunc_v2f32_to_v2f16: 481; GFX11-SDAG: ; %bb.0: ; %entry 482; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 483; GFX11-SDAG-NEXT: s_mov_b32 s6, -1 484; GFX11-SDAG-NEXT: s_mov_b32 s7, 0x31016000 485; GFX11-SDAG-NEXT: s_mov_b32 s10, s6 486; GFX11-SDAG-NEXT: s_mov_b32 s11, s7 487; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 488; GFX11-SDAG-NEXT: s_mov_b32 s8, s2 489; GFX11-SDAG-NEXT: s_mov_b32 s9, 
s3 490; GFX11-SDAG-NEXT: s_mov_b32 s4, s0 491; GFX11-SDAG-NEXT: buffer_load_b64 v[0:1], off, s[8:11], 0 492; GFX11-SDAG-NEXT: s_mov_b32 s5, s1 493; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) 494; GFX11-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 495; GFX11-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 496; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 497; GFX11-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1 498; GFX11-SDAG-NEXT: buffer_store_b32 v0, off, s[4:7], 0 499; GFX11-SDAG-NEXT: s_endpgm 500; 501; GFX11-GISEL-LABEL: fptrunc_v2f32_to_v2f16: 502; GFX11-GISEL: ; %bb.0: ; %entry 503; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 504; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 505; GFX11-GISEL-NEXT: s_load_b64 s[2:3], s[2:3], 0x0 506; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 507; GFX11-GISEL-NEXT: v_cvt_f16_f32_e32 v0, s2 508; GFX11-GISEL-NEXT: v_cvt_f16_f32_e32 v1, s3 509; GFX11-GISEL-NEXT: s_mov_b32 s2, -1 510; GFX11-GISEL-NEXT: s_mov_b32 s3, 0x31016000 511; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 512; GFX11-GISEL-NEXT: v_pack_b32_f16 v0, v0, v1 513; GFX11-GISEL-NEXT: buffer_store_b32 v0, off, s[0:3], 0 514; GFX11-GISEL-NEXT: s_endpgm 515 ptr addrspace(1) %r, 516 ptr addrspace(1) %a) { 517entry: 518 %a.val = load <2 x float>, ptr addrspace(1) %a 519 %r.val = fptrunc <2 x float> %a.val to <2 x half> 520 store <2 x half> %r.val, ptr addrspace(1) %r 521 ret void 522} 523 524define amdgpu_kernel void @fptrunc_v2f64_to_v2f16( 525; SI-SDAG-LABEL: fptrunc_v2f64_to_v2f16: 526; SI-SDAG: ; %bb.0: ; %entry 527; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 528; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 529; SI-SDAG-NEXT: s_mov_b32 s6, -1 530; SI-SDAG-NEXT: s_mov_b32 s10, s6 531; SI-SDAG-NEXT: s_mov_b32 s11, s7 532; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 533; SI-SDAG-NEXT: s_mov_b32 s8, s2 534; SI-SDAG-NEXT: s_mov_b32 s9, s3 535; SI-SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 536; SI-SDAG-NEXT: s_mov_b32 s4, s0 537; SI-SDAG-NEXT: s_mov_b32 s5, s1 538; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 539; 
SI-SDAG-NEXT: v_cvt_f32_f64_e32 v2, v[2:3] 540; SI-SDAG-NEXT: v_cvt_f32_f64_e32 v0, v[0:1] 541; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v2 542; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1 543; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 544; SI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 545; SI-SDAG-NEXT: buffer_store_dword v0, off, s[4:7], 0 546; SI-SDAG-NEXT: s_endpgm 547; 548; SI-GISEL-LABEL: fptrunc_v2f64_to_v2f16: 549; SI-GISEL: ; %bb.0: ; %entry 550; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 551; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 552; SI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 553; SI-GISEL-NEXT: s_mov_b32 s2, -1 554; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 555; SI-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[4:5] 556; SI-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[6:7] 557; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 558; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 559; SI-GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 560; SI-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 561; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000 562; SI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0 563; SI-GISEL-NEXT: s_endpgm 564; 565; VI-SDAG-LABEL: fptrunc_v2f64_to_v2f16: 566; VI-SDAG: ; %bb.0: ; %entry 567; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 568; VI-SDAG-NEXT: s_mov_b32 s7, 0xf000 569; VI-SDAG-NEXT: s_mov_b32 s6, -1 570; VI-SDAG-NEXT: s_mov_b32 s10, s6 571; VI-SDAG-NEXT: s_mov_b32 s11, s7 572; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 573; VI-SDAG-NEXT: s_mov_b32 s8, s2 574; VI-SDAG-NEXT: s_mov_b32 s9, s3 575; VI-SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 576; VI-SDAG-NEXT: s_mov_b32 s4, s0 577; VI-SDAG-NEXT: s_mov_b32 s5, s1 578; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 579; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v2, v[2:3] 580; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v0, v[0:1] 581; VI-SDAG-NEXT: v_cvt_f16_f32_sdwa v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD 582; VI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 583; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 584; VI-SDAG-NEXT: buffer_store_dword v0, off, s[4:7], 0 585; 
VI-SDAG-NEXT: s_endpgm 586; 587; VI-GISEL-LABEL: fptrunc_v2f64_to_v2f16: 588; VI-GISEL: ; %bb.0: ; %entry 589; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 590; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 591; VI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 592; VI-GISEL-NEXT: s_mov_b32 s2, -1 593; VI-GISEL-NEXT: s_mov_b32 s3, 0xf000 594; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 595; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[4:5] 596; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[6:7] 597; VI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 598; VI-GISEL-NEXT: v_cvt_f16_f32_sdwa v1, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD 599; VI-GISEL-NEXT: v_or_b32_e32 v0, v0, v1 600; VI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0 601; VI-GISEL-NEXT: s_endpgm 602; 603; GFX9-SDAG-LABEL: fptrunc_v2f64_to_v2f16: 604; GFX9-SDAG: ; %bb.0: ; %entry 605; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 606; GFX9-SDAG-NEXT: s_mov_b32 s7, 0xf000 607; GFX9-SDAG-NEXT: s_mov_b32 s6, -1 608; GFX9-SDAG-NEXT: s_mov_b32 s10, s6 609; GFX9-SDAG-NEXT: s_mov_b32 s11, s7 610; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) 611; GFX9-SDAG-NEXT: s_mov_b32 s8, s2 612; GFX9-SDAG-NEXT: s_mov_b32 s9, s3 613; GFX9-SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 614; GFX9-SDAG-NEXT: s_mov_b32 s4, s0 615; GFX9-SDAG-NEXT: s_mov_b32 s5, s1 616; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 617; GFX9-SDAG-NEXT: v_cvt_f32_f64_e32 v2, v[2:3] 618; GFX9-SDAG-NEXT: v_cvt_f32_f64_e32 v0, v[0:1] 619; GFX9-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v2 620; GFX9-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 621; GFX9-SDAG-NEXT: v_lshl_or_b32 v0, v1, 16, v0 622; GFX9-SDAG-NEXT: buffer_store_dword v0, off, s[4:7], 0 623; GFX9-SDAG-NEXT: s_endpgm 624; 625; GFX9-GISEL-LABEL: fptrunc_v2f64_to_v2f16: 626; GFX9-GISEL: ; %bb.0: ; %entry 627; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 628; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) 629; GFX9-GISEL-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 630; GFX9-GISEL-NEXT: s_mov_b32 s2, -1 631; GFX9-GISEL-NEXT: s_mov_b32 s3, 
0xf000 632; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) 633; GFX9-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[4:5] 634; GFX9-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[6:7] 635; GFX9-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 636; GFX9-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 637; GFX9-GISEL-NEXT: v_pack_b32_f16 v0, v0, v1 638; GFX9-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0 639; GFX9-GISEL-NEXT: s_endpgm 640; 641; GFX950-SDAG-LABEL: fptrunc_v2f64_to_v2f16: 642; GFX950-SDAG: ; %bb.0: ; %entry 643; GFX950-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 644; GFX950-SDAG-NEXT: s_mov_b32 s7, 0xf000 645; GFX950-SDAG-NEXT: s_mov_b32 s6, -1 646; GFX950-SDAG-NEXT: s_mov_b32 s10, s6 647; GFX950-SDAG-NEXT: s_mov_b32 s11, s7 648; GFX950-SDAG-NEXT: s_waitcnt lgkmcnt(0) 649; GFX950-SDAG-NEXT: s_mov_b32 s8, s2 650; GFX950-SDAG-NEXT: s_mov_b32 s9, s3 651; GFX950-SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 652; GFX950-SDAG-NEXT: s_mov_b32 s4, s0 653; GFX950-SDAG-NEXT: s_mov_b32 s5, s1 654; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) 655; GFX950-SDAG-NEXT: v_cvt_f32_f64_e32 v2, v[2:3] 656; GFX950-SDAG-NEXT: v_cvt_f32_f64_e32 v0, v[0:1] 657; GFX950-SDAG-NEXT: v_cvt_pk_f16_f32 v0, v0, v2 658; GFX950-SDAG-NEXT: buffer_store_dword v0, off, s[4:7], 0 659; GFX950-SDAG-NEXT: s_endpgm 660; 661; GFX950-GISEL-LABEL: fptrunc_v2f64_to_v2f16: 662; GFX950-GISEL: ; %bb.0: ; %entry 663; GFX950-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 664; GFX950-GISEL-NEXT: s_waitcnt lgkmcnt(0) 665; GFX950-GISEL-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 666; GFX950-GISEL-NEXT: s_mov_b32 s2, -1 667; GFX950-GISEL-NEXT: s_mov_b32 s3, 0xf000 668; GFX950-GISEL-NEXT: s_waitcnt lgkmcnt(0) 669; GFX950-GISEL-NEXT: v_mov_b64_e32 v[0:1], s[4:5] 670; GFX950-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[6:7] 671; GFX950-GISEL-NEXT: v_cvt_f32_f64_e32 v2, v[2:3] 672; GFX950-GISEL-NEXT: v_cvt_f32_f64_e32 v0, v[0:1] 673; GFX950-GISEL-NEXT: v_cvt_pk_f16_f32 v0, v0, v2 674; GFX950-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0 675; GFX950-GISEL-NEXT: 
s_endpgm 676; 677; GFX11-SDAG-LABEL: fptrunc_v2f64_to_v2f16: 678; GFX11-SDAG: ; %bb.0: ; %entry 679; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 680; GFX11-SDAG-NEXT: s_mov_b32 s6, -1 681; GFX11-SDAG-NEXT: s_mov_b32 s7, 0x31016000 682; GFX11-SDAG-NEXT: s_mov_b32 s10, s6 683; GFX11-SDAG-NEXT: s_mov_b32 s11, s7 684; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 685; GFX11-SDAG-NEXT: s_mov_b32 s8, s2 686; GFX11-SDAG-NEXT: s_mov_b32 s9, s3 687; GFX11-SDAG-NEXT: s_mov_b32 s4, s0 688; GFX11-SDAG-NEXT: buffer_load_b128 v[0:3], off, s[8:11], 0 689; GFX11-SDAG-NEXT: s_mov_b32 s5, s1 690; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) 691; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v0, v[0:1] 692; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v1, v[2:3] 693; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 694; GFX11-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 695; GFX11-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v1 696; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 697; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 698; GFX11-SDAG-NEXT: v_lshl_or_b32 v0, v1, 16, v0 699; GFX11-SDAG-NEXT: buffer_store_b32 v0, off, s[4:7], 0 700; GFX11-SDAG-NEXT: s_endpgm 701; 702; GFX11-GISEL-LABEL: fptrunc_v2f64_to_v2f16: 703; GFX11-GISEL: ; %bb.0: ; %entry 704; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 705; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 706; GFX11-GISEL-NEXT: s_load_b128 s[4:7], s[2:3], 0x0 707; GFX11-GISEL-NEXT: s_mov_b32 s2, -1 708; GFX11-GISEL-NEXT: s_mov_b32 s3, 0x31016000 709; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 710; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[4:5] 711; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[6:7] 712; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 713; GFX11-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 714; GFX11-GISEL-NEXT: v_cvt_f16_f32_e32 v1, v1 715; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 716; GFX11-GISEL-NEXT: v_pack_b32_f16 v0, v0, v1 717; GFX11-GISEL-NEXT: 
buffer_store_b32 v0, off, s[0:3], 0 718; GFX11-GISEL-NEXT: s_endpgm 719 ptr addrspace(1) %r, 720 ptr addrspace(1) %a) { 721entry: 722 %a.val = load <2 x double>, ptr addrspace(1) %a 723 %r.val = fptrunc <2 x double> %a.val to <2 x half> 724 store <2 x half> %r.val, ptr addrspace(1) %r 725 ret void 726} 727 728define amdgpu_kernel void @fneg_fptrunc_f32_to_f16( 729; SI-SDAG-LABEL: fneg_fptrunc_f32_to_f16: 730; SI-SDAG: ; %bb.0: ; %entry 731; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 732; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 733; SI-SDAG-NEXT: s_mov_b32 s6, -1 734; SI-SDAG-NEXT: s_mov_b32 s10, s6 735; SI-SDAG-NEXT: s_mov_b32 s11, s7 736; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 737; SI-SDAG-NEXT: s_mov_b32 s8, s2 738; SI-SDAG-NEXT: s_mov_b32 s9, s3 739; SI-SDAG-NEXT: buffer_load_dword v0, off, s[8:11], 0 740; SI-SDAG-NEXT: s_mov_b32 s4, s0 741; SI-SDAG-NEXT: s_mov_b32 s5, s1 742; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 743; SI-SDAG-NEXT: v_cvt_f16_f32_e64 v0, -v0 744; SI-SDAG-NEXT: buffer_store_short v0, off, s[4:7], 0 745; SI-SDAG-NEXT: s_endpgm 746; 747; SI-GISEL-LABEL: fneg_fptrunc_f32_to_f16: 748; SI-GISEL: ; %bb.0: ; %entry 749; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 750; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 751; SI-GISEL-NEXT: s_load_dword s3, s[2:3], 0x0 752; SI-GISEL-NEXT: s_mov_b32 s2, -1 753; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 754; SI-GISEL-NEXT: v_cvt_f16_f32_e64 v0, -s3 755; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000 756; SI-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0 757; SI-GISEL-NEXT: s_endpgm 758; 759; VI-SDAG-LABEL: fneg_fptrunc_f32_to_f16: 760; VI-SDAG: ; %bb.0: ; %entry 761; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 762; VI-SDAG-NEXT: s_mov_b32 s7, 0xf000 763; VI-SDAG-NEXT: s_mov_b32 s6, -1 764; VI-SDAG-NEXT: s_mov_b32 s10, s6 765; VI-SDAG-NEXT: s_mov_b32 s11, s7 766; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 767; VI-SDAG-NEXT: s_mov_b32 s8, s2 768; VI-SDAG-NEXT: s_mov_b32 s9, s3 769; VI-SDAG-NEXT: buffer_load_dword v0, off, s[8:11], 0 770; 
VI-SDAG-NEXT: s_mov_b32 s4, s0 771; VI-SDAG-NEXT: s_mov_b32 s5, s1 772; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 773; VI-SDAG-NEXT: v_cvt_f16_f32_e64 v0, -v0 774; VI-SDAG-NEXT: buffer_store_short v0, off, s[4:7], 0 775; VI-SDAG-NEXT: s_endpgm 776; 777; VI-GISEL-LABEL: fneg_fptrunc_f32_to_f16: 778; VI-GISEL: ; %bb.0: ; %entry 779; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 780; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 781; VI-GISEL-NEXT: s_load_dword s2, s[2:3], 0x0 782; VI-GISEL-NEXT: s_mov_b32 s3, 0xf000 783; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 784; VI-GISEL-NEXT: v_cvt_f16_f32_e64 v0, -s2 785; VI-GISEL-NEXT: s_mov_b32 s2, -1 786; VI-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0 787; VI-GISEL-NEXT: s_endpgm 788; 789; GFX9-SDAG-LABEL: fneg_fptrunc_f32_to_f16: 790; GFX9-SDAG: ; %bb.0: ; %entry 791; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 792; GFX9-SDAG-NEXT: s_mov_b32 s7, 0xf000 793; GFX9-SDAG-NEXT: s_mov_b32 s6, -1 794; GFX9-SDAG-NEXT: s_mov_b32 s10, s6 795; GFX9-SDAG-NEXT: s_mov_b32 s11, s7 796; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) 797; GFX9-SDAG-NEXT: s_mov_b32 s8, s2 798; GFX9-SDAG-NEXT: s_mov_b32 s9, s3 799; GFX9-SDAG-NEXT: buffer_load_dword v0, off, s[8:11], 0 800; GFX9-SDAG-NEXT: s_mov_b32 s4, s0 801; GFX9-SDAG-NEXT: s_mov_b32 s5, s1 802; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 803; GFX9-SDAG-NEXT: v_cvt_f16_f32_e64 v0, -v0 804; GFX9-SDAG-NEXT: buffer_store_short v0, off, s[4:7], 0 805; GFX9-SDAG-NEXT: s_endpgm 806; 807; GFX9-GISEL-LABEL: fneg_fptrunc_f32_to_f16: 808; GFX9-GISEL: ; %bb.0: ; %entry 809; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 810; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) 811; GFX9-GISEL-NEXT: s_load_dword s2, s[2:3], 0x0 812; GFX9-GISEL-NEXT: s_mov_b32 s3, 0xf000 813; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) 814; GFX9-GISEL-NEXT: v_cvt_f16_f32_e64 v0, -s2 815; GFX9-GISEL-NEXT: s_mov_b32 s2, -1 816; GFX9-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0 817; GFX9-GISEL-NEXT: s_endpgm 818; 819; GFX950-SDAG-LABEL: 
fneg_fptrunc_f32_to_f16: 820; GFX950-SDAG: ; %bb.0: ; %entry 821; GFX950-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 822; GFX950-SDAG-NEXT: s_mov_b32 s7, 0xf000 823; GFX950-SDAG-NEXT: s_mov_b32 s6, -1 824; GFX950-SDAG-NEXT: s_mov_b32 s10, s6 825; GFX950-SDAG-NEXT: s_mov_b32 s11, s7 826; GFX950-SDAG-NEXT: s_waitcnt lgkmcnt(0) 827; GFX950-SDAG-NEXT: s_mov_b32 s8, s2 828; GFX950-SDAG-NEXT: s_mov_b32 s9, s3 829; GFX950-SDAG-NEXT: buffer_load_dword v0, off, s[8:11], 0 830; GFX950-SDAG-NEXT: s_mov_b32 s4, s0 831; GFX950-SDAG-NEXT: s_mov_b32 s5, s1 832; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) 833; GFX950-SDAG-NEXT: v_cvt_f16_f32_e64 v0, -v0 834; GFX950-SDAG-NEXT: buffer_store_short v0, off, s[4:7], 0 835; GFX950-SDAG-NEXT: s_endpgm 836; 837; GFX950-GISEL-LABEL: fneg_fptrunc_f32_to_f16: 838; GFX950-GISEL: ; %bb.0: ; %entry 839; GFX950-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 840; GFX950-GISEL-NEXT: s_waitcnt lgkmcnt(0) 841; GFX950-GISEL-NEXT: s_load_dword s2, s[2:3], 0x0 842; GFX950-GISEL-NEXT: s_mov_b32 s3, 0xf000 843; GFX950-GISEL-NEXT: s_waitcnt lgkmcnt(0) 844; GFX950-GISEL-NEXT: v_cvt_f16_f32_e64 v0, -s2 845; GFX950-GISEL-NEXT: s_mov_b32 s2, -1 846; GFX950-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0 847; GFX950-GISEL-NEXT: s_endpgm 848; 849; GFX11-SDAG-LABEL: fneg_fptrunc_f32_to_f16: 850; GFX11-SDAG: ; %bb.0: ; %entry 851; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 852; GFX11-SDAG-NEXT: s_mov_b32 s6, -1 853; GFX11-SDAG-NEXT: s_mov_b32 s7, 0x31016000 854; GFX11-SDAG-NEXT: s_mov_b32 s10, s6 855; GFX11-SDAG-NEXT: s_mov_b32 s11, s7 856; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 857; GFX11-SDAG-NEXT: s_mov_b32 s8, s2 858; GFX11-SDAG-NEXT: s_mov_b32 s9, s3 859; GFX11-SDAG-NEXT: s_mov_b32 s4, s0 860; GFX11-SDAG-NEXT: buffer_load_b32 v0, off, s[8:11], 0 861; GFX11-SDAG-NEXT: s_mov_b32 s5, s1 862; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) 863; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v0, -v0 864; GFX11-SDAG-NEXT: buffer_store_b16 v0, off, s[4:7], 0 865; GFX11-SDAG-NEXT: 
s_endpgm 866; 867; GFX11-GISEL-LABEL: fneg_fptrunc_f32_to_f16: 868; GFX11-GISEL: ; %bb.0: ; %entry 869; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 870; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 871; GFX11-GISEL-NEXT: s_load_b32 s2, s[2:3], 0x0 872; GFX11-GISEL-NEXT: s_mov_b32 s3, 0x31016000 873; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 874; GFX11-GISEL-NEXT: v_cvt_f16_f32_e64 v0, -s2 875; GFX11-GISEL-NEXT: s_mov_b32 s2, -1 876; GFX11-GISEL-NEXT: buffer_store_b16 v0, off, s[0:3], 0 877; GFX11-GISEL-NEXT: s_endpgm 878 ptr addrspace(1) %r, 879 ptr addrspace(1) %a) { 880entry: 881 %a.val = load float, ptr addrspace(1) %a 882 %a.fneg = fneg float %a.val 883 %r.val = fptrunc float %a.fneg to half 884 store half %r.val, ptr addrspace(1) %r 885 ret void 886} 887 ; llvm.fabs.f32 applied to the loaded f32 before fptrunc to half; every check prefix of this kernel expects the fabs folded into v_cvt_f16_f32_e64 as a source modifier (|v0| / |sN|). 888define amdgpu_kernel void @fabs_fptrunc_f32_to_f16( 889; SI-SDAG-LABEL: fabs_fptrunc_f32_to_f16: 890; SI-SDAG: ; %bb.0: ; %entry 891; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 892; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 893; SI-SDAG-NEXT: s_mov_b32 s6, -1 894; SI-SDAG-NEXT: s_mov_b32 s10, s6 895; SI-SDAG-NEXT: s_mov_b32 s11, s7 896; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 897; SI-SDAG-NEXT: s_mov_b32 s8, s2 898; SI-SDAG-NEXT: s_mov_b32 s9, s3 899; SI-SDAG-NEXT: buffer_load_dword v0, off, s[8:11], 0 900; SI-SDAG-NEXT: s_mov_b32 s4, s0 901; SI-SDAG-NEXT: s_mov_b32 s5, s1 902; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 903; SI-SDAG-NEXT: v_cvt_f16_f32_e64 v0, |v0| 904; SI-SDAG-NEXT: buffer_store_short v0, off, s[4:7], 0 905; SI-SDAG-NEXT: s_endpgm 906; 907; SI-GISEL-LABEL: fabs_fptrunc_f32_to_f16: 908; SI-GISEL: ; %bb.0: ; %entry 909; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 910; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 911; SI-GISEL-NEXT: s_load_dword s3, s[2:3], 0x0 912; SI-GISEL-NEXT: s_mov_b32 s2, -1 913; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 914; SI-GISEL-NEXT: v_cvt_f16_f32_e64 v0, |s3| 915; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000 916; SI-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0 917; SI-GISEL-NEXT: s_endpgm 
918; 919; VI-SDAG-LABEL: fabs_fptrunc_f32_to_f16: 920; VI-SDAG: ; %bb.0: ; %entry 921; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 922; VI-SDAG-NEXT: s_mov_b32 s7, 0xf000 923; VI-SDAG-NEXT: s_mov_b32 s6, -1 924; VI-SDAG-NEXT: s_mov_b32 s10, s6 925; VI-SDAG-NEXT: s_mov_b32 s11, s7 926; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 927; VI-SDAG-NEXT: s_mov_b32 s8, s2 928; VI-SDAG-NEXT: s_mov_b32 s9, s3 929; VI-SDAG-NEXT: buffer_load_dword v0, off, s[8:11], 0 930; VI-SDAG-NEXT: s_mov_b32 s4, s0 931; VI-SDAG-NEXT: s_mov_b32 s5, s1 932; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 933; VI-SDAG-NEXT: v_cvt_f16_f32_e64 v0, |v0| 934; VI-SDAG-NEXT: buffer_store_short v0, off, s[4:7], 0 935; VI-SDAG-NEXT: s_endpgm 936; 937; VI-GISEL-LABEL: fabs_fptrunc_f32_to_f16: 938; VI-GISEL: ; %bb.0: ; %entry 939; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 940; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 941; VI-GISEL-NEXT: s_load_dword s2, s[2:3], 0x0 942; VI-GISEL-NEXT: s_mov_b32 s3, 0xf000 943; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 944; VI-GISEL-NEXT: v_cvt_f16_f32_e64 v0, |s2| 945; VI-GISEL-NEXT: s_mov_b32 s2, -1 946; VI-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0 947; VI-GISEL-NEXT: s_endpgm 948; 949; GFX9-SDAG-LABEL: fabs_fptrunc_f32_to_f16: 950; GFX9-SDAG: ; %bb.0: ; %entry 951; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 952; GFX9-SDAG-NEXT: s_mov_b32 s7, 0xf000 953; GFX9-SDAG-NEXT: s_mov_b32 s6, -1 954; GFX9-SDAG-NEXT: s_mov_b32 s10, s6 955; GFX9-SDAG-NEXT: s_mov_b32 s11, s7 956; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) 957; GFX9-SDAG-NEXT: s_mov_b32 s8, s2 958; GFX9-SDAG-NEXT: s_mov_b32 s9, s3 959; GFX9-SDAG-NEXT: buffer_load_dword v0, off, s[8:11], 0 960; GFX9-SDAG-NEXT: s_mov_b32 s4, s0 961; GFX9-SDAG-NEXT: s_mov_b32 s5, s1 962; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 963; GFX9-SDAG-NEXT: v_cvt_f16_f32_e64 v0, |v0| 964; GFX9-SDAG-NEXT: buffer_store_short v0, off, s[4:7], 0 965; GFX9-SDAG-NEXT: s_endpgm 966; 967; GFX9-GISEL-LABEL: fabs_fptrunc_f32_to_f16: 968; GFX9-GISEL: ; %bb.0: ; %entry 
969; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 970; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) 971; GFX9-GISEL-NEXT: s_load_dword s2, s[2:3], 0x0 972; GFX9-GISEL-NEXT: s_mov_b32 s3, 0xf000 973; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) 974; GFX9-GISEL-NEXT: v_cvt_f16_f32_e64 v0, |s2| 975; GFX9-GISEL-NEXT: s_mov_b32 s2, -1 976; GFX9-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0 977; GFX9-GISEL-NEXT: s_endpgm 978; 979; GFX950-SDAG-LABEL: fabs_fptrunc_f32_to_f16: 980; GFX950-SDAG: ; %bb.0: ; %entry 981; GFX950-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 982; GFX950-SDAG-NEXT: s_mov_b32 s7, 0xf000 983; GFX950-SDAG-NEXT: s_mov_b32 s6, -1 984; GFX950-SDAG-NEXT: s_mov_b32 s10, s6 985; GFX950-SDAG-NEXT: s_mov_b32 s11, s7 986; GFX950-SDAG-NEXT: s_waitcnt lgkmcnt(0) 987; GFX950-SDAG-NEXT: s_mov_b32 s8, s2 988; GFX950-SDAG-NEXT: s_mov_b32 s9, s3 989; GFX950-SDAG-NEXT: buffer_load_dword v0, off, s[8:11], 0 990; GFX950-SDAG-NEXT: s_mov_b32 s4, s0 991; GFX950-SDAG-NEXT: s_mov_b32 s5, s1 992; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) 993; GFX950-SDAG-NEXT: v_cvt_f16_f32_e64 v0, |v0| 994; GFX950-SDAG-NEXT: buffer_store_short v0, off, s[4:7], 0 995; GFX950-SDAG-NEXT: s_endpgm 996; 997; GFX950-GISEL-LABEL: fabs_fptrunc_f32_to_f16: 998; GFX950-GISEL: ; %bb.0: ; %entry 999; GFX950-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1000; GFX950-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1001; GFX950-GISEL-NEXT: s_load_dword s2, s[2:3], 0x0 1002; GFX950-GISEL-NEXT: s_mov_b32 s3, 0xf000 1003; GFX950-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1004; GFX950-GISEL-NEXT: v_cvt_f16_f32_e64 v0, |s2| 1005; GFX950-GISEL-NEXT: s_mov_b32 s2, -1 1006; GFX950-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0 1007; GFX950-GISEL-NEXT: s_endpgm 1008; 1009; GFX11-SDAG-LABEL: fabs_fptrunc_f32_to_f16: 1010; GFX11-SDAG: ; %bb.0: ; %entry 1011; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1012; GFX11-SDAG-NEXT: s_mov_b32 s6, -1 1013; GFX11-SDAG-NEXT: s_mov_b32 s7, 0x31016000 1014; GFX11-SDAG-NEXT: s_mov_b32 s10, s6 1015; 
GFX11-SDAG-NEXT: s_mov_b32 s11, s7 1016; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1017; GFX11-SDAG-NEXT: s_mov_b32 s8, s2 1018; GFX11-SDAG-NEXT: s_mov_b32 s9, s3 1019; GFX11-SDAG-NEXT: s_mov_b32 s4, s0 1020; GFX11-SDAG-NEXT: buffer_load_b32 v0, off, s[8:11], 0 1021; GFX11-SDAG-NEXT: s_mov_b32 s5, s1 1022; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) 1023; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v0, |v0| 1024; GFX11-SDAG-NEXT: buffer_store_b16 v0, off, s[4:7], 0 1025; GFX11-SDAG-NEXT: s_endpgm 1026; 1027; GFX11-GISEL-LABEL: fabs_fptrunc_f32_to_f16: 1028; GFX11-GISEL: ; %bb.0: ; %entry 1029; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1030; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1031; GFX11-GISEL-NEXT: s_load_b32 s2, s[2:3], 0x0 1032; GFX11-GISEL-NEXT: s_mov_b32 s3, 0x31016000 1033; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1034; GFX11-GISEL-NEXT: v_cvt_f16_f32_e64 v0, |s2| 1035; GFX11-GISEL-NEXT: s_mov_b32 s2, -1 1036; GFX11-GISEL-NEXT: buffer_store_b16 v0, off, s[0:3], 0 1037; GFX11-GISEL-NEXT: s_endpgm 1038 ptr addrspace(1) %r, 1039 ptr addrspace(1) %a) { 1040entry: 1041 %a.val = load float, ptr addrspace(1) %a 1042 %a.fabs = call float @llvm.fabs.f32(float %a.val) 1043 %r.val = fptrunc float %a.fabs to half 1044 store half %r.val, ptr addrspace(1) %r 1045 ret void 1046} 1047 ; fneg(fabs(x)) on the loaded f32 before fptrunc to half; every check prefix of this kernel expects both operations folded into v_cvt_f16_f32_e64 as one source modifier (-|v0| / -|sN|). 1048define amdgpu_kernel void @fneg_fabs_fptrunc_f32_to_f16( 1049; SI-SDAG-LABEL: fneg_fabs_fptrunc_f32_to_f16: 1050; SI-SDAG: ; %bb.0: ; %entry 1051; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 1052; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 1053; SI-SDAG-NEXT: s_mov_b32 s6, -1 1054; SI-SDAG-NEXT: s_mov_b32 s10, s6 1055; SI-SDAG-NEXT: s_mov_b32 s11, s7 1056; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1057; SI-SDAG-NEXT: s_mov_b32 s8, s2 1058; SI-SDAG-NEXT: s_mov_b32 s9, s3 1059; SI-SDAG-NEXT: buffer_load_dword v0, off, s[8:11], 0 1060; SI-SDAG-NEXT: s_mov_b32 s4, s0 1061; SI-SDAG-NEXT: s_mov_b32 s5, s1 1062; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 1063; SI-SDAG-NEXT: v_cvt_f16_f32_e64 v0, -|v0| 1064; SI-SDAG-NEXT: 
buffer_store_short v0, off, s[4:7], 0 1065; SI-SDAG-NEXT: s_endpgm 1066; 1067; SI-GISEL-LABEL: fneg_fabs_fptrunc_f32_to_f16: 1068; SI-GISEL: ; %bb.0: ; %entry 1069; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 1070; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1071; SI-GISEL-NEXT: s_load_dword s3, s[2:3], 0x0 1072; SI-GISEL-NEXT: s_mov_b32 s2, -1 1073; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1074; SI-GISEL-NEXT: v_cvt_f16_f32_e64 v0, -|s3| 1075; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000 1076; SI-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0 1077; SI-GISEL-NEXT: s_endpgm 1078; 1079; VI-SDAG-LABEL: fneg_fabs_fptrunc_f32_to_f16: 1080; VI-SDAG: ; %bb.0: ; %entry 1081; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1082; VI-SDAG-NEXT: s_mov_b32 s7, 0xf000 1083; VI-SDAG-NEXT: s_mov_b32 s6, -1 1084; VI-SDAG-NEXT: s_mov_b32 s10, s6 1085; VI-SDAG-NEXT: s_mov_b32 s11, s7 1086; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1087; VI-SDAG-NEXT: s_mov_b32 s8, s2 1088; VI-SDAG-NEXT: s_mov_b32 s9, s3 1089; VI-SDAG-NEXT: buffer_load_dword v0, off, s[8:11], 0 1090; VI-SDAG-NEXT: s_mov_b32 s4, s0 1091; VI-SDAG-NEXT: s_mov_b32 s5, s1 1092; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 1093; VI-SDAG-NEXT: v_cvt_f16_f32_e64 v0, -|v0| 1094; VI-SDAG-NEXT: buffer_store_short v0, off, s[4:7], 0 1095; VI-SDAG-NEXT: s_endpgm 1096; 1097; VI-GISEL-LABEL: fneg_fabs_fptrunc_f32_to_f16: 1098; VI-GISEL: ; %bb.0: ; %entry 1099; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1100; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1101; VI-GISEL-NEXT: s_load_dword s2, s[2:3], 0x0 1102; VI-GISEL-NEXT: s_mov_b32 s3, 0xf000 1103; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1104; VI-GISEL-NEXT: v_cvt_f16_f32_e64 v0, -|s2| 1105; VI-GISEL-NEXT: s_mov_b32 s2, -1 1106; VI-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0 1107; VI-GISEL-NEXT: s_endpgm 1108; 1109; GFX9-SDAG-LABEL: fneg_fabs_fptrunc_f32_to_f16: 1110; GFX9-SDAG: ; %bb.0: ; %entry 1111; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1112; GFX9-SDAG-NEXT: s_mov_b32 s7, 0xf000 1113; 
GFX9-SDAG-NEXT: s_mov_b32 s6, -1 1114; GFX9-SDAG-NEXT: s_mov_b32 s10, s6 1115; GFX9-SDAG-NEXT: s_mov_b32 s11, s7 1116; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1117; GFX9-SDAG-NEXT: s_mov_b32 s8, s2 1118; GFX9-SDAG-NEXT: s_mov_b32 s9, s3 1119; GFX9-SDAG-NEXT: buffer_load_dword v0, off, s[8:11], 0 1120; GFX9-SDAG-NEXT: s_mov_b32 s4, s0 1121; GFX9-SDAG-NEXT: s_mov_b32 s5, s1 1122; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 1123; GFX9-SDAG-NEXT: v_cvt_f16_f32_e64 v0, -|v0| 1124; GFX9-SDAG-NEXT: buffer_store_short v0, off, s[4:7], 0 1125; GFX9-SDAG-NEXT: s_endpgm 1126; 1127; GFX9-GISEL-LABEL: fneg_fabs_fptrunc_f32_to_f16: 1128; GFX9-GISEL: ; %bb.0: ; %entry 1129; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1130; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1131; GFX9-GISEL-NEXT: s_load_dword s2, s[2:3], 0x0 1132; GFX9-GISEL-NEXT: s_mov_b32 s3, 0xf000 1133; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1134; GFX9-GISEL-NEXT: v_cvt_f16_f32_e64 v0, -|s2| 1135; GFX9-GISEL-NEXT: s_mov_b32 s2, -1 1136; GFX9-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0 1137; GFX9-GISEL-NEXT: s_endpgm 1138; 1139; GFX950-SDAG-LABEL: fneg_fabs_fptrunc_f32_to_f16: 1140; GFX950-SDAG: ; %bb.0: ; %entry 1141; GFX950-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1142; GFX950-SDAG-NEXT: s_mov_b32 s7, 0xf000 1143; GFX950-SDAG-NEXT: s_mov_b32 s6, -1 1144; GFX950-SDAG-NEXT: s_mov_b32 s10, s6 1145; GFX950-SDAG-NEXT: s_mov_b32 s11, s7 1146; GFX950-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1147; GFX950-SDAG-NEXT: s_mov_b32 s8, s2 1148; GFX950-SDAG-NEXT: s_mov_b32 s9, s3 1149; GFX950-SDAG-NEXT: buffer_load_dword v0, off, s[8:11], 0 1150; GFX950-SDAG-NEXT: s_mov_b32 s4, s0 1151; GFX950-SDAG-NEXT: s_mov_b32 s5, s1 1152; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) 1153; GFX950-SDAG-NEXT: v_cvt_f16_f32_e64 v0, -|v0| 1154; GFX950-SDAG-NEXT: buffer_store_short v0, off, s[4:7], 0 1155; GFX950-SDAG-NEXT: s_endpgm 1156; 1157; GFX950-GISEL-LABEL: fneg_fabs_fptrunc_f32_to_f16: 1158; GFX950-GISEL: ; %bb.0: ; %entry 1159; GFX950-GISEL-NEXT: 
s_load_dwordx4 s[0:3], s[4:5], 0x24 1160; GFX950-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1161; GFX950-GISEL-NEXT: s_load_dword s2, s[2:3], 0x0 1162; GFX950-GISEL-NEXT: s_mov_b32 s3, 0xf000 1163; GFX950-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1164; GFX950-GISEL-NEXT: v_cvt_f16_f32_e64 v0, -|s2| 1165; GFX950-GISEL-NEXT: s_mov_b32 s2, -1 1166; GFX950-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0 1167; GFX950-GISEL-NEXT: s_endpgm 1168; 1169; GFX11-SDAG-LABEL: fneg_fabs_fptrunc_f32_to_f16: 1170; GFX11-SDAG: ; %bb.0: ; %entry 1171; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1172; GFX11-SDAG-NEXT: s_mov_b32 s6, -1 1173; GFX11-SDAG-NEXT: s_mov_b32 s7, 0x31016000 1174; GFX11-SDAG-NEXT: s_mov_b32 s10, s6 1175; GFX11-SDAG-NEXT: s_mov_b32 s11, s7 1176; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1177; GFX11-SDAG-NEXT: s_mov_b32 s8, s2 1178; GFX11-SDAG-NEXT: s_mov_b32 s9, s3 1179; GFX11-SDAG-NEXT: s_mov_b32 s4, s0 1180; GFX11-SDAG-NEXT: buffer_load_b32 v0, off, s[8:11], 0 1181; GFX11-SDAG-NEXT: s_mov_b32 s5, s1 1182; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) 1183; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v0, -|v0| 1184; GFX11-SDAG-NEXT: buffer_store_b16 v0, off, s[4:7], 0 1185; GFX11-SDAG-NEXT: s_endpgm 1186; 1187; GFX11-GISEL-LABEL: fneg_fabs_fptrunc_f32_to_f16: 1188; GFX11-GISEL: ; %bb.0: ; %entry 1189; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1190; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1191; GFX11-GISEL-NEXT: s_load_b32 s2, s[2:3], 0x0 1192; GFX11-GISEL-NEXT: s_mov_b32 s3, 0x31016000 1193; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1194; GFX11-GISEL-NEXT: v_cvt_f16_f32_e64 v0, -|s2| 1195; GFX11-GISEL-NEXT: s_mov_b32 s2, -1 1196; GFX11-GISEL-NEXT: buffer_store_b16 v0, off, s[0:3], 0 1197; GFX11-GISEL-NEXT: s_endpgm 1198 ptr addrspace(1) %r, 1199 ptr addrspace(1) %a) #0 { 1200entry: 1201 %a.val = load float, ptr addrspace(1) %a 1202 %a.fabs = call float @llvm.fabs.f32(float %a.val) 1203 %a.fneg.fabs = fneg float %a.fabs 1204 %r.val = fptrunc float %a.fneg.fabs to half 1205 store half 
%r.val, ptr addrspace(1) %r 1206 ret void 1207} 1208 1209define amdgpu_kernel void @fptrunc_f32_to_f16_zext_i32( 1210; SI-SDAG-LABEL: fptrunc_f32_to_f16_zext_i32: 1211; SI-SDAG: ; %bb.0: ; %entry 1212; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 1213; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 1214; SI-SDAG-NEXT: s_mov_b32 s6, -1 1215; SI-SDAG-NEXT: s_mov_b32 s10, s6 1216; SI-SDAG-NEXT: s_mov_b32 s11, s7 1217; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1218; SI-SDAG-NEXT: s_mov_b32 s8, s2 1219; SI-SDAG-NEXT: s_mov_b32 s9, s3 1220; SI-SDAG-NEXT: buffer_load_dword v0, off, s[8:11], 0 1221; SI-SDAG-NEXT: s_mov_b32 s4, s0 1222; SI-SDAG-NEXT: s_mov_b32 s5, s1 1223; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 1224; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 1225; SI-SDAG-NEXT: buffer_store_dword v0, off, s[4:7], 0 1226; SI-SDAG-NEXT: s_endpgm 1227; 1228; SI-GISEL-LABEL: fptrunc_f32_to_f16_zext_i32: 1229; SI-GISEL: ; %bb.0: ; %entry 1230; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 1231; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1232; SI-GISEL-NEXT: s_load_dword s3, s[2:3], 0x0 1233; SI-GISEL-NEXT: s_mov_b32 s2, -1 1234; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1235; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, s3 1236; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000 1237; SI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0 1238; SI-GISEL-NEXT: s_endpgm 1239; 1240; VI-SDAG-LABEL: fptrunc_f32_to_f16_zext_i32: 1241; VI-SDAG: ; %bb.0: ; %entry 1242; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1243; VI-SDAG-NEXT: s_mov_b32 s7, 0xf000 1244; VI-SDAG-NEXT: s_mov_b32 s6, -1 1245; VI-SDAG-NEXT: s_mov_b32 s10, s6 1246; VI-SDAG-NEXT: s_mov_b32 s11, s7 1247; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1248; VI-SDAG-NEXT: s_mov_b32 s8, s2 1249; VI-SDAG-NEXT: s_mov_b32 s9, s3 1250; VI-SDAG-NEXT: buffer_load_dword v0, off, s[8:11], 0 1251; VI-SDAG-NEXT: s_mov_b32 s4, s0 1252; VI-SDAG-NEXT: s_mov_b32 s5, s1 1253; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 1254; VI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 1255; VI-SDAG-NEXT: buffer_store_dword v0, off, 
s[4:7], 0 1256; VI-SDAG-NEXT: s_endpgm 1257; 1258; VI-GISEL-LABEL: fptrunc_f32_to_f16_zext_i32: 1259; VI-GISEL: ; %bb.0: ; %entry 1260; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1261; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1262; VI-GISEL-NEXT: s_load_dword s2, s[2:3], 0x0 1263; VI-GISEL-NEXT: s_mov_b32 s3, 0xf000 1264; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1265; VI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, s2 1266; VI-GISEL-NEXT: s_mov_b32 s2, -1 1267; VI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0 1268; VI-GISEL-NEXT: s_endpgm 1269; 1270; GFX9-SDAG-LABEL: fptrunc_f32_to_f16_zext_i32: 1271; GFX9-SDAG: ; %bb.0: ; %entry 1272; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1273; GFX9-SDAG-NEXT: s_mov_b32 s7, 0xf000 1274; GFX9-SDAG-NEXT: s_mov_b32 s6, -1 1275; GFX9-SDAG-NEXT: s_mov_b32 s10, s6 1276; GFX9-SDAG-NEXT: s_mov_b32 s11, s7 1277; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1278; GFX9-SDAG-NEXT: s_mov_b32 s8, s2 1279; GFX9-SDAG-NEXT: s_mov_b32 s9, s3 1280; GFX9-SDAG-NEXT: buffer_load_dword v0, off, s[8:11], 0 1281; GFX9-SDAG-NEXT: s_mov_b32 s4, s0 1282; GFX9-SDAG-NEXT: s_mov_b32 s5, s1 1283; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 1284; GFX9-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 1285; GFX9-SDAG-NEXT: buffer_store_dword v0, off, s[4:7], 0 1286; GFX9-SDAG-NEXT: s_endpgm 1287; 1288; GFX9-GISEL-LABEL: fptrunc_f32_to_f16_zext_i32: 1289; GFX9-GISEL: ; %bb.0: ; %entry 1290; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1291; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1292; GFX9-GISEL-NEXT: s_load_dword s2, s[2:3], 0x0 1293; GFX9-GISEL-NEXT: s_mov_b32 s3, 0xf000 1294; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1295; GFX9-GISEL-NEXT: v_cvt_f16_f32_e32 v0, s2 1296; GFX9-GISEL-NEXT: s_mov_b32 s2, -1 1297; GFX9-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0 1298; GFX9-GISEL-NEXT: s_endpgm 1299; 1300; GFX950-SDAG-LABEL: fptrunc_f32_to_f16_zext_i32: 1301; GFX950-SDAG: ; %bb.0: ; %entry 1302; GFX950-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1303; GFX950-SDAG-NEXT: s_mov_b32 
s7, 0xf000 1304; GFX950-SDAG-NEXT: s_mov_b32 s6, -1 1305; GFX950-SDAG-NEXT: s_mov_b32 s10, s6 1306; GFX950-SDAG-NEXT: s_mov_b32 s11, s7 1307; GFX950-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1308; GFX950-SDAG-NEXT: s_mov_b32 s8, s2 1309; GFX950-SDAG-NEXT: s_mov_b32 s9, s3 1310; GFX950-SDAG-NEXT: buffer_load_dword v0, off, s[8:11], 0 1311; GFX950-SDAG-NEXT: s_mov_b32 s4, s0 1312; GFX950-SDAG-NEXT: s_mov_b32 s5, s1 1313; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) 1314; GFX950-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 1315; GFX950-SDAG-NEXT: buffer_store_dword v0, off, s[4:7], 0 1316; GFX950-SDAG-NEXT: s_endpgm 1317; 1318; GFX950-GISEL-LABEL: fptrunc_f32_to_f16_zext_i32: 1319; GFX950-GISEL: ; %bb.0: ; %entry 1320; GFX950-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1321; GFX950-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1322; GFX950-GISEL-NEXT: s_load_dword s2, s[2:3], 0x0 1323; GFX950-GISEL-NEXT: s_mov_b32 s3, 0xf000 1324; GFX950-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1325; GFX950-GISEL-NEXT: v_cvt_f16_f32_e32 v0, s2 1326; GFX950-GISEL-NEXT: s_mov_b32 s2, -1 1327; GFX950-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0 1328; GFX950-GISEL-NEXT: s_endpgm 1329; 1330; GFX11-SDAG-LABEL: fptrunc_f32_to_f16_zext_i32: 1331; GFX11-SDAG: ; %bb.0: ; %entry 1332; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1333; GFX11-SDAG-NEXT: s_mov_b32 s6, -1 1334; GFX11-SDAG-NEXT: s_mov_b32 s7, 0x31016000 1335; GFX11-SDAG-NEXT: s_mov_b32 s10, s6 1336; GFX11-SDAG-NEXT: s_mov_b32 s11, s7 1337; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1338; GFX11-SDAG-NEXT: s_mov_b32 s8, s2 1339; GFX11-SDAG-NEXT: s_mov_b32 s9, s3 1340; GFX11-SDAG-NEXT: s_mov_b32 s4, s0 1341; GFX11-SDAG-NEXT: buffer_load_b32 v0, off, s[8:11], 0 1342; GFX11-SDAG-NEXT: s_mov_b32 s5, s1 1343; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) 1344; GFX11-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 1345; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 1346; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 1347; GFX11-SDAG-NEXT: buffer_store_b32 v0, off, s[4:7], 0 1348; GFX11-SDAG-NEXT: 
s_endpgm 1349; 1350; GFX11-GISEL-LABEL: fptrunc_f32_to_f16_zext_i32: 1351; GFX11-GISEL: ; %bb.0: ; %entry 1352; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1353; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1354; GFX11-GISEL-NEXT: s_load_b32 s2, s[2:3], 0x0 1355; GFX11-GISEL-NEXT: s_mov_b32 s3, 0x31016000 1356; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1357; GFX11-GISEL-NEXT: v_cvt_f16_f32_e32 v0, s2 1358; GFX11-GISEL-NEXT: s_mov_b32 s2, -1 1359; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 1360; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 1361; GFX11-GISEL-NEXT: buffer_store_b32 v0, off, s[0:3], 0 1362; GFX11-GISEL-NEXT: s_endpgm 1363 ptr addrspace(1) %r, 1364 ptr addrspace(1) %a) #0 { 1365entry: 1366 %a.val = load float, ptr addrspace(1) %a 1367 %r.val = fptrunc float %a.val to half 1368 %r.i16 = bitcast half %r.val to i16 1369 %zext = zext i16 %r.i16 to i32 1370 store i32 %zext, ptr addrspace(1) %r 1371 ret void 1372} 1373 ; Same zext-to-i32 store as the kernel above but with llvm.fabs.f32 on the input; the checks expect the |x| source modifier on v_cvt_f16_f32_e64, with v_and_b32 0xffff added only for GFX11. 1374define amdgpu_kernel void @fptrunc_fabs_f32_to_f16_zext_i32( 1375; SI-SDAG-LABEL: fptrunc_fabs_f32_to_f16_zext_i32: 1376; SI-SDAG: ; %bb.0: ; %entry 1377; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 1378; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 1379; SI-SDAG-NEXT: s_mov_b32 s6, -1 1380; SI-SDAG-NEXT: s_mov_b32 s10, s6 1381; SI-SDAG-NEXT: s_mov_b32 s11, s7 1382; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1383; SI-SDAG-NEXT: s_mov_b32 s8, s2 1384; SI-SDAG-NEXT: s_mov_b32 s9, s3 1385; SI-SDAG-NEXT: buffer_load_dword v0, off, s[8:11], 0 1386; SI-SDAG-NEXT: s_mov_b32 s4, s0 1387; SI-SDAG-NEXT: s_mov_b32 s5, s1 1388; SI-SDAG-NEXT: s_waitcnt vmcnt(0) 1389; SI-SDAG-NEXT: v_cvt_f16_f32_e64 v0, |v0| 1390; SI-SDAG-NEXT: buffer_store_dword v0, off, s[4:7], 0 1391; SI-SDAG-NEXT: s_endpgm 1392; 1393; SI-GISEL-LABEL: fptrunc_fabs_f32_to_f16_zext_i32: 1394; SI-GISEL: ; %bb.0: ; %entry 1395; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 1396; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1397; SI-GISEL-NEXT: s_load_dword s3, s[2:3], 0x0 1398; SI-GISEL-NEXT: 
s_mov_b32 s2, -1 1399; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1400; SI-GISEL-NEXT: v_cvt_f16_f32_e64 v0, |s3| 1401; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000 1402; SI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0 1403; SI-GISEL-NEXT: s_endpgm 1404; 1405; VI-SDAG-LABEL: fptrunc_fabs_f32_to_f16_zext_i32: 1406; VI-SDAG: ; %bb.0: ; %entry 1407; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1408; VI-SDAG-NEXT: s_mov_b32 s7, 0xf000 1409; VI-SDAG-NEXT: s_mov_b32 s6, -1 1410; VI-SDAG-NEXT: s_mov_b32 s10, s6 1411; VI-SDAG-NEXT: s_mov_b32 s11, s7 1412; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1413; VI-SDAG-NEXT: s_mov_b32 s8, s2 1414; VI-SDAG-NEXT: s_mov_b32 s9, s3 1415; VI-SDAG-NEXT: buffer_load_dword v0, off, s[8:11], 0 1416; VI-SDAG-NEXT: s_mov_b32 s4, s0 1417; VI-SDAG-NEXT: s_mov_b32 s5, s1 1418; VI-SDAG-NEXT: s_waitcnt vmcnt(0) 1419; VI-SDAG-NEXT: v_cvt_f16_f32_e64 v0, |v0| 1420; VI-SDAG-NEXT: buffer_store_dword v0, off, s[4:7], 0 1421; VI-SDAG-NEXT: s_endpgm 1422; 1423; VI-GISEL-LABEL: fptrunc_fabs_f32_to_f16_zext_i32: 1424; VI-GISEL: ; %bb.0: ; %entry 1425; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1426; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1427; VI-GISEL-NEXT: s_load_dword s2, s[2:3], 0x0 1428; VI-GISEL-NEXT: s_mov_b32 s3, 0xf000 1429; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1430; VI-GISEL-NEXT: v_cvt_f16_f32_e64 v0, |s2| 1431; VI-GISEL-NEXT: s_mov_b32 s2, -1 1432; VI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0 1433; VI-GISEL-NEXT: s_endpgm 1434; 1435; GFX9-SDAG-LABEL: fptrunc_fabs_f32_to_f16_zext_i32: 1436; GFX9-SDAG: ; %bb.0: ; %entry 1437; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1438; GFX9-SDAG-NEXT: s_mov_b32 s7, 0xf000 1439; GFX9-SDAG-NEXT: s_mov_b32 s6, -1 1440; GFX9-SDAG-NEXT: s_mov_b32 s10, s6 1441; GFX9-SDAG-NEXT: s_mov_b32 s11, s7 1442; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1443; GFX9-SDAG-NEXT: s_mov_b32 s8, s2 1444; GFX9-SDAG-NEXT: s_mov_b32 s9, s3 1445; GFX9-SDAG-NEXT: buffer_load_dword v0, off, s[8:11], 0 1446; GFX9-SDAG-NEXT: s_mov_b32 
s4, s0 1447; GFX9-SDAG-NEXT: s_mov_b32 s5, s1 1448; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) 1449; GFX9-SDAG-NEXT: v_cvt_f16_f32_e64 v0, |v0| 1450; GFX9-SDAG-NEXT: buffer_store_dword v0, off, s[4:7], 0 1451; GFX9-SDAG-NEXT: s_endpgm 1452; 1453; GFX9-GISEL-LABEL: fptrunc_fabs_f32_to_f16_zext_i32: 1454; GFX9-GISEL: ; %bb.0: ; %entry 1455; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1456; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1457; GFX9-GISEL-NEXT: s_load_dword s2, s[2:3], 0x0 1458; GFX9-GISEL-NEXT: s_mov_b32 s3, 0xf000 1459; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1460; GFX9-GISEL-NEXT: v_cvt_f16_f32_e64 v0, |s2| 1461; GFX9-GISEL-NEXT: s_mov_b32 s2, -1 1462; GFX9-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0 1463; GFX9-GISEL-NEXT: s_endpgm 1464; 1465; GFX950-SDAG-LABEL: fptrunc_fabs_f32_to_f16_zext_i32: 1466; GFX950-SDAG: ; %bb.0: ; %entry 1467; GFX950-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1468; GFX950-SDAG-NEXT: s_mov_b32 s7, 0xf000 1469; GFX950-SDAG-NEXT: s_mov_b32 s6, -1 1470; GFX950-SDAG-NEXT: s_mov_b32 s10, s6 1471; GFX950-SDAG-NEXT: s_mov_b32 s11, s7 1472; GFX950-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1473; GFX950-SDAG-NEXT: s_mov_b32 s8, s2 1474; GFX950-SDAG-NEXT: s_mov_b32 s9, s3 1475; GFX950-SDAG-NEXT: buffer_load_dword v0, off, s[8:11], 0 1476; GFX950-SDAG-NEXT: s_mov_b32 s4, s0 1477; GFX950-SDAG-NEXT: s_mov_b32 s5, s1 1478; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) 1479; GFX950-SDAG-NEXT: v_cvt_f16_f32_e64 v0, |v0| 1480; GFX950-SDAG-NEXT: buffer_store_dword v0, off, s[4:7], 0 1481; GFX950-SDAG-NEXT: s_endpgm 1482; 1483; GFX950-GISEL-LABEL: fptrunc_fabs_f32_to_f16_zext_i32: 1484; GFX950-GISEL: ; %bb.0: ; %entry 1485; GFX950-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1486; GFX950-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1487; GFX950-GISEL-NEXT: s_load_dword s2, s[2:3], 0x0 1488; GFX950-GISEL-NEXT: s_mov_b32 s3, 0xf000 1489; GFX950-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1490; GFX950-GISEL-NEXT: v_cvt_f16_f32_e64 v0, |s2| 1491; GFX950-GISEL-NEXT: s_mov_b32 s2, -1 
1492; GFX950-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0 1493; GFX950-GISEL-NEXT: s_endpgm 1494; 1495; GFX11-SDAG-LABEL: fptrunc_fabs_f32_to_f16_zext_i32: 1496; GFX11-SDAG: ; %bb.0: ; %entry 1497; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1498; GFX11-SDAG-NEXT: s_mov_b32 s6, -1 1499; GFX11-SDAG-NEXT: s_mov_b32 s7, 0x31016000 1500; GFX11-SDAG-NEXT: s_mov_b32 s10, s6 1501; GFX11-SDAG-NEXT: s_mov_b32 s11, s7 1502; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1503; GFX11-SDAG-NEXT: s_mov_b32 s8, s2 1504; GFX11-SDAG-NEXT: s_mov_b32 s9, s3 1505; GFX11-SDAG-NEXT: s_mov_b32 s4, s0 1506; GFX11-SDAG-NEXT: buffer_load_b32 v0, off, s[8:11], 0 1507; GFX11-SDAG-NEXT: s_mov_b32 s5, s1 1508; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) 1509; GFX11-SDAG-NEXT: v_cvt_f16_f32_e64 v0, |v0| 1510; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 1511; GFX11-SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 1512; GFX11-SDAG-NEXT: buffer_store_b32 v0, off, s[4:7], 0 1513; GFX11-SDAG-NEXT: s_endpgm 1514; 1515; GFX11-GISEL-LABEL: fptrunc_fabs_f32_to_f16_zext_i32: 1516; GFX11-GISEL: ; %bb.0: ; %entry 1517; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1518; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1519; GFX11-GISEL-NEXT: s_load_b32 s2, s[2:3], 0x0 1520; GFX11-GISEL-NEXT: s_mov_b32 s3, 0x31016000 1521; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1522; GFX11-GISEL-NEXT: v_cvt_f16_f32_e64 v0, |s2| 1523; GFX11-GISEL-NEXT: s_mov_b32 s2, -1 1524; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 1525; GFX11-GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 1526; GFX11-GISEL-NEXT: buffer_store_b32 v0, off, s[0:3], 0 1527; GFX11-GISEL-NEXT: s_endpgm 1528 ptr addrspace(1) %r, 1529 ptr addrspace(1) %a) #0 { 1530entry: 1531 %a.val = load float, ptr addrspace(1) %a 1532 %a.fabs = call float @llvm.fabs.f32(float %a.val) 1533 %r.val = fptrunc float %a.fabs to half 1534 %r.i16 = bitcast half %r.val to i16 1535 %zext = zext i16 %r.i16 to i32 1536 store i32 %zext, ptr addrspace(1) %r 1537 ret void 1538} 1539 1540define 
amdgpu_kernel void @fptrunc_f32_to_f16_sext_i32(
; SI-SDAG-LABEL: fptrunc_f32_to_f16_sext_i32:
; SI-SDAG:       ; %bb.0: ; %entry
; SI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-SDAG-NEXT:    s_mov_b32 s7, 0xf000
; SI-SDAG-NEXT:    s_mov_b32 s6, -1
; SI-SDAG-NEXT:    s_mov_b32 s10, s6
; SI-SDAG-NEXT:    s_mov_b32 s11, s7
; SI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; SI-SDAG-NEXT:    s_mov_b32 s8, s2
; SI-SDAG-NEXT:    s_mov_b32 s9, s3
; SI-SDAG-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; SI-SDAG-NEXT:    s_mov_b32 s4, s0
; SI-SDAG-NEXT:    s_mov_b32 s5, s1
; SI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; SI-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
; SI-SDAG-NEXT:    v_bfe_i32 v0, v0, 0, 16
; SI-SDAG-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-SDAG-NEXT:    s_endpgm
;
; SI-GISEL-LABEL: fptrunc_f32_to_f16_sext_i32:
; SI-GISEL:       ; %bb.0: ; %entry
; SI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT:    s_load_dword s3, s[2:3], 0x0
; SI-GISEL-NEXT:    s_mov_b32 s2, -1
; SI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, s3
; SI-GISEL-NEXT:    v_bfe_i32 v0, v0, 0, 16
; SI-GISEL-NEXT:    s_mov_b32 s3, 0xf000
; SI-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-GISEL-NEXT:    s_endpgm
;
; VI-SDAG-LABEL: fptrunc_f32_to_f16_sext_i32:
; VI-SDAG:       ; %bb.0: ; %entry
; VI-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-SDAG-NEXT:    s_mov_b32 s7, 0xf000
; VI-SDAG-NEXT:    s_mov_b32 s6, -1
; VI-SDAG-NEXT:    s_mov_b32 s10, s6
; VI-SDAG-NEXT:    s_mov_b32 s11, s7
; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; VI-SDAG-NEXT:    s_mov_b32 s8, s2
; VI-SDAG-NEXT:    s_mov_b32 s9, s3
; VI-SDAG-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; VI-SDAG-NEXT:    s_mov_b32 s4, s0
; VI-SDAG-NEXT:    s_mov_b32 s5, s1
; VI-SDAG-NEXT:    s_waitcnt vmcnt(0)
; VI-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
; VI-SDAG-NEXT:    v_bfe_i32 v0, v0, 0, 16
; VI-SDAG-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; VI-SDAG-NEXT:    s_endpgm
;
; VI-GISEL-LABEL: fptrunc_f32_to_f16_sext_i32:
; VI-GISEL:       ; %bb.0: ; %entry
; VI-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT:    s_load_dword s2, s[2:3], 0x0
; VI-GISEL-NEXT:    s_mov_b32 s3, 0xf000
; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, s2
; VI-GISEL-NEXT:    s_mov_b32 s2, -1
; VI-GISEL-NEXT:    v_bfe_i32 v0, v0, 0, 16
; VI-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; VI-GISEL-NEXT:    s_endpgm
;
; GFX9-SDAG-LABEL: fptrunc_f32_to_f16_sext_i32:
; GFX9-SDAG:       ; %bb.0: ; %entry
; GFX9-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX9-SDAG-NEXT:    s_mov_b32 s7, 0xf000
; GFX9-SDAG-NEXT:    s_mov_b32 s6, -1
; GFX9-SDAG-NEXT:    s_mov_b32 s10, s6
; GFX9-SDAG-NEXT:    s_mov_b32 s11, s7
; GFX9-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-SDAG-NEXT:    s_mov_b32 s8, s2
; GFX9-SDAG-NEXT:    s_mov_b32 s9, s3
; GFX9-SDAG-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; GFX9-SDAG-NEXT:    s_mov_b32 s4, s0
; GFX9-SDAG-NEXT:    s_mov_b32 s5, s1
; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX9-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX9-SDAG-NEXT:    v_bfe_i32 v0, v0, 0, 16
; GFX9-SDAG-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX9-SDAG-NEXT:    s_endpgm
;
; GFX9-GISEL-LABEL: fptrunc_f32_to_f16_sext_i32:
; GFX9-GISEL:       ; %bb.0: ; %entry
; GFX9-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-GISEL-NEXT:    s_load_dword s2, s[2:3], 0x0
; GFX9-GISEL-NEXT:    s_mov_b32 s3, 0xf000
; GFX9-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, s2
; GFX9-GISEL-NEXT:    s_mov_b32 s2, -1
; GFX9-GISEL-NEXT:    v_bfe_i32 v0, v0, 0, 16
; GFX9-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX9-GISEL-NEXT:    s_endpgm
;
; GFX950-SDAG-LABEL: fptrunc_f32_to_f16_sext_i32:
; GFX950-SDAG:       ; %bb.0: ; %entry
; GFX950-SDAG-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX950-SDAG-NEXT:    s_mov_b32 s7, 0xf000
; GFX950-SDAG-NEXT:    s_mov_b32 s6, -1
; GFX950-SDAG-NEXT:    s_mov_b32 s10, s6
; GFX950-SDAG-NEXT:    s_mov_b32 s11, s7
; GFX950-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX950-SDAG-NEXT:    s_mov_b32 s8, s2
; GFX950-SDAG-NEXT:    s_mov_b32 s9, s3
; GFX950-SDAG-NEXT:    buffer_load_dword v0, off, s[8:11], 0
; GFX950-SDAG-NEXT:    s_mov_b32 s4, s0
; GFX950-SDAG-NEXT:    s_mov_b32 s5, s1
; GFX950-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX950-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX950-SDAG-NEXT:    v_bfe_i32 v0, v0, 0, 16
; GFX950-SDAG-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; GFX950-SDAG-NEXT:    s_endpgm
;
; GFX950-GISEL-LABEL: fptrunc_f32_to_f16_sext_i32:
; GFX950-GISEL:       ; %bb.0: ; %entry
; GFX950-GISEL-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX950-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX950-GISEL-NEXT:    s_load_dword s2, s[2:3], 0x0
; GFX950-GISEL-NEXT:    s_mov_b32 s3, 0xf000
; GFX950-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX950-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, s2
; GFX950-GISEL-NEXT:    s_mov_b32 s2, -1
; GFX950-GISEL-NEXT:    v_bfe_i32 v0, v0, 0, 16
; GFX950-GISEL-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX950-GISEL-NEXT:    s_endpgm
;
; GFX11-SDAG-LABEL: fptrunc_f32_to_f16_sext_i32:
; GFX11-SDAG:       ; %bb.0: ; %entry
; GFX11-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-SDAG-NEXT:    s_mov_b32 s6, -1
; GFX11-SDAG-NEXT:    s_mov_b32 s7, 0x31016000
; GFX11-SDAG-NEXT:    s_mov_b32 s10, s6
; GFX11-SDAG-NEXT:    s_mov_b32 s11, s7
; GFX11-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-SDAG-NEXT:    s_mov_b32 s8, s2
; GFX11-SDAG-NEXT:    s_mov_b32 s9, s3
; GFX11-SDAG-NEXT:    s_mov_b32 s4, s0
; GFX11-SDAG-NEXT:    buffer_load_b32 v0, off, s[8:11], 0
; GFX11-SDAG-NEXT:    s_mov_b32 s5, s1
; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0)
; GFX11-SDAG-NEXT:    v_cvt_f16_f32_e32 v0, v0
; GFX11-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-SDAG-NEXT:    v_bfe_i32 v0, v0, 0, 16
; GFX11-SDAG-NEXT:    buffer_store_b32 v0, off, s[4:7], 0
; GFX11-SDAG-NEXT:    s_endpgm
;
; GFX11-GISEL-LABEL: fptrunc_f32_to_f16_sext_i32:
; GFX11-GISEL:       ; %bb.0: ; %entry
; GFX11-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT:    s_load_b32 s2, s[2:3], 0x0
; GFX11-GISEL-NEXT:    s_mov_b32 s3, 0x31016000
; GFX11-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT:    v_cvt_f16_f32_e32 v0, s2
; GFX11-GISEL-NEXT:    s_mov_b32 s2, -1
; GFX11-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-GISEL-NEXT:    v_bfe_i32 v0, v0, 0, 16
; GFX11-GISEL-NEXT:    buffer_store_b32 v0, off, s[0:3], 0
; GFX11-GISEL-NEXT:    s_endpgm
    ptr addrspace(1) %r,
    ptr addrspace(1) %a) #0 {
entry:
  ; Loads a float from %a, truncates it to half, and stores the half's raw
  ; 16-bit pattern sign-extended to i32 into %r. In every check block above
  ; the conversion is followed by a v_bfe_i32 with offset 0 and width 16.
  %a.val = load float, ptr addrspace(1) %a
  %r.val = fptrunc float %a.val to half
  %r.i16 = bitcast half %r.val to i16
  ; Named %sext (previously %zext) so the value name matches the instruction.
  ; Value names do not appear in the emitted assembly, so the checks above
  ; are unaffected by the rename.
  %sext = sext i16 %r.i16 to i32
  store i32 %sext, ptr addrspace(1) %r
  ret void
}

declare float @llvm.fabs.f32(float) #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }