1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 2; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=SI %s 3; RUN: llc -mtriple=amdgcn -mcpu=tonga -global-isel=0 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=VI-SDAG,VI-SAFE-SDAG %s 4; RUN: llc -mtriple=amdgcn -mcpu=tonga -global-isel=1 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=VI-GISEL,VI-SAFE-GISEL %s 5; RUN: llc -mtriple=amdgcn -mcpu=tonga -global-isel=0 -mattr=-flat-for-global -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefixes=VI-SDAG,VI-UNSAFE-SDAG %s 6; RUN: llc -mtriple=amdgcn -mcpu=tonga -global-isel=1 -mattr=-flat-for-global -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefixes=VI-GISEL,VI-UNSAFE-GISEL %s 7; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -global-isel=0 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10-SDAG,GFX10-SAFE-SDAG %s 8; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -global-isel=1 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10-GISEL,GFX10-SAFE-GISEL %s 9; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -global-isel=0 -mattr=-flat-for-global -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10-SDAG,GFX10-UNSAFE-SDAG %s 10; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -global-isel=1 -mattr=-flat-for-global -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10-GISEL,GFX10-UNSAFE-GISEL %s 11; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-SDAG,GFX11-SAFE-SDAG %s 12; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-GISEL,GFX11-SAFE-GISEL %s 13; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 
-mattr=-flat-for-global -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-SDAG,GFX11-UNSAFE-SDAG %s 14; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global -enable-unsafe-fp-math -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11-GISEL,GFX11-UNSAFE-GISEL %s 15 16define amdgpu_kernel void @fptrunc_f64_to_f32(ptr addrspace(1) %out, double %in) { 17; SI-LABEL: fptrunc_f64_to_f32: 18; SI: ; %bb.0: 19; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 20; SI-NEXT: s_mov_b32 s7, 0xf000 21; SI-NEXT: s_mov_b32 s6, -1 22; SI-NEXT: s_waitcnt lgkmcnt(0) 23; SI-NEXT: s_mov_b32 s4, s0 24; SI-NEXT: s_mov_b32 s5, s1 25; SI-NEXT: v_cvt_f32_f64_e32 v0, s[2:3] 26; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 27; SI-NEXT: s_endpgm 28; 29; VI-SDAG-LABEL: fptrunc_f64_to_f32: 30; VI-SDAG: ; %bb.0: 31; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 32; VI-SDAG-NEXT: s_mov_b32 s7, 0xf000 33; VI-SDAG-NEXT: s_mov_b32 s6, -1 34; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 35; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[2:3] 36; VI-SDAG-NEXT: s_mov_b32 s4, s0 37; VI-SDAG-NEXT: s_mov_b32 s5, s1 38; VI-SDAG-NEXT: buffer_store_dword v0, off, s[4:7], 0 39; VI-SDAG-NEXT: s_endpgm 40; 41; VI-GISEL-LABEL: fptrunc_f64_to_f32: 42; VI-GISEL: ; %bb.0: 43; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 44; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 45; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[2:3] 46; VI-GISEL-NEXT: s_mov_b32 s2, -1 47; VI-GISEL-NEXT: s_mov_b32 s3, 0xf000 48; VI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0 49; VI-GISEL-NEXT: s_endpgm 50; 51; GFX10-SDAG-LABEL: fptrunc_f64_to_f32: 52; GFX10-SDAG: ; %bb.0: 53; GFX10-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 54; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0) 55; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[2:3] 56; GFX10-SDAG-NEXT: s_mov_b32 s3, 0x31016000 57; GFX10-SDAG-NEXT: s_mov_b32 s2, -1 58; GFX10-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0 59; GFX10-SDAG-NEXT: s_endpgm 60; 61; 
GFX10-GISEL-LABEL: fptrunc_f64_to_f32: 62; GFX10-GISEL: ; %bb.0: 63; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 64; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) 65; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[2:3] 66; GFX10-GISEL-NEXT: s_mov_b32 s2, -1 67; GFX10-GISEL-NEXT: s_mov_b32 s3, 0x31016000 68; GFX10-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0 69; GFX10-GISEL-NEXT: s_endpgm 70; 71; GFX11-SDAG-LABEL: fptrunc_f64_to_f32: 72; GFX11-SDAG: ; %bb.0: 73; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 74; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 75; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[2:3] 76; GFX11-SDAG-NEXT: s_mov_b32 s3, 0x31016000 77; GFX11-SDAG-NEXT: s_mov_b32 s2, -1 78; GFX11-SDAG-NEXT: buffer_store_b32 v0, off, s[0:3], 0 79; GFX11-SDAG-NEXT: s_endpgm 80; 81; GFX11-GISEL-LABEL: fptrunc_f64_to_f32: 82; GFX11-GISEL: ; %bb.0: 83; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 84; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 85; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[2:3] 86; GFX11-GISEL-NEXT: s_mov_b32 s2, -1 87; GFX11-GISEL-NEXT: s_mov_b32 s3, 0x31016000 88; GFX11-GISEL-NEXT: buffer_store_b32 v0, off, s[0:3], 0 89; GFX11-GISEL-NEXT: s_endpgm 90 %result = fptrunc double %in to float 91 store float %result, ptr addrspace(1) %out 92 ret void 93} 94 95define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in) { 96; SI-LABEL: fptrunc_f64_to_f16: 97; SI: ; %bb.0: 98; SI-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9 99; SI-NEXT: s_mov_b32 s3, 0xf000 100; SI-NEXT: s_mov_b32 s2, -1 101; SI-NEXT: s_waitcnt lgkmcnt(0) 102; SI-NEXT: s_mov_b32 s0, s4 103; SI-NEXT: s_mov_b32 s1, s5 104; SI-NEXT: s_lshr_b32 s4, s7, 8 105; SI-NEXT: s_and_b32 s5, s7, 0x1ff 106; SI-NEXT: s_and_b32 s8, s4, 0xffe 107; SI-NEXT: s_or_b32 s4, s5, s6 108; SI-NEXT: s_cmp_lg_u32 s4, 0 109; SI-NEXT: s_cselect_b64 s[4:5], -1, 0 110; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] 111; SI-NEXT: s_bfe_u32 s4, s7, 0xb0014 112; SI-NEXT: v_readfirstlane_b32 s5, v0 113; SI-NEXT: 
s_sub_i32 s6, 0x3f1, s4 114; SI-NEXT: s_add_i32 s10, s4, 0xfffffc10 115; SI-NEXT: s_or_b32 s11, s8, s5 116; SI-NEXT: v_med3_i32 v0, s6, 0, 13 117; SI-NEXT: s_lshl_b32 s4, s10, 12 118; SI-NEXT: s_or_b32 s5, s11, 0x1000 119; SI-NEXT: v_readfirstlane_b32 s6, v0 120; SI-NEXT: s_or_b32 s4, s11, s4 121; SI-NEXT: s_lshr_b32 s6, s5, s6 122; SI-NEXT: v_lshl_b32_e32 v0, s6, v0 123; SI-NEXT: v_cmp_ne_u32_e32 vcc, s5, v0 124; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 125; SI-NEXT: v_readfirstlane_b32 s5, v0 126; SI-NEXT: s_or_b32 s5, s6, s5 127; SI-NEXT: s_cmp_lt_i32 s10, 1 128; SI-NEXT: s_cselect_b32 s6, s5, s4 129; SI-NEXT: s_and_b32 s8, s6, 7 130; SI-NEXT: s_cmp_gt_i32 s8, 5 131; SI-NEXT: s_cselect_b64 s[4:5], -1, 0 132; SI-NEXT: s_cmp_eq_u32 s8, 3 133; SI-NEXT: s_cselect_b64 s[8:9], -1, 0 134; SI-NEXT: s_lshr_b32 s6, s6, 2 135; SI-NEXT: s_or_b64 s[4:5], s[8:9], s[4:5] 136; SI-NEXT: s_or_b32 s4, s4, s5 137; SI-NEXT: s_cmp_lg_u32 s4, 0 138; SI-NEXT: s_addc_u32 s4, s6, 0 139; SI-NEXT: s_cmp_lt_i32 s10, 31 140; SI-NEXT: s_cselect_b32 s6, s4, 0x7c00 141; SI-NEXT: s_cmp_lg_u32 s11, 0 142; SI-NEXT: s_cselect_b64 s[4:5], -1, 0 143; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] 144; SI-NEXT: s_cmpk_eq_i32 s10, 0x40f 145; SI-NEXT: v_mov_b32_e32 v1, s6 146; SI-NEXT: v_lshlrev_b32_e32 v0, 9, v0 147; SI-NEXT: v_or_b32_e32 v0, 0x7c00, v0 148; SI-NEXT: s_cselect_b64 vcc, -1, 0 149; SI-NEXT: s_lshr_b32 s4, s7, 16 150; SI-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 151; SI-NEXT: s_and_b32 s4, s4, 0x8000 152; SI-NEXT: v_or_b32_e32 v0, s4, v0 153; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 154; SI-NEXT: s_endpgm 155; 156; VI-SAFE-SDAG-LABEL: fptrunc_f64_to_f16: 157; VI-SAFE-SDAG: ; %bb.0: 158; VI-SAFE-SDAG-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x24 159; VI-SAFE-SDAG-NEXT: s_mov_b32 s3, 0xf000 160; VI-SAFE-SDAG-NEXT: s_mov_b32 s2, -1 161; VI-SAFE-SDAG-NEXT: s_waitcnt lgkmcnt(0) 162; VI-SAFE-SDAG-NEXT: s_mov_b32 s0, s4 163; VI-SAFE-SDAG-NEXT: s_lshr_b32 s4, s7, 8 164; VI-SAFE-SDAG-NEXT: s_and_b32 
s8, s4, 0xffe 165; VI-SAFE-SDAG-NEXT: s_and_b32 s4, s7, 0x1ff 166; VI-SAFE-SDAG-NEXT: s_or_b32 s4, s4, s6 167; VI-SAFE-SDAG-NEXT: s_cmp_lg_u32 s4, 0 168; VI-SAFE-SDAG-NEXT: s_mov_b32 s1, s5 169; VI-SAFE-SDAG-NEXT: s_cselect_b64 s[4:5], -1, 0 170; VI-SAFE-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] 171; VI-SAFE-SDAG-NEXT: v_readfirstlane_b32 s4, v0 172; VI-SAFE-SDAG-NEXT: s_bfe_u32 s5, s7, 0xb0014 173; VI-SAFE-SDAG-NEXT: s_or_b32 s6, s8, s4 174; VI-SAFE-SDAG-NEXT: s_sub_i32 s8, 0x3f1, s5 175; VI-SAFE-SDAG-NEXT: v_med3_i32 v0, s8, 0, 13 176; VI-SAFE-SDAG-NEXT: s_or_b32 s4, s6, 0x1000 177; VI-SAFE-SDAG-NEXT: v_readfirstlane_b32 s8, v0 178; VI-SAFE-SDAG-NEXT: s_lshr_b32 s8, s4, s8 179; VI-SAFE-SDAG-NEXT: v_lshlrev_b32_e64 v0, v0, s8 180; VI-SAFE-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, s4, v0 181; VI-SAFE-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 182; VI-SAFE-SDAG-NEXT: s_add_i32 s10, s5, 0xfffffc10 183; VI-SAFE-SDAG-NEXT: v_readfirstlane_b32 s4, v0 184; VI-SAFE-SDAG-NEXT: s_lshl_b32 s5, s10, 12 185; VI-SAFE-SDAG-NEXT: s_or_b32 s4, s8, s4 186; VI-SAFE-SDAG-NEXT: s_or_b32 s5, s6, s5 187; VI-SAFE-SDAG-NEXT: s_cmp_lt_i32 s10, 1 188; VI-SAFE-SDAG-NEXT: s_cselect_b32 s11, s4, s5 189; VI-SAFE-SDAG-NEXT: s_and_b32 s8, s11, 7 190; VI-SAFE-SDAG-NEXT: s_cmp_gt_i32 s8, 5 191; VI-SAFE-SDAG-NEXT: s_cselect_b64 s[4:5], -1, 0 192; VI-SAFE-SDAG-NEXT: s_cmp_eq_u32 s8, 3 193; VI-SAFE-SDAG-NEXT: s_cselect_b64 s[8:9], -1, 0 194; VI-SAFE-SDAG-NEXT: s_or_b64 s[4:5], s[8:9], s[4:5] 195; VI-SAFE-SDAG-NEXT: s_lshr_b32 s8, s11, 2 196; VI-SAFE-SDAG-NEXT: s_cmp_lg_u64 s[4:5], 0 197; VI-SAFE-SDAG-NEXT: s_addc_u32 s4, s8, 0 198; VI-SAFE-SDAG-NEXT: s_cmp_lt_i32 s10, 31 199; VI-SAFE-SDAG-NEXT: s_cselect_b32 s8, s4, 0x7c00 200; VI-SAFE-SDAG-NEXT: s_cmp_lg_u32 s6, 0 201; VI-SAFE-SDAG-NEXT: s_cselect_b64 s[4:5], -1, 0 202; VI-SAFE-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] 203; VI-SAFE-SDAG-NEXT: v_lshlrev_b32_e32 v0, 9, v0 204; VI-SAFE-SDAG-NEXT: s_cmpk_eq_i32 s10, 0x40f 205; VI-SAFE-SDAG-NEXT: 
v_or_b32_e32 v0, 0x7c00, v0 206; VI-SAFE-SDAG-NEXT: v_mov_b32_e32 v1, s8 207; VI-SAFE-SDAG-NEXT: s_cselect_b64 vcc, -1, 0 208; VI-SAFE-SDAG-NEXT: s_lshr_b32 s4, s7, 16 209; VI-SAFE-SDAG-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 210; VI-SAFE-SDAG-NEXT: s_and_b32 s4, s4, 0x8000 211; VI-SAFE-SDAG-NEXT: v_or_b32_e32 v0, s4, v0 212; VI-SAFE-SDAG-NEXT: buffer_store_short v0, off, s[0:3], 0 213; VI-SAFE-SDAG-NEXT: s_endpgm 214; 215; VI-SAFE-GISEL-LABEL: fptrunc_f64_to_f16: 216; VI-SAFE-GISEL: ; %bb.0: 217; VI-SAFE-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 218; VI-SAFE-GISEL-NEXT: s_waitcnt lgkmcnt(0) 219; VI-SAFE-GISEL-NEXT: s_bfe_u32 s4, s3, 0xb0014 220; VI-SAFE-GISEL-NEXT: s_lshr_b32 s5, s3, 8 221; VI-SAFE-GISEL-NEXT: s_and_b32 s6, s3, 0x1ff 222; VI-SAFE-GISEL-NEXT: s_addk_i32 s4, 0xfc10 223; VI-SAFE-GISEL-NEXT: s_and_b32 s5, s5, 0xffe 224; VI-SAFE-GISEL-NEXT: s_or_b32 s2, s6, s2 225; VI-SAFE-GISEL-NEXT: s_cmp_lg_u32 s2, 0 226; VI-SAFE-GISEL-NEXT: s_cselect_b32 s2, 1, 0 227; VI-SAFE-GISEL-NEXT: s_or_b32 s2, s5, s2 228; VI-SAFE-GISEL-NEXT: s_cmp_lg_u32 s2, 0 229; VI-SAFE-GISEL-NEXT: s_cselect_b32 s5, 1, 0 230; VI-SAFE-GISEL-NEXT: s_sub_i32 s7, 1, s4 231; VI-SAFE-GISEL-NEXT: s_lshl_b32 s6, s4, 12 232; VI-SAFE-GISEL-NEXT: s_max_i32 s7, s7, 0 233; VI-SAFE-GISEL-NEXT: s_or_b32 s6, s2, s6 234; VI-SAFE-GISEL-NEXT: s_min_i32 s7, s7, 13 235; VI-SAFE-GISEL-NEXT: s_bitset1_b32 s2, 12 236; VI-SAFE-GISEL-NEXT: s_lshl_b32 s5, s5, 9 237; VI-SAFE-GISEL-NEXT: s_lshr_b32 s8, s2, s7 238; VI-SAFE-GISEL-NEXT: s_or_b32 s5, s5, 0x7c00 239; VI-SAFE-GISEL-NEXT: s_lshl_b32 s7, s8, s7 240; VI-SAFE-GISEL-NEXT: s_cmp_lg_u32 s7, s2 241; VI-SAFE-GISEL-NEXT: s_cselect_b32 s2, 1, 0 242; VI-SAFE-GISEL-NEXT: s_or_b32 s2, s8, s2 243; VI-SAFE-GISEL-NEXT: s_cmp_lt_i32 s4, 1 244; VI-SAFE-GISEL-NEXT: s_cselect_b32 s2, s2, s6 245; VI-SAFE-GISEL-NEXT: s_and_b32 s6, s2, 7 246; VI-SAFE-GISEL-NEXT: s_lshr_b32 s2, s2, 2 247; VI-SAFE-GISEL-NEXT: s_cmp_eq_u32 s6, 3 248; VI-SAFE-GISEL-NEXT: s_cselect_b32 s7, 1, 0 
249; VI-SAFE-GISEL-NEXT: s_cmp_gt_i32 s6, 5 250; VI-SAFE-GISEL-NEXT: s_cselect_b32 s6, 1, 0 251; VI-SAFE-GISEL-NEXT: s_or_b32 s6, s7, s6 252; VI-SAFE-GISEL-NEXT: s_and_b32 s6, s6, 1 253; VI-SAFE-GISEL-NEXT: s_add_i32 s2, s2, s6 254; VI-SAFE-GISEL-NEXT: s_cmp_gt_i32 s4, 30 255; VI-SAFE-GISEL-NEXT: s_cselect_b32 s2, 0x7c00, s2 256; VI-SAFE-GISEL-NEXT: s_cmpk_eq_i32 s4, 0x40f 257; VI-SAFE-GISEL-NEXT: s_cselect_b32 s2, s5, s2 258; VI-SAFE-GISEL-NEXT: s_lshr_b32 s3, s3, 16 259; VI-SAFE-GISEL-NEXT: s_and_b32 s3, s3, 0x8000 260; VI-SAFE-GISEL-NEXT: s_or_b32 s2, s3, s2 261; VI-SAFE-GISEL-NEXT: v_mov_b32_e32 v0, s2 262; VI-SAFE-GISEL-NEXT: s_mov_b32 s2, -1 263; VI-SAFE-GISEL-NEXT: s_mov_b32 s3, 0xf000 264; VI-SAFE-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0 265; VI-SAFE-GISEL-NEXT: s_endpgm 266; 267; VI-UNSAFE-SDAG-LABEL: fptrunc_f64_to_f16: 268; VI-UNSAFE-SDAG: ; %bb.0: 269; VI-UNSAFE-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 270; VI-UNSAFE-SDAG-NEXT: s_waitcnt lgkmcnt(0) 271; VI-UNSAFE-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[2:3] 272; VI-UNSAFE-SDAG-NEXT: s_mov_b32 s3, 0xf000 273; VI-UNSAFE-SDAG-NEXT: s_mov_b32 s2, -1 274; VI-UNSAFE-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 275; VI-UNSAFE-SDAG-NEXT: buffer_store_short v0, off, s[0:3], 0 276; VI-UNSAFE-SDAG-NEXT: s_endpgm 277; 278; VI-UNSAFE-GISEL-LABEL: fptrunc_f64_to_f16: 279; VI-UNSAFE-GISEL: ; %bb.0: 280; VI-UNSAFE-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 281; VI-UNSAFE-GISEL-NEXT: s_waitcnt lgkmcnt(0) 282; VI-UNSAFE-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[2:3] 283; VI-UNSAFE-GISEL-NEXT: s_mov_b32 s2, -1 284; VI-UNSAFE-GISEL-NEXT: s_mov_b32 s3, 0xf000 285; VI-UNSAFE-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 286; VI-UNSAFE-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0 287; VI-UNSAFE-GISEL-NEXT: s_endpgm 288; 289; GFX10-SAFE-SDAG-LABEL: fptrunc_f64_to_f16: 290; GFX10-SAFE-SDAG: ; %bb.0: 291; GFX10-SAFE-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 292; GFX10-SAFE-SDAG-NEXT: s_waitcnt lgkmcnt(0) 293; 
GFX10-SAFE-SDAG-NEXT: s_and_b32 s4, s3, 0x1ff 294; GFX10-SAFE-SDAG-NEXT: s_lshr_b32 s5, s3, 8 295; GFX10-SAFE-SDAG-NEXT: s_or_b32 s2, s4, s2 296; GFX10-SAFE-SDAG-NEXT: s_and_b32 s4, s5, 0xffe 297; GFX10-SAFE-SDAG-NEXT: s_cmp_lg_u32 s2, 0 298; GFX10-SAFE-SDAG-NEXT: s_cselect_b32 s2, -1, 0 299; GFX10-SAFE-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s2 300; GFX10-SAFE-SDAG-NEXT: s_bfe_u32 s2, s3, 0xb0014 301; GFX10-SAFE-SDAG-NEXT: s_sub_i32 s5, 0x3f1, s2 302; GFX10-SAFE-SDAG-NEXT: s_addk_i32 s2, 0xfc10 303; GFX10-SAFE-SDAG-NEXT: v_med3_i32 v1, s5, 0, 13 304; GFX10-SAFE-SDAG-NEXT: v_readfirstlane_b32 s5, v0 305; GFX10-SAFE-SDAG-NEXT: s_lshl_b32 s7, s2, 12 306; GFX10-SAFE-SDAG-NEXT: v_readfirstlane_b32 s6, v1 307; GFX10-SAFE-SDAG-NEXT: s_or_b32 s4, s4, s5 308; GFX10-SAFE-SDAG-NEXT: s_or_b32 s5, s4, 0x1000 309; GFX10-SAFE-SDAG-NEXT: s_or_b32 s7, s4, s7 310; GFX10-SAFE-SDAG-NEXT: s_lshr_b32 s6, s5, s6 311; GFX10-SAFE-SDAG-NEXT: v_lshlrev_b32_e64 v0, v1, s6 312; GFX10-SAFE-SDAG-NEXT: v_cmp_ne_u32_e32 vcc_lo, s5, v0 313; GFX10-SAFE-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo 314; GFX10-SAFE-SDAG-NEXT: v_readfirstlane_b32 s5, v0 315; GFX10-SAFE-SDAG-NEXT: s_or_b32 s5, s6, s5 316; GFX10-SAFE-SDAG-NEXT: s_cmp_lt_i32 s2, 1 317; GFX10-SAFE-SDAG-NEXT: s_cselect_b32 s5, s5, s7 318; GFX10-SAFE-SDAG-NEXT: s_and_b32 s6, s5, 7 319; GFX10-SAFE-SDAG-NEXT: s_cmp_gt_i32 s6, 5 320; GFX10-SAFE-SDAG-NEXT: s_cselect_b32 s7, -1, 0 321; GFX10-SAFE-SDAG-NEXT: s_cmp_eq_u32 s6, 3 322; GFX10-SAFE-SDAG-NEXT: s_cselect_b32 s6, -1, 0 323; GFX10-SAFE-SDAG-NEXT: s_lshr_b32 s5, s5, 2 324; GFX10-SAFE-SDAG-NEXT: s_or_b32 s6, s6, s7 325; GFX10-SAFE-SDAG-NEXT: s_cmp_lg_u32 s6, 0 326; GFX10-SAFE-SDAG-NEXT: s_addc_u32 s5, s5, 0 327; GFX10-SAFE-SDAG-NEXT: s_cmp_lt_i32 s2, 31 328; GFX10-SAFE-SDAG-NEXT: s_cselect_b32 s5, s5, 0x7c00 329; GFX10-SAFE-SDAG-NEXT: s_cmp_lg_u32 s4, 0 330; GFX10-SAFE-SDAG-NEXT: s_cselect_b32 s4, -1, 0 331; GFX10-SAFE-SDAG-NEXT: s_cmpk_eq_i32 s2, 0x40f 332; GFX10-SAFE-SDAG-NEXT: 
v_cndmask_b32_e64 v0, 0, 1, s4 333; GFX10-SAFE-SDAG-NEXT: s_cselect_b32 vcc_lo, -1, 0 334; GFX10-SAFE-SDAG-NEXT: s_lshr_b32 s2, s3, 16 335; GFX10-SAFE-SDAG-NEXT: s_mov_b32 s3, 0x31016000 336; GFX10-SAFE-SDAG-NEXT: s_and_b32 s2, s2, 0x8000 337; GFX10-SAFE-SDAG-NEXT: v_lshlrev_b32_e32 v0, 9, v0 338; GFX10-SAFE-SDAG-NEXT: v_or_b32_e32 v0, 0x7c00, v0 339; GFX10-SAFE-SDAG-NEXT: v_cndmask_b32_e32 v0, s5, v0, vcc_lo 340; GFX10-SAFE-SDAG-NEXT: v_or_b32_e32 v0, s2, v0 341; GFX10-SAFE-SDAG-NEXT: s_mov_b32 s2, -1 342; GFX10-SAFE-SDAG-NEXT: buffer_store_short v0, off, s[0:3], 0 343; GFX10-SAFE-SDAG-NEXT: s_endpgm 344; 345; GFX10-SAFE-GISEL-LABEL: fptrunc_f64_to_f16: 346; GFX10-SAFE-GISEL: ; %bb.0: 347; GFX10-SAFE-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 348; GFX10-SAFE-GISEL-NEXT: s_waitcnt lgkmcnt(0) 349; GFX10-SAFE-GISEL-NEXT: s_and_b32 s6, s3, 0x1ff 350; GFX10-SAFE-GISEL-NEXT: s_bfe_u32 s4, s3, 0xb0014 351; GFX10-SAFE-GISEL-NEXT: s_lshr_b32 s5, s3, 8 352; GFX10-SAFE-GISEL-NEXT: s_or_b32 s2, s6, s2 353; GFX10-SAFE-GISEL-NEXT: s_addk_i32 s4, 0xfc10 354; GFX10-SAFE-GISEL-NEXT: s_and_b32 s5, s5, 0xffe 355; GFX10-SAFE-GISEL-NEXT: s_cmp_lg_u32 s2, 0 356; GFX10-SAFE-GISEL-NEXT: s_cselect_b32 s2, 1, 0 357; GFX10-SAFE-GISEL-NEXT: s_or_b32 s2, s5, s2 358; GFX10-SAFE-GISEL-NEXT: s_cmp_lg_u32 s2, 0 359; GFX10-SAFE-GISEL-NEXT: s_cselect_b32 s5, 1, 0 360; GFX10-SAFE-GISEL-NEXT: s_sub_i32 s6, 1, s4 361; GFX10-SAFE-GISEL-NEXT: s_or_b32 s8, s2, 0x1000 362; GFX10-SAFE-GISEL-NEXT: s_max_i32 s6, s6, 0 363; GFX10-SAFE-GISEL-NEXT: s_lshl_b32 s7, s4, 12 364; GFX10-SAFE-GISEL-NEXT: s_min_i32 s6, s6, 13 365; GFX10-SAFE-GISEL-NEXT: s_lshl_b32 s5, s5, 9 366; GFX10-SAFE-GISEL-NEXT: s_lshr_b32 s9, s8, s6 367; GFX10-SAFE-GISEL-NEXT: s_or_b32 s2, s2, s7 368; GFX10-SAFE-GISEL-NEXT: s_lshl_b32 s6, s9, s6 369; GFX10-SAFE-GISEL-NEXT: s_or_b32 s5, s5, 0x7c00 370; GFX10-SAFE-GISEL-NEXT: s_cmp_lg_u32 s6, s8 371; GFX10-SAFE-GISEL-NEXT: s_cselect_b32 s6, 1, 0 372; GFX10-SAFE-GISEL-NEXT: s_or_b32 s6, s9, s6 
373; GFX10-SAFE-GISEL-NEXT: s_cmp_lt_i32 s4, 1 374; GFX10-SAFE-GISEL-NEXT: s_cselect_b32 s2, s6, s2 375; GFX10-SAFE-GISEL-NEXT: s_and_b32 s6, s2, 7 376; GFX10-SAFE-GISEL-NEXT: s_lshr_b32 s2, s2, 2 377; GFX10-SAFE-GISEL-NEXT: s_cmp_eq_u32 s6, 3 378; GFX10-SAFE-GISEL-NEXT: s_cselect_b32 s7, 1, 0 379; GFX10-SAFE-GISEL-NEXT: s_cmp_gt_i32 s6, 5 380; GFX10-SAFE-GISEL-NEXT: s_cselect_b32 s6, 1, 0 381; GFX10-SAFE-GISEL-NEXT: s_or_b32 s6, s7, s6 382; GFX10-SAFE-GISEL-NEXT: s_and_b32 s6, s6, 1 383; GFX10-SAFE-GISEL-NEXT: s_add_i32 s2, s2, s6 384; GFX10-SAFE-GISEL-NEXT: s_cmp_gt_i32 s4, 30 385; GFX10-SAFE-GISEL-NEXT: s_cselect_b32 s2, 0x7c00, s2 386; GFX10-SAFE-GISEL-NEXT: s_cmpk_eq_i32 s4, 0x40f 387; GFX10-SAFE-GISEL-NEXT: s_cselect_b32 s2, s5, s2 388; GFX10-SAFE-GISEL-NEXT: s_lshr_b32 s3, s3, 16 389; GFX10-SAFE-GISEL-NEXT: s_and_b32 s3, s3, 0x8000 390; GFX10-SAFE-GISEL-NEXT: s_or_b32 s2, s3, s2 391; GFX10-SAFE-GISEL-NEXT: s_mov_b32 s3, 0x31016000 392; GFX10-SAFE-GISEL-NEXT: v_mov_b32_e32 v0, s2 393; GFX10-SAFE-GISEL-NEXT: s_mov_b32 s2, -1 394; GFX10-SAFE-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0 395; GFX10-SAFE-GISEL-NEXT: s_endpgm 396; 397; GFX10-UNSAFE-SDAG-LABEL: fptrunc_f64_to_f16: 398; GFX10-UNSAFE-SDAG: ; %bb.0: 399; GFX10-UNSAFE-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 400; GFX10-UNSAFE-SDAG-NEXT: s_waitcnt lgkmcnt(0) 401; GFX10-UNSAFE-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[2:3] 402; GFX10-UNSAFE-SDAG-NEXT: s_mov_b32 s3, 0x31016000 403; GFX10-UNSAFE-SDAG-NEXT: s_mov_b32 s2, -1 404; GFX10-UNSAFE-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 405; GFX10-UNSAFE-SDAG-NEXT: buffer_store_short v0, off, s[0:3], 0 406; GFX10-UNSAFE-SDAG-NEXT: s_endpgm 407; 408; GFX10-UNSAFE-GISEL-LABEL: fptrunc_f64_to_f16: 409; GFX10-UNSAFE-GISEL: ; %bb.0: 410; GFX10-UNSAFE-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 411; GFX10-UNSAFE-GISEL-NEXT: s_waitcnt lgkmcnt(0) 412; GFX10-UNSAFE-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[2:3] 413; GFX10-UNSAFE-GISEL-NEXT: s_mov_b32 s2, -1 414; 
GFX10-UNSAFE-GISEL-NEXT: s_mov_b32 s3, 0x31016000 415; GFX10-UNSAFE-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 416; GFX10-UNSAFE-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0 417; GFX10-UNSAFE-GISEL-NEXT: s_endpgm 418; 419; GFX11-SAFE-SDAG-LABEL: fptrunc_f64_to_f16: 420; GFX11-SAFE-SDAG: ; %bb.0: 421; GFX11-SAFE-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 422; GFX11-SAFE-SDAG-NEXT: s_waitcnt lgkmcnt(0) 423; GFX11-SAFE-SDAG-NEXT: s_and_b32 s4, s3, 0x1ff 424; GFX11-SAFE-SDAG-NEXT: s_lshr_b32 s5, s3, 8 425; GFX11-SAFE-SDAG-NEXT: s_or_b32 s2, s4, s2 426; GFX11-SAFE-SDAG-NEXT: s_and_b32 s4, s5, 0xffe 427; GFX11-SAFE-SDAG-NEXT: s_cmp_lg_u32 s2, 0 428; GFX11-SAFE-SDAG-NEXT: s_cselect_b32 s2, -1, 0 429; GFX11-SAFE-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) 430; GFX11-SAFE-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s2 431; GFX11-SAFE-SDAG-NEXT: s_bfe_u32 s2, s3, 0xb0014 432; GFX11-SAFE-SDAG-NEXT: s_sub_i32 s5, 0x3f1, s2 433; GFX11-SAFE-SDAG-NEXT: s_addk_i32 s2, 0xfc10 434; GFX11-SAFE-SDAG-NEXT: v_med3_i32 v1, s5, 0, 13 435; GFX11-SAFE-SDAG-NEXT: v_readfirstlane_b32 s5, v0 436; GFX11-SAFE-SDAG-NEXT: s_lshl_b32 s7, s2, 12 437; GFX11-SAFE-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 438; GFX11-SAFE-SDAG-NEXT: v_readfirstlane_b32 s6, v1 439; GFX11-SAFE-SDAG-NEXT: s_or_b32 s4, s4, s5 440; GFX11-SAFE-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) 441; GFX11-SAFE-SDAG-NEXT: s_or_b32 s5, s4, 0x1000 442; GFX11-SAFE-SDAG-NEXT: s_or_b32 s7, s4, s7 443; GFX11-SAFE-SDAG-NEXT: s_lshr_b32 s6, s5, s6 444; GFX11-SAFE-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) 445; GFX11-SAFE-SDAG-NEXT: v_lshlrev_b32_e64 v0, v1, s6 446; GFX11-SAFE-SDAG-NEXT: v_cmp_ne_u32_e32 vcc_lo, s5, v0 447; GFX11-SAFE-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo 448; GFX11-SAFE-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 449; 
GFX11-SAFE-SDAG-NEXT: v_readfirstlane_b32 s5, v0 450; GFX11-SAFE-SDAG-NEXT: s_or_b32 s5, s6, s5 451; GFX11-SAFE-SDAG-NEXT: s_cmp_lt_i32 s2, 1 452; GFX11-SAFE-SDAG-NEXT: s_cselect_b32 s5, s5, s7 453; GFX11-SAFE-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) 454; GFX11-SAFE-SDAG-NEXT: s_and_b32 s6, s5, 7 455; GFX11-SAFE-SDAG-NEXT: s_cmp_gt_i32 s6, 5 456; GFX11-SAFE-SDAG-NEXT: s_cselect_b32 s7, -1, 0 457; GFX11-SAFE-SDAG-NEXT: s_cmp_eq_u32 s6, 3 458; GFX11-SAFE-SDAG-NEXT: s_cselect_b32 s6, -1, 0 459; GFX11-SAFE-SDAG-NEXT: s_lshr_b32 s5, s5, 2 460; GFX11-SAFE-SDAG-NEXT: s_or_b32 s6, s6, s7 461; GFX11-SAFE-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 462; GFX11-SAFE-SDAG-NEXT: s_cmp_lg_u32 s6, 0 463; GFX11-SAFE-SDAG-NEXT: s_addc_u32 s5, s5, 0 464; GFX11-SAFE-SDAG-NEXT: s_cmp_lt_i32 s2, 31 465; GFX11-SAFE-SDAG-NEXT: s_cselect_b32 s5, s5, 0x7c00 466; GFX11-SAFE-SDAG-NEXT: s_cmp_lg_u32 s4, 0 467; GFX11-SAFE-SDAG-NEXT: s_cselect_b32 s4, -1, 0 468; GFX11-SAFE-SDAG-NEXT: s_cmpk_eq_i32 s2, 0x40f 469; GFX11-SAFE-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 470; GFX11-SAFE-SDAG-NEXT: s_cselect_b32 vcc_lo, -1, 0 471; GFX11-SAFE-SDAG-NEXT: s_lshr_b32 s2, s3, 16 472; GFX11-SAFE-SDAG-NEXT: s_mov_b32 s3, 0x31016000 473; GFX11-SAFE-SDAG-NEXT: s_and_b32 s2, s2, 0x8000 474; GFX11-SAFE-SDAG-NEXT: v_lshlrev_b32_e32 v0, 9, v0 475; GFX11-SAFE-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 476; GFX11-SAFE-SDAG-NEXT: v_or_b32_e32 v0, 0x7c00, v0 477; GFX11-SAFE-SDAG-NEXT: v_cndmask_b32_e32 v0, s5, v0, vcc_lo 478; GFX11-SAFE-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 479; GFX11-SAFE-SDAG-NEXT: v_or_b32_e32 v0, s2, v0 480; GFX11-SAFE-SDAG-NEXT: s_mov_b32 s2, -1 481; GFX11-SAFE-SDAG-NEXT: buffer_store_b16 v0, off, s[0:3], 0 482; GFX11-SAFE-SDAG-NEXT: s_endpgm 483; 484; GFX11-SAFE-GISEL-LABEL: fptrunc_f64_to_f16: 485; GFX11-SAFE-GISEL: ; %bb.0: 486; GFX11-SAFE-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 487; 
GFX11-SAFE-GISEL-NEXT: s_waitcnt lgkmcnt(0) 488; GFX11-SAFE-GISEL-NEXT: s_and_b32 s6, s3, 0x1ff 489; GFX11-SAFE-GISEL-NEXT: s_bfe_u32 s4, s3, 0xb0014 490; GFX11-SAFE-GISEL-NEXT: s_lshr_b32 s5, s3, 8 491; GFX11-SAFE-GISEL-NEXT: s_or_b32 s2, s6, s2 492; GFX11-SAFE-GISEL-NEXT: s_addk_i32 s4, 0xfc10 493; GFX11-SAFE-GISEL-NEXT: s_and_b32 s5, s5, 0xffe 494; GFX11-SAFE-GISEL-NEXT: s_cmp_lg_u32 s2, 0 495; GFX11-SAFE-GISEL-NEXT: s_cselect_b32 s2, 1, 0 496; GFX11-SAFE-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) 497; GFX11-SAFE-GISEL-NEXT: s_or_b32 s2, s5, s2 498; GFX11-SAFE-GISEL-NEXT: s_cmp_lg_u32 s2, 0 499; GFX11-SAFE-GISEL-NEXT: s_cselect_b32 s5, 1, 0 500; GFX11-SAFE-GISEL-NEXT: s_sub_i32 s6, 1, s4 501; GFX11-SAFE-GISEL-NEXT: s_or_b32 s8, s2, 0x1000 502; GFX11-SAFE-GISEL-NEXT: s_max_i32 s6, s6, 0 503; GFX11-SAFE-GISEL-NEXT: s_lshl_b32 s7, s4, 12 504; GFX11-SAFE-GISEL-NEXT: s_min_i32 s6, s6, 13 505; GFX11-SAFE-GISEL-NEXT: s_lshl_b32 s5, s5, 9 506; GFX11-SAFE-GISEL-NEXT: s_lshr_b32 s9, s8, s6 507; GFX11-SAFE-GISEL-NEXT: s_or_b32 s2, s2, s7 508; GFX11-SAFE-GISEL-NEXT: s_lshl_b32 s6, s9, s6 509; GFX11-SAFE-GISEL-NEXT: s_or_b32 s5, s5, 0x7c00 510; GFX11-SAFE-GISEL-NEXT: s_cmp_lg_u32 s6, s8 511; GFX11-SAFE-GISEL-NEXT: s_cselect_b32 s6, 1, 0 512; GFX11-SAFE-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) 513; GFX11-SAFE-GISEL-NEXT: s_or_b32 s6, s9, s6 514; GFX11-SAFE-GISEL-NEXT: s_cmp_lt_i32 s4, 1 515; GFX11-SAFE-GISEL-NEXT: s_cselect_b32 s2, s6, s2 516; GFX11-SAFE-GISEL-NEXT: s_and_b32 s6, s2, 7 517; GFX11-SAFE-GISEL-NEXT: s_lshr_b32 s2, s2, 2 518; GFX11-SAFE-GISEL-NEXT: s_cmp_eq_u32 s6, 3 519; GFX11-SAFE-GISEL-NEXT: s_cselect_b32 s7, 1, 0 520; GFX11-SAFE-GISEL-NEXT: s_cmp_gt_i32 s6, 5 521; GFX11-SAFE-GISEL-NEXT: s_cselect_b32 s6, 1, 0 522; GFX11-SAFE-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) 523; GFX11-SAFE-GISEL-NEXT: s_or_b32 s6, s7, s6 524; 
GFX11-SAFE-GISEL-NEXT: s_and_b32 s6, s6, 1 525; GFX11-SAFE-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 526; GFX11-SAFE-GISEL-NEXT: s_add_i32 s2, s2, s6 527; GFX11-SAFE-GISEL-NEXT: s_cmp_gt_i32 s4, 30 528; GFX11-SAFE-GISEL-NEXT: s_cselect_b32 s2, 0x7c00, s2 529; GFX11-SAFE-GISEL-NEXT: s_cmpk_eq_i32 s4, 0x40f 530; GFX11-SAFE-GISEL-NEXT: s_cselect_b32 s2, s5, s2 531; GFX11-SAFE-GISEL-NEXT: s_lshr_b32 s3, s3, 16 532; GFX11-SAFE-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) 533; GFX11-SAFE-GISEL-NEXT: s_and_b32 s3, s3, 0x8000 534; GFX11-SAFE-GISEL-NEXT: s_or_b32 s2, s3, s2 535; GFX11-SAFE-GISEL-NEXT: s_mov_b32 s3, 0x31016000 536; GFX11-SAFE-GISEL-NEXT: v_mov_b32_e32 v0, s2 537; GFX11-SAFE-GISEL-NEXT: s_mov_b32 s2, -1 538; GFX11-SAFE-GISEL-NEXT: buffer_store_b16 v0, off, s[0:3], 0 539; GFX11-SAFE-GISEL-NEXT: s_endpgm 540; 541; GFX11-UNSAFE-SDAG-LABEL: fptrunc_f64_to_f16: 542; GFX11-UNSAFE-SDAG: ; %bb.0: 543; GFX11-UNSAFE-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 544; GFX11-UNSAFE-SDAG-NEXT: s_waitcnt lgkmcnt(0) 545; GFX11-UNSAFE-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[2:3] 546; GFX11-UNSAFE-SDAG-NEXT: s_mov_b32 s3, 0x31016000 547; GFX11-UNSAFE-SDAG-NEXT: s_mov_b32 s2, -1 548; GFX11-UNSAFE-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 549; GFX11-UNSAFE-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 550; GFX11-UNSAFE-SDAG-NEXT: buffer_store_b16 v0, off, s[0:3], 0 551; GFX11-UNSAFE-SDAG-NEXT: s_endpgm 552; 553; GFX11-UNSAFE-GISEL-LABEL: fptrunc_f64_to_f16: 554; GFX11-UNSAFE-GISEL: ; %bb.0: 555; GFX11-UNSAFE-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 556; GFX11-UNSAFE-GISEL-NEXT: s_waitcnt lgkmcnt(0) 557; GFX11-UNSAFE-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[2:3] 558; GFX11-UNSAFE-GISEL-NEXT: s_mov_b32 s2, -1 559; GFX11-UNSAFE-GISEL-NEXT: s_mov_b32 s3, 0x31016000 560; GFX11-UNSAFE-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 561; GFX11-UNSAFE-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 562; GFX11-UNSAFE-GISEL-NEXT: buffer_store_b16 v0, off, s[0:3], 0 563; 
GFX11-UNSAFE-GISEL-NEXT: s_endpgm 564 %result = fptrunc double %in to half 565 %result_i16 = bitcast half %result to i16 566 store i16 %result_i16, ptr addrspace(1) %out 567 ret void 568} 569 570define amdgpu_kernel void @fptrunc_v2f64_to_v2f32(ptr addrspace(1) %out, <2 x double> %in) { 571; SI-LABEL: fptrunc_v2f64_to_v2f32: 572; SI: ; %bb.0: 573; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0xd 574; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9 575; SI-NEXT: s_mov_b32 s7, 0xf000 576; SI-NEXT: s_mov_b32 s6, -1 577; SI-NEXT: s_waitcnt lgkmcnt(0) 578; SI-NEXT: v_cvt_f32_f64_e32 v1, s[2:3] 579; SI-NEXT: v_cvt_f32_f64_e32 v0, s[0:1] 580; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 581; SI-NEXT: s_endpgm 582; 583; VI-SDAG-LABEL: fptrunc_v2f64_to_v2f32: 584; VI-SDAG: ; %bb.0: 585; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34 586; VI-SDAG-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24 587; VI-SDAG-NEXT: s_mov_b32 s7, 0xf000 588; VI-SDAG-NEXT: s_mov_b32 s6, -1 589; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 590; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v1, s[2:3] 591; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[0:1] 592; VI-SDAG-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 593; VI-SDAG-NEXT: s_endpgm 594; 595; VI-GISEL-LABEL: fptrunc_v2f64_to_v2f32: 596; VI-GISEL: ; %bb.0: 597; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34 598; VI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24 599; VI-GISEL-NEXT: s_mov_b32 s6, -1 600; VI-GISEL-NEXT: s_mov_b32 s7, 0xf000 601; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 602; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[0:1] 603; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[2:3] 604; VI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 605; VI-GISEL-NEXT: s_endpgm 606; 607; GFX10-SDAG-LABEL: fptrunc_v2f64_to_v2f32: 608; GFX10-SDAG: ; %bb.0: 609; GFX10-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34 610; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0) 611; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v1, s[2:3] 612; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[0:1] 613; GFX10-SDAG-NEXT: 
s_load_dwordx2 s[0:1], s[4:5], 0x24 614; GFX10-SDAG-NEXT: s_mov_b32 s3, 0x31016000 615; GFX10-SDAG-NEXT: s_mov_b32 s2, -1 616; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0) 617; GFX10-SDAG-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 618; GFX10-SDAG-NEXT: s_endpgm 619; 620; GFX10-GISEL-LABEL: fptrunc_v2f64_to_v2f32: 621; GFX10-GISEL: ; %bb.0: 622; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34 623; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) 624; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[0:1] 625; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[2:3] 626; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 627; GFX10-GISEL-NEXT: s_mov_b32 s2, -1 628; GFX10-GISEL-NEXT: s_mov_b32 s3, 0x31016000 629; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) 630; GFX10-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 631; GFX10-GISEL-NEXT: s_endpgm 632; 633; GFX11-SDAG-LABEL: fptrunc_v2f64_to_v2f32: 634; GFX11-SDAG: ; %bb.0: 635; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x34 636; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 637; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v1, s[2:3] 638; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[0:1] 639; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 640; GFX11-SDAG-NEXT: s_mov_b32 s3, 0x31016000 641; GFX11-SDAG-NEXT: s_mov_b32 s2, -1 642; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) 643; GFX11-SDAG-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0 644; GFX11-SDAG-NEXT: s_endpgm 645; 646; GFX11-GISEL-LABEL: fptrunc_v2f64_to_v2f32: 647; GFX11-GISEL: ; %bb.0: 648; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x34 649; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 650; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[0:1] 651; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[2:3] 652; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 653; GFX11-GISEL-NEXT: s_mov_b32 s2, -1 654; GFX11-GISEL-NEXT: s_mov_b32 s3, 0x31016000 655; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 656; GFX11-GISEL-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0 657; GFX11-GISEL-NEXT: s_endpgm 658 %result = fptrunc <2 x double> %in 
to <2 x float>
  store <2 x float> %result, ptr addrspace(1) %out
  ret void
}

; Truncates the <3 x double> kernel argument %in to <3 x float> with a single
; fptrunc and stores the result through %out.
; Every prefix below expects three v_cvt_f32_f64_e32 conversions, one per
; element. The SI checks write the result as a dwordx2 store plus a separate
; dword store at offset 8, while the remaining prefixes expect one
; dwordx3 / b96 store.
; Only the prefixes shared by the default and -enable-unsafe-fp-math RUN lines
; are used here, so both modes are expected to emit the same code.
; The check lines are autogenerated (see the note at the top of this file), so
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
define amdgpu_kernel void @fptrunc_v3f64_to_v3f32(ptr addrspace(1) %out, <3 x double> %in) {
; SI-LABEL: fptrunc_v3f64_to_v3f32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x11
; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x15
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_cvt_f32_f64_e32 v1, s[10:11]
; SI-NEXT: v_cvt_f32_f64_e32 v0, s[8:9]
; SI-NEXT: v_cvt_f32_f64_e32 v2, s[4:5]
; SI-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:8
; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; SI-NEXT: s_endpgm
;
; VI-SDAG-LABEL: fptrunc_v3f64_to_v3f32:
; VI-SDAG: ; %bb.0:
; VI-SDAG-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x54
; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x44
; VI-SDAG-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24
; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v2, s[6:7]
; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v1, s[2:3]
; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[0:1]
; VI-SDAG-NEXT: s_mov_b32 s7, 0xf000
; VI-SDAG-NEXT: s_mov_b32 s6, -1
; VI-SDAG-NEXT: buffer_store_dwordx3 v[0:2], off, s[4:7], 0
; VI-SDAG-NEXT: s_endpgm
;
; VI-GISEL-LABEL: fptrunc_v3f64_to_v3f32:
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x44
; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-GISEL-NEXT: s_mov_b32 s2, -1
; VI-GISEL-NEXT: s_mov_b32 s3, 0xf000
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[8:9]
; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[10:11]
; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v2, s[12:13]
; VI-GISEL-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0
; VI-GISEL-NEXT: s_endpgm
;
; GFX10-SDAG-LABEL: fptrunc_v3f64_to_v3f32:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_clause 0x1
; GFX10-SDAG-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x54
; GFX10-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x44
; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v2, s[6:7]
; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v1, s[2:3]
; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[0:1]
; GFX10-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX10-SDAG-NEXT: s_mov_b32 s3, 0x31016000
; GFX10-SDAG-NEXT: s_mov_b32 s2, -1
; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-SDAG-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0
; GFX10-SDAG-NEXT: s_endpgm
;
; GFX10-GISEL-LABEL: fptrunc_v3f64_to_v3f32:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_clause 0x1
; GFX10-GISEL-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x44
; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX10-GISEL-NEXT: s_mov_b32 s2, -1
; GFX10-GISEL-NEXT: s_mov_b32 s3, 0x31016000
; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[8:9]
; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[10:11]
; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v2, s[12:13]
; GFX10-GISEL-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0
; GFX10-GISEL-NEXT: s_endpgm
;
; GFX11-SDAG-LABEL: fptrunc_v3f64_to_v3f32:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_clause 0x1
; GFX11-SDAG-NEXT: s_load_b64 s[6:7], s[4:5], 0x54
; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x44
; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v2, s[6:7]
; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v1, s[2:3]
; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[0:1]
; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-SDAG-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-SDAG-NEXT: s_mov_b32 s2, -1
; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-SDAG-NEXT: buffer_store_b96 v[0:2], off, s[0:3], 0
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-LABEL: fptrunc_v3f64_to_v3f32:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_clause 0x1
; GFX11-GISEL-NEXT: s_load_b256 s[8:15], s[4:5], 0x44
; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-GISEL-NEXT: s_mov_b32 s2, -1
; GFX11-GISEL-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[8:9]
; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[10:11]
; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v2, s[12:13]
; GFX11-GISEL-NEXT: buffer_store_b96 v[0:2], off, s[0:3], 0
; GFX11-GISEL-NEXT: s_endpgm
  %result = fptrunc <3 x double> %in to <3 x float>
  store <3 x float> %result, ptr addrspace(1) %out
  ret void
}

; Truncates the <4 x double> kernel argument %in to <4 x float> with a single
; fptrunc and stores the result through %out.
; Every prefix below expects four v_cvt_f32_f64_e32 conversions followed by
; one dwordx4 / b128 store. The SI and SDAG prefixes list the conversions from
; the last element down to the first, the GISEL prefixes from first to last.
; The check lines are autogenerated, so regenerate them with
; utils/update_llc_test_checks.py instead of hand-editing.
define amdgpu_kernel void @fptrunc_v4f64_to_v4f32(ptr addrspace(1) %out, <4 x double> %in) {
; SI-LABEL: fptrunc_v4f64_to_v4f32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x11
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_cvt_f32_f64_e32 v3, s[14:15]
; SI-NEXT: v_cvt_f32_f64_e32 v2, s[12:13]
; SI-NEXT: v_cvt_f32_f64_e32 v1, s[10:11]
; SI-NEXT: v_cvt_f32_f64_e32 v0, s[8:9]
; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; SI-NEXT: s_endpgm
;
; VI-SDAG-LABEL: fptrunc_v4f64_to_v4f32:
; VI-SDAG: ; %bb.0:
; VI-SDAG-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x44
; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-SDAG-NEXT: s_mov_b32 s3, 0xf000
; VI-SDAG-NEXT: s_mov_b32 s2, -1
; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v3, s[14:15]
; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v2, s[12:13]
; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v1, s[10:11]
; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[8:9]
; VI-SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; VI-SDAG-NEXT: s_endpgm
;
; VI-GISEL-LABEL: fptrunc_v4f64_to_v4f32:
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x44
; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-GISEL-NEXT: s_mov_b32 s2, -1
; VI-GISEL-NEXT: s_mov_b32 s3, 0xf000
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[8:9]
; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[10:11]
; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v2, s[12:13]
; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v3, s[14:15]
; VI-GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; VI-GISEL-NEXT: s_endpgm
;
; GFX10-SDAG-LABEL: fptrunc_v4f64_to_v4f32:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_clause 0x1
; GFX10-SDAG-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x44
; GFX10-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX10-SDAG-NEXT: s_mov_b32 s3, 0x31016000
; GFX10-SDAG-NEXT: s_mov_b32 s2, -1
; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v3, s[14:15]
; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v2, s[12:13]
; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v1, s[10:11]
; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[8:9]
; GFX10-SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; GFX10-SDAG-NEXT: s_endpgm
;
; GFX10-GISEL-LABEL: fptrunc_v4f64_to_v4f32:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_clause 0x1
; GFX10-GISEL-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x44
; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX10-GISEL-NEXT: s_mov_b32 s2, -1
; GFX10-GISEL-NEXT: s_mov_b32 s3, 0x31016000
; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[8:9]
; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[10:11]
; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v2, s[12:13]
; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v3, s[14:15]
; GFX10-GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; GFX10-GISEL-NEXT: s_endpgm
;
; GFX11-SDAG-LABEL: fptrunc_v4f64_to_v4f32:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_clause 0x1
; GFX11-SDAG-NEXT: s_load_b256 s[8:15], s[4:5], 0x44
; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-SDAG-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-SDAG-NEXT: s_mov_b32 s2, -1
; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v3, s[14:15]
; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v2, s[12:13]
; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v1, s[10:11]
; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[8:9]
; GFX11-SDAG-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-LABEL: fptrunc_v4f64_to_v4f32:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_clause 0x1
; GFX11-GISEL-NEXT: s_load_b256 s[8:15], s[4:5], 0x44
; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-GISEL-NEXT: s_mov_b32 s2, -1
; GFX11-GISEL-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[8:9]
; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[10:11]
; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v2, s[12:13]
; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v3, s[14:15]
; GFX11-GISEL-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-GISEL-NEXT: s_endpgm
  %result = fptrunc <4 x double> %in to <4 x float>
  store <4 x float> %result, ptr addrspace(1) %out
  ret void
}

; Truncates the <8 x double> kernel argument %in to <8 x float> with a single
; fptrunc and stores the result through %out.
; Every prefix below expects eight v_cvt_f32_f64_e32 conversions and two
; dwordx4 / b128 stores, one at offset 0 and one at offset 16. The SI and SDAG
; prefixes store the upper half (v4 to v7) first, the GISEL prefixes store the
; lower half (v0 to v3) first, and the GFX11 prefixes additionally expect an
; s_clause in front of the two stores.
; The check lines are autogenerated, so regenerate them with
; utils/update_llc_test_checks.py instead of hand-editing.
define amdgpu_kernel void @fptrunc_v8f64_to_v8f32(ptr addrspace(1) %out, <8 x double> %in) {
; SI-LABEL: fptrunc_v8f64_to_v8f32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x19
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_cvt_f32_f64_e32 v3, s[14:15]
; SI-NEXT: v_cvt_f32_f64_e32 v2, s[12:13]
; SI-NEXT: v_cvt_f32_f64_e32 v1, s[10:11]
; SI-NEXT: v_cvt_f32_f64_e32 v0, s[8:9]
; SI-NEXT: v_cvt_f32_f64_e32 v7, s[22:23]
; SI-NEXT: v_cvt_f32_f64_e32 v6, s[20:21]
; SI-NEXT: v_cvt_f32_f64_e32 v5, s[18:19]
; SI-NEXT: v_cvt_f32_f64_e32 v4, s[16:17]
; SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; SI-NEXT: s_endpgm
;
; VI-SDAG-LABEL: fptrunc_v8f64_to_v8f32:
; VI-SDAG: ; %bb.0:
; VI-SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64
; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-SDAG-NEXT: s_mov_b32 s3, 0xf000
; VI-SDAG-NEXT: s_mov_b32 s2, -1
; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v7, s[22:23]
; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v6, s[20:21]
; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v5, s[18:19]
; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v4, s[16:17]
; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v3, s[14:15]
; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v2, s[12:13]
; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v1, s[10:11]
; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[8:9]
; VI-SDAG-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
; VI-SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; VI-SDAG-NEXT: s_endpgm
;
; VI-GISEL-LABEL: fptrunc_v8f64_to_v8f32:
; VI-GISEL: ; %bb.0:
; VI-GISEL-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64
; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-GISEL-NEXT: s_mov_b32 s2, -1
; VI-GISEL-NEXT: s_mov_b32 s3, 0xf000
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[8:9]
; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[10:11]
; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v2, s[12:13]
; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v3, s[14:15]
; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v4, s[16:17]
; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v5, s[18:19]
; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v6, s[20:21]
; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v7, s[22:23]
; VI-GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; VI-GISEL-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
; VI-GISEL-NEXT: s_endpgm
;
; GFX10-SDAG-LABEL: fptrunc_v8f64_to_v8f32:
; GFX10-SDAG: ; %bb.0:
; GFX10-SDAG-NEXT: s_clause 0x1
; GFX10-SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64
; GFX10-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX10-SDAG-NEXT: s_mov_b32 s3, 0x31016000
; GFX10-SDAG-NEXT: s_mov_b32 s2, -1
; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v7, s[22:23]
; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v6, s[20:21]
; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v5, s[18:19]
; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v4, s[16:17]
; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v3, s[14:15]
; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v2, s[12:13]
; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v1, s[10:11]
; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[8:9]
; GFX10-SDAG-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
; GFX10-SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; GFX10-SDAG-NEXT: s_endpgm
;
; GFX10-GISEL-LABEL: fptrunc_v8f64_to_v8f32:
; GFX10-GISEL: ; %bb.0:
; GFX10-GISEL-NEXT: s_clause 0x1
; GFX10-GISEL-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64
; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX10-GISEL-NEXT: s_mov_b32 s2, -1
; GFX10-GISEL-NEXT: s_mov_b32 s3, 0x31016000
; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[8:9]
; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[10:11]
; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v2, s[12:13]
; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v3, s[14:15]
; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v4, s[16:17]
; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v5, s[18:19]
; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v6, s[20:21]
; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v7, s[22:23]
; GFX10-GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
; GFX10-GISEL-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
; GFX10-GISEL-NEXT: s_endpgm
;
; GFX11-SDAG-LABEL: fptrunc_v8f64_to_v8f32:
; GFX11-SDAG: ; %bb.0:
; GFX11-SDAG-NEXT: s_clause 0x1
; GFX11-SDAG-NEXT: s_load_b512 s[8:23], s[4:5], 0x64
; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-SDAG-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-SDAG-NEXT: s_mov_b32 s2, -1
; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v7, s[22:23]
; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v6, s[20:21]
; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v5, s[18:19]
; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v4, s[16:17]
; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v3, s[14:15]
; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v2, s[12:13]
; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v1, s[10:11]
; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[8:9]
; GFX11-SDAG-NEXT: s_clause 0x1
; GFX11-SDAG-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 offset:16
; GFX11-SDAG-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-SDAG-NEXT: s_endpgm
;
; GFX11-GISEL-LABEL: fptrunc_v8f64_to_v8f32:
; GFX11-GISEL: ; %bb.0:
; GFX11-GISEL-NEXT: s_clause 0x1
; GFX11-GISEL-NEXT: s_load_b512 s[8:23], s[4:5], 0x64
; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-GISEL-NEXT: s_mov_b32 s2, -1
; GFX11-GISEL-NEXT: s_mov_b32 s3, 0x31016000
; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[8:9]
; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[10:11]
; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v2, s[12:13]
; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v3, s[14:15]
; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v4, s[16:17]
; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v5, s[18:19]
; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v6, s[20:21]
; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v7, s[22:23]
; GFX11-GISEL-NEXT: s_clause 0x1
; GFX11-GISEL-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
; GFX11-GISEL-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 offset:16
; GFX11-GISEL-NEXT: s_endpgm
  %result = fptrunc <8 x double> %in to <8 x float>
  store <8 x float> %result, ptr addrspace(1) %out
  ret void
}