; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2

; Test with buggy fract, shouldn't match
; RUN: opt -S -mtriple=amdgcn-amd-amdpal -mcpu=tahiti -amdgpu-codegenprepare %s | FileCheck -check-prefixes=IR,GFX6-IR %s

; Working fract, but no f16
; RUN: opt -S -mtriple=amdgcn-amd-amdpal -mcpu=kaveri -amdgpu-codegenprepare %s | FileCheck -check-prefixes=IR,IR-FRACT,GFX7-IR %s

; Working fract and f16 support
; RUN: opt -S -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -amdgpu-codegenprepare %s | FileCheck -check-prefixes=IR,IR-FRACT,IR-LEGALF16 %s
; RUN: opt -S -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-codegenprepare %s | FileCheck -check-prefixes=IR,IR-FRACT,IR-LEGALF16 %s

; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6 %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=kaveri < %s | FileCheck -check-prefixes=GCN,GFX7 %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GCN,GFX11 %s
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx1200 < %s | FileCheck -check-prefixes=GFX12 %s

; Test patterns to match v_fract_* instructions.

; Expansion as it appears in the library with the extra output for
; floor. We can fold the nan check into the instruction, but the
; inf check must remain.
; Full expansion: minnum(x - floor(x), 0x3FEFFFFFE0000000), followed by a
; select that returns %x when %x is unordered (nan) and a select that returns
; 0.0 when fabs(%x) compares equal to +inf. The floor result is also stored
; through %ip.
define float @safe_math_fract_f32(float %x, ptr addrspace(1) writeonly captures(none) %ip) {
; GFX6-IR-LABEL: define float @safe_math_fract_f32
; GFX6-IR-SAME: (float [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0:[0-9]+]] {
; GFX6-IR-NEXT: entry:
; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
; GFX6-IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
; GFX6-IR-NEXT: [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
; GFX6-IR-NEXT: [[UNO:%.*]] = fcmp uno float [[X]], 0.000000e+00
; GFX6-IR-NEXT: [[COND:%.*]] = select i1 [[UNO]], float [[X]], float [[MIN]]
; GFX6-IR-NEXT: [[FABS:%.*]] = tail call float @llvm.fabs.f32(float [[X]])
; GFX6-IR-NEXT: [[CMPINF:%.*]] = fcmp oeq float [[FABS]], 0x7FF0000000000000
; GFX6-IR-NEXT: [[COND6:%.*]] = select i1 [[CMPINF]], float 0.000000e+00, float [[COND]]
; GFX6-IR-NEXT: store float [[FLOOR]], ptr addrspace(1) [[IP]], align 4
; GFX6-IR-NEXT: ret float [[COND6]]
;
; IR-FRACT-LABEL: define float @safe_math_fract_f32
; IR-FRACT-SAME: (float [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0:[0-9]+]] {
; IR-FRACT-NEXT: entry:
; IR-FRACT-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
; IR-FRACT-NEXT: [[COND:%.*]] = call float @llvm.amdgcn.fract.f32(float [[X]])
; IR-FRACT-NEXT: [[FABS:%.*]] = tail call float @llvm.fabs.f32(float [[X]])
; IR-FRACT-NEXT: [[CMPINF:%.*]] = fcmp oeq float [[FABS]], 0x7FF0000000000000
; IR-FRACT-NEXT: [[COND6:%.*]] = select i1 [[CMPINF]], float 0.000000e+00, float [[COND]]
; IR-FRACT-NEXT: store float [[FLOOR]], ptr addrspace(1) [[IP]], align 4
; IR-FRACT-NEXT: ret float [[COND6]]
;
; GFX6-LABEL: safe_math_fract_f32:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_floor_f32_e32 v3, v0
; GFX6-NEXT: v_sub_f32_e32 v4, v0, v3
; GFX6-NEXT: v_min_f32_e32 v4, 0x3f7fffff, v4
; GFX6-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
; GFX6-NEXT: s_mov_b32 s8, 0x7f800000
; GFX6-NEXT: s_mov_b32 s6, 0
; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v0, vcc
; GFX6-NEXT: v_cmp_neq_f32_e64 vcc, |v0|, s8
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_mov_b32 s4, s6
; GFX6-NEXT: s_mov_b32 s5, s6
; GFX6-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc
; GFX6-NEXT: buffer_store_dword v3, v[1:2], s[4:7], 0 addr64
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: safe_math_fract_f32:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s8, 0x7f800000
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: v_fract_f32_e32 v4, v0
; GFX7-NEXT: v_cmp_neq_f32_e64 vcc, |v0|, s8
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b32 s4, s6
; GFX7-NEXT: s_mov_b32 s5, s6
; GFX7-NEXT: v_floor_f32_e32 v3, v0
; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc
; GFX7-NEXT: buffer_store_dword v3, v[1:2], s[4:7], 0 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: safe_math_fract_f32:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: s_mov_b32 s4, 0x7f800000
; GFX8-NEXT: v_fract_f32_e32 v4, v0
; GFX8-NEXT: v_cmp_neq_f32_e64 vcc, |v0|, s4
; GFX8-NEXT: v_floor_f32_e32 v3, v0
; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc
; GFX8-NEXT: global_store_dword v[1:2], v3, off
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: safe_math_fract_f32:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_fract_f32_e32 v3, v0
; GFX11-NEXT: v_cmp_neq_f32_e64 vcc_lo, 0x7f800000, |v0|
; GFX11-NEXT: v_floor_f32_e32 v4, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc_lo
; GFX11-NEXT: global_store_b32 v[1:2], v4, off
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: safe_math_fract_f32:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_fract_f32_e32 v3, v0
; GFX12-NEXT: v_cmp_neq_f32_e64 vcc_lo, 0x7f800000, |v0|
; GFX12-NEXT: v_floor_f32_e32 v4, v0
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX12-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc_lo
; GFX12-NEXT: global_store_b32 v[1:2], v4, off
; GFX12-NEXT: s_setpc_b64 s[30:31]
entry:
  %floor = tail call float @llvm.floor.f32(float %x)
  %sub = fsub float %x, %floor
  %min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)
  %uno = fcmp uno float %x, 0.000000e+00
  %cond = select i1 %uno, float %x, float %min
  %fabs = tail call float @llvm.fabs.f32(float %x)
  %cmpinf = fcmp oeq float %fabs, 0x7FF0000000000000
  %cond6 = select i1 %cmpinf, float 0.000000e+00, float %cond
  store float %floor, ptr addrspace(1) %ip, align 4
  ret float %cond6
}

; Same expansion as safe_math_fract_f32 minus the fabs/+inf select: only the
; nan select remains around the minnum, and the floor result is still stored
; through %ip.
define float @safe_math_fract_f32_noinf_check(float %x, ptr addrspace(1) writeonly captures(none) %ip) {
; GFX6-IR-LABEL: define float @safe_math_fract_f32_noinf_check
; GFX6-IR-SAME: (float [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0]] {
; GFX6-IR-NEXT: entry:
; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
; GFX6-IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
; GFX6-IR-NEXT: [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
; GFX6-IR-NEXT: [[UNO:%.*]] = fcmp uno float [[X]], 0.000000e+00
; GFX6-IR-NEXT: [[COND:%.*]] = select i1 [[UNO]], float [[X]], float [[MIN]]
; GFX6-IR-NEXT: store float [[FLOOR]], ptr addrspace(1) [[IP]], align 4
; GFX6-IR-NEXT: ret float [[COND]]
;
; IR-FRACT-LABEL: define float @safe_math_fract_f32_noinf_check
; IR-FRACT-SAME: (float [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0]] {
; IR-FRACT-NEXT: entry:
; IR-FRACT-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
; IR-FRACT-NEXT: [[COND:%.*]] = call float @llvm.amdgcn.fract.f32(float [[X]])
; IR-FRACT-NEXT: store float [[FLOOR]], ptr addrspace(1) [[IP]], align 4
; IR-FRACT-NEXT: ret float [[COND]]
;
; GFX6-LABEL: safe_math_fract_f32_noinf_check:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_floor_f32_e32 v3, v0
; GFX6-NEXT: v_sub_f32_e32 v4, v0, v3
; GFX6-NEXT: s_mov_b32 s6, 0
; GFX6-NEXT: v_min_f32_e32 v4, 0x3f7fffff, v4
; GFX6-NEXT: v_cmp_u_f32_e32 vcc, v0, v0
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_mov_b32 s4, s6
; GFX6-NEXT: s_mov_b32 s5, s6
; GFX6-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc
; GFX6-NEXT: buffer_store_dword v3, v[1:2], s[4:7], 0 addr64
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: safe_math_fract_f32_noinf_check:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b32 s4, s6
; GFX7-NEXT: s_mov_b32 s5, s6
; GFX7-NEXT: v_floor_f32_e32 v3, v0
; GFX7-NEXT: v_fract_f32_e32 v0, v0
; GFX7-NEXT: buffer_store_dword v3, v[1:2], s[4:7], 0 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: safe_math_fract_f32_noinf_check:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_floor_f32_e32 v3, v0
; GFX8-NEXT: v_fract_f32_e32 v0, v0
; GFX8-NEXT: global_store_dword v[1:2], v3, off
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: safe_math_fract_f32_noinf_check:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_floor_f32_e32 v3, v0
; GFX11-NEXT: v_fract_f32_e32 v0, v0
; GFX11-NEXT: global_store_b32 v[1:2], v3, off
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: safe_math_fract_f32_noinf_check:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_floor_f32_e32 v3, v0
; GFX12-NEXT: v_fract_f32_e32 v0, v0
; GFX12-NEXT: global_store_b32 v[1:2], v3, off
; GFX12-NEXT: s_setpc_b64 s[30:31]
entry:
  %floor = tail call float @llvm.floor.f32(float %x)
  %sub = fsub float %x, %floor
  %min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)
  %uno = fcmp uno float %x, 0.000000e+00
  %cond = select i1 %uno, float %x, float %min
  store float %floor, ptr addrspace(1) %ip, align 4
  ret float %cond
}

; Cannot match fract without a nan check or no-nans.
; Here the nan select is absent and only the fabs/+inf select wraps the
; minnum; %x carries no nofpclass attribute and no call has an nnan flag.
define float @no_nan_check_math_fract_f32(float %x, ptr addrspace(1) writeonly captures(none) %ip) {
; IR-LABEL: define float @no_nan_check_math_fract_f32
; IR-SAME: (float [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0:[0-9]+]] {
; IR-NEXT: entry:
; IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
; IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
; IR-NEXT: [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
; IR-NEXT: [[FABS:%.*]] = tail call float @llvm.fabs.f32(float [[X]])
; IR-NEXT: [[CMPINF:%.*]] = fcmp oeq float [[FABS]], 0x7FF0000000000000
; IR-NEXT: [[COND6:%.*]] = select i1 [[CMPINF]], float 0.000000e+00, float [[MIN]]
; IR-NEXT: store float [[FLOOR]], ptr addrspace(1) [[IP]], align 4
; IR-NEXT: ret float [[COND6]]
;
; GFX6-LABEL: no_nan_check_math_fract_f32:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_floor_f32_e32 v3, v0
; GFX6-NEXT: v_sub_f32_e32 v4, v0, v3
; GFX6-NEXT: s_mov_b32 s8, 0x7f800000
; GFX6-NEXT: s_mov_b32 s6, 0
; GFX6-NEXT: v_min_f32_e32 v4, 0x3f7fffff, v4
; GFX6-NEXT: v_cmp_neq_f32_e64 vcc, |v0|, s8
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_mov_b32 s4, s6
; GFX6-NEXT: s_mov_b32 s5, s6
; GFX6-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc
; GFX6-NEXT: buffer_store_dword v3, v[1:2], s[4:7], 0 addr64
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: no_nan_check_math_fract_f32:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_floor_f32_e32 v3, v0
; GFX7-NEXT: v_sub_f32_e32 v4, v0, v3
; GFX7-NEXT: s_mov_b32 s8, 0x7f800000
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: v_min_f32_e32 v4, 0x3f7fffff, v4
; GFX7-NEXT: v_cmp_neq_f32_e64 vcc, |v0|, s8
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b32 s4, s6
; GFX7-NEXT: s_mov_b32 s5, s6
; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc
; GFX7-NEXT: buffer_store_dword v3, v[1:2], s[4:7], 0 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: no_nan_check_math_fract_f32:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_floor_f32_e32 v3, v0
; GFX8-NEXT: v_sub_f32_e32 v4, v0, v3
; GFX8-NEXT: s_mov_b32 s4, 0x7f800000
; GFX8-NEXT: v_min_f32_e32 v4, 0x3f7fffff, v4
; GFX8-NEXT: v_cmp_neq_f32_e64 vcc, |v0|, s4
; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc
; GFX8-NEXT: global_store_dword v[1:2], v3, off
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: no_nan_check_math_fract_f32:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_floor_f32_e32 v3, v0
; GFX11-NEXT: v_cmp_neq_f32_e64 vcc_lo, 0x7f800000, |v0|
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_sub_f32_e32 v4, v0, v3
; GFX11-NEXT: global_store_b32 v[1:2], v3, off
; GFX11-NEXT: v_min_f32_e32 v4, 0x3f7fffff, v4
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: no_nan_check_math_fract_f32:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_floor_f32_e32 v3, v0
; GFX12-NEXT: v_cmp_neq_f32_e64 vcc_lo, 0x7f800000, |v0|
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
; GFX12-NEXT: v_sub_f32_e32 v4, v0, v3
; GFX12-NEXT: global_store_b32 v[1:2], v3, off
; GFX12-NEXT: v_min_num_f32_e32 v4, 0x3f7fffff, v4
; GFX12-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc_lo
; GFX12-NEXT: s_setpc_b64 s[30:31]
entry:
  %floor = tail call float @llvm.floor.f32(float %x)
  %sub = fsub float %x, %floor
  %min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)
  %fabs = tail call float @llvm.fabs.f32(float %x)
  %cmpinf = fcmp oeq float %fabs, 0x7FF0000000000000
  %cond6 = select i1 %cmpinf, float 0.000000e+00, float %min
  store float %floor, ptr addrspace(1) %ip, align 4
  ret float %cond6
}

; Bare floor/fsub/minnum sequence with no selects; the argument is marked
; nofpclass(nan) instead of being guarded by an explicit nan check.
define float @basic_fract_f32_nonans(float nofpclass(nan) %x) {
; GFX6-IR-LABEL: define float @basic_fract_f32_nonans
; GFX6-IR-SAME: (float nofpclass(nan) [[X:%.*]]) #[[ATTR0]] {
; GFX6-IR-NEXT: entry:
; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
; GFX6-IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
; GFX6-IR-NEXT: [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
; GFX6-IR-NEXT: ret float [[MIN]]
;
; IR-FRACT-LABEL: define float @basic_fract_f32_nonans
; IR-FRACT-SAME: (float nofpclass(nan) [[X:%.*]]) #[[ATTR0]] {
; IR-FRACT-NEXT: entry:
; IR-FRACT-NEXT: [[MIN:%.*]] = call nnan float @llvm.amdgcn.fract.f32(float [[X]])
; IR-FRACT-NEXT: ret float [[MIN]]
;
; GFX6-LABEL: basic_fract_f32_nonans:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_floor_f32_e32 v1, v0
; GFX6-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX6-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: basic_fract_f32_nonans:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_fract_f32_e32 v0, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: basic_fract_f32_nonans:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_fract_f32_e32 v0, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: basic_fract_f32_nonans:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_fract_f32_e32 v0, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: basic_fract_f32_nonans:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_fract_f32_e32 v0, v0
; GFX12-NEXT: s_setpc_b64 s[30:31]
entry:
  %floor = tail call float @llvm.floor.f32(float %x)
  %sub = fsub float %x, %floor
  %min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)
  ret float %min
}

; Bare sequence with an nsz flag on the minnum call and no nan information on
; %x. The checks expect floor/fsub/minnum to be kept for every RUN line.
define float @basic_fract_f32_flags_minnum(float %x) {
; IR-LABEL: define float @basic_fract_f32_flags_minnum
; IR-SAME: (float [[X:%.*]]) #[[ATTR0]] {
; IR-NEXT: entry:
; IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
; IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
; IR-NEXT: [[MIN:%.*]] = tail call nsz float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
; IR-NEXT: ret float [[MIN]]
;
; GFX6-LABEL: basic_fract_f32_flags_minnum:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_floor_f32_e32 v1, v0
; GFX6-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX6-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: basic_fract_f32_flags_minnum:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_floor_f32_e32 v1, v0
; GFX7-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX7-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: basic_fract_f32_flags_minnum:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_floor_f32_e32 v1, v0
; GFX8-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX8-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: basic_fract_f32_flags_minnum:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_floor_f32_e32 v1, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX11-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: basic_fract_f32_flags_minnum:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_floor_f32_e32 v1, v0
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX12-NEXT: v_min_num_f32_e32 v0, 0x3f7fffff, v0
; GFX12-NEXT: s_setpc_b64 s[30:31]
entry:
  %floor = tail call float @llvm.floor.f32(float %x)
  %sub = fsub float %x, %floor
  %min = tail call nsz float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)
  ret float %min
}

; Bare sequence with an nsz flag on the fsub and a nofpclass(nan) argument.
define float @basic_fract_f32_flags_fsub(float nofpclass(nan) %x) {
; GFX6-IR-LABEL: define float @basic_fract_f32_flags_fsub
; GFX6-IR-SAME: (float nofpclass(nan) [[X:%.*]]) #[[ATTR0]] {
; GFX6-IR-NEXT: entry:
; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
; GFX6-IR-NEXT: [[SUB:%.*]] = fsub nsz float [[X]], [[FLOOR]]
; GFX6-IR-NEXT: [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
; GFX6-IR-NEXT: ret float [[MIN]]
;
; IR-FRACT-LABEL: define float @basic_fract_f32_flags_fsub
; IR-FRACT-SAME: (float nofpclass(nan) [[X:%.*]]) #[[ATTR0]] {
; IR-FRACT-NEXT: entry:
; IR-FRACT-NEXT: [[MIN:%.*]] = call nnan float @llvm.amdgcn.fract.f32(float [[X]])
; IR-FRACT-NEXT: ret float [[MIN]]
;
; GFX6-LABEL: basic_fract_f32_flags_fsub:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_floor_f32_e32 v1, v0
; GFX6-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX6-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: basic_fract_f32_flags_fsub:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_fract_f32_e32 v0, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: basic_fract_f32_flags_fsub:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_fract_f32_e32 v0, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: basic_fract_f32_flags_fsub:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_fract_f32_e32 v0, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: basic_fract_f32_flags_fsub:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_fract_f32_e32 v0, v0
; GFX12-NEXT: s_setpc_b64 s[30:31]
entry:
  %floor = tail call float @llvm.floor.f32(float %x)
  %sub = fsub nsz float %x, %floor
  %min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)
  ret float %min
}

; <2 x float> form of the bare sequence with a nofpclass(nan) argument. The
; IR-FRACT checks expect one scalar llvm.amdgcn.fract.f32 call per element,
; reassembled with insertelement.
define <2 x float> @basic_fract_v2f32_nonans(<2 x float> nofpclass(nan) %x) {
; GFX6-IR-LABEL: define <2 x float> @basic_fract_v2f32_nonans
; GFX6-IR-SAME: (<2 x float> nofpclass(nan) [[X:%.*]]) #[[ATTR0]] {
; GFX6-IR-NEXT: entry:
; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call <2 x float> @llvm.floor.v2f32(<2 x float> [[X]])
; GFX6-IR-NEXT: [[SUB:%.*]] = fsub <2 x float> [[X]], [[FLOOR]]
; GFX6-IR-NEXT: [[MIN:%.*]] = tail call <2 x float> @llvm.minnum.v2f32(<2 x float> [[SUB]], <2 x float> splat (float 0x3FEFFFFFE0000000))
; GFX6-IR-NEXT: ret <2 x float> [[MIN]]
;
; IR-FRACT-LABEL: define <2 x float> @basic_fract_v2f32_nonans
; IR-FRACT-SAME: (<2 x float> nofpclass(nan) [[X:%.*]]) #[[ATTR0]] {
; IR-FRACT-NEXT: entry:
; IR-FRACT-NEXT: [[TMP0:%.*]] = extractelement <2 x float> [[X]], i64 0
; IR-FRACT-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X]], i64 1
; IR-FRACT-NEXT: [[TMP2:%.*]] = call nnan float @llvm.amdgcn.fract.f32(float [[TMP0]])
; IR-FRACT-NEXT: [[TMP3:%.*]] = call nnan float @llvm.amdgcn.fract.f32(float [[TMP1]])
; IR-FRACT-NEXT: [[TMP4:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i64 0
; IR-FRACT-NEXT: [[MIN:%.*]] = insertelement <2 x float> [[TMP4]], float [[TMP3]], i64 1
; IR-FRACT-NEXT: ret <2 x float> [[MIN]]
;
; GFX6-LABEL: basic_fract_v2f32_nonans:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_floor_f32_e32 v2, v0
; GFX6-NEXT: v_floor_f32_e32 v3, v1
; GFX6-NEXT: v_sub_f32_e32 v1, v1, v3
; GFX6-NEXT: v_sub_f32_e32 v0, v0, v2
; GFX6-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0
; GFX6-NEXT: v_min_f32_e32 v1, 0x3f7fffff, v1
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: basic_fract_v2f32_nonans:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_fract_f32_e32 v0, v0
; GFX7-NEXT: v_fract_f32_e32 v1, v1
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: basic_fract_v2f32_nonans:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_fract_f32_e32 v0, v0
; GFX8-NEXT: v_fract_f32_e32 v1, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: basic_fract_v2f32_nonans:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_fract_f32_e32 v0, v0
; GFX11-NEXT: v_fract_f32_e32 v1, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: basic_fract_v2f32_nonans:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_fract_f32_e32 v0, v0
; GFX12-NEXT: v_fract_f32_e32 v1, v1
; GFX12-NEXT: s_setpc_b64 s[30:31]
entry:
  %floor = tail call <2 x float> @llvm.floor.v2f32(<2 x float> %x)
  %sub = fsub <2 x float> %x, %floor
  %min = tail call <2 x float> @llvm.minnum.v2f32(<2 x float> %sub, <2 x float> <float 0x3FEFFFFFE0000000, float 0x3FEFFFFFE0000000>)
  ret <2 x float> %min
}

; The fsub result has a second use: besides feeding minnum it is stored
; through %ptr, so the floor and fsub must stay even where fract is formed.
define float @basic_fract_f32_multi_use_fsub_nonans(float nofpclass(nan) %x, ptr addrspace(1) %ptr) {
; GFX6-IR-LABEL: define float @basic_fract_f32_multi_use_fsub_nonans
; GFX6-IR-SAME: (float nofpclass(nan) [[X:%.*]], ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
; GFX6-IR-NEXT: entry:
; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
; GFX6-IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
; GFX6-IR-NEXT: [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
; GFX6-IR-NEXT: store float [[SUB]], ptr addrspace(1) [[PTR]], align 4
; GFX6-IR-NEXT: ret float [[MIN]]
;
; IR-FRACT-LABEL: define float @basic_fract_f32_multi_use_fsub_nonans
; IR-FRACT-SAME: (float nofpclass(nan) [[X:%.*]], ptr addrspace(1) [[PTR:%.*]]) #[[ATTR0]] {
; IR-FRACT-NEXT: entry:
; IR-FRACT-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
; IR-FRACT-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
; IR-FRACT-NEXT: [[MIN:%.*]] = call nnan float @llvm.amdgcn.fract.f32(float [[X]])
; IR-FRACT-NEXT: store float [[SUB]], ptr addrspace(1) [[PTR]], align 4
; IR-FRACT-NEXT: ret float [[MIN]]
;
; GFX6-LABEL: basic_fract_f32_multi_use_fsub_nonans:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_floor_f32_e32 v3, v0
; GFX6-NEXT: s_mov_b32 s6, 0
; GFX6-NEXT: v_sub_f32_e32 v3, v0, v3
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: s_mov_b32 s4, s6
; GFX6-NEXT: s_mov_b32 s5, s6
; GFX6-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v3
; GFX6-NEXT: buffer_store_dword v3, v[1:2], s[4:7], 0 addr64
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0)
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: basic_fract_f32_multi_use_fsub_nonans:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: s_mov_b32 s6, 0
; GFX7-NEXT: v_floor_f32_e32 v3, v0
; GFX7-NEXT: s_mov_b32 s7, 0xf000
; GFX7-NEXT: s_mov_b32 s4, s6
; GFX7-NEXT: s_mov_b32 s5, s6
; GFX7-NEXT: v_sub_f32_e32 v3, v0, v3
; GFX7-NEXT: v_fract_f32_e32 v0, v0
; GFX7-NEXT: buffer_store_dword v3, v[1:2], s[4:7], 0 addr64
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: basic_fract_f32_multi_use_fsub_nonans:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_floor_f32_e32 v3, v0
; GFX8-NEXT: v_sub_f32_e32 v3, v0, v3
; GFX8-NEXT: v_fract_f32_e32 v0, v0
; GFX8-NEXT: global_store_dword v[1:2], v3, off
; GFX8-NEXT: s_waitcnt vmcnt(0)
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: basic_fract_f32_multi_use_fsub_nonans:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_floor_f32_e32 v3, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_sub_f32_e32 v3, v0, v3
; GFX11-NEXT: v_fract_f32_e32 v0, v0
; GFX11-NEXT: global_store_b32 v[1:2], v3, off
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: basic_fract_f32_multi_use_fsub_nonans:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_floor_f32_e32 v3, v0
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX12-NEXT: v_sub_f32_e32 v3, v0, v3
; GFX12-NEXT: v_fract_f32_e32 v0, v0
; GFX12-NEXT: global_store_b32 v[1:2], v3, off
; GFX12-NEXT: s_setpc_b64 s[30:31]
entry:
  %floor = tail call float @llvm.floor.f32(float %x)
  %sub = fsub float %x, %floor
  %min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)
  store float %sub, ptr addrspace(1) %ptr
  ret float %min
}

; No nan information on the argument; the only nnan flag is on the minnum
; call itself.
define float @nnan_minnum_fract_f32(float %x) {
; GFX6-IR-LABEL: define float @nnan_minnum_fract_f32
; GFX6-IR-SAME: (float [[X:%.*]]) #[[ATTR0]] {
; GFX6-IR-NEXT: entry:
; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
; GFX6-IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
; GFX6-IR-NEXT: [[MIN:%.*]] = tail call nnan float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
; GFX6-IR-NEXT: ret float [[MIN]]
;
; IR-FRACT-LABEL: define float @nnan_minnum_fract_f32
; IR-FRACT-SAME: (float [[X:%.*]]) #[[ATTR0]] {
; IR-FRACT-NEXT: entry:
; IR-FRACT-NEXT: [[MIN:%.*]] = call nnan float @llvm.amdgcn.fract.f32(float [[X]])
; IR-FRACT-NEXT: ret float [[MIN]]
;
; GFX6-LABEL: nnan_minnum_fract_f32:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_floor_f32_e32 v1, v0
; GFX6-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX6-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: nnan_minnum_fract_f32:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_fract_f32_e32 v0, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: nnan_minnum_fract_f32:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_fract_f32_e32 v0, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: nnan_minnum_fract_f32:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_fract_f32_e32 v0, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: nnan_minnum_fract_f32:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_fract_f32_e32 v0, v0
; GFX12-NEXT: s_setpc_b64 s[30:31]
entry:
  %floor = tail call float @llvm.floor.f32(float %x)
  %sub = fsub float %x, %floor
  %min = tail call nnan float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)
  ret float %min
}

; The only nnan flag is on the fsub; the checks expect floor/fsub/minnum to be
; kept for every RUN line.
; TODO: Could match if we checked isKnownNeverNaN on the minnum src
; instead of the pattern input source.
define float @nnan_fsub_fract_f32(float %x) {
; IR-LABEL: define float @nnan_fsub_fract_f32
; IR-SAME: (float [[X:%.*]]) #[[ATTR0]] {
; IR-NEXT: entry:
; IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]])
; IR-NEXT: [[SUB:%.*]] = fsub nnan float [[X]], [[FLOOR]]
; IR-NEXT: [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
; IR-NEXT: ret float [[MIN]]
;
; GFX6-LABEL: nnan_fsub_fract_f32:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_floor_f32_e32 v1, v0
; GFX6-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX6-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX7-LABEL: nnan_fsub_fract_f32:
; GFX7: ; %bb.0: ; %entry
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_floor_f32_e32 v1, v0
; GFX7-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX7-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: nnan_fsub_fract_f32:
; GFX8: ; %bb.0: ; %entry
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_floor_f32_e32 v1, v0
; GFX8-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX8-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: nnan_fsub_fract_f32:
; GFX11: ; %bb.0: ; %entry
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_floor_f32_e32 v1, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX11-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0
; GFX11-NEXT: s_setpc_b64 s[30:31]
;
; GFX12-LABEL: nnan_fsub_fract_f32:
; GFX12: ; %bb.0: ; %entry
; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0
; GFX12-NEXT: s_wait_expcnt 0x0
; GFX12-NEXT: s_wait_samplecnt 0x0
; GFX12-NEXT: s_wait_bvhcnt 0x0
; GFX12-NEXT: s_wait_kmcnt 0x0
; GFX12-NEXT: v_floor_f32_e32 v1, v0
; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX12-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX12-NEXT: v_min_num_f32_e32 v0, 0x3f7fffff, v0
; GFX12-NEXT: s_setpc_b64 s[30:31]
entry:
  %floor = tail call float @llvm.floor.f32(float %x)
  %sub = fsub nnan float %x, %floor
  %min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000)
  ret float %min
}

define float @nnan_floor_fract_f32(float %x) {
; IR-LABEL: define float @nnan_floor_fract_f32
; IR-SAME: (float [[X:%.*]]) #[[ATTR0]] {
; IR-NEXT: entry:
; IR-NEXT: [[FLOOR:%.*]] = tail call nnan float @llvm.floor.f32(float [[X]])
; IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]]
; IR-NEXT: [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000)
; IR-NEXT: ret float [[MIN]]
;
; GFX6-LABEL: nnan_floor_fract_f32:
; GFX6: ; %bb.0: ; %entry
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_floor_f32_e32 v1, v0
; GFX6-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX6-NEXT: v_min_f32_e32 v0, 0x3f7fffff, 
v0 788; GFX6-NEXT: s_setpc_b64 s[30:31] 789; 790; GFX7-LABEL: nnan_floor_fract_f32: 791; GFX7: ; %bb.0: ; %entry 792; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 793; GFX7-NEXT: v_floor_f32_e32 v1, v0 794; GFX7-NEXT: v_sub_f32_e32 v0, v0, v1 795; GFX7-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0 796; GFX7-NEXT: s_setpc_b64 s[30:31] 797; 798; GFX8-LABEL: nnan_floor_fract_f32: 799; GFX8: ; %bb.0: ; %entry 800; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 801; GFX8-NEXT: v_floor_f32_e32 v1, v0 802; GFX8-NEXT: v_sub_f32_e32 v0, v0, v1 803; GFX8-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0 804; GFX8-NEXT: s_setpc_b64 s[30:31] 805; 806; GFX11-LABEL: nnan_floor_fract_f32: 807; GFX11: ; %bb.0: ; %entry 808; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 809; GFX11-NEXT: v_floor_f32_e32 v1, v0 810; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 811; GFX11-NEXT: v_sub_f32_e32 v0, v0, v1 812; GFX11-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0 813; GFX11-NEXT: s_setpc_b64 s[30:31] 814; 815; GFX12-LABEL: nnan_floor_fract_f32: 816; GFX12: ; %bb.0: ; %entry 817; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 818; GFX12-NEXT: s_wait_expcnt 0x0 819; GFX12-NEXT: s_wait_samplecnt 0x0 820; GFX12-NEXT: s_wait_bvhcnt 0x0 821; GFX12-NEXT: s_wait_kmcnt 0x0 822; GFX12-NEXT: v_floor_f32_e32 v1, v0 823; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 824; GFX12-NEXT: v_sub_f32_e32 v0, v0, v1 825; GFX12-NEXT: v_min_num_f32_e32 v0, 0x3f7fffff, v0 826; GFX12-NEXT: s_setpc_b64 s[30:31] 827entry: 828 %floor = tail call nnan float @llvm.floor.f32(float %x) 829 %sub = fsub float %x, %floor 830 %min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000) 831 ret float %min 832} 833 834define float @nnan_src_fract_f32(float nofpclass(nan) %x) { 835; GFX6-IR-LABEL: define float @nnan_src_fract_f32 836; GFX6-IR-SAME: (float nofpclass(nan) [[X:%.*]]) #[[ATTR0]] { 837; GFX6-IR-NEXT: entry: 838; GFX6-IR-NEXT: [[FLOOR:%.*]] = 
tail call float @llvm.floor.f32(float [[X]]) 839; GFX6-IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]] 840; GFX6-IR-NEXT: [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000) 841; GFX6-IR-NEXT: ret float [[MIN]] 842; 843; IR-FRACT-LABEL: define float @nnan_src_fract_f32 844; IR-FRACT-SAME: (float nofpclass(nan) [[X:%.*]]) #[[ATTR0]] { 845; IR-FRACT-NEXT: entry: 846; IR-FRACT-NEXT: [[MIN:%.*]] = call nnan float @llvm.amdgcn.fract.f32(float [[X]]) 847; IR-FRACT-NEXT: ret float [[MIN]] 848; 849; GFX6-LABEL: nnan_src_fract_f32: 850; GFX6: ; %bb.0: ; %entry 851; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 852; GFX6-NEXT: v_floor_f32_e32 v1, v0 853; GFX6-NEXT: v_sub_f32_e32 v0, v0, v1 854; GFX6-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0 855; GFX6-NEXT: s_setpc_b64 s[30:31] 856; 857; GFX7-LABEL: nnan_src_fract_f32: 858; GFX7: ; %bb.0: ; %entry 859; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 860; GFX7-NEXT: v_fract_f32_e32 v0, v0 861; GFX7-NEXT: s_setpc_b64 s[30:31] 862; 863; GFX8-LABEL: nnan_src_fract_f32: 864; GFX8: ; %bb.0: ; %entry 865; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 866; GFX8-NEXT: v_fract_f32_e32 v0, v0 867; GFX8-NEXT: s_setpc_b64 s[30:31] 868; 869; GFX11-LABEL: nnan_src_fract_f32: 870; GFX11: ; %bb.0: ; %entry 871; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 872; GFX11-NEXT: v_fract_f32_e32 v0, v0 873; GFX11-NEXT: s_setpc_b64 s[30:31] 874; 875; GFX12-LABEL: nnan_src_fract_f32: 876; GFX12: ; %bb.0: ; %entry 877; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 878; GFX12-NEXT: s_wait_expcnt 0x0 879; GFX12-NEXT: s_wait_samplecnt 0x0 880; GFX12-NEXT: s_wait_bvhcnt 0x0 881; GFX12-NEXT: s_wait_kmcnt 0x0 882; GFX12-NEXT: v_fract_f32_e32 v0, v0 883; GFX12-NEXT: s_setpc_b64 s[30:31] 884entry: 885 %floor = tail call float @llvm.floor.f32(float %x) 886 %sub = fsub float %x, %floor 887 %min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000) 888 ret float %min 889} 890 891; Negative test: the minnum constant is 0x3FEFFFFFC0000000, not the 0x3FEFFFFFE0000000 used by the matching tests
892define float @not_fract_f32_wrong_const(float nofpclass(nan) %x) { 893; IR-LABEL: define float @not_fract_f32_wrong_const 894; IR-SAME: (float nofpclass(nan) [[X:%.*]]) #[[ATTR0]] { 895; IR-NEXT: entry: 896; IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]]) 897; IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]] 898; IR-NEXT: [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFC0000000) 899; IR-NEXT: ret float [[MIN]] 900; 901; GFX6-LABEL: not_fract_f32_wrong_const: 902; GFX6: ; %bb.0: ; %entry 903; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 904; GFX6-NEXT: v_floor_f32_e32 v1, v0 905; GFX6-NEXT: v_sub_f32_e32 v0, v0, v1 906; GFX6-NEXT: v_min_f32_e32 v0, 0x3f7ffffe, v0 907; GFX6-NEXT: s_setpc_b64 s[30:31] 908; 909; GFX7-LABEL: not_fract_f32_wrong_const: 910; GFX7: ; %bb.0: ; %entry 911; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 912; GFX7-NEXT: v_floor_f32_e32 v1, v0 913; GFX7-NEXT: v_sub_f32_e32 v0, v0, v1 914; GFX7-NEXT: v_min_f32_e32 v0, 0x3f7ffffe, v0 915; GFX7-NEXT: s_setpc_b64 s[30:31] 916; 917; GFX8-LABEL: not_fract_f32_wrong_const: 918; GFX8: ; %bb.0: ; %entry 919; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 920; GFX8-NEXT: v_floor_f32_e32 v1, v0 921; GFX8-NEXT: v_sub_f32_e32 v0, v0, v1 922; GFX8-NEXT: v_min_f32_e32 v0, 0x3f7ffffe, v0 923; GFX8-NEXT: s_setpc_b64 s[30:31] 924; 925; GFX11-LABEL: not_fract_f32_wrong_const: 926; GFX11: ; %bb.0: ; %entry 927; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 928; GFX11-NEXT: v_floor_f32_e32 v1, v0 929; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 930; GFX11-NEXT: v_sub_f32_e32 v0, v0, v1 931; GFX11-NEXT: v_min_f32_e32 v0, 0x3f7ffffe, v0 932; GFX11-NEXT: s_setpc_b64 s[30:31] 933; 934; GFX12-LABEL: not_fract_f32_wrong_const: 935; GFX12: ; %bb.0: ; %entry 936; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 937; GFX12-NEXT: s_wait_expcnt 0x0 938; GFX12-NEXT: s_wait_samplecnt 0x0 939; GFX12-NEXT: s_wait_bvhcnt 0x0 
940; GFX12-NEXT: s_wait_kmcnt 0x0 941; GFX12-NEXT: v_floor_f32_e32 v1, v0 942; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 943; GFX12-NEXT: v_sub_f32_e32 v0, v0, v1 944; GFX12-NEXT: v_min_num_f32_e32 v0, 0x3f7ffffe, v0 945; GFX12-NEXT: s_setpc_b64 s[30:31] 946entry: 947 %floor = tail call float @llvm.floor.f32(float %x) 948 %sub = fsub float %x, %floor 949 %min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFC0000000) 950 ret float %min 951} 952 953; Negative test: the fsub operands are swapped (floor(x) - x) 954define float @not_fract_f32_swapped_fsub(float nofpclass(nan) %x) { 955; IR-LABEL: define float @not_fract_f32_swapped_fsub 956; IR-SAME: (float nofpclass(nan) [[X:%.*]]) #[[ATTR0]] { 957; IR-NEXT: entry: 958; IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]]) 959; IR-NEXT: [[SUB:%.*]] = fsub float [[FLOOR]], [[X]] 960; IR-NEXT: [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000) 961; IR-NEXT: ret float [[MIN]] 962; 963; GFX6-LABEL: not_fract_f32_swapped_fsub: 964; GFX6: ; %bb.0: ; %entry 965; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 966; GFX6-NEXT: v_floor_f32_e32 v1, v0 967; GFX6-NEXT: v_sub_f32_e32 v0, v1, v0 968; GFX6-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0 969; GFX6-NEXT: s_setpc_b64 s[30:31] 970; 971; GFX7-LABEL: not_fract_f32_swapped_fsub: 972; GFX7: ; %bb.0: ; %entry 973; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 974; GFX7-NEXT: v_floor_f32_e32 v1, v0 975; GFX7-NEXT: v_sub_f32_e32 v0, v1, v0 976; GFX7-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0 977; GFX7-NEXT: s_setpc_b64 s[30:31] 978; 979; GFX8-LABEL: not_fract_f32_swapped_fsub: 980; GFX8: ; %bb.0: ; %entry 981; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 982; GFX8-NEXT: v_floor_f32_e32 v1, v0 983; GFX8-NEXT: v_sub_f32_e32 v0, v1, v0 984; GFX8-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0 985; GFX8-NEXT: s_setpc_b64 s[30:31] 986; 987; GFX11-LABEL: not_fract_f32_swapped_fsub: 988; GFX11: ; %bb.0: ; %entry 989; GFX11-NEXT:
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 990; GFX11-NEXT: v_floor_f32_e32 v1, v0 991; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 992; GFX11-NEXT: v_sub_f32_e32 v0, v1, v0 993; GFX11-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0 994; GFX11-NEXT: s_setpc_b64 s[30:31] 995; 996; GFX12-LABEL: not_fract_f32_swapped_fsub: 997; GFX12: ; %bb.0: ; %entry 998; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 999; GFX12-NEXT: s_wait_expcnt 0x0 1000; GFX12-NEXT: s_wait_samplecnt 0x0 1001; GFX12-NEXT: s_wait_bvhcnt 0x0 1002; GFX12-NEXT: s_wait_kmcnt 0x0 1003; GFX12-NEXT: v_floor_f32_e32 v1, v0 1004; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1005; GFX12-NEXT: v_sub_f32_e32 v0, v1, v0 1006; GFX12-NEXT: v_min_num_f32_e32 v0, 0x3f7fffff, v0 1007; GFX12-NEXT: s_setpc_b64 s[30:31] 1008entry: 1009 %floor = tail call float @llvm.floor.f32(float %x) 1010 %sub = fsub float %floor, %x 1011 %min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000) 1012 ret float %min 1013} 1014 1015; Negative test: llvm.trunc is used in place of llvm.floor 1016define float @not_fract_f32_not_floor(float nofpclass(nan) %x) { 1017; IR-LABEL: define float @not_fract_f32_not_floor 1018; IR-SAME: (float nofpclass(nan) [[X:%.*]]) #[[ATTR0]] { 1019; IR-NEXT: entry: 1020; IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.trunc.f32(float [[X]]) 1021; IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]] 1022; IR-NEXT: [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000) 1023; IR-NEXT: ret float [[MIN]] 1024; 1025; GFX6-LABEL: not_fract_f32_not_floor: 1026; GFX6: ; %bb.0: ; %entry 1027; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1028; GFX6-NEXT: v_trunc_f32_e32 v1, v0 1029; GFX6-NEXT: v_sub_f32_e32 v0, v0, v1 1030; GFX6-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0 1031; GFX6-NEXT: s_setpc_b64 s[30:31] 1032; 1033; GFX7-LABEL: not_fract_f32_not_floor: 1034; GFX7: ; %bb.0: ; %entry 1035; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1036;
GFX7-NEXT: v_trunc_f32_e32 v1, v0 1037; GFX7-NEXT: v_sub_f32_e32 v0, v0, v1 1038; GFX7-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0 1039; GFX7-NEXT: s_setpc_b64 s[30:31] 1040; 1041; GFX8-LABEL: not_fract_f32_not_floor: 1042; GFX8: ; %bb.0: ; %entry 1043; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1044; GFX8-NEXT: v_trunc_f32_e32 v1, v0 1045; GFX8-NEXT: v_sub_f32_e32 v0, v0, v1 1046; GFX8-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0 1047; GFX8-NEXT: s_setpc_b64 s[30:31] 1048; 1049; GFX11-LABEL: not_fract_f32_not_floor: 1050; GFX11: ; %bb.0: ; %entry 1051; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1052; GFX11-NEXT: v_trunc_f32_e32 v1, v0 1053; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1054; GFX11-NEXT: v_sub_f32_e32 v0, v0, v1 1055; GFX11-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0 1056; GFX11-NEXT: s_setpc_b64 s[30:31] 1057; 1058; GFX12-LABEL: not_fract_f32_not_floor: 1059; GFX12: ; %bb.0: ; %entry 1060; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1061; GFX12-NEXT: s_wait_expcnt 0x0 1062; GFX12-NEXT: s_wait_samplecnt 0x0 1063; GFX12-NEXT: s_wait_bvhcnt 0x0 1064; GFX12-NEXT: s_wait_kmcnt 0x0 1065; GFX12-NEXT: v_trunc_f32_e32 v1, v0 1066; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1067; GFX12-NEXT: v_sub_f32_e32 v0, v0, v1 1068; GFX12-NEXT: v_min_num_f32_e32 v0, 0x3f7fffff, v0 1069; GFX12-NEXT: s_setpc_b64 s[30:31] 1070entry: 1071 %floor = tail call float @llvm.trunc.f32(float %x) 1072 %sub = fsub float %x, %floor 1073 %min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000) 1074 ret float %min 1075} 1076 1077; Negative test: floor is taken of %y, but the fsub subtracts it from %x 1078define float @not_fract_f32_different_floor(float %x, float %y) { 1079; IR-LABEL: define float @not_fract_f32_different_floor 1080; IR-SAME: (float [[X:%.*]], float [[Y:%.*]]) #[[ATTR0]] { 1081; IR-NEXT: entry: 1082; IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[Y]]) 1083; IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]] 1084;
IR-NEXT: [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000) 1085; IR-NEXT: ret float [[MIN]] 1086; 1087; GFX6-LABEL: not_fract_f32_different_floor: 1088; GFX6: ; %bb.0: ; %entry 1089; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1090; GFX6-NEXT: v_floor_f32_e32 v1, v1 1091; GFX6-NEXT: v_sub_f32_e32 v0, v0, v1 1092; GFX6-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0 1093; GFX6-NEXT: s_setpc_b64 s[30:31] 1094; 1095; GFX7-LABEL: not_fract_f32_different_floor: 1096; GFX7: ; %bb.0: ; %entry 1097; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1098; GFX7-NEXT: v_floor_f32_e32 v1, v1 1099; GFX7-NEXT: v_sub_f32_e32 v0, v0, v1 1100; GFX7-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0 1101; GFX7-NEXT: s_setpc_b64 s[30:31] 1102; 1103; GFX8-LABEL: not_fract_f32_different_floor: 1104; GFX8: ; %bb.0: ; %entry 1105; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1106; GFX8-NEXT: v_floor_f32_e32 v1, v1 1107; GFX8-NEXT: v_sub_f32_e32 v0, v0, v1 1108; GFX8-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0 1109; GFX8-NEXT: s_setpc_b64 s[30:31] 1110; 1111; GFX11-LABEL: not_fract_f32_different_floor: 1112; GFX11: ; %bb.0: ; %entry 1113; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1114; GFX11-NEXT: v_floor_f32_e32 v1, v1 1115; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1116; GFX11-NEXT: v_sub_f32_e32 v0, v0, v1 1117; GFX11-NEXT: v_min_f32_e32 v0, 0x3f7fffff, v0 1118; GFX11-NEXT: s_setpc_b64 s[30:31] 1119; 1120; GFX12-LABEL: not_fract_f32_different_floor: 1121; GFX12: ; %bb.0: ; %entry 1122; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1123; GFX12-NEXT: s_wait_expcnt 0x0 1124; GFX12-NEXT: s_wait_samplecnt 0x0 1125; GFX12-NEXT: s_wait_bvhcnt 0x0 1126; GFX12-NEXT: s_wait_kmcnt 0x0 1127; GFX12-NEXT: v_floor_f32_e32 v1, v1 1128; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1129; GFX12-NEXT: v_sub_f32_e32 v0, v0, v1 1130; GFX12-NEXT: v_min_num_f32_e32 v0, 0x3f7fffff, v0 1131; GFX12-NEXT: 
s_setpc_b64 s[30:31] 1132entry: 1133 %floor = tail call float @llvm.floor.f32(float %y) 1134 %sub = fsub float %x, %floor 1135 %min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000) 1136 ret float %min 1137} 1138 1139; Negative test: llvm.maxnum is used in place of llvm.minnum 1140define float @not_fract_f32_maxnum(float nofpclass(nan) %x) { 1141; IR-LABEL: define float @not_fract_f32_maxnum 1142; IR-SAME: (float nofpclass(nan) [[X:%.*]]) #[[ATTR0]] { 1143; IR-NEXT: entry: 1144; IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]]) 1145; IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]] 1146; IR-NEXT: [[MIN:%.*]] = tail call float @llvm.maxnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000) 1147; IR-NEXT: ret float [[MIN]] 1148; 1149; GFX6-LABEL: not_fract_f32_maxnum: 1150; GFX6: ; %bb.0: ; %entry 1151; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1152; GFX6-NEXT: v_floor_f32_e32 v1, v0 1153; GFX6-NEXT: v_sub_f32_e32 v0, v0, v1 1154; GFX6-NEXT: v_max_f32_e32 v0, 0x3f7fffff, v0 1155; GFX6-NEXT: s_setpc_b64 s[30:31] 1156; 1157; GFX7-LABEL: not_fract_f32_maxnum: 1158; GFX7: ; %bb.0: ; %entry 1159; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1160; GFX7-NEXT: v_floor_f32_e32 v1, v0 1161; GFX7-NEXT: v_sub_f32_e32 v0, v0, v1 1162; GFX7-NEXT: v_max_f32_e32 v0, 0x3f7fffff, v0 1163; GFX7-NEXT: s_setpc_b64 s[30:31] 1164; 1165; GFX8-LABEL: not_fract_f32_maxnum: 1166; GFX8: ; %bb.0: ; %entry 1167; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1168; GFX8-NEXT: v_floor_f32_e32 v1, v0 1169; GFX8-NEXT: v_sub_f32_e32 v0, v0, v1 1170; GFX8-NEXT: v_max_f32_e32 v0, 0x3f7fffff, v0 1171; GFX8-NEXT: s_setpc_b64 s[30:31] 1172; 1173; GFX11-LABEL: not_fract_f32_maxnum: 1174; GFX11: ; %bb.0: ; %entry 1175; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1176; GFX11-NEXT: v_floor_f32_e32 v1, v0 1177; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1178; GFX11-NEXT: v_sub_f32_e32 v0, v0, v1 1179; GFX11-NEXT: v_max_f32_e32 v0, 0x3f7fffff, v0 1180;
GFX11-NEXT: s_setpc_b64 s[30:31] 1181; 1182; GFX12-LABEL: not_fract_f32_maxnum: 1183; GFX12: ; %bb.0: ; %entry 1184; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1185; GFX12-NEXT: s_wait_expcnt 0x0 1186; GFX12-NEXT: s_wait_samplecnt 0x0 1187; GFX12-NEXT: s_wait_bvhcnt 0x0 1188; GFX12-NEXT: s_wait_kmcnt 0x0 1189; GFX12-NEXT: v_floor_f32_e32 v1, v0 1190; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1191; GFX12-NEXT: v_sub_f32_e32 v0, v0, v1 1192; GFX12-NEXT: v_max_num_f32_e32 v0, 0x3f7fffff, v0 1193; GFX12-NEXT: s_setpc_b64 s[30:31] 1194entry: 1195 %floor = tail call float @llvm.floor.f32(float %x) 1196 %sub = fsub float %x, %floor 1197 %min = tail call float @llvm.maxnum.f32(float %sub, float 0x3FEFFFFFE0000000) 1198 ret float %min 1199} 1200 1201define float @fcmp_uno_check_is_nan_f32(float %x) { 1202; IR-LABEL: define float @fcmp_uno_check_is_nan_f32 1203; IR-SAME: (float [[X:%.*]]) #[[ATTR0]] { 1204; IR-NEXT: entry: 1205; IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]]) 1206; IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]] 1207; IR-NEXT: [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000) 1208; IR-NEXT: [[UNO:%.*]] = fcmp uno float [[X]], 0x7FF8000000000000 1209; IR-NEXT: [[COND:%.*]] = select i1 [[UNO]], float [[X]], float [[MIN]] 1210; IR-NEXT: ret float [[COND]] 1211; 1212; GCN-LABEL: fcmp_uno_check_is_nan_f32: 1213; GCN: ; %bb.0: ; %entry 1214; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1215; GCN-NEXT: s_setpc_b64 s[30:31] 1216; 1217; GFX12-LABEL: fcmp_uno_check_is_nan_f32: 1218; GFX12: ; %bb.0: ; %entry 1219; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1220; GFX12-NEXT: s_wait_expcnt 0x0 1221; GFX12-NEXT: s_wait_samplecnt 0x0 1222; GFX12-NEXT: s_wait_bvhcnt 0x0 1223; GFX12-NEXT: s_wait_kmcnt 0x0 1224; GFX12-NEXT: s_setpc_b64 s[30:31] 1225entry: 1226 %floor = tail call float @llvm.floor.f32(float %x) 1227 %sub = fsub float %x, %floor 1228 %min = tail call float 
@llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000) 1229 %uno = fcmp uno float %x, 0x7FF8000000000000 1230 %cond = select i1 %uno, float %x, float %min 1231 ret float %cond 1232} 1233 1234; No inf check 1235define float @select_nan_fract_f32(float %x) { 1236; GFX6-IR-LABEL: define float @select_nan_fract_f32 1237; GFX6-IR-SAME: (float [[X:%.*]]) #[[ATTR0]] { 1238; GFX6-IR-NEXT: entry: 1239; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]]) 1240; GFX6-IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]] 1241; GFX6-IR-NEXT: [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000) 1242; GFX6-IR-NEXT: [[UNO:%.*]] = fcmp uno float [[X]], 0.000000e+00 1243; GFX6-IR-NEXT: [[COND:%.*]] = select i1 [[UNO]], float [[X]], float [[MIN]] 1244; GFX6-IR-NEXT: ret float [[COND]] 1245; 1246; IR-FRACT-LABEL: define float @select_nan_fract_f32 1247; IR-FRACT-SAME: (float [[X:%.*]]) #[[ATTR0]] { 1248; IR-FRACT-NEXT: entry: 1249; IR-FRACT-NEXT: [[COND:%.*]] = call float @llvm.amdgcn.fract.f32(float [[X]]) 1250; IR-FRACT-NEXT: ret float [[COND]] 1251; 1252; GFX6-LABEL: select_nan_fract_f32: 1253; GFX6: ; %bb.0: ; %entry 1254; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1255; GFX6-NEXT: v_floor_f32_e32 v1, v0 1256; GFX6-NEXT: v_sub_f32_e32 v1, v0, v1 1257; GFX6-NEXT: v_min_f32_e32 v1, 0x3f7fffff, v1 1258; GFX6-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 1259; GFX6-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 1260; GFX6-NEXT: s_setpc_b64 s[30:31] 1261; 1262; GFX7-LABEL: select_nan_fract_f32: 1263; GFX7: ; %bb.0: ; %entry 1264; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1265; GFX7-NEXT: v_fract_f32_e32 v0, v0 1266; GFX7-NEXT: s_setpc_b64 s[30:31] 1267; 1268; GFX8-LABEL: select_nan_fract_f32: 1269; GFX8: ; %bb.0: ; %entry 1270; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1271; GFX8-NEXT: v_fract_f32_e32 v0, v0 1272; GFX8-NEXT: s_setpc_b64 s[30:31] 1273; 1274; GFX11-LABEL: select_nan_fract_f32: 1275; GFX11: ; %bb.0: ; %entry 
1276; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1277; GFX11-NEXT: v_fract_f32_e32 v0, v0 1278; GFX11-NEXT: s_setpc_b64 s[30:31] 1279; 1280; GFX12-LABEL: select_nan_fract_f32: 1281; GFX12: ; %bb.0: ; %entry 1282; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1283; GFX12-NEXT: s_wait_expcnt 0x0 1284; GFX12-NEXT: s_wait_samplecnt 0x0 1285; GFX12-NEXT: s_wait_bvhcnt 0x0 1286; GFX12-NEXT: s_wait_kmcnt 0x0 1287; GFX12-NEXT: v_fract_f32_e32 v0, v0 1288; GFX12-NEXT: s_setpc_b64 s[30:31] 1289entry: 1290 %floor = tail call float @llvm.floor.f32(float %x) 1291 %sub = fsub float %x, %floor 1292 %min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000) 1293 %uno = fcmp uno float %x, 0.000000e+00 1294 %cond = select i1 %uno, float %x, float %min 1295 ret float %cond 1296} 1297 1298define float @commuted_select_nan_fract_f32(float %x) { 1299; GFX6-IR-LABEL: define float @commuted_select_nan_fract_f32 1300; GFX6-IR-SAME: (float [[X:%.*]]) #[[ATTR0]] { 1301; GFX6-IR-NEXT: entry: 1302; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]]) 1303; GFX6-IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]] 1304; GFX6-IR-NEXT: [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000) 1305; GFX6-IR-NEXT: [[UNO:%.*]] = fcmp ord float [[X]], 0.000000e+00 1306; GFX6-IR-NEXT: [[COND:%.*]] = select i1 [[UNO]], float [[MIN]], float [[X]] 1307; GFX6-IR-NEXT: ret float [[COND]] 1308; 1309; IR-FRACT-LABEL: define float @commuted_select_nan_fract_f32 1310; IR-FRACT-SAME: (float [[X:%.*]]) #[[ATTR0]] { 1311; IR-FRACT-NEXT: entry: 1312; IR-FRACT-NEXT: [[COND:%.*]] = call float @llvm.amdgcn.fract.f32(float [[X]]) 1313; IR-FRACT-NEXT: ret float [[COND]] 1314; 1315; GFX6-LABEL: commuted_select_nan_fract_f32: 1316; GFX6: ; %bb.0: ; %entry 1317; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1318; GFX6-NEXT: v_floor_f32_e32 v1, v0 1319; GFX6-NEXT: v_sub_f32_e32 v1, v0, v1 1320; GFX6-NEXT: v_min_f32_e32 v1, 0x3f7fffff, v1 1321; 
GFX6-NEXT: v_cmp_o_f32_e32 vcc, v0, v0 1322; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 1323; GFX6-NEXT: s_setpc_b64 s[30:31] 1324; 1325; GFX7-LABEL: commuted_select_nan_fract_f32: 1326; GFX7: ; %bb.0: ; %entry 1327; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1328; GFX7-NEXT: v_fract_f32_e32 v0, v0 1329; GFX7-NEXT: s_setpc_b64 s[30:31] 1330; 1331; GFX8-LABEL: commuted_select_nan_fract_f32: 1332; GFX8: ; %bb.0: ; %entry 1333; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1334; GFX8-NEXT: v_fract_f32_e32 v0, v0 1335; GFX8-NEXT: s_setpc_b64 s[30:31] 1336; 1337; GFX11-LABEL: commuted_select_nan_fract_f32: 1338; GFX11: ; %bb.0: ; %entry 1339; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1340; GFX11-NEXT: v_fract_f32_e32 v0, v0 1341; GFX11-NEXT: s_setpc_b64 s[30:31] 1342; 1343; GFX12-LABEL: commuted_select_nan_fract_f32: 1344; GFX12: ; %bb.0: ; %entry 1345; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1346; GFX12-NEXT: s_wait_expcnt 0x0 1347; GFX12-NEXT: s_wait_samplecnt 0x0 1348; GFX12-NEXT: s_wait_bvhcnt 0x0 1349; GFX12-NEXT: s_wait_kmcnt 0x0 1350; GFX12-NEXT: v_fract_f32_e32 v0, v0 1351; GFX12-NEXT: s_setpc_b64 s[30:31] 1352entry: 1353 %floor = tail call float @llvm.floor.f32(float %x) 1354 %sub = fsub float %x, %floor 1355 %min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000) 1356 %uno = fcmp ord float %x, 0.000000e+00 1357 %cond = select i1 %uno, float %min, float %x 1358 ret float %cond 1359} 1360 1361define float @wrong_commuted_nan_select_f32(float %x) { 1362; IR-LABEL: define float @wrong_commuted_nan_select_f32 1363; IR-SAME: (float [[X:%.*]]) #[[ATTR0]] { 1364; IR-NEXT: entry: 1365; IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]]) 1366; IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]] 1367; IR-NEXT: [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000) 1368; IR-NEXT: [[UNO:%.*]] = fcmp uno float [[X]], 0.000000e+00 1369; IR-NEXT: [[COND:%.*]] = select i1 [[UNO]], 
float [[MIN]], float [[X]] 1370; IR-NEXT: ret float [[COND]] 1371; 1372; GFX6-LABEL: wrong_commuted_nan_select_f32: 1373; GFX6: ; %bb.0: ; %entry 1374; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1375; GFX6-NEXT: v_floor_f32_e32 v1, v0 1376; GFX6-NEXT: v_sub_f32_e32 v1, v0, v1 1377; GFX6-NEXT: v_min_f32_e32 v1, 0x3f7fffff, v1 1378; GFX6-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 1379; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 1380; GFX6-NEXT: s_setpc_b64 s[30:31] 1381; 1382; GFX7-LABEL: wrong_commuted_nan_select_f32: 1383; GFX7: ; %bb.0: ; %entry 1384; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1385; GFX7-NEXT: v_floor_f32_e32 v1, v0 1386; GFX7-NEXT: v_sub_f32_e32 v1, v0, v1 1387; GFX7-NEXT: v_min_f32_e32 v1, 0x3f7fffff, v1 1388; GFX7-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 1389; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 1390; GFX7-NEXT: s_setpc_b64 s[30:31] 1391; 1392; GFX8-LABEL: wrong_commuted_nan_select_f32: 1393; GFX8: ; %bb.0: ; %entry 1394; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1395; GFX8-NEXT: v_floor_f32_e32 v1, v0 1396; GFX8-NEXT: v_sub_f32_e32 v1, v0, v1 1397; GFX8-NEXT: v_min_f32_e32 v1, 0x3f7fffff, v1 1398; GFX8-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 1399; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 1400; GFX8-NEXT: s_setpc_b64 s[30:31] 1401; 1402; GFX11-LABEL: wrong_commuted_nan_select_f32: 1403; GFX11: ; %bb.0: ; %entry 1404; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1405; GFX11-NEXT: v_floor_f32_e32 v1, v0 1406; GFX11-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 1407; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 1408; GFX11-NEXT: v_sub_f32_e32 v1, v0, v1 1409; GFX11-NEXT: v_min_f32_e32 v1, 0x3f7fffff, v1 1410; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1411; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 1412; GFX11-NEXT: s_setpc_b64 s[30:31] 1413; 1414; GFX12-LABEL: wrong_commuted_nan_select_f32: 1415; GFX12: ; %bb.0: ; %entry 1416; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1417; GFX12-NEXT: 
s_wait_expcnt 0x0 1418; GFX12-NEXT: s_wait_samplecnt 0x0 1419; GFX12-NEXT: s_wait_bvhcnt 0x0 1420; GFX12-NEXT: s_wait_kmcnt 0x0 1421; GFX12-NEXT: v_floor_f32_e32 v1, v0 1422; GFX12-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v0 1423; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 1424; GFX12-NEXT: v_sub_f32_e32 v1, v0, v1 1425; GFX12-NEXT: v_min_num_f32_e32 v1, 0x3f7fffff, v1 1426; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 1427; GFX12-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 1428; GFX12-NEXT: s_setpc_b64 s[30:31] 1429entry: 1430 %floor = tail call float @llvm.floor.f32(float %x) 1431 %sub = fsub float %x, %floor 1432 %min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000) 1433 %uno = fcmp uno float %x, 0.000000e+00 1434 %cond = select i1 %uno, float %min, float %x 1435 ret float %cond 1436} 1437 ; Plain floor/fsub/minnum fract expansion on half whose argument is nofpclass(nan), so there is no NaN select. The checks expect an nnan llvm.amdgcn.fract.f16 call only under IR-LEGALF16; the GFX6 and GFX7 IR keeps the expansion. 1438define half @basic_fract_f16_nonan(half nofpclass(nan) %x) { 1439; GFX6-IR-LABEL: define half @basic_fract_f16_nonan 1440; GFX6-IR-SAME: (half nofpclass(nan) [[X:%.*]]) #[[ATTR0]] { 1441; GFX6-IR-NEXT: entry: 1442; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call half @llvm.floor.f16(half [[X]]) 1443; GFX6-IR-NEXT: [[SUB:%.*]] = fsub half [[X]], [[FLOOR]] 1444; GFX6-IR-NEXT: [[MIN:%.*]] = tail call half @llvm.minnum.f16(half [[SUB]], half 0xH3BFF) 1445; GFX6-IR-NEXT: ret half [[MIN]] 1446; 1447; GFX7-IR-LABEL: define half @basic_fract_f16_nonan 1448; GFX7-IR-SAME: (half nofpclass(nan) [[X:%.*]]) #[[ATTR0]] { 1449; GFX7-IR-NEXT: entry: 1450; GFX7-IR-NEXT: [[FLOOR:%.*]] = tail call half @llvm.floor.f16(half [[X]]) 1451; GFX7-IR-NEXT: [[SUB:%.*]] = fsub half [[X]], [[FLOOR]] 1452; GFX7-IR-NEXT: [[MIN:%.*]] = tail call half @llvm.minnum.f16(half [[SUB]], half 0xH3BFF) 1453; GFX7-IR-NEXT: ret half [[MIN]] 1454; 1455; IR-LEGALF16-LABEL: define half @basic_fract_f16_nonan 1456; IR-LEGALF16-SAME: (half nofpclass(nan) [[X:%.*]]) #[[ATTR0]] { 1457; IR-LEGALF16-NEXT: entry: 1458; IR-LEGALF16-NEXT: [[MIN:%.*]] = call nnan half 
@llvm.amdgcn.fract.f16(half [[X]]) 1459; IR-LEGALF16-NEXT: ret half [[MIN]] 1460; 1461; GFX6-LABEL: basic_fract_f16_nonan: 1462; GFX6: ; %bb.0: ; %entry 1463; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1464; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 1465; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 1466; GFX6-NEXT: v_floor_f32_e32 v1, v0 1467; GFX6-NEXT: v_sub_f32_e32 v0, v0, v1 1468; GFX6-NEXT: v_min_f32_e32 v0, 0x3f7fe000, v0 1469; GFX6-NEXT: s_setpc_b64 s[30:31] 1470; 1471; GFX7-LABEL: basic_fract_f16_nonan: 1472; GFX7: ; %bb.0: ; %entry 1473; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1474; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 1475; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 1476; GFX7-NEXT: v_floor_f32_e32 v1, v0 1477; GFX7-NEXT: v_sub_f32_e32 v0, v0, v1 1478; GFX7-NEXT: v_min_f32_e32 v0, 0x3f7fe000, v0 1479; GFX7-NEXT: s_setpc_b64 s[30:31] 1480; 1481; GFX8-LABEL: basic_fract_f16_nonan: 1482; GFX8: ; %bb.0: ; %entry 1483; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1484; GFX8-NEXT: v_fract_f16_e32 v0, v0 1485; GFX8-NEXT: s_setpc_b64 s[30:31] 1486; 1487; GFX11-LABEL: basic_fract_f16_nonan: 1488; GFX11: ; %bb.0: ; %entry 1489; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1490; GFX11-NEXT: v_fract_f16_e32 v0, v0 1491; GFX11-NEXT: s_setpc_b64 s[30:31] 1492; 1493; GFX12-LABEL: basic_fract_f16_nonan: 1494; GFX12: ; %bb.0: ; %entry 1495; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1496; GFX12-NEXT: s_wait_expcnt 0x0 1497; GFX12-NEXT: s_wait_samplecnt 0x0 1498; GFX12-NEXT: s_wait_bvhcnt 0x0 1499; GFX12-NEXT: s_wait_kmcnt 0x0 1500; GFX12-NEXT: v_fract_f16_e32 v0, v0 1501; GFX12-NEXT: s_setpc_b64 s[30:31] 1502entry: 1503 %floor = tail call half @llvm.floor.f16(half %x) 1504 %sub = fsub half %x, %floor 1505 %min = tail call half @llvm.minnum.f16(half %sub, half 0xH3BFF) 1506 ret half %min 1507} 1508 ; <2 x half> form of the nofpclass(nan) pattern. Under IR-LEGALF16 the checks expect the vector to be split into two scalar nnan llvm.amdgcn.fract.f16 calls that are reassembled with insertelement. 1509define <2 x half> @basic_fract_v2f16_nonan(<2 x half> nofpclass(nan) %x) { 1510; GFX6-IR-LABEL: define <2 x half> @basic_fract_v2f16_nonan 1511; GFX6-IR-SAME: (<2 x 
half> nofpclass(nan) [[X:%.*]]) #[[ATTR0]] { 1512; GFX6-IR-NEXT: entry: 1513; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call <2 x half> @llvm.floor.v2f16(<2 x half> [[X]]) 1514; GFX6-IR-NEXT: [[SUB:%.*]] = fsub <2 x half> [[X]], [[FLOOR]] 1515; GFX6-IR-NEXT: [[MIN:%.*]] = tail call <2 x half> @llvm.minnum.v2f16(<2 x half> [[SUB]], <2 x half> splat (half 0xH3BFF)) 1516; GFX6-IR-NEXT: ret <2 x half> [[MIN]] 1517; 1518; GFX7-IR-LABEL: define <2 x half> @basic_fract_v2f16_nonan 1519; GFX7-IR-SAME: (<2 x half> nofpclass(nan) [[X:%.*]]) #[[ATTR0]] { 1520; GFX7-IR-NEXT: entry: 1521; GFX7-IR-NEXT: [[FLOOR:%.*]] = tail call <2 x half> @llvm.floor.v2f16(<2 x half> [[X]]) 1522; GFX7-IR-NEXT: [[SUB:%.*]] = fsub <2 x half> [[X]], [[FLOOR]] 1523; GFX7-IR-NEXT: [[MIN:%.*]] = tail call <2 x half> @llvm.minnum.v2f16(<2 x half> [[SUB]], <2 x half> splat (half 0xH3BFF)) 1524; GFX7-IR-NEXT: ret <2 x half> [[MIN]] 1525; 1526; IR-LEGALF16-LABEL: define <2 x half> @basic_fract_v2f16_nonan 1527; IR-LEGALF16-SAME: (<2 x half> nofpclass(nan) [[X:%.*]]) #[[ATTR0]] { 1528; IR-LEGALF16-NEXT: entry: 1529; IR-LEGALF16-NEXT: [[TMP0:%.*]] = extractelement <2 x half> [[X]], i64 0 1530; IR-LEGALF16-NEXT: [[TMP1:%.*]] = extractelement <2 x half> [[X]], i64 1 1531; IR-LEGALF16-NEXT: [[TMP2:%.*]] = call nnan half @llvm.amdgcn.fract.f16(half [[TMP0]]) 1532; IR-LEGALF16-NEXT: [[TMP3:%.*]] = call nnan half @llvm.amdgcn.fract.f16(half [[TMP1]]) 1533; IR-LEGALF16-NEXT: [[TMP4:%.*]] = insertelement <2 x half> poison, half [[TMP2]], i64 0 1534; IR-LEGALF16-NEXT: [[MIN:%.*]] = insertelement <2 x half> [[TMP4]], half [[TMP3]], i64 1 1535; IR-LEGALF16-NEXT: ret <2 x half> [[MIN]] 1536; 1537; GFX6-LABEL: basic_fract_v2f16_nonan: 1538; GFX6: ; %bb.0: ; %entry 1539; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1540; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 1541; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 1542; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 1543; GFX6-NEXT: v_cvt_f32_f16_e32 v1, v1 1544; GFX6-NEXT: v_floor_f32_e32 v2, v0 
1545; GFX6-NEXT: v_floor_f32_e32 v3, v1 1546; GFX6-NEXT: v_sub_f32_e32 v1, v1, v3 1547; GFX6-NEXT: v_sub_f32_e32 v0, v0, v2 1548; GFX6-NEXT: v_min_f32_e32 v0, 0x3f7fe000, v0 1549; GFX6-NEXT: v_min_f32_e32 v1, 0x3f7fe000, v1 1550; GFX6-NEXT: s_setpc_b64 s[30:31] 1551; 1552; GFX7-LABEL: basic_fract_v2f16_nonan: 1553; GFX7: ; %bb.0: ; %entry 1554; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1555; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 1556; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 1557; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 1558; GFX7-NEXT: v_cvt_f32_f16_e32 v1, v1 1559; GFX7-NEXT: v_floor_f32_e32 v2, v0 1560; GFX7-NEXT: v_floor_f32_e32 v3, v1 1561; GFX7-NEXT: v_sub_f32_e32 v1, v1, v3 1562; GFX7-NEXT: v_sub_f32_e32 v0, v0, v2 1563; GFX7-NEXT: v_min_f32_e32 v0, 0x3f7fe000, v0 1564; GFX7-NEXT: v_min_f32_e32 v1, 0x3f7fe000, v1 1565; GFX7-NEXT: s_setpc_b64 s[30:31] 1566; 1567; GFX8-LABEL: basic_fract_v2f16_nonan: 1568; GFX8: ; %bb.0: ; %entry 1569; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1570; GFX8-NEXT: v_fract_f16_e32 v1, v0 1571; GFX8-NEXT: v_fract_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 1572; GFX8-NEXT: v_pack_b32_f16 v0, v1, v0 1573; GFX8-NEXT: s_setpc_b64 s[30:31] 1574; 1575; GFX11-LABEL: basic_fract_v2f16_nonan: 1576; GFX11: ; %bb.0: ; %entry 1577; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1578; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v0 1579; GFX11-NEXT: v_fract_f16_e32 v0, v0 1580; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 1581; GFX11-NEXT: v_fract_f16_e32 v1, v1 1582; GFX11-NEXT: v_pack_b32_f16 v0, v0, v1 1583; GFX11-NEXT: s_setpc_b64 s[30:31] 1584; 1585; GFX12-LABEL: basic_fract_v2f16_nonan: 1586; GFX12: ; %bb.0: ; %entry 1587; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1588; GFX12-NEXT: s_wait_expcnt 0x0 1589; GFX12-NEXT: s_wait_samplecnt 0x0 1590; GFX12-NEXT: s_wait_bvhcnt 0x0 1591; GFX12-NEXT: s_wait_kmcnt 0x0 1592; GFX12-NEXT: v_lshrrev_b32_e32 v1, 16, v0 1593; GFX12-NEXT: 
v_fract_f16_e32 v0, v0 1594; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 1595; GFX12-NEXT: v_fract_f16_e32 v1, v1 1596; GFX12-NEXT: v_pack_b32_f16 v0, v0, v1 1597; GFX12-NEXT: s_setpc_b64 s[30:31] 1598entry: 1599 %floor = tail call <2 x half> @llvm.floor.v2f16(<2 x half> %x) 1600 %sub = fsub <2 x half> %x, %floor 1601 %min = tail call <2 x half> @llvm.minnum.v2f16(<2 x half> %sub, <2 x half> <half 0xH3BFF, half 0xH3BFF>) 1602 ret <2 x half> %min 1603} 1604 ; double form of the nofpclass(nan) pattern with no NaN select. IR-FRACT expects a single nnan llvm.amdgcn.fract.f64 call; GFX6-IR keeps the floor/fsub/minnum sequence. 1605define double @basic_fract_f64_nanans(double nofpclass(nan) %x) { 1606; GFX6-IR-LABEL: define double @basic_fract_f64_nanans 1607; GFX6-IR-SAME: (double nofpclass(nan) [[X:%.*]]) #[[ATTR0]] { 1608; GFX6-IR-NEXT: entry: 1609; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call double @llvm.floor.f64(double [[X]]) 1610; GFX6-IR-NEXT: [[SUB:%.*]] = fsub double [[X]], [[FLOOR]] 1611; GFX6-IR-NEXT: [[MIN:%.*]] = tail call double @llvm.minnum.f64(double [[SUB]], double 0x3FEFFFFFFFFFFFFF) 1612; GFX6-IR-NEXT: ret double [[MIN]] 1613; 1614; IR-FRACT-LABEL: define double @basic_fract_f64_nanans 1615; IR-FRACT-SAME: (double nofpclass(nan) [[X:%.*]]) #[[ATTR0]] { 1616; IR-FRACT-NEXT: entry: 1617; IR-FRACT-NEXT: [[MIN:%.*]] = call nnan double @llvm.amdgcn.fract.f64(double [[X]]) 1618; IR-FRACT-NEXT: ret double [[MIN]] 1619; 1620; GFX6-LABEL: basic_fract_f64_nanans: 1621; GFX6: ; %bb.0: ; %entry 1622; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1623; GFX6-NEXT: v_fract_f64_e32 v[2:3], v[0:1] 1624; GFX6-NEXT: v_mov_b32_e32 v4, -1 1625; GFX6-NEXT: v_mov_b32_e32 v5, 0x3fefffff 1626; GFX6-NEXT: v_min_f64 v[2:3], v[2:3], v[4:5] 1627; GFX6-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 3 1628; GFX6-NEXT: s_mov_b32 s4, -1 1629; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v0, vcc 1630; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v1, vcc 1631; GFX6-NEXT: v_add_f64 v[2:3], v[0:1], -v[2:3] 1632; GFX6-NEXT: s_mov_b32 s5, 0x3fefffff 1633; GFX6-NEXT: v_add_f64 v[0:1], v[0:1], -v[2:3] 1634; GFX6-NEXT: v_min_f64 v[0:1], 
v[0:1], s[4:5] 1635; GFX6-NEXT: s_setpc_b64 s[30:31] 1636; 1637; GFX7-LABEL: basic_fract_f64_nanans: 1638; GFX7: ; %bb.0: ; %entry 1639; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1640; GFX7-NEXT: v_fract_f64_e32 v[0:1], v[0:1] 1641; GFX7-NEXT: s_setpc_b64 s[30:31] 1642; 1643; GFX8-LABEL: basic_fract_f64_nanans: 1644; GFX8: ; %bb.0: ; %entry 1645; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1646; GFX8-NEXT: v_fract_f64_e32 v[0:1], v[0:1] 1647; GFX8-NEXT: s_setpc_b64 s[30:31] 1648; 1649; GFX11-LABEL: basic_fract_f64_nanans: 1650; GFX11: ; %bb.0: ; %entry 1651; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1652; GFX11-NEXT: v_fract_f64_e32 v[0:1], v[0:1] 1653; GFX11-NEXT: s_setpc_b64 s[30:31] 1654; 1655; GFX12-LABEL: basic_fract_f64_nanans: 1656; GFX12: ; %bb.0: ; %entry 1657; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1658; GFX12-NEXT: s_wait_expcnt 0x0 1659; GFX12-NEXT: s_wait_samplecnt 0x0 1660; GFX12-NEXT: s_wait_bvhcnt 0x0 1661; GFX12-NEXT: s_wait_kmcnt 0x0 1662; GFX12-NEXT: v_fract_f64_e32 v[0:1], v[0:1] 1663; GFX12-NEXT: s_setpc_b64 s[30:31] 1664entry: 1665 %floor = tail call double @llvm.floor.f64(double %x) 1666 %sub = fsub double %x, %floor 1667 %min = tail call double @llvm.minnum.f64(double %sub, double 0x3FEFFFFFFFFFFFFF) 1668 ret double %min 1669} 1670 ; half expansion that selects %x when it is NaN and stores floor through %ip, but has no infinity check. IR-LEGALF16 expects llvm.amdgcn.fract.f16 in place of the fsub/minnum/select chain, with the floor call kept for the store. 1671define half @safe_math_fract_f16_noinf_check(half %x, ptr addrspace(1) writeonly captures(none) %ip) { 1672; GFX6-IR-LABEL: define half @safe_math_fract_f16_noinf_check 1673; GFX6-IR-SAME: (half [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0]] { 1674; GFX6-IR-NEXT: entry: 1675; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call half @llvm.floor.f16(half [[X]]) 1676; GFX6-IR-NEXT: [[SUB:%.*]] = fsub half [[X]], [[FLOOR]] 1677; GFX6-IR-NEXT: [[MIN:%.*]] = tail call half @llvm.minnum.f16(half [[SUB]], half 0xH3BFF) 1678; GFX6-IR-NEXT: [[UNO:%.*]] = fcmp uno half [[X]], 0xH0000 1679; GFX6-IR-NEXT: [[COND:%.*]] = select i1 [[UNO]], half [[X]], half [[MIN]] 1680; GFX6-IR-NEXT: 
store half [[FLOOR]], ptr addrspace(1) [[IP]], align 4 1681; GFX6-IR-NEXT: ret half [[COND]] 1682; 1683; GFX7-IR-LABEL: define half @safe_math_fract_f16_noinf_check 1684; GFX7-IR-SAME: (half [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0]] { 1685; GFX7-IR-NEXT: entry: 1686; GFX7-IR-NEXT: [[FLOOR:%.*]] = tail call half @llvm.floor.f16(half [[X]]) 1687; GFX7-IR-NEXT: [[SUB:%.*]] = fsub half [[X]], [[FLOOR]] 1688; GFX7-IR-NEXT: [[MIN:%.*]] = tail call half @llvm.minnum.f16(half [[SUB]], half 0xH3BFF) 1689; GFX7-IR-NEXT: [[UNO:%.*]] = fcmp uno half [[X]], 0xH0000 1690; GFX7-IR-NEXT: [[COND:%.*]] = select i1 [[UNO]], half [[X]], half [[MIN]] 1691; GFX7-IR-NEXT: store half [[FLOOR]], ptr addrspace(1) [[IP]], align 4 1692; GFX7-IR-NEXT: ret half [[COND]] 1693; 1694; IR-LEGALF16-LABEL: define half @safe_math_fract_f16_noinf_check 1695; IR-LEGALF16-SAME: (half [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0]] { 1696; IR-LEGALF16-NEXT: entry: 1697; IR-LEGALF16-NEXT: [[FLOOR:%.*]] = tail call half @llvm.floor.f16(half [[X]]) 1698; IR-LEGALF16-NEXT: [[COND:%.*]] = call half @llvm.amdgcn.fract.f16(half [[X]]) 1699; IR-LEGALF16-NEXT: store half [[FLOOR]], ptr addrspace(1) [[IP]], align 4 1700; IR-LEGALF16-NEXT: ret half [[COND]] 1701; 1702; GFX6-LABEL: safe_math_fract_f16_noinf_check: 1703; GFX6: ; %bb.0: ; %entry 1704; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1705; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 1706; GFX6-NEXT: s_mov_b32 s6, 0 1707; GFX6-NEXT: s_mov_b32 s7, 0xf000 1708; GFX6-NEXT: s_mov_b32 s4, s6 1709; GFX6-NEXT: v_cvt_f32_f16_e32 v0, v0 1710; GFX6-NEXT: s_mov_b32 s5, s6 1711; GFX6-NEXT: v_floor_f32_e32 v3, v0 1712; GFX6-NEXT: v_sub_f32_e32 v4, v0, v3 1713; GFX6-NEXT: v_cvt_f16_f32_e32 v3, v3 1714; GFX6-NEXT: v_min_f32_e32 v4, 0x3f7fe000, v4 1715; GFX6-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 1716; GFX6-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc 1717; GFX6-NEXT: buffer_store_short v3, v[1:2], s[4:7], 0 addr64 1718; 
GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) 1719; GFX6-NEXT: s_setpc_b64 s[30:31] 1720; 1721; GFX7-LABEL: safe_math_fract_f16_noinf_check: 1722; GFX7: ; %bb.0: ; %entry 1723; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1724; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 1725; GFX7-NEXT: s_mov_b32 s6, 0 1726; GFX7-NEXT: s_mov_b32 s7, 0xf000 1727; GFX7-NEXT: s_mov_b32 s4, s6 1728; GFX7-NEXT: v_cvt_f32_f16_e32 v0, v0 1729; GFX7-NEXT: s_mov_b32 s5, s6 1730; GFX7-NEXT: v_floor_f32_e32 v3, v0 1731; GFX7-NEXT: v_sub_f32_e32 v4, v0, v3 1732; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3 1733; GFX7-NEXT: v_min_f32_e32 v4, 0x3f7fe000, v4 1734; GFX7-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 1735; GFX7-NEXT: v_cndmask_b32_e32 v0, v4, v0, vcc 1736; GFX7-NEXT: buffer_store_short v3, v[1:2], s[4:7], 0 addr64 1737; GFX7-NEXT: s_waitcnt vmcnt(0) 1738; GFX7-NEXT: s_setpc_b64 s[30:31] 1739; 1740; GFX8-LABEL: safe_math_fract_f16_noinf_check: 1741; GFX8: ; %bb.0: ; %entry 1742; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1743; GFX8-NEXT: v_floor_f16_e32 v3, v0 1744; GFX8-NEXT: v_fract_f16_e32 v0, v0 1745; GFX8-NEXT: global_store_short v[1:2], v3, off 1746; GFX8-NEXT: s_waitcnt vmcnt(0) 1747; GFX8-NEXT: s_setpc_b64 s[30:31] 1748; 1749; GFX11-LABEL: safe_math_fract_f16_noinf_check: 1750; GFX11: ; %bb.0: ; %entry 1751; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1752; GFX11-NEXT: v_floor_f16_e32 v3, v0 1753; GFX11-NEXT: v_fract_f16_e32 v0, v0 1754; GFX11-NEXT: global_store_b16 v[1:2], v3, off 1755; GFX11-NEXT: s_setpc_b64 s[30:31] 1756; 1757; GFX12-LABEL: safe_math_fract_f16_noinf_check: 1758; GFX12: ; %bb.0: ; %entry 1759; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1760; GFX12-NEXT: s_wait_expcnt 0x0 1761; GFX12-NEXT: s_wait_samplecnt 0x0 1762; GFX12-NEXT: s_wait_bvhcnt 0x0 1763; GFX12-NEXT: s_wait_kmcnt 0x0 1764; GFX12-NEXT: v_floor_f16_e32 v3, v0 1765; GFX12-NEXT: v_fract_f16_e32 v0, v0 1766; GFX12-NEXT: global_store_b16 v[1:2], v3, off 1767; GFX12-NEXT: s_setpc_b64 s[30:31] 1768entry: 1769 %floor = 
tail call half @llvm.floor.f16(half %x) 1770 %sub = fsub half %x, %floor 1771 %min = tail call half @llvm.minnum.f16(half %sub, half 0xH3BFF) 1772 %uno = fcmp uno half %x, 0.000000e+00 1773 %cond = select i1 %uno, half %x, half %min 1774 store half %floor, ptr addrspace(1) %ip, align 4 1775 ret half %cond 1776} 1777 ; double version of the no-infinity-check pattern: floor is stored through %ip and %x is selected when it is NaN. IR-FRACT expects llvm.amdgcn.fract.f64 plus the retained floor call; GFX6-IR keeps the original expansion. 1778define double @safe_math_fract_f64_noinf_check(double %x, ptr addrspace(1) writeonly captures(none) %ip) { 1779; GFX6-IR-LABEL: define double @safe_math_fract_f64_noinf_check 1780; GFX6-IR-SAME: (double [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0]] { 1781; GFX6-IR-NEXT: entry: 1782; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call double @llvm.floor.f64(double [[X]]) 1783; GFX6-IR-NEXT: [[SUB:%.*]] = fsub double [[X]], [[FLOOR]] 1784; GFX6-IR-NEXT: [[MIN:%.*]] = tail call double @llvm.minnum.f64(double [[SUB]], double 0x3FEFFFFFFFFFFFFF) 1785; GFX6-IR-NEXT: [[UNO:%.*]] = fcmp uno double [[X]], 0.000000e+00 1786; GFX6-IR-NEXT: [[COND:%.*]] = select i1 [[UNO]], double [[X]], double [[MIN]] 1787; GFX6-IR-NEXT: store double [[FLOOR]], ptr addrspace(1) [[IP]], align 4 1788; GFX6-IR-NEXT: ret double [[COND]] 1789; 1790; IR-FRACT-LABEL: define double @safe_math_fract_f64_noinf_check 1791; IR-FRACT-SAME: (double [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0]] { 1792; IR-FRACT-NEXT: entry: 1793; IR-FRACT-NEXT: [[FLOOR:%.*]] = tail call double @llvm.floor.f64(double [[X]]) 1794; IR-FRACT-NEXT: [[COND:%.*]] = call double @llvm.amdgcn.fract.f64(double [[X]]) 1795; IR-FRACT-NEXT: store double [[FLOOR]], ptr addrspace(1) [[IP]], align 4 1796; IR-FRACT-NEXT: ret double [[COND]] 1797; 1798; GFX6-LABEL: safe_math_fract_f64_noinf_check: 1799; GFX6: ; %bb.0: ; %entry 1800; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1801; GFX6-NEXT: v_fract_f64_e32 v[4:5], v[0:1] 1802; GFX6-NEXT: v_mov_b32_e32 v6, -1 1803; GFX6-NEXT: v_mov_b32_e32 v7, 0x3fefffff 1804; GFX6-NEXT: v_min_f64 v[4:5], v[4:5], v[6:7] 1805; GFX6-NEXT: 
v_cmp_class_f64_e64 vcc, v[0:1], 3 1806; GFX6-NEXT: s_mov_b32 s8, -1 1807; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v0, vcc 1808; GFX6-NEXT: v_cndmask_b32_e32 v5, v5, v1, vcc 1809; GFX6-NEXT: v_add_f64 v[4:5], v[0:1], -v[4:5] 1810; GFX6-NEXT: s_mov_b32 s9, 0x3fefffff 1811; GFX6-NEXT: v_add_f64 v[6:7], v[0:1], -v[4:5] 1812; GFX6-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1] 1813; GFX6-NEXT: v_min_f64 v[6:7], v[6:7], s[8:9] 1814; GFX6-NEXT: s_mov_b32 s6, 0 1815; GFX6-NEXT: s_mov_b32 s7, 0xf000 1816; GFX6-NEXT: s_mov_b32 s4, s6 1817; GFX6-NEXT: s_mov_b32 s5, s6 1818; GFX6-NEXT: v_cndmask_b32_e32 v0, v6, v0, vcc 1819; GFX6-NEXT: v_cndmask_b32_e32 v1, v7, v1, vcc 1820; GFX6-NEXT: buffer_store_dwordx2 v[4:5], v[2:3], s[4:7], 0 addr64 1821; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) 1822; GFX6-NEXT: s_setpc_b64 s[30:31] 1823; 1824; GFX7-LABEL: safe_math_fract_f64_noinf_check: 1825; GFX7: ; %bb.0: ; %entry 1826; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1827; GFX7-NEXT: v_floor_f64_e32 v[4:5], v[0:1] 1828; GFX7-NEXT: v_fract_f64_e32 v[0:1], v[0:1] 1829; GFX7-NEXT: s_mov_b32 s6, 0 1830; GFX7-NEXT: s_mov_b32 s7, 0xf000 1831; GFX7-NEXT: s_mov_b32 s4, s6 1832; GFX7-NEXT: s_mov_b32 s5, s6 1833; GFX7-NEXT: buffer_store_dwordx2 v[4:5], v[2:3], s[4:7], 0 addr64 1834; GFX7-NEXT: s_waitcnt vmcnt(0) 1835; GFX7-NEXT: s_setpc_b64 s[30:31] 1836; 1837; GFX8-LABEL: safe_math_fract_f64_noinf_check: 1838; GFX8: ; %bb.0: ; %entry 1839; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1840; GFX8-NEXT: v_floor_f64_e32 v[4:5], v[0:1] 1841; GFX8-NEXT: v_fract_f64_e32 v[0:1], v[0:1] 1842; GFX8-NEXT: global_store_dwordx2 v[2:3], v[4:5], off 1843; GFX8-NEXT: s_waitcnt vmcnt(0) 1844; GFX8-NEXT: s_setpc_b64 s[30:31] 1845; 1846; GFX11-LABEL: safe_math_fract_f64_noinf_check: 1847; GFX11: ; %bb.0: ; %entry 1848; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1849; GFX11-NEXT: v_floor_f64_e32 v[4:5], v[0:1] 1850; GFX11-NEXT: v_fract_f64_e32 v[0:1], v[0:1] 1851; GFX11-NEXT: global_store_b64 v[2:3], 
v[4:5], off 1852; GFX11-NEXT: s_setpc_b64 s[30:31] 1853; 1854; GFX12-LABEL: safe_math_fract_f64_noinf_check: 1855; GFX12: ; %bb.0: ; %entry 1856; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1857; GFX12-NEXT: s_wait_expcnt 0x0 1858; GFX12-NEXT: s_wait_samplecnt 0x0 1859; GFX12-NEXT: s_wait_bvhcnt 0x0 1860; GFX12-NEXT: s_wait_kmcnt 0x0 1861; GFX12-NEXT: v_floor_f64_e32 v[4:5], v[0:1] 1862; GFX12-NEXT: v_fract_f64_e32 v[0:1], v[0:1] 1863; GFX12-NEXT: global_store_b64 v[2:3], v[4:5], off 1864; GFX12-NEXT: s_setpc_b64 s[30:31] 1865entry: 1866 %floor = tail call double @llvm.floor.f64(double %x) 1867 %sub = fsub double %x, %floor 1868 %min = tail call double @llvm.minnum.f64(double %sub, double 0x3FEFFFFFFFFFFFFF) 1869 %uno = fcmp uno double %x, 0.000000e+00 1870 %cond = select i1 %uno, double %x, double %min 1871 store double %floor, ptr addrspace(1) %ip, align 4 1872 ret double %cond 1873} 1874 ; NaN-select fract pattern with nsz on the select. IR-FRACT expects the nsz flag to appear on the resulting llvm.amdgcn.fract.f32 call. 1875define float @select_nan_fract_f32_flags_select(float %x) { 1876; GFX6-IR-LABEL: define float @select_nan_fract_f32_flags_select 1877; GFX6-IR-SAME: (float [[X:%.*]]) #[[ATTR0]] { 1878; GFX6-IR-NEXT: entry: 1879; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]]) 1880; GFX6-IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]] 1881; GFX6-IR-NEXT: [[MIN:%.*]] = tail call float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000) 1882; GFX6-IR-NEXT: [[UNO:%.*]] = fcmp uno float [[X]], 0.000000e+00 1883; GFX6-IR-NEXT: [[COND:%.*]] = select nsz i1 [[UNO]], float [[X]], float [[MIN]] 1884; GFX6-IR-NEXT: ret float [[COND]] 1885; 1886; IR-FRACT-LABEL: define float @select_nan_fract_f32_flags_select 1887; IR-FRACT-SAME: (float [[X:%.*]]) #[[ATTR0]] { 1888; IR-FRACT-NEXT: entry: 1889; IR-FRACT-NEXT: [[COND:%.*]] = call nsz float @llvm.amdgcn.fract.f32(float [[X]]) 1890; IR-FRACT-NEXT: ret float [[COND]] 1891; 1892; GFX6-LABEL: select_nan_fract_f32_flags_select: 1893; GFX6: ; %bb.0: ; %entry 1894; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1895; GFX6-NEXT: 
v_floor_f32_e32 v1, v0 1896; GFX6-NEXT: v_sub_f32_e32 v1, v0, v1 1897; GFX6-NEXT: v_min_f32_e32 v1, 0x3f7fffff, v1 1898; GFX6-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 1899; GFX6-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 1900; GFX6-NEXT: s_setpc_b64 s[30:31] 1901; 1902; GFX7-LABEL: select_nan_fract_f32_flags_select: 1903; GFX7: ; %bb.0: ; %entry 1904; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1905; GFX7-NEXT: v_fract_f32_e32 v0, v0 1906; GFX7-NEXT: s_setpc_b64 s[30:31] 1907; 1908; GFX8-LABEL: select_nan_fract_f32_flags_select: 1909; GFX8: ; %bb.0: ; %entry 1910; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1911; GFX8-NEXT: v_fract_f32_e32 v0, v0 1912; GFX8-NEXT: s_setpc_b64 s[30:31] 1913; 1914; GFX11-LABEL: select_nan_fract_f32_flags_select: 1915; GFX11: ; %bb.0: ; %entry 1916; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1917; GFX11-NEXT: v_fract_f32_e32 v0, v0 1918; GFX11-NEXT: s_setpc_b64 s[30:31] 1919; 1920; GFX12-LABEL: select_nan_fract_f32_flags_select: 1921; GFX12: ; %bb.0: ; %entry 1922; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1923; GFX12-NEXT: s_wait_expcnt 0x0 1924; GFX12-NEXT: s_wait_samplecnt 0x0 1925; GFX12-NEXT: s_wait_bvhcnt 0x0 1926; GFX12-NEXT: s_wait_kmcnt 0x0 1927; GFX12-NEXT: v_fract_f32_e32 v0, v0 1928; GFX12-NEXT: s_setpc_b64 s[30:31] 1929entry: 1930 %floor = tail call float @llvm.floor.f32(float %x) 1931 %sub = fsub float %x, %floor 1932 %min = tail call float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000) 1933 %uno = fcmp uno float %x, 0.000000e+00 1934 %cond = select nsz i1 %uno, float %x, float %min 1935 ret float %cond 1936} 1937 ; NaN-select fract pattern with nsz on the minnum call only. IR-FRACT expects a flagless llvm.amdgcn.fract.f32 call, i.e. the minnum flag does not appear on the result. 1938define float @select_nan_fract_f32_flags_minnum(float %x) { 1939; GFX6-IR-LABEL: define float @select_nan_fract_f32_flags_minnum 1940; GFX6-IR-SAME: (float [[X:%.*]]) #[[ATTR0]] { 1941; GFX6-IR-NEXT: entry: 1942; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call float @llvm.floor.f32(float [[X]]) 1943; GFX6-IR-NEXT: [[SUB:%.*]] = fsub float [[X]], [[FLOOR]] 1944; GFX6-IR-NEXT: [[MIN:%.*]] = tail call 
nsz float @llvm.minnum.f32(float [[SUB]], float 0x3FEFFFFFE0000000) 1945; GFX6-IR-NEXT: [[UNO:%.*]] = fcmp uno float [[X]], 0.000000e+00 1946; GFX6-IR-NEXT: [[COND:%.*]] = select i1 [[UNO]], float [[X]], float [[MIN]] 1947; GFX6-IR-NEXT: ret float [[COND]] 1948; 1949; IR-FRACT-LABEL: define float @select_nan_fract_f32_flags_minnum 1950; IR-FRACT-SAME: (float [[X:%.*]]) #[[ATTR0]] { 1951; IR-FRACT-NEXT: entry: 1952; IR-FRACT-NEXT: [[COND:%.*]] = call float @llvm.amdgcn.fract.f32(float [[X]]) 1953; IR-FRACT-NEXT: ret float [[COND]] 1954; 1955; GFX6-LABEL: select_nan_fract_f32_flags_minnum: 1956; GFX6: ; %bb.0: ; %entry 1957; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1958; GFX6-NEXT: v_floor_f32_e32 v1, v0 1959; GFX6-NEXT: v_sub_f32_e32 v1, v0, v1 1960; GFX6-NEXT: v_min_f32_e32 v1, 0x3f7fffff, v1 1961; GFX6-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 1962; GFX6-NEXT: v_cndmask_b32_e32 v0, v1, v0, vcc 1963; GFX6-NEXT: s_setpc_b64 s[30:31] 1964; 1965; GFX7-LABEL: select_nan_fract_f32_flags_minnum: 1966; GFX7: ; %bb.0: ; %entry 1967; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1968; GFX7-NEXT: v_fract_f32_e32 v0, v0 1969; GFX7-NEXT: s_setpc_b64 s[30:31] 1970; 1971; GFX8-LABEL: select_nan_fract_f32_flags_minnum: 1972; GFX8: ; %bb.0: ; %entry 1973; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1974; GFX8-NEXT: v_fract_f32_e32 v0, v0 1975; GFX8-NEXT: s_setpc_b64 s[30:31] 1976; 1977; GFX11-LABEL: select_nan_fract_f32_flags_minnum: 1978; GFX11: ; %bb.0: ; %entry 1979; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1980; GFX11-NEXT: v_fract_f32_e32 v0, v0 1981; GFX11-NEXT: s_setpc_b64 s[30:31] 1982; 1983; GFX12-LABEL: select_nan_fract_f32_flags_minnum: 1984; GFX12: ; %bb.0: ; %entry 1985; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1986; GFX12-NEXT: s_wait_expcnt 0x0 1987; GFX12-NEXT: s_wait_samplecnt 0x0 1988; GFX12-NEXT: s_wait_bvhcnt 0x0 1989; GFX12-NEXT: s_wait_kmcnt 0x0 1990; GFX12-NEXT: v_fract_f32_e32 v0, v0 1991; GFX12-NEXT: s_setpc_b64 s[30:31] 1992entry: 
1993 %floor = tail call float @llvm.floor.f32(float %x) 1994 %sub = fsub float %x, %floor 1995 %min = tail call nsz float @llvm.minnum.f32(float %sub, float 0x3FEFFFFFE0000000) 1996 %uno = fcmp uno float %x, 0.000000e+00 1997 %cond = select i1 %uno, float %x, float %min 1998 ret float %cond 1999} 2000 ; <2 x float> version of the full expansion (NaN select, infinity check, floor stored through %ip). IR-FRACT expects two scalar llvm.amdgcn.fract.f32 calls rebuilt into a vector, with the fabs/fcmp oeq infinity select kept. 2001define <2 x float> @safe_math_fract_v2f32(<2 x float> %x, ptr addrspace(1) writeonly captures(none) %ip) { 2002; GFX6-IR-LABEL: define <2 x float> @safe_math_fract_v2f32 2003; GFX6-IR-SAME: (<2 x float> [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0]] { 2004; GFX6-IR-NEXT: entry: 2005; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call <2 x float> @llvm.floor.v2f32(<2 x float> [[X]]) 2006; GFX6-IR-NEXT: [[SUB:%.*]] = fsub <2 x float> [[X]], [[FLOOR]] 2007; GFX6-IR-NEXT: [[MIN:%.*]] = tail call <2 x float> @llvm.minnum.v2f32(<2 x float> [[SUB]], <2 x float> splat (float 0x3FEFFFFFE0000000)) 2008; GFX6-IR-NEXT: [[UNO:%.*]] = fcmp uno <2 x float> [[X]], zeroinitializer 2009; GFX6-IR-NEXT: [[COND:%.*]] = select <2 x i1> [[UNO]], <2 x float> [[X]], <2 x float> [[MIN]] 2010; GFX6-IR-NEXT: [[FABS:%.*]] = tail call <2 x float> @llvm.fabs.v2f32(<2 x float> [[X]]) 2011; GFX6-IR-NEXT: [[CMPINF:%.*]] = fcmp oeq <2 x float> [[FABS]], splat (float 0x7FF0000000000000) 2012; GFX6-IR-NEXT: [[COND6:%.*]] = select <2 x i1> [[CMPINF]], <2 x float> zeroinitializer, <2 x float> [[COND]] 2013; GFX6-IR-NEXT: store <2 x float> [[FLOOR]], ptr addrspace(1) [[IP]], align 4 2014; GFX6-IR-NEXT: ret <2 x float> [[COND6]] 2015; 2016; IR-FRACT-LABEL: define <2 x float> @safe_math_fract_v2f32 2017; IR-FRACT-SAME: (<2 x float> [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0]] { 2018; IR-FRACT-NEXT: entry: 2019; IR-FRACT-NEXT: [[FLOOR:%.*]] = tail call <2 x float> @llvm.floor.v2f32(<2 x float> [[X]]) 2020; IR-FRACT-NEXT: [[TMP0:%.*]] = extractelement <2 x float> [[X]], i64 0 2021; IR-FRACT-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[X]], i64 1 2022; 
IR-FRACT-NEXT: [[TMP2:%.*]] = call float @llvm.amdgcn.fract.f32(float [[TMP0]]) 2023; IR-FRACT-NEXT: [[TMP3:%.*]] = call float @llvm.amdgcn.fract.f32(float [[TMP1]]) 2024; IR-FRACT-NEXT: [[TMP4:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i64 0 2025; IR-FRACT-NEXT: [[COND:%.*]] = insertelement <2 x float> [[TMP4]], float [[TMP3]], i64 1 2026; IR-FRACT-NEXT: [[FABS:%.*]] = tail call <2 x float> @llvm.fabs.v2f32(<2 x float> [[X]]) 2027; IR-FRACT-NEXT: [[CMPINF:%.*]] = fcmp oeq <2 x float> [[FABS]], splat (float 0x7FF0000000000000) 2028; IR-FRACT-NEXT: [[COND6:%.*]] = select <2 x i1> [[CMPINF]], <2 x float> zeroinitializer, <2 x float> [[COND]] 2029; IR-FRACT-NEXT: store <2 x float> [[FLOOR]], ptr addrspace(1) [[IP]], align 4 2030; IR-FRACT-NEXT: ret <2 x float> [[COND6]] 2031; 2032; GFX6-LABEL: safe_math_fract_v2f32: 2033; GFX6: ; %bb.0: ; %entry 2034; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2035; GFX6-NEXT: v_floor_f32_e32 v5, v1 2036; GFX6-NEXT: v_floor_f32_e32 v4, v0 2037; GFX6-NEXT: v_sub_f32_e32 v6, v1, v5 2038; GFX6-NEXT: v_sub_f32_e32 v7, v0, v4 2039; GFX6-NEXT: v_min_f32_e32 v6, 0x3f7fffff, v6 2040; GFX6-NEXT: v_cmp_u_f32_e32 vcc, v1, v1 2041; GFX6-NEXT: v_min_f32_e32 v7, 0x3f7fffff, v7 2042; GFX6-NEXT: v_cndmask_b32_e32 v6, v6, v1, vcc 2043; GFX6-NEXT: v_cmp_u_f32_e32 vcc, v0, v0 2044; GFX6-NEXT: v_mov_b32_e32 v8, 0x204 2045; GFX6-NEXT: v_cndmask_b32_e32 v7, v7, v0, vcc 2046; GFX6-NEXT: v_cmp_class_f32_e32 vcc, v0, v8 2047; GFX6-NEXT: s_mov_b32 s6, 0 2048; GFX6-NEXT: v_cndmask_b32_e64 v0, v7, 0, vcc 2049; GFX6-NEXT: v_cmp_class_f32_e32 vcc, v1, v8 2050; GFX6-NEXT: s_mov_b32 s7, 0xf000 2051; GFX6-NEXT: s_mov_b32 s4, s6 2052; GFX6-NEXT: s_mov_b32 s5, s6 2053; GFX6-NEXT: v_cndmask_b32_e64 v1, v6, 0, vcc 2054; GFX6-NEXT: buffer_store_dwordx2 v[4:5], v[2:3], s[4:7], 0 addr64 2055; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) 2056; GFX6-NEXT: s_setpc_b64 s[30:31] 2057; 2058; GFX7-LABEL: safe_math_fract_v2f32: 2059; GFX7: ; %bb.0: ; %entry 2060; 
GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2061; GFX7-NEXT: v_mov_b32_e32 v8, 0x204 2062; GFX7-NEXT: v_fract_f32_e32 v6, v0 2063; GFX7-NEXT: v_cmp_class_f32_e32 vcc, v0, v8 2064; GFX7-NEXT: s_mov_b32 s6, 0 2065; GFX7-NEXT: v_floor_f32_e32 v4, v0 2066; GFX7-NEXT: v_fract_f32_e32 v7, v1 2067; GFX7-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc 2068; GFX7-NEXT: v_cmp_class_f32_e32 vcc, v1, v8 2069; GFX7-NEXT: s_mov_b32 s7, 0xf000 2070; GFX7-NEXT: s_mov_b32 s4, s6 2071; GFX7-NEXT: s_mov_b32 s5, s6 2072; GFX7-NEXT: v_floor_f32_e32 v5, v1 2073; GFX7-NEXT: v_cndmask_b32_e64 v1, v7, 0, vcc 2074; GFX7-NEXT: buffer_store_dwordx2 v[4:5], v[2:3], s[4:7], 0 addr64 2075; GFX7-NEXT: s_waitcnt vmcnt(0) 2076; GFX7-NEXT: s_setpc_b64 s[30:31] 2077; 2078; GFX8-LABEL: safe_math_fract_v2f32: 2079; GFX8: ; %bb.0: ; %entry 2080; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2081; GFX8-NEXT: v_mov_b32_e32 v8, 0x204 2082; GFX8-NEXT: v_fract_f32_e32 v6, v0 2083; GFX8-NEXT: v_cmp_class_f32_e32 vcc, v0, v8 2084; GFX8-NEXT: v_floor_f32_e32 v4, v0 2085; GFX8-NEXT: v_fract_f32_e32 v7, v1 2086; GFX8-NEXT: v_cndmask_b32_e64 v0, v6, 0, vcc 2087; GFX8-NEXT: v_cmp_class_f32_e32 vcc, v1, v8 2088; GFX8-NEXT: v_floor_f32_e32 v5, v1 2089; GFX8-NEXT: v_cndmask_b32_e64 v1, v7, 0, vcc 2090; GFX8-NEXT: global_store_dwordx2 v[2:3], v[4:5], off 2091; GFX8-NEXT: s_waitcnt vmcnt(0) 2092; GFX8-NEXT: s_setpc_b64 s[30:31] 2093; 2094; GFX11-LABEL: safe_math_fract_v2f32: 2095; GFX11: ; %bb.0: ; %entry 2096; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2097; GFX11-NEXT: v_fract_f32_e32 v6, v0 2098; GFX11-NEXT: v_cmp_class_f32_e64 s0, v0, 0x204 2099; GFX11-NEXT: v_fract_f32_e32 v7, v1 2100; GFX11-NEXT: v_floor_f32_e32 v4, v0 2101; GFX11-NEXT: v_floor_f32_e32 v5, v1 2102; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) 2103; GFX11-NEXT: v_cndmask_b32_e64 v0, v6, 0, s0 2104; GFX11-NEXT: v_cmp_class_f32_e64 s0, v1, 0x204 2105; GFX11-NEXT: global_store_b64 v[2:3], v[4:5], off 2106; GFX11-NEXT: v_cndmask_b32_e64 v1, 
v7, 0, s0 2107; GFX11-NEXT: s_setpc_b64 s[30:31] 2108; 2109; GFX12-LABEL: safe_math_fract_v2f32: 2110; GFX12: ; %bb.0: ; %entry 2111; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 2112; GFX12-NEXT: s_wait_expcnt 0x0 2113; GFX12-NEXT: s_wait_samplecnt 0x0 2114; GFX12-NEXT: s_wait_bvhcnt 0x0 2115; GFX12-NEXT: s_wait_kmcnt 0x0 2116; GFX12-NEXT: v_fract_f32_e32 v6, v0 2117; GFX12-NEXT: v_cmp_class_f32_e64 s0, v0, 0x204 2118; GFX12-NEXT: v_fract_f32_e32 v7, v1 2119; GFX12-NEXT: v_floor_f32_e32 v4, v0 2120; GFX12-NEXT: v_floor_f32_e32 v5, v1 2121; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4) 2122; GFX12-NEXT: v_cndmask_b32_e64 v0, v6, 0, s0 2123; GFX12-NEXT: v_cmp_class_f32_e64 s0, v1, 0x204 2124; GFX12-NEXT: global_store_b64 v[2:3], v[4:5], off 2125; GFX12-NEXT: v_cndmask_b32_e64 v1, v7, 0, s0 2126; GFX12-NEXT: s_setpc_b64 s[30:31] 2127entry: 2128 %floor = tail call <2 x float> @llvm.floor.v2f32(<2 x float> %x) 2129 %sub = fsub <2 x float> %x, %floor 2130 %min = tail call <2 x float> @llvm.minnum.v2f32(<2 x float> %sub, <2 x float> <float 0x3FEFFFFFE0000000, float 0x3FEFFFFFE0000000>) 2131 %uno = fcmp uno <2 x float> %x, zeroinitializer 2132 %cond = select <2 x i1> %uno, <2 x float> %x, <2 x float> %min 2133 %fabs = tail call <2 x float> @llvm.fabs.v2f32(<2 x float> %x) 2134 %cmpinf = fcmp oeq <2 x float> %fabs, <float 0x7FF0000000000000, float 0x7FF0000000000000> 2135 %cond6 = select <2 x i1> %cmpinf, <2 x float> zeroinitializer, <2 x float> %cond 2136 store <2 x float> %floor, ptr addrspace(1) %ip, align 4 2137 ret <2 x float> %cond6 2138} 2139 ; double version of the full expansion (NaN select, infinity check, floor stored through %ip). IR-FRACT expects llvm.amdgcn.fract.f64 in place of the NaN select, with the fabs/fcmp oeq infinity select kept. 2140define double @safe_math_fract_f64(double %x, ptr addrspace(1) writeonly captures(none) %ip) { 2141; GFX6-IR-LABEL: define double @safe_math_fract_f64 2142; GFX6-IR-SAME: (double [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0]] { 2143; GFX6-IR-NEXT: entry: 2144; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call double @llvm.floor.f64(double [[X]]) 2145; GFX6-IR-NEXT: [[SUB:%.*]] = fsub double [[X]], [[FLOOR]] 2146; 
GFX6-IR-NEXT: [[MIN:%.*]] = tail call double @llvm.minnum.f64(double [[SUB]], double 0x3FEFFFFFFFFFFFFF) 2147; GFX6-IR-NEXT: [[UNO:%.*]] = fcmp uno double [[X]], 0.000000e+00 2148; GFX6-IR-NEXT: [[COND:%.*]] = select i1 [[UNO]], double [[X]], double [[MIN]] 2149; GFX6-IR-NEXT: [[FABS:%.*]] = tail call double @llvm.fabs.f64(double [[X]]) 2150; GFX6-IR-NEXT: [[CMPINF:%.*]] = fcmp oeq double [[FABS]], 0x7FF0000000000000 2151; GFX6-IR-NEXT: [[COND6:%.*]] = select i1 [[CMPINF]], double 0.000000e+00, double [[COND]] 2152; GFX6-IR-NEXT: store double [[FLOOR]], ptr addrspace(1) [[IP]], align 4 2153; GFX6-IR-NEXT: ret double [[COND6]] 2154; 2155; IR-FRACT-LABEL: define double @safe_math_fract_f64 2156; IR-FRACT-SAME: (double [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0]] { 2157; IR-FRACT-NEXT: entry: 2158; IR-FRACT-NEXT: [[FLOOR:%.*]] = tail call double @llvm.floor.f64(double [[X]]) 2159; IR-FRACT-NEXT: [[COND:%.*]] = call double @llvm.amdgcn.fract.f64(double [[X]]) 2160; IR-FRACT-NEXT: [[FABS:%.*]] = tail call double @llvm.fabs.f64(double [[X]]) 2161; IR-FRACT-NEXT: [[CMPINF:%.*]] = fcmp oeq double [[FABS]], 0x7FF0000000000000 2162; IR-FRACT-NEXT: [[COND6:%.*]] = select i1 [[CMPINF]], double 0.000000e+00, double [[COND]] 2163; IR-FRACT-NEXT: store double [[FLOOR]], ptr addrspace(1) [[IP]], align 4 2164; IR-FRACT-NEXT: ret double [[COND6]] 2165; 2166; GFX6-LABEL: safe_math_fract_f64: 2167; GFX6: ; %bb.0: ; %entry 2168; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2169; GFX6-NEXT: v_fract_f64_e32 v[4:5], v[0:1] 2170; GFX6-NEXT: v_mov_b32_e32 v6, -1 2171; GFX6-NEXT: v_mov_b32_e32 v7, 0x3fefffff 2172; GFX6-NEXT: v_min_f64 v[4:5], v[4:5], v[6:7] 2173; GFX6-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 3 2174; GFX6-NEXT: s_mov_b32 s8, -1 2175; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v0, vcc 2176; GFX6-NEXT: v_cndmask_b32_e32 v5, v5, v1, vcc 2177; GFX6-NEXT: v_add_f64 v[4:5], v[0:1], -v[4:5] 2178; GFX6-NEXT: s_mov_b32 s9, 0x3fefffff 2179; GFX6-NEXT: 
v_add_f64 v[6:7], v[0:1], -v[4:5] 2180; GFX6-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1] 2181; GFX6-NEXT: v_min_f64 v[6:7], v[6:7], s[8:9] 2182; GFX6-NEXT: s_mov_b32 s8, 0 2183; GFX6-NEXT: s_mov_b32 s9, 0x7ff00000 2184; GFX6-NEXT: v_cndmask_b32_e32 v7, v7, v1, vcc 2185; GFX6-NEXT: v_cndmask_b32_e32 v6, v6, v0, vcc 2186; GFX6-NEXT: v_cmp_neq_f64_e64 vcc, |v[0:1]|, s[8:9] 2187; GFX6-NEXT: s_mov_b32 s6, 0 2188; GFX6-NEXT: s_mov_b32 s7, 0xf000 2189; GFX6-NEXT: s_mov_b32 s4, s6 2190; GFX6-NEXT: s_mov_b32 s5, s6 2191; GFX6-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc 2192; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v7, vcc 2193; GFX6-NEXT: buffer_store_dwordx2 v[4:5], v[2:3], s[4:7], 0 addr64 2194; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) 2195; GFX6-NEXT: s_setpc_b64 s[30:31] 2196; 2197; GFX7-LABEL: safe_math_fract_f64: 2198; GFX7: ; %bb.0: ; %entry 2199; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2200; GFX7-NEXT: s_mov_b32 s4, 0 2201; GFX7-NEXT: s_mov_b32 s5, 0x7ff00000 2202; GFX7-NEXT: v_fract_f64_e32 v[4:5], v[0:1] 2203; GFX7-NEXT: v_cmp_neq_f64_e64 vcc, |v[0:1]|, s[4:5] 2204; GFX7-NEXT: v_floor_f64_e32 v[6:7], v[0:1] 2205; GFX7-NEXT: s_mov_b32 s6, 0 2206; GFX7-NEXT: s_mov_b32 s7, 0xf000 2207; GFX7-NEXT: s_mov_b32 s4, s6 2208; GFX7-NEXT: s_mov_b32 s5, s6 2209; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc 2210; GFX7-NEXT: v_cndmask_b32_e32 v1, 0, v5, vcc 2211; GFX7-NEXT: buffer_store_dwordx2 v[6:7], v[2:3], s[4:7], 0 addr64 2212; GFX7-NEXT: s_waitcnt vmcnt(0) 2213; GFX7-NEXT: s_setpc_b64 s[30:31] 2214; 2215; GFX8-LABEL: safe_math_fract_f64: 2216; GFX8: ; %bb.0: ; %entry 2217; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2218; GFX8-NEXT: s_mov_b32 s4, 0 2219; GFX8-NEXT: s_mov_b32 s5, 0x7ff00000 2220; GFX8-NEXT: v_fract_f64_e32 v[4:5], v[0:1] 2221; GFX8-NEXT: v_cmp_neq_f64_e64 vcc, |v[0:1]|, s[4:5] 2222; GFX8-NEXT: v_floor_f64_e32 v[6:7], v[0:1] 2223; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc 2224; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v5, vcc 2225; GFX8-NEXT: 
global_store_dwordx2 v[2:3], v[6:7], off 2226; GFX8-NEXT: s_waitcnt vmcnt(0) 2227; GFX8-NEXT: s_setpc_b64 s[30:31] 2228; 2229; GFX11-LABEL: safe_math_fract_f64: 2230; GFX11: ; %bb.0: ; %entry 2231; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2232; GFX11-NEXT: v_fract_f64_e32 v[4:5], v[0:1] 2233; GFX11-NEXT: v_cmp_neq_f64_e64 vcc_lo, 0x7ff00000, |v[0:1]| 2234; GFX11-NEXT: v_floor_f64_e32 v[6:7], v[0:1] 2235; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) 2236; GFX11-NEXT: v_dual_cndmask_b32 v0, 0, v4 :: v_dual_cndmask_b32 v1, 0, v5 2237; GFX11-NEXT: global_store_b64 v[2:3], v[6:7], off 2238; GFX11-NEXT: s_setpc_b64 s[30:31] 2239; 2240; GFX12-LABEL: safe_math_fract_f64: 2241; GFX12: ; %bb.0: ; %entry 2242; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 2243; GFX12-NEXT: s_wait_expcnt 0x0 2244; GFX12-NEXT: s_wait_samplecnt 0x0 2245; GFX12-NEXT: s_wait_bvhcnt 0x0 2246; GFX12-NEXT: s_wait_kmcnt 0x0 2247; GFX12-NEXT: v_fract_f64_e32 v[4:5], v[0:1] 2248; GFX12-NEXT: v_cmp_neq_f64_e64 vcc_lo, 0x7ff00000, |v[0:1]| 2249; GFX12-NEXT: v_floor_f64_e32 v[6:7], v[0:1] 2250; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) 2251; GFX12-NEXT: v_dual_cndmask_b32 v0, 0, v4 :: v_dual_cndmask_b32 v1, 0, v5 2252; GFX12-NEXT: global_store_b64 v[2:3], v[6:7], off 2253; GFX12-NEXT: s_setpc_b64 s[30:31] 2254entry: 2255 %floor = tail call double @llvm.floor.f64(double %x) 2256 %sub = fsub double %x, %floor 2257 %min = tail call double @llvm.minnum.f64(double %sub, double 0x3FEFFFFFFFFFFFFF) 2258 %uno = fcmp uno double %x, 0.000000e+00 2259 %cond = select i1 %uno, double %x, double %min 2260 %fabs = tail call double @llvm.fabs.f64(double %x) 2261 %cmpinf = fcmp oeq double %fabs, 0x7FF0000000000000 2262 %cond6 = select i1 %cmpinf, double 0.000000e+00, double %cond 2263 store double %floor, ptr addrspace(1) %ip, align 4 2264 ret double %cond6 2265} 2266 2267define half @safe_math_fract_f16(half %x, ptr addrspace(1) writeonly captures(none) %ip) { 2268; GFX6-IR-LABEL: define half @safe_math_fract_f16 2269; 
GFX6-IR-SAME: (half [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0]] { 2270; GFX6-IR-NEXT: entry: 2271; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call half @llvm.floor.f16(half [[X]]) 2272; GFX6-IR-NEXT: [[SUB:%.*]] = fsub half [[X]], [[FLOOR]] 2273; GFX6-IR-NEXT: [[MIN:%.*]] = tail call half @llvm.minnum.f16(half [[SUB]], half 0xH3BFF) 2274; GFX6-IR-NEXT: [[UNO:%.*]] = fcmp uno half [[X]], 0xH0000 2275; GFX6-IR-NEXT: [[COND:%.*]] = select i1 [[UNO]], half [[X]], half [[MIN]] 2276; GFX6-IR-NEXT: [[FABS:%.*]] = tail call half @llvm.fabs.f16(half [[X]]) 2277; GFX6-IR-NEXT: [[CMPINF:%.*]] = fcmp oeq half [[FABS]], 0xH7C00 2278; GFX6-IR-NEXT: [[COND6:%.*]] = select i1 [[CMPINF]], half 0xH0000, half [[COND]] 2279; GFX6-IR-NEXT: store half [[FLOOR]], ptr addrspace(1) [[IP]], align 4 2280; GFX6-IR-NEXT: ret half [[COND6]] 2281; 2282; GFX7-IR-LABEL: define half @safe_math_fract_f16 2283; GFX7-IR-SAME: (half [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0]] { 2284; GFX7-IR-NEXT: entry: 2285; GFX7-IR-NEXT: [[FLOOR:%.*]] = tail call half @llvm.floor.f16(half [[X]]) 2286; GFX7-IR-NEXT: [[SUB:%.*]] = fsub half [[X]], [[FLOOR]] 2287; GFX7-IR-NEXT: [[MIN:%.*]] = tail call half @llvm.minnum.f16(half [[SUB]], half 0xH3BFF) 2288; GFX7-IR-NEXT: [[UNO:%.*]] = fcmp uno half [[X]], 0xH0000 2289; GFX7-IR-NEXT: [[COND:%.*]] = select i1 [[UNO]], half [[X]], half [[MIN]] 2290; GFX7-IR-NEXT: [[FABS:%.*]] = tail call half @llvm.fabs.f16(half [[X]]) 2291; GFX7-IR-NEXT: [[CMPINF:%.*]] = fcmp oeq half [[FABS]], 0xH7C00 2292; GFX7-IR-NEXT: [[COND6:%.*]] = select i1 [[CMPINF]], half 0xH0000, half [[COND]] 2293; GFX7-IR-NEXT: store half [[FLOOR]], ptr addrspace(1) [[IP]], align 4 2294; GFX7-IR-NEXT: ret half [[COND6]] 2295; 2296; IR-LEGALF16-LABEL: define half @safe_math_fract_f16 2297; IR-LEGALF16-SAME: (half [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0]] { 2298; IR-LEGALF16-NEXT: entry: 2299; IR-LEGALF16-NEXT: 
[[FLOOR:%.*]] = tail call half @llvm.floor.f16(half [[X]]) 2300; IR-LEGALF16-NEXT: [[COND:%.*]] = call half @llvm.amdgcn.fract.f16(half [[X]]) 2301; IR-LEGALF16-NEXT: [[FABS:%.*]] = tail call half @llvm.fabs.f16(half [[X]]) 2302; IR-LEGALF16-NEXT: [[CMPINF:%.*]] = fcmp oeq half [[FABS]], 0xH7C00 2303; IR-LEGALF16-NEXT: [[COND6:%.*]] = select i1 [[CMPINF]], half 0xH0000, half [[COND]] 2304; IR-LEGALF16-NEXT: store half [[FLOOR]], ptr addrspace(1) [[IP]], align 4 2305; IR-LEGALF16-NEXT: ret half [[COND6]] 2306; 2307; GFX6-LABEL: safe_math_fract_f16: 2308; GFX6: ; %bb.0: ; %entry 2309; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2310; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 2311; GFX6-NEXT: s_movk_i32 s8, 0x7c00 2312; GFX6-NEXT: s_mov_b32 s6, 0 2313; GFX6-NEXT: s_mov_b32 s7, 0xf000 2314; GFX6-NEXT: v_cvt_f32_f16_e32 v3, v0 2315; GFX6-NEXT: v_and_b32_e32 v0, 0x7fff, v0 2316; GFX6-NEXT: s_mov_b32 s4, s6 2317; GFX6-NEXT: s_mov_b32 s5, s6 2318; GFX6-NEXT: v_floor_f32_e32 v4, v3 2319; GFX6-NEXT: v_sub_f32_e32 v5, v3, v4 2320; GFX6-NEXT: v_cvt_f16_f32_e32 v4, v4 2321; GFX6-NEXT: v_min_f32_e32 v5, 0x3f7fe000, v5 2322; GFX6-NEXT: v_cmp_u_f32_e32 vcc, v3, v3 2323; GFX6-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc 2324; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, s8, v0 2325; GFX6-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc 2326; GFX6-NEXT: buffer_store_short v4, v[1:2], s[4:7], 0 addr64 2327; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) 2328; GFX6-NEXT: s_setpc_b64 s[30:31] 2329; 2330; GFX7-LABEL: safe_math_fract_f16: 2331; GFX7: ; %bb.0: ; %entry 2332; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2333; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 2334; GFX7-NEXT: s_movk_i32 s8, 0x7c00 2335; GFX7-NEXT: s_mov_b32 s6, 0 2336; GFX7-NEXT: s_mov_b32 s7, 0xf000 2337; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v0 2338; GFX7-NEXT: v_and_b32_e32 v0, 0x7fff, v0 2339; GFX7-NEXT: s_mov_b32 s4, s6 2340; GFX7-NEXT: s_mov_b32 s5, s6 2341; GFX7-NEXT: v_floor_f32_e32 v4, v3 2342; GFX7-NEXT: v_sub_f32_e32 v5, v3, v4 2343; 
GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4 2344; GFX7-NEXT: v_min_f32_e32 v5, 0x3f7fe000, v5 2345; GFX7-NEXT: v_cmp_u_f32_e32 vcc, v3, v3 2346; GFX7-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc 2347; GFX7-NEXT: v_cmp_ne_u32_e32 vcc, s8, v0 2348; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc 2349; GFX7-NEXT: buffer_store_short v4, v[1:2], s[4:7], 0 addr64 2350; GFX7-NEXT: s_waitcnt vmcnt(0) 2351; GFX7-NEXT: s_setpc_b64 s[30:31] 2352; 2353; GFX8-LABEL: safe_math_fract_f16: 2354; GFX8: ; %bb.0: ; %entry 2355; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2356; GFX8-NEXT: s_movk_i32 s4, 0x7c00 2357; GFX8-NEXT: v_fract_f16_e32 v4, v0 2358; GFX8-NEXT: v_cmp_neq_f16_e64 vcc, |v0|, s4 2359; GFX8-NEXT: v_floor_f16_e32 v3, v0 2360; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v4, vcc 2361; GFX8-NEXT: global_store_short v[1:2], v3, off 2362; GFX8-NEXT: s_waitcnt vmcnt(0) 2363; GFX8-NEXT: s_setpc_b64 s[30:31] 2364; 2365; GFX11-LABEL: safe_math_fract_f16: 2366; GFX11: ; %bb.0: ; %entry 2367; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2368; GFX11-NEXT: v_fract_f16_e32 v3, v0 2369; GFX11-NEXT: v_cmp_neq_f16_e64 vcc_lo, 0x7c00, |v0| 2370; GFX11-NEXT: v_floor_f16_e32 v4, v0 2371; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) 2372; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc_lo 2373; GFX11-NEXT: global_store_b16 v[1:2], v4, off 2374; GFX11-NEXT: s_setpc_b64 s[30:31] 2375; 2376; GFX12-LABEL: safe_math_fract_f16: 2377; GFX12: ; %bb.0: ; %entry 2378; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 2379; GFX12-NEXT: s_wait_expcnt 0x0 2380; GFX12-NEXT: s_wait_samplecnt 0x0 2381; GFX12-NEXT: s_wait_bvhcnt 0x0 2382; GFX12-NEXT: s_wait_kmcnt 0x0 2383; GFX12-NEXT: v_fract_f16_e32 v3, v0 2384; GFX12-NEXT: v_cmp_neq_f16_e64 vcc_lo, 0x7c00, |v0| 2385; GFX12-NEXT: v_floor_f16_e32 v4, v0 2386; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) 2387; GFX12-NEXT: v_cndmask_b32_e32 v0, 0, v3, vcc_lo 2388; GFX12-NEXT: global_store_b16 v[1:2], v4, off 2389; GFX12-NEXT: s_setpc_b64 s[30:31] 2390entry: 2391 %floor = tail call half 
@llvm.floor.f16(half %x) 2392 %sub = fsub half %x, %floor 2393 %min = tail call half @llvm.minnum.f16(half %sub, half 0xH3BFF) 2394 %uno = fcmp uno half %x, 0.000000e+00 2395 %cond = select i1 %uno, half %x, half %min 2396 %fabs = tail call half @llvm.fabs.f16(half %x) 2397 %cmpinf = fcmp oeq half %fabs, 0xH7C00 2398 %cond6 = select i1 %cmpinf, half 0.000000e+00, half %cond 2399 store half %floor, ptr addrspace(1) %ip, align 4 2400 ret half %cond6 2401} 2402 2403define <2 x half> @safe_math_fract_v2f16(<2 x half> %x, ptr addrspace(1) writeonly captures(none) %ip) { 2404; GFX6-IR-LABEL: define <2 x half> @safe_math_fract_v2f16 2405; GFX6-IR-SAME: (<2 x half> [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0]] { 2406; GFX6-IR-NEXT: entry: 2407; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call <2 x half> @llvm.floor.v2f16(<2 x half> [[X]]) 2408; GFX6-IR-NEXT: [[SUB:%.*]] = fsub <2 x half> [[X]], [[FLOOR]] 2409; GFX6-IR-NEXT: [[MIN:%.*]] = tail call <2 x half> @llvm.minnum.v2f16(<2 x half> [[SUB]], <2 x half> splat (half 0xH3BFF)) 2410; GFX6-IR-NEXT: [[UNO:%.*]] = fcmp uno <2 x half> [[X]], zeroinitializer 2411; GFX6-IR-NEXT: [[COND:%.*]] = select <2 x i1> [[UNO]], <2 x half> [[X]], <2 x half> [[MIN]] 2412; GFX6-IR-NEXT: [[FABS:%.*]] = tail call <2 x half> @llvm.fabs.v2f16(<2 x half> [[X]]) 2413; GFX6-IR-NEXT: [[CMPINF:%.*]] = fcmp oeq <2 x half> [[FABS]], splat (half 0xH7C00) 2414; GFX6-IR-NEXT: [[COND6:%.*]] = select <2 x i1> [[CMPINF]], <2 x half> zeroinitializer, <2 x half> [[COND]] 2415; GFX6-IR-NEXT: store <2 x half> [[FLOOR]], ptr addrspace(1) [[IP]], align 4 2416; GFX6-IR-NEXT: ret <2 x half> [[COND6]] 2417; 2418; GFX7-IR-LABEL: define <2 x half> @safe_math_fract_v2f16 2419; GFX7-IR-SAME: (<2 x half> [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0]] { 2420; GFX7-IR-NEXT: entry: 2421; GFX7-IR-NEXT: [[FLOOR:%.*]] = tail call <2 x half> @llvm.floor.v2f16(<2 x half> [[X]]) 2422; GFX7-IR-NEXT: [[SUB:%.*]] = fsub <2 x half> 
[[X]], [[FLOOR]] 2423; GFX7-IR-NEXT: [[MIN:%.*]] = tail call <2 x half> @llvm.minnum.v2f16(<2 x half> [[SUB]], <2 x half> splat (half 0xH3BFF)) 2424; GFX7-IR-NEXT: [[UNO:%.*]] = fcmp uno <2 x half> [[X]], zeroinitializer 2425; GFX7-IR-NEXT: [[COND:%.*]] = select <2 x i1> [[UNO]], <2 x half> [[X]], <2 x half> [[MIN]] 2426; GFX7-IR-NEXT: [[FABS:%.*]] = tail call <2 x half> @llvm.fabs.v2f16(<2 x half> [[X]]) 2427; GFX7-IR-NEXT: [[CMPINF:%.*]] = fcmp oeq <2 x half> [[FABS]], splat (half 0xH7C00) 2428; GFX7-IR-NEXT: [[COND6:%.*]] = select <2 x i1> [[CMPINF]], <2 x half> zeroinitializer, <2 x half> [[COND]] 2429; GFX7-IR-NEXT: store <2 x half> [[FLOOR]], ptr addrspace(1) [[IP]], align 4 2430; GFX7-IR-NEXT: ret <2 x half> [[COND6]] 2431; 2432; IR-LEGALF16-LABEL: define <2 x half> @safe_math_fract_v2f16 2433; IR-LEGALF16-SAME: (<2 x half> [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0]] { 2434; IR-LEGALF16-NEXT: entry: 2435; IR-LEGALF16-NEXT: [[FLOOR:%.*]] = tail call <2 x half> @llvm.floor.v2f16(<2 x half> [[X]]) 2436; IR-LEGALF16-NEXT: [[TMP0:%.*]] = extractelement <2 x half> [[X]], i64 0 2437; IR-LEGALF16-NEXT: [[TMP1:%.*]] = extractelement <2 x half> [[X]], i64 1 2438; IR-LEGALF16-NEXT: [[TMP2:%.*]] = call half @llvm.amdgcn.fract.f16(half [[TMP0]]) 2439; IR-LEGALF16-NEXT: [[TMP3:%.*]] = call half @llvm.amdgcn.fract.f16(half [[TMP1]]) 2440; IR-LEGALF16-NEXT: [[TMP4:%.*]] = insertelement <2 x half> poison, half [[TMP2]], i64 0 2441; IR-LEGALF16-NEXT: [[COND:%.*]] = insertelement <2 x half> [[TMP4]], half [[TMP3]], i64 1 2442; IR-LEGALF16-NEXT: [[FABS:%.*]] = tail call <2 x half> @llvm.fabs.v2f16(<2 x half> [[X]]) 2443; IR-LEGALF16-NEXT: [[CMPINF:%.*]] = fcmp oeq <2 x half> [[FABS]], splat (half 0xH7C00) 2444; IR-LEGALF16-NEXT: [[COND6:%.*]] = select <2 x i1> [[CMPINF]], <2 x half> zeroinitializer, <2 x half> [[COND]] 2445; IR-LEGALF16-NEXT: store <2 x half> [[FLOOR]], ptr addrspace(1) [[IP]], align 4 2446; IR-LEGALF16-NEXT: ret <2 x half> 
[[COND6]] 2447; 2448; GFX6-LABEL: safe_math_fract_v2f16: 2449; GFX6: ; %bb.0: ; %entry 2450; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2451; GFX6-NEXT: v_cvt_f16_f32_e32 v1, v1 2452; GFX6-NEXT: v_cvt_f16_f32_e32 v0, v0 2453; GFX6-NEXT: s_movk_i32 s8, 0x7c00 2454; GFX6-NEXT: s_mov_b32 s6, 0 2455; GFX6-NEXT: v_cvt_f32_f16_e32 v4, v1 2456; GFX6-NEXT: v_cvt_f32_f16_e32 v5, v0 2457; GFX6-NEXT: v_and_b32_e32 v0, 0x7fff, v0 2458; GFX6-NEXT: v_and_b32_e32 v1, 0x7fff, v1 2459; GFX6-NEXT: v_floor_f32_e32 v6, v4 2460; GFX6-NEXT: v_cvt_f16_f32_e32 v7, v6 2461; GFX6-NEXT: v_floor_f32_e32 v8, v5 2462; GFX6-NEXT: v_sub_f32_e32 v6, v4, v6 2463; GFX6-NEXT: v_cvt_f16_f32_e32 v9, v8 2464; GFX6-NEXT: v_sub_f32_e32 v8, v5, v8 2465; GFX6-NEXT: v_min_f32_e32 v6, 0x3f7fe000, v6 2466; GFX6-NEXT: v_cmp_u_f32_e32 vcc, v4, v4 2467; GFX6-NEXT: v_min_f32_e32 v8, 0x3f7fe000, v8 2468; GFX6-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc 2469; GFX6-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 2470; GFX6-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc 2471; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, s8, v0 2472; GFX6-NEXT: v_lshlrev_b32_e32 v7, 16, v7 2473; GFX6-NEXT: v_cndmask_b32_e32 v0, 0, v5, vcc 2474; GFX6-NEXT: v_cmp_ne_u32_e32 vcc, s8, v1 2475; GFX6-NEXT: s_mov_b32 s7, 0xf000 2476; GFX6-NEXT: s_mov_b32 s4, s6 2477; GFX6-NEXT: s_mov_b32 s5, s6 2478; GFX6-NEXT: v_or_b32_e32 v7, v9, v7 2479; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v4, vcc 2480; GFX6-NEXT: buffer_store_dword v7, v[2:3], s[4:7], 0 addr64 2481; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) 2482; GFX6-NEXT: s_setpc_b64 s[30:31] 2483; 2484; GFX7-LABEL: safe_math_fract_v2f16: 2485; GFX7: ; %bb.0: ; %entry 2486; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2487; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1 2488; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0 2489; GFX7-NEXT: s_movk_i32 s8, 0x7c00 2490; GFX7-NEXT: s_mov_b32 s6, 0 2491; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v1 2492; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v0 2493; GFX7-NEXT: v_and_b32_e32 v0, 0x7fff, v0 2494; GFX7-NEXT: 
v_and_b32_e32 v1, 0x7fff, v1 2495; GFX7-NEXT: v_floor_f32_e32 v6, v4 2496; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v6 2497; GFX7-NEXT: v_floor_f32_e32 v8, v5 2498; GFX7-NEXT: v_sub_f32_e32 v6, v4, v6 2499; GFX7-NEXT: v_cvt_f16_f32_e32 v9, v8 2500; GFX7-NEXT: v_sub_f32_e32 v8, v5, v8 2501; GFX7-NEXT: v_min_f32_e32 v6, 0x3f7fe000, v6 2502; GFX7-NEXT: v_cmp_u_f32_e32 vcc, v4, v4 2503; GFX7-NEXT: v_min_f32_e32 v8, 0x3f7fe000, v8 2504; GFX7-NEXT: v_cndmask_b32_e32 v4, v6, v4, vcc 2505; GFX7-NEXT: v_cmp_u_f32_e32 vcc, v5, v5 2506; GFX7-NEXT: v_cndmask_b32_e32 v5, v8, v5, vcc 2507; GFX7-NEXT: v_cmp_ne_u32_e32 vcc, s8, v0 2508; GFX7-NEXT: v_lshlrev_b32_e32 v7, 16, v7 2509; GFX7-NEXT: v_cndmask_b32_e32 v0, 0, v5, vcc 2510; GFX7-NEXT: v_cmp_ne_u32_e32 vcc, s8, v1 2511; GFX7-NEXT: s_mov_b32 s7, 0xf000 2512; GFX7-NEXT: s_mov_b32 s4, s6 2513; GFX7-NEXT: s_mov_b32 s5, s6 2514; GFX7-NEXT: v_or_b32_e32 v7, v9, v7 2515; GFX7-NEXT: v_cndmask_b32_e32 v1, 0, v4, vcc 2516; GFX7-NEXT: buffer_store_dword v7, v[2:3], s[4:7], 0 addr64 2517; GFX7-NEXT: s_waitcnt vmcnt(0) 2518; GFX7-NEXT: s_setpc_b64 s[30:31] 2519; 2520; GFX8-LABEL: safe_math_fract_v2f16: 2521; GFX8: ; %bb.0: ; %entry 2522; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2523; GFX8-NEXT: s_movk_i32 s6, 0x204 2524; GFX8-NEXT: v_floor_f16_sdwa v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 2525; GFX8-NEXT: v_floor_f16_e32 v4, v0 2526; GFX8-NEXT: v_fract_f16_sdwa v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 2527; GFX8-NEXT: v_cmp_class_f16_sdwa s[4:5], v0, s6 src0_sel:WORD_1 src1_sel:DWORD 2528; GFX8-NEXT: v_pack_b32_f16 v3, v4, v3 2529; GFX8-NEXT: v_fract_f16_e32 v4, v0 2530; GFX8-NEXT: v_cndmask_b32_e64 v5, v5, 0, s[4:5] 2531; GFX8-NEXT: v_cmp_class_f16_e64 s[4:5], v0, s6 2532; GFX8-NEXT: v_cndmask_b32_e64 v0, v4, 0, s[4:5] 2533; GFX8-NEXT: v_pack_b32_f16 v0, v0, v5 2534; GFX8-NEXT: global_store_dword v[1:2], v3, off 2535; GFX8-NEXT: s_waitcnt vmcnt(0) 2536; GFX8-NEXT: s_setpc_b64 s[30:31] 2537; 2538; 
GFX11-LABEL: safe_math_fract_v2f16: 2539; GFX11: ; %bb.0: ; %entry 2540; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2541; GFX11-NEXT: v_lshrrev_b32_e32 v3, 16, v0 2542; GFX11-NEXT: v_fract_f16_e32 v6, v0 2543; GFX11-NEXT: v_floor_f16_e32 v5, v0 2544; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2) 2545; GFX11-NEXT: v_fract_f16_e32 v4, v3 2546; GFX11-NEXT: v_cmp_class_f16_e64 s0, v3, 0x204 2547; GFX11-NEXT: v_floor_f16_e32 v7, v3 2548; GFX11-NEXT: v_cndmask_b32_e64 v3, v4, 0, s0 2549; GFX11-NEXT: v_cmp_class_f16_e64 s0, v0, 0x204 2550; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) 2551; GFX11-NEXT: v_pack_b32_f16 v4, v5, v7 2552; GFX11-NEXT: v_cndmask_b32_e64 v0, v6, 0, s0 2553; GFX11-NEXT: global_store_b32 v[1:2], v4, off 2554; GFX11-NEXT: v_pack_b32_f16 v0, v0, v3 2555; GFX11-NEXT: s_setpc_b64 s[30:31] 2556; 2557; GFX12-LABEL: safe_math_fract_v2f16: 2558; GFX12: ; %bb.0: ; %entry 2559; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 2560; GFX12-NEXT: s_wait_expcnt 0x0 2561; GFX12-NEXT: s_wait_samplecnt 0x0 2562; GFX12-NEXT: s_wait_bvhcnt 0x0 2563; GFX12-NEXT: s_wait_kmcnt 0x0 2564; GFX12-NEXT: v_lshrrev_b32_e32 v3, 16, v0 2565; GFX12-NEXT: v_fract_f16_e32 v6, v0 2566; GFX12-NEXT: v_floor_f16_e32 v5, v0 2567; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_2) 2568; GFX12-NEXT: v_fract_f16_e32 v4, v3 2569; GFX12-NEXT: v_cmp_class_f16_e64 s0, v3, 0x204 2570; GFX12-NEXT: v_floor_f16_e32 v7, v3 2571; GFX12-NEXT: v_cndmask_b32_e64 v3, v4, 0, s0 2572; GFX12-NEXT: v_cmp_class_f16_e64 s0, v0, 0x204 2573; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) 2574; GFX12-NEXT: v_pack_b32_f16 v4, v5, v7 2575; GFX12-NEXT: v_cndmask_b32_e64 v0, v6, 0, s0 2576; GFX12-NEXT: global_store_b32 v[1:2], v4, off 2577; GFX12-NEXT: v_pack_b32_f16 v0, v0, v3 2578; GFX12-NEXT: s_setpc_b64 s[30:31] 2579entry: 2580 %floor = tail call <2 x half> 
@llvm.floor.v2f16(<2 x half> %x) 2581 %sub = fsub <2 x half> %x, %floor 2582 %min = tail call <2 x half> @llvm.minnum.v2f16(<2 x half> %sub, <2 x half> <half 0xH3BFF, half 0xH3BFF>) 2583 %uno = fcmp uno <2 x half> %x, zeroinitializer 2584 %cond = select <2 x i1> %uno, <2 x half> %x, <2 x half> %min 2585 %fabs = tail call <2 x half> @llvm.fabs.v2f16(<2 x half> %x) 2586 %cmpinf = fcmp oeq <2 x half> %fabs, <half 0xH7C00, half 0xH7C00> 2587 %cond6 = select <2 x i1> %cmpinf, <2 x half> zeroinitializer, <2 x half> %cond 2588 store <2 x half> %floor, ptr addrspace(1) %ip, align 4 2589 ret <2 x half> %cond6 2590} 2591 2592define <2 x double> @safe_math_fract_v2f64(<2 x double> %x, ptr addrspace(1) writeonly captures(none) %ip) { 2593; GFX6-IR-LABEL: define <2 x double> @safe_math_fract_v2f64 2594; GFX6-IR-SAME: (<2 x double> [[X:%.*]], ptr addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0]] { 2595; GFX6-IR-NEXT: entry: 2596; GFX6-IR-NEXT: [[FLOOR:%.*]] = tail call <2 x double> @llvm.floor.v2f64(<2 x double> [[X]]) 2597; GFX6-IR-NEXT: [[SUB:%.*]] = fsub <2 x double> [[X]], [[FLOOR]] 2598; GFX6-IR-NEXT: [[MIN:%.*]] = tail call <2 x double> @llvm.minnum.v2f64(<2 x double> [[SUB]], <2 x double> splat (double 0x3FEFFFFFFFFFFFFF)) 2599; GFX6-IR-NEXT: [[UNO:%.*]] = fcmp uno <2 x double> [[X]], zeroinitializer 2600; GFX6-IR-NEXT: [[COND:%.*]] = select <2 x i1> [[UNO]], <2 x double> [[X]], <2 x double> [[MIN]] 2601; GFX6-IR-NEXT: [[FABS:%.*]] = tail call <2 x double> @llvm.fabs.v2f64(<2 x double> [[X]]) 2602; GFX6-IR-NEXT: [[CMPINF:%.*]] = fcmp oeq <2 x double> [[FABS]], splat (double 0x7FF0000000000000) 2603; GFX6-IR-NEXT: [[COND6:%.*]] = select <2 x i1> [[CMPINF]], <2 x double> zeroinitializer, <2 x double> [[COND]] 2604; GFX6-IR-NEXT: store <2 x double> [[FLOOR]], ptr addrspace(1) [[IP]], align 4 2605; GFX6-IR-NEXT: ret <2 x double> [[COND6]] 2606; 2607; IR-FRACT-LABEL: define <2 x double> @safe_math_fract_v2f64 2608; IR-FRACT-SAME: (<2 x double> [[X:%.*]], ptr 
addrspace(1) writeonly captures(none) [[IP:%.*]]) #[[ATTR0]] { 2609; IR-FRACT-NEXT: entry: 2610; IR-FRACT-NEXT: [[FLOOR:%.*]] = tail call <2 x double> @llvm.floor.v2f64(<2 x double> [[X]]) 2611; IR-FRACT-NEXT: [[TMP0:%.*]] = extractelement <2 x double> [[X]], i64 0 2612; IR-FRACT-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[X]], i64 1 2613; IR-FRACT-NEXT: [[TMP2:%.*]] = call double @llvm.amdgcn.fract.f64(double [[TMP0]]) 2614; IR-FRACT-NEXT: [[TMP3:%.*]] = call double @llvm.amdgcn.fract.f64(double [[TMP1]]) 2615; IR-FRACT-NEXT: [[TMP4:%.*]] = insertelement <2 x double> poison, double [[TMP2]], i64 0 2616; IR-FRACT-NEXT: [[COND:%.*]] = insertelement <2 x double> [[TMP4]], double [[TMP3]], i64 1 2617; IR-FRACT-NEXT: [[FABS:%.*]] = tail call <2 x double> @llvm.fabs.v2f64(<2 x double> [[X]]) 2618; IR-FRACT-NEXT: [[CMPINF:%.*]] = fcmp oeq <2 x double> [[FABS]], splat (double 0x7FF0000000000000) 2619; IR-FRACT-NEXT: [[COND6:%.*]] = select <2 x i1> [[CMPINF]], <2 x double> zeroinitializer, <2 x double> [[COND]] 2620; IR-FRACT-NEXT: store <2 x double> [[FLOOR]], ptr addrspace(1) [[IP]], align 4 2621; IR-FRACT-NEXT: ret <2 x double> [[COND6]] 2622; 2623; GFX6-LABEL: safe_math_fract_v2f64: 2624; GFX6: ; %bb.0: ; %entry 2625; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2626; GFX6-NEXT: v_fract_f64_e32 v[6:7], v[2:3] 2627; GFX6-NEXT: v_mov_b32_e32 v10, -1 2628; GFX6-NEXT: v_mov_b32_e32 v11, 0x3fefffff 2629; GFX6-NEXT: v_min_f64 v[6:7], v[6:7], v[10:11] 2630; GFX6-NEXT: v_cmp_class_f64_e64 vcc, v[2:3], 3 2631; GFX6-NEXT: v_fract_f64_e32 v[12:13], v[0:1] 2632; GFX6-NEXT: v_cndmask_b32_e32 v6, v6, v2, vcc 2633; GFX6-NEXT: v_cndmask_b32_e32 v7, v7, v3, vcc 2634; GFX6-NEXT: v_add_f64 v[8:9], v[2:3], -v[6:7] 2635; GFX6-NEXT: v_min_f64 v[6:7], v[12:13], v[10:11] 2636; GFX6-NEXT: v_cmp_class_f64_e64 vcc, v[0:1], 3 2637; GFX6-NEXT: v_add_f64 v[10:11], v[2:3], -v[8:9] 2638; GFX6-NEXT: v_cndmask_b32_e32 v6, v6, v0, vcc 2639; GFX6-NEXT: v_cndmask_b32_e32 v7, v7, v1, vcc 2640; 
GFX6-NEXT: v_add_f64 v[6:7], v[0:1], -v[6:7] 2641; GFX6-NEXT: s_mov_b32 s8, -1 2642; GFX6-NEXT: s_mov_b32 s9, 0x3fefffff 2643; GFX6-NEXT: v_add_f64 v[12:13], v[0:1], -v[6:7] 2644; GFX6-NEXT: v_min_f64 v[10:11], v[10:11], s[8:9] 2645; GFX6-NEXT: v_cmp_u_f64_e32 vcc, v[2:3], v[2:3] 2646; GFX6-NEXT: v_min_f64 v[12:13], v[12:13], s[8:9] 2647; GFX6-NEXT: v_cndmask_b32_e32 v11, v11, v3, vcc 2648; GFX6-NEXT: v_cndmask_b32_e32 v10, v10, v2, vcc 2649; GFX6-NEXT: v_cmp_u_f64_e32 vcc, v[0:1], v[0:1] 2650; GFX6-NEXT: v_mov_b32_e32 v14, 0x204 2651; GFX6-NEXT: v_cndmask_b32_e32 v13, v13, v1, vcc 2652; GFX6-NEXT: v_cndmask_b32_e32 v12, v12, v0, vcc 2653; GFX6-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v14 2654; GFX6-NEXT: s_mov_b32 s6, 0 2655; GFX6-NEXT: v_cndmask_b32_e64 v0, v12, 0, vcc 2656; GFX6-NEXT: v_cndmask_b32_e64 v1, v13, 0, vcc 2657; GFX6-NEXT: v_cmp_class_f64_e32 vcc, v[2:3], v14 2658; GFX6-NEXT: s_mov_b32 s7, 0xf000 2659; GFX6-NEXT: s_mov_b32 s4, s6 2660; GFX6-NEXT: s_mov_b32 s5, s6 2661; GFX6-NEXT: v_cndmask_b32_e64 v2, v10, 0, vcc 2662; GFX6-NEXT: v_cndmask_b32_e64 v3, v11, 0, vcc 2663; GFX6-NEXT: buffer_store_dwordx4 v[6:9], v[4:5], s[4:7], 0 addr64 2664; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) 2665; GFX6-NEXT: s_setpc_b64 s[30:31] 2666; 2667; GFX7-LABEL: safe_math_fract_v2f64: 2668; GFX7: ; %bb.0: ; %entry 2669; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2670; GFX7-NEXT: v_mov_b32_e32 v6, 0x204 2671; GFX7-NEXT: v_fract_f64_e32 v[10:11], v[0:1] 2672; GFX7-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v6 2673; GFX7-NEXT: v_fract_f64_e32 v[12:13], v[2:3] 2674; GFX7-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], v6 2675; GFX7-NEXT: v_floor_f64_e32 v[8:9], v[2:3] 2676; GFX7-NEXT: v_floor_f64_e32 v[6:7], v[0:1] 2677; GFX7-NEXT: s_mov_b32 s10, 0 2678; GFX7-NEXT: s_mov_b32 s11, 0xf000 2679; GFX7-NEXT: s_mov_b32 s8, s10 2680; GFX7-NEXT: s_mov_b32 s9, s10 2681; GFX7-NEXT: v_cndmask_b32_e64 v0, v10, 0, vcc 2682; GFX7-NEXT: v_cndmask_b32_e64 v1, v11, 0, vcc 2683; GFX7-NEXT: 
v_cndmask_b32_e64 v2, v12, 0, s[4:5] 2684; GFX7-NEXT: v_cndmask_b32_e64 v3, v13, 0, s[4:5] 2685; GFX7-NEXT: buffer_store_dwordx4 v[6:9], v[4:5], s[8:11], 0 addr64 2686; GFX7-NEXT: s_waitcnt vmcnt(0) 2687; GFX7-NEXT: s_setpc_b64 s[30:31] 2688; 2689; GFX8-LABEL: safe_math_fract_v2f64: 2690; GFX8: ; %bb.0: ; %entry 2691; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2692; GFX8-NEXT: v_mov_b32_e32 v6, 0x204 2693; GFX8-NEXT: v_fract_f64_e32 v[10:11], v[0:1] 2694; GFX8-NEXT: v_cmp_class_f64_e32 vcc, v[0:1], v6 2695; GFX8-NEXT: v_fract_f64_e32 v[12:13], v[2:3] 2696; GFX8-NEXT: v_cmp_class_f64_e64 s[4:5], v[2:3], v6 2697; GFX8-NEXT: v_floor_f64_e32 v[8:9], v[2:3] 2698; GFX8-NEXT: v_floor_f64_e32 v[6:7], v[0:1] 2699; GFX8-NEXT: v_cndmask_b32_e64 v0, v10, 0, vcc 2700; GFX8-NEXT: v_cndmask_b32_e64 v1, v11, 0, vcc 2701; GFX8-NEXT: v_cndmask_b32_e64 v2, v12, 0, s[4:5] 2702; GFX8-NEXT: v_cndmask_b32_e64 v3, v13, 0, s[4:5] 2703; GFX8-NEXT: global_store_dwordx4 v[4:5], v[6:9], off 2704; GFX8-NEXT: s_waitcnt vmcnt(0) 2705; GFX8-NEXT: s_setpc_b64 s[30:31] 2706; 2707; GFX11-LABEL: safe_math_fract_v2f64: 2708; GFX11: ; %bb.0: ; %entry 2709; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2710; GFX11-NEXT: v_fract_f64_e32 v[10:11], v[0:1] 2711; GFX11-NEXT: v_cmp_class_f64_e64 s0, v[0:1], 0x204 2712; GFX11-NEXT: v_fract_f64_e32 v[12:13], v[2:3] 2713; GFX11-NEXT: v_cmp_class_f64_e64 s1, v[2:3], 0x204 2714; GFX11-NEXT: v_floor_f64_e32 v[8:9], v[2:3] 2715; GFX11-NEXT: v_floor_f64_e32 v[6:7], v[0:1] 2716; GFX11-NEXT: v_cndmask_b32_e64 v0, v10, 0, s0 2717; GFX11-NEXT: v_cndmask_b32_e64 v1, v11, 0, s0 2718; GFX11-NEXT: v_cndmask_b32_e64 v2, v12, 0, s1 2719; GFX11-NEXT: v_cndmask_b32_e64 v3, v13, 0, s1 2720; GFX11-NEXT: global_store_b128 v[4:5], v[6:9], off 2721; GFX11-NEXT: s_setpc_b64 s[30:31] 2722; 2723; GFX12-LABEL: safe_math_fract_v2f64: 2724; GFX12: ; %bb.0: ; %entry 2725; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 2726; GFX12-NEXT: s_wait_expcnt 0x0 2727; GFX12-NEXT: s_wait_samplecnt 
0x0 2728; GFX12-NEXT: s_wait_bvhcnt 0x0 2729; GFX12-NEXT: s_wait_kmcnt 0x0 2730; GFX12-NEXT: v_fract_f64_e32 v[10:11], v[0:1] 2731; GFX12-NEXT: v_cmp_class_f64_e64 s0, v[0:1], 0x204 2732; GFX12-NEXT: v_fract_f64_e32 v[12:13], v[2:3] 2733; GFX12-NEXT: v_cmp_class_f64_e64 s1, v[2:3], 0x204 2734; GFX12-NEXT: v_floor_f64_e32 v[8:9], v[2:3] 2735; GFX12-NEXT: v_floor_f64_e32 v[6:7], v[0:1] 2736; GFX12-NEXT: v_cndmask_b32_e64 v0, v10, 0, s0 2737; GFX12-NEXT: v_cndmask_b32_e64 v1, v11, 0, s0 2738; GFX12-NEXT: v_cndmask_b32_e64 v2, v12, 0, s1 2739; GFX12-NEXT: v_cndmask_b32_e64 v3, v13, 0, s1 2740; GFX12-NEXT: global_store_b128 v[4:5], v[6:9], off 2741; GFX12-NEXT: s_setpc_b64 s[30:31] 2742entry: 2743 %floor = tail call <2 x double> @llvm.floor.v2f64(<2 x double> %x) 2744 %sub = fsub <2 x double> %x, %floor 2745 %min = tail call <2 x double> @llvm.minnum.v2f64(<2 x double> %sub, <2 x double> <double 0x3FEFFFFFFFFFFFFF, double 0x3FEFFFFFFFFFFFFF>) 2746 %uno = fcmp uno <2 x double> %x, zeroinitializer 2747 %cond = select <2 x i1> %uno, <2 x double> %x, <2 x double> %min 2748 %fabs = tail call <2 x double> @llvm.fabs.v2f64(<2 x double> %x) 2749 %cmpinf = fcmp oeq <2 x double> %fabs, <double 0x7FF0000000000000, double 0x7FF0000000000000> 2750 %cond6 = select <2 x i1> %cmpinf, <2 x double> zeroinitializer, <2 x double> %cond 2751 store <2 x double> %floor, ptr addrspace(1) %ip, align 4 2752 ret <2 x double> %cond6 2753} 2754 2755declare half @llvm.floor.f16(half) #0 2756declare float @llvm.floor.f32(float) #0 2757declare double @llvm.floor.f64(double) #0 2758declare <2 x double> @llvm.floor.v2f64(<2 x double>) #0 2759declare <2 x float> @llvm.floor.v2f32(<2 x float>) #0 2760declare <2 x half> @llvm.floor.v2f16(<2 x half>) #0 2761declare float @llvm.trunc.f32(float) #0 2762declare float @llvm.minnum.f32(float, float) #0 2763declare half @llvm.minnum.f16(half, half) #0 2764declare double @llvm.minnum.f64(double, double) #0 2765declare <2 x float> @llvm.minnum.v2f32(<2 x float>, <2 
x float>) #0 2766declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>) #0 2767declare <2 x double> @llvm.minnum.v2f64(<2 x double>, <2 x double>) #0 2768declare float @llvm.maxnum.f32(float, float) #0 2769declare float @llvm.fabs.f32(float) #0 2770declare double @llvm.fabs.f64(double) #0 2771declare <2 x float> @llvm.fabs.v2f32(<2 x float>) #0 2772declare half @llvm.fabs.f16(half) #0 2773declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #0 2774declare <2 x double> @llvm.fabs.v2f64(<2 x double>) #0 2775 2776attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } 2777