; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; Test matrix: two subtargets (hawaii, checked under the SI* prefixes, and
; fiji, checked under the VI* prefixes), each compiled once with default
; floating-point semantics (the *-SAFE prefixes) and once with
; -enable-no-signed-zeros-fp-math (the *-NSZ prefixes). The plain GCN prefix
; is shared by all four runs.
; RUN: llc -mtriple=amdgcn -mcpu=hawaii -mattr=+flat-for-global < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GCN-SAFE,SI,SI-SAFE %s
; RUN: llc -enable-no-signed-zeros-fp-math -mtriple=amdgcn -mcpu=hawaii -mattr=+flat-for-global < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GCN-NSZ,SI,SI-NSZ %s

; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GCN-SAFE,VI,VI-SAFE %s
; RUN: llc -enable-no-signed-zeros-fp-math -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -enable-var-scope --check-prefixes=GCN,GCN-NSZ,VI,VI-NSZ %s

; --------------------------------------------------------------------------------
; fadd tests
; --------------------------------------------------------------------------------

; fneg of a single-use fadd.
; In the runs without -enable-no-signed-zeros-fp-math the expected code is the
; add followed by an xor with 0x80000000 (the f32 sign bit). In the runs with
; that flag the expected code is a single v_sub_f32 whose first operand is
; negated, i.e. (-a) - b.
define float @v_fneg_add_f32(float %a, float %b) #0 {
; GCN-SAFE-LABEL: v_fneg_add_f32:
; GCN-SAFE:       ; %bb.0:
; GCN-SAFE-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-SAFE-NEXT:    v_add_f32_e32 v0, v0, v1
; GCN-SAFE-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
; GCN-SAFE-NEXT:    s_setpc_b64 s[30:31]
;
; GCN-NSZ-LABEL: v_fneg_add_f32:
; GCN-NSZ:       ; %bb.0:
; GCN-NSZ-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NSZ-NEXT:    v_sub_f32_e64 v0, -v0, v1
; GCN-NSZ-NEXT:    s_setpc_b64 s[30:31]
  %add = fadd float %a, %b
  %fneg = fneg float %add
  ret float %fneg
}

; Same fadd/fneg pair, but the un-negated sum is returned as well, so %add has
; a second use besides the fneg. The checks use only the shared GCN prefix:
; all four runs are expected to keep the add and produce the negated copy with
; a separate xor.
define { float, float } @v_fneg_add_store_use_add_f32(float %a, float %b) #0 {
; GCN-LABEL: v_fneg_add_store_use_add_f32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_add_f32_e32 v1, v0, v1
; GCN-NEXT:    v_xor_b32_e32 v0, 0x80000000, v1
; GCN-NEXT:    s_setpc_b64 s[30:31]
  %add = fadd float %a, %b
  %fneg = fneg float %add
  ; Element 0 carries the negated sum, element 1 the original sum.
  %insert.0 = insertvalue { float, float } poison, float %fneg, 0
  %insert.1 = insertvalue { float, float } %insert.0, float %add, 1
  ret { float, float } %insert.1
}

44define { float, float } @v_fneg_add_multi_use_add_f32(float %a, float %b) #0 { 45; GCN-SAFE-LABEL: v_fneg_add_multi_use_add_f32: 46; GCN-SAFE: ; %bb.0: 47; GCN-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 48; GCN-SAFE-NEXT: v_add_f32_e32 v1, v0, v1 49; GCN-SAFE-NEXT: v_xor_b32_e32 v0, 0x80000000, v1 50; GCN-SAFE-NEXT: v_mul_f32_e32 v1, 4.0, v1 51; GCN-SAFE-NEXT: s_setpc_b64 s[30:31] 52; 53; GCN-NSZ-LABEL: v_fneg_add_multi_use_add_f32: 54; GCN-NSZ: ; %bb.0: 55; GCN-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 56; GCN-NSZ-NEXT: v_sub_f32_e64 v0, -v0, v1 57; GCN-NSZ-NEXT: v_mul_f32_e32 v1, -4.0, v0 58; GCN-NSZ-NEXT: s_setpc_b64 s[30:31] 59 %add = fadd float %a, %b 60 %fneg = fneg float %add 61 %use1 = fmul float %add, 4.0 62 63 %insert.0 = insertvalue { float, float } poison, float %fneg, 0 64 %insert.1 = insertvalue { float, float } %insert.0, float %use1, 1 65 ret { float, float } %insert.1 66} 67 68define float @v_fneg_add_fneg_x_f32(float %a, float %b) #0 { 69; GCN-SAFE-LABEL: v_fneg_add_fneg_x_f32: 70; GCN-SAFE: ; %bb.0: 71; GCN-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 72; GCN-SAFE-NEXT: v_sub_f32_e32 v0, v1, v0 73; GCN-SAFE-NEXT: v_xor_b32_e32 v0, 0x80000000, v0 74; GCN-SAFE-NEXT: s_setpc_b64 s[30:31] 75; 76; GCN-NSZ-LABEL: v_fneg_add_fneg_x_f32: 77; GCN-NSZ: ; %bb.0: 78; GCN-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 79; GCN-NSZ-NEXT: v_sub_f32_e32 v0, v0, v1 80; GCN-NSZ-NEXT: s_setpc_b64 s[30:31] 81 %fneg.a = fneg float %a 82 %add = fadd float %fneg.a, %b 83 %fneg = fneg float %add 84 ret float %fneg 85} 86 87define float @v_fneg_add_x_fneg_f32(float %a, float %b) #0 { 88; GCN-SAFE-LABEL: v_fneg_add_x_fneg_f32: 89; GCN-SAFE: ; %bb.0: 90; GCN-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 91; GCN-SAFE-NEXT: v_sub_f32_e32 v0, v0, v1 92; GCN-SAFE-NEXT: v_xor_b32_e32 v0, 0x80000000, v0 93; GCN-SAFE-NEXT: s_setpc_b64 s[30:31] 94; 95; GCN-NSZ-LABEL: v_fneg_add_x_fneg_f32: 96; GCN-NSZ: ; %bb.0: 97; GCN-NSZ-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) 98; GCN-NSZ-NEXT: v_sub_f32_e32 v0, v1, v0 99; GCN-NSZ-NEXT: s_setpc_b64 s[30:31] 100 %fneg.b = fneg float %b 101 %add = fadd float %a, %fneg.b 102 %fneg = fneg float %add 103 ret float %fneg 104} 105 106define float @v_fneg_add_fneg_fneg_f32(float %a, float %b) #0 { 107; GCN-SAFE-LABEL: v_fneg_add_fneg_fneg_f32: 108; GCN-SAFE: ; %bb.0: 109; GCN-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 110; GCN-SAFE-NEXT: v_sub_f32_e64 v0, -v0, v1 111; GCN-SAFE-NEXT: v_xor_b32_e32 v0, 0x80000000, v0 112; GCN-SAFE-NEXT: s_setpc_b64 s[30:31] 113; 114; GCN-NSZ-LABEL: v_fneg_add_fneg_fneg_f32: 115; GCN-NSZ: ; %bb.0: 116; GCN-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 117; GCN-NSZ-NEXT: v_add_f32_e32 v0, v0, v1 118; GCN-NSZ-NEXT: s_setpc_b64 s[30:31] 119 %fneg.a = fneg float %a 120 %fneg.b = fneg float %b 121 %add = fadd float %fneg.a, %fneg.b 122 %fneg = fneg float %add 123 ret float %fneg 124} 125 126define { float, float } @v_fneg_add_store_use_fneg_x_f32(float %a, float %b) #0 { 127; GCN-SAFE-LABEL: v_fneg_add_store_use_fneg_x_f32: 128; GCN-SAFE: ; %bb.0: 129; GCN-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 130; GCN-SAFE-NEXT: v_xor_b32_e32 v2, 0x80000000, v0 131; GCN-SAFE-NEXT: v_sub_f32_e32 v0, v1, v0 132; GCN-SAFE-NEXT: v_xor_b32_e32 v0, 0x80000000, v0 133; GCN-SAFE-NEXT: v_mov_b32_e32 v1, v2 134; GCN-SAFE-NEXT: s_setpc_b64 s[30:31] 135; 136; GCN-NSZ-LABEL: v_fneg_add_store_use_fneg_x_f32: 137; GCN-NSZ: ; %bb.0: 138; GCN-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 139; GCN-NSZ-NEXT: v_xor_b32_e32 v2, 0x80000000, v0 140; GCN-NSZ-NEXT: v_sub_f32_e32 v0, v0, v1 141; GCN-NSZ-NEXT: v_mov_b32_e32 v1, v2 142; GCN-NSZ-NEXT: s_setpc_b64 s[30:31] 143 %fneg.a = fneg float %a 144 %add = fadd float %fneg.a, %b 145 %fneg = fneg float %add 146 %insert.0 = insertvalue { float, float } poison, float %fneg, 0 147 %insert.1 = insertvalue { float, float } %insert.0, float %fneg.a, 1 148 ret { float, float } %insert.1 149} 150 151define { float, 
float } @v_fneg_add_multi_use_fneg_x_f32(float %a, float %b, float %c) #0 { 152; GCN-SAFE-LABEL: v_fneg_add_multi_use_fneg_x_f32: 153; GCN-SAFE: ; %bb.0: 154; GCN-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 155; GCN-SAFE-NEXT: v_sub_f32_e32 v1, v1, v0 156; GCN-SAFE-NEXT: v_xor_b32_e32 v3, 0x80000000, v1 157; GCN-SAFE-NEXT: v_mul_f32_e64 v1, -v0, v2 158; GCN-SAFE-NEXT: v_mov_b32_e32 v0, v3 159; GCN-SAFE-NEXT: s_setpc_b64 s[30:31] 160; 161; GCN-NSZ-LABEL: v_fneg_add_multi_use_fneg_x_f32: 162; GCN-NSZ: ; %bb.0: 163; GCN-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 164; GCN-NSZ-NEXT: v_sub_f32_e32 v3, v0, v1 165; GCN-NSZ-NEXT: v_mul_f32_e64 v1, -v0, v2 166; GCN-NSZ-NEXT: v_mov_b32_e32 v0, v3 167; GCN-NSZ-NEXT: s_setpc_b64 s[30:31] 168 %fneg.a = fneg float %a 169 %add = fadd float %fneg.a, %b 170 %fneg = fneg float %add 171 %use1 = fmul float %fneg.a, %c 172 173 %insert.0 = insertvalue { float, float } poison, float %fneg, 0 174 %insert.1 = insertvalue { float, float } %insert.0, float %use1, 1 175 ret { float, float } %insert.1 176} 177 178; This one asserted with -enable-no-signed-zeros-fp-math 179define amdgpu_ps float @fneg_fadd_0_f32(float inreg %tmp2, float inreg %tmp6, <4 x i32> %arg) #0 { 180; SI-SAFE-LABEL: fneg_fadd_0_f32: 181; SI-SAFE: ; %bb.0: ; %.entry 182; SI-SAFE-NEXT: v_div_scale_f32 v0, s[2:3], s1, s1, 1.0 183; SI-SAFE-NEXT: v_rcp_f32_e32 v1, v0 184; SI-SAFE-NEXT: v_div_scale_f32 v2, vcc, 1.0, s1, 1.0 185; SI-SAFE-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 186; SI-SAFE-NEXT: v_fma_f32 v3, -v0, v1, 1.0 187; SI-SAFE-NEXT: v_fma_f32 v1, v3, v1, v1 188; SI-SAFE-NEXT: v_mul_f32_e32 v3, v2, v1 189; SI-SAFE-NEXT: v_fma_f32 v4, -v0, v3, v2 190; SI-SAFE-NEXT: v_fma_f32 v3, v4, v1, v3 191; SI-SAFE-NEXT: v_fma_f32 v0, -v0, v3, v2 192; SI-SAFE-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 193; SI-SAFE-NEXT: v_div_fmas_f32 v0, v0, v1, v3 194; SI-SAFE-NEXT: v_div_fixup_f32 v0, v0, s1, 1.0 195; SI-SAFE-NEXT: v_mad_f32 v0, v0, 0, 0 196; 
SI-SAFE-NEXT: v_mov_b32_e32 v1, s0 197; SI-SAFE-NEXT: v_cmp_ngt_f32_e32 vcc, s0, v0 198; SI-SAFE-NEXT: v_cndmask_b32_e64 v0, -v0, v1, vcc 199; SI-SAFE-NEXT: v_mov_b32_e32 v1, 0x7fc00000 200; SI-SAFE-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0 201; SI-SAFE-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc 202; SI-SAFE-NEXT: ; return to shader part epilog 203; 204; SI-NSZ-LABEL: fneg_fadd_0_f32: 205; SI-NSZ: ; %bb.0: ; %.entry 206; SI-NSZ-NEXT: v_div_scale_f32 v0, s[2:3], s1, s1, 1.0 207; SI-NSZ-NEXT: v_rcp_f32_e32 v1, v0 208; SI-NSZ-NEXT: v_div_scale_f32 v2, vcc, 1.0, s1, 1.0 209; SI-NSZ-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 210; SI-NSZ-NEXT: v_fma_f32 v3, -v0, v1, 1.0 211; SI-NSZ-NEXT: v_fma_f32 v1, v3, v1, v1 212; SI-NSZ-NEXT: v_mul_f32_e32 v3, v2, v1 213; SI-NSZ-NEXT: v_fma_f32 v4, -v0, v3, v2 214; SI-NSZ-NEXT: v_fma_f32 v3, v4, v1, v3 215; SI-NSZ-NEXT: v_fma_f32 v0, -v0, v3, v2 216; SI-NSZ-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 217; SI-NSZ-NEXT: v_div_fmas_f32 v0, v0, v1, v3 218; SI-NSZ-NEXT: v_div_fixup_f32 v0, v0, s1, 1.0 219; SI-NSZ-NEXT: v_mul_f32_e32 v0, 0, v0 220; SI-NSZ-NEXT: v_mov_b32_e32 v1, s0 221; SI-NSZ-NEXT: v_cmp_ngt_f32_e32 vcc, s0, v0 222; SI-NSZ-NEXT: v_cndmask_b32_e64 v0, -v0, v1, vcc 223; SI-NSZ-NEXT: v_mov_b32_e32 v1, 0x7fc00000 224; SI-NSZ-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0 225; SI-NSZ-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc 226; SI-NSZ-NEXT: ; return to shader part epilog 227; 228; VI-SAFE-LABEL: fneg_fadd_0_f32: 229; VI-SAFE: ; %bb.0: ; %.entry 230; VI-SAFE-NEXT: v_div_scale_f32 v0, s[2:3], s1, s1, 1.0 231; VI-SAFE-NEXT: v_div_scale_f32 v1, vcc, 1.0, s1, 1.0 232; VI-SAFE-NEXT: v_rcp_f32_e32 v2, v0 233; VI-SAFE-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 234; VI-SAFE-NEXT: v_fma_f32 v3, -v0, v2, 1.0 235; VI-SAFE-NEXT: v_fma_f32 v2, v3, v2, v2 236; VI-SAFE-NEXT: v_mul_f32_e32 v3, v1, v2 237; VI-SAFE-NEXT: v_fma_f32 v4, -v0, v3, v1 238; VI-SAFE-NEXT: v_fma_f32 v3, v4, v2, v3 239; VI-SAFE-NEXT: v_fma_f32 v0, -v0, v3, v1 240; 
VI-SAFE-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 241; VI-SAFE-NEXT: v_div_fmas_f32 v0, v0, v2, v3 242; VI-SAFE-NEXT: v_mov_b32_e32 v2, s0 243; VI-SAFE-NEXT: v_mov_b32_e32 v1, 0x7fc00000 244; VI-SAFE-NEXT: v_div_fixup_f32 v0, v0, s1, 1.0 245; VI-SAFE-NEXT: v_mad_f32 v0, v0, 0, 0 246; VI-SAFE-NEXT: v_cmp_ngt_f32_e32 vcc, s0, v0 247; VI-SAFE-NEXT: v_cndmask_b32_e64 v0, -v0, v2, vcc 248; VI-SAFE-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0 249; VI-SAFE-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc 250; VI-SAFE-NEXT: ; return to shader part epilog 251; 252; VI-NSZ-LABEL: fneg_fadd_0_f32: 253; VI-NSZ: ; %bb.0: ; %.entry 254; VI-NSZ-NEXT: v_div_scale_f32 v0, s[2:3], s1, s1, 1.0 255; VI-NSZ-NEXT: v_div_scale_f32 v1, vcc, 1.0, s1, 1.0 256; VI-NSZ-NEXT: v_rcp_f32_e32 v2, v0 257; VI-NSZ-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3 258; VI-NSZ-NEXT: v_fma_f32 v3, -v0, v2, 1.0 259; VI-NSZ-NEXT: v_fma_f32 v2, v3, v2, v2 260; VI-NSZ-NEXT: v_mul_f32_e32 v3, v1, v2 261; VI-NSZ-NEXT: v_fma_f32 v4, -v0, v3, v1 262; VI-NSZ-NEXT: v_fma_f32 v3, v4, v2, v3 263; VI-NSZ-NEXT: v_fma_f32 v0, -v0, v3, v1 264; VI-NSZ-NEXT: s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0 265; VI-NSZ-NEXT: v_div_fmas_f32 v0, v0, v2, v3 266; VI-NSZ-NEXT: v_mov_b32_e32 v2, s0 267; VI-NSZ-NEXT: v_mov_b32_e32 v1, 0x7fc00000 268; VI-NSZ-NEXT: v_div_fixup_f32 v0, v0, s1, 1.0 269; VI-NSZ-NEXT: v_mul_f32_e32 v0, 0, v0 270; VI-NSZ-NEXT: v_cmp_ngt_f32_e32 vcc, s0, v0 271; VI-NSZ-NEXT: v_cndmask_b32_e64 v0, -v0, v2, vcc 272; VI-NSZ-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0 273; VI-NSZ-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc 274; VI-NSZ-NEXT: ; return to shader part epilog 275.entry: 276 %tmp7 = fdiv float 1.000000e+00, %tmp6 277 %tmp8 = fmul float 0.000000e+00, %tmp7 278 %tmp9 = fmul reassoc nnan arcp contract float 0.000000e+00, %tmp8 279 %.i188 = fadd float %tmp9, 0.000000e+00 280 %tmp10 = fcmp uge float %.i188, %tmp2 281 %tmp11 = fneg float %.i188 282 %.i092 = select i1 %tmp10, float %tmp2, float %tmp11 283 %tmp12 = fcmp ule 
float %.i092, 0.000000e+00 284 %.i198 = select i1 %tmp12, float 0.000000e+00, float 0x7FF8000000000000 285 ret float %.i198 286} 287 288; This is a workaround because -enable-no-signed-zeros-fp-math does not set up 289; function attribute unsafe-fp-math automatically. Combine with the previous test 290; when that is done. 291define amdgpu_ps float @fneg_fadd_0_nsz_f32(float inreg %tmp2, float inreg %tmp6, <4 x i32> %arg) #2 { 292; SI-SAFE-LABEL: fneg_fadd_0_nsz_f32: 293; SI-SAFE: ; %bb.0: ; %.entry 294; SI-SAFE-NEXT: v_min_legacy_f32_e64 v0, 0, s0 295; SI-SAFE-NEXT: s_brev_b32 s0, 1 296; SI-SAFE-NEXT: v_mov_b32_e32 v1, 0x7fc00000 297; SI-SAFE-NEXT: v_cmp_ngt_f32_e32 vcc, s0, v0 298; SI-SAFE-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc 299; SI-SAFE-NEXT: ; return to shader part epilog 300; 301; GCN-NSZ-LABEL: fneg_fadd_0_nsz_f32: 302; GCN-NSZ: ; %bb.0: ; %.entry 303; GCN-NSZ-NEXT: v_rcp_f32_e32 v0, s1 304; GCN-NSZ-NEXT: v_mov_b32_e32 v1, s0 305; GCN-NSZ-NEXT: v_mul_f32_e32 v0, 0, v0 306; GCN-NSZ-NEXT: v_cmp_ngt_f32_e32 vcc, s0, v0 307; GCN-NSZ-NEXT: v_cndmask_b32_e64 v0, -v0, v1, vcc 308; GCN-NSZ-NEXT: v_mov_b32_e32 v1, 0x7fc00000 309; GCN-NSZ-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0 310; GCN-NSZ-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc 311; GCN-NSZ-NEXT: ; return to shader part epilog 312; 313; VI-SAFE-LABEL: fneg_fadd_0_nsz_f32: 314; VI-SAFE: ; %bb.0: ; %.entry 315; VI-SAFE-NEXT: v_rcp_f32_e32 v0, s1 316; VI-SAFE-NEXT: v_mov_b32_e32 v1, s0 317; VI-SAFE-NEXT: v_mul_f32_e32 v0, 0, v0 318; VI-SAFE-NEXT: v_add_f32_e32 v0, 0, v0 319; VI-SAFE-NEXT: v_cmp_ngt_f32_e32 vcc, s0, v0 320; VI-SAFE-NEXT: v_cndmask_b32_e64 v0, -v0, v1, vcc 321; VI-SAFE-NEXT: v_mov_b32_e32 v1, 0x7fc00000 322; VI-SAFE-NEXT: v_cmp_nlt_f32_e32 vcc, 0, v0 323; VI-SAFE-NEXT: v_cndmask_b32_e64 v0, v1, 0, vcc 324; VI-SAFE-NEXT: ; return to shader part epilog 325.entry: 326 %tmp7 = fdiv afn float 1.000000e+00, %tmp6 327 %tmp8 = fmul float 0.000000e+00, %tmp7 328 %tmp9 = fmul reassoc nnan arcp contract float 
0.000000e+00, %tmp8 329 %.i188 = fadd float %tmp9, 0.000000e+00 330 %tmp10 = fcmp uge float %.i188, %tmp2 331 %tmp11 = fneg float %.i188 332 %.i092 = select i1 %tmp10, float %tmp2, float %tmp11 333 %tmp12 = fcmp ule float %.i092, 0.000000e+00 334 %.i198 = select i1 %tmp12, float 0.000000e+00, float 0x7FF8000000000000 335 ret float %.i198 336} 337 338define double @v_fneg_add_f64(double %a, double %b) #0 { 339; GCN-SAFE-LABEL: v_fneg_add_f64: 340; GCN-SAFE: ; %bb.0: 341; GCN-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 342; GCN-SAFE-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3] 343; GCN-SAFE-NEXT: v_xor_b32_e32 v1, 0x80000000, v1 344; GCN-SAFE-NEXT: s_setpc_b64 s[30:31] 345; 346; GCN-NSZ-LABEL: v_fneg_add_f64: 347; GCN-NSZ: ; %bb.0: 348; GCN-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 349; GCN-NSZ-NEXT: v_add_f64 v[0:1], -v[0:1], -v[2:3] 350; GCN-NSZ-NEXT: s_setpc_b64 s[30:31] 351 %add = fadd double %a, %b 352 %fneg = fneg double %add 353 ret double %fneg 354} 355 356define { double, double } @v_fneg_add_store_use_add_f64(double %a, double %b) #0 { 357; GCN-LABEL: v_fneg_add_store_use_add_f64: 358; GCN: ; %bb.0: 359; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 360; GCN-NEXT: v_add_f64 v[2:3], v[0:1], v[2:3] 361; GCN-NEXT: v_xor_b32_e32 v1, 0x80000000, v3 362; GCN-NEXT: v_mov_b32_e32 v0, v2 363; GCN-NEXT: s_setpc_b64 s[30:31] 364 %add = fadd double %a, %b 365 %fneg = fneg double %add 366 %insert.0 = insertvalue { double, double } poison, double %fneg, 0 367 %insert.1 = insertvalue { double, double } %insert.0, double %add, 1 368 ret { double, double } %insert.1 369} 370 371define { double, double } @v_fneg_add_multi_use_add_f64(double %a, double %b) #0 { 372; SI-SAFE-LABEL: v_fneg_add_multi_use_add_f64: 373; SI-SAFE: ; %bb.0: 374; SI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 375; SI-SAFE-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3] 376; SI-SAFE-NEXT: v_xor_b32_e32 v4, 0x80000000, v1 377; SI-SAFE-NEXT: v_mul_f64 v[2:3], v[0:1], 4.0 378; SI-SAFE-NEXT: 
v_mov_b32_e32 v1, v4 379; SI-SAFE-NEXT: s_setpc_b64 s[30:31] 380; 381; GCN-NSZ-LABEL: v_fneg_add_multi_use_add_f64: 382; GCN-NSZ: ; %bb.0: 383; GCN-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 384; GCN-NSZ-NEXT: v_add_f64 v[0:1], -v[0:1], -v[2:3] 385; GCN-NSZ-NEXT: v_mul_f64 v[2:3], v[0:1], -4.0 386; GCN-NSZ-NEXT: s_setpc_b64 s[30:31] 387; 388; VI-SAFE-LABEL: v_fneg_add_multi_use_add_f64: 389; VI-SAFE: ; %bb.0: 390; VI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 391; VI-SAFE-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3] 392; VI-SAFE-NEXT: v_mul_f64 v[2:3], v[0:1], 4.0 393; VI-SAFE-NEXT: v_xor_b32_e32 v1, 0x80000000, v1 394; VI-SAFE-NEXT: s_setpc_b64 s[30:31] 395 %add = fadd double %a, %b 396 %fneg = fneg double %add 397 %use1 = fmul double %add, 4.0 398 399 %insert.0 = insertvalue { double, double } poison, double %fneg, 0 400 %insert.1 = insertvalue { double, double } %insert.0, double %use1, 1 401 ret { double, double } %insert.1 402} 403 404define double @v_fneg_add_fneg_x_f64(double %a, double %b) #0 { 405; GCN-SAFE-LABEL: v_fneg_add_fneg_x_f64: 406; GCN-SAFE: ; %bb.0: 407; GCN-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 408; GCN-SAFE-NEXT: v_add_f64 v[0:1], v[2:3], -v[0:1] 409; GCN-SAFE-NEXT: v_xor_b32_e32 v1, 0x80000000, v1 410; GCN-SAFE-NEXT: s_setpc_b64 s[30:31] 411; 412; GCN-NSZ-LABEL: v_fneg_add_fneg_x_f64: 413; GCN-NSZ: ; %bb.0: 414; GCN-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 415; GCN-NSZ-NEXT: v_add_f64 v[0:1], v[0:1], -v[2:3] 416; GCN-NSZ-NEXT: s_setpc_b64 s[30:31] 417 %fneg.a = fneg double %a 418 %add = fadd double %fneg.a, %b 419 %fneg = fneg double %add 420 ret double %fneg 421} 422 423define double @v_fneg_add_x_fneg_f64(double %a, double %b) #0 { 424; GCN-SAFE-LABEL: v_fneg_add_x_fneg_f64: 425; GCN-SAFE: ; %bb.0: 426; GCN-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 427; GCN-SAFE-NEXT: v_add_f64 v[0:1], v[0:1], -v[2:3] 428; GCN-SAFE-NEXT: v_xor_b32_e32 v1, 0x80000000, v1 429; GCN-SAFE-NEXT: s_setpc_b64 s[30:31] 430; 
431; GCN-NSZ-LABEL: v_fneg_add_x_fneg_f64: 432; GCN-NSZ: ; %bb.0: 433; GCN-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 434; GCN-NSZ-NEXT: v_add_f64 v[0:1], v[2:3], -v[0:1] 435; GCN-NSZ-NEXT: s_setpc_b64 s[30:31] 436 %fneg.b = fneg double %b 437 %add = fadd double %a, %fneg.b 438 %fneg = fneg double %add 439 ret double %fneg 440} 441 442define double @v_fneg_add_fneg_fneg_f64(double %a, double %b) #0 { 443; GCN-SAFE-LABEL: v_fneg_add_fneg_fneg_f64: 444; GCN-SAFE: ; %bb.0: 445; GCN-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 446; GCN-SAFE-NEXT: v_add_f64 v[0:1], -v[0:1], -v[2:3] 447; GCN-SAFE-NEXT: v_xor_b32_e32 v1, 0x80000000, v1 448; GCN-SAFE-NEXT: s_setpc_b64 s[30:31] 449; 450; GCN-NSZ-LABEL: v_fneg_add_fneg_fneg_f64: 451; GCN-NSZ: ; %bb.0: 452; GCN-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 453; GCN-NSZ-NEXT: v_add_f64 v[0:1], v[0:1], v[2:3] 454; GCN-NSZ-NEXT: s_setpc_b64 s[30:31] 455 %fneg.a = fneg double %a 456 %fneg.b = fneg double %b 457 %add = fadd double %fneg.a, %fneg.b 458 %fneg = fneg double %add 459 ret double %fneg 460} 461 462define { double, double } @v_fneg_add_store_use_fneg_x_f64(double %a, double %b) #0 { 463; SI-SAFE-LABEL: v_fneg_add_store_use_fneg_x_f64: 464; SI-SAFE: ; %bb.0: 465; SI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 466; SI-SAFE-NEXT: v_mov_b32_e32 v5, v1 467; SI-SAFE-NEXT: v_mov_b32_e32 v4, v0 468; SI-SAFE-NEXT: v_add_f64 v[0:1], v[2:3], -v[4:5] 469; SI-SAFE-NEXT: v_xor_b32_e32 v3, 0x80000000, v5 470; SI-SAFE-NEXT: v_xor_b32_e32 v1, 0x80000000, v1 471; SI-SAFE-NEXT: v_mov_b32_e32 v2, v4 472; SI-SAFE-NEXT: s_setpc_b64 s[30:31] 473; 474; SI-NSZ-LABEL: v_fneg_add_store_use_fneg_x_f64: 475; SI-NSZ: ; %bb.0: 476; SI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 477; SI-NSZ-NEXT: v_mov_b32_e32 v5, v1 478; SI-NSZ-NEXT: v_mov_b32_e32 v4, v0 479; SI-NSZ-NEXT: v_add_f64 v[0:1], v[4:5], -v[2:3] 480; SI-NSZ-NEXT: v_xor_b32_e32 v3, 0x80000000, v5 481; SI-NSZ-NEXT: v_mov_b32_e32 v2, v4 482; SI-NSZ-NEXT: 
s_setpc_b64 s[30:31] 483; 484; VI-SAFE-LABEL: v_fneg_add_store_use_fneg_x_f64: 485; VI-SAFE: ; %bb.0: 486; VI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 487; VI-SAFE-NEXT: v_add_f64 v[4:5], v[2:3], -v[0:1] 488; VI-SAFE-NEXT: v_xor_b32_e32 v3, 0x80000000, v1 489; VI-SAFE-NEXT: v_mov_b32_e32 v2, v0 490; VI-SAFE-NEXT: v_xor_b32_e32 v1, 0x80000000, v5 491; VI-SAFE-NEXT: v_mov_b32_e32 v0, v4 492; VI-SAFE-NEXT: s_setpc_b64 s[30:31] 493; 494; VI-NSZ-LABEL: v_fneg_add_store_use_fneg_x_f64: 495; VI-NSZ: ; %bb.0: 496; VI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 497; VI-NSZ-NEXT: v_add_f64 v[4:5], v[0:1], -v[2:3] 498; VI-NSZ-NEXT: v_xor_b32_e32 v3, 0x80000000, v1 499; VI-NSZ-NEXT: v_mov_b32_e32 v2, v0 500; VI-NSZ-NEXT: v_mov_b32_e32 v0, v4 501; VI-NSZ-NEXT: v_mov_b32_e32 v1, v5 502; VI-NSZ-NEXT: s_setpc_b64 s[30:31] 503 %fneg.a = fneg double %a 504 %add = fadd double %fneg.a, %b 505 %fneg = fneg double %add 506 %insert.0 = insertvalue { double, double } poison, double %fneg, 0 507 %insert.1 = insertvalue { double, double } %insert.0, double %fneg.a, 1 508 ret { double, double } %insert.1 509} 510 511define { double, double } @v_fneg_add_multi_use_fneg_x_f64(double %a, double %b, double %c) #0 { 512; SI-SAFE-LABEL: v_fneg_add_multi_use_fneg_x_f64: 513; SI-SAFE: ; %bb.0: 514; SI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 515; SI-SAFE-NEXT: v_add_f64 v[6:7], v[2:3], -v[0:1] 516; SI-SAFE-NEXT: v_mul_f64 v[2:3], -v[0:1], v[4:5] 517; SI-SAFE-NEXT: v_xor_b32_e32 v7, 0x80000000, v7 518; SI-SAFE-NEXT: v_mov_b32_e32 v0, v6 519; SI-SAFE-NEXT: v_mov_b32_e32 v1, v7 520; SI-SAFE-NEXT: s_setpc_b64 s[30:31] 521; 522; GCN-NSZ-LABEL: v_fneg_add_multi_use_fneg_x_f64: 523; GCN-NSZ: ; %bb.0: 524; GCN-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 525; GCN-NSZ-NEXT: v_add_f64 v[6:7], v[0:1], -v[2:3] 526; GCN-NSZ-NEXT: v_mul_f64 v[2:3], -v[0:1], v[4:5] 527; GCN-NSZ-NEXT: v_mov_b32_e32 v0, v6 528; GCN-NSZ-NEXT: v_mov_b32_e32 v1, v7 529; GCN-NSZ-NEXT: s_setpc_b64 s[30:31] 
530; 531; VI-SAFE-LABEL: v_fneg_add_multi_use_fneg_x_f64: 532; VI-SAFE: ; %bb.0: 533; VI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 534; VI-SAFE-NEXT: v_add_f64 v[6:7], v[2:3], -v[0:1] 535; VI-SAFE-NEXT: v_mul_f64 v[2:3], -v[0:1], v[4:5] 536; VI-SAFE-NEXT: v_xor_b32_e32 v1, 0x80000000, v7 537; VI-SAFE-NEXT: v_mov_b32_e32 v0, v6 538; VI-SAFE-NEXT: s_setpc_b64 s[30:31] 539 %fneg.a = fneg double %a 540 %add = fadd double %fneg.a, %b 541 %fneg = fneg double %add 542 %use1 = fmul double %fneg.a, %c 543 544 %insert.0 = insertvalue { double, double } poison, double %fneg, 0 545 %insert.1 = insertvalue { double, double } %insert.0, double %use1, 1 546 ret { double, double } %insert.1 547} 548 549; This one asserted with -enable-no-signed-zeros-fp-math 550define amdgpu_ps double @fneg_fadd_0_f64(double inreg %tmp2, double inreg %tmp6, <4 x i32> %arg) #0 { 551; SI-SAFE-LABEL: fneg_fadd_0_f64: 552; SI-SAFE: ; %bb.0: ; %.entry 553; SI-SAFE-NEXT: v_div_scale_f64 v[0:1], s[4:5], s[2:3], s[2:3], 1.0 554; SI-SAFE-NEXT: v_rcp_f64_e32 v[2:3], v[0:1] 555; SI-SAFE-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0 556; SI-SAFE-NEXT: v_fma_f64 v[2:3], v[2:3], v[4:5], v[2:3] 557; SI-SAFE-NEXT: v_div_scale_f64 v[4:5], vcc, 1.0, s[2:3], 1.0 558; SI-SAFE-NEXT: v_fma_f64 v[6:7], -v[0:1], v[2:3], 1.0 559; SI-SAFE-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3] 560; SI-SAFE-NEXT: v_mul_f64 v[6:7], v[4:5], v[2:3] 561; SI-SAFE-NEXT: v_fma_f64 v[0:1], -v[0:1], v[6:7], v[4:5] 562; SI-SAFE-NEXT: v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[6:7] 563; SI-SAFE-NEXT: v_mov_b32_e32 v2, s1 564; SI-SAFE-NEXT: v_mov_b32_e32 v3, s0 565; SI-SAFE-NEXT: v_div_fixup_f64 v[0:1], v[0:1], s[2:3], 1.0 566; SI-SAFE-NEXT: v_mul_f64 v[0:1], v[0:1], 0 567; SI-SAFE-NEXT: v_add_f64 v[0:1], v[0:1], 0 568; SI-SAFE-NEXT: v_cmp_ngt_f64_e32 vcc, s[0:1], v[0:1] 569; SI-SAFE-NEXT: v_xor_b32_e32 v4, 0x80000000, v1 570; SI-SAFE-NEXT: v_cndmask_b32_e32 v1, v4, v2, vcc 571; SI-SAFE-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 572; 
SI-SAFE-NEXT: v_cmp_nlt_f64_e32 vcc, 0, v[0:1] 573; SI-SAFE-NEXT: s_and_b64 s[0:1], vcc, exec 574; SI-SAFE-NEXT: s_cselect_b32 s1, 0, 0x7ff80000 575; SI-SAFE-NEXT: s_mov_b32 s0, 0 576; SI-SAFE-NEXT: ; return to shader part epilog 577; 578; SI-NSZ-LABEL: fneg_fadd_0_f64: 579; SI-NSZ: ; %bb.0: ; %.entry 580; SI-NSZ-NEXT: v_div_scale_f64 v[0:1], s[4:5], s[2:3], s[2:3], 1.0 581; SI-NSZ-NEXT: s_mov_b32 s4, 0 582; SI-NSZ-NEXT: s_brev_b32 s5, 1 583; SI-NSZ-NEXT: v_rcp_f64_e32 v[2:3], v[0:1] 584; SI-NSZ-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0 585; SI-NSZ-NEXT: v_fma_f64 v[2:3], v[2:3], v[4:5], v[2:3] 586; SI-NSZ-NEXT: v_div_scale_f64 v[4:5], vcc, 1.0, s[2:3], 1.0 587; SI-NSZ-NEXT: v_fma_f64 v[6:7], -v[0:1], v[2:3], 1.0 588; SI-NSZ-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3] 589; SI-NSZ-NEXT: v_mul_f64 v[6:7], v[4:5], v[2:3] 590; SI-NSZ-NEXT: v_fma_f64 v[0:1], -v[0:1], v[6:7], v[4:5] 591; SI-NSZ-NEXT: v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[6:7] 592; SI-NSZ-NEXT: v_mov_b32_e32 v2, s1 593; SI-NSZ-NEXT: v_mov_b32_e32 v3, s0 594; SI-NSZ-NEXT: v_div_fixup_f64 v[0:1], v[0:1], s[2:3], 1.0 595; SI-NSZ-NEXT: v_mul_f64 v[0:1], v[0:1], s[4:5] 596; SI-NSZ-NEXT: v_cmp_nlt_f64_e64 vcc, -v[0:1], s[0:1] 597; SI-NSZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 598; SI-NSZ-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 599; SI-NSZ-NEXT: v_cmp_nlt_f64_e32 vcc, 0, v[0:1] 600; SI-NSZ-NEXT: s_and_b64 s[0:1], vcc, exec 601; SI-NSZ-NEXT: s_cselect_b32 s1, 0, 0x7ff80000 602; SI-NSZ-NEXT: s_mov_b32 s0, 0 603; SI-NSZ-NEXT: ; return to shader part epilog 604; 605; VI-SAFE-LABEL: fneg_fadd_0_f64: 606; VI-SAFE: ; %bb.0: ; %.entry 607; VI-SAFE-NEXT: v_div_scale_f64 v[0:1], s[4:5], s[2:3], s[2:3], 1.0 608; VI-SAFE-NEXT: v_rcp_f64_e32 v[2:3], v[0:1] 609; VI-SAFE-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0 610; VI-SAFE-NEXT: v_fma_f64 v[2:3], v[2:3], v[4:5], v[2:3] 611; VI-SAFE-NEXT: v_div_scale_f64 v[4:5], vcc, 1.0, s[2:3], 1.0 612; VI-SAFE-NEXT: v_fma_f64 v[6:7], -v[0:1], v[2:3], 1.0 613; VI-SAFE-NEXT: 
v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3] 614; VI-SAFE-NEXT: v_mul_f64 v[6:7], v[4:5], v[2:3] 615; VI-SAFE-NEXT: v_fma_f64 v[0:1], -v[0:1], v[6:7], v[4:5] 616; VI-SAFE-NEXT: v_mov_b32_e32 v4, s0 617; VI-SAFE-NEXT: v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[6:7] 618; VI-SAFE-NEXT: v_mov_b32_e32 v2, s1 619; VI-SAFE-NEXT: v_div_fixup_f64 v[0:1], v[0:1], s[2:3], 1.0 620; VI-SAFE-NEXT: v_mul_f64 v[0:1], v[0:1], 0 621; VI-SAFE-NEXT: v_add_f64 v[0:1], v[0:1], 0 622; VI-SAFE-NEXT: v_cmp_ngt_f64_e32 vcc, s[0:1], v[0:1] 623; VI-SAFE-NEXT: v_xor_b32_e32 v3, 0x80000000, v1 624; VI-SAFE-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc 625; VI-SAFE-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 626; VI-SAFE-NEXT: v_cmp_nlt_f64_e32 vcc, 0, v[0:1] 627; VI-SAFE-NEXT: s_and_b64 s[0:1], vcc, exec 628; VI-SAFE-NEXT: s_cselect_b32 s1, 0, 0x7ff80000 629; VI-SAFE-NEXT: s_mov_b32 s0, 0 630; VI-SAFE-NEXT: ; return to shader part epilog 631; 632; VI-NSZ-LABEL: fneg_fadd_0_f64: 633; VI-NSZ: ; %bb.0: ; %.entry 634; VI-NSZ-NEXT: v_div_scale_f64 v[0:1], s[4:5], s[2:3], s[2:3], 1.0 635; VI-NSZ-NEXT: v_rcp_f64_e32 v[2:3], v[0:1] 636; VI-NSZ-NEXT: v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0 637; VI-NSZ-NEXT: v_fma_f64 v[2:3], v[2:3], v[4:5], v[2:3] 638; VI-NSZ-NEXT: v_div_scale_f64 v[4:5], vcc, 1.0, s[2:3], 1.0 639; VI-NSZ-NEXT: v_fma_f64 v[6:7], -v[0:1], v[2:3], 1.0 640; VI-NSZ-NEXT: v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3] 641; VI-NSZ-NEXT: v_mul_f64 v[6:7], v[4:5], v[2:3] 642; VI-NSZ-NEXT: v_fma_f64 v[0:1], -v[0:1], v[6:7], v[4:5] 643; VI-NSZ-NEXT: v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[6:7] 644; VI-NSZ-NEXT: v_mov_b32_e32 v2, s1 645; VI-NSZ-NEXT: v_mov_b32_e32 v3, s0 646; VI-NSZ-NEXT: v_div_fixup_f64 v[0:1], v[0:1], s[2:3], 1.0 647; VI-NSZ-NEXT: s_mov_b32 s2, 0 648; VI-NSZ-NEXT: s_brev_b32 s3, 1 649; VI-NSZ-NEXT: v_mul_f64 v[0:1], v[0:1], s[2:3] 650; VI-NSZ-NEXT: v_cmp_nlt_f64_e64 vcc, -v[0:1], s[0:1] 651; VI-NSZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 652; VI-NSZ-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 653; 
VI-NSZ-NEXT: v_cmp_nlt_f64_e32 vcc, 0, v[0:1] 654; VI-NSZ-NEXT: s_and_b64 s[0:1], vcc, exec 655; VI-NSZ-NEXT: s_cselect_b32 s1, 0, 0x7ff80000 656; VI-NSZ-NEXT: s_mov_b32 s0, 0 657; VI-NSZ-NEXT: ; return to shader part epilog 658.entry: 659 %tmp7 = fdiv double 1.000000e+00, %tmp6 660 %tmp8 = fmul double 0.000000e+00, %tmp7 661 %tmp9 = fmul reassoc nnan arcp contract double 0.000000e+00, %tmp8 662 %.i188 = fadd double %tmp9, 0.000000e+00 663 %tmp10 = fcmp uge double %.i188, %tmp2 664 %tmp11 = fneg double %.i188 665 %.i092 = select i1 %tmp10, double %tmp2, double %tmp11 666 %tmp12 = fcmp ule double %.i092, 0.000000e+00 667 %.i198 = select i1 %tmp12, double 0.000000e+00, double 0x7FF8000000000000 668 ret double %.i198 669} 670 671; This is a workaround because -enable-no-signed-zeros-fp-math does not set up 672; function attribute unsafe-fp-math automatically. Combine with the previous test 673; when that is done. 674define amdgpu_ps double @fneg_fadd_0_nsz_f64(double inreg %tmp2, double inreg %tmp6, <4 x i32> %arg) #2 { 675; GCN-SAFE-LABEL: fneg_fadd_0_nsz_f64: 676; GCN-SAFE: ; %bb.0: ; %.entry 677; GCN-SAFE-NEXT: v_cmp_ngt_f64_e64 s[2:3], s[0:1], 0 678; GCN-SAFE-NEXT: s_and_b64 s[2:3], s[2:3], exec 679; GCN-SAFE-NEXT: s_cselect_b32 s1, s1, 0x80000000 680; GCN-SAFE-NEXT: s_cselect_b32 s0, s0, 0 681; GCN-SAFE-NEXT: v_cmp_ngt_f64_e64 s[0:1], s[0:1], 0 682; GCN-SAFE-NEXT: s_and_b64 s[0:1], s[0:1], exec 683; GCN-SAFE-NEXT: s_cselect_b32 s1, 0, 0x7ff80000 684; GCN-SAFE-NEXT: s_mov_b32 s0, 0 685; GCN-SAFE-NEXT: ; return to shader part epilog 686; 687; SI-NSZ-LABEL: fneg_fadd_0_nsz_f64: 688; SI-NSZ: ; %bb.0: ; %.entry 689; SI-NSZ-NEXT: v_rcp_f64_e32 v[0:1], s[2:3] 690; SI-NSZ-NEXT: v_fma_f64 v[2:3], -s[2:3], v[0:1], 1.0 691; SI-NSZ-NEXT: v_fma_f64 v[0:1], v[2:3], v[0:1], v[0:1] 692; SI-NSZ-NEXT: v_fma_f64 v[2:3], -s[2:3], v[0:1], 1.0 693; SI-NSZ-NEXT: v_fma_f64 v[0:1], v[2:3], v[0:1], v[0:1] 694; SI-NSZ-NEXT: v_fma_f64 v[2:3], -s[2:3], v[0:1], 1.0 695; SI-NSZ-NEXT: s_mov_b32 
s2, 0 696; SI-NSZ-NEXT: v_fma_f64 v[0:1], v[2:3], v[0:1], v[0:1] 697; SI-NSZ-NEXT: s_brev_b32 s3, 1 698; SI-NSZ-NEXT: v_mul_f64 v[0:1], v[0:1], s[2:3] 699; SI-NSZ-NEXT: v_mov_b32_e32 v2, s1 700; SI-NSZ-NEXT: v_cmp_nlt_f64_e64 vcc, -v[0:1], s[0:1] 701; SI-NSZ-NEXT: v_mov_b32_e32 v3, s0 702; SI-NSZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 703; SI-NSZ-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 704; SI-NSZ-NEXT: v_cmp_nlt_f64_e32 vcc, 0, v[0:1] 705; SI-NSZ-NEXT: s_and_b64 s[0:1], vcc, exec 706; SI-NSZ-NEXT: s_cselect_b32 s1, 0, 0x7ff80000 707; SI-NSZ-NEXT: s_mov_b32 s0, 0 708; SI-NSZ-NEXT: ; return to shader part epilog 709; 710; VI-NSZ-LABEL: fneg_fadd_0_nsz_f64: 711; VI-NSZ: ; %bb.0: ; %.entry 712; VI-NSZ-NEXT: v_rcp_f64_e32 v[0:1], s[2:3] 713; VI-NSZ-NEXT: v_fma_f64 v[2:3], -s[2:3], v[0:1], 1.0 714; VI-NSZ-NEXT: v_fma_f64 v[0:1], v[2:3], v[0:1], v[0:1] 715; VI-NSZ-NEXT: v_fma_f64 v[2:3], -s[2:3], v[0:1], 1.0 716; VI-NSZ-NEXT: v_fma_f64 v[0:1], v[2:3], v[0:1], v[0:1] 717; VI-NSZ-NEXT: v_fma_f64 v[2:3], -s[2:3], v[0:1], 1.0 718; VI-NSZ-NEXT: s_mov_b32 s2, 0 719; VI-NSZ-NEXT: s_brev_b32 s3, 1 720; VI-NSZ-NEXT: v_fma_f64 v[0:1], v[2:3], v[0:1], v[0:1] 721; VI-NSZ-NEXT: v_mov_b32_e32 v2, s1 722; VI-NSZ-NEXT: v_mov_b32_e32 v3, s0 723; VI-NSZ-NEXT: v_mul_f64 v[0:1], v[0:1], s[2:3] 724; VI-NSZ-NEXT: v_cmp_nlt_f64_e64 vcc, -v[0:1], s[0:1] 725; VI-NSZ-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 726; VI-NSZ-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 727; VI-NSZ-NEXT: v_cmp_nlt_f64_e32 vcc, 0, v[0:1] 728; VI-NSZ-NEXT: s_and_b64 s[0:1], vcc, exec 729; VI-NSZ-NEXT: s_cselect_b32 s1, 0, 0x7ff80000 730; VI-NSZ-NEXT: s_mov_b32 s0, 0 731; VI-NSZ-NEXT: ; return to shader part epilog 732.entry: 733 %tmp7 = fdiv afn double 1.000000e+00, %tmp6 734 %tmp8 = fmul double 0.000000e+00, %tmp7 735 %tmp9 = fmul reassoc nnan arcp contract double 0.000000e+00, %tmp8 736 %.i188 = fadd double %tmp9, 0.000000e+00 737 %tmp10 = fcmp uge double %.i188, %tmp2 738 %tmp11 = fneg double %.i188 739 %.i092 = select i1 
%tmp10, double %tmp2, double %tmp11 740 %tmp12 = fcmp ule double %.i092, 0.000000e+00 741 %.i198 = select i1 %tmp12, double 0.000000e+00, double 0x7FF8000000000000 742 ret double %.i198 743} 744 745; -------------------------------------------------------------------------------- 746; fmul tests 747; -------------------------------------------------------------------------------- 748 749define float @v_fneg_mul_f32(float %a, float %b) #0 { 750; GCN-LABEL: v_fneg_mul_f32: 751; GCN: ; %bb.0: 752; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 753; GCN-NEXT: v_mul_f32_e64 v0, v0, -v1 754; GCN-NEXT: s_setpc_b64 s[30:31] 755 %mul = fmul float %a, %b 756 %fneg = fneg float %mul 757 ret float %fneg 758} 759 760define { float, float } @v_fneg_mul_store_use_mul_f32(float %a, float %b) #0 { 761; GCN-LABEL: v_fneg_mul_store_use_mul_f32: 762; GCN: ; %bb.0: 763; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 764; GCN-NEXT: v_mul_f32_e32 v1, v0, v1 765; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v1 766; GCN-NEXT: s_setpc_b64 s[30:31] 767 %mul = fmul float %a, %b 768 %fneg = fneg float %mul 769 %insert.0 = insertvalue { float, float } poison, float %fneg, 0 770 %insert.1 = insertvalue { float, float } %insert.0, float %mul, 1 771 ret { float, float } %insert.1 772} 773 774define { float, float } @v_fneg_mul_multi_use_mul_f32(float %a, float %b) #0 { 775; GCN-LABEL: v_fneg_mul_multi_use_mul_f32: 776; GCN: ; %bb.0: 777; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 778; GCN-NEXT: v_mul_f32_e64 v0, v0, -v1 779; GCN-NEXT: v_mul_f32_e32 v1, -4.0, v0 780; GCN-NEXT: s_setpc_b64 s[30:31] 781 %mul = fmul float %a, %b 782 %fneg = fneg float %mul 783 %use1 = fmul float %mul, 4.0 784 %insert.0 = insertvalue { float, float } poison, float %fneg, 0 785 %insert.1 = insertvalue { float, float } %insert.0, float %use1, 1 786 ret { float, float } %insert.1 787} 788 789define float @v_fneg_mul_fneg_x_f32(float %a, float %b) #0 { 790; GCN-LABEL: v_fneg_mul_fneg_x_f32: 791; GCN: ; %bb.0: 792; 
GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 793; GCN-NEXT: v_mul_f32_e32 v0, v0, v1 794; GCN-NEXT: s_setpc_b64 s[30:31] 795 %fneg.a = fneg float %a 796 %mul = fmul float %fneg.a, %b 797 %fneg = fneg float %mul 798 ret float %fneg 799} 800 801define float @v_fneg_mul_x_fneg_f32(float %a, float %b) #0 { 802; GCN-LABEL: v_fneg_mul_x_fneg_f32: 803; GCN: ; %bb.0: 804; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 805; GCN-NEXT: v_mul_f32_e32 v0, v0, v1 806; GCN-NEXT: s_setpc_b64 s[30:31] 807 %fneg.b = fneg float %b 808 %mul = fmul float %a, %fneg.b 809 %fneg = fneg float %mul 810 ret float %fneg 811} 812 813define float @v_fneg_mul_fneg_fneg_f32(float %a, float %b) #0 { 814; GCN-LABEL: v_fneg_mul_fneg_fneg_f32: 815; GCN: ; %bb.0: 816; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 817; GCN-NEXT: v_mul_f32_e64 v0, v0, -v1 818; GCN-NEXT: s_setpc_b64 s[30:31] 819 %fneg.a = fneg float %a 820 %fneg.b = fneg float %b 821 %mul = fmul float %fneg.a, %fneg.b 822 %fneg = fneg float %mul 823 ret float %fneg 824} 825 826define { float, float } @v_fneg_mul_store_use_fneg_x_f32(float %a, float %b) #0 { 827; GCN-LABEL: v_fneg_mul_store_use_fneg_x_f32: 828; GCN: ; %bb.0: 829; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 830; GCN-NEXT: v_xor_b32_e32 v2, 0x80000000, v0 831; GCN-NEXT: v_mul_f32_e32 v0, v0, v1 832; GCN-NEXT: v_mov_b32_e32 v1, v2 833; GCN-NEXT: s_setpc_b64 s[30:31] 834 %fneg.a = fneg float %a 835 %mul = fmul float %fneg.a, %b 836 %fneg = fneg float %mul 837 %insert.0 = insertvalue { float, float } poison, float %fneg, 0 838 %insert.1 = insertvalue { float, float } %insert.0, float %fneg.a, 1 839 ret { float, float } %insert.1 840} 841 842define { float, float } @v_fneg_mul_multi_use_fneg_x_f32(float %a, float %b, float %c) #0 { 843; GCN-LABEL: v_fneg_mul_multi_use_fneg_x_f32: 844; GCN: ; %bb.0: 845; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 846; GCN-NEXT: v_mul_f32_e32 v3, v0, v1 847; GCN-NEXT: v_mul_f32_e64 v1, -v0, v2 848; GCN-NEXT: v_mov_b32_e32 
v0, v3 849; GCN-NEXT: s_setpc_b64 s[30:31] 850 %fneg.a = fneg float %a 851 %mul = fmul float %fneg.a, %b 852 %fneg = fneg float %mul 853 %use1 = fmul float %fneg.a, %c 854 %insert.0 = insertvalue { float, float } poison, float %fneg, 0 855 %insert.1 = insertvalue { float, float } %insert.0, float %use1, 1 856 ret { float, float } %insert.1 857} 858 859; -------------------------------------------------------------------------------- 860; fminnum tests 861; -------------------------------------------------------------------------------- 862 863define float @v_fneg_minnum_f32_ieee(float %a, float %b) #0 { 864; GCN-LABEL: v_fneg_minnum_f32_ieee: 865; GCN: ; %bb.0: 866; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 867; GCN-NEXT: v_mul_f32_e32 v1, -1.0, v1 868; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0 869; GCN-NEXT: v_max_f32_e32 v0, v0, v1 870; GCN-NEXT: s_setpc_b64 s[30:31] 871 %min = call float @llvm.minnum.f32(float %a, float %b) 872 %fneg = fneg float %min 873 ret float %fneg 874} 875 876define float @v_fneg_minnum_f32_no_ieee(float %a, float %b) #4 { 877; GCN-LABEL: v_fneg_minnum_f32_no_ieee: 878; GCN: ; %bb.0: 879; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 880; GCN-NEXT: v_max_f32_e64 v0, -v0, -v1 881; GCN-NEXT: s_setpc_b64 s[30:31] 882 %min = call float @llvm.minnum.f32(float %a, float %b) 883 %fneg = fneg float %min 884 ret float %fneg 885} 886 887define float @v_fneg_self_minnum_f32_ieee(float %a) #0 { 888; GCN-LABEL: v_fneg_self_minnum_f32_ieee: 889; GCN: ; %bb.0: 890; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 891; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v0 892; GCN-NEXT: s_setpc_b64 s[30:31] 893 %min = call float @llvm.minnum.f32(float %a, float %a) 894 %min.fneg = fneg float %min 895 ret float %min.fneg 896} 897 898define float @v_fneg_self_minnum_f32_no_ieee(float %a) #4 { 899; GCN-LABEL: v_fneg_self_minnum_f32_no_ieee: 900; GCN: ; %bb.0: 901; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 902; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v0 
903; GCN-NEXT: s_setpc_b64 s[30:31] 904 %min = call float @llvm.minnum.f32(float %a, float %a) 905 %min.fneg = fneg float %min 906 ret float %min.fneg 907} 908 909define float @v_fneg_posk_minnum_f32_ieee(float %a) #0 { 910; GCN-LABEL: v_fneg_posk_minnum_f32_ieee: 911; GCN: ; %bb.0: 912; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 913; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0 914; GCN-NEXT: v_max_f32_e32 v0, -4.0, v0 915; GCN-NEXT: s_setpc_b64 s[30:31] 916 %min = call float @llvm.minnum.f32(float 4.0, float %a) 917 %fneg = fneg float %min 918 ret float %fneg 919} 920 921define float @v_fneg_posk_minnum_f32_no_ieee(float %a) #4 { 922; GCN-LABEL: v_fneg_posk_minnum_f32_no_ieee: 923; GCN: ; %bb.0: 924; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 925; GCN-NEXT: v_max_f32_e64 v0, -v0, -4.0 926; GCN-NEXT: s_setpc_b64 s[30:31] 927 %min = call float @llvm.minnum.f32(float 4.0, float %a) 928 %fneg = fneg float %min 929 ret float %fneg 930} 931 932define float @v_fneg_negk_minnum_f32_ieee(float %a) #0 { 933; GCN-LABEL: v_fneg_negk_minnum_f32_ieee: 934; GCN: ; %bb.0: 935; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 936; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0 937; GCN-NEXT: v_max_f32_e32 v0, 4.0, v0 938; GCN-NEXT: s_setpc_b64 s[30:31] 939 %min = call float @llvm.minnum.f32(float -4.0, float %a) 940 %fneg = fneg float %min 941 ret float %fneg 942} 943 944define float @v_fneg_negk_minnum_f32_no_ieee(float %a) #4 { 945; GCN-LABEL: v_fneg_negk_minnum_f32_no_ieee: 946; GCN: ; %bb.0: 947; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 948; GCN-NEXT: v_max_f32_e64 v0, -v0, 4.0 949; GCN-NEXT: s_setpc_b64 s[30:31] 950 %min = call float @llvm.minnum.f32(float -4.0, float %a) 951 %fneg = fneg float %min 952 ret float %fneg 953} 954 955define float @v_fneg_0_minnum_f32(float %a) #0 { 956; GCN-LABEL: v_fneg_0_minnum_f32: 957; GCN: ; %bb.0: 958; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 959; GCN-NEXT: v_min_f32_e32 v0, 0, v0 960; GCN-NEXT: v_xor_b32_e32 v0, 
0x80000000, v0 961; GCN-NEXT: s_setpc_b64 s[30:31] 962 %min = call nnan float @llvm.minnum.f32(float 0.0, float %a) 963 %fneg = fneg float %min 964 ret float %fneg 965} 966 967define float @v_fneg_neg0_minnum_f32_ieee(float %a) #0 { 968; GCN-LABEL: v_fneg_neg0_minnum_f32_ieee: 969; GCN: ; %bb.0: 970; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 971; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0 972; GCN-NEXT: v_max_f32_e32 v0, 0, v0 973; GCN-NEXT: s_setpc_b64 s[30:31] 974 %min = call float @llvm.minnum.f32(float -0.0, float %a) 975 %fneg = fneg float %min 976 ret float %fneg 977} 978 979define float @v_fneg_inv2pi_minnum_f32(float %a) #0 { 980; SI-LABEL: v_fneg_inv2pi_minnum_f32: 981; SI: ; %bb.0: 982; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 983; SI-NEXT: v_mul_f32_e32 v0, -1.0, v0 984; SI-NEXT: v_max_f32_e32 v0, 0xbe22f983, v0 985; SI-NEXT: s_setpc_b64 s[30:31] 986; 987; VI-LABEL: v_fneg_inv2pi_minnum_f32: 988; VI: ; %bb.0: 989; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 990; VI-NEXT: v_mul_f32_e32 v0, 1.0, v0 991; VI-NEXT: v_min_f32_e32 v0, 0.15915494, v0 992; VI-NEXT: v_xor_b32_e32 v0, 0x80000000, v0 993; VI-NEXT: s_setpc_b64 s[30:31] 994 %min = call float @llvm.minnum.f32(float 0x3FC45F3060000000, float %a) 995 %fneg = fneg float %min 996 ret float %fneg 997} 998 999define float @v_fneg_neg_inv2pi_minnum_f32(float %a) #0 { 1000; SI-LABEL: v_fneg_neg_inv2pi_minnum_f32: 1001; SI: ; %bb.0: 1002; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1003; SI-NEXT: v_mul_f32_e32 v0, -1.0, v0 1004; SI-NEXT: v_max_f32_e32 v0, 0x3e22f983, v0 1005; SI-NEXT: s_setpc_b64 s[30:31] 1006; 1007; VI-LABEL: v_fneg_neg_inv2pi_minnum_f32: 1008; VI: ; %bb.0: 1009; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1010; VI-NEXT: v_mul_f32_e32 v0, -1.0, v0 1011; VI-NEXT: v_max_f32_e32 v0, 0.15915494, v0 1012; VI-NEXT: s_setpc_b64 s[30:31] 1013 %min = call float @llvm.minnum.f32(float 0xBFC45F3060000000, float %a) 1014 %fneg = fneg float %min 1015 ret float %fneg 1016} 1017 
1018define half @v_fneg_inv2pi_minnum_f16(half %a) #0 { 1019; SI-LABEL: v_fneg_inv2pi_minnum_f16: 1020; SI: ; %bb.0: 1021; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1022; SI-NEXT: v_cvt_f16_f32_e64 v0, -v0 1023; SI-NEXT: v_cvt_f32_f16_e32 v0, v0 1024; SI-NEXT: v_max_f32_e32 v0, 0xbe230000, v0 1025; SI-NEXT: s_setpc_b64 s[30:31] 1026; 1027; VI-LABEL: v_fneg_inv2pi_minnum_f16: 1028; VI: ; %bb.0: 1029; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1030; VI-NEXT: v_max_f16_e32 v0, v0, v0 1031; VI-NEXT: v_min_f16_e32 v0, 0.15915494, v0 1032; VI-NEXT: v_xor_b32_e32 v0, 0x8000, v0 1033; VI-NEXT: s_setpc_b64 s[30:31] 1034 %min = call half @llvm.minnum.f16(half 0xH3118, half %a) 1035 %fneg = fneg half %min 1036 ret half %fneg 1037} 1038 1039define half @v_fneg_neg_inv2pi_minnum_f16(half %a) #0 { 1040; SI-LABEL: v_fneg_neg_inv2pi_minnum_f16: 1041; SI: ; %bb.0: 1042; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1043; SI-NEXT: v_cvt_f16_f32_e64 v0, -v0 1044; SI-NEXT: v_cvt_f32_f16_e32 v0, v0 1045; SI-NEXT: v_max_f32_e32 v0, 0x3e230000, v0 1046; SI-NEXT: s_setpc_b64 s[30:31] 1047; 1048; VI-LABEL: v_fneg_neg_inv2pi_minnum_f16: 1049; VI: ; %bb.0: 1050; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1051; VI-NEXT: v_max_f16_e64 v0, -v0, -v0 1052; VI-NEXT: v_max_f16_e32 v0, 0.15915494, v0 1053; VI-NEXT: s_setpc_b64 s[30:31] 1054 %min = call half @llvm.minnum.f16(half 0xHB118, half %a) 1055 %fneg = fneg half %min 1056 ret half %fneg 1057} 1058 1059define double @v_fneg_inv2pi_minnum_f64(double %a) #0 { 1060; SI-LABEL: v_fneg_inv2pi_minnum_f64: 1061; SI: ; %bb.0: 1062; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1063; SI-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1] 1064; SI-NEXT: s_mov_b32 s4, 0x6dc9c882 1065; SI-NEXT: s_mov_b32 s5, 0xbfc45f30 1066; SI-NEXT: v_max_f64 v[0:1], v[0:1], s[4:5] 1067; SI-NEXT: s_setpc_b64 s[30:31] 1068; 1069; VI-LABEL: v_fneg_inv2pi_minnum_f64: 1070; VI: ; %bb.0: 1071; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1072; VI-NEXT: 
v_max_f64 v[0:1], v[0:1], v[0:1] 1073; VI-NEXT: v_min_f64 v[0:1], v[0:1], 0.15915494309189532 1074; VI-NEXT: v_xor_b32_e32 v1, 0x80000000, v1 1075; VI-NEXT: s_setpc_b64 s[30:31] 1076 %min = call double @llvm.minnum.f64(double 0x3fc45f306dc9c882, double %a) 1077 %fneg = fneg double %min 1078 ret double %fneg 1079} 1080 1081define double @v_fneg_neg_inv2pi_minnum_f64(double %a) #0 { 1082; SI-LABEL: v_fneg_neg_inv2pi_minnum_f64: 1083; SI: ; %bb.0: 1084; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1085; SI-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1] 1086; SI-NEXT: s_mov_b32 s4, 0x6dc9c882 1087; SI-NEXT: s_mov_b32 s5, 0x3fc45f30 1088; SI-NEXT: v_max_f64 v[0:1], v[0:1], s[4:5] 1089; SI-NEXT: s_setpc_b64 s[30:31] 1090; 1091; VI-LABEL: v_fneg_neg_inv2pi_minnum_f64: 1092; VI: ; %bb.0: 1093; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1094; VI-NEXT: v_max_f64 v[0:1], -v[0:1], -v[0:1] 1095; VI-NEXT: v_max_f64 v[0:1], v[0:1], 0.15915494309189532 1096; VI-NEXT: s_setpc_b64 s[30:31] 1097 %min = call double @llvm.minnum.f64(double 0xbfc45f306dc9c882, double %a) 1098 %fneg = fneg double %min 1099 ret double %fneg 1100} 1101 1102define float @v_fneg_neg0_minnum_f32_no_ieee(float %a) #4 { 1103; GCN-LABEL: v_fneg_neg0_minnum_f32_no_ieee: 1104; GCN: ; %bb.0: 1105; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1106; GCN-NEXT: v_max_f32_e64 v0, -v0, 0 1107; GCN-NEXT: s_setpc_b64 s[30:31] 1108 %min = call float @llvm.minnum.f32(float -0.0, float %a) 1109 %fneg = fneg float %min 1110 ret float %fneg 1111} 1112 1113define float @v_fneg_0_minnum_foldable_use_f32_ieee(float %a, float %b) #0 { 1114; GCN-LABEL: v_fneg_0_minnum_foldable_use_f32_ieee: 1115; GCN: ; %bb.0: 1116; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1117; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0 1118; GCN-NEXT: v_min_f32_e32 v0, 0, v0 1119; GCN-NEXT: v_mul_f32_e64 v0, -v0, v1 1120; GCN-NEXT: s_setpc_b64 s[30:31] 1121 %min = call float @llvm.minnum.f32(float 0.0, float %a) 1122 %fneg = fneg float %min 1123 %mul = 
fmul float %fneg, %b 1124 ret float %mul 1125} 1126 1127define float @v_fneg_inv2pi_minnum_foldable_use_f32(float %a, float %b) #0 { 1128; SI-LABEL: v_fneg_inv2pi_minnum_foldable_use_f32: 1129; SI: ; %bb.0: 1130; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1131; SI-NEXT: v_mul_f32_e32 v0, -1.0, v0 1132; SI-NEXT: v_max_f32_e32 v0, 0xbe22f983, v0 1133; SI-NEXT: v_mul_f32_e32 v0, v0, v1 1134; SI-NEXT: s_setpc_b64 s[30:31] 1135; 1136; VI-LABEL: v_fneg_inv2pi_minnum_foldable_use_f32: 1137; VI: ; %bb.0: 1138; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1139; VI-NEXT: v_mul_f32_e32 v0, 1.0, v0 1140; VI-NEXT: v_min_f32_e32 v0, 0.15915494, v0 1141; VI-NEXT: v_mul_f32_e64 v0, -v0, v1 1142; VI-NEXT: s_setpc_b64 s[30:31] 1143 %min = call float @llvm.minnum.f32(float 0x3FC45F3060000000, float %a) 1144 %fneg = fneg float %min 1145 %mul = fmul float %fneg, %b 1146 ret float %mul 1147} 1148 1149define float @v_fneg_0_minnum_foldable_use_f32_no_ieee(float %a, float %b) #4 { 1150; GCN-LABEL: v_fneg_0_minnum_foldable_use_f32_no_ieee: 1151; GCN: ; %bb.0: 1152; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1153; GCN-NEXT: v_min_f32_e32 v0, 0, v0 1154; GCN-NEXT: v_mul_f32_e64 v0, -v0, v1 1155; GCN-NEXT: s_setpc_b64 s[30:31] 1156 %min = call float @llvm.minnum.f32(float 0.0, float %a) 1157 %fneg = fneg float %min 1158 %mul = fmul float %fneg, %b 1159 ret float %mul 1160} 1161 1162define { float, float } @v_fneg_minnum_multi_use_minnum_f32_ieee(float %a, float %b) #0 { 1163; GCN-LABEL: v_fneg_minnum_multi_use_minnum_f32_ieee: 1164; GCN: ; %bb.0: 1165; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1166; GCN-NEXT: v_mul_f32_e32 v1, -1.0, v1 1167; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0 1168; GCN-NEXT: v_max_f32_e32 v0, v0, v1 1169; GCN-NEXT: v_mul_f32_e32 v1, -4.0, v0 1170; GCN-NEXT: s_setpc_b64 s[30:31] 1171 %min = call float @llvm.minnum.f32(float %a, float %b) 1172 %fneg = fneg float %min 1173 %use1 = fmul float %min, 4.0 1174 %insert.0 = insertvalue { float, float } 
poison, float %fneg, 0 1175 %insert.1 = insertvalue { float, float } %insert.0, float %use1, 1 1176 ret { float, float } %insert.1 1177} 1178 ; Both lanes of the <2 x float> result are overwritten below, so the poison base vector is only a placeholder. 1179define <2 x float> @v_fneg_minnum_multi_use_minnum_f32_no_ieee(float %a, float %b) #4 { 1180; GCN-LABEL: v_fneg_minnum_multi_use_minnum_f32_no_ieee: 1181; GCN: ; %bb.0: 1182; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1183; GCN-NEXT: v_max_f32_e64 v0, -v0, -v1 1184; GCN-NEXT: v_mul_f32_e32 v1, -4.0, v0 1185; GCN-NEXT: s_setpc_b64 s[30:31] 1186 %min = call float @llvm.minnum.f32(float %a, float %b) 1187 %fneg = fneg float %min 1188 %use1 = fmul float %min, 4.0 1189 %ins0 = insertelement <2 x float> poison, float %fneg, i32 0 1190 %ins1 = insertelement <2 x float> %ins0, float %use1, i32 1 1191 ret <2 x float> %ins1 1192} 1193 1194; -------------------------------------------------------------------------------- 1195; fmaxnum tests 1196; -------------------------------------------------------------------------------- 1197 1198define float @v_fneg_maxnum_f32_ieee(float %a, float %b) #0 { 1199; GCN-LABEL: v_fneg_maxnum_f32_ieee: 1200; GCN: ; %bb.0: 1201; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1202; GCN-NEXT: v_mul_f32_e32 v1, -1.0, v1 1203; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0 1204; GCN-NEXT: v_min_f32_e32 v0, v0, v1 1205; GCN-NEXT: s_setpc_b64 s[30:31] 1206 %max = call float @llvm.maxnum.f32(float %a, float %b) 1207 %fneg = fneg float %max 1208 ret float %fneg 1209} 1210 1211define float @v_fneg_maxnum_f32_no_ieee(float %a, float %b) #4 { 1212; GCN-LABEL: v_fneg_maxnum_f32_no_ieee: 1213; GCN: ; %bb.0: 1214; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1215; GCN-NEXT: v_min_f32_e64 v0, -v0, -v1 1216; GCN-NEXT: s_setpc_b64 s[30:31] 1217 %max = call float @llvm.maxnum.f32(float %a, float %b) 1218 %fneg = fneg float %max 1219 ret float %fneg 1220} 1221 1222define float @v_fneg_self_maxnum_f32_ieee(float %a) #0 { 1223; GCN-LABEL: v_fneg_self_maxnum_f32_ieee: 1224; GCN: ; %bb.0: 1225; GCN-NEXT: s_waitcnt
vmcnt(0) expcnt(0) lgkmcnt(0) 1226; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v0 1227; GCN-NEXT: s_setpc_b64 s[30:31] 1228 %max = call float @llvm.maxnum.f32(float %a, float %a) 1229 %max.fneg = fneg float %max 1230 ret float %max.fneg 1231} 1232 1233define float @v_fneg_self_maxnum_f32_no_ieee(float %a) #4 { 1234; GCN-LABEL: v_fneg_self_maxnum_f32_no_ieee: 1235; GCN: ; %bb.0: 1236; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1237; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v0 1238; GCN-NEXT: s_setpc_b64 s[30:31] 1239 %max = call float @llvm.maxnum.f32(float %a, float %a) 1240 %max.fneg = fneg float %max 1241 ret float %max.fneg 1242} 1243 1244define float @v_fneg_posk_maxnum_f32_ieee(float %a) #0 { 1245; GCN-LABEL: v_fneg_posk_maxnum_f32_ieee: 1246; GCN: ; %bb.0: 1247; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1248; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0 1249; GCN-NEXT: v_min_f32_e32 v0, -4.0, v0 1250; GCN-NEXT: s_setpc_b64 s[30:31] 1251 %max = call float @llvm.maxnum.f32(float 4.0, float %a) 1252 %fneg = fneg float %max 1253 ret float %fneg 1254} 1255 1256define float @v_fneg_posk_maxnum_f32_no_ieee(float %a) #4 { 1257; GCN-LABEL: v_fneg_posk_maxnum_f32_no_ieee: 1258; GCN: ; %bb.0: 1259; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1260; GCN-NEXT: v_min_f32_e64 v0, -v0, -4.0 1261; GCN-NEXT: s_setpc_b64 s[30:31] 1262 %max = call float @llvm.maxnum.f32(float 4.0, float %a) 1263 %fneg = fneg float %max 1264 ret float %fneg 1265} 1266 1267define float @v_fneg_negk_maxnum_f32_ieee(float %a) #0 { 1268; GCN-LABEL: v_fneg_negk_maxnum_f32_ieee: 1269; GCN: ; %bb.0: 1270; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1271; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0 1272; GCN-NEXT: v_min_f32_e32 v0, 4.0, v0 1273; GCN-NEXT: s_setpc_b64 s[30:31] 1274 %max = call float @llvm.maxnum.f32(float -4.0, float %a) 1275 %fneg = fneg float %max 1276 ret float %fneg 1277} 1278 1279define float @v_fneg_negk_maxnum_f32_no_ieee(float %a) #4 { 1280; GCN-LABEL: 
v_fneg_negk_maxnum_f32_no_ieee: 1281; GCN: ; %bb.0: 1282; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1283; GCN-NEXT: v_min_f32_e64 v0, -v0, 4.0 1284; GCN-NEXT: s_setpc_b64 s[30:31] 1285 %max = call float @llvm.maxnum.f32(float -4.0, float %a) 1286 %fneg = fneg float %max 1287 ret float %fneg 1288} 1289 1290define float @v_fneg_0_maxnum_f32(float %a) #0 { 1291; GCN-LABEL: v_fneg_0_maxnum_f32: 1292; GCN: ; %bb.0: 1293; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1294; GCN-NEXT: v_max_f32_e32 v0, 0, v0 1295; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v0 1296; GCN-NEXT: s_setpc_b64 s[30:31] 1297 %max = call nnan float @llvm.maxnum.f32(float 0.0, float %a) 1298 %fneg = fneg float %max 1299 ret float %fneg 1300} 1301 1302define float @v_fneg_neg0_maxnum_f32_ieee(float %a) #0 { 1303; GCN-LABEL: v_fneg_neg0_maxnum_f32_ieee: 1304; GCN: ; %bb.0: 1305; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1306; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0 1307; GCN-NEXT: v_min_f32_e32 v0, 0, v0 1308; GCN-NEXT: s_setpc_b64 s[30:31] 1309 %max = call float @llvm.maxnum.f32(float -0.0, float %a) 1310 %fneg = fneg float %max 1311 ret float %fneg 1312} 1313 1314define float @v_fneg_neg0_maxnum_f32_no_ieee(float %a) #4 { 1315; GCN-LABEL: v_fneg_neg0_maxnum_f32_no_ieee: 1316; GCN: ; %bb.0: 1317; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1318; GCN-NEXT: v_min_f32_e64 v0, -v0, 0 1319; GCN-NEXT: s_setpc_b64 s[30:31] 1320 %max = call float @llvm.maxnum.f32(float -0.0, float %a) 1321 %fneg = fneg float %max 1322 ret float %fneg 1323} 1324 1325define float @v_fneg_0_maxnum_foldable_use_f32_ieee(float %a, float %b) #0 { 1326; GCN-LABEL: v_fneg_0_maxnum_foldable_use_f32_ieee: 1327; GCN: ; %bb.0: 1328; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1329; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0 1330; GCN-NEXT: v_max_f32_e32 v0, 0, v0 1331; GCN-NEXT: v_mul_f32_e64 v0, -v0, v1 1332; GCN-NEXT: s_setpc_b64 s[30:31] 1333 %max = call float @llvm.maxnum.f32(float 0.0, float %a) 1334 %fneg = fneg 
float %max 1335 %mul = fmul float %fneg, %b 1336 ret float %mul 1337} 1338 1339define float @v_fneg_0_maxnum_foldable_use_f32_no_ieee(float %a, float %b) #4 { 1340; GCN-LABEL: v_fneg_0_maxnum_foldable_use_f32_no_ieee: 1341; GCN: ; %bb.0: 1342; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1343; GCN-NEXT: v_max_f32_e32 v0, 0, v0 1344; GCN-NEXT: v_mul_f32_e64 v0, -v0, v1 1345; GCN-NEXT: s_setpc_b64 s[30:31] 1346 %max = call float @llvm.maxnum.f32(float 0.0, float %a) 1347 %fneg = fneg float %max 1348 %mul = fmul float %fneg, %b 1349 ret float %mul 1350} 1351 1352define { float, float } @v_fneg_maxnum_multi_use_maxnum_f32_ieee(float %a, float %b) #0 { 1353; GCN-LABEL: v_fneg_maxnum_multi_use_maxnum_f32_ieee: 1354; GCN: ; %bb.0: 1355; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1356; GCN-NEXT: v_mul_f32_e32 v1, -1.0, v1 1357; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0 1358; GCN-NEXT: v_min_f32_e32 v0, v0, v1 1359; GCN-NEXT: v_mul_f32_e32 v1, -4.0, v0 1360; GCN-NEXT: s_setpc_b64 s[30:31] 1361 %max = call float @llvm.maxnum.f32(float %a, float %b) 1362 %fneg = fneg float %max 1363 %use1 = fmul float %max, 4.0 1364 %insert.0 = insertvalue { float, float } poison, float %fneg, 0 1365 %insert.1 = insertvalue { float, float } %insert.0, float %use1, 1 1366 ret { float, float } %insert.1 1367} 1368 ; Both lanes of the <2 x float> result are overwritten below, so the poison base vector is only a placeholder. 1369define <2 x float> @v_fneg_maxnum_multi_use_maxnum_f32_no_ieee(float %a, float %b) #4 { 1370; GCN-LABEL: v_fneg_maxnum_multi_use_maxnum_f32_no_ieee: 1371; GCN: ; %bb.0: 1372; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1373; GCN-NEXT: v_min_f32_e64 v0, -v0, -v1 1374; GCN-NEXT: v_mul_f32_e32 v1, -4.0, v0 1375; GCN-NEXT: s_setpc_b64 s[30:31] 1376 %max = call float @llvm.maxnum.f32(float %a, float %b) 1377 %fneg = fneg float %max 1378 %use1 = fmul float %max, 4.0 1379 %ins0 = insertelement <2 x float> poison, float %fneg, i32 0 1380 %ins1 = insertelement <2 x float> %ins0, float %use1, i32 1 1381 ret <2 x float> %ins1 1382} 1383 1384;
-------------------------------------------------------------------------------- 1385; fma tests 1386; -------------------------------------------------------------------------------- 1387 1388define float @v_fneg_fma_f32(float %a, float %b, float %c) #0 { 1389; GCN-SAFE-LABEL: v_fneg_fma_f32: 1390; GCN-SAFE: ; %bb.0: 1391; GCN-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1392; GCN-SAFE-NEXT: v_fma_f32 v0, v0, v1, v2 1393; GCN-SAFE-NEXT: v_xor_b32_e32 v0, 0x80000000, v0 1394; GCN-SAFE-NEXT: s_setpc_b64 s[30:31] 1395; 1396; GCN-NSZ-LABEL: v_fneg_fma_f32: 1397; GCN-NSZ: ; %bb.0: 1398; GCN-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1399; GCN-NSZ-NEXT: v_fma_f32 v0, v0, -v1, -v2 1400; GCN-NSZ-NEXT: s_setpc_b64 s[30:31] 1401 %fma = call float @llvm.fma.f32(float %a, float %b, float %c) 1402 %fneg = fneg float %fma 1403 ret float %fneg 1404} 1405 1406define { float, float } @v_fneg_fma_store_use_fma_f32(float %a, float %b, float %c) #0 { 1407; GCN-LABEL: v_fneg_fma_store_use_fma_f32: 1408; GCN: ; %bb.0: 1409; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1410; GCN-NEXT: v_fma_f32 v1, v0, v1, v2 1411; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v1 1412; GCN-NEXT: s_setpc_b64 s[30:31] 1413 %fma = call float @llvm.fma.f32(float %a, float %b, float %c) 1414 %fneg = fneg float %fma 1415 %insert.0 = insertvalue { float, float } poison, float %fneg, 0 1416 %insert.1 = insertvalue { float, float } %insert.0, float %fma, 1 1417 ret { float, float } %insert.1 1418} 1419 1420define { float, float } @v_fneg_fma_multi_use_fma_f32(float %a, float %b, float %c) #0 { 1421; GCN-SAFE-LABEL: v_fneg_fma_multi_use_fma_f32: 1422; GCN-SAFE: ; %bb.0: 1423; GCN-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1424; GCN-SAFE-NEXT: v_fma_f32 v1, v0, v1, v2 1425; GCN-SAFE-NEXT: v_xor_b32_e32 v0, 0x80000000, v1 1426; GCN-SAFE-NEXT: v_mul_f32_e32 v1, 4.0, v1 1427; GCN-SAFE-NEXT: s_setpc_b64 s[30:31] 1428; 1429; GCN-NSZ-LABEL: v_fneg_fma_multi_use_fma_f32: 1430; GCN-NSZ: ; %bb.0: 1431; 
GCN-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1432; GCN-NSZ-NEXT: v_fma_f32 v0, v0, -v1, -v2 1433; GCN-NSZ-NEXT: v_mul_f32_e32 v1, -4.0, v0 1434; GCN-NSZ-NEXT: s_setpc_b64 s[30:31] 1435 %fma = call float @llvm.fma.f32(float %a, float %b, float %c) 1436 %fneg = fneg float %fma 1437 %use1 = fmul float %fma, 4.0 1438 %insert.0 = insertvalue { float, float } poison, float %fneg, 0 1439 %insert.1 = insertvalue { float, float } %insert.0, float %use1, 1 1440 ret { float, float } %insert.1 1441} 1442 1443define float @v_fneg_fma_fneg_x_y_f32(float %a, float %b, float %c) #0 { 1444; GCN-SAFE-LABEL: v_fneg_fma_fneg_x_y_f32: 1445; GCN-SAFE: ; %bb.0: 1446; GCN-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1447; GCN-SAFE-NEXT: v_fma_f32 v0, -v0, v1, v2 1448; GCN-SAFE-NEXT: v_xor_b32_e32 v0, 0x80000000, v0 1449; GCN-SAFE-NEXT: s_setpc_b64 s[30:31] 1450; 1451; GCN-NSZ-LABEL: v_fneg_fma_fneg_x_y_f32: 1452; GCN-NSZ: ; %bb.0: 1453; GCN-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1454; GCN-NSZ-NEXT: v_fma_f32 v0, v0, v1, -v2 1455; GCN-NSZ-NEXT: s_setpc_b64 s[30:31] 1456 %fneg.a = fneg float %a 1457 %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c) 1458 %fneg = fneg float %fma 1459 ret float %fneg 1460} 1461 1462define float @v_fneg_fma_x_fneg_y_f32(float %a, float %b, float %c) #0 { 1463; GCN-SAFE-LABEL: v_fneg_fma_x_fneg_y_f32: 1464; GCN-SAFE: ; %bb.0: 1465; GCN-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1466; GCN-SAFE-NEXT: v_fma_f32 v0, v0, -v1, v2 1467; GCN-SAFE-NEXT: v_xor_b32_e32 v0, 0x80000000, v0 1468; GCN-SAFE-NEXT: s_setpc_b64 s[30:31] 1469; 1470; GCN-NSZ-LABEL: v_fneg_fma_x_fneg_y_f32: 1471; GCN-NSZ: ; %bb.0: 1472; GCN-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1473; GCN-NSZ-NEXT: v_fma_f32 v0, v0, v1, -v2 1474; GCN-NSZ-NEXT: s_setpc_b64 s[30:31] 1475 %fneg.b = fneg float %b 1476 %fma = call float @llvm.fma.f32(float %a, float %fneg.b, float %c) 1477 %fneg = fneg float %fma 1478 ret float %fneg 1479} 1480 1481define float 
@v_fneg_fma_fneg_fneg_y_f32(float %a, float %b, float %c) #0 { 1482; GCN-SAFE-LABEL: v_fneg_fma_fneg_fneg_y_f32: 1483; GCN-SAFE: ; %bb.0: 1484; GCN-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1485; GCN-SAFE-NEXT: v_fma_f32 v0, v0, v1, v2 1486; GCN-SAFE-NEXT: v_xor_b32_e32 v0, 0x80000000, v0 1487; GCN-SAFE-NEXT: s_setpc_b64 s[30:31] 1488; 1489; GCN-NSZ-LABEL: v_fneg_fma_fneg_fneg_y_f32: 1490; GCN-NSZ: ; %bb.0: 1491; GCN-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1492; GCN-NSZ-NEXT: v_fma_f32 v0, v0, -v1, -v2 1493; GCN-NSZ-NEXT: s_setpc_b64 s[30:31] 1494 %fneg.a = fneg float %a 1495 %fneg.b = fneg float %b 1496 %fma = call float @llvm.fma.f32(float %fneg.a, float %fneg.b, float %c) 1497 %fneg = fneg float %fma 1498 ret float %fneg 1499} 1500 1501define float @v_fneg_fma_fneg_x_fneg_f32(float %a, float %b, float %c) #0 { 1502; GCN-SAFE-LABEL: v_fneg_fma_fneg_x_fneg_f32: 1503; GCN-SAFE: ; %bb.0: 1504; GCN-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1505; GCN-SAFE-NEXT: v_fma_f32 v0, -v0, v1, -v2 1506; GCN-SAFE-NEXT: v_xor_b32_e32 v0, 0x80000000, v0 1507; GCN-SAFE-NEXT: s_setpc_b64 s[30:31] 1508; 1509; GCN-NSZ-LABEL: v_fneg_fma_fneg_x_fneg_f32: 1510; GCN-NSZ: ; %bb.0: 1511; GCN-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1512; GCN-NSZ-NEXT: v_fma_f32 v0, v0, v1, v2 1513; GCN-NSZ-NEXT: s_setpc_b64 s[30:31] 1514 %fneg.a = fneg float %a 1515 %fneg.c = fneg float %c 1516 %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %fneg.c) 1517 %fneg = fneg float %fma 1518 ret float %fneg 1519} 1520 1521define float @v_fneg_fma_x_y_fneg_f32(float %a, float %b, float %c) #0 { 1522; GCN-SAFE-LABEL: v_fneg_fma_x_y_fneg_f32: 1523; GCN-SAFE: ; %bb.0: 1524; GCN-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1525; GCN-SAFE-NEXT: v_fma_f32 v0, v0, v1, -v2 1526; GCN-SAFE-NEXT: v_xor_b32_e32 v0, 0x80000000, v0 1527; GCN-SAFE-NEXT: s_setpc_b64 s[30:31] 1528; 1529; GCN-NSZ-LABEL: v_fneg_fma_x_y_fneg_f32: 1530; GCN-NSZ: ; %bb.0: 1531; GCN-NSZ-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1532; GCN-NSZ-NEXT: v_fma_f32 v0, v0, -v1, v2 1533; GCN-NSZ-NEXT: s_setpc_b64 s[30:31] 1534 %fneg.c = fneg float %c 1535 %fma = call float @llvm.fma.f32(float %a, float %b, float %fneg.c) 1536 %fneg = fneg float %fma 1537 ret float %fneg 1538} 1539 1540define { float, float } @v_fneg_fma_store_use_fneg_x_y_f32(float %a, float %b, float %c) #0 { 1541; GCN-SAFE-LABEL: v_fneg_fma_store_use_fneg_x_y_f32: 1542; GCN-SAFE: ; %bb.0: 1543; GCN-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1544; GCN-SAFE-NEXT: v_xor_b32_e32 v3, 0x80000000, v0 1545; GCN-SAFE-NEXT: v_fma_f32 v0, -v0, v1, v2 1546; GCN-SAFE-NEXT: v_xor_b32_e32 v0, 0x80000000, v0 1547; GCN-SAFE-NEXT: v_mov_b32_e32 v1, v3 1548; GCN-SAFE-NEXT: s_setpc_b64 s[30:31] 1549; 1550; GCN-NSZ-LABEL: v_fneg_fma_store_use_fneg_x_y_f32: 1551; GCN-NSZ: ; %bb.0: 1552; GCN-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1553; GCN-NSZ-NEXT: v_xor_b32_e32 v3, 0x80000000, v0 1554; GCN-NSZ-NEXT: v_fma_f32 v0, v0, v1, -v2 1555; GCN-NSZ-NEXT: v_mov_b32_e32 v1, v3 1556; GCN-NSZ-NEXT: s_setpc_b64 s[30:31] 1557 %fneg.a = fneg float %a 1558 %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c) 1559 %fneg = fneg float %fma 1560 %insert.0 = insertvalue { float, float } poison, float %fneg, 0 1561 %insert.1 = insertvalue { float, float } %insert.0, float %fneg.a, 1 1562 ret { float, float } %insert.1 1563} 1564 1565define { float, float } @v_fneg_fma_multi_use_fneg_x_y_f32(float %a, float %b, float %c, float %d) #0 { 1566; GCN-SAFE-LABEL: v_fneg_fma_multi_use_fneg_x_y_f32: 1567; GCN-SAFE: ; %bb.0: 1568; GCN-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1569; GCN-SAFE-NEXT: v_fma_f32 v1, -v0, v1, v2 1570; GCN-SAFE-NEXT: v_xor_b32_e32 v2, 0x80000000, v1 1571; GCN-SAFE-NEXT: v_mul_f32_e64 v1, -v0, v3 1572; GCN-SAFE-NEXT: v_mov_b32_e32 v0, v2 1573; GCN-SAFE-NEXT: s_setpc_b64 s[30:31] 1574; 1575; GCN-NSZ-LABEL: v_fneg_fma_multi_use_fneg_x_y_f32: 1576; GCN-NSZ: ; %bb.0: 1577; 
GCN-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1578; GCN-NSZ-NEXT: v_fma_f32 v2, v0, v1, -v2 1579; GCN-NSZ-NEXT: v_mul_f32_e64 v1, -v0, v3 1580; GCN-NSZ-NEXT: v_mov_b32_e32 v0, v2 1581; GCN-NSZ-NEXT: s_setpc_b64 s[30:31] 1582 %fneg.a = fneg float %a 1583 %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c) 1584 %fneg = fneg float %fma 1585 %use1 = fmul float %fneg.a, %d 1586 %insert.0 = insertvalue { float, float } poison, float %fneg, 0 1587 %insert.1 = insertvalue { float, float } %insert.0, float %use1, 1 1588 ret { float, float } %insert.1 1589} 1590 1591; -------------------------------------------------------------------------------- 1592; fmad tests 1593; -------------------------------------------------------------------------------- 1594 1595define float @v_fneg_fmad_f32(float %a, float %b, float %c) #0 { 1596; GCN-SAFE-LABEL: v_fneg_fmad_f32: 1597; GCN-SAFE: ; %bb.0: 1598; GCN-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1599; GCN-SAFE-NEXT: v_mac_f32_e32 v2, v0, v1 1600; GCN-SAFE-NEXT: v_xor_b32_e32 v0, 0x80000000, v2 1601; GCN-SAFE-NEXT: s_setpc_b64 s[30:31] 1602; 1603; GCN-NSZ-LABEL: v_fneg_fmad_f32: 1604; GCN-NSZ: ; %bb.0: 1605; GCN-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1606; GCN-NSZ-NEXT: v_mad_f32 v0, v0, -v1, -v2 1607; GCN-NSZ-NEXT: s_setpc_b64 s[30:31] 1608 %fma = call float @llvm.fmuladd.f32(float %a, float %b, float %c) 1609 %fneg = fneg float %fma 1610 ret float %fneg 1611} 1612 1613define <4 x float> @v_fneg_fmad_v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) #0 { 1614; GCN-SAFE-LABEL: v_fneg_fmad_v4f32: 1615; GCN-SAFE: ; %bb.0: 1616; GCN-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1617; GCN-SAFE-NEXT: v_mac_f32_e32 v11, v3, v7 1618; GCN-SAFE-NEXT: v_mac_f32_e32 v10, v2, v6 1619; GCN-SAFE-NEXT: v_mac_f32_e32 v9, v1, v5 1620; GCN-SAFE-NEXT: v_mac_f32_e32 v8, v0, v4 1621; GCN-SAFE-NEXT: v_xor_b32_e32 v0, 0x80000000, v8 1622; GCN-SAFE-NEXT: v_xor_b32_e32 v1, 0x80000000, v9 1623; GCN-SAFE-NEXT: 
v_xor_b32_e32 v2, 0x80000000, v10 1624; GCN-SAFE-NEXT: v_xor_b32_e32 v3, 0x80000000, v11 1625; GCN-SAFE-NEXT: s_setpc_b64 s[30:31] 1626; 1627; GCN-NSZ-LABEL: v_fneg_fmad_v4f32: 1628; GCN-NSZ: ; %bb.0: 1629; GCN-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1630; GCN-NSZ-NEXT: v_mad_f32 v0, v0, -v4, -v8 1631; GCN-NSZ-NEXT: v_mad_f32 v1, v1, -v5, -v9 1632; GCN-NSZ-NEXT: v_mad_f32 v2, v2, -v6, -v10 1633; GCN-NSZ-NEXT: v_mad_f32 v3, v3, -v7, -v11 1634; GCN-NSZ-NEXT: s_setpc_b64 s[30:31] 1635 %fma = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) 1636 %fneg = fneg <4 x float> %fma 1637 ret <4 x float> %fneg 1638} 1639 1640define { float, float } @v_fneg_fmad_multi_use_fmad_f32(float %a, float %b, float %c) #0 { 1641; GCN-SAFE-LABEL: v_fneg_fmad_multi_use_fmad_f32: 1642; GCN-SAFE: ; %bb.0: 1643; GCN-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1644; GCN-SAFE-NEXT: v_mac_f32_e32 v2, v0, v1 1645; GCN-SAFE-NEXT: v_xor_b32_e32 v0, 0x80000000, v2 1646; GCN-SAFE-NEXT: v_mul_f32_e32 v1, 4.0, v2 1647; GCN-SAFE-NEXT: s_setpc_b64 s[30:31] 1648; 1649; GCN-NSZ-LABEL: v_fneg_fmad_multi_use_fmad_f32: 1650; GCN-NSZ: ; %bb.0: 1651; GCN-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1652; GCN-NSZ-NEXT: v_mad_f32 v0, v0, -v1, -v2 1653; GCN-NSZ-NEXT: v_mul_f32_e32 v1, -4.0, v0 1654; GCN-NSZ-NEXT: s_setpc_b64 s[30:31] 1655 %fma = call float @llvm.fmuladd.f32(float %a, float %b, float %c) 1656 %fneg = fneg float %fma 1657 %use1 = fmul float %fma, 4.0 1658 %insert.0 = insertvalue { float, float } poison, float %fneg, 0 1659 %insert.1 = insertvalue { float, float } %insert.0, float %use1, 1 1660 ret { float, float } %insert.1 1661} 1662 1663; -------------------------------------------------------------------------------- 1664; fp_extend tests 1665; -------------------------------------------------------------------------------- 1666 1667define double @v_fneg_fp_extend_f32_to_f64(float %a) #0 { 1668; GCN-LABEL: v_fneg_fp_extend_f32_to_f64: 
1669; GCN: ; %bb.0: 1670; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1671; GCN-NEXT: v_cvt_f64_f32_e64 v[0:1], -v0 1672; GCN-NEXT: s_setpc_b64 s[30:31] 1673 %fpext = fpext float %a to double 1674 %fneg = fneg double %fpext 1675 ret double %fneg 1676} 1677 1678define double @v_fneg_fp_extend_fneg_f32_to_f64(float %a) #0 { 1679; GCN-LABEL: v_fneg_fp_extend_fneg_f32_to_f64: 1680; GCN: ; %bb.0: 1681; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1682; GCN-NEXT: v_cvt_f64_f32_e32 v[0:1], v0 1683; GCN-NEXT: s_setpc_b64 s[30:31] 1684 %fneg.a = fneg float %a 1685 %fpext = fpext float %fneg.a to double 1686 %fneg = fneg double %fpext 1687 ret double %fneg 1688} 1689 1690define { double, float } @v_fneg_fp_extend_store_use_fneg_f32_to_f64(float %a) #0 { 1691; GCN-LABEL: v_fneg_fp_extend_store_use_fneg_f32_to_f64: 1692; GCN: ; %bb.0: 1693; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1694; GCN-NEXT: v_mov_b32_e32 v2, v0 1695; GCN-NEXT: v_cvt_f64_f32_e32 v[0:1], v2 1696; GCN-NEXT: v_xor_b32_e32 v2, 0x80000000, v2 1697; GCN-NEXT: s_setpc_b64 s[30:31] 1698 %fneg.a = fneg float %a 1699 %fpext = fpext float %fneg.a to double 1700 %fneg = fneg double %fpext 1701 %insert.0 = insertvalue { double, float } poison, double %fneg, 0 1702 %insert.1 = insertvalue { double, float } %insert.0, float %fneg.a, 1 1703 ret { double, float } %insert.1 1704} 1705 1706define { double, double } @v_fneg_multi_use_fp_extend_fneg_f32_to_f64(float %a) #0 { 1707; GCN-LABEL: v_fneg_multi_use_fp_extend_fneg_f32_to_f64: 1708; GCN: ; %bb.0: 1709; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1710; GCN-NEXT: v_cvt_f64_f32_e32 v[2:3], v0 1711; GCN-NEXT: v_xor_b32_e32 v1, 0x80000000, v3 1712; GCN-NEXT: v_mov_b32_e32 v0, v2 1713; GCN-NEXT: s_setpc_b64 s[30:31] 1714 %fpext = fpext float %a to double 1715 %fneg = fneg double %fpext 1716 %insert.0 = insertvalue { double, double } poison, double %fneg, 0 1717 %insert.1 = insertvalue { double, double } %insert.0, double %fpext, 1 1718 ret { 
double, double } %insert.1 1719} 1720 1721define { double, double } @v_fneg_multi_foldable_use_fp_extend_fneg_f32_to_f64(float %a) #0 { 1722; SI-LABEL: v_fneg_multi_foldable_use_fp_extend_fneg_f32_to_f64: 1723; SI: ; %bb.0: 1724; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1725; SI-NEXT: v_cvt_f64_f32_e32 v[0:1], v0 1726; SI-NEXT: v_xor_b32_e32 v4, 0x80000000, v1 1727; SI-NEXT: v_mul_f64 v[2:3], v[0:1], 4.0 1728; SI-NEXT: v_mov_b32_e32 v1, v4 1729; SI-NEXT: s_setpc_b64 s[30:31] 1730; 1731; VI-LABEL: v_fneg_multi_foldable_use_fp_extend_fneg_f32_to_f64: 1732; VI: ; %bb.0: 1733; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1734; VI-NEXT: v_cvt_f64_f32_e32 v[0:1], v0 1735; VI-NEXT: v_mul_f64 v[2:3], v[0:1], 4.0 1736; VI-NEXT: v_xor_b32_e32 v1, 0x80000000, v1 1737; VI-NEXT: s_setpc_b64 s[30:31] 1738 %fpext = fpext float %a to double 1739 %fneg = fneg double %fpext 1740 %mul = fmul double %fpext, 4.0 1741 %insert.0 = insertvalue { double, double } poison, double %fneg, 0 1742 %insert.1 = insertvalue { double, double } %insert.0, double %mul, 1 1743 ret { double, double } %insert.1 1744} 1745 1746; FIXME: Source modifiers not folded for f16->f32 1747define { float, float } @v_fneg_multi_use_fp_extend_fneg_f16_to_f32(half %a) #0 { 1748; SI-LABEL: v_fneg_multi_use_fp_extend_fneg_f16_to_f32: 1749; SI: ; %bb.0: 1750; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1751; SI-NEXT: v_mov_b32_e32 v1, v0 1752; SI-NEXT: v_xor_b32_e32 v0, 0x80000000, v1 1753; SI-NEXT: s_setpc_b64 s[30:31] 1754; 1755; VI-LABEL: v_fneg_multi_use_fp_extend_fneg_f16_to_f32: 1756; VI: ; %bb.0: 1757; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1758; VI-NEXT: v_cvt_f32_f16_e32 v1, v0 1759; VI-NEXT: v_xor_b32_e32 v0, 0x80000000, v1 1760; VI-NEXT: s_setpc_b64 s[30:31] 1761 %fpext = fpext half %a to float 1762 %fneg = fneg float %fpext 1763 %insert.0 = insertvalue { float, float } poison, float %fneg, 0 1764 %insert.1 = insertvalue { float, float } %insert.0, float %fpext, 1 1765 ret { float, 
float } %insert.1 1766} 1767 1768define { float, float } @v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f32(half %a) #0 { 1769; SI-LABEL: v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f32: 1770; SI: ; %bb.0: 1771; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1772; SI-NEXT: v_xor_b32_e32 v2, 0x80000000, v0 1773; SI-NEXT: v_mul_f32_e32 v1, 4.0, v0 1774; SI-NEXT: v_mov_b32_e32 v0, v2 1775; SI-NEXT: s_setpc_b64 s[30:31] 1776; 1777; VI-LABEL: v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f32: 1778; VI: ; %bb.0: 1779; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1780; VI-NEXT: v_cvt_f32_f16_e32 v1, v0 1781; VI-NEXT: v_xor_b32_e32 v0, 0x80000000, v1 1782; VI-NEXT: v_mul_f32_e32 v1, 4.0, v1 1783; VI-NEXT: s_setpc_b64 s[30:31] 1784 %fpext = fpext half %a to float 1785 %fneg = fneg float %fpext 1786 %mul = fmul float %fpext, 4.0 1787 %insert.0 = insertvalue { float, float } poison, float %fneg, 0 1788 %insert.1 = insertvalue { float, float } %insert.0, float %mul, 1 1789 ret { float, float } %insert.1 1790} 1791 1792; -------------------------------------------------------------------------------- 1793; fp_round tests 1794; -------------------------------------------------------------------------------- 1795 1796define float @v_fneg_fp_round_f64_to_f32(double %a) #0 { 1797; GCN-LABEL: v_fneg_fp_round_f64_to_f32: 1798; GCN: ; %bb.0: 1799; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1800; GCN-NEXT: v_cvt_f32_f64_e64 v0, -v[0:1] 1801; GCN-NEXT: s_setpc_b64 s[30:31] 1802 %fpround = fptrunc double %a to float 1803 %fneg = fneg float %fpround 1804 ret float %fneg 1805} 1806 1807define float @v_fneg_fp_round_fneg_f64_to_f32(double %a) #0 { 1808; GCN-LABEL: v_fneg_fp_round_fneg_f64_to_f32: 1809; GCN: ; %bb.0: 1810; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1811; GCN-NEXT: v_cvt_f32_f64_e32 v0, v[0:1] 1812; GCN-NEXT: s_setpc_b64 s[30:31] 1813 %fneg.a = fneg double %a 1814 %fpround = fptrunc double %fneg.a to float 1815 %fneg = fneg float %fpround 1816 ret 
float %fneg 1817} 1818 1819define { float, double } @v_fneg_fp_round_store_use_fneg_f64_to_f32(double %a) #0 { 1820; GCN-LABEL: v_fneg_fp_round_store_use_fneg_f64_to_f32: 1821; GCN: ; %bb.0: 1822; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1823; GCN-NEXT: v_cvt_f32_f64_e32 v3, v[0:1] 1824; GCN-NEXT: v_xor_b32_e32 v2, 0x80000000, v1 1825; GCN-NEXT: v_mov_b32_e32 v1, v0 1826; GCN-NEXT: v_mov_b32_e32 v0, v3 1827; GCN-NEXT: s_setpc_b64 s[30:31] 1828 %fneg.a = fneg double %a 1829 %fpround = fptrunc double %fneg.a to float 1830 %fneg = fneg float %fpround 1831 %insert.0 = insertvalue { float, double } poison, float %fneg, 0 1832 %insert.1 = insertvalue { float, double } %insert.0, double %fneg.a, 1 1833 ret { float, double } %insert.1 1834} 1835 1836define { float, double } @v_fneg_fp_round_multi_use_fneg_f64_to_f32(double %a, double %c) #0 { 1837; SI-LABEL: v_fneg_fp_round_multi_use_fneg_f64_to_f32: 1838; SI: ; %bb.0: 1839; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1840; SI-NEXT: v_cvt_f32_f64_e32 v4, v[0:1] 1841; SI-NEXT: v_mul_f64 v[1:2], -v[0:1], v[2:3] 1842; SI-NEXT: v_mov_b32_e32 v0, v4 1843; SI-NEXT: s_setpc_b64 s[30:31] 1844; 1845; VI-LABEL: v_fneg_fp_round_multi_use_fneg_f64_to_f32: 1846; VI: ; %bb.0: 1847; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1848; VI-NEXT: v_mul_f64 v[2:3], -v[0:1], v[2:3] 1849; VI-NEXT: v_cvt_f32_f64_e32 v0, v[0:1] 1850; VI-NEXT: v_mov_b32_e32 v1, v2 1851; VI-NEXT: v_mov_b32_e32 v2, v3 1852; VI-NEXT: s_setpc_b64 s[30:31] 1853 %fneg.a = fneg double %a 1854 %fpround = fptrunc double %fneg.a to float 1855 %fneg = fneg float %fpround 1856 %use1 = fmul double %fneg.a, %c 1857 %insert.0 = insertvalue { float, double } poison, float %fneg, 0 1858 %insert.1 = insertvalue { float, double } %insert.0, double %use1, 1 1859 ret { float, double } %insert.1 1860} 1861 1862define half @v_fneg_fp_round_f32_to_f16(float %a) #0 { 1863; SI-LABEL: v_fneg_fp_round_f32_to_f16: 1864; SI: ; %bb.0: 1865; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) 1866; SI-NEXT: v_cvt_f16_f32_e64 v0, -v0 1867; SI-NEXT: v_cvt_f32_f16_e32 v0, v0 1868; SI-NEXT: s_setpc_b64 s[30:31] 1869; 1870; VI-LABEL: v_fneg_fp_round_f32_to_f16: 1871; VI: ; %bb.0: 1872; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1873; VI-NEXT: v_cvt_f16_f32_e64 v0, -v0 1874; VI-NEXT: s_setpc_b64 s[30:31] 1875 %fpround = fptrunc float %a to half 1876 %fneg = fneg half %fpround 1877 ret half %fneg 1878} 1879 1880define half @v_fneg_fp_round_fneg_f32_to_f16(float %a) #0 { 1881; SI-LABEL: v_fneg_fp_round_fneg_f32_to_f16: 1882; SI: ; %bb.0: 1883; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1884; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 1885; SI-NEXT: v_cvt_f32_f16_e32 v0, v0 1886; SI-NEXT: s_setpc_b64 s[30:31] 1887; 1888; VI-LABEL: v_fneg_fp_round_fneg_f32_to_f16: 1889; VI: ; %bb.0: 1890; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1891; VI-NEXT: v_cvt_f16_f32_e32 v0, v0 1892; VI-NEXT: s_setpc_b64 s[30:31] 1893 %fneg.a = fneg float %a 1894 %fpround = fptrunc float %fneg.a to half 1895 %fneg = fneg half %fpround 1896 ret half %fneg 1897} 1898 1899define { float, float } @v_fneg_multi_use_fp_round_fneg_f64_to_f32(double %a) #0 { 1900; GCN-LABEL: v_fneg_multi_use_fp_round_fneg_f64_to_f32: 1901; GCN: ; %bb.0: 1902; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1903; GCN-NEXT: v_cvt_f32_f64_e32 v1, v[0:1] 1904; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v1 1905; GCN-NEXT: s_setpc_b64 s[30:31] 1906 %fpround = fptrunc double %a to float 1907 %fneg = fneg float %fpround 1908 %insert.0 = insertvalue { float, float } poison, float %fneg, 0 1909 %insert.1 = insertvalue { float, float } %insert.0, float %fpround, 1 1910 ret { float, float } %insert.1 1911} 1912 1913define { half, float } @v_fneg_fp_round_store_use_fneg_f32_to_f16(float %a) #0 { 1914; SI-LABEL: v_fneg_fp_round_store_use_fneg_f32_to_f16: 1915; SI: ; %bb.0: 1916; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1917; SI-NEXT: v_cvt_f16_f32_e32 v1, v0 1918; SI-NEXT: v_cvt_f32_f16_e32 v2, v1 
1919; SI-NEXT: v_xor_b32_e32 v1, 0x80000000, v0 1920; SI-NEXT: v_mov_b32_e32 v0, v2 1921; SI-NEXT: s_setpc_b64 s[30:31] 1922; 1923; VI-LABEL: v_fneg_fp_round_store_use_fneg_f32_to_f16: 1924; VI: ; %bb.0: 1925; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1926; VI-NEXT: v_cvt_f16_f32_e32 v2, v0 1927; VI-NEXT: v_xor_b32_e32 v1, 0x80000000, v0 1928; VI-NEXT: v_mov_b32_e32 v0, v2 1929; VI-NEXT: s_setpc_b64 s[30:31] 1930 %fneg.a = fneg float %a 1931 %fpround = fptrunc float %fneg.a to half 1932 %fneg = fneg half %fpround 1933 %insert.0 = insertvalue { half, float } poison, half %fneg, 0 1934 %insert.1 = insertvalue { half, float } %insert.0, float %fneg.a, 1 1935 ret { half, float } %insert.1 1936} 1937 1938define { half, float } @v_fneg_fp_round_multi_use_fneg_f32_to_f16(float %a, float %c) #0 { 1939; SI-LABEL: v_fneg_fp_round_multi_use_fneg_f32_to_f16: 1940; SI: ; %bb.0: 1941; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1942; SI-NEXT: v_cvt_f16_f32_e32 v2, v0 1943; SI-NEXT: v_mul_f32_e64 v1, -v0, v1 1944; SI-NEXT: v_cvt_f32_f16_e32 v2, v2 1945; SI-NEXT: v_mov_b32_e32 v0, v2 1946; SI-NEXT: s_setpc_b64 s[30:31] 1947; 1948; VI-LABEL: v_fneg_fp_round_multi_use_fneg_f32_to_f16: 1949; VI: ; %bb.0: 1950; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1951; VI-NEXT: v_cvt_f16_f32_e32 v2, v0 1952; VI-NEXT: v_mul_f32_e64 v1, -v0, v1 1953; VI-NEXT: v_mov_b32_e32 v0, v2 1954; VI-NEXT: s_setpc_b64 s[30:31] 1955 %fneg.a = fneg float %a 1956 %fpround = fptrunc float %fneg.a to half 1957 %fneg = fneg half %fpround 1958 %use1 = fmul float %fneg.a, %c 1959 %insert.0 = insertvalue { half, float } poison, half %fneg, 0 1960 %insert.1 = insertvalue { half, float } %insert.0, float %use1, 1 1961 ret { half, float } %insert.1 1962} 1963 1964; -------------------------------------------------------------------------------- 1965; rcp tests 1966; -------------------------------------------------------------------------------- 1967 1968define float @v_fneg_rcp_f32(float %a) #0 { 
1969; GCN-LABEL: v_fneg_rcp_f32: 1970; GCN: ; %bb.0: 1971; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1972; GCN-NEXT: v_rcp_f32_e64 v0, -v0 1973; GCN-NEXT: s_setpc_b64 s[30:31] 1974 %rcp = call float @llvm.amdgcn.rcp.f32(float %a) 1975 %fneg = fneg float %rcp 1976 ret float %fneg 1977} 1978 1979define float @v_fneg_rcp_fneg_f32(float %a) #0 { 1980; GCN-LABEL: v_fneg_rcp_fneg_f32: 1981; GCN: ; %bb.0: 1982; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1983; GCN-NEXT: v_rcp_f32_e32 v0, v0 1984; GCN-NEXT: s_setpc_b64 s[30:31] 1985 %fneg.a = fneg float %a 1986 %rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a) 1987 %fneg = fneg float %rcp 1988 ret float %fneg 1989} 1990 1991define void @v_fneg_rcp_store_use_fneg_f32(float %a, ptr addrspace(1) %ptr0, ptr addrspace(1) %ptr1) #0 { 1992; GCN-LABEL: v_fneg_rcp_store_use_fneg_f32: 1993; GCN: ; %bb.0: 1994; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1995; GCN-NEXT: v_rcp_f32_e32 v5, v0 1996; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v0 1997; GCN-NEXT: flat_store_dword v[1:2], v5 1998; GCN-NEXT: flat_store_dword v[3:4], v0 1999; GCN-NEXT: s_waitcnt vmcnt(0) 2000; GCN-NEXT: s_setpc_b64 s[30:31] 2001 %fneg.a = fneg float %a 2002 %rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a) 2003 %fneg = fneg float %rcp 2004 store float %fneg, ptr addrspace(1) %ptr0 2005 store float %fneg.a, ptr addrspace(1) %ptr1 2006 ret void 2007} 2008 2009define { float, float } @v_fneg_rcp_multi_use_fneg_f32(float %a, float %c) #0 { 2010; GCN-LABEL: v_fneg_rcp_multi_use_fneg_f32: 2011; GCN: ; %bb.0: 2012; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2013; GCN-NEXT: v_rcp_f32_e32 v2, v0 2014; GCN-NEXT: v_mul_f32_e64 v1, -v0, v1 2015; GCN-NEXT: v_mov_b32_e32 v0, v2 2016; GCN-NEXT: s_setpc_b64 s[30:31] 2017 %fneg.a = fneg float %a 2018 %rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a) 2019 %fneg = fneg float %rcp 2020 %use1 = fmul float %fneg.a, %c 2021 %insert.0 = insertvalue { float, float } poison, float %fneg, 0 2022 
%insert.1 = insertvalue { float, float } %insert.0, float %use1, 1 2023 ret { float, float } %insert.1 2024} 2025 2026; Test getNegatedExpression works for rcp nodes 2027define float @v_negated_rcp_f32(float %arg0, float %arg1) #1 { 2028; GCN-LABEL: v_negated_rcp_f32: 2029; GCN: ; %bb.0: 2030; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2031; GCN-NEXT: v_fma_f32 v0, v0, v1, 2.0 2032; GCN-NEXT: v_rcp_f32_e32 v0, v0 2033; GCN-NEXT: v_add_f32_e32 v0, v1, v0 2034; GCN-NEXT: s_setpc_b64 s[30:31] 2035 %neg.arg0 = fneg float %arg0 2036 %fma = call nsz float @llvm.fma.f32(float %neg.arg0, float %arg1, float -2.0) 2037 %rcp0 = call float @llvm.amdgcn.rcp.f32(float %fma) 2038 %mul = fsub nsz float %arg1, %rcp0 2039 ret float %mul 2040} 2041 2042; -------------------------------------------------------------------------------- 2043; fmul_legacy tests 2044; -------------------------------------------------------------------------------- 2045 2046define float @v_fneg_mul_legacy_f32(float %a, float %b) #0 { 2047; GCN-LABEL: v_fneg_mul_legacy_f32: 2048; GCN: ; %bb.0: 2049; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2050; GCN-NEXT: v_mul_legacy_f32_e64 v0, v0, -v1 2051; GCN-NEXT: s_setpc_b64 s[30:31] 2052 %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b) 2053 %fneg = fneg float %mul 2054 ret float %fneg 2055} 2056 2057define { float, float } @v_fneg_mul_legacy_store_use_mul_legacy_f32(float %a, float %b) #0 { 2058; GCN-LABEL: v_fneg_mul_legacy_store_use_mul_legacy_f32: 2059; GCN: ; %bb.0: 2060; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2061; GCN-NEXT: v_mul_legacy_f32_e32 v1, v0, v1 2062; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v1 2063; GCN-NEXT: s_setpc_b64 s[30:31] 2064 %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b) 2065 %fneg = fneg float %mul 2066 %insert.0 = insertvalue { float, float } poison, float %fneg, 0 2067 %insert.1 = insertvalue { float, float } %insert.0, float %mul, 1 2068 ret { float, float } %insert.1 2069} 2070 
; The fmul.legacy result feeds both the fneg and a second fmul.legacy by 4.0.
; The checks expect the first multiply to produce the negated product (modifier
; on %b) and the second multiply to negate it again through a source modifier.
define { float, float } @v_fneg_mul_legacy_multi_use_mul_legacy_f32(float %a, float %b) #0 {
; GCN-LABEL: v_fneg_mul_legacy_multi_use_mul_legacy_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_mul_legacy_f32_e64 v0, v0, -v1
; GCN-NEXT: v_mul_legacy_f32_e64 v1, -v0, 4.0
; GCN-NEXT: s_setpc_b64 s[30:31]
  %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
  %fneg = fneg float %mul
  %use1 = call float @llvm.amdgcn.fmul.legacy(float %mul, float 4.0)
  %insert.0 = insertvalue { float, float } poison, float %fneg, 0
  %insert.1 = insertvalue { float, float } %insert.0, float %use1, 1
  ret { float, float } %insert.1
}

; fneg(fmul.legacy(fneg(%a), %b)): the checks expect the two negations to
; cancel, leaving a plain multiply with no modifiers.
define float @v_fneg_mul_legacy_fneg_x_f32(float %a, float %b) #0 {
; GCN-LABEL: v_fneg_mul_legacy_fneg_x_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GCN-NEXT: s_setpc_b64 s[30:31]
  %fneg.a = fneg float %a
  %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b)
  %fneg = fneg float %mul
  ret float %fneg
}

; Same as the previous test, but the inner fneg is on the second operand.
define float @v_fneg_mul_legacy_x_fneg_f32(float %a, float %b) #0 {
; GCN-LABEL: v_fneg_mul_legacy_x_fneg_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GCN-NEXT: s_setpc_b64 s[30:31]
  %fneg.b = fneg float %b
  %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %fneg.b)
  %fneg = fneg float %mul
  ret float %fneg
}

; Both operands and the result are negated; the checks expect a single
; remaining negation, applied as a source modifier on the second operand.
define float @v_fneg_mul_legacy_fneg_fneg_f32(float %a, float %b) #0 {
; GCN-LABEL: v_fneg_mul_legacy_fneg_fneg_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_mul_legacy_f32_e64 v0, v0, -v1
; GCN-NEXT: s_setpc_b64 s[30:31]
  %fneg.a = fneg float %a
  %fneg.b = fneg float %b
  %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %fneg.b)
  %fneg = fneg float %mul
  ret float %fneg
}

; %fneg.a is also returned directly, so the checks expect it to be
; materialized with v_xor while the multiply uses the un-negated inputs.
define { float, float } @v_fneg_mul_legacy_store_use_fneg_x_f32(float %a, float %b) #0 {
; GCN-LABEL: v_fneg_mul_legacy_store_use_fneg_x_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_xor_b32_e32 v2, 0x80000000, v0
; GCN-NEXT: v_mul_legacy_f32_e32 v0, v0, v1
; GCN-NEXT: v_mov_b32_e32 v1, v2
; GCN-NEXT: s_setpc_b64 s[30:31]
  %fneg.a = fneg float %a
  %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b)
  %fneg = fneg float %mul

  %insert.0 = insertvalue { float, float } poison, float %fneg, 0
  %insert.1 = insertvalue { float, float } %insert.0, float %fneg.a, 1
  ret { float, float } %insert.1
}

; %fneg.a additionally feeds a second fmul.legacy with %c; the checks expect
; that second multiply to take -%a as a source modifier.
define { float, float } @v_fneg_mul_legacy_multi_use_fneg_x_f32(float %a, float %b, float %c) #0 {
; GCN-LABEL: v_fneg_mul_legacy_multi_use_fneg_x_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_mul_legacy_f32_e32 v3, v0, v1
; GCN-NEXT: v_mul_legacy_f32_e64 v1, -v0, v2
; GCN-NEXT: v_mov_b32_e32 v0, v3
; GCN-NEXT: s_setpc_b64 s[30:31]
  %fneg.a = fneg float %a
  %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b)
  %fneg = fneg float %mul
  %use1 = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %c)
  %insert.0 = insertvalue { float, float } poison, float %fneg, 0
  %insert.1 = insertvalue { float, float } %insert.0, float %use1, 1
  ret { float, float } %insert.1
}

; --------------------------------------------------------------------------------
; sin tests
; --------------------------------------------------------------------------------

; fneg of the generic llvm.sin intrinsic. The checks expect a multiply by a
; constant whose sign bit is set (0xbe22f983), then v_fract and v_sin, with no
; separate negate instruction; presumably the fneg is absorbed into that
; constant.
define float @v_fneg_sin_f32(float %a) #0 {
; GCN-LABEL: v_fneg_sin_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_mul_f32_e32 v0, 0xbe22f983, v0
; GCN-NEXT: v_fract_f32_e32 v0, v0
; GCN-NEXT: v_sin_f32_e32 v0, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
  %sin = call float @llvm.sin.f32(float %a)
  %fneg = fneg float %sin
  ret float %fneg
}

; fneg of the target llvm.amdgcn.sin intrinsic; the checks expect the negation
; as a source modifier on v_sin.
define float @v_fneg_amdgcn_sin_f32(float %a) #0 {
; GCN-LABEL: v_fneg_amdgcn_sin_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_sin_f32_e64 v0, -v0
; GCN-NEXT: s_setpc_b64 s[30:31]
  %sin = call float @llvm.amdgcn.sin.f32(float %a)
  %fneg = fneg float %sin
  ret float %fneg
}

; --------------------------------------------------------------------------------
; ftrunc tests
; --------------------------------------------------------------------------------

; fneg(trunc(%a)); the checks expect the negation as a source modifier on
; v_trunc.
define float @v_fneg_trunc_f32(float %a) #0 {
; GCN-LABEL: v_fneg_trunc_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_trunc_f32_e64 v0, -v0
; GCN-NEXT: s_setpc_b64 s[30:31]
  %trunc = call float @llvm.trunc.f32(float %a)
  %fneg = fneg float %trunc
  ret float %fneg
}

; --------------------------------------------------------------------------------
; fround tests
; --------------------------------------------------------------------------------

; fneg(round(%a)). The checks show llvm.round expanded into a
; trunc/sub/cmp/cndmask/bfi sequence ending in an add. The default runs keep
; the negation as a trailing v_xor; the no-signed-zeros runs instead expect the
; final add rewritten as v_sub with a negated first operand.
define float @v_fneg_round_f32(float %a) #0 {
; GCN-SAFE-LABEL: v_fneg_round_f32:
; GCN-SAFE: ; %bb.0:
; GCN-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-SAFE-NEXT: v_trunc_f32_e32 v1, v0
; GCN-SAFE-NEXT: v_sub_f32_e32 v2, v0, v1
; GCN-SAFE-NEXT: v_cmp_ge_f32_e64 s[4:5], |v2|, 0.5
; GCN-SAFE-NEXT: v_cndmask_b32_e64 v2, 0, 1.0, s[4:5]
; GCN-SAFE-NEXT: s_brev_b32 s4, -2
; GCN-SAFE-NEXT: v_bfi_b32 v0, s4, v2, v0
; GCN-SAFE-NEXT: v_add_f32_e32 v0, v1, v0
; GCN-SAFE-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
; GCN-SAFE-NEXT: s_setpc_b64 s[30:31]
;
; GCN-NSZ-LABEL: v_fneg_round_f32:
; GCN-NSZ: ; %bb.0:
; GCN-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NSZ-NEXT: v_trunc_f32_e32 v1, v0
; GCN-NSZ-NEXT: v_sub_f32_e32 v2, v0, v1
; GCN-NSZ-NEXT: v_cmp_ge_f32_e64 s[4:5], |v2|, 0.5
; GCN-NSZ-NEXT: v_cndmask_b32_e64 v2, 0, 1.0, s[4:5]
; GCN-NSZ-NEXT: s_brev_b32 s4, -2
; GCN-NSZ-NEXT: v_bfi_b32 v0, s4, v2, v0
; GCN-NSZ-NEXT: v_sub_f32_e64 v0, -v1, v0
; GCN-NSZ-NEXT: s_setpc_b64 s[30:31]
  %round = call float @llvm.round.f32(float %a)
  %fneg = fneg float %round
  ret float %fneg
}

; --------------------------------------------------------------------------------
; rint tests
; --------------------------------------------------------------------------------

; fneg(rint(%a)); the checks expect v_rndne with a negated source operand.
define float @v_fneg_rint_f32(float %a) #0 {
; GCN-LABEL: v_fneg_rint_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_rndne_f32_e64 v0, -v0
; GCN-NEXT: s_setpc_b64 s[30:31]
  %rint = call float @llvm.rint.f32(float %a)
  %fneg = fneg float %rint
  ret float %fneg
}

; --------------------------------------------------------------------------------
; nearbyint tests
; --------------------------------------------------------------------------------

; fneg(nearbyint(%a)); the expected code is the same v_rndne with a negated
; source as in the rint test.
define float @v_fneg_nearbyint_f32(float %a) #0 {
; GCN-LABEL: v_fneg_nearbyint_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_rndne_f32_e64 v0, -v0
; GCN-NEXT: s_setpc_b64 s[30:31]
  %nearbyint = call float @llvm.nearbyint.f32(float %a)
  %fneg = fneg float %nearbyint
  ret float %fneg
}

; --------------------------------------------------------------------------------
; fcanonicalize tests
; --------------------------------------------------------------------------------

; fneg(canonicalize(%a)); the checks expect a single multiply by -1.0.
; Note that %trunc holds the llvm.canonicalize result here; the value name
; matches the one used in the ftrunc test.
define float @v_fneg_canonicalize_f32(float %a) #0 {
; GCN-LABEL: v_fneg_canonicalize_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
  %trunc = call float @llvm.canonicalize.f32(float %a)
  %fneg = fneg float %trunc
  ret float %fneg
}

; --------------------------------------------------------------------------------
; arithmetic.fence tests
; --------------------------------------------------------------------------------

; fneg applied to a fenced function argument; the checks expect the
; ARITH_FENCE marker and a separate v_xor for the negation.
define float @v_fneg_arithmetic_fence_f32(float %a) #0 {
; GCN-LABEL: v_fneg_arithmetic_fence_f32:
; GCN: ; %bb.0:
; GCN-NEXT: ;ARITH_FENCE
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
  %fence = call float @llvm.arithmetic.fence.f32(float %a)
  %fneg = fneg float %fence
  ret float %fneg
}

; The fence sits between the fmul and the fneg; the checks expect the multiply
; without modifiers and the negation as a separate v_xor after the fence
; marker, i.e. no fold through the fence.
define float @v_fneg_arithmetic_fence_fmul_f32(float %a, float %b) #0 {
; GCN-LABEL: v_fneg_arithmetic_fence_fmul_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_mul_f32_e32 v0, v0, v1
; GCN-NEXT: ;ARITH_FENCE
; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
  %mul = fmul float %a, %b
  %fence = call float @llvm.arithmetic.fence.f32(float %mul)
  %fneg = fneg float %fence
  ret float %fneg
}

; --------------------------------------------------------------------------------
; vintrp tests
; --------------------------------------------------------------------------------

; The negated product is consumed by two interp.p1 calls; the checks expect the
; negation folded into the producing v_mul as a source modifier. The hawaii and
; fiji expectations differ only in the interp mnemonic suffix.
define { float, float } @v_fneg_interp_p1_f32(float %a, float %b) #0 {
; SI-LABEL: v_fneg_interp_p1_f32:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: v_mul_f32_e64 v1, v0, -v1
; SI-NEXT: s_mov_b32 m0, 0
; SI-NEXT: v_interp_p1_f32 v0, v1, attr0.x
; SI-NEXT: v_interp_p1_f32 v1, v1, attr0.y
; SI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: v_fneg_interp_p1_f32:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_mul_f32_e64 v1, v0, -v1
; VI-NEXT: s_mov_b32 m0, 0
; VI-NEXT: v_interp_p1_f32_e32 v0, v1, attr0.x
; VI-NEXT: v_interp_p1_f32_e32 v1, v1, attr0.y
; VI-NEXT: s_setpc_b64 s[30:31]
  %mul = fmul float %a, %b
  %fneg = fneg float %mul
  %intrp0 = call float @llvm.amdgcn.interp.p1(float %fneg, i32 0, i32 0, i32 0)
  %intrp1 = call float @llvm.amdgcn.interp.p1(float %fneg, i32 1, i32 0, i32 0)
  %insert.0 = insertvalue { float, float } poison, float %intrp0, 0
  %insert.1 = insertvalue { float, float } %insert.0, float %intrp1, 1
  ret { float, float } %insert.1
}

; Same pattern as the interp.p1 test, with the negated product passed as the
; second argument of two interp.p2 calls; the checks again expect the negation
; folded into the v_mul.
define { float, float } @v_fneg_interp_p2_f32(float %a, float %b) #0 {
; SI-LABEL: v_fneg_interp_p2_f32:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: v_mul_f32_e64 v2, v0, -v1
; SI-NEXT: v_mov_b32_e32 v1, 4.0
; SI-NEXT: v_mov_b32_e32 v0, 4.0
; SI-NEXT: s_mov_b32 m0, 0
; SI-NEXT: v_interp_p2_f32 v0, v2, attr0.x
; SI-NEXT: v_interp_p2_f32 v1, v2, attr0.y
; SI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: v_fneg_interp_p2_f32:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_mul_f32_e64 v2, v0, -v1
; VI-NEXT: v_mov_b32_e32 v1, 4.0
; VI-NEXT: v_mov_b32_e32 v0, 4.0
; VI-NEXT: s_mov_b32 m0, 0
; VI-NEXT: v_interp_p2_f32_e32 v0, v2, attr0.x
; VI-NEXT: v_interp_p2_f32_e32 v1, v2, attr0.y
; VI-NEXT: s_setpc_b64 s[30:31]
  %mul = fmul float %a, %b
  %fneg = fneg float %mul
  %intrp0 = call float @llvm.amdgcn.interp.p2(float 4.0, float %fneg, i32 0, i32 0, i32 0)
  %intrp1 = call float @llvm.amdgcn.interp.p2(float 4.0, float %fneg, i32 1, i32 0, i32 0)
  %insert.0 = insertvalue { float, float } poison, float %intrp0, 0
  %insert.1 = insertvalue { float, float } %insert.0, float %intrp1, 1
  ret { float, float } %insert.1
}

; --------------------------------------------------------------------------------
; CopyToReg tests
; --------------------------------------------------------------------------------

; %mul is stored un-negated in %endif while %fneg is only used in %if. The
; checks expect an un-negated multiply in the entry block and the negation
; applied as a source modifier on the multiply inside %if.
define void @v_fneg_copytoreg_f32(ptr addrspace(1) %out, float %a, float %b, float %c, i32 %d) #0 {
; SI-LABEL: v_fneg_copytoreg_f32:
; SI: ; %bb.0:
; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; SI-NEXT: v_and_b32_e32 v6, 0x3ff, v31
; SI-NEXT: v_lshlrev_b32_e32 v6, 2, v6
; SI-NEXT: v_add_i32_e32 v0, vcc, v0, v6
; SI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; SI-NEXT: v_mul_f32_e32 v2, v2, v3
; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
; SI-NEXT: s_and_saveexec_b64 s[4:5], vcc
; SI-NEXT: s_cbranch_execz .LBB118_2
; SI-NEXT: ; %bb.1: ; %if
; SI-NEXT: v_mul_f32_e64 v3, -v2, v4
; SI-NEXT: flat_store_dword v[0:1], v3
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: .LBB118_2: ; %endif
; SI-NEXT: s_or_b64 exec, exec, s[4:5]
; SI-NEXT: flat_store_dword v[0:1], v2
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: s_setpc_b64 s[30:31]
;
; VI-LABEL: v_fneg_copytoreg_f32:
; VI: ; %bb.0:
; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NEXT: v_and_b32_e32 v6, 0x3ff, v31
; VI-NEXT: v_lshlrev_b32_e32 v6, 2, v6
; VI-NEXT: v_add_u32_e32 v0, vcc, v0, v6
; VI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; VI-NEXT: v_mul_f32_e32 v2, v2, v3
; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
; VI-NEXT: s_and_saveexec_b64 s[4:5], vcc
; VI-NEXT: s_cbranch_execz .LBB118_2
; VI-NEXT: ; %bb.1: ; %if
; VI-NEXT: v_mul_f32_e64 v3, -v2, v4
; VI-NEXT: flat_store_dword v[0:1], v3
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: .LBB118_2: ; %endif
; VI-NEXT: s_or_b64 exec, exec, s[4:5]
; VI-NEXT: flat_store_dword v[0:1], v2
; VI-NEXT: s_waitcnt vmcnt(0)
; VI-NEXT: s_setpc_b64 s[30:31]
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %mul = fmul float %a, %b
  %fneg = fneg float %mul
  %cmp0 = icmp eq i32 %d, 0
  br i1 %cmp0, label %if, label %endif

if:
  %mul1 = fmul float %fneg, %c
  store volatile float %mul1, ptr addrspace(1) %out.gep
  br label %endif

endif:
  store volatile float %mul, ptr addrspace(1) %out.gep
  ret void
}

; --------------------------------------------------------------------------------
; inlineasm tests
; --------------------------------------------------------------------------------

; Can't fold into use, so should fold into source
define float @v_fneg_inlineasm_f32(float %a, float %b, float %c, i32 %d) #0 {
; GCN-LABEL: v_fneg_inlineasm_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_mul_f32_e64 v0, v0, -v1
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use v0
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: s_setpc_b64 s[30:31]
  %mul = fmul float %a, %b
  %fneg = fneg float %mul
  call void asm sideeffect "; use $0", "v"(float %fneg) #0
  ret float %fneg
}

; --------------------------------------------------------------------------------
; inlineasm tests (source with multiple uses)
; --------------------------------------------------------------------------------

; Can't fold into the inline asm use, and the source %mul has a second use (it
; is the return value), so the checks expect an un-negated multiply plus a
; separate v_xor feeding the asm rather than a fold into the source.
define float @v_fneg_inlineasm_multi_use_src_f32(ptr addrspace(1) %out, float %a, float %b, float %c, i32 %d) #0 {
; GCN-LABEL: v_fneg_inlineasm_multi_use_src_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_mul_f32_e32 v0, v2, v3
; GCN-NEXT: v_xor_b32_e32 v1, 0x80000000, v0
; GCN-NEXT: ;;#ASMSTART
; GCN-NEXT: ; use v1
; GCN-NEXT: ;;#ASMEND
; GCN-NEXT: s_setpc_b64 s[30:31]
  %mul = fmul float %a, %b
  %fneg = fneg float %mul
  call void asm sideeffect "; use $0", "v"(float %fneg) #0
  ret float %mul
}

; --------------------------------------------------------------------------------
; code size regression tests
; --------------------------------------------------------------------------------

; There are multiple users of the fneg that must use a VOP3
; instruction, so there is no penalty
define { float, float } @multiuse_fneg_2_vop3_users_f32(float %a, float %b, float %c) #0 {
; GCN-LABEL: multiuse_fneg_2_vop3_users_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_fma_f32 v3, -v0, v1, v2
; GCN-NEXT: v_fma_f32 v1, -v0, v2, 2.0
; GCN-NEXT: v_mov_b32_e32 v0, v3
; GCN-NEXT: s_setpc_b64 s[30:31]
  %fneg.a = fneg float %a
  %fma0 = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
  %fma1 = call float @llvm.fma.f32(float %fneg.a, float %c, float 2.0)
  %insert.0 = insertvalue { float, float } poison, float %fma0, 0
  %insert.1 = insertvalue { float, float } %insert.0, float %fma1, 1
  ret { float, float } %insert.1
}

; There are multiple users, but both require using a larger encoding
; for the modifier.
2500define { float, float } @multiuse_fneg_2_vop2_users_f32(float %a, float %b, float %c) #0 { 2501; GCN-LABEL: multiuse_fneg_2_vop2_users_f32: 2502; GCN: ; %bb.0: 2503; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2504; GCN-NEXT: v_mul_f32_e64 v3, -v0, v1 2505; GCN-NEXT: v_mul_f32_e64 v1, -v0, v2 2506; GCN-NEXT: v_mov_b32_e32 v0, v3 2507; GCN-NEXT: s_setpc_b64 s[30:31] 2508 %fneg.a = fneg float %a 2509 %mul0 = fmul float %fneg.a, %b 2510 %mul1 = fmul float %fneg.a, %c 2511 %insert.0 = insertvalue { float, float } poison, float %mul0, 0 2512 %insert.1 = insertvalue { float, float } %insert.0, float %mul1, 1 2513 ret { float, float } %insert.1 2514} 2515 2516; One user is VOP3 so has no cost to folding the modifier, the other does. 2517define { float, float } @multiuse_fneg_vop2_vop3_users_f32(float %a, float %b, float %c) #0 { 2518; GCN-LABEL: multiuse_fneg_vop2_vop3_users_f32: 2519; GCN: ; %bb.0: 2520; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2521; GCN-NEXT: v_fma_f32 v3, -v0, v1, 2.0 2522; GCN-NEXT: v_mul_f32_e64 v1, -v0, v2 2523; GCN-NEXT: v_mov_b32_e32 v0, v3 2524; GCN-NEXT: s_setpc_b64 s[30:31] 2525 %fneg.a = fneg float %a 2526 %fma0 = call float @llvm.fma.f32(float %fneg.a, float %b, float 2.0) 2527 %mul1 = fmul float %fneg.a, %c 2528 2529 %insert.0 = insertvalue { float, float } poison, float %fma0, 0 2530 %insert.1 = insertvalue { float, float } %insert.0, float %mul1, 1 2531 ret { float, float } %insert.1 2532} 2533 2534; The use of the fneg requires a code size increase, but folding into 2535; the source does not 2536define { float, float } @free_fold_src_code_size_cost_use_f32(ptr addrspace(1) %out, float %a, float %b, float %c, float %d) #0 { 2537; GCN-SAFE-LABEL: free_fold_src_code_size_cost_use_f32: 2538; GCN-SAFE: ; %bb.0: 2539; GCN-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2540; GCN-SAFE-NEXT: v_fma_f32 v1, v2, v3, 2.0 2541; GCN-SAFE-NEXT: v_mul_f32_e64 v0, -v1, v4 2542; GCN-SAFE-NEXT: v_mul_f32_e64 v1, -v1, v5 2543;
GCN-SAFE-NEXT: s_setpc_b64 s[30:31] 2544; 2545; GCN-NSZ-LABEL: free_fold_src_code_size_cost_use_f32: 2546; GCN-NSZ: ; %bb.0: 2547; GCN-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2548; GCN-NSZ-NEXT: v_fma_f32 v1, v2, -v3, -2.0 2549; GCN-NSZ-NEXT: v_mul_f32_e32 v0, v1, v4 2550; GCN-NSZ-NEXT: v_mul_f32_e32 v1, v1, v5 2551; GCN-NSZ-NEXT: s_setpc_b64 s[30:31] 2552 %fma0 = call float @llvm.fma.f32(float %a, float %b, float 2.0) 2553 %fneg.fma0 = fneg float %fma0 2554 %mul1 = fmul float %fneg.fma0, %c 2555 %mul2 = fmul float %fneg.fma0, %d 2556 2557 %insert.0 = insertvalue { float, float } poison, float %mul1, 0 2558 %insert.1 = insertvalue { float, float } %insert.0, float %mul2, 1 2559 ret { float, float } %insert.1 2560} 2561; f64 version of the test above. A single set of checks covers every run configuration, and it keeps the fneg as a source modifier on both v_mul_f64 users. 2562define { double, double } @free_fold_src_code_size_cost_use_f64(double %a, double %b, double %c, double %d) #0 { 2563; GCN-LABEL: free_fold_src_code_size_cost_use_f64: 2564; GCN: ; %bb.0: 2565; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2566; GCN-NEXT: v_fma_f64 v[2:3], v[0:1], v[2:3], 2.0 2567; GCN-NEXT: v_mul_f64 v[0:1], -v[2:3], v[4:5] 2568; GCN-NEXT: v_mul_f64 v[2:3], -v[2:3], v[6:7] 2569; GCN-NEXT: s_setpc_b64 s[30:31] 2570 %fma0 = call double @llvm.fma.f64(double %a, double %b, double 2.0) 2571 %fneg.fma0 = fneg double %fma0 2572 %mul1 = fmul double %fneg.fma0, %c 2573 %mul2 = fmul double %fneg.fma0, %d 2574 2575 %insert.0 = insertvalue { double, double } poison, double %mul1, 0 2576 %insert.1 = insertvalue { double, double } %insert.0, double %mul2, 1 2577 ret { double, double } %insert.1 2578} 2579 2580; %trunc.a has one fneg use, but it requires a code size increase and 2581; the fneg can instead be folded for free into the fma.
2582define float @one_use_cost_to_fold_into_src_f32(float %a, float %b, float %c, float %d) #0 { 2583; GCN-LABEL: one_use_cost_to_fold_into_src_f32: 2584; GCN: ; %bb.0: 2585; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2586; GCN-NEXT: v_trunc_f32_e32 v0, v0 2587; GCN-NEXT: v_fma_f32 v0, -v0, v1, v2 2588; GCN-NEXT: s_setpc_b64 s[30:31] 2589 %trunc.a = call float @llvm.trunc.f32(float %a) 2590 %trunc.fneg.a = fneg float %trunc.a 2591 %fma0 = call float @llvm.fma.f32(float %trunc.fneg.a, float %b, float %c) 2592 ret float %fma0 2593} 2594; Here %trunc.a has both an fneg use and a plain fmul use. The checks still fold the fneg into the fma as a source modifier and leave the v_trunc result unnegated for the fmul. 2595define { float, float } @multi_use_cost_to_fold_into_src(float %a, float %b, float %c, float %d) #0 { 2596; GCN-LABEL: multi_use_cost_to_fold_into_src: 2597; GCN: ; %bb.0: 2598; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2599; GCN-NEXT: v_trunc_f32_e32 v4, v0 2600; GCN-NEXT: v_fma_f32 v0, -v4, v1, v2 2601; GCN-NEXT: v_mul_f32_e32 v1, v4, v3 2602; GCN-NEXT: s_setpc_b64 s[30:31] 2603 %trunc.a = call float @llvm.trunc.f32(float %a) 2604 %trunc.fneg.a = fneg float %trunc.a 2605 %fma0 = call float @llvm.fma.f32(float %trunc.fneg.a, float %b, float %c) 2606 %mul1 = fmul float %trunc.a, %d 2607 %insert.0 = insertvalue { float, float } poison, float %fma0, 0 2608 %insert.1 = insertvalue { float, float } %insert.0, float %mul1, 1 2609 ret { float, float } %insert.1 2610} 2611 2612; The AMDGPU combine to pull fneg into the FMA operands was being 2613; undone by the generic combine to pull the fneg out of the fma if 2614; !isFNegFree. We were reporting false for v2f32 even though it will 2615; be split into f32 where it will be free.
2616define <2 x float> @fneg_fma_fneg_dagcombine_loop(<2 x float> %arg, <2 x float> %arg1, <2 x float> %arg2) #0 { 2617; GCN-LABEL: fneg_fma_fneg_dagcombine_loop: 2618; GCN: ; %bb.0: ; %bb 2619; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2620; GCN-NEXT: s_brev_b32 s4, 1 2621; GCN-NEXT: v_fma_f32 v3, v3, -v5, s4 2622; GCN-NEXT: v_fma_f32 v2, v2, -v4, s4 2623; GCN-NEXT: v_sub_f32_e32 v1, v3, v1 2624; GCN-NEXT: v_sub_f32_e32 v0, v2, v0 2625; GCN-NEXT: v_mul_f32_e32 v0, v0, v4 2626; GCN-NEXT: v_mul_f32_e32 v1, v1, v5 2627; GCN-NEXT: s_setpc_b64 s[30:31] 2628bb: 2629 %i3 = call fast <2 x float> @llvm.fma.v2f32(<2 x float> %arg1, <2 x float> %arg2, <2 x float> zeroinitializer) 2630 %i4 = fadd fast <2 x float> %i3, %arg 2631 %i5 = fneg <2 x float> %i4 2632 %i6 = fmul fast <2 x float> %i5, %arg2 2633 ret <2 x float> %i6 2634} 2635 2636; This expects denormal flushing, so can't turn this fmul into fneg 2637; TODO: Keeping this as fmul saves encoding size. NOTE(review) the checks below are a single v_mul_f32_e64 with a negated source operand, so the fmul by -1.0 does end up folded as a source modifier; confirm whether the comment above is stale. 2638define float @nnan_fmul_neg1_to_fneg(float %x, float %y) #0 { 2639; GCN-LABEL: nnan_fmul_neg1_to_fneg: 2640; GCN: ; %bb.0: 2641; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2642; GCN-NEXT: v_mul_f32_e64 v0, -v0, v1 2643; GCN-NEXT: s_setpc_b64 s[30:31] 2644 %mul = fmul float %x, -1.0 2645 %add = fmul nnan float %mul, %y 2646 ret float %add 2647} 2648 2649; It's legal to turn this fmul into an fneg since denormals are 2650; preserved and we know an snan can't happen from the flag.
2651define float @denormal_fmul_neg1_to_fneg(float %x, float %y) { 2652; GCN-LABEL: denormal_fmul_neg1_to_fneg: 2653; GCN: ; %bb.0: 2654; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2655; GCN-NEXT: v_mul_f32_e64 v0, -v0, v1 2656; GCN-NEXT: s_setpc_b64 s[30:31] 2657 %mul = fmul nnan float %x, -1.0 2658 %add = fmul float %mul, %y 2659 ret float %add 2660} 2661 2662; know the source can't be an snan 2663define float @denorm_snan_fmul_neg1_to_fneg(float %x, float %y) { 2664; GCN-LABEL: denorm_snan_fmul_neg1_to_fneg: 2665; GCN: ; %bb.0: 2666; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2667; GCN-NEXT: v_mul_f32_e64 v0, v0, -v0 2668; GCN-NEXT: v_mul_f32_e32 v0, v0, v1 2669; GCN-NEXT: s_setpc_b64 s[30:31] 2670 %canonical = fmul float %x, %x 2671 %mul = fmul float %canonical, -1.0 2672 %add = fmul float %mul, %y 2673 ret float %add 2674} 2675 2676define float @flush_snan_fmul_neg1_to_fneg(float %x, float %y) #0 { 2677; GCN-LABEL: flush_snan_fmul_neg1_to_fneg: 2678; GCN: ; %bb.0: 2679; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2680; GCN-NEXT: v_mul_f32_e32 v0, 1.0, v0 2681; GCN-NEXT: v_mul_f32_e64 v0, -v0, v1 2682; GCN-NEXT: s_setpc_b64 s[30:31] 2683 %quiet = call float @llvm.canonicalize.f32(float %x) 2684 %mul = fmul float %quiet, -1.0 2685 %add = fmul float %mul, %y 2686 ret float %add 2687} 2688 2689define float @fadd_select_fneg_fneg_f32(i32 %arg0, float %x, float %y, float %z) { 2690; GCN-LABEL: fadd_select_fneg_fneg_f32: 2691; GCN: ; %bb.0: 2692; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2693; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 2694; GCN-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 2695; GCN-NEXT: v_sub_f32_e32 v0, v3, v0 2696; GCN-NEXT: s_setpc_b64 s[30:31] 2697 %cmp = icmp eq i32 %arg0, 0 2698 %neg.x = fneg float %x 2699 %neg.y = fneg float %y 2700 %select = select i1 %cmp, float %neg.x, float %neg.y 2701 %add = fadd float %select, %z 2702 ret float %add 2703} 2704 2705define double @fadd_select_fneg_fneg_f64(i32 %arg0, double %x, double 
%y, double %z) { 2706; GCN-LABEL: fadd_select_fneg_fneg_f64: 2707; GCN: ; %bb.0: 2708; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2709; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 2710; GCN-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc 2711; GCN-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc 2712; GCN-NEXT: v_add_f64 v[0:1], v[5:6], -v[1:2] 2713; GCN-NEXT: s_setpc_b64 s[30:31] 2714 %cmp = icmp eq i32 %arg0, 0 2715 %neg.x = fneg double %x 2716 %neg.y = fneg double %y 2717 %select = select i1 %cmp, double %neg.x, double %neg.y 2718 %add = fadd double %select, %z 2719 ret double %add 2720} 2721 2722define half @fadd_select_fneg_fneg_f16(i32 %arg0, half %x, half %y, half %z) { 2723; SI-LABEL: fadd_select_fneg_fneg_f16: 2724; SI: ; %bb.0: 2725; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2726; SI-NEXT: v_cvt_f16_f32_e32 v1, v1 2727; SI-NEXT: v_cvt_f16_f32_e32 v2, v2 2728; SI-NEXT: v_cvt_f16_f32_e32 v3, v3 2729; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 2730; SI-NEXT: v_cvt_f32_f16_e32 v1, v1 2731; SI-NEXT: v_cvt_f32_f16_e32 v2, v2 2732; SI-NEXT: v_cvt_f32_f16_e32 v3, v3 2733; SI-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 2734; SI-NEXT: v_sub_f32_e32 v0, v3, v0 2735; SI-NEXT: s_setpc_b64 s[30:31] 2736; 2737; VI-LABEL: fadd_select_fneg_fneg_f16: 2738; VI: ; %bb.0: 2739; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2740; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 2741; VI-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 2742; VI-NEXT: v_sub_f16_e32 v0, v3, v0 2743; VI-NEXT: s_setpc_b64 s[30:31] 2744 %cmp = icmp eq i32 %arg0, 0 2745 %neg.x = fneg half %x 2746 %neg.y = fneg half %y 2747 %select = select i1 %cmp, half %neg.x, half %neg.y 2748 %add = fadd half %select, %z 2749 ret half %add 2750} 2751 2752; FIXME: Terrible code for SI 2753define <2 x half> @fadd_select_fneg_fneg_v2f16(i32 %arg0, <2 x half> %x, <2 x half> %y, <2 x half> %z) { 2754; SI-LABEL: fadd_select_fneg_fneg_v2f16: 2755; SI: ; %bb.0: 2756; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2757; SI-NEXT: v_cvt_f16_f32_e32 v2, v2 
2758; SI-NEXT: v_cvt_f16_f32_e32 v1, v1 2759; SI-NEXT: v_cvt_f16_f32_e32 v3, v3 2760; SI-NEXT: v_cvt_f16_f32_e32 v5, v5 2761; SI-NEXT: v_lshlrev_b32_e32 v2, 16, v2 2762; SI-NEXT: v_or_b32_e32 v1, v1, v2 2763; SI-NEXT: v_cvt_f16_f32_e32 v2, v4 2764; SI-NEXT: v_cvt_f16_f32_e32 v4, v6 2765; SI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 2766; SI-NEXT: v_lshlrev_b32_e32 v2, 16, v2 2767; SI-NEXT: v_or_b32_e32 v2, v3, v2 2768; SI-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 2769; SI-NEXT: v_lshrrev_b32_e32 v1, 16, v0 2770; SI-NEXT: v_cvt_f32_f16_e32 v3, v4 2771; SI-NEXT: v_cvt_f32_f16_e32 v4, v5 2772; SI-NEXT: v_cvt_f32_f16_e32 v0, v0 2773; SI-NEXT: v_cvt_f32_f16_e32 v1, v1 2774; SI-NEXT: v_sub_f32_e32 v0, v4, v0 2775; SI-NEXT: v_sub_f32_e32 v1, v3, v1 2776; SI-NEXT: s_setpc_b64 s[30:31] 2777; 2778; VI-LABEL: fadd_select_fneg_fneg_v2f16: 2779; VI: ; %bb.0: 2780; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2781; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 2782; VI-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 2783; VI-NEXT: v_sub_f16_sdwa v1, v3, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 2784; VI-NEXT: v_sub_f16_e32 v0, v3, v0 2785; VI-NEXT: v_or_b32_e32 v0, v0, v1 2786; VI-NEXT: s_setpc_b64 s[30:31] 2787 %cmp = icmp eq i32 %arg0, 0 2788 %neg.x = fneg <2 x half> %x 2789 %neg.y = fneg <2 x half> %y 2790 %select = select i1 %cmp, <2 x half> %neg.x, <2 x half> %neg.y 2791 %add = fadd <2 x half> %select, %z 2792 ret <2 x half> %add 2793} 2794 2795; -------------------------------------------------------------------------------- 2796; select tests 2797; -------------------------------------------------------------------------------- 2798 2799define amdgpu_kernel void @s_fneg_select_infloop_regression_f32(float %arg, i1 %arg1, ptr addrspace(1) %ptr) { 2800; SI-LABEL: s_fneg_select_infloop_regression_f32: 2801; SI: ; %bb.0: 2802; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 2803; SI-NEXT: s_waitcnt lgkmcnt(0) 2804; SI-NEXT: s_bitcmp1_b32 s1, 0 2805; SI-NEXT: v_mov_b32_e32 
v0, s0 2806; SI-NEXT: s_cselect_b64 s[0:1], -1, 0 2807; SI-NEXT: v_cndmask_b32_e64 v0, v0, 0, s[0:1] 2808; SI-NEXT: v_cndmask_b32_e64 v2, -v0, 0, s[0:1] 2809; SI-NEXT: v_mov_b32_e32 v0, s2 2810; SI-NEXT: v_mov_b32_e32 v1, s3 2811; SI-NEXT: flat_store_dword v[0:1], v2 2812; SI-NEXT: s_endpgm 2813; 2814; VI-LABEL: s_fneg_select_infloop_regression_f32: 2815; VI: ; %bb.0: 2816; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 2817; VI-NEXT: s_waitcnt lgkmcnt(0) 2818; VI-NEXT: s_bitcmp1_b32 s1, 0 2819; VI-NEXT: v_mov_b32_e32 v0, s0 2820; VI-NEXT: s_cselect_b64 s[0:1], -1, 0 2821; VI-NEXT: v_cndmask_b32_e64 v0, v0, 0, s[0:1] 2822; VI-NEXT: v_cndmask_b32_e64 v2, -v0, 0, s[0:1] 2823; VI-NEXT: v_mov_b32_e32 v0, s2 2824; VI-NEXT: v_mov_b32_e32 v1, s3 2825; VI-NEXT: flat_store_dword v[0:1], v2 2826; VI-NEXT: s_endpgm 2827 %i = select i1 %arg1, float 0.0, float %arg 2828 %i2 = fneg float %i 2829 %i3 = select i1 %arg1, float 0.0, float %i2 2830 store float %i3, ptr addrspace(1) %ptr, align 4 2831 ret void 2832} 2833 2834define float @v_fneg_select_infloop_regression_f32(float %arg, i1 %arg1) { 2835; GCN-LABEL: v_fneg_select_infloop_regression_f32: 2836; GCN: ; %bb.0: 2837; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2838; GCN-NEXT: v_and_b32_e32 v1, 1, v1 2839; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 2840; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc 2841; GCN-NEXT: v_cndmask_b32_e64 v0, -v0, 0, vcc 2842; GCN-NEXT: s_setpc_b64 s[30:31] 2843 %i = select i1 %arg1, float 0.0, float %arg 2844 %i2 = fneg float %i 2845 %i3 = select i1 %arg1, float 0.0, float %i2 2846 ret float %i3 2847} 2848 2849define float @v_fneg_select_infloop_regression_f32_commute0(float %arg, i1 %arg1) { 2850; GCN-LABEL: v_fneg_select_infloop_regression_f32_commute0: 2851; GCN: ; %bb.0: 2852; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2853; GCN-NEXT: v_and_b32_e32 v1, 1, v1 2854; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 2855; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc 2856; GCN-NEXT: v_cndmask_b32_e64 v0, 
-v0, 0, vcc 2857; GCN-NEXT: s_setpc_b64 s[30:31] 2858 %i = select i1 %arg1, float %arg, float 0.0 2859 %i2 = fneg float %i 2860 %i3 = select i1 %arg1, float 0.0, float %i2 2861 ret float %i3 2862} 2863 2864define float @v_fneg_select_infloop_regression_f32_commute1(float %arg, i1 %arg1) { 2865; GCN-LABEL: v_fneg_select_infloop_regression_f32_commute1: 2866; GCN: ; %bb.0: 2867; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2868; GCN-NEXT: v_and_b32_e32 v1, 1, v1 2869; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 2870; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc 2871; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -v0, vcc 2872; GCN-NEXT: s_setpc_b64 s[30:31] 2873 %i = select i1 %arg1, float 0.0, float %arg 2874 %i2 = fneg float %i 2875 %i3 = select i1 %arg1, float %i2, float 0.0 2876 ret float %i3 2877} 2878 2879define float @v_fneg_select_infloop_regression_f32_commute2(float %arg, i1 %arg1) { 2880; GCN-LABEL: v_fneg_select_infloop_regression_f32_commute2: 2881; GCN: ; %bb.0: 2882; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2883; GCN-NEXT: v_and_b32_e32 v1, 1, v1 2884; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 2885; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc 2886; GCN-NEXT: v_cndmask_b32_e64 v0, 0, -v0, vcc 2887; GCN-NEXT: s_setpc_b64 s[30:31] 2888 %i = select i1 %arg1, float %arg, float 0.0 2889 %i2 = fneg float %i 2890 %i3 = select i1 %arg1, float %i2, float 0.0 2891 ret float %i3 2892} 2893 2894; Check with an inline constant that's equally cheap to negate 2895define float @v_fneg_select_infloop_regression_inline_imm_f32(float %arg, i1 %arg1) { 2896; GCN-LABEL: v_fneg_select_infloop_regression_inline_imm_f32: 2897; GCN: ; %bb.0: 2898; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2899; GCN-NEXT: v_and_b32_e32 v1, 1, v1 2900; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 2901; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 2.0, vcc 2902; GCN-NEXT: v_cndmask_b32_e64 v0, -v0, 2.0, vcc 2903; GCN-NEXT: s_setpc_b64 s[30:31] 2904 %i = select i1 %arg1, float 2.0, float %arg 2905 %i2 = fneg float 
%i 2906 %i3 = select i1 %arg1, float 2.0, float %i2 2907 ret float %i3 2908} 2909 2910define float @v_fneg_select_infloop_regression_inline_imm_f32_commute0(float %arg, i1 %arg1) { 2911; GCN-LABEL: v_fneg_select_infloop_regression_inline_imm_f32_commute0: 2912; GCN: ; %bb.0: 2913; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2914; GCN-NEXT: v_and_b32_e32 v1, 1, v1 2915; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 2916; GCN-NEXT: v_cndmask_b32_e32 v0, 2.0, v0, vcc 2917; GCN-NEXT: v_cndmask_b32_e64 v0, -v0, 2.0, vcc 2918; GCN-NEXT: s_setpc_b64 s[30:31] 2919 %i = select i1 %arg1, float %arg, float 2.0 2920 %i2 = fneg float %i 2921 %i3 = select i1 %arg1, float 2.0, float %i2 2922 ret float %i3 2923} 2924 2925define float @v_fneg_select_infloop_regression_inline_imm_f32_commute1(float %arg, i1 %arg1) { 2926; GCN-LABEL: v_fneg_select_infloop_regression_inline_imm_f32_commute1: 2927; GCN: ; %bb.0: 2928; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2929; GCN-NEXT: v_and_b32_e32 v1, 1, v1 2930; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 2931; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 2.0, vcc 2932; GCN-NEXT: v_cndmask_b32_e64 v0, 2.0, -v0, vcc 2933; GCN-NEXT: s_setpc_b64 s[30:31] 2934 %i = select i1 %arg1, float 2.0, float %arg 2935 %i2 = fneg float %i 2936 %i3 = select i1 %arg1, float %i2, float 2.0 2937 ret float %i3 2938} 2939 2940define float @v_fneg_select_infloop_regression_inline_imm_f32_commute2(float %arg, i1 %arg1) { 2941; GCN-LABEL: v_fneg_select_infloop_regression_inline_imm_f32_commute2: 2942; GCN: ; %bb.0: 2943; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2944; GCN-NEXT: v_and_b32_e32 v1, 1, v1 2945; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 2946; GCN-NEXT: v_cndmask_b32_e32 v0, 2.0, v0, vcc 2947; GCN-NEXT: v_cndmask_b32_e64 v0, 2.0, -v0, vcc 2948; GCN-NEXT: s_setpc_b64 s[30:31] 2949 %i = select i1 %arg1, float %arg, float 2.0 2950 %i2 = fneg float %i 2951 %i3 = select i1 %arg1, float %i2, float 2.0 2952 ret float %i3 2953} 2954 2955; Check with an inline constant 
that's equally cheap to negate 2956define float @v_fneg_select_infloop_regression_neg_inline_imm_f32(float %arg, i1 %arg1) { 2957; GCN-LABEL: v_fneg_select_infloop_regression_neg_inline_imm_f32: 2958; GCN: ; %bb.0: 2959; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2960; GCN-NEXT: v_and_b32_e32 v1, 1, v1 2961; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 2962; GCN-NEXT: v_cndmask_b32_e64 v0, v0, -2.0, vcc 2963; GCN-NEXT: v_cndmask_b32_e64 v0, -v0, -2.0, vcc 2964; GCN-NEXT: s_setpc_b64 s[30:31] 2965 %i = select i1 %arg1, float -2.0, float %arg 2966 %i2 = fneg float %i 2967 %i3 = select i1 %arg1, float -2.0, float %i2 2968 ret float %i3 2969} 2970 2971define float @v_fneg_select_infloop_regression_neg_inline_imm_f32_commute0(float %arg, i1 %arg1) { 2972; GCN-LABEL: v_fneg_select_infloop_regression_neg_inline_imm_f32_commute0: 2973; GCN: ; %bb.0: 2974; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2975; GCN-NEXT: v_and_b32_e32 v1, 1, v1 2976; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 2977; GCN-NEXT: v_cndmask_b32_e32 v0, -2.0, v0, vcc 2978; GCN-NEXT: v_cndmask_b32_e64 v0, -v0, -2.0, vcc 2979; GCN-NEXT: s_setpc_b64 s[30:31] 2980 %i = select i1 %arg1, float %arg, float -2.0 2981 %i2 = fneg float %i 2982 %i3 = select i1 %arg1, float -2.0, float %i2 2983 ret float %i3 2984} 2985 2986define float @v_fneg_select_infloop_regression_neg_inline_imm_f32_commute1(float %arg, i1 %arg1) { 2987; GCN-LABEL: v_fneg_select_infloop_regression_neg_inline_imm_f32_commute1: 2988; GCN: ; %bb.0: 2989; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2990; GCN-NEXT: v_and_b32_e32 v1, 1, v1 2991; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 2992; GCN-NEXT: v_cndmask_b32_e64 v0, v0, -2.0, vcc 2993; GCN-NEXT: v_cndmask_b32_e64 v0, -2.0, -v0, vcc 2994; GCN-NEXT: s_setpc_b64 s[30:31] 2995 %i = select i1 %arg1, float -2.0, float %arg 2996 %i2 = fneg float %i 2997 %i3 = select i1 %arg1, float %i2, float -2.0 2998 ret float %i3 2999} 3000 3001define float 
@v_fneg_select_infloop_regression_neg_inline_imm_f32_commute2(float %arg, i1 %arg1) { 3002; GCN-LABEL: v_fneg_select_infloop_regression_neg_inline_imm_f32_commute2: 3003; GCN: ; %bb.0: 3004; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3005; GCN-NEXT: v_and_b32_e32 v1, 1, v1 3006; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 3007; GCN-NEXT: v_cndmask_b32_e32 v0, -2.0, v0, vcc 3008; GCN-NEXT: v_cndmask_b32_e64 v0, -2.0, -v0, vcc 3009; GCN-NEXT: s_setpc_b64 s[30:31] 3010 %i = select i1 %arg1, float %arg, float -2.0 3011 %i2 = fneg float %i 3012 %i3 = select i1 %arg1, float %i2, float -2.0 3013 ret float %i3 3014} 3015 3016define amdgpu_kernel void @s_fneg_select_infloop_regression_f64(double %arg, i1 %arg1, ptr addrspace(1) %ptr) { 3017; SI-LABEL: s_fneg_select_infloop_regression_f64: 3018; SI: ; %bb.0: 3019; SI-NEXT: s_load_dword s6, s[4:5], 0xb 3020; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 3021; SI-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0xd 3022; SI-NEXT: v_bfrev_b32_e32 v0, 1 3023; SI-NEXT: s_waitcnt lgkmcnt(0) 3024; SI-NEXT: s_bitcmp1_b32 s6, 0 3025; SI-NEXT: s_cselect_b64 s[4:5], -1, 0 3026; SI-NEXT: v_mov_b32_e32 v1, s1 3027; SI-NEXT: s_and_b64 s[6:7], s[4:5], exec 3028; SI-NEXT: v_cndmask_b32_e64 v0, -v1, v0, s[4:5] 3029; SI-NEXT: s_cselect_b32 s0, 0, s0 3030; SI-NEXT: v_mov_b32_e32 v2, s2 3031; SI-NEXT: v_cndmask_b32_e64 v1, v0, 0, s[4:5] 3032; SI-NEXT: v_mov_b32_e32 v0, s0 3033; SI-NEXT: v_mov_b32_e32 v3, s3 3034; SI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 3035; SI-NEXT: s_endpgm 3036; 3037; VI-LABEL: s_fneg_select_infloop_regression_f64: 3038; VI: ; %bb.0: 3039; VI-NEXT: s_load_dword s6, s[4:5], 0x2c 3040; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 3041; VI-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x34 3042; VI-NEXT: v_bfrev_b32_e32 v0, 1 3043; VI-NEXT: s_waitcnt lgkmcnt(0) 3044; VI-NEXT: s_bitcmp1_b32 s6, 0 3045; VI-NEXT: s_cselect_b64 s[4:5], -1, 0 3046; VI-NEXT: v_mov_b32_e32 v1, s1 3047; VI-NEXT: s_and_b64 s[6:7], s[4:5], exec 3048; VI-NEXT: 
v_cndmask_b32_e64 v0, -v1, v0, s[4:5] 3049; VI-NEXT: s_cselect_b32 s0, 0, s0 3050; VI-NEXT: v_mov_b32_e32 v2, s2 3051; VI-NEXT: v_cndmask_b32_e64 v1, v0, 0, s[4:5] 3052; VI-NEXT: v_mov_b32_e32 v0, s0 3053; VI-NEXT: v_mov_b32_e32 v3, s3 3054; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 3055; VI-NEXT: s_endpgm 3056 %i = select i1 %arg1, double 0.0, double %arg 3057 %i2 = fneg double %i 3058 %i3 = select i1 %arg1, double 0.0, double %i2 3059 store double %i3, ptr addrspace(1) %ptr, align 4 3060 ret void 3061} 3062 3063define double @v_fneg_select_infloop_regression_f64(double %arg, i1 %arg1) { 3064; GCN-LABEL: v_fneg_select_infloop_regression_f64: 3065; GCN: ; %bb.0: 3066; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3067; GCN-NEXT: v_and_b32_e32 v2, 1, v2 3068; GCN-NEXT: v_bfrev_b32_e32 v3, 1 3069; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 3070; GCN-NEXT: v_cndmask_b32_e64 v1, -v1, v3, vcc 3071; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc 3072; GCN-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc 3073; GCN-NEXT: s_setpc_b64 s[30:31] 3074 %i = select i1 %arg1, double 0.0, double %arg 3075 %i2 = fneg double %i 3076 %i3 = select i1 %arg1, double 0.0, double %i2 3077 ret double %i3 3078} 3079 3080define amdgpu_kernel void @s_fneg_select_infloop_regression_f16(half %arg, i1 %arg1, ptr addrspace(1) %ptr) { 3081; SI-LABEL: s_fneg_select_infloop_regression_f16: 3082; SI: ; %bb.0: 3083; SI-NEXT: s_load_dword s2, s[4:5], 0x9 3084; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xb 3085; SI-NEXT: s_waitcnt lgkmcnt(0) 3086; SI-NEXT: v_cvt_f32_f16_e32 v0, s2 3087; SI-NEXT: s_bitcmp1_b32 s2, 16 3088; SI-NEXT: s_cselect_b64 s[2:3], -1, 0 3089; SI-NEXT: v_cndmask_b32_e64 v0, v0, 0, s[2:3] 3090; SI-NEXT: v_cndmask_b32_e64 v0, -v0, 0, s[2:3] 3091; SI-NEXT: v_cvt_f16_f32_e32 v2, v0 3092; SI-NEXT: v_mov_b32_e32 v0, s0 3093; SI-NEXT: v_mov_b32_e32 v1, s1 3094; SI-NEXT: flat_store_short v[0:1], v2 3095; SI-NEXT: s_endpgm 3096; 3097; VI-LABEL: s_fneg_select_infloop_regression_f16: 3098; VI: ; %bb.0: 3099; 
VI-NEXT: s_load_dword s2, s[4:5], 0x24 3100; VI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2c 3101; VI-NEXT: s_waitcnt lgkmcnt(0) 3102; VI-NEXT: s_bitcmp1_b32 s2, 16 3103; VI-NEXT: v_mov_b32_e32 v0, s2 3104; VI-NEXT: s_cselect_b64 s[2:3], -1, 0 3105; VI-NEXT: v_cndmask_b32_e64 v0, v0, 0, s[2:3] 3106; VI-NEXT: v_xor_b32_e32 v0, 0x8000, v0 3107; VI-NEXT: v_cndmask_b32_e64 v2, v0, 0, s[2:3] 3108; VI-NEXT: v_mov_b32_e32 v0, s0 3109; VI-NEXT: v_mov_b32_e32 v1, s1 3110; VI-NEXT: flat_store_short v[0:1], v2 3111; VI-NEXT: s_endpgm 3112 %i = select i1 %arg1, half 0.0, half %arg 3113 %i2 = fneg half %i 3114 %i3 = select i1 %arg1, half 0.0, half %i2 3115 store half %i3, ptr addrspace(1) %ptr, align 4 3116 ret void 3117} 3118 3119define half @v_fneg_select_infloop_regression_f16(half %arg, i1 %arg1) { 3120; SI-LABEL: v_fneg_select_infloop_regression_f16: 3121; SI: ; %bb.0: 3122; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3123; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 3124; SI-NEXT: v_and_b32_e32 v1, 1, v1 3125; SI-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 3126; SI-NEXT: v_cvt_f32_f16_e32 v0, v0 3127; SI-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc 3128; SI-NEXT: v_cndmask_b32_e64 v0, -v0, 0, vcc 3129; SI-NEXT: s_setpc_b64 s[30:31] 3130; 3131; VI-LABEL: v_fneg_select_infloop_regression_f16: 3132; VI: ; %bb.0: 3133; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3134; VI-NEXT: v_and_b32_e32 v1, 1, v1 3135; VI-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 3136; VI-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc 3137; VI-NEXT: v_xor_b32_e32 v0, 0x8000, v0 3138; VI-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc 3139; VI-NEXT: s_setpc_b64 s[30:31] 3140 %i = select i1 %arg1, half 0.0, half %arg 3141 %i2 = fneg half %i 3142 %i3 = select i1 %arg1, half 0.0, half %i2 3143 ret half %i3 3144} 3145 3146define amdgpu_kernel void @s_fneg_select_infloop_regression_v2f16(<2 x half> %arg, i1 %arg1, ptr addrspace(1) %ptr) { 3147; SI-LABEL: s_fneg_select_infloop_regression_v2f16: 3148; SI: ; %bb.0: 3149; SI-NEXT: s_load_dwordx4 s[0:3], 
s[4:5], 0x9 3150; SI-NEXT: s_waitcnt lgkmcnt(0) 3151; SI-NEXT: s_and_b32 s1, 1, s1 3152; SI-NEXT: s_cselect_b32 s0, 0, s0 3153; SI-NEXT: s_xor_b32 s0, s0, 0x80008000 3154; SI-NEXT: s_cmp_eq_u32 s1, 1 3155; SI-NEXT: s_cselect_b32 s0, 0, s0 3156; SI-NEXT: v_mov_b32_e32 v0, s2 3157; SI-NEXT: v_mov_b32_e32 v1, s3 3158; SI-NEXT: v_mov_b32_e32 v2, s0 3159; SI-NEXT: flat_store_dword v[0:1], v2 3160; SI-NEXT: s_endpgm 3161; 3162; VI-LABEL: s_fneg_select_infloop_regression_v2f16: 3163; VI: ; %bb.0: 3164; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 3165; VI-NEXT: s_waitcnt lgkmcnt(0) 3166; VI-NEXT: s_and_b32 s1, 1, s1 3167; VI-NEXT: s_cselect_b32 s0, 0, s0 3168; VI-NEXT: s_xor_b32 s0, s0, 0x80008000 3169; VI-NEXT: s_cmp_eq_u32 s1, 1 3170; VI-NEXT: s_cselect_b32 s0, 0, s0 3171; VI-NEXT: v_mov_b32_e32 v0, s2 3172; VI-NEXT: v_mov_b32_e32 v1, s3 3173; VI-NEXT: v_mov_b32_e32 v2, s0 3174; VI-NEXT: flat_store_dword v[0:1], v2 3175; VI-NEXT: s_endpgm 3176 %i = select i1 %arg1, <2 x half> zeroinitializer, <2 x half> %arg 3177 %i2 = fneg <2 x half> %i 3178 %i3 = select i1 %arg1, <2 x half> zeroinitializer, <2 x half> %i2 3179 store <2 x half> %i3, ptr addrspace(1) %ptr, align 4 3180 ret void 3181} 3182 3183define <2 x half> @v_fneg_select_infloop_regression_v2f16(<2 x half> %arg, i1 %arg1) { 3184; SI-LABEL: v_fneg_select_infloop_regression_v2f16: 3185; SI: ; %bb.0: 3186; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3187; SI-NEXT: v_cvt_f16_f32_e32 v1, v1 3188; SI-NEXT: v_cvt_f16_f32_e32 v0, v0 3189; SI-NEXT: v_lshlrev_b32_e32 v1, 16, v1 3190; SI-NEXT: v_or_b32_e32 v0, v0, v1 3191; SI-NEXT: v_and_b32_e32 v1, 1, v2 3192; SI-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 3193; SI-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc 3194; SI-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 3195; SI-NEXT: v_cndmask_b32_e64 v1, v0, 0, vcc 3196; SI-NEXT: v_cvt_f32_f16_e32 v0, v1 3197; SI-NEXT: v_lshrrev_b32_e32 v1, 16, v1 3198; SI-NEXT: v_cvt_f32_f16_e32 v1, v1 3199; SI-NEXT: s_setpc_b64 s[30:31] 3200; 3201; VI-LABEL: 
v_fneg_select_infloop_regression_v2f16: 3202; VI: ; %bb.0: 3203; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3204; VI-NEXT: v_and_b32_e32 v1, 1, v1 3205; VI-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 3206; VI-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc 3207; VI-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 3208; VI-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc 3209; VI-NEXT: s_setpc_b64 s[30:31] 3210 %i = select i1 %arg1, <2 x half> zeroinitializer, <2 x half> %arg 3211 %i2 = fneg <2 x half> %i 3212 %i3 = select i1 %arg1, <2 x half> zeroinitializer, <2 x half> %i2 3213 ret <2 x half> %i3 3214} 3215 3216define amdgpu_kernel void @s_fneg_select_infloop_regression_v2f32(<2 x float> %arg, i1 %arg1, ptr addrspace(1) %ptr) { 3217; SI-LABEL: s_fneg_select_infloop_regression_v2f32: 3218; SI: ; %bb.0: 3219; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 3220; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0xd 3221; SI-NEXT: v_bfrev_b32_e32 v0, 1 3222; SI-NEXT: s_waitcnt lgkmcnt(0) 3223; SI-NEXT: s_bitcmp1_b32 s2, 0 3224; SI-NEXT: v_mov_b32_e32 v1, s0 3225; SI-NEXT: s_cselect_b64 s[2:3], -1, 0 3226; SI-NEXT: v_cndmask_b32_e64 v2, -v1, v0, s[2:3] 3227; SI-NEXT: v_mov_b32_e32 v1, s1 3228; SI-NEXT: v_cndmask_b32_e64 v0, -v1, v0, s[2:3] 3229; SI-NEXT: v_cndmask_b32_e64 v1, v0, 0, s[2:3] 3230; SI-NEXT: v_cndmask_b32_e64 v0, v2, 0, s[2:3] 3231; SI-NEXT: v_mov_b32_e32 v2, s4 3232; SI-NEXT: v_mov_b32_e32 v3, s5 3233; SI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 3234; SI-NEXT: s_endpgm 3235; 3236; VI-LABEL: s_fneg_select_infloop_regression_v2f32: 3237; VI: ; %bb.0: 3238; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 3239; VI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34 3240; VI-NEXT: v_bfrev_b32_e32 v0, 1 3241; VI-NEXT: s_waitcnt lgkmcnt(0) 3242; VI-NEXT: s_bitcmp1_b32 s2, 0 3243; VI-NEXT: v_mov_b32_e32 v1, s0 3244; VI-NEXT: s_cselect_b64 s[2:3], -1, 0 3245; VI-NEXT: v_cndmask_b32_e64 v2, -v1, v0, s[2:3] 3246; VI-NEXT: v_mov_b32_e32 v1, s1 3247; VI-NEXT: v_cndmask_b32_e64 v0, -v1, v0, s[2:3] 3248; VI-NEXT: 
v_cndmask_b32_e64 v1, v0, 0, s[2:3] 3249; VI-NEXT: v_cndmask_b32_e64 v0, v2, 0, s[2:3] 3250; VI-NEXT: v_mov_b32_e32 v2, s4 3251; VI-NEXT: v_mov_b32_e32 v3, s5 3252; VI-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 3253; VI-NEXT: s_endpgm 3254 %i = select i1 %arg1, <2 x float> zeroinitializer, <2 x float> %arg 3255 %i2 = fneg <2 x float> %i 3256 %i3 = select i1 %arg1, <2 x float> zeroinitializer, <2 x float> %i2 3257 store <2 x float> %i3, ptr addrspace(1) %ptr, align 4 3258 ret void 3259} 3260 3261define <2 x float> @v_fneg_select_infloop_regression_v2f32(<2 x float> %arg, i1 %arg1) { 3262; GCN-LABEL: v_fneg_select_infloop_regression_v2f32: 3263; GCN: ; %bb.0: 3264; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3265; GCN-NEXT: v_and_b32_e32 v2, 1, v2 3266; GCN-NEXT: v_bfrev_b32_e32 v3, 1 3267; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 3268; GCN-NEXT: v_cndmask_b32_e64 v1, -v1, v3, vcc 3269; GCN-NEXT: v_cndmask_b32_e64 v0, -v0, v3, vcc 3270; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc 3271; GCN-NEXT: v_cndmask_b32_e64 v1, v1, 0, vcc 3272; GCN-NEXT: s_setpc_b64 s[30:31] 3273 %i = select i1 %arg1, <2 x float> zeroinitializer, <2 x float> %arg 3274 %i2 = fneg <2 x float> %i 3275 %i3 = select i1 %arg1, <2 x float> zeroinitializer, <2 x float> %i2 3276 ret <2 x float> %i3 3277} 3278 3279define amdgpu_kernel void @s_fabs_select_infloop_regression_f32(float %arg, i1 %arg1, ptr addrspace(1) %ptr) { 3280; SI-LABEL: s_fabs_select_infloop_regression_f32: 3281; SI: ; %bb.0: 3282; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 3283; SI-NEXT: s_waitcnt lgkmcnt(0) 3284; SI-NEXT: s_bitcmp1_b32 s1, 0 3285; SI-NEXT: v_mov_b32_e32 v0, s0 3286; SI-NEXT: s_cselect_b64 s[0:1], -1, 0 3287; SI-NEXT: v_cndmask_b32_e64 v0, v0, 0, s[0:1] 3288; SI-NEXT: v_cndmask_b32_e64 v2, |v0|, 0, s[0:1] 3289; SI-NEXT: v_mov_b32_e32 v0, s2 3290; SI-NEXT: v_mov_b32_e32 v1, s3 3291; SI-NEXT: flat_store_dword v[0:1], v2 3292; SI-NEXT: s_endpgm 3293; 3294; VI-LABEL: s_fabs_select_infloop_regression_f32: 3295; VI: ; 
%bb.0:
; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_bitcmp1_b32 s1, 0
; VI-NEXT: v_mov_b32_e32 v0, s0
; VI-NEXT: s_cselect_b64 s[0:1], -1, 0
; VI-NEXT: v_cndmask_b32_e64 v0, v0, 0, s[0:1]
; VI-NEXT: v_cndmask_b32_e64 v2, |v0|, 0, s[0:1]
; VI-NEXT: v_mov_b32_e32 v0, s2
; VI-NEXT: v_mov_b32_e32 v1, s3
; VI-NEXT: flat_store_dword v[0:1], v2
; VI-NEXT: s_endpgm
  %i = select i1 %arg1, float 0.0, float %arg
  %i2 = call float @llvm.fabs.f32(float %i)
  %i3 = select i1 %arg1, float 0.0, float %i2
  store float %i3, ptr addrspace(1) %ptr, align 4
  ret void
}

; Non-kernel form of the scalar select/fabs/select pattern. The value is
; returned instead of stored. The expected code is two v_cndmask instructions,
; the second one reading its source through the |v0| modifier.
define float @v_fabs_select_infloop_regression_f32(float %arg, i1 %arg1) {
; GCN-LABEL: v_fabs_select_infloop_regression_f32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_and_b32_e32 v1, 1, v1
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
; GCN-NEXT: v_cndmask_b32_e64 v0, |v0|, 0, vcc
; GCN-NEXT: s_setpc_b64 s[30:31]
  %i = select i1 %arg1, float 0.0, float %arg
  %i2 = call float @llvm.fabs.f32(float %i)
  %i3 = select i1 %arg1, float 0.0, float %i2
  ret float %i3
}

; Kernel form of the scalar select/fabs/fneg/select pattern. Same shape as the
; fabs-only test, but the fabs result is negated before the second select, so
; the expected second v_cndmask reads its source as -|v0|.
define amdgpu_kernel void @s_fneg_fabs_select_infloop_regression(float %arg, i1 %arg1, ptr addrspace(1) %ptr) {
; SI-LABEL: s_fneg_fabs_select_infloop_regression:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_bitcmp1_b32 s1, 0
; SI-NEXT: v_mov_b32_e32 v0, s0
; SI-NEXT: s_cselect_b64 s[0:1], -1, 0
; SI-NEXT: v_cndmask_b32_e64 v0, v0, 0, s[0:1]
; SI-NEXT: v_cndmask_b32_e64 v2, -|v0|, 0, s[0:1]
; SI-NEXT: v_mov_b32_e32 v0, s2
; SI-NEXT: v_mov_b32_e32 v1, s3
; SI-NEXT: flat_store_dword v[0:1], v2
; SI-NEXT: s_endpgm
;
; VI-LABEL: s_fneg_fabs_select_infloop_regression:
; VI: ; %bb.0:
;
VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-NEXT: s_waitcnt lgkmcnt(0)
; VI-NEXT: s_bitcmp1_b32 s1, 0
; VI-NEXT: v_mov_b32_e32 v0, s0
; VI-NEXT: s_cselect_b64 s[0:1], -1, 0
; VI-NEXT: v_cndmask_b32_e64 v0, v0, 0, s[0:1]
; VI-NEXT: v_cndmask_b32_e64 v2, -|v0|, 0, s[0:1]
; VI-NEXT: v_mov_b32_e32 v0, s2
; VI-NEXT: v_mov_b32_e32 v1, s3
; VI-NEXT: flat_store_dword v[0:1], v2
; VI-NEXT: s_endpgm
  %i = select i1 %arg1, float 0.0, float %arg
  %i2 = call float @llvm.fabs.f32(float %i)
  %neg.i2 = fneg float %i2
  %i3 = select i1 %arg1, float 0.0, float %neg.i2
  store float %i3, ptr addrspace(1) %ptr, align 4
  ret void
}

; Non-kernel form of the scalar select/fabs/fneg/select pattern. The value is
; returned instead of stored. The expected second v_cndmask reads its source
; as -|v0|.
define float @v_fneg_fabs_select_infloop_regression(float %arg, i1 %arg1) {
; GCN-LABEL: v_fneg_fabs_select_infloop_regression:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_and_b32_e32 v1, 1, v1
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
; GCN-NEXT: v_cndmask_b32_e64 v0, v0, 0, vcc
; GCN-NEXT: v_cndmask_b32_e64 v0, -|v0|, 0, vcc
; GCN-NEXT: s_setpc_b64 s[30:31]
  %i = select i1 %arg1, float 0.0, float %arg
  %i2 = call float @llvm.fabs.f32(float %i)
  %neg.i2 = fneg float %i2
  %i3 = select i1 %arg1, float 0.0, float %neg.i2
  ret float %i3
}

; Multiplies %arg by 0.0 and subtracts the product from 0.0. The two check
; groups differ. The SAFE group expects a v_mul by 0 followed by a v_sub from
; 0, while the NSZ group expects a single v_mul by 0x80000000 (the IEEE-754
; single-precision bit pattern of -0.0). The entry block is explicitly named
; bb, which is why the expected block comment reads %bb.0 followed by %bb.
define float @v_fmul_0_fsub_0_infloop_regression(float %arg) {
; GCN-SAFE-LABEL: v_fmul_0_fsub_0_infloop_regression:
; GCN-SAFE: ; %bb.0: ; %bb
; GCN-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-SAFE-NEXT: v_mul_f32_e32 v0, 0, v0
; GCN-SAFE-NEXT: v_sub_f32_e32 v0, 0, v0
; GCN-SAFE-NEXT: s_setpc_b64 s[30:31]
;
; GCN-NSZ-LABEL: v_fmul_0_fsub_0_infloop_regression:
; GCN-NSZ: ; %bb.0: ; %bb
; GCN-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NSZ-NEXT: v_mul_f32_e32 v0, 0x80000000, v0
; GCN-NSZ-NEXT: s_setpc_b64 s[30:31]
bb:
  %i = fmul float %arg, 0.0
  %i1 = fsub float
0.0, %i
  ret float %i1
}

; Generic LLVM intrinsics on f32 and f64, plus the AMDGPU workitem id query.
; Only llvm.fabs.f32 is called by the tests immediately above; the remaining
; declarations are presumably used by tests earlier in the file.
declare i32 @llvm.amdgcn.workitem.id.x() #1
declare float @llvm.fma.f32(float, float, float) #1
declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
declare float @llvm.fmuladd.f32(float, float, float) #1
declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>) #1
declare float @llvm.fabs.f32(float) #1
declare float @llvm.sin.f32(float) #1
declare float @llvm.trunc.f32(float) #1
declare float @llvm.round.f32(float) #1
declare float @llvm.rint.f32(float) #1
declare float @llvm.nearbyint.f32(float) #1
declare float @llvm.roundeven.f32(float) #1
declare float @llvm.canonicalize.f32(float) #1
declare float @llvm.arithmetic.fence.f32(float) #1
declare float @llvm.minnum.f32(float, float) #1
declare float @llvm.maxnum.f32(float, float) #1
declare double @llvm.minnum.f64(double, double) #1
declare double @llvm.fma.f64(double, double, double) #1

; AMDGPU-specific f32 intrinsics. The two interp declarations carry attribute
; group #0 rather than #1.
declare float @llvm.amdgcn.sin.f32(float) #1
declare float @llvm.amdgcn.rcp.f32(float) #1
declare float @llvm.amdgcn.rcp.legacy(float) #1
declare float @llvm.amdgcn.fmul.legacy(float, float) #1
declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #0
declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #0

; Half-precision counterparts of the intrinsics declared above.
declare half @llvm.fma.f16(half, half, half) #1
declare <2 x half> @llvm.fma.v2f16(<2 x half>, <2 x half>, <2 x half>)
declare half @llvm.fmuladd.f16(half, half, half) #1
declare <4 x half> @llvm.fmuladd.v4f16(<4 x half>, <4 x half>, <4 x half>) #1
declare half @llvm.sin.f16(half) #1
declare half @llvm.trunc.f16(half) #1
declare half @llvm.round.f16(half) #1
declare half @llvm.rint.f16(half) #1
declare half @llvm.nearbyint.f16(half) #1
declare half @llvm.canonicalize.f16(half) #1
declare half @llvm.minnum.f16(half, half) #1
declare half @llvm.maxnum.f16(half, half) #1
declare
half @llvm.amdgcn.sin.f16(half) #1
declare half @llvm.amdgcn.rcp.f16(half) #1

; Attribute groups referenced through the #N suffixes in this file.
;   #0  nounwind, f32 denormal mode preserve-sign for both output and input
;   #1  nounwind readnone, attached to most intrinsic declarations
;   #2  nounwind with unsafe-fp-math enabled
;   #3  nounwind with no-signed-zeros-fp-math enabled
;   #4  same as #0 plus amdgpu-ieee disabled
; Groups #2, #3 and #4 are not referenced in the nearby tests; they are
; presumably used earlier in the file -- verify before removing any of them.
attributes #0 = { nounwind "denormal-fp-math-f32"="preserve-sign,preserve-sign" }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind "unsafe-fp-math"="true" }
attributes #3 = { nounwind "no-signed-zeros-fp-math"="true" }
attributes #4 = { nounwind "amdgpu-ieee"="false" "denormal-fp-math-f32"="preserve-sign,preserve-sign" }