; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s

; Codegen checks for a square root paired with a reciprocal, under GlobalISel
; on gfx1010. The reciprocal is written either as `fdiv afn 1.0, x` or as a
; call to llvm.amdgcn.rcp.f32, the two operations appear in both orders, and
; every case is repeated with the `contract` fast-math flag on both operations.
;
; Every expected sequence below has the same two parts:
;   * a multi-instruction sqrt expansion: conditional pre-scale of the input by
;     0x4f800000, v_sqrt_f32, two candidates formed by adding -1 / +1 to the
;     result's bit pattern and selected via fma residual compares, conditional
;     post-scale by 0x37800000, and a final select on v_cmp_class_f32;
;   * a single v_rcp_f32, placed after the expansion (reciprocal of sqrt) or
;     before it (sqrt of reciprocal).
; None of the expected sequences contains v_rsq_f32.
;
; NOTE(review): the 0x260 class mask presumably selects the inputs that are
; returned unchanged (zeros / +infinity) - confirm against the ISA manual.
; NOTE(review): presumably the reciprocal-of-sqrt cases are candidates for an
; rsq fold; the current expectations show no such fold - confirm that is the
; intended behaviour before relying on this file as a regression test.

; 1.0 / sqrt(x); `afn` is on the fdiv only, the sqrt call carries no flags.
; Expected: sqrt expansion, then v_rcp_f32 on its result.
define amdgpu_cs float @div_sqrt(float inreg %arg1) {
; GCN-LABEL: div_sqrt:
; GCN:       ; %bb.0: ; %.entry
; GCN-NEXT:    v_mul_f32_e64 v0, 0x4f800000, s0
; GCN-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0xf800000, s0
; GCN-NEXT:    v_cndmask_b32_e32 v0, s0, v0, vcc_lo
; GCN-NEXT:    v_sqrt_f32_e32 v1, v0
; GCN-NEXT:    v_add_nc_u32_e32 v2, -1, v1
; GCN-NEXT:    v_add_nc_u32_e32 v3, 1, v1
; GCN-NEXT:    v_fma_f32 v4, -v2, v1, v0
; GCN-NEXT:    v_fma_f32 v5, -v3, v1, v0
; GCN-NEXT:    v_cmp_ge_f32_e64 s0, 0, v4
; GCN-NEXT:    v_cndmask_b32_e64 v1, v1, v2, s0
; GCN-NEXT:    v_cmp_lt_f32_e64 s0, 0, v5
; GCN-NEXT:    v_cndmask_b32_e64 v1, v1, v3, s0
; GCN-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc_lo
; GCN-NEXT:    v_cmp_class_f32_e64 vcc_lo, v0, 0x260
; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; GCN-NEXT:    v_rcp_f32_e32 v0, v0
; GCN-NEXT:    ; return to shader part epilog
.entry:
  %a = call float @llvm.sqrt.f32(float %arg1)
  %b = fdiv afn float 1.000000e+00, %a
  ret float %b
}

; sqrt(1.0 / x); `afn` is on the fdiv only.
; Expected: v_rcp_f32 on the input first, then the sqrt expansion.
define amdgpu_cs float @sqrt_div(float inreg %arg1) {
; GCN-LABEL: sqrt_div:
; GCN:       ; %bb.0: ; %.entry
; GCN-NEXT:    v_rcp_f32_e32 v0, s0
; GCN-NEXT:    v_mul_f32_e32 v1, 0x4f800000, v0
; GCN-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0xf800000, v0
; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GCN-NEXT:    v_sqrt_f32_e32 v1, v0
; GCN-NEXT:    v_add_nc_u32_e32 v2, -1, v1
; GCN-NEXT:    v_add_nc_u32_e32 v3, 1, v1
; GCN-NEXT:    v_fma_f32 v4, -v2, v1, v0
; GCN-NEXT:    v_fma_f32 v5, -v3, v1, v0
; GCN-NEXT:    v_cmp_ge_f32_e64 s0, 0, v4
; GCN-NEXT:    v_cndmask_b32_e64 v1, v1, v2, s0
; GCN-NEXT:    v_cmp_lt_f32_e64 s0, 0, v5
; GCN-NEXT:    v_cndmask_b32_e64 v1, v1, v3, s0
; GCN-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc_lo
; GCN-NEXT:    v_cmp_class_f32_e64 vcc_lo, v0, 0x260
; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; GCN-NEXT:    ; return to shader part epilog
.entry:
  %a = fdiv afn float 1.000000e+00, %arg1
  %b = call float @llvm.sqrt.f32(float %a)
  ret float %b
}

; rcp(sqrt(x)) using the llvm.amdgcn.rcp.f32 intrinsic; no fast-math flags.
; Expected: sqrt expansion, then v_rcp_f32 on its result.
define amdgpu_cs float @rcp_sqrt(float inreg %arg1) {
; GCN-LABEL: rcp_sqrt:
; GCN:       ; %bb.0: ; %.entry
; GCN-NEXT:    v_mul_f32_e64 v0, 0x4f800000, s0
; GCN-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0xf800000, s0
; GCN-NEXT:    v_cndmask_b32_e32 v0, s0, v0, vcc_lo
; GCN-NEXT:    v_sqrt_f32_e32 v1, v0
; GCN-NEXT:    v_add_nc_u32_e32 v2, -1, v1
; GCN-NEXT:    v_add_nc_u32_e32 v3, 1, v1
; GCN-NEXT:    v_fma_f32 v4, -v2, v1, v0
; GCN-NEXT:    v_fma_f32 v5, -v3, v1, v0
; GCN-NEXT:    v_cmp_ge_f32_e64 s0, 0, v4
; GCN-NEXT:    v_cndmask_b32_e64 v1, v1, v2, s0
; GCN-NEXT:    v_cmp_lt_f32_e64 s0, 0, v5
; GCN-NEXT:    v_cndmask_b32_e64 v1, v1, v3, s0
; GCN-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc_lo
; GCN-NEXT:    v_cmp_class_f32_e64 vcc_lo, v0, 0x260
; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; GCN-NEXT:    v_rcp_f32_e32 v0, v0
; GCN-NEXT:    ; return to shader part epilog
.entry:
  %a = call float @llvm.sqrt.f32(float %arg1)
  %b = call float @llvm.amdgcn.rcp.f32(float %a)
  ret float %b
}

; sqrt(rcp(x)) using the llvm.amdgcn.rcp.f32 intrinsic; no fast-math flags.
; Expected: v_rcp_f32 on the input first, then the sqrt expansion.
define amdgpu_cs float @sqrt_rcp(float inreg %arg1) {
; GCN-LABEL: sqrt_rcp:
; GCN:       ; %bb.0: ; %.entry
; GCN-NEXT:    v_rcp_f32_e32 v0, s0
; GCN-NEXT:    v_mul_f32_e32 v1, 0x4f800000, v0
; GCN-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0xf800000, v0
; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GCN-NEXT:    v_sqrt_f32_e32 v1, v0
; GCN-NEXT:    v_add_nc_u32_e32 v2, -1, v1
; GCN-NEXT:    v_add_nc_u32_e32 v3, 1, v1
; GCN-NEXT:    v_fma_f32 v4, -v2, v1, v0
; GCN-NEXT:    v_fma_f32 v5, -v3, v1, v0
; GCN-NEXT:    v_cmp_ge_f32_e64 s0, 0, v4
; GCN-NEXT:    v_cndmask_b32_e64 v1, v1, v2, s0
; GCN-NEXT:    v_cmp_lt_f32_e64 s0, 0, v5
; GCN-NEXT:    v_cndmask_b32_e64 v1, v1, v3, s0
; GCN-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc_lo
; GCN-NEXT:    v_cmp_class_f32_e64 vcc_lo, v0, 0x260
; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; GCN-NEXT:    ; return to shader part epilog
.entry:
  %a = call float @llvm.amdgcn.rcp.f32(float %arg1)
  %b = call float @llvm.sqrt.f32(float %a)
  ret float %b
}

; 1.0 / sqrt(x) with `contract` on the sqrt call and `afn contract` on the
; fdiv. The expected sequence matches @div_sqrt instruction for instruction.
define amdgpu_cs float @div_sqrt_contract(float inreg %arg1) {
; GCN-LABEL: div_sqrt_contract:
; GCN:       ; %bb.0: ; %.entry
; GCN-NEXT:    v_mul_f32_e64 v0, 0x4f800000, s0
; GCN-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0xf800000, s0
; GCN-NEXT:    v_cndmask_b32_e32 v0, s0, v0, vcc_lo
; GCN-NEXT:    v_sqrt_f32_e32 v1, v0
; GCN-NEXT:    v_add_nc_u32_e32 v2, -1, v1
; GCN-NEXT:    v_add_nc_u32_e32 v3, 1, v1
; GCN-NEXT:    v_fma_f32 v4, -v2, v1, v0
; GCN-NEXT:    v_fma_f32 v5, -v3, v1, v0
; GCN-NEXT:    v_cmp_ge_f32_e64 s0, 0, v4
; GCN-NEXT:    v_cndmask_b32_e64 v1, v1, v2, s0
; GCN-NEXT:    v_cmp_lt_f32_e64 s0, 0, v5
; GCN-NEXT:    v_cndmask_b32_e64 v1, v1, v3, s0
; GCN-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc_lo
; GCN-NEXT:    v_cmp_class_f32_e64 vcc_lo, v0, 0x260
; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; GCN-NEXT:    v_rcp_f32_e32 v0, v0
; GCN-NEXT:    ; return to shader part epilog
.entry:
  %a = call contract float @llvm.sqrt.f32(float %arg1)
  %b = fdiv afn contract float 1.000000e+00, %a
  ret float %b
}

; sqrt(1.0 / x) with `afn contract` on the fdiv and `contract` on the sqrt
; call. The expected sequence matches @sqrt_div instruction for instruction.
define amdgpu_cs float @sqrt_div_contract(float inreg %arg1) {
; GCN-LABEL: sqrt_div_contract:
; GCN:       ; %bb.0: ; %.entry
; GCN-NEXT:    v_rcp_f32_e32 v0, s0
; GCN-NEXT:    v_mul_f32_e32 v1, 0x4f800000, v0
; GCN-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0xf800000, v0
; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GCN-NEXT:    v_sqrt_f32_e32 v1, v0
; GCN-NEXT:    v_add_nc_u32_e32 v2, -1, v1
; GCN-NEXT:    v_add_nc_u32_e32 v3, 1, v1
; GCN-NEXT:    v_fma_f32 v4, -v2, v1, v0
; GCN-NEXT:    v_fma_f32 v5, -v3, v1, v0
; GCN-NEXT:    v_cmp_ge_f32_e64 s0, 0, v4
; GCN-NEXT:    v_cndmask_b32_e64 v1, v1, v2, s0
; GCN-NEXT:    v_cmp_lt_f32_e64 s0, 0, v5
; GCN-NEXT:    v_cndmask_b32_e64 v1, v1, v3, s0
; GCN-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc_lo
; GCN-NEXT:    v_cmp_class_f32_e64 vcc_lo, v0, 0x260
; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; GCN-NEXT:    ; return to shader part epilog
.entry:
  %a = fdiv afn contract float 1.000000e+00, %arg1
  %b = call contract float @llvm.sqrt.f32(float %a)
  ret float %b
}

; rcp(sqrt(x)) with `contract` on both intrinsic calls.
; The expected sequence matches @rcp_sqrt instruction for instruction.
define amdgpu_cs float @rcp_sqrt_contract(float inreg %arg1) {
; GCN-LABEL: rcp_sqrt_contract:
; GCN:       ; %bb.0: ; %.entry
; GCN-NEXT:    v_mul_f32_e64 v0, 0x4f800000, s0
; GCN-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0xf800000, s0
; GCN-NEXT:    v_cndmask_b32_e32 v0, s0, v0, vcc_lo
; GCN-NEXT:    v_sqrt_f32_e32 v1, v0
; GCN-NEXT:    v_add_nc_u32_e32 v2, -1, v1
; GCN-NEXT:    v_add_nc_u32_e32 v3, 1, v1
; GCN-NEXT:    v_fma_f32 v4, -v2, v1, v0
; GCN-NEXT:    v_fma_f32 v5, -v3, v1, v0
; GCN-NEXT:    v_cmp_ge_f32_e64 s0, 0, v4
; GCN-NEXT:    v_cndmask_b32_e64 v1, v1, v2, s0
; GCN-NEXT:    v_cmp_lt_f32_e64 s0, 0, v5
; GCN-NEXT:    v_cndmask_b32_e64 v1, v1, v3, s0
; GCN-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc_lo
; GCN-NEXT:    v_cmp_class_f32_e64 vcc_lo, v0, 0x260
; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; GCN-NEXT:    v_rcp_f32_e32 v0, v0
; GCN-NEXT:    ; return to shader part epilog
.entry:
  %a = call contract float @llvm.sqrt.f32(float %arg1)
  %b = call contract float @llvm.amdgcn.rcp.f32(float %a)
  ret float %b
}

; sqrt(rcp(x)) with `contract` on both intrinsic calls.
; The expected sequence matches @sqrt_rcp instruction for instruction.
define amdgpu_cs float @sqrt_rcp_contract(float inreg %arg1) {
; GCN-LABEL: sqrt_rcp_contract:
; GCN:       ; %bb.0: ; %.entry
; GCN-NEXT:    v_rcp_f32_e32 v0, s0
; GCN-NEXT:    v_mul_f32_e32 v1, 0x4f800000, v0
; GCN-NEXT:    v_cmp_gt_f32_e32 vcc_lo, 0xf800000, v0
; GCN-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
; GCN-NEXT:    v_sqrt_f32_e32 v1, v0
; GCN-NEXT:    v_add_nc_u32_e32 v2, -1, v1
; GCN-NEXT:    v_add_nc_u32_e32 v3, 1, v1
; GCN-NEXT:    v_fma_f32 v4, -v2, v1, v0
; GCN-NEXT:    v_fma_f32 v5, -v3, v1, v0
; GCN-NEXT:    v_cmp_ge_f32_e64 s0, 0, v4
; GCN-NEXT:    v_cndmask_b32_e64 v1, v1, v2, s0
; GCN-NEXT:    v_cmp_lt_f32_e64 s0, 0, v5
; GCN-NEXT:    v_cndmask_b32_e64 v1, v1, v3, s0
; GCN-NEXT:    v_mul_f32_e32 v2, 0x37800000, v1
; GCN-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc_lo
; GCN-NEXT:    v_cmp_class_f32_e64 vcc_lo, v0, 0x260
; GCN-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
; GCN-NEXT:    ; return to shader part epilog
.entry:
  %a = call contract float @llvm.amdgcn.rcp.f32(float %arg1)
  %b = call contract float @llvm.sqrt.f32(float %a)
  ret float %b
}

; Intrinsics used above: the generic square root and the AMDGPU-specific
; reciprocal.
declare float @llvm.sqrt.f32(float)
declare float @llvm.amdgcn.rcp.f32(float)