; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s

; Reduce a 64-bit add by a constant if we know the low 32-bits are all
; zero.

; add i64:x, K if computeTrailingZeros(K) >= 32
;   => build_pair (add x.hi, K.hi), x.lo

; Scalar cases (inreg arguments, amdgpu_ps). The constant has at least 32
; trailing zero bits, so the checks expect a single 32-bit add on the high
; half (s1) and no instruction touching the low half (s0).

define amdgpu_ps i64 @s_add_i64_const_low_bits_known0_0(i64 inreg %reg) {
; GFX9-LABEL: s_add_i64_const_low_bits_known0_0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_add_i32 s1, s1, 0x40000
; GFX9-NEXT:    ; return to shader part epilog
  %add = add i64 %reg, 1125899906842624 ; (1 << 50)
  ret i64 %add
}

define amdgpu_ps i64 @s_add_i64_const_low_bits_known0_1(i64 inreg %reg) {
; GFX9-LABEL: s_add_i64_const_low_bits_known0_1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_add_i32 s1, s1, 1
; GFX9-NEXT:    ; return to shader part epilog
  %add = add i64 %reg, 4294967296 ; (1 << 32)
  ret i64 %add
}

define amdgpu_ps i64 @s_add_i64_const_low_bits_known0_2(i64 inreg %reg) {
; GFX9-LABEL: s_add_i64_const_low_bits_known0_2:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_add_i32 s1, s1, 2
; GFX9-NEXT:    ; return to shader part epilog
  %add = add i64 %reg, 8589934592 ; (1 << 33)
  ret i64 %add
}

define amdgpu_ps i64 @s_add_i64_const_low_bits_known0_3(i64 inreg %reg) {
; GFX9-LABEL: s_add_i64_const_low_bits_known0_3:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_add_i32 s1, s1, 0x80000000
; GFX9-NEXT:    ; return to shader part epilog
  %add = add i64 %reg, -9223372036854775808 ; (1 << 63)
  ret i64 %add
}

define amdgpu_ps i64 @s_add_i64_const_low_bits_known0_4(i64 inreg %reg) {
; GFX9-LABEL: s_add_i64_const_low_bits_known0_4:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_add_i32 s1, s1, -1
; GFX9-NEXT:    ; return to shader part epilog
  %add = add i64 %reg, -4294967296 ; 0xffffffff00000000
  ret i64 %add
}

; Vector-register versions of the same five constants (default calling
; convention). The checks expect a single 32-bit add on the high half (v1).

define i64 @v_add_i64_const_low_bits_known0_0(i64 %reg) {
; GFX9-LABEL: v_add_i64_const_low_bits_known0_0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_u32_e32 v1, 0x40000, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %add = add i64 %reg, 1125899906842624 ; (1 << 50)
  ret i64 %add
}

define i64 @v_add_i64_const_low_bits_known0_1(i64 %reg) {
; GFX9-LABEL: v_add_i64_const_low_bits_known0_1:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_u32_e32 v1, 1, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %add = add i64 %reg, 4294967296 ; (1 << 32)
  ret i64 %add
}

define i64 @v_add_i64_const_low_bits_known0_2(i64 %reg) {
; GFX9-LABEL: v_add_i64_const_low_bits_known0_2:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_u32_e32 v1, 2, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %add = add i64 %reg, 8589934592 ; (1 << 33)
  ret i64 %add
}

define i64 @v_add_i64_const_low_bits_known0_3(i64 %reg) {
; GFX9-LABEL: v_add_i64_const_low_bits_known0_3:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_u32_e32 v1, 0x80000000, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %add = add i64 %reg, -9223372036854775808 ; (1 << 63)
  ret i64 %add
}

define i64 @v_add_i64_const_low_bits_known0_4(i64 %reg) {
; GFX9-LABEL: v_add_i64_const_low_bits_known0_4:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_u32_e32 v1, -1, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %add = add i64 %reg, -4294967296 ; 0xffffffff00000000
  ret i64 %add
}

; Negative tests: the constant's low 32 bits are all ones (only its high 32
; bits are zero), so the reduction does not apply and the checks expect the
; full add / add-with-carry pair.

define amdgpu_ps i64 @s_add_i64_const_high_bits_known0_0(i64 inreg %reg) {
; GFX9-LABEL: s_add_i64_const_high_bits_known0_0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_add_u32 s0, s0, -1
; GFX9-NEXT:    s_addc_u32 s1, s1, 0
; GFX9-NEXT:    ; return to shader part epilog
  %add = add i64 %reg, 4294967295 ; (1 << 32) - 1 = 0xffffffff
  ret i64 %add
}

define i64 @v_add_i64_const_high_bits_known0_0(i64 %reg) {
; GFX9-LABEL: v_add_i64_const_high_bits_known0_0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, -1, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %add = add i64 %reg, 4294967295 ; (1 << 32) - 1 = 0xffffffff
  ret i64 %add
}

; <2 x i64> cases with splat and non-splat constant operands. The checks
; expect one 32-bit add on the high half of each element.

define <2 x i64> @v_add_v2i64_splat_const_low_bits_known0_0(<2 x i64> %reg) {
; GFX9-LABEL: v_add_v2i64_splat_const_low_bits_known0_0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_u32_e32 v1, 1, v1
; GFX9-NEXT:    v_add_u32_e32 v3, 1, v3
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %add = add <2 x i64> %reg, <i64 4294967296, i64 4294967296> ; (1 << 32)
  ret <2 x i64> %add
}

define <2 x i64> @v_add_v2i64_nonsplat_const_low_bits_known0_0(<2 x i64> %reg) {
; GFX9-LABEL: v_add_v2i64_nonsplat_const_low_bits_known0_0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_u32_e32 v1, 1, v1
; GFX9-NEXT:    v_add_u32_e32 v3, 2, v3
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %add = add <2 x i64> %reg, <i64 4294967296, i64 8589934592> ; (1 << 32), (1 << 33)
  ret <2 x i64> %add
}

define amdgpu_ps <2 x i64> @s_add_v2i64_splat_const_low_bits_known0_0(<2 x i64> inreg %reg) {
; GFX9-LABEL: s_add_v2i64_splat_const_low_bits_known0_0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_add_i32 s1, s1, 1
; GFX9-NEXT:    s_add_i32 s3, s3, 1
; GFX9-NEXT:    ; return to shader part epilog
  %add = add <2 x i64> %reg, <i64 4294967296, i64 4294967296> ; (1 << 32)
  ret <2 x i64> %add
}

define amdgpu_ps <2 x i64> @s_add_v2i64_nonsplat_const_low_bits_known0_0(<2 x i64> inreg %reg) {
; GFX9-LABEL: s_add_v2i64_nonsplat_const_low_bits_known0_0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_add_i32 s1, s1, 1
; GFX9-NEXT:    s_add_i32 s3, s3, 2
; GFX9-NEXT:    ; return to shader part epilog
  %add = add <2 x i64> %reg, <i64 4294967296, i64 8589934592> ; (1 << 32), (1 << 33)
  ret <2 x i64> %add
}

; Variable addend: (zext %offset.hi32) << 32, so its low 32 bits are zero but
; it is not a constant. The checks currently expect the full add / add-with-carry
; pair, with an add of 0 on the low half.

; We could reduce this to use a 32-bit add if we use computeKnownBits
define i64 @v_add_i64_variable_high_bits_known0_0(i64 %reg, i32 %offset.hi32) {
; GFX9-LABEL: v_add_i64_variable_high_bits_known0_0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, 0, v0
; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %zext.offset.hi32 = zext i32 %offset.hi32 to i64
  %in.high.bits = shl i64 %zext.offset.hi32, 32
  %add = add i64 %reg, %in.high.bits
  ret i64 %add
}

; We could reduce this to use a 32-bit add if we use computeKnownBits
define amdgpu_ps i64 @s_add_i64_variable_high_bits_known0_0(i64 inreg %reg, i32 inreg %offset.hi32) {
; GFX9-LABEL: s_add_i64_variable_high_bits_known0_0:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_add_u32 s0, s0, 0
; GFX9-NEXT:    s_addc_u32 s1, s1, s2
; GFX9-NEXT:    ; return to shader part epilog
  %zext.offset.hi32 = zext i32 %offset.hi32 to i64
  %in.high.bits = shl i64 %zext.offset.hi32, 32
  %add = add i64 %reg, %in.high.bits
  ret i64 %add
}