; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=amdgcn-amd-mesa3d -mcpu=fiji -verify-machineinstrs | FileCheck -check-prefix=VI %s
; RUN: llc < %s -mtriple=amdgcn-amd-mesa3d -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefix=GFX9 %s
; RUN: llc < %s -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 -verify-machineinstrs | FileCheck -check-prefix=GFX10 %s
; RUN: llc < %s -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs | FileCheck -check-prefix=GFX10 %s

; ===================================================================================
; V_ADD_LSHL_U32
; ===================================================================================
;
; Every function below adds two i32 values, shifts the sum left, and returns
; the resulting bits reinterpreted as a float. The variants differ only in
; which operands are marked inreg and which are literal constants.

; All three operands are plain (non-inreg) arguments.
define amdgpu_ps float @add_shl(i32 %a, i32 %b, i32 %c) {
; VI-LABEL: add_shl:
; VI:       ; %bb.0:
; VI-NEXT:    v_add_u32_e32 v0, vcc, v0, v1
; VI-NEXT:    v_lshlrev_b32_e32 v0, v2, v0
; VI-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: add_shl:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_add_lshl_u32 v0, v0, v1, v2
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: add_shl:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_add_lshl_u32 v0, v0, v1, v2
; GFX10-NEXT:    ; return to shader part epilog
  %sum = add i32 %a, %b
  %shifted = shl i32 %sum, %c
  %as_float = bitcast i32 %shifted to float
  ret float %as_float
}

; Both addends are inreg (s2 and s3 in the checks); only the shift amount is
; a non-inreg argument (v0 in the checks).
define amdgpu_ps float @add_shl_vgpr_c(i32 inreg %a, i32 inreg %b, i32 %c) {
; VI-LABEL: add_shl_vgpr_c:
; VI:       ; %bb.0:
; VI-NEXT:    s_add_i32 s2, s2, s3
; VI-NEXT:    v_lshlrev_b32_e64 v0, v0, s2
; VI-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: add_shl_vgpr_c:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_add_i32 s2, s2, s3
; GFX9-NEXT:    v_lshlrev_b32_e64 v0, v0, s2
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: add_shl_vgpr_c:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_add_lshl_u32 v0, s2, s3, v0
; GFX10-NEXT:    ; return to shader part epilog
  %sum = add i32 %a, %b
  %shifted = shl i32 %sum, %c
  %as_float = bitcast i32 %shifted to float
  ret float %as_float
}

; Only the second addend is inreg (s2 in the checks); the first addend and the
; shift amount are non-inreg arguments.
define amdgpu_ps float @add_shl_vgpr_ac(i32 %a, i32 inreg %b, i32 %c) {
; VI-LABEL: add_shl_vgpr_ac:
; VI:       ; %bb.0:
; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
; VI-NEXT:    v_lshlrev_b32_e32 v0, v1, v0
; VI-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: add_shl_vgpr_ac:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_add_lshl_u32 v0, v0, s2, v1
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: add_shl_vgpr_ac:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_add_lshl_u32 v0, v0, s2, v1
; GFX10-NEXT:    ; return to shader part epilog
  %sum = add i32 %a, %b
  %shifted = shl i32 %sum, %c
  %as_float = bitcast i32 %shifted to float
  ret float %as_float
}

; Two non-inreg addends shifted left by the literal 9.
define amdgpu_ps float @add_shl_vgpr_const(i32 %a, i32 %b) {
; VI-LABEL: add_shl_vgpr_const:
; VI:       ; %bb.0:
; VI-NEXT:    v_add_u32_e32 v0, vcc, v0, v1
; VI-NEXT:    v_lshlrev_b32_e32 v0, 9, v0
; VI-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: add_shl_vgpr_const:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_add_lshl_u32 v0, v0, v1, 9
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: add_shl_vgpr_const:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_add_lshl_u32 v0, v0, v1, 9
; GFX10-NEXT:    ; return to shader part epilog
  %sum = add i32 %a, %b
  %shifted = shl i32 %sum, 9
  %as_float = bitcast i32 %shifted to float
  ret float %as_float
}

; One non-inreg addend plus the literal 1012, shifted left by the literal 9.
; The checks expect the addend pre-shifted: 1012 << 9 == 0x7e800.
define amdgpu_ps float @add_shl_vgpr_const_inline_const(i32 %a) {
; VI-LABEL: add_shl_vgpr_const_inline_const:
; VI:       ; %bb.0:
; VI-NEXT:    v_lshlrev_b32_e32 v0, 9, v0
; VI-NEXT:    v_add_u32_e32 v0, vcc, 0x7e800, v0
; VI-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: add_shl_vgpr_const_inline_const:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v1, 0x7e800
; GFX9-NEXT:    v_lshl_add_u32 v0, v0, 9, v1
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: add_shl_vgpr_const_inline_const:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_lshl_add_u32 v0, v0, 9, 0x7e800
; GFX10-NEXT:    ; return to shader part epilog
  %sum = add i32 %a, 1012
  %shifted = shl i32 %sum, 9
  %as_float = bitcast i32 %shifted to float
  ret float %as_float
}

; TODO: The code generated here is not optimal. SelectionDAG rewrites
;       (shl (add x, CONST), y) ---> (add (shl x, y), CONST'),
;       so the checks below expect a shift followed by an add of the
;       pre-shifted constant (3 << 9 == 0x600) instead of v_add_lshl_u32.
;
define amdgpu_ps float @add_shl_vgpr_inline_const_x2(i32 %a) {
; VI-LABEL: add_shl_vgpr_inline_const_x2:
; VI:       ; %bb.0:
; VI-NEXT:    v_lshlrev_b32_e32 v0, 9, v0
; VI-NEXT:    v_add_u32_e32 v0, vcc, 0x600, v0
; VI-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: add_shl_vgpr_inline_const_x2:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_mov_b32_e32 v1, 0x600
; GFX9-NEXT:    v_lshl_add_u32 v0, v0, 9, v1
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: add_shl_vgpr_inline_const_x2:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_lshl_add_u32 v0, v0, 9, 0x600
; GFX10-NEXT:    ; return to shader part epilog
  %sum = add i32 %a, 3
  %shifted = shl i32 %sum, 9
  %as_float = bitcast i32 %shifted to float
  ret float %as_float
}