; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=amdgcn-amd-mesa3d -mcpu=fiji -verify-machineinstrs | FileCheck -check-prefix=VI %s
; RUN: llc < %s -mtriple=amdgcn-amd-mesa3d -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefix=GFX9 %s
; RUN: llc < %s -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 -verify-machineinstrs | FileCheck -check-prefix=GFX10 %s
; RUN: llc < %s -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs | FileCheck -check-prefix=GFX10 %s

; The gfx1100 invocation shares the GFX10 check prefix with gfx1010, so both
; targets must produce the same instruction sequence for every function below.

; ===================================================================================
; V_OR3_B32
; ===================================================================================

; Baseline: (a | b) | c with all three inputs in VGPRs (v0, v1, v2).
; The VI checks expect two v_or_b32_e32; the GFX9 and GFX10 checks expect a
; single v_or3_b32.
define amdgpu_ps float @or3(i32 %a, i32 %b, i32 %c) {
; VI-LABEL: or3:
; VI:       ; %bb.0:
; VI-NEXT:    v_or_b32_e32 v0, v0, v1
; VI-NEXT:    v_or_b32_e32 v0, v0, v2
; VI-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: or3:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_or3_b32 v0, v0, v1, v2
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: or3:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_or3_b32 v0, v0, v1, v2
; GFX10-NEXT:    ; return to shader part epilog
  %x = or i32 %a, %b
  %result = or i32 %x, %c
  %bc = bitcast i32 %result to float
  ret float %bc
}

; One VGPR input (%a) and two SGPR inputs (%b and %c are inreg, seen as s2/s3).
; On GFX9 the ThreeOp instruction variant is not used due to Constant Bus
; Limitations, so two v_or_b32_e32 are expected there (as on VI). The GFX10
; checks do expect v_or3_b32 with both SGPR operands.
; TODO: with reassociation it is possible to replace a v_or_b32_e32 with an s_or_b32
define amdgpu_ps float @or3_vgpr_a(i32 %a, i32 inreg %b, i32 inreg %c) {
; VI-LABEL: or3_vgpr_a:
; VI:       ; %bb.0:
; VI-NEXT:    v_or_b32_e32 v0, s2, v0
; VI-NEXT:    v_or_b32_e32 v0, s3, v0
; VI-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: or3_vgpr_a:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_or_b32_e32 v0, s2, v0
; GFX9-NEXT:    v_or_b32_e32 v0, s3, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: or3_vgpr_a:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_or3_b32 v0, v0, s2, s3
; GFX10-NEXT:    ; return to shader part epilog
  %x = or i32 %a, %b
  %result = or i32 %x, %c
  %bc = bitcast i32 %result to float
  ret float %bc
}

; All-VGPR inputs with the inner OR as the right-hand operand of the outer OR:
; a | (b | c). The three-operand form is still expected on GFX9 and GFX10,
; with the inner operands (v1, v2) listed first.
define amdgpu_ps float @or3_vgpr_all2(i32 %a, i32 %b, i32 %c) {
; VI-LABEL: or3_vgpr_all2:
; VI:       ; %bb.0:
; VI-NEXT:    v_or_b32_e32 v1, v1, v2
; VI-NEXT:    v_or_b32_e32 v0, v0, v1
; VI-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: or3_vgpr_all2:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_or3_b32 v0, v1, v2, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: or3_vgpr_all2:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_or3_b32 v0, v1, v2, v0
; GFX10-NEXT:    ; return to shader part epilog
  %x = or i32 %b, %c
  %result = or i32 %a, %x
  %bc = bitcast i32 %result to float
  ret float %bc
}

; One SGPR input (%a is inreg, seen as s2) and two VGPR inputs. With only a
; single SGPR operand, v_or3_b32 is expected on both GFX9 and GFX10.
define amdgpu_ps float @or3_vgpr_bc(i32 inreg %a, i32 %b, i32 %c) {
; VI-LABEL: or3_vgpr_bc:
; VI:       ; %bb.0:
; VI-NEXT:    v_or_b32_e32 v0, s2, v0
; VI-NEXT:    v_or_b32_e32 v0, v0, v1
; VI-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: or3_vgpr_bc:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_or3_b32 v0, s2, v0, v1
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: or3_vgpr_bc:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_or3_b32 v0, s2, v0, v1
; GFX10-NEXT:    ; return to shader part epilog
  %x = or i32 %a, %b
  %result = or i32 %x, %c
  %bc = bitcast i32 %result to float
  ret float %bc
}

; Two VGPR inputs plus the constant 64, written as the left-hand operand of
; the inner OR. The GFX9 and GFX10 checks expect the constant to appear as the
; last source operand of v_or3_b32.
define amdgpu_ps float @or3_vgpr_const(i32 %a, i32 %b) {
; VI-LABEL: or3_vgpr_const:
; VI:       ; %bb.0:
; VI-NEXT:    v_or_b32_e32 v0, v1, v0
; VI-NEXT:    v_or_b32_e32 v0, 64, v0
; VI-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: or3_vgpr_const:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_or3_b32 v0, v1, v0, 64
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: or3_vgpr_const:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_or3_b32 v0, v1, v0, 64
; GFX10-NEXT:    ; return to shader part epilog
  %x = or i32 64, %b
  %result = or i32 %x, %a
  %bc = bitcast i32 %result to float
  ret float %bc
}