; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=amdgcn-amd-mesa3d -mcpu=fiji -verify-machineinstrs | FileCheck -check-prefix=VI %s
; RUN: llc < %s -mtriple=amdgcn-amd-mesa3d -mcpu=gfx900 -verify-machineinstrs | FileCheck -check-prefix=GFX9 %s
; RUN: llc < %s -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 -verify-machineinstrs | FileCheck -check-prefix=GFX10 %s
; RUN: llc < %s -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 -verify-machineinstrs | FileCheck -check-prefix=GFX10 %s

; ===================================================================================
; V_XAD_U32
; ===================================================================================
;
; Every function below computes (xor %a, <b>) + <c> and returns the i32 result
; bitcast to float. The checks record whether the xor/add pair is selected as a
; single v_xad_u32 or as separate v_xor_b32 / v_add_u32 instructions. Arguments
; marked inreg show up as s-registers in the checks, the others as v-registers.

; All three operands in v-registers: fiji keeps two instructions, the gfx900
; and gfx10/gfx11 run lines fold them into v_xad_u32.
define amdgpu_ps float @xor_add(i32 %a, i32 %b, i32 %c) {
; VI-LABEL: xor_add:
; VI:       ; %bb.0:
; VI-NEXT:    v_xor_b32_e32 v0, v0, v1
; VI-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
; VI-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: xor_add:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_xad_u32 v0, v0, v1, v2
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: xor_add:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_xad_u32 v0, v0, v1, v2
; GFX10-NEXT:    ; return to shader part epilog
  %x = xor i32 %a, %b
  %result = add i32 %x, %c
  %bc = bitcast i32 %result to float
  ret float %bc
}

; %b and %c are both inreg (s2 and s3 in the checks). On the gfx900 run line the
; three-operand v_xad_u32 is not used due to constant bus limitations, so it
; emits separate v_xor / v_add like fiji does. The gfx1010 and gfx1100 run
; lines do select v_xad_u32 with both scalar operands.
define amdgpu_ps float @xor_add_vgpr_a(i32 %a, i32 inreg %b, i32 inreg %c) {
; VI-LABEL: xor_add_vgpr_a:
; VI:       ; %bb.0:
; VI-NEXT:    v_xor_b32_e32 v0, s2, v0
; VI-NEXT:    v_add_u32_e32 v0, vcc, s3, v0
; VI-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: xor_add_vgpr_a:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_xor_b32_e32 v0, s2, v0
; GFX9-NEXT:    v_add_u32_e32 v0, s3, v0
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: xor_add_vgpr_a:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_xad_u32 v0, v0, s2, s3
; GFX10-NEXT:    ; return to shader part epilog
  %x = xor i32 %a, %b
  %result = add i32 %x, %c
  %bc = bitcast i32 %result to float
  ret float %bc
}

; Same signature, body and expected output as @xor_add (no inreg operands).
define amdgpu_ps float @xor_add_vgpr_all(i32 %a, i32 %b, i32 %c) {
; VI-LABEL: xor_add_vgpr_all:
; VI:       ; %bb.0:
; VI-NEXT:    v_xor_b32_e32 v0, v0, v1
; VI-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
; VI-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: xor_add_vgpr_all:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_xad_u32 v0, v0, v1, v2
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: xor_add_vgpr_all:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_xad_u32 v0, v0, v1, v2
; GFX10-NEXT:    ; return to shader part epilog
  %x = xor i32 %a, %b
  %result = add i32 %x, %c
  %bc = bitcast i32 %result to float
  ret float %bc
}

; Only the addend %c is inreg (s2 in the checks): a single scalar operand still
; allows v_xad_u32 on the gfx900 and gfx10/gfx11 run lines.
define amdgpu_ps float @xor_add_vgpr_ab(i32 %a, i32 %b, i32 inreg %c) {
; VI-LABEL: xor_add_vgpr_ab:
; VI:       ; %bb.0:
; VI-NEXT:    v_xor_b32_e32 v0, v0, v1
; VI-NEXT:    v_add_u32_e32 v0, vcc, s2, v0
; VI-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: xor_add_vgpr_ab:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_xad_u32 v0, v0, v1, s2
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: xor_add_vgpr_ab:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_xad_u32 v0, v0, v1, s2
; GFX10-NEXT:    ; return to shader part epilog
  %x = xor i32 %a, %b
  %result = add i32 %x, %c
  %bc = bitcast i32 %result to float
  ret float %bc
}

; The xor operand is the literal 3; it appears directly as the second source
; operand of v_xad_u32 in the gfx900 and gfx10/gfx11 checks.
define amdgpu_ps float @xor_add_vgpr_const(i32 %a, i32 %b) {
; VI-LABEL: xor_add_vgpr_const:
; VI:       ; %bb.0:
; VI-NEXT:    v_xor_b32_e32 v0, 3, v0
; VI-NEXT:    v_add_u32_e32 v0, vcc, v0, v1
; VI-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: xor_add_vgpr_const:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_xad_u32 v0, v0, 3, v1
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: xor_add_vgpr_const:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    v_xad_u32 v0, v0, 3, v1
; GFX10-NEXT:    ; return to shader part epilog
  %x = xor i32 %a, 3
  %result = add i32 %x, %b
  %bc = bitcast i32 %result to float
  ret float %bc
}