; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood < %s | FileCheck --check-prefix=R600 %s

; Tests that fneg(fabs(x)) folds into R600 source modifiers (-|src|) where the
; consuming instruction supports them, and lowers to explicit |..| / -.. MOVs
; where a free fold is not available (e.g. when the value is stored directly).

; fneg(fabs(x)) folds into the ADD operand as -|KC0[2].Z|.
define amdgpu_kernel void @fneg_fabsf_fadd_f32(ptr addrspace(1) %out, float %x, float %y) {
; R600-LABEL: fneg_fabsf_fadd_f32:
; R600:       ; %bb.0:
; R600-NEXT:    ALU 2, @4, KC0[CB0:0-32], KC1[]
; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
; R600-NEXT:    CF_END
; R600-NEXT:    PAD
; R600-NEXT:    ALU clause starting at 4:
; R600-NEXT:     LSHR T0.X, KC0[2].Y, literal.x,
; R600-NEXT:     ADD * T1.X, KC0[2].W, -|KC0[2].Z|,
; R600-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  %fabs = call float @llvm.fabs.f32(float %x)
  %fsub = fsub float -0.000000e+00, %fabs
  %fadd = fadd float %y, %fsub
  store float %fadd, ptr addrspace(1) %out, align 4
  ret void
}

; fneg(fabs(x)) folds into the MUL_IEEE operand as -|KC0[2].Z|.
define amdgpu_kernel void @fneg_fabsf_fmul_f32(ptr addrspace(1) %out, float %x, float %y) {
; R600-LABEL: fneg_fabsf_fmul_f32:
; R600:       ; %bb.0:
; R600-NEXT:    ALU 2, @4, KC0[CB0:0-32], KC1[]
; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T1.X, T0.X, 1
; R600-NEXT:    CF_END
; R600-NEXT:    PAD
; R600-NEXT:    ALU clause starting at 4:
; R600-NEXT:     LSHR T0.X, KC0[2].Y, literal.x,
; R600-NEXT:     MUL_IEEE * T1.X, KC0[2].W, -|KC0[2].Z|,
; R600-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  %fabs = call float @llvm.fabs.f32(float %x)
  %fsub = fsub float -0.000000e+00, %fabs
  %fmul = fmul float %y, %fsub
  store float %fmul, ptr addrspace(1) %out, align 4
  ret void
}

; "Free" fneg(fabs()) through an i32 bitcast; the stored value is produced
; with explicit |PV.W| and -PV.W MOVs since there is no consumer to fold into.
define amdgpu_kernel void @fneg_fabsf_free_f32(ptr addrspace(1) %out, i32 %in) {
; R600-LABEL: fneg_fabsf_free_f32:
; R600:       ; %bb.0:
; R600-NEXT:    ALU 4, @4, KC0[CB0:0-32], KC1[]
; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; R600-NEXT:    CF_END
; R600-NEXT:    PAD
; R600-NEXT:    ALU clause starting at 4:
; R600-NEXT:     MOV * T0.W, KC0[2].Z,
; R600-NEXT:     MOV * T0.W, |PV.W|,
; R600-NEXT:     MOV T0.X, -PV.W,
; R600-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
; R600-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  %bc = bitcast i32 %in to float
  %fabs = call float @llvm.fabs.f32(float %bc)
  %fsub = fsub float -0.000000e+00, %fabs
  store float %fsub, ptr addrspace(1) %out
  ret void
}

; Same as above but using the libm fabsf function instead of the intrinsic;
; codegen is identical.
define amdgpu_kernel void @fneg_fabsf_fn_free_f32(ptr addrspace(1) %out, i32 %in) {
; R600-LABEL: fneg_fabsf_fn_free_f32:
; R600:       ; %bb.0:
; R600-NEXT:    ALU 4, @4, KC0[CB0:0-32], KC1[]
; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; R600-NEXT:    CF_END
; R600-NEXT:    PAD
; R600-NEXT:    ALU clause starting at 4:
; R600-NEXT:     MOV * T0.W, KC0[2].Z,
; R600-NEXT:     MOV * T0.W, |PV.W|,
; R600-NEXT:     MOV T0.X, -PV.W,
; R600-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
; R600-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  %bc = bitcast i32 %in to float
  %fabs = call float @fabsf(float %bc)
  %fsub = fsub float -0.000000e+00, %fabs
  store float %fsub, ptr addrspace(1) %out
  ret void
}

; Scalar float input stored directly: explicit |..| then -.. MOVs.
define amdgpu_kernel void @fneg_fabsf_f32(ptr addrspace(1) %out, float %in) {
; R600-LABEL: fneg_fabsf_f32:
; R600:       ; %bb.0:
; R600-NEXT:    ALU 4, @4, KC0[CB0:0-32], KC1[]
; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; R600-NEXT:    CF_END
; R600-NEXT:    PAD
; R600-NEXT:    ALU clause starting at 4:
; R600-NEXT:     MOV * T0.W, KC0[2].Z,
; R600-NEXT:     MOV * T0.W, |PV.W|,
; R600-NEXT:     MOV T0.X, -PV.W,
; R600-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
; R600-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  %fabs = call float @llvm.fabs.f32(float %in)
  %fsub = fsub float -0.000000e+00, %fabs
  store float %fsub, ptr addrspace(1) %out, align 4
  ret void
}

; Value loaded from memory (VTX_READ) rather than a kernel argument; the
; fneg(fabs()) is still applied via |T0.X| and -PV.W MOVs before the store.
define amdgpu_kernel void @v_fneg_fabsf_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; R600-LABEL: v_fneg_fabsf_f32:
; R600:       ; %bb.0:
; R600-NEXT:    ALU 0, @8, KC0[CB0:0-32], KC1[]
; R600-NEXT:    TEX 0 @6
; R600-NEXT:    ALU 3, @9, KC0[CB0:0-32], KC1[]
; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
; R600-NEXT:    CF_END
; R600-NEXT:    PAD
; R600-NEXT:    Fetch clause starting at 6:
; R600-NEXT:     VTX_READ_32 T0.X, T0.X, 0, #1
; R600-NEXT:    ALU clause starting at 8:
; R600-NEXT:     MOV * T0.X, KC0[2].Z,
; R600-NEXT:    ALU clause starting at 9:
; R600-NEXT:     MOV * T0.W, |T0.X|,
; R600-NEXT:     MOV T0.X, -PV.W,
; R600-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
; R600-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  %val = load float, ptr addrspace(1) %in, align 4
  %fabs = call float @llvm.fabs.f32(float %val)
  %fsub = fsub float -0.000000e+00, %fabs
  store float %fsub, ptr addrspace(1) %out, align 4
  ret void
}

; Vector case: fneg(fabs()) is applied per element (two |..|/-.. pairs).
define amdgpu_kernel void @fneg_fabsf_v2f32(ptr addrspace(1) %out, <2 x float> %in) {
; R600-LABEL: fneg_fabsf_v2f32:
; R600:       ; %bb.0:
; R600-NEXT:    ALU 7, @4, KC0[CB0:0-32], KC1[]
; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
; R600-NEXT:    CF_END
; R600-NEXT:    PAD
; R600-NEXT:    ALU clause starting at 4:
; R600-NEXT:     MOV T0.W, KC0[3].X,
; R600-NEXT:     MOV * T1.W, KC0[2].W,
; R600-NEXT:     MOV * T0.W, |PV.W|,
; R600-NEXT:     MOV T0.Y, -PV.W,
; R600-NEXT:     MOV * T0.W, |T1.W|,
; R600-NEXT:     MOV T0.X, -PV.W,
; R600-NEXT:     LSHR * T1.X, KC0[2].Y, literal.x,
; R600-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %in)
  %fsub = fsub <2 x float> <float -0.000000e+00, float -0.000000e+00>, %fabs
  store <2 x float> %fsub, ptr addrspace(1) %out
  ret void
}

; Four-element vector case: per-element |..|/-.. applied across T2.XYZW.
define amdgpu_kernel void @fneg_fabsf_v4f32(ptr addrspace(1) %out, <4 x float> %in) {
; R600-LABEL: fneg_fabsf_v4f32:
; R600:       ; %bb.0:
; R600-NEXT:    ALU 13, @4, KC0[CB0:0-32], KC1[]
; R600-NEXT:    MEM_RAT_CACHELESS STORE_RAW T2.XYZW, T0.X, 1
; R600-NEXT:    CF_END
; R600-NEXT:    PAD
; R600-NEXT:    ALU clause starting at 4:
; R600-NEXT:     MOV * T0.W, KC0[4].X,
; R600-NEXT:     MOV T0.W, |PV.W|,
; R600-NEXT:     MOV * T1.W, KC0[3].W,
; R600-NEXT:     MOV T0.Z, KC0[3].Z,
; R600-NEXT:     MOV T1.W, |PS|,
; R600-NEXT:     MOV * T2.W, -PV.W,
; R600-NEXT:     MOV T2.Z, -PV.W,
; R600-NEXT:     MOV T0.W, KC0[3].Y,
; R600-NEXT:     MOV * T1.W, |PV.Z|,
; R600-NEXT:     MOV T2.Y, -PS,
; R600-NEXT:     MOV * T0.W, |PV.W|,
; R600-NEXT:     MOV T2.X, -PV.W,
; R600-NEXT:     LSHR * T0.X, KC0[2].Y, literal.x,
; R600-NEXT:    2(2.802597e-45), 0(0.000000e+00)
  %fabs = call <4 x float> @llvm.fabs.v4f32(<4 x float> %in)
  %fsub = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %fabs
  store <4 x float> %fsub, ptr addrspace(1) %out
  ret void
}

declare float @fabsf(float) readnone
declare float @llvm.fabs.f32(float) readnone
declare <2 x float> @llvm.fabs.v2f32(<2 x float>) readnone
declare <4 x float> @llvm.fabs.v4f32(<4 x float>) readnone

!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}