; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=EG,FUNC %s

; Integer `sub` code generation test for the r600 target (-mcpu=redwood).
; Covers i32, i16 and i64 operands in scalar and vector form. Kernels that
; carry only a label check merely have to compile and pass the machine
; verifier; the others also pin the expected ALU instructions.

declare i32 @llvm.r600.read.tidig.x() readnone

; Both i32 operands are kernel arguments; the difference is stored to %out.
; FUNC-LABEL: {{^}}s_sub_i32:
define amdgpu_kernel void @s_sub_i32(ptr addrspace(1) %out, i32 %a, i32 %b) {
  %diff = sub i32 %a, %b
  store i32 %diff, ptr addrspace(1) %out
  ret void
}

; Constant minuend (1234) minus an i32 kernel argument.
; FUNC-LABEL: {{^}}s_sub_imm_i32:
define amdgpu_kernel void @s_sub_imm_i32(ptr addrspace(1) %out, i32 %a) {
  %diff = sub i32 1234, %a
  store i32 %diff, ptr addrspace(1) %out
  ret void
}

; Operands are two consecutive i32 values loaded from %in. One SUB_INT with
; three T-register operands is expected.
; FUNC-LABEL: {{^}}test_sub_i32:
; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
define amdgpu_kernel void @test_sub_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
  %in.1 = getelementptr i32, ptr addrspace(1) %in, i32 1
  %lhs = load i32, ptr addrspace(1) %in
  %rhs = load i32, ptr addrspace(1) %in.1
  %diff = sub i32 %lhs, %rhs
  store i32 %diff, ptr addrspace(1) %out
  ret void
}

; Constant minuend (123) minus an i32 value loaded from %in.
; FUNC-LABEL: {{^}}test_sub_imm_i32:
; EG: SUB_INT
define amdgpu_kernel void @test_sub_imm_i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
  %val = load i32, ptr addrspace(1) %in
  %diff = sub i32 123, %val
  store i32 %diff, ptr addrspace(1) %out
  ret void
}

; <2 x i32> subtraction of two consecutive vectors loaded from %in; two
; SUB_INT instructions are expected.
; FUNC-LABEL: {{^}}test_sub_v2i32:
; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
define amdgpu_kernel void @test_sub_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
  %in.1 = getelementptr <2 x i32>, ptr addrspace(1) %in, i32 1
  %lhs = load <2 x i32>, ptr addrspace(1) %in
  %rhs = load <2 x i32>, ptr addrspace(1) %in.1
  %diff = sub <2 x i32> %lhs, %rhs
  store <2 x i32> %diff, ptr addrspace(1) %out
  ret void
}

; <4 x i32> subtraction of two consecutive vectors loaded from %in; four
; SUB_INT instructions are expected.
; FUNC-LABEL: {{^}}test_sub_v4i32:
; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; EG: SUB_INT {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
define amdgpu_kernel void @test_sub_v4i32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
  %in.1 = getelementptr <4 x i32>, ptr addrspace(1) %in, i32 1
  %lhs = load <4 x i32>, ptr addrspace(1) %in
  %rhs = load <4 x i32>, ptr addrspace(1) %in.1
  %diff = sub <4 x i32> %lhs, %rhs
  store <4 x i32> %diff, ptr addrspace(1) %out
  ret void
}

; i16 subtraction. The operands are read with volatile loads from the element
; of %in selected by the tidig.x intrinsic and from the element right after it.
; FUNC-LABEL: {{^}}test_sub_i16:
define amdgpu_kernel void @test_sub_i16(ptr addrspace(1) %out, ptr addrspace(1) %in) {
  %id.x = call i32 @llvm.r600.read.tidig.x()
  %elt.ptr = getelementptr i16, ptr addrspace(1) %in, i32 %id.x
  %next.ptr = getelementptr i16, ptr addrspace(1) %elt.ptr, i32 1
  %lhs = load volatile i16, ptr addrspace(1) %elt.ptr
  %rhs = load volatile i16, ptr addrspace(1) %next.ptr
  %diff = sub i16 %lhs, %rhs
  store i16 %diff, ptr addrspace(1) %out
  ret void
}

; <2 x i16> subtraction of the vector at index tidig.x in %in and the vector
; that follows it (non-volatile loads).
; FUNC-LABEL: {{^}}test_sub_v2i16:
define amdgpu_kernel void @test_sub_v2i16(ptr addrspace(1) %out, ptr addrspace(1) %in) {
  %id.x = call i32 @llvm.r600.read.tidig.x()
  %elt.ptr = getelementptr <2 x i16>, ptr addrspace(1) %in, i32 %id.x
  %next.ptr = getelementptr <2 x i16>, ptr addrspace(1) %elt.ptr, i16 1
  %lhs = load <2 x i16>, ptr addrspace(1) %elt.ptr
  %rhs = load <2 x i16>, ptr addrspace(1) %next.ptr
  %diff = sub <2 x i16> %lhs, %rhs
  store <2 x i16> %diff, ptr addrspace(1) %out
  ret void
}

; <4 x i16> subtraction of the vector at index tidig.x in %in and the vector
; that follows it (non-volatile loads).
; FUNC-LABEL: {{^}}test_sub_v4i16:
define amdgpu_kernel void @test_sub_v4i16(ptr addrspace(1) %out, ptr addrspace(1) %in) {
  %id.x = call i32 @llvm.r600.read.tidig.x()
  %elt.ptr = getelementptr <4 x i16>, ptr addrspace(1) %in, i32 %id.x
  %next.ptr = getelementptr <4 x i16>, ptr addrspace(1) %elt.ptr, i16 1
  %lhs = load <4 x i16>, ptr addrspace(1) %elt.ptr
  %rhs = load <4 x i16>, ptr addrspace(1) %next.ptr
  %diff = sub <4 x i16> %lhs, %rhs
  store <4 x i16> %diff, ptr addrspace(1) %out
  ret void
}

; i64 subtraction of two kernel arguments. After the MEM_RAT_CACHELESS
; STORE_RAW of an .XY register pair, three SUB_INT and one SUBB_UINT must
; appear; their relative order is not constrained.
; FUNC-LABEL: {{^}}s_sub_i64:
; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XY
; EG-DAG: SUB_INT {{[* ]*}}
; EG-DAG: SUBB_UINT
; EG-DAG: SUB_INT
; EG-DAG: SUB_INT {{[* ]*}}
define amdgpu_kernel void @s_sub_i64(ptr addrspace(1) noalias %out, i64 %a, i64 %b) nounwind {
  %diff = sub i64 %a, %b
  store i64 %diff, ptr addrspace(1) %out, align 8
  ret void
}

; i64 subtraction of values loaded from %inA and %inB at index tidig.x. The
; expected instruction set is the same as for s_sub_i64.
; FUNC-LABEL: {{^}}v_sub_i64:
; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XY
; EG-DAG: SUB_INT {{[* ]*}}
; EG-DAG: SUBB_UINT
; EG-DAG: SUB_INT
; EG-DAG: SUB_INT {{[* ]*}}
define amdgpu_kernel void @v_sub_i64(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %inA, ptr addrspace(1) noalias %inB) nounwind {
  %id.x = call i32 @llvm.r600.read.tidig.x() readnone
  %lhs.ptr = getelementptr i64, ptr addrspace(1) %inA, i32 %id.x
  %rhs.ptr = getelementptr i64, ptr addrspace(1) %inB, i32 %id.x
  %lhs = load i64, ptr addrspace(1) %lhs.ptr
  %rhs = load i64, ptr addrspace(1) %rhs.ptr
  %diff = sub i64 %lhs, %rhs
  store i64 %diff, ptr addrspace(1) %out, align 8
  ret void
}

; <2 x i64> subtraction of vectors loaded from %inA and %inB at index tidig.x.
; FUNC-LABEL: {{^}}v_test_sub_v2i64:
define amdgpu_kernel void @v_test_sub_v2i64(ptr addrspace(1) %out, ptr addrspace(1) noalias %inA, ptr addrspace(1) noalias %inB) {
  %id.x = call i32 @llvm.r600.read.tidig.x() readnone
  %lhs.ptr = getelementptr <2 x i64>, ptr addrspace(1) %inA, i32 %id.x
  %rhs.ptr = getelementptr <2 x i64>, ptr addrspace(1) %inB, i32 %id.x
  %lhs = load <2 x i64>, ptr addrspace(1) %lhs.ptr
  %rhs = load <2 x i64>, ptr addrspace(1) %rhs.ptr
  %diff = sub <2 x i64> %lhs, %rhs
  store <2 x i64> %diff, ptr addrspace(1) %out
  ret void
}

; <4 x i64> subtraction of vectors loaded from %inA and %inB at index tidig.x.
; FUNC-LABEL: {{^}}v_test_sub_v4i64:
define amdgpu_kernel void @v_test_sub_v4i64(ptr addrspace(1) %out, ptr addrspace(1) noalias %inA, ptr addrspace(1) noalias %inB) {
  %id.x = call i32 @llvm.r600.read.tidig.x() readnone
  %lhs.ptr = getelementptr <4 x i64>, ptr addrspace(1) %inA, i32 %id.x
  %rhs.ptr = getelementptr <4 x i64>, ptr addrspace(1) %inB, i32 %id.x
  %lhs = load <4 x i64>, ptr addrspace(1) %lhs.ptr
  %rhs = load <4 x i64>, ptr addrspace(1) %rhs.ptr
  %diff = sub <4 x i64> %lhs, %rhs
  store <4 x i64> %diff, ptr addrspace(1) %out
  ret void
}