; xref: /llvm-project/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-short-clamp.ll (revision fe8335babba1725e18d6ea94073c3dbb92958bfa)
; RUN: llc -global-isel -mcpu=tahiti -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX678,GFX6789 %s
; RUN: llc -global-isel -mcpu=gfx900 -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX9,GFX6789 %s
; RUN: llc -global-isel -mcpu=gfx1010 -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX10 %s
; RUN: llc -global-isel -mcpu=gfx1100 -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX10 %s

; Signed 64-bit min/max intrinsics that the clamp test functions call.
declare i64 @llvm.smin.i64(i64, i64)
declare i64 @llvm.smax.i64(i64, i64)

; Clamp an i64 to the full i16 range [-32768, 32767] (smax first, then smin)
; and truncate the result to i16.
; Expected code: a v_cvt_pk_i16_i32 followed by a v_med3_i32 against the two
; bounds (0xffff8000 and 0x7fff).
; The label pattern ends in ':' so it matches only this symbol's definition;
; the bare name is a prefix of the other v_clamp_i64_i16_* function names.
; GCN-LABEL: {{^}}v_clamp_i64_i16:
; GFX678: v_cvt_pk_i16_i32_e32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]]
; GFX9: v_cvt_pk_i16_i32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]]
; GFX6789: v_mov_b32_e32 [[B]], 0xffff8000
; GFX6789: v_mov_b32_e32 [[C:v[0-9]+]], 0x7fff
; GFX6789: v_med3_i32 [[A]], [[B]], [[A]], [[C]]
; GFX10: v_cvt_pk_i16_i32{{(_e64)?}} [[A:v[0-9]+]], {{v[0-9]+}}, [[B:v[0-9]+]]
; GFX10: v_mov_b32_e32 [[B]], 0x7fff
; GFX10: v_med3_i32 [[A]], 0xffff8000, [[A]], [[B]]
define i16 @v_clamp_i64_i16(i64 %in) #0 {
entry:
  ; Lower bound first: max(in, -32768).
  %max = call i64 @llvm.smax.i64(i64 %in, i64 -32768)
  ; Then upper bound: min(.., 32767).
  %min = call i64 @llvm.smin.i64(i64 %max, i64 32767)
  %result = trunc i64 %min to i16
  ret i16 %result
}

; Same [-32768, 32767] clamp as v_clamp_i64_i16, but with the operations in
; the opposite order: smin is applied first, then smax.
; Expected code is the same shape: v_cvt_pk_i16_i32 followed by v_med3_i32.
; The label pattern ends in ':' so it matches only this symbol's definition.
; GCN-LABEL: {{^}}v_clamp_i64_i16_reverse:
; GFX678: v_cvt_pk_i16_i32_e32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]]
; GFX9: v_cvt_pk_i16_i32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]]
; GFX6789: v_mov_b32_e32 [[B]], 0xffff8000
; GFX6789: v_mov_b32_e32 [[C:v[0-9]+]], 0x7fff
; GFX6789: v_med3_i32 [[A]], [[B]], [[A]], [[C]]
; GFX10: v_cvt_pk_i16_i32{{(_e64)?}} [[A:v[0-9]+]], {{v[0-9]+}}, [[B:v[0-9]+]]
; GFX10: v_mov_b32_e32 [[B]], 0x7fff
; GFX10: v_med3_i32 [[A]], 0xffff8000, [[A]], [[B]]
define i16 @v_clamp_i64_i16_reverse(i64 %in) #0 {
entry:
  ; Upper bound first: min(in, 32767).
  %min = call i64 @llvm.smin.i64(i64 %in, i64 32767)
  ; Then lower bound: max(.., -32768).
  %max = call i64 @llvm.smax.i64(i64 %min, i64 -32768)
  %result = trunc i64 %max to i16
  ret i16 %result
}

; Clamp whose upper bound, 32769 (0x8001), does not fit in i16; the lower
; bound is the valid i16 minimum, -32768.
; The checks expect v_cndmask_b32 selects using the 0x8001 constant.
; NOTE(review): despite the "invalid_lower" name, the out-of-range constant
; here is the upper (smin) bound - confirm the intended naming.
; The label pattern ends in ':' because the bare name is also a prefix of
; v_clamp_i64_i16_invalid_lower_and_higher.
; GCN-LABEL: {{^}}v_clamp_i64_i16_invalid_lower:
; GFX6789: v_mov_b32_e32 v{{[0-9]+}}, 0x8001
; GFX6789: v_mov_b32_e32 [[B:v[0-9]+]], 0x8001
; GFX6789: v_cndmask_b32_e32 [[A:v[0-9]+]], [[B]], [[A]], vcc
; GFX6789: v_cndmask_b32_e32 [[C:v[0-9]+]], 0, [[C]], vcc

; GFX10: v_{{(dual_)?}}cndmask_b32{{(_e32)?}} [[A:v[0-9]+]], 0x8001, [[A]]
; GFX10: v_cndmask_b32_e32 [[B:v[0-9]+]], 0, [[B]], vcc_lo
define i16 @v_clamp_i64_i16_invalid_lower(i64 %in) #0 {
entry:
  ; 32769 is one past what a 16-bit truncation can represent as positive.
  %min = call i64 @llvm.smin.i64(i64 %in, i64 32769)
  %max = call i64 @llvm.smax.i64(i64 %min, i64 -32768)
  %result = trunc i64 %max to i16
  ret i16 %result
}

; Clamp where both bounds fall outside the i16 range: the lower bound is
; -32769 and the upper bound is 32768 (0x8000).
; The checks expect a v_cndmask_b32 select using the 0x8000 constant.
; The label pattern ends in ':' so it matches only this symbol's definition.
; GCN-LABEL: {{^}}v_clamp_i64_i16_invalid_lower_and_higher:
; GFX6789: v_mov_b32_e32 v{{[0-9]+}}, 0x8000
; GFX6789: v_mov_b32_e32 [[B:v[0-9]+]], 0x8000
; GFX6789: v_cndmask_b32_e32 [[A:v[0-9]+]], [[B]], [[A]], vcc
; GFX10: v_cndmask_b32_e32 [[A:v[0-9]+]], 0x8000, [[A]], vcc_lo
define i16 @v_clamp_i64_i16_invalid_lower_and_higher(i64 %in) #0 {
entry:
  ; One below the i16 minimum.
  %max = call i64 @llvm.smax.i64(i64 %in, i64 -32769)
  ; One above the i16 maximum.
  %min = call i64 @llvm.smin.i64(i64 %max, i64 32768)
  %result = trunc i64 %min to i16
  ret i16 %result
}

; Clamp to [-255, 256], a range strictly inside the i16 range, with smin
; applied first and smax second.
; Expected code: v_cvt_pk_i16_i32 followed by v_med3_i32 against the bounds
; 0xffffff01 (-255) and 0x100 (256).
; The label pattern ends in ':' because the bare name is also a prefix of
; v_clamp_i64_i16_lower_than_short_reverse.
; GCN-LABEL: {{^}}v_clamp_i64_i16_lower_than_short:
; GFX678: v_cvt_pk_i16_i32_e32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]]
; GFX9: v_cvt_pk_i16_i32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]]
; GFX6789: v_mov_b32_e32 [[B]], 0xffffff01
; GFX6789: v_mov_b32_e32 [[C:v[0-9]+]], 0x100
; GFX6789: v_med3_i32 [[A]], [[B]], [[A]], [[C]]
; GFX10: v_cvt_pk_i16_i32{{(_e64)?}} [[A:v[0-9]+]], {{v[0-9]+}}, [[B:v[0-9]+]]
; GFX10: v_mov_b32_e32 [[B]], 0x100
; GFX10: v_med3_i32 [[A]], 0xffffff01, [[A]], [[B]]
define i16 @v_clamp_i64_i16_lower_than_short(i64 %in) #0 {
entry:
  ; Upper bound first: min(in, 256).
  %min = call i64 @llvm.smin.i64(i64 %in, i64 256)
  ; Then lower bound: max(.., -255).
  %max = call i64 @llvm.smax.i64(i64 %min, i64 -255)
  %result = trunc i64 %max to i16
  ret i16 %result
}

; Same [-255, 256] clamp as v_clamp_i64_i16_lower_than_short, but with smax
; applied first and smin second.
; Expected code is the same shape: v_cvt_pk_i16_i32 followed by v_med3_i32.
; The label pattern ends in ':' so it matches only this symbol's definition.
; GCN-LABEL: {{^}}v_clamp_i64_i16_lower_than_short_reverse:
; GFX678: v_cvt_pk_i16_i32_e32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]]
; GFX9: v_cvt_pk_i16_i32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]]
; GFX6789: v_mov_b32_e32 [[B]], 0xffffff01
; GFX6789: v_mov_b32_e32 [[C:v[0-9]+]], 0x100
; GFX6789: v_med3_i32 [[A]], [[B]], [[A]], [[C]]
; GFX10: v_cvt_pk_i16_i32{{(_e64)?}} [[A:v[0-9]+]], {{v[0-9]+}}, [[B:v[0-9]+]]
; GFX10: v_mov_b32_e32 [[B]], 0x100
; GFX10: v_med3_i32 [[A]], 0xffffff01, [[A]], [[B]]
define i16 @v_clamp_i64_i16_lower_than_short_reverse(i64 %in) #0 {
entry:
  ; Lower bound first: max(in, -255).
  %max = call i64 @llvm.smax.i64(i64 %in, i64 -255)
  ; Then upper bound: min(.., 256).
  %min = call i64 @llvm.smin.i64(i64 %max, i64 256)
  %result = trunc i64 %min to i16
  ret i16 %result
}

; Degenerate clamp: both bounds are 0, so max-then-min always yields 0.
; The expected instruction is a move of the constant 0 into v0. It is the same
; for every target in the RUN lines, so it is checked once under the common
; prefix instead of once per target family.
; The label pattern ends in ':' so it matches only this symbol's definition.
; GCN-LABEL: {{^}}v_clamp_i64_i16_zero:
; GCN: v_mov_b32_e32 v0, 0
define i16 @v_clamp_i64_i16_zero(i64 %in) #0 {
entry:
  %max = call i64 @llvm.smax.i64(i64 %in, i64 0)
  %min = call i64 @llvm.smin.i64(i64 %max, i64 0)
  %result = trunc i64 %min to i16
  ret i16 %result
}
