; Each function below clamps an i64 argument with llvm.smax.i64 / llvm.smin.i64
; and truncates the result to i16. The check lines pin the instructions that
; llc with -global-isel is expected to emit for each -mcpu in the run lines.
; RUN: llc -global-isel -mcpu=tahiti -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX678,GFX6789 %s
; RUN: llc -global-isel -mcpu=gfx900 -mtriple=amdgcn-amd-amdhsa -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX9,GFX6789 %s
; RUN: llc -global-isel -mcpu=gfx1010 -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX10 %s
; RUN: llc -global-isel -mcpu=gfx1100 -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX10 %s

declare i64 @llvm.smax.i64(i64, i64)
declare i64 @llvm.smin.i64(i64, i64)

; Bounds -32768 and 32767, smax applied first. The checks expect a
; v_cvt_pk_i16_i32 together with a v_med3_i32 using 0xffff8000 and 0x7fff.
; GCN-LABEL: {{^}}v_clamp_i64_i16
; GFX678: v_cvt_pk_i16_i32_e32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]]
; GFX9: v_cvt_pk_i16_i32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]]
; GFX6789: v_mov_b32_e32 [[B]], 0xffff8000
; GFX6789: v_mov_b32_e32 [[C:v[0-9]+]], 0x7fff
; GFX6789: v_med3_i32 [[A]], [[B]], [[A]], [[C]]
; GFX10: v_cvt_pk_i16_i32{{(_e64)?}} [[A:v[0-9]+]], {{v[0-9]+}}, [[B:v[0-9]+]]
; GFX10: v_mov_b32_e32 [[B]], 0x7fff
; GFX10: v_med3_i32 [[A]], 0xffff8000, [[A]], [[B]]
define i16 @v_clamp_i64_i16(i64 %in) #0 {
entry:
  %max = call i64 @llvm.smax.i64(i64 %in, i64 -32768)
  %min = call i64 @llvm.smin.i64(i64 %max, i64 32767)
  %result = trunc i64 %min to i16
  ret i16 %result
}

; Same bounds as above with the operations swapped (smin first, then smax).
; The expected instructions are identical to the previous function.
; GCN-LABEL: {{^}}v_clamp_i64_i16_reverse
; GFX678: v_cvt_pk_i16_i32_e32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]]
; GFX9: v_cvt_pk_i16_i32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]]
; GFX6789: v_mov_b32_e32 [[B]], 0xffff8000
; GFX6789: v_mov_b32_e32 [[C:v[0-9]+]], 0x7fff
; GFX6789: v_med3_i32 [[A]], [[B]], [[A]], [[C]]
; GFX10: v_cvt_pk_i16_i32{{(_e64)?}} [[A:v[0-9]+]], {{v[0-9]+}}, [[B:v[0-9]+]]
; GFX10: v_mov_b32_e32 [[B]], 0x7fff
; GFX10: v_med3_i32 [[A]], 0xffff8000, [[A]], [[B]]
define i16 @v_clamp_i64_i16_reverse(i64 %in) #0 {
entry:
  %min = call i64 @llvm.smin.i64(i64 %in, i64 32767)
  %max = call i64 @llvm.smax.i64(i64 %min, i64 -32768)
  %result = trunc i64 %max to i16
  ret i16 %result
}

; The smin bound here is 32769, which does not fit in a signed 16-bit value;
; the smax bound is still -32768. The checks expect v_cndmask_b32 selects
; against 0x8001 rather than the v_cvt_pk_i16_i32 / v_med3_i32 pair.
; NOTE(review): despite the "invalid_lower" name, the out-of-range constant is
; the one passed to smin - confirm the intended naming.
; GCN-LABEL: {{^}}v_clamp_i64_i16_invalid_lower
; GFX6789: v_mov_b32_e32 v{{[0-9]+}}, 0x8001
; GFX6789: v_mov_b32_e32 [[B:v[0-9]+]], 0x8001
; GFX6789: v_cndmask_b32_e32 [[A:v[0-9]+]], [[B]], [[A]], vcc
; GFX6789: v_cndmask_b32_e32 [[C:v[0-9]+]], 0, [[C]], vcc

; GFX10: v_{{(dual_)?}}cndmask_b32{{(_e32)?}} [[A:v[0-9]+]], 0x8001, [[A]]
; GFX10: v_cndmask_b32_e32 [[B:v[0-9]+]], 0, [[B]], vcc_lo
define i16 @v_clamp_i64_i16_invalid_lower(i64 %in) #0 {
entry:
  %min = call i64 @llvm.smin.i64(i64 %in, i64 32769)
  %max = call i64 @llvm.smax.i64(i64 %min, i64 -32768)
  %result = trunc i64 %max to i16
  ret i16 %result
}

; Both bounds (-32769 and 32768) lie outside the signed 16-bit range.
; The checks expect a v_cndmask_b32 select against 0x8000.
; GCN-LABEL: {{^}}v_clamp_i64_i16_invalid_lower_and_higher
; GFX6789: v_mov_b32_e32 v{{[0-9]+}}, 0x8000
; GFX6789: v_mov_b32_e32 [[B:v[0-9]+]], 0x8000
; GFX6789: v_cndmask_b32_e32 [[A:v[0-9]+]], [[B]], [[A]], vcc
; GFX10: v_cndmask_b32_e32 [[A:v[0-9]+]], 0x8000, [[A]], vcc_lo
define i16 @v_clamp_i64_i16_invalid_lower_and_higher(i64 %in) #0 {
entry:
  %max = call i64 @llvm.smax.i64(i64 %in, i64 -32769)
  %min = call i64 @llvm.smin.i64(i64 %max, i64 32768)
  %result = trunc i64 %min to i16
  ret i16 %result
}

; Bounds -255 and 256 (smin first), both well inside the signed 16-bit range.
; The checks expect v_cvt_pk_i16_i32 plus v_med3_i32 using 0xffffff01 and 0x100.
; GCN-LABEL: {{^}}v_clamp_i64_i16_lower_than_short
; GFX678: v_cvt_pk_i16_i32_e32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]]
; GFX9: v_cvt_pk_i16_i32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]]
; GFX6789: v_mov_b32_e32 [[B]], 0xffffff01
; GFX6789: v_mov_b32_e32 [[C:v[0-9]+]], 0x100
; GFX6789: v_med3_i32 [[A]], [[B]], [[A]], [[C]]
; GFX10: v_cvt_pk_i16_i32{{(_e64)?}} [[A:v[0-9]+]], {{v[0-9]+}}, [[B:v[0-9]+]]
; GFX10: v_mov_b32_e32 [[B]], 0x100
; GFX10: v_med3_i32 [[A]], 0xffffff01, [[A]], [[B]]
define i16 @v_clamp_i64_i16_lower_than_short(i64 %in) #0 {
entry:
  %min = call i64 @llvm.smin.i64(i64 %in, i64 256)
  %max = call i64 @llvm.smax.i64(i64 %min, i64 -255)
  %result = trunc i64 %max to i16
  ret i16 %result
}

; Same -255 / 256 bounds with smax applied first. The expected instructions
; are identical to the previous function.
; GCN-LABEL: {{^}}v_clamp_i64_i16_lower_than_short_reverse
; GFX678: v_cvt_pk_i16_i32_e32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]]
; GFX9: v_cvt_pk_i16_i32 [[A:v[0-9]+]], [[A]], [[B:v[0-9]+]]
; GFX6789: v_mov_b32_e32 [[B]], 0xffffff01
; GFX6789: v_mov_b32_e32 [[C:v[0-9]+]], 0x100
; GFX6789: v_med3_i32 [[A]], [[B]], [[A]], [[C]]
; GFX10: v_cvt_pk_i16_i32{{(_e64)?}} [[A:v[0-9]+]], {{v[0-9]+}}, [[B:v[0-9]+]]
; GFX10: v_mov_b32_e32 [[B]], 0x100
; GFX10: v_med3_i32 [[A]], 0xffffff01, [[A]], [[B]]
define i16 @v_clamp_i64_i16_lower_than_short_reverse(i64 %in) #0 {
entry:
  %max = call i64 @llvm.smax.i64(i64 %in, i64 -255)
  %min = call i64 @llvm.smin.i64(i64 %max, i64 256)
  %result = trunc i64 %min to i16
  ret i16 %result
}

; Both bounds are 0, so the clamp can only produce 0. The checks expect a
; single move of the constant 0 into v0.
; GCN-LABEL: {{^}}v_clamp_i64_i16_zero
; GFX6789: v_mov_b32_e32 v0, 0
; GFX10: v_mov_b32_e32 v0, 0
define i16 @v_clamp_i64_i16_zero(i64 %in) #0 {
entry:
  %max = call i64 @llvm.smax.i64(i64 %in, i64 0)
  %min = call i64 @llvm.smin.i64(i64 %max, i64 0)
  %result = trunc i64 %min to i16
  ret i16 %result
}