; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=SI,GCN,FUNC %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=VI,GCN,FUNC %s
; RUN: llc -mtriple=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefixes=EG,FUNC %s

; Every function below compares an extended value (an i1 compare result, or an
; i8) against a constant and stores the resulting i1. The checks pin the
; instruction sequence llc is expected to emit for each pattern: either a single
; scalar compare feeding a select, or a plain constant when the outer compare
; can never (or must always) be true.

; sext(a == b) is 0 or -1, so comparing it for equality with 0 is the same as
; a != b: one scalar not-equal compare and no vector compare.
; FUNC-LABEL: {{^}}sext_bool_icmp_eq_0:
; GCN-NOT: v_cmp
; GCN: s_cmp_lg_u32
; GCN: s_cselect_b64 [[CC:[^,]+]], -1, 0
; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CC]]
; GCN-NEXT: buffer_store_byte [[RESULT]]
; GCN-NEXT: s_endpgm

; EG: SETNE_INT * [[CMP:T[0-9]+]].[[CMPCHAN:[XYZW]]], KC0[2].Z, KC0[2].W
; EG: AND_INT T{{[0-9]+\.[XYZW]}}, PS, 1
define amdgpu_kernel void @sext_bool_icmp_eq_0(ptr addrspace(1) %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp eq i32 %a, %b
  %ext = sext i1 %icmp0 to i32
  %icmp1 = icmp eq i32 %ext, 0
  store i1 %icmp1, ptr addrspace(1) %out
  ret void
}

; sext(a != b) != 0 is just a != b.
; FUNC-LABEL: {{^}}sext_bool_icmp_ne_0:
; GCN-NOT: v_cmp
; GCN: s_cmp_lg_u32
; GCN: s_cselect_b64 [[CC:[^,]+]], -1, 0
; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CC]]
; GCN-NEXT: buffer_store_byte [[RESULT]]
; GCN-NEXT: s_endpgm

; EG: SETNE_INT * [[CMP:T[0-9]+]].[[CMPCHAN:[XYZW]]], KC0[2].Z, KC0[2].W
; EG: AND_INT T{{[0-9]+\.[XYZW]}}, PS, 1
define amdgpu_kernel void @sext_bool_icmp_ne_0(ptr addrspace(1) %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp ne i32 %a, %b
  %ext = sext i1 %icmp0 to i32
  %icmp1 = icmp ne i32 %ext, 0
  store i1 %icmp1, ptr addrspace(1) %out
  ret void
}

; sext(a == b) == -1 is just a == b.
; FUNC-LABEL: {{^}}sext_bool_icmp_eq_neg1:
; GCN-NOT: v_cmp
; GCN: s_cmp_eq_u32
; GCN: s_cselect_b64 [[CC:[^,]+]], -1, 0
; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CC]]
; GCN-NEXT: buffer_store_byte [[RESULT]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @sext_bool_icmp_eq_neg1(ptr addrspace(1) %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp eq i32 %a, %b
  %ext = sext i1 %icmp0 to i32
  %icmp1 = icmp eq i32 %ext, -1
  store i1 %icmp1, ptr addrspace(1) %out
  ret void
}

; sext(a != b) != -1 inverts the inner compare, i.e. a == b.
; FUNC-LABEL: {{^}}sext_bool_icmp_ne_neg1:
; GCN-NOT: v_cmp
; GCN: s_cmp_eq_u32
; GCN: s_cselect_b64 [[CC:[^,]+]], -1, 0
; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CC]]
; GCN-NEXT: buffer_store_byte [[RESULT]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @sext_bool_icmp_ne_neg1(ptr addrspace(1) %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp ne i32 %a, %b
  %ext = sext i1 %icmp0 to i32
  %icmp1 = icmp ne i32 %ext, -1
  store i1 %icmp1, ptr addrspace(1) %out
  ret void
}

; zext(a == b) == 0 inverts the inner compare, i.e. a != b.
; FUNC-LABEL: {{^}}zext_bool_icmp_eq_0:
; GCN-NOT: v_cmp
; GCN: s_cmp_lg_u32
; GCN: s_cselect_b64 [[CC:[^,]+]], -1, 0
; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CC]]
; GCN-NEXT: buffer_store_byte [[RESULT]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @zext_bool_icmp_eq_0(ptr addrspace(1) %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp eq i32 %a, %b
  %ext = zext i1 %icmp0 to i32
  %icmp1 = icmp eq i32 %ext, 0
  store i1 %icmp1, ptr addrspace(1) %out
  ret void
}

; zext(a != b) != 0 is just a != b.
; FUNC-LABEL: {{^}}zext_bool_icmp_ne_0:
; GCN-NOT: v_cmp
; GCN: s_cmp_lg_u32
; GCN: s_cselect_b64 [[CC:[^,]+]], -1, 0
; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CC]]
; GCN-NEXT: buffer_store_byte [[RESULT]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @zext_bool_icmp_ne_0(ptr addrspace(1) %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp ne i32 %a, %b
  %ext = zext i1 %icmp0 to i32
  %icmp1 = icmp ne i32 %ext, 0
  store i1 %icmp1, ptr addrspace(1) %out
  ret void
}

; zext(a == b) == 1 is just a == b.
; FUNC-LABEL: {{^}}zext_bool_icmp_eq_1:
; GCN-NOT: v_cmp
; GCN: s_cmp_eq_u32
; GCN: s_cselect_b64 [[CC:[^,]+]], -1, 0
; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CC]]
; GCN-NEXT: buffer_store_byte [[RESULT]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @zext_bool_icmp_eq_1(ptr addrspace(1) %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp eq i32 %a, %b
  %ext = zext i1 %icmp0 to i32
  %icmp1 = icmp eq i32 %ext, 1
  store i1 %icmp1, ptr addrspace(1) %out
  ret void
}

; zext(a != b) != 1 inverts the inner compare, i.e. a == b.
; FUNC-LABEL: {{^}}zext_bool_icmp_ne_1:
; GCN-NOT: v_cmp
; GCN: s_cmp_eq_u32
; GCN: s_cselect_b64 [[CC:[^,]+]], -1, 0
; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CC]]
; GCN-NEXT: buffer_store_byte [[RESULT]]
define amdgpu_kernel void @zext_bool_icmp_ne_1(ptr addrspace(1) %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp ne i32 %a, %b
  %ext = zext i1 %icmp0 to i32
  %icmp1 = icmp ne i32 %ext, 1
  store i1 %icmp1, ptr addrspace(1) %out
  ret void
}

; Reduces to false: a zero-extended i1 is 0 or 1 and can never equal -1.
; FUNC-LABEL: {{^}}zext_bool_icmp_eq_neg1:
; GCN: v_mov_b32_e32 [[TMP:v[0-9]+]], 0{{$}}
; GCN: buffer_store_byte [[TMP]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @zext_bool_icmp_eq_neg1(ptr addrspace(1) %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp eq i32 %a, %b
  %ext = zext i1 %icmp0 to i32
  %icmp1 = icmp eq i32 %ext, -1
  store i1 %icmp1, ptr addrspace(1) %out
  ret void
}

; Reduces to true: a zero-extended i1 is 0 or 1 and always differs from -1.
; FUNC-LABEL: {{^}}zext_bool_icmp_ne_neg1:
; GCN: v_mov_b32_e32 [[TMP:v[0-9]+]], 1{{$}}
; GCN: buffer_store_byte [[TMP]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @zext_bool_icmp_ne_neg1(ptr addrspace(1) %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp ne i32 %a, %b
  %ext = zext i1 %icmp0 to i32
  %icmp1 = icmp ne i32 %ext, -1
  store i1 %icmp1, ptr addrspace(1) %out
  ret void
}

; zext of an i8 kernel argument compared against 255: the checks expect the
; loaded dword to be masked with 0xff and compared against 0xff with a scalar
; compare.
; FUNC-LABEL: {{^}}cmp_zext_k_i8max:
; GCN: s_load_dword [[VALUE:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}
; GCN-DAG: s_and_b32 [[B:s[0-9]+]], [[VALUE]], 0xff
; GCN: s_cmpk_lg_i32 [[B]], 0xff
; GCN: s_cselect_b64 [[CC:[^,]+]], -1, 0

; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CC]]
; GCN: buffer_store_byte [[RESULT]]
; GCN: s_endpgm
define amdgpu_kernel void @cmp_zext_k_i8max(ptr addrspace(1) %out, i8 %b) nounwind {
  %b.ext = zext i8 %b to i32
  %icmp0 = icmp ne i32 %b.ext, 255
  store i1 %icmp0, ptr addrspace(1) %out
  ret void
}

; sext of an i8 loaded from memory compared against -1: the checks expect a
; sign-extending byte load followed by a vector compare with -1.
; FUNC-LABEL: {{^}}cmp_sext_k_neg1:
; GCN: buffer_load_sbyte [[B:v[0-9]+]]
; GCN: v_cmp_ne_u32_e32 vcc, -1, [[B]]{{$}}
; GCN-NEXT: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, vcc
; GCN: buffer_store_byte [[RESULT]]
; GCN: s_endpgm
define amdgpu_kernel void @cmp_sext_k_neg1(ptr addrspace(1) %out, ptr addrspace(1) %b.ptr) nounwind {
  %b = load i8, ptr addrspace(1) %b.ptr
  %b.ext = sext i8 %b to i32
  %icmp0 = icmp ne i32 %b.ext, -1
  store i1 %icmp0, ptr addrspace(1) %out
  ret void
}

; Non-kernel function taking a signext i8: the checks expect the incoming
; register v0 to be compared with -1 directly.
; FUNC-LABEL: {{^}}v_cmp_sext_k_neg1_i8_sext_arg:
; GCN: v_cmp_ne_u32_e32 vcc, -1, v0
; GCN: v_cndmask_b32_e64 [[SELECT:v[0-9]+]], 0, 1, vcc
; GCN: buffer_store_byte [[SELECT]]
define void @v_cmp_sext_k_neg1_i8_sext_arg(i8 signext %b) nounwind {
  %b.ext = sext i8 %b to i32
  %icmp0 = icmp ne i32 %b.ext, -1
  store i1 %icmp0, ptr addrspace(1) undef
  ret void
}

; FIXME: This ends up doing a buffer_load_ubyte and a compare to 255. Seems to
; be because of ordering problems when not allowing load widths to be reduced.
; Should do a buffer_load_sbyte and compare with -1.
; NOTE(review): the checks below currently expect a scalar dword load masked
; with 0xff rather than a buffer_load_ubyte; the FIXME wording may be stale.

; FUNC-LABEL: {{^}}cmp_sext_k_neg1_i8_arg:
; SI: s_load_dword [[VAL:s[0-9]+]], s[{{[0-9]+:[0-9]+}}], 0xb
; VI: s_load_dword [[VAL:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c
; GCN-DAG: s_and_b32 [[B:s[0-9]+]], [[VAL]], 0xff
; GCN: s_cmpk_lg_i32 [[B]], 0xff{{$}}
; GCN: s_cselect_b64 [[CC:[^,]+]], -1, 0
; GCN: v_cndmask_b32_e64 [[RESULT:v[0-9]+]], 0, 1, [[CC]]
; GCN: buffer_store_byte [[RESULT]]
; GCN: s_endpgm
define amdgpu_kernel void @cmp_sext_k_neg1_i8_arg(ptr addrspace(1) %out, i8 %b) nounwind {
  %b.ext = sext i8 %b to i32
  %icmp0 = icmp ne i32 %b.ext, -1
  store i1 %icmp0, ptr addrspace(1) %out
  ret void
}

; A zero-extended i8 is in [0, 255] and always differs from -1, so the result
; is the constant 1.
; FUNC-LABEL: {{^}}cmp_zext_k_neg1:
; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}}
; GCN: buffer_store_byte [[RESULT]]
; GCN: s_endpgm
define amdgpu_kernel void @cmp_zext_k_neg1(ptr addrspace(1) %out, i8 %b) nounwind {
  %b.ext = zext i8 %b to i32
  %icmp0 = icmp ne i32 %b.ext, -1
  store i1 %icmp0, ptr addrspace(1) %out
  ret void
}

; A zero-extended i1 is 0 or 1 and always differs from 2: constant 1.
; FUNC-LABEL: {{^}}zext_bool_icmp_ne_k:
; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], 1{{$}}
; GCN: buffer_store_byte [[RESULT]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @zext_bool_icmp_ne_k(ptr addrspace(1) %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp ne i32 %a, %b
  %ext = zext i1 %icmp0 to i32
  %icmp1 = icmp ne i32 %ext, 2
  store i1 %icmp1, ptr addrspace(1) %out
  ret void
}

; A zero-extended i1 is 0 or 1 and can never equal 2: constant 0.
; FUNC-LABEL: {{^}}zext_bool_icmp_eq_k:
; GCN: v_mov_b32_e32 [[RESULT:v[0-9]+]], 0{{$}}
; GCN: buffer_store_byte [[RESULT]]
; GCN-NEXT: s_endpgm
define amdgpu_kernel void @zext_bool_icmp_eq_k(ptr addrspace(1) %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp ne i32 %a, %b
  %ext = zext i1 %icmp0 to i32
  %icmp1 = icmp eq i32 %ext, 2
  store i1 %icmp1, ptr addrspace(1) %out
  ret void
}

; FIXME: These cases should really be able to fold to true/false in
; DAGCombiner

; This really folds away to false: a sign-extended i1 is 0 or -1, never 1.
; FUNC-LABEL: {{^}}sext_bool_icmp_eq_1:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0{{$}}
; GCN: buffer_store_byte [[K]]
define amdgpu_kernel void @sext_bool_icmp_eq_1(ptr addrspace(1) %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp eq i32 %a, %b
  %ext = sext i1 %icmp0 to i32
  %icmp1 = icmp eq i32 %ext, 1
  store i1 %icmp1, ptr addrspace(1) %out
  ret void
}

; A sign-extended i1 is 0 or -1 and always differs from 1: constant 1.
; FUNC-LABEL: {{^}}sext_bool_icmp_ne_1:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 1{{$}}
; GCN: buffer_store_byte [[K]]
define amdgpu_kernel void @sext_bool_icmp_ne_1(ptr addrspace(1) %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp ne i32 %a, %b
  %ext = sext i1 %icmp0 to i32
  %icmp1 = icmp ne i32 %ext, 1
  store i1 %icmp1, ptr addrspace(1) %out
  ret void
}

; A sign-extended i1 is 0 or -1 and always differs from 2: constant 1.
; FUNC-LABEL: {{^}}sext_bool_icmp_ne_k:
; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 1{{$}}
; GCN: buffer_store_byte [[K]]
define amdgpu_kernel void @sext_bool_icmp_ne_k(ptr addrspace(1) %out, i32 %a, i32 %b) nounwind {
  %icmp0 = icmp ne i32 %a, %b
  %ext = sext i1 %icmp0 to i32
  %icmp1 = icmp ne i32 %ext, 2
  store i1 %icmp1, ptr addrspace(1) %out
  ret void
}