; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; FIXME: globalisel crashes on v3
; The -global-isel=1 invocations below are spelled "xUN" instead of the lit
; keyword, so lit does not execute them; only the SelectionDAG invocations run.
; RUN: llc -mtriple=amdgcn -mcpu=gfx704 < %s | FileCheck --check-prefixes=GFX7CHECK,GFX7SELDAG %s
; xUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx704 < %s | FileCheck --check-prefixes=GFX7CHECK,GFX7GLISEL %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx803 < %s | FileCheck --check-prefixes=GFX8CHECK,GFX8SELDAG %s
; xUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx803 < %s | FileCheck --check-prefixes=GFX8CHECK,GFX8GLISEL %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx908 < %s | FileCheck --check-prefixes=GFX9CHECK,GFX9SELDAG %s
; xUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx908 < %s | FileCheck --check-prefixes=GFX9CHECK,GFX9GLISEL %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1031 < %s | FileCheck --check-prefixes=GFX10CHECK,GFX10SELDAG %s
; xUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1031 < %s | FileCheck --check-prefixes=GFX10CHECK,GFX10GLISEL %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11SELDAG %s
; xUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck --check-prefixes=GFX11CHECK,GFX11GLISEL %s

; Kernel variant: NaN test (mask 3 = sNaN|qNaN) on a kernel argument; the i1
; result is sign-extended to i32 and stored to %out.
define amdgpu_kernel void @sgpr_isnan_bf16(ptr addrspace(1) %out, bfloat %x) {
; GFX7CHECK-LABEL: sgpr_isnan_bf16:
; GFX7CHECK:       ; %bb.0:
; GFX7CHECK-NEXT:    s_load_dword s6, s[4:5], 0xb
; GFX7CHECK-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; GFX7CHECK-NEXT:    s_mov_b32 s3, 0xf000
; GFX7CHECK-NEXT:    s_mov_b32 s2, -1
; GFX7CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; GFX7CHECK-NEXT:    s_and_b32 s4, s6, 0x7fff
; GFX7CHECK-NEXT:    s_cmpk_gt_i32 s4, 0x7f80
; GFX7CHECK-NEXT:    s_cselect_b64 s[4:5], -1, 0
; GFX7CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, -1, s[4:5]
; GFX7CHECK-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GFX7CHECK-NEXT:    s_endpgm
;
; GFX8CHECK-LABEL: sgpr_isnan_bf16:
; GFX8CHECK:       ; %bb.0:
; GFX8CHECK-NEXT:    s_load_dword s2, s[4:5], 0x2c
; GFX8CHECK-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX8CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; GFX8CHECK-NEXT:    s_and_b32 s2, s2, 0x7fff
; GFX8CHECK-NEXT:    s_cmpk_gt_i32 s2, 0x7f80
; GFX8CHECK-NEXT:    s_cselect_b64 s[2:3], -1, 0
; GFX8CHECK-NEXT:    v_mov_b32_e32 v0, s0
; GFX8CHECK-NEXT:    v_cndmask_b32_e64 v2, 0, -1, s[2:3]
; GFX8CHECK-NEXT:    v_mov_b32_e32 v1, s1
; GFX8CHECK-NEXT:    flat_store_dword v[0:1], v2
; GFX8CHECK-NEXT:    s_endpgm
;
; GFX9CHECK-LABEL: sgpr_isnan_bf16:
; GFX9CHECK:       ; %bb.0:
; GFX9CHECK-NEXT:    s_load_dword s2, s[4:5], 0x2c
; GFX9CHECK-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9CHECK-NEXT:    v_mov_b32_e32 v0, 0
; GFX9CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9CHECK-NEXT:    s_and_b32 s2, s2, 0x7fff
; GFX9CHECK-NEXT:    s_cmpk_gt_i32 s2, 0x7f80
; GFX9CHECK-NEXT:    s_cselect_b64 s[2:3], -1, 0
; GFX9CHECK-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s[2:3]
; GFX9CHECK-NEXT:    global_store_dword v0, v1, s[0:1]
; GFX9CHECK-NEXT:    s_endpgm
;
; GFX10CHECK-LABEL: sgpr_isnan_bf16:
; GFX10CHECK:       ; %bb.0:
; GFX10CHECK-NEXT:    s_clause 0x1
; GFX10CHECK-NEXT:    s_load_dword s2, s[4:5], 0x2c
; GFX10CHECK-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX10CHECK-NEXT:    v_mov_b32_e32 v0, 0
; GFX10CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; GFX10CHECK-NEXT:    s_and_b32 s2, s2, 0x7fff
; GFX10CHECK-NEXT:    s_cmpk_gt_i32 s2, 0x7f80
; GFX10CHECK-NEXT:    s_cselect_b32 s2, -1, 0
; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s2
; GFX10CHECK-NEXT:    global_store_dword v0, v1, s[0:1]
; GFX10CHECK-NEXT:    s_endpgm
;
; GFX11CHECK-LABEL: sgpr_isnan_bf16:
; GFX11CHECK:       ; %bb.0:
; GFX11CHECK-NEXT:    s_clause 0x1
; GFX11CHECK-NEXT:    s_load_b32 s2, s[4:5], 0x2c
; GFX11CHECK-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX11CHECK-NEXT:    v_mov_b32_e32 v0, 0
; GFX11CHECK-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11CHECK-NEXT:    s_and_b32 s2, s2, 0x7fff
; GFX11CHECK-NEXT:    s_cmpk_gt_i32 s2, 0x7f80
; GFX11CHECK-NEXT:    s_cselect_b32 s2, -1, 0
; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s2
; GFX11CHECK-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX11CHECK-NEXT:    s_endpgm
  %result = call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 3)
  %sext = sext i1 %result to i32
  store i32 %sext, ptr addrspace(1) %out, align 4
  ret void
}

; Empty mask (0): no class is tested; the checks expect a constant 0 in v0.
define i1 @zeromask_bf16(bfloat %x) nounwind {
; GFX7CHECK-LABEL: zeromask_bf16:
; GFX7CHECK:       ; %bb.0:
; GFX7CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7CHECK-NEXT:    v_mov_b32_e32 v0, 0
; GFX7CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8CHECK-LABEL: zeromask_bf16:
; GFX8CHECK:       ; %bb.0:
; GFX8CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8CHECK-NEXT:    v_mov_b32_e32 v0, 0
; GFX8CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9CHECK-LABEL: zeromask_bf16:
; GFX9CHECK:       ; %bb.0:
; GFX9CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9CHECK-NEXT:    v_mov_b32_e32 v0, 0
; GFX9CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10CHECK-LABEL: zeromask_bf16:
; GFX10CHECK:       ; %bb.0:
; GFX10CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10CHECK-NEXT:    v_mov_b32_e32 v0, 0
; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11CHECK-LABEL: zeromask_bf16:
; GFX11CHECK:       ; %bb.0:
; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11CHECK-NEXT:    v_mov_b32_e32 v0, 0
; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
  %1 = call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 0)
  ret i1 %1
}

; Full mask (0x3ff): all ten class bits set; the checks expect a constant 1 in v0.
; FIXME: DAG and GlobalISel return different values for i1 true
define i1 @allflags_bf16(bfloat %x) nounwind {
; GFX7CHECK-LABEL: allflags_bf16:
; GFX7CHECK:       ; %bb.0:
; GFX7CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7CHECK-NEXT:    v_mov_b32_e32 v0, 1
; GFX7CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8CHECK-LABEL: allflags_bf16:
; GFX8CHECK:       ; %bb.0:
; GFX8CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8CHECK-NEXT:    v_mov_b32_e32 v0, 1
; GFX8CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9CHECK-LABEL: allflags_bf16:
; GFX9CHECK:       ; %bb.0:
; GFX9CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9CHECK-NEXT:    v_mov_b32_e32 v0, 1
; GFX9CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10CHECK-LABEL: allflags_bf16:
; GFX10CHECK:       ; %bb.0:
; GFX10CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10CHECK-NEXT:    v_mov_b32_e32 v0, 1
; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11CHECK-LABEL: allflags_bf16:
; GFX11CHECK:       ; %bb.0:
; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11CHECK-NEXT:    v_mov_b32_e32 v0, 1
; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
  %1 = call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 1023) ; 0x3ff
  ret i1 %1
}

; Mask 0x001: signaling NaN only.
define i1 @snan_bf16(bfloat %x) nounwind {
; GFX7CHECK-LABEL: snan_bf16:
; GFX7CHECK:       ; %bb.0:
; GFX7CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7CHECK-NEXT:    v_mul_f32_e32 v0, 1.0, v0
; GFX7CHECK-NEXT:    v_bfe_u32 v0, v0, 16, 15
; GFX7CHECK-NEXT:    s_movk_i32 s4, 0x7fc0
; GFX7CHECK-NEXT:    v_cmp_gt_i32_e32 vcc, s4, v0
; GFX7CHECK-NEXT:    s_movk_i32 s4, 0x7f80
; GFX7CHECK-NEXT:    v_cmp_lt_i32_e64 s[4:5], s4, v0
; GFX7CHECK-NEXT:    s_and_b64 s[4:5], s[4:5], vcc
; GFX7CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX7CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8CHECK-LABEL: snan_bf16:
; GFX8CHECK:       ; %bb.0:
; GFX8CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8CHECK-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
; GFX8CHECK-NEXT:    s_movk_i32 s4, 0x7fc0
; GFX8CHECK-NEXT:    v_cmp_gt_i16_e32 vcc, s4, v0
; GFX8CHECK-NEXT:    s_movk_i32 s4, 0x7f80
; GFX8CHECK-NEXT:    v_cmp_lt_i16_e64 s[4:5], s4, v0
; GFX8CHECK-NEXT:    s_and_b64 s[4:5], s[4:5], vcc
; GFX8CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX8CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9CHECK-LABEL: snan_bf16:
; GFX9CHECK:       ; %bb.0:
; GFX9CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9CHECK-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
; GFX9CHECK-NEXT:    s_movk_i32 s4, 0x7fc0
; GFX9CHECK-NEXT:    v_cmp_gt_i16_e32 vcc, s4, v0
; GFX9CHECK-NEXT:    s_movk_i32 s4, 0x7f80
; GFX9CHECK-NEXT:    v_cmp_lt_i16_e64 s[4:5], s4, v0
; GFX9CHECK-NEXT:    s_and_b64 s[4:5], s[4:5], vcc
; GFX9CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX9CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10CHECK-LABEL: snan_bf16:
; GFX10CHECK:       ; %bb.0:
; GFX10CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10CHECK-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
; GFX10CHECK-NEXT:    v_cmp_gt_i16_e32 vcc_lo, 0x7fc0, v0
; GFX10CHECK-NEXT:    v_cmp_lt_i16_e64 s4, 0x7f80, v0
; GFX10CHECK-NEXT:    s_and_b32 s4, s4, vcc_lo
; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11CHECK-LABEL: snan_bf16:
; GFX11CHECK:       ; %bb.0:
; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11CHECK-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
; GFX11CHECK-NEXT:    v_cmp_gt_i16_e32 vcc_lo, 0x7fc0, v0
; GFX11CHECK-NEXT:    v_cmp_lt_i16_e64 s0, 0x7f80, v0
; GFX11CHECK-NEXT:    s_and_b32 s0, s0, vcc_lo
; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
  %1 = call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 1) ; 0x001
  ret i1 %1
}

; Mask 0x002: quiet NaN only.
define i1 @qnan_bf16(bfloat %x) nounwind {
; GFX7CHECK-LABEL: qnan_bf16:
; GFX7CHECK:       ; %bb.0:
; GFX7CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7CHECK-NEXT:    v_mul_f32_e32 v0, 1.0, v0
; GFX7CHECK-NEXT:    v_bfe_u32 v0, v0, 16, 15
; GFX7CHECK-NEXT:    s_movk_i32 s4, 0x7fbf
; GFX7CHECK-NEXT:    v_cmp_lt_i32_e32 vcc, s4, v0
; GFX7CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX7CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8CHECK-LABEL: qnan_bf16:
; GFX8CHECK:       ; %bb.0:
; GFX8CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8CHECK-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
; GFX8CHECK-NEXT:    s_movk_i32 s4, 0x7fbf
; GFX8CHECK-NEXT:    v_cmp_lt_i16_e32 vcc, s4, v0
; GFX8CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX8CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9CHECK-LABEL: qnan_bf16:
; GFX9CHECK:       ; %bb.0:
; GFX9CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9CHECK-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
; GFX9CHECK-NEXT:    s_movk_i32 s4, 0x7fbf
; GFX9CHECK-NEXT:    v_cmp_lt_i16_e32 vcc, s4, v0
; GFX9CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX9CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10CHECK-LABEL: qnan_bf16:
; GFX10CHECK:       ; %bb.0:
; GFX10CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10CHECK-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
; GFX10CHECK-NEXT:    v_cmp_lt_i16_e32 vcc_lo, 0x7fbf, v0
; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11CHECK-LABEL: qnan_bf16:
; GFX11CHECK:       ; %bb.0:
; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11CHECK-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
; GFX11CHECK-NEXT:    v_cmp_lt_i16_e32 vcc_lo, 0x7fbf, v0
; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
  %1 = call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 2) ; 0x002
  ret i1 %1
}

; Mask 0x200: positive infinity only.
define i1 @posinf_bf16(bfloat %x) nounwind {
; GFX7CHECK-LABEL: posinf_bf16:
; GFX7CHECK:       ; %bb.0:
; GFX7CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7CHECK-NEXT:    v_mul_f32_e32 v0, 1.0, v0
; GFX7CHECK-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
; GFX7CHECK-NEXT:    s_movk_i32 s4, 0x7f80
; GFX7CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, s4, v0
; GFX7CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX7CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8CHECK-LABEL: posinf_bf16:
; GFX8CHECK:       ; %bb.0:
; GFX8CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8CHECK-NEXT:    s_movk_i32 s4, 0x7f80
; GFX8CHECK-NEXT:    v_cmp_eq_u16_e32 vcc, s4, v0
; GFX8CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX8CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9CHECK-LABEL: posinf_bf16:
; GFX9CHECK:       ; %bb.0:
; GFX9CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9CHECK-NEXT:    s_movk_i32 s4, 0x7f80
; GFX9CHECK-NEXT:    v_cmp_eq_u16_e32 vcc, s4, v0
; GFX9CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX9CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10CHECK-LABEL: posinf_bf16:
; GFX10CHECK:       ; %bb.0:
; GFX10CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10CHECK-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0x7f80, v0
; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11CHECK-LABEL: posinf_bf16:
; GFX11CHECK:       ; %bb.0:
; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11CHECK-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0x7f80, v0
; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
  %1 = call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 512) ; 0x200
  ret i1 %1
}

; Mask 0x004: negative infinity only.
define i1 @neginf_bf16(bfloat %x) nounwind {
; GFX7CHECK-LABEL: neginf_bf16:
; GFX7CHECK:       ; %bb.0:
; GFX7CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7CHECK-NEXT:    v_mul_f32_e32 v0, 1.0, v0
; GFX7CHECK-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
; GFX7CHECK-NEXT:    s_mov_b32 s4, 0xff80
; GFX7CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, s4, v0
; GFX7CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX7CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8CHECK-LABEL: neginf_bf16:
; GFX8CHECK:       ; %bb.0:
; GFX8CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8CHECK-NEXT:    s_movk_i32 s4, 0xff80
; GFX8CHECK-NEXT:    v_cmp_eq_u16_e32 vcc, s4, v0
; GFX8CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX8CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9CHECK-LABEL: neginf_bf16:
; GFX9CHECK:       ; %bb.0:
; GFX9CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9CHECK-NEXT:    s_movk_i32 s4, 0xff80
; GFX9CHECK-NEXT:    v_cmp_eq_u16_e32 vcc, s4, v0
; GFX9CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX9CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10CHECK-LABEL: neginf_bf16:
; GFX10CHECK:       ; %bb.0:
; GFX10CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10CHECK-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0xff80, v0
; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11CHECK-LABEL: neginf_bf16:
; GFX11CHECK:       ; %bb.0:
; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11CHECK-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0xff80, v0
; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
  %1 = call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 4) ; 0x004
  ret i1 %1
}

; Mask 0x100: positive normal only.
define i1 @posnormal_bf16(bfloat %x) nounwind {
; GFX7CHECK-LABEL: posnormal_bf16:
; GFX7CHECK:       ; %bb.0:
; GFX7CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7CHECK-NEXT:    v_mul_f32_e32 v0, 1.0, v0
; GFX7CHECK-NEXT:    v_ashrrev_i32_e32 v1, 16, v0
; GFX7CHECK-NEXT:    v_bfe_u32 v0, v0, 16, 15
; GFX7CHECK-NEXT:    v_add_i32_e32 v0, vcc, 0xffffff80, v0
; GFX7CHECK-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX7CHECK-NEXT:    s_movk_i32 s6, 0x7f00
; GFX7CHECK-NEXT:    v_cmp_lt_i32_e64 s[4:5], -1, v1
; GFX7CHECK-NEXT:    v_cmp_gt_u32_e32 vcc, s6, v0
; GFX7CHECK-NEXT:    s_and_b64 s[4:5], vcc, s[4:5]
; GFX7CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX7CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8CHECK-LABEL: posnormal_bf16:
; GFX8CHECK:       ; %bb.0:
; GFX8CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8CHECK-NEXT:    v_cmp_lt_i16_e32 vcc, -1, v0
; GFX8CHECK-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
; GFX8CHECK-NEXT:    v_add_u16_e32 v0, 0xff80, v0
; GFX8CHECK-NEXT:    s_movk_i32 s4, 0x7f00
; GFX8CHECK-NEXT:    v_cmp_gt_u16_e64 s[4:5], s4, v0
; GFX8CHECK-NEXT:    s_and_b64 s[4:5], s[4:5], vcc
; GFX8CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX8CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9CHECK-LABEL: posnormal_bf16:
; GFX9CHECK:       ; %bb.0:
; GFX9CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9CHECK-NEXT:    v_cmp_lt_i16_e32 vcc, -1, v0
; GFX9CHECK-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
; GFX9CHECK-NEXT:    v_add_u16_e32 v0, 0xff80, v0
; GFX9CHECK-NEXT:    s_movk_i32 s4, 0x7f00
; GFX9CHECK-NEXT:    v_cmp_gt_u16_e64 s[4:5], s4, v0
; GFX9CHECK-NEXT:    s_and_b64 s[4:5], s[4:5], vcc
; GFX9CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX9CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10CHECK-LABEL: posnormal_bf16:
; GFX10CHECK:       ; %bb.0:
; GFX10CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10CHECK-NEXT:    v_and_b32_e32 v1, 0x7fff, v0
; GFX10CHECK-NEXT:    v_cmp_lt_i16_e32 vcc_lo, -1, v0
; GFX10CHECK-NEXT:    v_add_nc_u16 v1, 0xff80, v1
; GFX10CHECK-NEXT:    v_cmp_gt_u16_e64 s4, 0x7f00, v1
; GFX10CHECK-NEXT:    s_and_b32 s4, s4, vcc_lo
; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11CHECK-LABEL: posnormal_bf16:
; GFX11CHECK:       ; %bb.0:
; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11CHECK-NEXT:    v_and_b32_e32 v1, 0x7fff, v0
; GFX11CHECK-NEXT:    v_cmp_lt_i16_e32 vcc_lo, -1, v0
; GFX11CHECK-NEXT:    v_add_nc_u16 v1, 0xff80, v1
; GFX11CHECK-NEXT:    v_cmp_gt_u16_e64 s0, 0x7f00, v1
; GFX11CHECK-NEXT:    s_and_b32 s0, s0, vcc_lo
; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
  %1 = call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 256) ; 0x100
  ret i1 %1
}

; Mask 0x008: negative normal only.
define i1 @negnormal_bf16(bfloat %x) nounwind {
; GFX7CHECK-LABEL: negnormal_bf16:
; GFX7CHECK:       ; %bb.0:
; GFX7CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7CHECK-NEXT:    v_mul_f32_e32 v0, 1.0, v0
; GFX7CHECK-NEXT:    v_ashrrev_i32_e32 v1, 16, v0
; GFX7CHECK-NEXT:    v_bfe_u32 v0, v0, 16, 15
; GFX7CHECK-NEXT:    v_add_i32_e32 v0, vcc, 0xffffff80, v0
; GFX7CHECK-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX7CHECK-NEXT:    s_movk_i32 s6, 0x7f00
; GFX7CHECK-NEXT:    v_cmp_gt_i32_e64 s[4:5], 0, v1
; GFX7CHECK-NEXT:    v_cmp_gt_u32_e32 vcc, s6, v0
; GFX7CHECK-NEXT:    s_and_b64 s[4:5], vcc, s[4:5]
; GFX7CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX7CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8CHECK-LABEL: negnormal_bf16:
; GFX8CHECK:       ; %bb.0:
; GFX8CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8CHECK-NEXT:    v_cmp_gt_i16_e32 vcc, 0, v0
; GFX8CHECK-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
; GFX8CHECK-NEXT:    v_add_u16_e32 v0, 0xff80, v0
; GFX8CHECK-NEXT:    s_movk_i32 s4, 0x7f00
; GFX8CHECK-NEXT:    v_cmp_gt_u16_e64 s[4:5], s4, v0
; GFX8CHECK-NEXT:    s_and_b64 s[4:5], s[4:5], vcc
; GFX8CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX8CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9CHECK-LABEL: negnormal_bf16:
; GFX9CHECK:       ; %bb.0:
; GFX9CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9CHECK-NEXT:    v_cmp_gt_i16_e32 vcc, 0, v0
; GFX9CHECK-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
; GFX9CHECK-NEXT:    v_add_u16_e32 v0, 0xff80, v0
; GFX9CHECK-NEXT:    s_movk_i32 s4, 0x7f00
; GFX9CHECK-NEXT:    v_cmp_gt_u16_e64 s[4:5], s4, v0
; GFX9CHECK-NEXT:    s_and_b64 s[4:5], s[4:5], vcc
; GFX9CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX9CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10CHECK-LABEL: negnormal_bf16:
; GFX10CHECK:       ; %bb.0:
; GFX10CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10CHECK-NEXT:    v_and_b32_e32 v1, 0x7fff, v0
; GFX10CHECK-NEXT:    v_cmp_gt_i16_e32 vcc_lo, 0, v0
; GFX10CHECK-NEXT:    v_add_nc_u16 v1, 0xff80, v1
; GFX10CHECK-NEXT:    v_cmp_gt_u16_e64 s4, 0x7f00, v1
; GFX10CHECK-NEXT:    s_and_b32 s4, s4, vcc_lo
; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11CHECK-LABEL: negnormal_bf16:
; GFX11CHECK:       ; %bb.0:
; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11CHECK-NEXT:    v_and_b32_e32 v1, 0x7fff, v0
; GFX11CHECK-NEXT:    v_cmp_gt_i16_e32 vcc_lo, 0, v0
; GFX11CHECK-NEXT:    v_add_nc_u16 v1, 0xff80, v1
; GFX11CHECK-NEXT:    v_cmp_gt_u16_e64 s0, 0x7f00, v1
; GFX11CHECK-NEXT:    s_and_b32 s0, s0, vcc_lo
; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
  %1 = call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 8) ; 0x008
  ret i1 %1
}

; Mask 0x080: positive subnormal only.
define i1 @possubnormal_bf16(bfloat %x) nounwind {
; GFX7CHECK-LABEL: possubnormal_bf16:
; GFX7CHECK:       ; %bb.0:
; GFX7CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7CHECK-NEXT:    v_mul_f32_e32 v0, 1.0, v0
; GFX7CHECK-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
; GFX7CHECK-NEXT:    v_add_i32_e32 v0, vcc, -1, v0
; GFX7CHECK-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX7CHECK-NEXT:    s_movk_i32 s4, 0x7f
; GFX7CHECK-NEXT:    v_cmp_gt_u32_e32 vcc, s4, v0
; GFX7CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX7CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8CHECK-LABEL: possubnormal_bf16:
; GFX8CHECK:       ; %bb.0:
; GFX8CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8CHECK-NEXT:    v_add_u16_e32 v0, -1, v0
; GFX8CHECK-NEXT:    s_movk_i32 s4, 0x7f
; GFX8CHECK-NEXT:    v_cmp_gt_u16_e32 vcc, s4, v0
; GFX8CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX8CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9CHECK-LABEL: possubnormal_bf16:
; GFX9CHECK:       ; %bb.0:
; GFX9CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9CHECK-NEXT:    v_add_u16_e32 v0, -1, v0
; GFX9CHECK-NEXT:    s_movk_i32 s4, 0x7f
; GFX9CHECK-NEXT:    v_cmp_gt_u16_e32 vcc, s4, v0
; GFX9CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX9CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10CHECK-LABEL: possubnormal_bf16:
; GFX10CHECK:       ; %bb.0:
; GFX10CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10CHECK-NEXT:    v_add_nc_u16 v0, v0, -1
; GFX10CHECK-NEXT:    v_cmp_gt_u16_e32 vcc_lo, 0x7f, v0
; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11CHECK-LABEL: possubnormal_bf16:
; GFX11CHECK:       ; %bb.0:
; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11CHECK-NEXT:    v_add_nc_u16 v0, v0, -1
; GFX11CHECK-NEXT:    v_cmp_gt_u16_e32 vcc_lo, 0x7f, v0
; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
  %1 = call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 128) ; 0x080
  ret i1 %1
}

; Mask 0x010: negative subnormal only.
define i1 @negsubnormal_bf16(bfloat %x) nounwind {
; GFX7CHECK-LABEL: negsubnormal_bf16:
; GFX7CHECK:       ; %bb.0:
; GFX7CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7CHECK-NEXT:    v_mul_f32_e32 v0, 1.0, v0
; GFX7CHECK-NEXT:    v_ashrrev_i32_e32 v1, 16, v0
; GFX7CHECK-NEXT:    v_bfe_u32 v0, v0, 16, 15
; GFX7CHECK-NEXT:    v_add_i32_e64 v0, s[4:5], -1, v0
; GFX7CHECK-NEXT:    s_movk_i32 s4, 0x7f
; GFX7CHECK-NEXT:    v_cmp_gt_i32_e32 vcc, 0, v1
; GFX7CHECK-NEXT:    v_cmp_gt_u32_e64 s[4:5], s4, v0
; GFX7CHECK-NEXT:    s_and_b64 s[4:5], s[4:5], vcc
; GFX7CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX7CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8CHECK-LABEL: negsubnormal_bf16:
; GFX8CHECK:       ; %bb.0:
; GFX8CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8CHECK-NEXT:    v_cmp_gt_i16_e32 vcc, 0, v0
; GFX8CHECK-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
; GFX8CHECK-NEXT:    v_add_u16_e32 v0, -1, v0
; GFX8CHECK-NEXT:    s_movk_i32 s4, 0x7f
; GFX8CHECK-NEXT:    v_cmp_gt_u16_e64 s[4:5], s4, v0
; GFX8CHECK-NEXT:    s_and_b64 s[4:5], s[4:5], vcc
; GFX8CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX8CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9CHECK-LABEL: negsubnormal_bf16:
; GFX9CHECK:       ; %bb.0:
; GFX9CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9CHECK-NEXT:    v_cmp_gt_i16_e32 vcc, 0, v0
; GFX9CHECK-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
; GFX9CHECK-NEXT:    v_add_u16_e32 v0, -1, v0
; GFX9CHECK-NEXT:    s_movk_i32 s4, 0x7f
; GFX9CHECK-NEXT:    v_cmp_gt_u16_e64 s[4:5], s4, v0
; GFX9CHECK-NEXT:    s_and_b64 s[4:5], s[4:5], vcc
; GFX9CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX9CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10CHECK-LABEL: negsubnormal_bf16:
; GFX10CHECK:       ; %bb.0:
; GFX10CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10CHECK-NEXT:    v_and_b32_e32 v1, 0x7fff, v0
; GFX10CHECK-NEXT:    v_cmp_gt_i16_e32 vcc_lo, 0, v0
; GFX10CHECK-NEXT:    v_add_nc_u16 v1, v1, -1
; GFX10CHECK-NEXT:    v_cmp_gt_u16_e64 s4, 0x7f, v1
; GFX10CHECK-NEXT:    s_and_b32 s4, s4, vcc_lo
; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11CHECK-LABEL: negsubnormal_bf16:
; GFX11CHECK:       ; %bb.0:
; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11CHECK-NEXT:    v_and_b32_e32 v1, 0x7fff, v0
; GFX11CHECK-NEXT:    v_cmp_gt_i16_e32 vcc_lo, 0, v0
; GFX11CHECK-NEXT:    v_add_nc_u16 v1, v1, -1
; GFX11CHECK-NEXT:    v_cmp_gt_u16_e64 s0, 0x7f, v1
; GFX11CHECK-NEXT:    s_and_b32 s0, s0, vcc_lo
; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
  %1 = call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 16) ; 0x010
  ret i1 %1
}

; Mask 0x040: positive zero only.
define i1 @poszero_bf16(bfloat %x) nounwind {
; GFX7CHECK-LABEL: poszero_bf16:
; GFX7CHECK:       ; %bb.0:
; GFX7CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7CHECK-NEXT:    v_mul_f32_e32 v0, 1.0, v0
; GFX7CHECK-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
; GFX7CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, 0, v0
; GFX7CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX7CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8CHECK-LABEL: poszero_bf16:
; GFX8CHECK:       ; %bb.0:
; GFX8CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8CHECK-NEXT:    v_cmp_eq_u16_e32 vcc, 0, v0
; GFX8CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX8CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9CHECK-LABEL: poszero_bf16:
; GFX9CHECK:       ; %bb.0:
; GFX9CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9CHECK-NEXT:    v_cmp_eq_u16_e32 vcc, 0, v0
; GFX9CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX9CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10CHECK-LABEL: poszero_bf16:
; GFX10CHECK:       ; %bb.0:
; GFX10CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10CHECK-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0, v0
; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11CHECK-LABEL: poszero_bf16:
; GFX11CHECK:       ; %bb.0:
; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11CHECK-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0, v0
; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
  %1 = call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 64) ; 0x040
  ret i1 %1
}

; Mask 0x020: negative zero only.
define i1 @negzero_bf16(bfloat %x) nounwind {
; GFX7CHECK-LABEL: negzero_bf16:
; GFX7CHECK:       ; %bb.0:
; GFX7CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7CHECK-NEXT:    v_mul_f32_e32 v0, 1.0, v0
; GFX7CHECK-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
; GFX7CHECK-NEXT:    s_mov_b32 s4, 0x8000
; GFX7CHECK-NEXT:    v_cmp_eq_u32_e32 vcc, s4, v0
; GFX7CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX7CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8CHECK-LABEL: negzero_bf16:
; GFX8CHECK:       ; %bb.0:
; GFX8CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8CHECK-NEXT:    s_movk_i32 s4, 0x8000
; GFX8CHECK-NEXT:    v_cmp_eq_u16_e32 vcc, s4, v0
; GFX8CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX8CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9CHECK-LABEL: negzero_bf16:
; GFX9CHECK:       ; %bb.0:
; GFX9CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9CHECK-NEXT:    s_movk_i32 s4, 0x8000
; GFX9CHECK-NEXT:    v_cmp_eq_u16_e32 vcc, s4, v0
; GFX9CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX9CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10CHECK-LABEL: negzero_bf16:
; GFX10CHECK:       ; %bb.0:
; GFX10CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10CHECK-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0x8000, v0
; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11CHECK-LABEL: negzero_bf16:
; GFX11CHECK:       ; %bb.0:
; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11CHECK-NEXT:    v_cmp_eq_u16_e32 vcc_lo, 0x8000, v0
; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
  %1 = call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 32) ; 0x020
  ret i1 %1
}

; Mask 0x1c0: positive zero | positive subnormal | positive normal.
define i1 @posfinite_bf16(bfloat %x) nounwind {
; GFX7CHECK-LABEL: posfinite_bf16:
; GFX7CHECK:       ; %bb.0:
; GFX7CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7CHECK-NEXT:    v_mul_f32_e32 v0, 1.0, v0
; GFX7CHECK-NEXT:    v_lshrrev_b32_e32 v0, 16, v0
; GFX7CHECK-NEXT:    s_movk_i32 s4, 0x7f80
; GFX7CHECK-NEXT:    v_cmp_gt_u32_e32 vcc, s4, v0
; GFX7CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX7CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8CHECK-LABEL: posfinite_bf16:
; GFX8CHECK:       ; %bb.0:
; GFX8CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8CHECK-NEXT:    s_movk_i32 s4, 0x7f80
; GFX8CHECK-NEXT:    v_cmp_gt_u16_e32 vcc, s4, v0
; GFX8CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX8CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9CHECK-LABEL: posfinite_bf16:
; GFX9CHECK:       ; %bb.0:
; GFX9CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9CHECK-NEXT:    s_movk_i32 s4, 0x7f80
; GFX9CHECK-NEXT:    v_cmp_gt_u16_e32 vcc, s4, v0
; GFX9CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX9CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10CHECK-LABEL: posfinite_bf16:
; GFX10CHECK:       ; %bb.0:
; GFX10CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10CHECK-NEXT:    v_cmp_gt_u16_e32 vcc_lo, 0x7f80, v0
; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11CHECK-LABEL: posfinite_bf16:
; GFX11CHECK:       ; %bb.0:
; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11CHECK-NEXT:    v_cmp_gt_u16_e32 vcc_lo, 0x7f80, v0
; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
  %1 = call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 448) ; 0x1c0
  ret i1 %1
}

; Mask 0x038: negative normal | negative subnormal | negative zero.
define i1 @negfinite_bf16(bfloat %x) nounwind {
; GFX7CHECK-LABEL: negfinite_bf16:
; GFX7CHECK:       ; %bb.0:
; GFX7CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7CHECK-NEXT:    v_mul_f32_e32 v0, 1.0, v0
; GFX7CHECK-NEXT:    v_ashrrev_i32_e32 v1, 16, v0
; GFX7CHECK-NEXT:    v_bfe_u32 v0, v0, 16, 15
; GFX7CHECK-NEXT:    s_movk_i32 s4, 0x7f80
; GFX7CHECK-NEXT:    v_cmp_gt_i32_e32 vcc, 0, v1
; GFX7CHECK-NEXT:    v_cmp_gt_i32_e64 s[4:5], s4, v0
; GFX7CHECK-NEXT:    s_and_b64 s[4:5], s[4:5], vcc
; GFX7CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX7CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8CHECK-LABEL: negfinite_bf16:
; GFX8CHECK:       ; %bb.0:
; GFX8CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8CHECK-NEXT:    v_cmp_gt_i16_e32 vcc, 0, v0
; GFX8CHECK-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
; GFX8CHECK-NEXT:    s_movk_i32 s4, 0x7f80
; GFX8CHECK-NEXT:    v_cmp_gt_i16_e64 s[4:5], s4, v0
; GFX8CHECK-NEXT:    s_and_b64 s[4:5], s[4:5], vcc
; GFX8CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX8CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9CHECK-LABEL: negfinite_bf16:
; GFX9CHECK:       ; %bb.0:
; GFX9CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9CHECK-NEXT:    v_cmp_gt_i16_e32 vcc, 0, v0
; GFX9CHECK-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
; GFX9CHECK-NEXT:    s_movk_i32 s4, 0x7f80
; GFX9CHECK-NEXT:    v_cmp_gt_i16_e64 s[4:5], s4, v0
; GFX9CHECK-NEXT:    s_and_b64 s[4:5], s[4:5], vcc
; GFX9CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; GFX9CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10CHECK-LABEL: negfinite_bf16:
; GFX10CHECK:       ; %bb.0:
; GFX10CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10CHECK-NEXT:    v_and_b32_e32 v1, 0x7fff, v0
; GFX10CHECK-NEXT:    v_cmp_gt_i16_e32 vcc_lo, 0, v0
; GFX10CHECK-NEXT:    v_cmp_gt_i16_e64 s4, 0x7f80, v1
; GFX10CHECK-NEXT:    s_and_b32 s4, s4, vcc_lo
; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s4
; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11CHECK-LABEL: negfinite_bf16:
; GFX11CHECK:       ; %bb.0:
; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11CHECK-NEXT:    v_and_b32_e32 v1, 0x7fff, v0
; GFX11CHECK-NEXT:    v_cmp_gt_i16_e32 vcc_lo, 0, v0
; GFX11CHECK-NEXT:    v_cmp_gt_i16_e64 s0, 0x7f80, v1
; GFX11CHECK-NEXT:    s_and_b32 s0, s0, vcc_lo
; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s0
; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
  %1 = call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 56) ; 0x038
  ret i1 %1
}

; Mask 3: signaling NaN | quiet NaN, returned directly as i1.
define i1 @isnan_bf16(bfloat %x) nounwind {
; GFX7CHECK-LABEL: isnan_bf16:
; GFX7CHECK:       ; %bb.0:
; GFX7CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7CHECK-NEXT:    v_mul_f32_e32 v0, 1.0, v0
; GFX7CHECK-NEXT:    v_bfe_u32 v0, v0, 16, 15
; GFX7CHECK-NEXT:    s_movk_i32 s4, 0x7f80
; GFX7CHECK-NEXT:    v_cmp_lt_i32_e32 vcc, s4, v0
; GFX7CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX7CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8CHECK-LABEL: isnan_bf16:
; GFX8CHECK:       ; %bb.0:
; GFX8CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8CHECK-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
; GFX8CHECK-NEXT:    s_movk_i32 s4, 0x7f80
; GFX8CHECK-NEXT:    v_cmp_lt_i16_e32 vcc, s4, v0
; GFX8CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX8CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9CHECK-LABEL: isnan_bf16:
; GFX9CHECK:       ; %bb.0:
; GFX9CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9CHECK-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
; GFX9CHECK-NEXT:    s_movk_i32 s4, 0x7f80
; GFX9CHECK-NEXT:    v_cmp_lt_i16_e32 vcc, s4, v0
; GFX9CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX9CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10CHECK-LABEL: isnan_bf16:
; GFX10CHECK:       ; %bb.0:
; GFX10CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10CHECK-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
; GFX10CHECK-NEXT:    v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v0
; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11CHECK-LABEL: isnan_bf16:
; GFX11CHECK:       ; %bb.0:
; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11CHECK-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
; GFX11CHECK-NEXT:    v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v0
; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
  %1 = call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 3)  ; nan
  ret i1 %1
}

; Mask 1020 (0x3fc): every class except the two NaN classes.
define i1 @not_isnan_bf16(bfloat %x) {
; GFX7CHECK-LABEL: not_isnan_bf16:
; GFX7CHECK:       ; %bb.0:
; GFX7CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7CHECK-NEXT:    v_mul_f32_e32 v0, 1.0, v0
; GFX7CHECK-NEXT:    v_bfe_u32 v0, v0, 16, 15
; GFX7CHECK-NEXT:    s_movk_i32 s4, 0x7f81
; GFX7CHECK-NEXT:    v_cmp_gt_i32_e32 vcc, s4, v0
; GFX7CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX7CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8CHECK-LABEL: not_isnan_bf16:
; GFX8CHECK:       ; %bb.0:
; GFX8CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8CHECK-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
; GFX8CHECK-NEXT:    s_movk_i32 s4, 0x7f81
; GFX8CHECK-NEXT:    v_cmp_gt_i16_e32 vcc, s4, v0
; GFX8CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX8CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9CHECK-LABEL: not_isnan_bf16:
; GFX9CHECK:       ; %bb.0:
; GFX9CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9CHECK-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
; GFX9CHECK-NEXT:    s_movk_i32 s4, 0x7f81
; GFX9CHECK-NEXT:    v_cmp_gt_i16_e32 vcc, s4, v0
; GFX9CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX9CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10CHECK-LABEL: not_isnan_bf16:
; GFX10CHECK:       ; %bb.0:
; GFX10CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10CHECK-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
; GFX10CHECK-NEXT:    v_cmp_gt_i16_e32 vcc_lo, 0x7f81, v0
; GFX10CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX10CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11CHECK-LABEL: not_isnan_bf16:
; GFX11CHECK:       ; %bb.0:
; GFX11CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11CHECK-NEXT:    v_and_b32_e32 v0, 0x7fff, v0
; GFX11CHECK-NEXT:    v_cmp_gt_i16_e32 vcc_lo, 0x7f81, v0
; GFX11CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11CHECK-NEXT:    s_setpc_b64 s[30:31]
  %class = call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 1020)  ; ~nan
  ret i1 %class
}

define <2 x i1> @isnan_v2bf16(<2 x bfloat> %x) nounwind {
; GFX7CHECK-LABEL: isnan_v2bf16:
; GFX7CHECK:       ; %bb.0:
; GFX7CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7CHECK-NEXT:    v_mul_f32_e32 v0, 1.0, v0
; GFX7CHECK-NEXT:    v_mul_f32_e32 v1, 1.0, v1
; GFX7CHECK-NEXT:    s_movk_i32 s4, 0x7f80
; GFX7CHECK-NEXT:    v_bfe_u32 v0, v0, 16, 15
; GFX7CHECK-NEXT:    v_bfe_u32 v1, v1, 16, 15
; GFX7CHECK-NEXT:    v_cmp_lt_i32_e32 vcc, s4, v0
; GFX7CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX7CHECK-NEXT:    v_cmp_lt_i32_e32 vcc, s4, v1
; GFX7CHECK-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
; GFX7CHECK-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8CHECK-LABEL: isnan_v2bf16:
; GFX8CHECK:       ; %bb.0:
; GFX8CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8CHECK-NEXT:    v_bfe_u32 v1, v0, 16, 15
; GFX8CHECK-NEXT:    s_movk_i32 s4, 0x7f80
; GFX8CHECK-NEXT:    v_and_b32_e32 v0, 0x7fff7fff, v0
; GFX8CHECK-NEXT:    v_cmp_lt_i16_e32 vcc, s4, v0
; GFX8CHECK-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX8CHECK-NEXT:    v_cmp_lt_i16_e32 vcc, s4, v1
; GFX8CHECK-NEXT:    v_cndmask_b32_e64 v1, 0, 1, vcc
; GFX8CHECK-NEXT:    s_setpc_b64
s[30:31] 909; 910; GFX9CHECK-LABEL: isnan_v2bf16: 911; GFX9CHECK: ; %bb.0: 912; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 913; GFX9CHECK-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v0 914; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7f80 915; GFX9CHECK-NEXT: v_cmp_lt_i16_e32 vcc, s4, v1 916; GFX9CHECK-NEXT: v_cmp_gt_i16_sdwa s[4:5], v1, s4 src0_sel:WORD_1 src1_sel:DWORD 917; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 918; GFX9CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] 919; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] 920; 921; GFX10CHECK-LABEL: isnan_v2bf16: 922; GFX10CHECK: ; %bb.0: 923; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 924; GFX10CHECK-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v0 925; GFX10CHECK-NEXT: v_mov_b32_e32 v2, 0x7f80 926; GFX10CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v1 927; GFX10CHECK-NEXT: v_cmp_gt_i16_sdwa s4, v1, v2 src0_sel:WORD_1 src1_sel:DWORD 928; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo 929; GFX10CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s4 930; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] 931; 932; GFX11CHECK-LABEL: isnan_v2bf16: 933; GFX11CHECK: ; %bb.0: 934; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 935; GFX11CHECK-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0 936; GFX11CHECK-NEXT: v_lshrrev_b32_e32 v1, 16, v0 937; GFX11CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v0 938; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo 939; GFX11CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v1 940; GFX11CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo 941; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] 942 %1 = call <2 x i1> @llvm.is.fpclass.v2bf16(<2 x bfloat> %x, i32 3) ; nan 943 ret <2 x i1> %1 944} 945 946define <3 x i1> @isnan_v3bf16(<3 x bfloat> %x) nounwind { 947; GFX7CHECK-LABEL: isnan_v3bf16: 948; GFX7CHECK: ; %bb.0: 949; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 950; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0 951; GFX7CHECK-NEXT: v_mul_f32_e32 v1, 1.0, v1 952; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7f80 953; GFX7CHECK-NEXT: 
v_bfe_u32 v0, v0, 16, 15 954; GFX7CHECK-NEXT: v_mul_f32_e32 v2, 1.0, v2 955; GFX7CHECK-NEXT: v_bfe_u32 v1, v1, 16, 15 956; GFX7CHECK-NEXT: v_cmp_lt_i32_e32 vcc, s4, v0 957; GFX7CHECK-NEXT: v_bfe_u32 v2, v2, 16, 15 958; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 959; GFX7CHECK-NEXT: v_cmp_lt_i32_e32 vcc, s4, v1 960; GFX7CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 961; GFX7CHECK-NEXT: v_cmp_lt_i32_e32 vcc, s4, v2 962; GFX7CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 963; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] 964; 965; GFX8CHECK-LABEL: isnan_v3bf16: 966; GFX8CHECK: ; %bb.0: 967; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 968; GFX8CHECK-NEXT: v_and_b32_e32 v2, 0x7fff, v1 969; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7f80 970; GFX8CHECK-NEXT: v_bfe_u32 v1, v0, 16, 15 971; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0 972; GFX8CHECK-NEXT: v_cmp_lt_i16_e32 vcc, s4, v0 973; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 974; GFX8CHECK-NEXT: v_cmp_lt_i16_e32 vcc, s4, v1 975; GFX8CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 976; GFX8CHECK-NEXT: v_cmp_lt_i16_e32 vcc, s4, v2 977; GFX8CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 978; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] 979; 980; GFX9CHECK-LABEL: isnan_v3bf16: 981; GFX9CHECK: ; %bb.0: 982; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 983; GFX9CHECK-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v0 984; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7f80 985; GFX9CHECK-NEXT: v_and_b32_e32 v1, 0x7fff, v1 986; GFX9CHECK-NEXT: v_cmp_lt_i16_e32 vcc, s4, v3 987; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 988; GFX9CHECK-NEXT: v_cmp_lt_i16_e32 vcc, s4, v1 989; GFX9CHECK-NEXT: v_cmp_gt_i16_sdwa s[4:5], v3, s4 src0_sel:WORD_1 src1_sel:DWORD 990; GFX9CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 991; GFX9CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] 992; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] 993; 994; GFX10CHECK-LABEL: isnan_v3bf16: 995; GFX10CHECK: ; %bb.0: 996; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 997; 
GFX10CHECK-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v0 998; GFX10CHECK-NEXT: v_mov_b32_e32 v3, 0x7f80 999; GFX10CHECK-NEXT: v_and_b32_e32 v4, 0x7fff, v1 1000; GFX10CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v2 1001; GFX10CHECK-NEXT: v_cmp_gt_i16_sdwa s4, v2, v3 src0_sel:WORD_1 src1_sel:DWORD 1002; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo 1003; GFX10CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v4 1004; GFX10CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s4 1005; GFX10CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo 1006; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] 1007; 1008; GFX11CHECK-LABEL: isnan_v3bf16: 1009; GFX11CHECK: ; %bb.0: 1010; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1011; GFX11CHECK-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0 1012; GFX11CHECK-NEXT: v_and_b32_e32 v3, 0x7fff, v1 1013; GFX11CHECK-NEXT: v_lshrrev_b32_e32 v2, 16, v0 1014; GFX11CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v0 1015; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo 1016; GFX11CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v2 1017; GFX11CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo 1018; GFX11CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v3 1019; GFX11CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo 1020; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] 1021 %1 = call <3 x i1> @llvm.is.fpclass.v3bf16(<3 x bfloat> %x, i32 3) ; nan 1022 ret <3 x i1> %1 1023} 1024 1025define <4 x i1> @isnan_v4bf16(<4 x bfloat> %x) nounwind { 1026; GFX7CHECK-LABEL: isnan_v4bf16: 1027; GFX7CHECK: ; %bb.0: 1028; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1029; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0 1030; GFX7CHECK-NEXT: v_mul_f32_e32 v1, 1.0, v1 1031; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7f80 1032; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15 1033; GFX7CHECK-NEXT: v_mul_f32_e32 v2, 1.0, v2 1034; GFX7CHECK-NEXT: v_bfe_u32 v1, v1, 16, 15 1035; GFX7CHECK-NEXT: v_cmp_lt_i32_e32 vcc, s4, v0 1036; GFX7CHECK-NEXT: v_mul_f32_e32 v3, 1.0, v3 1037; GFX7CHECK-NEXT: v_bfe_u32 v2, v2, 16, 15 1038; GFX7CHECK-NEXT: 
v_cndmask_b32_e64 v0, 0, 1, vcc 1039; GFX7CHECK-NEXT: v_cmp_lt_i32_e32 vcc, s4, v1 1040; GFX7CHECK-NEXT: v_bfe_u32 v3, v3, 16, 15 1041; GFX7CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 1042; GFX7CHECK-NEXT: v_cmp_lt_i32_e32 vcc, s4, v2 1043; GFX7CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 1044; GFX7CHECK-NEXT: v_cmp_lt_i32_e32 vcc, s4, v3 1045; GFX7CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc 1046; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] 1047; 1048; GFX8CHECK-LABEL: isnan_v4bf16: 1049; GFX8CHECK: ; %bb.0: 1050; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1051; GFX8CHECK-NEXT: v_bfe_u32 v3, v1, 16, 15 1052; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7f80 1053; GFX8CHECK-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v1 1054; GFX8CHECK-NEXT: v_bfe_u32 v1, v0, 16, 15 1055; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0 1056; GFX8CHECK-NEXT: v_cmp_lt_i16_e32 vcc, s4, v0 1057; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 1058; GFX8CHECK-NEXT: v_cmp_lt_i16_e32 vcc, s4, v1 1059; GFX8CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc 1060; GFX8CHECK-NEXT: v_cmp_lt_i16_e32 vcc, s4, v2 1061; GFX8CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc 1062; GFX8CHECK-NEXT: v_cmp_lt_i16_e32 vcc, s4, v3 1063; GFX8CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc 1064; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] 1065; 1066; GFX9CHECK-LABEL: isnan_v4bf16: 1067; GFX9CHECK: ; %bb.0: 1068; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1069; GFX9CHECK-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v1 1070; GFX9CHECK-NEXT: s_movk_i32 s6, 0x7f80 1071; GFX9CHECK-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v0 1072; GFX9CHECK-NEXT: v_cmp_lt_i16_e32 vcc, s6, v1 1073; GFX9CHECK-NEXT: v_cmp_gt_i16_sdwa s[4:5], v1, s6 src0_sel:WORD_1 src1_sel:DWORD 1074; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 1075; GFX9CHECK-NEXT: v_cmp_lt_i16_e32 vcc, s6, v3 1076; GFX9CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[4:5] 1077; GFX9CHECK-NEXT: v_cmp_gt_i16_sdwa s[4:5], v3, s6 src0_sel:WORD_1 src1_sel:DWORD 1078; GFX9CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, 
vcc 1079; GFX9CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, s[4:5] 1080; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] 1081; 1082; GFX10CHECK-LABEL: isnan_v4bf16: 1083; GFX10CHECK: ; %bb.0: 1084; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1085; GFX10CHECK-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v0 1086; GFX10CHECK-NEXT: v_mov_b32_e32 v5, 0x7f80 1087; GFX10CHECK-NEXT: v_and_b32_e32 v4, 0x7fff7fff, v1 1088; GFX10CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v3 1089; GFX10CHECK-NEXT: v_cmp_gt_i16_sdwa s4, v3, v5 src0_sel:WORD_1 src1_sel:DWORD 1090; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo 1091; GFX10CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v4 1092; GFX10CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, s4 1093; GFX10CHECK-NEXT: v_cmp_gt_i16_sdwa s4, v4, v5 src0_sel:WORD_1 src1_sel:DWORD 1094; GFX10CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo 1095; GFX10CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, s4 1096; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] 1097; 1098; GFX11CHECK-LABEL: isnan_v4bf16: 1099; GFX11CHECK: ; %bb.0: 1100; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1101; GFX11CHECK-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v0 1102; GFX11CHECK-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v1 1103; GFX11CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v0 1104; GFX11CHECK-NEXT: v_lshrrev_b32_e32 v4, 16, v0 1105; GFX11CHECK-NEXT: v_lshrrev_b32_e32 v3, 16, v1 1106; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo 1107; GFX11CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v1 1108; GFX11CHECK-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo 1109; GFX11CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v4 1110; GFX11CHECK-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo 1111; GFX11CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v3 1112; GFX11CHECK-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc_lo 1113; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] 1114 %1 = call <4 x i1> @llvm.is.fpclass.v4bf16(<4 x bfloat> %x, i32 3) ; nan 1115 ret <4 x i1> %1 1116} 1117 1118; FIXME: Broken for gfx6/7 1119; define i1 @isnan_bf16_strictfp(bfloat 
%x) strictfp nounwind { 1120; %1 = call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 3) strictfp ; nan 1121; ret i1 %1 1122; } 1123 1124define i1 @isinf_bf16(bfloat %x) nounwind { 1125; GFX7CHECK-LABEL: isinf_bf16: 1126; GFX7CHECK: ; %bb.0: 1127; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1128; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0 1129; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15 1130; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7f80 1131; GFX7CHECK-NEXT: v_cmp_eq_u32_e32 vcc, s4, v0 1132; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 1133; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] 1134; 1135; GFX8CHECK-LABEL: isinf_bf16: 1136; GFX8CHECK: ; %bb.0: 1137; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1138; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 1139; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7f80 1140; GFX8CHECK-NEXT: v_cmp_eq_u16_e32 vcc, s4, v0 1141; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 1142; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] 1143; 1144; GFX9CHECK-LABEL: isinf_bf16: 1145; GFX9CHECK: ; %bb.0: 1146; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1147; GFX9CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 1148; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7f80 1149; GFX9CHECK-NEXT: v_cmp_eq_u16_e32 vcc, s4, v0 1150; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 1151; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] 1152; 1153; GFX10CHECK-LABEL: isinf_bf16: 1154; GFX10CHECK: ; %bb.0: 1155; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1156; GFX10CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 1157; GFX10CHECK-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x7f80, v0 1158; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo 1159; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] 1160; 1161; GFX11CHECK-LABEL: isinf_bf16: 1162; GFX11CHECK: ; %bb.0: 1163; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1164; GFX11CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 1165; GFX11CHECK-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x7f80, v0 1166; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo 1167; GFX11CHECK-NEXT: 
s_setpc_b64 s[30:31] 1168 %1 = call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 516) ; 0x204 = "inf" 1169 ret i1 %1 1170} 1171 1172define i1 @isfinite_bf16(bfloat %x) nounwind { 1173; GFX7CHECK-LABEL: isfinite_bf16: 1174; GFX7CHECK: ; %bb.0: 1175; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1176; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0 1177; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15 1178; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7f80 1179; GFX7CHECK-NEXT: v_cmp_gt_i32_e32 vcc, s4, v0 1180; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 1181; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] 1182; 1183; GFX8CHECK-LABEL: isfinite_bf16: 1184; GFX8CHECK: ; %bb.0: 1185; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1186; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 1187; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7f80 1188; GFX8CHECK-NEXT: v_cmp_gt_i16_e32 vcc, s4, v0 1189; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 1190; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] 1191; 1192; GFX9CHECK-LABEL: isfinite_bf16: 1193; GFX9CHECK: ; %bb.0: 1194; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1195; GFX9CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 1196; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7f80 1197; GFX9CHECK-NEXT: v_cmp_gt_i16_e32 vcc, s4, v0 1198; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 1199; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] 1200; 1201; GFX10CHECK-LABEL: isfinite_bf16: 1202; GFX10CHECK: ; %bb.0: 1203; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1204; GFX10CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 1205; GFX10CHECK-NEXT: v_cmp_gt_i16_e32 vcc_lo, 0x7f80, v0 1206; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo 1207; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] 1208; 1209; GFX11CHECK-LABEL: isfinite_bf16: 1210; GFX11CHECK: ; %bb.0: 1211; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1212; GFX11CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 1213; GFX11CHECK-NEXT: v_cmp_gt_i16_e32 vcc_lo, 0x7f80, v0 1214; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo 1215; 
GFX11CHECK-NEXT: s_setpc_b64 s[30:31] 1216 %1 = call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 504) ; 0x1f8 = "finite" 1217 ret i1 %1 1218} 1219 1220define i1 @issubnormal_or_zero_bf16(bfloat %x) { 1221; GFX7CHECK-LABEL: issubnormal_or_zero_bf16: 1222; GFX7CHECK: ; %bb.0: ; %entry 1223; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1224; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0 1225; GFX7CHECK-NEXT: v_lshrrev_b32_e32 v0, 16, v0 1226; GFX7CHECK-NEXT: v_and_b32_e32 v0, 0x7f80, v0 1227; GFX7CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 1228; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 1229; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] 1230; 1231; GFX8CHECK-LABEL: issubnormal_or_zero_bf16: 1232; GFX8CHECK: ; %bb.0: ; %entry 1233; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1234; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7f80, v0 1235; GFX8CHECK-NEXT: v_cmp_eq_u16_e32 vcc, 0, v0 1236; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 1237; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] 1238; 1239; GFX9CHECK-LABEL: issubnormal_or_zero_bf16: 1240; GFX9CHECK: ; %bb.0: ; %entry 1241; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1242; GFX9CHECK-NEXT: v_and_b32_e32 v0, 0x7f80, v0 1243; GFX9CHECK-NEXT: v_cmp_eq_u16_e32 vcc, 0, v0 1244; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 1245; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] 1246; 1247; GFX10CHECK-LABEL: issubnormal_or_zero_bf16: 1248; GFX10CHECK: ; %bb.0: ; %entry 1249; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1250; GFX10CHECK-NEXT: v_and_b32_e32 v0, 0x7f80, v0 1251; GFX10CHECK-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0, v0 1252; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo 1253; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] 1254; 1255; GFX11CHECK-LABEL: issubnormal_or_zero_bf16: 1256; GFX11CHECK: ; %bb.0: ; %entry 1257; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1258; GFX11CHECK-NEXT: v_and_b32_e32 v0, 0x7f80, v0 1259; GFX11CHECK-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0, v0 1260; GFX11CHECK-NEXT: 
v_cndmask_b32_e64 v0, 0, 1, vcc_lo 1261; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] 1262entry: 1263 %class = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 240) ; 0xf0 = "subnormal|zero" 1264 ret i1 %class 1265} 1266 1267define i1 @not_issubnormal_or_zero_bf16(bfloat %x) { 1268; GFX7CHECK-LABEL: not_issubnormal_or_zero_bf16: 1269; GFX7CHECK: ; %bb.0: ; %entry 1270; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1271; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0 1272; GFX7CHECK-NEXT: v_lshrrev_b32_e32 v0, 16, v0 1273; GFX7CHECK-NEXT: v_and_b32_e32 v0, 0x7f80, v0 1274; GFX7CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 1275; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 1276; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] 1277; 1278; GFX8CHECK-LABEL: not_issubnormal_or_zero_bf16: 1279; GFX8CHECK: ; %bb.0: ; %entry 1280; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1281; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7f80, v0 1282; GFX8CHECK-NEXT: v_cmp_ne_u16_e32 vcc, 0, v0 1283; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 1284; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] 1285; 1286; GFX9CHECK-LABEL: not_issubnormal_or_zero_bf16: 1287; GFX9CHECK: ; %bb.0: ; %entry 1288; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1289; GFX9CHECK-NEXT: v_and_b32_e32 v0, 0x7f80, v0 1290; GFX9CHECK-NEXT: v_cmp_ne_u16_e32 vcc, 0, v0 1291; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 1292; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] 1293; 1294; GFX10CHECK-LABEL: not_issubnormal_or_zero_bf16: 1295; GFX10CHECK: ; %bb.0: ; %entry 1296; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1297; GFX10CHECK-NEXT: v_and_b32_e32 v0, 0x7f80, v0 1298; GFX10CHECK-NEXT: v_cmp_ne_u16_e32 vcc_lo, 0, v0 1299; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo 1300; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] 1301; 1302; GFX11CHECK-LABEL: not_issubnormal_or_zero_bf16: 1303; GFX11CHECK: ; %bb.0: ; %entry 1304; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1305; GFX11CHECK-NEXT: v_and_b32_e32 v0, 
0x7f80, v0 1306; GFX11CHECK-NEXT: v_cmp_ne_u16_e32 vcc_lo, 0, v0 1307; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo 1308; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] 1309entry: 1310 %class = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 783) ; ~0xf0 = "~(subnormal|zero)" 1311 ret i1 %class 1312} 1313 1314define i1 @isnormal_bf16(bfloat %x) { 1315; GFX7CHECK-LABEL: isnormal_bf16: 1316; GFX7CHECK: ; %bb.0: 1317; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1318; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0 1319; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15 1320; GFX7CHECK-NEXT: v_add_i32_e32 v0, vcc, 0xffffff80, v0 1321; GFX7CHECK-NEXT: v_and_b32_e32 v0, 0xffff, v0 1322; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7f00 1323; GFX7CHECK-NEXT: v_cmp_gt_u32_e32 vcc, s4, v0 1324; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 1325; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] 1326; 1327; GFX8CHECK-LABEL: isnormal_bf16: 1328; GFX8CHECK: ; %bb.0: 1329; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1330; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 1331; GFX8CHECK-NEXT: v_add_u16_e32 v0, 0xff80, v0 1332; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7f00 1333; GFX8CHECK-NEXT: v_cmp_gt_u16_e32 vcc, s4, v0 1334; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 1335; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] 1336; 1337; GFX9CHECK-LABEL: isnormal_bf16: 1338; GFX9CHECK: ; %bb.0: 1339; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1340; GFX9CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 1341; GFX9CHECK-NEXT: v_add_u16_e32 v0, 0xff80, v0 1342; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7f00 1343; GFX9CHECK-NEXT: v_cmp_gt_u16_e32 vcc, s4, v0 1344; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 1345; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] 1346; 1347; GFX10CHECK-LABEL: isnormal_bf16: 1348; GFX10CHECK: ; %bb.0: 1349; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1350; GFX10CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 1351; GFX10CHECK-NEXT: v_add_nc_u16 v0, 0xff80, v0 1352; GFX10CHECK-NEXT: 
v_cmp_gt_u16_e32 vcc_lo, 0x7f00, v0 1353; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo 1354; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] 1355; 1356; GFX11CHECK-LABEL: isnormal_bf16: 1357; GFX11CHECK: ; %bb.0: 1358; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1359; GFX11CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 1360; GFX11CHECK-NEXT: v_add_nc_u16 v0, 0xff80, v0 1361; GFX11CHECK-NEXT: v_cmp_gt_u16_e32 vcc_lo, 0x7f00, v0 1362; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo 1363; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] 1364 %class = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 264) ; 0x108 = "normal" 1365 ret i1 %class 1366} 1367 1368define i1 @not_isnormal_bf16(bfloat %x) { 1369; GFX7CHECK-LABEL: not_isnormal_bf16: 1370; GFX7CHECK: ; %bb.0: 1371; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1372; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0 1373; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15 1374; GFX7CHECK-NEXT: v_add_i32_e32 v0, vcc, 0xffffff80, v0 1375; GFX7CHECK-NEXT: v_and_b32_e32 v0, 0xffff, v0 1376; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7eff 1377; GFX7CHECK-NEXT: v_cmp_lt_u32_e32 vcc, s4, v0 1378; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 1379; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] 1380; 1381; GFX8CHECK-LABEL: not_isnormal_bf16: 1382; GFX8CHECK: ; %bb.0: 1383; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1384; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 1385; GFX8CHECK-NEXT: v_add_u16_e32 v0, 0xff80, v0 1386; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7eff 1387; GFX8CHECK-NEXT: v_cmp_lt_u16_e32 vcc, s4, v0 1388; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 1389; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] 1390; 1391; GFX9CHECK-LABEL: not_isnormal_bf16: 1392; GFX9CHECK: ; %bb.0: 1393; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1394; GFX9CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 1395; GFX9CHECK-NEXT: v_add_u16_e32 v0, 0xff80, v0 1396; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7eff 1397; GFX9CHECK-NEXT: v_cmp_lt_u16_e32 vcc, s4, v0 1398; 
GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 1399; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] 1400; 1401; GFX10CHECK-LABEL: not_isnormal_bf16: 1402; GFX10CHECK: ; %bb.0: 1403; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1404; GFX10CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 1405; GFX10CHECK-NEXT: v_add_nc_u16 v0, 0xff80, v0 1406; GFX10CHECK-NEXT: v_cmp_lt_u16_e32 vcc_lo, 0x7eff, v0 1407; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo 1408; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] 1409; 1410; GFX11CHECK-LABEL: not_isnormal_bf16: 1411; GFX11CHECK: ; %bb.0: 1412; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1413; GFX11CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 1414; GFX11CHECK-NEXT: v_add_nc_u16 v0, 0xff80, v0 1415; GFX11CHECK-NEXT: v_cmp_lt_u16_e32 vcc_lo, 0x7eff, v0 1416; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo 1417; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] 1418 %class = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 759) ; ~0x108 = "~normal" 1419 ret i1 %class 1420} 1421 1422define i1 @not_is_plus_normal_bf16(bfloat %x) { 1423; GFX7CHECK-LABEL: not_is_plus_normal_bf16: 1424; GFX7CHECK: ; %bb.0: 1425; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1426; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0 1427; GFX7CHECK-NEXT: v_ashrrev_i32_e32 v1, 16, v0 1428; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15 1429; GFX7CHECK-NEXT: v_add_i32_e32 v0, vcc, 0xffffff80, v0 1430; GFX7CHECK-NEXT: v_and_b32_e32 v0, 0xffff, v0 1431; GFX7CHECK-NEXT: s_movk_i32 s6, 0x7eff 1432; GFX7CHECK-NEXT: v_cmp_gt_i32_e64 s[4:5], 0, v1 1433; GFX7CHECK-NEXT: v_cmp_lt_u32_e32 vcc, s6, v0 1434; GFX7CHECK-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 1435; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] 1436; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] 1437; 1438; GFX8CHECK-LABEL: not_is_plus_normal_bf16: 1439; GFX8CHECK: ; %bb.0: 1440; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1441; GFX8CHECK-NEXT: v_cmp_gt_i16_e32 vcc, 0, v0 1442; GFX8CHECK-NEXT: v_and_b32_e32 v0, 
0x7fff, v0 1443; GFX8CHECK-NEXT: v_add_u16_e32 v0, 0xff80, v0 1444; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7eff 1445; GFX8CHECK-NEXT: v_cmp_lt_u16_e64 s[4:5], s4, v0 1446; GFX8CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc 1447; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] 1448; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] 1449; 1450; GFX9CHECK-LABEL: not_is_plus_normal_bf16: 1451; GFX9CHECK: ; %bb.0: 1452; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1453; GFX9CHECK-NEXT: v_cmp_gt_i16_e32 vcc, 0, v0 1454; GFX9CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 1455; GFX9CHECK-NEXT: v_add_u16_e32 v0, 0xff80, v0 1456; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7eff 1457; GFX9CHECK-NEXT: v_cmp_lt_u16_e64 s[4:5], s4, v0 1458; GFX9CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc 1459; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] 1460; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] 1461; 1462; GFX10CHECK-LABEL: not_is_plus_normal_bf16: 1463; GFX10CHECK: ; %bb.0: 1464; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1465; GFX10CHECK-NEXT: v_and_b32_e32 v1, 0x7fff, v0 1466; GFX10CHECK-NEXT: v_cmp_gt_i16_e32 vcc_lo, 0, v0 1467; GFX10CHECK-NEXT: v_add_nc_u16 v1, 0xff80, v1 1468; GFX10CHECK-NEXT: v_cmp_lt_u16_e64 s4, 0x7eff, v1 1469; GFX10CHECK-NEXT: s_or_b32 s4, s4, vcc_lo 1470; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 1471; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] 1472; 1473; GFX11CHECK-LABEL: not_is_plus_normal_bf16: 1474; GFX11CHECK: ; %bb.0: 1475; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1476; GFX11CHECK-NEXT: v_and_b32_e32 v1, 0x7fff, v0 1477; GFX11CHECK-NEXT: v_cmp_gt_i16_e32 vcc_lo, 0, v0 1478; GFX11CHECK-NEXT: v_add_nc_u16 v1, 0xff80, v1 1479; GFX11CHECK-NEXT: v_cmp_lt_u16_e64 s0, 0x7eff, v1 1480; GFX11CHECK-NEXT: s_or_b32 s0, s0, vcc_lo 1481; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 1482; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] 1483 %class = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 767) ; ~0x100 = ~"+normal" 1484 ret i1 %class 1485} 1486 1487define i1 
@not_is_neg_normal_bf16(bfloat %x) { 1488; GFX7CHECK-LABEL: not_is_neg_normal_bf16: 1489; GFX7CHECK: ; %bb.0: 1490; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1491; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0 1492; GFX7CHECK-NEXT: v_ashrrev_i32_e32 v1, 16, v0 1493; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15 1494; GFX7CHECK-NEXT: v_add_i32_e32 v0, vcc, 0xffffff80, v0 1495; GFX7CHECK-NEXT: v_and_b32_e32 v0, 0xffff, v0 1496; GFX7CHECK-NEXT: s_movk_i32 s6, 0x7eff 1497; GFX7CHECK-NEXT: v_cmp_lt_i32_e64 s[4:5], -1, v1 1498; GFX7CHECK-NEXT: v_cmp_lt_u32_e32 vcc, s6, v0 1499; GFX7CHECK-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 1500; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] 1501; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] 1502; 1503; GFX8CHECK-LABEL: not_is_neg_normal_bf16: 1504; GFX8CHECK: ; %bb.0: 1505; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1506; GFX8CHECK-NEXT: v_cmp_lt_i16_e32 vcc, -1, v0 1507; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 1508; GFX8CHECK-NEXT: v_add_u16_e32 v0, 0xff80, v0 1509; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7eff 1510; GFX8CHECK-NEXT: v_cmp_lt_u16_e64 s[4:5], s4, v0 1511; GFX8CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc 1512; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] 1513; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] 1514; 1515; GFX9CHECK-LABEL: not_is_neg_normal_bf16: 1516; GFX9CHECK: ; %bb.0: 1517; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1518; GFX9CHECK-NEXT: v_cmp_lt_i16_e32 vcc, -1, v0 1519; GFX9CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 1520; GFX9CHECK-NEXT: v_add_u16_e32 v0, 0xff80, v0 1521; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7eff 1522; GFX9CHECK-NEXT: v_cmp_lt_u16_e64 s[4:5], s4, v0 1523; GFX9CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc 1524; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] 1525; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] 1526; 1527; GFX10CHECK-LABEL: not_is_neg_normal_bf16: 1528; GFX10CHECK: ; %bb.0: 1529; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1530; GFX10CHECK-NEXT: v_and_b32_e32 v1, 
0x7fff, v0 1531; GFX10CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, -1, v0 1532; GFX10CHECK-NEXT: v_add_nc_u16 v1, 0xff80, v1 1533; GFX10CHECK-NEXT: v_cmp_lt_u16_e64 s4, 0x7eff, v1 1534; GFX10CHECK-NEXT: s_or_b32 s4, s4, vcc_lo 1535; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 1536; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] 1537; 1538; GFX11CHECK-LABEL: not_is_neg_normal_bf16: 1539; GFX11CHECK: ; %bb.0: 1540; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1541; GFX11CHECK-NEXT: v_and_b32_e32 v1, 0x7fff, v0 1542; GFX11CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, -1, v0 1543; GFX11CHECK-NEXT: v_add_nc_u16 v1, 0xff80, v1 1544; GFX11CHECK-NEXT: v_cmp_lt_u16_e64 s0, 0x7eff, v1 1545; GFX11CHECK-NEXT: s_or_b32 s0, s0, vcc_lo 1546; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 1547; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] 1548 %class = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 1015) ; ~0x008 = ~"-normal" 1549 ret i1 %class 1550} 1551 1552define i1 @issubnormal_bf16(bfloat %x) { 1553; GFX7CHECK-LABEL: issubnormal_bf16: 1554; GFX7CHECK: ; %bb.0: 1555; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1556; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0 1557; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15 1558; GFX7CHECK-NEXT: v_add_i32_e32 v0, vcc, -1, v0 1559; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7f 1560; GFX7CHECK-NEXT: v_cmp_gt_u32_e32 vcc, s4, v0 1561; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 1562; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] 1563; 1564; GFX8CHECK-LABEL: issubnormal_bf16: 1565; GFX8CHECK: ; %bb.0: 1566; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1567; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 1568; GFX8CHECK-NEXT: v_add_u16_e32 v0, -1, v0 1569; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7f 1570; GFX8CHECK-NEXT: v_cmp_gt_u16_e32 vcc, s4, v0 1571; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 1572; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] 1573; 1574; GFX9CHECK-LABEL: issubnormal_bf16: 1575; GFX9CHECK: ; %bb.0: 1576; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) 1577; GFX9CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 1578; GFX9CHECK-NEXT: v_add_u16_e32 v0, -1, v0 1579; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7f 1580; GFX9CHECK-NEXT: v_cmp_gt_u16_e32 vcc, s4, v0 1581; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 1582; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] 1583; 1584; GFX10CHECK-LABEL: issubnormal_bf16: 1585; GFX10CHECK: ; %bb.0: 1586; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1587; GFX10CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 1588; GFX10CHECK-NEXT: v_add_nc_u16 v0, v0, -1 1589; GFX10CHECK-NEXT: v_cmp_gt_u16_e32 vcc_lo, 0x7f, v0 1590; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo 1591; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] 1592; 1593; GFX11CHECK-LABEL: issubnormal_bf16: 1594; GFX11CHECK: ; %bb.0: 1595; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1596; GFX11CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 1597; GFX11CHECK-NEXT: v_add_nc_u16 v0, v0, -1 1598; GFX11CHECK-NEXT: v_cmp_gt_u16_e32 vcc_lo, 0x7f, v0 1599; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo 1600; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] 1601 %class = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 144) ; 0x90 = "subnormal" 1602 ret i1 %class 1603} 1604 1605define i1 @not_issubnormal_bf16(bfloat %x) { 1606; GFX7CHECK-LABEL: not_issubnormal_bf16: 1607; GFX7CHECK: ; %bb.0: 1608; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1609; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0 1610; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15 1611; GFX7CHECK-NEXT: v_add_i32_e32 v0, vcc, -1, v0 1612; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7e 1613; GFX7CHECK-NEXT: v_cmp_lt_u32_e32 vcc, s4, v0 1614; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 1615; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] 1616; 1617; GFX8CHECK-LABEL: not_issubnormal_bf16: 1618; GFX8CHECK: ; %bb.0: 1619; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1620; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 1621; GFX8CHECK-NEXT: v_add_u16_e32 v0, -1, v0 1622; GFX8CHECK-NEXT: s_movk_i32 
s4, 0x7e 1623; GFX8CHECK-NEXT: v_cmp_lt_u16_e32 vcc, s4, v0 1624; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 1625; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] 1626; 1627; GFX9CHECK-LABEL: not_issubnormal_bf16: 1628; GFX9CHECK: ; %bb.0: 1629; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1630; GFX9CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 1631; GFX9CHECK-NEXT: v_add_u16_e32 v0, -1, v0 1632; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7e 1633; GFX9CHECK-NEXT: v_cmp_lt_u16_e32 vcc, s4, v0 1634; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 1635; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] 1636; 1637; GFX10CHECK-LABEL: not_issubnormal_bf16: 1638; GFX10CHECK: ; %bb.0: 1639; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1640; GFX10CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 1641; GFX10CHECK-NEXT: v_add_nc_u16 v0, v0, -1 1642; GFX10CHECK-NEXT: v_cmp_lt_u16_e32 vcc_lo, 0x7e, v0 1643; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo 1644; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] 1645; 1646; GFX11CHECK-LABEL: not_issubnormal_bf16: 1647; GFX11CHECK: ; %bb.0: 1648; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1649; GFX11CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 1650; GFX11CHECK-NEXT: v_add_nc_u16 v0, v0, -1 1651; GFX11CHECK-NEXT: v_cmp_lt_u16_e32 vcc_lo, 0x7e, v0 1652; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo 1653; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] 1654 %class = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 879) ; ~0x90 = ~"subnormal" 1655 ret i1 %class 1656} 1657 1658define i1 @iszero_bf16(bfloat %x) { 1659; GFX7CHECK-LABEL: iszero_bf16: 1660; GFX7CHECK: ; %bb.0: 1661; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1662; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0 1663; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15 1664; GFX7CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 1665; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 1666; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] 1667; 1668; GFX8CHECK-LABEL: iszero_bf16: 1669; GFX8CHECK: ; %bb.0: 1670; 
GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1671; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 1672; GFX8CHECK-NEXT: v_cmp_eq_u16_e32 vcc, 0, v0 1673; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 1674; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] 1675; 1676; GFX9CHECK-LABEL: iszero_bf16: 1677; GFX9CHECK: ; %bb.0: 1678; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1679; GFX9CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 1680; GFX9CHECK-NEXT: v_cmp_eq_u16_e32 vcc, 0, v0 1681; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 1682; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] 1683; 1684; GFX10CHECK-LABEL: iszero_bf16: 1685; GFX10CHECK: ; %bb.0: 1686; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1687; GFX10CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 1688; GFX10CHECK-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0, v0 1689; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo 1690; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] 1691; 1692; GFX11CHECK-LABEL: iszero_bf16: 1693; GFX11CHECK: ; %bb.0: 1694; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1695; GFX11CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 1696; GFX11CHECK-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0, v0 1697; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo 1698; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] 1699 %class = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 96) ; 0x60 = "zero" 1700 ret i1 %class 1701} 1702 1703define i1 @not_iszero_bf16(bfloat %x) { 1704; GFX7CHECK-LABEL: not_iszero_bf16: 1705; GFX7CHECK: ; %bb.0: 1706; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1707; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0 1708; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15 1709; GFX7CHECK-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 1710; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 1711; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] 1712; 1713; GFX8CHECK-LABEL: not_iszero_bf16: 1714; GFX8CHECK: ; %bb.0: 1715; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1716; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 1717; GFX8CHECK-NEXT: 
v_cmp_ne_u16_e32 vcc, 0, v0 1718; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 1719; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] 1720; 1721; GFX9CHECK-LABEL: not_iszero_bf16: 1722; GFX9CHECK: ; %bb.0: 1723; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1724; GFX9CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 1725; GFX9CHECK-NEXT: v_cmp_ne_u16_e32 vcc, 0, v0 1726; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 1727; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] 1728; 1729; GFX10CHECK-LABEL: not_iszero_bf16: 1730; GFX10CHECK: ; %bb.0: 1731; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1732; GFX10CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 1733; GFX10CHECK-NEXT: v_cmp_ne_u16_e32 vcc_lo, 0, v0 1734; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo 1735; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] 1736; 1737; GFX11CHECK-LABEL: not_iszero_bf16: 1738; GFX11CHECK: ; %bb.0: 1739; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1740; GFX11CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 1741; GFX11CHECK-NEXT: v_cmp_ne_u16_e32 vcc_lo, 0, v0 1742; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo 1743; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] 1744 %class = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 927) ; ~0x60 = ~"zero" 1745 ret i1 %class 1746} 1747 1748define i1 @ispositive_bf16(bfloat %x) { 1749; GFX7CHECK-LABEL: ispositive_bf16: 1750; GFX7CHECK: ; %bb.0: 1751; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1752; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0 1753; GFX7CHECK-NEXT: v_lshrrev_b32_e32 v0, 16, v0 1754; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7f81 1755; GFX7CHECK-NEXT: v_cmp_gt_u32_e32 vcc, s4, v0 1756; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 1757; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] 1758; 1759; GFX8CHECK-LABEL: ispositive_bf16: 1760; GFX8CHECK: ; %bb.0: 1761; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1762; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7f81 1763; GFX8CHECK-NEXT: v_cmp_gt_u16_e32 vcc, s4, v0 1764; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 
0, 1, vcc 1765; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] 1766; 1767; GFX9CHECK-LABEL: ispositive_bf16: 1768; GFX9CHECK: ; %bb.0: 1769; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1770; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7f81 1771; GFX9CHECK-NEXT: v_cmp_gt_u16_e32 vcc, s4, v0 1772; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 1773; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] 1774; 1775; GFX10CHECK-LABEL: ispositive_bf16: 1776; GFX10CHECK: ; %bb.0: 1777; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1778; GFX10CHECK-NEXT: v_cmp_gt_u16_e32 vcc_lo, 0x7f81, v0 1779; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo 1780; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] 1781; 1782; GFX11CHECK-LABEL: ispositive_bf16: 1783; GFX11CHECK: ; %bb.0: 1784; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1785; GFX11CHECK-NEXT: v_cmp_gt_u16_e32 vcc_lo, 0x7f81, v0 1786; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo 1787; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] 1788 %class = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 960) ; fcPositive 1789 ret i1 %class 1790} 1791 1792define i1 @not_ispositive_bf16(bfloat %x) { 1793; GFX7CHECK-LABEL: not_ispositive_bf16: 1794; GFX7CHECK: ; %bb.0: 1795; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1796; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0 1797; GFX7CHECK-NEXT: v_lshrrev_b32_e32 v1, 16, v0 1798; GFX7CHECK-NEXT: v_ashrrev_i32_e32 v2, 16, v0 1799; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15 1800; GFX7CHECK-NEXT: s_movk_i32 s6, 0x7f80 1801; GFX7CHECK-NEXT: v_cmp_gt_i32_e32 vcc, 0, v2 1802; GFX7CHECK-NEXT: v_cmp_gt_i32_e64 s[4:5], s6, v0 1803; GFX7CHECK-NEXT: s_mov_b32 s7, 0xff80 1804; GFX7CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc 1805; GFX7CHECK-NEXT: v_cmp_eq_u32_e32 vcc, s7, v1 1806; GFX7CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc 1807; GFX7CHECK-NEXT: v_cmp_lt_i32_e32 vcc, s6, v0 1808; GFX7CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc 1809; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] 1810; GFX7CHECK-NEXT: 
s_setpc_b64 s[30:31] 1811; 1812; GFX8CHECK-LABEL: not_ispositive_bf16: 1813; GFX8CHECK: ; %bb.0: 1814; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1815; GFX8CHECK-NEXT: v_and_b32_e32 v1, 0x7fff, v0 1816; GFX8CHECK-NEXT: s_movk_i32 s6, 0x7f80 1817; GFX8CHECK-NEXT: v_cmp_gt_i16_e32 vcc, 0, v0 1818; GFX8CHECK-NEXT: v_cmp_gt_i16_e64 s[4:5], s6, v1 1819; GFX8CHECK-NEXT: s_movk_i32 s7, 0xff80 1820; GFX8CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc 1821; GFX8CHECK-NEXT: v_cmp_eq_u16_e32 vcc, s7, v0 1822; GFX8CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc 1823; GFX8CHECK-NEXT: v_cmp_lt_i16_e32 vcc, s6, v1 1824; GFX8CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc 1825; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] 1826; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] 1827; 1828; GFX9CHECK-LABEL: not_ispositive_bf16: 1829; GFX9CHECK: ; %bb.0: 1830; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1831; GFX9CHECK-NEXT: v_and_b32_e32 v1, 0x7fff, v0 1832; GFX9CHECK-NEXT: s_movk_i32 s6, 0x7f80 1833; GFX9CHECK-NEXT: v_cmp_gt_i16_e32 vcc, 0, v0 1834; GFX9CHECK-NEXT: v_cmp_gt_i16_e64 s[4:5], s6, v1 1835; GFX9CHECK-NEXT: s_movk_i32 s7, 0xff80 1836; GFX9CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc 1837; GFX9CHECK-NEXT: v_cmp_eq_u16_e32 vcc, s7, v0 1838; GFX9CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc 1839; GFX9CHECK-NEXT: v_cmp_lt_i16_e32 vcc, s6, v1 1840; GFX9CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc 1841; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] 1842; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] 1843; 1844; GFX10CHECK-LABEL: not_ispositive_bf16: 1845; GFX10CHECK: ; %bb.0: 1846; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1847; GFX10CHECK-NEXT: v_and_b32_e32 v1, 0x7fff, v0 1848; GFX10CHECK-NEXT: v_cmp_gt_i16_e32 vcc_lo, 0, v0 1849; GFX10CHECK-NEXT: v_cmp_eq_u16_e64 s5, 0xff80, v0 1850; GFX10CHECK-NEXT: v_cmp_gt_i16_e64 s4, 0x7f80, v1 1851; GFX10CHECK-NEXT: v_cmp_lt_i16_e64 s6, 0x7f80, v1 1852; GFX10CHECK-NEXT: s_and_b32 s4, s4, vcc_lo 1853; GFX10CHECK-NEXT: s_or_b32 s4, s4, s5 
1854; GFX10CHECK-NEXT: s_or_b32 s4, s4, s6 1855; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 1856; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] 1857; 1858; GFX11CHECK-LABEL: not_ispositive_bf16: 1859; GFX11CHECK: ; %bb.0: 1860; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1861; GFX11CHECK-NEXT: v_and_b32_e32 v1, 0x7fff, v0 1862; GFX11CHECK-NEXT: v_cmp_gt_i16_e32 vcc_lo, 0, v0 1863; GFX11CHECK-NEXT: v_cmp_eq_u16_e64 s1, 0xff80, v0 1864; GFX11CHECK-NEXT: v_cmp_gt_i16_e64 s0, 0x7f80, v1 1865; GFX11CHECK-NEXT: v_cmp_lt_i16_e64 s2, 0x7f80, v1 1866; GFX11CHECK-NEXT: s_and_b32 s0, s0, vcc_lo 1867; GFX11CHECK-NEXT: s_or_b32 s0, s0, s1 1868; GFX11CHECK-NEXT: s_or_b32 s0, s0, s2 1869; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 1870; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] 1871 %class = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 63) ; ~fcPositive 1872 ret i1 %class 1873} 1874 1875define i1 @isnegative_bf16(bfloat %x) { 1876; GFX7CHECK-LABEL: isnegative_bf16: 1877; GFX7CHECK: ; %bb.0: 1878; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1879; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0 1880; GFX7CHECK-NEXT: v_lshrrev_b32_e32 v1, 16, v0 1881; GFX7CHECK-NEXT: v_ashrrev_i32_e32 v2, 16, v0 1882; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15 1883; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7f80 1884; GFX7CHECK-NEXT: v_cmp_gt_i32_e32 vcc, 0, v2 1885; GFX7CHECK-NEXT: v_cmp_gt_i32_e64 s[4:5], s4, v0 1886; GFX7CHECK-NEXT: s_mov_b32 s6, 0xff80 1887; GFX7CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc 1888; GFX7CHECK-NEXT: v_cmp_eq_u32_e32 vcc, s6, v1 1889; GFX7CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc 1890; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] 1891; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] 1892; 1893; GFX8CHECK-LABEL: isnegative_bf16: 1894; GFX8CHECK: ; %bb.0: 1895; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1896; GFX8CHECK-NEXT: v_and_b32_e32 v1, 0x7fff, v0 1897; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7f80 1898; GFX8CHECK-NEXT: v_cmp_gt_i16_e32 vcc, 0, v0 1899; 
GFX8CHECK-NEXT: v_cmp_gt_i16_e64 s[4:5], s4, v1 1900; GFX8CHECK-NEXT: s_movk_i32 s6, 0xff80 1901; GFX8CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc 1902; GFX8CHECK-NEXT: v_cmp_eq_u16_e32 vcc, s6, v0 1903; GFX8CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc 1904; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] 1905; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] 1906; 1907; GFX9CHECK-LABEL: isnegative_bf16: 1908; GFX9CHECK: ; %bb.0: 1909; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1910; GFX9CHECK-NEXT: v_and_b32_e32 v1, 0x7fff, v0 1911; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7f80 1912; GFX9CHECK-NEXT: v_cmp_gt_i16_e32 vcc, 0, v0 1913; GFX9CHECK-NEXT: v_cmp_gt_i16_e64 s[4:5], s4, v1 1914; GFX9CHECK-NEXT: s_movk_i32 s6, 0xff80 1915; GFX9CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc 1916; GFX9CHECK-NEXT: v_cmp_eq_u16_e32 vcc, s6, v0 1917; GFX9CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc 1918; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] 1919; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] 1920; 1921; GFX10CHECK-LABEL: isnegative_bf16: 1922; GFX10CHECK: ; %bb.0: 1923; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1924; GFX10CHECK-NEXT: v_and_b32_e32 v1, 0x7fff, v0 1925; GFX10CHECK-NEXT: v_cmp_gt_i16_e32 vcc_lo, 0, v0 1926; GFX10CHECK-NEXT: v_cmp_eq_u16_e64 s5, 0xff80, v0 1927; GFX10CHECK-NEXT: v_cmp_gt_i16_e64 s4, 0x7f80, v1 1928; GFX10CHECK-NEXT: s_and_b32 s4, s4, vcc_lo 1929; GFX10CHECK-NEXT: s_or_b32 s4, s4, s5 1930; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 1931; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] 1932; 1933; GFX11CHECK-LABEL: isnegative_bf16: 1934; GFX11CHECK: ; %bb.0: 1935; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1936; GFX11CHECK-NEXT: v_and_b32_e32 v1, 0x7fff, v0 1937; GFX11CHECK-NEXT: v_cmp_gt_i16_e32 vcc_lo, 0, v0 1938; GFX11CHECK-NEXT: v_cmp_eq_u16_e64 s1, 0xff80, v0 1939; GFX11CHECK-NEXT: v_cmp_gt_i16_e64 s0, 0x7f80, v1 1940; GFX11CHECK-NEXT: s_and_b32 s0, s0, vcc_lo 1941; GFX11CHECK-NEXT: s_or_b32 s0, s0, s1 1942; GFX11CHECK-NEXT: 
v_cndmask_b32_e64 v0, 0, 1, s0 1943; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] 1944 %class = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 60) ; fcNegative 1945 ret i1 %class 1946} 1947 1948define i1 @not_isnegative_bf16(bfloat %x) { 1949; GFX7CHECK-LABEL: not_isnegative_bf16: 1950; GFX7CHECK: ; %bb.0: 1951; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1952; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0 1953; GFX7CHECK-NEXT: v_lshrrev_b32_e32 v1, 16, v0 1954; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15 1955; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7f80 1956; GFX7CHECK-NEXT: v_cmp_lt_i32_e32 vcc, s4, v0 1957; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7f81 1958; GFX7CHECK-NEXT: v_cmp_gt_u32_e64 s[4:5], s4, v1 1959; GFX7CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc 1960; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] 1961; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] 1962; 1963; GFX8CHECK-LABEL: not_isnegative_bf16: 1964; GFX8CHECK: ; %bb.0: 1965; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1966; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7f81 1967; GFX8CHECK-NEXT: v_cmp_gt_u16_e32 vcc, s4, v0 1968; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 1969; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7f80 1970; GFX8CHECK-NEXT: v_cmp_lt_i16_e64 s[4:5], s4, v0 1971; GFX8CHECK-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 1972; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] 1973; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] 1974; 1975; GFX9CHECK-LABEL: not_isnegative_bf16: 1976; GFX9CHECK: ; %bb.0: 1977; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1978; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7f81 1979; GFX9CHECK-NEXT: v_cmp_gt_u16_e32 vcc, s4, v0 1980; GFX9CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 1981; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7f80 1982; GFX9CHECK-NEXT: v_cmp_lt_i16_e64 s[4:5], s4, v0 1983; GFX9CHECK-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 1984; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] 1985; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] 1986; 1987; GFX10CHECK-LABEL: not_isnegative_bf16: 1988; GFX10CHECK: ; %bb.0: 
1989; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1990; GFX10CHECK-NEXT: v_and_b32_e32 v1, 0x7fff, v0 1991; GFX10CHECK-NEXT: v_cmp_gt_u16_e32 vcc_lo, 0x7f81, v0 1992; GFX10CHECK-NEXT: v_cmp_lt_i16_e64 s4, 0x7f80, v1 1993; GFX10CHECK-NEXT: s_or_b32 s4, vcc_lo, s4 1994; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 1995; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] 1996; 1997; GFX11CHECK-LABEL: not_isnegative_bf16: 1998; GFX11CHECK: ; %bb.0: 1999; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2000; GFX11CHECK-NEXT: v_and_b32_e32 v1, 0x7fff, v0 2001; GFX11CHECK-NEXT: v_cmp_gt_u16_e32 vcc_lo, 0x7f81, v0 2002; GFX11CHECK-NEXT: v_cmp_lt_i16_e64 s0, 0x7f80, v1 2003; GFX11CHECK-NEXT: s_or_b32 s0, vcc_lo, s0 2004; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 2005; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] 2006 %class = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 963) ; ~fcNegative 2007 ret i1 %class 2008} 2009 2010define i1 @iszero_or_nan_bf16(bfloat %x) { 2011; GFX7CHECK-LABEL: iszero_or_nan_bf16: 2012; GFX7CHECK: ; %bb.0: ; %entry 2013; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2014; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0 2015; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15 2016; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7f80 2017; GFX7CHECK-NEXT: v_cmp_lt_i32_e32 vcc, s4, v0 2018; GFX7CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 2019; GFX7CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc 2020; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] 2021; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] 2022; 2023; GFX8CHECK-LABEL: iszero_or_nan_bf16: 2024; GFX8CHECK: ; %bb.0: ; %entry 2025; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2026; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 2027; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7f80 2028; GFX8CHECK-NEXT: v_cmp_lt_i16_e32 vcc, s4, v0 2029; GFX8CHECK-NEXT: v_cmp_eq_u16_e64 s[4:5], 0, v0 2030; GFX8CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc 2031; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] 2032; GFX8CHECK-NEXT: 
s_setpc_b64 s[30:31] 2033; 2034; GFX9CHECK-LABEL: iszero_or_nan_bf16: 2035; GFX9CHECK: ; %bb.0: ; %entry 2036; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2037; GFX9CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 2038; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7f80 2039; GFX9CHECK-NEXT: v_cmp_lt_i16_e32 vcc, s4, v0 2040; GFX9CHECK-NEXT: v_cmp_eq_u16_e64 s[4:5], 0, v0 2041; GFX9CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc 2042; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] 2043; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] 2044; 2045; GFX10CHECK-LABEL: iszero_or_nan_bf16: 2046; GFX10CHECK: ; %bb.0: ; %entry 2047; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2048; GFX10CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 2049; GFX10CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v0 2050; GFX10CHECK-NEXT: v_cmp_eq_u16_e64 s4, 0, v0 2051; GFX10CHECK-NEXT: s_or_b32 s4, s4, vcc_lo 2052; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 2053; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] 2054; 2055; GFX11CHECK-LABEL: iszero_or_nan_bf16: 2056; GFX11CHECK: ; %bb.0: ; %entry 2057; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2058; GFX11CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 2059; GFX11CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v0 2060; GFX11CHECK-NEXT: v_cmp_eq_u16_e64 s0, 0, v0 2061; GFX11CHECK-NEXT: s_or_b32 s0, s0, vcc_lo 2062; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 2063; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] 2064entry: 2065 %0 = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 99) ; 0x60|0x3 = "zero|nan" 2066 ret i1 %0 2067} 2068 2069define i1 @iszero_or_nan_f_daz(bfloat %x) #0 { 2070; GFX7CHECK-LABEL: iszero_or_nan_f_daz: 2071; GFX7CHECK: ; %bb.0: ; %entry 2072; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2073; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0 2074; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15 2075; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7f80 2076; GFX7CHECK-NEXT: v_cmp_lt_i32_e32 vcc, s4, v0 2077; GFX7CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 2078; 
GFX7CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc 2079; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] 2080; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] 2081; 2082; GFX8CHECK-LABEL: iszero_or_nan_f_daz: 2083; GFX8CHECK: ; %bb.0: ; %entry 2084; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2085; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 2086; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7f80 2087; GFX8CHECK-NEXT: v_cmp_lt_i16_e32 vcc, s4, v0 2088; GFX8CHECK-NEXT: v_cmp_eq_u16_e64 s[4:5], 0, v0 2089; GFX8CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc 2090; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] 2091; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] 2092; 2093; GFX9CHECK-LABEL: iszero_or_nan_f_daz: 2094; GFX9CHECK: ; %bb.0: ; %entry 2095; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2096; GFX9CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 2097; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7f80 2098; GFX9CHECK-NEXT: v_cmp_lt_i16_e32 vcc, s4, v0 2099; GFX9CHECK-NEXT: v_cmp_eq_u16_e64 s[4:5], 0, v0 2100; GFX9CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc 2101; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] 2102; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] 2103; 2104; GFX10CHECK-LABEL: iszero_or_nan_f_daz: 2105; GFX10CHECK: ; %bb.0: ; %entry 2106; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2107; GFX10CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 2108; GFX10CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v0 2109; GFX10CHECK-NEXT: v_cmp_eq_u16_e64 s4, 0, v0 2110; GFX10CHECK-NEXT: s_or_b32 s4, s4, vcc_lo 2111; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 2112; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] 2113; 2114; GFX11CHECK-LABEL: iszero_or_nan_f_daz: 2115; GFX11CHECK: ; %bb.0: ; %entry 2116; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2117; GFX11CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 2118; GFX11CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v0 2119; GFX11CHECK-NEXT: v_cmp_eq_u16_e64 s0, 0, v0 2120; GFX11CHECK-NEXT: s_or_b32 s0, s0, vcc_lo 2121; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 
0, 1, s0 2122; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] 2123entry: 2124 %0 = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 99) ; 0x60|0x3 = "zero|nan" 2125 ret i1 %0 2126} 2127 2128define i1 @iszero_or_nan_f_maybe_daz(bfloat %x) #1 { 2129; GFX7CHECK-LABEL: iszero_or_nan_f_maybe_daz: 2130; GFX7CHECK: ; %bb.0: ; %entry 2131; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2132; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0 2133; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15 2134; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7f80 2135; GFX7CHECK-NEXT: v_cmp_lt_i32_e32 vcc, s4, v0 2136; GFX7CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 2137; GFX7CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc 2138; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] 2139; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] 2140; 2141; GFX8CHECK-LABEL: iszero_or_nan_f_maybe_daz: 2142; GFX8CHECK: ; %bb.0: ; %entry 2143; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2144; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 2145; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7f80 2146; GFX8CHECK-NEXT: v_cmp_lt_i16_e32 vcc, s4, v0 2147; GFX8CHECK-NEXT: v_cmp_eq_u16_e64 s[4:5], 0, v0 2148; GFX8CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc 2149; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] 2150; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] 2151; 2152; GFX9CHECK-LABEL: iszero_or_nan_f_maybe_daz: 2153; GFX9CHECK: ; %bb.0: ; %entry 2154; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2155; GFX9CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 2156; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7f80 2157; GFX9CHECK-NEXT: v_cmp_lt_i16_e32 vcc, s4, v0 2158; GFX9CHECK-NEXT: v_cmp_eq_u16_e64 s[4:5], 0, v0 2159; GFX9CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc 2160; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] 2161; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] 2162; 2163; GFX10CHECK-LABEL: iszero_or_nan_f_maybe_daz: 2164; GFX10CHECK: ; %bb.0: ; %entry 2165; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2166; GFX10CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 2167; 
GFX10CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v0 2168; GFX10CHECK-NEXT: v_cmp_eq_u16_e64 s4, 0, v0 2169; GFX10CHECK-NEXT: s_or_b32 s4, s4, vcc_lo 2170; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 2171; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] 2172; 2173; GFX11CHECK-LABEL: iszero_or_nan_f_maybe_daz: 2174; GFX11CHECK: ; %bb.0: ; %entry 2175; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2176; GFX11CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 2177; GFX11CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, 0x7f80, v0 2178; GFX11CHECK-NEXT: v_cmp_eq_u16_e64 s0, 0, v0 2179; GFX11CHECK-NEXT: s_or_b32 s0, s0, vcc_lo 2180; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 2181; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] 2182entry: 2183 %0 = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 99) ; 0x60|0x3 = "zero|nan" 2184 ret i1 %0 2185} 2186 2187define i1 @not_iszero_or_nan_bf16(bfloat %x) { 2188; GFX7CHECK-LABEL: not_iszero_or_nan_bf16: 2189; GFX7CHECK: ; %bb.0: ; %entry 2190; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2191; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0 2192; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15 2193; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7f81 2194; GFX7CHECK-NEXT: v_cmp_gt_i32_e32 vcc, s4, v0 2195; GFX7CHECK-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v0 2196; GFX7CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc 2197; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] 2198; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] 2199; 2200; GFX8CHECK-LABEL: not_iszero_or_nan_bf16: 2201; GFX8CHECK: ; %bb.0: ; %entry 2202; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2203; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 2204; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7f81 2205; GFX8CHECK-NEXT: v_cmp_gt_i16_e32 vcc, s4, v0 2206; GFX8CHECK-NEXT: v_cmp_ne_u16_e64 s[4:5], 0, v0 2207; GFX8CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc 2208; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] 2209; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] 2210; 2211; GFX9CHECK-LABEL: not_iszero_or_nan_bf16: 2212; GFX9CHECK: ; 
%bb.0: ; %entry 2213; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2214; GFX9CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 2215; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7f81 2216; GFX9CHECK-NEXT: v_cmp_gt_i16_e32 vcc, s4, v0 2217; GFX9CHECK-NEXT: v_cmp_ne_u16_e64 s[4:5], 0, v0 2218; GFX9CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc 2219; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] 2220; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] 2221; 2222; GFX10CHECK-LABEL: not_iszero_or_nan_bf16: 2223; GFX10CHECK: ; %bb.0: ; %entry 2224; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2225; GFX10CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 2226; GFX10CHECK-NEXT: v_cmp_gt_i16_e32 vcc_lo, 0x7f81, v0 2227; GFX10CHECK-NEXT: v_cmp_ne_u16_e64 s4, 0, v0 2228; GFX10CHECK-NEXT: s_and_b32 s4, s4, vcc_lo 2229; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 2230; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] 2231; 2232; GFX11CHECK-LABEL: not_iszero_or_nan_bf16: 2233; GFX11CHECK: ; %bb.0: ; %entry 2234; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2235; GFX11CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 2236; GFX11CHECK-NEXT: v_cmp_gt_i16_e32 vcc_lo, 0x7f81, v0 2237; GFX11CHECK-NEXT: v_cmp_ne_u16_e64 s0, 0, v0 2238; GFX11CHECK-NEXT: s_and_b32 s0, s0, vcc_lo 2239; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 2240; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] 2241entry: 2242 %0 = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 924) ; ~(0x60|0x3) = "~(zero|nan)" 2243 ret i1 %0 2244} 2245 2246define i1 @not_iszero_or_nan_f_daz(bfloat %x) #0 { 2247; GFX7CHECK-LABEL: not_iszero_or_nan_f_daz: 2248; GFX7CHECK: ; %bb.0: ; %entry 2249; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2250; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0 2251; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15 2252; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7f81 2253; GFX7CHECK-NEXT: v_cmp_gt_i32_e32 vcc, s4, v0 2254; GFX7CHECK-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v0 2255; GFX7CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc 2256; GFX7CHECK-NEXT:
v_cndmask_b32_e64 v0, 0, 1, s[4:5] 2257; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] 2258; 2259; GFX8CHECK-LABEL: not_iszero_or_nan_f_daz: 2260; GFX8CHECK: ; %bb.0: ; %entry 2261; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2262; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 2263; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7f81 2264; GFX8CHECK-NEXT: v_cmp_gt_i16_e32 vcc, s4, v0 2265; GFX8CHECK-NEXT: v_cmp_ne_u16_e64 s[4:5], 0, v0 2266; GFX8CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc 2267; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] 2268; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] 2269; 2270; GFX9CHECK-LABEL: not_iszero_or_nan_f_daz: 2271; GFX9CHECK: ; %bb.0: ; %entry 2272; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2273; GFX9CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 2274; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7f81 2275; GFX9CHECK-NEXT: v_cmp_gt_i16_e32 vcc, s4, v0 2276; GFX9CHECK-NEXT: v_cmp_ne_u16_e64 s[4:5], 0, v0 2277; GFX9CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc 2278; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] 2279; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] 2280; 2281; GFX10CHECK-LABEL: not_iszero_or_nan_f_daz: 2282; GFX10CHECK: ; %bb.0: ; %entry 2283; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2284; GFX10CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 2285; GFX10CHECK-NEXT: v_cmp_gt_i16_e32 vcc_lo, 0x7f81, v0 2286; GFX10CHECK-NEXT: v_cmp_ne_u16_e64 s4, 0, v0 2287; GFX10CHECK-NEXT: s_and_b32 s4, s4, vcc_lo 2288; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 2289; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] 2290; 2291; GFX11CHECK-LABEL: not_iszero_or_nan_f_daz: 2292; GFX11CHECK: ; %bb.0: ; %entry 2293; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2294; GFX11CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 2295; GFX11CHECK-NEXT: v_cmp_gt_i16_e32 vcc_lo, 0x7f81, v0 2296; GFX11CHECK-NEXT: v_cmp_ne_u16_e64 s0, 0, v0 2297; GFX11CHECK-NEXT: s_and_b32 s0, s0, vcc_lo 2298; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 2299; GFX11CHECK-NEXT: s_setpc_b64 
s[30:31] 2300entry: 2301 %0 = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 924) ; ~(0x60|0x3) = "~(zero|nan)" 2302 ret i1 %0 2303} 2304 2305define i1 @not_iszero_or_nan_f_maybe_daz(bfloat %x) #1 { 2306; GFX7CHECK-LABEL: not_iszero_or_nan_f_maybe_daz: 2307; GFX7CHECK: ; %bb.0: ; %entry 2308; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2309; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0 2310; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15 2311; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7f81 2312; GFX7CHECK-NEXT: v_cmp_gt_i32_e32 vcc, s4, v0 2313; GFX7CHECK-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v0 2314; GFX7CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc 2315; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] 2316; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] 2317; 2318; GFX8CHECK-LABEL: not_iszero_or_nan_f_maybe_daz: 2319; GFX8CHECK: ; %bb.0: ; %entry 2320; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2321; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 2322; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7f81 2323; GFX8CHECK-NEXT: v_cmp_gt_i16_e32 vcc, s4, v0 2324; GFX8CHECK-NEXT: v_cmp_ne_u16_e64 s[4:5], 0, v0 2325; GFX8CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc 2326; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] 2327; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] 2328; 2329; GFX9CHECK-LABEL: not_iszero_or_nan_f_maybe_daz: 2330; GFX9CHECK: ; %bb.0: ; %entry 2331; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2332; GFX9CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 2333; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7f81 2334; GFX9CHECK-NEXT: v_cmp_gt_i16_e32 vcc, s4, v0 2335; GFX9CHECK-NEXT: v_cmp_ne_u16_e64 s[4:5], 0, v0 2336; GFX9CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc 2337; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] 2338; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] 2339; 2340; GFX10CHECK-LABEL: not_iszero_or_nan_f_maybe_daz: 2341; GFX10CHECK: ; %bb.0: ; %entry 2342; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2343; GFX10CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 2344; GFX10CHECK-NEXT: 
v_cmp_gt_i16_e32 vcc_lo, 0x7f81, v0 2345; GFX10CHECK-NEXT: v_cmp_ne_u16_e64 s4, 0, v0 2346; GFX10CHECK-NEXT: s_and_b32 s4, s4, vcc_lo 2347; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 2348; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] 2349; 2350; GFX11CHECK-LABEL: not_iszero_or_nan_f_maybe_daz: 2351; GFX11CHECK: ; %bb.0: ; %entry 2352; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2353; GFX11CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 2354; GFX11CHECK-NEXT: v_cmp_gt_i16_e32 vcc_lo, 0x7f81, v0 2355; GFX11CHECK-NEXT: v_cmp_ne_u16_e64 s0, 0, v0 2356; GFX11CHECK-NEXT: s_and_b32 s0, s0, vcc_lo 2357; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 2358; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] 2359entry: 2360 %0 = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 924) ; ~(0x60|0x3) = "~(zero|nan)" 2361 ret i1 %0 2362} 2363 2364define i1 @iszero_or_qnan_bf16(bfloat %x) { 2365; GFX7CHECK-LABEL: iszero_or_qnan_bf16: 2366; GFX7CHECK: ; %bb.0: ; %entry 2367; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2368; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0 2369; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15 2370; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7fbf 2371; GFX7CHECK-NEXT: v_cmp_lt_i32_e32 vcc, s4, v0 2372; GFX7CHECK-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0 2373; GFX7CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc 2374; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] 2375; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] 2376; 2377; GFX8CHECK-LABEL: iszero_or_qnan_bf16: 2378; GFX8CHECK: ; %bb.0: ; %entry 2379; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2380; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 2381; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7fbf 2382; GFX8CHECK-NEXT: v_cmp_lt_i16_e32 vcc, s4, v0 2383; GFX8CHECK-NEXT: v_cmp_eq_u16_e64 s[4:5], 0, v0 2384; GFX8CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc 2385; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] 2386; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] 2387; 2388; GFX9CHECK-LABEL: iszero_or_qnan_bf16: 2389; GFX9CHECK: ; %bb.0: ; %entry 
2390; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2391; GFX9CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 2392; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7fbf 2393; GFX9CHECK-NEXT: v_cmp_lt_i16_e32 vcc, s4, v0 2394; GFX9CHECK-NEXT: v_cmp_eq_u16_e64 s[4:5], 0, v0 2395; GFX9CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc 2396; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] 2397; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] 2398; 2399; GFX10CHECK-LABEL: iszero_or_qnan_bf16: 2400; GFX10CHECK: ; %bb.0: ; %entry 2401; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2402; GFX10CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 2403; GFX10CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, 0x7fbf, v0 2404; GFX10CHECK-NEXT: v_cmp_eq_u16_e64 s4, 0, v0 2405; GFX10CHECK-NEXT: s_or_b32 s4, s4, vcc_lo 2406; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 2407; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] 2408; 2409; GFX11CHECK-LABEL: iszero_or_qnan_bf16: 2410; GFX11CHECK: ; %bb.0: ; %entry 2411; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2412; GFX11CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 2413; GFX11CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, 0x7fbf, v0 2414; GFX11CHECK-NEXT: v_cmp_eq_u16_e64 s0, 0, v0 2415; GFX11CHECK-NEXT: s_or_b32 s0, s0, vcc_lo 2416; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 2417; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] 2418entry: 2419 %0 = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 98) ; 0x60|0x2 = "zero|qnan" 2420 ret i1 %0 2421} 2422 2423define i1 @iszero_or_snan_bf16(bfloat %x) { 2424; GFX7CHECK-LABEL: iszero_or_snan_bf16: 2425; GFX7CHECK: ; %bb.0: ; %entry 2426; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2427; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0 2428; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15 2429; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7fc0 2430; GFX7CHECK-NEXT: v_cmp_gt_i32_e32 vcc, s4, v0 2431; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7f80 2432; GFX7CHECK-NEXT: v_cmp_lt_i32_e64 s[4:5], s4, v0 2433; GFX7CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc 2434; 
GFX7CHECK-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 2435; GFX7CHECK-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 2436; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] 2437; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] 2438; 2439; GFX8CHECK-LABEL: iszero_or_snan_bf16: 2440; GFX8CHECK: ; %bb.0: ; %entry 2441; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2442; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 2443; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7fc0 2444; GFX8CHECK-NEXT: v_cmp_gt_i16_e32 vcc, s4, v0 2445; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7f80 2446; GFX8CHECK-NEXT: v_cmp_lt_i16_e64 s[4:5], s4, v0 2447; GFX8CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc 2448; GFX8CHECK-NEXT: v_cmp_eq_u16_e32 vcc, 0, v0 2449; GFX8CHECK-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 2450; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] 2451; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] 2452; 2453; GFX9CHECK-LABEL: iszero_or_snan_bf16: 2454; GFX9CHECK: ; %bb.0: ; %entry 2455; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2456; GFX9CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 2457; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7fc0 2458; GFX9CHECK-NEXT: v_cmp_gt_i16_e32 vcc, s4, v0 2459; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7f80 2460; GFX9CHECK-NEXT: v_cmp_lt_i16_e64 s[4:5], s4, v0 2461; GFX9CHECK-NEXT: s_and_b64 s[4:5], s[4:5], vcc 2462; GFX9CHECK-NEXT: v_cmp_eq_u16_e32 vcc, 0, v0 2463; GFX9CHECK-NEXT: s_or_b64 s[4:5], vcc, s[4:5] 2464; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] 2465; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] 2466; 2467; GFX10CHECK-LABEL: iszero_or_snan_bf16: 2468; GFX10CHECK: ; %bb.0: ; %entry 2469; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2470; GFX10CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 2471; GFX10CHECK-NEXT: v_cmp_gt_i16_e32 vcc_lo, 0x7fc0, v0 2472; GFX10CHECK-NEXT: v_cmp_lt_i16_e64 s4, 0x7f80, v0 2473; GFX10CHECK-NEXT: v_cmp_eq_u16_e64 s5, 0, v0 2474; GFX10CHECK-NEXT: s_and_b32 s4, s4, vcc_lo 2475; GFX10CHECK-NEXT: s_or_b32 s4, s5, s4 2476; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 2477; 
GFX10CHECK-NEXT: s_setpc_b64 s[30:31] 2478; 2479; GFX11CHECK-LABEL: iszero_or_snan_bf16: 2480; GFX11CHECK: ; %bb.0: ; %entry 2481; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2482; GFX11CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 2483; GFX11CHECK-NEXT: v_cmp_gt_i16_e32 vcc_lo, 0x7fc0, v0 2484; GFX11CHECK-NEXT: v_cmp_lt_i16_e64 s0, 0x7f80, v0 2485; GFX11CHECK-NEXT: v_cmp_eq_u16_e64 s1, 0, v0 2486; GFX11CHECK-NEXT: s_and_b32 s0, s0, vcc_lo 2487; GFX11CHECK-NEXT: s_or_b32 s0, s1, s0 2488; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 2489; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] 2490entry: 2491 %0 = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 97) ; 0x60|0x1 = "zero|snan" 2492 ret i1 %0 2493} 2494 2495define i1 @not_iszero_or_qnan_bf16(bfloat %x) { 2496; GFX7CHECK-LABEL: not_iszero_or_qnan_bf16: 2497; GFX7CHECK: ; %bb.0: ; %entry 2498; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2499; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0 2500; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15 2501; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7fc0 2502; GFX7CHECK-NEXT: s_movk_i32 s8, 0x7f80 2503; GFX7CHECK-NEXT: v_cmp_gt_i32_e32 vcc, s4, v0 2504; GFX7CHECK-NEXT: v_cmp_lt_i32_e64 s[4:5], s8, v0 2505; GFX7CHECK-NEXT: s_and_b64 s[6:7], s[4:5], vcc 2506; GFX7CHECK-NEXT: v_add_i32_e64 v1, s[4:5], -1, v0 2507; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7f 2508; GFX7CHECK-NEXT: v_cmp_eq_u32_e32 vcc, s8, v0 2509; GFX7CHECK-NEXT: v_cmp_gt_u32_e64 s[4:5], s4, v1 2510; GFX7CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc 2511; GFX7CHECK-NEXT: v_add_i32_e32 v0, vcc, 0xffffff80, v0 2512; GFX7CHECK-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7] 2513; GFX7CHECK-NEXT: v_and_b32_e32 v0, 0xffff, v0 2514; GFX7CHECK-NEXT: s_movk_i32 s6, 0x7f00 2515; GFX7CHECK-NEXT: v_cmp_gt_u32_e32 vcc, s6, v0 2516; GFX7CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc 2517; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] 2518; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] 2519; 2520; GFX8CHECK-LABEL: not_iszero_or_qnan_bf16: 2521; GFX8CHECK: ; %bb.0: 
; %entry 2522; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2523; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 2524; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7fc0 2525; GFX8CHECK-NEXT: s_movk_i32 s8, 0x7f80 2526; GFX8CHECK-NEXT: v_cmp_gt_i16_e32 vcc, s4, v0 2527; GFX8CHECK-NEXT: v_cmp_lt_i16_e64 s[4:5], s8, v0 2528; GFX8CHECK-NEXT: s_and_b64 s[6:7], s[4:5], vcc 2529; GFX8CHECK-NEXT: v_add_u16_e32 v1, -1, v0 2530; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7f 2531; GFX8CHECK-NEXT: v_cmp_eq_u16_e32 vcc, s8, v0 2532; GFX8CHECK-NEXT: v_cmp_gt_u16_e64 s[4:5], s4, v1 2533; GFX8CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc 2534; GFX8CHECK-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7] 2535; GFX8CHECK-NEXT: v_add_u16_e32 v0, 0xff80, v0 2536; GFX8CHECK-NEXT: s_movk_i32 s6, 0x7f00 2537; GFX8CHECK-NEXT: v_cmp_gt_u16_e32 vcc, s6, v0 2538; GFX8CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc 2539; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] 2540; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] 2541; 2542; GFX9CHECK-LABEL: not_iszero_or_qnan_bf16: 2543; GFX9CHECK: ; %bb.0: ; %entry 2544; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2545; GFX9CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 2546; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7fc0 2547; GFX9CHECK-NEXT: s_movk_i32 s8, 0x7f80 2548; GFX9CHECK-NEXT: v_cmp_gt_i16_e32 vcc, s4, v0 2549; GFX9CHECK-NEXT: v_cmp_lt_i16_e64 s[4:5], s8, v0 2550; GFX9CHECK-NEXT: s_and_b64 s[6:7], s[4:5], vcc 2551; GFX9CHECK-NEXT: v_add_u16_e32 v1, -1, v0 2552; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7f 2553; GFX9CHECK-NEXT: v_cmp_eq_u16_e32 vcc, s8, v0 2554; GFX9CHECK-NEXT: v_cmp_gt_u16_e64 s[4:5], s4, v1 2555; GFX9CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc 2556; GFX9CHECK-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7] 2557; GFX9CHECK-NEXT: v_add_u16_e32 v0, 0xff80, v0 2558; GFX9CHECK-NEXT: s_movk_i32 s6, 0x7f00 2559; GFX9CHECK-NEXT: v_cmp_gt_u16_e32 vcc, s6, v0 2560; GFX9CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc 2561; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] 2562; GFX9CHECK-NEXT: s_setpc_b64 
s[30:31] 2563; 2564; GFX10CHECK-LABEL: not_iszero_or_qnan_bf16: 2565; GFX10CHECK: ; %bb.0: ; %entry 2566; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2567; GFX10CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 2568; GFX10CHECK-NEXT: v_add_nc_u16 v1, v0, -1 2569; GFX10CHECK-NEXT: v_cmp_gt_i16_e32 vcc_lo, 0x7fc0, v0 2570; GFX10CHECK-NEXT: v_cmp_lt_i16_e64 s4, 0x7f80, v0 2571; GFX10CHECK-NEXT: v_cmp_eq_u16_e64 s5, 0x7f80, v0 2572; GFX10CHECK-NEXT: v_add_nc_u16 v0, 0xff80, v0 2573; GFX10CHECK-NEXT: v_cmp_gt_u16_e64 s6, 0x7f, v1 2574; GFX10CHECK-NEXT: s_and_b32 s4, s4, vcc_lo 2575; GFX10CHECK-NEXT: v_cmp_gt_u16_e32 vcc_lo, 0x7f00, v0 2576; GFX10CHECK-NEXT: s_or_b32 s5, s6, s5 2577; GFX10CHECK-NEXT: s_or_b32 s4, s5, s4 2578; GFX10CHECK-NEXT: s_or_b32 s4, s4, vcc_lo 2579; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 2580; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] 2581; 2582; GFX11CHECK-LABEL: not_iszero_or_qnan_bf16: 2583; GFX11CHECK: ; %bb.0: ; %entry 2584; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2585; GFX11CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 2586; GFX11CHECK-NEXT: v_add_nc_u16 v1, v0, -1 2587; GFX11CHECK-NEXT: v_cmp_gt_i16_e32 vcc_lo, 0x7fc0, v0 2588; GFX11CHECK-NEXT: v_cmp_lt_i16_e64 s0, 0x7f80, v0 2589; GFX11CHECK-NEXT: v_cmp_eq_u16_e64 s1, 0x7f80, v0 2590; GFX11CHECK-NEXT: v_add_nc_u16 v0, 0xff80, v0 2591; GFX11CHECK-NEXT: v_cmp_gt_u16_e64 s2, 0x7f, v1 2592; GFX11CHECK-NEXT: s_and_b32 s0, s0, vcc_lo 2593; GFX11CHECK-NEXT: v_cmp_gt_u16_e32 vcc_lo, 0x7f00, v0 2594; GFX11CHECK-NEXT: s_or_b32 s1, s2, s1 2595; GFX11CHECK-NEXT: s_or_b32 s0, s1, s0 2596; GFX11CHECK-NEXT: s_or_b32 s0, s0, vcc_lo 2597; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 2598; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] 2599entry: 2600 %0 = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 925) ; ~(0x60|0x2) = "~(zero|qnan)" 2601 ret i1 %0 2602} 2603 2604define i1 @not_iszero_or_snan_bf16(bfloat %x) { 2605; GFX7CHECK-LABEL: not_iszero_or_snan_bf16: 2606; GFX7CHECK: ; %bb.0: ; 
%entry 2607; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2608; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0 2609; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15 2610; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7f80 2611; GFX7CHECK-NEXT: v_cmp_eq_u32_e32 vcc, s4, v0 2612; GFX7CHECK-NEXT: v_add_i32_e64 v1, s[4:5], -1, v0 2613; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7f 2614; GFX7CHECK-NEXT: v_cmp_gt_u32_e64 s[4:5], s4, v1 2615; GFX7CHECK-NEXT: s_movk_i32 s6, 0x7fbf 2616; GFX7CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc 2617; GFX7CHECK-NEXT: v_cmp_lt_i32_e32 vcc, s6, v0 2618; GFX7CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc 2619; GFX7CHECK-NEXT: v_add_i32_e32 v0, vcc, 0xffffff80, v0 2620; GFX7CHECK-NEXT: v_and_b32_e32 v0, 0xffff, v0 2621; GFX7CHECK-NEXT: s_movk_i32 s6, 0x7f00 2622; GFX7CHECK-NEXT: v_cmp_gt_u32_e32 vcc, s6, v0 2623; GFX7CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc 2624; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] 2625; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] 2626; 2627; GFX8CHECK-LABEL: not_iszero_or_snan_bf16: 2628; GFX8CHECK: ; %bb.0: ; %entry 2629; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2630; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 2631; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7f80 2632; GFX8CHECK-NEXT: v_cmp_eq_u16_e32 vcc, s4, v0 2633; GFX8CHECK-NEXT: v_add_u16_e32 v1, -1, v0 2634; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7f 2635; GFX8CHECK-NEXT: v_cmp_gt_u16_e64 s[4:5], s4, v1 2636; GFX8CHECK-NEXT: s_movk_i32 s6, 0x7fbf 2637; GFX8CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc 2638; GFX8CHECK-NEXT: v_cmp_lt_i16_e32 vcc, s6, v0 2639; GFX8CHECK-NEXT: v_add_u16_e32 v0, 0xff80, v0 2640; GFX8CHECK-NEXT: s_movk_i32 s6, 0x7f00 2641; GFX8CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc 2642; GFX8CHECK-NEXT: v_cmp_gt_u16_e32 vcc, s6, v0 2643; GFX8CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc 2644; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] 2645; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] 2646; 2647; GFX9CHECK-LABEL: not_iszero_or_snan_bf16: 2648; GFX9CHECK: ; %bb.0: ; %entry 2649; 
GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2650; GFX9CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 2651; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7f80 2652; GFX9CHECK-NEXT: v_cmp_eq_u16_e32 vcc, s4, v0 2653; GFX9CHECK-NEXT: v_add_u16_e32 v1, -1, v0 2654; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7f 2655; GFX9CHECK-NEXT: v_cmp_gt_u16_e64 s[4:5], s4, v1 2656; GFX9CHECK-NEXT: s_movk_i32 s6, 0x7fbf 2657; GFX9CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc 2658; GFX9CHECK-NEXT: v_cmp_lt_i16_e32 vcc, s6, v0 2659; GFX9CHECK-NEXT: v_add_u16_e32 v0, 0xff80, v0 2660; GFX9CHECK-NEXT: s_movk_i32 s6, 0x7f00 2661; GFX9CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc 2662; GFX9CHECK-NEXT: v_cmp_gt_u16_e32 vcc, s6, v0 2663; GFX9CHECK-NEXT: s_or_b64 s[4:5], s[4:5], vcc 2664; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] 2665; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] 2666; 2667; GFX10CHECK-LABEL: not_iszero_or_snan_bf16: 2668; GFX10CHECK: ; %bb.0: ; %entry 2669; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2670; GFX10CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 2671; GFX10CHECK-NEXT: v_add_nc_u16 v1, v0, -1 2672; GFX10CHECK-NEXT: v_add_nc_u16 v2, 0xff80, v0 2673; GFX10CHECK-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x7f80, v0 2674; GFX10CHECK-NEXT: v_cmp_lt_i16_e64 s5, 0x7fbf, v0 2675; GFX10CHECK-NEXT: v_cmp_gt_u16_e64 s4, 0x7f, v1 2676; GFX10CHECK-NEXT: v_cmp_gt_u16_e64 s6, 0x7f00, v2 2677; GFX10CHECK-NEXT: s_or_b32 s4, s4, vcc_lo 2678; GFX10CHECK-NEXT: s_or_b32 s4, s4, s5 2679; GFX10CHECK-NEXT: s_or_b32 s4, s4, s6 2680; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s4 2681; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] 2682; 2683; GFX11CHECK-LABEL: not_iszero_or_snan_bf16: 2684; GFX11CHECK: ; %bb.0: ; %entry 2685; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2686; GFX11CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 2687; GFX11CHECK-NEXT: v_add_nc_u16 v1, v0, -1 2688; GFX11CHECK-NEXT: v_add_nc_u16 v2, 0xff80, v0 2689; GFX11CHECK-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x7f80, v0 2690; GFX11CHECK-NEXT: v_cmp_lt_i16_e64 
s1, 0x7fbf, v0 2691; GFX11CHECK-NEXT: v_cmp_gt_u16_e64 s0, 0x7f, v1 2692; GFX11CHECK-NEXT: v_cmp_gt_u16_e64 s2, 0x7f00, v2 2693; GFX11CHECK-NEXT: s_or_b32 s0, s0, vcc_lo 2694; GFX11CHECK-NEXT: s_or_b32 s0, s0, s1 2695; GFX11CHECK-NEXT: s_or_b32 s0, s0, s2 2696; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 2697; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] 2698entry: 2699 %0 = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 926) ; ~(0x60|0x1) = "~(zero|snan)" 2700 ret i1 %0 2701} 2702 2703define i1 @isinf_or_nan_bf16(bfloat %x) { 2704; GFX7CHECK-LABEL: isinf_or_nan_bf16: 2705; GFX7CHECK: ; %bb.0: ; %entry 2706; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2707; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0 2708; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15 2709; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7f7f 2710; GFX7CHECK-NEXT: v_cmp_lt_i32_e32 vcc, s4, v0 2711; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 2712; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] 2713; 2714; GFX8CHECK-LABEL: isinf_or_nan_bf16: 2715; GFX8CHECK: ; %bb.0: ; %entry 2716; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2717; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 2718; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7f7f 2719; GFX8CHECK-NEXT: v_cmp_lt_i16_e32 vcc, s4, v0 2720; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 2721; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] 2722; 2723; GFX9CHECK-LABEL: isinf_or_nan_bf16: 2724; GFX9CHECK: ; %bb.0: ; %entry 2725; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2726; GFX9CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 2727; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7f7f 2728; GFX9CHECK-NEXT: v_cmp_lt_i16_e32 vcc, s4, v0 2729; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 2730; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] 2731; 2732; GFX10CHECK-LABEL: isinf_or_nan_bf16: 2733; GFX10CHECK: ; %bb.0: ; %entry 2734; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2735; GFX10CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 2736; GFX10CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, 0x7f7f, v0 2737; 
GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo 2738; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] 2739; 2740; GFX11CHECK-LABEL: isinf_or_nan_bf16: 2741; GFX11CHECK: ; %bb.0: ; %entry 2742; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2743; GFX11CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 2744; GFX11CHECK-NEXT: v_cmp_lt_i16_e32 vcc_lo, 0x7f7f, v0 2745; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo 2746; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] 2747entry: 2748 %0 = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 519) ; 0x204|0x3 = "inf|nan" 2749 ret i1 %0 2750} 2751 2752define i1 @not_isinf_or_nan_bf16(bfloat %x) { 2753; GFX7CHECK-LABEL: not_isinf_or_nan_bf16: 2754; GFX7CHECK: ; %bb.0: ; %entry 2755; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2756; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0 2757; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15 2758; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7f80 2759; GFX7CHECK-NEXT: v_cmp_gt_i32_e32 vcc, s4, v0 2760; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 2761; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] 2762; 2763; GFX8CHECK-LABEL: not_isinf_or_nan_bf16: 2764; GFX8CHECK: ; %bb.0: ; %entry 2765; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2766; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 2767; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7f80 2768; GFX8CHECK-NEXT: v_cmp_gt_i16_e32 vcc, s4, v0 2769; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 2770; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] 2771; 2772; GFX9CHECK-LABEL: not_isinf_or_nan_bf16: 2773; GFX9CHECK: ; %bb.0: ; %entry 2774; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2775; GFX9CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 2776; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7f80 2777; GFX9CHECK-NEXT: v_cmp_gt_i16_e32 vcc, s4, v0 2778; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 2779; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] 2780; 2781; GFX10CHECK-LABEL: not_isinf_or_nan_bf16: 2782; GFX10CHECK: ; %bb.0: ; %entry 2783; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
2784; GFX10CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 2785; GFX10CHECK-NEXT: v_cmp_gt_i16_e32 vcc_lo, 0x7f80, v0 2786; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo 2787; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] 2788; 2789; GFX11CHECK-LABEL: not_isinf_or_nan_bf16: 2790; GFX11CHECK: ; %bb.0: ; %entry 2791; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2792; GFX11CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 2793; GFX11CHECK-NEXT: v_cmp_gt_i16_e32 vcc_lo, 0x7f80, v0 2794; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo 2795; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] 2796entry: 2797 %0 = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 504) ; ~(0x204|0x3) = "~(inf|nan)" 2798 ret i1 %0 2799} 2800 2801define i1 @isfinite_or_nan_f(bfloat %x) { 2802; GFX7CHECK-LABEL: isfinite_or_nan_f: 2803; GFX7CHECK: ; %bb.0: ; %entry 2804; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2805; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0 2806; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15 2807; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7f80 2808; GFX7CHECK-NEXT: v_cmp_ne_u32_e32 vcc, s4, v0 2809; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 2810; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] 2811; 2812; GFX8CHECK-LABEL: isfinite_or_nan_f: 2813; GFX8CHECK: ; %bb.0: ; %entry 2814; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2815; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 2816; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7f80 2817; GFX8CHECK-NEXT: v_cmp_ne_u16_e32 vcc, s4, v0 2818; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 2819; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] 2820; 2821; GFX9CHECK-LABEL: isfinite_or_nan_f: 2822; GFX9CHECK: ; %bb.0: ; %entry 2823; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2824; GFX9CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 2825; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7f80 2826; GFX9CHECK-NEXT: v_cmp_ne_u16_e32 vcc, s4, v0 2827; GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 2828; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] 2829; 2830; GFX10CHECK-LABEL: 
isfinite_or_nan_f: 2831; GFX10CHECK: ; %bb.0: ; %entry 2832; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2833; GFX10CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 2834; GFX10CHECK-NEXT: v_cmp_ne_u16_e32 vcc_lo, 0x7f80, v0 2835; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo 2836; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] 2837; 2838; GFX11CHECK-LABEL: isfinite_or_nan_f: 2839; GFX11CHECK: ; %bb.0: ; %entry 2840; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2841; GFX11CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 2842; GFX11CHECK-NEXT: v_cmp_ne_u16_e32 vcc_lo, 0x7f80, v0 2843; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo 2844; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] 2845entry: 2846 %0 = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 507) ; 0x1f8|0x3 = "finite|nan" 2847 ret i1 %0 2848} 2849 2850define i1 @not_isfinite_or_nan_f(bfloat %x) { 2851; GFX7CHECK-LABEL: not_isfinite_or_nan_f: 2852; GFX7CHECK: ; %bb.0: ; %entry 2853; GFX7CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2854; GFX7CHECK-NEXT: v_mul_f32_e32 v0, 1.0, v0 2855; GFX7CHECK-NEXT: v_bfe_u32 v0, v0, 16, 15 2856; GFX7CHECK-NEXT: s_movk_i32 s4, 0x7f80 2857; GFX7CHECK-NEXT: v_cmp_eq_u32_e32 vcc, s4, v0 2858; GFX7CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 2859; GFX7CHECK-NEXT: s_setpc_b64 s[30:31] 2860; 2861; GFX8CHECK-LABEL: not_isfinite_or_nan_f: 2862; GFX8CHECK: ; %bb.0: ; %entry 2863; GFX8CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2864; GFX8CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 2865; GFX8CHECK-NEXT: s_movk_i32 s4, 0x7f80 2866; GFX8CHECK-NEXT: v_cmp_eq_u16_e32 vcc, s4, v0 2867; GFX8CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 2868; GFX8CHECK-NEXT: s_setpc_b64 s[30:31] 2869; 2870; GFX9CHECK-LABEL: not_isfinite_or_nan_f: 2871; GFX9CHECK: ; %bb.0: ; %entry 2872; GFX9CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2873; GFX9CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 2874; GFX9CHECK-NEXT: s_movk_i32 s4, 0x7f80 2875; GFX9CHECK-NEXT: v_cmp_eq_u16_e32 vcc, s4, v0 2876; 
GFX9CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc 2877; GFX9CHECK-NEXT: s_setpc_b64 s[30:31] 2878; 2879; GFX10CHECK-LABEL: not_isfinite_or_nan_f: 2880; GFX10CHECK: ; %bb.0: ; %entry 2881; GFX10CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2882; GFX10CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 2883; GFX10CHECK-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x7f80, v0 2884; GFX10CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo 2885; GFX10CHECK-NEXT: s_setpc_b64 s[30:31] 2886; 2887; GFX11CHECK-LABEL: not_isfinite_or_nan_f: 2888; GFX11CHECK: ; %bb.0: ; %entry 2889; GFX11CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2890; GFX11CHECK-NEXT: v_and_b32_e32 v0, 0x7fff, v0 2891; GFX11CHECK-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0x7f80, v0 2892; GFX11CHECK-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo 2893; GFX11CHECK-NEXT: s_setpc_b64 s[30:31] 2894entry: 2895 %0 = tail call i1 @llvm.is.fpclass.bf16(bfloat %x, i32 516) ; ~(0x1f8|0x3) = "~(finite|nan)" 2896 ret i1 %0 2897} 2898 2899declare i1 @llvm.is.fpclass.bf16(bfloat, i32) 2900declare <2 x i1> @llvm.is.fpclass.v2bf16(<2 x bfloat>, i32) 2901declare <3 x i1> @llvm.is.fpclass.v3bf16(<3 x bfloat>, i32) 2902declare <4 x i1> @llvm.is.fpclass.v4bf16(<4 x bfloat>, i32) 2903 2904; Assume DAZ 2905attributes #0 = { "denormal-fp-math"="ieee,preserve-sign" } 2906 2907; Maybe daz 2908attributes #1 = { "denormal-fp-math"="ieee,dynamic" } 2909;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: 2910; GFX10SELDAG: {{.*}} 2911; GFX11SELDAG: {{.*}} 2912; GFX7SELDAG: {{.*}} 2913; GFX8SELDAG: {{.*}} 2914; GFX9SELDAG: {{.*}} 2915