; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
; RUN: llc -mtriple=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=SI %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=VI %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GFX11 %s

; Tests for folding isinf/isfinite idioms (an fcmp against +/-infinity,
; optionally combined through `and` with an ordered self-compare, with the
; operand wrapped in llvm.fabs) into a single v_cmp_class instruction.
; Per the AMDGPU ISA, class mask 0x204 selects the -inf/+inf classes and
; mask 0x1f8 selects the finite classes (negative/positive normal, denormal
; and zero).  The "not" variants below deliberately miss the pattern in one
; detail and must NOT be folded to v_cmp_class.
;
; Do not hand-edit the CHECK lines; regenerate with update_llc_test_checks.py.

declare float @llvm.fabs.f32(float) #1
declare double @llvm.fabs.f64(double) #1

; |x| == +inf (ordered) folds to a class test with mask 0x204 (+/-inf).
define amdgpu_kernel void @test_isinf_pattern(ptr addrspace(1) nocapture %out, float %x) #0 {
; SI-LABEL: test_isinf_pattern:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_load_dword s4, s[4:5], 0xb
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0x204
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_cmp_class_f32_e32 vcc, s4, v0
; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: test_isinf_pattern:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s2, s[4:5], 0x2c
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    v_mov_b32_e32 v0, 0x204
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_cmp_class_f32_e32 vcc, s2, v0
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
; VI-NEXT:    v_mov_b32_e32 v1, s1
; VI-NEXT:    flat_store_dword v[0:1], v2
; VI-NEXT:    s_endpgm
;
; GFX11-LABEL: test_isinf_pattern:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    v_cmp_class_f32_e64 s2, s2, 0x204
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s2
; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT:    s_endpgm
  %fabs = tail call float @llvm.fabs.f32(float %x) #1
  %cmp = fcmp oeq float %fabs, 0x7FF0000000000000
  %ext = zext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %out, align 4
  ret void
}

; Unordered equality (ueq) also matches NaN, so this is not an isinf test
; and must stay a plain floating-point compare.
define amdgpu_kernel void @test_not_isinf_pattern_0(ptr addrspace(1) nocapture %out, float %x) #0 {
; SI-LABEL: test_not_isinf_pattern_0:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_load_dword s4, s[4:5], 0xb
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0x7f800000
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_cmp_nlg_f32_e64 s[4:5], |s4|, v0
; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: test_not_isinf_pattern_0:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s2, s[4:5], 0x2c
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    v_mov_b32_e32 v0, 0x7f800000
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_cmp_nlg_f32_e64 s[2:3], |s2|, v0
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s[2:3]
; VI-NEXT:    v_mov_b32_e32 v1, s1
; VI-NEXT:    flat_store_dword v[0:1], v2
; VI-NEXT:    s_endpgm
;
; GFX11-LABEL: test_not_isinf_pattern_0:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    v_cmp_nlg_f32_e64 s2, 0x7f800000, |s2|
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s2
; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT:    s_endpgm
  %fabs = tail call float @llvm.fabs.f32(float %x) #1
  %cmp = fcmp ueq float %fabs, 0x7FF0000000000000
  %ext = zext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %out, align 4
  ret void
}

; fabs(x) can never ordered-equal -inf, so this constant-folds to storing 0.
define amdgpu_kernel void @test_not_isinf_pattern_1(ptr addrspace(1) nocapture %out, float %x) #0 {
; SI-LABEL: test_not_isinf_pattern_1:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: test_not_isinf_pattern_1:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    v_mov_b32_e32 v2, 0
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    v_mov_b32_e32 v1, s1
; VI-NEXT:    flat_store_dword v[0:1], v2
; VI-NEXT:    s_endpgm
;
; GFX11-LABEL: test_not_isinf_pattern_1:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    global_store_b32 v0, v0, s[0:1]
; GFX11-NEXT:    s_endpgm
  %fabs = tail call float @llvm.fabs.f32(float %x) #1
  %cmp = fcmp oeq float %fabs, 0xFFF0000000000000
  %ext = zext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %out, align 4
  ret void
}

; ord(x) && |x| != +inf is the canonical isfinite idiom; it folds to a
; class test with mask 0x1f8 (all finite classes).
define amdgpu_kernel void @test_isfinite_pattern_0(ptr addrspace(1) nocapture %out, float %x) #0 {
; SI-LABEL: test_isfinite_pattern_0:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_load_dword s4, s[4:5], 0xb
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0x1f8
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_cmp_class_f32_e32 vcc, s4, v0
; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: test_isfinite_pattern_0:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s2, s[4:5], 0x2c
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    v_mov_b32_e32 v0, 0x1f8
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_cmp_class_f32_e32 vcc, s2, v0
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
; VI-NEXT:    v_mov_b32_e32 v1, s1
; VI-NEXT:    flat_store_dword v[0:1], v2
; VI-NEXT:    s_endpgm
;
; GFX11-LABEL: test_isfinite_pattern_0:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    v_cmp_class_f32_e64 s2, s2, 0x1f8
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s2
; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT:    s_endpgm
  %ord = fcmp ord float %x, 0.000000e+00
  %x.fabs = tail call float @llvm.fabs.f32(float %x) #1
  %ninf = fcmp une float %x.fabs, 0x7FF0000000000000
  %and = and i1 %ord, %ninf
  %ext = zext i1 %and to i32
  store i32 %ext, ptr addrspace(1) %out, align 4
  ret void
}

; A single `one` compare against +inf already implies ordered, so the
; isfinite fold (class mask 0x1f8) applies without an explicit ord check.
; NOTE(review): this call uses attribute group #3, which is never defined
; in this file (only #0/#1 are); the LL parser tolerates a dangling group
; reference, but #1 was presumably intended — confirm before regenerating.
define amdgpu_kernel void @test_isfinite_pattern_1(ptr addrspace(1) nocapture %out, float %x) #0 {
; SI-LABEL: test_isfinite_pattern_1:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_load_dword s4, s[4:5], 0xb
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0x1f8
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_cmp_class_f32_e32 vcc, s4, v0
; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: test_isfinite_pattern_1:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s2, s[4:5], 0x2c
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    v_mov_b32_e32 v0, 0x1f8
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_cmp_class_f32_e32 vcc, s2, v0
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
; VI-NEXT:    v_mov_b32_e32 v1, s1
; VI-NEXT:    flat_store_dword v[0:1], v2
; VI-NEXT:    s_endpgm
;
; GFX11-LABEL: test_isfinite_pattern_1:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    v_cmp_class_f32_e64 s2, s2, 0x1f8
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s2
; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT:    s_endpgm
  %x.fabs = tail call float @llvm.fabs.f32(float %x) #3
  %cmpinf = fcmp one float %x.fabs, 0x7FF0000000000000
  %ext = zext i1 %cmpinf to i32
  store i32 %ext, ptr addrspace(1) %out, align 4
  ret void
}

; Use negative infinity
define amdgpu_kernel void @test_isfinite_not_pattern_0(ptr addrspace(1) nocapture %out, float %x) #0 {
; SI-LABEL: test_isfinite_not_pattern_0:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dword s6, s[4:5], 0xb
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_cmp_o_f32_e64 s[4:5], s6, s6
; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: test_isfinite_not_pattern_0:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s2, s[4:5], 0x2c
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_cmp_o_f32_e64 s[2:3], s2, s2
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s[2:3]
; VI-NEXT:    v_mov_b32_e32 v1, s1
; VI-NEXT:    flat_store_dword v[0:1], v2
; VI-NEXT:    s_endpgm
;
; GFX11-LABEL: test_isfinite_not_pattern_0:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    v_cmp_o_f32_e64 s2, s2, s2
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s2
; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT:    s_endpgm
  %ord = fcmp ord float %x, 0.000000e+00
  %x.fabs = tail call float @llvm.fabs.f32(float %x) #1
  %ninf = fcmp une float %x.fabs, 0xFFF0000000000000
  %and = and i1 %ord, %ninf
  %ext = zext i1 %and to i32
  store i32 %ext, ptr addrspace(1) %out, align 4
  ret void
}

; No fabs
define amdgpu_kernel void @test_isfinite_not_pattern_1(ptr addrspace(1) nocapture %out, float %x) #0 {
; SI-LABEL: test_isfinite_not_pattern_1:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_load_dword s6, s[4:5], 0xb
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0x7f800000
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_cmp_o_f32_e64 s[4:5], s6, s6
; SI-NEXT:    v_cmp_neq_f32_e32 vcc, s6, v0
; SI-NEXT:    s_and_b64 s[4:5], s[4:5], vcc
; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: test_isfinite_not_pattern_1:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s6, s[4:5], 0x2c
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    v_mov_b32_e32 v0, 0x7f800000
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_cmp_o_f32_e64 s[2:3], s6, s6
; VI-NEXT:    v_cmp_neq_f32_e32 vcc, s6, v0
; VI-NEXT:    s_and_b64 s[2:3], s[2:3], vcc
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s[2:3]
; VI-NEXT:    v_mov_b32_e32 v1, s1
; VI-NEXT:    flat_store_dword v[0:1], v2
; VI-NEXT:    s_endpgm
;
; GFX11-LABEL: test_isfinite_not_pattern_1:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    v_cmp_o_f32_e64 s3, s2, s2
; GFX11-NEXT:    v_cmp_neq_f32_e64 s2, 0x7f800000, s2
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-NEXT:    s_and_b32 s2, s3, s2
; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s2
; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT:    s_endpgm
  %ord = fcmp ord float %x, 0.000000e+00
  %ninf = fcmp une float %x, 0x7FF0000000000000
  %and = and i1 %ord, %ninf
  %ext = zext i1 %and to i32
  store i32 %ext, ptr addrspace(1) %out, align 4
  ret void
}

; fabs of different value
define amdgpu_kernel void @test_isfinite_not_pattern_2(ptr addrspace(1) nocapture %out, float %x, float %y) #0 {
; SI-LABEL: test_isfinite_not_pattern_2:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s7, 0xf000
; SI-NEXT:    s_mov_b32 s6, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0x7f800000
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_mov_b32 s4, s0
; SI-NEXT:    s_mov_b32 s5, s1
; SI-NEXT:    v_cmp_o_f32_e64 s[0:1], s2, s2
; SI-NEXT:    v_cmp_neq_f32_e64 s[2:3], |s3|, v0
; SI-NEXT:    s_and_b64 s[0:1], s[0:1], s[2:3]
; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; SI-NEXT:    buffer_store_dword v0, off, s[4:7], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: test_isfinite_not_pattern_2:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-NEXT:    v_mov_b32_e32 v0, 0x7f800000
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_cmp_o_f32_e64 s[4:5], s2, s2
; VI-NEXT:    v_cmp_neq_f32_e64 s[2:3], |s3|, v0
; VI-NEXT:    s_and_b64 s[2:3], s[4:5], s[2:3]
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s[2:3]
; VI-NEXT:    v_mov_b32_e32 v1, s1
; VI-NEXT:    flat_store_dword v[0:1], v2
; VI-NEXT:    s_endpgm
;
; GFX11-LABEL: test_isfinite_not_pattern_2:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    v_cmp_o_f32_e64 s2, s2, s2
; GFX11-NEXT:    v_cmp_neq_f32_e64 s3, 0x7f800000, |s3|
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-NEXT:    s_and_b32 s2, s2, s3
; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s2
; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT:    s_endpgm
  %ord = fcmp ord float %x, 0.000000e+00
  %x.fabs = tail call float @llvm.fabs.f32(float %y) #1
  %ninf = fcmp une float %x.fabs, 0x7FF0000000000000
  %and = and i1 %ord, %ninf
  %ext = zext i1 %and to i32
  store i32 %ext, ptr addrspace(1) %out, align 4
  ret void
}

; Wrong ordered compare type
define amdgpu_kernel void @test_isfinite_not_pattern_3(ptr addrspace(1) nocapture %out, float %x) #0 {
; SI-LABEL: test_isfinite_not_pattern_3:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_load_dword s6, s[4:5], 0xb
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0x7f800000
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_cmp_u_f32_e64 s[4:5], s6, s6
; SI-NEXT:    v_cmp_neq_f32_e64 s[6:7], |s6|, v0
; SI-NEXT:    s_and_b64 s[4:5], s[4:5], s[6:7]
; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: test_isfinite_not_pattern_3:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s6, s[4:5], 0x2c
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    v_mov_b32_e32 v0, 0x7f800000
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_cmp_u_f32_e64 s[2:3], s6, s6
; VI-NEXT:    v_cmp_neq_f32_e64 s[4:5], |s6|, v0
; VI-NEXT:    s_and_b64 s[2:3], s[2:3], s[4:5]
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s[2:3]
; VI-NEXT:    v_mov_b32_e32 v1, s1
; VI-NEXT:    flat_store_dword v[0:1], v2
; VI-NEXT:    s_endpgm
;
; GFX11-LABEL: test_isfinite_not_pattern_3:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    v_cmp_u_f32_e64 s3, s2, s2
; GFX11-NEXT:    v_cmp_neq_f32_e64 s2, 0x7f800000, |s2|
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-NEXT:    s_and_b32 s2, s3, s2
; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s2
; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT:    s_endpgm
  %ord = fcmp uno float %x, 0.000000e+00
  %x.fabs = tail call float @llvm.fabs.f32(float %x) #1
  %ninf = fcmp une float %x.fabs, 0x7FF0000000000000
  %and = and i1 %ord, %ninf
  %ext = zext i1 %and to i32
  store i32 %ext, ptr addrspace(1) %out, align 4
  ret void
}

; Same as pattern_0 but with `one` instead of `une`; still folds to 0x1f8.
define amdgpu_kernel void @test_isfinite_pattern_4(ptr addrspace(1) nocapture %out, float %x) #0 {
; SI-LABEL: test_isfinite_pattern_4:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_load_dword s4, s[4:5], 0xb
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0x1f8
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_cmp_class_f32_e32 vcc, s4, v0
; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: test_isfinite_pattern_4:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s2, s[4:5], 0x2c
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    v_mov_b32_e32 v0, 0x1f8
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_cmp_class_f32_e32 vcc, s2, v0
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
; VI-NEXT:    v_mov_b32_e32 v1, s1
; VI-NEXT:    flat_store_dword v[0:1], v2
; VI-NEXT:    s_endpgm
;
; GFX11-LABEL: test_isfinite_pattern_4:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    v_cmp_class_f32_e64 s2, s2, 0x1f8
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s2
; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT:    s_endpgm
  %ord = fcmp ord float %x, 0.000000e+00
  %x.fabs = tail call float @llvm.fabs.f32(float %x) #1
  %ninf = fcmp one float %x.fabs, 0x7FF0000000000000
  %and = and i1 %ord, %ninf
  %ext = zext i1 %and to i32
  store i32 %ext, ptr addrspace(1) %out, align 4
  ret void
}

; As above, but with the `and` operands commuted; the fold must still fire.
define amdgpu_kernel void @test_isfinite_pattern_4_commute_and(ptr addrspace(1) nocapture %out, float %x) #0 {
; SI-LABEL: test_isfinite_pattern_4_commute_and:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_load_dword s4, s[4:5], 0xb
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0x1f8
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_cmp_class_f32_e32 vcc, s4, v0
; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: test_isfinite_pattern_4_commute_and:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s2, s[4:5], 0x2c
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    v_mov_b32_e32 v0, 0x1f8
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_cmp_class_f32_e32 vcc, s2, v0
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
; VI-NEXT:    v_mov_b32_e32 v1, s1
; VI-NEXT:    flat_store_dword v[0:1], v2
; VI-NEXT:    s_endpgm
;
; GFX11-LABEL: test_isfinite_pattern_4_commute_and:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    v_cmp_class_f32_e64 s2, s2, 0x1f8
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s2
; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT:    s_endpgm
  %ord = fcmp ord float %x, 0.000000e+00
  %x.fabs = tail call float @llvm.fabs.f32(float %x) #1
  %ninf = fcmp one float %x.fabs, 0x7FF0000000000000
  %and = and i1 %ninf, %ord
  %ext = zext i1 %and to i32
  store i32 %ext, ptr addrspace(1) %out, align 4
  ret void
}

; The ordered compare is against a different value (%y), so only the inf
; half may become v_cmp_class; the ord compare and the `and` must remain.
define amdgpu_kernel void @test_not_isfinite_pattern_4_wrong_ord_test(ptr addrspace(1) nocapture %out, float %x, [8 x i32], float %y) #0 {
; SI-LABEL: test_not_isfinite_pattern_4_wrong_ord_test:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dword s0, s[4:5], 0x14
; SI-NEXT:    s_load_dwordx2 s[8:9], s[4:5], 0x9
; SI-NEXT:    s_load_dword s1, s[4:5], 0xb
; SI-NEXT:    s_mov_b32 s11, 0xf000
; SI-NEXT:    s_mov_b32 s10, -1
; SI-NEXT:    v_mov_b32_e32 v0, 0x1f8
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_mov_b32_e32 v1, s0
; SI-NEXT:    v_cmp_o_f32_e32 vcc, s1, v1
; SI-NEXT:    v_cmp_class_f32_e64 s[0:1], s1, v0
; SI-NEXT:    s_and_b64 s[0:1], vcc, s[0:1]
; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[0:1]
; SI-NEXT:    buffer_store_dword v0, off, s[8:11], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: test_not_isfinite_pattern_4_wrong_ord_test:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s0, s[4:5], 0x50
; VI-NEXT:    s_load_dword s1, s[4:5], 0x2c
; VI-NEXT:    s_load_dwordx2 s[2:3], s[4:5], 0x24
; VI-NEXT:    v_mov_b32_e32 v0, 0x1f8
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_mov_b32_e32 v1, s0
; VI-NEXT:    v_cmp_class_f32_e32 vcc, s1, v0
; VI-NEXT:    v_cmp_o_f32_e64 s[0:1], s1, v1
; VI-NEXT:    s_and_b64 s[0:1], s[0:1], vcc
; VI-NEXT:    v_mov_b32_e32 v0, s2
; VI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, s[0:1]
; VI-NEXT:    v_mov_b32_e32 v1, s3
; VI-NEXT:    flat_store_dword v[0:1], v2
; VI-NEXT:    s_endpgm
;
; GFX11-LABEL: test_not_isfinite_pattern_4_wrong_ord_test:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_clause 0x2
; GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
; GFX11-NEXT:    s_load_b32 s3, s[4:5], 0x50
; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    v_cmp_o_f32_e64 s3, s2, s3
; GFX11-NEXT:    v_cmp_class_f32_e64 s2, s2, 0x1f8
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GFX11-NEXT:    s_and_b32 s2, s3, s2
; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s2
; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT:    s_endpgm
  %ord = fcmp ord float %x, %y
  %x.fabs = tail call float @llvm.fabs.f32(float %x) #1
  %ninf = fcmp one float %x.fabs, 0x7FF0000000000000
  %and = and i1 %ord, %ninf
  %ext = zext i1 %and to i32
  store i32 %ext, ptr addrspace(1) %out, align 4
  ret void
}

; f16 isinf. SI emits no v_cmp_class_f16 and instead tests the half's bit
; pattern (mask off the sign, compare against 0x7c00).
define amdgpu_kernel void @test_isinf_pattern_f16(ptr addrspace(1) nocapture %out, half %x) #0 {
; SI-LABEL: test_isinf_pattern_f16:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dword s6, s[4:5], 0xb
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_and_b32 s4, s6, 0x7fff
; SI-NEXT:    s_cmpk_eq_i32 s4, 0x7c00
; SI-NEXT:    s_cselect_b64 s[4:5], -1, 0
; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: test_isinf_pattern_f16:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s2, s[4:5], 0x2c
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    v_mov_b32_e32 v0, 0x204
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_cmp_class_f16_e32 vcc, s2, v0
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
; VI-NEXT:    v_mov_b32_e32 v1, s1
; VI-NEXT:    flat_store_dword v[0:1], v2
; VI-NEXT:    s_endpgm
;
; GFX11-LABEL: test_isinf_pattern_f16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    v_cmp_class_f16_e64 s2, s2, 0x204
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s2
; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT:    s_endpgm
  %fabs = tail call half @llvm.fabs.f16(half %x) #1
  %cmp = fcmp oeq half %fabs, 0xH7C00
  %ext = zext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %out, align 4
  ret void
}

; f16 isfinite (ord && une inf); VI/GFX11 fold to class mask 0x1f8, SI
; lowers via f32 conversion plus an integer bit-pattern compare.
define amdgpu_kernel void @test_isfinite_pattern_0_f16(ptr addrspace(1) nocapture %out, half %x) #0 {
; SI-LABEL: test_isfinite_pattern_0_f16:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dword s6, s[4:5], 0xb
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    v_cvt_f32_f16_e32 v0, s6
; SI-NEXT:    s_and_b32 s4, s6, 0x7fff
; SI-NEXT:    v_cmp_o_f32_e32 vcc, v0, v0
; SI-NEXT:    s_cmpk_lg_i32 s4, 0x7c00
; SI-NEXT:    s_cselect_b64 s[4:5], -1, 0
; SI-NEXT:    s_and_b64 s[4:5], vcc, s[4:5]
; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: test_isfinite_pattern_0_f16:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s2, s[4:5], 0x2c
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    v_mov_b32_e32 v0, 0x1f8
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_cmp_class_f16_e32 vcc, s2, v0
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
; VI-NEXT:    v_mov_b32_e32 v1, s1
; VI-NEXT:    flat_store_dword v[0:1], v2
; VI-NEXT:    s_endpgm
;
; GFX11-LABEL: test_isfinite_pattern_0_f16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    v_cmp_class_f16_e64 s2, s2, 0x1f8
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s2
; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT:    s_endpgm
  %ord = fcmp ord half %x, 0.0
  %x.fabs = tail call half @llvm.fabs.f16(half %x) #1
  %ninf = fcmp une half %x.fabs, 0xH7C00
  %and = and i1 %ord, %ninf
  %ext = zext i1 %and to i32
  store i32 %ext, ptr addrspace(1) %out, align 4
  ret void
}

; f16 isfinite with `one`; SI can use a single unsigned bit-pattern
; compare (|x| < 0x7c00), VI/GFX11 fold to class mask 0x1f8.
define amdgpu_kernel void @test_isfinite_pattern_4_f16(ptr addrspace(1) nocapture %out, half %x) #0 {
; SI-LABEL: test_isfinite_pattern_4_f16:
; SI:       ; %bb.0:
; SI-NEXT:    s_load_dword s6, s[4:5], 0xb
; SI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x9
; SI-NEXT:    s_mov_b32 s3, 0xf000
; SI-NEXT:    s_mov_b32 s2, -1
; SI-NEXT:    s_waitcnt lgkmcnt(0)
; SI-NEXT:    s_and_b32 s4, s6, 0x7fff
; SI-NEXT:    s_cmpk_lt_i32 s4, 0x7c00
; SI-NEXT:    s_cselect_b64 s[4:5], -1, 0
; SI-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; SI-NEXT:    s_endpgm
;
; VI-LABEL: test_isfinite_pattern_4_f16:
; VI:       ; %bb.0:
; VI-NEXT:    s_load_dword s2, s[4:5], 0x2c
; VI-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-NEXT:    v_mov_b32_e32 v0, 0x1f8
; VI-NEXT:    s_waitcnt lgkmcnt(0)
; VI-NEXT:    v_cmp_class_f16_e32 vcc, s2, v0
; VI-NEXT:    v_mov_b32_e32 v0, s0
; VI-NEXT:    v_cndmask_b32_e64 v2, 0, 1, vcc
; VI-NEXT:    v_mov_b32_e32 v1, s1
; VI-NEXT:    flat_store_dword v[0:1], v2
; VI-NEXT:    s_endpgm
;
; GFX11-LABEL: test_isfinite_pattern_4_f16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    v_cmp_class_f16_e64 s2, s2, 0x1f8
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT:    v_cndmask_b32_e64 v1, 0, 1, s2
; GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX11-NEXT:    s_endpgm
  %ord = fcmp ord half %x, 0.0
  %x.fabs = tail call half @llvm.fabs.f16(half %x) #1
  %ninf = fcmp one half %x.fabs, 0xH7C00
  %and = and i1 %ord, %ninf
  %ext = zext i1 %and to i32
  store i32 %ext, ptr addrspace(1) %out, align 4
  ret void
}

declare half @llvm.fabs.f16(half) #1

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }