1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr="+wavefrontsize64" -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG %s 3; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -mattr="+wavefrontsize64" -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL %s 4; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-SDAG %s 5; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s 6; RUN: llc -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=VI-SDAG %s 7; RUN: llc -global-isel -mtriple=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=VI-GISEL %s 8 9declare i64 @llvm.amdgcn.fcmp.f32(float, float, i32) #0 10declare i64 @llvm.amdgcn.fcmp.f64(double, double, i32) #0 11declare float @llvm.fabs.f32(float) #0 12 13declare i64 @llvm.amdgcn.fcmp.f16(half, half, i32) #0 14declare half @llvm.fabs.f16(half) #0 15 16define amdgpu_kernel void @v_fcmp_f32_oeq_with_fabs(ptr addrspace(1) %out, float %src, float %a) { 17; GFX11-LABEL: v_fcmp_f32_oeq_with_fabs: 18; GFX11: ; %bb.0: 19; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 20; GFX11-NEXT: v_mov_b32_e32 v2, 0 21; GFX11-NEXT: s_waitcnt lgkmcnt(0) 22; GFX11-NEXT: v_cmp_eq_f32_e64 s[2:3], s2, |s3| 23; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 24; GFX11-NEXT: v_mov_b32_e32 v0, s2 25; GFX11-NEXT: v_mov_b32_e32 v1, s3 26; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] 27; GFX11-NEXT: s_endpgm 28; 29; GFX9-LABEL: v_fcmp_f32_oeq_with_fabs: 30; GFX9: ; %bb.0: 31; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 32; GFX9-NEXT: v_mov_b32_e32 v2, 0 33; GFX9-NEXT: s_waitcnt lgkmcnt(0) 34; GFX9-NEXT: v_mov_b32_e32 v0, s3 35; GFX9-NEXT: v_cmp_eq_f32_e64 s[2:3], s2, |v0| 36; GFX9-NEXT: v_mov_b32_e32 v0, s2 37; 
GFX9-NEXT: v_mov_b32_e32 v1, s3 38; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] 39; GFX9-NEXT: s_endpgm 40; 41; VI-SDAG-LABEL: v_fcmp_f32_oeq_with_fabs: 42; VI-SDAG: ; %bb.0: 43; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 44; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 45; VI-SDAG-NEXT: v_mov_b32_e32 v0, s3 46; VI-SDAG-NEXT: v_cmp_eq_f32_e64 s[2:3], s2, |v0| 47; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0 48; VI-SDAG-NEXT: v_mov_b32_e32 v2, s2 49; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 50; VI-SDAG-NEXT: v_mov_b32_e32 v3, s3 51; VI-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 52; VI-SDAG-NEXT: s_endpgm 53; 54; VI-GISEL-LABEL: v_fcmp_f32_oeq_with_fabs: 55; VI-GISEL: ; %bb.0: 56; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 57; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 58; VI-GISEL-NEXT: v_mov_b32_e32 v0, s3 59; VI-GISEL-NEXT: v_cmp_eq_f32_e64 s[2:3], s2, |v0| 60; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 61; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1 62; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 63; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0 64; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 65; VI-GISEL-NEXT: s_endpgm 66 %temp = call float @llvm.fabs.f32(float %a) 67 %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float %temp, i32 1) 68 store i64 %result, ptr addrspace(1) %out 69 ret void 70} 71 72define amdgpu_kernel void @v_fcmp_f32_oeq_both_operands_with_fabs(ptr addrspace(1) %out, float %src, float %a) { 73; GFX11-LABEL: v_fcmp_f32_oeq_both_operands_with_fabs: 74; GFX11: ; %bb.0: 75; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 76; GFX11-NEXT: v_mov_b32_e32 v2, 0 77; GFX11-NEXT: s_waitcnt lgkmcnt(0) 78; GFX11-NEXT: v_cmp_eq_f32_e64 s[2:3], |s2|, |s3| 79; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 80; GFX11-NEXT: v_mov_b32_e32 v0, s2 81; GFX11-NEXT: v_mov_b32_e32 v1, s3 82; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] 83; GFX11-NEXT: s_endpgm 84; 85; GFX9-LABEL: v_fcmp_f32_oeq_both_operands_with_fabs: 86; GFX9: ; %bb.0: 87; GFX9-NEXT: 
s_load_dwordx4 s[0:3], s[4:5], 0x24 88; GFX9-NEXT: v_mov_b32_e32 v2, 0 89; GFX9-NEXT: s_waitcnt lgkmcnt(0) 90; GFX9-NEXT: v_mov_b32_e32 v0, s3 91; GFX9-NEXT: v_cmp_eq_f32_e64 s[2:3], |s2|, |v0| 92; GFX9-NEXT: v_mov_b32_e32 v0, s2 93; GFX9-NEXT: v_mov_b32_e32 v1, s3 94; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] 95; GFX9-NEXT: s_endpgm 96; 97; VI-SDAG-LABEL: v_fcmp_f32_oeq_both_operands_with_fabs: 98; VI-SDAG: ; %bb.0: 99; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 100; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 101; VI-SDAG-NEXT: v_mov_b32_e32 v0, s3 102; VI-SDAG-NEXT: v_cmp_eq_f32_e64 s[2:3], |s2|, |v0| 103; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0 104; VI-SDAG-NEXT: v_mov_b32_e32 v2, s2 105; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 106; VI-SDAG-NEXT: v_mov_b32_e32 v3, s3 107; VI-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 108; VI-SDAG-NEXT: s_endpgm 109; 110; VI-GISEL-LABEL: v_fcmp_f32_oeq_both_operands_with_fabs: 111; VI-GISEL: ; %bb.0: 112; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 113; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 114; VI-GISEL-NEXT: v_mov_b32_e32 v0, s3 115; VI-GISEL-NEXT: v_cmp_eq_f32_e64 s[2:3], |s2|, |v0| 116; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 117; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1 118; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 119; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0 120; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 121; VI-GISEL-NEXT: s_endpgm 122 %temp = call float @llvm.fabs.f32(float %a) 123 %src_input = call float @llvm.fabs.f32(float %src) 124 %result = call i64 @llvm.amdgcn.fcmp.f32(float %src_input, float %temp, i32 1) 125 store i64 %result, ptr addrspace(1) %out 126 ret void 127} 128 129define amdgpu_kernel void @v_fcmp_f32(ptr addrspace(1) %out, float %src) { 130; GFX11-SDAG-LABEL: v_fcmp_f32: 131; GFX11-SDAG: ; %bb.0: 132; GFX11-SDAG-NEXT: s_endpgm 133; 134; GFX11-GISEL-LABEL: v_fcmp_f32: 135; GFX11-GISEL: ; %bb.0: 136; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 137; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0 138; 
GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 139; GFX11-GISEL-NEXT: global_store_b64 v0, v[0:1], s[0:1] 140; GFX11-GISEL-NEXT: s_endpgm 141; 142; GFX9-SDAG-LABEL: v_fcmp_f32: 143; GFX9-SDAG: ; %bb.0: 144; GFX9-SDAG-NEXT: s_endpgm 145; 146; GFX9-GISEL-LABEL: v_fcmp_f32: 147; GFX9-GISEL: ; %bb.0: 148; GFX9-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 149; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 0 150; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) 151; GFX9-GISEL-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1] 152; GFX9-GISEL-NEXT: s_endpgm 153; 154; VI-SDAG-LABEL: v_fcmp_f32: 155; VI-SDAG: ; %bb.0: 156; VI-SDAG-NEXT: s_endpgm 157; 158; VI-GISEL-LABEL: v_fcmp_f32: 159; VI-GISEL: ; %bb.0: 160; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 161; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 162; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 163; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 164; VI-GISEL-NEXT: flat_store_dwordx2 v[0:1], v[0:1] 165; VI-GISEL-NEXT: s_endpgm 166 %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 -1) 167 store i64 %result, ptr addrspace(1) %out 168 ret void 169} 170 171define amdgpu_kernel void @v_fcmp_f32_oeq(ptr addrspace(1) %out, float %src) { 172; GFX11-LABEL: v_fcmp_f32_oeq: 173; GFX11: ; %bb.0: 174; GFX11-NEXT: s_clause 0x1 175; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x2c 176; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 177; GFX11-NEXT: v_mov_b32_e32 v2, 0 178; GFX11-NEXT: s_waitcnt lgkmcnt(0) 179; GFX11-NEXT: v_cmp_eq_f32_e64 s[2:3], 0x42c80000, s2 180; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 181; GFX11-NEXT: v_mov_b32_e32 v0, s2 182; GFX11-NEXT: v_mov_b32_e32 v1, s3 183; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] 184; GFX11-NEXT: s_endpgm 185; 186; GFX9-LABEL: v_fcmp_f32_oeq: 187; GFX9: ; %bb.0: 188; GFX9-NEXT: s_load_dword s2, s[4:5], 0x2c 189; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 190; GFX9-NEXT: v_mov_b32_e32 v0, 0x42c80000 191; GFX9-NEXT: v_mov_b32_e32 v2, 0 192; GFX9-NEXT: s_waitcnt lgkmcnt(0) 193; 
GFX9-NEXT: v_cmp_eq_f32_e64 s[2:3], s2, v0 194; GFX9-NEXT: v_mov_b32_e32 v0, s2 195; GFX9-NEXT: v_mov_b32_e32 v1, s3 196; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] 197; GFX9-NEXT: s_endpgm 198; 199; VI-SDAG-LABEL: v_fcmp_f32_oeq: 200; VI-SDAG: ; %bb.0: 201; VI-SDAG-NEXT: s_load_dword s2, s[4:5], 0x2c 202; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 203; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x42c80000 204; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 205; VI-SDAG-NEXT: v_cmp_eq_f32_e64 s[2:3], s2, v0 206; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0 207; VI-SDAG-NEXT: v_mov_b32_e32 v2, s2 208; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 209; VI-SDAG-NEXT: v_mov_b32_e32 v3, s3 210; VI-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 211; VI-SDAG-NEXT: s_endpgm 212; 213; VI-GISEL-LABEL: v_fcmp_f32_oeq: 214; VI-GISEL: ; %bb.0: 215; VI-GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c 216; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 217; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x42c80000 218; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 219; VI-GISEL-NEXT: v_cmp_eq_f32_e64 s[2:3], s2, v0 220; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 221; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1 222; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 223; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0 224; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 225; VI-GISEL-NEXT: s_endpgm 226 %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 1) 227 store i64 %result, ptr addrspace(1) %out 228 ret void 229} 230 231define amdgpu_kernel void @v_fcmp_f32_one(ptr addrspace(1) %out, float %src) { 232; GFX11-LABEL: v_fcmp_f32_one: 233; GFX11: ; %bb.0: 234; GFX11-NEXT: s_clause 0x1 235; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x2c 236; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 237; GFX11-NEXT: v_mov_b32_e32 v2, 0 238; GFX11-NEXT: s_waitcnt lgkmcnt(0) 239; GFX11-NEXT: v_cmp_neq_f32_e64 s[2:3], 0x42c80000, s2 240; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 241; GFX11-NEXT: v_mov_b32_e32 v0, s2 242; GFX11-NEXT: 
v_mov_b32_e32 v1, s3 243; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] 244; GFX11-NEXT: s_endpgm 245; 246; GFX9-LABEL: v_fcmp_f32_one: 247; GFX9: ; %bb.0: 248; GFX9-NEXT: s_load_dword s2, s[4:5], 0x2c 249; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 250; GFX9-NEXT: v_mov_b32_e32 v0, 0x42c80000 251; GFX9-NEXT: v_mov_b32_e32 v2, 0 252; GFX9-NEXT: s_waitcnt lgkmcnt(0) 253; GFX9-NEXT: v_cmp_neq_f32_e64 s[2:3], s2, v0 254; GFX9-NEXT: v_mov_b32_e32 v0, s2 255; GFX9-NEXT: v_mov_b32_e32 v1, s3 256; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] 257; GFX9-NEXT: s_endpgm 258; 259; VI-SDAG-LABEL: v_fcmp_f32_one: 260; VI-SDAG: ; %bb.0: 261; VI-SDAG-NEXT: s_load_dword s2, s[4:5], 0x2c 262; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 263; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x42c80000 264; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 265; VI-SDAG-NEXT: v_cmp_neq_f32_e64 s[2:3], s2, v0 266; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0 267; VI-SDAG-NEXT: v_mov_b32_e32 v2, s2 268; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 269; VI-SDAG-NEXT: v_mov_b32_e32 v3, s3 270; VI-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 271; VI-SDAG-NEXT: s_endpgm 272; 273; VI-GISEL-LABEL: v_fcmp_f32_one: 274; VI-GISEL: ; %bb.0: 275; VI-GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c 276; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 277; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x42c80000 278; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 279; VI-GISEL-NEXT: v_cmp_neq_f32_e64 s[2:3], s2, v0 280; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 281; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1 282; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 283; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0 284; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 285; VI-GISEL-NEXT: s_endpgm 286 %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 6) 287 store i64 %result, ptr addrspace(1) %out 288 ret void 289} 290 291define amdgpu_kernel void @v_fcmp_f32_ogt(ptr addrspace(1) %out, float %src) { 292; GFX11-LABEL: v_fcmp_f32_ogt: 293; GFX11: ; %bb.0: 294; GFX11-NEXT: s_clause 
0x1 295; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x2c 296; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 297; GFX11-NEXT: v_mov_b32_e32 v2, 0 298; GFX11-NEXT: s_waitcnt lgkmcnt(0) 299; GFX11-NEXT: v_cmp_lt_f32_e64 s[2:3], 0x42c80000, s2 300; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 301; GFX11-NEXT: v_mov_b32_e32 v0, s2 302; GFX11-NEXT: v_mov_b32_e32 v1, s3 303; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] 304; GFX11-NEXT: s_endpgm 305; 306; GFX9-LABEL: v_fcmp_f32_ogt: 307; GFX9: ; %bb.0: 308; GFX9-NEXT: s_load_dword s2, s[4:5], 0x2c 309; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 310; GFX9-NEXT: v_mov_b32_e32 v0, 0x42c80000 311; GFX9-NEXT: v_mov_b32_e32 v2, 0 312; GFX9-NEXT: s_waitcnt lgkmcnt(0) 313; GFX9-NEXT: v_cmp_gt_f32_e64 s[2:3], s2, v0 314; GFX9-NEXT: v_mov_b32_e32 v0, s2 315; GFX9-NEXT: v_mov_b32_e32 v1, s3 316; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] 317; GFX9-NEXT: s_endpgm 318; 319; VI-SDAG-LABEL: v_fcmp_f32_ogt: 320; VI-SDAG: ; %bb.0: 321; VI-SDAG-NEXT: s_load_dword s2, s[4:5], 0x2c 322; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 323; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x42c80000 324; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 325; VI-SDAG-NEXT: v_cmp_gt_f32_e64 s[2:3], s2, v0 326; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0 327; VI-SDAG-NEXT: v_mov_b32_e32 v2, s2 328; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 329; VI-SDAG-NEXT: v_mov_b32_e32 v3, s3 330; VI-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 331; VI-SDAG-NEXT: s_endpgm 332; 333; VI-GISEL-LABEL: v_fcmp_f32_ogt: 334; VI-GISEL: ; %bb.0: 335; VI-GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c 336; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 337; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x42c80000 338; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 339; VI-GISEL-NEXT: v_cmp_gt_f32_e64 s[2:3], s2, v0 340; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 341; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1 342; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 343; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0 344; VI-GISEL-NEXT: 
flat_store_dwordx2 v[2:3], v[0:1] 345; VI-GISEL-NEXT: s_endpgm 346 %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 2) 347 store i64 %result, ptr addrspace(1) %out 348 ret void 349} 350 351define amdgpu_kernel void @v_fcmp_f32_oge(ptr addrspace(1) %out, float %src) { 352; GFX11-LABEL: v_fcmp_f32_oge: 353; GFX11: ; %bb.0: 354; GFX11-NEXT: s_clause 0x1 355; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x2c 356; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 357; GFX11-NEXT: v_mov_b32_e32 v2, 0 358; GFX11-NEXT: s_waitcnt lgkmcnt(0) 359; GFX11-NEXT: v_cmp_le_f32_e64 s[2:3], 0x42c80000, s2 360; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 361; GFX11-NEXT: v_mov_b32_e32 v0, s2 362; GFX11-NEXT: v_mov_b32_e32 v1, s3 363; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] 364; GFX11-NEXT: s_endpgm 365; 366; GFX9-LABEL: v_fcmp_f32_oge: 367; GFX9: ; %bb.0: 368; GFX9-NEXT: s_load_dword s2, s[4:5], 0x2c 369; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 370; GFX9-NEXT: v_mov_b32_e32 v0, 0x42c80000 371; GFX9-NEXT: v_mov_b32_e32 v2, 0 372; GFX9-NEXT: s_waitcnt lgkmcnt(0) 373; GFX9-NEXT: v_cmp_ge_f32_e64 s[2:3], s2, v0 374; GFX9-NEXT: v_mov_b32_e32 v0, s2 375; GFX9-NEXT: v_mov_b32_e32 v1, s3 376; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] 377; GFX9-NEXT: s_endpgm 378; 379; VI-SDAG-LABEL: v_fcmp_f32_oge: 380; VI-SDAG: ; %bb.0: 381; VI-SDAG-NEXT: s_load_dword s2, s[4:5], 0x2c 382; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 383; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x42c80000 384; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 385; VI-SDAG-NEXT: v_cmp_ge_f32_e64 s[2:3], s2, v0 386; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0 387; VI-SDAG-NEXT: v_mov_b32_e32 v2, s2 388; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 389; VI-SDAG-NEXT: v_mov_b32_e32 v3, s3 390; VI-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 391; VI-SDAG-NEXT: s_endpgm 392; 393; VI-GISEL-LABEL: v_fcmp_f32_oge: 394; VI-GISEL: ; %bb.0: 395; VI-GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c 396; 
VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 397; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x42c80000 398; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 399; VI-GISEL-NEXT: v_cmp_ge_f32_e64 s[2:3], s2, v0 400; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 401; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1 402; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 403; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0 404; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 405; VI-GISEL-NEXT: s_endpgm 406 %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 3) 407 store i64 %result, ptr addrspace(1) %out 408 ret void 409} 410 411define amdgpu_kernel void @v_fcmp_f32_olt(ptr addrspace(1) %out, float %src) { 412; GFX11-LABEL: v_fcmp_f32_olt: 413; GFX11: ; %bb.0: 414; GFX11-NEXT: s_clause 0x1 415; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x2c 416; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 417; GFX11-NEXT: v_mov_b32_e32 v2, 0 418; GFX11-NEXT: s_waitcnt lgkmcnt(0) 419; GFX11-NEXT: v_cmp_gt_f32_e64 s[2:3], 0x42c80000, s2 420; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 421; GFX11-NEXT: v_mov_b32_e32 v0, s2 422; GFX11-NEXT: v_mov_b32_e32 v1, s3 423; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] 424; GFX11-NEXT: s_endpgm 425; 426; GFX9-LABEL: v_fcmp_f32_olt: 427; GFX9: ; %bb.0: 428; GFX9-NEXT: s_load_dword s2, s[4:5], 0x2c 429; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 430; GFX9-NEXT: v_mov_b32_e32 v0, 0x42c80000 431; GFX9-NEXT: v_mov_b32_e32 v2, 0 432; GFX9-NEXT: s_waitcnt lgkmcnt(0) 433; GFX9-NEXT: v_cmp_lt_f32_e64 s[2:3], s2, v0 434; GFX9-NEXT: v_mov_b32_e32 v0, s2 435; GFX9-NEXT: v_mov_b32_e32 v1, s3 436; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] 437; GFX9-NEXT: s_endpgm 438; 439; VI-SDAG-LABEL: v_fcmp_f32_olt: 440; VI-SDAG: ; %bb.0: 441; VI-SDAG-NEXT: s_load_dword s2, s[4:5], 0x2c 442; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 443; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x42c80000 444; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 445; VI-SDAG-NEXT: v_cmp_lt_f32_e64 s[2:3], 
s2, v0 446; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0 447; VI-SDAG-NEXT: v_mov_b32_e32 v2, s2 448; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 449; VI-SDAG-NEXT: v_mov_b32_e32 v3, s3 450; VI-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 451; VI-SDAG-NEXT: s_endpgm 452; 453; VI-GISEL-LABEL: v_fcmp_f32_olt: 454; VI-GISEL: ; %bb.0: 455; VI-GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c 456; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 457; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x42c80000 458; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 459; VI-GISEL-NEXT: v_cmp_lt_f32_e64 s[2:3], s2, v0 460; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 461; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1 462; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 463; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0 464; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 465; VI-GISEL-NEXT: s_endpgm 466 %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 4) 467 store i64 %result, ptr addrspace(1) %out 468 ret void 469} 470 471define amdgpu_kernel void @v_fcmp_f32_ole(ptr addrspace(1) %out, float %src) { 472; GFX11-LABEL: v_fcmp_f32_ole: 473; GFX11: ; %bb.0: 474; GFX11-NEXT: s_clause 0x1 475; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x2c 476; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 477; GFX11-NEXT: v_mov_b32_e32 v2, 0 478; GFX11-NEXT: s_waitcnt lgkmcnt(0) 479; GFX11-NEXT: v_cmp_ge_f32_e64 s[2:3], 0x42c80000, s2 480; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 481; GFX11-NEXT: v_mov_b32_e32 v0, s2 482; GFX11-NEXT: v_mov_b32_e32 v1, s3 483; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] 484; GFX11-NEXT: s_endpgm 485; 486; GFX9-LABEL: v_fcmp_f32_ole: 487; GFX9: ; %bb.0: 488; GFX9-NEXT: s_load_dword s2, s[4:5], 0x2c 489; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 490; GFX9-NEXT: v_mov_b32_e32 v0, 0x42c80000 491; GFX9-NEXT: v_mov_b32_e32 v2, 0 492; GFX9-NEXT: s_waitcnt lgkmcnt(0) 493; GFX9-NEXT: v_cmp_le_f32_e64 s[2:3], s2, v0 494; GFX9-NEXT: v_mov_b32_e32 v0, s2 495; GFX9-NEXT: v_mov_b32_e32 v1, s3 496; 
GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] 497; GFX9-NEXT: s_endpgm 498; 499; VI-SDAG-LABEL: v_fcmp_f32_ole: 500; VI-SDAG: ; %bb.0: 501; VI-SDAG-NEXT: s_load_dword s2, s[4:5], 0x2c 502; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 503; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x42c80000 504; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 505; VI-SDAG-NEXT: v_cmp_le_f32_e64 s[2:3], s2, v0 506; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0 507; VI-SDAG-NEXT: v_mov_b32_e32 v2, s2 508; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 509; VI-SDAG-NEXT: v_mov_b32_e32 v3, s3 510; VI-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 511; VI-SDAG-NEXT: s_endpgm 512; 513; VI-GISEL-LABEL: v_fcmp_f32_ole: 514; VI-GISEL: ; %bb.0: 515; VI-GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c 516; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 517; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x42c80000 518; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 519; VI-GISEL-NEXT: v_cmp_le_f32_e64 s[2:3], s2, v0 520; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 521; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1 522; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 523; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0 524; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 525; VI-GISEL-NEXT: s_endpgm 526 %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 5) 527 store i64 %result, ptr addrspace(1) %out 528 ret void 529} 530 531define amdgpu_kernel void @v_fcmp_f32_o(ptr addrspace(1) %out, float %src) { 532; GFX11-LABEL: v_fcmp_f32_o: 533; GFX11: ; %bb.0: 534; GFX11-NEXT: s_clause 0x1 535; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x2c 536; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 537; GFX11-NEXT: v_mov_b32_e32 v2, 0 538; GFX11-NEXT: s_waitcnt lgkmcnt(0) 539; GFX11-NEXT: v_cmp_o_f32_e64 s[2:3], 0x42c80000, s2 540; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 541; GFX11-NEXT: v_mov_b32_e32 v0, s2 542; GFX11-NEXT: v_mov_b32_e32 v1, s3 543; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] 544; GFX11-NEXT: s_endpgm 545; 546; GFX9-LABEL: v_fcmp_f32_o: 
547; GFX9: ; %bb.0: 548; GFX9-NEXT: s_load_dword s2, s[4:5], 0x2c 549; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 550; GFX9-NEXT: v_mov_b32_e32 v0, 0x42c80000 551; GFX9-NEXT: v_mov_b32_e32 v2, 0 552; GFX9-NEXT: s_waitcnt lgkmcnt(0) 553; GFX9-NEXT: v_cmp_o_f32_e64 s[2:3], s2, v0 554; GFX9-NEXT: v_mov_b32_e32 v0, s2 555; GFX9-NEXT: v_mov_b32_e32 v1, s3 556; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] 557; GFX9-NEXT: s_endpgm 558; 559; VI-SDAG-LABEL: v_fcmp_f32_o: 560; VI-SDAG: ; %bb.0: 561; VI-SDAG-NEXT: s_load_dword s2, s[4:5], 0x2c 562; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 563; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x42c80000 564; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 565; VI-SDAG-NEXT: v_cmp_o_f32_e64 s[2:3], s2, v0 566; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0 567; VI-SDAG-NEXT: v_mov_b32_e32 v2, s2 568; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 569; VI-SDAG-NEXT: v_mov_b32_e32 v3, s3 570; VI-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 571; VI-SDAG-NEXT: s_endpgm 572; 573; VI-GISEL-LABEL: v_fcmp_f32_o: 574; VI-GISEL: ; %bb.0: 575; VI-GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c 576; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 577; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x42c80000 578; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 579; VI-GISEL-NEXT: v_cmp_o_f32_e64 s[2:3], s2, v0 580; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 581; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1 582; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 583; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0 584; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 585; VI-GISEL-NEXT: s_endpgm 586 %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 7) 587 store i64 %result, ptr addrspace(1) %out 588 ret void 589} 590 591define amdgpu_kernel void @v_fcmp_f32_uo(ptr addrspace(1) %out, float %src) { 592; GFX11-LABEL: v_fcmp_f32_uo: 593; GFX11: ; %bb.0: 594; GFX11-NEXT: s_clause 0x1 595; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x2c 596; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 597; GFX11-NEXT: v_mov_b32_e32 v2, 0 598; GFX11-NEXT: 
s_waitcnt lgkmcnt(0) 599; GFX11-NEXT: v_cmp_u_f32_e64 s[2:3], 0x42c80000, s2 600; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 601; GFX11-NEXT: v_mov_b32_e32 v0, s2 602; GFX11-NEXT: v_mov_b32_e32 v1, s3 603; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] 604; GFX11-NEXT: s_endpgm 605; 606; GFX9-LABEL: v_fcmp_f32_uo: 607; GFX9: ; %bb.0: 608; GFX9-NEXT: s_load_dword s2, s[4:5], 0x2c 609; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 610; GFX9-NEXT: v_mov_b32_e32 v0, 0x42c80000 611; GFX9-NEXT: v_mov_b32_e32 v2, 0 612; GFX9-NEXT: s_waitcnt lgkmcnt(0) 613; GFX9-NEXT: v_cmp_u_f32_e64 s[2:3], s2, v0 614; GFX9-NEXT: v_mov_b32_e32 v0, s2 615; GFX9-NEXT: v_mov_b32_e32 v1, s3 616; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] 617; GFX9-NEXT: s_endpgm 618; 619; VI-SDAG-LABEL: v_fcmp_f32_uo: 620; VI-SDAG: ; %bb.0: 621; VI-SDAG-NEXT: s_load_dword s2, s[4:5], 0x2c 622; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 623; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x42c80000 624; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 625; VI-SDAG-NEXT: v_cmp_u_f32_e64 s[2:3], s2, v0 626; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0 627; VI-SDAG-NEXT: v_mov_b32_e32 v2, s2 628; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 629; VI-SDAG-NEXT: v_mov_b32_e32 v3, s3 630; VI-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 631; VI-SDAG-NEXT: s_endpgm 632; 633; VI-GISEL-LABEL: v_fcmp_f32_uo: 634; VI-GISEL: ; %bb.0: 635; VI-GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c 636; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 637; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x42c80000 638; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 639; VI-GISEL-NEXT: v_cmp_u_f32_e64 s[2:3], s2, v0 640; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 641; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1 642; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 643; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0 644; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 645; VI-GISEL-NEXT: s_endpgm 646 %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 8) 647 store i64 
%result, ptr addrspace(1) %out 648 ret void 649} 650 651define amdgpu_kernel void @v_fcmp_f32_ueq(ptr addrspace(1) %out, float %src) { 652; GFX11-LABEL: v_fcmp_f32_ueq: 653; GFX11: ; %bb.0: 654; GFX11-NEXT: s_clause 0x1 655; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x2c 656; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 657; GFX11-NEXT: v_mov_b32_e32 v2, 0 658; GFX11-NEXT: s_waitcnt lgkmcnt(0) 659; GFX11-NEXT: v_cmp_nlg_f32_e64 s[2:3], 0x42c80000, s2 660; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 661; GFX11-NEXT: v_mov_b32_e32 v0, s2 662; GFX11-NEXT: v_mov_b32_e32 v1, s3 663; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] 664; GFX11-NEXT: s_endpgm 665; 666; GFX9-LABEL: v_fcmp_f32_ueq: 667; GFX9: ; %bb.0: 668; GFX9-NEXT: s_load_dword s2, s[4:5], 0x2c 669; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 670; GFX9-NEXT: v_mov_b32_e32 v0, 0x42c80000 671; GFX9-NEXT: v_mov_b32_e32 v2, 0 672; GFX9-NEXT: s_waitcnt lgkmcnt(0) 673; GFX9-NEXT: v_cmp_nlg_f32_e64 s[2:3], s2, v0 674; GFX9-NEXT: v_mov_b32_e32 v0, s2 675; GFX9-NEXT: v_mov_b32_e32 v1, s3 676; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] 677; GFX9-NEXT: s_endpgm 678; 679; VI-SDAG-LABEL: v_fcmp_f32_ueq: 680; VI-SDAG: ; %bb.0: 681; VI-SDAG-NEXT: s_load_dword s2, s[4:5], 0x2c 682; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 683; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x42c80000 684; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 685; VI-SDAG-NEXT: v_cmp_nlg_f32_e64 s[2:3], s2, v0 686; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0 687; VI-SDAG-NEXT: v_mov_b32_e32 v2, s2 688; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 689; VI-SDAG-NEXT: v_mov_b32_e32 v3, s3 690; VI-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 691; VI-SDAG-NEXT: s_endpgm 692; 693; VI-GISEL-LABEL: v_fcmp_f32_ueq: 694; VI-GISEL: ; %bb.0: 695; VI-GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c 696; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 697; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x42c80000 698; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 699; VI-GISEL-NEXT: 
v_cmp_nlg_f32_e64 s[2:3], s2, v0 700; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 701; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1 702; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 703; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0 704; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 705; VI-GISEL-NEXT: s_endpgm 706 %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 9) 707 store i64 %result, ptr addrspace(1) %out 708 ret void 709} 710 711define amdgpu_kernel void @v_fcmp_f32_une(ptr addrspace(1) %out, float %src) { 712; GFX11-LABEL: v_fcmp_f32_une: 713; GFX11: ; %bb.0: 714; GFX11-NEXT: s_clause 0x1 715; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x2c 716; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 717; GFX11-NEXT: v_mov_b32_e32 v2, 0 718; GFX11-NEXT: s_waitcnt lgkmcnt(0) 719; GFX11-NEXT: v_cmp_neq_f32_e64 s[2:3], 0x42c80000, s2 720; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 721; GFX11-NEXT: v_mov_b32_e32 v0, s2 722; GFX11-NEXT: v_mov_b32_e32 v1, s3 723; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] 724; GFX11-NEXT: s_endpgm 725; 726; GFX9-LABEL: v_fcmp_f32_une: 727; GFX9: ; %bb.0: 728; GFX9-NEXT: s_load_dword s2, s[4:5], 0x2c 729; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 730; GFX9-NEXT: v_mov_b32_e32 v0, 0x42c80000 731; GFX9-NEXT: v_mov_b32_e32 v2, 0 732; GFX9-NEXT: s_waitcnt lgkmcnt(0) 733; GFX9-NEXT: v_cmp_neq_f32_e64 s[2:3], s2, v0 734; GFX9-NEXT: v_mov_b32_e32 v0, s2 735; GFX9-NEXT: v_mov_b32_e32 v1, s3 736; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] 737; GFX9-NEXT: s_endpgm 738; 739; VI-SDAG-LABEL: v_fcmp_f32_une: 740; VI-SDAG: ; %bb.0: 741; VI-SDAG-NEXT: s_load_dword s2, s[4:5], 0x2c 742; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 743; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x42c80000 744; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 745; VI-SDAG-NEXT: v_cmp_neq_f32_e64 s[2:3], s2, v0 746; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0 747; VI-SDAG-NEXT: v_mov_b32_e32 v2, s2 748; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 749; VI-SDAG-NEXT: 
v_mov_b32_e32 v3, s3 750; VI-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 751; VI-SDAG-NEXT: s_endpgm 752; 753; VI-GISEL-LABEL: v_fcmp_f32_une: 754; VI-GISEL: ; %bb.0: 755; VI-GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c 756; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 757; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x42c80000 758; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 759; VI-GISEL-NEXT: v_cmp_neq_f32_e64 s[2:3], s2, v0 760; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 761; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1 762; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 763; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0 764; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 765; VI-GISEL-NEXT: s_endpgm 766 %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 14) 767 store i64 %result, ptr addrspace(1) %out 768 ret void 769} 770 771define amdgpu_kernel void @v_fcmp_f32_ugt(ptr addrspace(1) %out, float %src) { 772; GFX11-LABEL: v_fcmp_f32_ugt: 773; GFX11: ; %bb.0: 774; GFX11-NEXT: s_clause 0x1 775; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x2c 776; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 777; GFX11-NEXT: v_mov_b32_e32 v2, 0 778; GFX11-NEXT: s_waitcnt lgkmcnt(0) 779; GFX11-NEXT: v_cmp_nge_f32_e64 s[2:3], 0x42c80000, s2 780; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 781; GFX11-NEXT: v_mov_b32_e32 v0, s2 782; GFX11-NEXT: v_mov_b32_e32 v1, s3 783; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] 784; GFX11-NEXT: s_endpgm 785; 786; GFX9-LABEL: v_fcmp_f32_ugt: 787; GFX9: ; %bb.0: 788; GFX9-NEXT: s_load_dword s2, s[4:5], 0x2c 789; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 790; GFX9-NEXT: v_mov_b32_e32 v0, 0x42c80000 791; GFX9-NEXT: v_mov_b32_e32 v2, 0 792; GFX9-NEXT: s_waitcnt lgkmcnt(0) 793; GFX9-NEXT: v_cmp_nle_f32_e64 s[2:3], s2, v0 794; GFX9-NEXT: v_mov_b32_e32 v0, s2 795; GFX9-NEXT: v_mov_b32_e32 v1, s3 796; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] 797; GFX9-NEXT: s_endpgm 798; 799; VI-SDAG-LABEL: v_fcmp_f32_ugt: 800; VI-SDAG: ; %bb.0: 801; 
VI-SDAG-NEXT: s_load_dword s2, s[4:5], 0x2c 802; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 803; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x42c80000 804; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 805; VI-SDAG-NEXT: v_cmp_nle_f32_e64 s[2:3], s2, v0 806; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0 807; VI-SDAG-NEXT: v_mov_b32_e32 v2, s2 808; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 809; VI-SDAG-NEXT: v_mov_b32_e32 v3, s3 810; VI-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 811; VI-SDAG-NEXT: s_endpgm 812; 813; VI-GISEL-LABEL: v_fcmp_f32_ugt: 814; VI-GISEL: ; %bb.0: 815; VI-GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c 816; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 817; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x42c80000 818; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 819; VI-GISEL-NEXT: v_cmp_nle_f32_e64 s[2:3], s2, v0 820; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 821; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1 822; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 823; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0 824; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 825; VI-GISEL-NEXT: s_endpgm 826 %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 10) 827 store i64 %result, ptr addrspace(1) %out 828 ret void 829} 830 831define amdgpu_kernel void @v_fcmp_f32_uge(ptr addrspace(1) %out, float %src) { 832; GFX11-LABEL: v_fcmp_f32_uge: 833; GFX11: ; %bb.0: 834; GFX11-NEXT: s_clause 0x1 835; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x2c 836; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 837; GFX11-NEXT: v_mov_b32_e32 v2, 0 838; GFX11-NEXT: s_waitcnt lgkmcnt(0) 839; GFX11-NEXT: v_cmp_ngt_f32_e64 s[2:3], 0x42c80000, s2 840; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 841; GFX11-NEXT: v_mov_b32_e32 v0, s2 842; GFX11-NEXT: v_mov_b32_e32 v1, s3 843; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] 844; GFX11-NEXT: s_endpgm 845; 846; GFX9-LABEL: v_fcmp_f32_uge: 847; GFX9: ; %bb.0: 848; GFX9-NEXT: s_load_dword s2, s[4:5], 0x2c 849; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 850; GFX9-NEXT: 
v_mov_b32_e32 v0, 0x42c80000 851; GFX9-NEXT: v_mov_b32_e32 v2, 0 852; GFX9-NEXT: s_waitcnt lgkmcnt(0) 853; GFX9-NEXT: v_cmp_nlt_f32_e64 s[2:3], s2, v0 854; GFX9-NEXT: v_mov_b32_e32 v0, s2 855; GFX9-NEXT: v_mov_b32_e32 v1, s3 856; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] 857; GFX9-NEXT: s_endpgm 858; 859; VI-SDAG-LABEL: v_fcmp_f32_uge: 860; VI-SDAG: ; %bb.0: 861; VI-SDAG-NEXT: s_load_dword s2, s[4:5], 0x2c 862; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 863; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x42c80000 864; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 865; VI-SDAG-NEXT: v_cmp_nlt_f32_e64 s[2:3], s2, v0 866; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0 867; VI-SDAG-NEXT: v_mov_b32_e32 v2, s2 868; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 869; VI-SDAG-NEXT: v_mov_b32_e32 v3, s3 870; VI-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 871; VI-SDAG-NEXT: s_endpgm 872; 873; VI-GISEL-LABEL: v_fcmp_f32_uge: 874; VI-GISEL: ; %bb.0: 875; VI-GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c 876; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 877; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x42c80000 878; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 879; VI-GISEL-NEXT: v_cmp_nlt_f32_e64 s[2:3], s2, v0 880; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 881; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1 882; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 883; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0 884; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 885; VI-GISEL-NEXT: s_endpgm 886 %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 11) 887 store i64 %result, ptr addrspace(1) %out 888 ret void 889} 890 891define amdgpu_kernel void @v_fcmp_f32_ult(ptr addrspace(1) %out, float %src) { 892; GFX11-LABEL: v_fcmp_f32_ult: 893; GFX11: ; %bb.0: 894; GFX11-NEXT: s_clause 0x1 895; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x2c 896; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 897; GFX11-NEXT: v_mov_b32_e32 v2, 0 898; GFX11-NEXT: s_waitcnt lgkmcnt(0) 899; GFX11-NEXT: v_cmp_nle_f32_e64 s[2:3], 0x42c80000, s2 900; GFX11-NEXT: s_delay_alu 
instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 901; GFX11-NEXT: v_mov_b32_e32 v0, s2 902; GFX11-NEXT: v_mov_b32_e32 v1, s3 903; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] 904; GFX11-NEXT: s_endpgm 905; 906; GFX9-LABEL: v_fcmp_f32_ult: 907; GFX9: ; %bb.0: 908; GFX9-NEXT: s_load_dword s2, s[4:5], 0x2c 909; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 910; GFX9-NEXT: v_mov_b32_e32 v0, 0x42c80000 911; GFX9-NEXT: v_mov_b32_e32 v2, 0 912; GFX9-NEXT: s_waitcnt lgkmcnt(0) 913; GFX9-NEXT: v_cmp_nge_f32_e64 s[2:3], s2, v0 914; GFX9-NEXT: v_mov_b32_e32 v0, s2 915; GFX9-NEXT: v_mov_b32_e32 v1, s3 916; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] 917; GFX9-NEXT: s_endpgm 918; 919; VI-SDAG-LABEL: v_fcmp_f32_ult: 920; VI-SDAG: ; %bb.0: 921; VI-SDAG-NEXT: s_load_dword s2, s[4:5], 0x2c 922; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 923; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x42c80000 924; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 925; VI-SDAG-NEXT: v_cmp_nge_f32_e64 s[2:3], s2, v0 926; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0 927; VI-SDAG-NEXT: v_mov_b32_e32 v2, s2 928; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 929; VI-SDAG-NEXT: v_mov_b32_e32 v3, s3 930; VI-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 931; VI-SDAG-NEXT: s_endpgm 932; 933; VI-GISEL-LABEL: v_fcmp_f32_ult: 934; VI-GISEL: ; %bb.0: 935; VI-GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c 936; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 937; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x42c80000 938; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 939; VI-GISEL-NEXT: v_cmp_nge_f32_e64 s[2:3], s2, v0 940; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 941; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1 942; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 943; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0 944; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 945; VI-GISEL-NEXT: s_endpgm 946 %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 12) 947 store i64 %result, ptr addrspace(1) %out 948 ret void 949} 950 951define amdgpu_kernel void @v_fcmp_f32_ule(ptr 
addrspace(1) %out, float %src) { 952; GFX11-LABEL: v_fcmp_f32_ule: 953; GFX11: ; %bb.0: 954; GFX11-NEXT: s_clause 0x1 955; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x2c 956; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 957; GFX11-NEXT: v_mov_b32_e32 v2, 0 958; GFX11-NEXT: s_waitcnt lgkmcnt(0) 959; GFX11-NEXT: v_cmp_nlt_f32_e64 s[2:3], 0x42c80000, s2 960; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 961; GFX11-NEXT: v_mov_b32_e32 v0, s2 962; GFX11-NEXT: v_mov_b32_e32 v1, s3 963; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] 964; GFX11-NEXT: s_endpgm 965; 966; GFX9-LABEL: v_fcmp_f32_ule: 967; GFX9: ; %bb.0: 968; GFX9-NEXT: s_load_dword s2, s[4:5], 0x2c 969; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 970; GFX9-NEXT: v_mov_b32_e32 v0, 0x42c80000 971; GFX9-NEXT: v_mov_b32_e32 v2, 0 972; GFX9-NEXT: s_waitcnt lgkmcnt(0) 973; GFX9-NEXT: v_cmp_ngt_f32_e64 s[2:3], s2, v0 974; GFX9-NEXT: v_mov_b32_e32 v0, s2 975; GFX9-NEXT: v_mov_b32_e32 v1, s3 976; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] 977; GFX9-NEXT: s_endpgm 978; 979; VI-SDAG-LABEL: v_fcmp_f32_ule: 980; VI-SDAG: ; %bb.0: 981; VI-SDAG-NEXT: s_load_dword s2, s[4:5], 0x2c 982; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 983; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x42c80000 984; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 985; VI-SDAG-NEXT: v_cmp_ngt_f32_e64 s[2:3], s2, v0 986; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0 987; VI-SDAG-NEXT: v_mov_b32_e32 v2, s2 988; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 989; VI-SDAG-NEXT: v_mov_b32_e32 v3, s3 990; VI-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 991; VI-SDAG-NEXT: s_endpgm 992; 993; VI-GISEL-LABEL: v_fcmp_f32_ule: 994; VI-GISEL: ; %bb.0: 995; VI-GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c 996; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 997; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x42c80000 998; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 999; VI-GISEL-NEXT: v_cmp_ngt_f32_e64 s[2:3], s2, v0 1000; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 1001; VI-GISEL-NEXT: 
v_mov_b32_e32 v3, s1 1002; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 1003; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0 1004; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 1005; VI-GISEL-NEXT: s_endpgm 1006 %result = call i64 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 13) 1007 store i64 %result, ptr addrspace(1) %out 1008 ret void 1009} 1010 1011define amdgpu_kernel void @v_fcmp_f64_oeq(ptr addrspace(1) %out, double %src) { 1012; GFX11-LABEL: v_fcmp_f64_oeq: 1013; GFX11: ; %bb.0: 1014; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1015; GFX11-NEXT: v_mov_b32_e32 v2, 0 1016; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1017; GFX11-NEXT: v_cmp_eq_f64_e64 s[2:3], 0x40590000, s[2:3] 1018; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 1019; GFX11-NEXT: v_mov_b32_e32 v0, s2 1020; GFX11-NEXT: v_mov_b32_e32 v1, s3 1021; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] 1022; GFX11-NEXT: s_endpgm 1023; 1024; GFX9-LABEL: v_fcmp_f64_oeq: 1025; GFX9: ; %bb.0: 1026; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1027; GFX9-NEXT: v_mov_b32_e32 v0, 0 1028; GFX9-NEXT: v_mov_b32_e32 v1, 0x40590000 1029; GFX9-NEXT: v_mov_b32_e32 v2, 0 1030; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1031; GFX9-NEXT: v_cmp_eq_f64_e64 s[2:3], s[2:3], v[0:1] 1032; GFX9-NEXT: v_mov_b32_e32 v0, s2 1033; GFX9-NEXT: v_mov_b32_e32 v1, s3 1034; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] 1035; GFX9-NEXT: s_endpgm 1036; 1037; VI-SDAG-LABEL: v_fcmp_f64_oeq: 1038; VI-SDAG: ; %bb.0: 1039; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1040; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0 1041; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x40590000 1042; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1043; VI-SDAG-NEXT: v_cmp_eq_f64_e64 s[2:3], s[2:3], v[0:1] 1044; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0 1045; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 1046; VI-SDAG-NEXT: v_mov_b32_e32 v2, s2 1047; VI-SDAG-NEXT: v_mov_b32_e32 v3, s3 1048; VI-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 1049; VI-SDAG-NEXT: s_endpgm 1050; 1051; VI-GISEL-LABEL: 
v_fcmp_f64_oeq: 1052; VI-GISEL: ; %bb.0: 1053; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1054; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0 1055; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x40590000 1056; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1057; VI-GISEL-NEXT: v_cmp_eq_f64_e64 s[2:3], s[2:3], v[0:1] 1058; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1 1059; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0 1060; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 1061; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 1062; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 1063; VI-GISEL-NEXT: s_endpgm 1064 %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 1) 1065 store i64 %result, ptr addrspace(1) %out 1066 ret void 1067} 1068 1069define amdgpu_kernel void @v_fcmp_f64_one(ptr addrspace(1) %out, double %src) { 1070; GFX11-LABEL: v_fcmp_f64_one: 1071; GFX11: ; %bb.0: 1072; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1073; GFX11-NEXT: v_mov_b32_e32 v2, 0 1074; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1075; GFX11-NEXT: v_cmp_neq_f64_e64 s[2:3], 0x40590000, s[2:3] 1076; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 1077; GFX11-NEXT: v_mov_b32_e32 v0, s2 1078; GFX11-NEXT: v_mov_b32_e32 v1, s3 1079; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] 1080; GFX11-NEXT: s_endpgm 1081; 1082; GFX9-LABEL: v_fcmp_f64_one: 1083; GFX9: ; %bb.0: 1084; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1085; GFX9-NEXT: v_mov_b32_e32 v0, 0 1086; GFX9-NEXT: v_mov_b32_e32 v1, 0x40590000 1087; GFX9-NEXT: v_mov_b32_e32 v2, 0 1088; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1089; GFX9-NEXT: v_cmp_neq_f64_e64 s[2:3], s[2:3], v[0:1] 1090; GFX9-NEXT: v_mov_b32_e32 v0, s2 1091; GFX9-NEXT: v_mov_b32_e32 v1, s3 1092; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] 1093; GFX9-NEXT: s_endpgm 1094; 1095; VI-SDAG-LABEL: v_fcmp_f64_one: 1096; VI-SDAG: ; %bb.0: 1097; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1098; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0 1099; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x40590000 1100; VI-SDAG-NEXT: 
s_waitcnt lgkmcnt(0) 1101; VI-SDAG-NEXT: v_cmp_neq_f64_e64 s[2:3], s[2:3], v[0:1] 1102; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0 1103; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 1104; VI-SDAG-NEXT: v_mov_b32_e32 v2, s2 1105; VI-SDAG-NEXT: v_mov_b32_e32 v3, s3 1106; VI-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 1107; VI-SDAG-NEXT: s_endpgm 1108; 1109; VI-GISEL-LABEL: v_fcmp_f64_one: 1110; VI-GISEL: ; %bb.0: 1111; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1112; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0 1113; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x40590000 1114; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1115; VI-GISEL-NEXT: v_cmp_neq_f64_e64 s[2:3], s[2:3], v[0:1] 1116; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1 1117; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0 1118; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 1119; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 1120; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 1121; VI-GISEL-NEXT: s_endpgm 1122 %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 6) 1123 store i64 %result, ptr addrspace(1) %out 1124 ret void 1125} 1126 1127define amdgpu_kernel void @v_fcmp_f64_ogt(ptr addrspace(1) %out, double %src) { 1128; GFX11-LABEL: v_fcmp_f64_ogt: 1129; GFX11: ; %bb.0: 1130; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1131; GFX11-NEXT: v_mov_b32_e32 v2, 0 1132; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1133; GFX11-NEXT: v_cmp_lt_f64_e64 s[2:3], 0x40590000, s[2:3] 1134; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 1135; GFX11-NEXT: v_mov_b32_e32 v0, s2 1136; GFX11-NEXT: v_mov_b32_e32 v1, s3 1137; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] 1138; GFX11-NEXT: s_endpgm 1139; 1140; GFX9-LABEL: v_fcmp_f64_ogt: 1141; GFX9: ; %bb.0: 1142; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1143; GFX9-NEXT: v_mov_b32_e32 v0, 0 1144; GFX9-NEXT: v_mov_b32_e32 v1, 0x40590000 1145; GFX9-NEXT: v_mov_b32_e32 v2, 0 1146; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1147; GFX9-NEXT: v_cmp_gt_f64_e64 s[2:3], s[2:3], v[0:1] 1148; GFX9-NEXT: v_mov_b32_e32 v0, 
s2 1149; GFX9-NEXT: v_mov_b32_e32 v1, s3 1150; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] 1151; GFX9-NEXT: s_endpgm 1152; 1153; VI-SDAG-LABEL: v_fcmp_f64_ogt: 1154; VI-SDAG: ; %bb.0: 1155; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1156; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0 1157; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x40590000 1158; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1159; VI-SDAG-NEXT: v_cmp_gt_f64_e64 s[2:3], s[2:3], v[0:1] 1160; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0 1161; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 1162; VI-SDAG-NEXT: v_mov_b32_e32 v2, s2 1163; VI-SDAG-NEXT: v_mov_b32_e32 v3, s3 1164; VI-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 1165; VI-SDAG-NEXT: s_endpgm 1166; 1167; VI-GISEL-LABEL: v_fcmp_f64_ogt: 1168; VI-GISEL: ; %bb.0: 1169; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1170; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0 1171; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x40590000 1172; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1173; VI-GISEL-NEXT: v_cmp_gt_f64_e64 s[2:3], s[2:3], v[0:1] 1174; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1 1175; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0 1176; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 1177; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 1178; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 1179; VI-GISEL-NEXT: s_endpgm 1180 %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 2) 1181 store i64 %result, ptr addrspace(1) %out 1182 ret void 1183} 1184 1185define amdgpu_kernel void @v_fcmp_f64_oge(ptr addrspace(1) %out, double %src) { 1186; GFX11-LABEL: v_fcmp_f64_oge: 1187; GFX11: ; %bb.0: 1188; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1189; GFX11-NEXT: v_mov_b32_e32 v2, 0 1190; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1191; GFX11-NEXT: v_cmp_le_f64_e64 s[2:3], 0x40590000, s[2:3] 1192; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 1193; GFX11-NEXT: v_mov_b32_e32 v0, s2 1194; GFX11-NEXT: v_mov_b32_e32 v1, s3 1195; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] 1196; GFX11-NEXT: s_endpgm 1197; 1198; 
GFX9-LABEL: v_fcmp_f64_oge: 1199; GFX9: ; %bb.0: 1200; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1201; GFX9-NEXT: v_mov_b32_e32 v0, 0 1202; GFX9-NEXT: v_mov_b32_e32 v1, 0x40590000 1203; GFX9-NEXT: v_mov_b32_e32 v2, 0 1204; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1205; GFX9-NEXT: v_cmp_ge_f64_e64 s[2:3], s[2:3], v[0:1] 1206; GFX9-NEXT: v_mov_b32_e32 v0, s2 1207; GFX9-NEXT: v_mov_b32_e32 v1, s3 1208; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] 1209; GFX9-NEXT: s_endpgm 1210; 1211; VI-SDAG-LABEL: v_fcmp_f64_oge: 1212; VI-SDAG: ; %bb.0: 1213; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1214; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0 1215; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x40590000 1216; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1217; VI-SDAG-NEXT: v_cmp_ge_f64_e64 s[2:3], s[2:3], v[0:1] 1218; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0 1219; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 1220; VI-SDAG-NEXT: v_mov_b32_e32 v2, s2 1221; VI-SDAG-NEXT: v_mov_b32_e32 v3, s3 1222; VI-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 1223; VI-SDAG-NEXT: s_endpgm 1224; 1225; VI-GISEL-LABEL: v_fcmp_f64_oge: 1226; VI-GISEL: ; %bb.0: 1227; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1228; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0 1229; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x40590000 1230; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1231; VI-GISEL-NEXT: v_cmp_ge_f64_e64 s[2:3], s[2:3], v[0:1] 1232; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1 1233; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0 1234; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 1235; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 1236; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 1237; VI-GISEL-NEXT: s_endpgm 1238 %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 3) 1239 store i64 %result, ptr addrspace(1) %out 1240 ret void 1241} 1242 1243define amdgpu_kernel void @v_fcmp_f64_olt(ptr addrspace(1) %out, double %src) { 1244; GFX11-LABEL: v_fcmp_f64_olt: 1245; GFX11: ; %bb.0: 1246; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1247; GFX11-NEXT: v_mov_b32_e32 v2, 0 1248; 
GFX11-NEXT: s_waitcnt lgkmcnt(0) 1249; GFX11-NEXT: v_cmp_gt_f64_e64 s[2:3], 0x40590000, s[2:3] 1250; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 1251; GFX11-NEXT: v_mov_b32_e32 v0, s2 1252; GFX11-NEXT: v_mov_b32_e32 v1, s3 1253; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] 1254; GFX11-NEXT: s_endpgm 1255; 1256; GFX9-LABEL: v_fcmp_f64_olt: 1257; GFX9: ; %bb.0: 1258; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1259; GFX9-NEXT: v_mov_b32_e32 v0, 0 1260; GFX9-NEXT: v_mov_b32_e32 v1, 0x40590000 1261; GFX9-NEXT: v_mov_b32_e32 v2, 0 1262; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1263; GFX9-NEXT: v_cmp_lt_f64_e64 s[2:3], s[2:3], v[0:1] 1264; GFX9-NEXT: v_mov_b32_e32 v0, s2 1265; GFX9-NEXT: v_mov_b32_e32 v1, s3 1266; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] 1267; GFX9-NEXT: s_endpgm 1268; 1269; VI-SDAG-LABEL: v_fcmp_f64_olt: 1270; VI-SDAG: ; %bb.0: 1271; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1272; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0 1273; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x40590000 1274; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1275; VI-SDAG-NEXT: v_cmp_lt_f64_e64 s[2:3], s[2:3], v[0:1] 1276; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0 1277; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 1278; VI-SDAG-NEXT: v_mov_b32_e32 v2, s2 1279; VI-SDAG-NEXT: v_mov_b32_e32 v3, s3 1280; VI-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 1281; VI-SDAG-NEXT: s_endpgm 1282; 1283; VI-GISEL-LABEL: v_fcmp_f64_olt: 1284; VI-GISEL: ; %bb.0: 1285; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1286; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0 1287; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x40590000 1288; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1289; VI-GISEL-NEXT: v_cmp_lt_f64_e64 s[2:3], s[2:3], v[0:1] 1290; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1 1291; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0 1292; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 1293; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 1294; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 1295; VI-GISEL-NEXT: s_endpgm 1296 %result = call i64 
@llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 4) 1297 store i64 %result, ptr addrspace(1) %out 1298 ret void 1299} 1300 1301define amdgpu_kernel void @v_fcmp_f64_ole(ptr addrspace(1) %out, double %src) { 1302; GFX11-LABEL: v_fcmp_f64_ole: 1303; GFX11: ; %bb.0: 1304; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1305; GFX11-NEXT: v_mov_b32_e32 v2, 0 1306; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1307; GFX11-NEXT: v_cmp_ge_f64_e64 s[2:3], 0x40590000, s[2:3] 1308; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 1309; GFX11-NEXT: v_mov_b32_e32 v0, s2 1310; GFX11-NEXT: v_mov_b32_e32 v1, s3 1311; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] 1312; GFX11-NEXT: s_endpgm 1313; 1314; GFX9-LABEL: v_fcmp_f64_ole: 1315; GFX9: ; %bb.0: 1316; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1317; GFX9-NEXT: v_mov_b32_e32 v0, 0 1318; GFX9-NEXT: v_mov_b32_e32 v1, 0x40590000 1319; GFX9-NEXT: v_mov_b32_e32 v2, 0 1320; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1321; GFX9-NEXT: v_cmp_le_f64_e64 s[2:3], s[2:3], v[0:1] 1322; GFX9-NEXT: v_mov_b32_e32 v0, s2 1323; GFX9-NEXT: v_mov_b32_e32 v1, s3 1324; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] 1325; GFX9-NEXT: s_endpgm 1326; 1327; VI-SDAG-LABEL: v_fcmp_f64_ole: 1328; VI-SDAG: ; %bb.0: 1329; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1330; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0 1331; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x40590000 1332; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1333; VI-SDAG-NEXT: v_cmp_le_f64_e64 s[2:3], s[2:3], v[0:1] 1334; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0 1335; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 1336; VI-SDAG-NEXT: v_mov_b32_e32 v2, s2 1337; VI-SDAG-NEXT: v_mov_b32_e32 v3, s3 1338; VI-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 1339; VI-SDAG-NEXT: s_endpgm 1340; 1341; VI-GISEL-LABEL: v_fcmp_f64_ole: 1342; VI-GISEL: ; %bb.0: 1343; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1344; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0 1345; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x40590000 1346; VI-GISEL-NEXT: 
s_waitcnt lgkmcnt(0) 1347; VI-GISEL-NEXT: v_cmp_le_f64_e64 s[2:3], s[2:3], v[0:1] 1348; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1 1349; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0 1350; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 1351; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 1352; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 1353; VI-GISEL-NEXT: s_endpgm 1354 %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 5) 1355 store i64 %result, ptr addrspace(1) %out 1356 ret void 1357} 1358 1359define amdgpu_kernel void @v_fcmp_f64_ueq(ptr addrspace(1) %out, double %src) { 1360; GFX11-LABEL: v_fcmp_f64_ueq: 1361; GFX11: ; %bb.0: 1362; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1363; GFX11-NEXT: v_mov_b32_e32 v2, 0 1364; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1365; GFX11-NEXT: v_cmp_nlg_f64_e64 s[2:3], 0x40590000, s[2:3] 1366; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 1367; GFX11-NEXT: v_mov_b32_e32 v0, s2 1368; GFX11-NEXT: v_mov_b32_e32 v1, s3 1369; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] 1370; GFX11-NEXT: s_endpgm 1371; 1372; GFX9-LABEL: v_fcmp_f64_ueq: 1373; GFX9: ; %bb.0: 1374; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1375; GFX9-NEXT: v_mov_b32_e32 v0, 0 1376; GFX9-NEXT: v_mov_b32_e32 v1, 0x40590000 1377; GFX9-NEXT: v_mov_b32_e32 v2, 0 1378; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1379; GFX9-NEXT: v_cmp_nlg_f64_e64 s[2:3], s[2:3], v[0:1] 1380; GFX9-NEXT: v_mov_b32_e32 v0, s2 1381; GFX9-NEXT: v_mov_b32_e32 v1, s3 1382; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] 1383; GFX9-NEXT: s_endpgm 1384; 1385; VI-SDAG-LABEL: v_fcmp_f64_ueq: 1386; VI-SDAG: ; %bb.0: 1387; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1388; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0 1389; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x40590000 1390; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1391; VI-SDAG-NEXT: v_cmp_nlg_f64_e64 s[2:3], s[2:3], v[0:1] 1392; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0 1393; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 1394; VI-SDAG-NEXT: v_mov_b32_e32 v2, s2 1395; 
VI-SDAG-NEXT: v_mov_b32_e32 v3, s3 1396; VI-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 1397; VI-SDAG-NEXT: s_endpgm 1398; 1399; VI-GISEL-LABEL: v_fcmp_f64_ueq: 1400; VI-GISEL: ; %bb.0: 1401; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1402; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0 1403; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x40590000 1404; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1405; VI-GISEL-NEXT: v_cmp_nlg_f64_e64 s[2:3], s[2:3], v[0:1] 1406; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1 1407; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0 1408; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 1409; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 1410; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 1411; VI-GISEL-NEXT: s_endpgm 1412 %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 9) 1413 store i64 %result, ptr addrspace(1) %out 1414 ret void 1415} 1416 1417define amdgpu_kernel void @v_fcmp_f64_o(ptr addrspace(1) %out, double %src) { 1418; GFX11-LABEL: v_fcmp_f64_o: 1419; GFX11: ; %bb.0: 1420; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1421; GFX11-NEXT: v_mov_b32_e32 v2, 0 1422; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1423; GFX11-NEXT: v_cmp_o_f64_e64 s[2:3], 0x40590000, s[2:3] 1424; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 1425; GFX11-NEXT: v_mov_b32_e32 v0, s2 1426; GFX11-NEXT: v_mov_b32_e32 v1, s3 1427; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] 1428; GFX11-NEXT: s_endpgm 1429; 1430; GFX9-LABEL: v_fcmp_f64_o: 1431; GFX9: ; %bb.0: 1432; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1433; GFX9-NEXT: v_mov_b32_e32 v0, 0 1434; GFX9-NEXT: v_mov_b32_e32 v1, 0x40590000 1435; GFX9-NEXT: v_mov_b32_e32 v2, 0 1436; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1437; GFX9-NEXT: v_cmp_o_f64_e64 s[2:3], s[2:3], v[0:1] 1438; GFX9-NEXT: v_mov_b32_e32 v0, s2 1439; GFX9-NEXT: v_mov_b32_e32 v1, s3 1440; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] 1441; GFX9-NEXT: s_endpgm 1442; 1443; VI-SDAG-LABEL: v_fcmp_f64_o: 1444; VI-SDAG: ; %bb.0: 1445; VI-SDAG-NEXT: 
s_load_dwordx4 s[0:3], s[4:5], 0x24 1446; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0 1447; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x40590000 1448; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1449; VI-SDAG-NEXT: v_cmp_o_f64_e64 s[2:3], s[2:3], v[0:1] 1450; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0 1451; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 1452; VI-SDAG-NEXT: v_mov_b32_e32 v2, s2 1453; VI-SDAG-NEXT: v_mov_b32_e32 v3, s3 1454; VI-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 1455; VI-SDAG-NEXT: s_endpgm 1456; 1457; VI-GISEL-LABEL: v_fcmp_f64_o: 1458; VI-GISEL: ; %bb.0: 1459; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1460; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0 1461; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x40590000 1462; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1463; VI-GISEL-NEXT: v_cmp_o_f64_e64 s[2:3], s[2:3], v[0:1] 1464; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1 1465; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0 1466; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 1467; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 1468; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 1469; VI-GISEL-NEXT: s_endpgm 1470 %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 7) 1471 store i64 %result, ptr addrspace(1) %out 1472 ret void 1473} 1474 1475define amdgpu_kernel void @v_fcmp_f64_uo(ptr addrspace(1) %out, double %src) { 1476; GFX11-LABEL: v_fcmp_f64_uo: 1477; GFX11: ; %bb.0: 1478; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1479; GFX11-NEXT: v_mov_b32_e32 v2, 0 1480; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1481; GFX11-NEXT: v_cmp_u_f64_e64 s[2:3], 0x40590000, s[2:3] 1482; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 1483; GFX11-NEXT: v_mov_b32_e32 v0, s2 1484; GFX11-NEXT: v_mov_b32_e32 v1, s3 1485; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] 1486; GFX11-NEXT: s_endpgm 1487; 1488; GFX9-LABEL: v_fcmp_f64_uo: 1489; GFX9: ; %bb.0: 1490; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1491; GFX9-NEXT: v_mov_b32_e32 v0, 0 1492; GFX9-NEXT: v_mov_b32_e32 v1, 0x40590000 1493; GFX9-NEXT: v_mov_b32_e32 
v2, 0 1494; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1495; GFX9-NEXT: v_cmp_u_f64_e64 s[2:3], s[2:3], v[0:1] 1496; GFX9-NEXT: v_mov_b32_e32 v0, s2 1497; GFX9-NEXT: v_mov_b32_e32 v1, s3 1498; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] 1499; GFX9-NEXT: s_endpgm 1500; 1501; VI-SDAG-LABEL: v_fcmp_f64_uo: 1502; VI-SDAG: ; %bb.0: 1503; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1504; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0 1505; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x40590000 1506; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1507; VI-SDAG-NEXT: v_cmp_u_f64_e64 s[2:3], s[2:3], v[0:1] 1508; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0 1509; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 1510; VI-SDAG-NEXT: v_mov_b32_e32 v2, s2 1511; VI-SDAG-NEXT: v_mov_b32_e32 v3, s3 1512; VI-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 1513; VI-SDAG-NEXT: s_endpgm 1514; 1515; VI-GISEL-LABEL: v_fcmp_f64_uo: 1516; VI-GISEL: ; %bb.0: 1517; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1518; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0 1519; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x40590000 1520; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1521; VI-GISEL-NEXT: v_cmp_u_f64_e64 s[2:3], s[2:3], v[0:1] 1522; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1 1523; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0 1524; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 1525; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 1526; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 1527; VI-GISEL-NEXT: s_endpgm 1528 %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 8) 1529 store i64 %result, ptr addrspace(1) %out 1530 ret void 1531} 1532 1533define amdgpu_kernel void @v_fcmp_f64_une(ptr addrspace(1) %out, double %src) { 1534; GFX11-LABEL: v_fcmp_f64_une: 1535; GFX11: ; %bb.0: 1536; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1537; GFX11-NEXT: v_mov_b32_e32 v2, 0 1538; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1539; GFX11-NEXT: v_cmp_neq_f64_e64 s[2:3], 0x40590000, s[2:3] 1540; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 1541; GFX11-NEXT: v_mov_b32_e32 v0, s2 
1542; GFX11-NEXT: v_mov_b32_e32 v1, s3 1543; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] 1544; GFX11-NEXT: s_endpgm 1545; 1546; GFX9-LABEL: v_fcmp_f64_une: 1547; GFX9: ; %bb.0: 1548; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1549; GFX9-NEXT: v_mov_b32_e32 v0, 0 1550; GFX9-NEXT: v_mov_b32_e32 v1, 0x40590000 1551; GFX9-NEXT: v_mov_b32_e32 v2, 0 1552; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1553; GFX9-NEXT: v_cmp_neq_f64_e64 s[2:3], s[2:3], v[0:1] 1554; GFX9-NEXT: v_mov_b32_e32 v0, s2 1555; GFX9-NEXT: v_mov_b32_e32 v1, s3 1556; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] 1557; GFX9-NEXT: s_endpgm 1558; 1559; VI-SDAG-LABEL: v_fcmp_f64_une: 1560; VI-SDAG: ; %bb.0: 1561; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1562; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0 1563; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x40590000 1564; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1565; VI-SDAG-NEXT: v_cmp_neq_f64_e64 s[2:3], s[2:3], v[0:1] 1566; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0 1567; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 1568; VI-SDAG-NEXT: v_mov_b32_e32 v2, s2 1569; VI-SDAG-NEXT: v_mov_b32_e32 v3, s3 1570; VI-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 1571; VI-SDAG-NEXT: s_endpgm 1572; 1573; VI-GISEL-LABEL: v_fcmp_f64_une: 1574; VI-GISEL: ; %bb.0: 1575; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1576; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0 1577; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x40590000 1578; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1579; VI-GISEL-NEXT: v_cmp_neq_f64_e64 s[2:3], s[2:3], v[0:1] 1580; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1 1581; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0 1582; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 1583; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 1584; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 1585; VI-GISEL-NEXT: s_endpgm 1586 %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 14) 1587 store i64 %result, ptr addrspace(1) %out 1588 ret void 1589} 1590 1591define amdgpu_kernel void @v_fcmp_f64_ugt(ptr addrspace(1) %out, double %src) { 1592; GFX11-LABEL: 
v_fcmp_f64_ugt: 1593; GFX11: ; %bb.0: 1594; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1595; GFX11-NEXT: v_mov_b32_e32 v2, 0 1596; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1597; GFX11-NEXT: v_cmp_nge_f64_e64 s[2:3], 0x40590000, s[2:3] 1598; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 1599; GFX11-NEXT: v_mov_b32_e32 v0, s2 1600; GFX11-NEXT: v_mov_b32_e32 v1, s3 1601; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] 1602; GFX11-NEXT: s_endpgm 1603; 1604; GFX9-LABEL: v_fcmp_f64_ugt: 1605; GFX9: ; %bb.0: 1606; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1607; GFX9-NEXT: v_mov_b32_e32 v0, 0 1608; GFX9-NEXT: v_mov_b32_e32 v1, 0x40590000 1609; GFX9-NEXT: v_mov_b32_e32 v2, 0 1610; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1611; GFX9-NEXT: v_cmp_nle_f64_e64 s[2:3], s[2:3], v[0:1] 1612; GFX9-NEXT: v_mov_b32_e32 v0, s2 1613; GFX9-NEXT: v_mov_b32_e32 v1, s3 1614; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] 1615; GFX9-NEXT: s_endpgm 1616; 1617; VI-SDAG-LABEL: v_fcmp_f64_ugt: 1618; VI-SDAG: ; %bb.0: 1619; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1620; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0 1621; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x40590000 1622; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1623; VI-SDAG-NEXT: v_cmp_nle_f64_e64 s[2:3], s[2:3], v[0:1] 1624; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0 1625; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 1626; VI-SDAG-NEXT: v_mov_b32_e32 v2, s2 1627; VI-SDAG-NEXT: v_mov_b32_e32 v3, s3 1628; VI-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 1629; VI-SDAG-NEXT: s_endpgm 1630; 1631; VI-GISEL-LABEL: v_fcmp_f64_ugt: 1632; VI-GISEL: ; %bb.0: 1633; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1634; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0 1635; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x40590000 1636; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1637; VI-GISEL-NEXT: v_cmp_nle_f64_e64 s[2:3], s[2:3], v[0:1] 1638; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1 1639; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0 1640; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 1641; VI-GISEL-NEXT: 
v_mov_b32_e32 v1, s3 1642; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 1643; VI-GISEL-NEXT: s_endpgm 1644 %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 10) 1645 store i64 %result, ptr addrspace(1) %out 1646 ret void 1647} 1648 1649define amdgpu_kernel void @v_fcmp_f64_uge(ptr addrspace(1) %out, double %src) { 1650; GFX11-LABEL: v_fcmp_f64_uge: 1651; GFX11: ; %bb.0: 1652; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1653; GFX11-NEXT: v_mov_b32_e32 v2, 0 1654; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1655; GFX11-NEXT: v_cmp_ngt_f64_e64 s[2:3], 0x40590000, s[2:3] 1656; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 1657; GFX11-NEXT: v_mov_b32_e32 v0, s2 1658; GFX11-NEXT: v_mov_b32_e32 v1, s3 1659; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] 1660; GFX11-NEXT: s_endpgm 1661; 1662; GFX9-LABEL: v_fcmp_f64_uge: 1663; GFX9: ; %bb.0: 1664; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1665; GFX9-NEXT: v_mov_b32_e32 v0, 0 1666; GFX9-NEXT: v_mov_b32_e32 v1, 0x40590000 1667; GFX9-NEXT: v_mov_b32_e32 v2, 0 1668; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1669; GFX9-NEXT: v_cmp_nlt_f64_e64 s[2:3], s[2:3], v[0:1] 1670; GFX9-NEXT: v_mov_b32_e32 v0, s2 1671; GFX9-NEXT: v_mov_b32_e32 v1, s3 1672; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] 1673; GFX9-NEXT: s_endpgm 1674; 1675; VI-SDAG-LABEL: v_fcmp_f64_uge: 1676; VI-SDAG: ; %bb.0: 1677; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1678; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0 1679; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x40590000 1680; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1681; VI-SDAG-NEXT: v_cmp_nlt_f64_e64 s[2:3], s[2:3], v[0:1] 1682; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0 1683; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 1684; VI-SDAG-NEXT: v_mov_b32_e32 v2, s2 1685; VI-SDAG-NEXT: v_mov_b32_e32 v3, s3 1686; VI-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 1687; VI-SDAG-NEXT: s_endpgm 1688; 1689; VI-GISEL-LABEL: v_fcmp_f64_uge: 1690; VI-GISEL: ; %bb.0: 1691; VI-GISEL-NEXT: s_load_dwordx4 
s[0:3], s[4:5], 0x24 1692; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0 1693; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x40590000 1694; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1695; VI-GISEL-NEXT: v_cmp_nlt_f64_e64 s[2:3], s[2:3], v[0:1] 1696; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1 1697; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0 1698; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 1699; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 1700; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 1701; VI-GISEL-NEXT: s_endpgm 1702 %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 11) 1703 store i64 %result, ptr addrspace(1) %out 1704 ret void 1705} 1706 1707define amdgpu_kernel void @v_fcmp_f64_ult(ptr addrspace(1) %out, double %src) { 1708; GFX11-LABEL: v_fcmp_f64_ult: 1709; GFX11: ; %bb.0: 1710; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1711; GFX11-NEXT: v_mov_b32_e32 v2, 0 1712; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1713; GFX11-NEXT: v_cmp_nle_f64_e64 s[2:3], 0x40590000, s[2:3] 1714; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 1715; GFX11-NEXT: v_mov_b32_e32 v0, s2 1716; GFX11-NEXT: v_mov_b32_e32 v1, s3 1717; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] 1718; GFX11-NEXT: s_endpgm 1719; 1720; GFX9-LABEL: v_fcmp_f64_ult: 1721; GFX9: ; %bb.0: 1722; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1723; GFX9-NEXT: v_mov_b32_e32 v0, 0 1724; GFX9-NEXT: v_mov_b32_e32 v1, 0x40590000 1725; GFX9-NEXT: v_mov_b32_e32 v2, 0 1726; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1727; GFX9-NEXT: v_cmp_nge_f64_e64 s[2:3], s[2:3], v[0:1] 1728; GFX9-NEXT: v_mov_b32_e32 v0, s2 1729; GFX9-NEXT: v_mov_b32_e32 v1, s3 1730; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] 1731; GFX9-NEXT: s_endpgm 1732; 1733; VI-SDAG-LABEL: v_fcmp_f64_ult: 1734; VI-SDAG: ; %bb.0: 1735; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1736; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0 1737; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x40590000 1738; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1739; VI-SDAG-NEXT: v_cmp_nge_f64_e64 s[2:3], s[2:3], 
v[0:1] 1740; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0 1741; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 1742; VI-SDAG-NEXT: v_mov_b32_e32 v2, s2 1743; VI-SDAG-NEXT: v_mov_b32_e32 v3, s3 1744; VI-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 1745; VI-SDAG-NEXT: s_endpgm 1746; 1747; VI-GISEL-LABEL: v_fcmp_f64_ult: 1748; VI-GISEL: ; %bb.0: 1749; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1750; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0 1751; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x40590000 1752; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1753; VI-GISEL-NEXT: v_cmp_nge_f64_e64 s[2:3], s[2:3], v[0:1] 1754; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1 1755; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0 1756; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 1757; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 1758; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 1759; VI-GISEL-NEXT: s_endpgm 1760 %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 12) 1761 store i64 %result, ptr addrspace(1) %out 1762 ret void 1763} 1764 1765define amdgpu_kernel void @v_fcmp_f64_ule(ptr addrspace(1) %out, double %src) { 1766; GFX11-LABEL: v_fcmp_f64_ule: 1767; GFX11: ; %bb.0: 1768; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1769; GFX11-NEXT: v_mov_b32_e32 v2, 0 1770; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1771; GFX11-NEXT: v_cmp_nlt_f64_e64 s[2:3], 0x40590000, s[2:3] 1772; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 1773; GFX11-NEXT: v_mov_b32_e32 v0, s2 1774; GFX11-NEXT: v_mov_b32_e32 v1, s3 1775; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] 1776; GFX11-NEXT: s_endpgm 1777; 1778; GFX9-LABEL: v_fcmp_f64_ule: 1779; GFX9: ; %bb.0: 1780; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1781; GFX9-NEXT: v_mov_b32_e32 v0, 0 1782; GFX9-NEXT: v_mov_b32_e32 v1, 0x40590000 1783; GFX9-NEXT: v_mov_b32_e32 v2, 0 1784; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1785; GFX9-NEXT: v_cmp_ngt_f64_e64 s[2:3], s[2:3], v[0:1] 1786; GFX9-NEXT: v_mov_b32_e32 v0, s2 1787; GFX9-NEXT: v_mov_b32_e32 v1, s3 1788; GFX9-NEXT: 
global_store_dwordx2 v2, v[0:1], s[0:1] 1789; GFX9-NEXT: s_endpgm 1790; 1791; VI-SDAG-LABEL: v_fcmp_f64_ule: 1792; VI-SDAG: ; %bb.0: 1793; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1794; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0 1795; VI-SDAG-NEXT: v_mov_b32_e32 v1, 0x40590000 1796; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1797; VI-SDAG-NEXT: v_cmp_ngt_f64_e64 s[2:3], s[2:3], v[0:1] 1798; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0 1799; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 1800; VI-SDAG-NEXT: v_mov_b32_e32 v2, s2 1801; VI-SDAG-NEXT: v_mov_b32_e32 v3, s3 1802; VI-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 1803; VI-SDAG-NEXT: s_endpgm 1804; 1805; VI-GISEL-LABEL: v_fcmp_f64_ule: 1806; VI-GISEL: ; %bb.0: 1807; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1808; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0 1809; VI-GISEL-NEXT: v_mov_b32_e32 v1, 0x40590000 1810; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1811; VI-GISEL-NEXT: v_cmp_ngt_f64_e64 s[2:3], s[2:3], v[0:1] 1812; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1 1813; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0 1814; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 1815; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 1816; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 1817; VI-GISEL-NEXT: s_endpgm 1818 %result = call i64 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 13) 1819 store i64 %result, ptr addrspace(1) %out 1820 ret void 1821} 1822 1823 1824define amdgpu_kernel void @v_fcmp_f16_oeq_with_fabs(ptr addrspace(1) %out, half %src, half %a) { 1825; GFX11-LABEL: v_fcmp_f16_oeq_with_fabs: 1826; GFX11: ; %bb.0: 1827; GFX11-NEXT: s_clause 0x1 1828; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x2c 1829; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 1830; GFX11-NEXT: v_mov_b32_e32 v2, 0 1831; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1832; GFX11-NEXT: s_lshr_b32 s3, s2, 16 1833; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1834; GFX11-NEXT: v_cmp_eq_f16_e64 s[2:3], s2, |s3| 1835; GFX11-NEXT: v_mov_b32_e32 v0, s2 1836; GFX11-NEXT: s_delay_alu 
instid0(VALU_DEP_2) 1837; GFX11-NEXT: v_mov_b32_e32 v1, s3 1838; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] 1839; GFX11-NEXT: s_endpgm 1840; 1841; GFX9-LABEL: v_fcmp_f16_oeq_with_fabs: 1842; GFX9: ; %bb.0: 1843; GFX9-NEXT: s_load_dword s2, s[4:5], 0x2c 1844; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 1845; GFX9-NEXT: v_mov_b32_e32 v2, 0 1846; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1847; GFX9-NEXT: s_lshr_b32 s3, s2, 16 1848; GFX9-NEXT: v_mov_b32_e32 v0, s3 1849; GFX9-NEXT: v_cmp_eq_f16_e64 s[2:3], s2, |v0| 1850; GFX9-NEXT: v_mov_b32_e32 v0, s2 1851; GFX9-NEXT: v_mov_b32_e32 v1, s3 1852; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] 1853; GFX9-NEXT: s_endpgm 1854; 1855; VI-SDAG-LABEL: v_fcmp_f16_oeq_with_fabs: 1856; VI-SDAG: ; %bb.0: 1857; VI-SDAG-NEXT: s_load_dword s2, s[4:5], 0x2c 1858; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 1859; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1860; VI-SDAG-NEXT: s_lshr_b32 s3, s2, 16 1861; VI-SDAG-NEXT: v_mov_b32_e32 v0, s3 1862; VI-SDAG-NEXT: v_cmp_eq_f16_e64 s[2:3], s2, |v0| 1863; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0 1864; VI-SDAG-NEXT: v_mov_b32_e32 v2, s2 1865; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 1866; VI-SDAG-NEXT: v_mov_b32_e32 v3, s3 1867; VI-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 1868; VI-SDAG-NEXT: s_endpgm 1869; 1870; VI-GISEL-LABEL: v_fcmp_f16_oeq_with_fabs: 1871; VI-GISEL: ; %bb.0: 1872; VI-GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c 1873; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 1874; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1875; VI-GISEL-NEXT: s_lshr_b32 s3, s2, 16 1876; VI-GISEL-NEXT: v_mov_b32_e32 v0, s3 1877; VI-GISEL-NEXT: v_cmp_eq_f16_e64 s[2:3], s2, |v0| 1878; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 1879; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1 1880; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 1881; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0 1882; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 1883; VI-GISEL-NEXT: s_endpgm 1884 %temp = call half @llvm.fabs.f16(half %a) 1885 %result = call i64 
@llvm.amdgcn.fcmp.f16(half %src, half %temp, i32 1) 1886 store i64 %result, ptr addrspace(1) %out 1887 ret void 1888} 1889 1890 1891define amdgpu_kernel void @v_fcmp_f16_oeq_both_operands_with_fabs(ptr addrspace(1) %out, half %src, half %a) { 1892; GFX11-LABEL: v_fcmp_f16_oeq_both_operands_with_fabs: 1893; GFX11: ; %bb.0: 1894; GFX11-NEXT: s_clause 0x1 1895; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x2c 1896; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 1897; GFX11-NEXT: v_mov_b32_e32 v2, 0 1898; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1899; GFX11-NEXT: s_lshr_b32 s3, s2, 16 1900; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1901; GFX11-NEXT: v_cmp_eq_f16_e64 s[2:3], |s2|, |s3| 1902; GFX11-NEXT: v_mov_b32_e32 v0, s2 1903; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) 1904; GFX11-NEXT: v_mov_b32_e32 v1, s3 1905; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] 1906; GFX11-NEXT: s_endpgm 1907; 1908; GFX9-LABEL: v_fcmp_f16_oeq_both_operands_with_fabs: 1909; GFX9: ; %bb.0: 1910; GFX9-NEXT: s_load_dword s2, s[4:5], 0x2c 1911; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 1912; GFX9-NEXT: v_mov_b32_e32 v2, 0 1913; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1914; GFX9-NEXT: s_lshr_b32 s3, s2, 16 1915; GFX9-NEXT: v_mov_b32_e32 v0, s3 1916; GFX9-NEXT: v_cmp_eq_f16_e64 s[2:3], |s2|, |v0| 1917; GFX9-NEXT: v_mov_b32_e32 v0, s2 1918; GFX9-NEXT: v_mov_b32_e32 v1, s3 1919; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] 1920; GFX9-NEXT: s_endpgm 1921; 1922; VI-SDAG-LABEL: v_fcmp_f16_oeq_both_operands_with_fabs: 1923; VI-SDAG: ; %bb.0: 1924; VI-SDAG-NEXT: s_load_dword s2, s[4:5], 0x2c 1925; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 1926; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 1927; VI-SDAG-NEXT: s_lshr_b32 s3, s2, 16 1928; VI-SDAG-NEXT: v_mov_b32_e32 v0, s3 1929; VI-SDAG-NEXT: v_cmp_eq_f16_e64 s[2:3], |s2|, |v0| 1930; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0 1931; VI-SDAG-NEXT: v_mov_b32_e32 v2, s2 1932; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 1933; VI-SDAG-NEXT: 
v_mov_b32_e32 v3, s3 1934; VI-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 1935; VI-SDAG-NEXT: s_endpgm 1936; 1937; VI-GISEL-LABEL: v_fcmp_f16_oeq_both_operands_with_fabs: 1938; VI-GISEL: ; %bb.0: 1939; VI-GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c 1940; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 1941; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1942; VI-GISEL-NEXT: s_lshr_b32 s3, s2, 16 1943; VI-GISEL-NEXT: v_mov_b32_e32 v0, s3 1944; VI-GISEL-NEXT: v_cmp_eq_f16_e64 s[2:3], |s2|, |v0| 1945; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 1946; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1 1947; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 1948; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0 1949; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 1950; VI-GISEL-NEXT: s_endpgm 1951 %temp = call half @llvm.fabs.f16(half %a) 1952 %src_input = call half @llvm.fabs.f16(half %src) 1953 %result = call i64 @llvm.amdgcn.fcmp.f16(half %src_input, half %temp, i32 1) 1954 store i64 %result, ptr addrspace(1) %out 1955 ret void 1956} 1957 1958define amdgpu_kernel void @v_fcmp_f16(ptr addrspace(1) %out, half %src) { 1959; GFX11-SDAG-LABEL: v_fcmp_f16: 1960; GFX11-SDAG: ; %bb.0: 1961; GFX11-SDAG-NEXT: s_endpgm 1962; 1963; GFX11-GISEL-LABEL: v_fcmp_f16: 1964; GFX11-GISEL: ; %bb.0: 1965; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 1966; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, 0 1967; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1968; GFX11-GISEL-NEXT: global_store_b64 v0, v[0:1], s[0:1] 1969; GFX11-GISEL-NEXT: s_endpgm 1970; 1971; GFX9-SDAG-LABEL: v_fcmp_f16: 1972; GFX9-SDAG: ; %bb.0: 1973; GFX9-SDAG-NEXT: s_endpgm 1974; 1975; GFX9-GISEL-LABEL: v_fcmp_f16: 1976; GFX9-GISEL: ; %bb.0: 1977; GFX9-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 1978; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, 0 1979; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1980; GFX9-GISEL-NEXT: global_store_dwordx2 v0, v[0:1], s[0:1] 1981; GFX9-GISEL-NEXT: s_endpgm 1982; 1983; VI-SDAG-LABEL: v_fcmp_f16: 1984; VI-SDAG: ; %bb.0: 1985; VI-SDAG-NEXT: s_endpgm 1986; 1987; 
VI-GISEL-LABEL: v_fcmp_f16: 1988; VI-GISEL: ; %bb.0: 1989; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 1990; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 1991; VI-GISEL-NEXT: v_mov_b32_e32 v0, s0 1992; VI-GISEL-NEXT: v_mov_b32_e32 v1, s1 1993; VI-GISEL-NEXT: flat_store_dwordx2 v[0:1], v[0:1] 1994; VI-GISEL-NEXT: s_endpgm 1995 %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 -1) 1996 store i64 %result, ptr addrspace(1) %out 1997 ret void 1998} 1999 2000 2001define amdgpu_kernel void @v_fcmp_f16_oeq(ptr addrspace(1) %out, half %src) { 2002; GFX11-LABEL: v_fcmp_f16_oeq: 2003; GFX11: ; %bb.0: 2004; GFX11-NEXT: s_clause 0x1 2005; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x2c 2006; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2007; GFX11-NEXT: v_mov_b32_e32 v2, 0 2008; GFX11-NEXT: s_waitcnt lgkmcnt(0) 2009; GFX11-NEXT: v_cmp_eq_f16_e64 s[2:3], 0x5640, s2 2010; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 2011; GFX11-NEXT: v_mov_b32_e32 v0, s2 2012; GFX11-NEXT: v_mov_b32_e32 v1, s3 2013; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] 2014; GFX11-NEXT: s_endpgm 2015; 2016; GFX9-LABEL: v_fcmp_f16_oeq: 2017; GFX9: ; %bb.0: 2018; GFX9-NEXT: s_load_dword s2, s[4:5], 0x2c 2019; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2020; GFX9-NEXT: v_mov_b32_e32 v0, 0x5640 2021; GFX9-NEXT: v_mov_b32_e32 v2, 0 2022; GFX9-NEXT: s_waitcnt lgkmcnt(0) 2023; GFX9-NEXT: v_cmp_eq_f16_e64 s[2:3], s2, v0 2024; GFX9-NEXT: v_mov_b32_e32 v0, s2 2025; GFX9-NEXT: v_mov_b32_e32 v1, s3 2026; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] 2027; GFX9-NEXT: s_endpgm 2028; 2029; VI-SDAG-LABEL: v_fcmp_f16_oeq: 2030; VI-SDAG: ; %bb.0: 2031; VI-SDAG-NEXT: s_load_dword s2, s[4:5], 0x2c 2032; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2033; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x5640 2034; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2035; VI-SDAG-NEXT: v_cmp_eq_f16_e64 s[2:3], s2, v0 2036; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0 2037; VI-SDAG-NEXT: v_mov_b32_e32 v2, s2 
2038; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 2039; VI-SDAG-NEXT: v_mov_b32_e32 v3, s3 2040; VI-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 2041; VI-SDAG-NEXT: s_endpgm 2042; 2043; VI-GISEL-LABEL: v_fcmp_f16_oeq: 2044; VI-GISEL: ; %bb.0: 2045; VI-GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c 2046; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2047; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x5640 2048; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2049; VI-GISEL-NEXT: v_cmp_eq_f16_e64 s[2:3], s2, v0 2050; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 2051; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1 2052; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 2053; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0 2054; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 2055; VI-GISEL-NEXT: s_endpgm 2056 %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 1) 2057 store i64 %result, ptr addrspace(1) %out 2058 ret void 2059} 2060 2061 2062define amdgpu_kernel void @v_fcmp_f16_one(ptr addrspace(1) %out, half %src) { 2063; GFX11-LABEL: v_fcmp_f16_one: 2064; GFX11: ; %bb.0: 2065; GFX11-NEXT: s_clause 0x1 2066; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x2c 2067; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2068; GFX11-NEXT: v_mov_b32_e32 v2, 0 2069; GFX11-NEXT: s_waitcnt lgkmcnt(0) 2070; GFX11-NEXT: v_cmp_neq_f16_e64 s[2:3], 0x5640, s2 2071; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 2072; GFX11-NEXT: v_mov_b32_e32 v0, s2 2073; GFX11-NEXT: v_mov_b32_e32 v1, s3 2074; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] 2075; GFX11-NEXT: s_endpgm 2076; 2077; GFX9-LABEL: v_fcmp_f16_one: 2078; GFX9: ; %bb.0: 2079; GFX9-NEXT: s_load_dword s2, s[4:5], 0x2c 2080; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2081; GFX9-NEXT: v_mov_b32_e32 v0, 0x5640 2082; GFX9-NEXT: v_mov_b32_e32 v2, 0 2083; GFX9-NEXT: s_waitcnt lgkmcnt(0) 2084; GFX9-NEXT: v_cmp_neq_f16_e64 s[2:3], s2, v0 2085; GFX9-NEXT: v_mov_b32_e32 v0, s2 2086; GFX9-NEXT: v_mov_b32_e32 v1, s3 2087; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] 
2088; GFX9-NEXT: s_endpgm 2089; 2090; VI-SDAG-LABEL: v_fcmp_f16_one: 2091; VI-SDAG: ; %bb.0: 2092; VI-SDAG-NEXT: s_load_dword s2, s[4:5], 0x2c 2093; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2094; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x5640 2095; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2096; VI-SDAG-NEXT: v_cmp_neq_f16_e64 s[2:3], s2, v0 2097; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0 2098; VI-SDAG-NEXT: v_mov_b32_e32 v2, s2 2099; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 2100; VI-SDAG-NEXT: v_mov_b32_e32 v3, s3 2101; VI-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 2102; VI-SDAG-NEXT: s_endpgm 2103; 2104; VI-GISEL-LABEL: v_fcmp_f16_one: 2105; VI-GISEL: ; %bb.0: 2106; VI-GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c 2107; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2108; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x5640 2109; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2110; VI-GISEL-NEXT: v_cmp_neq_f16_e64 s[2:3], s2, v0 2111; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 2112; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1 2113; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 2114; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0 2115; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 2116; VI-GISEL-NEXT: s_endpgm 2117 %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 6) 2118 store i64 %result, ptr addrspace(1) %out 2119 ret void 2120} 2121 2122 2123define amdgpu_kernel void @v_fcmp_f16_ogt(ptr addrspace(1) %out, half %src) { 2124; GFX11-LABEL: v_fcmp_f16_ogt: 2125; GFX11: ; %bb.0: 2126; GFX11-NEXT: s_clause 0x1 2127; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x2c 2128; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2129; GFX11-NEXT: v_mov_b32_e32 v2, 0 2130; GFX11-NEXT: s_waitcnt lgkmcnt(0) 2131; GFX11-NEXT: v_cmp_lt_f16_e64 s[2:3], 0x5640, s2 2132; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 2133; GFX11-NEXT: v_mov_b32_e32 v0, s2 2134; GFX11-NEXT: v_mov_b32_e32 v1, s3 2135; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] 2136; GFX11-NEXT: s_endpgm 2137; 2138; GFX9-LABEL: v_fcmp_f16_ogt: 
2139; GFX9: ; %bb.0: 2140; GFX9-NEXT: s_load_dword s2, s[4:5], 0x2c 2141; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2142; GFX9-NEXT: v_mov_b32_e32 v0, 0x5640 2143; GFX9-NEXT: v_mov_b32_e32 v2, 0 2144; GFX9-NEXT: s_waitcnt lgkmcnt(0) 2145; GFX9-NEXT: v_cmp_gt_f16_e64 s[2:3], s2, v0 2146; GFX9-NEXT: v_mov_b32_e32 v0, s2 2147; GFX9-NEXT: v_mov_b32_e32 v1, s3 2148; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] 2149; GFX9-NEXT: s_endpgm 2150; 2151; VI-SDAG-LABEL: v_fcmp_f16_ogt: 2152; VI-SDAG: ; %bb.0: 2153; VI-SDAG-NEXT: s_load_dword s2, s[4:5], 0x2c 2154; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2155; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x5640 2156; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2157; VI-SDAG-NEXT: v_cmp_gt_f16_e64 s[2:3], s2, v0 2158; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0 2159; VI-SDAG-NEXT: v_mov_b32_e32 v2, s2 2160; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 2161; VI-SDAG-NEXT: v_mov_b32_e32 v3, s3 2162; VI-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 2163; VI-SDAG-NEXT: s_endpgm 2164; 2165; VI-GISEL-LABEL: v_fcmp_f16_ogt: 2166; VI-GISEL: ; %bb.0: 2167; VI-GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c 2168; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2169; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x5640 2170; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2171; VI-GISEL-NEXT: v_cmp_gt_f16_e64 s[2:3], s2, v0 2172; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 2173; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1 2174; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 2175; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0 2176; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 2177; VI-GISEL-NEXT: s_endpgm 2178 %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 2) 2179 store i64 %result, ptr addrspace(1) %out 2180 ret void 2181} 2182 2183 2184define amdgpu_kernel void @v_fcmp_f16_oge(ptr addrspace(1) %out, half %src) { 2185; GFX11-LABEL: v_fcmp_f16_oge: 2186; GFX11: ; %bb.0: 2187; GFX11-NEXT: s_clause 0x1 2188; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x2c 2189; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2190; 
GFX11-NEXT: v_mov_b32_e32 v2, 0 2191; GFX11-NEXT: s_waitcnt lgkmcnt(0) 2192; GFX11-NEXT: v_cmp_le_f16_e64 s[2:3], 0x5640, s2 2193; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 2194; GFX11-NEXT: v_mov_b32_e32 v0, s2 2195; GFX11-NEXT: v_mov_b32_e32 v1, s3 2196; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] 2197; GFX11-NEXT: s_endpgm 2198; 2199; GFX9-LABEL: v_fcmp_f16_oge: 2200; GFX9: ; %bb.0: 2201; GFX9-NEXT: s_load_dword s2, s[4:5], 0x2c 2202; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2203; GFX9-NEXT: v_mov_b32_e32 v0, 0x5640 2204; GFX9-NEXT: v_mov_b32_e32 v2, 0 2205; GFX9-NEXT: s_waitcnt lgkmcnt(0) 2206; GFX9-NEXT: v_cmp_ge_f16_e64 s[2:3], s2, v0 2207; GFX9-NEXT: v_mov_b32_e32 v0, s2 2208; GFX9-NEXT: v_mov_b32_e32 v1, s3 2209; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] 2210; GFX9-NEXT: s_endpgm 2211; 2212; VI-SDAG-LABEL: v_fcmp_f16_oge: 2213; VI-SDAG: ; %bb.0: 2214; VI-SDAG-NEXT: s_load_dword s2, s[4:5], 0x2c 2215; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2216; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x5640 2217; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2218; VI-SDAG-NEXT: v_cmp_ge_f16_e64 s[2:3], s2, v0 2219; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0 2220; VI-SDAG-NEXT: v_mov_b32_e32 v2, s2 2221; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 2222; VI-SDAG-NEXT: v_mov_b32_e32 v3, s3 2223; VI-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 2224; VI-SDAG-NEXT: s_endpgm 2225; 2226; VI-GISEL-LABEL: v_fcmp_f16_oge: 2227; VI-GISEL: ; %bb.0: 2228; VI-GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c 2229; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2230; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x5640 2231; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2232; VI-GISEL-NEXT: v_cmp_ge_f16_e64 s[2:3], s2, v0 2233; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 2234; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1 2235; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 2236; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0 2237; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 2238; VI-GISEL-NEXT: s_endpgm 2239 
%result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 3) 2240 store i64 %result, ptr addrspace(1) %out 2241 ret void 2242} 2243 2244 2245define amdgpu_kernel void @v_fcmp_f16_olt(ptr addrspace(1) %out, half %src) { 2246; GFX11-LABEL: v_fcmp_f16_olt: 2247; GFX11: ; %bb.0: 2248; GFX11-NEXT: s_clause 0x1 2249; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x2c 2250; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2251; GFX11-NEXT: v_mov_b32_e32 v2, 0 2252; GFX11-NEXT: s_waitcnt lgkmcnt(0) 2253; GFX11-NEXT: v_cmp_gt_f16_e64 s[2:3], 0x5640, s2 2254; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 2255; GFX11-NEXT: v_mov_b32_e32 v0, s2 2256; GFX11-NEXT: v_mov_b32_e32 v1, s3 2257; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] 2258; GFX11-NEXT: s_endpgm 2259; 2260; GFX9-LABEL: v_fcmp_f16_olt: 2261; GFX9: ; %bb.0: 2262; GFX9-NEXT: s_load_dword s2, s[4:5], 0x2c 2263; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2264; GFX9-NEXT: v_mov_b32_e32 v0, 0x5640 2265; GFX9-NEXT: v_mov_b32_e32 v2, 0 2266; GFX9-NEXT: s_waitcnt lgkmcnt(0) 2267; GFX9-NEXT: v_cmp_lt_f16_e64 s[2:3], s2, v0 2268; GFX9-NEXT: v_mov_b32_e32 v0, s2 2269; GFX9-NEXT: v_mov_b32_e32 v1, s3 2270; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] 2271; GFX9-NEXT: s_endpgm 2272; 2273; VI-SDAG-LABEL: v_fcmp_f16_olt: 2274; VI-SDAG: ; %bb.0: 2275; VI-SDAG-NEXT: s_load_dword s2, s[4:5], 0x2c 2276; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2277; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x5640 2278; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2279; VI-SDAG-NEXT: v_cmp_lt_f16_e64 s[2:3], s2, v0 2280; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0 2281; VI-SDAG-NEXT: v_mov_b32_e32 v2, s2 2282; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 2283; VI-SDAG-NEXT: v_mov_b32_e32 v3, s3 2284; VI-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 2285; VI-SDAG-NEXT: s_endpgm 2286; 2287; VI-GISEL-LABEL: v_fcmp_f16_olt: 2288; VI-GISEL: ; %bb.0: 2289; VI-GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c 2290; VI-GISEL-NEXT: s_load_dwordx2 
s[0:1], s[4:5], 0x24 2291; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x5640 2292; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2293; VI-GISEL-NEXT: v_cmp_lt_f16_e64 s[2:3], s2, v0 2294; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 2295; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1 2296; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 2297; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0 2298; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 2299; VI-GISEL-NEXT: s_endpgm 2300 %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 4) 2301 store i64 %result, ptr addrspace(1) %out 2302 ret void 2303} 2304 2305 2306define amdgpu_kernel void @v_fcmp_f16_ole(ptr addrspace(1) %out, half %src) { 2307; GFX11-LABEL: v_fcmp_f16_ole: 2308; GFX11: ; %bb.0: 2309; GFX11-NEXT: s_clause 0x1 2310; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x2c 2311; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2312; GFX11-NEXT: v_mov_b32_e32 v2, 0 2313; GFX11-NEXT: s_waitcnt lgkmcnt(0) 2314; GFX11-NEXT: v_cmp_ge_f16_e64 s[2:3], 0x5640, s2 2315; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 2316; GFX11-NEXT: v_mov_b32_e32 v0, s2 2317; GFX11-NEXT: v_mov_b32_e32 v1, s3 2318; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] 2319; GFX11-NEXT: s_endpgm 2320; 2321; GFX9-LABEL: v_fcmp_f16_ole: 2322; GFX9: ; %bb.0: 2323; GFX9-NEXT: s_load_dword s2, s[4:5], 0x2c 2324; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2325; GFX9-NEXT: v_mov_b32_e32 v0, 0x5640 2326; GFX9-NEXT: v_mov_b32_e32 v2, 0 2327; GFX9-NEXT: s_waitcnt lgkmcnt(0) 2328; GFX9-NEXT: v_cmp_le_f16_e64 s[2:3], s2, v0 2329; GFX9-NEXT: v_mov_b32_e32 v0, s2 2330; GFX9-NEXT: v_mov_b32_e32 v1, s3 2331; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] 2332; GFX9-NEXT: s_endpgm 2333; 2334; VI-SDAG-LABEL: v_fcmp_f16_ole: 2335; VI-SDAG: ; %bb.0: 2336; VI-SDAG-NEXT: s_load_dword s2, s[4:5], 0x2c 2337; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2338; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x5640 2339; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2340; VI-SDAG-NEXT: v_cmp_le_f16_e64 
s[2:3], s2, v0 2341; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0 2342; VI-SDAG-NEXT: v_mov_b32_e32 v2, s2 2343; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 2344; VI-SDAG-NEXT: v_mov_b32_e32 v3, s3 2345; VI-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 2346; VI-SDAG-NEXT: s_endpgm 2347; 2348; VI-GISEL-LABEL: v_fcmp_f16_ole: 2349; VI-GISEL: ; %bb.0: 2350; VI-GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c 2351; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2352; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x5640 2353; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2354; VI-GISEL-NEXT: v_cmp_le_f16_e64 s[2:3], s2, v0 2355; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 2356; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1 2357; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 2358; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0 2359; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 2360; VI-GISEL-NEXT: s_endpgm 2361 %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 5) 2362 store i64 %result, ptr addrspace(1) %out 2363 ret void 2364} 2365 2366 2367define amdgpu_kernel void @v_fcmp_f16_ueq(ptr addrspace(1) %out, half %src) { 2368; GFX11-LABEL: v_fcmp_f16_ueq: 2369; GFX11: ; %bb.0: 2370; GFX11-NEXT: s_clause 0x1 2371; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x2c 2372; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2373; GFX11-NEXT: v_mov_b32_e32 v2, 0 2374; GFX11-NEXT: s_waitcnt lgkmcnt(0) 2375; GFX11-NEXT: v_cmp_nlg_f16_e64 s[2:3], 0x5640, s2 2376; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 2377; GFX11-NEXT: v_mov_b32_e32 v0, s2 2378; GFX11-NEXT: v_mov_b32_e32 v1, s3 2379; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1] 2380; GFX11-NEXT: s_endpgm 2381; 2382; GFX9-LABEL: v_fcmp_f16_ueq: 2383; GFX9: ; %bb.0: 2384; GFX9-NEXT: s_load_dword s2, s[4:5], 0x2c 2385; GFX9-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2386; GFX9-NEXT: v_mov_b32_e32 v0, 0x5640 2387; GFX9-NEXT: v_mov_b32_e32 v2, 0 2388; GFX9-NEXT: s_waitcnt lgkmcnt(0) 2389; GFX9-NEXT: v_cmp_nlg_f16_e64 s[2:3], s2, v0 2390; GFX9-NEXT: v_mov_b32_e32 v0, s2 
2391; GFX9-NEXT: v_mov_b32_e32 v1, s3 2392; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1] 2393; GFX9-NEXT: s_endpgm 2394; 2395; VI-SDAG-LABEL: v_fcmp_f16_ueq: 2396; VI-SDAG: ; %bb.0: 2397; VI-SDAG-NEXT: s_load_dword s2, s[4:5], 0x2c 2398; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2399; VI-SDAG-NEXT: v_mov_b32_e32 v0, 0x5640 2400; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) 2401; VI-SDAG-NEXT: v_cmp_nlg_f16_e64 s[2:3], s2, v0 2402; VI-SDAG-NEXT: v_mov_b32_e32 v0, s0 2403; VI-SDAG-NEXT: v_mov_b32_e32 v2, s2 2404; VI-SDAG-NEXT: v_mov_b32_e32 v1, s1 2405; VI-SDAG-NEXT: v_mov_b32_e32 v3, s3 2406; VI-SDAG-NEXT: flat_store_dwordx2 v[0:1], v[2:3] 2407; VI-SDAG-NEXT: s_endpgm 2408; 2409; VI-GISEL-LABEL: v_fcmp_f16_ueq: 2410; VI-GISEL: ; %bb.0: 2411; VI-GISEL-NEXT: s_load_dword s2, s[4:5], 0x2c 2412; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2413; VI-GISEL-NEXT: v_mov_b32_e32 v0, 0x5640 2414; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) 2415; VI-GISEL-NEXT: v_cmp_nlg_f16_e64 s[2:3], s2, v0 2416; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 2417; VI-GISEL-NEXT: v_mov_b32_e32 v3, s1 2418; VI-GISEL-NEXT: v_mov_b32_e32 v1, s3 2419; VI-GISEL-NEXT: v_mov_b32_e32 v2, s0 2420; VI-GISEL-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 2421; VI-GISEL-NEXT: s_endpgm 2422 %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 9) 2423 store i64 %result, ptr addrspace(1) %out 2424 ret void 2425} 2426 2427 2428define amdgpu_kernel void @v_fcmp_f16_une(ptr addrspace(1) %out, half %src) { 2429; GFX11-LABEL: v_fcmp_f16_une: 2430; GFX11: ; %bb.0: 2431; GFX11-NEXT: s_clause 0x1 2432; GFX11-NEXT: s_load_b32 s2, s[4:5], 0x2c 2433; GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2434; GFX11-NEXT: v_mov_b32_e32 v2, 0 2435; GFX11-NEXT: s_waitcnt lgkmcnt(0) 2436; GFX11-NEXT: v_cmp_neq_f16_e64 s[2:3], 0x5640, s2 2437; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 2438; GFX11-NEXT: v_mov_b32_e32 v0, s2 2439; GFX11-NEXT: v_mov_b32_e32 v1, s3 2440; GFX11-NEXT: 
global_store_b64 v2, v[0:1], s[0:1]
; GFX11-NEXT:    s_endpgm
;
; GFX9-LABEL: v_fcmp_f16_une:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dword s2, s[4:5], 0x2c
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9-NEXT:    v_mov_b32_e32 v0, 0x5640
; GFX9-NEXT:    v_mov_b32_e32 v2, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_cmp_neq_f16_e64 s[2:3], s2, v0
; GFX9-NEXT:    v_mov_b32_e32 v0, s2
; GFX9-NEXT:    v_mov_b32_e32 v1, s3
; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX9-NEXT:    s_endpgm
;
; VI-SDAG-LABEL: v_fcmp_f16_une:
; VI-SDAG:       ; %bb.0:
; VI-SDAG-NEXT:    s_load_dword s2, s[4:5], 0x2c
; VI-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-SDAG-NEXT:    v_mov_b32_e32 v0, 0x5640
; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; VI-SDAG-NEXT:    v_cmp_neq_f16_e64 s[2:3], s2, v0
; VI-SDAG-NEXT:    v_mov_b32_e32 v0, s0
; VI-SDAG-NEXT:    v_mov_b32_e32 v2, s2
; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s3
; VI-SDAG-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
; VI-SDAG-NEXT:    s_endpgm
;
; VI-GISEL-LABEL: v_fcmp_f16_une:
; VI-GISEL:       ; %bb.0:
; VI-GISEL-NEXT:    s_load_dword s2, s[4:5], 0x2c
; VI-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-GISEL-NEXT:    v_mov_b32_e32 v0, 0x5640
; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT:    v_cmp_neq_f16_e64 s[2:3], s2, v0
; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s1
; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
; VI-GISEL-NEXT:    v_mov_b32_e32 v2, s0
; VI-GISEL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; VI-GISEL-NEXT:    s_endpgm
  %result = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 14)
  store i64 %result, ptr addrspace(1) %out
  ret void
}


; f16 compare of %src against the constant 100.0 (0x5640 in the checks) with
; predicate code 10 (ugt, per the kernel name); the i64 returned by the
; intrinsic is stored to %out.
define amdgpu_kernel void @v_fcmp_f16_ugt(ptr addrspace(1) %out, half %src) {
; GFX11-LABEL: v_fcmp_f16_ugt:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-NEXT:    v_mov_b32_e32 v2, 0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    v_cmp_nge_f16_e64 s[2:3], 0x5640, s2
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT:    v_mov_b32_e32 v0, s2
; GFX11-NEXT:    v_mov_b32_e32 v1, s3
; GFX11-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
; GFX11-NEXT:    s_endpgm
;
; GFX9-LABEL: v_fcmp_f16_ugt:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dword s2, s[4:5], 0x2c
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9-NEXT:    v_mov_b32_e32 v0, 0x5640
; GFX9-NEXT:    v_mov_b32_e32 v2, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_cmp_nle_f16_e64 s[2:3], s2, v0
; GFX9-NEXT:    v_mov_b32_e32 v0, s2
; GFX9-NEXT:    v_mov_b32_e32 v1, s3
; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX9-NEXT:    s_endpgm
;
; VI-SDAG-LABEL: v_fcmp_f16_ugt:
; VI-SDAG:       ; %bb.0:
; VI-SDAG-NEXT:    s_load_dword s2, s[4:5], 0x2c
; VI-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-SDAG-NEXT:    v_mov_b32_e32 v0, 0x5640
; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; VI-SDAG-NEXT:    v_cmp_nle_f16_e64 s[2:3], s2, v0
; VI-SDAG-NEXT:    v_mov_b32_e32 v0, s0
; VI-SDAG-NEXT:    v_mov_b32_e32 v2, s2
; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s3
; VI-SDAG-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
; VI-SDAG-NEXT:    s_endpgm
;
; VI-GISEL-LABEL: v_fcmp_f16_ugt:
; VI-GISEL:       ; %bb.0:
; VI-GISEL-NEXT:    s_load_dword s2, s[4:5], 0x2c
; VI-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-GISEL-NEXT:    v_mov_b32_e32 v0, 0x5640
; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT:    v_cmp_nle_f16_e64 s[2:3], s2, v0
; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s1
; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
; VI-GISEL-NEXT:    v_mov_b32_e32 v2, s0
; VI-GISEL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; VI-GISEL-NEXT:    s_endpgm
  %cmp = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 10)
  store i64 %cmp, ptr addrspace(1) %out
  ret void
}


; f16 compare of %src against the constant 100.0 with predicate code 11
; (uge, per the kernel name); the i64 returned by the intrinsic is stored
; to %out.
define amdgpu_kernel void @v_fcmp_f16_uge(ptr addrspace(1) %out, half %src) {
; GFX11-LABEL: v_fcmp_f16_uge:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-NEXT:    v_mov_b32_e32 v2, 0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    v_cmp_ngt_f16_e64 s[2:3], 0x5640, s2
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT:    v_mov_b32_e32 v0, s2
; GFX11-NEXT:    v_mov_b32_e32 v1, s3
; GFX11-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
; GFX11-NEXT:    s_endpgm
;
; GFX9-LABEL: v_fcmp_f16_uge:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dword s2, s[4:5], 0x2c
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9-NEXT:    v_mov_b32_e32 v0, 0x5640
; GFX9-NEXT:    v_mov_b32_e32 v2, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_cmp_nlt_f16_e64 s[2:3], s2, v0
; GFX9-NEXT:    v_mov_b32_e32 v0, s2
; GFX9-NEXT:    v_mov_b32_e32 v1, s3
; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX9-NEXT:    s_endpgm
;
; VI-SDAG-LABEL: v_fcmp_f16_uge:
; VI-SDAG:       ; %bb.0:
; VI-SDAG-NEXT:    s_load_dword s2, s[4:5], 0x2c
; VI-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-SDAG-NEXT:    v_mov_b32_e32 v0, 0x5640
; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; VI-SDAG-NEXT:    v_cmp_nlt_f16_e64 s[2:3], s2, v0
; VI-SDAG-NEXT:    v_mov_b32_e32 v0, s0
; VI-SDAG-NEXT:    v_mov_b32_e32 v2, s2
; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s3
; VI-SDAG-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
; VI-SDAG-NEXT:    s_endpgm
;
; VI-GISEL-LABEL: v_fcmp_f16_uge:
; VI-GISEL:       ; %bb.0:
; VI-GISEL-NEXT:    s_load_dword s2, s[4:5], 0x2c
; VI-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-GISEL-NEXT:    v_mov_b32_e32 v0, 0x5640
; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT:    v_cmp_nlt_f16_e64 s[2:3], s2, v0
; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s1
; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
; VI-GISEL-NEXT:    v_mov_b32_e32 v2, s0
; VI-GISEL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; VI-GISEL-NEXT:    s_endpgm
  %cmp = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 11)
  store i64 %cmp, ptr addrspace(1) %out
  ret void
}


; f16 compare of %src against the constant 100.0 with predicate code 12
; (ult, per the kernel name); the i64 returned by the intrinsic is stored
; to %out.
define amdgpu_kernel void @v_fcmp_f16_ult(ptr addrspace(1) %out, half %src) {
; GFX11-LABEL: v_fcmp_f16_ult:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-NEXT:    v_mov_b32_e32 v2, 0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    v_cmp_nle_f16_e64 s[2:3], 0x5640, s2
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT:    v_mov_b32_e32 v0, s2
; GFX11-NEXT:    v_mov_b32_e32 v1, s3
; GFX11-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
; GFX11-NEXT:    s_endpgm
;
; GFX9-LABEL: v_fcmp_f16_ult:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dword s2, s[4:5], 0x2c
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9-NEXT:    v_mov_b32_e32 v0, 0x5640
; GFX9-NEXT:    v_mov_b32_e32 v2, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_cmp_nge_f16_e64 s[2:3], s2, v0
; GFX9-NEXT:    v_mov_b32_e32 v0, s2
; GFX9-NEXT:    v_mov_b32_e32 v1, s3
; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX9-NEXT:    s_endpgm
;
; VI-SDAG-LABEL: v_fcmp_f16_ult:
; VI-SDAG:       ; %bb.0:
; VI-SDAG-NEXT:    s_load_dword s2, s[4:5], 0x2c
; VI-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-SDAG-NEXT:    v_mov_b32_e32 v0, 0x5640
; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; VI-SDAG-NEXT:    v_cmp_nge_f16_e64 s[2:3], s2, v0
; VI-SDAG-NEXT:    v_mov_b32_e32 v0, s0
; VI-SDAG-NEXT:    v_mov_b32_e32 v2, s2
; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s3
; VI-SDAG-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
; VI-SDAG-NEXT:    s_endpgm
;
; VI-GISEL-LABEL: v_fcmp_f16_ult:
; VI-GISEL:       ; %bb.0:
; VI-GISEL-NEXT:    s_load_dword s2, s[4:5], 0x2c
; VI-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-GISEL-NEXT:    v_mov_b32_e32 v0, 0x5640
; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT:    v_cmp_nge_f16_e64 s[2:3], s2, v0
; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s1
; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
; VI-GISEL-NEXT:    v_mov_b32_e32 v2, s0
; VI-GISEL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; VI-GISEL-NEXT:    s_endpgm
  %cmp = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 12)
  store i64 %cmp, ptr addrspace(1) %out
  ret void
}

; f16 compare of %src against the constant 100.0 with predicate code 7
; (ordered, per the kernel name and the v_cmp_o checks); the i64 returned by
; the intrinsic is stored to %out.
define amdgpu_kernel void @v_fcmp_f16_o(ptr addrspace(1) %out, half %src) {
; GFX11-LABEL: v_fcmp_f16_o:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-NEXT:    v_mov_b32_e32 v2, 0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    v_cmp_o_f16_e64 s[2:3], 0x5640, s2
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT:    v_mov_b32_e32 v0, s2
; GFX11-NEXT:    v_mov_b32_e32 v1, s3
; GFX11-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
; GFX11-NEXT:    s_endpgm
;
; GFX9-LABEL: v_fcmp_f16_o:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dword s2, s[4:5], 0x2c
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9-NEXT:    v_mov_b32_e32 v0, 0x5640
; GFX9-NEXT:    v_mov_b32_e32 v2, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_cmp_o_f16_e64 s[2:3], s2, v0
; GFX9-NEXT:    v_mov_b32_e32 v0, s2
; GFX9-NEXT:    v_mov_b32_e32 v1, s3
; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX9-NEXT:    s_endpgm
;
; VI-SDAG-LABEL: v_fcmp_f16_o:
; VI-SDAG:       ; %bb.0:
; VI-SDAG-NEXT:    s_load_dword s2, s[4:5], 0x2c
; VI-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-SDAG-NEXT:    v_mov_b32_e32 v0, 0x5640
; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; VI-SDAG-NEXT:    v_cmp_o_f16_e64 s[2:3], s2, v0
; VI-SDAG-NEXT:    v_mov_b32_e32 v0, s0
; VI-SDAG-NEXT:    v_mov_b32_e32 v2, s2
; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s3
; VI-SDAG-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
; VI-SDAG-NEXT:    s_endpgm
;
; VI-GISEL-LABEL: v_fcmp_f16_o:
; VI-GISEL:       ; %bb.0:
; VI-GISEL-NEXT:    s_load_dword s2, s[4:5], 0x2c
; VI-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-GISEL-NEXT:    v_mov_b32_e32 v0, 0x5640
; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT:    v_cmp_o_f16_e64 s[2:3], s2, v0
; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s1
; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
; VI-GISEL-NEXT:    v_mov_b32_e32 v2, s0
; VI-GISEL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; VI-GISEL-NEXT:    s_endpgm
  %cmp = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 7)
  store i64 %cmp, ptr addrspace(1) %out
  ret void
}

; f16 compare of %src against the constant 100.0 with predicate code 8
; (unordered, per the kernel name and the v_cmp_u checks); the i64 returned by
; the intrinsic is stored to %out.
define amdgpu_kernel void @v_fcmp_f16_uo(ptr addrspace(1) %out, half %src) {
; GFX11-LABEL: v_fcmp_f16_uo:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-NEXT:    v_mov_b32_e32 v2, 0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    v_cmp_u_f16_e64 s[2:3], 0x5640, s2
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT:    v_mov_b32_e32 v0, s2
; GFX11-NEXT:    v_mov_b32_e32 v1, s3
; GFX11-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
; GFX11-NEXT:    s_endpgm
;
; GFX9-LABEL: v_fcmp_f16_uo:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dword s2, s[4:5], 0x2c
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9-NEXT:    v_mov_b32_e32 v0, 0x5640
; GFX9-NEXT:    v_mov_b32_e32 v2, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_cmp_u_f16_e64 s[2:3], s2, v0
; GFX9-NEXT:    v_mov_b32_e32 v0, s2
; GFX9-NEXT:    v_mov_b32_e32 v1, s3
; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX9-NEXT:    s_endpgm
;
; VI-SDAG-LABEL: v_fcmp_f16_uo:
; VI-SDAG:       ; %bb.0:
; VI-SDAG-NEXT:    s_load_dword s2, s[4:5], 0x2c
; VI-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-SDAG-NEXT:    v_mov_b32_e32 v0, 0x5640
; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; VI-SDAG-NEXT:    v_cmp_u_f16_e64 s[2:3], s2, v0
; VI-SDAG-NEXT:    v_mov_b32_e32 v0, s0
; VI-SDAG-NEXT:    v_mov_b32_e32 v2, s2
; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s3
; VI-SDAG-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
; VI-SDAG-NEXT:    s_endpgm
;
; VI-GISEL-LABEL: v_fcmp_f16_uo:
; VI-GISEL:       ; %bb.0:
; VI-GISEL-NEXT:    s_load_dword s2, s[4:5], 0x2c
; VI-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-GISEL-NEXT:    v_mov_b32_e32 v0, 0x5640
; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT:    v_cmp_u_f16_e64 s[2:3], s2, v0
; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s1
; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
; VI-GISEL-NEXT:    v_mov_b32_e32 v2, s0
; VI-GISEL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; VI-GISEL-NEXT:    s_endpgm
  %cmp = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 8)
  store i64 %cmp, ptr addrspace(1) %out
  ret void
}

; f16 compare of %src against the constant 100.0 with predicate code 13
; (ule, per the kernel name); the i64 returned by the intrinsic is stored
; to %out.
define amdgpu_kernel void @v_fcmp_f16_ule(ptr addrspace(1) %out, half %src) {
; GFX11-LABEL: v_fcmp_f16_ule:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_clause 0x1
; GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
; GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX11-NEXT:    v_mov_b32_e32 v2, 0
; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GFX11-NEXT:    v_cmp_nlt_f16_e64 s[2:3], 0x5640, s2
; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT:    v_mov_b32_e32 v0, s2
; GFX11-NEXT:    v_mov_b32_e32 v1, s3
; GFX11-NEXT:    global_store_b64 v2, v[0:1], s[0:1]
; GFX11-NEXT:    s_endpgm
;
; GFX9-LABEL: v_fcmp_f16_ule:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_load_dword s2, s[4:5], 0x2c
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GFX9-NEXT:    v_mov_b32_e32 v0, 0x5640
; GFX9-NEXT:    v_mov_b32_e32 v2, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    v_cmp_ngt_f16_e64 s[2:3], s2, v0
; GFX9-NEXT:    v_mov_b32_e32 v0, s2
; GFX9-NEXT:    v_mov_b32_e32 v1, s3
; GFX9-NEXT:    global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX9-NEXT:    s_endpgm
;
; VI-SDAG-LABEL: v_fcmp_f16_ule:
; VI-SDAG:       ; %bb.0:
; VI-SDAG-NEXT:    s_load_dword s2, s[4:5], 0x2c
; VI-SDAG-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-SDAG-NEXT:    v_mov_b32_e32 v0, 0x5640
; VI-SDAG-NEXT:    s_waitcnt lgkmcnt(0)
; VI-SDAG-NEXT:    v_cmp_ngt_f16_e64 s[2:3], s2, v0
; VI-SDAG-NEXT:    v_mov_b32_e32 v0, s0
; VI-SDAG-NEXT:    v_mov_b32_e32 v2, s2
; VI-SDAG-NEXT:    v_mov_b32_e32 v1, s1
; VI-SDAG-NEXT:    v_mov_b32_e32 v3, s3
; VI-SDAG-NEXT:    flat_store_dwordx2 v[0:1], v[2:3]
; VI-SDAG-NEXT:    s_endpgm
;
; VI-GISEL-LABEL: v_fcmp_f16_ule:
; VI-GISEL:       ; %bb.0:
; VI-GISEL-NEXT:    s_load_dword s2, s[4:5], 0x2c
; VI-GISEL-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; VI-GISEL-NEXT:    v_mov_b32_e32 v0, 0x5640
; VI-GISEL-NEXT:    s_waitcnt lgkmcnt(0)
; VI-GISEL-NEXT:    v_cmp_ngt_f16_e64 s[2:3], s2, v0
; VI-GISEL-NEXT:    v_mov_b32_e32 v0, s2
; VI-GISEL-NEXT:    v_mov_b32_e32 v3, s1
; VI-GISEL-NEXT:    v_mov_b32_e32 v1, s3
; VI-GISEL-NEXT:    v_mov_b32_e32 v2, s0
; VI-GISEL-NEXT:    flat_store_dwordx2 v[2:3], v[0:1]
; VI-GISEL-NEXT:    s_endpgm
  %cmp = call i64 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 13)
  store i64 %cmp, ptr addrspace(1) %out
  ret void
}

; Attribute group #0 is the one referenced by the intrinsic declarations at
; the top of this file.
attributes #0 = { nounwind readnone convergent }