; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr="+wavefrontsize32" -verify-machineinstrs < %s | FileCheck -check-prefixes=SDAG-GFX11 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr="+wavefrontsize32" -verify-machineinstrs < %s | FileCheck -check-prefixes=SDAG-GFX10 %s

; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -mattr="+wavefrontsize32" -verify-machineinstrs < %s | FileCheck -check-prefixes=GISEL-GFX11 %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -mattr="+wavefrontsize32" -verify-machineinstrs < %s | FileCheck -check-prefixes=GISEL-GFX10 %s

; Code generation checks for the llvm.amdgcn.fcmp intrinsic with a 32-bit
; result, compiled with +wavefrontsize32 for gfx1100 and gfx1010 through both
; the default selector and -global-isel.
;
; The trailing i32 operand of the intrinsic selects the comparison. The values
; used below follow LLVM's floating-point predicate numbering:
;   1 oeq, 2 ogt, 3 oge, 4 olt, 5 ole, 6 one, 7 ord, 8 uno,
;   9 ueq, 10 ugt, 11 uge, 12 ult, 13 ule, 14 une.
;
; Do not edit the assertion lines by hand; regenerate them with
; utils/update_llc_test_checks.py.

declare i32 @llvm.amdgcn.fcmp.f32(float, float, i32) #0
declare i32 @llvm.amdgcn.fcmp.f64(double, double, i32) #0
declare float @llvm.fabs.f32(float) #0

declare i32 @llvm.amdgcn.fcmp.f16(half, half, i32) #0
declare half @llvm.fabs.f16(half) #0

; Condition code 1 (oeq) with fabs applied to the second operand only; the
; expected compare carries the |s3| source modifier.
define amdgpu_kernel void @v_fcmp_f32_oeq_with_fabs(ptr addrspace(1) %out, float %src, float %a) {
; SDAG-GFX11-LABEL: v_fcmp_f32_oeq_with_fabs:
; SDAG-GFX11:       ; %bb.0:
; SDAG-GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; SDAG-GFX11-NEXT:    v_cmp_eq_f32_e64 s2, s2, |s3|
; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
; SDAG-GFX11-NEXT:    s_endpgm
;
; SDAG-GFX10-LABEL: v_fcmp_f32_oeq_with_fabs:
; SDAG-GFX10:       ; %bb.0:
; SDAG-GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; SDAG-GFX10-NEXT:    v_cmp_eq_f32_e64 s2, s2, |s3|
; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s2
; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
; SDAG-GFX10-NEXT:    s_endpgm
;
; GISEL-GFX11-LABEL: v_fcmp_f32_oeq_with_fabs:
; GISEL-GFX11:       ; %bb.0:
; GISEL-GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GISEL-GFX11-NEXT:    v_cmp_eq_f32_e64 s2, s2, |s3|
; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
; GISEL-GFX11-NEXT:    s_endpgm
;
; GISEL-GFX10-LABEL: v_fcmp_f32_oeq_with_fabs:
; GISEL-GFX10:       ; %bb.0:
; GISEL-GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GISEL-GFX10-NEXT:    v_cmp_eq_f32_e64 s2, s2, |s3|
; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s2
; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
; GISEL-GFX10-NEXT:    s_endpgm
  %temp = call float @llvm.fabs.f32(float %a)
  %result = call i32 @llvm.amdgcn.fcmp.f32(float %src, float %temp, i32 1)
  store i32 %result, ptr addrspace(1) %out
  ret void
}

; Condition code 1 (oeq) with fabs applied to both operands; the expected
; compare carries the |s2| and |s3| source modifiers.
define amdgpu_kernel void @v_fcmp_f32_oeq_both_operands_with_fabs(ptr addrspace(1) %out, float %src, float %a) {
; SDAG-GFX11-LABEL: v_fcmp_f32_oeq_both_operands_with_fabs:
; SDAG-GFX11:       ; %bb.0:
; SDAG-GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; SDAG-GFX11-NEXT:    v_cmp_eq_f32_e64 s2, |s2|, |s3|
; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
; SDAG-GFX11-NEXT:    s_endpgm
;
; SDAG-GFX10-LABEL: v_fcmp_f32_oeq_both_operands_with_fabs:
; SDAG-GFX10:       ; %bb.0:
; SDAG-GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; SDAG-GFX10-NEXT:    v_cmp_eq_f32_e64 s2, |s2|, |s3|
; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s2
; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
; SDAG-GFX10-NEXT:    s_endpgm
;
; GISEL-GFX11-LABEL: v_fcmp_f32_oeq_both_operands_with_fabs:
; GISEL-GFX11:       ; %bb.0:
; GISEL-GFX11-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GISEL-GFX11-NEXT:    v_cmp_eq_f32_e64 s2, |s2|, |s3|
; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
; GISEL-GFX11-NEXT:    s_endpgm
;
; GISEL-GFX10-LABEL: v_fcmp_f32_oeq_both_operands_with_fabs:
; GISEL-GFX10:       ; %bb.0:
; GISEL-GFX10-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GISEL-GFX10-NEXT:    v_cmp_eq_f32_e64 s2, |s2|, |s3|
; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s2
; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
; GISEL-GFX10-NEXT:    s_endpgm
  %temp = call float @llvm.fabs.f32(float %a)
  %src_input = call float @llvm.fabs.f32(float %src)
  %result = call i32 @llvm.amdgcn.fcmp.f32(float %src_input, float %temp, i32 1)
  store i32 %result, ptr addrspace(1) %out
  ret void
}

; Condition code -1, which is outside the predicate numbering listed above.
; The two selectors are expected to differ here: the default selector emits
; only s_endpgm, while -global-isel emits a store of 0.
define amdgpu_kernel void @v_fcmp_f32(ptr addrspace(1) %out, float %src) {
; SDAG-GFX11-LABEL: v_fcmp_f32:
; SDAG-GFX11:       ; %bb.0:
; SDAG-GFX11-NEXT:    s_endpgm
;
; SDAG-GFX10-LABEL: v_fcmp_f32:
; SDAG-GFX10:       ; %bb.0:
; SDAG-GFX10-NEXT:    s_endpgm
;
; GISEL-GFX11-LABEL: v_fcmp_f32:
; GISEL-GFX11:       ; %bb.0:
; GISEL-GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, 0
; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GISEL-GFX11-NEXT:    global_store_b32 v0, v0, s[0:1]
; GISEL-GFX11-NEXT:    s_endpgm
;
; GISEL-GFX10-LABEL: v_fcmp_f32:
; GISEL-GFX10:       ; %bb.0:
; GISEL-GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, 0
; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GISEL-GFX10-NEXT:    global_store_dword v0, v0, s[0:1]
; GISEL-GFX10-NEXT:    s_endpgm
  %result = call i32 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 -1)
  store i32 %result, ptr addrspace(1) %out
  ret void
}

; Condition code 1 (oeq): %src compared against the constant 100.0, which
; appears in the expected output as the literal 0x42c80000.
define amdgpu_kernel void @v_fcmp_f32_oeq(ptr addrspace(1) %out, float %src) {
; SDAG-GFX11-LABEL: v_fcmp_f32_oeq:
; SDAG-GFX11:       ; %bb.0:
; SDAG-GFX11-NEXT:    s_clause 0x1
; SDAG-GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
; SDAG-GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; SDAG-GFX11-NEXT:    v_cmp_eq_f32_e64 s2, 0x42c80000, s2
; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
; SDAG-GFX11-NEXT:    s_endpgm
;
; SDAG-GFX10-LABEL: v_fcmp_f32_oeq:
; SDAG-GFX10:       ; %bb.0:
; SDAG-GFX10-NEXT:    s_clause 0x1
; SDAG-GFX10-NEXT:    s_load_dword s2, s[4:5], 0x2c
; SDAG-GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; SDAG-GFX10-NEXT:    v_cmp_eq_f32_e64 s2, 0x42c80000, s2
; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s2
; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
; SDAG-GFX10-NEXT:    s_endpgm
;
; GISEL-GFX11-LABEL: v_fcmp_f32_oeq:
; GISEL-GFX11:       ; %bb.0:
; GISEL-GFX11-NEXT:    s_clause 0x1
; GISEL-GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
; GISEL-GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GISEL-GFX11-NEXT:    v_cmp_eq_f32_e64 s2, 0x42c80000, s2
; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
; GISEL-GFX11-NEXT:    s_endpgm
;
; GISEL-GFX10-LABEL: v_fcmp_f32_oeq:
; GISEL-GFX10:       ; %bb.0:
; GISEL-GFX10-NEXT:    s_clause 0x1
; GISEL-GFX10-NEXT:    s_load_dword s2, s[4:5], 0x2c
; GISEL-GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GISEL-GFX10-NEXT:    v_cmp_eq_f32_e64 s2, 0x42c80000, s2
; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s2
; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
; GISEL-GFX10-NEXT:    s_endpgm
  %result = call i32 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 1)
  store i32 %result, ptr addrspace(1) %out
  ret void
}

; Condition code 6 (one): %src compared against the constant 100.0.
define amdgpu_kernel void @v_fcmp_f32_one(ptr addrspace(1) %out, float %src) {
; SDAG-GFX11-LABEL: v_fcmp_f32_one:
; SDAG-GFX11:       ; %bb.0:
; SDAG-GFX11-NEXT:    s_clause 0x1
; SDAG-GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
; SDAG-GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; SDAG-GFX11-NEXT:    v_cmp_neq_f32_e64 s2, 0x42c80000, s2
; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
; SDAG-GFX11-NEXT:    s_endpgm
;
; SDAG-GFX10-LABEL: v_fcmp_f32_one:
; SDAG-GFX10:       ; %bb.0:
; SDAG-GFX10-NEXT:    s_clause 0x1
; SDAG-GFX10-NEXT:    s_load_dword s2, s[4:5], 0x2c
; SDAG-GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; SDAG-GFX10-NEXT:    v_cmp_neq_f32_e64 s2, 0x42c80000, s2
; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s2
; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
; SDAG-GFX10-NEXT:    s_endpgm
;
; GISEL-GFX11-LABEL: v_fcmp_f32_one:
; GISEL-GFX11:       ; %bb.0:
; GISEL-GFX11-NEXT:    s_clause 0x1
; GISEL-GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
; GISEL-GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GISEL-GFX11-NEXT:    v_cmp_neq_f32_e64 s2, 0x42c80000, s2
; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
; GISEL-GFX11-NEXT:    s_endpgm
;
; GISEL-GFX10-LABEL: v_fcmp_f32_one:
; GISEL-GFX10:       ; %bb.0:
; GISEL-GFX10-NEXT:    s_clause 0x1
; GISEL-GFX10-NEXT:    s_load_dword s2, s[4:5], 0x2c
; GISEL-GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GISEL-GFX10-NEXT:    v_cmp_neq_f32_e64 s2, 0x42c80000, s2
; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s2
; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
; GISEL-GFX10-NEXT:    s_endpgm
  %result = call i32 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 6)
  store i32 %result, ptr addrspace(1) %out
  ret void
}

; Condition code 2 (ogt): %src compared against the constant 100.0. The
; expected compare has the literal as its first source and uses the lt form.
define amdgpu_kernel void @v_fcmp_f32_ogt(ptr addrspace(1) %out, float %src) {
; SDAG-GFX11-LABEL: v_fcmp_f32_ogt:
; SDAG-GFX11:       ; %bb.0:
; SDAG-GFX11-NEXT:    s_clause 0x1
; SDAG-GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
; SDAG-GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; SDAG-GFX11-NEXT:    v_cmp_lt_f32_e64 s2, 0x42c80000, s2
; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
; SDAG-GFX11-NEXT:    s_endpgm
;
; SDAG-GFX10-LABEL: v_fcmp_f32_ogt:
; SDAG-GFX10:       ; %bb.0:
; SDAG-GFX10-NEXT:    s_clause 0x1
; SDAG-GFX10-NEXT:    s_load_dword s2, s[4:5], 0x2c
; SDAG-GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; SDAG-GFX10-NEXT:    v_cmp_lt_f32_e64 s2, 0x42c80000, s2
; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s2
; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
; SDAG-GFX10-NEXT:    s_endpgm
;
; GISEL-GFX11-LABEL: v_fcmp_f32_ogt:
; GISEL-GFX11:       ; %bb.0:
; GISEL-GFX11-NEXT:    s_clause 0x1
; GISEL-GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
; GISEL-GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GISEL-GFX11-NEXT:    v_cmp_lt_f32_e64 s2, 0x42c80000, s2
; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
; GISEL-GFX11-NEXT:    s_endpgm
;
; GISEL-GFX10-LABEL: v_fcmp_f32_ogt:
; GISEL-GFX10:       ; %bb.0:
; GISEL-GFX10-NEXT:    s_clause 0x1
; GISEL-GFX10-NEXT:    s_load_dword s2, s[4:5], 0x2c
; GISEL-GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GISEL-GFX10-NEXT:    v_cmp_lt_f32_e64 s2, 0x42c80000, s2
; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s2
; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
; GISEL-GFX10-NEXT:    s_endpgm
  %result = call i32 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 2)
  store i32 %result, ptr addrspace(1) %out
  ret void
}

; Condition code 3 (oge): %src compared against the constant 100.0. The
; expected compare has the literal as its first source and uses the le form.
define amdgpu_kernel void @v_fcmp_f32_oge(ptr addrspace(1) %out, float %src) {
; SDAG-GFX11-LABEL: v_fcmp_f32_oge:
; SDAG-GFX11:       ; %bb.0:
; SDAG-GFX11-NEXT:    s_clause 0x1
; SDAG-GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
; SDAG-GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; SDAG-GFX11-NEXT:    v_cmp_le_f32_e64 s2, 0x42c80000, s2
; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
; SDAG-GFX11-NEXT:    s_endpgm
;
; SDAG-GFX10-LABEL: v_fcmp_f32_oge:
; SDAG-GFX10:       ; %bb.0:
; SDAG-GFX10-NEXT:    s_clause 0x1
; SDAG-GFX10-NEXT:    s_load_dword s2, s[4:5], 0x2c
; SDAG-GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; SDAG-GFX10-NEXT:    v_cmp_le_f32_e64 s2, 0x42c80000, s2
; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s2
; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
; SDAG-GFX10-NEXT:    s_endpgm
;
; GISEL-GFX11-LABEL: v_fcmp_f32_oge:
; GISEL-GFX11:       ; %bb.0:
; GISEL-GFX11-NEXT:    s_clause 0x1
; GISEL-GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
; GISEL-GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GISEL-GFX11-NEXT:    v_cmp_le_f32_e64 s2, 0x42c80000, s2
; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
; GISEL-GFX11-NEXT:    s_endpgm
;
; GISEL-GFX10-LABEL: v_fcmp_f32_oge:
; GISEL-GFX10:       ; %bb.0:
; GISEL-GFX10-NEXT:    s_clause 0x1
; GISEL-GFX10-NEXT:    s_load_dword s2, s[4:5], 0x2c
; GISEL-GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GISEL-GFX10-NEXT:    v_cmp_le_f32_e64 s2, 0x42c80000, s2
; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s2
; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
; GISEL-GFX10-NEXT:    s_endpgm
  %result = call i32 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 3)
  store i32 %result, ptr addrspace(1) %out
  ret void
}

; Condition code 4 (olt): %src compared against the constant 100.0. The
; expected compare has the literal as its first source and uses the gt form.
define amdgpu_kernel void @v_fcmp_f32_olt(ptr addrspace(1) %out, float %src) {
; SDAG-GFX11-LABEL: v_fcmp_f32_olt:
; SDAG-GFX11:       ; %bb.0:
; SDAG-GFX11-NEXT:    s_clause 0x1
; SDAG-GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
; SDAG-GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; SDAG-GFX11-NEXT:    v_cmp_gt_f32_e64 s2, 0x42c80000, s2
; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
; SDAG-GFX11-NEXT:    s_endpgm
;
; SDAG-GFX10-LABEL: v_fcmp_f32_olt:
; SDAG-GFX10:       ; %bb.0:
; SDAG-GFX10-NEXT:    s_clause 0x1
; SDAG-GFX10-NEXT:    s_load_dword s2, s[4:5], 0x2c
; SDAG-GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; SDAG-GFX10-NEXT:    v_cmp_gt_f32_e64 s2, 0x42c80000, s2
; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s2
; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
; SDAG-GFX10-NEXT:    s_endpgm
;
; GISEL-GFX11-LABEL: v_fcmp_f32_olt:
; GISEL-GFX11:       ; %bb.0:
; GISEL-GFX11-NEXT:    s_clause 0x1
; GISEL-GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
; GISEL-GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GISEL-GFX11-NEXT:    v_cmp_gt_f32_e64 s2, 0x42c80000, s2
; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
; GISEL-GFX11-NEXT:    s_endpgm
;
; GISEL-GFX10-LABEL: v_fcmp_f32_olt:
; GISEL-GFX10:       ; %bb.0:
; GISEL-GFX10-NEXT:    s_clause 0x1
; GISEL-GFX10-NEXT:    s_load_dword s2, s[4:5], 0x2c
; GISEL-GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GISEL-GFX10-NEXT:    v_cmp_gt_f32_e64 s2, 0x42c80000, s2
; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s2
; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
; GISEL-GFX10-NEXT:    s_endpgm
  %result = call i32 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 4)
  store i32 %result, ptr addrspace(1) %out
  ret void
}

; Condition code 5 (ole): %src compared against the constant 100.0. The
; expected compare has the literal as its first source and uses the ge form.
define amdgpu_kernel void @v_fcmp_f32_ole(ptr addrspace(1) %out, float %src) {
; SDAG-GFX11-LABEL: v_fcmp_f32_ole:
; SDAG-GFX11:       ; %bb.0:
; SDAG-GFX11-NEXT:    s_clause 0x1
; SDAG-GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
; SDAG-GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; SDAG-GFX11-NEXT:    v_cmp_ge_f32_e64 s2, 0x42c80000, s2
; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
; SDAG-GFX11-NEXT:    s_endpgm
;
; SDAG-GFX10-LABEL: v_fcmp_f32_ole:
; SDAG-GFX10:       ; %bb.0:
; SDAG-GFX10-NEXT:    s_clause 0x1
; SDAG-GFX10-NEXT:    s_load_dword s2, s[4:5], 0x2c
; SDAG-GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; SDAG-GFX10-NEXT:    v_cmp_ge_f32_e64 s2, 0x42c80000, s2
; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s2
; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
; SDAG-GFX10-NEXT:    s_endpgm
;
; GISEL-GFX11-LABEL: v_fcmp_f32_ole:
; GISEL-GFX11:       ; %bb.0:
; GISEL-GFX11-NEXT:    s_clause 0x1
; GISEL-GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
; GISEL-GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GISEL-GFX11-NEXT:    v_cmp_ge_f32_e64 s2, 0x42c80000, s2
; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
; GISEL-GFX11-NEXT:    s_endpgm
;
; GISEL-GFX10-LABEL: v_fcmp_f32_ole:
; GISEL-GFX10:       ; %bb.0:
; GISEL-GFX10-NEXT:    s_clause 0x1
; GISEL-GFX10-NEXT:    s_load_dword s2, s[4:5], 0x2c
; GISEL-GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GISEL-GFX10-NEXT:    v_cmp_ge_f32_e64 s2, 0x42c80000, s2
; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s2
; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
; GISEL-GFX10-NEXT:    s_endpgm
  %result = call i32 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 5)
  store i32 %result, ptr addrspace(1) %out
  ret void
}

; Condition code 7 (ord): %src compared against the constant 100.0.
define amdgpu_kernel void @v_fcmp_f32_o(ptr addrspace(1) %out, float %src) {
; SDAG-GFX11-LABEL: v_fcmp_f32_o:
; SDAG-GFX11:       ; %bb.0:
; SDAG-GFX11-NEXT:    s_clause 0x1
; SDAG-GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
; SDAG-GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; SDAG-GFX11-NEXT:    v_cmp_o_f32_e64 s2, 0x42c80000, s2
; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
; SDAG-GFX11-NEXT:    s_endpgm
;
; SDAG-GFX10-LABEL: v_fcmp_f32_o:
; SDAG-GFX10:       ; %bb.0:
; SDAG-GFX10-NEXT:    s_clause 0x1
; SDAG-GFX10-NEXT:    s_load_dword s2, s[4:5], 0x2c
; SDAG-GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; SDAG-GFX10-NEXT:    v_cmp_o_f32_e64 s2, 0x42c80000, s2
; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s2
; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
; SDAG-GFX10-NEXT:    s_endpgm
;
; GISEL-GFX11-LABEL: v_fcmp_f32_o:
; GISEL-GFX11:       ; %bb.0:
; GISEL-GFX11-NEXT:    s_clause 0x1
; GISEL-GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
; GISEL-GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GISEL-GFX11-NEXT:    v_cmp_o_f32_e64 s2, 0x42c80000, s2
; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
; GISEL-GFX11-NEXT:    s_endpgm
;
; GISEL-GFX10-LABEL: v_fcmp_f32_o:
; GISEL-GFX10:       ; %bb.0:
; GISEL-GFX10-NEXT:    s_clause 0x1
; GISEL-GFX10-NEXT:    s_load_dword s2, s[4:5], 0x2c
; GISEL-GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GISEL-GFX10-NEXT:    v_cmp_o_f32_e64 s2, 0x42c80000, s2
; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s2
; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
; GISEL-GFX10-NEXT:    s_endpgm
  %result = call i32 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 7)
  store i32 %result, ptr addrspace(1) %out
  ret void
}

; Condition code 8 (uno): %src compared against the constant 100.0.
define amdgpu_kernel void @v_fcmp_f32_uo(ptr addrspace(1) %out, float %src) {
; SDAG-GFX11-LABEL: v_fcmp_f32_uo:
; SDAG-GFX11:       ; %bb.0:
; SDAG-GFX11-NEXT:    s_clause 0x1
; SDAG-GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
; SDAG-GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; SDAG-GFX11-NEXT:    v_cmp_u_f32_e64 s2, 0x42c80000, s2
; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
; SDAG-GFX11-NEXT:    s_endpgm
;
; SDAG-GFX10-LABEL: v_fcmp_f32_uo:
; SDAG-GFX10:       ; %bb.0:
; SDAG-GFX10-NEXT:    s_clause 0x1
; SDAG-GFX10-NEXT:    s_load_dword s2, s[4:5], 0x2c
; SDAG-GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; SDAG-GFX10-NEXT:    v_cmp_u_f32_e64 s2, 0x42c80000, s2
; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s2
; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
; SDAG-GFX10-NEXT:    s_endpgm
;
; GISEL-GFX11-LABEL: v_fcmp_f32_uo:
; GISEL-GFX11:       ; %bb.0:
; GISEL-GFX11-NEXT:    s_clause 0x1
; GISEL-GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
; GISEL-GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GISEL-GFX11-NEXT:    v_cmp_u_f32_e64 s2, 0x42c80000, s2
; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
; GISEL-GFX11-NEXT:    s_endpgm
;
; GISEL-GFX10-LABEL: v_fcmp_f32_uo:
; GISEL-GFX10:       ; %bb.0:
; GISEL-GFX10-NEXT:    s_clause 0x1
; GISEL-GFX10-NEXT:    s_load_dword s2, s[4:5], 0x2c
; GISEL-GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GISEL-GFX10-NEXT:    v_cmp_u_f32_e64 s2, 0x42c80000, s2
; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s2
; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
; GISEL-GFX10-NEXT:    s_endpgm
  %result = call i32 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 8)
  store i32 %result, ptr addrspace(1) %out
  ret void
}

; Condition code 9 (ueq): %src compared against the constant 100.0.
define amdgpu_kernel void @v_fcmp_f32_ueq(ptr addrspace(1) %out, float %src) {
; SDAG-GFX11-LABEL: v_fcmp_f32_ueq:
; SDAG-GFX11:       ; %bb.0:
; SDAG-GFX11-NEXT:    s_clause 0x1
; SDAG-GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
; SDAG-GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; SDAG-GFX11-NEXT:    v_cmp_nlg_f32_e64 s2, 0x42c80000, s2
; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
; SDAG-GFX11-NEXT:    s_endpgm
;
; SDAG-GFX10-LABEL: v_fcmp_f32_ueq:
; SDAG-GFX10:       ; %bb.0:
; SDAG-GFX10-NEXT:    s_clause 0x1
; SDAG-GFX10-NEXT:    s_load_dword s2, s[4:5], 0x2c
; SDAG-GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; SDAG-GFX10-NEXT:    v_cmp_nlg_f32_e64 s2, 0x42c80000, s2
; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s2
; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
; SDAG-GFX10-NEXT:    s_endpgm
;
; GISEL-GFX11-LABEL: v_fcmp_f32_ueq:
; GISEL-GFX11:       ; %bb.0:
; GISEL-GFX11-NEXT:    s_clause 0x1
; GISEL-GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
; GISEL-GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GISEL-GFX11-NEXT:    v_cmp_nlg_f32_e64 s2, 0x42c80000, s2
; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
; GISEL-GFX11-NEXT:    s_endpgm
;
; GISEL-GFX10-LABEL: v_fcmp_f32_ueq:
; GISEL-GFX10:       ; %bb.0:
; GISEL-GFX10-NEXT:    s_clause 0x1
; GISEL-GFX10-NEXT:    s_load_dword s2, s[4:5], 0x2c
; GISEL-GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GISEL-GFX10-NEXT:    v_cmp_nlg_f32_e64 s2, 0x42c80000, s2
; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s2
; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
; GISEL-GFX10-NEXT:    s_endpgm
  %result = call i32 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 9)
  store i32 %result, ptr addrspace(1) %out
  ret void
}

; Condition code 14 (une): %src compared against the constant 100.0.
define amdgpu_kernel void @v_fcmp_f32_une(ptr addrspace(1) %out, float %src) {
; SDAG-GFX11-LABEL: v_fcmp_f32_une:
; SDAG-GFX11:       ; %bb.0:
; SDAG-GFX11-NEXT:    s_clause 0x1
; SDAG-GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
; SDAG-GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; SDAG-GFX11-NEXT:    v_cmp_neq_f32_e64 s2, 0x42c80000, s2
; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
; SDAG-GFX11-NEXT:    s_endpgm
;
; SDAG-GFX10-LABEL: v_fcmp_f32_une:
; SDAG-GFX10:       ; %bb.0:
; SDAG-GFX10-NEXT:    s_clause 0x1
; SDAG-GFX10-NEXT:    s_load_dword s2, s[4:5], 0x2c
; SDAG-GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; SDAG-GFX10-NEXT:    v_cmp_neq_f32_e64 s2, 0x42c80000, s2
; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s2
; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
; SDAG-GFX10-NEXT:    s_endpgm
;
; GISEL-GFX11-LABEL: v_fcmp_f32_une:
; GISEL-GFX11:       ; %bb.0:
; GISEL-GFX11-NEXT:    s_clause 0x1
; GISEL-GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
; GISEL-GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GISEL-GFX11-NEXT:    v_cmp_neq_f32_e64 s2, 0x42c80000, s2
; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
; GISEL-GFX11-NEXT:    s_endpgm
;
; GISEL-GFX10-LABEL: v_fcmp_f32_une:
; GISEL-GFX10:       ; %bb.0:
; GISEL-GFX10-NEXT:    s_clause 0x1
; GISEL-GFX10-NEXT:    s_load_dword s2, s[4:5], 0x2c
; GISEL-GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GISEL-GFX10-NEXT:    v_cmp_neq_f32_e64 s2, 0x42c80000, s2
; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s2
; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
; GISEL-GFX10-NEXT:    s_endpgm
  %result = call i32 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 14)
  store i32 %result, ptr addrspace(1) %out
  ret void
}

; Condition code 10 (ugt): %src compared against the constant 100.0. The
; expected compare has the literal as its first source and uses the nge form.
define amdgpu_kernel void @v_fcmp_f32_ugt(ptr addrspace(1) %out, float %src) {
; SDAG-GFX11-LABEL: v_fcmp_f32_ugt:
; SDAG-GFX11:       ; %bb.0:
; SDAG-GFX11-NEXT:    s_clause 0x1
; SDAG-GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
; SDAG-GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; SDAG-GFX11-NEXT:    v_cmp_nge_f32_e64 s2, 0x42c80000, s2
; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
; SDAG-GFX11-NEXT:    s_endpgm
;
; SDAG-GFX10-LABEL: v_fcmp_f32_ugt:
; SDAG-GFX10:       ; %bb.0:
; SDAG-GFX10-NEXT:    s_clause 0x1
; SDAG-GFX10-NEXT:    s_load_dword s2, s[4:5], 0x2c
; SDAG-GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; SDAG-GFX10-NEXT:    v_cmp_nge_f32_e64 s2, 0x42c80000, s2
; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s2
; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
; SDAG-GFX10-NEXT:    s_endpgm
;
; GISEL-GFX11-LABEL: v_fcmp_f32_ugt:
; GISEL-GFX11:       ; %bb.0:
; GISEL-GFX11-NEXT:    s_clause 0x1
; GISEL-GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
; GISEL-GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GISEL-GFX11-NEXT:    v_cmp_nge_f32_e64 s2, 0x42c80000, s2
; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
; GISEL-GFX11-NEXT:    s_endpgm
;
; GISEL-GFX10-LABEL: v_fcmp_f32_ugt:
; GISEL-GFX10:       ; %bb.0:
; GISEL-GFX10-NEXT:    s_clause 0x1
; GISEL-GFX10-NEXT:    s_load_dword s2, s[4:5], 0x2c
; GISEL-GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GISEL-GFX10-NEXT:    v_cmp_nge_f32_e64 s2, 0x42c80000, s2
; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s2
; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
; GISEL-GFX10-NEXT:    s_endpgm
  %result = call i32 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 10)
  store i32 %result, ptr addrspace(1) %out
  ret void
}

; Condition code 11 (uge): %src compared against the constant 100.0. The
; expected compare has the literal as its first source and uses the ngt form.
define amdgpu_kernel void @v_fcmp_f32_uge(ptr addrspace(1) %out, float %src) {
; SDAG-GFX11-LABEL: v_fcmp_f32_uge:
; SDAG-GFX11:       ; %bb.0:
; SDAG-GFX11-NEXT:    s_clause 0x1
; SDAG-GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
; SDAG-GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; SDAG-GFX11-NEXT:    v_cmp_ngt_f32_e64 s2, 0x42c80000, s2
; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
; SDAG-GFX11-NEXT:    s_endpgm
;
; SDAG-GFX10-LABEL: v_fcmp_f32_uge:
; SDAG-GFX10:       ; %bb.0:
; SDAG-GFX10-NEXT:    s_clause 0x1
; SDAG-GFX10-NEXT:    s_load_dword s2, s[4:5], 0x2c
; SDAG-GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; SDAG-GFX10-NEXT:    v_cmp_ngt_f32_e64 s2, 0x42c80000, s2
; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s2
; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
; SDAG-GFX10-NEXT:    s_endpgm
;
; GISEL-GFX11-LABEL: v_fcmp_f32_uge:
; GISEL-GFX11:       ; %bb.0:
; GISEL-GFX11-NEXT:    s_clause 0x1
; GISEL-GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
; GISEL-GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GISEL-GFX11-NEXT:    v_cmp_ngt_f32_e64 s2, 0x42c80000, s2
; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
; GISEL-GFX11-NEXT:    s_endpgm
;
; GISEL-GFX10-LABEL: v_fcmp_f32_uge:
; GISEL-GFX10:       ; %bb.0:
; GISEL-GFX10-NEXT:    s_clause 0x1
; GISEL-GFX10-NEXT:    s_load_dword s2, s[4:5], 0x2c
; GISEL-GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GISEL-GFX10-NEXT:    v_cmp_ngt_f32_e64 s2, 0x42c80000, s2
; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s2
; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
; GISEL-GFX10-NEXT:    s_endpgm
  %result = call i32 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 11)
  store i32 %result, ptr addrspace(1) %out
  ret void
}

; Condition code 12 (ult): %src compared against the constant 100.0. The
; expected compare has the literal as its first source and uses the nle form.
define amdgpu_kernel void @v_fcmp_f32_ult(ptr addrspace(1) %out, float %src) {
; SDAG-GFX11-LABEL: v_fcmp_f32_ult:
; SDAG-GFX11:       ; %bb.0:
; SDAG-GFX11-NEXT:    s_clause 0x1
; SDAG-GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
; SDAG-GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; SDAG-GFX11-NEXT:    v_cmp_nle_f32_e64 s2, 0x42c80000, s2
; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
; SDAG-GFX11-NEXT:    s_endpgm
;
; SDAG-GFX10-LABEL: v_fcmp_f32_ult:
; SDAG-GFX10:       ; %bb.0:
; SDAG-GFX10-NEXT:    s_clause 0x1
; SDAG-GFX10-NEXT:    s_load_dword s2, s[4:5], 0x2c
; SDAG-GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; SDAG-GFX10-NEXT:    v_cmp_nle_f32_e64 s2, 0x42c80000, s2
; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s2
; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
; SDAG-GFX10-NEXT:    s_endpgm
;
; GISEL-GFX11-LABEL: v_fcmp_f32_ult:
; GISEL-GFX11:       ; %bb.0:
; GISEL-GFX11-NEXT:    s_clause 0x1
; GISEL-GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
; GISEL-GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GISEL-GFX11-NEXT:    v_cmp_nle_f32_e64 s2, 0x42c80000, s2
; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
; GISEL-GFX11-NEXT:    s_endpgm
;
; GISEL-GFX10-LABEL: v_fcmp_f32_ult:
; GISEL-GFX10:       ; %bb.0:
; GISEL-GFX10-NEXT:    s_clause 0x1
; GISEL-GFX10-NEXT:    s_load_dword s2, s[4:5], 0x2c
; GISEL-GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GISEL-GFX10-NEXT:    v_cmp_nle_f32_e64 s2, 0x42c80000, s2
; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s2
; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
; GISEL-GFX10-NEXT:    s_endpgm
  %result = call i32 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 12)
  store i32 %result, ptr addrspace(1) %out
  ret void
}

; Condition code 13 (ule): %src compared against the constant 100.0. The
; expected compare has the literal as its first source and uses the nlt form.
define amdgpu_kernel void @v_fcmp_f32_ule(ptr addrspace(1) %out, float %src) {
; SDAG-GFX11-LABEL: v_fcmp_f32_ule:
; SDAG-GFX11:       ; %bb.0:
; SDAG-GFX11-NEXT:    s_clause 0x1
; SDAG-GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
; SDAG-GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; SDAG-GFX11-NEXT:    v_cmp_nlt_f32_e64 s2, 0x42c80000, s2
; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
; SDAG-GFX11-NEXT:    s_endpgm
;
; SDAG-GFX10-LABEL: v_fcmp_f32_ule:
; SDAG-GFX10:       ; %bb.0:
; SDAG-GFX10-NEXT:    s_clause 0x1
; SDAG-GFX10-NEXT:    s_load_dword s2, s[4:5], 0x2c
; SDAG-GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; SDAG-GFX10-NEXT:    v_cmp_nlt_f32_e64 s2, 0x42c80000, s2
; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s2
; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
; SDAG-GFX10-NEXT:    s_endpgm
;
; GISEL-GFX11-LABEL: v_fcmp_f32_ule:
; GISEL-GFX11:       ; %bb.0:
; GISEL-GFX11-NEXT:    s_clause 0x1
; GISEL-GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
; GISEL-GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GISEL-GFX11-NEXT:    v_cmp_nlt_f32_e64 s2, 0x42c80000, s2
; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
; GISEL-GFX11-NEXT:    s_endpgm
;
; GISEL-GFX10-LABEL: v_fcmp_f32_ule:
; GISEL-GFX10:       ; %bb.0:
; GISEL-GFX10-NEXT:    s_clause 0x1
; GISEL-GFX10-NEXT:    s_load_dword s2, s[4:5], 0x2c
; GISEL-GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GISEL-GFX10-NEXT:    v_cmp_nlt_f32_e64 s2, 0x42c80000, s2
; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s2
; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
; GISEL-GFX10-NEXT:    s_endpgm
  %result = call i32 @llvm.amdgcn.fcmp.f32(float %src, float 100.00, i32 13)
  store i32 %result, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @v_fcmp_f64_oeq(ptr addrspace(1) %out, double %src) {
;
SDAG-GFX11-LABEL: v_fcmp_f64_oeq: 897; SDAG-GFX11: ; %bb.0: 898; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 899; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0) 900; SDAG-GFX11-NEXT: v_cmp_eq_f64_e64 s2, 0x40590000, s[2:3] 901; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 902; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 903; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 904; SDAG-GFX11-NEXT: s_endpgm 905; 906; SDAG-GFX10-LABEL: v_fcmp_f64_oeq: 907; SDAG-GFX10: ; %bb.0: 908; SDAG-GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 909; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0 910; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0) 911; SDAG-GFX10-NEXT: v_cmp_eq_f64_e64 s2, 0x40590000, s[2:3] 912; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s2 913; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[0:1] 914; SDAG-GFX10-NEXT: s_endpgm 915; 916; GISEL-GFX11-LABEL: v_fcmp_f64_oeq: 917; GISEL-GFX11: ; %bb.0: 918; GISEL-GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 919; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0 920; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0) 921; GISEL-GFX11-NEXT: v_cmp_eq_f64_e64 s2, 0x40590000, s[2:3] 922; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 923; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2 924; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1] 925; GISEL-GFX11-NEXT: s_endpgm 926; 927; GISEL-GFX10-LABEL: v_fcmp_f64_oeq: 928; GISEL-GFX10: ; %bb.0: 929; GISEL-GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 930; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0 931; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) 932; GISEL-GFX10-NEXT: v_cmp_eq_f64_e64 s2, 0x40590000, s[2:3] 933; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s2 934; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1] 935; GISEL-GFX10-NEXT: s_endpgm 936 %result = call i32 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 1) 937 store i32 %result, ptr addrspace(1) %out 938 ret void 939} 940 941define amdgpu_kernel void @v_fcmp_f64_one(ptr addrspace(1) %out, double %src) { 942; SDAG-GFX11-LABEL: v_fcmp_f64_one: 943; SDAG-GFX11: ; 
%bb.0: 944; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 945; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0) 946; SDAG-GFX11-NEXT: v_cmp_neq_f64_e64 s2, 0x40590000, s[2:3] 947; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 948; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 949; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 950; SDAG-GFX11-NEXT: s_endpgm 951; 952; SDAG-GFX10-LABEL: v_fcmp_f64_one: 953; SDAG-GFX10: ; %bb.0: 954; SDAG-GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 955; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0 956; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0) 957; SDAG-GFX10-NEXT: v_cmp_neq_f64_e64 s2, 0x40590000, s[2:3] 958; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s2 959; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[0:1] 960; SDAG-GFX10-NEXT: s_endpgm 961; 962; GISEL-GFX11-LABEL: v_fcmp_f64_one: 963; GISEL-GFX11: ; %bb.0: 964; GISEL-GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 965; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0 966; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0) 967; GISEL-GFX11-NEXT: v_cmp_neq_f64_e64 s2, 0x40590000, s[2:3] 968; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 969; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2 970; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1] 971; GISEL-GFX11-NEXT: s_endpgm 972; 973; GISEL-GFX10-LABEL: v_fcmp_f64_one: 974; GISEL-GFX10: ; %bb.0: 975; GISEL-GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 976; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0 977; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) 978; GISEL-GFX10-NEXT: v_cmp_neq_f64_e64 s2, 0x40590000, s[2:3] 979; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s2 980; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1] 981; GISEL-GFX10-NEXT: s_endpgm 982 %result = call i32 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 6) 983 store i32 %result, ptr addrspace(1) %out 984 ret void 985} 986 987define amdgpu_kernel void @v_fcmp_f64_ogt(ptr addrspace(1) %out, double %src) { 988; SDAG-GFX11-LABEL: v_fcmp_f64_ogt: 989; SDAG-GFX11: ; %bb.0: 990; SDAG-GFX11-NEXT: s_load_b128 s[0:3], 
s[4:5], 0x24 991; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0) 992; SDAG-GFX11-NEXT: v_cmp_lt_f64_e64 s2, 0x40590000, s[2:3] 993; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 994; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 995; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 996; SDAG-GFX11-NEXT: s_endpgm 997; 998; SDAG-GFX10-LABEL: v_fcmp_f64_ogt: 999; SDAG-GFX10: ; %bb.0: 1000; SDAG-GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1001; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0 1002; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0) 1003; SDAG-GFX10-NEXT: v_cmp_lt_f64_e64 s2, 0x40590000, s[2:3] 1004; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s2 1005; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[0:1] 1006; SDAG-GFX10-NEXT: s_endpgm 1007; 1008; GISEL-GFX11-LABEL: v_fcmp_f64_ogt: 1009; GISEL-GFX11: ; %bb.0: 1010; GISEL-GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1011; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0 1012; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0) 1013; GISEL-GFX11-NEXT: v_cmp_lt_f64_e64 s2, 0x40590000, s[2:3] 1014; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1015; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2 1016; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1] 1017; GISEL-GFX11-NEXT: s_endpgm 1018; 1019; GISEL-GFX10-LABEL: v_fcmp_f64_ogt: 1020; GISEL-GFX10: ; %bb.0: 1021; GISEL-GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1022; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0 1023; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) 1024; GISEL-GFX10-NEXT: v_cmp_lt_f64_e64 s2, 0x40590000, s[2:3] 1025; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s2 1026; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1] 1027; GISEL-GFX10-NEXT: s_endpgm 1028 %result = call i32 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 2) 1029 store i32 %result, ptr addrspace(1) %out 1030 ret void 1031} 1032 1033define amdgpu_kernel void @v_fcmp_f64_oge(ptr addrspace(1) %out, double %src) { 1034; SDAG-GFX11-LABEL: v_fcmp_f64_oge: 1035; SDAG-GFX11: ; %bb.0: 1036; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 
1037; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0) 1038; SDAG-GFX11-NEXT: v_cmp_le_f64_e64 s2, 0x40590000, s[2:3] 1039; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1040; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 1041; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 1042; SDAG-GFX11-NEXT: s_endpgm 1043; 1044; SDAG-GFX10-LABEL: v_fcmp_f64_oge: 1045; SDAG-GFX10: ; %bb.0: 1046; SDAG-GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1047; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0 1048; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0) 1049; SDAG-GFX10-NEXT: v_cmp_le_f64_e64 s2, 0x40590000, s[2:3] 1050; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s2 1051; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[0:1] 1052; SDAG-GFX10-NEXT: s_endpgm 1053; 1054; GISEL-GFX11-LABEL: v_fcmp_f64_oge: 1055; GISEL-GFX11: ; %bb.0: 1056; GISEL-GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1057; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0 1058; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0) 1059; GISEL-GFX11-NEXT: v_cmp_le_f64_e64 s2, 0x40590000, s[2:3] 1060; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1061; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2 1062; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1] 1063; GISEL-GFX11-NEXT: s_endpgm 1064; 1065; GISEL-GFX10-LABEL: v_fcmp_f64_oge: 1066; GISEL-GFX10: ; %bb.0: 1067; GISEL-GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1068; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0 1069; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) 1070; GISEL-GFX10-NEXT: v_cmp_le_f64_e64 s2, 0x40590000, s[2:3] 1071; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s2 1072; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1] 1073; GISEL-GFX10-NEXT: s_endpgm 1074 %result = call i32 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 3) 1075 store i32 %result, ptr addrspace(1) %out 1076 ret void 1077} 1078 1079define amdgpu_kernel void @v_fcmp_f64_olt(ptr addrspace(1) %out, double %src) { 1080; SDAG-GFX11-LABEL: v_fcmp_f64_olt: 1081; SDAG-GFX11: ; %bb.0: 1082; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1083; 
SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0) 1084; SDAG-GFX11-NEXT: v_cmp_gt_f64_e64 s2, 0x40590000, s[2:3] 1085; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1086; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 1087; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 1088; SDAG-GFX11-NEXT: s_endpgm 1089; 1090; SDAG-GFX10-LABEL: v_fcmp_f64_olt: 1091; SDAG-GFX10: ; %bb.0: 1092; SDAG-GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1093; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0 1094; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0) 1095; SDAG-GFX10-NEXT: v_cmp_gt_f64_e64 s2, 0x40590000, s[2:3] 1096; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s2 1097; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[0:1] 1098; SDAG-GFX10-NEXT: s_endpgm 1099; 1100; GISEL-GFX11-LABEL: v_fcmp_f64_olt: 1101; GISEL-GFX11: ; %bb.0: 1102; GISEL-GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1103; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0 1104; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0) 1105; GISEL-GFX11-NEXT: v_cmp_gt_f64_e64 s2, 0x40590000, s[2:3] 1106; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1107; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2 1108; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1] 1109; GISEL-GFX11-NEXT: s_endpgm 1110; 1111; GISEL-GFX10-LABEL: v_fcmp_f64_olt: 1112; GISEL-GFX10: ; %bb.0: 1113; GISEL-GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1114; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0 1115; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) 1116; GISEL-GFX10-NEXT: v_cmp_gt_f64_e64 s2, 0x40590000, s[2:3] 1117; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s2 1118; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1] 1119; GISEL-GFX10-NEXT: s_endpgm 1120 %result = call i32 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 4) 1121 store i32 %result, ptr addrspace(1) %out 1122 ret void 1123} 1124 1125define amdgpu_kernel void @v_fcmp_f64_ole(ptr addrspace(1) %out, double %src) { 1126; SDAG-GFX11-LABEL: v_fcmp_f64_ole: 1127; SDAG-GFX11: ; %bb.0: 1128; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1129; 
SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0) 1130; SDAG-GFX11-NEXT: v_cmp_ge_f64_e64 s2, 0x40590000, s[2:3] 1131; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1132; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 1133; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 1134; SDAG-GFX11-NEXT: s_endpgm 1135; 1136; SDAG-GFX10-LABEL: v_fcmp_f64_ole: 1137; SDAG-GFX10: ; %bb.0: 1138; SDAG-GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1139; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0 1140; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0) 1141; SDAG-GFX10-NEXT: v_cmp_ge_f64_e64 s2, 0x40590000, s[2:3] 1142; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s2 1143; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[0:1] 1144; SDAG-GFX10-NEXT: s_endpgm 1145; 1146; GISEL-GFX11-LABEL: v_fcmp_f64_ole: 1147; GISEL-GFX11: ; %bb.0: 1148; GISEL-GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1149; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0 1150; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0) 1151; GISEL-GFX11-NEXT: v_cmp_ge_f64_e64 s2, 0x40590000, s[2:3] 1152; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1153; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2 1154; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1] 1155; GISEL-GFX11-NEXT: s_endpgm 1156; 1157; GISEL-GFX10-LABEL: v_fcmp_f64_ole: 1158; GISEL-GFX10: ; %bb.0: 1159; GISEL-GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1160; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0 1161; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) 1162; GISEL-GFX10-NEXT: v_cmp_ge_f64_e64 s2, 0x40590000, s[2:3] 1163; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s2 1164; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1] 1165; GISEL-GFX10-NEXT: s_endpgm 1166 %result = call i32 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 5) 1167 store i32 %result, ptr addrspace(1) %out 1168 ret void 1169} 1170 1171define amdgpu_kernel void @v_fcmp_f64_ueq(ptr addrspace(1) %out, double %src) { 1172; SDAG-GFX11-LABEL: v_fcmp_f64_ueq: 1173; SDAG-GFX11: ; %bb.0: 1174; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1175; 
SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0) 1176; SDAG-GFX11-NEXT: v_cmp_nlg_f64_e64 s2, 0x40590000, s[2:3] 1177; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1178; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 1179; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 1180; SDAG-GFX11-NEXT: s_endpgm 1181; 1182; SDAG-GFX10-LABEL: v_fcmp_f64_ueq: 1183; SDAG-GFX10: ; %bb.0: 1184; SDAG-GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1185; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0 1186; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0) 1187; SDAG-GFX10-NEXT: v_cmp_nlg_f64_e64 s2, 0x40590000, s[2:3] 1188; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s2 1189; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[0:1] 1190; SDAG-GFX10-NEXT: s_endpgm 1191; 1192; GISEL-GFX11-LABEL: v_fcmp_f64_ueq: 1193; GISEL-GFX11: ; %bb.0: 1194; GISEL-GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1195; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0 1196; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0) 1197; GISEL-GFX11-NEXT: v_cmp_nlg_f64_e64 s2, 0x40590000, s[2:3] 1198; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1199; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2 1200; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1] 1201; GISEL-GFX11-NEXT: s_endpgm 1202; 1203; GISEL-GFX10-LABEL: v_fcmp_f64_ueq: 1204; GISEL-GFX10: ; %bb.0: 1205; GISEL-GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1206; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0 1207; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) 1208; GISEL-GFX10-NEXT: v_cmp_nlg_f64_e64 s2, 0x40590000, s[2:3] 1209; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s2 1210; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1] 1211; GISEL-GFX10-NEXT: s_endpgm 1212 %result = call i32 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 9) 1213 store i32 %result, ptr addrspace(1) %out 1214 ret void 1215} 1216 1217define amdgpu_kernel void @v_fcmp_f64_o(ptr addrspace(1) %out, double %src) { 1218; SDAG-GFX11-LABEL: v_fcmp_f64_o: 1219; SDAG-GFX11: ; %bb.0: 1220; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1221; 
SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0) 1222; SDAG-GFX11-NEXT: v_cmp_o_f64_e64 s2, 0x40590000, s[2:3] 1223; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1224; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 1225; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 1226; SDAG-GFX11-NEXT: s_endpgm 1227; 1228; SDAG-GFX10-LABEL: v_fcmp_f64_o: 1229; SDAG-GFX10: ; %bb.0: 1230; SDAG-GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1231; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0 1232; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0) 1233; SDAG-GFX10-NEXT: v_cmp_o_f64_e64 s2, 0x40590000, s[2:3] 1234; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s2 1235; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[0:1] 1236; SDAG-GFX10-NEXT: s_endpgm 1237; 1238; GISEL-GFX11-LABEL: v_fcmp_f64_o: 1239; GISEL-GFX11: ; %bb.0: 1240; GISEL-GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1241; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0 1242; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0) 1243; GISEL-GFX11-NEXT: v_cmp_o_f64_e64 s2, 0x40590000, s[2:3] 1244; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1245; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2 1246; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1] 1247; GISEL-GFX11-NEXT: s_endpgm 1248; 1249; GISEL-GFX10-LABEL: v_fcmp_f64_o: 1250; GISEL-GFX10: ; %bb.0: 1251; GISEL-GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1252; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0 1253; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) 1254; GISEL-GFX10-NEXT: v_cmp_o_f64_e64 s2, 0x40590000, s[2:3] 1255; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s2 1256; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1] 1257; GISEL-GFX10-NEXT: s_endpgm 1258 %result = call i32 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 7) 1259 store i32 %result, ptr addrspace(1) %out 1260 ret void 1261} 1262 1263define amdgpu_kernel void @v_fcmp_f64_uo(ptr addrspace(1) %out, double %src) { 1264; SDAG-GFX11-LABEL: v_fcmp_f64_uo: 1265; SDAG-GFX11: ; %bb.0: 1266; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1267; SDAG-GFX11-NEXT: 
s_waitcnt lgkmcnt(0) 1268; SDAG-GFX11-NEXT: v_cmp_u_f64_e64 s2, 0x40590000, s[2:3] 1269; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1270; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 1271; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 1272; SDAG-GFX11-NEXT: s_endpgm 1273; 1274; SDAG-GFX10-LABEL: v_fcmp_f64_uo: 1275; SDAG-GFX10: ; %bb.0: 1276; SDAG-GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1277; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0 1278; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0) 1279; SDAG-GFX10-NEXT: v_cmp_u_f64_e64 s2, 0x40590000, s[2:3] 1280; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s2 1281; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[0:1] 1282; SDAG-GFX10-NEXT: s_endpgm 1283; 1284; GISEL-GFX11-LABEL: v_fcmp_f64_uo: 1285; GISEL-GFX11: ; %bb.0: 1286; GISEL-GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1287; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0 1288; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0) 1289; GISEL-GFX11-NEXT: v_cmp_u_f64_e64 s2, 0x40590000, s[2:3] 1290; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1291; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2 1292; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1] 1293; GISEL-GFX11-NEXT: s_endpgm 1294; 1295; GISEL-GFX10-LABEL: v_fcmp_f64_uo: 1296; GISEL-GFX10: ; %bb.0: 1297; GISEL-GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1298; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0 1299; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) 1300; GISEL-GFX10-NEXT: v_cmp_u_f64_e64 s2, 0x40590000, s[2:3] 1301; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s2 1302; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1] 1303; GISEL-GFX10-NEXT: s_endpgm 1304 %result = call i32 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 8) 1305 store i32 %result, ptr addrspace(1) %out 1306 ret void 1307} 1308 1309define amdgpu_kernel void @v_fcmp_f64_une(ptr addrspace(1) %out, double %src) { 1310; SDAG-GFX11-LABEL: v_fcmp_f64_une: 1311; SDAG-GFX11: ; %bb.0: 1312; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1313; SDAG-GFX11-NEXT: s_waitcnt 
lgkmcnt(0) 1314; SDAG-GFX11-NEXT: v_cmp_neq_f64_e64 s2, 0x40590000, s[2:3] 1315; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1316; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 1317; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 1318; SDAG-GFX11-NEXT: s_endpgm 1319; 1320; SDAG-GFX10-LABEL: v_fcmp_f64_une: 1321; SDAG-GFX10: ; %bb.0: 1322; SDAG-GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1323; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0 1324; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0) 1325; SDAG-GFX10-NEXT: v_cmp_neq_f64_e64 s2, 0x40590000, s[2:3] 1326; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s2 1327; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[0:1] 1328; SDAG-GFX10-NEXT: s_endpgm 1329; 1330; GISEL-GFX11-LABEL: v_fcmp_f64_une: 1331; GISEL-GFX11: ; %bb.0: 1332; GISEL-GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1333; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0 1334; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0) 1335; GISEL-GFX11-NEXT: v_cmp_neq_f64_e64 s2, 0x40590000, s[2:3] 1336; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1337; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2 1338; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1] 1339; GISEL-GFX11-NEXT: s_endpgm 1340; 1341; GISEL-GFX10-LABEL: v_fcmp_f64_une: 1342; GISEL-GFX10: ; %bb.0: 1343; GISEL-GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1344; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0 1345; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) 1346; GISEL-GFX10-NEXT: v_cmp_neq_f64_e64 s2, 0x40590000, s[2:3] 1347; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s2 1348; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1] 1349; GISEL-GFX10-NEXT: s_endpgm 1350 %result = call i32 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 14) 1351 store i32 %result, ptr addrspace(1) %out 1352 ret void 1353} 1354 1355define amdgpu_kernel void @v_fcmp_f64_ugt(ptr addrspace(1) %out, double %src) { 1356; SDAG-GFX11-LABEL: v_fcmp_f64_ugt: 1357; SDAG-GFX11: ; %bb.0: 1358; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1359; SDAG-GFX11-NEXT: s_waitcnt 
lgkmcnt(0) 1360; SDAG-GFX11-NEXT: v_cmp_nge_f64_e64 s2, 0x40590000, s[2:3] 1361; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1362; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 1363; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 1364; SDAG-GFX11-NEXT: s_endpgm 1365; 1366; SDAG-GFX10-LABEL: v_fcmp_f64_ugt: 1367; SDAG-GFX10: ; %bb.0: 1368; SDAG-GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1369; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0 1370; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0) 1371; SDAG-GFX10-NEXT: v_cmp_nge_f64_e64 s2, 0x40590000, s[2:3] 1372; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s2 1373; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[0:1] 1374; SDAG-GFX10-NEXT: s_endpgm 1375; 1376; GISEL-GFX11-LABEL: v_fcmp_f64_ugt: 1377; GISEL-GFX11: ; %bb.0: 1378; GISEL-GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1379; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0 1380; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0) 1381; GISEL-GFX11-NEXT: v_cmp_nge_f64_e64 s2, 0x40590000, s[2:3] 1382; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1383; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2 1384; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1] 1385; GISEL-GFX11-NEXT: s_endpgm 1386; 1387; GISEL-GFX10-LABEL: v_fcmp_f64_ugt: 1388; GISEL-GFX10: ; %bb.0: 1389; GISEL-GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1390; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0 1391; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) 1392; GISEL-GFX10-NEXT: v_cmp_nge_f64_e64 s2, 0x40590000, s[2:3] 1393; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s2 1394; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1] 1395; GISEL-GFX10-NEXT: s_endpgm 1396 %result = call i32 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 10) 1397 store i32 %result, ptr addrspace(1) %out 1398 ret void 1399} 1400 1401define amdgpu_kernel void @v_fcmp_f64_uge(ptr addrspace(1) %out, double %src) { 1402; SDAG-GFX11-LABEL: v_fcmp_f64_uge: 1403; SDAG-GFX11: ; %bb.0: 1404; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1405; SDAG-GFX11-NEXT: s_waitcnt 
lgkmcnt(0) 1406; SDAG-GFX11-NEXT: v_cmp_ngt_f64_e64 s2, 0x40590000, s[2:3] 1407; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1408; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 1409; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 1410; SDAG-GFX11-NEXT: s_endpgm 1411; 1412; SDAG-GFX10-LABEL: v_fcmp_f64_uge: 1413; SDAG-GFX10: ; %bb.0: 1414; SDAG-GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1415; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0 1416; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0) 1417; SDAG-GFX10-NEXT: v_cmp_ngt_f64_e64 s2, 0x40590000, s[2:3] 1418; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s2 1419; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[0:1] 1420; SDAG-GFX10-NEXT: s_endpgm 1421; 1422; GISEL-GFX11-LABEL: v_fcmp_f64_uge: 1423; GISEL-GFX11: ; %bb.0: 1424; GISEL-GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1425; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0 1426; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0) 1427; GISEL-GFX11-NEXT: v_cmp_ngt_f64_e64 s2, 0x40590000, s[2:3] 1428; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1429; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2 1430; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1] 1431; GISEL-GFX11-NEXT: s_endpgm 1432; 1433; GISEL-GFX10-LABEL: v_fcmp_f64_uge: 1434; GISEL-GFX10: ; %bb.0: 1435; GISEL-GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1436; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0 1437; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) 1438; GISEL-GFX10-NEXT: v_cmp_ngt_f64_e64 s2, 0x40590000, s[2:3] 1439; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s2 1440; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1] 1441; GISEL-GFX10-NEXT: s_endpgm 1442 %result = call i32 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 11) 1443 store i32 %result, ptr addrspace(1) %out 1444 ret void 1445} 1446 1447define amdgpu_kernel void @v_fcmp_f64_ult(ptr addrspace(1) %out, double %src) { 1448; SDAG-GFX11-LABEL: v_fcmp_f64_ult: 1449; SDAG-GFX11: ; %bb.0: 1450; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1451; SDAG-GFX11-NEXT: s_waitcnt 
lgkmcnt(0) 1452; SDAG-GFX11-NEXT: v_cmp_nle_f64_e64 s2, 0x40590000, s[2:3] 1453; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1454; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 1455; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 1456; SDAG-GFX11-NEXT: s_endpgm 1457; 1458; SDAG-GFX10-LABEL: v_fcmp_f64_ult: 1459; SDAG-GFX10: ; %bb.0: 1460; SDAG-GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1461; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0 1462; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0) 1463; SDAG-GFX10-NEXT: v_cmp_nle_f64_e64 s2, 0x40590000, s[2:3] 1464; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s2 1465; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[0:1] 1466; SDAG-GFX10-NEXT: s_endpgm 1467; 1468; GISEL-GFX11-LABEL: v_fcmp_f64_ult: 1469; GISEL-GFX11: ; %bb.0: 1470; GISEL-GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1471; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0 1472; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0) 1473; GISEL-GFX11-NEXT: v_cmp_nle_f64_e64 s2, 0x40590000, s[2:3] 1474; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1475; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2 1476; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1] 1477; GISEL-GFX11-NEXT: s_endpgm 1478; 1479; GISEL-GFX10-LABEL: v_fcmp_f64_ult: 1480; GISEL-GFX10: ; %bb.0: 1481; GISEL-GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1482; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0 1483; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) 1484; GISEL-GFX10-NEXT: v_cmp_nle_f64_e64 s2, 0x40590000, s[2:3] 1485; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s2 1486; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1] 1487; GISEL-GFX10-NEXT: s_endpgm 1488 %result = call i32 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 12) 1489 store i32 %result, ptr addrspace(1) %out 1490 ret void 1491} 1492 1493define amdgpu_kernel void @v_fcmp_f64_ule(ptr addrspace(1) %out, double %src) { 1494; SDAG-GFX11-LABEL: v_fcmp_f64_ule: 1495; SDAG-GFX11: ; %bb.0: 1496; SDAG-GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1497; SDAG-GFX11-NEXT: s_waitcnt 
lgkmcnt(0) 1498; SDAG-GFX11-NEXT: v_cmp_nlt_f64_e64 s2, 0x40590000, s[2:3] 1499; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1500; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 1501; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 1502; SDAG-GFX11-NEXT: s_endpgm 1503; 1504; SDAG-GFX10-LABEL: v_fcmp_f64_ule: 1505; SDAG-GFX10: ; %bb.0: 1506; SDAG-GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1507; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0 1508; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0) 1509; SDAG-GFX10-NEXT: v_cmp_nlt_f64_e64 s2, 0x40590000, s[2:3] 1510; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s2 1511; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[0:1] 1512; SDAG-GFX10-NEXT: s_endpgm 1513; 1514; GISEL-GFX11-LABEL: v_fcmp_f64_ule: 1515; GISEL-GFX11: ; %bb.0: 1516; GISEL-GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 1517; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0 1518; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0) 1519; GISEL-GFX11-NEXT: v_cmp_nlt_f64_e64 s2, 0x40590000, s[2:3] 1520; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1521; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2 1522; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1] 1523; GISEL-GFX11-NEXT: s_endpgm 1524; 1525; GISEL-GFX10-LABEL: v_fcmp_f64_ule: 1526; GISEL-GFX10: ; %bb.0: 1527; GISEL-GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 1528; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0 1529; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) 1530; GISEL-GFX10-NEXT: v_cmp_nlt_f64_e64 s2, 0x40590000, s[2:3] 1531; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s2 1532; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1] 1533; GISEL-GFX10-NEXT: s_endpgm 1534 %result = call i32 @llvm.amdgcn.fcmp.f64(double %src, double 100.00, i32 13) 1535 store i32 %result, ptr addrspace(1) %out 1536 ret void 1537} 1538 1539 1540define amdgpu_kernel void @v_fcmp_f16_oeq_with_fabs(ptr addrspace(1) %out, half %src, half %a) { 1541; SDAG-GFX11-LABEL: v_fcmp_f16_oeq_with_fabs: 1542; SDAG-GFX11: ; %bb.0: 1543; SDAG-GFX11-NEXT: s_clause 0x1 1544; SDAG-GFX11-NEXT: 
s_load_b32 s2, s[4:5], 0x2c 1545; SDAG-GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 1546; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0) 1547; SDAG-GFX11-NEXT: s_lshr_b32 s3, s2, 16 1548; SDAG-GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1549; SDAG-GFX11-NEXT: v_cmp_eq_f16_e64 s2, s2, |s3| 1550; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 1551; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 1552; SDAG-GFX11-NEXT: s_endpgm 1553; 1554; SDAG-GFX10-LABEL: v_fcmp_f16_oeq_with_fabs: 1555; SDAG-GFX10: ; %bb.0: 1556; SDAG-GFX10-NEXT: s_clause 0x1 1557; SDAG-GFX10-NEXT: s_load_dword s2, s[4:5], 0x2c 1558; SDAG-GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 1559; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0 1560; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0) 1561; SDAG-GFX10-NEXT: s_lshr_b32 s3, s2, 16 1562; SDAG-GFX10-NEXT: v_cmp_eq_f16_e64 s2, s2, |s3| 1563; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s2 1564; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[0:1] 1565; SDAG-GFX10-NEXT: s_endpgm 1566; 1567; GISEL-GFX11-LABEL: v_fcmp_f16_oeq_with_fabs: 1568; GISEL-GFX11: ; %bb.0: 1569; GISEL-GFX11-NEXT: s_clause 0x1 1570; GISEL-GFX11-NEXT: s_load_b32 s2, s[4:5], 0x2c 1571; GISEL-GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 1572; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0 1573; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0) 1574; GISEL-GFX11-NEXT: s_lshr_b32 s3, s2, 16 1575; GISEL-GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1576; GISEL-GFX11-NEXT: v_cmp_eq_f16_e64 s2, s2, |s3| 1577; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2 1578; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1] 1579; GISEL-GFX11-NEXT: s_endpgm 1580; 1581; GISEL-GFX10-LABEL: v_fcmp_f16_oeq_with_fabs: 1582; GISEL-GFX10: ; %bb.0: 1583; GISEL-GFX10-NEXT: s_clause 0x1 1584; GISEL-GFX10-NEXT: s_load_dword s2, s[4:5], 0x2c 1585; GISEL-GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 1586; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0 1587; GISEL-GFX10-NEXT: s_waitcnt 
lgkmcnt(0) 1588; GISEL-GFX10-NEXT: s_lshr_b32 s3, s2, 16 1589; GISEL-GFX10-NEXT: v_cmp_eq_f16_e64 s2, s2, |s3| 1590; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s2 1591; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1] 1592; GISEL-GFX10-NEXT: s_endpgm 1593 %temp = call half @llvm.fabs.f16(half %a) 1594 %result = call i32 @llvm.amdgcn.fcmp.f16(half %src, half %temp, i32 1) 1595 store i32 %result, ptr addrspace(1) %out 1596 ret void 1597} 1598 1599 1600define amdgpu_kernel void @v_fcmp_f16_oeq_both_operands_with_fabs(ptr addrspace(1) %out, half %src, half %a) { 1601; SDAG-GFX11-LABEL: v_fcmp_f16_oeq_both_operands_with_fabs: 1602; SDAG-GFX11: ; %bb.0: 1603; SDAG-GFX11-NEXT: s_clause 0x1 1604; SDAG-GFX11-NEXT: s_load_b32 s2, s[4:5], 0x2c 1605; SDAG-GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 1606; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0) 1607; SDAG-GFX11-NEXT: s_lshr_b32 s3, s2, 16 1608; SDAG-GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1609; SDAG-GFX11-NEXT: v_cmp_eq_f16_e64 s2, |s2|, |s3| 1610; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 1611; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 1612; SDAG-GFX11-NEXT: s_endpgm 1613; 1614; SDAG-GFX10-LABEL: v_fcmp_f16_oeq_both_operands_with_fabs: 1615; SDAG-GFX10: ; %bb.0: 1616; SDAG-GFX10-NEXT: s_clause 0x1 1617; SDAG-GFX10-NEXT: s_load_dword s2, s[4:5], 0x2c 1618; SDAG-GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 1619; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0 1620; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0) 1621; SDAG-GFX10-NEXT: s_lshr_b32 s3, s2, 16 1622; SDAG-GFX10-NEXT: v_cmp_eq_f16_e64 s2, |s2|, |s3| 1623; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s2 1624; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[0:1] 1625; SDAG-GFX10-NEXT: s_endpgm 1626; 1627; GISEL-GFX11-LABEL: v_fcmp_f16_oeq_both_operands_with_fabs: 1628; GISEL-GFX11: ; %bb.0: 1629; GISEL-GFX11-NEXT: s_clause 0x1 1630; GISEL-GFX11-NEXT: s_load_b32 s2, s[4:5], 0x2c 1631; GISEL-GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 
1632; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0 1633; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0) 1634; GISEL-GFX11-NEXT: s_lshr_b32 s3, s2, 16 1635; GISEL-GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1636; GISEL-GFX11-NEXT: v_cmp_eq_f16_e64 s2, |s2|, |s3| 1637; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2 1638; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1] 1639; GISEL-GFX11-NEXT: s_endpgm 1640; 1641; GISEL-GFX10-LABEL: v_fcmp_f16_oeq_both_operands_with_fabs: 1642; GISEL-GFX10: ; %bb.0: 1643; GISEL-GFX10-NEXT: s_clause 0x1 1644; GISEL-GFX10-NEXT: s_load_dword s2, s[4:5], 0x2c 1645; GISEL-GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 1646; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0 1647; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) 1648; GISEL-GFX10-NEXT: s_lshr_b32 s3, s2, 16 1649; GISEL-GFX10-NEXT: v_cmp_eq_f16_e64 s2, |s2|, |s3| 1650; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s2 1651; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1] 1652; GISEL-GFX10-NEXT: s_endpgm 1653 %temp = call half @llvm.fabs.f16(half %a) 1654 %src_input = call half @llvm.fabs.f16(half %src) 1655 %result = call i32 @llvm.amdgcn.fcmp.f16(half %src_input, half %temp, i32 1) 1656 store i32 %result, ptr addrspace(1) %out 1657 ret void 1658} 1659 1660define amdgpu_kernel void @v_fcmp_f16(ptr addrspace(1) %out, half %src) { 1661; SDAG-GFX11-LABEL: v_fcmp_f16: 1662; SDAG-GFX11: ; %bb.0: 1663; SDAG-GFX11-NEXT: s_endpgm 1664; 1665; SDAG-GFX10-LABEL: v_fcmp_f16: 1666; SDAG-GFX10: ; %bb.0: 1667; SDAG-GFX10-NEXT: s_endpgm 1668; 1669; GISEL-GFX11-LABEL: v_fcmp_f16: 1670; GISEL-GFX11: ; %bb.0: 1671; GISEL-GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 1672; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, 0 1673; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0) 1674; GISEL-GFX11-NEXT: global_store_b32 v0, v0, s[0:1] 1675; GISEL-GFX11-NEXT: s_endpgm 1676; 1677; GISEL-GFX10-LABEL: v_fcmp_f16: 1678; GISEL-GFX10: ; %bb.0: 1679; GISEL-GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 1680; GISEL-GFX10-NEXT: 
v_mov_b32_e32 v0, 0 1681; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) 1682; GISEL-GFX10-NEXT: global_store_dword v0, v0, s[0:1] 1683; GISEL-GFX10-NEXT: s_endpgm 1684 %result = call i32 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 -1) 1685 store i32 %result, ptr addrspace(1) %out 1686 ret void 1687} 1688 1689 1690define amdgpu_kernel void @v_fcmp_f16_oeq(ptr addrspace(1) %out, half %src) { 1691; SDAG-GFX11-LABEL: v_fcmp_f16_oeq: 1692; SDAG-GFX11: ; %bb.0: 1693; SDAG-GFX11-NEXT: s_clause 0x1 1694; SDAG-GFX11-NEXT: s_load_b32 s2, s[4:5], 0x2c 1695; SDAG-GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 1696; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0) 1697; SDAG-GFX11-NEXT: v_cmp_eq_f16_e64 s2, 0x5640, s2 1698; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1699; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 1700; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 1701; SDAG-GFX11-NEXT: s_endpgm 1702; 1703; SDAG-GFX10-LABEL: v_fcmp_f16_oeq: 1704; SDAG-GFX10: ; %bb.0: 1705; SDAG-GFX10-NEXT: s_clause 0x1 1706; SDAG-GFX10-NEXT: s_load_dword s2, s[4:5], 0x2c 1707; SDAG-GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 1708; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0 1709; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0) 1710; SDAG-GFX10-NEXT: v_cmp_eq_f16_e64 s2, 0x5640, s2 1711; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s2 1712; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[0:1] 1713; SDAG-GFX10-NEXT: s_endpgm 1714; 1715; GISEL-GFX11-LABEL: v_fcmp_f16_oeq: 1716; GISEL-GFX11: ; %bb.0: 1717; GISEL-GFX11-NEXT: s_clause 0x1 1718; GISEL-GFX11-NEXT: s_load_b32 s2, s[4:5], 0x2c 1719; GISEL-GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 1720; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0 1721; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0) 1722; GISEL-GFX11-NEXT: v_cmp_eq_f16_e64 s2, 0x5640, s2 1723; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1724; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2 1725; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1] 1726; GISEL-GFX11-NEXT: s_endpgm 1727; 1728; GISEL-GFX10-LABEL: 
v_fcmp_f16_oeq: 1729; GISEL-GFX10: ; %bb.0: 1730; GISEL-GFX10-NEXT: s_clause 0x1 1731; GISEL-GFX10-NEXT: s_load_dword s2, s[4:5], 0x2c 1732; GISEL-GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 1733; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0 1734; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) 1735; GISEL-GFX10-NEXT: v_cmp_eq_f16_e64 s2, 0x5640, s2 1736; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s2 1737; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1] 1738; GISEL-GFX10-NEXT: s_endpgm 1739 %result = call i32 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 1) 1740 store i32 %result, ptr addrspace(1) %out 1741 ret void 1742} 1743 1744 1745define amdgpu_kernel void @v_fcmp_f16_one(ptr addrspace(1) %out, half %src) { 1746; SDAG-GFX11-LABEL: v_fcmp_f16_one: 1747; SDAG-GFX11: ; %bb.0: 1748; SDAG-GFX11-NEXT: s_clause 0x1 1749; SDAG-GFX11-NEXT: s_load_b32 s2, s[4:5], 0x2c 1750; SDAG-GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 1751; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0) 1752; SDAG-GFX11-NEXT: v_cmp_neq_f16_e64 s2, 0x5640, s2 1753; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1754; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 1755; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 1756; SDAG-GFX11-NEXT: s_endpgm 1757; 1758; SDAG-GFX10-LABEL: v_fcmp_f16_one: 1759; SDAG-GFX10: ; %bb.0: 1760; SDAG-GFX10-NEXT: s_clause 0x1 1761; SDAG-GFX10-NEXT: s_load_dword s2, s[4:5], 0x2c 1762; SDAG-GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 1763; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0 1764; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0) 1765; SDAG-GFX10-NEXT: v_cmp_neq_f16_e64 s2, 0x5640, s2 1766; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s2 1767; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[0:1] 1768; SDAG-GFX10-NEXT: s_endpgm 1769; 1770; GISEL-GFX11-LABEL: v_fcmp_f16_one: 1771; GISEL-GFX11: ; %bb.0: 1772; GISEL-GFX11-NEXT: s_clause 0x1 1773; GISEL-GFX11-NEXT: s_load_b32 s2, s[4:5], 0x2c 1774; GISEL-GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 1775; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0 1776; 
GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0) 1777; GISEL-GFX11-NEXT: v_cmp_neq_f16_e64 s2, 0x5640, s2 1778; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1779; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2 1780; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1] 1781; GISEL-GFX11-NEXT: s_endpgm 1782; 1783; GISEL-GFX10-LABEL: v_fcmp_f16_one: 1784; GISEL-GFX10: ; %bb.0: 1785; GISEL-GFX10-NEXT: s_clause 0x1 1786; GISEL-GFX10-NEXT: s_load_dword s2, s[4:5], 0x2c 1787; GISEL-GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 1788; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0 1789; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) 1790; GISEL-GFX10-NEXT: v_cmp_neq_f16_e64 s2, 0x5640, s2 1791; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s2 1792; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1] 1793; GISEL-GFX10-NEXT: s_endpgm 1794 %result = call i32 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 6) 1795 store i32 %result, ptr addrspace(1) %out 1796 ret void 1797} 1798 1799 1800define amdgpu_kernel void @v_fcmp_f16_ogt(ptr addrspace(1) %out, half %src) { 1801; SDAG-GFX11-LABEL: v_fcmp_f16_ogt: 1802; SDAG-GFX11: ; %bb.0: 1803; SDAG-GFX11-NEXT: s_clause 0x1 1804; SDAG-GFX11-NEXT: s_load_b32 s2, s[4:5], 0x2c 1805; SDAG-GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 1806; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0) 1807; SDAG-GFX11-NEXT: v_cmp_lt_f16_e64 s2, 0x5640, s2 1808; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1809; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 1810; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 1811; SDAG-GFX11-NEXT: s_endpgm 1812; 1813; SDAG-GFX10-LABEL: v_fcmp_f16_ogt: 1814; SDAG-GFX10: ; %bb.0: 1815; SDAG-GFX10-NEXT: s_clause 0x1 1816; SDAG-GFX10-NEXT: s_load_dword s2, s[4:5], 0x2c 1817; SDAG-GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 1818; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0 1819; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0) 1820; SDAG-GFX10-NEXT: v_cmp_lt_f16_e64 s2, 0x5640, s2 1821; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s2 1822; SDAG-GFX10-NEXT: global_store_dword 
v0, v1, s[0:1] 1823; SDAG-GFX10-NEXT: s_endpgm 1824; 1825; GISEL-GFX11-LABEL: v_fcmp_f16_ogt: 1826; GISEL-GFX11: ; %bb.0: 1827; GISEL-GFX11-NEXT: s_clause 0x1 1828; GISEL-GFX11-NEXT: s_load_b32 s2, s[4:5], 0x2c 1829; GISEL-GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 1830; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0 1831; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0) 1832; GISEL-GFX11-NEXT: v_cmp_lt_f16_e64 s2, 0x5640, s2 1833; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1834; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2 1835; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1] 1836; GISEL-GFX11-NEXT: s_endpgm 1837; 1838; GISEL-GFX10-LABEL: v_fcmp_f16_ogt: 1839; GISEL-GFX10: ; %bb.0: 1840; GISEL-GFX10-NEXT: s_clause 0x1 1841; GISEL-GFX10-NEXT: s_load_dword s2, s[4:5], 0x2c 1842; GISEL-GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 1843; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0 1844; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) 1845; GISEL-GFX10-NEXT: v_cmp_lt_f16_e64 s2, 0x5640, s2 1846; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s2 1847; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1] 1848; GISEL-GFX10-NEXT: s_endpgm 1849 %result = call i32 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 2) 1850 store i32 %result, ptr addrspace(1) %out 1851 ret void 1852} 1853 1854 1855define amdgpu_kernel void @v_fcmp_f16_oge(ptr addrspace(1) %out, half %src) { 1856; SDAG-GFX11-LABEL: v_fcmp_f16_oge: 1857; SDAG-GFX11: ; %bb.0: 1858; SDAG-GFX11-NEXT: s_clause 0x1 1859; SDAG-GFX11-NEXT: s_load_b32 s2, s[4:5], 0x2c 1860; SDAG-GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 1861; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0) 1862; SDAG-GFX11-NEXT: v_cmp_le_f16_e64 s2, 0x5640, s2 1863; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1864; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 1865; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 1866; SDAG-GFX11-NEXT: s_endpgm 1867; 1868; SDAG-GFX10-LABEL: v_fcmp_f16_oge: 1869; SDAG-GFX10: ; %bb.0: 1870; SDAG-GFX10-NEXT: s_clause 0x1 1871; SDAG-GFX10-NEXT: 
s_load_dword s2, s[4:5], 0x2c 1872; SDAG-GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 1873; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0 1874; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0) 1875; SDAG-GFX10-NEXT: v_cmp_le_f16_e64 s2, 0x5640, s2 1876; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s2 1877; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[0:1] 1878; SDAG-GFX10-NEXT: s_endpgm 1879; 1880; GISEL-GFX11-LABEL: v_fcmp_f16_oge: 1881; GISEL-GFX11: ; %bb.0: 1882; GISEL-GFX11-NEXT: s_clause 0x1 1883; GISEL-GFX11-NEXT: s_load_b32 s2, s[4:5], 0x2c 1884; GISEL-GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 1885; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0 1886; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0) 1887; GISEL-GFX11-NEXT: v_cmp_le_f16_e64 s2, 0x5640, s2 1888; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1889; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2 1890; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1] 1891; GISEL-GFX11-NEXT: s_endpgm 1892; 1893; GISEL-GFX10-LABEL: v_fcmp_f16_oge: 1894; GISEL-GFX10: ; %bb.0: 1895; GISEL-GFX10-NEXT: s_clause 0x1 1896; GISEL-GFX10-NEXT: s_load_dword s2, s[4:5], 0x2c 1897; GISEL-GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 1898; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0 1899; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) 1900; GISEL-GFX10-NEXT: v_cmp_le_f16_e64 s2, 0x5640, s2 1901; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s2 1902; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1] 1903; GISEL-GFX10-NEXT: s_endpgm 1904 %result = call i32 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 3) 1905 store i32 %result, ptr addrspace(1) %out 1906 ret void 1907} 1908 1909 1910define amdgpu_kernel void @v_fcmp_f16_olt(ptr addrspace(1) %out, half %src) { 1911; SDAG-GFX11-LABEL: v_fcmp_f16_olt: 1912; SDAG-GFX11: ; %bb.0: 1913; SDAG-GFX11-NEXT: s_clause 0x1 1914; SDAG-GFX11-NEXT: s_load_b32 s2, s[4:5], 0x2c 1915; SDAG-GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 1916; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0) 1917; SDAG-GFX11-NEXT: v_cmp_gt_f16_e64 s2, 0x5640, s2 1918; SDAG-GFX11-NEXT: 
s_delay_alu instid0(VALU_DEP_1) 1919; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 1920; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 1921; SDAG-GFX11-NEXT: s_endpgm 1922; 1923; SDAG-GFX10-LABEL: v_fcmp_f16_olt: 1924; SDAG-GFX10: ; %bb.0: 1925; SDAG-GFX10-NEXT: s_clause 0x1 1926; SDAG-GFX10-NEXT: s_load_dword s2, s[4:5], 0x2c 1927; SDAG-GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 1928; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0 1929; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0) 1930; SDAG-GFX10-NEXT: v_cmp_gt_f16_e64 s2, 0x5640, s2 1931; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s2 1932; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[0:1] 1933; SDAG-GFX10-NEXT: s_endpgm 1934; 1935; GISEL-GFX11-LABEL: v_fcmp_f16_olt: 1936; GISEL-GFX11: ; %bb.0: 1937; GISEL-GFX11-NEXT: s_clause 0x1 1938; GISEL-GFX11-NEXT: s_load_b32 s2, s[4:5], 0x2c 1939; GISEL-GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 1940; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0 1941; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0) 1942; GISEL-GFX11-NEXT: v_cmp_gt_f16_e64 s2, 0x5640, s2 1943; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1944; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2 1945; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1] 1946; GISEL-GFX11-NEXT: s_endpgm 1947; 1948; GISEL-GFX10-LABEL: v_fcmp_f16_olt: 1949; GISEL-GFX10: ; %bb.0: 1950; GISEL-GFX10-NEXT: s_clause 0x1 1951; GISEL-GFX10-NEXT: s_load_dword s2, s[4:5], 0x2c 1952; GISEL-GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 1953; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0 1954; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) 1955; GISEL-GFX10-NEXT: v_cmp_gt_f16_e64 s2, 0x5640, s2 1956; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s2 1957; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1] 1958; GISEL-GFX10-NEXT: s_endpgm 1959 %result = call i32 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 4) 1960 store i32 %result, ptr addrspace(1) %out 1961 ret void 1962} 1963 1964 1965define amdgpu_kernel void @v_fcmp_f16_ole(ptr addrspace(1) %out, half %src) { 1966; 
SDAG-GFX11-LABEL: v_fcmp_f16_ole: 1967; SDAG-GFX11: ; %bb.0: 1968; SDAG-GFX11-NEXT: s_clause 0x1 1969; SDAG-GFX11-NEXT: s_load_b32 s2, s[4:5], 0x2c 1970; SDAG-GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 1971; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0) 1972; SDAG-GFX11-NEXT: v_cmp_ge_f16_e64 s2, 0x5640, s2 1973; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1974; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 1975; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 1976; SDAG-GFX11-NEXT: s_endpgm 1977; 1978; SDAG-GFX10-LABEL: v_fcmp_f16_ole: 1979; SDAG-GFX10: ; %bb.0: 1980; SDAG-GFX10-NEXT: s_clause 0x1 1981; SDAG-GFX10-NEXT: s_load_dword s2, s[4:5], 0x2c 1982; SDAG-GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 1983; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0 1984; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0) 1985; SDAG-GFX10-NEXT: v_cmp_ge_f16_e64 s2, 0x5640, s2 1986; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s2 1987; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[0:1] 1988; SDAG-GFX10-NEXT: s_endpgm 1989; 1990; GISEL-GFX11-LABEL: v_fcmp_f16_ole: 1991; GISEL-GFX11: ; %bb.0: 1992; GISEL-GFX11-NEXT: s_clause 0x1 1993; GISEL-GFX11-NEXT: s_load_b32 s2, s[4:5], 0x2c 1994; GISEL-GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 1995; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0 1996; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0) 1997; GISEL-GFX11-NEXT: v_cmp_ge_f16_e64 s2, 0x5640, s2 1998; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1999; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2 2000; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1] 2001; GISEL-GFX11-NEXT: s_endpgm 2002; 2003; GISEL-GFX10-LABEL: v_fcmp_f16_ole: 2004; GISEL-GFX10: ; %bb.0: 2005; GISEL-GFX10-NEXT: s_clause 0x1 2006; GISEL-GFX10-NEXT: s_load_dword s2, s[4:5], 0x2c 2007; GISEL-GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2008; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0 2009; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) 2010; GISEL-GFX10-NEXT: v_cmp_ge_f16_e64 s2, 0x5640, s2 2011; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s2 2012; 
GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1] 2013; GISEL-GFX10-NEXT: s_endpgm 2014 %result = call i32 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 5) 2015 store i32 %result, ptr addrspace(1) %out 2016 ret void 2017} 2018 2019 2020define amdgpu_kernel void @v_fcmp_f16_ueq(ptr addrspace(1) %out, half %src) { 2021; SDAG-GFX11-LABEL: v_fcmp_f16_ueq: 2022; SDAG-GFX11: ; %bb.0: 2023; SDAG-GFX11-NEXT: s_clause 0x1 2024; SDAG-GFX11-NEXT: s_load_b32 s2, s[4:5], 0x2c 2025; SDAG-GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2026; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0) 2027; SDAG-GFX11-NEXT: v_cmp_nlg_f16_e64 s2, 0x5640, s2 2028; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 2029; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 2030; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 2031; SDAG-GFX11-NEXT: s_endpgm 2032; 2033; SDAG-GFX10-LABEL: v_fcmp_f16_ueq: 2034; SDAG-GFX10: ; %bb.0: 2035; SDAG-GFX10-NEXT: s_clause 0x1 2036; SDAG-GFX10-NEXT: s_load_dword s2, s[4:5], 0x2c 2037; SDAG-GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2038; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0 2039; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0) 2040; SDAG-GFX10-NEXT: v_cmp_nlg_f16_e64 s2, 0x5640, s2 2041; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s2 2042; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[0:1] 2043; SDAG-GFX10-NEXT: s_endpgm 2044; 2045; GISEL-GFX11-LABEL: v_fcmp_f16_ueq: 2046; GISEL-GFX11: ; %bb.0: 2047; GISEL-GFX11-NEXT: s_clause 0x1 2048; GISEL-GFX11-NEXT: s_load_b32 s2, s[4:5], 0x2c 2049; GISEL-GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2050; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0 2051; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0) 2052; GISEL-GFX11-NEXT: v_cmp_nlg_f16_e64 s2, 0x5640, s2 2053; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 2054; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2 2055; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1] 2056; GISEL-GFX11-NEXT: s_endpgm 2057; 2058; GISEL-GFX10-LABEL: v_fcmp_f16_ueq: 2059; GISEL-GFX10: ; %bb.0: 2060; GISEL-GFX10-NEXT: 
s_clause 0x1 2061; GISEL-GFX10-NEXT: s_load_dword s2, s[4:5], 0x2c 2062; GISEL-GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2063; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0 2064; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) 2065; GISEL-GFX10-NEXT: v_cmp_nlg_f16_e64 s2, 0x5640, s2 2066; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s2 2067; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1] 2068; GISEL-GFX10-NEXT: s_endpgm 2069 %result = call i32 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 9) 2070 store i32 %result, ptr addrspace(1) %out 2071 ret void 2072} 2073 2074 2075define amdgpu_kernel void @v_fcmp_f16_une(ptr addrspace(1) %out, half %src) { 2076; SDAG-GFX11-LABEL: v_fcmp_f16_une: 2077; SDAG-GFX11: ; %bb.0: 2078; SDAG-GFX11-NEXT: s_clause 0x1 2079; SDAG-GFX11-NEXT: s_load_b32 s2, s[4:5], 0x2c 2080; SDAG-GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2081; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0) 2082; SDAG-GFX11-NEXT: v_cmp_neq_f16_e64 s2, 0x5640, s2 2083; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 2084; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 2085; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 2086; SDAG-GFX11-NEXT: s_endpgm 2087; 2088; SDAG-GFX10-LABEL: v_fcmp_f16_une: 2089; SDAG-GFX10: ; %bb.0: 2090; SDAG-GFX10-NEXT: s_clause 0x1 2091; SDAG-GFX10-NEXT: s_load_dword s2, s[4:5], 0x2c 2092; SDAG-GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2093; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0 2094; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0) 2095; SDAG-GFX10-NEXT: v_cmp_neq_f16_e64 s2, 0x5640, s2 2096; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s2 2097; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[0:1] 2098; SDAG-GFX10-NEXT: s_endpgm 2099; 2100; GISEL-GFX11-LABEL: v_fcmp_f16_une: 2101; GISEL-GFX11: ; %bb.0: 2102; GISEL-GFX11-NEXT: s_clause 0x1 2103; GISEL-GFX11-NEXT: s_load_b32 s2, s[4:5], 0x2c 2104; GISEL-GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2105; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0 2106; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0) 2107; GISEL-GFX11-NEXT: 
v_cmp_neq_f16_e64 s2, 0x5640, s2 2108; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 2109; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2 2110; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1] 2111; GISEL-GFX11-NEXT: s_endpgm 2112; 2113; GISEL-GFX10-LABEL: v_fcmp_f16_une: 2114; GISEL-GFX10: ; %bb.0: 2115; GISEL-GFX10-NEXT: s_clause 0x1 2116; GISEL-GFX10-NEXT: s_load_dword s2, s[4:5], 0x2c 2117; GISEL-GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2118; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0 2119; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) 2120; GISEL-GFX10-NEXT: v_cmp_neq_f16_e64 s2, 0x5640, s2 2121; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s2 2122; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1] 2123; GISEL-GFX10-NEXT: s_endpgm 2124 %result = call i32 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 14) 2125 store i32 %result, ptr addrspace(1) %out 2126 ret void 2127} 2128 2129 2130define amdgpu_kernel void @v_fcmp_f16_ugt(ptr addrspace(1) %out, half %src) { 2131; SDAG-GFX11-LABEL: v_fcmp_f16_ugt: 2132; SDAG-GFX11: ; %bb.0: 2133; SDAG-GFX11-NEXT: s_clause 0x1 2134; SDAG-GFX11-NEXT: s_load_b32 s2, s[4:5], 0x2c 2135; SDAG-GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2136; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0) 2137; SDAG-GFX11-NEXT: v_cmp_nge_f16_e64 s2, 0x5640, s2 2138; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 2139; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 2140; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 2141; SDAG-GFX11-NEXT: s_endpgm 2142; 2143; SDAG-GFX10-LABEL: v_fcmp_f16_ugt: 2144; SDAG-GFX10: ; %bb.0: 2145; SDAG-GFX10-NEXT: s_clause 0x1 2146; SDAG-GFX10-NEXT: s_load_dword s2, s[4:5], 0x2c 2147; SDAG-GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2148; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0 2149; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0) 2150; SDAG-GFX10-NEXT: v_cmp_nge_f16_e64 s2, 0x5640, s2 2151; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s2 2152; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[0:1] 2153; SDAG-GFX10-NEXT: s_endpgm 2154; 2155; 
GISEL-GFX11-LABEL: v_fcmp_f16_ugt: 2156; GISEL-GFX11: ; %bb.0: 2157; GISEL-GFX11-NEXT: s_clause 0x1 2158; GISEL-GFX11-NEXT: s_load_b32 s2, s[4:5], 0x2c 2159; GISEL-GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2160; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0 2161; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0) 2162; GISEL-GFX11-NEXT: v_cmp_nge_f16_e64 s2, 0x5640, s2 2163; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 2164; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2 2165; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1] 2166; GISEL-GFX11-NEXT: s_endpgm 2167; 2168; GISEL-GFX10-LABEL: v_fcmp_f16_ugt: 2169; GISEL-GFX10: ; %bb.0: 2170; GISEL-GFX10-NEXT: s_clause 0x1 2171; GISEL-GFX10-NEXT: s_load_dword s2, s[4:5], 0x2c 2172; GISEL-GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2173; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0 2174; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) 2175; GISEL-GFX10-NEXT: v_cmp_nge_f16_e64 s2, 0x5640, s2 2176; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s2 2177; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1] 2178; GISEL-GFX10-NEXT: s_endpgm 2179 %result = call i32 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 10) 2180 store i32 %result, ptr addrspace(1) %out 2181 ret void 2182} 2183 2184 2185define amdgpu_kernel void @v_fcmp_f16_uge(ptr addrspace(1) %out, half %src) { 2186; SDAG-GFX11-LABEL: v_fcmp_f16_uge: 2187; SDAG-GFX11: ; %bb.0: 2188; SDAG-GFX11-NEXT: s_clause 0x1 2189; SDAG-GFX11-NEXT: s_load_b32 s2, s[4:5], 0x2c 2190; SDAG-GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2191; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0) 2192; SDAG-GFX11-NEXT: v_cmp_ngt_f16_e64 s2, 0x5640, s2 2193; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 2194; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 2195; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 2196; SDAG-GFX11-NEXT: s_endpgm 2197; 2198; SDAG-GFX10-LABEL: v_fcmp_f16_uge: 2199; SDAG-GFX10: ; %bb.0: 2200; SDAG-GFX10-NEXT: s_clause 0x1 2201; SDAG-GFX10-NEXT: s_load_dword s2, s[4:5], 0x2c 2202; SDAG-GFX10-NEXT: 
s_load_dwordx2 s[0:1], s[4:5], 0x24 2203; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0 2204; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0) 2205; SDAG-GFX10-NEXT: v_cmp_ngt_f16_e64 s2, 0x5640, s2 2206; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s2 2207; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[0:1] 2208; SDAG-GFX10-NEXT: s_endpgm 2209; 2210; GISEL-GFX11-LABEL: v_fcmp_f16_uge: 2211; GISEL-GFX11: ; %bb.0: 2212; GISEL-GFX11-NEXT: s_clause 0x1 2213; GISEL-GFX11-NEXT: s_load_b32 s2, s[4:5], 0x2c 2214; GISEL-GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2215; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0 2216; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0) 2217; GISEL-GFX11-NEXT: v_cmp_ngt_f16_e64 s2, 0x5640, s2 2218; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 2219; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2 2220; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1] 2221; GISEL-GFX11-NEXT: s_endpgm 2222; 2223; GISEL-GFX10-LABEL: v_fcmp_f16_uge: 2224; GISEL-GFX10: ; %bb.0: 2225; GISEL-GFX10-NEXT: s_clause 0x1 2226; GISEL-GFX10-NEXT: s_load_dword s2, s[4:5], 0x2c 2227; GISEL-GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2228; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0 2229; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) 2230; GISEL-GFX10-NEXT: v_cmp_ngt_f16_e64 s2, 0x5640, s2 2231; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s2 2232; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1] 2233; GISEL-GFX10-NEXT: s_endpgm 2234 %result = call i32 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 11) 2235 store i32 %result, ptr addrspace(1) %out 2236 ret void 2237} 2238 2239 2240define amdgpu_kernel void @v_fcmp_f16_ult(ptr addrspace(1) %out, half %src) { 2241; SDAG-GFX11-LABEL: v_fcmp_f16_ult: 2242; SDAG-GFX11: ; %bb.0: 2243; SDAG-GFX11-NEXT: s_clause 0x1 2244; SDAG-GFX11-NEXT: s_load_b32 s2, s[4:5], 0x2c 2245; SDAG-GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2246; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0) 2247; SDAG-GFX11-NEXT: v_cmp_nle_f16_e64 s2, 0x5640, s2 2248; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 2249; SDAG-GFX11-NEXT: 
v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 2250; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 2251; SDAG-GFX11-NEXT: s_endpgm 2252; 2253; SDAG-GFX10-LABEL: v_fcmp_f16_ult: 2254; SDAG-GFX10: ; %bb.0: 2255; SDAG-GFX10-NEXT: s_clause 0x1 2256; SDAG-GFX10-NEXT: s_load_dword s2, s[4:5], 0x2c 2257; SDAG-GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2258; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0 2259; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0) 2260; SDAG-GFX10-NEXT: v_cmp_nle_f16_e64 s2, 0x5640, s2 2261; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s2 2262; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[0:1] 2263; SDAG-GFX10-NEXT: s_endpgm 2264; 2265; GISEL-GFX11-LABEL: v_fcmp_f16_ult: 2266; GISEL-GFX11: ; %bb.0: 2267; GISEL-GFX11-NEXT: s_clause 0x1 2268; GISEL-GFX11-NEXT: s_load_b32 s2, s[4:5], 0x2c 2269; GISEL-GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2270; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0 2271; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0) 2272; GISEL-GFX11-NEXT: v_cmp_nle_f16_e64 s2, 0x5640, s2 2273; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 2274; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2 2275; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1] 2276; GISEL-GFX11-NEXT: s_endpgm 2277; 2278; GISEL-GFX10-LABEL: v_fcmp_f16_ult: 2279; GISEL-GFX10: ; %bb.0: 2280; GISEL-GFX10-NEXT: s_clause 0x1 2281; GISEL-GFX10-NEXT: s_load_dword s2, s[4:5], 0x2c 2282; GISEL-GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2283; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0 2284; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) 2285; GISEL-GFX10-NEXT: v_cmp_nle_f16_e64 s2, 0x5640, s2 2286; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s2 2287; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1] 2288; GISEL-GFX10-NEXT: s_endpgm 2289 %result = call i32 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 12) 2290 store i32 %result, ptr addrspace(1) %out 2291 ret void 2292} 2293 2294define amdgpu_kernel void @v_fcmp_f16_o(ptr addrspace(1) %out, half %src) { 2295; SDAG-GFX11-LABEL: v_fcmp_f16_o: 2296; SDAG-GFX11: ; %bb.0: 2297; 
SDAG-GFX11-NEXT: s_clause 0x1 2298; SDAG-GFX11-NEXT: s_load_b32 s2, s[4:5], 0x2c 2299; SDAG-GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2300; SDAG-GFX11-NEXT: s_waitcnt lgkmcnt(0) 2301; SDAG-GFX11-NEXT: v_cmp_o_f16_e64 s2, 0x5640, s2 2302; SDAG-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 2303; SDAG-GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2 2304; SDAG-GFX11-NEXT: global_store_b32 v0, v1, s[0:1] 2305; SDAG-GFX11-NEXT: s_endpgm 2306; 2307; SDAG-GFX10-LABEL: v_fcmp_f16_o: 2308; SDAG-GFX10: ; %bb.0: 2309; SDAG-GFX10-NEXT: s_clause 0x1 2310; SDAG-GFX10-NEXT: s_load_dword s2, s[4:5], 0x2c 2311; SDAG-GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2312; SDAG-GFX10-NEXT: v_mov_b32_e32 v0, 0 2313; SDAG-GFX10-NEXT: s_waitcnt lgkmcnt(0) 2314; SDAG-GFX10-NEXT: v_cmp_o_f16_e64 s2, 0x5640, s2 2315; SDAG-GFX10-NEXT: v_mov_b32_e32 v1, s2 2316; SDAG-GFX10-NEXT: global_store_dword v0, v1, s[0:1] 2317; SDAG-GFX10-NEXT: s_endpgm 2318; 2319; GISEL-GFX11-LABEL: v_fcmp_f16_o: 2320; GISEL-GFX11: ; %bb.0: 2321; GISEL-GFX11-NEXT: s_clause 0x1 2322; GISEL-GFX11-NEXT: s_load_b32 s2, s[4:5], 0x2c 2323; GISEL-GFX11-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 2324; GISEL-GFX11-NEXT: v_mov_b32_e32 v1, 0 2325; GISEL-GFX11-NEXT: s_waitcnt lgkmcnt(0) 2326; GISEL-GFX11-NEXT: v_cmp_o_f16_e64 s2, 0x5640, s2 2327; GISEL-GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 2328; GISEL-GFX11-NEXT: v_mov_b32_e32 v0, s2 2329; GISEL-GFX11-NEXT: global_store_b32 v1, v0, s[0:1] 2330; GISEL-GFX11-NEXT: s_endpgm 2331; 2332; GISEL-GFX10-LABEL: v_fcmp_f16_o: 2333; GISEL-GFX10: ; %bb.0: 2334; GISEL-GFX10-NEXT: s_clause 0x1 2335; GISEL-GFX10-NEXT: s_load_dword s2, s[4:5], 0x2c 2336; GISEL-GFX10-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 2337; GISEL-GFX10-NEXT: v_mov_b32_e32 v1, 0 2338; GISEL-GFX10-NEXT: s_waitcnt lgkmcnt(0) 2339; GISEL-GFX10-NEXT: v_cmp_o_f16_e64 s2, 0x5640, s2 2340; GISEL-GFX10-NEXT: v_mov_b32_e32 v0, s2 2341; GISEL-GFX10-NEXT: global_store_dword v1, v0, s[0:1] 2342; GISEL-GFX10-NEXT: s_endpgm 2343 
%result = call i32 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 7)
store i32 %result, ptr addrspace(1) %out
ret void
}

; Checks selection of llvm.amdgcn.fcmp.f16 with condition code 8 (the `uno`
; predicate in LLVM's fcmp numbering) comparing the kernel argument %src
; against the constant 100.0. In every run configuration the expected output is
; a single v_cmp_u_f16_e64 whose first source operand is the literal 0x5640
; (100.0 encoded as an IEEE half) and whose result is stored to %out.
; The assertions below are generated; refresh them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define amdgpu_kernel void @v_fcmp_f16_uo(ptr addrspace(1) %out, half %src) {
; SDAG-GFX11-LABEL: v_fcmp_f16_uo:
; SDAG-GFX11:       ; %bb.0:
; SDAG-GFX11-NEXT:    s_clause 0x1
; SDAG-GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
; SDAG-GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; SDAG-GFX11-NEXT:    v_cmp_u_f16_e64 s2, 0x5640, s2
; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
; SDAG-GFX11-NEXT:    s_endpgm
;
; SDAG-GFX10-LABEL: v_fcmp_f16_uo:
; SDAG-GFX10:       ; %bb.0:
; SDAG-GFX10-NEXT:    s_clause 0x1
; SDAG-GFX10-NEXT:    s_load_dword s2, s[4:5], 0x2c
; SDAG-GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; SDAG-GFX10-NEXT:    v_cmp_u_f16_e64 s2, 0x5640, s2
; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s2
; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
; SDAG-GFX10-NEXT:    s_endpgm
;
; GISEL-GFX11-LABEL: v_fcmp_f16_uo:
; GISEL-GFX11:       ; %bb.0:
; GISEL-GFX11-NEXT:    s_clause 0x1
; GISEL-GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
; GISEL-GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GISEL-GFX11-NEXT:    v_cmp_u_f16_e64 s2, 0x5640, s2
; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
; GISEL-GFX11-NEXT:    s_endpgm
;
; GISEL-GFX10-LABEL: v_fcmp_f16_uo:
; GISEL-GFX10:       ; %bb.0:
; GISEL-GFX10-NEXT:    s_clause 0x1
; GISEL-GFX10-NEXT:    s_load_dword s2, s[4:5], 0x2c
; GISEL-GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GISEL-GFX10-NEXT:    v_cmp_u_f16_e64 s2, 0x5640, s2
; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s2
; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
; GISEL-GFX10-NEXT:    s_endpgm
  ; Third operand 8 selects the unordered (`uno`) comparison.
  %result = call i32 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 8)
  store i32 %result, ptr addrspace(1) %out
  ret void
}

; Checks selection of llvm.amdgcn.fcmp.f16 with condition code 13 (the `ule`
; predicate in LLVM's fcmp numbering) comparing the kernel argument %src
; against the constant 100.0. The expected instruction is v_cmp_nlt_f16_e64
; with the operands swapped (constant 0x5640 first, %src second):
; "not (100.0 < src)" holds exactly when src <= 100.0 or either value is NaN,
; which is the `ule` condition. The i32 result is stored to %out.
; The assertions below are generated; refresh them with
; utils/update_llc_test_checks.py instead of editing them by hand.
define amdgpu_kernel void @v_fcmp_f16_ule(ptr addrspace(1) %out, half %src) {
; SDAG-GFX11-LABEL: v_fcmp_f16_ule:
; SDAG-GFX11:       ; %bb.0:
; SDAG-GFX11-NEXT:    s_clause 0x1
; SDAG-GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
; SDAG-GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; SDAG-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; SDAG-GFX11-NEXT:    v_cmp_nlt_f16_e64 s2, 0x5640, s2
; SDAG-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; SDAG-GFX11-NEXT:    v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, s2
; SDAG-GFX11-NEXT:    global_store_b32 v0, v1, s[0:1]
; SDAG-GFX11-NEXT:    s_endpgm
;
; SDAG-GFX10-LABEL: v_fcmp_f16_ule:
; SDAG-GFX10:       ; %bb.0:
; SDAG-GFX10-NEXT:    s_clause 0x1
; SDAG-GFX10-NEXT:    s_load_dword s2, s[4:5], 0x2c
; SDAG-GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; SDAG-GFX10-NEXT:    v_mov_b32_e32 v0, 0
; SDAG-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; SDAG-GFX10-NEXT:    v_cmp_nlt_f16_e64 s2, 0x5640, s2
; SDAG-GFX10-NEXT:    v_mov_b32_e32 v1, s2
; SDAG-GFX10-NEXT:    global_store_dword v0, v1, s[0:1]
; SDAG-GFX10-NEXT:    s_endpgm
;
; GISEL-GFX11-LABEL: v_fcmp_f16_ule:
; GISEL-GFX11:       ; %bb.0:
; GISEL-GFX11-NEXT:    s_clause 0x1
; GISEL-GFX11-NEXT:    s_load_b32 s2, s[4:5], 0x2c
; GISEL-GFX11-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GISEL-GFX11-NEXT:    v_mov_b32_e32 v1, 0
; GISEL-GFX11-NEXT:    s_waitcnt lgkmcnt(0)
; GISEL-GFX11-NEXT:    v_cmp_nlt_f16_e64 s2, 0x5640, s2
; GISEL-GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GISEL-GFX11-NEXT:    v_mov_b32_e32 v0, s2
; GISEL-GFX11-NEXT:    global_store_b32 v1, v0, s[0:1]
; GISEL-GFX11-NEXT:    s_endpgm
;
; GISEL-GFX10-LABEL: v_fcmp_f16_ule:
; GISEL-GFX10:       ; %bb.0:
; GISEL-GFX10-NEXT:    s_clause 0x1
; GISEL-GFX10-NEXT:    s_load_dword s2, s[4:5], 0x2c
; GISEL-GFX10-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GISEL-GFX10-NEXT:    v_mov_b32_e32 v1, 0
; GISEL-GFX10-NEXT:    s_waitcnt lgkmcnt(0)
; GISEL-GFX10-NEXT:    v_cmp_nlt_f16_e64 s2, 0x5640, s2
; GISEL-GFX10-NEXT:    v_mov_b32_e32 v0, s2
; GISEL-GFX10-NEXT:    global_store_dword v1, v0, s[0:1]
; GISEL-GFX10-NEXT:    s_endpgm
  ; Third operand 13 selects the unordered-or-less-or-equal (`ule`) comparison.
  %result = call i32 @llvm.amdgcn.fcmp.f16(half %src, half 100.00, i32 13)
  store i32 %result, ptr addrspace(1) %out
  ret void
}

; Attribute group #0 is the one referenced by the intrinsic declarations at the
; top of this file.
attributes #0 = { nounwind readnone convergent }
