; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=amdgcn -mcpu=gfx1150 -amdgpu-sdwa-peephole=0 -verify-machineinstrs < %s | FileCheck -check-prefix=SDAG %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1150 -amdgpu-sdwa-peephole=0 -verify-machineinstrs < %s | FileCheck -check-prefix=GISEL %s

; Every function below compares the constant 2.0 (first IR operand) against
; the inreg argument %a, sign-extends the i1 result to i32 and stores it to
; %out. The expected assembly for both selectors uses a scalar compare whose
; register operand comes first and whose constant comes second, i.e. the
; compare appears with its operands swapped and its predicate adjusted.

;------------------------------------------------------------------------------
; f32 compares
;------------------------------------------------------------------------------

; fcmp olt 2.0, %a  ->  s_cmp_gt_f32 s2, 2.0
define amdgpu_vs void @fcmp_f32_olt_to_ogt(ptr addrspace(1) inreg %out, float inreg %a) {
; SDAG-LABEL: fcmp_f32_olt_to_ogt:
; SDAG:       ; %bb.0: ; %entry
; SDAG-NEXT:    s_cmp_gt_f32 s2, 2.0
; SDAG-NEXT:    v_mov_b32_e32 v0, 0
; SDAG-NEXT:    s_cselect_b32 s2, -1, 0
; SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s2
; SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
; SDAG-NEXT:    s_endpgm
;
; GISEL-LABEL: fcmp_f32_olt_to_ogt:
; GISEL:       ; %bb.0: ; %entry
; GISEL-NEXT:    s_cmp_gt_f32 s2, 2.0
; GISEL-NEXT:    v_mov_b32_e32 v1, 0
; GISEL-NEXT:    s_cselect_b32 s2, 1, 0
; GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GISEL-NEXT:    s_bfe_i32 s2, s2, 0x10000
; GISEL-NEXT:    v_mov_b32_e32 v0, s2
; GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
; GISEL-NEXT:    s_endpgm
entry:
  %cmp = fcmp olt float 2.0, %a
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %out
  ret void
}

; fcmp ogt 2.0, %a  ->  s_cmp_lt_f32 s2, 2.0
define amdgpu_vs void @fcmp_f32_ogt_to_olt(ptr addrspace(1) inreg %out, float inreg %a) {
; SDAG-LABEL: fcmp_f32_ogt_to_olt:
; SDAG:       ; %bb.0: ; %entry
; SDAG-NEXT:    s_cmp_lt_f32 s2, 2.0
; SDAG-NEXT:    v_mov_b32_e32 v0, 0
; SDAG-NEXT:    s_cselect_b32 s2, -1, 0
; SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s2
; SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
; SDAG-NEXT:    s_endpgm
;
; GISEL-LABEL: fcmp_f32_ogt_to_olt:
; GISEL:       ; %bb.0: ; %entry
; GISEL-NEXT:    s_cmp_lt_f32 s2, 2.0
; GISEL-NEXT:    v_mov_b32_e32 v1, 0
; GISEL-NEXT:    s_cselect_b32 s2, 1, 0
; GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GISEL-NEXT:    s_bfe_i32 s2, s2, 0x10000
; GISEL-NEXT:    v_mov_b32_e32 v0, s2
; GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
; GISEL-NEXT:    s_endpgm
entry:
  %cmp = fcmp ogt float 2.0, %a
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %out
  ret void
}

; fcmp ole 2.0, %a  ->  s_cmp_ge_f32 s2, 2.0
define amdgpu_vs void @fcmp_f32_ole_to_oge(ptr addrspace(1) inreg %out, float inreg %a) {
; SDAG-LABEL: fcmp_f32_ole_to_oge:
; SDAG:       ; %bb.0: ; %entry
; SDAG-NEXT:    s_cmp_ge_f32 s2, 2.0
; SDAG-NEXT:    v_mov_b32_e32 v0, 0
; SDAG-NEXT:    s_cselect_b32 s2, -1, 0
; SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s2
; SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
; SDAG-NEXT:    s_endpgm
;
; GISEL-LABEL: fcmp_f32_ole_to_oge:
; GISEL:       ; %bb.0: ; %entry
; GISEL-NEXT:    s_cmp_ge_f32 s2, 2.0
; GISEL-NEXT:    v_mov_b32_e32 v1, 0
; GISEL-NEXT:    s_cselect_b32 s2, 1, 0
; GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GISEL-NEXT:    s_bfe_i32 s2, s2, 0x10000
; GISEL-NEXT:    v_mov_b32_e32 v0, s2
; GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
; GISEL-NEXT:    s_endpgm
entry:
  %cmp = fcmp ole float 2.0, %a
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %out
  ret void
}

; fcmp oge 2.0, %a  ->  s_cmp_le_f32 s2, 2.0
define amdgpu_vs void @fcmp_f32_oge_to_ole(ptr addrspace(1) inreg %out, float inreg %a) {
; SDAG-LABEL: fcmp_f32_oge_to_ole:
; SDAG:       ; %bb.0: ; %entry
; SDAG-NEXT:    s_cmp_le_f32 s2, 2.0
; SDAG-NEXT:    v_mov_b32_e32 v0, 0
; SDAG-NEXT:    s_cselect_b32 s2, -1, 0
; SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s2
; SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
; SDAG-NEXT:    s_endpgm
;
; GISEL-LABEL: fcmp_f32_oge_to_ole:
; GISEL:       ; %bb.0: ; %entry
; GISEL-NEXT:    s_cmp_le_f32 s2, 2.0
; GISEL-NEXT:    v_mov_b32_e32 v1, 0
; GISEL-NEXT:    s_cselect_b32 s2, 1, 0
; GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GISEL-NEXT:    s_bfe_i32 s2, s2, 0x10000
; GISEL-NEXT:    v_mov_b32_e32 v0, s2
; GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
; GISEL-NEXT:    s_endpgm
entry:
  %cmp = fcmp oge float 2.0, %a
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %out
  ret void
}

; fcmp ult 2.0, %a  ->  s_cmp_nle_f32 s2, 2.0
define amdgpu_vs void @fcmp_f32_ult_to_ugt(ptr addrspace(1) inreg %out, float inreg %a) {
; SDAG-LABEL: fcmp_f32_ult_to_ugt:
; SDAG:       ; %bb.0: ; %entry
; SDAG-NEXT:    s_cmp_nle_f32 s2, 2.0
; SDAG-NEXT:    v_mov_b32_e32 v0, 0
; SDAG-NEXT:    s_cselect_b32 s2, -1, 0
; SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s2
; SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
; SDAG-NEXT:    s_endpgm
;
; GISEL-LABEL: fcmp_f32_ult_to_ugt:
; GISEL:       ; %bb.0: ; %entry
; GISEL-NEXT:    s_cmp_nle_f32 s2, 2.0
; GISEL-NEXT:    v_mov_b32_e32 v1, 0
; GISEL-NEXT:    s_cselect_b32 s2, 1, 0
; GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GISEL-NEXT:    s_bfe_i32 s2, s2, 0x10000
; GISEL-NEXT:    v_mov_b32_e32 v0, s2
; GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
; GISEL-NEXT:    s_endpgm
entry:
  %cmp = fcmp ult float 2.0, %a
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %out
  ret void
}

; fcmp ugt 2.0, %a  ->  s_cmp_nge_f32 s2, 2.0
define amdgpu_vs void @fcmp_f32_ugt_to_ult(ptr addrspace(1) inreg %out, float inreg %a) {
; SDAG-LABEL: fcmp_f32_ugt_to_ult:
; SDAG:       ; %bb.0: ; %entry
; SDAG-NEXT:    s_cmp_nge_f32 s2, 2.0
; SDAG-NEXT:    v_mov_b32_e32 v0, 0
; SDAG-NEXT:    s_cselect_b32 s2, -1, 0
; SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s2
; SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
; SDAG-NEXT:    s_endpgm
;
; GISEL-LABEL: fcmp_f32_ugt_to_ult:
; GISEL:       ; %bb.0: ; %entry
; GISEL-NEXT:    s_cmp_nge_f32 s2, 2.0
; GISEL-NEXT:    v_mov_b32_e32 v1, 0
; GISEL-NEXT:    s_cselect_b32 s2, 1, 0
; GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GISEL-NEXT:    s_bfe_i32 s2, s2, 0x10000
; GISEL-NEXT:    v_mov_b32_e32 v0, s2
; GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
; GISEL-NEXT:    s_endpgm
entry:
  %cmp = fcmp ugt float 2.0, %a
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %out
  ret void
}

; fcmp ule 2.0, %a  ->  s_cmp_nlt_f32 s2, 2.0
define amdgpu_vs void @fcmp_f32_ule_to_uge(ptr addrspace(1) inreg %out, float inreg %a) {
; SDAG-LABEL: fcmp_f32_ule_to_uge:
; SDAG:       ; %bb.0: ; %entry
; SDAG-NEXT:    s_cmp_nlt_f32 s2, 2.0
; SDAG-NEXT:    v_mov_b32_e32 v0, 0
; SDAG-NEXT:    s_cselect_b32 s2, -1, 0
; SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s2
; SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
; SDAG-NEXT:    s_endpgm
;
; GISEL-LABEL: fcmp_f32_ule_to_uge:
; GISEL:       ; %bb.0: ; %entry
; GISEL-NEXT:    s_cmp_nlt_f32 s2, 2.0
; GISEL-NEXT:    v_mov_b32_e32 v1, 0
; GISEL-NEXT:    s_cselect_b32 s2, 1, 0
; GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GISEL-NEXT:    s_bfe_i32 s2, s2, 0x10000
; GISEL-NEXT:    v_mov_b32_e32 v0, s2
; GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
; GISEL-NEXT:    s_endpgm
entry:
  %cmp = fcmp ule float 2.0, %a
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %out
  ret void
}

; fcmp uge 2.0, %a  ->  s_cmp_ngt_f32 s2, 2.0
define amdgpu_vs void @fcmp_f32_uge_to_ule(ptr addrspace(1) inreg %out, float inreg %a) {
; SDAG-LABEL: fcmp_f32_uge_to_ule:
; SDAG:       ; %bb.0: ; %entry
; SDAG-NEXT:    s_cmp_ngt_f32 s2, 2.0
; SDAG-NEXT:    v_mov_b32_e32 v0, 0
; SDAG-NEXT:    s_cselect_b32 s2, -1, 0
; SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s2
; SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
; SDAG-NEXT:    s_endpgm
;
; GISEL-LABEL: fcmp_f32_uge_to_ule:
; GISEL:       ; %bb.0: ; %entry
; GISEL-NEXT:    s_cmp_ngt_f32 s2, 2.0
; GISEL-NEXT:    v_mov_b32_e32 v1, 0
; GISEL-NEXT:    s_cselect_b32 s2, 1, 0
; GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GISEL-NEXT:    s_bfe_i32 s2, s2, 0x10000
; GISEL-NEXT:    v_mov_b32_e32 v0, s2
; GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
; GISEL-NEXT:    s_endpgm
entry:
  %cmp = fcmp uge float 2.0, %a
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %out
  ret void
}

;------------------------------------------------------------------------------
; f16 compares (the constant 2.0 is expected as the literal 0x4000)
;------------------------------------------------------------------------------

; fcmp olt 2.0, %a  ->  s_cmp_gt_f16 s2, 0x4000
define amdgpu_vs void @fcmp_f16_olt_to_ogt(ptr addrspace(1) inreg %out, half inreg %a) {
; SDAG-LABEL: fcmp_f16_olt_to_ogt:
; SDAG:       ; %bb.0: ; %entry
; SDAG-NEXT:    s_cmp_gt_f16 s2, 0x4000
; SDAG-NEXT:    v_mov_b32_e32 v0, 0
; SDAG-NEXT:    s_cselect_b32 s2, -1, 0
; SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s2
; SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
; SDAG-NEXT:    s_endpgm
;
; GISEL-LABEL: fcmp_f16_olt_to_ogt:
; GISEL:       ; %bb.0: ; %entry
; GISEL-NEXT:    s_cmp_gt_f16 s2, 0x4000
; GISEL-NEXT:    v_mov_b32_e32 v1, 0
; GISEL-NEXT:    s_cselect_b32 s2, 1, 0
; GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GISEL-NEXT:    s_bfe_i32 s2, s2, 0x10000
; GISEL-NEXT:    v_mov_b32_e32 v0, s2
; GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
; GISEL-NEXT:    s_endpgm
entry:
  %cmp = fcmp olt half 2.0, %a
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %out
  ret void
}

; fcmp ogt 2.0, %a  ->  s_cmp_lt_f16 s2, 0x4000
define amdgpu_vs void @fcmp_f16_ogt_to_olt(ptr addrspace(1) inreg %out, half inreg %a) {
; SDAG-LABEL: fcmp_f16_ogt_to_olt:
; SDAG:       ; %bb.0: ; %entry
; SDAG-NEXT:    s_cmp_lt_f16 s2, 0x4000
; SDAG-NEXT:    v_mov_b32_e32 v0, 0
; SDAG-NEXT:    s_cselect_b32 s2, -1, 0
; SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s2
; SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
; SDAG-NEXT:    s_endpgm
;
; GISEL-LABEL: fcmp_f16_ogt_to_olt:
; GISEL:       ; %bb.0: ; %entry
; GISEL-NEXT:    s_cmp_lt_f16 s2, 0x4000
; GISEL-NEXT:    v_mov_b32_e32 v1, 0
; GISEL-NEXT:    s_cselect_b32 s2, 1, 0
; GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GISEL-NEXT:    s_bfe_i32 s2, s2, 0x10000
; GISEL-NEXT:    v_mov_b32_e32 v0, s2
; GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
; GISEL-NEXT:    s_endpgm
entry:
  %cmp = fcmp ogt half 2.0, %a
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %out
  ret void
}

; fcmp ole 2.0, %a  ->  s_cmp_ge_f16 s2, 0x4000
define amdgpu_vs void @fcmp_f16_ole_to_oge(ptr addrspace(1) inreg %out, half inreg %a) {
; SDAG-LABEL: fcmp_f16_ole_to_oge:
; SDAG:       ; %bb.0: ; %entry
; SDAG-NEXT:    s_cmp_ge_f16 s2, 0x4000
; SDAG-NEXT:    v_mov_b32_e32 v0, 0
; SDAG-NEXT:    s_cselect_b32 s2, -1, 0
; SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s2
; SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
; SDAG-NEXT:    s_endpgm
;
; GISEL-LABEL: fcmp_f16_ole_to_oge:
; GISEL:       ; %bb.0: ; %entry
; GISEL-NEXT:    s_cmp_ge_f16 s2, 0x4000
; GISEL-NEXT:    v_mov_b32_e32 v1, 0
; GISEL-NEXT:    s_cselect_b32 s2, 1, 0
; GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GISEL-NEXT:    s_bfe_i32 s2, s2, 0x10000
; GISEL-NEXT:    v_mov_b32_e32 v0, s2
; GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
; GISEL-NEXT:    s_endpgm
entry:
  %cmp = fcmp ole half 2.0, %a
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %out
  ret void
}

; fcmp oge 2.0, %a  ->  s_cmp_le_f16 s2, 0x4000
define amdgpu_vs void @fcmp_f16_oge_to_ole(ptr addrspace(1) inreg %out, half inreg %a) {
; SDAG-LABEL: fcmp_f16_oge_to_ole:
; SDAG:       ; %bb.0: ; %entry
; SDAG-NEXT:    s_cmp_le_f16 s2, 0x4000
; SDAG-NEXT:    v_mov_b32_e32 v0, 0
; SDAG-NEXT:    s_cselect_b32 s2, -1, 0
; SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s2
; SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
; SDAG-NEXT:    s_endpgm
;
; GISEL-LABEL: fcmp_f16_oge_to_ole:
; GISEL:       ; %bb.0: ; %entry
; GISEL-NEXT:    s_cmp_le_f16 s2, 0x4000
; GISEL-NEXT:    v_mov_b32_e32 v1, 0
; GISEL-NEXT:    s_cselect_b32 s2, 1, 0
; GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GISEL-NEXT:    s_bfe_i32 s2, s2, 0x10000
; GISEL-NEXT:    v_mov_b32_e32 v0, s2
; GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
; GISEL-NEXT:    s_endpgm
entry:
  %cmp = fcmp oge half 2.0, %a
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %out
  ret void
}

; fcmp ult 2.0, %a  ->  s_cmp_nle_f16 s2, 0x4000
define amdgpu_vs void @fcmp_f16_ult_to_ugt(ptr addrspace(1) inreg %out, half inreg %a) {
; SDAG-LABEL: fcmp_f16_ult_to_ugt:
; SDAG:       ; %bb.0: ; %entry
; SDAG-NEXT:    s_cmp_nle_f16 s2, 0x4000
; SDAG-NEXT:    v_mov_b32_e32 v0, 0
; SDAG-NEXT:    s_cselect_b32 s2, -1, 0
; SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s2
; SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
; SDAG-NEXT:    s_endpgm
;
; GISEL-LABEL: fcmp_f16_ult_to_ugt:
; GISEL:       ; %bb.0: ; %entry
; GISEL-NEXT:    s_cmp_nle_f16 s2, 0x4000
; GISEL-NEXT:    v_mov_b32_e32 v1, 0
; GISEL-NEXT:    s_cselect_b32 s2, 1, 0
; GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GISEL-NEXT:    s_bfe_i32 s2, s2, 0x10000
; GISEL-NEXT:    v_mov_b32_e32 v0, s2
; GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
; GISEL-NEXT:    s_endpgm
entry:
  %cmp = fcmp ult half 2.0, %a
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %out
  ret void
}

; fcmp ugt 2.0, %a  ->  s_cmp_nge_f16 s2, 0x4000
define amdgpu_vs void @fcmp_f16_ugt_to_ult(ptr addrspace(1) inreg %out, half inreg %a) {
; SDAG-LABEL: fcmp_f16_ugt_to_ult:
; SDAG:       ; %bb.0: ; %entry
; SDAG-NEXT:    s_cmp_nge_f16 s2, 0x4000
; SDAG-NEXT:    v_mov_b32_e32 v0, 0
; SDAG-NEXT:    s_cselect_b32 s2, -1, 0
; SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s2
; SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
; SDAG-NEXT:    s_endpgm
;
; GISEL-LABEL: fcmp_f16_ugt_to_ult:
; GISEL:       ; %bb.0: ; %entry
; GISEL-NEXT:    s_cmp_nge_f16 s2, 0x4000
; GISEL-NEXT:    v_mov_b32_e32 v1, 0
; GISEL-NEXT:    s_cselect_b32 s2, 1, 0
; GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GISEL-NEXT:    s_bfe_i32 s2, s2, 0x10000
; GISEL-NEXT:    v_mov_b32_e32 v0, s2
; GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
; GISEL-NEXT:    s_endpgm
entry:
  %cmp = fcmp ugt half 2.0, %a
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %out
  ret void
}

; fcmp ule 2.0, %a  ->  s_cmp_nlt_f16 s2, 0x4000
; Review note: this is a half-precision test, but unlike the other f16
; functions in this file its name carries no `f16` infix.
define amdgpu_vs void @fcmp_ule_to_uge(ptr addrspace(1) inreg %out, half inreg %a) {
; SDAG-LABEL: fcmp_ule_to_uge:
; SDAG:       ; %bb.0: ; %entry
; SDAG-NEXT:    s_cmp_nlt_f16 s2, 0x4000
; SDAG-NEXT:    v_mov_b32_e32 v0, 0
; SDAG-NEXT:    s_cselect_b32 s2, -1, 0
; SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s2
; SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
; SDAG-NEXT:    s_endpgm
;
; GISEL-LABEL: fcmp_ule_to_uge:
; GISEL:       ; %bb.0: ; %entry
; GISEL-NEXT:    s_cmp_nlt_f16 s2, 0x4000
; GISEL-NEXT:    v_mov_b32_e32 v1, 0
; GISEL-NEXT:    s_cselect_b32 s2, 1, 0
; GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GISEL-NEXT:    s_bfe_i32 s2, s2, 0x10000
; GISEL-NEXT:    v_mov_b32_e32 v0, s2
; GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
; GISEL-NEXT:    s_endpgm
entry:
  %cmp = fcmp ule half 2.0, %a
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %out
  ret void
}

; fcmp uge 2.0, %a  ->  s_cmp_ngt_f16 s2, 0x4000
; Review note: this is a half-precision test, but unlike the other f16
; functions in this file its name carries no `f16` infix.
define amdgpu_vs void @fcmp_uge_to_ule(ptr addrspace(1) inreg %out, half inreg %a) {
; SDAG-LABEL: fcmp_uge_to_ule:
; SDAG:       ; %bb.0: ; %entry
; SDAG-NEXT:    s_cmp_ngt_f16 s2, 0x4000
; SDAG-NEXT:    v_mov_b32_e32 v0, 0
; SDAG-NEXT:    s_cselect_b32 s2, -1, 0
; SDAG-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
; SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, -1, s2
; SDAG-NEXT:    global_store_b32 v0, v1, s[0:1]
; SDAG-NEXT:    s_endpgm
;
; GISEL-LABEL: fcmp_uge_to_ule:
; GISEL:       ; %bb.0: ; %entry
; GISEL-NEXT:    s_cmp_ngt_f16 s2, 0x4000
; GISEL-NEXT:    v_mov_b32_e32 v1, 0
; GISEL-NEXT:    s_cselect_b32 s2, 1, 0
; GISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
; GISEL-NEXT:    s_bfe_i32 s2, s2, 0x10000
; GISEL-NEXT:    v_mov_b32_e32 v0, s2
; GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
; GISEL-NEXT:    s_endpgm
entry:
  %cmp = fcmp uge half 2.0, %a
  %ext = sext i1 %cmp to i32
  store i32 %ext, ptr addrspace(1) %out
  ret void
}