; Overview (reviewer note; everything below this header is unchanged):
;
; AMDGPU codegen test, run through `llc -mtriple=amdgcn`. Each function builds
; a `select` on `icmp eq i32 %c, 0` whose half-precision operands are
; combinations of llvm.fabs.f16 results, fneg results, plain arguments and
; floating-point constants, and then feeds the selected value into an fadd.
; In two variants (add_select_fabs_negk_negk_f16, add_select_fneg_negk_negk_f16)
; the fabs/fneg is applied to the select result instead of to its operands.
; The *_multi_use_* / *_multi_store_use_* variants additionally return a second
; value that reuses one of the fabs/fneg operands.
;
; RUN lines cover three subtargets -- hawaii (prefix CI), fiji (prefix VI) and
; gfx1100 (prefix GFX11) -- each once with default options and once with
; -enable-no-signed-zeros-fp-math. The per-mode prefixes (*-SAFE / *-NSZ) are
; declared on the RUN lines, but every check in this portion of the file uses
; only the shared CI / VI / GFX11 prefixes.
;
; What the recorded output shows:
;   * CI converts the half arguments with v_cvt_f16_f32 / v_cvt_f32_f16 and
;     does the add in f32; VI and GFX11 use f16 add/sub instructions directly.
;   * When both select operands carry the same modifier, the checks show a
;     single |v0| source modifier (fabs) or a v_sub (fneg) on the final
;     add instead of separate and/xor instructions.
;   * The "FIXME: fabs should fold away" case still applies |v0| to a select
;     of two negative constants.
;
; The CHECK lines are autogenerated (see the NOTE below); regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.
1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -mtriple=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefixes=CI,CI-SAFE %s 3; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=VI,VI-SAFE %s 4; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11,GFX11-SAFE %s 5 6; RUN: llc -mtriple=amdgcn -mcpu=hawaii -enable-no-signed-zeros-fp-math < %s | FileCheck -check-prefixes=CI,CI-NSZ %s 7; RUN: llc -mtriple=amdgcn -mcpu=fiji -enable-no-signed-zeros-fp-math < %s | FileCheck -check-prefixes=VI,VI-NSZ %s 8; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -enable-no-signed-zeros-fp-math < %s | FileCheck -check-prefixes=GFX11,GFX11-NSZ %s 9 10define half @add_select_fabs_fabs_f16(i32 %c, half %x, half %y, half %z) { 11; CI-LABEL: add_select_fabs_fabs_f16: 12; CI: ; %bb.0: 13; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 15; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 16; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 17; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 18; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 19; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 20; CI-NEXT: v_cvt_f32_f16_e32 v3, v3 21; CI-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 22; CI-NEXT: v_add_f32_e64 v0, |v0|, v3 23; CI-NEXT: s_setpc_b64 s[30:31] 24; 25; VI-LABEL: add_select_fabs_fabs_f16: 26; VI: ; %bb.0: 27; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 28; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 29; VI-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 30; VI-NEXT: v_add_f16_e64 v0, |v0|, v3 31; VI-NEXT: s_setpc_b64 s[30:31] 32; 33; GFX11-LABEL: add_select_fabs_fabs_f16: 34; GFX11: ; %bb.0: 35; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 36; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 37; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo 38; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 39; GFX11-NEXT: v_add_f16_e64 v0, |v0|, v3 40; GFX11-NEXT: s_setpc_b64 s[30:31] 41 %cmp = icmp eq i32 %c, 0 42 %fabs.x = call half @llvm.fabs.f16(half %x) 43 
%fabs.y = call half @llvm.fabs.f16(half %y) 44 %select = select i1 %cmp, half %fabs.x, half %fabs.y 45 %add = fadd half %select, %z 46 ret half %add 47} 48 49define { half, half } @add_select_multi_use_lhs_fabs_fabs_f16(i32 %c, half %x, half %y, half %w, half %z) { 50; CI-LABEL: add_select_multi_use_lhs_fabs_fabs_f16: 51; CI: ; %bb.0: 52; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 53; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 54; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 55; CI-NEXT: v_cvt_f16_f32_e32 v4, v4 56; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 57; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 58; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 59; CI-NEXT: v_cvt_f32_f16_e32 v4, v4 60; CI-NEXT: v_cvt_f32_f16_e32 v3, v3 61; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 62; CI-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 63; CI-NEXT: v_add_f32_e64 v0, |v0|, v4 64; CI-NEXT: v_add_f32_e64 v1, |v1|, v3 65; CI-NEXT: s_setpc_b64 s[30:31] 66; 67; VI-LABEL: add_select_multi_use_lhs_fabs_fabs_f16: 68; VI: ; %bb.0: 69; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 70; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 71; VI-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 72; VI-NEXT: v_add_f16_e64 v0, |v0|, v4 73; VI-NEXT: v_add_f16_e64 v1, |v1|, v3 74; VI-NEXT: s_setpc_b64 s[30:31] 75; 76; GFX11-LABEL: add_select_multi_use_lhs_fabs_fabs_f16: 77; GFX11: ; %bb.0: 78; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 79; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 80; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo 81; GFX11-NEXT: v_add_f16_e64 v1, |v1|, v3 82; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) 83; GFX11-NEXT: v_add_f16_e64 v0, |v0|, v4 84; GFX11-NEXT: s_setpc_b64 s[30:31] 85 %cmp = icmp eq i32 %c, 0 86 %fabs.x = call half @llvm.fabs.f16(half %x) 87 %fabs.y = call half @llvm.fabs.f16(half %y) 88 %select = select i1 %cmp, half %fabs.x, half %fabs.y 89 %add0 = fadd half %select, %z 90 %add1 = fadd half %fabs.x, %w 91 %insert.0 = insertvalue { half, half } poison, half %add0, 0 92 %insert.1 = insertvalue { half, half } %insert.0, half 
%add1, 1 93 ret { half, half } %insert.1 94} 95 96define { half, half } @add_select_multi_store_use_lhs_fabs_fabs_f16(i32 %c, half %x, half %y, half %z) { 97; CI-LABEL: add_select_multi_store_use_lhs_fabs_fabs_f16: 98; CI: ; %bb.0: 99; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 100; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 101; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 102; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 103; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 104; CI-NEXT: v_cvt_f32_f16_e32 v4, v1 105; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 106; CI-NEXT: v_cvt_f32_f16_e32 v3, v3 107; CI-NEXT: v_and_b32_e32 v1, 0x7fffffff, v4 108; CI-NEXT: v_cndmask_b32_e32 v0, v2, v4, vcc 109; CI-NEXT: v_add_f32_e64 v0, |v0|, v3 110; CI-NEXT: s_setpc_b64 s[30:31] 111; 112; VI-LABEL: add_select_multi_store_use_lhs_fabs_fabs_f16: 113; VI: ; %bb.0: 114; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 115; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 116; VI-NEXT: v_and_b32_e32 v4, 0x7fff, v1 117; VI-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 118; VI-NEXT: v_add_f16_e64 v0, |v0|, v3 119; VI-NEXT: v_mov_b32_e32 v1, v4 120; VI-NEXT: s_setpc_b64 s[30:31] 121; 122; GFX11-LABEL: add_select_multi_store_use_lhs_fabs_fabs_f16: 123; GFX11: ; %bb.0: 124; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 125; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 126; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo 127; GFX11-NEXT: v_and_b32_e32 v1, 0x7fff, v1 128; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) 129; GFX11-NEXT: v_add_f16_e64 v0, |v0|, v3 130; GFX11-NEXT: s_setpc_b64 s[30:31] 131 %cmp = icmp eq i32 %c, 0 132 %fabs.x = call half @llvm.fabs.f16(half %x) 133 %fabs.y = call half @llvm.fabs.f16(half %y) 134 %select = select i1 %cmp, half %fabs.x, half %fabs.y 135 %add0 = fadd half %select, %z 136 %insert.0 = insertvalue { half, half } poison, half %add0, 0 137 %insert.1 = insertvalue { half, half } %insert.0, half %fabs.x, 1 138 ret { half, half } %insert.1 139} 140 141define { half, half } 
@add_select_multi_use_rhs_fabs_fabs_f16(i32 %c, half %x, half %y, half %z, half %w) { 142; CI-LABEL: add_select_multi_use_rhs_fabs_fabs_f16: 143; CI: ; %bb.0: 144; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 145; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 146; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 147; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 148; CI-NEXT: v_cvt_f16_f32_e32 v4, v4 149; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 150; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 151; CI-NEXT: v_cvt_f32_f16_e32 v3, v3 152; CI-NEXT: v_cvt_f32_f16_e32 v4, v4 153; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 154; CI-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 155; CI-NEXT: v_add_f32_e64 v0, |v0|, v3 156; CI-NEXT: v_add_f32_e64 v1, |v2|, v4 157; CI-NEXT: s_setpc_b64 s[30:31] 158; 159; VI-LABEL: add_select_multi_use_rhs_fabs_fabs_f16: 160; VI: ; %bb.0: 161; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 162; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 163; VI-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 164; VI-NEXT: v_add_f16_e64 v0, |v0|, v3 165; VI-NEXT: v_add_f16_e64 v1, |v2|, v4 166; VI-NEXT: s_setpc_b64 s[30:31] 167; 168; GFX11-LABEL: add_select_multi_use_rhs_fabs_fabs_f16: 169; GFX11: ; %bb.0: 170; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 171; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 172; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo 173; GFX11-NEXT: v_add_f16_e64 v1, |v2|, v4 174; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) 175; GFX11-NEXT: v_add_f16_e64 v0, |v0|, v3 176; GFX11-NEXT: s_setpc_b64 s[30:31] 177 %cmp = icmp eq i32 %c, 0 178 %fabs.x = call half @llvm.fabs.f16(half %x) 179 %fabs.y = call half @llvm.fabs.f16(half %y) 180 %select = select i1 %cmp, half %fabs.x, half %fabs.y 181 %add0 = fadd half %select, %z 182 %add1 = fadd half %fabs.y, %w 183 %insert.0 = insertvalue { half, half } poison, half %add0, 0 184 %insert.1 = insertvalue { half, half } %insert.0, half %add1, 1 185 ret { half, half } %insert.1 186} 187 188define half @add_select_fabs_var_f16(i32 %c, half %x, half %y, half %z) { 189; 
CI-LABEL: add_select_fabs_var_f16: 190; CI: ; %bb.0: 191; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 192; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 193; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 194; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 195; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 196; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 197; CI-NEXT: v_cvt_f32_f16_e64 v1, |v1| 198; CI-NEXT: v_cvt_f32_f16_e32 v3, v3 199; CI-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 200; CI-NEXT: v_add_f32_e32 v0, v0, v3 201; CI-NEXT: s_setpc_b64 s[30:31] 202; 203; VI-LABEL: add_select_fabs_var_f16: 204; VI: ; %bb.0: 205; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 206; VI-NEXT: v_and_b32_e32 v1, 0x7fff, v1 207; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 208; VI-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 209; VI-NEXT: v_add_f16_e32 v0, v0, v3 210; VI-NEXT: s_setpc_b64 s[30:31] 211; 212; GFX11-LABEL: add_select_fabs_var_f16: 213; GFX11: ; %bb.0: 214; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 215; GFX11-NEXT: v_and_b32_e32 v1, 0x7fff, v1 216; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 217; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 218; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo 219; GFX11-NEXT: v_add_f16_e32 v0, v0, v3 220; GFX11-NEXT: s_setpc_b64 s[30:31] 221 %cmp = icmp eq i32 %c, 0 222 %fabs.x = call half @llvm.fabs.f16(half %x) 223 %select = select i1 %cmp, half %fabs.x, half %y 224 %add = fadd half %select, %z 225 ret half %add 226} 227 228define half @add_select_fabs_negk_f16(i32 %c, half %x, half %y) { 229; CI-LABEL: add_select_fabs_negk_f16: 230; CI: ; %bb.0: 231; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 232; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 233; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 234; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 235; CI-NEXT: v_cvt_f32_f16_e64 v1, |v1| 236; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 237; CI-NEXT: v_cndmask_b32_e32 v0, -1.0, v1, vcc 238; CI-NEXT: v_add_f32_e32 v0, v0, v2 239; CI-NEXT: s_setpc_b64 s[30:31] 240; 241; VI-LABEL: 
add_select_fabs_negk_f16: 242; VI: ; %bb.0: 243; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 244; VI-NEXT: v_and_b32_e32 v1, 0x7fff, v1 245; VI-NEXT: v_mov_b32_e32 v3, 0xbc00 246; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 247; VI-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc 248; VI-NEXT: v_add_f16_e32 v0, v0, v2 249; VI-NEXT: s_setpc_b64 s[30:31] 250; 251; GFX11-LABEL: add_select_fabs_negk_f16: 252; GFX11: ; %bb.0: 253; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 254; GFX11-NEXT: v_and_b32_e32 v1, 0x7fff, v1 255; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 256; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 257; GFX11-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v1, vcc_lo 258; GFX11-NEXT: v_add_f16_e32 v0, v0, v2 259; GFX11-NEXT: s_setpc_b64 s[30:31] 260 %cmp = icmp eq i32 %c, 0 261 %fabs = call half @llvm.fabs.f16(half %x) 262 %select = select i1 %cmp, half %fabs, half -1.0 263 %add = fadd half %select, %y 264 ret half %add 265} 266 267; FIXME: fabs should fold away 268define half @add_select_fabs_negk_negk_f16(i32 %c, half %x) { 269; CI-LABEL: add_select_fabs_negk_negk_f16: 270; CI: ; %bb.0: 271; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 272; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 273; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 274; CI-NEXT: v_cndmask_b32_e64 v0, -1.0, -2.0, vcc 275; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 276; CI-NEXT: v_add_f32_e64 v0, |v0|, v1 277; CI-NEXT: s_setpc_b64 s[30:31] 278; 279; VI-LABEL: add_select_fabs_negk_negk_f16: 280; VI: ; %bb.0: 281; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 282; VI-NEXT: v_mov_b32_e32 v2, 0xbc00 283; VI-NEXT: v_mov_b32_e32 v3, 0xc000 284; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 285; VI-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc 286; VI-NEXT: v_add_f16_e64 v0, |v0|, v1 287; VI-NEXT: s_setpc_b64 s[30:31] 288; 289; GFX11-LABEL: add_select_fabs_negk_negk_f16: 290; GFX11: ; %bb.0: 291; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 292; GFX11-NEXT: v_mov_b32_e32 v2, 0xc000 293; 
GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 294; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 295; GFX11-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo 296; GFX11-NEXT: v_add_f16_e64 v0, |v0|, v1 297; GFX11-NEXT: s_setpc_b64 s[30:31] 298 %cmp = icmp eq i32 %c, 0 299 %select = select i1 %cmp, half -2.0, half -1.0 300 %fabs = call half @llvm.fabs.f16(half %select) 301 %add = fadd half %fabs, %x 302 ret half %add 303} 304 305define half @add_select_posk_posk_f16(i32 %c, half %x) { 306; CI-LABEL: add_select_posk_posk_f16: 307; CI: ; %bb.0: 308; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 309; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 310; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 311; CI-NEXT: v_cndmask_b32_e64 v0, 1.0, 2.0, vcc 312; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 313; CI-NEXT: v_add_f32_e32 v0, v0, v1 314; CI-NEXT: s_setpc_b64 s[30:31] 315; 316; VI-LABEL: add_select_posk_posk_f16: 317; VI: ; %bb.0: 318; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 319; VI-NEXT: v_mov_b32_e32 v2, 0x3c00 320; VI-NEXT: v_mov_b32_e32 v3, 0x4000 321; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 322; VI-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc 323; VI-NEXT: v_add_f16_e32 v0, v0, v1 324; VI-NEXT: s_setpc_b64 s[30:31] 325; 326; GFX11-LABEL: add_select_posk_posk_f16: 327; GFX11: ; %bb.0: 328; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 329; GFX11-NEXT: v_mov_b32_e32 v2, 0x4000 330; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 331; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 332; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo 333; GFX11-NEXT: v_add_f16_e32 v0, v0, v1 334; GFX11-NEXT: s_setpc_b64 s[30:31] 335 %cmp = icmp eq i32 %c, 0 336 %select = select i1 %cmp, half 2.0, half 1.0 337 %add = fadd half %select, %x 338 ret half %add 339} 340 341define half @add_select_negk_fabs_f16(i32 %c, half %x, half %y) { 342; CI-LABEL: add_select_negk_fabs_f16: 343; CI: ; %bb.0: 344; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
345; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 346; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 347; CI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 348; CI-NEXT: v_cvt_f32_f16_e64 v1, |v1| 349; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 350; CI-NEXT: v_cndmask_b32_e32 v0, -1.0, v1, vcc 351; CI-NEXT: v_add_f32_e32 v0, v0, v2 352; CI-NEXT: s_setpc_b64 s[30:31] 353; 354; VI-LABEL: add_select_negk_fabs_f16: 355; VI: ; %bb.0: 356; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 357; VI-NEXT: v_and_b32_e32 v1, 0x7fff, v1 358; VI-NEXT: v_mov_b32_e32 v3, 0xbc00 359; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 360; VI-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc 361; VI-NEXT: v_add_f16_e32 v0, v0, v2 362; VI-NEXT: s_setpc_b64 s[30:31] 363; 364; GFX11-LABEL: add_select_negk_fabs_f16: 365; GFX11: ; %bb.0: 366; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 367; GFX11-NEXT: v_and_b32_e32 v1, 0x7fff, v1 368; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 369; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 370; GFX11-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v1, vcc_lo 371; GFX11-NEXT: v_add_f16_e32 v0, v0, v2 372; GFX11-NEXT: s_setpc_b64 s[30:31] 373 %cmp = icmp eq i32 %c, 0 374 %fabs = call half @llvm.fabs.f16(half %x) 375 %select = select i1 %cmp, half -1.0, half %fabs 376 %add = fadd half %select, %y 377 ret half %add 378} 379 380define half @add_select_negliteralk_fabs_f16(i32 %c, half %x, half %y) { 381; CI-LABEL: add_select_negliteralk_fabs_f16: 382; CI: ; %bb.0: 383; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 384; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 385; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 386; CI-NEXT: v_mov_b32_e32 v3, 0xc4800000 387; CI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 388; CI-NEXT: v_cvt_f32_f16_e64 v1, |v1| 389; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 390; CI-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc 391; CI-NEXT: v_add_f32_e32 v0, v0, v2 392; CI-NEXT: s_setpc_b64 s[30:31] 393; 394; VI-LABEL: add_select_negliteralk_fabs_f16: 395; VI: ; %bb.0: 396; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) 397; VI-NEXT: v_and_b32_e32 v1, 0x7fff, v1 398; VI-NEXT: v_mov_b32_e32 v3, 0xe400 399; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 400; VI-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc 401; VI-NEXT: v_add_f16_e32 v0, v0, v2 402; VI-NEXT: s_setpc_b64 s[30:31] 403; 404; GFX11-LABEL: add_select_negliteralk_fabs_f16: 405; GFX11: ; %bb.0: 406; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 407; GFX11-NEXT: v_and_b32_e32 v1, 0x7fff, v1 408; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 409; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 410; GFX11-NEXT: v_cndmask_b32_e32 v0, 0xe400, v1, vcc_lo 411; GFX11-NEXT: v_add_f16_e32 v0, v0, v2 412; GFX11-NEXT: s_setpc_b64 s[30:31] 413 %cmp = icmp eq i32 %c, 0 414 %fabs = call half @llvm.fabs.f16(half %x) 415 %select = select i1 %cmp, half -1024.0, half %fabs 416 %add = fadd half %select, %y 417 ret half %add 418} 419 420define half @add_select_fabs_posk_f16(i32 %c, half %x, half %y) { 421; CI-LABEL: add_select_fabs_posk_f16: 422; CI: ; %bb.0: 423; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 424; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 425; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 426; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 427; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 428; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 429; CI-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc 430; CI-NEXT: v_add_f32_e64 v0, |v0|, v2 431; CI-NEXT: s_setpc_b64 s[30:31] 432; 433; VI-LABEL: add_select_fabs_posk_f16: 434; VI: ; %bb.0: 435; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 436; VI-NEXT: v_mov_b32_e32 v3, 0x3c00 437; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 438; VI-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc 439; VI-NEXT: v_add_f16_e64 v0, |v0|, v2 440; VI-NEXT: s_setpc_b64 s[30:31] 441; 442; GFX11-LABEL: add_select_fabs_posk_f16: 443; GFX11: ; %bb.0: 444; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 445; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 446; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v1, vcc_lo 447; GFX11-NEXT: s_delay_alu 
instid0(VALU_DEP_1) 448; GFX11-NEXT: v_add_f16_e64 v0, |v0|, v2 449; GFX11-NEXT: s_setpc_b64 s[30:31] 450 %cmp = icmp eq i32 %c, 0 451 %fabs = call half @llvm.fabs.f16(half %x) 452 %select = select i1 %cmp, half %fabs, half 1.0 453 %add = fadd half %select, %y 454 ret half %add 455} 456 457define half @add_select_posk_fabs_f16(i32 %c, half %x, half %y) { 458; CI-LABEL: add_select_posk_fabs_f16: 459; CI: ; %bb.0: 460; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 461; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 462; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 463; CI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 464; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 465; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 466; CI-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc 467; CI-NEXT: v_add_f32_e64 v0, |v0|, v2 468; CI-NEXT: s_setpc_b64 s[30:31] 469; 470; VI-LABEL: add_select_posk_fabs_f16: 471; VI: ; %bb.0: 472; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 473; VI-NEXT: v_mov_b32_e32 v3, 0x3c00 474; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 475; VI-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc 476; VI-NEXT: v_add_f16_e64 v0, |v0|, v2 477; VI-NEXT: s_setpc_b64 s[30:31] 478; 479; GFX11-LABEL: add_select_posk_fabs_f16: 480; GFX11: ; %bb.0: 481; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 482; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 483; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v1, vcc_lo 484; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 485; GFX11-NEXT: v_add_f16_e64 v0, |v0|, v2 486; GFX11-NEXT: s_setpc_b64 s[30:31] 487 %cmp = icmp eq i32 %c, 0 488 %fabs = call half @llvm.fabs.f16(half %x) 489 %select = select i1 %cmp, half 1.0, half %fabs 490 %add = fadd half %select, %y 491 ret half %add 492} 493 494define half @add_select_fneg_fneg_f16(i32 %c, half %x, half %y, half %z) { 495; CI-LABEL: add_select_fneg_fneg_f16: 496; CI: ; %bb.0: 497; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 498; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 499; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 500; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 501; CI-NEXT: 
v_cmp_eq_u32_e32 vcc, 0, v0 502; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 503; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 504; CI-NEXT: v_cvt_f32_f16_e32 v3, v3 505; CI-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 506; CI-NEXT: v_sub_f32_e32 v0, v3, v0 507; CI-NEXT: s_setpc_b64 s[30:31] 508; 509; VI-LABEL: add_select_fneg_fneg_f16: 510; VI: ; %bb.0: 511; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 512; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 513; VI-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 514; VI-NEXT: v_sub_f16_e32 v0, v3, v0 515; VI-NEXT: s_setpc_b64 s[30:31] 516; 517; GFX11-LABEL: add_select_fneg_fneg_f16: 518; GFX11: ; %bb.0: 519; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 520; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 521; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo 522; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 523; GFX11-NEXT: v_sub_f16_e32 v0, v3, v0 524; GFX11-NEXT: s_setpc_b64 s[30:31] 525 %cmp = icmp eq i32 %c, 0 526 %fneg.x = fneg half %x 527 %fneg.y = fneg half %y 528 %select = select i1 %cmp, half %fneg.x, half %fneg.y 529 %add = fadd half %select, %z 530 ret half %add 531} 532 533define { half, half } @add_select_multi_use_lhs_fneg_fneg_f16(i32 %c, half %x, half %y, half %z, half %w) { 534; CI-LABEL: add_select_multi_use_lhs_fneg_fneg_f16: 535; CI: ; %bb.0: 536; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 537; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 538; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 539; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 540; CI-NEXT: v_cvt_f16_f32_e32 v4, v4 541; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 542; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 543; CI-NEXT: v_cvt_f32_f16_e32 v3, v3 544; CI-NEXT: v_cvt_f32_f16_e32 v4, v4 545; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 546; CI-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 547; CI-NEXT: v_sub_f32_e32 v0, v3, v0 548; CI-NEXT: v_sub_f32_e32 v1, v4, v1 549; CI-NEXT: s_setpc_b64 s[30:31] 550; 551; VI-LABEL: add_select_multi_use_lhs_fneg_fneg_f16: 552; VI: ; %bb.0: 553; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 554; 
VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 555; VI-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 556; VI-NEXT: v_sub_f16_e32 v0, v3, v0 557; VI-NEXT: v_sub_f16_e32 v1, v4, v1 558; VI-NEXT: s_setpc_b64 s[30:31] 559; 560; GFX11-LABEL: add_select_multi_use_lhs_fneg_fneg_f16: 561; GFX11: ; %bb.0: 562; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 563; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 564; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo 565; GFX11-NEXT: v_sub_f16_e32 v1, v4, v1 566; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) 567; GFX11-NEXT: v_sub_f16_e32 v0, v3, v0 568; GFX11-NEXT: s_setpc_b64 s[30:31] 569 %cmp = icmp eq i32 %c, 0 570 %fneg.x = fneg half %x 571 %fneg.y = fneg half %y 572 %select = select i1 %cmp, half %fneg.x, half %fneg.y 573 %add0 = fadd half %select, %z 574 %add1 = fadd half %fneg.x, %w 575 %insert.0 = insertvalue { half, half } poison, half %add0, 0 576 %insert.1 = insertvalue { half, half } %insert.0, half %add1, 1 577 ret { half, half } %insert.1 578} 579 580define { half, half } @add_select_multi_store_use_lhs_fneg_fneg_f16(i32 %c, half %x, half %y, half %z) { 581; CI-LABEL: add_select_multi_store_use_lhs_fneg_fneg_f16: 582; CI: ; %bb.0: 583; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 584; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 585; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 586; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 587; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 588; CI-NEXT: v_cvt_f32_f16_e32 v4, v1 589; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 590; CI-NEXT: v_cvt_f32_f16_e32 v3, v3 591; CI-NEXT: v_cvt_f32_f16_e64 v1, -v1 592; CI-NEXT: v_cndmask_b32_e32 v0, v2, v4, vcc 593; CI-NEXT: v_sub_f32_e32 v0, v3, v0 594; CI-NEXT: s_setpc_b64 s[30:31] 595; 596; VI-LABEL: add_select_multi_store_use_lhs_fneg_fneg_f16: 597; VI: ; %bb.0: 598; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 599; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 600; VI-NEXT: v_xor_b32_e32 v4, 0x8000, v1 601; VI-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 602; VI-NEXT: v_sub_f16_e32 v0, v3, v0 603; VI-NEXT: 
v_mov_b32_e32 v1, v4 604; VI-NEXT: s_setpc_b64 s[30:31] 605; 606; GFX11-LABEL: add_select_multi_store_use_lhs_fneg_fneg_f16: 607; GFX11: ; %bb.0: 608; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 609; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 610; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo 611; GFX11-NEXT: v_xor_b32_e32 v1, 0x8000, v1 612; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) 613; GFX11-NEXT: v_sub_f16_e32 v0, v3, v0 614; GFX11-NEXT: s_setpc_b64 s[30:31] 615 %cmp = icmp eq i32 %c, 0 616 %fneg.x = fneg half %x 617 %fneg.y = fneg half %y 618 %select = select i1 %cmp, half %fneg.x, half %fneg.y 619 %add0 = fadd half %select, %z 620 %insert.0 = insertvalue { half, half } poison, half %add0, 0 621 %insert.1 = insertvalue { half, half } %insert.0, half %fneg.x, 1 622 ret { half, half } %insert.1 623} 624 625define { half, half } @add_select_multi_use_rhs_fneg_fneg_f16(i32 %c, half %x, half %y, half %z, half %w) { 626; CI-LABEL: add_select_multi_use_rhs_fneg_fneg_f16: 627; CI: ; %bb.0: 628; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 629; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 630; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 631; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 632; CI-NEXT: v_cvt_f16_f32_e32 v4, v4 633; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 634; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 635; CI-NEXT: v_cvt_f32_f16_e32 v3, v3 636; CI-NEXT: v_cvt_f32_f16_e32 v4, v4 637; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 638; CI-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 639; CI-NEXT: v_sub_f32_e32 v0, v3, v0 640; CI-NEXT: v_sub_f32_e32 v1, v4, v2 641; CI-NEXT: s_setpc_b64 s[30:31] 642; 643; VI-LABEL: add_select_multi_use_rhs_fneg_fneg_f16: 644; VI: ; %bb.0: 645; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 646; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 647; VI-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 648; VI-NEXT: v_sub_f16_e32 v0, v3, v0 649; VI-NEXT: v_sub_f16_e32 v1, v4, v2 650; VI-NEXT: s_setpc_b64 s[30:31] 651; 652; GFX11-LABEL: add_select_multi_use_rhs_fneg_fneg_f16: 653; GFX11: ; 
%bb.0: 654; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 655; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 656; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo 657; GFX11-NEXT: v_sub_f16_e32 v1, v4, v2 658; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) 659; GFX11-NEXT: v_sub_f16_e32 v0, v3, v0 660; GFX11-NEXT: s_setpc_b64 s[30:31] 661 %cmp = icmp eq i32 %c, 0 662 %fneg.x = fneg half %x 663 %fneg.y = fneg half %y 664 %select = select i1 %cmp, half %fneg.x, half %fneg.y 665 %add0 = fadd half %select, %z 666 %add1 = fadd half %fneg.y, %w 667 %insert.0 = insertvalue { half, half } poison, half %add0, 0 668 %insert.1 = insertvalue { half, half } %insert.0, half %add1, 1 669 ret { half, half } %insert.1 670} 671 672define half @add_select_fneg_var_f16(i32 %c, half %x, half %y, half %z) { 673; CI-LABEL: add_select_fneg_var_f16: 674; CI: ; %bb.0: 675; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 676; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 677; CI-NEXT: v_cvt_f16_f32_e64 v1, -v1 678; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 679; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 680; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 681; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 682; CI-NEXT: v_cvt_f32_f16_e32 v3, v3 683; CI-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 684; CI-NEXT: v_add_f32_e32 v0, v0, v3 685; CI-NEXT: s_setpc_b64 s[30:31] 686; 687; VI-LABEL: add_select_fneg_var_f16: 688; VI: ; %bb.0: 689; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 690; VI-NEXT: v_xor_b32_e32 v1, 0x8000, v1 691; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 692; VI-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 693; VI-NEXT: v_add_f16_e32 v0, v0, v3 694; VI-NEXT: s_setpc_b64 s[30:31] 695; 696; GFX11-LABEL: add_select_fneg_var_f16: 697; GFX11: ; %bb.0: 698; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 699; GFX11-NEXT: v_xor_b32_e32 v1, 0x8000, v1 700; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 701; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 702; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo 703; 
GFX11-NEXT: v_add_f16_e32 v0, v0, v3 704; GFX11-NEXT: s_setpc_b64 s[30:31] 705 %cmp = icmp eq i32 %c, 0 706 %fneg.x = fneg half %x 707 %select = select i1 %cmp, half %fneg.x, half %y 708 %add = fadd half %select, %z 709 ret half %add 710} 711 712define half @add_select_fneg_negk_f16(i32 %c, half %x, half %y) { 713; CI-LABEL: add_select_fneg_negk_f16: 714; CI: ; %bb.0: 715; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 716; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 717; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 718; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 719; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 720; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 721; CI-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc 722; CI-NEXT: v_sub_f32_e32 v0, v2, v0 723; CI-NEXT: s_setpc_b64 s[30:31] 724; 725; VI-LABEL: add_select_fneg_negk_f16: 726; VI: ; %bb.0: 727; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 728; VI-NEXT: v_mov_b32_e32 v3, 0x3c00 729; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 730; VI-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc 731; VI-NEXT: v_sub_f16_e32 v0, v2, v0 732; VI-NEXT: s_setpc_b64 s[30:31] 733; 734; GFX11-LABEL: add_select_fneg_negk_f16: 735; GFX11: ; %bb.0: 736; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 737; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 738; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v1, vcc_lo 739; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 740; GFX11-NEXT: v_sub_f16_e32 v0, v2, v0 741; GFX11-NEXT: s_setpc_b64 s[30:31] 742 %cmp = icmp eq i32 %c, 0 743 %fneg.x = fneg half %x 744 %select = select i1 %cmp, half %fneg.x, half -1.0 745 %add = fadd half %select, %y 746 ret half %add 747} 748 749define half @add_select_fneg_inv2pi_f16(i32 %c, half %x, half %y) { 750; CI-LABEL: add_select_fneg_inv2pi_f16: 751; CI: ; %bb.0: 752; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 753; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 754; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 755; CI-NEXT: v_mov_b32_e32 v3, 0xbe230000 756; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 757; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 758; 
CI-NEXT: v_cvt_f32_f16_e32 v2, v2 759; CI-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc 760; CI-NEXT: v_sub_f32_e32 v0, v2, v0 761; CI-NEXT: s_setpc_b64 s[30:31] 762; 763; VI-LABEL: add_select_fneg_inv2pi_f16: 764; VI: ; %bb.0: 765; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 766; VI-NEXT: v_mov_b32_e32 v3, 0xb118 767; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 768; VI-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc 769; VI-NEXT: v_sub_f16_e32 v0, v2, v0 770; VI-NEXT: s_setpc_b64 s[30:31] 771; 772; GFX11-LABEL: add_select_fneg_inv2pi_f16: 773; GFX11: ; %bb.0: 774; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 775; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 776; GFX11-NEXT: v_cndmask_b32_e32 v0, 0xb118, v1, vcc_lo 777; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 778; GFX11-NEXT: v_sub_f16_e32 v0, v2, v0 779; GFX11-NEXT: s_setpc_b64 s[30:31] 780 %cmp = icmp eq i32 %c, 0 781 %fneg.x = fneg half %x 782 %select = select i1 %cmp, half %fneg.x, half 0xH3118 783 %add = fadd half %select, %y 784 ret half %add 785} 786 787define half @add_select_fneg_neginv2pi_f16(i32 %c, half %x, half %y) { 788; CI-LABEL: add_select_fneg_neginv2pi_f16: 789; CI: ; %bb.0: 790; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 791; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 792; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 793; CI-NEXT: v_mov_b32_e32 v3, 0x3e230000 794; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 795; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 796; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 797; CI-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc 798; CI-NEXT: v_sub_f32_e32 v0, v2, v0 799; CI-NEXT: s_setpc_b64 s[30:31] 800; 801; VI-LABEL: add_select_fneg_neginv2pi_f16: 802; VI: ; %bb.0: 803; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 804; VI-NEXT: v_mov_b32_e32 v3, 0x3118 805; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 806; VI-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc 807; VI-NEXT: v_sub_f16_e32 v0, v2, v0 808; VI-NEXT: s_setpc_b64 s[30:31] 809; 810; GFX11-LABEL: add_select_fneg_neginv2pi_f16: 811; GFX11: ; %bb.0: 812; GFX11-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 813; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 814; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x3118, v1, vcc_lo 815; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 816; GFX11-NEXT: v_sub_f16_e32 v0, v2, v0 817; GFX11-NEXT: s_setpc_b64 s[30:31] 818 %cmp = icmp eq i32 %c, 0 819 %fneg.x = fneg half %x 820 %select = select i1 %cmp, half %fneg.x, half 0xHB118 821 %add = fadd half %select, %y 822 ret half %add 823} 824 825define half @add_select_negk_negk_f16(i32 %c, half %x) { 826; CI-LABEL: add_select_negk_negk_f16: 827; CI: ; %bb.0: 828; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 829; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 830; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 831; CI-NEXT: v_cndmask_b32_e64 v0, -1.0, -2.0, vcc 832; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 833; CI-NEXT: v_add_f32_e32 v0, v0, v1 834; CI-NEXT: s_setpc_b64 s[30:31] 835; 836; VI-LABEL: add_select_negk_negk_f16: 837; VI: ; %bb.0: 838; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 839; VI-NEXT: v_mov_b32_e32 v2, 0xbc00 840; VI-NEXT: v_mov_b32_e32 v3, 0xc000 841; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 842; VI-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc 843; VI-NEXT: v_add_f16_e32 v0, v0, v1 844; VI-NEXT: s_setpc_b64 s[30:31] 845; 846; GFX11-LABEL: add_select_negk_negk_f16: 847; GFX11: ; %bb.0: 848; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 849; GFX11-NEXT: v_mov_b32_e32 v2, 0xc000 850; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 851; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 852; GFX11-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo 853; GFX11-NEXT: v_add_f16_e32 v0, v0, v1 854; GFX11-NEXT: s_setpc_b64 s[30:31] 855 %cmp = icmp eq i32 %c, 0 856 %select = select i1 %cmp, half -2.0, half -1.0 857 %add = fadd half %select, %x 858 ret half %add 859} 860 861define half @add_select_negliteralk_negliteralk_f16(i32 %c, half %x) { 862; CI-LABEL: add_select_negliteralk_negliteralk_f16: 863; CI: ; %bb.0: 864; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) 865; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 866; CI-NEXT: v_mov_b32_e32 v2, 0xc5800000 867; CI-NEXT: v_mov_b32_e32 v3, 0xc5000000 868; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 869; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 870; CI-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc 871; CI-NEXT: v_add_f32_e32 v0, v0, v1 872; CI-NEXT: s_setpc_b64 s[30:31] 873; 874; VI-LABEL: add_select_negliteralk_negliteralk_f16: 875; VI: ; %bb.0: 876; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 877; VI-NEXT: v_mov_b32_e32 v2, 0xec00 878; VI-NEXT: v_mov_b32_e32 v3, 0xe800 879; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 880; VI-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc 881; VI-NEXT: v_add_f16_e32 v0, v0, v1 882; VI-NEXT: s_setpc_b64 s[30:31] 883; 884; GFX11-LABEL: add_select_negliteralk_negliteralk_f16: 885; GFX11: ; %bb.0: 886; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 887; GFX11-NEXT: v_mov_b32_e32 v2, 0xe800 888; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 889; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 890; GFX11-NEXT: v_cndmask_b32_e32 v0, 0xec00, v2, vcc_lo 891; GFX11-NEXT: v_add_f16_e32 v0, v0, v1 892; GFX11-NEXT: s_setpc_b64 s[30:31] 893 %cmp = icmp eq i32 %c, 0 894 %select = select i1 %cmp, half -2048.0, half -4096.0 895 %add = fadd half %select, %x 896 ret half %add 897} 898 899define half @add_select_fneg_negk_negk_f16(i32 %c, half %x) { 900; CI-LABEL: add_select_fneg_negk_negk_f16: 901; CI: ; %bb.0: 902; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 903; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 904; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 905; CI-NEXT: v_cndmask_b32_e64 v0, -1.0, -2.0, vcc 906; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 907; CI-NEXT: v_sub_f32_e32 v0, v1, v0 908; CI-NEXT: s_setpc_b64 s[30:31] 909; 910; VI-LABEL: add_select_fneg_negk_negk_f16: 911; VI: ; %bb.0: 912; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 913; VI-NEXT: v_mov_b32_e32 v2, 0xbc00 914; VI-NEXT: v_mov_b32_e32 v3, 0xc000 915; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 916; 
VI-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc 917; VI-NEXT: v_sub_f16_e32 v0, v1, v0 918; VI-NEXT: s_setpc_b64 s[30:31] 919; 920; GFX11-LABEL: add_select_fneg_negk_negk_f16: 921; GFX11: ; %bb.0: 922; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 923; GFX11-NEXT: v_mov_b32_e32 v2, 0xc000 924; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 925; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 926; GFX11-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo 927; GFX11-NEXT: v_sub_f16_e32 v0, v1, v0 928; GFX11-NEXT: s_setpc_b64 s[30:31] 929 %cmp = icmp eq i32 %c, 0 930 %select = select i1 %cmp, half -2.0, half -1.0 931 %fneg.x = fneg half %select 932 %add = fadd half %fneg.x, %x 933 ret half %add 934} 935 936define half @add_select_negk_fneg_f16(i32 %c, half %x, half %y) { 937; CI-LABEL: add_select_negk_fneg_f16: 938; CI: ; %bb.0: 939; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 940; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 941; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 942; CI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 943; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 944; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 945; CI-NEXT: v_cndmask_b32_e32 v0, 1.0, v1, vcc 946; CI-NEXT: v_sub_f32_e32 v0, v2, v0 947; CI-NEXT: s_setpc_b64 s[30:31] 948; 949; VI-LABEL: add_select_negk_fneg_f16: 950; VI: ; %bb.0: 951; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 952; VI-NEXT: v_mov_b32_e32 v3, 0x3c00 953; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 954; VI-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc 955; VI-NEXT: v_sub_f16_e32 v0, v2, v0 956; VI-NEXT: s_setpc_b64 s[30:31] 957; 958; GFX11-LABEL: add_select_negk_fneg_f16: 959; GFX11: ; %bb.0: 960; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 961; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 962; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v1, vcc_lo 963; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 964; GFX11-NEXT: v_sub_f16_e32 v0, v2, v0 965; GFX11-NEXT: s_setpc_b64 s[30:31] 966 %cmp = icmp eq i32 %c, 0 967 %fneg.x = fneg half %x 968 %select = select
i1 %cmp, half -1.0, half %fneg.x 969 %add = fadd half %select, %y 970 ret half %add 971} 972 973define half @add_select_fneg_posk_f16(i32 %c, half %x, half %y) { 974; CI-LABEL: add_select_fneg_posk_f16: 975; CI: ; %bb.0: 976; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 977; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 978; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 979; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 980; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 981; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 982; CI-NEXT: v_cndmask_b32_e32 v0, -1.0, v1, vcc 983; CI-NEXT: v_sub_f32_e32 v0, v2, v0 984; CI-NEXT: s_setpc_b64 s[30:31] 985; 986; VI-LABEL: add_select_fneg_posk_f16: 987; VI: ; %bb.0: 988; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 989; VI-NEXT: v_mov_b32_e32 v3, 0xbc00 990; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 991; VI-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc 992; VI-NEXT: v_sub_f16_e32 v0, v2, v0 993; VI-NEXT: s_setpc_b64 s[30:31] 994; 995; GFX11-LABEL: add_select_fneg_posk_f16: 996; GFX11: ; %bb.0: 997; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 998; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 999; GFX11-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v1, vcc_lo 1000; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1001; GFX11-NEXT: v_sub_f16_e32 v0, v2, v0 1002; GFX11-NEXT: s_setpc_b64 s[30:31] 1003 %cmp = icmp eq i32 %c, 0 1004 %fneg.x = fneg half %x 1005 %select = select i1 %cmp, half %fneg.x, half 1.0 1006 %add = fadd half %select, %y 1007 ret half %add 1008} 1009 1010define half @add_select_posk_fneg_f16(i32 %c, half %x, half %y) { 1011; CI-LABEL: add_select_posk_fneg_f16: 1012; CI: ; %bb.0: 1013; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1014; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 1015; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 1016; CI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 1017; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 1018; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 1019; CI-NEXT: v_cndmask_b32_e32 v0, -1.0, v1, vcc 1020; CI-NEXT: v_sub_f32_e32 v0, v2, v0 1021; CI-NEXT: s_setpc_b64 s[30:31] 1022; 1023; VI-LABEL: 
add_select_posk_fneg_f16: 1024; VI: ; %bb.0: 1025; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1026; VI-NEXT: v_mov_b32_e32 v3, 0xbc00 1027; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 1028; VI-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc 1029; VI-NEXT: v_sub_f16_e32 v0, v2, v0 1030; VI-NEXT: s_setpc_b64 s[30:31] 1031; 1032; GFX11-LABEL: add_select_posk_fneg_f16: 1033; GFX11: ; %bb.0: 1034; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1035; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 1036; GFX11-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v1, vcc_lo 1037; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1038; GFX11-NEXT: v_sub_f16_e32 v0, v2, v0 1039; GFX11-NEXT: s_setpc_b64 s[30:31] 1040 %cmp = icmp eq i32 %c, 0 1041 %fneg.x = fneg half %x 1042 %select = select i1 %cmp, half 1.0, half %fneg.x 1043 %add = fadd half %select, %y 1044 ret half %add 1045} 1046 1047define half @add_select_negfabs_fabs_f16(i32 %c, half %x, half %y, half %z) { 1048; CI-LABEL: add_select_negfabs_fabs_f16: 1049; CI: ; %bb.0: 1050; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1051; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 1052; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 1053; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 1054; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 1055; CI-NEXT: v_cvt_f32_f16_e64 v1, -|v1| 1056; CI-NEXT: v_cvt_f32_f16_e64 v2, |v2| 1057; CI-NEXT: v_cvt_f32_f16_e32 v3, v3 1058; CI-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 1059; CI-NEXT: v_add_f32_e32 v0, v0, v3 1060; CI-NEXT: s_setpc_b64 s[30:31] 1061; 1062; VI-LABEL: add_select_negfabs_fabs_f16: 1063; VI: ; %bb.0: 1064; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1065; VI-NEXT: v_or_b32_e32 v1, 0x8000, v1 1066; VI-NEXT: v_and_b32_e32 v2, 0x7fff, v2 1067; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 1068; VI-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 1069; VI-NEXT: v_add_f16_e32 v0, v0, v3 1070; VI-NEXT: s_setpc_b64 s[30:31] 1071; 1072; GFX11-LABEL: add_select_negfabs_fabs_f16: 1073; GFX11: ; %bb.0: 1074; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1075; 
GFX11-NEXT: v_or_b32_e32 v1, 0x8000, v1 1076; GFX11-NEXT: v_and_b32_e32 v2, 0x7fff, v2 1077; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 1078; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 1079; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo 1080; GFX11-NEXT: v_add_f16_e32 v0, v0, v3 1081; GFX11-NEXT: s_setpc_b64 s[30:31] 1082 %cmp = icmp eq i32 %c, 0 1083 %fabs.x = call half @llvm.fabs.f16(half %x) 1084 %fneg.fabs.x = fsub half -0.000000e+00, %fabs.x 1085 %fabs.y = call half @llvm.fabs.f16(half %y) 1086 %select = select i1 %cmp, half %fneg.fabs.x, half %fabs.y 1087 %add = fadd half %select, %z 1088 ret half %add 1089} 1090 1091define half @add_select_fabs_negfabs_f16(i32 %c, half %x, half %y, half %z) { 1092; CI-LABEL: add_select_fabs_negfabs_f16: 1093; CI: ; %bb.0: 1094; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1095; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 1096; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 1097; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 1098; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 1099; CI-NEXT: v_cvt_f32_f16_e64 v1, |v1| 1100; CI-NEXT: v_cvt_f32_f16_e64 v2, -|v2| 1101; CI-NEXT: v_cvt_f32_f16_e32 v3, v3 1102; CI-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 1103; CI-NEXT: v_add_f32_e32 v0, v0, v3 1104; CI-NEXT: s_setpc_b64 s[30:31] 1105; 1106; VI-LABEL: add_select_fabs_negfabs_f16: 1107; VI: ; %bb.0: 1108; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1109; VI-NEXT: v_and_b32_e32 v1, 0x7fff, v1 1110; VI-NEXT: v_or_b32_e32 v2, 0x8000, v2 1111; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 1112; VI-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 1113; VI-NEXT: v_add_f16_e32 v0, v0, v3 1114; VI-NEXT: s_setpc_b64 s[30:31] 1115; 1116; GFX11-LABEL: add_select_fabs_negfabs_f16: 1117; GFX11: ; %bb.0: 1118; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1119; GFX11-NEXT: v_and_b32_e32 v1, 0x7fff, v1 1120; GFX11-NEXT: v_or_b32_e32 v2, 0x8000, v2 1121; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 1122; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | 
instskip(NEXT) | instid1(VALU_DEP_1) 1123; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo 1124; GFX11-NEXT: v_add_f16_e32 v0, v0, v3 1125; GFX11-NEXT: s_setpc_b64 s[30:31] 1126 %cmp = icmp eq i32 %c, 0 1127 %fabs.x = call half @llvm.fabs.f16(half %x) 1128 %fabs.y = call half @llvm.fabs.f16(half %y) 1129 %fneg.fabs.y = fsub half -0.000000e+00, %fabs.y 1130 %select = select i1 %cmp, half %fabs.x, half %fneg.fabs.y 1131 %add = fadd half %select, %z 1132 ret half %add 1133} 1134 1135define half @add_select_neg_fabs_f16(i32 %c, half %x, half %y, half %z) { 1136; CI-LABEL: add_select_neg_fabs_f16: 1137; CI: ; %bb.0: 1138; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1139; CI-NEXT: v_cvt_f16_f32_e64 v1, -v1 1140; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 1141; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 1142; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 1143; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 1144; CI-NEXT: v_cvt_f32_f16_e64 v2, |v2| 1145; CI-NEXT: v_cvt_f32_f16_e32 v3, v3 1146; CI-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 1147; CI-NEXT: v_add_f32_e32 v0, v0, v3 1148; CI-NEXT: s_setpc_b64 s[30:31] 1149; 1150; VI-LABEL: add_select_neg_fabs_f16: 1151; VI: ; %bb.0: 1152; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1153; VI-NEXT: v_xor_b32_e32 v1, 0x8000, v1 1154; VI-NEXT: v_and_b32_e32 v2, 0x7fff, v2 1155; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 1156; VI-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 1157; VI-NEXT: v_add_f16_e32 v0, v0, v3 1158; VI-NEXT: s_setpc_b64 s[30:31] 1159; 1160; GFX11-LABEL: add_select_neg_fabs_f16: 1161; GFX11: ; %bb.0: 1162; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1163; GFX11-NEXT: v_xor_b32_e32 v1, 0x8000, v1 1164; GFX11-NEXT: v_and_b32_e32 v2, 0x7fff, v2 1165; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 1166; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 1167; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo 1168; GFX11-NEXT: v_add_f16_e32 v0, v0, v3 1169; GFX11-NEXT: s_setpc_b64 s[30:31] 1170 %cmp = icmp eq i32 %c, 0 1171 
%fneg.x = fsub half -0.000000e+00, %x 1172 %fabs.y = call half @llvm.fabs.f16(half %y) 1173 %select = select i1 %cmp, half %fneg.x, half %fabs.y 1174 %add = fadd half %select, %z 1175 ret half %add 1176} 1177 1178define half @add_select_fabs_neg_f16(i32 %c, half %x, half %y, half %z) { 1179; CI-LABEL: add_select_fabs_neg_f16: 1180; CI: ; %bb.0: 1181; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1182; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 1183; CI-NEXT: v_cvt_f16_f32_e64 v2, -v2 1184; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 1185; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 1186; CI-NEXT: v_cvt_f32_f16_e64 v1, |v1| 1187; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 1188; CI-NEXT: v_cvt_f32_f16_e32 v3, v3 1189; CI-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 1190; CI-NEXT: v_add_f32_e32 v0, v0, v3 1191; CI-NEXT: s_setpc_b64 s[30:31] 1192; 1193; VI-LABEL: add_select_fabs_neg_f16: 1194; VI: ; %bb.0: 1195; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1196; VI-NEXT: v_and_b32_e32 v1, 0x7fff, v1 1197; VI-NEXT: v_xor_b32_e32 v2, 0x8000, v2 1198; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 1199; VI-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 1200; VI-NEXT: v_add_f16_e32 v0, v0, v3 1201; VI-NEXT: s_setpc_b64 s[30:31] 1202; 1203; GFX11-LABEL: add_select_fabs_neg_f16: 1204; GFX11: ; %bb.0: 1205; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1206; GFX11-NEXT: v_and_b32_e32 v1, 0x7fff, v1 1207; GFX11-NEXT: v_xor_b32_e32 v2, 0x8000, v2 1208; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 1209; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 1210; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo 1211; GFX11-NEXT: v_add_f16_e32 v0, v0, v3 1212; GFX11-NEXT: s_setpc_b64 s[30:31] 1213 %cmp = icmp eq i32 %c, 0 1214 %fabs.x = call half @llvm.fabs.f16(half %x) 1215 %fneg.y = fsub half -0.000000e+00, %y 1216 %select = select i1 %cmp, half %fabs.x, half %fneg.y 1217 %add = fadd half %select, %z 1218 ret half %add 1219} 1220 1221define half @add_select_neg_negfabs_f16(i32 %c, half %x, half 
%y, half %z) { 1222; CI-LABEL: add_select_neg_negfabs_f16: 1223; CI: ; %bb.0: 1224; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1225; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 1226; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 1227; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 1228; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 1229; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 1230; CI-NEXT: v_cvt_f32_f16_e64 v2, |v2| 1231; CI-NEXT: v_cvt_f32_f16_e32 v3, v3 1232; CI-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 1233; CI-NEXT: v_sub_f32_e32 v0, v3, v0 1234; CI-NEXT: s_setpc_b64 s[30:31] 1235; 1236; VI-LABEL: add_select_neg_negfabs_f16: 1237; VI: ; %bb.0: 1238; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1239; VI-NEXT: v_and_b32_e32 v2, 0x7fff, v2 1240; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 1241; VI-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 1242; VI-NEXT: v_sub_f16_e32 v0, v3, v0 1243; VI-NEXT: s_setpc_b64 s[30:31] 1244; 1245; GFX11-LABEL: add_select_neg_negfabs_f16: 1246; GFX11: ; %bb.0: 1247; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1248; GFX11-NEXT: v_and_b32_e32 v2, 0x7fff, v2 1249; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 1250; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 1251; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo 1252; GFX11-NEXT: v_sub_f16_e32 v0, v3, v0 1253; GFX11-NEXT: s_setpc_b64 s[30:31] 1254 %cmp = icmp eq i32 %c, 0 1255 %fneg.x = fsub half -0.000000e+00, %x 1256 %fabs.y = call half @llvm.fabs.f16(half %y) 1257 %fneg.fabs.y = fsub half -0.000000e+00, %fabs.y 1258 %select = select i1 %cmp, half %fneg.x, half %fneg.fabs.y 1259 %add = fadd half %select, %z 1260 ret half %add 1261} 1262 1263define half @add_select_negfabs_neg_f16(i32 %c, half %x, half %y, half %z) { 1264; CI-LABEL: add_select_negfabs_neg_f16: 1265; CI: ; %bb.0: 1266; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1267; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 1268; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 1269; CI-NEXT: v_cvt_f16_f32_e32 v3, v3 1270; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 
1271; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 1272; CI-NEXT: v_cvt_f32_f16_e64 v1, |v1| 1273; CI-NEXT: v_cvt_f32_f16_e32 v3, v3 1274; CI-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc 1275; CI-NEXT: v_sub_f32_e32 v0, v3, v0 1276; CI-NEXT: s_setpc_b64 s[30:31] 1277; 1278; VI-LABEL: add_select_negfabs_neg_f16: 1279; VI: ; %bb.0: 1280; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1281; VI-NEXT: v_and_b32_e32 v1, 0x7fff, v1 1282; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 1283; VI-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc 1284; VI-NEXT: v_sub_f16_e32 v0, v3, v0 1285; VI-NEXT: s_setpc_b64 s[30:31] 1286; 1287; GFX11-LABEL: add_select_negfabs_neg_f16: 1288; GFX11: ; %bb.0: 1289; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1290; GFX11-NEXT: v_and_b32_e32 v1, 0x7fff, v1 1291; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 1292; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 1293; GFX11-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc_lo 1294; GFX11-NEXT: v_sub_f16_e32 v0, v3, v0 1295; GFX11-NEXT: s_setpc_b64 s[30:31] 1296 %cmp = icmp eq i32 %c, 0 1297 %fabs.x = call half @llvm.fabs.f16(half %x) 1298 %fneg.fabs.x = fsub half -0.000000e+00, %fabs.x 1299 %fneg.y = fsub half -0.000000e+00, %y 1300 %select = select i1 %cmp, half %fneg.y, half %fneg.fabs.x 1301 %add = fadd half %select, %z 1302 ret half %add 1303} 1304 1305define half @mul_select_negfabs_posk_f16(i32 %c, half %x, half %y) { 1306; CI-LABEL: mul_select_negfabs_posk_f16: 1307; CI: ; %bb.0: 1308; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1309; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 1310; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 1311; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 1312; CI-NEXT: v_cvt_f32_f16_e64 v1, -|v1| 1313; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 1314; CI-NEXT: v_cndmask_b32_e32 v0, 4.0, v1, vcc 1315; CI-NEXT: v_mul_f32_e32 v0, v0, v2 1316; CI-NEXT: s_setpc_b64 s[30:31] 1317; 1318; VI-LABEL: mul_select_negfabs_posk_f16: 1319; VI: ; %bb.0: 1320; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1321; 
VI-NEXT: v_or_b32_e32 v1, 0x8000, v1 1322; VI-NEXT: v_mov_b32_e32 v3, 0x4400 1323; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 1324; VI-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc 1325; VI-NEXT: v_mul_f16_e32 v0, v0, v2 1326; VI-NEXT: s_setpc_b64 s[30:31] 1327; 1328; GFX11-LABEL: mul_select_negfabs_posk_f16: 1329; GFX11: ; %bb.0: 1330; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1331; GFX11-NEXT: v_or_b32_e32 v1, 0x8000, v1 1332; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 1333; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 1334; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x4400, v1, vcc_lo 1335; GFX11-NEXT: v_mul_f16_e32 v0, v0, v2 1336; GFX11-NEXT: s_setpc_b64 s[30:31] 1337 %cmp = icmp eq i32 %c, 0 1338 %fabs.x = call half @llvm.fabs.f16(half %x) 1339 %fneg.fabs.x = fsub half -0.000000e+00, %fabs.x 1340 %select = select i1 %cmp, half %fneg.fabs.x, half 4.0 1341 %add = fmul half %select, %y 1342 ret half %add 1343} 1344 1345define half @mul_select_posk_negfabs_f16(i32 %c, half %x, half %y) { 1346; CI-LABEL: mul_select_posk_negfabs_f16: 1347; CI: ; %bb.0: 1348; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1349; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 1350; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 1351; CI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 1352; CI-NEXT: v_cvt_f32_f16_e64 v1, -|v1| 1353; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 1354; CI-NEXT: v_cndmask_b32_e32 v0, 4.0, v1, vcc 1355; CI-NEXT: v_mul_f32_e32 v0, v0, v2 1356; CI-NEXT: s_setpc_b64 s[30:31] 1357; 1358; VI-LABEL: mul_select_posk_negfabs_f16: 1359; VI: ; %bb.0: 1360; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1361; VI-NEXT: v_or_b32_e32 v1, 0x8000, v1 1362; VI-NEXT: v_mov_b32_e32 v3, 0x4400 1363; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 1364; VI-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc 1365; VI-NEXT: v_mul_f16_e32 v0, v0, v2 1366; VI-NEXT: s_setpc_b64 s[30:31] 1367; 1368; GFX11-LABEL: mul_select_posk_negfabs_f16: 1369; GFX11: ; %bb.0: 1370; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1371; 
GFX11-NEXT: v_or_b32_e32 v1, 0x8000, v1 1372; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 1373; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 1374; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x4400, v1, vcc_lo 1375; GFX11-NEXT: v_mul_f16_e32 v0, v0, v2 1376; GFX11-NEXT: s_setpc_b64 s[30:31] 1377 %cmp = icmp eq i32 %c, 0 1378 %fabs.x = call half @llvm.fabs.f16(half %x) 1379 %fneg.fabs.x = fsub half -0.000000e+00, %fabs.x 1380 %select = select i1 %cmp, half 4.0, half %fneg.fabs.x 1381 %add = fmul half %select, %y 1382 ret half %add 1383} 1384 1385define half @mul_select_negfabs_negk_f16(i32 %c, half %x, half %y) { 1386; CI-LABEL: mul_select_negfabs_negk_f16: 1387; CI: ; %bb.0: 1388; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1389; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 1390; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 1391; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 1392; CI-NEXT: v_cvt_f32_f16_e64 v1, -|v1| 1393; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 1394; CI-NEXT: v_cndmask_b32_e32 v0, -4.0, v1, vcc 1395; CI-NEXT: v_mul_f32_e32 v0, v0, v2 1396; CI-NEXT: s_setpc_b64 s[30:31] 1397; 1398; VI-LABEL: mul_select_negfabs_negk_f16: 1399; VI: ; %bb.0: 1400; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1401; VI-NEXT: v_or_b32_e32 v1, 0x8000, v1 1402; VI-NEXT: v_mov_b32_e32 v3, 0xc400 1403; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 1404; VI-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc 1405; VI-NEXT: v_mul_f16_e32 v0, v0, v2 1406; VI-NEXT: s_setpc_b64 s[30:31] 1407; 1408; GFX11-LABEL: mul_select_negfabs_negk_f16: 1409; GFX11: ; %bb.0: 1410; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1411; GFX11-NEXT: v_or_b32_e32 v1, 0x8000, v1 1412; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 1413; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 1414; GFX11-NEXT: v_cndmask_b32_e32 v0, 0xc400, v1, vcc_lo 1415; GFX11-NEXT: v_mul_f16_e32 v0, v0, v2 1416; GFX11-NEXT: s_setpc_b64 s[30:31] 1417 %cmp = icmp eq i32 %c, 0 1418 %fabs.x = call half 
@llvm.fabs.f16(half %x) 1419 %fneg.fabs.x = fsub half -0.000000e+00, %fabs.x 1420 %select = select i1 %cmp, half %fneg.fabs.x, half -4.0 1421 %add = fmul half %select, %y 1422 ret half %add 1423} 1424 1425define half @mul_select_negk_negfabs_f16(i32 %c, half %x, half %y) { 1426; CI-LABEL: mul_select_negk_negfabs_f16: 1427; CI: ; %bb.0: 1428; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1429; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 1430; CI-NEXT: v_cvt_f16_f32_e32 v2, v2 1431; CI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 1432; CI-NEXT: v_cvt_f32_f16_e64 v1, -|v1| 1433; CI-NEXT: v_cvt_f32_f16_e32 v2, v2 1434; CI-NEXT: v_cndmask_b32_e32 v0, -4.0, v1, vcc 1435; CI-NEXT: v_mul_f32_e32 v0, v0, v2 1436; CI-NEXT: s_setpc_b64 s[30:31] 1437; 1438; VI-LABEL: mul_select_negk_negfabs_f16: 1439; VI: ; %bb.0: 1440; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1441; VI-NEXT: v_or_b32_e32 v1, 0x8000, v1 1442; VI-NEXT: v_mov_b32_e32 v3, 0xc400 1443; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 1444; VI-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc 1445; VI-NEXT: v_mul_f16_e32 v0, v0, v2 1446; VI-NEXT: s_setpc_b64 s[30:31] 1447; 1448; GFX11-LABEL: mul_select_negk_negfabs_f16: 1449; GFX11: ; %bb.0: 1450; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1451; GFX11-NEXT: v_or_b32_e32 v1, 0x8000, v1 1452; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 1453; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 1454; GFX11-NEXT: v_cndmask_b32_e32 v0, 0xc400, v1, vcc_lo 1455; GFX11-NEXT: v_mul_f16_e32 v0, v0, v2 1456; GFX11-NEXT: s_setpc_b64 s[30:31] 1457 %cmp = icmp eq i32 %c, 0 1458 %fabs.x = call half @llvm.fabs.f16(half %x) 1459 %fneg.fabs.x = fsub half -0.000000e+00, %fabs.x 1460 %select = select i1 %cmp, half -4.0, half %fneg.fabs.x 1461 %add = fmul half %select, %y 1462 ret half %add 1463} 1464 1465; -------------------------------------------------------------------------------- 1466; Don't fold if fneg can fold into the source 1467; 
-------------------------------------------------------------------------------- 1468 1469define half @select_fneg_posk_src_add_f16(i32 %c, half %x, half %y) { 1470; CI-SAFE-LABEL: select_fneg_posk_src_add_f16: 1471; CI-SAFE: ; %bb.0: 1472; CI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1473; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v1, v1 1474; CI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 1475; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v1, v1 1476; CI-SAFE-NEXT: v_add_f32_e32 v1, 4.0, v1 1477; CI-SAFE-NEXT: v_cndmask_b32_e64 v0, 2.0, -v1, vcc 1478; CI-SAFE-NEXT: s_setpc_b64 s[30:31] 1479; 1480; VI-SAFE-LABEL: select_fneg_posk_src_add_f16: 1481; VI-SAFE: ; %bb.0: 1482; VI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1483; VI-SAFE-NEXT: v_add_f16_e32 v1, 4.0, v1 1484; VI-SAFE-NEXT: v_xor_b32_e32 v1, 0x8000, v1 1485; VI-SAFE-NEXT: v_mov_b32_e32 v2, 0x4000 1486; VI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 1487; VI-SAFE-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 1488; VI-SAFE-NEXT: s_setpc_b64 s[30:31] 1489; 1490; GFX11-SAFE-LABEL: select_fneg_posk_src_add_f16: 1491; GFX11-SAFE: ; %bb.0: 1492; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1493; GFX11-SAFE-NEXT: v_add_f16_e32 v1, 4.0, v1 1494; GFX11-SAFE-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 1495; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 1496; GFX11-SAFE-NEXT: v_xor_b32_e32 v1, 0x8000, v1 1497; GFX11-SAFE-NEXT: v_cndmask_b32_e32 v0, 0x4000, v1, vcc_lo 1498; GFX11-SAFE-NEXT: s_setpc_b64 s[30:31] 1499; 1500; CI-NSZ-LABEL: select_fneg_posk_src_add_f16: 1501; CI-NSZ: ; %bb.0: 1502; CI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1503; CI-NSZ-NEXT: v_cvt_f16_f32_e32 v1, v1 1504; CI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 1505; CI-NSZ-NEXT: v_cvt_f32_f16_e32 v1, v1 1506; CI-NSZ-NEXT: v_sub_f32_e32 v1, -4.0, v1 1507; CI-NSZ-NEXT: v_cndmask_b32_e32 v0, 2.0, v1, vcc 1508; CI-NSZ-NEXT: s_setpc_b64 s[30:31] 1509; 1510; VI-NSZ-LABEL: select_fneg_posk_src_add_f16: 1511; VI-NSZ: ; %bb.0: 
1512; VI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1513; VI-NSZ-NEXT: v_sub_f16_e32 v1, -4.0, v1 1514; VI-NSZ-NEXT: v_mov_b32_e32 v2, 0x4000 1515; VI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 1516; VI-NSZ-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 1517; VI-NSZ-NEXT: s_setpc_b64 s[30:31] 1518; 1519; GFX11-NSZ-LABEL: select_fneg_posk_src_add_f16: 1520; GFX11-NSZ: ; %bb.0: 1521; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1522; GFX11-NSZ-NEXT: v_sub_f16_e32 v1, -4.0, v1 1523; GFX11-NSZ-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 1524; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_2) 1525; GFX11-NSZ-NEXT: v_cndmask_b32_e32 v0, 0x4000, v1, vcc_lo 1526; GFX11-NSZ-NEXT: s_setpc_b64 s[30:31] 1527 %cmp = icmp eq i32 %c, 0 1528 %add = fadd half %x, 4.0 1529 %fneg = fneg half %add 1530 %select = select i1 %cmp, half %fneg, half 2.0 1531 ret half %select 1532} 1533 1534define half @select_fneg_posk_src_sub_f16(i32 %c, half %x) { 1535; CI-SAFE-LABEL: select_fneg_posk_src_sub_f16: 1536; CI-SAFE: ; %bb.0: 1537; CI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1538; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v1, v1 1539; CI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 1540; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v1, v1 1541; CI-SAFE-NEXT: v_add_f32_e32 v1, -4.0, v1 1542; CI-SAFE-NEXT: v_cndmask_b32_e64 v0, 2.0, -v1, vcc 1543; CI-SAFE-NEXT: s_setpc_b64 s[30:31] 1544; 1545; VI-SAFE-LABEL: select_fneg_posk_src_sub_f16: 1546; VI-SAFE: ; %bb.0: 1547; VI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1548; VI-SAFE-NEXT: v_add_f16_e32 v1, -4.0, v1 1549; VI-SAFE-NEXT: v_xor_b32_e32 v1, 0x8000, v1 1550; VI-SAFE-NEXT: v_mov_b32_e32 v2, 0x4000 1551; VI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 1552; VI-SAFE-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 1553; VI-SAFE-NEXT: s_setpc_b64 s[30:31] 1554; 1555; GFX11-SAFE-LABEL: select_fneg_posk_src_sub_f16: 1556; GFX11-SAFE: ; %bb.0: 1557; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1558; GFX11-SAFE-NEXT: v_add_f16_e32 v1, -4.0, v1 1559; 
GFX11-SAFE-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 1560; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 1561; GFX11-SAFE-NEXT: v_xor_b32_e32 v1, 0x8000, v1 1562; GFX11-SAFE-NEXT: v_cndmask_b32_e32 v0, 0x4000, v1, vcc_lo 1563; GFX11-SAFE-NEXT: s_setpc_b64 s[30:31] 1564; 1565; CI-NSZ-LABEL: select_fneg_posk_src_sub_f16: 1566; CI-NSZ: ; %bb.0: 1567; CI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1568; CI-NSZ-NEXT: v_cvt_f16_f32_e32 v1, v1 1569; CI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 1570; CI-NSZ-NEXT: v_cvt_f32_f16_e32 v1, v1 1571; CI-NSZ-NEXT: v_sub_f32_e32 v1, 4.0, v1 1572; CI-NSZ-NEXT: v_cndmask_b32_e32 v0, 2.0, v1, vcc 1573; CI-NSZ-NEXT: s_setpc_b64 s[30:31] 1574; 1575; VI-NSZ-LABEL: select_fneg_posk_src_sub_f16: 1576; VI-NSZ: ; %bb.0: 1577; VI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1578; VI-NSZ-NEXT: v_sub_f16_e32 v1, 4.0, v1 1579; VI-NSZ-NEXT: v_mov_b32_e32 v2, 0x4000 1580; VI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 1581; VI-NSZ-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 1582; VI-NSZ-NEXT: s_setpc_b64 s[30:31] 1583; 1584; GFX11-NSZ-LABEL: select_fneg_posk_src_sub_f16: 1585; GFX11-NSZ: ; %bb.0: 1586; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1587; GFX11-NSZ-NEXT: v_sub_f16_e32 v1, 4.0, v1 1588; GFX11-NSZ-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 1589; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_2) 1590; GFX11-NSZ-NEXT: v_cndmask_b32_e32 v0, 0x4000, v1, vcc_lo 1591; GFX11-NSZ-NEXT: s_setpc_b64 s[30:31] 1592 %cmp = icmp eq i32 %c, 0 1593 %add = fsub half %x, 4.0 1594 %fneg = fneg half %add 1595 %select = select i1 %cmp, half %fneg, half 2.0 1596 ret half %select 1597} 1598 1599define half @select_fneg_posk_src_mul_f16(i32 %c, half %x) { 1600; CI-LABEL: select_fneg_posk_src_mul_f16: 1601; CI: ; %bb.0: 1602; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1603; CI-NEXT: v_cvt_f16_f32_e32 v1, v1 1604; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 1605; CI-NEXT: v_cvt_f32_f16_e32 v1, v1 1606; CI-NEXT: v_mul_f32_e32 
v1, -4.0, v1 1607; CI-NEXT: v_cndmask_b32_e32 v0, 2.0, v1, vcc 1608; CI-NEXT: s_setpc_b64 s[30:31] 1609; 1610; VI-LABEL: select_fneg_posk_src_mul_f16: 1611; VI: ; %bb.0: 1612; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1613; VI-NEXT: v_mul_f16_e32 v1, -4.0, v1 1614; VI-NEXT: v_mov_b32_e32 v2, 0x4000 1615; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 1616; VI-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 1617; VI-NEXT: s_setpc_b64 s[30:31] 1618; 1619; GFX11-LABEL: select_fneg_posk_src_mul_f16: 1620; GFX11: ; %bb.0: 1621; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1622; GFX11-NEXT: v_mul_f16_e32 v1, -4.0, v1 1623; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 1624; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) 1625; GFX11-NEXT: v_cndmask_b32_e32 v0, 0x4000, v1, vcc_lo 1626; GFX11-NEXT: s_setpc_b64 s[30:31] 1627 %cmp = icmp eq i32 %c, 0 1628 %mul = fmul half %x, 4.0 1629 %fneg = fneg half %mul 1630 %select = select i1 %cmp, half %fneg, half 2.0 1631 ret half %select 1632} 1633 1634define half @select_fneg_posk_src_fma_f16(i32 %c, half %x, half %z) { 1635; CI-SAFE-LABEL: select_fneg_posk_src_fma_f16: 1636; CI-SAFE: ; %bb.0: 1637; CI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1638; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v2, v2 1639; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v1, v1 1640; CI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 1641; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v2 1642; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v1, v1 1643; CI-SAFE-NEXT: v_fma_f32 v1, v1, 4.0, v2 1644; CI-SAFE-NEXT: v_cndmask_b32_e64 v0, 2.0, -v1, vcc 1645; CI-SAFE-NEXT: s_setpc_b64 s[30:31] 1646; 1647; VI-SAFE-LABEL: select_fneg_posk_src_fma_f16: 1648; VI-SAFE: ; %bb.0: 1649; VI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1650; VI-SAFE-NEXT: v_fma_f16 v1, v1, 4.0, v2 1651; VI-SAFE-NEXT: v_xor_b32_e32 v1, 0x8000, v1 1652; VI-SAFE-NEXT: v_mov_b32_e32 v2, 0x4000 1653; VI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 1654; VI-SAFE-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 1655; VI-SAFE-NEXT: s_setpc_b64 s[30:31] 1656; 
1657; GFX11-SAFE-LABEL: select_fneg_posk_src_fma_f16: 1658; GFX11-SAFE: ; %bb.0: 1659; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1660; GFX11-SAFE-NEXT: v_fmac_f16_e32 v2, 4.0, v1 1661; GFX11-SAFE-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 1662; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 1663; GFX11-SAFE-NEXT: v_xor_b32_e32 v1, 0x8000, v2 1664; GFX11-SAFE-NEXT: v_cndmask_b32_e32 v0, 0x4000, v1, vcc_lo 1665; GFX11-SAFE-NEXT: s_setpc_b64 s[30:31] 1666; 1667; CI-NSZ-LABEL: select_fneg_posk_src_fma_f16: 1668; CI-NSZ: ; %bb.0: 1669; CI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1670; CI-NSZ-NEXT: v_cvt_f16_f32_e32 v2, v2 1671; CI-NSZ-NEXT: v_cvt_f16_f32_e32 v1, v1 1672; CI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 1673; CI-NSZ-NEXT: v_cvt_f32_f16_e32 v2, v2 1674; CI-NSZ-NEXT: v_cvt_f32_f16_e32 v1, v1 1675; CI-NSZ-NEXT: v_fma_f32 v1, v1, -4.0, -v2 1676; CI-NSZ-NEXT: v_cndmask_b32_e32 v0, 2.0, v1, vcc 1677; CI-NSZ-NEXT: s_setpc_b64 s[30:31] 1678; 1679; VI-NSZ-LABEL: select_fneg_posk_src_fma_f16: 1680; VI-NSZ: ; %bb.0: 1681; VI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1682; VI-NSZ-NEXT: v_fma_f16 v1, v1, -4.0, -v2 1683; VI-NSZ-NEXT: v_mov_b32_e32 v2, 0x4000 1684; VI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0 1685; VI-NSZ-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 1686; VI-NSZ-NEXT: s_setpc_b64 s[30:31] 1687; 1688; GFX11-NSZ-LABEL: select_fneg_posk_src_fma_f16: 1689; GFX11-NSZ: ; %bb.0: 1690; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1691; GFX11-NSZ-NEXT: v_fma_f16 v1, v1, -4.0, -v2 1692; GFX11-NSZ-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0 1693; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_2) 1694; GFX11-NSZ-NEXT: v_cndmask_b32_e32 v0, 0x4000, v1, vcc_lo 1695; GFX11-NSZ-NEXT: s_setpc_b64 s[30:31] 1696 %cmp = icmp eq i32 %c, 0 1697 %fma = call half @llvm.fma.f16(half %x, half 4.0, half %z) 1698 %fneg = fneg half %fma 1699 %select = select i1 %cmp, half %fneg, half 2.0 1700 ret half %select 1701} 1702 
; Selects between the negated result of llvm.fmuladd.f16(%x, 4.0, %z) and the
; positive constant 2.0, based on %c == 0. This has the same structure as
; select_fneg_posk_src_fma_f16, but uses fmuladd as the source of the negated
; value.
;
; For hawaii the checks show the fmuladd split into a v_mul_f32 followed by
; v_add_f32 (default) or v_sub_f32 with a -4.0 multiplier (no-signed-zeros),
; while the fiji and gfx1100 checks still use v_fma_f16 / v_fmac_f16. As in
; the fma test, the default configurations keep the negation as a separate
; modifier or xor, and the no-signed-zeros configurations fold it into the
; arithmetic.
define half @select_fneg_posk_src_fmad_f16(i32 %c, half %x, half %z) {
; CI-SAFE-LABEL: select_fneg_posk_src_fmad_f16:
; CI-SAFE: ; %bb.0:
; CI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v1, v1
; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v2, v2
; CI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v1, v1
; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v2
; CI-SAFE-NEXT: v_mul_f32_e32 v1, 4.0, v1
; CI-SAFE-NEXT: v_add_f32_e32 v1, v1, v2
; CI-SAFE-NEXT: v_cndmask_b32_e64 v0, 2.0, -v1, vcc
; CI-SAFE-NEXT: s_setpc_b64 s[30:31]
;
; VI-SAFE-LABEL: select_fneg_posk_src_fmad_f16:
; VI-SAFE: ; %bb.0:
; VI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-SAFE-NEXT: v_fma_f16 v1, v1, 4.0, v2
; VI-SAFE-NEXT: v_xor_b32_e32 v1, 0x8000, v1
; VI-SAFE-NEXT: v_mov_b32_e32 v2, 0x4000
; VI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; VI-SAFE-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; VI-SAFE-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-SAFE-LABEL: select_fneg_posk_src_fmad_f16:
; GFX11-SAFE: ; %bb.0:
; GFX11-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-SAFE-NEXT: v_fmac_f16_e32 v2, 4.0, v1
; GFX11-SAFE-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-SAFE-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-SAFE-NEXT: v_xor_b32_e32 v1, 0x8000, v2
; GFX11-SAFE-NEXT: v_cndmask_b32_e32 v0, 0x4000, v1, vcc_lo
; GFX11-SAFE-NEXT: s_setpc_b64 s[30:31]
;
; CI-NSZ-LABEL: select_fneg_posk_src_fmad_f16:
; CI-NSZ: ; %bb.0:
; CI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; CI-NSZ-NEXT: v_cvt_f16_f32_e32 v1, v1
; CI-NSZ-NEXT: v_cvt_f16_f32_e32 v2, v2
; CI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; CI-NSZ-NEXT: v_cvt_f32_f16_e32 v1, v1
; CI-NSZ-NEXT: v_cvt_f32_f16_e32 v2, v2
; CI-NSZ-NEXT: v_mul_f32_e32 v1, -4.0, v1
; CI-NSZ-NEXT: v_sub_f32_e32 v1, v1, v2
; CI-NSZ-NEXT: v_cndmask_b32_e32 v0, 2.0, v1, vcc
; CI-NSZ-NEXT: s_setpc_b64 s[30:31]
;
; VI-NSZ-LABEL: select_fneg_posk_src_fmad_f16:
; VI-NSZ: ; %bb.0:
; VI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; VI-NSZ-NEXT: v_fma_f16 v1, v1, -4.0, -v2
; VI-NSZ-NEXT: v_mov_b32_e32 v2, 0x4000
; VI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; VI-NSZ-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc
; VI-NSZ-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-NSZ-LABEL: select_fneg_posk_src_fmad_f16:
; GFX11-NSZ: ; %bb.0:
; GFX11-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NSZ-NEXT: v_fma_f16 v1, v1, -4.0, -v2
; GFX11-NSZ-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
; GFX11-NSZ-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-NSZ-NEXT: v_cndmask_b32_e32 v0, 0x4000, v1, vcc_lo
; GFX11-NSZ-NEXT: s_setpc_b64 s[30:31]
  %cmp = icmp eq i32 %c, 0
  %fmad = call half @llvm.fmuladd.f16(half %x, half 4.0, half %z)
  ; The fneg of the whole fmuladd result is the operation whose placement the
  ; checks above track.
  %fneg = fneg half %fmad
  %select = select i1 %cmp, half %fneg, half 2.0
  ret half %select
}

; Declarations of the half-precision intrinsics called by the tests in this
; file.
declare half @llvm.fabs.f16(half) #0
declare half @llvm.fma.f16(half, half, half) #0
declare half @llvm.fmuladd.f16(half, half, half) #0

; Attribute group referenced by the intrinsic declarations above.
attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
