1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx700 < %s | FileCheck -check-prefixes=GCN,GFX7 %s 3; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s 4; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 < %s | FileCheck -check-prefixes=GFX11 %s 5 6define i32 @fneg_xor_select_i32(i1 %cond, i32 %arg0, i32 %arg1) { 7; GCN-LABEL: fneg_xor_select_i32: 8; GCN: ; %bb.0: 9; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10; GCN-NEXT: v_and_b32_e32 v0, 1, v0 11; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 12; GCN-NEXT: v_cndmask_b32_e64 v0, -v2, -v1, vcc 13; GCN-NEXT: s_setpc_b64 s[30:31] 14; 15; GFX11-LABEL: fneg_xor_select_i32: 16; GFX11: ; %bb.0: 17; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18; GFX11-NEXT: v_and_b32_e32 v0, 1, v0 19; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 20; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 21; GFX11-NEXT: v_cndmask_b32_e64 v0, -v2, -v1, vcc_lo 22; GFX11-NEXT: s_setpc_b64 s[30:31] 23 %select = select i1 %cond, i32 %arg0, i32 %arg1 24 %fneg = xor i32 %select, -2147483648 25 ret i32 %fneg 26} 27 28define <2 x i32> @fneg_xor_select_v2i32(<2 x i1> %cond, <2 x i32> %arg0, <2 x i32> %arg1) { 29; GCN-LABEL: fneg_xor_select_v2i32: 30; GCN: ; %bb.0: 31; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 32; GCN-NEXT: v_and_b32_e32 v0, 1, v0 33; GCN-NEXT: v_and_b32_e32 v1, 1, v1 34; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 35; GCN-NEXT: v_cndmask_b32_e64 v0, -v4, -v2, vcc 36; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 37; GCN-NEXT: v_cndmask_b32_e64 v1, -v5, -v3, vcc 38; GCN-NEXT: s_setpc_b64 s[30:31] 39; 40; GFX11-LABEL: fneg_xor_select_v2i32: 41; GFX11: ; %bb.0: 42; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 43; GFX11-NEXT: v_and_b32_e32 v0, 1, v0 44; GFX11-NEXT: v_and_b32_e32 v1, 1, v1 45; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3) 46; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, 
v0 47; GFX11-NEXT: v_cndmask_b32_e64 v0, -v4, -v2, vcc_lo 48; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1 49; GFX11-NEXT: v_cndmask_b32_e64 v1, -v5, -v3, vcc_lo 50; GFX11-NEXT: s_setpc_b64 s[30:31] 51 %select = select <2 x i1> %cond, <2 x i32> %arg0, <2 x i32> %arg1 52 %fneg = xor <2 x i32> %select, <i32 -2147483648, i32 -2147483648> 53 ret <2 x i32> %fneg 54} 55 56define i32 @fneg_xor_select_i32_multi_use(i1 %cond, i32 %arg0, i32 %arg1, ptr addrspace(1) %ptr) { 57; GFX7-LABEL: fneg_xor_select_i32_multi_use: 58; GFX7: ; %bb.0: 59; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 60; GFX7-NEXT: v_and_b32_e32 v0, 1, v0 61; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 62; GFX7-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 63; GFX7-NEXT: flat_store_dword v[3:4], v0 64; GFX7-NEXT: v_xor_b32_e32 v0, 0x80000000, v0 65; GFX7-NEXT: s_waitcnt vmcnt(0) 66; GFX7-NEXT: s_setpc_b64 s[30:31] 67; 68; GFX9-LABEL: fneg_xor_select_i32_multi_use: 69; GFX9: ; %bb.0: 70; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 71; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 72; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 73; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 74; GFX9-NEXT: global_store_dword v[3:4], v0, off 75; GFX9-NEXT: v_xor_b32_e32 v0, 0x80000000, v0 76; GFX9-NEXT: s_waitcnt vmcnt(0) 77; GFX9-NEXT: s_setpc_b64 s[30:31] 78; 79; GFX11-LABEL: fneg_xor_select_i32_multi_use: 80; GFX11: ; %bb.0: 81; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 82; GFX11-NEXT: v_and_b32_e32 v0, 1, v0 83; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) 84; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 85; GFX11-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc_lo 86; GFX11-NEXT: v_xor_b32_e32 v0, 0x80000000, v1 87; GFX11-NEXT: global_store_b32 v[3:4], v1, off 88; GFX11-NEXT: s_setpc_b64 s[30:31] 89 %select = select i1 %cond, i32 %arg0, i32 %arg1 90 store i32 %select, ptr addrspace(1) %ptr 91 %fneg = xor i32 %select, -2147483648 92 ret i32 %fneg 93} 94 95define i64 @fneg_xor_select_i64(i1 %cond, 
i64 %arg0, i64 %arg1) { 96; GCN-LABEL: fneg_xor_select_i64: 97; GCN: ; %bb.0: 98; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 99; GCN-NEXT: v_and_b32_e32 v0, 1, v0 100; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 101; GCN-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc 102; GCN-NEXT: v_cndmask_b32_e64 v1, -v4, -v2, vcc 103; GCN-NEXT: s_setpc_b64 s[30:31] 104; 105; GFX11-LABEL: fneg_xor_select_i64: 106; GFX11: ; %bb.0: 107; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 108; GFX11-NEXT: v_and_b32_e32 v0, 1, v0 109; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 110; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 111; GFX11-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc_lo 112; GFX11-NEXT: v_cndmask_b32_e64 v1, -v4, -v2, vcc_lo 113; GFX11-NEXT: s_setpc_b64 s[30:31] 114 %select = select i1 %cond, i64 %arg0, i64 %arg1 115 %fneg = xor i64 %select, 9223372036854775808 116 ret i64 %fneg 117} 118 119define <2 x i64> @fneg_xor_select_v2i64(<2 x i1> %cond, <2 x i64> %arg0, <2 x i64> %arg1) { 120; GCN-LABEL: fneg_xor_select_v2i64: 121; GCN: ; %bb.0: 122; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 123; GCN-NEXT: v_and_b32_e32 v1, 1, v1 124; GCN-NEXT: v_and_b32_e32 v0, 1, v0 125; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 126; GCN-NEXT: v_cmp_eq_u32_e64 s[4:5], 1, v1 127; GCN-NEXT: v_cndmask_b32_e32 v0, v6, v2, vcc 128; GCN-NEXT: v_cndmask_b32_e64 v2, v8, v4, s[4:5] 129; GCN-NEXT: v_cndmask_b32_e64 v1, -v7, -v3, vcc 130; GCN-NEXT: v_cndmask_b32_e64 v3, -v9, -v5, s[4:5] 131; GCN-NEXT: s_setpc_b64 s[30:31] 132; 133; GFX11-LABEL: fneg_xor_select_v2i64: 134; GFX11: ; %bb.0: 135; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 136; GFX11-NEXT: v_and_b32_e32 v0, 1, v0 137; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) 138; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 139; GFX11-NEXT: v_dual_cndmask_b32 v0, v6, v2 :: v_dual_and_b32 v1, 1, v1 140; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 1, v1 141; GFX11-NEXT: v_cndmask_b32_e64 v1, -v7, -v3, vcc_lo 142; GFX11-NEXT: 
s_delay_alu instid0(VALU_DEP_2) 143; GFX11-NEXT: v_cndmask_b32_e64 v2, v8, v4, s0 144; GFX11-NEXT: v_cndmask_b32_e64 v3, -v9, -v5, s0 145; GFX11-NEXT: s_setpc_b64 s[30:31] 146 %select = select <2 x i1> %cond, <2 x i64> %arg0, <2 x i64> %arg1 147 %fneg = xor <2 x i64> %select, <i64 9223372036854775808, i64 9223372036854775808> 148 ret <2 x i64> %fneg 149} 150 151define i16 @fneg_xor_select_i16(i1 %cond, i16 %arg0, i16 %arg1) { 152; GCN-LABEL: fneg_xor_select_i16: 153; GCN: ; %bb.0: 154; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 155; GCN-NEXT: v_and_b32_e32 v0, 1, v0 156; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 157; GCN-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 158; GCN-NEXT: v_xor_b32_e32 v0, 0xffff8000, v0 159; GCN-NEXT: s_setpc_b64 s[30:31] 160; 161; GFX11-LABEL: fneg_xor_select_i16: 162; GFX11: ; %bb.0: 163; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 164; GFX11-NEXT: v_and_b32_e32 v0, 1, v0 165; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) 166; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 167; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo 168; GFX11-NEXT: v_xor_b32_e32 v0, 0xffff8000, v0 169; GFX11-NEXT: s_setpc_b64 s[30:31] 170 %select = select i1 %cond, i16 %arg0, i16 %arg1 171 %fneg = xor i16 %select, -32768 172 ret i16 %fneg 173} 174 175define <2 x i16> @fneg_xor_select_v2i16(<2 x i1> %cond, <2 x i16> %arg0, <2 x i16> %arg1) { 176; GFX7-LABEL: fneg_xor_select_v2i16: 177; GFX7: ; %bb.0: 178; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 179; GFX7-NEXT: v_and_b32_e32 v0, 1, v0 180; GFX7-NEXT: v_and_b32_e32 v1, 1, v1 181; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 182; GFX7-NEXT: v_cndmask_b32_e32 v0, v4, v2, vcc 183; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 184; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v3, vcc 185; GFX7-NEXT: v_xor_b32_e32 v1, 0x8000, v1 186; GFX7-NEXT: v_xor_b32_e32 v0, 0x8000, v0 187; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v1 188; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0 189; GFX7-NEXT: 
v_or_b32_e32 v0, v0, v2 190; GFX7-NEXT: v_and_b32_e32 v1, 0xffff, v1 191; GFX7-NEXT: s_setpc_b64 s[30:31] 192; 193; GFX9-LABEL: fneg_xor_select_v2i16: 194; GFX9: ; %bb.0: 195; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 196; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 197; GFX9-NEXT: v_and_b32_e32 v1, 1, v1 198; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 199; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc 200; GFX9-NEXT: v_lshrrev_b32_e32 v2, 16, v2 201; GFX9-NEXT: v_lshrrev_b32_e32 v3, 16, v3 202; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 203; GFX9-NEXT: v_cndmask_b32_e32 v1, v3, v2, vcc 204; GFX9-NEXT: s_mov_b32 s4, 0x5040100 205; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4 206; GFX9-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 207; GFX9-NEXT: s_setpc_b64 s[30:31] 208; 209; GFX11-LABEL: fneg_xor_select_v2i16: 210; GFX11: ; %bb.0: 211; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 212; GFX11-NEXT: v_and_b32_e32 v0, 1, v0 213; GFX11-NEXT: v_lshrrev_b32_e32 v4, 16, v2 214; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v3 215; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1) 216; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 217; GFX11-NEXT: v_dual_cndmask_b32 v0, v3, v2 :: v_dual_and_b32 v1, 1, v1 218; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1 219; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1) 220; GFX11-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc_lo 221; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 222; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 223; GFX11-NEXT: v_xor_b32_e32 v0, 0x80008000, v0 224; GFX11-NEXT: s_setpc_b64 s[30:31] 225 %select = select <2 x i1> %cond, <2 x i16> %arg0, <2 x i16> %arg1 226 %fneg = xor <2 x i16> %select, <i16 -32768, i16 -32768> 227 ret <2 x i16> %fneg 228} 229 230define i16 @fneg_xor_select_i16_multi_use(i1 %cond, i16 %arg0, i16 %arg1, ptr addrspace(1) %ptr) { 231; GFX7-LABEL: fneg_xor_select_i16_multi_use: 232; GFX7: ; %bb.0: 233; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 234; 
GFX7-NEXT: v_and_b32_e32 v0, 1, v0 235; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 236; GFX7-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 237; GFX7-NEXT: flat_store_short v[3:4], v0 238; GFX7-NEXT: v_xor_b32_e32 v0, 0xffff8000, v0 239; GFX7-NEXT: s_waitcnt vmcnt(0) 240; GFX7-NEXT: s_setpc_b64 s[30:31] 241; 242; GFX9-LABEL: fneg_xor_select_i16_multi_use: 243; GFX9: ; %bb.0: 244; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 245; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 246; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 247; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc 248; GFX9-NEXT: global_store_short v[3:4], v0, off 249; GFX9-NEXT: v_xor_b32_e32 v0, 0xffff8000, v0 250; GFX9-NEXT: s_waitcnt vmcnt(0) 251; GFX9-NEXT: s_setpc_b64 s[30:31] 252; 253; GFX11-LABEL: fneg_xor_select_i16_multi_use: 254; GFX11: ; %bb.0: 255; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 256; GFX11-NEXT: v_and_b32_e32 v0, 1, v0 257; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) 258; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 259; GFX11-NEXT: v_cndmask_b32_e32 v1, v2, v1, vcc_lo 260; GFX11-NEXT: v_xor_b32_e32 v0, 0xffff8000, v1 261; GFX11-NEXT: global_store_b16 v[3:4], v1, off 262; GFX11-NEXT: s_setpc_b64 s[30:31] 263 %select = select i1 %cond, i16 %arg0, i16 %arg1 264 store i16 %select, ptr addrspace(1) %ptr 265 %fneg = xor i16 %select, -32768 266 ret i16 %fneg 267} 268 269define i64 @fneg_xor_select_i64_multi_user(i1 %cond, i64 %arg0, i64 %arg1, ptr addrspace(1) %ptr) { 270; GFX7-LABEL: fneg_xor_select_i64_multi_user: 271; GFX7: ; %bb.0: 272; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 273; GFX7-NEXT: v_and_b32_e32 v0, 1, v0 274; GFX7-NEXT: v_mov_b32_e32 v7, v1 275; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 276; GFX7-NEXT: v_cndmask_b32_e32 v1, v4, v2, vcc 277; GFX7-NEXT: v_cndmask_b32_e32 v0, v3, v7, vcc 278; GFX7-NEXT: flat_store_dwordx2 v[5:6], v[0:1] 279; GFX7-NEXT: v_cndmask_b32_e64 v1, -v4, -v2, vcc 280; GFX7-NEXT: s_waitcnt vmcnt(0) 281; GFX7-NEXT: 
s_setpc_b64 s[30:31] 282; 283; GFX9-LABEL: fneg_xor_select_i64_multi_user: 284; GFX9: ; %bb.0: 285; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 286; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 287; GFX9-NEXT: v_mov_b32_e32 v7, v1 288; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 289; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v2, vcc 290; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v7, vcc 291; GFX9-NEXT: global_store_dwordx2 v[5:6], v[0:1], off 292; GFX9-NEXT: v_cndmask_b32_e64 v1, -v4, -v2, vcc 293; GFX9-NEXT: s_waitcnt vmcnt(0) 294; GFX9-NEXT: s_setpc_b64 s[30:31] 295; 296; GFX11-LABEL: fneg_xor_select_i64_multi_user: 297; GFX11: ; %bb.0: 298; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 299; GFX11-NEXT: v_dual_mov_b32 v7, v1 :: v_dual_and_b32 v0, 1, v0 300; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 301; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 302; GFX11-NEXT: v_dual_cndmask_b32 v1, v4, v2 :: v_dual_cndmask_b32 v0, v3, v7 303; GFX11-NEXT: v_cndmask_b32_e64 v2, -v4, -v2, vcc_lo 304; GFX11-NEXT: global_store_b64 v[5:6], v[0:1], off 305; GFX11-NEXT: v_mov_b32_e32 v1, v2 306; GFX11-NEXT: s_setpc_b64 s[30:31] 307 %select = select i1 %cond, i64 %arg0, i64 %arg1 308 store i64 %select, ptr addrspace(1) %ptr 309 %fneg = xor i64 %select, 9223372036854775808 310 ret i64 %fneg 311} 312 313define i32 @select_fneg_xor_select_i32(i1 %cond0, i1 %cond1, i32 %arg0, i32 %arg1) { 314; GCN-LABEL: select_fneg_xor_select_i32: 315; GCN: ; %bb.0: 316; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 317; GCN-NEXT: v_and_b32_e32 v0, 1, v0 318; GCN-NEXT: v_xor_b32_e32 v2, 0x80000000, v2 319; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 320; GCN-NEXT: v_and_b32_e32 v1, 1, v1 321; GCN-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc 322; GCN-NEXT: v_xor_b32_e32 v2, 0x80000000, v0 323; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 324; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 325; GCN-NEXT: s_setpc_b64 s[30:31] 326; 327; GFX11-LABEL: select_fneg_xor_select_i32: 328; GFX11: ; %bb.0: 
329; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 330; GFX11-NEXT: v_and_b32_e32 v0, 1, v0 331; GFX11-NEXT: v_xor_b32_e32 v2, 0x80000000, v2 332; GFX11-NEXT: v_and_b32_e32 v1, 1, v1 333; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 334; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 335; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc_lo 336; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) 337; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1 338; GFX11-NEXT: v_xor_b32_e32 v2, 0x80000000, v0 339; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 340; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo 341; GFX11-NEXT: s_setpc_b64 s[30:31] 342 %fneg0 = xor i32 %arg0, -2147483648 343 %select0 = select i1 %cond0, i32 %arg1, i32 %fneg0 344 %fneg1 = xor i32 %select0, -2147483648 345 %select1 = select i1 %cond1, i32 %fneg1, i32 %select0 346 ret i32 %select1 347} 348 349define float @select_fneg_select_f32(i1 %cond0, i1 %cond1, float %arg0, float %arg1) { 350; GCN-LABEL: select_fneg_select_f32: 351; GCN: ; %bb.0: 352; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 353; GCN-NEXT: v_and_b32_e32 v0, 1, v0 354; GCN-NEXT: v_and_b32_e32 v1, 1, v1 355; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 356; GCN-NEXT: v_cndmask_b32_e64 v0, -v2, v3, vcc 357; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 358; GCN-NEXT: v_cndmask_b32_e64 v0, v0, -v0, vcc 359; GCN-NEXT: s_setpc_b64 s[30:31] 360; 361; GFX11-LABEL: select_fneg_select_f32: 362; GFX11: ; %bb.0: 363; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 364; GFX11-NEXT: v_and_b32_e32 v0, 1, v0 365; GFX11-NEXT: v_and_b32_e32 v1, 1, v1 366; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3) 367; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 368; GFX11-NEXT: v_cndmask_b32_e64 v0, -v2, v3, vcc_lo 369; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1 370; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) 371; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, -v0, vcc_lo 372; 
GFX11-NEXT: s_setpc_b64 s[30:31] 373 %fneg0 = fneg float %arg0 374 %select0 = select i1 %cond0, float %arg1, float %fneg0 375 %fneg1 = fneg float %select0 376 %select1 = select i1 %cond1, float %fneg1, float %select0 377 ret float %select1 378} 379 380define double @fneg_xor_select_f64(i1 %cond, double %arg0, double %arg1) { 381; GCN-LABEL: fneg_xor_select_f64: 382; GCN: ; %bb.0: 383; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 384; GCN-NEXT: v_and_b32_e32 v0, 1, v0 385; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 386; GCN-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc 387; GCN-NEXT: v_cndmask_b32_e64 v1, -v4, -v2, vcc 388; GCN-NEXT: s_setpc_b64 s[30:31] 389; 390; GFX11-LABEL: fneg_xor_select_f64: 391; GFX11: ; %bb.0: 392; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 393; GFX11-NEXT: v_and_b32_e32 v0, 1, v0 394; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 395; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 396; GFX11-NEXT: v_cndmask_b32_e32 v0, v3, v1, vcc_lo 397; GFX11-NEXT: v_cndmask_b32_e64 v1, -v4, -v2, vcc_lo 398; GFX11-NEXT: s_setpc_b64 s[30:31] 399 %select = select i1 %cond, double %arg0, double %arg1 400 %fneg = fneg double %select 401 ret double %fneg 402} 403 404define double @fneg_xor_select_f64_multi_user(i1 %cond, double %arg0, double %arg1, ptr addrspace(1) %ptr) { 405; GFX7-LABEL: fneg_xor_select_f64_multi_user: 406; GFX7: ; %bb.0: 407; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 408; GFX7-NEXT: v_and_b32_e32 v0, 1, v0 409; GFX7-NEXT: v_mov_b32_e32 v7, v1 410; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 411; GFX7-NEXT: v_cndmask_b32_e32 v1, v4, v2, vcc 412; GFX7-NEXT: v_cndmask_b32_e32 v0, v3, v7, vcc 413; GFX7-NEXT: flat_store_dwordx2 v[5:6], v[0:1] 414; GFX7-NEXT: v_xor_b32_e32 v1, 0x80000000, v1 415; GFX7-NEXT: s_waitcnt vmcnt(0) 416; GFX7-NEXT: s_setpc_b64 s[30:31] 417; 418; GFX9-LABEL: fneg_xor_select_f64_multi_user: 419; GFX9: ; %bb.0: 420; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 421; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 422; GFX9-NEXT: 
v_mov_b32_e32 v7, v1 423; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 424; GFX9-NEXT: v_cndmask_b32_e32 v1, v4, v2, vcc 425; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v7, vcc 426; GFX9-NEXT: global_store_dwordx2 v[5:6], v[0:1], off 427; GFX9-NEXT: v_xor_b32_e32 v1, 0x80000000, v1 428; GFX9-NEXT: s_waitcnt vmcnt(0) 429; GFX9-NEXT: s_setpc_b64 s[30:31] 430; 431; GFX11-LABEL: fneg_xor_select_f64_multi_user: 432; GFX11: ; %bb.0: 433; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 434; GFX11-NEXT: v_dual_mov_b32 v7, v1 :: v_dual_and_b32 v0, 1, v0 435; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 436; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 437; GFX11-NEXT: v_dual_cndmask_b32 v1, v4, v2 :: v_dual_cndmask_b32 v0, v3, v7 438; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 439; GFX11-NEXT: v_xor_b32_e32 v2, 0x80000000, v1 440; GFX11-NEXT: global_store_b64 v[5:6], v[0:1], off 441; GFX11-NEXT: v_mov_b32_e32 v1, v2 442; GFX11-NEXT: s_setpc_b64 s[30:31] 443 %select = select i1 %cond, double %arg0, double %arg1 444 store double %select, ptr addrspace(1) %ptr 445 %fneg = fneg double %select 446 ret double %fneg 447} 448 449define double @fneg_xor_select_i64_user_with_srcmods(i1 %cond, i64 %arg0, i64 %arg1) { 450; GCN-LABEL: fneg_xor_select_i64_user_with_srcmods: 451; GCN: ; %bb.0: 452; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 453; GCN-NEXT: v_and_b32_e32 v0, 1, v0 454; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 455; GCN-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc 456; GCN-NEXT: v_cndmask_b32_e32 v1, v3, v1, vcc 457; GCN-NEXT: v_add_f64 v[0:1], -v[1:2], 2.0 458; GCN-NEXT: s_setpc_b64 s[30:31] 459; 460; GFX11-LABEL: fneg_xor_select_i64_user_with_srcmods: 461; GFX11: ; %bb.0: 462; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 463; GFX11-NEXT: v_and_b32_e32 v0, 1, v0 464; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) 465; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 466; GFX11-NEXT: v_dual_cndmask_b32 v1, v3, 
v1 :: v_dual_cndmask_b32 v2, v4, v2 467; GFX11-NEXT: v_add_f64 v[0:1], -v[1:2], 2.0 468; GFX11-NEXT: s_setpc_b64 s[30:31] 469 %select = select i1 %cond, i64 %arg0, i64 %arg1 470 %fneg = xor i64 %select, 9223372036854775808 471 %cast = bitcast i64 %fneg to double 472 %add = fadd double %cast, 2.0 473 ret double %add 474} 475 476define double @select_fneg_select_fneg_f64(i1 %cond0, i1 %cond1, double %arg0, double %arg1) { 477; GCN-LABEL: select_fneg_select_fneg_f64: 478; GCN: ; %bb.0: 479; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 480; GCN-NEXT: v_and_b32_e32 v0, 1, v0 481; GCN-NEXT: v_xor_b32_e32 v3, 0x80000000, v3 482; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 483; GCN-NEXT: v_and_b32_e32 v1, 1, v1 484; GCN-NEXT: v_cndmask_b32_e32 v0, v2, v4, vcc 485; GCN-NEXT: v_cndmask_b32_e32 v2, v3, v5, vcc 486; GCN-NEXT: v_xor_b32_e32 v3, 0x80000000, v2 487; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 488; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc 489; GCN-NEXT: s_setpc_b64 s[30:31] 490; 491; GFX11-LABEL: select_fneg_select_fneg_f64: 492; GFX11: ; %bb.0: 493; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 494; GFX11-NEXT: v_and_b32_e32 v0, 1, v0 495; GFX11-NEXT: v_xor_b32_e32 v3, 0x80000000, v3 496; GFX11-NEXT: v_and_b32_e32 v1, 1, v1 497; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4) 498; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 499; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v4, vcc_lo 500; GFX11-NEXT: v_cndmask_b32_e32 v2, v3, v5, vcc_lo 501; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2) 502; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1 503; GFX11-NEXT: v_xor_b32_e32 v3, 0x80000000, v2 504; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 505; GFX11-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo 506; GFX11-NEXT: s_setpc_b64 s[30:31] 507 %fneg0 = fneg double %arg0 508 %select0 = select i1 %cond0, double %arg1, double %fneg0 509 %fneg1 = fneg double %select0 510 %select1 = select i1 %cond1, double %fneg1, 
double %select0 511 ret double %select1 512} 513 514define i64 @select_fneg_xor_select_i64(i1 %cond0, i1 %cond1, i64 %arg0, i64 %arg1) { 515; GCN-LABEL: select_fneg_xor_select_i64: 516; GCN: ; %bb.0: 517; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 518; GCN-NEXT: v_and_b32_e32 v0, 1, v0 519; GCN-NEXT: v_xor_b32_e32 v3, 0x80000000, v3 520; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 521; GCN-NEXT: v_and_b32_e32 v1, 1, v1 522; GCN-NEXT: v_cndmask_b32_e32 v0, v2, v4, vcc 523; GCN-NEXT: v_cndmask_b32_e32 v2, v3, v5, vcc 524; GCN-NEXT: v_xor_b32_e32 v3, 0x80000000, v2 525; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 526; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc 527; GCN-NEXT: s_setpc_b64 s[30:31] 528; 529; GFX11-LABEL: select_fneg_xor_select_i64: 530; GFX11: ; %bb.0: 531; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 532; GFX11-NEXT: v_and_b32_e32 v0, 1, v0 533; GFX11-NEXT: v_xor_b32_e32 v3, 0x80000000, v3 534; GFX11-NEXT: v_and_b32_e32 v1, 1, v1 535; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4) 536; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 537; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v4, vcc_lo 538; GFX11-NEXT: v_cndmask_b32_e32 v2, v3, v5, vcc_lo 539; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2) 540; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1 541; GFX11-NEXT: v_xor_b32_e32 v3, 0x80000000, v2 542; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 543; GFX11-NEXT: v_cndmask_b32_e32 v1, v2, v3, vcc_lo 544; GFX11-NEXT: s_setpc_b64 s[30:31] 545 %fneg0 = xor i64 %arg0, 9223372036854775808 546 %select0 = select i1 %cond0, i64 %arg1, i64 %fneg0 547 %fneg1 = xor i64 %select0, 9223372036854775808 548 %select1 = select i1 %cond1, i64 %fneg1, i64 %select0 549 ret i64 %select1 550} 551 552define half @select_fneg_select_f16(i1 %cond0, i1 %cond1, half %arg0, half %arg1) { 553; GFX7-LABEL: select_fneg_select_f16: 554; GFX7: ; %bb.0: 555; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 556; GFX7-NEXT: 
v_cvt_f16_f32_e32 v3, v3 557; GFX7-NEXT: v_cvt_f16_f32_e64 v2, -v2 558; GFX7-NEXT: v_and_b32_e32 v0, 1, v0 559; GFX7-NEXT: v_and_b32_e32 v1, 1, v1 560; GFX7-NEXT: v_cvt_f32_f16_e32 v3, v3 561; GFX7-NEXT: v_cvt_f32_f16_e32 v2, v2 562; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 563; GFX7-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc 564; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 565; GFX7-NEXT: v_cndmask_b32_e64 v0, v0, -v0, vcc 566; GFX7-NEXT: s_setpc_b64 s[30:31] 567; 568; GFX9-LABEL: select_fneg_select_f16: 569; GFX9: ; %bb.0: 570; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 571; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 572; GFX9-NEXT: v_xor_b32_e32 v2, 0x8000, v2 573; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 574; GFX9-NEXT: v_and_b32_e32 v1, 1, v1 575; GFX9-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc 576; GFX9-NEXT: v_xor_b32_e32 v2, 0x8000, v0 577; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 578; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 579; GFX9-NEXT: s_setpc_b64 s[30:31] 580; 581; GFX11-LABEL: select_fneg_select_f16: 582; GFX11: ; %bb.0: 583; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 584; GFX11-NEXT: v_and_b32_e32 v0, 1, v0 585; GFX11-NEXT: v_xor_b32_e32 v2, 0x8000, v2 586; GFX11-NEXT: v_and_b32_e32 v1, 1, v1 587; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 588; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 589; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc_lo 590; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) 591; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1 592; GFX11-NEXT: v_xor_b32_e32 v2, 0x8000, v0 593; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 594; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo 595; GFX11-NEXT: s_setpc_b64 s[30:31] 596 %fneg0 = fneg half %arg0 597 %select0 = select i1 %cond0, half %arg1, half %fneg0 598 %fneg1 = fneg half %select0 599 %select1 = select i1 %cond1, half %fneg1, half %select0 600 ret half %select1 601} 602 603define i16 @select_fneg_xor_select_i16(i1 %cond0, 
i1 %cond1, i16 %arg0, i16 %arg1) { 604; GCN-LABEL: select_fneg_xor_select_i16: 605; GCN: ; %bb.0: 606; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 607; GCN-NEXT: v_and_b32_e32 v0, 1, v0 608; GCN-NEXT: v_xor_b32_e32 v2, 0xffff8000, v2 609; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 610; GCN-NEXT: v_and_b32_e32 v1, 1, v1 611; GCN-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc 612; GCN-NEXT: v_xor_b32_e32 v2, 0xffff8000, v0 613; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 614; GCN-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 615; GCN-NEXT: s_setpc_b64 s[30:31] 616; 617; GFX11-LABEL: select_fneg_xor_select_i16: 618; GFX11: ; %bb.0: 619; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 620; GFX11-NEXT: v_and_b32_e32 v0, 1, v0 621; GFX11-NEXT: v_xor_b32_e32 v2, 0xffff8000, v2 622; GFX11-NEXT: v_and_b32_e32 v1, 1, v1 623; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 624; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 625; GFX11-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc_lo 626; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) 627; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1 628; GFX11-NEXT: v_xor_b32_e32 v2, 0xffff8000, v0 629; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 630; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo 631; GFX11-NEXT: s_setpc_b64 s[30:31] 632 %fneg0 = xor i16 %arg0, -32768 633 %select0 = select i1 %cond0, i16 %arg1, i16 %fneg0 634 %fneg1 = xor i16 %select0, -32768 635 %select1 = select i1 %cond1, i16 %fneg1, i16 %select0 636 ret i16 %select1 637} 638 639define <2 x half> @select_fneg_select_v2f16(<2 x i1> %cond0, <2 x i1> %cond1, <2 x half> %arg0, <2 x half> %arg1) { 640; GFX7-LABEL: select_fneg_select_v2f16: 641; GFX7: ; %bb.0: 642; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 643; GFX7-NEXT: v_cvt_f16_f32_e32 v5, v5 644; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v4 645; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v6 646; GFX7-NEXT: v_cvt_f16_f32_e32 v7, v7 647; GFX7-NEXT: v_lshlrev_b32_e32 v5, 16, v5 648; 
GFX7-NEXT: v_or_b32_e32 v4, v4, v5 649; GFX7-NEXT: v_xor_b32_e32 v4, 0x80008000, v4 650; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v6 651; GFX7-NEXT: v_cvt_f32_f16_e32 v6, v7 652; GFX7-NEXT: v_lshrrev_b32_e32 v7, 16, v4 653; GFX7-NEXT: v_cvt_f32_f16_e32 v7, v7 654; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4 655; GFX7-NEXT: v_and_b32_e32 v1, 1, v1 656; GFX7-NEXT: v_and_b32_e32 v0, 1, v0 657; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 658; GFX7-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc 659; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 660; GFX7-NEXT: v_cvt_f16_f32_e32 v6, v1 661; GFX7-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc 662; GFX7-NEXT: v_cvt_f16_f32_e32 v4, v0 663; GFX7-NEXT: v_and_b32_e32 v2, 1, v2 664; GFX7-NEXT: v_lshlrev_b32_e32 v5, 16, v6 665; GFX7-NEXT: v_and_b32_e32 v3, 1, v3 666; GFX7-NEXT: v_or_b32_e32 v4, v4, v5 667; GFX7-NEXT: v_xor_b32_e32 v4, 0x80008000, v4 668; GFX7-NEXT: v_cvt_f32_f16_e32 v5, v4 669; GFX7-NEXT: v_lshrrev_b32_e32 v4, 16, v4 670; GFX7-NEXT: v_cvt_f32_f16_e32 v4, v4 671; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 672; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v5, vcc 673; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v3 674; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v4, vcc 675; GFX7-NEXT: s_setpc_b64 s[30:31] 676; 677; GFX9-LABEL: select_fneg_select_v2f16: 678; GFX9: ; %bb.0: 679; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 680; GFX9-NEXT: v_and_b32_e32 v1, 1, v1 681; GFX9-NEXT: v_xor_b32_e32 v4, 0x80008000, v4 682; GFX9-NEXT: v_and_b32_e32 v0, 1, v0 683; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v5 684; GFX9-NEXT: v_lshrrev_b32_e32 v7, 16, v4 685; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1 686; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc 687; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 688; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc 689; GFX9-NEXT: s_mov_b32 s4, 0x5040100 690; GFX9-NEXT: v_perm_b32 v4, v1, v0, s4 691; GFX9-NEXT: v_and_b32_e32 v3, 1, v3 692; GFX9-NEXT: v_xor_b32_e32 v4, 0x80008000, v4 693; GFX9-NEXT: v_and_b32_e32 v2, 1, v2 694; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v4 
695; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v3 696; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 697; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2 698; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 699; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4 700; GFX9-NEXT: s_setpc_b64 s[30:31] 701; 702; GFX11-LABEL: select_fneg_select_v2f16: 703; GFX11: ; %bb.0: 704; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 705; GFX11-NEXT: v_xor_b32_e32 v4, 0x80008000, v4 706; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v5 707; GFX11-NEXT: v_and_b32_e32 v3, 1, v3 708; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1) 709; GFX11-NEXT: v_lshrrev_b32_e32 v7, 16, v4 710; GFX11-NEXT: v_and_b32_e32 v0, 1, v0 711; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0 712; GFX11-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc_lo 713; GFX11-NEXT: v_and_b32_e32 v1, 1, v1 714; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) 715; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1 716; GFX11-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc_lo 717; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3 718; GFX11-NEXT: v_perm_b32 v4, v1, v0, 0x5040100 719; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 720; GFX11-NEXT: v_xor_b32_e32 v4, 0x80008000, v4 721; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v4 722; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 723; GFX11-NEXT: v_dual_cndmask_b32 v1, v1, v5 :: v_dual_and_b32 v2, 1, v2 724; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v2 725; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo 726; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 727; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100 728; GFX11-NEXT: s_setpc_b64 s[30:31] 729 %fneg0 = fneg <2 x half> %arg0 730 %select0 = select <2 x i1> %cond0, <2 x half> %arg1, <2 x half> %fneg0 731 %fneg1 = fneg <2 x half> %select0 732 %select1 = select <2 x i1> %cond1, <2 x half> %fneg1, <2 x half> %select0 733 ret <2 x half> %select1 734} 735 736define 
<2 x i16> @select_fneg_xor_select_v2i16(<2 x i1> %cond0, <2 x i1> %cond1, <2 x i16> %arg0, <2 x i16> %arg1) {
; GFX7-LABEL: select_fneg_xor_select_v2i16:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_and_b32_e32 v1, 1, v1
; GFX7-NEXT: v_and_b32_e32 v0, 1, v0
; GFX7-NEXT: v_xor_b32_e32 v5, 0xffff8000, v5
; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
; GFX7-NEXT: v_and_b32_e32 v3, 1, v3
; GFX7-NEXT: v_xor_b32_e32 v4, 0xffff8000, v4
; GFX7-NEXT: v_cndmask_b32_e32 v1, v5, v7, vcc
; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
; GFX7-NEXT: v_and_b32_e32 v2, 1, v2
; GFX7-NEXT: v_cndmask_b32_e32 v0, v4, v6, vcc
; GFX7-NEXT: v_xor_b32_e32 v5, 0x8000, v1
; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v3
; GFX7-NEXT: v_xor_b32_e32 v4, 0x8000, v0
; GFX7-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v1
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7-NEXT: v_or_b32_e32 v0, v0, v3
; GFX7-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: select_fneg_xor_select_v2i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_and_b32_e32 v1, 1, v1
; GFX9-NEXT: v_xor_b32_e32 v4, 0x80008000, v4
; GFX9-NEXT: v_and_b32_e32 v0, 1, v0
; GFX9-NEXT: v_lshrrev_b32_e32 v6, 16, v5
; GFX9-NEXT: v_lshrrev_b32_e32 v7, 16, v4
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v1
; GFX9-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
; GFX9-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc
; GFX9-NEXT: s_mov_b32 s4, 0x5040100
; GFX9-NEXT: v_perm_b32 v4, v1, v0, s4
; GFX9-NEXT: v_and_b32_e32 v3, 1, v3
; GFX9-NEXT: v_xor_b32_e32 v4, 0x80008000, v4
; GFX9-NEXT: v_and_b32_e32 v2, 1, v2
; GFX9-NEXT: v_lshrrev_b32_e32 v5, 16, v4
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v3
; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 1, v2
; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc
; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: select_fneg_xor_select_v2i16:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_xor_b32_e32 v4, 0x80008000, v4
; GFX11-NEXT: v_lshrrev_b32_e32 v6, 16, v5
; GFX11-NEXT: v_and_b32_e32 v3, 1, v3
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_lshrrev_b32_e32 v7, 16, v4
; GFX11-NEXT: v_and_b32_e32 v0, 1, v0
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
; GFX11-NEXT: v_cndmask_b32_e32 v0, v4, v5, vcc_lo
; GFX11-NEXT: v_and_b32_e32 v1, 1, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
; GFX11-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc_lo
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3
; GFX11-NEXT: v_perm_b32 v4, v1, v0, 0x5040100
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_xor_b32_e32 v4, 0x80008000, v4
; GFX11-NEXT: v_lshrrev_b32_e32 v5, 16, v4
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_dual_cndmask_b32 v1, v1, v5 :: v_dual_and_b32 v2, 1, v2
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v2
; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX11-NEXT: s_setpc_b64 s[30:31]
  ; Integer form of a nested negate/select on packed 16-bit lanes. xor with
  ; i16 -32768 (0x8000) flips only the top bit of each lane. It is applied to
  ; %arg0 before the first select (false arm), and to the first select's
  ; result on the true arm of the second select.
  %fneg0 = xor <2 x i16> %arg0, <i16 -32768, i16 -32768>
  %select0 = select <2 x i1> %cond0, <2 x i16> %arg1, <2 x i16> %fneg0
  %fneg1 = xor <2 x i16> %select0, <i16 -32768, i16 -32768>
  %select1 = select <2 x i1> %cond1, <2 x i16> %fneg1, <2 x i16> %select0
  ret <2 x i16> %select1
}

; pattern that appeared in rocm-device-libs to manually operate on the
; sign bit of the high half of a double
;
; Integer form: pick %arg2 or %arg1 on bit 0 of %arg, view the chosen double
; as <2 x i32>, and xor element 1 (the high dword) with either 0x80000000 or
; 0 depending on the signed compare %arg > 1. The result is reassembled with
; insertelement and bitcast back to double.
define double @cospiD_pattern0(i32 %arg, double %arg1, double %arg2) {
; GCN-LABEL: cospiD_pattern0:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_and_b32_e32 v5, 1, v0
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
; GCN-NEXT: v_cndmask_b32_e32 v3, v1, v3, vcc
; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc
; GCN-NEXT: v_cmp_lt_i32_e32 vcc, 1, v0
; GCN-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GCN-NEXT: v_lshlrev_b32_e32 v0, 31, v0
; GCN-NEXT: v_xor_b32_e32 v1, v1, v0
; GCN-NEXT: v_mov_b32_e32 v0, v3
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: cospiD_pattern0:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_and_b32_e32 v5, 1, v0
; GFX11-NEXT: v_cmp_lt_i32_e64 s0, 1, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v5
; GFX11-NEXT: v_cndmask_b32_e64 v5, 0, 1, s0
; GFX11-NEXT: v_dual_cndmask_b32 v0, v1, v3 :: v_dual_cndmask_b32 v1, v2, v4
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_lshlrev_b32_e32 v2, 31, v5
; GFX11-NEXT: v_xor_b32_e32 v1, v1, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %i = and i32 %arg, 1
  %i3 = icmp eq i32 %i, 0
  %i4 = select i1 %i3, double %arg2, double %arg1
  %i5 = bitcast double %i4 to <2 x i32>
  %i6 = icmp sgt i32 %arg, 1
  %i7 = select i1 %i6, i32 -2147483648, i32 0
  %i8 = extractelement <2 x i32> %i5, i64 1
  %i9 = xor i32 %i8, %i7
  %i10 = insertelement <2 x i32> %i5, i32 %i9, i64 1
  %i11 = bitcast <2 x i32> %i10 to double
  ret double %i11
}

; Floating-point form of cospiD_pattern0: the same first select, followed by
; an fneg of the chosen double and a second select on %arg > 1 (signed) that
; picks between the negated and the original value.
define double @cospiD_pattern1(i32 %arg, double %arg1, double %arg2) {
; GCN-LABEL: cospiD_pattern1:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_and_b32_e32 v5, 1, v0
; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v5
; GCN-NEXT: v_cndmask_b32_e32 v3, v1, v3, vcc
; GCN-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc
; GCN-NEXT: v_xor_b32_e32 v2, 0x80000000, v1
; GCN-NEXT: v_cmp_lt_i32_e32 vcc, 1, v0
; GCN-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GCN-NEXT: v_mov_b32_e32 v0, v3
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: cospiD_pattern1:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_and_b32_e32 v5, 1, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_3)
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v5
; GFX11-NEXT: v_cndmask_b32_e32 v3, v1, v3, vcc_lo
; GFX11-NEXT: v_cndmask_b32_e32 v1, v2, v4, vcc_lo
; GFX11-NEXT: v_cmp_lt_i32_e32 vcc_lo, 1, v0
; GFX11-NEXT: v_mov_b32_e32 v0, v3
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_xor_b32_e32 v2, 0x80000000, v1
; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %i = and i32 %arg, 1
  %i3 = icmp eq i32 %i, 0
  %i4 = select i1 %i3, double %arg2, double %arg1
  %i5 = icmp sgt i32 %arg, 1
  %i6 = fneg double %i4
  %i7 = select i1 %i5, double %i6, double %i4
  ret double %i7
}

; artificial example, scaled to operate on the 16-bit halves of a float.
; Integer form on a float: the selected float is viewed as <2 x i16> and
; element 1 (the high half) is xor'ed with either i16 -32768 or 0, chosen by
; the signed compare %arg > 1, then bitcast back to float.
define float @cospiD_pattern0_half(i16 %arg, float %arg1, float %arg2) {
; GFX7-LABEL: cospiD_pattern0_half:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_bfe_i32 v3, v0, 0, 16
; GFX7-NEXT: v_and_b32_e32 v0, 1, v0
; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX7-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
; GFX7-NEXT: v_cmp_lt_i32_e32 vcc, 1, v3
; GFX7-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; GFX7-NEXT: v_and_b32_e32 v1, 0xffff, v0
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
; GFX7-NEXT: v_lshlrev_b32_e32 v2, 31, v2
; GFX7-NEXT: v_xor_b32_e32 v0, v2, v0
; GFX7-NEXT: v_or_b32_e32 v0, v1, v0
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: cospiD_pattern0_half:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_and_b32_e32 v3, 1, v0
; GFX9-NEXT: v_cmp_eq_u16_e32 vcc, 0, v3
; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GFX9-NEXT: v_cmp_lt_i16_e32 vcc, 1, v0
; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX9-NEXT: v_lshlrev_b16_e32 v0, 15, v0
; GFX9-NEXT: v_xor_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX9-NEXT: s_mov_b32 s4, 0x5040100
; GFX9-NEXT: v_perm_b32 v0, v0, v1, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: cospiD_pattern0_half:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_and_b32_e32 v3, 1, v0
; GFX11-NEXT: v_cmp_lt_i16_e32 vcc_lo, 1, v0
; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0, v3
; GFX11-NEXT: v_lshlrev_b16 v0, 15, v0
; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v1
; GFX11-NEXT: v_xor_b32_e32 v0, v2, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_perm_b32 v0, v0, v1, 0x5040100
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %i = and i16 %arg, 1
  %i3 = icmp eq i16 %i, 0
  %i4 = select i1 %i3, float %arg2, float %arg1
  %i5 = bitcast float %i4 to <2 x i16>
  %i6 = icmp sgt i16 %arg, 1
  %i7 = select i1 %i6, i16 -32768, i16 0
  %i8 = extractelement <2 x i16> %i5, i64 1
  %i9 = xor i16 %i8, %i7
  %i10 = insertelement <2 x i16> %i5, i16 %i9, i64 1
  %i11 = bitcast <2 x i16> %i10 to float
  ret float %i11
}

; Floating-point form of cospiD_pattern0_half: fneg of the selected float,
; then a select on %arg > 1 (signed i16 compare) between the negated and the
; original value.
define float @cospiD_pattern1_half(i16 %arg, float %arg1, float %arg2) {
; GFX7-LABEL: cospiD_pattern1_half:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_bfe_i32 v3, v0, 0, 16
; GFX7-NEXT: v_and_b32_e32 v0, 1, v0
; GFX7-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
; GFX7-NEXT: v_cndmask_b32_e32 v0, v1, v2, vcc
; GFX7-NEXT: v_cmp_lt_i32_e32 vcc, 1, v3
; GFX7-NEXT: v_cndmask_b32_e64 v0, v0, -v0, vcc
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: cospiD_pattern1_half:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_and_b32_e32 v3, 1, v0
; GFX9-NEXT: v_cmp_eq_u16_e32 vcc, 0, v3
; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
; GFX9-NEXT: v_cmp_lt_i16_e32 vcc, 1, v0
; GFX9-NEXT: v_cndmask_b32_e64 v0, v1, -v1, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: cospiD_pattern1_half:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_and_b32_e32 v3, 1, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
; GFX11-NEXT: v_cmp_eq_u16_e32 vcc_lo, 0, v3
; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo
; GFX11-NEXT: v_cmp_lt_i16_e32 vcc_lo, 1, v0
; GFX11-NEXT: v_cndmask_b32_e64 v0, v1, -v1, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %i = and i16 %arg, 1
  %i3 = icmp eq i16 %i, 0
  %i4 = select i1 %i3, float %arg2, float %arg1
  %i5 = icmp sgt i16 %arg, 1
  %i6 = fneg float %i4
  %i7 = select i1 %i5, float %i6, float %i4
  ret float %i7
}

; fneg of a double obtained by bitcasting an i64 argument. The expected
; output on every target is a single xor of v1 with 0x80000000.
define double @fneg_f64_bitcast_vector_i64_to_f64(i64 %arg) {
; GCN-LABEL: fneg_f64_bitcast_vector_i64_to_f64:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: fneg_f64_bitcast_vector_i64_to_f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %bitcast = bitcast i64 %arg to double
  %fneg = fneg double %bitcast
  ret double %fneg
}

; Same as above with a <2 x i32> vector argument as the bitcast source.
define double @fneg_f64_bitcast_vector_v2i32_to_f64(<2 x i32> %arg) {
; GCN-LABEL: fneg_f64_bitcast_vector_v2i32_to_f64:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: fneg_f64_bitcast_vector_v2i32_to_f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %bitcast = bitcast <2 x i32> %arg to double
  %fneg = fneg double %bitcast
  ret double %fneg
}

; Same as above with a <2 x float> vector argument as the bitcast source.
define double @fneg_f64_bitcast_vector_v2f32_to_f64(<2 x float> %arg) {
; GCN-LABEL: fneg_f64_bitcast_vector_v2f32_to_f64:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: fneg_f64_bitcast_vector_v2f32_to_f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %bitcast = bitcast <2 x float> %arg to double
  %fneg = fneg double %bitcast
  ret double %fneg
}

; Same as above with a <4 x i16> vector argument. The gfx700 checks first
; pack the four incoming registers into two dwords before the xor.
define double @fneg_f64_bitcast_vector_v4i16_to_f64(<4 x i16> %arg) {
; GFX7-LABEL: fneg_f64_bitcast_vector_v4i16_to_f64:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX7-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX7-NEXT: v_or_b32_e32 v2, v2, v3
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7-NEXT: v_xor_b32_e32 v1, 0x80000000, v2
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: fneg_f64_bitcast_vector_v4i16_to_f64:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: fneg_f64_bitcast_vector_v4i16_to_f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %bitcast = bitcast <4 x i16> %arg to double
  %fneg = fneg double %bitcast
  ret double %fneg
}

; Same as above with a <4 x half> vector argument. The gfx700 checks convert
; each incoming element with v_cvt_f16_f32 and pack them before the xor.
define double @fneg_f64_bitcast_vector_v4f16_to_f64(<4 x half> %arg) {
; GFX7-LABEL: fneg_f64_bitcast_vector_v4f16_to_f64:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX7-NEXT: v_or_b32_e32 v2, v2, v3
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX7-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7-NEXT: v_xor_b32_e32 v1, 0x80000000, v2
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: fneg_f64_bitcast_vector_v4f16_to_f64:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: fneg_f64_bitcast_vector_v4f16_to_f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %bitcast = bitcast <4 x half> %arg to double
  %fneg = fneg double %bitcast
  ret double %fneg
}

; The build_vector variants assemble the source vector from scalar arguments
; with insertelement before the bitcast and fneg. This one uses two i32
; elements.
define double @fneg_f64_bitcast_build_vector_v2i32_to_f64(i32 %elt0, i32 %elt1) {
; GCN-LABEL: fneg_f64_bitcast_build_vector_v2i32_to_f64:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: fneg_f64_bitcast_build_vector_v2i32_to_f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %insert.0 = insertelement <2 x i32> poison, i32 %elt0, i32 0
  %insert.1 = insertelement <2 x i32> %insert.0, i32 %elt1, i32 1
  %bitcast = bitcast <2 x i32> %insert.1 to double
  %fneg = fneg double %bitcast
  ret double %fneg
}

; build_vector variant with two float elements.
define double @fneg_f64_bitcast_build_vector_v2f32_to_f64(float %elt0, float %elt1) {
; GCN-LABEL: fneg_f64_bitcast_build_vector_v2f32_to_f64:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: fneg_f64_bitcast_build_vector_v2f32_to_f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %insert.0 = insertelement <2 x float> poison, float %elt0, i32 0
  %insert.1 = insertelement <2 x float> %insert.0, float %elt1, i32 1
  %bitcast = bitcast <2 x float> %insert.1 to double
  %fneg = fneg double %bitcast
  ret double %fneg
}

; build_vector variant with four i16 elements. All three check blocks pack
; the elements into two dwords first and then xor the upper dword.
define double @fneg_f64_bitcast_build_vector_v4i16_to_f64(i16 %elt0, i16 %elt1, i16 %elt2, i16 %elt3) {
; GFX7-LABEL: fneg_f64_bitcast_build_vector_v4i16_to_f64:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX7-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX7-NEXT: v_or_b32_e32 v2, v2, v3
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX7-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX7-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7-NEXT: v_xor_b32_e32 v1, 0x80000000, v2
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: fneg_f64_bitcast_build_vector_v4i16_to_f64:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_mov_b32 s4, 0x5040100
; GFX9-NEXT: v_perm_b32 v2, v3, v2, s4
; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
; GFX9-NEXT: v_xor_b32_e32 v1, 0x80000000, v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: fneg_f64_bitcast_build_vector_v4i16_to_f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_perm_b32 v2, v3, v2, 0x5040100
; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT: v_xor_b32_e32 v1, 0x80000000, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %insert.0 = insertelement <4 x i16> poison, i16 %elt0, i32 0
  %insert.1 = insertelement <4 x i16> %insert.0, i16 %elt1, i32 1
  %insert.2 = insertelement <4 x i16> %insert.1, i16 %elt2, i32 2
  %insert.3 = insertelement <4 x i16> %insert.2, i16 %elt3, i32 3
  %bitcast = bitcast <4 x i16> %insert.3 to double
  %fneg = fneg double %bitcast
  ret double %fneg
}

; build_vector variant with four half elements.
define double @fneg_f64_bitcast_build_vector_v4f16_to_f64(half %elt0, half %elt1, half %elt2, half %elt3) {
; GFX7-LABEL: fneg_f64_bitcast_build_vector_v4f16_to_f64:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_cvt_f16_f32_e32 v3, v3
; GFX7-NEXT: v_cvt_f16_f32_e32 v2, v2
; GFX7-NEXT: v_cvt_f16_f32_e32 v1, v1
; GFX7-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3
; GFX7-NEXT: v_or_b32_e32 v2, v2, v3
; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX7-NEXT: v_or_b32_e32 v0, v0, v1
; GFX7-NEXT: v_xor_b32_e32 v1, 0x80000000, v2
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: fneg_f64_bitcast_build_vector_v4f16_to_f64:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_mov_b32 s4, 0x5040100
; GFX9-NEXT: v_perm_b32 v2, v3, v2, s4
; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
; GFX9-NEXT: v_xor_b32_e32 v1, 0x80000000, v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: fneg_f64_bitcast_build_vector_v4f16_to_f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_perm_b32 v2, v3, v2, 0x5040100
; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT: v_xor_b32_e32 v1, 0x80000000, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %insert.0 = insertelement <4 x half> poison, half %elt0, i32 0
  %insert.1 = insertelement <4 x half> %insert.0, half %elt1, i32 1
  %insert.2 = insertelement <4 x half> %insert.1, half %elt2, i32 2
  %insert.3 = insertelement <4 x half> %insert.2, half %elt3, i32 3
  %bitcast = bitcast <4 x half> %insert.3 to double
  %fneg = fneg double %bitcast
  ret double %fneg
}

; build_vector variant with four bfloat elements.
define double @fneg_f64_bitcast_build_vector_v4bf16_to_f64(bfloat %elt0, bfloat %elt1, bfloat %elt2, bfloat %elt3) {
; GFX7-LABEL: fneg_f64_bitcast_build_vector_v4bf16_to_f64:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: v_mul_f32_e32 v3, 1.0, v3
; GFX7-NEXT: v_mul_f32_e32 v1, 1.0, v1
; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v3
; GFX7-NEXT: v_mul_f32_e32 v2, 1.0, v2
; GFX7-NEXT: v_lshrrev_b32_e32 v1, 16, v1
; GFX7-NEXT: v_mul_f32_e32 v0, 1.0, v0
; GFX7-NEXT: v_alignbit_b32 v2, v3, v2, 16
; GFX7-NEXT: v_alignbit_b32 v0, v1, v0, 16
; GFX7-NEXT: v_xor_b32_e32 v1, 0x80000000, v2
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: fneg_f64_bitcast_build_vector_v4bf16_to_f64:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: s_mov_b32 s4, 0x5040100
; GFX9-NEXT: v_perm_b32 v2, v3, v2, s4
; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
; GFX9-NEXT: v_xor_b32_e32 v1, 0x80000000, v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: fneg_f64_bitcast_build_vector_v4bf16_to_f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_perm_b32 v2, v3, v2, 0x5040100
; GFX11-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT: v_xor_b32_e32 v1, 0x80000000, v2
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %insert.0 = insertelement <4 x bfloat> poison, bfloat %elt0, i32 0
  %insert.1 = insertelement <4 x bfloat> %insert.0, bfloat %elt1, i32 1
  %insert.2 = insertelement <4 x bfloat> %insert.1, bfloat %elt2, i32 2
  %insert.3 = insertelement <4 x bfloat> %insert.2, bfloat %elt3, i32 3
  %bitcast = bitcast <4 x bfloat> %insert.3 to double
  %fneg = fneg double %bitcast
  ret double %fneg
}

; The fneg result is consumed by an fmul. The checks expect the negation to
; appear as a source modifier on v_mul_f64 (-v[0:1]) with no separate xor.
define double @fneg_f64_bitcast_build_vector_v2i32_to_f64_modifier_user(i32 %elt0, i32 %elt1, double %fp.val) {
; GCN-LABEL: fneg_f64_bitcast_build_vector_v2i32_to_f64_modifier_user:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_mul_f64 v[0:1], -v[0:1], v[2:3]
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: fneg_f64_bitcast_build_vector_v2i32_to_f64_modifier_user:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_mul_f64 v[0:1], -v[0:1], v[2:3]
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %insert.0 = insertelement <2 x i32> poison, i32 %elt0, i32 0
  %insert.1 = insertelement <2 x i32> %insert.0, i32 %elt1, i32 1
  %bitcast = bitcast <2 x i32> %insert.1 to double
  %fneg = fneg double %bitcast
  %fmul = fmul double %fneg, %fp.val
  ret double %fmul
}

; The fneg result is consumed by two fmuls, once as the first operand and
; once as the second. Both expected v_mul_f64 instructions carry the
; negation as a source modifier.
define { double, double } @fneg_f64_bitcast_build_vector_v2i32_to_f64_multi_modifier_user(i32 %elt0, i32 %elt1, double %fp.val0, double %fp.val1) {
; GCN-LABEL: fneg_f64_bitcast_build_vector_v2i32_to_f64_multi_modifier_user:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_mul_f64 v[6:7], -v[0:1], v[2:3]
; GCN-NEXT: v_mul_f64 v[2:3], v[4:5], -v[0:1]
; GCN-NEXT: v_mov_b32_e32 v0, v6
; GCN-NEXT: v_mov_b32_e32 v1, v7
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: fneg_f64_bitcast_build_vector_v2i32_to_f64_multi_modifier_user:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_mul_f64 v[6:7], -v[0:1], v[2:3]
; GFX11-NEXT: v_mul_f64 v[2:3], v[4:5], -v[0:1]
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT: v_dual_mov_b32 v0, v6 :: v_dual_mov_b32 v1, v7
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %insert.0 = insertelement <2 x i32> poison, i32 %elt0, i32 0
  %insert.1 = insertelement <2 x i32> %insert.0, i32 %elt1, i32 1
  %bitcast = bitcast <2 x i32> %insert.1 to double
  %fneg = fneg double %bitcast
  %fmul0 = fmul double %fneg, %fp.val0
  %fmul1 = fmul double %fp.val1, %fneg
  %ret.0 = insertvalue { double, double } poison, double %fmul0, 0
  %ret.1 = insertvalue { double, double } %ret.0, double %fmul1, 1
  ret { double, double } %ret.1
}

; The high element already has its sign bit flipped with an integer xor
; before the vector is built, and the fneg result then feeds an fmul. The
; checks keep both the xor on v1 and the negate modifier on v_mul_f64.
define double @fneg_f64_bitcast_build_vector_v2i32_to_f64_modifier_user_integer_neg_source(i32 %elt0, i32 %elt1, double %fp.val) {
; GCN-LABEL: fneg_f64_bitcast_build_vector_v2i32_to_f64_modifier_user_integer_neg_source:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
; GCN-NEXT: v_mul_f64 v[0:1], -v[0:1], v[2:3]
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: fneg_f64_bitcast_build_vector_v2i32_to_f64_modifier_user_integer_neg_source:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_mul_f64 v[0:1], -v[0:1], v[2:3]
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %neg.elt1 = xor i32 %elt1, -2147483648
  %insert.0 = insertelement <2 x i32> poison, i32 %elt0, i32 0
  %insert.1 = insertelement <2 x i32> %insert.0, i32 %neg.elt1, i32 1
  %bitcast = bitcast <2 x i32> %insert.1 to double
  %fneg = fneg double %bitcast
  %fmul = fmul double %fneg, %fp.val
  ret double %fmul
}

; The high element comes from "fadd nsz nnan %elt1, 2.0". The checks expect
; the negation to be absorbed into that operation, leaving a single
; v_sub_f32 of -2.0 and v1.
define double @fneg_f64_bitcast_build_vector_v2f32_foldable_sources_to_f64(float %elt0, float %elt1) {
; GCN-LABEL: fneg_f64_bitcast_build_vector_v2f32_foldable_sources_to_f64:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_sub_f32_e32 v1, -2.0, v1
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: fneg_f64_bitcast_build_vector_v2f32_foldable_sources_to_f64:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_sub_f32_e32 v1, -2.0, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %fadd = fadd nsz nnan float %elt1, 2.0
  %insert.0 = insertelement <2 x float> poison, float %elt0, i32 0
  %insert.1 = insertelement <2 x float> %insert.0, float %fadd, i32 1
  %bitcast = bitcast <2 x float> %insert.1 to double
  %fneg = fneg double %bitcast
  ret double %fneg
}

; The built vector has a second user besides the bitcast: it is stored to
; %ptr unmodified, so the un-negated value must remain available.
define double @fneg_f64_bitcast_build_vector_v2f32_to_f64_bitcast_source_user(float %elt0, float %elt1, ptr addrspace(1) %ptr) {
; GFX7-LABEL: fneg_f64_bitcast_build_vector_v2f32_to_f64_bitcast_source_user:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX7-NEXT: flat_store_dwordx2 v[2:3], v[0:1]
; GFX7-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
; GFX7-NEXT: s_waitcnt vmcnt(0)
; GFX7-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: fneg_f64_bitcast_build_vector_v2f32_to_f64_bitcast_source_user:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_store_dwordx2 v[2:3], v[0:1], off
; GFX9-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: fneg_f64_bitcast_build_vector_v2f32_to_f64_bitcast_source_user:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_xor_b32_e32 v4, 0x80000000, v1
; GFX11-NEXT: global_store_b64 v[2:3], v[0:1], off
; GFX11-NEXT: v_mov_b32_e32 v1, v4
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %insert.0 = insertelement <2 x float> poison, float %elt0, i32 0
  %insert.1 = insertelement <2 x float> %insert.0, float %elt1, i32 1
  store <2 x float> %insert.1, ptr addrspace(1) %ptr
  %bitcast = bitcast <2 x float> %insert.1 to double
  %fneg = fneg double %bitcast
  ret double %fneg
}

; The built vector has a second user that is a floating-point operation
; (fadd <2 x float>). The checks keep the adds on the un-negated inputs and
; apply the xor separately.
define { double, <2 x float> } @fneg_f64_bitcast_build_vector_v2f32_to_f64_bitcast_source_foldable_user(float %elt0, float %elt1, <2 x float> %arg.v2f32) {
; GCN-LABEL: fneg_f64_bitcast_build_vector_v2f32_to_f64_bitcast_source_foldable_user:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_add_f32_e32 v2, v0, v2
; GCN-NEXT: v_add_f32_e32 v3, v1, v3
; GCN-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: fneg_f64_bitcast_build_vector_v2f32_to_f64_bitcast_source_foldable_user:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_xor_b32_e32 v4, 0x80000000, v1
; GFX11-NEXT: v_dual_add_f32 v2, v0, v2 :: v_dual_add_f32 v3, v1, v3
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
; GFX11-NEXT: v_mov_b32_e32 v1, v4
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %insert.0 = insertelement <2 x float> poison, float %elt0, i32 0
  %insert.1 = insertelement <2 x float> %insert.0, float %elt1, i32 1
  %other.bitcast.source.user = fadd <2 x float> %insert.1, %arg.v2f32
  %bitcast = bitcast <2 x float> %insert.1 to double
  %fneg = fneg double %bitcast
  %ret.0 = insertvalue { double, <2 x float> } poison, double %fneg, 0
  %ret.1 = insertvalue { double, <2 x float> } %ret.0, <2 x float> %other.bitcast.source.user, 1
  ret { double, <2 x float> } %ret.1
}

; The bitcast itself has a second user: both the negated and the original
; double are returned.
define { double, double } @fneg_f64_bitcast_build_vector_v2f32_to_f64_bitcast_user(float %elt0, float %elt1) {
; GCN-LABEL: fneg_f64_bitcast_build_vector_v2f32_to_f64_bitcast_user:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_mov_b32_e32 v3, v1
; GCN-NEXT: v_xor_b32_e32 v1, 0x80000000, v3
; GCN-NEXT: v_mov_b32_e32 v2, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: fneg_f64_bitcast_build_vector_v2f32_to_f64_bitcast_user:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_dual_mov_b32 v3, v1 :: v_dual_mov_b32 v2, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_xor_b32_e32 v1, 0x80000000, v3
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %insert.0 = insertelement <2 x float> poison, float %elt0, i32 0
  %insert.1 = insertelement <2 x float> %insert.0, float %elt1, i32 1
  %bitcast = bitcast <2 x float> %insert.1 to double
  %fneg = fneg double %bitcast
  %ret.0 = insertvalue { double, double } poison, double %fneg, 0
  %ret.1 = insertvalue { double, double } %ret.0, double %bitcast, 1
  ret { double, double } %ret.1
}

; The bitcast has a second user that is a floating-point operation (fadd
; double). The checks keep v_add_f64 on the un-negated value and apply the
; xor separately.
define { double, double } @fneg_f64_bitcast_build_vector_v2f32_to_f64_bitcast_foldable_user(float %elt0, float %elt1, double %arg.f64) {
; GCN-LABEL: fneg_f64_bitcast_build_vector_v2f32_to_f64_bitcast_foldable_user:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_add_f64 v[2:3], v[0:1], v[2:3]
; GCN-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: fneg_f64_bitcast_build_vector_v2f32_to_f64_bitcast_foldable_user:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_add_f64 v[2:3], v[0:1], v[2:3]
; GFX11-NEXT: v_xor_b32_e32 v1, 0x80000000, v1
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %insert.0 = insertelement <2 x float> poison, float %elt0, i32 0
  %insert.1 = insertelement <2 x float> %insert.0, float %elt1, i32 1
  %bitcast = bitcast <2 x float> %insert.1 to double
  %other.bitcast.user = fadd double %bitcast, %arg.f64
  %fneg = fneg double %bitcast
  %ret.0 = insertvalue { double, double } poison, double %fneg, 0
  %ret.1 = insertvalue { double, double } %ret.0, double %other.bitcast.user, 1
  ret { double, double } %ret.1
}

; Check for correct bitcasting back when there are multiple uses
define amdgpu_kernel void @multiple_uses_fneg_select_f64(double %x, double %y, i1 %z, ptr addrspace(1) %dst) {
; GFX7-LABEL: multiple_uses_fneg_select_f64:
; GFX7: ; %bb.0:
; GFX7-NEXT: s_load_dword s6, s[8:9], 0x4
; GFX7-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0
; GFX7-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x6
; GFX7-NEXT: s_waitcnt lgkmcnt(0)
; GFX7-NEXT: s_bitcmp1_b32 s6, 0
; GFX7-NEXT: s_cselect_b64 vcc, -1, 0
; GFX7-NEXT: s_and_b64 s[6:7], vcc, exec
; GFX7-NEXT: v_mov_b32_e32 v0, s3
;
GFX7-NEXT: v_mov_b32_e32 v1, s1 1487; GFX7-NEXT: s_cselect_b32 s1, s1, s3 1488; GFX7-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 1489; GFX7-NEXT: s_cselect_b32 s0, s0, s2 1490; GFX7-NEXT: v_mov_b32_e32 v1, s1 1491; GFX7-NEXT: v_mov_b32_e32 v2, s4 1492; GFX7-NEXT: v_cndmask_b32_e64 v1, v1, -v0, vcc 1493; GFX7-NEXT: v_mov_b32_e32 v0, s0 1494; GFX7-NEXT: v_mov_b32_e32 v3, s5 1495; GFX7-NEXT: flat_store_dwordx2 v[2:3], v[0:1] 1496; GFX7-NEXT: s_endpgm 1497; 1498; GFX9-LABEL: multiple_uses_fneg_select_f64: 1499; GFX9: ; %bb.0: 1500; GFX9-NEXT: s_load_dword s6, s[8:9], 0x10 1501; GFX9-NEXT: s_load_dwordx4 s[0:3], s[8:9], 0x0 1502; GFX9-NEXT: s_load_dwordx2 s[4:5], s[8:9], 0x18 1503; GFX9-NEXT: v_mov_b32_e32 v2, 0 1504; GFX9-NEXT: s_waitcnt lgkmcnt(0) 1505; GFX9-NEXT: s_bitcmp1_b32 s6, 0 1506; GFX9-NEXT: s_cselect_b64 vcc, -1, 0 1507; GFX9-NEXT: s_and_b64 s[6:7], vcc, exec 1508; GFX9-NEXT: v_mov_b32_e32 v0, s3 1509; GFX9-NEXT: v_mov_b32_e32 v1, s1 1510; GFX9-NEXT: s_cselect_b32 s1, s1, s3 1511; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 1512; GFX9-NEXT: s_cselect_b32 s0, s0, s2 1513; GFX9-NEXT: v_mov_b32_e32 v1, s1 1514; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, -v0, vcc 1515; GFX9-NEXT: v_mov_b32_e32 v0, s0 1516; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[4:5] 1517; GFX9-NEXT: s_endpgm 1518; 1519; GFX11-LABEL: multiple_uses_fneg_select_f64: 1520; GFX11: ; %bb.0: 1521; GFX11-NEXT: s_clause 0x2 1522; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x0 1523; GFX11-NEXT: s_load_b32 s6, s[4:5], 0x10 1524; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x18 1525; GFX11-NEXT: v_mov_b32_e32 v2, 0 1526; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1527; GFX11-NEXT: v_mov_b32_e32 v0, s1 1528; GFX11-NEXT: s_bitcmp1_b32 s6, 0 1529; GFX11-NEXT: s_cselect_b32 vcc_lo, -1, 0 1530; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1) 1531; GFX11-NEXT: v_cndmask_b32_e32 v0, s3, v0, vcc_lo 1532; GFX11-NEXT: s_and_b32 s6, vcc_lo, exec_lo 1533; GFX11-NEXT: s_cselect_b32 s1, s1, s3 1534; 
GFX11-NEXT: s_cselect_b32 s0, s0, s2 1535; GFX11-NEXT: v_cndmask_b32_e64 v1, s1, -v0, vcc_lo 1536; GFX11-NEXT: v_mov_b32_e32 v0, s0 1537; GFX11-NEXT: global_store_b64 v2, v[0:1], s[4:5] 1538; GFX11-NEXT: s_endpgm 1539 %a = select i1 %z, double %x, double %y 1540 %b = fneg double %a 1541 %c = select i1 %z, double %a, double %b 1542 %d = fneg double %c 1543 store double %d, ptr addrspace(1) %dst 1544 ret void 1545} 1546 1547define amdgpu_kernel void @fnge_select_f32_multi_use_regression(float %.i2369) { 1548; GCN-LABEL: fnge_select_f32_multi_use_regression: 1549; GCN: ; %bb.0: ; %.entry 1550; GCN-NEXT: s_load_dword s0, s[8:9], 0x0 1551; GCN-NEXT: s_waitcnt lgkmcnt(0) 1552; GCN-NEXT: v_cmp_nlt_f32_e64 s[0:1], s0, 0 1553; GCN-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] 1554; GCN-NEXT: v_cmp_nge_f32_e32 vcc, 0, v0 1555; GCN-NEXT: v_cndmask_b32_e32 v1, 0, v0, vcc 1556; GCN-NEXT: v_mul_f32_e64 v0, -v0, v1 1557; GCN-NEXT: v_cmp_le_f32_e32 vcc, 0, v0 1558; GCN-NEXT: s_and_b64 vcc, exec, vcc 1559; GCN-NEXT: s_endpgm 1560; 1561; GFX11-LABEL: fnge_select_f32_multi_use_regression: 1562; GFX11: ; %bb.0: ; %.entry 1563; GFX11-NEXT: s_load_b32 s0, s[4:5], 0x0 1564; GFX11-NEXT: s_waitcnt lgkmcnt(0) 1565; GFX11-NEXT: v_cmp_nlt_f32_e64 s0, s0, 0 1566; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1567; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0 1568; GFX11-NEXT: v_cmp_nge_f32_e32 vcc_lo, 0, v0 1569; GFX11-NEXT: v_cndmask_b32_e32 v1, 0, v0, vcc_lo 1570; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1571; GFX11-NEXT: v_mul_f32_e64 v0, -v0, v1 1572; GFX11-NEXT: v_cmp_le_f32_e32 vcc_lo, 0, v0 1573; GFX11-NEXT: s_and_b32 vcc_lo, exec_lo, vcc_lo 1574; GFX11-NEXT: s_endpgm 1575.entry: 1576 %i = fcmp uge float %.i2369, 0.000000e+00 1577 %.i2379 = select i1 %i, i32 1, i32 0 1578 %.i0436 = bitcast i32 %.i2379 to float 1579 %.i0440 = fneg float %.i0436 1580 %i1 = fcmp ugt float %.i0436, 0.000000e+00 1581 %.i2495 = select i1 
%i1, i32 %.i2379, i32 0 1582 %.i0552 = bitcast i32 %.i2495 to float 1583 %.i0592 = fmul float %.i0440, %.i0552 1584 %.i0721 = fcmp oge float %.i0592, 0.000000e+00 1585 br i1 %.i0721, label %bb5, label %bb 1586 1587bb: ; preds = %.entry 1588 %i2 = call <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32> zeroinitializer, i32 1, i32 0) 1589 %i3 = shufflevector <2 x i32> %i2, <2 x i32> zeroinitializer, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> 1590 %i4 = bitcast <4 x i32> %i3 to <4 x float> 1591 %.i0753 = extractelement <4 x float> %i4, i64 0 1592 br label %bb5 1593 1594bb5: ; preds = %bb, %.entry 1595 ret void 1596} 1597 1598 1599declare <2 x i32> @llvm.amdgcn.s.buffer.load.v2i32(<4 x i32>, i32, i32 immarg) #0 1600 1601attributes #0 = { nocallback nofree nosync nounwind willreturn memory(none) } 1602