; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=fiji < %s | FileCheck -check-prefix=GFX8 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefix=GFX11 %s

; Code generation test for `sub <2 x i16>` through GlobalISel on four AMDGPU
; subtargets (gfx900, fiji, gfx1010, gfx1100), one check prefix per subtarget.
;
; Functions named v_* use the default calling convention; their expected
; output operates on VGPRs (v0, v1, ...) and returns with s_setpc_b64.
; Functions named s_* are amdgpu_ps with inreg arguments; their expected
; output operates on SGPRs (s0, s1, ...) and the result is bitcast to i32.
;
; The assertion lines are tool-generated. Regenerate them with
; utils/update_llc_test_checks.py instead of editing them by hand.

; Plain vector subtract. gfx900/gfx1010/gfx1100 are expected to emit a single
; v_pk_sub_i16; fiji is expected to subtract the low half with v_sub_u16, the
; high half with an SDWA v_sub_u16, and merge the halves with v_or_b32.
define <2 x i16> @v_sub_v2i16(<2 x i16> %a, <2 x i16> %b) {
; GFX9-LABEL: v_sub_v2i16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_sub_i16 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_sub_v2i16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_sub_u16_e32 v2, v0, v1
; GFX8-NEXT:    v_sub_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_or_b32_e32 v0, v2, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_sub_v2i16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_pk_sub_i16 v0, v0, v1
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_sub_v2i16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_pk_sub_i16 v0, v0, v1
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %sub = sub <2 x i16> %a, %b
  ret <2 x i16> %sub
}

; The left operand is an fneg of <2 x half> bitcast to <2 x i16>.
; gfx900/gfx1010/gfx1100 are expected to express the fneg as
; neg_lo:[1,0] neg_hi:[1,0] on v_pk_sub_i16; fiji is expected to flip the sign
; bits with an explicit v_xor_b32 against 0x80008000 before subtracting.
; NOTE(review): the fneg is folded into a source modifier on an integer packed
; instruction here - confirm the hardware honors neg_lo/neg_hi for
; v_pk_sub_i16 before relying on this expected output.
define <2 x i16> @v_sub_v2i16_fneg_lhs(<2 x half> %a, <2 x i16> %b) {
; GFX9-LABEL: v_sub_v2i16_fneg_lhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_sub_i16 v0, v0, v1 neg_lo:[1,0] neg_hi:[1,0]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_sub_v2i16_fneg_lhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
; GFX8-NEXT:    v_sub_u16_e32 v2, v0, v1
; GFX8-NEXT:    v_sub_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_or_b32_e32 v0, v2, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_sub_v2i16_fneg_lhs:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_pk_sub_i16 v0, v0, v1 neg_lo:[1,0] neg_hi:[1,0]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_sub_v2i16_fneg_lhs:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_pk_sub_i16 v0, v0, v1 neg_lo:[1,0] neg_hi:[1,0]
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %neg.a = fneg <2 x half> %a
  %cast.neg.a = bitcast <2 x half> %neg.a to <2 x i16>
  %sub = sub <2 x i16> %cast.neg.a, %b
  ret <2 x i16> %sub
}

; Same as the fneg_lhs case, but the fneg feeds the right operand: the
; expected modifiers are neg_lo:[0,1] neg_hi:[0,1], and on fiji the v_xor_b32
; is applied to v1 instead of v0.
; NOTE(review): source modifier on an integer packed instruction - confirm
; against the ISA documentation.
define <2 x i16> @v_sub_v2i16_fneg_rhs(<2 x i16> %a, <2 x half> %b) {
; GFX9-LABEL: v_sub_v2i16_fneg_rhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_sub_i16 v0, v0, v1 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_sub_v2i16_fneg_rhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_xor_b32_e32 v1, 0x80008000, v1
; GFX8-NEXT:    v_sub_u16_e32 v2, v0, v1
; GFX8-NEXT:    v_sub_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_or_b32_e32 v0, v2, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_sub_v2i16_fneg_rhs:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_pk_sub_i16 v0, v0, v1 neg_lo:[0,1] neg_hi:[0,1]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_sub_v2i16_fneg_rhs:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_pk_sub_i16 v0, v0, v1 neg_lo:[0,1] neg_hi:[0,1]
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %neg.b = fneg <2 x half> %b
  %cast.neg.b = bitcast <2 x half> %neg.b to <2 x i16>
  %sub = sub <2 x i16> %a, %cast.neg.b
  ret <2 x i16> %sub
}

; Both operands are fneg'd <2 x half> values bitcast to <2 x i16>. The
; expected modifiers are neg_lo:[1,1] neg_hi:[1,1]; fiji is expected to emit
; one v_xor_b32 per operand.
; NOTE(review): source modifiers on an integer packed instruction - confirm
; against the ISA documentation.
define <2 x i16> @v_sub_v2i16_fneg_lhs_fneg_rhs(<2 x half> %a, <2 x half> %b) {
; GFX9-LABEL: v_sub_v2i16_fneg_lhs_fneg_rhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_sub_i16 v0, v0, v1 neg_lo:[1,1] neg_hi:[1,1]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_sub_v2i16_fneg_lhs_fneg_rhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_xor_b32_e32 v0, 0x80008000, v0
; GFX8-NEXT:    v_xor_b32_e32 v1, 0x80008000, v1
; GFX8-NEXT:    v_sub_u16_e32 v2, v0, v1
; GFX8-NEXT:    v_sub_u16_sdwa v0, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_or_b32_e32 v0, v2, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_sub_v2i16_fneg_lhs_fneg_rhs:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_pk_sub_i16 v0, v0, v1 neg_lo:[1,1] neg_hi:[1,1]
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_sub_v2i16_fneg_lhs_fneg_rhs:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_pk_sub_i16 v0, v0, v1 neg_lo:[1,1] neg_hi:[1,1]
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %neg.a = fneg <2 x half> %a
  %neg.b = fneg <2 x half> %b
  %cast.neg.a = bitcast <2 x half> %neg.a to <2 x i16>
  %cast.neg.b = bitcast <2 x half> %neg.b to <2 x i16>
  %sub = sub <2 x i16> %cast.neg.a, %cast.neg.b
  ret <2 x i16> %sub
}

; Subtract the splat constant <-64, -64>. gfx900 is expected to materialize
; 0xffc0ffc0 in a VGPR with v_mov_b32; gfx1010/gfx1100 are expected to use it
; directly as a literal operand of v_pk_sub_i16. fiji is expected to emit
; 16-bit adds of 64 for both halves rather than subtracts of -64.
define <2 x i16> @v_sub_v2i16_neg_inline_imm_splat(<2 x i16> %a) {
; GFX9-LABEL: v_sub_v2i16_neg_inline_imm_splat:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v1, 0xffc0ffc0
; GFX9-NEXT:    v_pk_sub_i16 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_sub_v2i16_neg_inline_imm_splat:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v2, 64
; GFX8-NEXT:    v_add_u16_e32 v1, 64, v0
; GFX8-NEXT:    v_add_u16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_sub_v2i16_neg_inline_imm_splat:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_pk_sub_i16 v0, v0, 0xffc0ffc0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_sub_v2i16_neg_inline_imm_splat:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_pk_sub_i16 v0, v0, 0xffc0ffc0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %sub = sub <2 x i16> %a, <i16 -64, i16 -64>
  ret <2 x i16> %sub
}

; Subtract <-64, 4>: -64 in element 0 (low half), 4 in element 1 (high half).
; The packed constant in the expected output is 0x4ffc0. fiji is expected to
; add 64 to the low half and add -4 to the high half.
define <2 x i16> @v_sub_v2i16_neg_inline_imm_lo(<2 x i16> %a) {
; GFX9-LABEL: v_sub_v2i16_neg_inline_imm_lo:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v1, 0x4ffc0
; GFX9-NEXT:    v_pk_sub_i16 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_sub_v2i16_neg_inline_imm_lo:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v2, -4
; GFX8-NEXT:    v_add_u16_e32 v1, 64, v0
; GFX8-NEXT:    v_add_u16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_sub_v2i16_neg_inline_imm_lo:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_pk_sub_i16 v0, v0, 0x4ffc0
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_sub_v2i16_neg_inline_imm_lo:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_pk_sub_i16 v0, v0, 0x4ffc0
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %sub = sub <2 x i16> %a, <i16 -64, i16 4>
  ret <2 x i16> %sub
}

; Subtract <4, -64>: 4 in element 0 (low half), -64 in element 1 (high half).
; The packed constant in the expected output is 0xffc00004. fiji is expected
; to add -4 to the low half and add 64 to the high half.
define <2 x i16> @v_sub_v2i16_neg_inline_imm_hi(<2 x i16> %a) {
; GFX9-LABEL: v_sub_v2i16_neg_inline_imm_hi:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v1, 0xffc00004
; GFX9-NEXT:    v_pk_sub_i16 v0, v0, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_sub_v2i16_neg_inline_imm_hi:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v2, 64
; GFX8-NEXT:    v_add_u16_e32 v1, -4, v0
; GFX8-NEXT:    v_add_u16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_sub_v2i16_neg_inline_imm_hi:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_pk_sub_i16 v0, v0, 0xffc00004
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_sub_v2i16_neg_inline_imm_hi:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_pk_sub_i16 v0, v0, 0xffc00004
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %sub = sub <2 x i16> %a, <i16 4, i16 -64>
  ret <2 x i16> %sub
}

; Scalar (inreg) variant of the splat <-64, -64> case; the result is returned
; as an i32. gfx900/gfx1010/gfx1100 are expected to extract the high half with
; s_lshr_b32, subtract each half with s_sub_i32 and repack with
; s_pack_ll_b32_b16. fiji is expected to mask the low half, use s_add_i32 on
; each half, then shift/mask/or the halves back together.
define amdgpu_ps i32 @s_sub_v2i16_neg_inline_imm_splat(<2 x i16> inreg %a) {
; GFX9-LABEL: s_sub_v2i16_neg_inline_imm_splat:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_lshr_b32 s1, s0, 16
; GFX9-NEXT:    s_sub_i32 s0, s0, 0xffc0ffc0
; GFX9-NEXT:    s_sub_i32 s1, s1, 0xffc0
; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s1
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_sub_v2i16_neg_inline_imm_splat:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_lshr_b32 s1, s0, 16
; GFX8-NEXT:    s_and_b32 s0, s0, 0xffff
; GFX8-NEXT:    s_add_i32 s0, s0, 0xffff0040
; GFX8-NEXT:    s_add_i32 s1, s1, 0xffff0040
; GFX8-NEXT:    s_lshl_b32 s1, s1, 16
; GFX8-NEXT:    s_and_b32 s0, s0, 0xffff
; GFX8-NEXT:    s_or_b32 s0, s1, s0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: s_sub_v2i16_neg_inline_imm_splat:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_lshr_b32 s1, s0, 16
; GFX10-NEXT:    s_sub_i32 s0, s0, 0xffc0ffc0
; GFX10-NEXT:    s_sub_i32 s1, s1, 0xffc0
; GFX10-NEXT:    s_pack_ll_b32_b16 s0, s0, s1
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: s_sub_v2i16_neg_inline_imm_splat:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_lshr_b32 s1, s0, 16
; GFX11-NEXT:    s_sub_i32 s0, s0, 0xffc0ffc0
; GFX11-NEXT:    s_sub_i32 s1, s1, 0xffc0
; GFX11-NEXT:    s_pack_ll_b32_b16 s0, s0, s1
; GFX11-NEXT:    ; return to shader part epilog
  %sub = sub <2 x i16> %a, <i16 -64, i16 -64>
  %cast = bitcast <2 x i16> %sub to i32
  ret i32 %cast
}

; Scalar (inreg) variant of the <-64, 4> case; the result is returned as an
; i32. The expected instruction shape matches the scalar splat case, with
; 0x4ffc0 / 4 as the s_sub_i32 operands on gfx900/gfx1010/gfx1100 and
; 0xffff0040 / -4 as the s_add_i32 operands on fiji.
define amdgpu_ps i32 @s_sub_v2i16_neg_inline_imm_lo(<2 x i16> inreg %a) {
; GFX9-LABEL: s_sub_v2i16_neg_inline_imm_lo:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_lshr_b32 s1, s0, 16
; GFX9-NEXT:    s_sub_i32 s0, s0, 0x4ffc0
; GFX9-NEXT:    s_sub_i32 s1, s1, 4
; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s1
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_sub_v2i16_neg_inline_imm_lo:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_lshr_b32 s1, s0, 16
; GFX8-NEXT:    s_and_b32 s0, s0, 0xffff
; GFX8-NEXT:    s_add_i32 s0, s0, 0xffff0040
; GFX8-NEXT:    s_add_i32 s1, s1, -4
; GFX8-NEXT:    s_lshl_b32 s1, s1, 16
; GFX8-NEXT:    s_and_b32 s0, s0, 0xffff
; GFX8-NEXT:    s_or_b32 s0, s1, s0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: s_sub_v2i16_neg_inline_imm_lo:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_lshr_b32 s1, s0, 16
; GFX10-NEXT:    s_sub_i32 s0, s0, 0x4ffc0
; GFX10-NEXT:    s_sub_i32 s1, s1, 4
; GFX10-NEXT:    s_pack_ll_b32_b16 s0, s0, s1
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: s_sub_v2i16_neg_inline_imm_lo:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_lshr_b32 s1, s0, 16
; GFX11-NEXT:    s_sub_i32 s0, s0, 0x4ffc0
; GFX11-NEXT:    s_sub_i32 s1, s1, 4
; GFX11-NEXT:    s_pack_ll_b32_b16 s0, s0, s1
; GFX11-NEXT:    ; return to shader part epilog
  %sub = sub <2 x i16> %a, <i16 -64, i16 4>
  %cast = bitcast <2 x i16> %sub to i32
  ret i32 %cast
}

; Scalar (inreg) variant of the <4, -64> case; the result is returned as an
; i32. The expected instruction shape matches the scalar splat case, with
; 0xffc00004 / 0xffc0 as the s_sub_i32 operands on gfx900/gfx1010/gfx1100 and
; -4 / 0xffff0040 as the s_add_i32 operands on fiji.
define amdgpu_ps i32 @s_sub_v2i16_neg_inline_imm_hi(<2 x i16> inreg %a) {
; GFX9-LABEL: s_sub_v2i16_neg_inline_imm_hi:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_lshr_b32 s1, s0, 16
; GFX9-NEXT:    s_sub_i32 s0, s0, 0xffc00004
; GFX9-NEXT:    s_sub_i32 s1, s1, 0xffc0
; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s1
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_sub_v2i16_neg_inline_imm_hi:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_lshr_b32 s1, s0, 16
; GFX8-NEXT:    s_and_b32 s0, s0, 0xffff
; GFX8-NEXT:    s_add_i32 s0, s0, -4
; GFX8-NEXT:    s_add_i32 s1, s1, 0xffff0040
; GFX8-NEXT:    s_lshl_b32 s1, s1, 16
; GFX8-NEXT:    s_and_b32 s0, s0, 0xffff
; GFX8-NEXT:    s_or_b32 s0, s1, s0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: s_sub_v2i16_neg_inline_imm_hi:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_lshr_b32 s1, s0, 16
; GFX10-NEXT:    s_sub_i32 s0, s0, 0xffc00004
; GFX10-NEXT:    s_sub_i32 s1, s1, 0xffc0
; GFX10-NEXT:    s_pack_ll_b32_b16 s0, s0, s1
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: s_sub_v2i16_neg_inline_imm_hi:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_lshr_b32 s1, s0, 16
; GFX11-NEXT:    s_sub_i32 s0, s0, 0xffc00004
; GFX11-NEXT:    s_sub_i32 s1, s1, 0xffc0
; GFX11-NEXT:    s_pack_ll_b32_b16 s0, s0, s1
; GFX11-NEXT:    ; return to shader part epilog
  %sub = sub <2 x i16> %a, <i16 4, i16 -64>
  %cast = bitcast <2 x i16> %sub to i32
  ret i32 %cast
}

; Scalar (inreg) register-register subtract; the result is returned as an i32.
; gfx900/gfx1010/gfx1100 are expected to shift out both high halves, subtract
; the halves separately with s_sub_i32 and repack with s_pack_ll_b32_b16.
; fiji is expected to additionally mask both low halves and to recombine the
; result with s_lshl_b32 / s_and_b32 / s_or_b32.
define amdgpu_ps i32 @s_sub_v2i16(<2 x i16> inreg %a, <2 x i16> inreg %b) {
; GFX9-LABEL: s_sub_v2i16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_lshr_b32 s2, s0, 16
; GFX9-NEXT:    s_lshr_b32 s3, s1, 16
; GFX9-NEXT:    s_sub_i32 s0, s0, s1
; GFX9-NEXT:    s_sub_i32 s1, s2, s3
; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s1
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_sub_v2i16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_lshr_b32 s2, s0, 16
; GFX8-NEXT:    s_and_b32 s0, s0, 0xffff
; GFX8-NEXT:    s_lshr_b32 s3, s1, 16
; GFX8-NEXT:    s_and_b32 s1, s1, 0xffff
; GFX8-NEXT:    s_sub_i32 s0, s0, s1
; GFX8-NEXT:    s_sub_i32 s1, s2, s3
; GFX8-NEXT:    s_lshl_b32 s1, s1, 16
; GFX8-NEXT:    s_and_b32 s0, s0, 0xffff
; GFX8-NEXT:    s_or_b32 s0, s1, s0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: s_sub_v2i16:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_lshr_b32 s2, s0, 16
; GFX10-NEXT:    s_lshr_b32 s3, s1, 16
; GFX10-NEXT:    s_sub_i32 s0, s0, s1
; GFX10-NEXT:    s_sub_i32 s1, s2, s3
; GFX10-NEXT:    s_pack_ll_b32_b16 s0, s0, s1
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: s_sub_v2i16:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_lshr_b32 s2, s0, 16
; GFX11-NEXT:    s_lshr_b32 s3, s1, 16
; GFX11-NEXT:    s_sub_i32 s0, s0, s1
; GFX11-NEXT:    s_sub_i32 s1, s2, s3
; GFX11-NEXT:    s_pack_ll_b32_b16 s0, s0, s1
; GFX11-NEXT:    ; return to shader part epilog
  %sub = sub <2 x i16> %a, %b
  %cast = bitcast <2 x i16> %sub to i32
  ret i32 %cast
}

; Scalar (inreg) subtract whose left operand is an fneg'd <2 x half> bitcast
; to <2 x i16>. On every subtarget the fneg is expected to appear as an
; explicit s_xor_b32 of s0 with 0x80008000, followed by the same sequence as
; the plain scalar subtract. The gfx1010/gfx1100 expected output orders the
; two s_lshr_b32 instructions differently from gfx900.
define amdgpu_ps i32 @s_sub_v2i16_fneg_lhs(<2 x half> inreg %a, <2 x i16> inreg %b) {
; GFX9-LABEL: s_sub_v2i16_fneg_lhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_xor_b32 s0, s0, 0x80008000
; GFX9-NEXT:    s_lshr_b32 s2, s0, 16
; GFX9-NEXT:    s_lshr_b32 s3, s1, 16
; GFX9-NEXT:    s_sub_i32 s0, s0, s1
; GFX9-NEXT:    s_sub_i32 s1, s2, s3
; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s1
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_sub_v2i16_fneg_lhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_xor_b32 s0, s0, 0x80008000
; GFX8-NEXT:    s_lshr_b32 s2, s0, 16
; GFX8-NEXT:    s_and_b32 s0, s0, 0xffff
; GFX8-NEXT:    s_lshr_b32 s3, s1, 16
; GFX8-NEXT:    s_and_b32 s1, s1, 0xffff
; GFX8-NEXT:    s_sub_i32 s0, s0, s1
; GFX8-NEXT:    s_sub_i32 s1, s2, s3
; GFX8-NEXT:    s_lshl_b32 s1, s1, 16
; GFX8-NEXT:    s_and_b32 s0, s0, 0xffff
; GFX8-NEXT:    s_or_b32 s0, s1, s0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: s_sub_v2i16_fneg_lhs:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_xor_b32 s0, s0, 0x80008000
; GFX10-NEXT:    s_lshr_b32 s3, s1, 16
; GFX10-NEXT:    s_lshr_b32 s2, s0, 16
; GFX10-NEXT:    s_sub_i32 s0, s0, s1
; GFX10-NEXT:    s_sub_i32 s1, s2, s3
; GFX10-NEXT:    s_pack_ll_b32_b16 s0, s0, s1
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: s_sub_v2i16_fneg_lhs:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_xor_b32 s0, s0, 0x80008000
; GFX11-NEXT:    s_lshr_b32 s3, s1, 16
; GFX11-NEXT:    s_lshr_b32 s2, s0, 16
; GFX11-NEXT:    s_sub_i32 s0, s0, s1
; GFX11-NEXT:    s_sub_i32 s1, s2, s3
; GFX11-NEXT:    s_pack_ll_b32_b16 s0, s0, s1
; GFX11-NEXT:    ; return to shader part epilog
  %neg.a = fneg <2 x half> %a
  %cast.neg.a = bitcast <2 x half> %neg.a to <2 x i16>
  %sub = sub <2 x i16> %cast.neg.a, %b
  %cast = bitcast <2 x i16> %sub to i32
  ret i32 %cast
}

; Scalar (inreg) subtract whose right operand is an fneg'd <2 x half> bitcast
; to <2 x i16>. The fneg is expected to appear as an explicit s_xor_b32 of s1
; with 0x80008000 on every subtarget, followed by the same sequence as the
; plain scalar subtract.
define amdgpu_ps i32 @s_sub_v2i16_fneg_rhs(<2 x i16> inreg %a, <2 x half> inreg %b) {
; GFX9-LABEL: s_sub_v2i16_fneg_rhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_xor_b32 s1, s1, 0x80008000
; GFX9-NEXT:    s_lshr_b32 s2, s0, 16
; GFX9-NEXT:    s_lshr_b32 s3, s1, 16
; GFX9-NEXT:    s_sub_i32 s0, s0, s1
; GFX9-NEXT:    s_sub_i32 s1, s2, s3
; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s1
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_sub_v2i16_fneg_rhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_xor_b32 s1, s1, 0x80008000
; GFX8-NEXT:    s_lshr_b32 s2, s0, 16
; GFX8-NEXT:    s_and_b32 s0, s0, 0xffff
; GFX8-NEXT:    s_lshr_b32 s3, s1, 16
; GFX8-NEXT:    s_and_b32 s1, s1, 0xffff
; GFX8-NEXT:    s_sub_i32 s0, s0, s1
; GFX8-NEXT:    s_sub_i32 s1, s2, s3
; GFX8-NEXT:    s_lshl_b32 s1, s1, 16
; GFX8-NEXT:    s_and_b32 s0, s0, 0xffff
; GFX8-NEXT:    s_or_b32 s0, s1, s0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: s_sub_v2i16_fneg_rhs:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_xor_b32 s1, s1, 0x80008000
; GFX10-NEXT:    s_lshr_b32 s2, s0, 16
; GFX10-NEXT:    s_lshr_b32 s3, s1, 16
; GFX10-NEXT:    s_sub_i32 s0, s0, s1
; GFX10-NEXT:    s_sub_i32 s1, s2, s3
; GFX10-NEXT:    s_pack_ll_b32_b16 s0, s0, s1
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: s_sub_v2i16_fneg_rhs:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_xor_b32 s1, s1, 0x80008000
; GFX11-NEXT:    s_lshr_b32 s2, s0, 16
; GFX11-NEXT:    s_lshr_b32 s3, s1, 16
; GFX11-NEXT:    s_sub_i32 s0, s0, s1
; GFX11-NEXT:    s_sub_i32 s1, s2, s3
; GFX11-NEXT:    s_pack_ll_b32_b16 s0, s0, s1
; GFX11-NEXT:    ; return to shader part epilog
  %neg.b = fneg <2 x half> %b
  %cast.neg.b = bitcast <2 x half> %neg.b to <2 x i16>
  %sub = sub <2 x i16> %a, %cast.neg.b
  %cast = bitcast <2 x i16> %sub to i32
  ret i32 %cast
}

; Scalar (inreg) subtract with both operands fneg'd. One s_xor_b32 with
; 0x80008000 per operand is expected on every subtarget, followed by the same
; sequence as the plain scalar subtract.
define amdgpu_ps i32 @s_sub_v2i16_fneg_lhs_fneg_rhs(<2 x half> inreg %a, <2 x half> inreg %b) {
; GFX9-LABEL: s_sub_v2i16_fneg_lhs_fneg_rhs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_xor_b32 s0, s0, 0x80008000
; GFX9-NEXT:    s_xor_b32 s1, s1, 0x80008000
; GFX9-NEXT:    s_lshr_b32 s2, s0, 16
; GFX9-NEXT:    s_lshr_b32 s3, s1, 16
; GFX9-NEXT:    s_sub_i32 s0, s0, s1
; GFX9-NEXT:    s_sub_i32 s1, s2, s3
; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s0, s1
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_sub_v2i16_fneg_lhs_fneg_rhs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_xor_b32 s0, s0, 0x80008000
; GFX8-NEXT:    s_xor_b32 s1, s1, 0x80008000
; GFX8-NEXT:    s_lshr_b32 s2, s0, 16
; GFX8-NEXT:    s_and_b32 s0, s0, 0xffff
; GFX8-NEXT:    s_lshr_b32 s3, s1, 16
; GFX8-NEXT:    s_and_b32 s1, s1, 0xffff
; GFX8-NEXT:    s_sub_i32 s0, s0, s1
; GFX8-NEXT:    s_sub_i32 s1, s2, s3
; GFX8-NEXT:    s_lshl_b32 s1, s1, 16
; GFX8-NEXT:    s_and_b32 s0, s0, 0xffff
; GFX8-NEXT:    s_or_b32 s0, s1, s0
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX10-LABEL: s_sub_v2i16_fneg_lhs_fneg_rhs:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_xor_b32 s0, s0, 0x80008000
; GFX10-NEXT:    s_xor_b32 s1, s1, 0x80008000
; GFX10-NEXT:    s_lshr_b32 s2, s0, 16
; GFX10-NEXT:    s_lshr_b32 s3, s1, 16
; GFX10-NEXT:    s_sub_i32 s0, s0, s1
; GFX10-NEXT:    s_sub_i32 s1, s2, s3
; GFX10-NEXT:    s_pack_ll_b32_b16 s0, s0, s1
; GFX10-NEXT:    ; return to shader part epilog
;
; GFX11-LABEL: s_sub_v2i16_fneg_lhs_fneg_rhs:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_xor_b32 s0, s0, 0x80008000
; GFX11-NEXT:    s_xor_b32 s1, s1, 0x80008000
; GFX11-NEXT:    s_lshr_b32 s2, s0, 16
; GFX11-NEXT:    s_lshr_b32 s3, s1, 16
; GFX11-NEXT:    s_sub_i32 s0, s0, s1
; GFX11-NEXT:    s_sub_i32 s1, s2, s3
; GFX11-NEXT:    s_pack_ll_b32_b16 s0, s0, s1
; GFX11-NEXT:    ; return to shader part epilog
  %neg.a = fneg <2 x half> %a
  %neg.b = fneg <2 x half> %b
  %cast.neg.a = bitcast <2 x half> %neg.a to <2 x i16>
  %cast.neg.b = bitcast <2 x half> %neg.b to <2 x i16>
  %sub = sub <2 x i16> %cast.neg.a, %cast.neg.b
  %cast = bitcast <2 x i16> %sub to i32
  ret i32 %cast
}