1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -mtriple=amdgcn-- -mcpu=gfx600 < %s | FileCheck -check-prefix=GFX6 %s 3; RUN: llc -mtriple=amdgcn-- -mcpu=gfx700 < %s | FileCheck -check-prefix=GFX7 %s 4; RUN: llc -mtriple=amdgcn-- -mcpu=gfx803 < %s | FileCheck -check-prefix=GFX8 %s 5; RUN: llc -mtriple=amdgcn-- -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s 6; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s 7; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1100 < %s | FileCheck -check-prefix=GFX11 %s 8; RUN: llc -mtriple=amdgcn-- -mcpu=gfx1200 < %s | FileCheck -check-prefix=GFX12 %s 9 10define i16 @abs_i16(i16 %arg) { 11; GFX6-LABEL: abs_i16: 12; GFX6: ; %bb.0: 13; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16 15; GFX6-NEXT: v_sub_i32_e32 v1, vcc, 0, v0 16; GFX6-NEXT: v_max_i32_e32 v0, v0, v1 17; GFX6-NEXT: s_setpc_b64 s[30:31] 18; 19; GFX7-LABEL: abs_i16: 20; GFX7: ; %bb.0: 21; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22; GFX7-NEXT: v_bfe_i32 v0, v0, 0, 16 23; GFX7-NEXT: v_sub_i32_e32 v1, vcc, 0, v0 24; GFX7-NEXT: v_max_i32_e32 v0, v0, v1 25; GFX7-NEXT: s_setpc_b64 s[30:31] 26; 27; GFX8-LABEL: abs_i16: 28; GFX8: ; %bb.0: 29; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 30; GFX8-NEXT: v_sub_u16_e32 v1, 0, v0 31; GFX8-NEXT: v_max_i16_e32 v0, v0, v1 32; GFX8-NEXT: s_setpc_b64 s[30:31] 33; 34; GFX9-LABEL: abs_i16: 35; GFX9: ; %bb.0: 36; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 37; GFX9-NEXT: v_sub_u16_e32 v1, 0, v0 38; GFX9-NEXT: v_max_i16_e32 v0, v0, v1 39; GFX9-NEXT: s_setpc_b64 s[30:31] 40; 41; GFX10-LABEL: abs_i16: 42; GFX10: ; %bb.0: 43; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 44; GFX10-NEXT: v_sub_nc_u16 v1, 0, v0 45; GFX10-NEXT: v_max_i16 v0, v0, v1 46; GFX10-NEXT: s_setpc_b64 s[30:31] 47; 48; GFX11-LABEL: abs_i16: 49; GFX11: ; %bb.0: 50; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 51; GFX11-NEXT: v_sub_nc_u16 v1, 0, v0 52; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 53; GFX11-NEXT: v_max_i16 v0, v0, v1 54; GFX11-NEXT: s_setpc_b64 s[30:31] 55; 56; GFX12-LABEL: abs_i16: 57; GFX12: ; %bb.0: 58; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 59; GFX12-NEXT: s_wait_expcnt 0x0 60; GFX12-NEXT: s_wait_samplecnt 0x0 61; GFX12-NEXT: s_wait_bvhcnt 0x0 62; GFX12-NEXT: s_wait_kmcnt 0x0 63; GFX12-NEXT: v_sub_nc_u16 v1, 0, v0 64; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 65; GFX12-NEXT: v_max_i16 v0, v0, v1 66; GFX12-NEXT: s_setpc_b64 s[30:31] 67 %res = call i16 @llvm.abs.i16(i16 %arg, i1 false) 68 ret i16 %res 69} 70 71define <2 x i16> @v_abs_v2i16(<2 x i16> %arg) { 72; GFX6-LABEL: v_abs_v2i16: 73; GFX6: ; %bb.0: 74; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 75; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16 76; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16 77; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 0, v0 78; GFX6-NEXT: v_max_i32_e32 v0, v0, v2 79; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 0, v1 80; GFX6-NEXT: v_max_i32_e32 v1, v1, v2 81; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v1 82; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 83; GFX6-NEXT: s_setpc_b64 s[30:31] 84; 85; GFX7-LABEL: v_abs_v2i16: 86; GFX7: ; %bb.0: 87; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 88; GFX7-NEXT: v_bfe_i32 v0, v0, 0, 16 89; GFX7-NEXT: v_bfe_i32 v1, v1, 0, 16 90; GFX7-NEXT: v_sub_i32_e32 v2, vcc, 0, v0 91; GFX7-NEXT: v_max_i32_e32 v0, v0, v2 92; GFX7-NEXT: v_sub_i32_e32 v2, vcc, 0, v1 93; GFX7-NEXT: v_max_i32_e32 v1, v1, v2 94; GFX7-NEXT: v_lshlrev_b32_e32 v2, 16, v1 95; GFX7-NEXT: v_or_b32_e32 v0, v0, v2 96; GFX7-NEXT: s_setpc_b64 s[30:31] 97; 98; GFX8-LABEL: v_abs_v2i16: 99; GFX8: ; %bb.0: 100; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 101; GFX8-NEXT: v_mov_b32_e32 v1, 0 102; GFX8-NEXT: v_sub_u16_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 103; GFX8-NEXT: v_sub_u16_e32 v2, 0, v0 104; GFX8-NEXT: v_max_i16_sdwa v1, v0, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 105; GFX8-NEXT: v_max_i16_e32 v0, v0, v2 106; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 107; GFX8-NEXT: s_setpc_b64 s[30:31] 108; 109; GFX9-LABEL: v_abs_v2i16: 110; GFX9: ; %bb.0: 111; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 112; GFX9-NEXT: v_pk_sub_i16 v1, 0, v0 113; GFX9-NEXT: v_pk_max_i16 v0, v0, v1 114; GFX9-NEXT: s_setpc_b64 s[30:31] 115; 116; GFX10-LABEL: v_abs_v2i16: 117; GFX10: ; %bb.0: 118; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 119; GFX10-NEXT: v_pk_sub_i16 v1, 0, v0 120; GFX10-NEXT: v_pk_max_i16 v0, v0, v1 121; GFX10-NEXT: s_setpc_b64 s[30:31] 122; 123; GFX11-LABEL: v_abs_v2i16: 124; GFX11: ; %bb.0: 125; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 126; GFX11-NEXT: v_pk_sub_i16 v1, 0, v0 127; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 128; GFX11-NEXT: v_pk_max_i16 v0, v0, v1 129; GFX11-NEXT: s_setpc_b64 s[30:31] 130; 131; GFX12-LABEL: v_abs_v2i16: 132; GFX12: ; %bb.0: 133; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 134; GFX12-NEXT: s_wait_expcnt 0x0 135; GFX12-NEXT: s_wait_samplecnt 0x0 136; GFX12-NEXT: s_wait_bvhcnt 0x0 137; GFX12-NEXT: s_wait_kmcnt 0x0 138; GFX12-NEXT: v_pk_sub_i16 v1, 0, v0 139; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_1) 140; GFX12-NEXT: v_pk_max_i16 v0, v0, v1 141; GFX12-NEXT: s_setpc_b64 s[30:31] 142 %res = call <2 x i16> @llvm.abs.v2i16(<2 x i16> %arg, i1 false) 143 ret <2 x i16> %res 144} 145 146define <3 x i16> @v_abs_v3i16(<3 x i16> %arg) { 147; GFX6-LABEL: v_abs_v3i16: 148; GFX6: ; %bb.0: 149; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 150; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16 151; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16 152; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 0, v0 153; GFX6-NEXT: v_max_i32_e32 v0, v0, v3 154; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 0, v1 155; GFX6-NEXT: v_max_i32_e32 v1, v1, v3 156; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 16 157; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 158; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 159; GFX6-NEXT: v_sub_i32_e32 v1, vcc, 0, v2 160; GFX6-NEXT: v_max_i32_e32 v2, v2, v1 161; GFX6-NEXT: v_alignbit_b32 v1, v2, v0, 16 162; GFX6-NEXT: s_setpc_b64 s[30:31] 163; 164; GFX7-LABEL: v_abs_v3i16: 165; GFX7: ; %bb.0: 166; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 167; GFX7-NEXT: v_bfe_i32 v0, v0, 0, 16 168; GFX7-NEXT: v_bfe_i32 v1, v1, 0, 16 169; GFX7-NEXT: v_sub_i32_e32 v3, vcc, 0, v0 170; GFX7-NEXT: v_max_i32_e32 v0, v0, v3 171; GFX7-NEXT: v_sub_i32_e32 v3, vcc, 0, v1 172; GFX7-NEXT: v_max_i32_e32 v1, v1, v3 173; GFX7-NEXT: v_bfe_i32 v2, v2, 0, 16 174; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 175; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 176; GFX7-NEXT: v_sub_i32_e32 v1, vcc, 0, v2 177; GFX7-NEXT: v_max_i32_e32 v2, v2, v1 178; GFX7-NEXT: v_alignbit_b32 v1, v2, v0, 16 179; GFX7-NEXT: s_setpc_b64 s[30:31] 180; 181; GFX8-LABEL: v_abs_v3i16: 182; GFX8: ; %bb.0: 183; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 184; GFX8-NEXT: v_mov_b32_e32 v2, 0 185; GFX8-NEXT: v_sub_u16_e32 v3, 0, v1 186; GFX8-NEXT: v_sub_u16_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 187; GFX8-NEXT: v_max_i16_e32 v1, v1, v3 188; GFX8-NEXT: v_sub_u16_e32 v3, 0, v0 189; GFX8-NEXT: v_max_i16_sdwa v2, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 190; GFX8-NEXT: v_max_i16_e32 v0, v0, v3 191; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 192; GFX8-NEXT: s_setpc_b64 s[30:31] 193; 194; GFX9-LABEL: v_abs_v3i16: 195; GFX9: ; %bb.0: 196; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 197; GFX9-NEXT: v_pk_sub_i16 v2, 0, v0 198; GFX9-NEXT: v_pk_max_i16 v0, v0, v2 199; GFX9-NEXT: v_pk_sub_i16 v2, 0, v1 200; GFX9-NEXT: v_pk_max_i16 v1, v1, v2 201; GFX9-NEXT: s_setpc_b64 s[30:31] 202; 203; GFX10-LABEL: v_abs_v3i16: 204; GFX10: ; %bb.0: 205; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 206; GFX10-NEXT: v_pk_sub_i16 v2, 0, v0 207; GFX10-NEXT: v_pk_sub_i16 v3, 0, v1 208; GFX10-NEXT: v_pk_max_i16 v0, v0, v2 209; GFX10-NEXT: v_pk_max_i16 v1, v1, v3 210; GFX10-NEXT: s_setpc_b64 s[30:31] 211; 212; GFX11-LABEL: v_abs_v3i16: 213; GFX11: ; %bb.0: 214; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 215; GFX11-NEXT: v_pk_sub_i16 v2, 0, v0 216; GFX11-NEXT: v_pk_sub_i16 v3, 0, v1 217; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 218; GFX11-NEXT: v_pk_max_i16 v0, v0, v2 219; GFX11-NEXT: v_pk_max_i16 v1, v1, v3 220; GFX11-NEXT: s_setpc_b64 s[30:31] 221; 222; GFX12-LABEL: v_abs_v3i16: 223; GFX12: ; %bb.0: 224; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 225; GFX12-NEXT: s_wait_expcnt 0x0 226; GFX12-NEXT: s_wait_samplecnt 0x0 227; GFX12-NEXT: s_wait_bvhcnt 0x0 228; GFX12-NEXT: s_wait_kmcnt 0x0 229; GFX12-NEXT: v_pk_sub_i16 v2, 0, v0 230; GFX12-NEXT: v_pk_sub_i16 v3, 0, v1 231; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 232; GFX12-NEXT: v_pk_max_i16 v0, v0, v2 233; GFX12-NEXT: v_pk_max_i16 v1, v1, v3 234; GFX12-NEXT: s_setpc_b64 s[30:31] 235 %res = call <3 x i16> @llvm.abs.v3i16(<3 x i16> %arg, i1 false) 236 ret <3 x i16> %res 237} 238 239define <4 x i16> @v_abs_v4i16(<4 x i16> %arg) { 240; GFX6-LABEL: v_abs_v4i16: 241; GFX6: ; %bb.0: 242; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 243; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 16 244; GFX6-NEXT: v_bfe_i32 v3, v3, 0, 16 245; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 0, v2 246; GFX6-NEXT: v_max_i32_e32 v2, v2, v4 247; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 0, v3 248; GFX6-NEXT: v_max_i32_e32 v3, v3, v4 249; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16 250; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3 251; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16 252; GFX6-NEXT: v_or_b32_e32 v2, v2, v3 253; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 0, v0 254; GFX6-NEXT: v_max_i32_e32 v0, v0, v3 255; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 0, v1 256; GFX6-NEXT: v_max_i32_e32 v1, v1, v3 257; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 258; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 259; GFX6-NEXT: v_alignbit_b32 v1, v2, v0, 16 260; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v2 261; GFX6-NEXT: s_setpc_b64 s[30:31] 262; 263; GFX7-LABEL: v_abs_v4i16: 264; GFX7: ; %bb.0: 265; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 266; GFX7-NEXT: v_bfe_i32 v2, v2, 0, 16 267; GFX7-NEXT: v_bfe_i32 v3, v3, 0, 16 268; GFX7-NEXT: v_sub_i32_e32 v4, vcc, 0, v2 269; GFX7-NEXT: v_max_i32_e32 v2, v2, v4 270; GFX7-NEXT: v_sub_i32_e32 v4, vcc, 0, v3 271; GFX7-NEXT: v_max_i32_e32 v3, v3, v4 272; GFX7-NEXT: v_bfe_i32 v0, v0, 0, 16 273; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3 274; GFX7-NEXT: v_bfe_i32 v1, v1, 0, 16 275; GFX7-NEXT: v_or_b32_e32 v2, v2, v3 276; GFX7-NEXT: v_sub_i32_e32 v3, vcc, 0, v0 277; GFX7-NEXT: v_max_i32_e32 v0, v0, v3 278; GFX7-NEXT: v_sub_i32_e32 v3, vcc, 0, v1 279; GFX7-NEXT: v_max_i32_e32 v1, v1, v3 280; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 281; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 282; GFX7-NEXT: v_alignbit_b32 v1, v2, v0, 16 283; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v2 284; GFX7-NEXT: s_setpc_b64 s[30:31] 285; 286; GFX8-LABEL: v_abs_v4i16: 287; GFX8: ; %bb.0: 288; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 289; GFX8-NEXT: v_mov_b32_e32 v2, 0 290; GFX8-NEXT: v_sub_u16_sdwa v3, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 291; GFX8-NEXT: v_sub_u16_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 292; GFX8-NEXT: v_sub_u16_e32 v4, 0, v1 293; GFX8-NEXT: v_sub_u16_e32 v5, 0, v0 294; GFX8-NEXT: v_max_i16_sdwa v3, v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 295; GFX8-NEXT: v_max_i16_sdwa v2, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 296; GFX8-NEXT: v_max_i16_e32 v0, v0, v5 297; GFX8-NEXT: v_max_i16_e32 v1, v1, v4 298; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 299; GFX8-NEXT: v_or_b32_e32 v1, v1, v3 300; GFX8-NEXT: s_setpc_b64 s[30:31] 301; 302; GFX9-LABEL: v_abs_v4i16: 303; GFX9: ; %bb.0: 304; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 305; GFX9-NEXT: v_pk_sub_i16 v2, 0, v0 306; GFX9-NEXT: v_pk_max_i16 v0, v0, v2 307; GFX9-NEXT: v_pk_sub_i16 v2, 0, v1 308; GFX9-NEXT: v_pk_max_i16 v1, v1, v2 309; GFX9-NEXT: s_setpc_b64 s[30:31] 310; 311; GFX10-LABEL: v_abs_v4i16: 312; GFX10: ; %bb.0: 313; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 314; GFX10-NEXT: v_pk_sub_i16 v2, 0, v0 315; GFX10-NEXT: v_pk_sub_i16 v3, 0, v1 316; GFX10-NEXT: v_pk_max_i16 v0, v0, v2 317; GFX10-NEXT: v_pk_max_i16 v1, v1, v3 318; GFX10-NEXT: s_setpc_b64 s[30:31] 319; 320; GFX11-LABEL: v_abs_v4i16: 321; GFX11: ; %bb.0: 322; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 323; GFX11-NEXT: v_pk_sub_i16 v2, 0, v0 324; GFX11-NEXT: v_pk_sub_i16 v3, 0, v1 325; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 326; GFX11-NEXT: v_pk_max_i16 v0, v0, v2 327; GFX11-NEXT: v_pk_max_i16 v1, v1, v3 328; GFX11-NEXT: s_setpc_b64 s[30:31] 329; 330; GFX12-LABEL: v_abs_v4i16: 331; GFX12: ; %bb.0: 332; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 333; GFX12-NEXT: s_wait_expcnt 0x0 334; GFX12-NEXT: s_wait_samplecnt 0x0 335; GFX12-NEXT: s_wait_bvhcnt 0x0 336; GFX12-NEXT: s_wait_kmcnt 0x0 337; GFX12-NEXT: v_pk_sub_i16 v2, 0, v0 338; GFX12-NEXT: v_pk_sub_i16 v3, 0, v1 339; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 340; GFX12-NEXT: v_pk_max_i16 v0, v0, v2 341; GFX12-NEXT: v_pk_max_i16 v1, v1, v3 342; GFX12-NEXT: s_setpc_b64 s[30:31] 343 %res = call <4 x i16> @llvm.abs.v4i16(<4 x i16> %arg, i1 false) 344 ret <4 x i16> %res 345} 346 347define <6 x i16> @v_abs_v6i16(<6 x i16> %arg) { 348; GFX6-LABEL: v_abs_v6i16: 349; GFX6: ; %bb.0: 350; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 351; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 16 352; GFX6-NEXT: v_bfe_i32 v3, v3, 0, 16 353; GFX6-NEXT: v_sub_i32_e32 v6, vcc, 0, v2 354; GFX6-NEXT: v_max_i32_e32 v2, v2, v6 355; GFX6-NEXT: v_sub_i32_e32 v6, vcc, 0, v3 356; GFX6-NEXT: v_max_i32_e32 v3, v3, v6 357; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16 358; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3 359; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16 360; GFX6-NEXT: v_or_b32_e32 v2, v2, v3 361; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 0, v0 362; GFX6-NEXT: v_max_i32_e32 v0, v0, v3 363; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 0, v1 364; GFX6-NEXT: v_bfe_i32 v5, v5, 0, 16 365; GFX6-NEXT: v_max_i32_e32 v1, v1, v3 366; GFX6-NEXT: v_bfe_i32 v4, v4, 0, 16 367; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 368; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 0, v5 369; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 370; GFX6-NEXT: v_sub_i32_e32 v1, vcc, 0, v4 371; GFX6-NEXT: v_max_i32_e32 v5, v5, v3 372; GFX6-NEXT: v_max_i32_e32 v1, v4, v1 373; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v5 374; GFX6-NEXT: v_or_b32_e32 v4, v1, v3 375; GFX6-NEXT: v_alignbit_b32 v1, v2, v0, 16 376; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v2 377; GFX6-NEXT: s_setpc_b64 s[30:31] 378; 379; GFX7-LABEL: v_abs_v6i16: 380; GFX7: ; %bb.0: 381; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 382; GFX7-NEXT: v_bfe_i32 v2, v2, 0, 16 383; GFX7-NEXT: v_bfe_i32 v3, v3, 0, 16 384; GFX7-NEXT: v_sub_i32_e32 v6, vcc, 0, v2 385; GFX7-NEXT: v_max_i32_e32 v2, v2, v6 386; GFX7-NEXT: v_sub_i32_e32 v6, vcc, 0, v3 387; GFX7-NEXT: v_max_i32_e32 v3, v3, v6 388; GFX7-NEXT: v_bfe_i32 v0, v0, 0, 16 389; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3 390; GFX7-NEXT: v_bfe_i32 v1, v1, 0, 16 391; GFX7-NEXT: v_or_b32_e32 v2, v2, v3 392; GFX7-NEXT: v_sub_i32_e32 v3, vcc, 0, v0 393; GFX7-NEXT: v_max_i32_e32 v0, v0, v3 394; GFX7-NEXT: v_sub_i32_e32 v3, vcc, 0, v1 395; GFX7-NEXT: v_bfe_i32 v5, v5, 0, 16 396; GFX7-NEXT: v_max_i32_e32 v1, v1, v3 397; GFX7-NEXT: v_bfe_i32 v4, v4, 0, 16 398; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 399; GFX7-NEXT: v_sub_i32_e32 v3, vcc, 0, v5 400; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 401; GFX7-NEXT: v_sub_i32_e32 v1, vcc, 0, v4 402; GFX7-NEXT: v_max_i32_e32 v5, v5, v3 403; GFX7-NEXT: v_max_i32_e32 v1, v4, v1 404; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v5 405; GFX7-NEXT: v_or_b32_e32 v4, v1, v3 406; GFX7-NEXT: v_alignbit_b32 v1, v2, v0, 16 407; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v2 408; GFX7-NEXT: s_setpc_b64 s[30:31] 409; 410; GFX8-LABEL: v_abs_v6i16: 411; GFX8: ; %bb.0: 412; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 413; GFX8-NEXT: v_mov_b32_e32 v3, 0 414; GFX8-NEXT: v_sub_u16_sdwa v4, v3, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 415; GFX8-NEXT: v_sub_u16_sdwa v5, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 416; GFX8-NEXT: v_sub_u16_sdwa v3, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 417; GFX8-NEXT: v_sub_u16_e32 v6, 0, v2 418; GFX8-NEXT: v_sub_u16_e32 v7, 0, v1 419; GFX8-NEXT: v_sub_u16_e32 v8, 0, v0 420; GFX8-NEXT: v_max_i16_sdwa v4, v2, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 421; GFX8-NEXT: v_max_i16_sdwa v5, v1, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 422; GFX8-NEXT: v_max_i16_sdwa v3, v0, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 423; GFX8-NEXT: v_max_i16_e32 v0, v0, v8 424; GFX8-NEXT: v_max_i16_e32 v1, v1, v7 425; GFX8-NEXT: v_max_i16_e32 v2, v2, v6 426; GFX8-NEXT: v_or_b32_e32 v0, v0, v3 427; GFX8-NEXT: v_or_b32_e32 v1, v1, v5 428; GFX8-NEXT: v_or_b32_e32 v2, v2, v4 429; GFX8-NEXT: s_setpc_b64 s[30:31] 430; 431; GFX9-LABEL: v_abs_v6i16: 432; GFX9: ; %bb.0: 433; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 434; GFX9-NEXT: v_pk_sub_i16 v3, 0, v0 435; GFX9-NEXT: v_pk_max_i16 v0, v0, v3 436; GFX9-NEXT: v_pk_sub_i16 v3, 0, v1 437; GFX9-NEXT: v_pk_max_i16 v1, v1, v3 438; GFX9-NEXT: v_pk_sub_i16 v3, 0, v2 439; GFX9-NEXT: v_pk_max_i16 v2, v2, v3 440; GFX9-NEXT: s_setpc_b64 s[30:31] 441; 442; GFX10-LABEL: v_abs_v6i16: 443; GFX10: ; %bb.0: 444; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 445; GFX10-NEXT: v_pk_sub_i16 v3, 0, v0 446; GFX10-NEXT: v_pk_sub_i16 v4, 0, v1 447; GFX10-NEXT: v_pk_sub_i16 v5, 0, v2 448; GFX10-NEXT: v_pk_max_i16 v0, v0, v3 449; GFX10-NEXT: v_pk_max_i16 v1, v1, v4 450; GFX10-NEXT: v_pk_max_i16 v2, v2, v5 451; GFX10-NEXT: s_setpc_b64 s[30:31] 452; 453; GFX11-LABEL: v_abs_v6i16: 454; GFX11: ; %bb.0: 455; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 456; GFX11-NEXT: v_pk_sub_i16 v3, 0, v0 457; GFX11-NEXT: v_pk_sub_i16 v4, 0, v1 458; GFX11-NEXT: v_pk_sub_i16 v5, 0, v2 459; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 460; GFX11-NEXT: v_pk_max_i16 v0, v0, v3 461; GFX11-NEXT: v_pk_max_i16 v1, v1, v4 462; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) 463; GFX11-NEXT: v_pk_max_i16 v2, v2, v5 464; GFX11-NEXT: s_setpc_b64 s[30:31] 465; 466; GFX12-LABEL: v_abs_v6i16: 467; GFX12: ; %bb.0: 468; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 469; GFX12-NEXT: s_wait_expcnt 0x0 470; GFX12-NEXT: s_wait_samplecnt 0x0 471; GFX12-NEXT: s_wait_bvhcnt 0x0 472; GFX12-NEXT: s_wait_kmcnt 0x0 473; GFX12-NEXT: v_pk_sub_i16 v3, 0, v0 474; GFX12-NEXT: v_pk_sub_i16 v4, 0, v1 475; GFX12-NEXT: v_pk_sub_i16 v5, 0, v2 476; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 477; GFX12-NEXT: v_pk_max_i16 v0, v0, v3 478; GFX12-NEXT: v_pk_max_i16 v1, v1, v4 479; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_3) 480; GFX12-NEXT: v_pk_max_i16 v2, v2, v5 481; GFX12-NEXT: s_setpc_b64 s[30:31] 482 %res = call <6 x i16> @llvm.abs.v6i16(<6 x i16> %arg, i1 false) 483 ret <6 x i16> %res 484} 485 486define <8 x i16> @v_abs_v8i16(<8 x i16> %arg) { 487; GFX6-LABEL: v_abs_v8i16: 488; GFX6: ; %bb.0: 489; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 490; GFX6-NEXT: v_bfe_i32 v6, v6, 0, 16 491; GFX6-NEXT: v_bfe_i32 v7, v7, 0, 16 492; GFX6-NEXT: v_sub_i32_e32 v8, vcc, 0, v6 493; GFX6-NEXT: v_max_i32_e32 v6, v6, v8 494; GFX6-NEXT: v_sub_i32_e32 v8, vcc, 0, v7 495; GFX6-NEXT: v_max_i32_e32 v7, v7, v8 496; GFX6-NEXT: v_bfe_i32 v4, v4, 0, 16 497; GFX6-NEXT: v_lshlrev_b32_e32 v7, 16, v7 498; GFX6-NEXT: v_bfe_i32 v5, v5, 0, 16 499; GFX6-NEXT: v_or_b32_e32 v6, v6, v7 500; GFX6-NEXT: v_sub_i32_e32 v7, vcc, 0, v4 501; GFX6-NEXT: v_max_i32_e32 v4, v4, v7 502; GFX6-NEXT: v_sub_i32_e32 v7, vcc, 0, v5 503; GFX6-NEXT: v_max_i32_e32 v5, v5, v7 504; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 16 505; GFX6-NEXT: v_lshlrev_b32_e32 v5, 16, v5 506; GFX6-NEXT: v_bfe_i32 v3, v3, 0, 16 507; GFX6-NEXT: v_or_b32_e32 v4, v4, v5 508; GFX6-NEXT: v_sub_i32_e32 v5, vcc, 0, v2 509; GFX6-NEXT: v_max_i32_e32 v2, v2, v5 510; GFX6-NEXT: v_sub_i32_e32 v5, vcc, 0, v3 511; GFX6-NEXT: v_max_i32_e32 v3, v3, v5 512; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16 513; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3 514; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16 515; GFX6-NEXT: v_or_b32_e32 v2, v2, v3 516; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 0, v0 517; GFX6-NEXT: v_max_i32_e32 v0, v0, v3 518; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 0, v1 519; GFX6-NEXT: v_max_i32_e32 v1, v1, v3 520; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 521; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 522; GFX6-NEXT: v_alignbit_b32 v1, v2, v0, 16 523; GFX6-NEXT: v_alignbit_b32 v5, v6, v4, 16 524; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v2 525; GFX6-NEXT: v_lshrrev_b32_e32 v7, 16, v6 526; GFX6-NEXT: s_setpc_b64 s[30:31] 527; 528; GFX7-LABEL: v_abs_v8i16: 529; GFX7: ; %bb.0: 530; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 531; GFX7-NEXT: v_bfe_i32 v6, v6, 0, 16 532; GFX7-NEXT: v_bfe_i32 v7, v7, 0, 16 533; GFX7-NEXT: v_sub_i32_e32 v8, vcc, 0, v6 534; GFX7-NEXT: v_max_i32_e32 v6, v6, v8 535; GFX7-NEXT: v_sub_i32_e32 v8, vcc, 0, v7 536; GFX7-NEXT: v_max_i32_e32 v7, v7, v8 537; GFX7-NEXT: v_bfe_i32 v4, v4, 0, 16 538; GFX7-NEXT: v_lshlrev_b32_e32 v7, 16, v7 539; GFX7-NEXT: v_bfe_i32 v5, v5, 0, 16 540; GFX7-NEXT: v_or_b32_e32 v6, v6, v7 541; GFX7-NEXT: v_sub_i32_e32 v7, vcc, 0, v4 542; GFX7-NEXT: v_max_i32_e32 v4, v4, v7 543; GFX7-NEXT: v_sub_i32_e32 v7, vcc, 0, v5 544; GFX7-NEXT: v_max_i32_e32 v5, v5, v7 545; GFX7-NEXT: v_bfe_i32 v2, v2, 0, 16 546; GFX7-NEXT: v_lshlrev_b32_e32 v5, 16, v5 547; GFX7-NEXT: v_bfe_i32 v3, v3, 0, 16 548; GFX7-NEXT: v_or_b32_e32 v4, v4, v5 549; GFX7-NEXT: v_sub_i32_e32 v5, vcc, 0, v2 550; GFX7-NEXT: v_max_i32_e32 v2, v2, v5 551; GFX7-NEXT: v_sub_i32_e32 v5, vcc, 0, v3 552; GFX7-NEXT: v_max_i32_e32 v3, v3, v5 553; GFX7-NEXT: v_bfe_i32 v0, v0, 0, 16 554; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3 555; GFX7-NEXT: v_bfe_i32 v1, v1, 0, 16 556; GFX7-NEXT: v_or_b32_e32 v2, v2, v3 557; GFX7-NEXT: v_sub_i32_e32 v3, vcc, 0, v0 558; GFX7-NEXT: v_max_i32_e32 v0, v0, v3 559; GFX7-NEXT: v_sub_i32_e32 v3, vcc, 0, v1 560; GFX7-NEXT: v_max_i32_e32 v1, v1, v3 561; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 562; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 563; GFX7-NEXT: v_alignbit_b32 v1, v2, v0, 16 564; GFX7-NEXT: v_alignbit_b32 v5, v6, v4, 16 565; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v2 566; GFX7-NEXT: v_lshrrev_b32_e32 v7, 16, v6 567; GFX7-NEXT: s_setpc_b64 s[30:31] 568; 569; GFX8-LABEL: v_abs_v8i16: 570; GFX8: ; %bb.0: 571; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 572; GFX8-NEXT: v_mov_b32_e32 v4, 0 573; GFX8-NEXT: v_sub_u16_sdwa v5, v4, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 574; GFX8-NEXT: v_sub_u16_sdwa v6, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 575; GFX8-NEXT: v_sub_u16_sdwa v7, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 576; GFX8-NEXT: v_sub_u16_sdwa v4, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 577; GFX8-NEXT: v_sub_u16_e32 v8, 0, v3 578; GFX8-NEXT: v_sub_u16_e32 v9, 0, v2 579; GFX8-NEXT: v_sub_u16_e32 v10, 0, v1 580; GFX8-NEXT: v_sub_u16_e32 v11, 0, v0 581; GFX8-NEXT: v_max_i16_sdwa v5, v3, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 582; GFX8-NEXT: v_max_i16_sdwa v6, v2, v6 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 583; GFX8-NEXT: v_max_i16_sdwa v7, v1, v7 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 584; GFX8-NEXT: v_max_i16_sdwa v4, v0, v4 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 585; GFX8-NEXT: v_max_i16_e32 v0, v0, v11 586; GFX8-NEXT: v_max_i16_e32 v1, v1, v10 587; GFX8-NEXT: v_max_i16_e32 v2, v2, v9 588; GFX8-NEXT: v_max_i16_e32 v3, v3, v8 589; GFX8-NEXT: v_or_b32_e32 v0, v0, v4 590; GFX8-NEXT: v_or_b32_e32 v1, v1, v7 591; GFX8-NEXT: v_or_b32_e32 v2, v2, v6 592; GFX8-NEXT: v_or_b32_e32 v3, v3, v5 593; GFX8-NEXT: s_setpc_b64 s[30:31] 594; 595; GFX9-LABEL: v_abs_v8i16: 596; GFX9: ; %bb.0: 597; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 598; GFX9-NEXT: v_pk_sub_i16 v4, 0, v0 599; GFX9-NEXT: v_pk_max_i16 v0, v0, v4 600; GFX9-NEXT: v_pk_sub_i16 v4, 0, v1 601; GFX9-NEXT: v_pk_max_i16 v1, v1, v4 602; GFX9-NEXT: v_pk_sub_i16 v4, 0, v2 603; GFX9-NEXT: v_pk_max_i16 v2, v2, v4 604; GFX9-NEXT: v_pk_sub_i16 v4, 0, v3 605; GFX9-NEXT: v_pk_max_i16 v3, v3, v4 606; GFX9-NEXT: s_setpc_b64 s[30:31] 607; 608; GFX10-LABEL: v_abs_v8i16: 609; GFX10: ; %bb.0: 610; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 611; GFX10-NEXT: v_pk_sub_i16 v4, 0, v0 612; GFX10-NEXT: v_pk_sub_i16 v5, 0, v1 613; GFX10-NEXT: v_pk_sub_i16 v6, 0, v2 614; GFX10-NEXT: v_pk_sub_i16 v7, 0, v3 615; GFX10-NEXT: v_pk_max_i16 v0, v0, v4 616; GFX10-NEXT: v_pk_max_i16 v1, v1, v5 617; GFX10-NEXT: v_pk_max_i16 v2, v2, v6 618; GFX10-NEXT: v_pk_max_i16 v3, v3, v7 619; GFX10-NEXT: s_setpc_b64 s[30:31] 620; 621; GFX11-LABEL: v_abs_v8i16: 622; GFX11: ; %bb.0: 623; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 624; GFX11-NEXT: v_pk_sub_i16 v4, 0, v0 625; GFX11-NEXT: v_pk_sub_i16 v5, 0, v1 626; GFX11-NEXT: v_pk_sub_i16 v6, 0, v2 627; GFX11-NEXT: v_pk_sub_i16 v7, 0, v3 628; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 629; GFX11-NEXT: v_pk_max_i16 v0, v0, v4 630; GFX11-NEXT: v_pk_max_i16 v1, v1, v5 631; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 632; GFX11-NEXT: v_pk_max_i16 v2, v2, v6 633; GFX11-NEXT: v_pk_max_i16 v3, v3, v7 634; GFX11-NEXT: s_setpc_b64 s[30:31] 635; 636; GFX12-LABEL: v_abs_v8i16: 637; GFX12: ; %bb.0: 638; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 639; GFX12-NEXT: s_wait_expcnt 0x0 640; GFX12-NEXT: s_wait_samplecnt 0x0 641; GFX12-NEXT: s_wait_bvhcnt 0x0 642; GFX12-NEXT: s_wait_kmcnt 0x0 643; GFX12-NEXT: v_pk_sub_i16 v4, 0, v0 644; GFX12-NEXT: v_pk_sub_i16 v5, 0, v1 645; GFX12-NEXT: v_pk_sub_i16 v6, 0, v2 646; GFX12-NEXT: v_pk_sub_i16 v7, 0, v3 647; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 648; GFX12-NEXT: v_pk_max_i16 v0, v0, v4 649; GFX12-NEXT: v_pk_max_i16 v1, v1, v5 650; GFX12-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 651; GFX12-NEXT: v_pk_max_i16 v2, v2, v6 652; GFX12-NEXT: v_pk_max_i16 v3, v3, v7 653; GFX12-NEXT: s_setpc_b64 s[30:31] 654 %res = call <8 x i16> @llvm.abs.v8i16(<8 x i16> %arg, i1 false) 655 ret <8 x i16> %res 656} 657 658 659define <16 x i16> @v_abs_v16i16(<16 x i16> %arg) { 660; GFX6-LABEL: v_abs_v16i16: 661; GFX6: ; %bb.0: 662; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 663; GFX6-NEXT: v_bfe_i32 v14, v14, 0, 16 664; GFX6-NEXT: v_bfe_i32 v15, v15, 0, 16 665; GFX6-NEXT: v_sub_i32_e32 v16, vcc, 0, v14 666; GFX6-NEXT: v_max_i32_e32 v14, v14, v16 667; GFX6-NEXT: v_sub_i32_e32 v16, vcc, 0, v15 668; GFX6-NEXT: v_max_i32_e32 v15, v15, v16 669; GFX6-NEXT: v_bfe_i32 v12, v12, 0, 16 670; GFX6-NEXT: v_lshlrev_b32_e32 v15, 16, v15 671; GFX6-NEXT: v_bfe_i32 v13, v13, 0, 16 672; GFX6-NEXT: v_or_b32_e32 v14, v14, v15 673; GFX6-NEXT: v_sub_i32_e32 v15, vcc, 0, v12 674; GFX6-NEXT: v_max_i32_e32 v12, v12, v15 675; GFX6-NEXT: v_sub_i32_e32 v15, vcc, 0, v13 676; GFX6-NEXT: v_max_i32_e32 v13, v13, v15 677; GFX6-NEXT: v_bfe_i32 v10, v10, 0, 16 678; GFX6-NEXT: v_lshlrev_b32_e32 v13, 16, v13 679; GFX6-NEXT: v_bfe_i32 v11, v11, 0, 16 680; GFX6-NEXT: v_or_b32_e32 v12, v12, v13 681; GFX6-NEXT: v_sub_i32_e32 v13, vcc, 0, v10 682; GFX6-NEXT: v_max_i32_e32 v10, v10, v13 683; GFX6-NEXT: v_sub_i32_e32 v13, vcc, 0, v11 684; GFX6-NEXT: v_max_i32_e32 v11, v11, v13 685; GFX6-NEXT: v_bfe_i32 v8, v8, 0, 16 686; GFX6-NEXT: v_lshlrev_b32_e32 v11, 16, v11 687; GFX6-NEXT: v_bfe_i32 v9, v9, 0, 16 688; GFX6-NEXT: v_or_b32_e32 v10, v10, v11 689; GFX6-NEXT: v_sub_i32_e32 v11, vcc, 0, v8 690; GFX6-NEXT: v_max_i32_e32 v8, v8, v11 691; GFX6-NEXT: v_sub_i32_e32 v11, vcc, 0, v9 692; GFX6-NEXT: v_max_i32_e32 v9, v9, v11 693; GFX6-NEXT: v_bfe_i32 v6, v6, 0, 16 694; GFX6-NEXT: v_lshlrev_b32_e32 v9, 16, v9 695; GFX6-NEXT: v_bfe_i32 v7, v7, 0, 16 696; GFX6-NEXT: v_or_b32_e32 v8, v8, v9 697; GFX6-NEXT: v_sub_i32_e32 v9, vcc, 0, v6 698; GFX6-NEXT: v_max_i32_e32 v6, v6, v9 699; GFX6-NEXT: v_sub_i32_e32 v9, vcc, 0, v7 700; GFX6-NEXT: v_max_i32_e32 v7, v7, v9 701; GFX6-NEXT: v_bfe_i32 v4, v4, 0, 16 702; GFX6-NEXT: v_lshlrev_b32_e32 v7, 16, v7 703; GFX6-NEXT: v_bfe_i32 v5, v5, 0, 16 704; GFX6-NEXT: v_or_b32_e32 v6, v6, v7 705; GFX6-NEXT: v_sub_i32_e32 v7, vcc, 0, v4 706; GFX6-NEXT: v_max_i32_e32 v4, v4, v7 707; GFX6-NEXT: v_sub_i32_e32 v7, vcc, 0, v5 708; GFX6-NEXT: v_max_i32_e32 v5, v5, v7 709; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 16 710; GFX6-NEXT: v_lshlrev_b32_e32 v5, 16, v5 711; GFX6-NEXT: v_bfe_i32 v3, v3, 0, 16 712; GFX6-NEXT: v_or_b32_e32 v4, v4, v5 713; GFX6-NEXT: v_sub_i32_e32 v5, vcc, 0, v2 714; GFX6-NEXT: v_max_i32_e32 v2, v2, v5 715; GFX6-NEXT: v_sub_i32_e32 v5, vcc, 0, v3 716; GFX6-NEXT: v_max_i32_e32 v3, v3, v5 717; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16 718; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3 719; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16 720; GFX6-NEXT: v_or_b32_e32 v2, v2, v3 721; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 0, v0 722; GFX6-NEXT: v_max_i32_e32 v0, v0, v3 723; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 0, v1 724; GFX6-NEXT: v_max_i32_e32 v1, v1, v3 725; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 726; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 727; GFX6-NEXT: v_alignbit_b32 v1, v2, v0, 16 728; GFX6-NEXT: v_alignbit_b32 v5, v6, v4, 16 729; GFX6-NEXT: v_alignbit_b32 v9, v10, v8, 16 730; GFX6-NEXT: v_alignbit_b32 v13, v14, v12, 16 731; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v2 732; GFX6-NEXT: v_lshrrev_b32_e32 v7, 16, v6 733; GFX6-NEXT: v_lshrrev_b32_e32 v11, 16, v10 734; GFX6-NEXT: v_lshrrev_b32_e32 v15, 16, v14 735; GFX6-NEXT: s_setpc_b64 s[30:31] 736; 737; GFX7-LABEL: v_abs_v16i16: 738; GFX7: ; %bb.0: 739; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 740; GFX7-NEXT: v_bfe_i32 v14, v14, 0, 16 741; GFX7-NEXT: v_bfe_i32 v15, v15, 0, 16 742; GFX7-NEXT: v_sub_i32_e32 v16, vcc, 0, v14 743; GFX7-NEXT: v_max_i32_e32 v14, v14, v16 744; GFX7-NEXT: v_sub_i32_e32 v16, vcc, 0, v15 745; GFX7-NEXT: v_max_i32_e32 v15, v15, v16 746; GFX7-NEXT: v_bfe_i32 v12, v12, 0, 16 747; GFX7-NEXT: v_lshlrev_b32_e32 v15, 16, v15 748; GFX7-NEXT: v_bfe_i32 v13, v13, 0, 16 749; GFX7-NEXT: v_or_b32_e32 v14, v14, v15 750; GFX7-NEXT: v_sub_i32_e32 v15, vcc, 0, v12 751; GFX7-NEXT: v_max_i32_e32 v12, v12, v15 752; GFX7-NEXT: v_sub_i32_e32 v15, vcc, 0, v13 753; GFX7-NEXT: v_max_i32_e32 v13, v13, v15 754; GFX7-NEXT: v_bfe_i32 v10, v10, 0, 16 755; GFX7-NEXT: v_lshlrev_b32_e32 v13, 16, v13 756; GFX7-NEXT: v_bfe_i32 v11, v11, 0, 16 757; GFX7-NEXT: v_or_b32_e32 v12, v12, v13 758; GFX7-NEXT: v_sub_i32_e32 v13, vcc, 0, v10 759; GFX7-NEXT: v_max_i32_e32 v10, v10, v13 760; GFX7-NEXT: v_sub_i32_e32 v13, vcc, 0, v11 761; GFX7-NEXT: v_max_i32_e32 v11, v11, v13 762; GFX7-NEXT: v_bfe_i32 v8, v8, 0, 16 763; GFX7-NEXT: v_lshlrev_b32_e32 v11, 16, v11 764; GFX7-NEXT: v_bfe_i32 v9, v9, 0, 16 765; GFX7-NEXT: v_or_b32_e32 v10, v10, v11 766; GFX7-NEXT: v_sub_i32_e32 v11, vcc, 0, v8 767; GFX7-NEXT: v_max_i32_e32 v8, v8, v11 768; GFX7-NEXT: v_sub_i32_e32 v11, vcc, 0, v9 769; GFX7-NEXT: v_max_i32_e32 v9, v9, v11 770; GFX7-NEXT: v_bfe_i32 v6, v6, 0, 16 771; GFX7-NEXT: v_lshlrev_b32_e32 v9, 16, v9 772; GFX7-NEXT: v_bfe_i32 v7, v7, 0, 16 773; GFX7-NEXT: v_or_b32_e32 v8, v8, v9 774; GFX7-NEXT: v_sub_i32_e32 v9, vcc, 0, v6 775; GFX7-NEXT: v_max_i32_e32 v6, v6, v9 776; GFX7-NEXT: v_sub_i32_e32 v9, vcc, 0, v7 777; GFX7-NEXT: v_max_i32_e32 v7, v7, v9 778; GFX7-NEXT: v_bfe_i32 v4, v4, 0, 16 779; GFX7-NEXT: v_lshlrev_b32_e32 v7, 16, v7 780; GFX7-NEXT: v_bfe_i32 v5, v5, 0, 16 781; GFX7-NEXT: v_or_b32_e32 v6, v6, v7 782; GFX7-NEXT: v_sub_i32_e32 v7, vcc, 0, v4 783; GFX7-NEXT: v_max_i32_e32 v4, v4, v7 784; GFX7-NEXT: v_sub_i32_e32 v7, vcc, 0, v5 785; GFX7-NEXT: v_max_i32_e32 v5, v5, v7 786; GFX7-NEXT: v_bfe_i32 v2, v2, 0, 16 787; GFX7-NEXT: v_lshlrev_b32_e32 v5, 16, v5 788; GFX7-NEXT: v_bfe_i32 v3, v3, 0, 16 789; GFX7-NEXT: v_or_b32_e32 v4, v4, v5 790; GFX7-NEXT: v_sub_i32_e32 v5, vcc, 0, v2 791; GFX7-NEXT: v_max_i32_e32 v2, v2, v5 792; GFX7-NEXT: v_sub_i32_e32 v5, vcc, 0, v3 793; GFX7-NEXT: v_max_i32_e32 v3, v3, v5 794; GFX7-NEXT: v_bfe_i32 v0, v0, 0, 16 795; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3 796; GFX7-NEXT: v_bfe_i32 v1, v1, 0, 16 797; GFX7-NEXT: v_or_b32_e32 v2, v2, v3 798; GFX7-NEXT: v_sub_i32_e32 v3, vcc, 0, v0 799; GFX7-NEXT: v_max_i32_e32 v0, v0, v3 800; GFX7-NEXT: v_sub_i32_e32 v3, vcc, 0, v1 801; GFX7-NEXT: v_max_i32_e32 v1, v1, v3 802; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 803; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 804; GFX7-NEXT: v_alignbit_b32 v1, v2, v0, 16 805; GFX7-NEXT: v_alignbit_b32 v5, v6, v4, 16 806; GFX7-NEXT: v_alignbit_b32 v9, v10, v8, 16 807; GFX7-NEXT: v_alignbit_b32 v13, v14, v12, 16 808; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v2 809; GFX7-NEXT: v_lshrrev_b32_e32 v7, 16, v6 810; GFX7-NEXT: v_lshrrev_b32_e32 v11, 16, v10 811; GFX7-NEXT: v_lshrrev_b32_e32 v15, 16, v14 812; GFX7-NEXT: s_setpc_b64 s[30:31] 813; 814; GFX8-LABEL: v_abs_v16i16: 815; GFX8: ; %bb.0: 816; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 817; GFX8-NEXT: v_mov_b32_e32 v8, 0 818; GFX8-NEXT: v_sub_u16_sdwa v9, v8, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 819; GFX8-NEXT: v_sub_u16_sdwa v10, v8, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 820; GFX8-NEXT: v_sub_u16_sdwa v11, v8, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 821; GFX8-NEXT: v_sub_u16_sdwa v12, v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 822; GFX8-NEXT: v_sub_u16_sdwa v13, v8, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 823; GFX8-NEXT: v_sub_u16_sdwa v14, v8, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 824; GFX8-NEXT: v_sub_u16_sdwa v15, v8, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 825; GFX8-NEXT: v_sub_u16_sdwa v8, v8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 826; GFX8-NEXT: v_sub_u16_e32 v19, 0, v0 827; GFX8-NEXT: v_max_i16_sdwa v8, v0, v8 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 828; GFX8-NEXT: v_max_i16_e32 v0, v0, v19 829; GFX8-NEXT: v_or_b32_e32 v0, v0, v8 830; GFX8-NEXT: v_sub_u16_e32 v8, 0, v1 831; GFX8-NEXT: v_max_i16_sdwa v15, v1, v15 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 832; GFX8-NEXT: v_max_i16_e32 v1, v1, v8 833; GFX8-NEXT: v_sub_u16_e32 v16, 0, v7 834; GFX8-NEXT: v_sub_u16_e32 v17, 0, v6 835; GFX8-NEXT: v_sub_u16_e32 v18, 0, v5 836; GFX8-NEXT: v_sub_u16_e32 v19, 0, v4 837; GFX8-NEXT: v_sub_u16_e32 v8, 0, v3 838; GFX8-NEXT: v_or_b32_e32 v1, v1, v15 839; GFX8-NEXT: v_sub_u16_e32 v15, 0, v2 840; GFX8-NEXT: v_max_i16_sdwa v9, v7, v9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 841; GFX8-NEXT: v_max_i16_sdwa v10, v6, v10 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 842; GFX8-NEXT: v_max_i16_sdwa v11, v5, v11 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 843; GFX8-NEXT: v_max_i16_sdwa v12, v4, v12 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 844; GFX8-NEXT: v_max_i16_sdwa v13, v3, v13 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 845; GFX8-NEXT: v_max_i16_sdwa v14, v2, v14 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 846; GFX8-NEXT: v_max_i16_e32 v2, v2, v15 847; GFX8-NEXT: v_max_i16_e32 v3, v3, v8 848; GFX8-NEXT: v_max_i16_e32 v4, v4, v19 849; GFX8-NEXT: v_max_i16_e32 v5, v5, v18 850; GFX8-NEXT: v_max_i16_e32 v6, v6, v17 851; GFX8-NEXT: v_max_i16_e32 v7, v7, v16 852; GFX8-NEXT: v_or_b32_e32 v2, v2, v14 853; GFX8-NEXT: v_or_b32_e32 v3, v3, v13 854; GFX8-NEXT: v_or_b32_e32 v4, v4, v12 855; GFX8-NEXT: v_or_b32_e32 v5, v5, v11 856; GFX8-NEXT: v_or_b32_e32 v6, v6, v10 857; GFX8-NEXT: v_or_b32_e32 v7, v7, v9 858; GFX8-NEXT: s_setpc_b64 s[30:31] 859; 860; GFX9-LABEL: v_abs_v16i16: 861; GFX9: ; %bb.0: 862; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 863; GFX9-NEXT: v_pk_sub_i16 v8, 0, v0 864; GFX9-NEXT: v_pk_max_i16 v0, v0, v8 865; GFX9-NEXT: v_pk_sub_i16 v8, 0, v1 866; GFX9-NEXT: v_pk_max_i16 v1, v1, v8 867; GFX9-NEXT: v_pk_sub_i16 v8, 0, v2 868; GFX9-NEXT: v_pk_max_i16 v2, v2, v8 869; GFX9-NEXT: v_pk_sub_i16 v8, 0, v3 870; GFX9-NEXT: v_pk_max_i16 v3, v3, v8 871; GFX9-NEXT: v_pk_sub_i16 v8, 0, v4 872; GFX9-NEXT: v_pk_max_i16 v4, v4, v8 873; GFX9-NEXT: v_pk_sub_i16 v8, 0, v5 874; GFX9-NEXT: v_pk_max_i16 v5, v5, v8 875; GFX9-NEXT: v_pk_sub_i16 v8, 0, v6 876; GFX9-NEXT: v_pk_max_i16 v6, v6, v8 877; GFX9-NEXT: v_pk_sub_i16 v8, 0, v7 878; GFX9-NEXT: v_pk_max_i16 v7, v7, v8 879; GFX9-NEXT: s_setpc_b64 s[30:31] 880; 881; GFX10-LABEL: v_abs_v16i16: 882; GFX10: ; %bb.0: 883; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 884; GFX10-NEXT: v_pk_sub_i16 v8, 0, v0 885; GFX10-NEXT: v_pk_sub_i16 v9, 0, v1 886; GFX10-NEXT: v_pk_sub_i16 v10, 0, v2 887; GFX10-NEXT: v_pk_sub_i16 v11, 0, v6 888; GFX10-NEXT: v_pk_sub_i16 v12, 0, v7 889; GFX10-NEXT: v_pk_max_i16 v0, v0, v8 890; GFX10-NEXT: v_pk_max_i16 v1, v1, v9 891; GFX10-NEXT: v_pk_max_i16 v2, v2, v10 892; GFX10-NEXT: v_pk_sub_i16 v8, 0, v3 893; GFX10-NEXT: v_pk_sub_i16 v9, 0, v4 894; GFX10-NEXT: v_pk_sub_i16 v10, 0, v5 895; GFX10-NEXT: v_pk_max_i16 v6, v6, v11 896; GFX10-NEXT: v_pk_max_i16 v7, v7, v12 897; GFX10-NEXT: v_pk_max_i16 v3, v3, v8 898; GFX10-NEXT: v_pk_max_i16 v4, v4, v9 899; GFX10-NEXT: v_pk_max_i16 v5, v5, v10 900; GFX10-NEXT: s_setpc_b64 s[30:31] 901; 902; GFX11-LABEL: v_abs_v16i16: 903; GFX11: ; %bb.0: 904; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 905; GFX11-NEXT: v_pk_sub_i16 v8, 0, v0 906; GFX11-NEXT: v_pk_sub_i16 v9, 0, v1 907; GFX11-NEXT: v_pk_sub_i16 v10, 0, v2 908; GFX11-NEXT: v_pk_sub_i16 v11, 0, v6 909; GFX11-NEXT: v_pk_sub_i16 v12, 0, v7 910; GFX11-NEXT: v_pk_max_i16 v0, v0, v8 911; GFX11-NEXT: v_pk_max_i16 v1, v1, v9 912; GFX11-NEXT: v_pk_max_i16 v2, v2, v10 913; GFX11-NEXT: v_pk_sub_i16 v8, 0, v3 914; GFX11-NEXT: v_pk_sub_i16 v9, 0, v4 915; GFX11-NEXT: v_pk_sub_i16 v10, 0, v5 916; GFX11-NEXT: v_pk_max_i16 v6, v6, v11 917; GFX11-NEXT: v_pk_max_i16 v7, v7, v12 918; GFX11-NEXT: v_pk_max_i16 v3, v3, v8 919; GFX11-NEXT: v_pk_max_i16 v4, v4, v9 920; GFX11-NEXT: v_pk_max_i16 v5, v5, v10 921; GFX11-NEXT: s_setpc_b64 s[30:31] 922; 923; GFX12-LABEL: v_abs_v16i16: 924; GFX12: ; %bb.0: 925; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 926; GFX12-NEXT: s_wait_expcnt 0x0 927; GFX12-NEXT: s_wait_samplecnt 0x0 928; GFX12-NEXT: s_wait_bvhcnt 0x0 929; GFX12-NEXT: s_wait_kmcnt 0x0 930; GFX12-NEXT: v_pk_sub_i16 v8, 0, v0 931; GFX12-NEXT: v_pk_sub_i16 v9, 0, v1 932; GFX12-NEXT: v_pk_sub_i16 v10, 0, v2 933; GFX12-NEXT: v_pk_sub_i16 v11, 0, v6 934; GFX12-NEXT: v_pk_sub_i16 v12, 0, v7 935; GFX12-NEXT: v_pk_max_i16 v0, v0, v8 936; GFX12-NEXT: v_pk_max_i16 v1, v1, v9 937; GFX12-NEXT: v_pk_max_i16 v2, v2, v10 938; GFX12-NEXT: v_pk_sub_i16 v8, 0, v3 939; GFX12-NEXT: v_pk_sub_i16 v9, 0, v4 940; GFX12-NEXT: v_pk_sub_i16 v10, 0, v5 941; GFX12-NEXT: v_pk_max_i16 v6, v6, v11 942; GFX12-NEXT: v_pk_max_i16 v7, v7, v12 943; GFX12-NEXT: v_pk_max_i16 v3, v3, v8 944; GFX12-NEXT: v_pk_max_i16 v4, v4, v9 945; GFX12-NEXT: v_pk_max_i16 v5, v5, v10 946; GFX12-NEXT: s_setpc_b64 s[30:31] 947 %res = call <16 x i16> @llvm.abs.v16i16(<16 x i16> %arg, i1 false) 948 ret <16 x i16> %res 949} 950 951define <32 x i16> @v_abs_v32i16(<32 x i16> %arg) { 952; GFX6-LABEL: v_abs_v32i16: 953; GFX6: ; %bb.0: 954; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 955; GFX6-NEXT: v_bfe_i32 v28, v28, 0, 16 956; GFX6-NEXT: v_sub_i32_e32 v31, vcc, 0, v28 957; GFX6-NEXT: v_bfe_i32 v29, v29, 0, 16 958; GFX6-NEXT: v_max_i32_e32 v28, v28, v31 959; GFX6-NEXT: v_sub_i32_e32 v31, vcc, 0, v29 960; GFX6-NEXT: v_bfe_i32 v30, v30, 0, 16 961; GFX6-NEXT: v_max_i32_e32 v29, v29, v31 962; GFX6-NEXT: v_sub_i32_e32 v31, vcc, 0, v30 963; GFX6-NEXT: v_bfe_i32 v26, v26, 0, 16 964; GFX6-NEXT: v_max_i32_e32 v30, v30, v31 965; GFX6-NEXT: v_sub_i32_e32 v31, vcc, 0, v26 966; GFX6-NEXT: v_bfe_i32 v27, v27, 0, 16 967; GFX6-NEXT: v_max_i32_e32 v26, v26, v31 968; GFX6-NEXT: v_sub_i32_e32 v31, vcc, 0, v27 969; GFX6-NEXT: v_bfe_i32 v24, v24, 0, 16 970; GFX6-NEXT: v_max_i32_e32 v27, v27, v31 971; GFX6-NEXT: v_sub_i32_e32 v31, vcc, 0, v24 972; GFX6-NEXT: v_bfe_i32 v25, v25, 0, 16 973; GFX6-NEXT: v_max_i32_e32 v24, v24, v31 974; GFX6-NEXT: v_sub_i32_e32 v31, vcc, 0, v25 975; GFX6-NEXT: v_bfe_i32 v22, v22, 0, 16 976; GFX6-NEXT: v_max_i32_e32 v25, v25, v31 977; GFX6-NEXT: v_sub_i32_e32 v31, vcc, 0, v22 978; GFX6-NEXT: v_bfe_i32 v23, v23, 0, 16 979; GFX6-NEXT: v_max_i32_e32 v22, v22, v31 980; GFX6-NEXT: v_sub_i32_e32 v31, vcc, 0, v23 981; GFX6-NEXT: v_max_i32_e32 v23, v23, v31 982; GFX6-NEXT: buffer_load_dword v31, off, s[0:3], s32 983; GFX6-NEXT: v_lshlrev_b32_e32 v23, 16, v23 984; GFX6-NEXT: v_lshlrev_b32_e32 v25, 16, v25 985; GFX6-NEXT: v_or_b32_e32 v22, v22, v23 986; GFX6-NEXT: v_or_b32_e32 v24, v24, v25 987; GFX6-NEXT: v_bfe_i32 v21, v21, 0, 16 988; GFX6-NEXT: v_bfe_i32 v20, v20, 0, 16 989; GFX6-NEXT: v_lshlrev_b32_e32 v29, 16, v29 990; GFX6-NEXT: v_or_b32_e32 v28, v28, v29 991; GFX6-NEXT: v_sub_i32_e32 v29, vcc, 0, v20 992; GFX6-NEXT: v_max_i32_e32 v20, v20, v29 993; GFX6-NEXT: v_bfe_i32 v18, v18, 0, 16 994; GFX6-NEXT: v_bfe_i32 v19, v19, 0, 16 995; GFX6-NEXT: v_bfe_i32 v16, v16, 0, 16 996; GFX6-NEXT: v_bfe_i32 v17, v17, 0, 16 997; GFX6-NEXT: v_bfe_i32 v14, v14, 0, 16 998; GFX6-NEXT: v_bfe_i32 v15, v15, 0, 16 999; GFX6-NEXT: v_bfe_i32 v12, v12, 0, 16 1000; GFX6-NEXT: v_bfe_i32 v13, v13, 0, 16 1001; GFX6-NEXT: v_bfe_i32 v10, v10, 0, 16 1002; GFX6-NEXT: v_bfe_i32 v11, v11, 0, 16 1003; GFX6-NEXT: v_bfe_i32 v8, v8, 0, 16 1004; GFX6-NEXT: v_bfe_i32 v9, v9, 0, 16 1005; GFX6-NEXT: v_bfe_i32 v6, v6, 0, 16 1006; GFX6-NEXT: v_bfe_i32 v7, v7, 0, 16 1007; GFX6-NEXT: v_bfe_i32 v4, v4, 0, 16 1008; GFX6-NEXT: v_bfe_i32 v5, v5, 0, 16 1009; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 16 1010; GFX6-NEXT: v_bfe_i32 v3, v3, 0, 16 1011; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16 1012; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16 1013; GFX6-NEXT: v_lshlrev_b32_e32 v27, 16, v27 1014; GFX6-NEXT: v_or_b32_e32 v26, v26, v27 1015; GFX6-NEXT: v_lshrrev_b32_e32 v27, 16, v26 1016; GFX6-NEXT: s_waitcnt vmcnt(0) 1017; GFX6-NEXT: v_bfe_i32 v23, v31, 0, 16 1018; GFX6-NEXT: v_sub_i32_e32 v25, vcc, 0, v23 1019; GFX6-NEXT: v_max_i32_e32 v23, v23, v25 1020; GFX6-NEXT: v_lshlrev_b32_e32 v23, 16, v23 1021; GFX6-NEXT: v_or_b32_e32 v30, v30, v23 1022; GFX6-NEXT: v_sub_i32_e32 v23, vcc, 0, v21 1023; GFX6-NEXT: v_max_i32_e32 v21, v21, v23 1024; GFX6-NEXT: v_lshlrev_b32_e32 v21, 16, v21 1025; GFX6-NEXT: v_or_b32_e32 v20, v20, v21 1026; GFX6-NEXT: v_sub_i32_e32 v21, vcc, 0, v18 1027; GFX6-NEXT: v_max_i32_e32 v18, v18, v21 1028; GFX6-NEXT: v_sub_i32_e32 v21, vcc, 0, v19 1029; GFX6-NEXT: v_max_i32_e32 v19, v19, v21 1030; GFX6-NEXT: v_lshlrev_b32_e32 v19, 16, v19 1031; GFX6-NEXT: v_or_b32_e32 v18, v18, v19 1032; GFX6-NEXT: v_sub_i32_e32 v19, vcc, 0, v16 1033; GFX6-NEXT: v_max_i32_e32 v16, v16, v19 1034; GFX6-NEXT: v_sub_i32_e32 v19, vcc, 0, v17 1035; GFX6-NEXT: v_max_i32_e32 v17, v17, v19 1036; GFX6-NEXT: v_lshlrev_b32_e32 v17, 16, v17 1037; GFX6-NEXT: v_or_b32_e32 v16, v16, v17 1038; GFX6-NEXT: v_sub_i32_e32 v17, vcc, 0, v14 1039; GFX6-NEXT: v_max_i32_e32 v14, v14, v17 1040; GFX6-NEXT: v_sub_i32_e32 v17, vcc, 0, v15 1041; GFX6-NEXT: v_max_i32_e32 v15, v15, v17 1042; GFX6-NEXT: v_lshlrev_b32_e32 v15, 16, v15 1043; GFX6-NEXT: v_or_b32_e32 v14, v14, v15 1044; GFX6-NEXT: v_sub_i32_e32 v15, vcc, 0, v12 1045; GFX6-NEXT: v_max_i32_e32 v12, v12, v15 1046; GFX6-NEXT: v_sub_i32_e32 v15, vcc, 0, v13 1047; GFX6-NEXT: v_max_i32_e32 v13, v13, v15 1048; GFX6-NEXT: v_lshlrev_b32_e32 v13, 16, v13 1049; GFX6-NEXT: v_or_b32_e32 v12, v12, v13 1050; GFX6-NEXT: v_sub_i32_e32 v13, vcc, 0, v10 1051; GFX6-NEXT: v_max_i32_e32 v10, v10, v13 1052; GFX6-NEXT: v_sub_i32_e32 v13, vcc, 0, v11 1053; GFX6-NEXT: v_max_i32_e32 v11, v11, v13 1054; GFX6-NEXT: v_lshlrev_b32_e32 v11, 16, v11 1055; GFX6-NEXT: v_or_b32_e32 v10, v10, v11 1056; GFX6-NEXT: v_sub_i32_e32 v11, vcc, 0, v8 1057; GFX6-NEXT: v_max_i32_e32 v8, v8, v11 1058; GFX6-NEXT: v_sub_i32_e32 v11, vcc, 0, v9 1059; GFX6-NEXT: v_max_i32_e32 v9, v9, v11 1060; GFX6-NEXT: v_lshlrev_b32_e32 v9, 16, v9 1061; GFX6-NEXT: v_or_b32_e32 v8, v8, v9 1062; GFX6-NEXT: v_sub_i32_e32 v9, vcc, 0, v6 1063; GFX6-NEXT: v_max_i32_e32 v6, v6, v9 1064; GFX6-NEXT: v_sub_i32_e32 v9, vcc, 0, v7 1065; GFX6-NEXT: v_max_i32_e32 v7, v7, v9 1066; GFX6-NEXT: v_lshlrev_b32_e32 v7, 16, v7 1067; GFX6-NEXT: v_or_b32_e32 v6, v6, v7 1068; GFX6-NEXT: v_sub_i32_e32 v7, vcc, 0, v4 1069; GFX6-NEXT: v_max_i32_e32 v4, v4, v7 1070; GFX6-NEXT: v_sub_i32_e32 v7, vcc, 0, v5 1071; GFX6-NEXT: v_max_i32_e32 v5, v5, v7 1072; GFX6-NEXT: v_lshlrev_b32_e32 v5, 16, v5 1073; GFX6-NEXT: v_or_b32_e32 v4, v4, v5 1074; GFX6-NEXT: v_sub_i32_e32 v5, vcc, 0, v2 1075; GFX6-NEXT: v_max_i32_e32 v2, v2, v5 1076; GFX6-NEXT: v_sub_i32_e32 v5, vcc, 0, v3 1077; GFX6-NEXT: v_max_i32_e32 v3, v3, v5 1078; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3 1079; GFX6-NEXT: v_or_b32_e32 v2, v2, v3 1080; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 0, v0 1081; GFX6-NEXT: v_max_i32_e32 v0, v0, v3 1082; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 0, v1 1083; GFX6-NEXT: v_max_i32_e32 v1, v1, v3 1084; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 1085; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 1086; GFX6-NEXT: v_alignbit_b32 v1, v2, v0, 16 1087; GFX6-NEXT: v_alignbit_b32 v5, v6, v4, 16 1088; GFX6-NEXT: v_alignbit_b32 v9, v10, v8, 16 1089; GFX6-NEXT: v_alignbit_b32 v13, v14, v12, 16 1090; GFX6-NEXT: v_alignbit_b32 v17, v18, v16, 16 1091; GFX6-NEXT: v_alignbit_b32 v21, v22, v20, 16 1092; GFX6-NEXT: v_alignbit_b32 v25, v26, v24, 16 1093; GFX6-NEXT: v_alignbit_b32 v29, v30, v28, 16 1094; GFX6-NEXT: v_lshrrev_b32_e32 v3, 16, v2 1095; GFX6-NEXT: v_lshrrev_b32_e32 v7, 16, v6 1096; GFX6-NEXT: v_lshrrev_b32_e32 v11, 16, v10 1097; GFX6-NEXT: v_lshrrev_b32_e32 v15, 16, v14 1098; GFX6-NEXT: v_lshrrev_b32_e32 v19, 16, v18 1099; GFX6-NEXT: v_lshrrev_b32_e32 v23, 16, v22 1100; GFX6-NEXT: v_lshrrev_b32_e32 v31, 16, v30 1101; GFX6-NEXT: s_setpc_b64 s[30:31] 1102; 1103; GFX7-LABEL: v_abs_v32i16: 1104; GFX7: ; %bb.0: 1105; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1106; GFX7-NEXT: v_bfe_i32 v28, v28, 0, 16 1107; GFX7-NEXT: v_sub_i32_e32 v31, vcc, 0, v28 1108; GFX7-NEXT: v_bfe_i32 v29, v29, 0, 16 1109; GFX7-NEXT: v_max_i32_e32 v28, v28, v31 1110; GFX7-NEXT: v_sub_i32_e32 v31, vcc, 0, v29 1111; GFX7-NEXT: v_bfe_i32 v30, v30, 0, 16 1112; GFX7-NEXT: v_max_i32_e32 v29, v29, v31 1113; GFX7-NEXT: v_sub_i32_e32 v31, vcc, 0, v30 1114; GFX7-NEXT: v_bfe_i32 v26, v26, 0, 16 1115; GFX7-NEXT: v_max_i32_e32 v30, v30, v31 1116; GFX7-NEXT: v_sub_i32_e32 v31, vcc, 0, v26 1117; GFX7-NEXT: v_bfe_i32 v27, v27, 0, 16 1118; GFX7-NEXT: v_max_i32_e32 v26, v26, v31 1119; GFX7-NEXT: v_sub_i32_e32 v31, vcc, 0, v27 1120; GFX7-NEXT: v_bfe_i32 v24, v24, 0, 16 1121; GFX7-NEXT: v_max_i32_e32 v27, v27, v31 1122; GFX7-NEXT: v_sub_i32_e32 v31, vcc, 0, v24 1123; GFX7-NEXT: v_bfe_i32 v25, v25, 0, 16 1124; GFX7-NEXT: v_max_i32_e32 v24, v24, v31 1125; GFX7-NEXT: v_sub_i32_e32 v31, vcc, 0, v25 1126; GFX7-NEXT: v_bfe_i32 v22, v22, 0, 16 1127; GFX7-NEXT: v_max_i32_e32 v25, v25, v31 1128; GFX7-NEXT: v_sub_i32_e32 v31, vcc, 0, v22 1129; GFX7-NEXT: v_bfe_i32 v23, v23, 0, 16 1130; GFX7-NEXT: v_max_i32_e32 v22, v22, v31 1131; GFX7-NEXT: v_sub_i32_e32 v31, vcc, 0, v23 1132; GFX7-NEXT: v_max_i32_e32 v23, v23, v31 1133; GFX7-NEXT: buffer_load_dword v31, off, s[0:3], s32 1134; GFX7-NEXT: v_lshlrev_b32_e32 v23, 16, v23 1135; GFX7-NEXT: v_lshlrev_b32_e32 v25, 16, v25 1136; GFX7-NEXT: v_or_b32_e32 v22, v22, v23 1137; GFX7-NEXT: v_or_b32_e32 v24, v24, v25 1138; GFX7-NEXT: v_bfe_i32 v21, v21, 0, 16 1139; GFX7-NEXT: v_bfe_i32 v20, v20, 0, 16 1140; GFX7-NEXT: v_lshlrev_b32_e32 v29, 16, v29 1141; GFX7-NEXT: v_or_b32_e32 v28, v28, v29 1142; GFX7-NEXT: v_sub_i32_e32 v29, vcc, 0, v20 1143; GFX7-NEXT: v_max_i32_e32 v20, v20, v29 1144; GFX7-NEXT: v_bfe_i32 v18, v18, 0, 16 1145; GFX7-NEXT: v_bfe_i32 v19, v19, 0, 16 1146; GFX7-NEXT: v_bfe_i32 v16, v16, 0, 16 1147; GFX7-NEXT: v_bfe_i32 v17, v17, 0, 16 1148; GFX7-NEXT: v_bfe_i32 v14, v14, 0, 16 1149; GFX7-NEXT: v_bfe_i32 v15, v15, 0, 16 1150; GFX7-NEXT: v_bfe_i32 v12, v12, 0, 16 1151; GFX7-NEXT: v_bfe_i32 v13, v13, 0, 16 1152; GFX7-NEXT: v_bfe_i32 v10, v10, 0, 16 1153; GFX7-NEXT: v_bfe_i32 v11, v11, 0, 16 1154; GFX7-NEXT: v_bfe_i32 v8, v8, 0, 16 1155; GFX7-NEXT: v_bfe_i32 v9, v9, 0, 16 1156; GFX7-NEXT: v_bfe_i32 v6, v6, 0, 16 1157; GFX7-NEXT: v_bfe_i32 v7, v7, 0, 16 1158; GFX7-NEXT: v_bfe_i32 v4, v4, 0, 16 1159; GFX7-NEXT: v_bfe_i32 v5, v5, 0, 16 1160; GFX7-NEXT: v_bfe_i32 v2, v2, 0, 16 1161; GFX7-NEXT: v_bfe_i32 v3, v3, 0, 16 1162; GFX7-NEXT: v_bfe_i32 v0, v0, 0, 16 1163; GFX7-NEXT: v_bfe_i32 v1, v1, 0, 16 1164; GFX7-NEXT: v_lshlrev_b32_e32 v27, 16, v27 1165; GFX7-NEXT: v_or_b32_e32 v26, v26, v27 1166; GFX7-NEXT: v_lshrrev_b32_e32 v27, 16, v26 1167; GFX7-NEXT: s_waitcnt vmcnt(0) 1168; GFX7-NEXT: v_bfe_i32 v23, v31, 0, 16 1169; GFX7-NEXT: v_sub_i32_e32 v25, vcc, 0, v23 1170; GFX7-NEXT: v_max_i32_e32 v23, v23, v25 1171; GFX7-NEXT: v_lshlrev_b32_e32 v23, 16, v23 1172; GFX7-NEXT: v_or_b32_e32 v30, v30, v23 1173; GFX7-NEXT: v_sub_i32_e32 v23, vcc, 0, v21 1174; GFX7-NEXT: v_max_i32_e32 v21, v21, v23 1175; GFX7-NEXT: v_lshlrev_b32_e32 v21, 16, v21 1176; GFX7-NEXT: v_or_b32_e32 v20, v20, v21 1177; GFX7-NEXT: v_sub_i32_e32 v21, vcc, 0, v18 1178; GFX7-NEXT: v_max_i32_e32 v18, v18, v21 1179; GFX7-NEXT: v_sub_i32_e32 v21, vcc, 0, v19 1180; GFX7-NEXT: v_max_i32_e32 v19, v19, v21 1181; GFX7-NEXT: v_lshlrev_b32_e32 v19, 16, v19 1182; GFX7-NEXT: v_or_b32_e32 v18, v18, v19 1183; GFX7-NEXT: v_sub_i32_e32 v19, vcc, 0, v16 1184; GFX7-NEXT: v_max_i32_e32 v16, v16, v19 1185; GFX7-NEXT: v_sub_i32_e32 v19, vcc, 0, v17 1186; GFX7-NEXT: v_max_i32_e32 v17, v17, v19 1187; GFX7-NEXT: v_lshlrev_b32_e32 v17, 16, v17 1188; GFX7-NEXT: v_or_b32_e32 v16, v16, v17 1189; GFX7-NEXT: v_sub_i32_e32 v17, vcc, 0, v14 1190; GFX7-NEXT: v_max_i32_e32 v14, v14, v17 1191; GFX7-NEXT: v_sub_i32_e32 v17, vcc, 0, v15 1192; GFX7-NEXT: v_max_i32_e32 v15, v15, v17 1193; GFX7-NEXT: v_lshlrev_b32_e32 v15, 16, v15 1194; GFX7-NEXT: v_or_b32_e32 v14, v14, v15 1195; GFX7-NEXT: v_sub_i32_e32 v15, vcc, 0, v12 1196; GFX7-NEXT: v_max_i32_e32 v12, v12, v15 1197; GFX7-NEXT: v_sub_i32_e32 v15, vcc, 0, v13 1198; GFX7-NEXT: v_max_i32_e32 v13, v13, v15 1199; GFX7-NEXT: v_lshlrev_b32_e32 v13, 16, v13 1200; GFX7-NEXT: v_or_b32_e32 v12, v12, v13 1201; GFX7-NEXT: v_sub_i32_e32 v13, vcc, 0, v10 1202; GFX7-NEXT: v_max_i32_e32 v10, v10, v13 1203; GFX7-NEXT: v_sub_i32_e32 v13, vcc, 0, v11 1204; GFX7-NEXT: v_max_i32_e32 v11, v11, v13 1205; GFX7-NEXT: v_lshlrev_b32_e32 v11, 16, v11 1206; GFX7-NEXT: v_or_b32_e32 v10, v10, v11 1207; GFX7-NEXT: v_sub_i32_e32 v11, vcc, 0, v8 1208; GFX7-NEXT: v_max_i32_e32 v8, v8, v11 1209; GFX7-NEXT: v_sub_i32_e32 v11, vcc, 0, v9 1210; GFX7-NEXT: v_max_i32_e32 v9, v9, v11 1211; GFX7-NEXT: v_lshlrev_b32_e32 v9, 16, v9 1212; GFX7-NEXT: v_or_b32_e32 v8, v8, v9 1213; GFX7-NEXT: v_sub_i32_e32 v9, vcc, 0, v6 1214; GFX7-NEXT: v_max_i32_e32 v6, v6, v9 1215; GFX7-NEXT: v_sub_i32_e32 v9, vcc, 0, v7 1216; GFX7-NEXT: v_max_i32_e32 v7, v7, v9 1217; GFX7-NEXT: v_lshlrev_b32_e32 v7, 16, v7 1218; GFX7-NEXT: v_or_b32_e32 v6, v6, v7 1219; GFX7-NEXT: v_sub_i32_e32 v7, vcc, 0, v4 1220; GFX7-NEXT: v_max_i32_e32 v4, v4, v7 1221; GFX7-NEXT: v_sub_i32_e32 v7, vcc, 0, v5 1222; GFX7-NEXT: v_max_i32_e32 v5, v5, v7 1223; GFX7-NEXT: v_lshlrev_b32_e32 v5, 16, v5 1224; GFX7-NEXT: v_or_b32_e32 v4, v4, v5 1225; GFX7-NEXT: v_sub_i32_e32 v5, vcc, 0, v2 1226; GFX7-NEXT: v_max_i32_e32 v2, v2, v5 1227; GFX7-NEXT: v_sub_i32_e32 v5, vcc, 0, v3 1228; GFX7-NEXT: v_max_i32_e32 v3, v3, v5 1229; GFX7-NEXT: v_lshlrev_b32_e32 v3, 16, v3 1230; GFX7-NEXT: v_or_b32_e32 v2, v2, v3 1231; GFX7-NEXT: v_sub_i32_e32 v3, vcc, 0, v0 1232; GFX7-NEXT: v_max_i32_e32 v0, v0, v3 1233; GFX7-NEXT: v_sub_i32_e32 v3, vcc, 0, v1 1234; GFX7-NEXT: v_max_i32_e32 v1, v1, v3 1235; GFX7-NEXT: v_lshlrev_b32_e32 v1, 16, v1 1236; GFX7-NEXT: v_or_b32_e32 v0, v0, v1 1237; GFX7-NEXT: v_alignbit_b32 v1, v2, v0, 16 1238; GFX7-NEXT: v_alignbit_b32 v5, v6, v4, 16 1239; GFX7-NEXT: v_alignbit_b32 v9, v10, v8, 16 1240; GFX7-NEXT: v_alignbit_b32 v13, v14, v12, 16 1241; GFX7-NEXT: v_alignbit_b32 v17, v18, v16, 16 1242; GFX7-NEXT: v_alignbit_b32 v21, v22, v20, 16 1243; GFX7-NEXT: v_alignbit_b32 v25, v26, v24, 16 1244; GFX7-NEXT: v_alignbit_b32 v29, v30, v28, 16 1245; GFX7-NEXT: v_lshrrev_b32_e32 v3, 16, v2 1246; GFX7-NEXT: v_lshrrev_b32_e32 v7, 16, v6 1247; GFX7-NEXT: v_lshrrev_b32_e32 v11, 16, v10 1248; GFX7-NEXT: v_lshrrev_b32_e32 v15, 16, v14 1249; GFX7-NEXT: v_lshrrev_b32_e32 v19, 16, v18 1250; GFX7-NEXT: v_lshrrev_b32_e32 v23, 16, v22 1251; GFX7-NEXT: v_lshrrev_b32_e32 v31, 16, v30 1252; GFX7-NEXT: s_setpc_b64 s[30:31] 1253; 1254; GFX8-LABEL: v_abs_v32i16: 1255; GFX8: ; %bb.0: 1256; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1257; GFX8-NEXT: v_mov_b32_e32 v16, 0 1258; GFX8-NEXT: v_sub_u16_sdwa v18, v16, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 1259; GFX8-NEXT: v_sub_u16_e32 v19, 0, v0 1260; GFX8-NEXT: v_max_i16_sdwa v18, v0, v18 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1261; GFX8-NEXT: v_max_i16_e32 v0, v0, v19 1262; GFX8-NEXT: v_sub_u16_sdwa v19, v16, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 1263; GFX8-NEXT: v_or_b32_e32 v0, v0, v18 1264; GFX8-NEXT: v_sub_u16_e32 v18, 0, v1 1265; GFX8-NEXT: v_max_i16_sdwa v19, v1, v19 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1266; GFX8-NEXT: v_max_i16_e32 v1, v1, v18 1267; GFX8-NEXT: v_sub_u16_sdwa v18, v16, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 1268; GFX8-NEXT: v_or_b32_e32 v1, v1, v19 1269; GFX8-NEXT: v_sub_u16_e32 v19, 0, v2 1270; GFX8-NEXT: v_max_i16_sdwa v18, v2, v18 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1271; GFX8-NEXT: v_max_i16_e32 v2, v2, v19 1272; GFX8-NEXT: v_sub_u16_sdwa v19, v16, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 1273; GFX8-NEXT: v_or_b32_e32 v2, v2, v18 1274; GFX8-NEXT: v_sub_u16_e32 v18, 0, v3 1275; GFX8-NEXT: v_max_i16_sdwa v19, v3, v19 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1276; GFX8-NEXT: v_max_i16_e32 v3, v3, v18 1277; GFX8-NEXT: v_sub_u16_sdwa v18, v16, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 1278; GFX8-NEXT: v_or_b32_e32 v3, v3, v19 1279; GFX8-NEXT: v_sub_u16_e32 v19, 0, v4 1280; GFX8-NEXT: v_max_i16_sdwa v18, v4, v18 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1281; GFX8-NEXT: v_max_i16_e32 v4, v4, v19 1282; GFX8-NEXT: v_sub_u16_sdwa v19, v16, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 1283; GFX8-NEXT: v_or_b32_e32 v4, v4, v18 1284; GFX8-NEXT: v_sub_u16_e32 v18, 0, v5 1285; GFX8-NEXT: v_max_i16_sdwa v19, v5, v19 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1286; GFX8-NEXT: v_max_i16_e32 v5, v5, v18 1287; GFX8-NEXT: v_sub_u16_sdwa v18, v16, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 1288; GFX8-NEXT: v_or_b32_e32 v5, v5, v19 1289; GFX8-NEXT: v_sub_u16_e32 v19, 0, v6 1290; GFX8-NEXT: v_max_i16_sdwa v18, v6, v18 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1291; GFX8-NEXT: v_max_i16_e32 v6, v6, v19 1292; GFX8-NEXT: v_sub_u16_sdwa v19, v16, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 1293; GFX8-NEXT: v_or_b32_e32 v6, v6, v18 1294; GFX8-NEXT: v_sub_u16_e32 v18, 0, v7 1295; GFX8-NEXT: v_max_i16_sdwa v19, v7, v19 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1296; GFX8-NEXT: v_max_i16_e32 v7, v7, v18 1297; GFX8-NEXT: v_sub_u16_sdwa v18, v16, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 1298; GFX8-NEXT: v_or_b32_e32 v7, v7, v19 1299; GFX8-NEXT: v_sub_u16_e32 v19, 0, v8 1300; GFX8-NEXT: v_max_i16_sdwa v18, v8, v18 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1301; GFX8-NEXT: v_max_i16_e32 v8, v8, v19 1302; GFX8-NEXT: v_sub_u16_sdwa v19, v16, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 1303; GFX8-NEXT: v_or_b32_e32 v8, v8, v18 1304; GFX8-NEXT: v_sub_u16_e32 v18, 0, v9 1305; GFX8-NEXT: v_max_i16_sdwa v19, v9, v19 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1306; GFX8-NEXT: v_max_i16_e32 v9, v9, v18 1307; GFX8-NEXT: v_sub_u16_sdwa v18, v16, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 1308; GFX8-NEXT: v_or_b32_e32 v9, v9, v19 1309; GFX8-NEXT: v_sub_u16_e32 v19, 0, v10 1310; GFX8-NEXT: v_max_i16_sdwa v18, v10, v18 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1311; GFX8-NEXT: v_max_i16_e32 v10, v10, v19 1312; GFX8-NEXT: v_sub_u16_sdwa v19, v16, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 1313; GFX8-NEXT: v_or_b32_e32 v10, v10, v18 1314; GFX8-NEXT: v_sub_u16_e32 v18, 0, v11 1315; GFX8-NEXT: v_max_i16_sdwa v19, v11, v19 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1316; GFX8-NEXT: v_max_i16_e32 v11, v11, v18 1317; GFX8-NEXT: v_sub_u16_sdwa v18, v16, v12 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 1318; GFX8-NEXT: v_or_b32_e32 v11, v11, v19 1319; GFX8-NEXT: v_sub_u16_e32 v19, 0, v12 1320; GFX8-NEXT: v_max_i16_sdwa v18, v12, v18 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1321; GFX8-NEXT: v_max_i16_e32 v12, v12, v19 1322; GFX8-NEXT: v_sub_u16_sdwa v17, v16, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 1323; GFX8-NEXT: v_sub_u16_sdwa v19, v16, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 1324; GFX8-NEXT: v_sub_u16_sdwa v16, v16, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 1325; GFX8-NEXT: v_or_b32_e32 v12, v12, v18 1326; GFX8-NEXT: v_sub_u16_e32 v18, 0, v13 1327; GFX8-NEXT: v_max_i16_sdwa v16, v13, v16 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1328; GFX8-NEXT: v_max_i16_e32 v13, v13, v18 1329; GFX8-NEXT: v_sub_u16_e32 v18, 0, v15 1330; GFX8-NEXT: v_or_b32_e32 v13, v13, v16 1331; GFX8-NEXT: v_sub_u16_e32 v16, 0, v14 1332; GFX8-NEXT: v_max_i16_sdwa v17, v15, v17 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1333; GFX8-NEXT: v_max_i16_sdwa v19, v14, v19 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1334; GFX8-NEXT: v_max_i16_e32 v14, v14, v16 1335; GFX8-NEXT: v_max_i16_e32 v15, v15, v18 1336; GFX8-NEXT: v_or_b32_e32 v14, v14, v19 1337; GFX8-NEXT: v_or_b32_e32 v15, v15, v17 1338; GFX8-NEXT: s_setpc_b64 s[30:31] 1339; 1340; GFX9-LABEL: v_abs_v32i16: 1341; GFX9: ; %bb.0: 1342; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1343; GFX9-NEXT: v_pk_sub_i16 v16, 0, v0 1344; GFX9-NEXT: v_pk_max_i16 v0, v0, v16 1345; GFX9-NEXT: v_pk_sub_i16 v16, 0, v1 1346; GFX9-NEXT: v_pk_max_i16 v1, v1, v16 1347; GFX9-NEXT: v_pk_sub_i16 v16, 0, v2 1348; GFX9-NEXT: v_pk_max_i16 v2, v2, v16 1349; GFX9-NEXT: v_pk_sub_i16 v16, 0, v3 1350; GFX9-NEXT: v_pk_max_i16 v3, v3, v16 1351; GFX9-NEXT: v_pk_sub_i16 v16, 0, v4 1352; GFX9-NEXT: v_pk_max_i16 v4, v4, v16 1353; GFX9-NEXT: v_pk_sub_i16 v16, 0, v5 1354; GFX9-NEXT: v_pk_max_i16 v5, v5, v16 1355; GFX9-NEXT: v_pk_sub_i16 v16, 0, v6 1356; GFX9-NEXT: v_pk_max_i16 v6, v6, v16 1357; GFX9-NEXT: v_pk_sub_i16 v16, 0, v7 1358; GFX9-NEXT: v_pk_max_i16 v7, v7, v16 1359; GFX9-NEXT: v_pk_sub_i16 v16, 0, v8 1360; GFX9-NEXT: v_pk_max_i16 v8, v8, v16 1361; GFX9-NEXT: v_pk_sub_i16 v16, 0, v9 1362; GFX9-NEXT: v_pk_max_i16 v9, v9, v16 1363; GFX9-NEXT: v_pk_sub_i16 v16, 0, v10 1364; GFX9-NEXT: v_pk_max_i16 v10, v10, v16 1365; GFX9-NEXT: v_pk_sub_i16 v16, 0, v11 1366; GFX9-NEXT: v_pk_max_i16 v11, v11, v16 1367; GFX9-NEXT: v_pk_sub_i16 v16, 0, v12 1368; GFX9-NEXT: v_pk_max_i16 v12, v12, v16 1369; GFX9-NEXT: v_pk_sub_i16 v16, 0, v13 1370; GFX9-NEXT: v_pk_max_i16 v13, v13, v16 1371; GFX9-NEXT: v_pk_sub_i16 v16, 0, v14 1372; GFX9-NEXT: v_pk_max_i16 v14, v14, v16 1373; GFX9-NEXT: v_pk_sub_i16 v16, 0, v15 1374; GFX9-NEXT: v_pk_max_i16 v15, v15, v16 1375; GFX9-NEXT: s_setpc_b64 s[30:31] 1376; 1377; GFX10-LABEL: v_abs_v32i16: 1378; GFX10: ; %bb.0: 1379; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1380; GFX10-NEXT: v_pk_sub_i16 v16, 0, v0 1381; GFX10-NEXT: v_pk_sub_i16 v17, 0, v2 1382; GFX10-NEXT: v_pk_sub_i16 v18, 0, v3 1383; GFX10-NEXT: v_pk_sub_i16 v19, 0, v4 1384; GFX10-NEXT: v_pk_sub_i16 v20, 0, v5 1385; GFX10-NEXT: v_pk_max_i16 v0, v0, v16 1386; GFX10-NEXT: v_pk_sub_i16 v16, 0, v1 1387; GFX10-NEXT: v_pk_max_i16 v2, v2, v17 1388; GFX10-NEXT: v_pk_max_i16 v3, v3, v18 1389; GFX10-NEXT: v_pk_max_i16 v4, v4, v19 1390; GFX10-NEXT: v_pk_max_i16 v5, v5, v20 1391; GFX10-NEXT: v_pk_max_i16 v1, v1, v16 1392; GFX10-NEXT: v_pk_sub_i16 v16, 0, v6 1393; GFX10-NEXT: v_pk_sub_i16 v17, 0, v7 1394; GFX10-NEXT: v_pk_sub_i16 v18, 0, v8 1395; GFX10-NEXT: v_pk_sub_i16 v19, 0, v9 1396; GFX10-NEXT: v_pk_sub_i16 v20, 0, v10 1397; GFX10-NEXT: v_pk_max_i16 v6, v6, v16 1398; GFX10-NEXT: v_pk_max_i16 v7, v7, v17 1399; GFX10-NEXT: v_pk_max_i16 v8, v8, v18 1400; GFX10-NEXT: v_pk_max_i16 v9, v9, v19 1401; GFX10-NEXT: v_pk_max_i16 v10, v10, v20 1402; GFX10-NEXT: v_pk_sub_i16 v16, 0, v11 1403; GFX10-NEXT: v_pk_sub_i16 v17, 0, v12 1404; GFX10-NEXT: v_pk_sub_i16 v18, 0, v13 1405; GFX10-NEXT: v_pk_sub_i16 v19, 0, v14 1406; GFX10-NEXT: v_pk_sub_i16 v20, 0, v15 1407; GFX10-NEXT: v_pk_max_i16 v11, v11, v16 1408; GFX10-NEXT: v_pk_max_i16 v12, v12, v17 1409; GFX10-NEXT: v_pk_max_i16 v13, v13, v18 1410; GFX10-NEXT: v_pk_max_i16 v14, v14, v19 1411; GFX10-NEXT: v_pk_max_i16 v15, v15, v20 1412; GFX10-NEXT: s_setpc_b64 s[30:31] 1413; 1414; GFX11-LABEL: v_abs_v32i16: 1415; GFX11: ; %bb.0: 1416; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1417; GFX11-NEXT: v_pk_sub_i16 v16, 0, v0 1418; GFX11-NEXT: v_pk_sub_i16 v17, 0, v2 1419; GFX11-NEXT: v_pk_sub_i16 v18, 0, v3 1420; GFX11-NEXT: v_pk_sub_i16 v19, 0, v4 1421; GFX11-NEXT: v_pk_sub_i16 v20, 0, v5 1422; GFX11-NEXT: v_pk_max_i16 v0, v0, v16 1423; GFX11-NEXT: v_pk_sub_i16 v16, 0, v1 1424; GFX11-NEXT: v_pk_max_i16 v2, v2, v17 1425; GFX11-NEXT: v_pk_max_i16 v3, v3, v18 1426; GFX11-NEXT: v_pk_max_i16 v4, v4, v19 1427; GFX11-NEXT: v_pk_max_i16 v5, v5, v20 1428; GFX11-NEXT: v_pk_max_i16 v1, v1, v16 1429; GFX11-NEXT: v_pk_sub_i16 v16, 0, v6 1430; GFX11-NEXT: v_pk_sub_i16 v17, 0, v7 1431; GFX11-NEXT: v_pk_sub_i16 v18, 0, v8 1432; GFX11-NEXT: v_pk_sub_i16 v19, 0, v9 1433; GFX11-NEXT: v_pk_sub_i16 v20, 0, v10 1434; GFX11-NEXT: v_pk_max_i16 v6, v6, v16 1435; GFX11-NEXT: v_pk_max_i16 v7, v7, v17 1436; GFX11-NEXT: v_pk_max_i16 v8, v8, v18 1437; GFX11-NEXT: v_pk_max_i16 v9, v9, v19 1438; GFX11-NEXT: v_pk_max_i16 v10, v10, v20 1439; GFX11-NEXT: v_pk_sub_i16 v16, 0, v11 1440; GFX11-NEXT: v_pk_sub_i16 v17, 0, v12 1441; GFX11-NEXT: v_pk_sub_i16 v18, 0, v13 1442; GFX11-NEXT: v_pk_sub_i16 v19, 0, v14 1443; GFX11-NEXT: v_pk_sub_i16 v20, 0, v15 1444; GFX11-NEXT: v_pk_max_i16 v11, v11, v16 1445; GFX11-NEXT: v_pk_max_i16 v12, v12, v17 1446; GFX11-NEXT: v_pk_max_i16 v13, v13, v18 1447; GFX11-NEXT: v_pk_max_i16 v14, v14, v19 1448; GFX11-NEXT: v_pk_max_i16 v15, v15, v20 1449; GFX11-NEXT: s_setpc_b64 s[30:31] 1450; 1451; GFX12-LABEL: v_abs_v32i16: 1452; GFX12: ; %bb.0: 1453; GFX12-NEXT: s_wait_loadcnt_dscnt 0x0 1454; GFX12-NEXT: s_wait_expcnt 0x0 1455; GFX12-NEXT: s_wait_samplecnt 0x0 1456; GFX12-NEXT: s_wait_bvhcnt 0x0 1457; GFX12-NEXT: s_wait_kmcnt 0x0 1458; GFX12-NEXT: v_pk_sub_i16 v16, 0, v0 1459; GFX12-NEXT: v_pk_sub_i16 v17, 0, v2 1460; GFX12-NEXT: v_pk_sub_i16 v18, 0, v3 1461; GFX12-NEXT: v_pk_sub_i16 v19, 0, v4 1462; GFX12-NEXT: v_pk_sub_i16 v20, 0, v5 1463; GFX12-NEXT: v_pk_max_i16 v0, v0, v16 1464; GFX12-NEXT: v_pk_sub_i16 v16, 0, v1 1465; GFX12-NEXT: v_pk_max_i16 v2, v2, v17 1466; GFX12-NEXT: v_pk_max_i16 v3, v3, v18 1467; GFX12-NEXT: v_pk_max_i16 v4, v4, v19 1468; GFX12-NEXT: v_pk_max_i16 v5, v5, v20 1469; GFX12-NEXT: v_pk_max_i16 v1, v1, v16 1470; GFX12-NEXT: v_pk_sub_i16 v16, 0, v6 1471; GFX12-NEXT: v_pk_sub_i16 v17, 0, v7 1472; GFX12-NEXT: v_pk_sub_i16 v18, 0, v8 1473; GFX12-NEXT: v_pk_sub_i16 v19, 0, v9 1474; GFX12-NEXT: v_pk_sub_i16 v20, 0, v10 1475; GFX12-NEXT: v_pk_max_i16 v6, v6, v16 1476; GFX12-NEXT: v_pk_max_i16 v7, v7, v17 1477; GFX12-NEXT: v_pk_max_i16 v8, v8, v18 1478; GFX12-NEXT: v_pk_max_i16 v9, v9, v19 1479; GFX12-NEXT: v_pk_max_i16 v10, v10, v20 1480; GFX12-NEXT: v_pk_sub_i16 v16, 0, v11 1481; GFX12-NEXT: v_pk_sub_i16 v17, 0, v12 1482; GFX12-NEXT: v_pk_sub_i16 v18, 0, v13 1483; GFX12-NEXT: v_pk_sub_i16 v19, 0, v14 1484; GFX12-NEXT: v_pk_sub_i16 v20, 0, v15 1485; GFX12-NEXT: v_pk_max_i16 v11, v11, v16 1486; GFX12-NEXT: v_pk_max_i16 v12, v12, v17 1487; GFX12-NEXT: v_pk_max_i16 v13, v13, v18 1488; GFX12-NEXT: v_pk_max_i16 v14, v14, v19 1489; GFX12-NEXT: v_pk_max_i16 v15, v15, v20 1490; GFX12-NEXT: s_setpc_b64 s[30:31] 1491 %res = call <32 x i16> @llvm.abs.v32i16(<32 x i16> %arg, i1 false) 1492 ret <32 x i16> %res 1493} 1494