1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti -o - %s | FileCheck -check-prefixes=GCN,GFX6 %s 3; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=fiji -o - %s | FileCheck -check-prefixes=GCN,GFX8 %s 4; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -o - %s | FileCheck -check-prefixes=GCN,GFX9 %s 5; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -o - %s | FileCheck -check-prefixes=GCN,GFX10 %s 6; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -o - %s | FileCheck -check-prefixes=GFX11 %s 7 8define amdgpu_ps i7 @s_fshl_i7(i7 inreg %lhs, i7 inreg %rhs, i7 inreg %amt) { 9; GFX6-LABEL: s_fshl_i7: 10; GFX6: ; %bb.0: 11; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v0, 7 12; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0 13; GFX6-NEXT: s_and_b32 s2, s2, 0x7f 14; GFX6-NEXT: s_bfe_u32 s1, s1, 0x60001 15; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 16; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 17; GFX6-NEXT: v_mul_lo_u32 v1, v0, -7 18; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1 19; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1 20; GFX6-NEXT: v_mul_hi_u32 v0, s2, v0 21; GFX6-NEXT: v_mul_lo_u32 v0, v0, 7 22; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s2, v0 23; GFX6-NEXT: v_add_i32_e32 v1, vcc, -7, v0 24; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 7, v0 25; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 26; GFX6-NEXT: v_add_i32_e32 v1, vcc, -7, v0 27; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 7, v0 28; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 29; GFX6-NEXT: v_sub_i32_e32 v1, vcc, 6, v0 30; GFX6-NEXT: v_and_b32_e32 v0, 0x7f, v0 31; GFX6-NEXT: v_and_b32_e32 v1, 0x7f, v1 32; GFX6-NEXT: v_lshl_b32_e32 v0, s0, v0 33; GFX6-NEXT: v_lshr_b32_e32 v1, s1, v1 34; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 35; GFX6-NEXT: v_readfirstlane_b32 s0, v0 36; GFX6-NEXT: ; return to shader part epilog 37; 38; GFX8-LABEL: s_fshl_i7: 39; GFX8: ; %bb.0: 40; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v0, 7 41; GFX8-NEXT: 
v_rcp_iflag_f32_e32 v0, v0 42; GFX8-NEXT: s_and_b32 s2, s2, 0x7f 43; GFX8-NEXT: s_and_b32 s1, s1, 0x7f 44; GFX8-NEXT: s_and_b32 s1, 0xffff, s1 45; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 46; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0 47; GFX8-NEXT: s_lshr_b32 s1, s1, 1 48; GFX8-NEXT: v_mul_lo_u32 v1, v0, -7 49; GFX8-NEXT: v_mul_hi_u32 v1, v0, v1 50; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1 51; GFX8-NEXT: v_mul_hi_u32 v0, s2, v0 52; GFX8-NEXT: v_mul_lo_u32 v0, v0, 7 53; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s2, v0 54; GFX8-NEXT: v_add_u32_e32 v1, vcc, -7, v0 55; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 7, v0 56; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 57; GFX8-NEXT: v_add_u32_e32 v1, vcc, -7, v0 58; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 7, v0 59; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 60; GFX8-NEXT: v_sub_u16_e32 v1, 6, v0 61; GFX8-NEXT: v_and_b32_e32 v0, 0x7f, v0 62; GFX8-NEXT: v_and_b32_e32 v1, 0x7f, v1 63; GFX8-NEXT: v_lshlrev_b16_e64 v0, v0, s0 64; GFX8-NEXT: v_lshrrev_b16_e64 v1, v1, s1 65; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 66; GFX8-NEXT: v_readfirstlane_b32 s0, v0 67; GFX8-NEXT: ; return to shader part epilog 68; 69; GFX9-LABEL: s_fshl_i7: 70; GFX9: ; %bb.0: 71; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v0, 7 72; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 73; GFX9-NEXT: s_and_b32 s2, s2, 0x7f 74; GFX9-NEXT: s_and_b32 s1, s1, 0x7f 75; GFX9-NEXT: s_and_b32 s1, 0xffff, s1 76; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 77; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 78; GFX9-NEXT: s_lshr_b32 s1, s1, 1 79; GFX9-NEXT: v_mul_lo_u32 v1, v0, -7 80; GFX9-NEXT: v_mul_hi_u32 v1, v0, v1 81; GFX9-NEXT: v_add_u32_e32 v0, v0, v1 82; GFX9-NEXT: v_mul_hi_u32 v0, s2, v0 83; GFX9-NEXT: v_mul_lo_u32 v0, v0, 7 84; GFX9-NEXT: v_sub_u32_e32 v0, s2, v0 85; GFX9-NEXT: v_add_u32_e32 v1, -7, v0 86; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 7, v0 87; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 88; GFX9-NEXT: v_add_u32_e32 v1, -7, v0 89; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 7, v0 90; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 
91; GFX9-NEXT: v_sub_u16_e32 v1, 6, v0 92; GFX9-NEXT: v_and_b32_e32 v0, 0x7f, v0 93; GFX9-NEXT: v_and_b32_e32 v1, 0x7f, v1 94; GFX9-NEXT: v_lshlrev_b16_e64 v0, v0, s0 95; GFX9-NEXT: v_lshrrev_b16_e64 v1, v1, s1 96; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 97; GFX9-NEXT: v_readfirstlane_b32 s0, v0 98; GFX9-NEXT: ; return to shader part epilog 99; 100; GFX10-LABEL: s_fshl_i7: 101; GFX10: ; %bb.0: 102; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v0, 7 103; GFX10-NEXT: s_and_b32 s2, s2, 0x7f 104; GFX10-NEXT: s_and_b32 s1, s1, 0x7f 105; GFX10-NEXT: s_and_b32 s1, 0xffff, s1 106; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 107; GFX10-NEXT: s_lshr_b32 s1, s1, 1 108; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 109; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0 110; GFX10-NEXT: v_mul_lo_u32 v1, v0, -7 111; GFX10-NEXT: v_mul_hi_u32 v1, v0, v1 112; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v1 113; GFX10-NEXT: v_mul_hi_u32 v0, s2, v0 114; GFX10-NEXT: v_mul_lo_u32 v0, v0, 7 115; GFX10-NEXT: v_sub_nc_u32_e32 v0, s2, v0 116; GFX10-NEXT: v_add_nc_u32_e32 v1, -7, v0 117; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v0 118; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 119; GFX10-NEXT: v_add_nc_u32_e32 v1, -7, v0 120; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v0 121; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 122; GFX10-NEXT: v_sub_nc_u16 v1, 6, v0 123; GFX10-NEXT: v_and_b32_e32 v0, 0x7f, v0 124; GFX10-NEXT: v_and_b32_e32 v1, 0x7f, v1 125; GFX10-NEXT: v_lshlrev_b16 v0, v0, s0 126; GFX10-NEXT: v_lshrrev_b16 v1, v1, s1 127; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 128; GFX10-NEXT: v_readfirstlane_b32 s0, v0 129; GFX10-NEXT: ; return to shader part epilog 130; 131; GFX11-LABEL: s_fshl_i7: 132; GFX11: ; %bb.0: 133; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v0, 7 134; GFX11-NEXT: s_and_b32 s2, s2, 0x7f 135; GFX11-NEXT: s_and_b32 s1, s1, 0x7f 136; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) 137; GFX11-NEXT: s_and_b32 s1, 0xffff, s1 138; GFX11-NEXT: v_rcp_iflag_f32_e32 v0, v0 139; GFX11-NEXT: 
s_lshr_b32 s1, s1, 1 140; GFX11-NEXT: s_waitcnt_depctr 0xfff 141; GFX11-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 142; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 143; GFX11-NEXT: v_cvt_u32_f32_e32 v0, v0 144; GFX11-NEXT: v_mul_lo_u32 v1, v0, -7 145; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 146; GFX11-NEXT: v_mul_hi_u32 v1, v0, v1 147; GFX11-NEXT: v_add_nc_u32_e32 v0, v0, v1 148; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 149; GFX11-NEXT: v_mul_hi_u32 v0, s2, v0 150; GFX11-NEXT: v_mul_lo_u32 v0, v0, 7 151; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 152; GFX11-NEXT: v_sub_nc_u32_e32 v0, s2, v0 153; GFX11-NEXT: v_add_nc_u32_e32 v1, -7, v0 154; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v0 155; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 156; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 157; GFX11-NEXT: v_add_nc_u32_e32 v1, -7, v0 158; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v0 159; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 160; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 161; GFX11-NEXT: v_sub_nc_u16 v1, 6, v0 162; GFX11-NEXT: v_and_b32_e32 v0, 0x7f, v0 163; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 164; GFX11-NEXT: v_and_b32_e32 v1, 0x7f, v1 165; GFX11-NEXT: v_lshlrev_b16 v0, v0, s0 166; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 167; GFX11-NEXT: v_lshrrev_b16 v1, v1, s1 168; GFX11-NEXT: v_or_b32_e32 v0, v0, v1 169; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 170; GFX11-NEXT: v_readfirstlane_b32 s0, v0 171; GFX11-NEXT: ; return to shader part epilog 172 %result = call i7 @llvm.fshl.i7(i7 %lhs, i7 %rhs, i7 %amt) 173 ret i7 %result 174} 175 176define i7 @v_fshl_i7(i7 %lhs, i7 %rhs, i7 %amt) { 177; GFX6-LABEL: v_fshl_i7: 178; GFX6: ; %bb.0: 179; 
GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 180; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v3, 7 181; GFX6-NEXT: v_rcp_iflag_f32_e32 v3, v3 182; GFX6-NEXT: v_and_b32_e32 v2, 0x7f, v2 183; GFX6-NEXT: v_bfe_u32 v1, v1, 1, 6 184; GFX6-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 185; GFX6-NEXT: v_cvt_u32_f32_e32 v3, v3 186; GFX6-NEXT: v_mul_lo_u32 v4, v3, -7 187; GFX6-NEXT: v_mul_hi_u32 v4, v3, v4 188; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v4 189; GFX6-NEXT: v_mul_hi_u32 v3, v2, v3 190; GFX6-NEXT: v_mul_lo_u32 v3, v3, 7 191; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v3 192; GFX6-NEXT: v_add_i32_e32 v3, vcc, -7, v2 193; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 7, v2 194; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 195; GFX6-NEXT: v_add_i32_e32 v3, vcc, -7, v2 196; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 7, v2 197; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 198; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 6, v2 199; GFX6-NEXT: v_and_b32_e32 v2, 0x7f, v2 200; GFX6-NEXT: v_lshlrev_b32_e32 v0, v2, v0 201; GFX6-NEXT: v_and_b32_e32 v2, 0x7f, v3 202; GFX6-NEXT: v_lshrrev_b32_e32 v1, v2, v1 203; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 204; GFX6-NEXT: s_setpc_b64 s[30:31] 205; 206; GFX8-LABEL: v_fshl_i7: 207; GFX8: ; %bb.0: 208; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 209; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v3, 7 210; GFX8-NEXT: v_rcp_iflag_f32_e32 v3, v3 211; GFX8-NEXT: v_and_b32_e32 v2, 0x7f, v2 212; GFX8-NEXT: v_and_b32_e32 v1, 0x7f, v1 213; GFX8-NEXT: v_lshrrev_b16_e32 v1, 1, v1 214; GFX8-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 215; GFX8-NEXT: v_cvt_u32_f32_e32 v3, v3 216; GFX8-NEXT: v_mul_lo_u32 v4, v3, -7 217; GFX8-NEXT: v_mul_hi_u32 v4, v3, v4 218; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v4 219; GFX8-NEXT: v_mul_hi_u32 v3, v2, v3 220; GFX8-NEXT: v_mul_lo_u32 v3, v3, 7 221; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v2, v3 222; GFX8-NEXT: v_add_u32_e32 v3, vcc, -7, v2 223; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 7, v2 224; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 225; GFX8-NEXT: v_add_u32_e32 v3, vcc, -7, v2 226; 
GFX8-NEXT: v_cmp_le_u32_e32 vcc, 7, v2 227; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 228; GFX8-NEXT: v_sub_u16_e32 v3, 6, v2 229; GFX8-NEXT: v_and_b32_e32 v2, 0x7f, v2 230; GFX8-NEXT: v_lshlrev_b16_e32 v0, v2, v0 231; GFX8-NEXT: v_and_b32_e32 v2, 0x7f, v3 232; GFX8-NEXT: v_lshrrev_b16_e32 v1, v2, v1 233; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 234; GFX8-NEXT: s_setpc_b64 s[30:31] 235; 236; GFX9-LABEL: v_fshl_i7: 237; GFX9: ; %bb.0: 238; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 239; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v3, 7 240; GFX9-NEXT: v_rcp_iflag_f32_e32 v3, v3 241; GFX9-NEXT: v_and_b32_e32 v2, 0x7f, v2 242; GFX9-NEXT: v_and_b32_e32 v1, 0x7f, v1 243; GFX9-NEXT: v_lshrrev_b16_e32 v1, 1, v1 244; GFX9-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 245; GFX9-NEXT: v_cvt_u32_f32_e32 v3, v3 246; GFX9-NEXT: v_mul_lo_u32 v4, v3, -7 247; GFX9-NEXT: v_mul_hi_u32 v4, v3, v4 248; GFX9-NEXT: v_add_u32_e32 v3, v3, v4 249; GFX9-NEXT: v_mul_hi_u32 v3, v2, v3 250; GFX9-NEXT: v_mul_lo_u32 v3, v3, 7 251; GFX9-NEXT: v_sub_u32_e32 v2, v2, v3 252; GFX9-NEXT: v_add_u32_e32 v3, -7, v2 253; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 7, v2 254; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 255; GFX9-NEXT: v_add_u32_e32 v3, -7, v2 256; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 7, v2 257; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 258; GFX9-NEXT: v_sub_u16_e32 v3, 6, v2 259; GFX9-NEXT: v_and_b32_e32 v2, 0x7f, v2 260; GFX9-NEXT: v_lshlrev_b16_e32 v0, v2, v0 261; GFX9-NEXT: v_and_b32_e32 v2, 0x7f, v3 262; GFX9-NEXT: v_lshrrev_b16_e32 v1, v2, v1 263; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 264; GFX9-NEXT: s_setpc_b64 s[30:31] 265; 266; GFX10-LABEL: v_fshl_i7: 267; GFX10: ; %bb.0: 268; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 269; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v3, 7 270; GFX10-NEXT: v_and_b32_e32 v2, 0x7f, v2 271; GFX10-NEXT: v_and_b32_e32 v1, 0x7f, v1 272; GFX10-NEXT: v_rcp_iflag_f32_e32 v3, v3 273; GFX10-NEXT: v_lshrrev_b16 v1, 1, v1 274; GFX10-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 275; GFX10-NEXT: 
v_cvt_u32_f32_e32 v3, v3 276; GFX10-NEXT: v_mul_lo_u32 v4, v3, -7 277; GFX10-NEXT: v_mul_hi_u32 v4, v3, v4 278; GFX10-NEXT: v_add_nc_u32_e32 v3, v3, v4 279; GFX10-NEXT: v_mul_hi_u32 v3, v2, v3 280; GFX10-NEXT: v_mul_lo_u32 v3, v3, 7 281; GFX10-NEXT: v_sub_nc_u32_e32 v2, v2, v3 282; GFX10-NEXT: v_add_nc_u32_e32 v3, -7, v2 283; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v2 284; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo 285; GFX10-NEXT: v_add_nc_u32_e32 v3, -7, v2 286; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v2 287; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo 288; GFX10-NEXT: v_sub_nc_u16 v3, 6, v2 289; GFX10-NEXT: v_and_b32_e32 v2, 0x7f, v2 290; GFX10-NEXT: v_and_b32_e32 v3, 0x7f, v3 291; GFX10-NEXT: v_lshlrev_b16 v0, v2, v0 292; GFX10-NEXT: v_lshrrev_b16 v1, v3, v1 293; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 294; GFX10-NEXT: s_setpc_b64 s[30:31] 295; 296; GFX11-LABEL: v_fshl_i7: 297; GFX11: ; %bb.0: 298; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 299; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v3, 7 300; GFX11-NEXT: v_and_b32_e32 v2, 0x7f, v2 301; GFX11-NEXT: v_and_b32_e32 v1, 0x7f, v1 302; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) 303; GFX11-NEXT: v_rcp_iflag_f32_e32 v3, v3 304; GFX11-NEXT: v_lshrrev_b16 v1, 1, v1 305; GFX11-NEXT: s_waitcnt_depctr 0xfff 306; GFX11-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 307; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 308; GFX11-NEXT: v_cvt_u32_f32_e32 v3, v3 309; GFX11-NEXT: v_mul_lo_u32 v4, v3, -7 310; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 311; GFX11-NEXT: v_mul_hi_u32 v4, v3, v4 312; GFX11-NEXT: v_add_nc_u32_e32 v3, v3, v4 313; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 314; GFX11-NEXT: v_mul_hi_u32 v3, v2, v3 315; GFX11-NEXT: v_mul_lo_u32 v3, v3, 7 316; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 317; GFX11-NEXT: 
v_sub_nc_u32_e32 v2, v2, v3 318; GFX11-NEXT: v_add_nc_u32_e32 v3, -7, v2 319; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v2 320; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 321; GFX11-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo 322; GFX11-NEXT: v_add_nc_u32_e32 v3, -7, v2 323; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v2 324; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 325; GFX11-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo 326; GFX11-NEXT: v_sub_nc_u16 v3, 6, v2 327; GFX11-NEXT: v_and_b32_e32 v2, 0x7f, v2 328; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 329; GFX11-NEXT: v_and_b32_e32 v3, 0x7f, v3 330; GFX11-NEXT: v_lshlrev_b16 v0, v2, v0 331; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 332; GFX11-NEXT: v_lshrrev_b16 v1, v3, v1 333; GFX11-NEXT: v_or_b32_e32 v0, v0, v1 334; GFX11-NEXT: s_setpc_b64 s[30:31] 335 %result = call i7 @llvm.fshl.i7(i7 %lhs, i7 %rhs, i7 %amt) 336 ret i7 %result 337} 338 339define amdgpu_ps i8 @s_fshl_i8(i8 inreg %lhs, i8 inreg %rhs, i8 inreg %amt) { 340; GFX6-LABEL: s_fshl_i8: 341; GFX6: ; %bb.0: 342; GFX6-NEXT: s_and_b32 s3, s2, 7 343; GFX6-NEXT: s_andn2_b32 s2, 7, s2 344; GFX6-NEXT: s_bfe_u32 s1, s1, 0x70001 345; GFX6-NEXT: s_lshl_b32 s0, s0, s3 346; GFX6-NEXT: s_lshr_b32 s1, s1, s2 347; GFX6-NEXT: s_or_b32 s0, s0, s1 348; GFX6-NEXT: ; return to shader part epilog 349; 350; GFX8-LABEL: s_fshl_i8: 351; GFX8: ; %bb.0: 352; GFX8-NEXT: s_and_b32 s1, s1, 0xff 353; GFX8-NEXT: s_and_b32 s3, s2, 7 354; GFX8-NEXT: s_and_b32 s1, 0xffff, s1 355; GFX8-NEXT: s_andn2_b32 s2, 7, s2 356; GFX8-NEXT: s_and_b32 s3, 0xffff, s3 357; GFX8-NEXT: s_lshr_b32 s1, s1, 1 358; GFX8-NEXT: s_and_b32 s2, 0xffff, s2 359; GFX8-NEXT: s_lshl_b32 s0, s0, s3 360; GFX8-NEXT: s_lshr_b32 s1, s1, s2 361; GFX8-NEXT: s_or_b32 s0, s0, s1 362; GFX8-NEXT: ; return to shader part epilog 363; 364; GFX9-LABEL: s_fshl_i8: 365; GFX9: ; %bb.0: 
366; GFX9-NEXT: s_and_b32 s1, s1, 0xff 367; GFX9-NEXT: s_and_b32 s3, s2, 7 368; GFX9-NEXT: s_and_b32 s1, 0xffff, s1 369; GFX9-NEXT: s_andn2_b32 s2, 7, s2 370; GFX9-NEXT: s_and_b32 s3, 0xffff, s3 371; GFX9-NEXT: s_lshr_b32 s1, s1, 1 372; GFX9-NEXT: s_and_b32 s2, 0xffff, s2 373; GFX9-NEXT: s_lshl_b32 s0, s0, s3 374; GFX9-NEXT: s_lshr_b32 s1, s1, s2 375; GFX9-NEXT: s_or_b32 s0, s0, s1 376; GFX9-NEXT: ; return to shader part epilog 377; 378; GFX10-LABEL: s_fshl_i8: 379; GFX10: ; %bb.0: 380; GFX10-NEXT: s_and_b32 s1, s1, 0xff 381; GFX10-NEXT: s_and_b32 s3, s2, 7 382; GFX10-NEXT: s_and_b32 s1, 0xffff, s1 383; GFX10-NEXT: s_andn2_b32 s2, 7, s2 384; GFX10-NEXT: s_and_b32 s3, 0xffff, s3 385; GFX10-NEXT: s_lshr_b32 s1, s1, 1 386; GFX10-NEXT: s_and_b32 s2, 0xffff, s2 387; GFX10-NEXT: s_lshl_b32 s0, s0, s3 388; GFX10-NEXT: s_lshr_b32 s1, s1, s2 389; GFX10-NEXT: s_or_b32 s0, s0, s1 390; GFX10-NEXT: ; return to shader part epilog 391; 392; GFX11-LABEL: s_fshl_i8: 393; GFX11: ; %bb.0: 394; GFX11-NEXT: s_and_b32 s1, s1, 0xff 395; GFX11-NEXT: s_and_b32 s3, s2, 7 396; GFX11-NEXT: s_and_b32 s1, 0xffff, s1 397; GFX11-NEXT: s_and_not1_b32 s2, 7, s2 398; GFX11-NEXT: s_and_b32 s3, 0xffff, s3 399; GFX11-NEXT: s_lshr_b32 s1, s1, 1 400; GFX11-NEXT: s_and_b32 s2, 0xffff, s2 401; GFX11-NEXT: s_lshl_b32 s0, s0, s3 402; GFX11-NEXT: s_lshr_b32 s1, s1, s2 403; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 404; GFX11-NEXT: s_or_b32 s0, s0, s1 405; GFX11-NEXT: ; return to shader part epilog 406 %result = call i8 @llvm.fshl.i8(i8 %lhs, i8 %rhs, i8 %amt) 407 ret i8 %result 408} 409 410define i8 @v_fshl_i8(i8 %lhs, i8 %rhs, i8 %amt) { 411; GFX6-LABEL: v_fshl_i8: 412; GFX6: ; %bb.0: 413; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 414; GFX6-NEXT: v_and_b32_e32 v3, 7, v2 415; GFX6-NEXT: v_not_b32_e32 v2, v2 416; GFX6-NEXT: v_and_b32_e32 v2, 7, v2 417; GFX6-NEXT: v_bfe_u32 v1, v1, 1, 7 418; GFX6-NEXT: v_lshlrev_b32_e32 v0, v3, v0 419; GFX6-NEXT: v_lshrrev_b32_e32 v1, v2, v1 420; GFX6-NEXT: 
v_or_b32_e32 v0, v0, v1 421; GFX6-NEXT: s_setpc_b64 s[30:31] 422; 423; GFX8-LABEL: v_fshl_i8: 424; GFX8: ; %bb.0: 425; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 426; GFX8-NEXT: v_and_b32_e32 v3, 7, v2 427; GFX8-NEXT: v_lshlrev_b16_e32 v0, v3, v0 428; GFX8-NEXT: v_mov_b32_e32 v3, 1 429; GFX8-NEXT: v_xor_b32_e32 v2, -1, v2 430; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 431; GFX8-NEXT: v_and_b32_e32 v2, 7, v2 432; GFX8-NEXT: v_lshrrev_b16_e32 v1, v2, v1 433; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 434; GFX8-NEXT: s_setpc_b64 s[30:31] 435; 436; GFX9-LABEL: v_fshl_i8: 437; GFX9: ; %bb.0: 438; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 439; GFX9-NEXT: v_and_b32_e32 v3, 7, v2 440; GFX9-NEXT: v_lshlrev_b16_e32 v0, v3, v0 441; GFX9-NEXT: v_mov_b32_e32 v3, 1 442; GFX9-NEXT: v_xor_b32_e32 v2, -1, v2 443; GFX9-NEXT: v_lshrrev_b16_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 444; GFX9-NEXT: v_and_b32_e32 v2, 7, v2 445; GFX9-NEXT: v_lshrrev_b16_e32 v1, v2, v1 446; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 447; GFX9-NEXT: s_setpc_b64 s[30:31] 448; 449; GFX10-LABEL: v_fshl_i8: 450; GFX10: ; %bb.0: 451; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 452; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1 453; GFX10-NEXT: v_xor_b32_e32 v3, -1, v2 454; GFX10-NEXT: v_and_b32_e32 v2, 7, v2 455; GFX10-NEXT: v_lshrrev_b16 v1, 1, v1 456; GFX10-NEXT: v_and_b32_e32 v3, 7, v3 457; GFX10-NEXT: v_lshlrev_b16 v0, v2, v0 458; GFX10-NEXT: v_lshrrev_b16 v1, v3, v1 459; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 460; GFX10-NEXT: s_setpc_b64 s[30:31] 461; 462; GFX11-LABEL: v_fshl_i8: 463; GFX11: ; %bb.0: 464; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 465; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 466; GFX11-NEXT: v_xor_b32_e32 v3, -1, v2 467; GFX11-NEXT: v_and_b32_e32 v2, 7, v2 468; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 469; GFX11-NEXT: v_lshrrev_b16 v1, 
1, v1 470; GFX11-NEXT: v_and_b32_e32 v3, 7, v3 471; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) 472; GFX11-NEXT: v_lshlrev_b16 v0, v2, v0 473; GFX11-NEXT: v_lshrrev_b16 v1, v3, v1 474; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 475; GFX11-NEXT: v_or_b32_e32 v0, v0, v1 476; GFX11-NEXT: s_setpc_b64 s[30:31] 477 %result = call i8 @llvm.fshl.i8(i8 %lhs, i8 %rhs, i8 %amt) 478 ret i8 %result 479} 480 481define amdgpu_ps i8 @s_fshl_i8_4(i8 inreg %lhs, i8 inreg %rhs) { 482; GFX6-LABEL: s_fshl_i8_4: 483; GFX6: ; %bb.0: 484; GFX6-NEXT: s_lshl_b32 s0, s0, 4 485; GFX6-NEXT: s_bfe_u32 s1, s1, 0x40004 486; GFX6-NEXT: s_or_b32 s0, s0, s1 487; GFX6-NEXT: ; return to shader part epilog 488; 489; GFX8-LABEL: s_fshl_i8_4: 490; GFX8: ; %bb.0: 491; GFX8-NEXT: s_and_b32 s1, s1, 0xff 492; GFX8-NEXT: s_and_b32 s1, 0xffff, s1 493; GFX8-NEXT: s_lshl_b32 s0, s0, 4 494; GFX8-NEXT: s_lshr_b32 s1, s1, 4 495; GFX8-NEXT: s_or_b32 s0, s0, s1 496; GFX8-NEXT: ; return to shader part epilog 497; 498; GFX9-LABEL: s_fshl_i8_4: 499; GFX9: ; %bb.0: 500; GFX9-NEXT: s_and_b32 s1, s1, 0xff 501; GFX9-NEXT: s_and_b32 s1, 0xffff, s1 502; GFX9-NEXT: s_lshl_b32 s0, s0, 4 503; GFX9-NEXT: s_lshr_b32 s1, s1, 4 504; GFX9-NEXT: s_or_b32 s0, s0, s1 505; GFX9-NEXT: ; return to shader part epilog 506; 507; GFX10-LABEL: s_fshl_i8_4: 508; GFX10: ; %bb.0: 509; GFX10-NEXT: s_and_b32 s1, s1, 0xff 510; GFX10-NEXT: s_lshl_b32 s0, s0, 4 511; GFX10-NEXT: s_and_b32 s1, 0xffff, s1 512; GFX10-NEXT: s_lshr_b32 s1, s1, 4 513; GFX10-NEXT: s_or_b32 s0, s0, s1 514; GFX10-NEXT: ; return to shader part epilog 515; 516; GFX11-LABEL: s_fshl_i8_4: 517; GFX11: ; %bb.0: 518; GFX11-NEXT: s_and_b32 s1, s1, 0xff 519; GFX11-NEXT: s_lshl_b32 s0, s0, 4 520; GFX11-NEXT: s_and_b32 s1, 0xffff, s1 521; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) 522; GFX11-NEXT: s_lshr_b32 s1, s1, 4 523; GFX11-NEXT: s_or_b32 s0, s0, s1 524; GFX11-NEXT: ; return to shader part epilog 525 
%result = call i8 @llvm.fshl.i8(i8 %lhs, i8 %rhs, i8 4) 526 ret i8 %result 527} 528 529define i8 @v_fshl_i8_4(i8 %lhs, i8 %rhs) { 530; GFX6-LABEL: v_fshl_i8_4: 531; GFX6: ; %bb.0: 532; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 533; GFX6-NEXT: v_lshlrev_b32_e32 v0, 4, v0 534; GFX6-NEXT: v_bfe_u32 v1, v1, 4, 4 535; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 536; GFX6-NEXT: s_setpc_b64 s[30:31] 537; 538; GFX8-LABEL: v_fshl_i8_4: 539; GFX8: ; %bb.0: 540; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 541; GFX8-NEXT: v_mov_b32_e32 v2, 4 542; GFX8-NEXT: v_lshlrev_b16_e32 v0, 4, v0 543; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 544; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 545; GFX8-NEXT: s_setpc_b64 s[30:31] 546; 547; GFX9-LABEL: v_fshl_i8_4: 548; GFX9: ; %bb.0: 549; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 550; GFX9-NEXT: v_mov_b32_e32 v2, 4 551; GFX9-NEXT: v_lshlrev_b16_e32 v0, 4, v0 552; GFX9-NEXT: v_lshrrev_b16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 553; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 554; GFX9-NEXT: s_setpc_b64 s[30:31] 555; 556; GFX10-LABEL: v_fshl_i8_4: 557; GFX10: ; %bb.0: 558; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 559; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1 560; GFX10-NEXT: v_lshlrev_b16 v0, 4, v0 561; GFX10-NEXT: v_lshrrev_b16 v1, 4, v1 562; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 563; GFX10-NEXT: s_setpc_b64 s[30:31] 564; 565; GFX11-LABEL: v_fshl_i8_4: 566; GFX11: ; %bb.0: 567; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 568; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 569; GFX11-NEXT: v_lshlrev_b16 v0, 4, v0 570; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 571; GFX11-NEXT: v_lshrrev_b16 v1, 4, v1 572; GFX11-NEXT: v_or_b32_e32 v0, v0, v1 573; GFX11-NEXT: s_setpc_b64 s[30:31] 574 %result = call i8 @llvm.fshl.i8(i8 %lhs, i8 %rhs, i8 4) 575 ret i8 %result 576} 577 578define amdgpu_ps i8 
@s_fshl_i8_5(i8 inreg %lhs, i8 inreg %rhs) { 579; GFX6-LABEL: s_fshl_i8_5: 580; GFX6: ; %bb.0: 581; GFX6-NEXT: s_lshl_b32 s0, s0, 5 582; GFX6-NEXT: s_bfe_u32 s1, s1, 0x50003 583; GFX6-NEXT: s_or_b32 s0, s0, s1 584; GFX6-NEXT: ; return to shader part epilog 585; 586; GFX8-LABEL: s_fshl_i8_5: 587; GFX8: ; %bb.0: 588; GFX8-NEXT: s_and_b32 s1, s1, 0xff 589; GFX8-NEXT: s_and_b32 s1, 0xffff, s1 590; GFX8-NEXT: s_lshl_b32 s0, s0, 5 591; GFX8-NEXT: s_lshr_b32 s1, s1, 3 592; GFX8-NEXT: s_or_b32 s0, s0, s1 593; GFX8-NEXT: ; return to shader part epilog 594; 595; GFX9-LABEL: s_fshl_i8_5: 596; GFX9: ; %bb.0: 597; GFX9-NEXT: s_and_b32 s1, s1, 0xff 598; GFX9-NEXT: s_and_b32 s1, 0xffff, s1 599; GFX9-NEXT: s_lshl_b32 s0, s0, 5 600; GFX9-NEXT: s_lshr_b32 s1, s1, 3 601; GFX9-NEXT: s_or_b32 s0, s0, s1 602; GFX9-NEXT: ; return to shader part epilog 603; 604; GFX10-LABEL: s_fshl_i8_5: 605; GFX10: ; %bb.0: 606; GFX10-NEXT: s_and_b32 s1, s1, 0xff 607; GFX10-NEXT: s_lshl_b32 s0, s0, 5 608; GFX10-NEXT: s_and_b32 s1, 0xffff, s1 609; GFX10-NEXT: s_lshr_b32 s1, s1, 3 610; GFX10-NEXT: s_or_b32 s0, s0, s1 611; GFX10-NEXT: ; return to shader part epilog 612; 613; GFX11-LABEL: s_fshl_i8_5: 614; GFX11: ; %bb.0: 615; GFX11-NEXT: s_and_b32 s1, s1, 0xff 616; GFX11-NEXT: s_lshl_b32 s0, s0, 5 617; GFX11-NEXT: s_and_b32 s1, 0xffff, s1 618; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) 619; GFX11-NEXT: s_lshr_b32 s1, s1, 3 620; GFX11-NEXT: s_or_b32 s0, s0, s1 621; GFX11-NEXT: ; return to shader part epilog 622 %result = call i8 @llvm.fshl.i8(i8 %lhs, i8 %rhs, i8 5) 623 ret i8 %result 624} 625 626define i8 @v_fshl_i8_5(i8 %lhs, i8 %rhs) { 627; GFX6-LABEL: v_fshl_i8_5: 628; GFX6: ; %bb.0: 629; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 630; GFX6-NEXT: v_lshlrev_b32_e32 v0, 5, v0 631; GFX6-NEXT: v_bfe_u32 v1, v1, 3, 5 632; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 633; GFX6-NEXT: s_setpc_b64 s[30:31] 634; 635; GFX8-LABEL: v_fshl_i8_5: 636; GFX8: ; %bb.0: 637; 
GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 638; GFX8-NEXT: v_mov_b32_e32 v2, 3 639; GFX8-NEXT: v_lshlrev_b16_e32 v0, 5, v0 640; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 641; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 642; GFX8-NEXT: s_setpc_b64 s[30:31] 643; 644; GFX9-LABEL: v_fshl_i8_5: 645; GFX9: ; %bb.0: 646; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 647; GFX9-NEXT: v_mov_b32_e32 v2, 3 648; GFX9-NEXT: v_lshlrev_b16_e32 v0, 5, v0 649; GFX9-NEXT: v_lshrrev_b16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 650; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 651; GFX9-NEXT: s_setpc_b64 s[30:31] 652; 653; GFX10-LABEL: v_fshl_i8_5: 654; GFX10: ; %bb.0: 655; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 656; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1 657; GFX10-NEXT: v_lshlrev_b16 v0, 5, v0 658; GFX10-NEXT: v_lshrrev_b16 v1, 3, v1 659; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 660; GFX10-NEXT: s_setpc_b64 s[30:31] 661; 662; GFX11-LABEL: v_fshl_i8_5: 663; GFX11: ; %bb.0: 664; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 665; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 666; GFX11-NEXT: v_lshlrev_b16 v0, 5, v0 667; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 668; GFX11-NEXT: v_lshrrev_b16 v1, 3, v1 669; GFX11-NEXT: v_or_b32_e32 v0, v0, v1 670; GFX11-NEXT: s_setpc_b64 s[30:31] 671 %result = call i8 @llvm.fshl.i8(i8 %lhs, i8 %rhs, i8 5) 672 ret i8 %result 673} 674 675define amdgpu_ps i16 @s_fshl_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg, i16 inreg %amt.arg) { 676; GFX6-LABEL: s_fshl_v2i8: 677; GFX6: ; %bb.0: 678; GFX6-NEXT: s_and_b32 s5, s2, 7 679; GFX6-NEXT: s_lshr_b32 s3, s0, 8 680; GFX6-NEXT: s_lshr_b32 s4, s2, 8 681; GFX6-NEXT: s_andn2_b32 s2, 7, s2 682; GFX6-NEXT: s_lshl_b32 s0, s0, s5 683; GFX6-NEXT: s_bfe_u32 s5, s1, 0x70001 684; GFX6-NEXT: s_lshr_b32 s2, s5, s2 685; GFX6-NEXT: s_bfe_u32 s1, s1, 0x80008 686; GFX6-NEXT: s_or_b32 
s0, s0, s2 687; GFX6-NEXT: s_and_b32 s2, s4, 7 688; GFX6-NEXT: s_andn2_b32 s4, 7, s4 689; GFX6-NEXT: s_lshr_b32 s1, s1, 1 690; GFX6-NEXT: s_lshl_b32 s2, s3, s2 691; GFX6-NEXT: s_lshr_b32 s1, s1, s4 692; GFX6-NEXT: s_or_b32 s1, s2, s1 693; GFX6-NEXT: s_and_b32 s1, s1, 0xff 694; GFX6-NEXT: s_and_b32 s0, s0, 0xff 695; GFX6-NEXT: s_lshl_b32 s1, s1, 8 696; GFX6-NEXT: s_or_b32 s0, s0, s1 697; GFX6-NEXT: ; return to shader part epilog 698; 699; GFX8-LABEL: s_fshl_v2i8: 700; GFX8: ; %bb.0: 701; GFX8-NEXT: s_lshr_b32 s4, s1, 8 702; GFX8-NEXT: s_and_b32 s1, s1, 0xff 703; GFX8-NEXT: s_lshr_b32 s5, s2, 8 704; GFX8-NEXT: s_and_b32 s6, s2, 7 705; GFX8-NEXT: s_and_b32 s1, 0xffff, s1 706; GFX8-NEXT: s_andn2_b32 s2, 7, s2 707; GFX8-NEXT: s_and_b32 s6, 0xffff, s6 708; GFX8-NEXT: s_lshr_b32 s1, s1, 1 709; GFX8-NEXT: s_and_b32 s2, 0xffff, s2 710; GFX8-NEXT: s_lshr_b32 s3, s0, 8 711; GFX8-NEXT: s_lshl_b32 s0, s0, s6 712; GFX8-NEXT: s_lshr_b32 s1, s1, s2 713; GFX8-NEXT: s_or_b32 s0, s0, s1 714; GFX8-NEXT: s_and_b32 s1, s5, 7 715; GFX8-NEXT: s_and_b32 s1, 0xffff, s1 716; GFX8-NEXT: s_and_b32 s2, s4, 0xff 717; GFX8-NEXT: s_lshl_b32 s1, s3, s1 718; GFX8-NEXT: s_and_b32 s2, 0xffff, s2 719; GFX8-NEXT: s_andn2_b32 s3, 7, s5 720; GFX8-NEXT: s_lshr_b32 s2, s2, 1 721; GFX8-NEXT: s_and_b32 s3, 0xffff, s3 722; GFX8-NEXT: s_lshr_b32 s2, s2, s3 723; GFX8-NEXT: s_or_b32 s1, s1, s2 724; GFX8-NEXT: s_and_b32 s1, s1, 0xff 725; GFX8-NEXT: s_and_b32 s0, s0, 0xff 726; GFX8-NEXT: s_lshl_b32 s1, s1, 8 727; GFX8-NEXT: s_or_b32 s0, s0, s1 728; GFX8-NEXT: ; return to shader part epilog 729; 730; GFX9-LABEL: s_fshl_v2i8: 731; GFX9: ; %bb.0: 732; GFX9-NEXT: s_lshr_b32 s4, s1, 8 733; GFX9-NEXT: s_and_b32 s1, s1, 0xff 734; GFX9-NEXT: s_lshr_b32 s5, s2, 8 735; GFX9-NEXT: s_and_b32 s6, s2, 7 736; GFX9-NEXT: s_and_b32 s1, 0xffff, s1 737; GFX9-NEXT: s_andn2_b32 s2, 7, s2 738; GFX9-NEXT: s_and_b32 s6, 0xffff, s6 739; GFX9-NEXT: s_lshr_b32 s1, s1, 1 740; GFX9-NEXT: s_and_b32 s2, 0xffff, s2 741; GFX9-NEXT: s_lshr_b32 s3, 
s0, 8 742; GFX9-NEXT: s_lshl_b32 s0, s0, s6 743; GFX9-NEXT: s_lshr_b32 s1, s1, s2 744; GFX9-NEXT: s_or_b32 s0, s0, s1 745; GFX9-NEXT: s_and_b32 s1, s5, 7 746; GFX9-NEXT: s_and_b32 s1, 0xffff, s1 747; GFX9-NEXT: s_and_b32 s2, s4, 0xff 748; GFX9-NEXT: s_lshl_b32 s1, s3, s1 749; GFX9-NEXT: s_and_b32 s2, 0xffff, s2 750; GFX9-NEXT: s_andn2_b32 s3, 7, s5 751; GFX9-NEXT: s_lshr_b32 s2, s2, 1 752; GFX9-NEXT: s_and_b32 s3, 0xffff, s3 753; GFX9-NEXT: s_lshr_b32 s2, s2, s3 754; GFX9-NEXT: s_or_b32 s1, s1, s2 755; GFX9-NEXT: s_and_b32 s1, s1, 0xff 756; GFX9-NEXT: s_and_b32 s0, s0, 0xff 757; GFX9-NEXT: s_lshl_b32 s1, s1, 8 758; GFX9-NEXT: s_or_b32 s0, s0, s1 759; GFX9-NEXT: ; return to shader part epilog 760; 761; GFX10-LABEL: s_fshl_v2i8: 762; GFX10: ; %bb.0: 763; GFX10-NEXT: s_lshr_b32 s4, s1, 8 764; GFX10-NEXT: s_and_b32 s5, s2, 7 765; GFX10-NEXT: s_lshr_b32 s6, s2, 8 766; GFX10-NEXT: s_and_b32 s5, 0xffff, s5 767; GFX10-NEXT: s_and_b32 s4, s4, 0xff 768; GFX10-NEXT: s_lshr_b32 s3, s0, 8 769; GFX10-NEXT: s_and_b32 s1, s1, 0xff 770; GFX10-NEXT: s_lshl_b32 s0, s0, s5 771; GFX10-NEXT: s_and_b32 s5, s6, 7 772; GFX10-NEXT: s_and_b32 s4, 0xffff, s4 773; GFX10-NEXT: s_andn2_b32 s6, 7, s6 774; GFX10-NEXT: s_and_b32 s1, 0xffff, s1 775; GFX10-NEXT: s_andn2_b32 s2, 7, s2 776; GFX10-NEXT: s_and_b32 s5, 0xffff, s5 777; GFX10-NEXT: s_lshr_b32 s4, s4, 1 778; GFX10-NEXT: s_and_b32 s6, 0xffff, s6 779; GFX10-NEXT: s_lshr_b32 s1, s1, 1 780; GFX10-NEXT: s_and_b32 s2, 0xffff, s2 781; GFX10-NEXT: s_lshl_b32 s3, s3, s5 782; GFX10-NEXT: s_lshr_b32 s4, s4, s6 783; GFX10-NEXT: s_lshr_b32 s1, s1, s2 784; GFX10-NEXT: s_or_b32 s2, s3, s4 785; GFX10-NEXT: s_or_b32 s0, s0, s1 786; GFX10-NEXT: s_and_b32 s1, s2, 0xff 787; GFX10-NEXT: s_and_b32 s0, s0, 0xff 788; GFX10-NEXT: s_lshl_b32 s1, s1, 8 789; GFX10-NEXT: s_or_b32 s0, s0, s1 790; GFX10-NEXT: ; return to shader part epilog 791; 792; GFX11-LABEL: s_fshl_v2i8: 793; GFX11: ; %bb.0: 794; GFX11-NEXT: s_lshr_b32 s4, s1, 8 795; GFX11-NEXT: s_and_b32 s5, s2, 7 
796; GFX11-NEXT: s_lshr_b32 s6, s2, 8 797; GFX11-NEXT: s_and_b32 s5, 0xffff, s5 798; GFX11-NEXT: s_and_b32 s4, s4, 0xff 799; GFX11-NEXT: s_lshr_b32 s3, s0, 8 800; GFX11-NEXT: s_and_b32 s1, s1, 0xff 801; GFX11-NEXT: s_lshl_b32 s0, s0, s5 802; GFX11-NEXT: s_and_b32 s5, s6, 7 803; GFX11-NEXT: s_and_b32 s4, 0xffff, s4 804; GFX11-NEXT: s_and_not1_b32 s6, 7, s6 805; GFX11-NEXT: s_and_b32 s1, 0xffff, s1 806; GFX11-NEXT: s_and_not1_b32 s2, 7, s2 807; GFX11-NEXT: s_and_b32 s5, 0xffff, s5 808; GFX11-NEXT: s_lshr_b32 s4, s4, 1 809; GFX11-NEXT: s_and_b32 s6, 0xffff, s6 810; GFX11-NEXT: s_lshr_b32 s1, s1, 1 811; GFX11-NEXT: s_and_b32 s2, 0xffff, s2 812; GFX11-NEXT: s_lshl_b32 s3, s3, s5 813; GFX11-NEXT: s_lshr_b32 s4, s4, s6 814; GFX11-NEXT: s_lshr_b32 s1, s1, s2 815; GFX11-NEXT: s_or_b32 s2, s3, s4 816; GFX11-NEXT: s_or_b32 s0, s0, s1 817; GFX11-NEXT: s_and_b32 s1, s2, 0xff 818; GFX11-NEXT: s_and_b32 s0, s0, 0xff 819; GFX11-NEXT: s_lshl_b32 s1, s1, 8 820; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 821; GFX11-NEXT: s_or_b32 s0, s0, s1 822; GFX11-NEXT: ; return to shader part epilog 823 %lhs = bitcast i16 %lhs.arg to <2 x i8> 824 %rhs = bitcast i16 %rhs.arg to <2 x i8> 825 %amt = bitcast i16 %amt.arg to <2 x i8> 826 %result = call <2 x i8> @llvm.fshl.v2i8(<2 x i8> %lhs, <2 x i8> %rhs, <2 x i8> %amt) 827 %cast.result = bitcast <2 x i8> %result to i16 828 ret i16 %cast.result 829} 830 831define i16 @v_fshl_v2i8(i16 %lhs.arg, i16 %rhs.arg, i16 %amt.arg) { 832; GFX6-LABEL: v_fshl_v2i8: 833; GFX6: ; %bb.0: 834; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 835; GFX6-NEXT: v_lshrrev_b32_e32 v4, 8, v2 836; GFX6-NEXT: v_and_b32_e32 v5, 7, v2 837; GFX6-NEXT: v_not_b32_e32 v2, v2 838; GFX6-NEXT: v_lshrrev_b32_e32 v3, 8, v0 839; GFX6-NEXT: v_and_b32_e32 v2, 7, v2 840; GFX6-NEXT: v_lshlrev_b32_e32 v0, v5, v0 841; GFX6-NEXT: v_bfe_u32 v5, v1, 1, 7 842; GFX6-NEXT: v_lshrrev_b32_e32 v2, v2, v5 843; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 844; GFX6-NEXT: v_and_b32_e32 v2, 7, v4 845; 
GFX6-NEXT: v_not_b32_e32 v4, v4 846; GFX6-NEXT: v_bfe_u32 v1, v1, 8, 8 847; GFX6-NEXT: v_and_b32_e32 v4, 7, v4 848; GFX6-NEXT: v_lshrrev_b32_e32 v1, 1, v1 849; GFX6-NEXT: v_lshlrev_b32_e32 v2, v2, v3 850; GFX6-NEXT: v_lshrrev_b32_e32 v1, v4, v1 851; GFX6-NEXT: v_or_b32_e32 v1, v2, v1 852; GFX6-NEXT: v_and_b32_e32 v1, 0xff, v1 853; GFX6-NEXT: v_and_b32_e32 v0, 0xff, v0 854; GFX6-NEXT: v_lshlrev_b32_e32 v1, 8, v1 855; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 856; GFX6-NEXT: s_setpc_b64 s[30:31] 857; 858; GFX8-LABEL: v_fshl_v2i8: 859; GFX8: ; %bb.0: 860; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 861; GFX8-NEXT: v_and_b32_e32 v6, 7, v2 862; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v0 863; GFX8-NEXT: v_lshrrev_b32_e32 v5, 8, v2 864; GFX8-NEXT: v_lshlrev_b16_e32 v0, v6, v0 865; GFX8-NEXT: v_mov_b32_e32 v6, 1 866; GFX8-NEXT: v_xor_b32_e32 v2, -1, v2 867; GFX8-NEXT: v_lshrrev_b32_e32 v4, 8, v1 868; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 869; GFX8-NEXT: v_and_b32_e32 v2, 7, v2 870; GFX8-NEXT: v_lshrrev_b16_e32 v1, v2, v1 871; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 872; GFX8-NEXT: v_and_b32_e32 v1, 7, v5 873; GFX8-NEXT: v_lshlrev_b16_e32 v1, v1, v3 874; GFX8-NEXT: v_xor_b32_e32 v3, -1, v5 875; GFX8-NEXT: v_lshrrev_b16_sdwa v2, v6, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 876; GFX8-NEXT: v_and_b32_e32 v3, 7, v3 877; GFX8-NEXT: v_lshrrev_b16_e32 v2, v3, v2 878; GFX8-NEXT: v_or_b32_e32 v1, v1, v2 879; GFX8-NEXT: v_and_b32_e32 v1, 0xff, v1 880; GFX8-NEXT: v_lshlrev_b16_e32 v1, 8, v1 881; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 882; GFX8-NEXT: s_setpc_b64 s[30:31] 883; 884; GFX9-LABEL: v_fshl_v2i8: 885; GFX9: ; %bb.0: 886; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 887; GFX9-NEXT: v_and_b32_e32 v6, 7, v2 888; GFX9-NEXT: v_lshrrev_b32_e32 v3, 8, v0 889; GFX9-NEXT: v_lshrrev_b32_e32 v5, 8, v2 890; GFX9-NEXT: 
v_lshlrev_b16_e32 v0, v6, v0 891; GFX9-NEXT: v_mov_b32_e32 v6, 1 892; GFX9-NEXT: v_xor_b32_e32 v2, -1, v2 893; GFX9-NEXT: v_lshrrev_b32_e32 v4, 8, v1 894; GFX9-NEXT: v_lshrrev_b16_sdwa v1, v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 895; GFX9-NEXT: v_and_b32_e32 v2, 7, v2 896; GFX9-NEXT: v_lshrrev_b16_e32 v1, v2, v1 897; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 898; GFX9-NEXT: v_and_b32_e32 v1, 7, v5 899; GFX9-NEXT: v_lshlrev_b16_e32 v1, v1, v3 900; GFX9-NEXT: v_xor_b32_e32 v3, -1, v5 901; GFX9-NEXT: v_lshrrev_b16_sdwa v2, v6, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 902; GFX9-NEXT: v_and_b32_e32 v3, 7, v3 903; GFX9-NEXT: v_lshrrev_b16_e32 v2, v3, v2 904; GFX9-NEXT: v_or_b32_e32 v1, v1, v2 905; GFX9-NEXT: v_and_b32_e32 v1, 0xff, v1 906; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v1 907; GFX9-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 908; GFX9-NEXT: s_setpc_b64 s[30:31] 909; 910; GFX10-LABEL: v_fshl_v2i8: 911; GFX10: ; %bb.0: 912; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 913; GFX10-NEXT: v_lshrrev_b32_e32 v3, 8, v1 914; GFX10-NEXT: v_lshrrev_b32_e32 v4, 8, v2 915; GFX10-NEXT: v_lshrrev_b32_e32 v5, 8, v0 916; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1 917; GFX10-NEXT: v_xor_b32_e32 v7, -1, v2 918; GFX10-NEXT: v_and_b32_e32 v3, 0xff, v3 919; GFX10-NEXT: v_xor_b32_e32 v6, -1, v4 920; GFX10-NEXT: v_and_b32_e32 v4, 7, v4 921; GFX10-NEXT: v_and_b32_e32 v2, 7, v2 922; GFX10-NEXT: v_lshrrev_b16 v1, 1, v1 923; GFX10-NEXT: v_lshrrev_b16 v3, 1, v3 924; GFX10-NEXT: v_and_b32_e32 v6, 7, v6 925; GFX10-NEXT: v_and_b32_e32 v7, 7, v7 926; GFX10-NEXT: v_lshlrev_b16 v4, v4, v5 927; GFX10-NEXT: v_lshlrev_b16 v0, v2, v0 928; GFX10-NEXT: v_lshrrev_b16 v3, v6, v3 929; GFX10-NEXT: v_lshrrev_b16 v1, v7, v1 930; GFX10-NEXT: v_or_b32_e32 v2, v4, v3 931; GFX10-NEXT: v_mov_b32_e32 v3, 0xff 932; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 933; GFX10-NEXT: v_and_b32_sdwa v1, v2, v3 dst_sel:BYTE_1 
dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 934; GFX10-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 935; GFX10-NEXT: s_setpc_b64 s[30:31] 936; 937; GFX11-LABEL: v_fshl_v2i8: 938; GFX11: ; %bb.0: 939; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 940; GFX11-NEXT: v_lshrrev_b32_e32 v3, 8, v1 941; GFX11-NEXT: v_lshrrev_b32_e32 v4, 8, v2 942; GFX11-NEXT: v_lshrrev_b32_e32 v5, 8, v0 943; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 944; GFX11-NEXT: v_xor_b32_e32 v7, -1, v2 945; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v3 946; GFX11-NEXT: v_xor_b32_e32 v6, -1, v4 947; GFX11-NEXT: v_and_b32_e32 v4, 7, v4 948; GFX11-NEXT: v_and_b32_e32 v2, 7, v2 949; GFX11-NEXT: v_lshrrev_b16 v1, 1, v1 950; GFX11-NEXT: v_lshrrev_b16 v3, 1, v3 951; GFX11-NEXT: v_and_b32_e32 v6, 7, v6 952; GFX11-NEXT: v_and_b32_e32 v7, 7, v7 953; GFX11-NEXT: v_lshlrev_b16 v4, v4, v5 954; GFX11-NEXT: v_lshlrev_b16 v0, v2, v0 955; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 956; GFX11-NEXT: v_lshrrev_b16 v3, v6, v3 957; GFX11-NEXT: v_lshrrev_b16 v1, v7, v1 958; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 959; GFX11-NEXT: v_or_b32_e32 v2, v4, v3 960; GFX11-NEXT: v_or_b32_e32 v0, v0, v1 961; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 962; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v2 963; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 964; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 965; GFX11-NEXT: v_lshlrev_b16 v1, 8, v1 966; GFX11-NEXT: v_or_b32_e32 v0, v0, v1 967; GFX11-NEXT: s_setpc_b64 s[30:31] 968 %lhs = bitcast i16 %lhs.arg to <2 x i8> 969 %rhs = bitcast i16 %rhs.arg to <2 x i8> 970 %amt = bitcast i16 %amt.arg to <2 x i8> 971 %result = call <2 x i8> @llvm.fshl.v2i8(<2 x i8> %lhs, <2 x i8> %rhs, <2 x i8> %amt) 972 %cast.result = bitcast <2 x i8> %result to i16 973 ret i16 %cast.result 974} 975 976define 
amdgpu_ps i32 @s_fshl_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg, i32 inreg %amt.arg) { 977; GFX6-LABEL: s_fshl_v4i8: 978; GFX6: ; %bb.0: 979; GFX6-NEXT: s_and_b32 s9, s2, 7 980; GFX6-NEXT: s_lshr_b32 s3, s0, 8 981; GFX6-NEXT: s_lshr_b32 s4, s0, 16 982; GFX6-NEXT: s_lshr_b32 s5, s0, 24 983; GFX6-NEXT: s_lshr_b32 s6, s2, 8 984; GFX6-NEXT: s_lshr_b32 s7, s2, 16 985; GFX6-NEXT: s_lshr_b32 s8, s2, 24 986; GFX6-NEXT: s_andn2_b32 s2, 7, s2 987; GFX6-NEXT: s_lshl_b32 s0, s0, s9 988; GFX6-NEXT: s_bfe_u32 s9, s1, 0x70001 989; GFX6-NEXT: s_lshr_b32 s2, s9, s2 990; GFX6-NEXT: s_or_b32 s0, s0, s2 991; GFX6-NEXT: s_and_b32 s2, s6, 7 992; GFX6-NEXT: s_lshl_b32 s2, s3, s2 993; GFX6-NEXT: s_bfe_u32 s3, s1, 0x80008 994; GFX6-NEXT: s_andn2_b32 s6, 7, s6 995; GFX6-NEXT: s_lshr_b32 s3, s3, 1 996; GFX6-NEXT: s_lshr_b32 s3, s3, s6 997; GFX6-NEXT: s_or_b32 s2, s2, s3 998; GFX6-NEXT: s_and_b32 s3, s7, 7 999; GFX6-NEXT: s_lshl_b32 s3, s4, s3 1000; GFX6-NEXT: s_bfe_u32 s4, s1, 0x80010 1001; GFX6-NEXT: s_andn2_b32 s6, 7, s7 1002; GFX6-NEXT: s_lshr_b32 s4, s4, 1 1003; GFX6-NEXT: s_lshr_b32 s4, s4, s6 1004; GFX6-NEXT: s_or_b32 s3, s3, s4 1005; GFX6-NEXT: s_and_b32 s4, s8, 7 1006; GFX6-NEXT: s_andn2_b32 s6, 7, s8 1007; GFX6-NEXT: s_lshr_b32 s1, s1, 25 1008; GFX6-NEXT: s_and_b32 s2, s2, 0xff 1009; GFX6-NEXT: s_lshl_b32 s4, s5, s4 1010; GFX6-NEXT: s_lshr_b32 s1, s1, s6 1011; GFX6-NEXT: s_and_b32 s0, s0, 0xff 1012; GFX6-NEXT: s_lshl_b32 s2, s2, 8 1013; GFX6-NEXT: s_or_b32 s1, s4, s1 1014; GFX6-NEXT: s_or_b32 s0, s0, s2 1015; GFX6-NEXT: s_and_b32 s2, s3, 0xff 1016; GFX6-NEXT: s_lshl_b32 s2, s2, 16 1017; GFX6-NEXT: s_and_b32 s1, s1, 0xff 1018; GFX6-NEXT: s_or_b32 s0, s0, s2 1019; GFX6-NEXT: s_lshl_b32 s1, s1, 24 1020; GFX6-NEXT: s_or_b32 s0, s0, s1 1021; GFX6-NEXT: ; return to shader part epilog 1022; 1023; GFX8-LABEL: s_fshl_v4i8: 1024; GFX8: ; %bb.0: 1025; GFX8-NEXT: s_lshr_b32 s6, s1, 8 1026; GFX8-NEXT: s_lshr_b32 s7, s1, 16 1027; GFX8-NEXT: s_lshr_b32 s8, s1, 24 1028; GFX8-NEXT: s_and_b32 s1, 
s1, 0xff 1029; GFX8-NEXT: s_lshr_b32 s9, s2, 8 1030; GFX8-NEXT: s_lshr_b32 s10, s2, 16 1031; GFX8-NEXT: s_lshr_b32 s11, s2, 24 1032; GFX8-NEXT: s_and_b32 s12, s2, 7 1033; GFX8-NEXT: s_and_b32 s1, 0xffff, s1 1034; GFX8-NEXT: s_andn2_b32 s2, 7, s2 1035; GFX8-NEXT: s_and_b32 s12, 0xffff, s12 1036; GFX8-NEXT: s_lshr_b32 s1, s1, 1 1037; GFX8-NEXT: s_and_b32 s2, 0xffff, s2 1038; GFX8-NEXT: s_lshr_b32 s3, s0, 8 1039; GFX8-NEXT: s_lshr_b32 s4, s0, 16 1040; GFX8-NEXT: s_lshr_b32 s5, s0, 24 1041; GFX8-NEXT: s_lshl_b32 s0, s0, s12 1042; GFX8-NEXT: s_lshr_b32 s1, s1, s2 1043; GFX8-NEXT: s_or_b32 s0, s0, s1 1044; GFX8-NEXT: s_and_b32 s1, s9, 7 1045; GFX8-NEXT: s_and_b32 s1, 0xffff, s1 1046; GFX8-NEXT: s_and_b32 s2, s6, 0xff 1047; GFX8-NEXT: s_lshl_b32 s1, s3, s1 1048; GFX8-NEXT: s_and_b32 s2, 0xffff, s2 1049; GFX8-NEXT: s_andn2_b32 s3, 7, s9 1050; GFX8-NEXT: s_lshr_b32 s2, s2, 1 1051; GFX8-NEXT: s_and_b32 s3, 0xffff, s3 1052; GFX8-NEXT: s_lshr_b32 s2, s2, s3 1053; GFX8-NEXT: s_or_b32 s1, s1, s2 1054; GFX8-NEXT: s_and_b32 s2, s10, 7 1055; GFX8-NEXT: s_and_b32 s2, 0xffff, s2 1056; GFX8-NEXT: s_and_b32 s3, s7, 0xff 1057; GFX8-NEXT: s_lshl_b32 s2, s4, s2 1058; GFX8-NEXT: s_and_b32 s3, 0xffff, s3 1059; GFX8-NEXT: s_andn2_b32 s4, 7, s10 1060; GFX8-NEXT: s_lshr_b32 s3, s3, 1 1061; GFX8-NEXT: s_and_b32 s4, 0xffff, s4 1062; GFX8-NEXT: s_lshr_b32 s3, s3, s4 1063; GFX8-NEXT: s_or_b32 s2, s2, s3 1064; GFX8-NEXT: s_and_b32 s3, s11, 7 1065; GFX8-NEXT: s_and_b32 s3, 0xffff, s3 1066; GFX8-NEXT: s_lshl_b32 s3, s5, s3 1067; GFX8-NEXT: s_andn2_b32 s5, 7, s11 1068; GFX8-NEXT: s_and_b32 s1, s1, 0xff 1069; GFX8-NEXT: s_lshr_b32 s4, s8, 1 1070; GFX8-NEXT: s_and_b32 s5, 0xffff, s5 1071; GFX8-NEXT: s_and_b32 s0, s0, 0xff 1072; GFX8-NEXT: s_lshl_b32 s1, s1, 8 1073; GFX8-NEXT: s_lshr_b32 s4, s4, s5 1074; GFX8-NEXT: s_or_b32 s0, s0, s1 1075; GFX8-NEXT: s_and_b32 s1, s2, 0xff 1076; GFX8-NEXT: s_or_b32 s3, s3, s4 1077; GFX8-NEXT: s_lshl_b32 s1, s1, 16 1078; GFX8-NEXT: s_or_b32 s0, s0, s1 1079; GFX8-NEXT: 
s_and_b32 s1, s3, 0xff 1080; GFX8-NEXT: s_lshl_b32 s1, s1, 24 1081; GFX8-NEXT: s_or_b32 s0, s0, s1 1082; GFX8-NEXT: ; return to shader part epilog 1083; 1084; GFX9-LABEL: s_fshl_v4i8: 1085; GFX9: ; %bb.0: 1086; GFX9-NEXT: s_lshr_b32 s6, s1, 8 1087; GFX9-NEXT: s_lshr_b32 s7, s1, 16 1088; GFX9-NEXT: s_lshr_b32 s8, s1, 24 1089; GFX9-NEXT: s_and_b32 s1, s1, 0xff 1090; GFX9-NEXT: s_lshr_b32 s9, s2, 8 1091; GFX9-NEXT: s_lshr_b32 s10, s2, 16 1092; GFX9-NEXT: s_lshr_b32 s11, s2, 24 1093; GFX9-NEXT: s_and_b32 s12, s2, 7 1094; GFX9-NEXT: s_and_b32 s1, 0xffff, s1 1095; GFX9-NEXT: s_andn2_b32 s2, 7, s2 1096; GFX9-NEXT: s_and_b32 s12, 0xffff, s12 1097; GFX9-NEXT: s_lshr_b32 s1, s1, 1 1098; GFX9-NEXT: s_and_b32 s2, 0xffff, s2 1099; GFX9-NEXT: s_lshr_b32 s3, s0, 8 1100; GFX9-NEXT: s_lshr_b32 s4, s0, 16 1101; GFX9-NEXT: s_lshr_b32 s5, s0, 24 1102; GFX9-NEXT: s_lshl_b32 s0, s0, s12 1103; GFX9-NEXT: s_lshr_b32 s1, s1, s2 1104; GFX9-NEXT: s_or_b32 s0, s0, s1 1105; GFX9-NEXT: s_and_b32 s1, s9, 7 1106; GFX9-NEXT: s_and_b32 s1, 0xffff, s1 1107; GFX9-NEXT: s_and_b32 s2, s6, 0xff 1108; GFX9-NEXT: s_lshl_b32 s1, s3, s1 1109; GFX9-NEXT: s_and_b32 s2, 0xffff, s2 1110; GFX9-NEXT: s_andn2_b32 s3, 7, s9 1111; GFX9-NEXT: s_lshr_b32 s2, s2, 1 1112; GFX9-NEXT: s_and_b32 s3, 0xffff, s3 1113; GFX9-NEXT: s_lshr_b32 s2, s2, s3 1114; GFX9-NEXT: s_or_b32 s1, s1, s2 1115; GFX9-NEXT: s_and_b32 s2, s10, 7 1116; GFX9-NEXT: s_and_b32 s2, 0xffff, s2 1117; GFX9-NEXT: s_and_b32 s3, s7, 0xff 1118; GFX9-NEXT: s_lshl_b32 s2, s4, s2 1119; GFX9-NEXT: s_and_b32 s3, 0xffff, s3 1120; GFX9-NEXT: s_andn2_b32 s4, 7, s10 1121; GFX9-NEXT: s_lshr_b32 s3, s3, 1 1122; GFX9-NEXT: s_and_b32 s4, 0xffff, s4 1123; GFX9-NEXT: s_lshr_b32 s3, s3, s4 1124; GFX9-NEXT: s_or_b32 s2, s2, s3 1125; GFX9-NEXT: s_and_b32 s3, s11, 7 1126; GFX9-NEXT: s_and_b32 s3, 0xffff, s3 1127; GFX9-NEXT: s_lshl_b32 s3, s5, s3 1128; GFX9-NEXT: s_andn2_b32 s5, 7, s11 1129; GFX9-NEXT: s_and_b32 s1, s1, 0xff 1130; GFX9-NEXT: s_lshr_b32 s4, s8, 1 1131; GFX9-NEXT: 
s_and_b32 s5, 0xffff, s5 1132; GFX9-NEXT: s_and_b32 s0, s0, 0xff 1133; GFX9-NEXT: s_lshl_b32 s1, s1, 8 1134; GFX9-NEXT: s_lshr_b32 s4, s4, s5 1135; GFX9-NEXT: s_or_b32 s0, s0, s1 1136; GFX9-NEXT: s_and_b32 s1, s2, 0xff 1137; GFX9-NEXT: s_or_b32 s3, s3, s4 1138; GFX9-NEXT: s_lshl_b32 s1, s1, 16 1139; GFX9-NEXT: s_or_b32 s0, s0, s1 1140; GFX9-NEXT: s_and_b32 s1, s3, 0xff 1141; GFX9-NEXT: s_lshl_b32 s1, s1, 24 1142; GFX9-NEXT: s_or_b32 s0, s0, s1 1143; GFX9-NEXT: ; return to shader part epilog 1144; 1145; GFX10-LABEL: s_fshl_v4i8: 1146; GFX10: ; %bb.0: 1147; GFX10-NEXT: s_lshr_b32 s6, s1, 8 1148; GFX10-NEXT: s_lshr_b32 s7, s1, 16 1149; GFX10-NEXT: s_lshr_b32 s8, s1, 24 1150; GFX10-NEXT: s_and_b32 s1, s1, 0xff 1151; GFX10-NEXT: s_and_b32 s11, s2, 7 1152; GFX10-NEXT: s_and_b32 s1, 0xffff, s1 1153; GFX10-NEXT: s_andn2_b32 s12, 7, s2 1154; GFX10-NEXT: s_and_b32 s11, 0xffff, s11 1155; GFX10-NEXT: s_lshr_b32 s1, s1, 1 1156; GFX10-NEXT: s_and_b32 s12, 0xffff, s12 1157; GFX10-NEXT: s_lshr_b32 s3, s0, 8 1158; GFX10-NEXT: s_lshr_b32 s4, s0, 16 1159; GFX10-NEXT: s_lshr_b32 s5, s0, 24 1160; GFX10-NEXT: s_lshr_b32 s9, s2, 8 1161; GFX10-NEXT: s_lshl_b32 s0, s0, s11 1162; GFX10-NEXT: s_lshr_b32 s1, s1, s12 1163; GFX10-NEXT: s_and_b32 s6, s6, 0xff 1164; GFX10-NEXT: s_or_b32 s0, s0, s1 1165; GFX10-NEXT: s_and_b32 s1, s9, 7 1166; GFX10-NEXT: s_and_b32 s6, 0xffff, s6 1167; GFX10-NEXT: s_andn2_b32 s9, 7, s9 1168; GFX10-NEXT: s_lshr_b32 s10, s2, 16 1169; GFX10-NEXT: s_and_b32 s1, 0xffff, s1 1170; GFX10-NEXT: s_lshr_b32 s6, s6, 1 1171; GFX10-NEXT: s_and_b32 s9, 0xffff, s9 1172; GFX10-NEXT: s_lshl_b32 s1, s3, s1 1173; GFX10-NEXT: s_lshr_b32 s3, s6, s9 1174; GFX10-NEXT: s_and_b32 s6, s10, 7 1175; GFX10-NEXT: s_or_b32 s1, s1, s3 1176; GFX10-NEXT: s_and_b32 s3, 0xffff, s6 1177; GFX10-NEXT: s_and_b32 s6, s7, 0xff 1178; GFX10-NEXT: s_lshr_b32 s2, s2, 24 1179; GFX10-NEXT: s_lshl_b32 s3, s4, s3 1180; GFX10-NEXT: s_and_b32 s4, 0xffff, s6 1181; GFX10-NEXT: s_andn2_b32 s6, 7, s10 1182; GFX10-NEXT: 
s_lshr_b32 s4, s4, 1 1183; GFX10-NEXT: s_and_b32 s6, 0xffff, s6 1184; GFX10-NEXT: s_and_b32 s7, s2, 7 1185; GFX10-NEXT: s_andn2_b32 s2, 7, s2 1186; GFX10-NEXT: s_lshr_b32 s4, s4, s6 1187; GFX10-NEXT: s_and_b32 s6, 0xffff, s7 1188; GFX10-NEXT: s_lshr_b32 s7, s8, 1 1189; GFX10-NEXT: s_and_b32 s2, 0xffff, s2 1190; GFX10-NEXT: s_lshl_b32 s5, s5, s6 1191; GFX10-NEXT: s_lshr_b32 s2, s7, s2 1192; GFX10-NEXT: s_or_b32 s3, s3, s4 1193; GFX10-NEXT: s_and_b32 s1, s1, 0xff 1194; GFX10-NEXT: s_or_b32 s2, s5, s2 1195; GFX10-NEXT: s_and_b32 s0, s0, 0xff 1196; GFX10-NEXT: s_lshl_b32 s1, s1, 8 1197; GFX10-NEXT: s_and_b32 s3, s3, 0xff 1198; GFX10-NEXT: s_or_b32 s0, s0, s1 1199; GFX10-NEXT: s_lshl_b32 s1, s3, 16 1200; GFX10-NEXT: s_and_b32 s2, s2, 0xff 1201; GFX10-NEXT: s_or_b32 s0, s0, s1 1202; GFX10-NEXT: s_lshl_b32 s1, s2, 24 1203; GFX10-NEXT: s_or_b32 s0, s0, s1 1204; GFX10-NEXT: ; return to shader part epilog 1205; 1206; GFX11-LABEL: s_fshl_v4i8: 1207; GFX11: ; %bb.0: 1208; GFX11-NEXT: s_lshr_b32 s6, s1, 8 1209; GFX11-NEXT: s_lshr_b32 s7, s1, 16 1210; GFX11-NEXT: s_lshr_b32 s8, s1, 24 1211; GFX11-NEXT: s_and_b32 s1, s1, 0xff 1212; GFX11-NEXT: s_and_b32 s11, s2, 7 1213; GFX11-NEXT: s_and_b32 s1, 0xffff, s1 1214; GFX11-NEXT: s_and_not1_b32 s12, 7, s2 1215; GFX11-NEXT: s_and_b32 s11, 0xffff, s11 1216; GFX11-NEXT: s_lshr_b32 s1, s1, 1 1217; GFX11-NEXT: s_and_b32 s12, 0xffff, s12 1218; GFX11-NEXT: s_lshr_b32 s3, s0, 8 1219; GFX11-NEXT: s_lshr_b32 s4, s0, 16 1220; GFX11-NEXT: s_lshr_b32 s5, s0, 24 1221; GFX11-NEXT: s_lshr_b32 s9, s2, 8 1222; GFX11-NEXT: s_lshl_b32 s0, s0, s11 1223; GFX11-NEXT: s_lshr_b32 s1, s1, s12 1224; GFX11-NEXT: s_and_b32 s6, s6, 0xff 1225; GFX11-NEXT: s_or_b32 s0, s0, s1 1226; GFX11-NEXT: s_and_b32 s1, s9, 7 1227; GFX11-NEXT: s_and_b32 s6, 0xffff, s6 1228; GFX11-NEXT: s_and_not1_b32 s9, 7, s9 1229; GFX11-NEXT: s_lshr_b32 s10, s2, 16 1230; GFX11-NEXT: s_and_b32 s1, 0xffff, s1 1231; GFX11-NEXT: s_lshr_b32 s6, s6, 1 1232; GFX11-NEXT: s_and_b32 s9, 0xffff, s9 1233; 
GFX11-NEXT: s_lshl_b32 s1, s3, s1 1234; GFX11-NEXT: s_lshr_b32 s3, s6, s9 1235; GFX11-NEXT: s_and_b32 s6, s10, 7 1236; GFX11-NEXT: s_or_b32 s1, s1, s3 1237; GFX11-NEXT: s_and_b32 s3, 0xffff, s6 1238; GFX11-NEXT: s_and_b32 s6, s7, 0xff 1239; GFX11-NEXT: s_lshr_b32 s2, s2, 24 1240; GFX11-NEXT: s_lshl_b32 s3, s4, s3 1241; GFX11-NEXT: s_and_b32 s4, 0xffff, s6 1242; GFX11-NEXT: s_and_not1_b32 s6, 7, s10 1243; GFX11-NEXT: s_lshr_b32 s4, s4, 1 1244; GFX11-NEXT: s_and_b32 s6, 0xffff, s6 1245; GFX11-NEXT: s_and_b32 s7, s2, 7 1246; GFX11-NEXT: s_and_not1_b32 s2, 7, s2 1247; GFX11-NEXT: s_lshr_b32 s4, s4, s6 1248; GFX11-NEXT: s_and_b32 s6, 0xffff, s7 1249; GFX11-NEXT: s_lshr_b32 s7, s8, 1 1250; GFX11-NEXT: s_and_b32 s2, 0xffff, s2 1251; GFX11-NEXT: s_lshl_b32 s5, s5, s6 1252; GFX11-NEXT: s_lshr_b32 s2, s7, s2 1253; GFX11-NEXT: s_or_b32 s3, s3, s4 1254; GFX11-NEXT: s_and_b32 s1, s1, 0xff 1255; GFX11-NEXT: s_or_b32 s2, s5, s2 1256; GFX11-NEXT: s_and_b32 s0, s0, 0xff 1257; GFX11-NEXT: s_lshl_b32 s1, s1, 8 1258; GFX11-NEXT: s_and_b32 s3, s3, 0xff 1259; GFX11-NEXT: s_or_b32 s0, s0, s1 1260; GFX11-NEXT: s_lshl_b32 s1, s3, 16 1261; GFX11-NEXT: s_and_b32 s2, s2, 0xff 1262; GFX11-NEXT: s_or_b32 s0, s0, s1 1263; GFX11-NEXT: s_lshl_b32 s1, s2, 24 1264; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 1265; GFX11-NEXT: s_or_b32 s0, s0, s1 1266; GFX11-NEXT: ; return to shader part epilog 1267 %lhs = bitcast i32 %lhs.arg to <4 x i8> 1268 %rhs = bitcast i32 %rhs.arg to <4 x i8> 1269 %amt = bitcast i32 %amt.arg to <4 x i8> 1270 %result = call <4 x i8> @llvm.fshl.v4i8(<4 x i8> %lhs, <4 x i8> %rhs, <4 x i8> %amt) 1271 %cast.result = bitcast <4 x i8> %result to i32 1272 ret i32 %cast.result 1273} 1274 1275define i32 @v_fshl_v4i8(i32 %lhs.arg, i32 %rhs.arg, i32 %amt.arg) { 1276; GFX6-LABEL: v_fshl_v4i8: 1277; GFX6: ; %bb.0: 1278; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1279; GFX6-NEXT: v_lshrrev_b32_e32 v6, 8, v2 1280; GFX6-NEXT: v_lshrrev_b32_e32 v7, 16, v2 1281; GFX6-NEXT: 
v_lshrrev_b32_e32 v8, 24, v2 1282; GFX6-NEXT: v_and_b32_e32 v9, 7, v2 1283; GFX6-NEXT: v_not_b32_e32 v2, v2 1284; GFX6-NEXT: v_lshrrev_b32_e32 v3, 8, v0 1285; GFX6-NEXT: v_lshrrev_b32_e32 v4, 16, v0 1286; GFX6-NEXT: v_lshrrev_b32_e32 v5, 24, v0 1287; GFX6-NEXT: v_and_b32_e32 v2, 7, v2 1288; GFX6-NEXT: v_lshlrev_b32_e32 v0, v9, v0 1289; GFX6-NEXT: v_bfe_u32 v9, v1, 1, 7 1290; GFX6-NEXT: v_lshrrev_b32_e32 v2, v2, v9 1291; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 1292; GFX6-NEXT: v_and_b32_e32 v2, 7, v6 1293; GFX6-NEXT: v_not_b32_e32 v6, v6 1294; GFX6-NEXT: v_lshlrev_b32_e32 v2, v2, v3 1295; GFX6-NEXT: v_bfe_u32 v3, v1, 8, 8 1296; GFX6-NEXT: v_and_b32_e32 v6, 7, v6 1297; GFX6-NEXT: v_lshrrev_b32_e32 v3, 1, v3 1298; GFX6-NEXT: v_lshrrev_b32_e32 v3, v6, v3 1299; GFX6-NEXT: v_or_b32_e32 v2, v2, v3 1300; GFX6-NEXT: v_and_b32_e32 v3, 7, v7 1301; GFX6-NEXT: v_not_b32_e32 v6, v7 1302; GFX6-NEXT: v_lshlrev_b32_e32 v3, v3, v4 1303; GFX6-NEXT: v_bfe_u32 v4, v1, 16, 8 1304; GFX6-NEXT: v_and_b32_e32 v6, 7, v6 1305; GFX6-NEXT: v_lshrrev_b32_e32 v4, 1, v4 1306; GFX6-NEXT: v_lshrrev_b32_e32 v4, v6, v4 1307; GFX6-NEXT: v_not_b32_e32 v6, v8 1308; GFX6-NEXT: v_or_b32_e32 v3, v3, v4 1309; GFX6-NEXT: v_and_b32_e32 v4, 7, v8 1310; GFX6-NEXT: v_and_b32_e32 v6, 7, v6 1311; GFX6-NEXT: v_lshrrev_b32_e32 v1, 25, v1 1312; GFX6-NEXT: v_and_b32_e32 v2, 0xff, v2 1313; GFX6-NEXT: v_lshlrev_b32_e32 v4, v4, v5 1314; GFX6-NEXT: v_lshrrev_b32_e32 v1, v6, v1 1315; GFX6-NEXT: v_and_b32_e32 v0, 0xff, v0 1316; GFX6-NEXT: v_lshlrev_b32_e32 v2, 8, v2 1317; GFX6-NEXT: v_or_b32_e32 v1, v4, v1 1318; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 1319; GFX6-NEXT: v_and_b32_e32 v2, 0xff, v3 1320; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2 1321; GFX6-NEXT: v_and_b32_e32 v1, 0xff, v1 1322; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 1323; GFX6-NEXT: v_lshlrev_b32_e32 v1, 24, v1 1324; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 1325; GFX6-NEXT: s_setpc_b64 s[30:31] 1326; 1327; GFX8-LABEL: v_fshl_v4i8: 1328; GFX8: ; %bb.0: 1329; GFX8-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 1330; GFX8-NEXT: v_mov_b32_e32 v8, 1 1331; GFX8-NEXT: v_xor_b32_e32 v10, -1, v2 1332; GFX8-NEXT: v_and_b32_e32 v6, 7, v2 1333; GFX8-NEXT: v_lshrrev_b16_sdwa v9, v8, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 1334; GFX8-NEXT: v_and_b32_e32 v10, 7, v10 1335; GFX8-NEXT: v_lshrrev_b32_e32 v5, 8, v2 1336; GFX8-NEXT: v_lshlrev_b16_e32 v6, v6, v0 1337; GFX8-NEXT: v_lshrrev_b16_e32 v9, v10, v9 1338; GFX8-NEXT: v_lshrrev_b32_e32 v4, 8, v1 1339; GFX8-NEXT: v_or_b32_e32 v6, v6, v9 1340; GFX8-NEXT: v_and_b32_e32 v9, 7, v5 1341; GFX8-NEXT: v_xor_b32_e32 v5, -1, v5 1342; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v0 1343; GFX8-NEXT: v_lshrrev_b16_sdwa v4, v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 1344; GFX8-NEXT: v_and_b32_e32 v5, 7, v5 1345; GFX8-NEXT: v_lshlrev_b16_e32 v3, v9, v3 1346; GFX8-NEXT: v_lshrrev_b16_e32 v4, v5, v4 1347; GFX8-NEXT: v_mov_b32_e32 v7, 0xff 1348; GFX8-NEXT: v_or_b32_e32 v3, v3, v4 1349; GFX8-NEXT: v_mov_b32_e32 v4, 7 1350; GFX8-NEXT: v_mov_b32_e32 v9, -1 1351; GFX8-NEXT: v_and_b32_sdwa v5, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1352; GFX8-NEXT: v_and_b32_sdwa v7, v1, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1353; GFX8-NEXT: v_xor_b32_sdwa v10, v2, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1354; GFX8-NEXT: v_and_b32_sdwa v4, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD 1355; GFX8-NEXT: v_xor_b32_sdwa v2, v2, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD 1356; GFX8-NEXT: v_lshrrev_b16_e32 v7, 1, v7 1357; GFX8-NEXT: v_and_b32_e32 v10, 7, v10 1358; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v8, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3 1359; GFX8-NEXT: v_and_b32_e32 v2, 7, v2 1360; GFX8-NEXT: v_lshlrev_b16_sdwa v5, v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 1361; GFX8-NEXT: 
v_lshrrev_b16_e32 v7, v10, v7 1362; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3 1363; GFX8-NEXT: v_lshrrev_b16_e32 v1, v2, v1 1364; GFX8-NEXT: v_or_b32_e32 v5, v5, v7 1365; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 1366; GFX8-NEXT: v_mov_b32_e32 v1, 8 1367; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 1368; GFX8-NEXT: v_and_b32_e32 v2, 0xff, v5 1369; GFX8-NEXT: v_or_b32_sdwa v1, v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1370; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2 1371; GFX8-NEXT: v_and_b32_e32 v0, 0xff, v0 1372; GFX8-NEXT: v_or_b32_e32 v1, v1, v2 1373; GFX8-NEXT: v_lshlrev_b32_e32 v0, 24, v0 1374; GFX8-NEXT: v_or_b32_e32 v0, v1, v0 1375; GFX8-NEXT: s_setpc_b64 s[30:31] 1376; 1377; GFX9-LABEL: v_fshl_v4i8: 1378; GFX9: ; %bb.0: 1379; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1380; GFX9-NEXT: v_mov_b32_e32 v8, 1 1381; GFX9-NEXT: v_xor_b32_e32 v10, -1, v2 1382; GFX9-NEXT: v_and_b32_e32 v6, 7, v2 1383; GFX9-NEXT: v_lshrrev_b16_sdwa v9, v8, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 1384; GFX9-NEXT: v_and_b32_e32 v10, 7, v10 1385; GFX9-NEXT: v_lshrrev_b32_e32 v5, 8, v2 1386; GFX9-NEXT: v_lshlrev_b16_e32 v6, v6, v0 1387; GFX9-NEXT: v_lshrrev_b16_e32 v9, v10, v9 1388; GFX9-NEXT: v_lshrrev_b32_e32 v4, 8, v1 1389; GFX9-NEXT: v_or_b32_e32 v6, v6, v9 1390; GFX9-NEXT: v_and_b32_e32 v9, 7, v5 1391; GFX9-NEXT: v_xor_b32_e32 v5, -1, v5 1392; GFX9-NEXT: v_lshrrev_b32_e32 v3, 8, v0 1393; GFX9-NEXT: v_lshrrev_b16_sdwa v4, v8, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 1394; GFX9-NEXT: v_and_b32_e32 v5, 7, v5 1395; GFX9-NEXT: v_lshlrev_b16_e32 v3, v9, v3 1396; GFX9-NEXT: v_lshrrev_b16_e32 v4, v5, v4 1397; GFX9-NEXT: v_mov_b32_e32 v7, 0xff 1398; GFX9-NEXT: v_or_b32_e32 v3, v3, v4 1399; GFX9-NEXT: v_mov_b32_e32 v4, 7 1400; GFX9-NEXT: v_mov_b32_e32 v10, -1 1401; GFX9-NEXT: 
v_and_b32_sdwa v5, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1402; GFX9-NEXT: v_and_b32_sdwa v9, v1, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1403; GFX9-NEXT: v_xor_b32_sdwa v11, v2, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1404; GFX9-NEXT: v_and_b32_sdwa v4, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD 1405; GFX9-NEXT: v_xor_b32_sdwa v2, v2, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD 1406; GFX9-NEXT: v_lshrrev_b16_e32 v9, 1, v9 1407; GFX9-NEXT: v_and_b32_e32 v11, 7, v11 1408; GFX9-NEXT: v_lshrrev_b16_sdwa v1, v8, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3 1409; GFX9-NEXT: v_and_b32_e32 v2, 7, v2 1410; GFX9-NEXT: v_lshlrev_b16_sdwa v5, v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 1411; GFX9-NEXT: v_lshrrev_b16_e32 v9, v11, v9 1412; GFX9-NEXT: v_lshlrev_b16_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3 1413; GFX9-NEXT: v_lshrrev_b16_e32 v1, v2, v1 1414; GFX9-NEXT: v_or_b32_e32 v5, v5, v9 1415; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 1416; GFX9-NEXT: v_mov_b32_e32 v1, 8 1417; GFX9-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 1418; GFX9-NEXT: v_and_b32_e32 v2, 0xff, v5 1419; GFX9-NEXT: v_and_b32_e32 v0, 0xff, v0 1420; GFX9-NEXT: v_and_or_b32 v1, v6, v7, v1 1421; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2 1422; GFX9-NEXT: v_lshlrev_b32_e32 v0, 24, v0 1423; GFX9-NEXT: v_or3_b32 v0, v1, v2, v0 1424; GFX9-NEXT: s_setpc_b64 s[30:31] 1425; 1426; GFX10-LABEL: v_fshl_v4i8: 1427; GFX10: ; %bb.0: 1428; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1429; GFX10-NEXT: v_lshrrev_b32_e32 v6, 8, v2 1430; GFX10-NEXT: v_lshrrev_b32_e32 v3, 8, v0 1431; GFX10-NEXT: v_and_b32_e32 v8, 7, v2 1432; GFX10-NEXT: v_and_b32_e32 v9, 0xff, v1 1433; GFX10-NEXT: v_xor_b32_e32 v10, -1, v2 1434; GFX10-NEXT: 
v_and_b32_e32 v11, 7, v6 1435; GFX10-NEXT: v_lshrrev_b32_e32 v4, 16, v0 1436; GFX10-NEXT: v_lshrrev_b32_e32 v5, 24, v0 1437; GFX10-NEXT: v_lshrrev_b32_e32 v7, 8, v1 1438; GFX10-NEXT: v_lshlrev_b16 v0, v8, v0 1439; GFX10-NEXT: v_lshrrev_b16 v8, 1, v9 1440; GFX10-NEXT: v_and_b32_e32 v9, 7, v10 1441; GFX10-NEXT: v_lshlrev_b16 v3, v11, v3 1442; GFX10-NEXT: v_mov_b32_e32 v10, 0xff 1443; GFX10-NEXT: v_mov_b32_e32 v11, -1 1444; GFX10-NEXT: v_lshrrev_b32_e32 v12, 24, v1 1445; GFX10-NEXT: v_and_b32_e32 v7, 0xff, v7 1446; GFX10-NEXT: v_xor_b32_e32 v6, -1, v6 1447; GFX10-NEXT: v_mov_b32_e32 v13, 7 1448; GFX10-NEXT: v_and_b32_sdwa v1, v1, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1449; GFX10-NEXT: v_xor_b32_sdwa v10, v2, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1450; GFX10-NEXT: v_xor_b32_sdwa v11, v2, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD 1451; GFX10-NEXT: v_lshrrev_b16 v7, 1, v7 1452; GFX10-NEXT: v_and_b32_e32 v6, 7, v6 1453; GFX10-NEXT: v_and_b32_sdwa v14, v2, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1454; GFX10-NEXT: v_lshrrev_b16 v1, 1, v1 1455; GFX10-NEXT: v_and_b32_e32 v10, 7, v10 1456; GFX10-NEXT: v_and_b32_sdwa v2, v2, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD 1457; GFX10-NEXT: v_lshrrev_b16 v12, 1, v12 1458; GFX10-NEXT: v_and_b32_e32 v11, 7, v11 1459; GFX10-NEXT: v_lshrrev_b16 v6, v6, v7 1460; GFX10-NEXT: v_lshlrev_b16 v4, v14, v4 1461; GFX10-NEXT: v_lshrrev_b16 v1, v10, v1 1462; GFX10-NEXT: v_lshlrev_b16 v2, v2, v5 1463; GFX10-NEXT: v_lshrrev_b16 v5, v11, v12 1464; GFX10-NEXT: v_lshrrev_b16 v7, v9, v8 1465; GFX10-NEXT: v_or_b32_e32 v3, v3, v6 1466; GFX10-NEXT: v_mov_b32_e32 v6, 8 1467; GFX10-NEXT: v_or_b32_e32 v1, v4, v1 1468; GFX10-NEXT: v_or_b32_e32 v2, v2, v5 1469; GFX10-NEXT: v_or_b32_e32 v0, v0, v7 1470; GFX10-NEXT: v_lshlrev_b32_sdwa v3, v6, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 
1471; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1 1472; GFX10-NEXT: v_and_b32_e32 v2, 0xff, v2 1473; GFX10-NEXT: v_and_or_b32 v0, 0xff, v0, v3 1474; GFX10-NEXT: v_lshlrev_b32_e32 v1, 16, v1 1475; GFX10-NEXT: v_lshlrev_b32_e32 v2, 24, v2 1476; GFX10-NEXT: v_or3_b32 v0, v0, v1, v2 1477; GFX10-NEXT: s_setpc_b64 s[30:31] 1478; 1479; GFX11-LABEL: v_fshl_v4i8: 1480; GFX11: ; %bb.0: 1481; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1482; GFX11-NEXT: v_lshrrev_b32_e32 v6, 8, v1 1483; GFX11-NEXT: v_lshrrev_b32_e32 v9, 8, v2 1484; GFX11-NEXT: v_lshrrev_b32_e32 v3, 8, v0 1485; GFX11-NEXT: v_lshrrev_b32_e32 v7, 16, v1 1486; GFX11-NEXT: v_lshrrev_b32_e32 v10, 16, v2 1487; GFX11-NEXT: v_and_b32_e32 v6, 0xff, v6 1488; GFX11-NEXT: v_xor_b32_e32 v13, -1, v9 1489; GFX11-NEXT: v_lshrrev_b32_e32 v11, 24, v2 1490; GFX11-NEXT: v_and_b32_e32 v9, 7, v9 1491; GFX11-NEXT: v_lshrrev_b32_e32 v8, 24, v1 1492; GFX11-NEXT: v_lshrrev_b16 v6, 1, v6 1493; GFX11-NEXT: v_and_b32_e32 v13, 7, v13 1494; GFX11-NEXT: v_and_b32_e32 v7, 0xff, v7 1495; GFX11-NEXT: v_lshlrev_b16 v3, v9, v3 1496; GFX11-NEXT: v_xor_b32_e32 v9, -1, v10 1497; GFX11-NEXT: v_lshrrev_b32_e32 v4, 16, v0 1498; GFX11-NEXT: v_lshrrev_b16 v6, v13, v6 1499; GFX11-NEXT: v_xor_b32_e32 v13, -1, v11 1500; GFX11-NEXT: v_lshrrev_b32_e32 v5, 24, v0 1501; GFX11-NEXT: v_and_b32_e32 v12, 7, v2 1502; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 1503; GFX11-NEXT: v_xor_b32_e32 v2, -1, v2 1504; GFX11-NEXT: v_and_b32_e32 v10, 7, v10 1505; GFX11-NEXT: v_lshrrev_b16 v7, 1, v7 1506; GFX11-NEXT: v_and_b32_e32 v9, 7, v9 1507; GFX11-NEXT: v_and_b32_e32 v11, 7, v11 1508; GFX11-NEXT: v_lshrrev_b16 v8, 1, v8 1509; GFX11-NEXT: v_and_b32_e32 v13, 7, v13 1510; GFX11-NEXT: v_lshrrev_b16 v1, 1, v1 1511; GFX11-NEXT: v_and_b32_e32 v2, 7, v2 1512; GFX11-NEXT: v_or_b32_e32 v3, v3, v6 1513; GFX11-NEXT: v_lshlrev_b16 v4, v10, v4 1514; GFX11-NEXT: v_lshrrev_b16 v6, v9, v7 1515; GFX11-NEXT: v_lshlrev_b16 v5, v11, v5 1516; GFX11-NEXT: v_lshrrev_b16 v7, v13, v8 1517; GFX11-NEXT: 
v_lshlrev_b16 v0, v12, v0 1518; GFX11-NEXT: v_lshrrev_b16 v1, v2, v1 1519; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v3 1520; GFX11-NEXT: v_or_b32_e32 v3, v4, v6 1521; GFX11-NEXT: v_or_b32_e32 v4, v5, v7 1522; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 1523; GFX11-NEXT: v_or_b32_e32 v0, v0, v1 1524; GFX11-NEXT: v_lshlrev_b32_e32 v1, 8, v2 1525; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 1526; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v3 1527; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v4 1528; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 1529; GFX11-NEXT: v_and_or_b32 v0, 0xff, v0, v1 1530; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v2 1531; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) 1532; GFX11-NEXT: v_lshlrev_b32_e32 v2, 24, v3 1533; GFX11-NEXT: v_or3_b32 v0, v0, v1, v2 1534; GFX11-NEXT: s_setpc_b64 s[30:31] 1535 %lhs = bitcast i32 %lhs.arg to <4 x i8> 1536 %rhs = bitcast i32 %rhs.arg to <4 x i8> 1537 %amt = bitcast i32 %amt.arg to <4 x i8> 1538 %result = call <4 x i8> @llvm.fshl.v4i8(<4 x i8> %lhs, <4 x i8> %rhs, <4 x i8> %amt) 1539 %cast.result = bitcast <4 x i8> %result to i32 1540 ret i32 %cast.result 1541} 1542 1543define amdgpu_ps i24 @s_fshl_i24(i24 inreg %lhs, i24 inreg %rhs, i24 inreg %amt) { 1544; GFX6-LABEL: s_fshl_i24: 1545; GFX6: ; %bb.0: 1546; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 1547; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0 1548; GFX6-NEXT: v_not_b32_e32 v1, 23 1549; GFX6-NEXT: s_and_b32 s2, s2, 0xffffff 1550; GFX6-NEXT: s_bfe_u32 s1, s1, 0x170001 1551; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 1552; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 1553; GFX6-NEXT: v_mul_lo_u32 v2, v0, v1 1554; GFX6-NEXT: v_mul_hi_u32 v2, v0, v2 1555; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2 1556; GFX6-NEXT: v_mul_hi_u32 v0, s2, v0 1557; GFX6-NEXT: v_mul_lo_u32 v0, v0, 24 1558; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s2, v0 1559; GFX6-NEXT: 
v_add_i32_e32 v2, vcc, v0, v1 1560; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 1561; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 1562; GFX6-NEXT: v_add_i32_e32 v1, vcc, v0, v1 1563; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 1564; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 1565; GFX6-NEXT: v_sub_i32_e32 v1, vcc, 23, v0 1566; GFX6-NEXT: v_and_b32_e32 v0, 0xffffff, v0 1567; GFX6-NEXT: v_and_b32_e32 v1, 0xffffff, v1 1568; GFX6-NEXT: v_lshl_b32_e32 v0, s0, v0 1569; GFX6-NEXT: v_lshr_b32_e32 v1, s1, v1 1570; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 1571; GFX6-NEXT: v_readfirstlane_b32 s0, v0 1572; GFX6-NEXT: ; return to shader part epilog 1573; 1574; GFX8-LABEL: s_fshl_i24: 1575; GFX8: ; %bb.0: 1576; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 1577; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 1578; GFX8-NEXT: v_not_b32_e32 v1, 23 1579; GFX8-NEXT: s_and_b32 s2, s2, 0xffffff 1580; GFX8-NEXT: s_bfe_u32 s1, s1, 0x170001 1581; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 1582; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0 1583; GFX8-NEXT: v_mul_lo_u32 v2, v0, v1 1584; GFX8-NEXT: v_mul_hi_u32 v2, v0, v2 1585; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 1586; GFX8-NEXT: v_mul_hi_u32 v0, s2, v0 1587; GFX8-NEXT: v_mul_lo_u32 v0, v0, 24 1588; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s2, v0 1589; GFX8-NEXT: v_add_u32_e32 v2, vcc, v0, v1 1590; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 1591; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 1592; GFX8-NEXT: v_add_u32_e32 v1, vcc, v0, v1 1593; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 1594; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 1595; GFX8-NEXT: v_sub_u32_e32 v1, vcc, 23, v0 1596; GFX8-NEXT: v_and_b32_e32 v0, 0xffffff, v0 1597; GFX8-NEXT: v_and_b32_e32 v1, 0xffffff, v1 1598; GFX8-NEXT: v_lshlrev_b32_e64 v0, v0, s0 1599; GFX8-NEXT: v_lshrrev_b32_e64 v1, v1, s1 1600; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 1601; GFX8-NEXT: v_readfirstlane_b32 s0, v0 1602; GFX8-NEXT: ; return to shader part epilog 1603; 1604; GFX9-LABEL: s_fshl_i24: 1605; GFX9: ; %bb.0: 1606; GFX9-NEXT: 
v_cvt_f32_ubyte0_e32 v0, 24 1607; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 1608; GFX9-NEXT: v_not_b32_e32 v1, 23 1609; GFX9-NEXT: s_and_b32 s2, s2, 0xffffff 1610; GFX9-NEXT: s_bfe_u32 s1, s1, 0x170001 1611; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 1612; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 1613; GFX9-NEXT: v_mul_lo_u32 v1, v0, v1 1614; GFX9-NEXT: v_mul_hi_u32 v1, v0, v1 1615; GFX9-NEXT: v_add_u32_e32 v0, v0, v1 1616; GFX9-NEXT: v_mul_hi_u32 v0, s2, v0 1617; GFX9-NEXT: v_mul_lo_u32 v0, v0, 24 1618; GFX9-NEXT: v_sub_u32_e32 v0, s2, v0 1619; GFX9-NEXT: v_add_u32_e32 v1, 0xffffffe8, v0 1620; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 1621; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 1622; GFX9-NEXT: v_add_u32_e32 v1, 0xffffffe8, v0 1623; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 1624; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 1625; GFX9-NEXT: v_sub_u32_e32 v1, 23, v0 1626; GFX9-NEXT: v_and_b32_e32 v1, 0xffffff, v1 1627; GFX9-NEXT: v_and_b32_e32 v0, 0xffffff, v0 1628; GFX9-NEXT: v_lshrrev_b32_e64 v1, v1, s1 1629; GFX9-NEXT: v_lshl_or_b32 v0, s0, v0, v1 1630; GFX9-NEXT: v_readfirstlane_b32 s0, v0 1631; GFX9-NEXT: ; return to shader part epilog 1632; 1633; GFX10-LABEL: s_fshl_i24: 1634; GFX10: ; %bb.0: 1635; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 1636; GFX10-NEXT: s_and_b32 s2, s2, 0xffffff 1637; GFX10-NEXT: s_bfe_u32 s1, s1, 0x170001 1638; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 1639; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 1640; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0 1641; GFX10-NEXT: v_mul_lo_u32 v1, 0xffffffe8, v0 1642; GFX10-NEXT: v_mul_hi_u32 v1, v0, v1 1643; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v1 1644; GFX10-NEXT: v_mul_hi_u32 v0, s2, v0 1645; GFX10-NEXT: v_mul_lo_u32 v0, v0, 24 1646; GFX10-NEXT: v_sub_nc_u32_e32 v0, s2, v0 1647; GFX10-NEXT: v_add_nc_u32_e32 v1, 0xffffffe8, v0 1648; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 1649; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 1650; GFX10-NEXT: v_add_nc_u32_e32 v1, 0xffffffe8, v0 1651; GFX10-NEXT: 
v_cmp_le_u32_e32 vcc_lo, 24, v0 1652; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 1653; GFX10-NEXT: v_sub_nc_u32_e32 v1, 23, v0 1654; GFX10-NEXT: v_and_b32_e32 v0, 0xffffff, v0 1655; GFX10-NEXT: v_and_b32_e32 v1, 0xffffff, v1 1656; GFX10-NEXT: v_lshrrev_b32_e64 v1, v1, s1 1657; GFX10-NEXT: v_lshl_or_b32 v0, s0, v0, v1 1658; GFX10-NEXT: v_readfirstlane_b32 s0, v0 1659; GFX10-NEXT: ; return to shader part epilog 1660; 1661; GFX11-LABEL: s_fshl_i24: 1662; GFX11: ; %bb.0: 1663; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 1664; GFX11-NEXT: s_and_b32 s2, s2, 0xffffff 1665; GFX11-NEXT: s_bfe_u32 s1, s1, 0x170001 1666; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) 1667; GFX11-NEXT: v_rcp_iflag_f32_e32 v0, v0 1668; GFX11-NEXT: s_waitcnt_depctr 0xfff 1669; GFX11-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 1670; GFX11-NEXT: v_cvt_u32_f32_e32 v0, v0 1671; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1672; GFX11-NEXT: v_mul_lo_u32 v1, 0xffffffe8, v0 1673; GFX11-NEXT: v_mul_hi_u32 v1, v0, v1 1674; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1675; GFX11-NEXT: v_add_nc_u32_e32 v0, v0, v1 1676; GFX11-NEXT: v_mul_hi_u32 v0, s2, v0 1677; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1678; GFX11-NEXT: v_mul_lo_u32 v0, v0, 24 1679; GFX11-NEXT: v_sub_nc_u32_e32 v0, s2, v0 1680; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 1681; GFX11-NEXT: v_add_nc_u32_e32 v1, 0xffffffe8, v0 1682; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 1683; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 1684; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 1685; GFX11-NEXT: v_add_nc_u32_e32 v1, 0xffffffe8, v0 1686; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 1687; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 1688; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | 
instid1(VALU_DEP_2) 1689; GFX11-NEXT: v_sub_nc_u32_e32 v1, 23, v0 1690; GFX11-NEXT: v_and_b32_e32 v0, 0xffffff, v0 1691; GFX11-NEXT: v_and_b32_e32 v1, 0xffffff, v1 1692; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1693; GFX11-NEXT: v_lshrrev_b32_e64 v1, v1, s1 1694; GFX11-NEXT: v_lshl_or_b32 v0, s0, v0, v1 1695; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1696; GFX11-NEXT: v_readfirstlane_b32 s0, v0 1697; GFX11-NEXT: ; return to shader part epilog 1698 %result = call i24 @llvm.fshl.i24(i24 %lhs, i24 %rhs, i24 %amt) 1699 ret i24 %result 1700} 1701 1702define i24 @v_fshl_i24(i24 %lhs, i24 %rhs, i24 %amt) { 1703; GFX6-LABEL: v_fshl_i24: 1704; GFX6: ; %bb.0: 1705; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1706; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v3, 24 1707; GFX6-NEXT: v_rcp_iflag_f32_e32 v3, v3 1708; GFX6-NEXT: v_not_b32_e32 v4, 23 1709; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v2 1710; GFX6-NEXT: v_bfe_u32 v1, v1, 1, 23 1711; GFX6-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 1712; GFX6-NEXT: v_cvt_u32_f32_e32 v3, v3 1713; GFX6-NEXT: v_mul_lo_u32 v5, v3, v4 1714; GFX6-NEXT: v_mul_hi_u32 v5, v3, v5 1715; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v5 1716; GFX6-NEXT: v_mul_hi_u32 v3, v2, v3 1717; GFX6-NEXT: v_mul_lo_u32 v3, v3, 24 1718; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v3 1719; GFX6-NEXT: v_add_i32_e32 v3, vcc, v2, v4 1720; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 1721; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 1722; GFX6-NEXT: v_add_i32_e32 v3, vcc, 0xffffffe8, v2 1723; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 1724; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 1725; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 23, v2 1726; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v2 1727; GFX6-NEXT: v_lshlrev_b32_e32 v0, v2, v0 1728; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v3 1729; GFX6-NEXT: v_lshrrev_b32_e32 v1, v2, v1 1730; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 1731; GFX6-NEXT: s_setpc_b64 s[30:31] 1732; 1733; GFX8-LABEL: v_fshl_i24: 1734; GFX8: ; %bb.0: 1735; 
GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1736; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v3, 24 1737; GFX8-NEXT: v_rcp_iflag_f32_e32 v3, v3 1738; GFX8-NEXT: v_not_b32_e32 v4, 23 1739; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v2 1740; GFX8-NEXT: v_bfe_u32 v1, v1, 1, 23 1741; GFX8-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 1742; GFX8-NEXT: v_cvt_u32_f32_e32 v3, v3 1743; GFX8-NEXT: v_mul_lo_u32 v5, v3, v4 1744; GFX8-NEXT: v_mul_hi_u32 v5, v3, v5 1745; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v5 1746; GFX8-NEXT: v_mul_hi_u32 v3, v2, v3 1747; GFX8-NEXT: v_mul_lo_u32 v3, v3, 24 1748; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v2, v3 1749; GFX8-NEXT: v_add_u32_e32 v3, vcc, v2, v4 1750; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 1751; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 1752; GFX8-NEXT: v_add_u32_e32 v3, vcc, 0xffffffe8, v2 1753; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 1754; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 1755; GFX8-NEXT: v_sub_u32_e32 v3, vcc, 23, v2 1756; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v2 1757; GFX8-NEXT: v_lshlrev_b32_e32 v0, v2, v0 1758; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v3 1759; GFX8-NEXT: v_lshrrev_b32_e32 v1, v2, v1 1760; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 1761; GFX8-NEXT: s_setpc_b64 s[30:31] 1762; 1763; GFX9-LABEL: v_fshl_i24: 1764; GFX9: ; %bb.0: 1765; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1766; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v3, 24 1767; GFX9-NEXT: v_rcp_iflag_f32_e32 v3, v3 1768; GFX9-NEXT: v_not_b32_e32 v4, 23 1769; GFX9-NEXT: v_and_b32_e32 v2, 0xffffff, v2 1770; GFX9-NEXT: v_bfe_u32 v1, v1, 1, 23 1771; GFX9-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 1772; GFX9-NEXT: v_cvt_u32_f32_e32 v3, v3 1773; GFX9-NEXT: v_mul_lo_u32 v4, v3, v4 1774; GFX9-NEXT: v_mul_hi_u32 v4, v3, v4 1775; GFX9-NEXT: v_add_u32_e32 v3, v3, v4 1776; GFX9-NEXT: v_mul_hi_u32 v3, v2, v3 1777; GFX9-NEXT: v_mul_lo_u32 v3, v3, 24 1778; GFX9-NEXT: v_sub_u32_e32 v2, v2, v3 1779; GFX9-NEXT: v_add_u32_e32 v3, 0xffffffe8, v2 1780; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 1781; 
GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 1782; GFX9-NEXT: v_add_u32_e32 v3, 0xffffffe8, v2 1783; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 1784; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 1785; GFX9-NEXT: v_sub_u32_e32 v3, 23, v2 1786; GFX9-NEXT: v_and_b32_e32 v3, 0xffffff, v3 1787; GFX9-NEXT: v_and_b32_e32 v2, 0xffffff, v2 1788; GFX9-NEXT: v_lshrrev_b32_e32 v1, v3, v1 1789; GFX9-NEXT: v_lshl_or_b32 v0, v0, v2, v1 1790; GFX9-NEXT: s_setpc_b64 s[30:31] 1791; 1792; GFX10-LABEL: v_fshl_i24: 1793; GFX10: ; %bb.0: 1794; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1795; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v3, 24 1796; GFX10-NEXT: v_and_b32_e32 v2, 0xffffff, v2 1797; GFX10-NEXT: v_bfe_u32 v1, v1, 1, 23 1798; GFX10-NEXT: v_rcp_iflag_f32_e32 v3, v3 1799; GFX10-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 1800; GFX10-NEXT: v_cvt_u32_f32_e32 v3, v3 1801; GFX10-NEXT: v_mul_lo_u32 v4, 0xffffffe8, v3 1802; GFX10-NEXT: v_mul_hi_u32 v4, v3, v4 1803; GFX10-NEXT: v_add_nc_u32_e32 v3, v3, v4 1804; GFX10-NEXT: v_mul_hi_u32 v3, v2, v3 1805; GFX10-NEXT: v_mul_lo_u32 v3, v3, 24 1806; GFX10-NEXT: v_sub_nc_u32_e32 v2, v2, v3 1807; GFX10-NEXT: v_add_nc_u32_e32 v3, 0xffffffe8, v2 1808; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v2 1809; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo 1810; GFX10-NEXT: v_add_nc_u32_e32 v3, 0xffffffe8, v2 1811; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v2 1812; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo 1813; GFX10-NEXT: v_sub_nc_u32_e32 v3, 23, v2 1814; GFX10-NEXT: v_and_b32_e32 v2, 0xffffff, v2 1815; GFX10-NEXT: v_and_b32_e32 v3, 0xffffff, v3 1816; GFX10-NEXT: v_lshrrev_b32_e32 v1, v3, v1 1817; GFX10-NEXT: v_lshl_or_b32 v0, v0, v2, v1 1818; GFX10-NEXT: s_setpc_b64 s[30:31] 1819; 1820; GFX11-LABEL: v_fshl_i24: 1821; GFX11: ; %bb.0: 1822; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1823; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v3, 24 1824; GFX11-NEXT: v_and_b32_e32 v2, 0xffffff, v2 1825; GFX11-NEXT: v_bfe_u32 v1, v1, 1, 23 1826; GFX11-NEXT: s_delay_alu 
instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_1) 1827; GFX11-NEXT: v_rcp_iflag_f32_e32 v3, v3 1828; GFX11-NEXT: s_waitcnt_depctr 0xfff 1829; GFX11-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 1830; GFX11-NEXT: v_cvt_u32_f32_e32 v3, v3 1831; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1832; GFX11-NEXT: v_mul_lo_u32 v4, 0xffffffe8, v3 1833; GFX11-NEXT: v_mul_hi_u32 v4, v3, v4 1834; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1835; GFX11-NEXT: v_add_nc_u32_e32 v3, v3, v4 1836; GFX11-NEXT: v_mul_hi_u32 v3, v2, v3 1837; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1838; GFX11-NEXT: v_mul_lo_u32 v3, v3, 24 1839; GFX11-NEXT: v_sub_nc_u32_e32 v2, v2, v3 1840; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 1841; GFX11-NEXT: v_add_nc_u32_e32 v3, 0xffffffe8, v2 1842; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v2 1843; GFX11-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo 1844; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 1845; GFX11-NEXT: v_add_nc_u32_e32 v3, 0xffffffe8, v2 1846; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v2 1847; GFX11-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo 1848; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 1849; GFX11-NEXT: v_sub_nc_u32_e32 v3, 23, v2 1850; GFX11-NEXT: v_and_b32_e32 v2, 0xffffff, v2 1851; GFX11-NEXT: v_and_b32_e32 v3, 0xffffff, v3 1852; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1853; GFX11-NEXT: v_lshrrev_b32_e32 v1, v3, v1 1854; GFX11-NEXT: v_lshl_or_b32 v0, v0, v2, v1 1855; GFX11-NEXT: s_setpc_b64 s[30:31] 1856 %result = call i24 @llvm.fshl.i24(i24 %lhs, i24 %rhs, i24 %amt) 1857 ret i24 %result 1858} 1859 1860define amdgpu_ps i48 @s_fshl_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 inreg %amt.arg) { 1861; GFX6-LABEL: s_fshl_v2i24: 1862; GFX6: ; %bb.0: 1863; 
GFX6-NEXT: v_cvt_f32_ubyte0_e32 v2, 24 1864; GFX6-NEXT: v_rcp_iflag_f32_e32 v2, v2 1865; GFX6-NEXT: s_lshr_b32 s6, s0, 16 1866; GFX6-NEXT: s_lshr_b32 s7, s1, 8 1867; GFX6-NEXT: s_bfe_u32 s9, s0, 0x80008 1868; GFX6-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 1869; GFX6-NEXT: v_cvt_u32_f32_e32 v2, v2 1870; GFX6-NEXT: s_and_b32 s8, s0, 0xff 1871; GFX6-NEXT: s_lshl_b32 s9, s9, 8 1872; GFX6-NEXT: s_and_b32 s6, s6, 0xff 1873; GFX6-NEXT: s_and_b32 s1, s1, 0xff 1874; GFX6-NEXT: v_mov_b32_e32 v0, s0 1875; GFX6-NEXT: s_and_b32 s0, s7, 0xff 1876; GFX6-NEXT: s_or_b32 s8, s8, s9 1877; GFX6-NEXT: s_and_b32 s6, 0xffff, s6 1878; GFX6-NEXT: v_alignbit_b32 v0, s1, v0, 24 1879; GFX6-NEXT: s_and_b32 s0, 0xffff, s0 1880; GFX6-NEXT: s_and_b32 s8, 0xffff, s8 1881; GFX6-NEXT: s_lshl_b32 s6, s6, 16 1882; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 1883; GFX6-NEXT: s_lshl_b32 s0, s0, 16 1884; GFX6-NEXT: v_not_b32_e32 v3, 23 1885; GFX6-NEXT: s_or_b32 s6, s8, s6 1886; GFX6-NEXT: v_or_b32_e32 v0, s0, v0 1887; GFX6-NEXT: s_lshr_b32 s0, s2, 16 1888; GFX6-NEXT: s_lshr_b32 s1, s3, 8 1889; GFX6-NEXT: s_bfe_u32 s8, s2, 0x80008 1890; GFX6-NEXT: v_mul_lo_u32 v4, v2, v3 1891; GFX6-NEXT: s_and_b32 s7, s2, 0xff 1892; GFX6-NEXT: s_lshl_b32 s8, s8, 8 1893; GFX6-NEXT: s_and_b32 s0, s0, 0xff 1894; GFX6-NEXT: s_and_b32 s3, s3, 0xff 1895; GFX6-NEXT: v_mov_b32_e32 v1, s2 1896; GFX6-NEXT: s_and_b32 s1, s1, 0xff 1897; GFX6-NEXT: s_or_b32 s7, s7, s8 1898; GFX6-NEXT: s_and_b32 s0, 0xffff, s0 1899; GFX6-NEXT: v_alignbit_b32 v1, s3, v1, 24 1900; GFX6-NEXT: s_and_b32 s1, 0xffff, s1 1901; GFX6-NEXT: s_and_b32 s7, 0xffff, s7 1902; GFX6-NEXT: s_lshl_b32 s0, s0, 16 1903; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1 1904; GFX6-NEXT: s_lshl_b32 s1, s1, 16 1905; GFX6-NEXT: s_or_b32 s0, s7, s0 1906; GFX6-NEXT: v_or_b32_e32 v1, s1, v1 1907; GFX6-NEXT: s_lshr_b32 s1, s4, 16 1908; GFX6-NEXT: s_bfe_u32 s7, s4, 0x80008 1909; GFX6-NEXT: v_mul_hi_u32 v4, v2, v4 1910; GFX6-NEXT: s_and_b32 s3, s4, 0xff 1911; GFX6-NEXT: s_lshl_b32 s7, s7, 8 1912; 
GFX6-NEXT: s_and_b32 s1, s1, 0xff 1913; GFX6-NEXT: s_or_b32 s3, s3, s7 1914; GFX6-NEXT: s_and_b32 s1, 0xffff, s1 1915; GFX6-NEXT: s_and_b32 s3, 0xffff, s3 1916; GFX6-NEXT: s_lshl_b32 s1, s1, 16 1917; GFX6-NEXT: s_or_b32 s1, s3, s1 1918; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v4 1919; GFX6-NEXT: v_mul_hi_u32 v4, s1, v2 1920; GFX6-NEXT: s_lshr_b32 s2, s5, 8 1921; GFX6-NEXT: s_and_b32 s3, s5, 0xff 1922; GFX6-NEXT: v_mov_b32_e32 v5, s4 1923; GFX6-NEXT: s_and_b32 s2, s2, 0xff 1924; GFX6-NEXT: v_alignbit_b32 v5, s3, v5, 24 1925; GFX6-NEXT: s_and_b32 s2, 0xffff, s2 1926; GFX6-NEXT: v_and_b32_e32 v5, 0xffff, v5 1927; GFX6-NEXT: v_mul_lo_u32 v4, v4, 24 1928; GFX6-NEXT: s_lshl_b32 s2, s2, 16 1929; GFX6-NEXT: v_or_b32_e32 v5, s2, v5 1930; GFX6-NEXT: v_mul_hi_u32 v2, v5, v2 1931; GFX6-NEXT: v_sub_i32_e32 v4, vcc, s1, v4 1932; GFX6-NEXT: v_add_i32_e32 v6, vcc, v4, v3 1933; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 1934; GFX6-NEXT: v_mul_lo_u32 v2, v2, 24 1935; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc 1936; GFX6-NEXT: v_add_i32_e32 v6, vcc, v4, v3 1937; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 1938; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc 1939; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v5, v2 1940; GFX6-NEXT: v_sub_i32_e32 v6, vcc, 23, v4 1941; GFX6-NEXT: v_add_i32_e32 v5, vcc, v2, v3 1942; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 1943; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc 1944; GFX6-NEXT: v_add_i32_e32 v3, vcc, v2, v3 1945; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 1946; GFX6-NEXT: v_and_b32_e32 v4, 0xffffff, v4 1947; GFX6-NEXT: s_lshr_b32 s0, s0, 1 1948; GFX6-NEXT: v_and_b32_e32 v6, 0xffffff, v6 1949; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 1950; GFX6-NEXT: v_lshl_b32_e32 v4, s6, v4 1951; GFX6-NEXT: v_lshr_b32_e32 v6, s0, v6 1952; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 23, v2 1953; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v2 1954; GFX6-NEXT: v_or_b32_e32 v4, v4, v6 1955; GFX6-NEXT: v_lshlrev_b32_e32 v0, v2, v0 1956; GFX6-NEXT: v_lshrrev_b32_e32 v1, 1, v1 1957; GFX6-NEXT: 
v_and_b32_e32 v2, 0xffffff, v3 1958; GFX6-NEXT: v_lshrrev_b32_e32 v1, v2, v1 1959; GFX6-NEXT: v_bfe_u32 v2, v4, 8, 8 1960; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 1961; GFX6-NEXT: v_and_b32_e32 v1, 0xff, v4 1962; GFX6-NEXT: v_lshlrev_b32_e32 v2, 8, v2 1963; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 1964; GFX6-NEXT: v_bfe_u32 v2, v4, 16, 8 1965; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2 1966; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 1967; GFX6-NEXT: v_and_b32_e32 v2, 0xff, v0 1968; GFX6-NEXT: v_lshlrev_b32_e32 v2, 24, v2 1969; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 1970; GFX6-NEXT: v_bfe_u32 v2, v0, 8, 8 1971; GFX6-NEXT: v_bfe_u32 v0, v0, 16, 8 1972; GFX6-NEXT: v_lshlrev_b32_e32 v0, 8, v0 1973; GFX6-NEXT: v_or_b32_e32 v0, v2, v0 1974; GFX6-NEXT: v_readfirstlane_b32 s0, v1 1975; GFX6-NEXT: v_readfirstlane_b32 s1, v0 1976; GFX6-NEXT: ; return to shader part epilog 1977; 1978; GFX8-LABEL: s_fshl_v2i24: 1979; GFX8: ; %bb.0: 1980; GFX8-NEXT: s_lshr_b32 s6, s0, 8 1981; GFX8-NEXT: s_and_b32 s6, s6, 0xff 1982; GFX8-NEXT: s_lshr_b32 s7, s0, 16 1983; GFX8-NEXT: s_lshr_b32 s8, s0, 24 1984; GFX8-NEXT: s_and_b32 s0, s0, 0xff 1985; GFX8-NEXT: s_lshl_b32 s6, s6, 8 1986; GFX8-NEXT: s_or_b32 s0, s0, s6 1987; GFX8-NEXT: s_and_b32 s6, s7, 0xff 1988; GFX8-NEXT: s_and_b32 s6, 0xffff, s6 1989; GFX8-NEXT: s_lshr_b32 s9, s1, 8 1990; GFX8-NEXT: s_and_b32 s0, 0xffff, s0 1991; GFX8-NEXT: s_lshl_b32 s6, s6, 16 1992; GFX8-NEXT: s_and_b32 s1, s1, 0xff 1993; GFX8-NEXT: s_or_b32 s0, s0, s6 1994; GFX8-NEXT: s_lshl_b32 s1, s1, 8 1995; GFX8-NEXT: s_and_b32 s6, s9, 0xff 1996; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 1997; GFX8-NEXT: s_or_b32 s1, s8, s1 1998; GFX8-NEXT: s_and_b32 s6, 0xffff, s6 1999; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 2000; GFX8-NEXT: s_and_b32 s1, 0xffff, s1 2001; GFX8-NEXT: s_lshl_b32 s6, s6, 16 2002; GFX8-NEXT: s_or_b32 s1, s1, s6 2003; GFX8-NEXT: s_lshr_b32 s6, s2, 8 2004; GFX8-NEXT: s_and_b32 s6, s6, 0xff 2005; GFX8-NEXT: s_lshr_b32 s7, s2, 16 2006; GFX8-NEXT: s_lshr_b32 s8, s2, 24 2007; GFX8-NEXT: 
s_and_b32 s2, s2, 0xff 2008; GFX8-NEXT: s_lshl_b32 s6, s6, 8 2009; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 2010; GFX8-NEXT: s_or_b32 s2, s2, s6 2011; GFX8-NEXT: s_and_b32 s6, s7, 0xff 2012; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0 2013; GFX8-NEXT: s_and_b32 s6, 0xffff, s6 2014; GFX8-NEXT: s_lshr_b32 s9, s3, 8 2015; GFX8-NEXT: s_and_b32 s2, 0xffff, s2 2016; GFX8-NEXT: s_lshl_b32 s6, s6, 16 2017; GFX8-NEXT: s_and_b32 s3, s3, 0xff 2018; GFX8-NEXT: s_or_b32 s2, s2, s6 2019; GFX8-NEXT: s_lshl_b32 s3, s3, 8 2020; GFX8-NEXT: s_and_b32 s6, s9, 0xff 2021; GFX8-NEXT: v_not_b32_e32 v1, 23 2022; GFX8-NEXT: s_or_b32 s3, s8, s3 2023; GFX8-NEXT: s_and_b32 s6, 0xffff, s6 2024; GFX8-NEXT: v_mul_lo_u32 v2, v0, v1 2025; GFX8-NEXT: s_and_b32 s3, 0xffff, s3 2026; GFX8-NEXT: s_lshl_b32 s6, s6, 16 2027; GFX8-NEXT: s_or_b32 s3, s3, s6 2028; GFX8-NEXT: s_lshr_b32 s6, s4, 8 2029; GFX8-NEXT: s_and_b32 s6, s6, 0xff 2030; GFX8-NEXT: s_lshr_b32 s7, s4, 16 2031; GFX8-NEXT: s_lshr_b32 s8, s4, 24 2032; GFX8-NEXT: s_and_b32 s4, s4, 0xff 2033; GFX8-NEXT: s_lshl_b32 s6, s6, 8 2034; GFX8-NEXT: v_mul_hi_u32 v2, v0, v2 2035; GFX8-NEXT: s_or_b32 s4, s4, s6 2036; GFX8-NEXT: s_and_b32 s6, s7, 0xff 2037; GFX8-NEXT: s_and_b32 s6, 0xffff, s6 2038; GFX8-NEXT: s_and_b32 s4, 0xffff, s4 2039; GFX8-NEXT: s_lshl_b32 s6, s6, 16 2040; GFX8-NEXT: s_or_b32 s4, s4, s6 2041; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 2042; GFX8-NEXT: v_mul_hi_u32 v2, s4, v0 2043; GFX8-NEXT: s_lshr_b32 s9, s5, 8 2044; GFX8-NEXT: s_and_b32 s5, s5, 0xff 2045; GFX8-NEXT: s_lshl_b32 s5, s5, 8 2046; GFX8-NEXT: v_mul_lo_u32 v2, v2, 24 2047; GFX8-NEXT: s_and_b32 s6, s9, 0xff 2048; GFX8-NEXT: s_or_b32 s5, s8, s5 2049; GFX8-NEXT: s_and_b32 s6, 0xffff, s6 2050; GFX8-NEXT: s_and_b32 s5, 0xffff, s5 2051; GFX8-NEXT: s_lshl_b32 s6, s6, 16 2052; GFX8-NEXT: s_or_b32 s5, s5, s6 2053; GFX8-NEXT: v_sub_u32_e32 v2, vcc, s4, v2 2054; GFX8-NEXT: v_add_u32_e32 v3, vcc, v2, v1 2055; GFX8-NEXT: v_mul_hi_u32 v0, s5, v0 2056; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 
2057; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 2058; GFX8-NEXT: v_add_u32_e32 v3, vcc, v2, v1 2059; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 2060; GFX8-NEXT: v_mul_lo_u32 v0, v0, 24 2061; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 2062; GFX8-NEXT: v_sub_u32_e32 v3, vcc, 23, v2 2063; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v2 2064; GFX8-NEXT: v_lshlrev_b32_e64 v2, v2, s0 2065; GFX8-NEXT: s_lshr_b32 s0, s2, 1 2066; GFX8-NEXT: v_and_b32_e32 v3, 0xffffff, v3 2067; GFX8-NEXT: v_lshrrev_b32_e64 v3, v3, s0 2068; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s5, v0 2069; GFX8-NEXT: v_or_b32_e32 v2, v2, v3 2070; GFX8-NEXT: v_add_u32_e32 v3, vcc, v0, v1 2071; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 2072; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 2073; GFX8-NEXT: v_add_u32_e32 v1, vcc, v0, v1 2074; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 2075; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 2076; GFX8-NEXT: v_sub_u32_e32 v1, vcc, 23, v0 2077; GFX8-NEXT: v_and_b32_e32 v0, 0xffffff, v0 2078; GFX8-NEXT: s_lshr_b32 s0, s3, 1 2079; GFX8-NEXT: v_and_b32_e32 v1, 0xffffff, v1 2080; GFX8-NEXT: v_lshlrev_b32_e64 v0, v0, s1 2081; GFX8-NEXT: v_lshrrev_b32_e64 v1, v1, s0 2082; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 2083; GFX8-NEXT: v_mov_b32_e32 v1, 8 2084; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 2085; GFX8-NEXT: v_mov_b32_e32 v4, 16 2086; GFX8-NEXT: v_or_b32_sdwa v3, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 2087; GFX8-NEXT: v_lshlrev_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 2088; GFX8-NEXT: v_or_b32_e32 v2, v3, v2 2089; GFX8-NEXT: v_and_b32_e32 v3, 0xff, v0 2090; GFX8-NEXT: v_lshlrev_b32_e32 v3, 24, v3 2091; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 2092; GFX8-NEXT: v_or_b32_e32 v2, v2, v3 2093; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 
src1_sel:DWORD 2094; GFX8-NEXT: v_readfirstlane_b32 s0, v2 2095; GFX8-NEXT: v_readfirstlane_b32 s1, v0 2096; GFX8-NEXT: ; return to shader part epilog 2097; 2098; GFX9-LABEL: s_fshl_v2i24: 2099; GFX9: ; %bb.0: 2100; GFX9-NEXT: s_lshr_b32 s6, s0, 8 2101; GFX9-NEXT: s_and_b32 s6, s6, 0xff 2102; GFX9-NEXT: s_lshr_b32 s7, s0, 16 2103; GFX9-NEXT: s_lshr_b32 s8, s0, 24 2104; GFX9-NEXT: s_and_b32 s0, s0, 0xff 2105; GFX9-NEXT: s_lshl_b32 s6, s6, 8 2106; GFX9-NEXT: s_or_b32 s0, s0, s6 2107; GFX9-NEXT: s_and_b32 s6, s7, 0xff 2108; GFX9-NEXT: s_and_b32 s6, 0xffff, s6 2109; GFX9-NEXT: s_lshr_b32 s9, s1, 8 2110; GFX9-NEXT: s_and_b32 s0, 0xffff, s0 2111; GFX9-NEXT: s_lshl_b32 s6, s6, 16 2112; GFX9-NEXT: s_and_b32 s1, s1, 0xff 2113; GFX9-NEXT: s_or_b32 s0, s0, s6 2114; GFX9-NEXT: s_lshl_b32 s1, s1, 8 2115; GFX9-NEXT: s_and_b32 s6, s9, 0xff 2116; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 2117; GFX9-NEXT: s_or_b32 s1, s8, s1 2118; GFX9-NEXT: s_and_b32 s6, 0xffff, s6 2119; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 2120; GFX9-NEXT: s_and_b32 s1, 0xffff, s1 2121; GFX9-NEXT: s_lshl_b32 s6, s6, 16 2122; GFX9-NEXT: s_or_b32 s1, s1, s6 2123; GFX9-NEXT: s_lshr_b32 s6, s2, 8 2124; GFX9-NEXT: s_and_b32 s6, s6, 0xff 2125; GFX9-NEXT: s_lshr_b32 s7, s2, 16 2126; GFX9-NEXT: s_lshr_b32 s8, s2, 24 2127; GFX9-NEXT: s_and_b32 s2, s2, 0xff 2128; GFX9-NEXT: s_lshl_b32 s6, s6, 8 2129; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 2130; GFX9-NEXT: s_or_b32 s2, s2, s6 2131; GFX9-NEXT: s_and_b32 s6, s7, 0xff 2132; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 2133; GFX9-NEXT: s_and_b32 s6, 0xffff, s6 2134; GFX9-NEXT: s_lshr_b32 s9, s3, 8 2135; GFX9-NEXT: s_and_b32 s2, 0xffff, s2 2136; GFX9-NEXT: s_lshl_b32 s6, s6, 16 2137; GFX9-NEXT: s_and_b32 s3, s3, 0xff 2138; GFX9-NEXT: s_or_b32 s2, s2, s6 2139; GFX9-NEXT: s_lshl_b32 s3, s3, 8 2140; GFX9-NEXT: s_and_b32 s6, s9, 0xff 2141; GFX9-NEXT: v_not_b32_e32 v1, 23 2142; GFX9-NEXT: s_or_b32 s3, s8, s3 2143; GFX9-NEXT: s_and_b32 s6, 0xffff, s6 2144; GFX9-NEXT: v_mul_lo_u32 v1, v0, v1 
2145; GFX9-NEXT: s_and_b32 s3, 0xffff, s3 2146; GFX9-NEXT: s_lshl_b32 s6, s6, 16 2147; GFX9-NEXT: s_or_b32 s3, s3, s6 2148; GFX9-NEXT: s_lshr_b32 s6, s4, 8 2149; GFX9-NEXT: s_and_b32 s6, s6, 0xff 2150; GFX9-NEXT: s_lshr_b32 s7, s4, 16 2151; GFX9-NEXT: s_lshr_b32 s8, s4, 24 2152; GFX9-NEXT: s_and_b32 s4, s4, 0xff 2153; GFX9-NEXT: s_lshl_b32 s6, s6, 8 2154; GFX9-NEXT: v_mul_hi_u32 v1, v0, v1 2155; GFX9-NEXT: s_or_b32 s4, s4, s6 2156; GFX9-NEXT: s_and_b32 s6, s7, 0xff 2157; GFX9-NEXT: s_and_b32 s6, 0xffff, s6 2158; GFX9-NEXT: s_and_b32 s4, 0xffff, s4 2159; GFX9-NEXT: s_lshl_b32 s6, s6, 16 2160; GFX9-NEXT: s_or_b32 s4, s4, s6 2161; GFX9-NEXT: v_add_u32_e32 v0, v0, v1 2162; GFX9-NEXT: v_mul_hi_u32 v1, s4, v0 2163; GFX9-NEXT: s_lshr_b32 s9, s5, 8 2164; GFX9-NEXT: s_and_b32 s5, s5, 0xff 2165; GFX9-NEXT: s_lshl_b32 s5, s5, 8 2166; GFX9-NEXT: s_and_b32 s6, s9, 0xff 2167; GFX9-NEXT: s_or_b32 s5, s8, s5 2168; GFX9-NEXT: v_mul_lo_u32 v1, v1, 24 2169; GFX9-NEXT: s_and_b32 s6, 0xffff, s6 2170; GFX9-NEXT: s_and_b32 s5, 0xffff, s5 2171; GFX9-NEXT: s_lshl_b32 s6, s6, 16 2172; GFX9-NEXT: s_or_b32 s5, s5, s6 2173; GFX9-NEXT: v_mul_hi_u32 v0, s5, v0 2174; GFX9-NEXT: v_sub_u32_e32 v1, s4, v1 2175; GFX9-NEXT: v_add_u32_e32 v2, 0xffffffe8, v1 2176; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 2177; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 2178; GFX9-NEXT: v_add_u32_e32 v2, 0xffffffe8, v1 2179; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 2180; GFX9-NEXT: v_mul_lo_u32 v0, v0, 24 2181; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 2182; GFX9-NEXT: v_sub_u32_e32 v2, 23, v1 2183; GFX9-NEXT: s_lshr_b32 s2, s2, 1 2184; GFX9-NEXT: v_and_b32_e32 v2, 0xffffff, v2 2185; GFX9-NEXT: v_and_b32_e32 v1, 0xffffff, v1 2186; GFX9-NEXT: v_lshrrev_b32_e64 v2, v2, s2 2187; GFX9-NEXT: v_sub_u32_e32 v0, s5, v0 2188; GFX9-NEXT: v_lshl_or_b32 v1, s0, v1, v2 2189; GFX9-NEXT: v_add_u32_e32 v2, 0xffffffe8, v0 2190; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 2191; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 2192; GFX9-NEXT: 
v_add_u32_e32 v2, 0xffffffe8, v0 2193; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 2194; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 2195; GFX9-NEXT: v_sub_u32_e32 v2, 23, v0 2196; GFX9-NEXT: s_lshr_b32 s0, s3, 1 2197; GFX9-NEXT: v_and_b32_e32 v2, 0xffffff, v2 2198; GFX9-NEXT: v_and_b32_e32 v0, 0xffffff, v0 2199; GFX9-NEXT: v_lshrrev_b32_e64 v2, v2, s0 2200; GFX9-NEXT: v_mov_b32_e32 v3, 8 2201; GFX9-NEXT: v_lshl_or_b32 v0, s1, v0, v2 2202; GFX9-NEXT: v_mov_b32_e32 v2, 0xff 2203; GFX9-NEXT: v_lshlrev_b32_sdwa v3, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 2204; GFX9-NEXT: v_and_or_b32 v2, v1, v2, v3 2205; GFX9-NEXT: v_mov_b32_e32 v3, 16 2206; GFX9-NEXT: v_lshlrev_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 2207; GFX9-NEXT: v_and_b32_e32 v3, 0xff, v0 2208; GFX9-NEXT: v_lshlrev_b32_e32 v3, 24, v3 2209; GFX9-NEXT: v_or3_b32 v1, v2, v1, v3 2210; GFX9-NEXT: v_bfe_u32 v2, v0, 8, 8 2211; GFX9-NEXT: v_bfe_u32 v0, v0, 16, 8 2212; GFX9-NEXT: v_lshl_or_b32 v0, v0, 8, v2 2213; GFX9-NEXT: v_readfirstlane_b32 s0, v1 2214; GFX9-NEXT: v_readfirstlane_b32 s1, v0 2215; GFX9-NEXT: ; return to shader part epilog 2216; 2217; GFX10-LABEL: s_fshl_v2i24: 2218; GFX10: ; %bb.0: 2219; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 2220; GFX10-NEXT: s_lshr_b32 s6, s0, 8 2221; GFX10-NEXT: s_lshr_b32 s7, s0, 16 2222; GFX10-NEXT: s_and_b32 s6, s6, 0xff 2223; GFX10-NEXT: s_lshr_b32 s8, s0, 24 2224; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 2225; GFX10-NEXT: s_and_b32 s0, s0, 0xff 2226; GFX10-NEXT: s_lshl_b32 s6, s6, 8 2227; GFX10-NEXT: s_lshr_b32 s10, s4, 16 2228; GFX10-NEXT: s_or_b32 s0, s0, s6 2229; GFX10-NEXT: s_and_b32 s6, s7, 0xff 2230; GFX10-NEXT: s_lshr_b32 s7, s4, 8 2231; GFX10-NEXT: s_lshr_b32 s11, s4, 24 2232; GFX10-NEXT: s_and_b32 s7, s7, 0xff 2233; GFX10-NEXT: s_and_b32 s4, s4, 0xff 2234; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 2235; GFX10-NEXT: s_lshl_b32 s7, s7, 8 2236; GFX10-NEXT: s_lshr_b32 s12, s5, 8 2237; 
GFX10-NEXT: s_or_b32 s4, s4, s7 2238; GFX10-NEXT: s_and_b32 s7, s10, 0xff 2239; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0 2240; GFX10-NEXT: s_and_b32 s7, 0xffff, s7 2241; GFX10-NEXT: s_and_b32 s4, 0xffff, s4 2242; GFX10-NEXT: s_lshl_b32 s7, s7, 16 2243; GFX10-NEXT: s_and_b32 s5, s5, 0xff 2244; GFX10-NEXT: v_mul_lo_u32 v1, 0xffffffe8, v0 2245; GFX10-NEXT: s_or_b32 s4, s4, s7 2246; GFX10-NEXT: s_lshl_b32 s5, s5, 8 2247; GFX10-NEXT: s_and_b32 s7, s12, 0xff 2248; GFX10-NEXT: s_or_b32 s5, s11, s5 2249; GFX10-NEXT: s_and_b32 s7, 0xffff, s7 2250; GFX10-NEXT: s_and_b32 s5, 0xffff, s5 2251; GFX10-NEXT: s_lshl_b32 s7, s7, 16 2252; GFX10-NEXT: v_mul_hi_u32 v1, v0, v1 2253; GFX10-NEXT: s_or_b32 s5, s5, s7 2254; GFX10-NEXT: s_lshr_b32 s9, s1, 8 2255; GFX10-NEXT: s_and_b32 s1, s1, 0xff 2256; GFX10-NEXT: s_and_b32 s7, s9, 0xff 2257; GFX10-NEXT: s_lshl_b32 s1, s1, 8 2258; GFX10-NEXT: s_lshr_b32 s9, s2, 16 2259; GFX10-NEXT: s_or_b32 s1, s8, s1 2260; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v1 2261; GFX10-NEXT: s_lshr_b32 s8, s2, 8 2262; GFX10-NEXT: s_lshr_b32 s10, s2, 24 2263; GFX10-NEXT: s_and_b32 s8, s8, 0xff 2264; GFX10-NEXT: s_and_b32 s2, s2, 0xff 2265; GFX10-NEXT: v_mul_hi_u32 v1, s4, v0 2266; GFX10-NEXT: v_mul_hi_u32 v0, s5, v0 2267; GFX10-NEXT: s_lshl_b32 s8, s8, 8 2268; GFX10-NEXT: s_and_b32 s6, 0xffff, s6 2269; GFX10-NEXT: s_or_b32 s2, s2, s8 2270; GFX10-NEXT: s_and_b32 s7, 0xffff, s7 2271; GFX10-NEXT: s_and_b32 s2, 0xffff, s2 2272; GFX10-NEXT: s_and_b32 s0, 0xffff, s0 2273; GFX10-NEXT: v_mul_lo_u32 v1, v1, 24 2274; GFX10-NEXT: v_mul_lo_u32 v0, v0, 24 2275; GFX10-NEXT: s_lshl_b32 s6, s6, 16 2276; GFX10-NEXT: s_and_b32 s1, 0xffff, s1 2277; GFX10-NEXT: s_lshl_b32 s7, s7, 16 2278; GFX10-NEXT: s_or_b32 s0, s0, s6 2279; GFX10-NEXT: s_or_b32 s1, s1, s7 2280; GFX10-NEXT: v_sub_nc_u32_e32 v1, s4, v1 2281; GFX10-NEXT: v_sub_nc_u32_e32 v0, s5, v0 2282; GFX10-NEXT: s_lshr_b32 s4, s3, 8 2283; GFX10-NEXT: s_and_b32 s5, s9, 0xff 2284; GFX10-NEXT: s_and_b32 s3, s3, 0xff 2285; GFX10-NEXT: 
v_add_nc_u32_e32 v2, 0xffffffe8, v1 2286; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1 2287; GFX10-NEXT: v_add_nc_u32_e32 v3, 0xffffffe8, v0 2288; GFX10-NEXT: s_and_b32 s5, 0xffff, s5 2289; GFX10-NEXT: s_lshl_b32 s3, s3, 8 2290; GFX10-NEXT: s_and_b32 s4, s4, 0xff 2291; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo 2292; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 2293; GFX10-NEXT: s_lshl_b32 s5, s5, 16 2294; GFX10-NEXT: s_or_b32 s3, s10, s3 2295; GFX10-NEXT: s_and_b32 s4, 0xffff, s4 2296; GFX10-NEXT: v_add_nc_u32_e32 v2, 0xffffffe8, v1 2297; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo 2298; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1 2299; GFX10-NEXT: s_or_b32 s2, s2, s5 2300; GFX10-NEXT: s_and_b32 s3, 0xffff, s3 2301; GFX10-NEXT: s_lshl_b32 s4, s4, 16 2302; GFX10-NEXT: v_add_nc_u32_e32 v3, 0xffffffe8, v0 2303; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo 2304; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 2305; GFX10-NEXT: s_or_b32 s3, s3, s4 2306; GFX10-NEXT: s_lshr_b32 s2, s2, 1 2307; GFX10-NEXT: v_sub_nc_u32_e32 v2, 23, v1 2308; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo 2309; GFX10-NEXT: v_and_b32_e32 v1, 0xffffff, v1 2310; GFX10-NEXT: v_and_b32_e32 v2, 0xffffff, v2 2311; GFX10-NEXT: v_sub_nc_u32_e32 v3, 23, v0 2312; GFX10-NEXT: v_and_b32_e32 v0, 0xffffff, v0 2313; GFX10-NEXT: v_lshrrev_b32_e64 v2, v2, s2 2314; GFX10-NEXT: v_and_b32_e32 v3, 0xffffff, v3 2315; GFX10-NEXT: s_lshr_b32 s2, s3, 1 2316; GFX10-NEXT: v_lshl_or_b32 v1, s0, v1, v2 2317; GFX10-NEXT: v_lshrrev_b32_e64 v3, v3, s2 2318; GFX10-NEXT: v_mov_b32_e32 v2, 8 2319; GFX10-NEXT: v_lshl_or_b32 v0, s1, v0, v3 2320; GFX10-NEXT: v_mov_b32_e32 v3, 16 2321; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 2322; GFX10-NEXT: v_and_b32_e32 v4, 0xff, v0 2323; GFX10-NEXT: v_lshlrev_b32_sdwa v3, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 2324; GFX10-NEXT: v_and_or_b32 v1, 0xff, v1, v2 2325; GFX10-NEXT: 
v_lshlrev_b32_e32 v2, 24, v4 2326; GFX10-NEXT: v_bfe_u32 v4, v0, 8, 8 2327; GFX10-NEXT: v_bfe_u32 v0, v0, 16, 8 2328; GFX10-NEXT: v_or3_b32 v1, v1, v3, v2 2329; GFX10-NEXT: v_lshl_or_b32 v0, v0, 8, v4 2330; GFX10-NEXT: v_readfirstlane_b32 s0, v1 2331; GFX10-NEXT: v_readfirstlane_b32 s1, v0 2332; GFX10-NEXT: ; return to shader part epilog 2333; 2334; GFX11-LABEL: s_fshl_v2i24: 2335; GFX11: ; %bb.0: 2336; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 2337; GFX11-NEXT: s_lshr_b32 s6, s0, 8 2338; GFX11-NEXT: s_lshr_b32 s7, s0, 16 2339; GFX11-NEXT: s_and_b32 s6, s6, 0xff 2340; GFX11-NEXT: s_lshr_b32 s8, s0, 24 2341; GFX11-NEXT: v_rcp_iflag_f32_e32 v0, v0 2342; GFX11-NEXT: s_and_b32 s0, s0, 0xff 2343; GFX11-NEXT: s_lshl_b32 s6, s6, 8 2344; GFX11-NEXT: s_and_b32 s7, s7, 0xff 2345; GFX11-NEXT: s_or_b32 s0, s0, s6 2346; GFX11-NEXT: s_and_b32 s6, 0xffff, s7 2347; GFX11-NEXT: s_and_b32 s0, 0xffff, s0 2348; GFX11-NEXT: s_lshl_b32 s6, s6, 16 2349; GFX11-NEXT: s_lshr_b32 s7, s4, 16 2350; GFX11-NEXT: s_or_b32 s0, s0, s6 2351; GFX11-NEXT: s_waitcnt_depctr 0xfff 2352; GFX11-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 2353; GFX11-NEXT: s_lshr_b32 s6, s4, 8 2354; GFX11-NEXT: s_lshr_b32 s10, s4, 24 2355; GFX11-NEXT: s_and_b32 s6, s6, 0xff 2356; GFX11-NEXT: s_and_b32 s4, s4, 0xff 2357; GFX11-NEXT: v_cvt_u32_f32_e32 v0, v0 2358; GFX11-NEXT: s_lshl_b32 s6, s6, 8 2359; GFX11-NEXT: s_lshr_b32 s11, s5, 8 2360; GFX11-NEXT: s_or_b32 s4, s4, s6 2361; GFX11-NEXT: s_and_b32 s6, s7, 0xff 2362; GFX11-NEXT: v_mul_lo_u32 v1, 0xffffffe8, v0 2363; GFX11-NEXT: s_and_b32 s6, 0xffff, s6 2364; GFX11-NEXT: s_and_b32 s4, 0xffff, s4 2365; GFX11-NEXT: s_lshl_b32 s6, s6, 16 2366; GFX11-NEXT: s_and_b32 s5, s5, 0xff 2367; GFX11-NEXT: s_or_b32 s4, s4, s6 2368; GFX11-NEXT: s_lshl_b32 s5, s5, 8 2369; GFX11-NEXT: s_and_b32 s6, s11, 0xff 2370; GFX11-NEXT: v_mul_hi_u32 v1, v0, v1 2371; GFX11-NEXT: s_or_b32 s5, s10, s5 2372; GFX11-NEXT: s_and_b32 s6, 0xffff, s6 2373; GFX11-NEXT: s_and_b32 s5, 0xffff, s5 2374; GFX11-NEXT: 
s_lshl_b32 s6, s6, 16 2375; GFX11-NEXT: s_lshr_b32 s9, s1, 8 2376; GFX11-NEXT: s_or_b32 s5, s5, s6 2377; GFX11-NEXT: s_and_b32 s1, s1, 0xff 2378; GFX11-NEXT: v_add_nc_u32_e32 v0, v0, v1 2379; GFX11-NEXT: s_lshl_b32 s1, s1, 8 2380; GFX11-NEXT: s_lshr_b32 s7, s2, 8 2381; GFX11-NEXT: s_or_b32 s1, s8, s1 2382; GFX11-NEXT: s_lshr_b32 s8, s2, 16 2383; GFX11-NEXT: v_mul_hi_u32 v1, s4, v0 2384; GFX11-NEXT: v_mul_hi_u32 v0, s5, v0 2385; GFX11-NEXT: s_and_b32 s7, s7, 0xff 2386; GFX11-NEXT: s_and_b32 s6, s9, 0xff 2387; GFX11-NEXT: s_lshr_b32 s9, s2, 24 2388; GFX11-NEXT: s_and_b32 s2, s2, 0xff 2389; GFX11-NEXT: s_lshl_b32 s7, s7, 8 2390; GFX11-NEXT: s_and_b32 s6, 0xffff, s6 2391; GFX11-NEXT: v_mul_lo_u32 v1, v1, 24 2392; GFX11-NEXT: v_mul_lo_u32 v0, v0, 24 2393; GFX11-NEXT: s_or_b32 s2, s2, s7 2394; GFX11-NEXT: s_and_b32 s1, 0xffff, s1 2395; GFX11-NEXT: s_and_b32 s2, 0xffff, s2 2396; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 2397; GFX11-NEXT: v_sub_nc_u32_e32 v1, s4, v1 2398; GFX11-NEXT: v_sub_nc_u32_e32 v0, s5, v0 2399; GFX11-NEXT: s_and_b32 s5, s8, 0xff 2400; GFX11-NEXT: s_lshr_b32 s4, s3, 8 2401; GFX11-NEXT: s_and_b32 s5, 0xffff, s5 2402; GFX11-NEXT: v_add_nc_u32_e32 v2, 0xffffffe8, v1 2403; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1 2404; GFX11-NEXT: v_add_nc_u32_e32 v3, 0xffffffe8, v0 2405; GFX11-NEXT: s_and_b32 s3, s3, 0xff 2406; GFX11-NEXT: s_lshl_b32 s5, s5, 16 2407; GFX11-NEXT: s_lshl_b32 s3, s3, 8 2408; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo 2409; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 2410; GFX11-NEXT: s_and_b32 s4, s4, 0xff 2411; GFX11-NEXT: s_or_b32 s2, s2, s5 2412; GFX11-NEXT: s_or_b32 s3, s9, s3 2413; GFX11-NEXT: v_add_nc_u32_e32 v2, 0xffffffe8, v1 2414; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo 2415; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1 2416; GFX11-NEXT: s_and_b32 s4, 0xffff, s4 2417; GFX11-NEXT: s_lshr_b32 s2, s2, 1 2418; GFX11-NEXT: s_and_b32 s3, 0xffff, s3 2419; GFX11-NEXT: s_lshl_b32 s4, 
s4, 16 2420; GFX11-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo 2421; GFX11-NEXT: s_or_b32 s3, s3, s4 2422; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) 2423; GFX11-NEXT: s_lshr_b32 s3, s3, 1 2424; GFX11-NEXT: v_sub_nc_u32_e32 v2, 23, v1 2425; GFX11-NEXT: v_and_b32_e32 v1, 0xffffff, v1 2426; GFX11-NEXT: v_add_nc_u32_e32 v3, 0xffffffe8, v0 2427; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 2428; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) 2429; GFX11-NEXT: v_and_b32_e32 v2, 0xffffff, v2 2430; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc_lo 2431; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) 2432; GFX11-NEXT: v_lshrrev_b32_e64 v2, v2, s2 2433; GFX11-NEXT: s_lshl_b32 s2, s6, 16 2434; GFX11-NEXT: v_sub_nc_u32_e32 v3, 23, v0 2435; GFX11-NEXT: v_and_b32_e32 v0, 0xffffff, v0 2436; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3) 2437; GFX11-NEXT: v_lshl_or_b32 v1, s0, v1, v2 2438; GFX11-NEXT: s_or_b32 s0, s1, s2 2439; GFX11-NEXT: v_and_b32_e32 v3, 0xffffff, v3 2440; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 2441; GFX11-NEXT: v_bfe_u32 v2, v1, 8, 8 2442; GFX11-NEXT: v_lshrrev_b32_e64 v3, v3, s3 2443; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 2444; GFX11-NEXT: v_lshlrev_b32_e32 v2, 8, v2 2445; GFX11-NEXT: v_lshl_or_b32 v0, s0, v0, v3 2446; GFX11-NEXT: v_bfe_u32 v3, v1, 16, 8 2447; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 2448; GFX11-NEXT: v_and_or_b32 v1, 0xff, v1, v2 2449; GFX11-NEXT: v_and_b32_e32 v4, 0xff, v0 2450; GFX11-NEXT: v_bfe_u32 v5, v0, 8, 8 2451; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4) 2452; GFX11-NEXT: v_lshlrev_b32_e32 v3, 16, v3 2453; GFX11-NEXT: v_bfe_u32 v0, v0, 16, 8 2454; GFX11-NEXT: v_lshlrev_b32_e32 v4, 24, v4 2455; GFX11-NEXT: 
s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 2456; GFX11-NEXT: v_lshl_or_b32 v0, v0, 8, v5 2457; GFX11-NEXT: v_or3_b32 v1, v1, v3, v4 2458; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 2459; GFX11-NEXT: v_readfirstlane_b32 s1, v0 2460; GFX11-NEXT: v_readfirstlane_b32 s0, v1 2461; GFX11-NEXT: ; return to shader part epilog 2462 %lhs = bitcast i48 %lhs.arg to <2 x i24> 2463 %rhs = bitcast i48 %rhs.arg to <2 x i24> 2464 %amt = bitcast i48 %amt.arg to <2 x i24> 2465 %result = call <2 x i24> @llvm.fshl.v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) 2466 %cast.result = bitcast <2 x i24> %result to i48 2467 ret i48 %cast.result 2468} 2469 2470define <2 x i24> @v_fshl_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) { 2471; GFX6-LABEL: v_fshl_v2i24: 2472; GFX6: ; %bb.0: 2473; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2474; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v6, 24 2475; GFX6-NEXT: v_rcp_iflag_f32_e32 v6, v6 2476; GFX6-NEXT: v_not_b32_e32 v7, 23 2477; GFX6-NEXT: v_and_b32_e32 v4, 0xffffff, v4 2478; GFX6-NEXT: v_and_b32_e32 v5, 0xffffff, v5 2479; GFX6-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 2480; GFX6-NEXT: v_cvt_u32_f32_e32 v6, v6 2481; GFX6-NEXT: v_bfe_u32 v2, v2, 1, 23 2482; GFX6-NEXT: v_mul_lo_u32 v8, v6, v7 2483; GFX6-NEXT: v_mul_hi_u32 v8, v6, v8 2484; GFX6-NEXT: v_add_i32_e32 v6, vcc, v6, v8 2485; GFX6-NEXT: v_mul_hi_u32 v8, v4, v6 2486; GFX6-NEXT: v_mul_hi_u32 v6, v5, v6 2487; GFX6-NEXT: v_mul_lo_u32 v8, v8, 24 2488; GFX6-NEXT: v_mul_lo_u32 v6, v6, 24 2489; GFX6-NEXT: v_sub_i32_e32 v4, vcc, v4, v8 2490; GFX6-NEXT: v_add_i32_e32 v8, vcc, v4, v7 2491; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 2492; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc 2493; GFX6-NEXT: v_add_i32_e32 v8, vcc, v4, v7 2494; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 2495; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc 2496; GFX6-NEXT: v_sub_i32_e32 v8, vcc, 23, v4 2497; GFX6-NEXT: v_and_b32_e32 v4, 0xffffff, v4 2498; 
GFX6-NEXT: v_lshlrev_b32_e32 v0, v4, v0 2499; GFX6-NEXT: v_and_b32_e32 v4, 0xffffff, v8 2500; GFX6-NEXT: v_lshrrev_b32_e32 v2, v4, v2 2501; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 2502; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v5, v6 2503; GFX6-NEXT: v_add_i32_e32 v4, vcc, v2, v7 2504; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 2505; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 2506; GFX6-NEXT: v_add_i32_e32 v4, vcc, 0xffffffe8, v2 2507; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 2508; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 2509; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 23, v2 2510; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v2 2511; GFX6-NEXT: v_lshlrev_b32_e32 v1, v2, v1 2512; GFX6-NEXT: v_bfe_u32 v2, v3, 1, 23 2513; GFX6-NEXT: v_and_b32_e32 v3, 0xffffff, v4 2514; GFX6-NEXT: v_lshrrev_b32_e32 v2, v3, v2 2515; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 2516; GFX6-NEXT: s_setpc_b64 s[30:31] 2517; 2518; GFX8-LABEL: v_fshl_v2i24: 2519; GFX8: ; %bb.0: 2520; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2521; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v6, 24 2522; GFX8-NEXT: v_rcp_iflag_f32_e32 v6, v6 2523; GFX8-NEXT: v_not_b32_e32 v7, 23 2524; GFX8-NEXT: v_and_b32_e32 v4, 0xffffff, v4 2525; GFX8-NEXT: v_and_b32_e32 v5, 0xffffff, v5 2526; GFX8-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 2527; GFX8-NEXT: v_cvt_u32_f32_e32 v6, v6 2528; GFX8-NEXT: v_bfe_u32 v2, v2, 1, 23 2529; GFX8-NEXT: v_mul_lo_u32 v8, v6, v7 2530; GFX8-NEXT: v_mul_hi_u32 v8, v6, v8 2531; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v8 2532; GFX8-NEXT: v_mul_hi_u32 v8, v4, v6 2533; GFX8-NEXT: v_mul_hi_u32 v6, v5, v6 2534; GFX8-NEXT: v_mul_lo_u32 v8, v8, 24 2535; GFX8-NEXT: v_mul_lo_u32 v6, v6, 24 2536; GFX8-NEXT: v_sub_u32_e32 v4, vcc, v4, v8 2537; GFX8-NEXT: v_add_u32_e32 v8, vcc, v4, v7 2538; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 2539; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc 2540; GFX8-NEXT: v_add_u32_e32 v8, vcc, v4, v7 2541; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 2542; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc 2543; GFX8-NEXT: 
v_sub_u32_e32 v8, vcc, 23, v4 2544; GFX8-NEXT: v_and_b32_e32 v4, 0xffffff, v4 2545; GFX8-NEXT: v_lshlrev_b32_e32 v0, v4, v0 2546; GFX8-NEXT: v_and_b32_e32 v4, 0xffffff, v8 2547; GFX8-NEXT: v_lshrrev_b32_e32 v2, v4, v2 2548; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 2549; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v5, v6 2550; GFX8-NEXT: v_add_u32_e32 v4, vcc, v2, v7 2551; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 2552; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 2553; GFX8-NEXT: v_add_u32_e32 v4, vcc, 0xffffffe8, v2 2554; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 2555; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 2556; GFX8-NEXT: v_sub_u32_e32 v4, vcc, 23, v2 2557; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v2 2558; GFX8-NEXT: v_lshlrev_b32_e32 v1, v2, v1 2559; GFX8-NEXT: v_bfe_u32 v2, v3, 1, 23 2560; GFX8-NEXT: v_and_b32_e32 v3, 0xffffff, v4 2561; GFX8-NEXT: v_lshrrev_b32_e32 v2, v3, v2 2562; GFX8-NEXT: v_or_b32_e32 v1, v1, v2 2563; GFX8-NEXT: s_setpc_b64 s[30:31] 2564; 2565; GFX9-LABEL: v_fshl_v2i24: 2566; GFX9: ; %bb.0: 2567; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2568; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v6, 24 2569; GFX9-NEXT: v_rcp_iflag_f32_e32 v6, v6 2570; GFX9-NEXT: v_not_b32_e32 v7, 23 2571; GFX9-NEXT: v_and_b32_e32 v4, 0xffffff, v4 2572; GFX9-NEXT: v_and_b32_e32 v5, 0xffffff, v5 2573; GFX9-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 2574; GFX9-NEXT: v_cvt_u32_f32_e32 v6, v6 2575; GFX9-NEXT: v_bfe_u32 v2, v2, 1, 23 2576; GFX9-NEXT: v_bfe_u32 v3, v3, 1, 23 2577; GFX9-NEXT: v_mul_lo_u32 v7, v6, v7 2578; GFX9-NEXT: v_mul_hi_u32 v7, v6, v7 2579; GFX9-NEXT: v_add_u32_e32 v6, v6, v7 2580; GFX9-NEXT: v_mul_hi_u32 v7, v4, v6 2581; GFX9-NEXT: v_mul_hi_u32 v6, v5, v6 2582; GFX9-NEXT: v_mul_lo_u32 v7, v7, 24 2583; GFX9-NEXT: v_mul_lo_u32 v6, v6, 24 2584; GFX9-NEXT: v_sub_u32_e32 v4, v4, v7 2585; GFX9-NEXT: v_sub_u32_e32 v5, v5, v6 2586; GFX9-NEXT: v_add_u32_e32 v6, 0xffffffe8, v4 2587; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 2588; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc 2589; 
GFX9-NEXT: v_add_u32_e32 v6, 0xffffffe8, v4 2590; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 2591; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc 2592; GFX9-NEXT: v_sub_u32_e32 v6, 23, v4 2593; GFX9-NEXT: v_and_b32_e32 v6, 0xffffff, v6 2594; GFX9-NEXT: v_add_u32_e32 v7, 0xffffffe8, v5 2595; GFX9-NEXT: v_and_b32_e32 v4, 0xffffff, v4 2596; GFX9-NEXT: v_lshrrev_b32_e32 v2, v6, v2 2597; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v5 2598; GFX9-NEXT: v_lshl_or_b32 v0, v0, v4, v2 2599; GFX9-NEXT: v_cndmask_b32_e32 v2, v5, v7, vcc 2600; GFX9-NEXT: v_add_u32_e32 v4, 0xffffffe8, v2 2601; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 2602; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 2603; GFX9-NEXT: v_sub_u32_e32 v4, 23, v2 2604; GFX9-NEXT: v_and_b32_e32 v4, 0xffffff, v4 2605; GFX9-NEXT: v_and_b32_e32 v2, 0xffffff, v2 2606; GFX9-NEXT: v_lshrrev_b32_e32 v3, v4, v3 2607; GFX9-NEXT: v_lshl_or_b32 v1, v1, v2, v3 2608; GFX9-NEXT: s_setpc_b64 s[30:31] 2609; 2610; GFX10-LABEL: v_fshl_v2i24: 2611; GFX10: ; %bb.0: 2612; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2613; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v6, 24 2614; GFX10-NEXT: v_and_b32_e32 v4, 0xffffff, v4 2615; GFX10-NEXT: v_and_b32_e32 v5, 0xffffff, v5 2616; GFX10-NEXT: v_bfe_u32 v2, v2, 1, 23 2617; GFX10-NEXT: v_bfe_u32 v3, v3, 1, 23 2618; GFX10-NEXT: v_rcp_iflag_f32_e32 v6, v6 2619; GFX10-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 2620; GFX10-NEXT: v_cvt_u32_f32_e32 v6, v6 2621; GFX10-NEXT: v_mul_lo_u32 v7, 0xffffffe8, v6 2622; GFX10-NEXT: v_mul_hi_u32 v7, v6, v7 2623; GFX10-NEXT: v_add_nc_u32_e32 v6, v6, v7 2624; GFX10-NEXT: v_mul_hi_u32 v7, v4, v6 2625; GFX10-NEXT: v_mul_hi_u32 v6, v5, v6 2626; GFX10-NEXT: v_mul_lo_u32 v7, v7, 24 2627; GFX10-NEXT: v_mul_lo_u32 v6, v6, 24 2628; GFX10-NEXT: v_sub_nc_u32_e32 v4, v4, v7 2629; GFX10-NEXT: v_sub_nc_u32_e32 v5, v5, v6 2630; GFX10-NEXT: v_add_nc_u32_e32 v6, 0xffffffe8, v4 2631; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v4 2632; GFX10-NEXT: v_add_nc_u32_e32 v7, 0xffffffe8, v5 2633; GFX10-NEXT: 
v_cndmask_b32_e32 v4, v4, v6, vcc_lo 2634; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v5 2635; GFX10-NEXT: v_add_nc_u32_e32 v6, 0xffffffe8, v4 2636; GFX10-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc_lo 2637; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v4 2638; GFX10-NEXT: v_add_nc_u32_e32 v7, 0xffffffe8, v5 2639; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc_lo 2640; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v5 2641; GFX10-NEXT: v_sub_nc_u32_e32 v6, 23, v4 2642; GFX10-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc_lo 2643; GFX10-NEXT: v_and_b32_e32 v4, 0xffffff, v4 2644; GFX10-NEXT: v_and_b32_e32 v6, 0xffffff, v6 2645; GFX10-NEXT: v_sub_nc_u32_e32 v7, 23, v5 2646; GFX10-NEXT: v_and_b32_e32 v5, 0xffffff, v5 2647; GFX10-NEXT: v_lshrrev_b32_e32 v2, v6, v2 2648; GFX10-NEXT: v_and_b32_e32 v7, 0xffffff, v7 2649; GFX10-NEXT: v_lshl_or_b32 v0, v0, v4, v2 2650; GFX10-NEXT: v_lshrrev_b32_e32 v3, v7, v3 2651; GFX10-NEXT: v_lshl_or_b32 v1, v1, v5, v3 2652; GFX10-NEXT: s_setpc_b64 s[30:31] 2653; 2654; GFX11-LABEL: v_fshl_v2i24: 2655; GFX11: ; %bb.0: 2656; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2657; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v6, 24 2658; GFX11-NEXT: v_and_b32_e32 v4, 0xffffff, v4 2659; GFX11-NEXT: v_bfe_u32 v2, v2, 1, 23 2660; GFX11-NEXT: v_bfe_u32 v3, v3, 1, 23 2661; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_1) 2662; GFX11-NEXT: v_rcp_iflag_f32_e32 v6, v6 2663; GFX11-NEXT: s_waitcnt_depctr 0xfff 2664; GFX11-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 2665; GFX11-NEXT: v_cvt_u32_f32_e32 v6, v6 2666; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 2667; GFX11-NEXT: v_mul_lo_u32 v7, 0xffffffe8, v6 2668; GFX11-NEXT: v_mul_hi_u32 v7, v6, v7 2669; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 2670; GFX11-NEXT: v_add_nc_u32_e32 v6, v6, v7 2671; GFX11-NEXT: v_mul_hi_u32 v7, v4, v6 2672; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 2673; 
GFX11-NEXT: v_mul_lo_u32 v7, v7, 24 2674; GFX11-NEXT: v_sub_nc_u32_e32 v4, v4, v7 2675; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) 2676; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v4 2677; GFX11-NEXT: v_and_b32_e32 v5, 0xffffff, v5 2678; GFX11-NEXT: v_mul_hi_u32 v6, v5, v6 2679; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 2680; GFX11-NEXT: v_mul_lo_u32 v6, v6, 24 2681; GFX11-NEXT: v_sub_nc_u32_e32 v5, v5, v6 2682; GFX11-NEXT: v_add_nc_u32_e32 v6, 0xffffffe8, v4 2683; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3) 2684; GFX11-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc_lo 2685; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v5 2686; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1) 2687; GFX11-NEXT: v_add_nc_u32_e32 v6, 0xffffffe8, v4 2688; GFX11-NEXT: v_add_nc_u32_e32 v7, 0xffffffe8, v5 2689; GFX11-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc_lo 2690; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v4 2691; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2) 2692; GFX11-NEXT: v_dual_cndmask_b32 v4, v4, v6 :: v_dual_add_nc_u32 v7, 0xffffffe8, v5 2693; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v5 2694; GFX11-NEXT: v_sub_nc_u32_e32 v6, 23, v4 2695; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) 2696; GFX11-NEXT: v_dual_cndmask_b32 v5, v5, v7 :: v_dual_and_b32 v4, 0xffffff, v4 2697; GFX11-NEXT: v_and_b32_e32 v6, 0xffffff, v6 2698; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3) 2699; GFX11-NEXT: v_sub_nc_u32_e32 v7, 23, v5 2700; GFX11-NEXT: v_and_b32_e32 v5, 0xffffff, v5 2701; GFX11-NEXT: v_lshrrev_b32_e32 v2, v6, v2 2702; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) 2703; GFX11-NEXT: v_and_b32_e32 v7, 0xffffff, v7 2704; GFX11-NEXT: v_lshl_or_b32 v0, v0, v4, v2 2705; GFX11-NEXT: s_delay_alu 
instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 2706; GFX11-NEXT: v_lshrrev_b32_e32 v3, v7, v3 2707; GFX11-NEXT: v_lshl_or_b32 v1, v1, v5, v3 2708; GFX11-NEXT: s_setpc_b64 s[30:31] 2709 %result = call <2 x i24> @llvm.fshl.v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) 2710 ret <2 x i24> %result 2711} 2712 2713define amdgpu_ps i32 @s_fshl_i32(i32 inreg %lhs, i32 inreg %rhs, i32 inreg %amt) { 2714; GFX6-LABEL: s_fshl_i32: 2715; GFX6: ; %bb.0: 2716; GFX6-NEXT: v_mov_b32_e32 v0, s1 2717; GFX6-NEXT: s_not_b32 s1, s2 2718; GFX6-NEXT: v_alignbit_b32 v0, s0, v0, 1 2719; GFX6-NEXT: s_lshr_b32 s0, s0, 1 2720; GFX6-NEXT: v_mov_b32_e32 v1, s1 2721; GFX6-NEXT: v_alignbit_b32 v0, s0, v0, v1 2722; GFX6-NEXT: v_readfirstlane_b32 s0, v0 2723; GFX6-NEXT: ; return to shader part epilog 2724; 2725; GFX8-LABEL: s_fshl_i32: 2726; GFX8: ; %bb.0: 2727; GFX8-NEXT: v_mov_b32_e32 v0, s1 2728; GFX8-NEXT: s_not_b32 s1, s2 2729; GFX8-NEXT: v_alignbit_b32 v0, s0, v0, 1 2730; GFX8-NEXT: s_lshr_b32 s0, s0, 1 2731; GFX8-NEXT: v_mov_b32_e32 v1, s1 2732; GFX8-NEXT: v_alignbit_b32 v0, s0, v0, v1 2733; GFX8-NEXT: v_readfirstlane_b32 s0, v0 2734; GFX8-NEXT: ; return to shader part epilog 2735; 2736; GFX9-LABEL: s_fshl_i32: 2737; GFX9: ; %bb.0: 2738; GFX9-NEXT: v_mov_b32_e32 v0, s1 2739; GFX9-NEXT: s_not_b32 s1, s2 2740; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, 1 2741; GFX9-NEXT: s_lshr_b32 s0, s0, 1 2742; GFX9-NEXT: v_mov_b32_e32 v1, s1 2743; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, v1 2744; GFX9-NEXT: v_readfirstlane_b32 s0, v0 2745; GFX9-NEXT: ; return to shader part epilog 2746; 2747; GFX10-LABEL: s_fshl_i32: 2748; GFX10: ; %bb.0: 2749; GFX10-NEXT: v_alignbit_b32 v0, s0, s1, 1 2750; GFX10-NEXT: s_lshr_b32 s0, s0, 1 2751; GFX10-NEXT: s_not_b32 s1, s2 2752; GFX10-NEXT: v_alignbit_b32 v0, s0, v0, s1 2753; GFX10-NEXT: v_readfirstlane_b32 s0, v0 2754; GFX10-NEXT: ; return to shader part epilog 2755; 2756; GFX11-LABEL: s_fshl_i32: 2757; GFX11: ; %bb.0: 2758; GFX11-NEXT: v_alignbit_b32 v0, s0, 
s1, 1 2759; GFX11-NEXT: s_lshr_b32 s0, s0, 1 2760; GFX11-NEXT: s_not_b32 s1, s2 2761; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) 2762; GFX11-NEXT: v_alignbit_b32 v0, s0, v0, s1 2763; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 2764; GFX11-NEXT: v_readfirstlane_b32 s0, v0 2765; GFX11-NEXT: ; return to shader part epilog 2766 %result = call i32 @llvm.fshl.i32(i32 %lhs, i32 %rhs, i32 %amt) 2767 ret i32 %result 2768} 2769 2770define amdgpu_ps i32 @s_fshl_i32_5(i32 inreg %lhs, i32 inreg %rhs) { 2771; GFX6-LABEL: s_fshl_i32_5: 2772; GFX6: ; %bb.0: 2773; GFX6-NEXT: v_mov_b32_e32 v0, s1 2774; GFX6-NEXT: v_alignbit_b32 v0, s0, v0, 27 2775; GFX6-NEXT: v_readfirstlane_b32 s0, v0 2776; GFX6-NEXT: ; return to shader part epilog 2777; 2778; GFX8-LABEL: s_fshl_i32_5: 2779; GFX8: ; %bb.0: 2780; GFX8-NEXT: v_mov_b32_e32 v0, s1 2781; GFX8-NEXT: v_alignbit_b32 v0, s0, v0, 27 2782; GFX8-NEXT: v_readfirstlane_b32 s0, v0 2783; GFX8-NEXT: ; return to shader part epilog 2784; 2785; GFX9-LABEL: s_fshl_i32_5: 2786; GFX9: ; %bb.0: 2787; GFX9-NEXT: v_mov_b32_e32 v0, s1 2788; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, 27 2789; GFX9-NEXT: v_readfirstlane_b32 s0, v0 2790; GFX9-NEXT: ; return to shader part epilog 2791; 2792; GFX10-LABEL: s_fshl_i32_5: 2793; GFX10: ; %bb.0: 2794; GFX10-NEXT: v_alignbit_b32 v0, s0, s1, 27 2795; GFX10-NEXT: v_readfirstlane_b32 s0, v0 2796; GFX10-NEXT: ; return to shader part epilog 2797; 2798; GFX11-LABEL: s_fshl_i32_5: 2799; GFX11: ; %bb.0: 2800; GFX11-NEXT: v_alignbit_b32 v0, s0, s1, 27 2801; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 2802; GFX11-NEXT: v_readfirstlane_b32 s0, v0 2803; GFX11-NEXT: ; return to shader part epilog 2804 %result = call i32 @llvm.fshl.i32(i32 %lhs, i32 %rhs, i32 5) 2805 ret i32 %result 2806} 2807 2808define amdgpu_ps i32 @s_fshl_i32_8(i32 inreg %lhs, i32 inreg %rhs) { 2809; GFX6-LABEL: s_fshl_i32_8: 2810; GFX6: ; %bb.0: 2811; GFX6-NEXT: v_mov_b32_e32 v0, s1 2812; GFX6-NEXT: v_alignbit_b32 v0, s0, v0, 24 2813; 
GFX6-NEXT: v_readfirstlane_b32 s0, v0 2814; GFX6-NEXT: ; return to shader part epilog 2815; 2816; GFX8-LABEL: s_fshl_i32_8: 2817; GFX8: ; %bb.0: 2818; GFX8-NEXT: v_mov_b32_e32 v0, s1 2819; GFX8-NEXT: v_alignbit_b32 v0, s0, v0, 24 2820; GFX8-NEXT: v_readfirstlane_b32 s0, v0 2821; GFX8-NEXT: ; return to shader part epilog 2822; 2823; GFX9-LABEL: s_fshl_i32_8: 2824; GFX9: ; %bb.0: 2825; GFX9-NEXT: v_mov_b32_e32 v0, s1 2826; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, 24 2827; GFX9-NEXT: v_readfirstlane_b32 s0, v0 2828; GFX9-NEXT: ; return to shader part epilog 2829; 2830; GFX10-LABEL: s_fshl_i32_8: 2831; GFX10: ; %bb.0: 2832; GFX10-NEXT: v_alignbit_b32 v0, s0, s1, 24 2833; GFX10-NEXT: v_readfirstlane_b32 s0, v0 2834; GFX10-NEXT: ; return to shader part epilog 2835; 2836; GFX11-LABEL: s_fshl_i32_8: 2837; GFX11: ; %bb.0: 2838; GFX11-NEXT: v_alignbit_b32 v0, s0, s1, 24 2839; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 2840; GFX11-NEXT: v_readfirstlane_b32 s0, v0 2841; GFX11-NEXT: ; return to shader part epilog 2842 %result = call i32 @llvm.fshl.i32(i32 %lhs, i32 %rhs, i32 8) 2843 ret i32 %result 2844} 2845 2846define i32 @v_fshl_i32(i32 %lhs, i32 %rhs, i32 %amt) { 2847; GCN-LABEL: v_fshl_i32: 2848; GCN: ; %bb.0: 2849; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2850; GCN-NEXT: v_alignbit_b32 v1, v0, v1, 1 2851; GCN-NEXT: v_lshrrev_b32_e32 v0, 1, v0 2852; GCN-NEXT: v_not_b32_e32 v2, v2 2853; GCN-NEXT: v_alignbit_b32 v0, v0, v1, v2 2854; GCN-NEXT: s_setpc_b64 s[30:31] 2855; 2856; GFX11-LABEL: v_fshl_i32: 2857; GFX11: ; %bb.0: 2858; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2859; GFX11-NEXT: v_alignbit_b32 v1, v0, v1, 1 2860; GFX11-NEXT: v_lshrrev_b32_e32 v0, 1, v0 2861; GFX11-NEXT: v_not_b32_e32 v2, v2 2862; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 2863; GFX11-NEXT: v_alignbit_b32 v0, v0, v1, v2 2864; GFX11-NEXT: s_setpc_b64 s[30:31] 2865 %result = call i32 @llvm.fshl.i32(i32 %lhs, i32 %rhs, i32 %amt) 2866 ret i32 %result 2867} 2868 2869define i32 
@v_fshl_i32_5(i32 %lhs, i32 %rhs) { 2870; GCN-LABEL: v_fshl_i32_5: 2871; GCN: ; %bb.0: 2872; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2873; GCN-NEXT: v_alignbit_b32 v0, v0, v1, 27 2874; GCN-NEXT: s_setpc_b64 s[30:31] 2875; 2876; GFX11-LABEL: v_fshl_i32_5: 2877; GFX11: ; %bb.0: 2878; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2879; GFX11-NEXT: v_alignbit_b32 v0, v0, v1, 27 2880; GFX11-NEXT: s_setpc_b64 s[30:31] 2881 %result = call i32 @llvm.fshl.i32(i32 %lhs, i32 %rhs, i32 5) 2882 ret i32 %result 2883} 2884 2885define i32 @v_fshl_i32_8(i32 %lhs, i32 %rhs) { 2886; GCN-LABEL: v_fshl_i32_8: 2887; GCN: ; %bb.0: 2888; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2889; GCN-NEXT: v_alignbit_b32 v0, v0, v1, 24 2890; GCN-NEXT: s_setpc_b64 s[30:31] 2891; 2892; GFX11-LABEL: v_fshl_i32_8: 2893; GFX11: ; %bb.0: 2894; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2895; GFX11-NEXT: v_alignbit_b32 v0, v0, v1, 24 2896; GFX11-NEXT: s_setpc_b64 s[30:31] 2897 %result = call i32 @llvm.fshl.i32(i32 %lhs, i32 %rhs, i32 8) 2898 ret i32 %result 2899} 2900 2901define amdgpu_ps float @v_fshl_i32_ssv(i32 inreg %lhs, i32 inreg %rhs, i32 %amt) { 2902; GFX6-LABEL: v_fshl_i32_ssv: 2903; GFX6: ; %bb.0: 2904; GFX6-NEXT: v_mov_b32_e32 v1, s1 2905; GFX6-NEXT: v_alignbit_b32 v1, s0, v1, 1 2906; GFX6-NEXT: s_lshr_b32 s0, s0, 1 2907; GFX6-NEXT: v_not_b32_e32 v0, v0 2908; GFX6-NEXT: v_alignbit_b32 v0, s0, v1, v0 2909; GFX6-NEXT: ; return to shader part epilog 2910; 2911; GFX8-LABEL: v_fshl_i32_ssv: 2912; GFX8: ; %bb.0: 2913; GFX8-NEXT: v_mov_b32_e32 v1, s1 2914; GFX8-NEXT: v_alignbit_b32 v1, s0, v1, 1 2915; GFX8-NEXT: s_lshr_b32 s0, s0, 1 2916; GFX8-NEXT: v_not_b32_e32 v0, v0 2917; GFX8-NEXT: v_alignbit_b32 v0, s0, v1, v0 2918; GFX8-NEXT: ; return to shader part epilog 2919; 2920; GFX9-LABEL: v_fshl_i32_ssv: 2921; GFX9: ; %bb.0: 2922; GFX9-NEXT: v_mov_b32_e32 v1, s1 2923; GFX9-NEXT: v_alignbit_b32 v1, s0, v1, 1 2924; GFX9-NEXT: s_lshr_b32 s0, s0, 1 2925; GFX9-NEXT: 
v_not_b32_e32 v0, v0 2926; GFX9-NEXT: v_alignbit_b32 v0, s0, v1, v0 2927; GFX9-NEXT: ; return to shader part epilog 2928; 2929; GFX10-LABEL: v_fshl_i32_ssv: 2930; GFX10: ; %bb.0: 2931; GFX10-NEXT: v_alignbit_b32 v1, s0, s1, 1 2932; GFX10-NEXT: v_not_b32_e32 v0, v0 2933; GFX10-NEXT: s_lshr_b32 s0, s0, 1 2934; GFX10-NEXT: v_alignbit_b32 v0, s0, v1, v0 2935; GFX10-NEXT: ; return to shader part epilog 2936; 2937; GFX11-LABEL: v_fshl_i32_ssv: 2938; GFX11: ; %bb.0: 2939; GFX11-NEXT: v_alignbit_b32 v1, s0, s1, 1 2940; GFX11-NEXT: v_not_b32_e32 v0, v0 2941; GFX11-NEXT: s_lshr_b32 s0, s0, 1 2942; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) 2943; GFX11-NEXT: v_alignbit_b32 v0, s0, v1, v0 2944; GFX11-NEXT: ; return to shader part epilog 2945 %result = call i32 @llvm.fshl.i32(i32 %lhs, i32 %rhs, i32 %amt) 2946 %cast.result = bitcast i32 %result to float 2947 ret float %cast.result 2948} 2949 2950define amdgpu_ps float @v_fshl_i32_svs(i32 inreg %lhs, i32 %rhs, i32 inreg %amt) { 2951; GFX6-LABEL: v_fshl_i32_svs: 2952; GFX6: ; %bb.0: 2953; GFX6-NEXT: s_not_b32 s1, s1 2954; GFX6-NEXT: v_alignbit_b32 v0, s0, v0, 1 2955; GFX6-NEXT: s_lshr_b32 s0, s0, 1 2956; GFX6-NEXT: v_mov_b32_e32 v1, s1 2957; GFX6-NEXT: v_alignbit_b32 v0, s0, v0, v1 2958; GFX6-NEXT: ; return to shader part epilog 2959; 2960; GFX8-LABEL: v_fshl_i32_svs: 2961; GFX8: ; %bb.0: 2962; GFX8-NEXT: s_not_b32 s1, s1 2963; GFX8-NEXT: v_alignbit_b32 v0, s0, v0, 1 2964; GFX8-NEXT: s_lshr_b32 s0, s0, 1 2965; GFX8-NEXT: v_mov_b32_e32 v1, s1 2966; GFX8-NEXT: v_alignbit_b32 v0, s0, v0, v1 2967; GFX8-NEXT: ; return to shader part epilog 2968; 2969; GFX9-LABEL: v_fshl_i32_svs: 2970; GFX9: ; %bb.0: 2971; GFX9-NEXT: s_not_b32 s1, s1 2972; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, 1 2973; GFX9-NEXT: s_lshr_b32 s0, s0, 1 2974; GFX9-NEXT: v_mov_b32_e32 v1, s1 2975; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, v1 2976; GFX9-NEXT: ; return to shader part epilog 2977; 2978; GFX10-LABEL: v_fshl_i32_svs: 2979; GFX10: ; %bb.0: 
2980; GFX10-NEXT: v_alignbit_b32 v0, s0, v0, 1 2981; GFX10-NEXT: s_lshr_b32 s0, s0, 1 2982; GFX10-NEXT: s_not_b32 s1, s1 2983; GFX10-NEXT: v_alignbit_b32 v0, s0, v0, s1 2984; GFX10-NEXT: ; return to shader part epilog 2985; 2986; GFX11-LABEL: v_fshl_i32_svs: 2987; GFX11: ; %bb.0: 2988; GFX11-NEXT: v_alignbit_b32 v0, s0, v0, 1 2989; GFX11-NEXT: s_lshr_b32 s0, s0, 1 2990; GFX11-NEXT: s_not_b32 s1, s1 2991; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) 2992; GFX11-NEXT: v_alignbit_b32 v0, s0, v0, s1 2993; GFX11-NEXT: ; return to shader part epilog 2994 %result = call i32 @llvm.fshl.i32(i32 %lhs, i32 %rhs, i32 %amt) 2995 %cast.result = bitcast i32 %result to float 2996 ret float %cast.result 2997} 2998 2999define amdgpu_ps float @v_fshl_i32_vss(i32 inreg %lhs, i32 inreg %rhs, i32 inreg %amt) { 3000; GFX6-LABEL: v_fshl_i32_vss: 3001; GFX6: ; %bb.0: 3002; GFX6-NEXT: v_mov_b32_e32 v0, s1 3003; GFX6-NEXT: s_not_b32 s1, s2 3004; GFX6-NEXT: v_alignbit_b32 v0, s0, v0, 1 3005; GFX6-NEXT: s_lshr_b32 s0, s0, 1 3006; GFX6-NEXT: v_mov_b32_e32 v1, s1 3007; GFX6-NEXT: v_alignbit_b32 v0, s0, v0, v1 3008; GFX6-NEXT: ; return to shader part epilog 3009; 3010; GFX8-LABEL: v_fshl_i32_vss: 3011; GFX8: ; %bb.0: 3012; GFX8-NEXT: v_mov_b32_e32 v0, s1 3013; GFX8-NEXT: s_not_b32 s1, s2 3014; GFX8-NEXT: v_alignbit_b32 v0, s0, v0, 1 3015; GFX8-NEXT: s_lshr_b32 s0, s0, 1 3016; GFX8-NEXT: v_mov_b32_e32 v1, s1 3017; GFX8-NEXT: v_alignbit_b32 v0, s0, v0, v1 3018; GFX8-NEXT: ; return to shader part epilog 3019; 3020; GFX9-LABEL: v_fshl_i32_vss: 3021; GFX9: ; %bb.0: 3022; GFX9-NEXT: v_mov_b32_e32 v0, s1 3023; GFX9-NEXT: s_not_b32 s1, s2 3024; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, 1 3025; GFX9-NEXT: s_lshr_b32 s0, s0, 1 3026; GFX9-NEXT: v_mov_b32_e32 v1, s1 3027; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, v1 3028; GFX9-NEXT: ; return to shader part epilog 3029; 3030; GFX10-LABEL: v_fshl_i32_vss: 3031; GFX10: ; %bb.0: 3032; GFX10-NEXT: v_alignbit_b32 v0, s0, s1, 1 3033; GFX10-NEXT: 
s_lshr_b32 s0, s0, 1 3034; GFX10-NEXT: s_not_b32 s1, s2 3035; GFX10-NEXT: v_alignbit_b32 v0, s0, v0, s1 3036; GFX10-NEXT: ; return to shader part epilog 3037; 3038; GFX11-LABEL: v_fshl_i32_vss: 3039; GFX11: ; %bb.0: 3040; GFX11-NEXT: v_alignbit_b32 v0, s0, s1, 1 3041; GFX11-NEXT: s_lshr_b32 s0, s0, 1 3042; GFX11-NEXT: s_not_b32 s1, s2 3043; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) 3044; GFX11-NEXT: v_alignbit_b32 v0, s0, v0, s1 3045; GFX11-NEXT: ; return to shader part epilog 3046 %result = call i32 @llvm.fshl.i32(i32 %lhs, i32 %rhs, i32 %amt) 3047 %cast.result = bitcast i32 %result to float 3048 ret float %cast.result 3049} 3050 3051define <2 x i32> @v_fshl_v2i32(<2 x i32> %lhs, <2 x i32> %rhs, <2 x i32> %amt) { 3052; GFX6-LABEL: v_fshl_v2i32: 3053; GFX6: ; %bb.0: 3054; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3055; GFX6-NEXT: v_alignbit_b32 v2, v0, v2, 1 3056; GFX6-NEXT: v_lshrrev_b32_e32 v0, 1, v0 3057; GFX6-NEXT: v_not_b32_e32 v4, v4 3058; GFX6-NEXT: v_alignbit_b32 v0, v0, v2, v4 3059; GFX6-NEXT: v_alignbit_b32 v2, v1, v3, 1 3060; GFX6-NEXT: v_lshrrev_b32_e32 v1, 1, v1 3061; GFX6-NEXT: v_not_b32_e32 v3, v5 3062; GFX6-NEXT: v_alignbit_b32 v1, v1, v2, v3 3063; GFX6-NEXT: s_setpc_b64 s[30:31] 3064; 3065; GFX8-LABEL: v_fshl_v2i32: 3066; GFX8: ; %bb.0: 3067; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3068; GFX8-NEXT: v_alignbit_b32 v2, v0, v2, 1 3069; GFX8-NEXT: v_lshrrev_b32_e32 v0, 1, v0 3070; GFX8-NEXT: v_not_b32_e32 v4, v4 3071; GFX8-NEXT: v_alignbit_b32 v0, v0, v2, v4 3072; GFX8-NEXT: v_alignbit_b32 v2, v1, v3, 1 3073; GFX8-NEXT: v_lshrrev_b32_e32 v1, 1, v1 3074; GFX8-NEXT: v_not_b32_e32 v3, v5 3075; GFX8-NEXT: v_alignbit_b32 v1, v1, v2, v3 3076; GFX8-NEXT: s_setpc_b64 s[30:31] 3077; 3078; GFX9-LABEL: v_fshl_v2i32: 3079; GFX9: ; %bb.0: 3080; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3081; GFX9-NEXT: v_alignbit_b32 v2, v0, v2, 1 3082; GFX9-NEXT: v_lshrrev_b32_e32 v0, 1, v0 3083; GFX9-NEXT: v_not_b32_e32 v4, 
v4 3084; GFX9-NEXT: v_alignbit_b32 v0, v0, v2, v4 3085; GFX9-NEXT: v_alignbit_b32 v2, v1, v3, 1 3086; GFX9-NEXT: v_lshrrev_b32_e32 v1, 1, v1 3087; GFX9-NEXT: v_not_b32_e32 v3, v5 3088; GFX9-NEXT: v_alignbit_b32 v1, v1, v2, v3 3089; GFX9-NEXT: s_setpc_b64 s[30:31] 3090; 3091; GFX10-LABEL: v_fshl_v2i32: 3092; GFX10: ; %bb.0: 3093; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3094; GFX10-NEXT: v_alignbit_b32 v2, v0, v2, 1 3095; GFX10-NEXT: v_lshrrev_b32_e32 v0, 1, v0 3096; GFX10-NEXT: v_not_b32_e32 v4, v4 3097; GFX10-NEXT: v_alignbit_b32 v3, v1, v3, 1 3098; GFX10-NEXT: v_lshrrev_b32_e32 v1, 1, v1 3099; GFX10-NEXT: v_not_b32_e32 v5, v5 3100; GFX10-NEXT: v_alignbit_b32 v0, v0, v2, v4 3101; GFX10-NEXT: v_alignbit_b32 v1, v1, v3, v5 3102; GFX10-NEXT: s_setpc_b64 s[30:31] 3103; 3104; GFX11-LABEL: v_fshl_v2i32: 3105; GFX11: ; %bb.0: 3106; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3107; GFX11-NEXT: v_alignbit_b32 v2, v0, v2, 1 3108; GFX11-NEXT: v_lshrrev_b32_e32 v0, 1, v0 3109; GFX11-NEXT: v_not_b32_e32 v4, v4 3110; GFX11-NEXT: v_alignbit_b32 v3, v1, v3, 1 3111; GFX11-NEXT: v_lshrrev_b32_e32 v1, 1, v1 3112; GFX11-NEXT: v_not_b32_e32 v5, v5 3113; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2) 3114; GFX11-NEXT: v_alignbit_b32 v0, v0, v2, v4 3115; GFX11-NEXT: v_alignbit_b32 v1, v1, v3, v5 3116; GFX11-NEXT: s_setpc_b64 s[30:31] 3117 %result = call <2 x i32> @llvm.fshl.v2i32(<2 x i32> %lhs, <2 x i32> %rhs, <2 x i32> %amt) 3118 ret <2 x i32> %result 3119} 3120 3121define <3 x i32> @v_fshl_v3i32(<3 x i32> %lhs, <3 x i32> %rhs, <3 x i32> %amt) { 3122; GFX6-LABEL: v_fshl_v3i32: 3123; GFX6: ; %bb.0: 3124; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3125; GFX6-NEXT: v_alignbit_b32 v3, v0, v3, 1 3126; GFX6-NEXT: v_lshrrev_b32_e32 v0, 1, v0 3127; GFX6-NEXT: v_not_b32_e32 v6, v6 3128; GFX6-NEXT: v_alignbit_b32 v0, v0, v3, v6 3129; GFX6-NEXT: v_alignbit_b32 v3, v1, v4, 1 3130; GFX6-NEXT: v_lshrrev_b32_e32 v1, 1, v1 3131; 
GFX6-NEXT: v_not_b32_e32 v4, v7 3132; GFX6-NEXT: v_alignbit_b32 v1, v1, v3, v4 3133; GFX6-NEXT: v_alignbit_b32 v3, v2, v5, 1 3134; GFX6-NEXT: v_lshrrev_b32_e32 v2, 1, v2 3135; GFX6-NEXT: v_not_b32_e32 v4, v8 3136; GFX6-NEXT: v_alignbit_b32 v2, v2, v3, v4 3137; GFX6-NEXT: s_setpc_b64 s[30:31] 3138; 3139; GFX8-LABEL: v_fshl_v3i32: 3140; GFX8: ; %bb.0: 3141; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3142; GFX8-NEXT: v_alignbit_b32 v3, v0, v3, 1 3143; GFX8-NEXT: v_lshrrev_b32_e32 v0, 1, v0 3144; GFX8-NEXT: v_not_b32_e32 v6, v6 3145; GFX8-NEXT: v_alignbit_b32 v0, v0, v3, v6 3146; GFX8-NEXT: v_alignbit_b32 v3, v1, v4, 1 3147; GFX8-NEXT: v_lshrrev_b32_e32 v1, 1, v1 3148; GFX8-NEXT: v_not_b32_e32 v4, v7 3149; GFX8-NEXT: v_alignbit_b32 v1, v1, v3, v4 3150; GFX8-NEXT: v_alignbit_b32 v3, v2, v5, 1 3151; GFX8-NEXT: v_lshrrev_b32_e32 v2, 1, v2 3152; GFX8-NEXT: v_not_b32_e32 v4, v8 3153; GFX8-NEXT: v_alignbit_b32 v2, v2, v3, v4 3154; GFX8-NEXT: s_setpc_b64 s[30:31] 3155; 3156; GFX9-LABEL: v_fshl_v3i32: 3157; GFX9: ; %bb.0: 3158; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3159; GFX9-NEXT: v_alignbit_b32 v3, v0, v3, 1 3160; GFX9-NEXT: v_lshrrev_b32_e32 v0, 1, v0 3161; GFX9-NEXT: v_not_b32_e32 v6, v6 3162; GFX9-NEXT: v_alignbit_b32 v0, v0, v3, v6 3163; GFX9-NEXT: v_alignbit_b32 v3, v1, v4, 1 3164; GFX9-NEXT: v_lshrrev_b32_e32 v1, 1, v1 3165; GFX9-NEXT: v_not_b32_e32 v4, v7 3166; GFX9-NEXT: v_alignbit_b32 v1, v1, v3, v4 3167; GFX9-NEXT: v_alignbit_b32 v3, v2, v5, 1 3168; GFX9-NEXT: v_lshrrev_b32_e32 v2, 1, v2 3169; GFX9-NEXT: v_not_b32_e32 v4, v8 3170; GFX9-NEXT: v_alignbit_b32 v2, v2, v3, v4 3171; GFX9-NEXT: s_setpc_b64 s[30:31] 3172; 3173; GFX10-LABEL: v_fshl_v3i32: 3174; GFX10: ; %bb.0: 3175; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3176; GFX10-NEXT: v_alignbit_b32 v3, v0, v3, 1 3177; GFX10-NEXT: v_lshrrev_b32_e32 v0, 1, v0 3178; GFX10-NEXT: v_not_b32_e32 v6, v6 3179; GFX10-NEXT: v_alignbit_b32 v4, v1, v4, 1 3180; GFX10-NEXT: v_lshrrev_b32_e32 v1, 1, 
v1 3181; GFX10-NEXT: v_not_b32_e32 v7, v7 3182; GFX10-NEXT: v_alignbit_b32 v5, v2, v5, 1 3183; GFX10-NEXT: v_lshrrev_b32_e32 v2, 1, v2 3184; GFX10-NEXT: v_not_b32_e32 v8, v8 3185; GFX10-NEXT: v_alignbit_b32 v0, v0, v3, v6 3186; GFX10-NEXT: v_alignbit_b32 v1, v1, v4, v7 3187; GFX10-NEXT: v_alignbit_b32 v2, v2, v5, v8 3188; GFX10-NEXT: s_setpc_b64 s[30:31] 3189; 3190; GFX11-LABEL: v_fshl_v3i32: 3191; GFX11: ; %bb.0: 3192; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3193; GFX11-NEXT: v_alignbit_b32 v3, v0, v3, 1 3194; GFX11-NEXT: v_lshrrev_b32_e32 v0, 1, v0 3195; GFX11-NEXT: v_not_b32_e32 v6, v6 3196; GFX11-NEXT: v_alignbit_b32 v4, v1, v4, 1 3197; GFX11-NEXT: v_lshrrev_b32_e32 v1, 1, v1 3198; GFX11-NEXT: v_not_b32_e32 v7, v7 3199; GFX11-NEXT: v_alignbit_b32 v5, v2, v5, 1 3200; GFX11-NEXT: v_lshrrev_b32_e32 v2, 1, v2 3201; GFX11-NEXT: v_not_b32_e32 v8, v8 3202; GFX11-NEXT: v_alignbit_b32 v0, v0, v3, v6 3203; GFX11-NEXT: v_alignbit_b32 v1, v1, v4, v7 3204; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) 3205; GFX11-NEXT: v_alignbit_b32 v2, v2, v5, v8 3206; GFX11-NEXT: s_setpc_b64 s[30:31] 3207 %result = call <3 x i32> @llvm.fshl.v3i32(<3 x i32> %lhs, <3 x i32> %rhs, <3 x i32> %amt) 3208 ret <3 x i32> %result 3209} 3210 3211define <4 x i32> @v_fshl_v4i32(<4 x i32> %lhs, <4 x i32> %rhs, <4 x i32> %amt) { 3212; GFX6-LABEL: v_fshl_v4i32: 3213; GFX6: ; %bb.0: 3214; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3215; GFX6-NEXT: v_alignbit_b32 v4, v0, v4, 1 3216; GFX6-NEXT: v_lshrrev_b32_e32 v0, 1, v0 3217; GFX6-NEXT: v_not_b32_e32 v8, v8 3218; GFX6-NEXT: v_alignbit_b32 v0, v0, v4, v8 3219; GFX6-NEXT: v_alignbit_b32 v4, v1, v5, 1 3220; GFX6-NEXT: v_lshrrev_b32_e32 v1, 1, v1 3221; GFX6-NEXT: v_not_b32_e32 v5, v9 3222; GFX6-NEXT: v_alignbit_b32 v1, v1, v4, v5 3223; GFX6-NEXT: v_alignbit_b32 v4, v2, v6, 1 3224; GFX6-NEXT: v_lshrrev_b32_e32 v2, 1, v2 3225; GFX6-NEXT: v_not_b32_e32 v5, v10 3226; GFX6-NEXT: v_alignbit_b32 v2, v2, v4, v5 3227; GFX6-NEXT: v_alignbit_b32 v4, 
v3, v7, 1 3228; GFX6-NEXT: v_lshrrev_b32_e32 v3, 1, v3 3229; GFX6-NEXT: v_not_b32_e32 v5, v11 3230; GFX6-NEXT: v_alignbit_b32 v3, v3, v4, v5 3231; GFX6-NEXT: s_setpc_b64 s[30:31] 3232; 3233; GFX8-LABEL: v_fshl_v4i32: 3234; GFX8: ; %bb.0: 3235; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3236; GFX8-NEXT: v_alignbit_b32 v4, v0, v4, 1 3237; GFX8-NEXT: v_lshrrev_b32_e32 v0, 1, v0 3238; GFX8-NEXT: v_not_b32_e32 v8, v8 3239; GFX8-NEXT: v_alignbit_b32 v0, v0, v4, v8 3240; GFX8-NEXT: v_alignbit_b32 v4, v1, v5, 1 3241; GFX8-NEXT: v_lshrrev_b32_e32 v1, 1, v1 3242; GFX8-NEXT: v_not_b32_e32 v5, v9 3243; GFX8-NEXT: v_alignbit_b32 v1, v1, v4, v5 3244; GFX8-NEXT: v_alignbit_b32 v4, v2, v6, 1 3245; GFX8-NEXT: v_lshrrev_b32_e32 v2, 1, v2 3246; GFX8-NEXT: v_not_b32_e32 v5, v10 3247; GFX8-NEXT: v_alignbit_b32 v2, v2, v4, v5 3248; GFX8-NEXT: v_alignbit_b32 v4, v3, v7, 1 3249; GFX8-NEXT: v_lshrrev_b32_e32 v3, 1, v3 3250; GFX8-NEXT: v_not_b32_e32 v5, v11 3251; GFX8-NEXT: v_alignbit_b32 v3, v3, v4, v5 3252; GFX8-NEXT: s_setpc_b64 s[30:31] 3253; 3254; GFX9-LABEL: v_fshl_v4i32: 3255; GFX9: ; %bb.0: 3256; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3257; GFX9-NEXT: v_alignbit_b32 v4, v0, v4, 1 3258; GFX9-NEXT: v_lshrrev_b32_e32 v0, 1, v0 3259; GFX9-NEXT: v_not_b32_e32 v8, v8 3260; GFX9-NEXT: v_alignbit_b32 v0, v0, v4, v8 3261; GFX9-NEXT: v_alignbit_b32 v4, v1, v5, 1 3262; GFX9-NEXT: v_lshrrev_b32_e32 v1, 1, v1 3263; GFX9-NEXT: v_not_b32_e32 v5, v9 3264; GFX9-NEXT: v_alignbit_b32 v1, v1, v4, v5 3265; GFX9-NEXT: v_alignbit_b32 v4, v2, v6, 1 3266; GFX9-NEXT: v_lshrrev_b32_e32 v2, 1, v2 3267; GFX9-NEXT: v_not_b32_e32 v5, v10 3268; GFX9-NEXT: v_alignbit_b32 v2, v2, v4, v5 3269; GFX9-NEXT: v_alignbit_b32 v4, v3, v7, 1 3270; GFX9-NEXT: v_lshrrev_b32_e32 v3, 1, v3 3271; GFX9-NEXT: v_not_b32_e32 v5, v11 3272; GFX9-NEXT: v_alignbit_b32 v3, v3, v4, v5 3273; GFX9-NEXT: s_setpc_b64 s[30:31] 3274; 3275; GFX10-LABEL: v_fshl_v4i32: 3276; GFX10: ; %bb.0: 3277; GFX10-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) 3278; GFX10-NEXT: v_alignbit_b32 v4, v0, v4, 1 3279; GFX10-NEXT: v_lshrrev_b32_e32 v0, 1, v0 3280; GFX10-NEXT: v_not_b32_e32 v8, v8 3281; GFX10-NEXT: v_alignbit_b32 v5, v1, v5, 1 3282; GFX10-NEXT: v_lshrrev_b32_e32 v1, 1, v1 3283; GFX10-NEXT: v_not_b32_e32 v9, v9 3284; GFX10-NEXT: v_alignbit_b32 v6, v2, v6, 1 3285; GFX10-NEXT: v_lshrrev_b32_e32 v2, 1, v2 3286; GFX10-NEXT: v_not_b32_e32 v10, v10 3287; GFX10-NEXT: v_alignbit_b32 v7, v3, v7, 1 3288; GFX10-NEXT: v_lshrrev_b32_e32 v3, 1, v3 3289; GFX10-NEXT: v_not_b32_e32 v11, v11 3290; GFX10-NEXT: v_alignbit_b32 v0, v0, v4, v8 3291; GFX10-NEXT: v_alignbit_b32 v1, v1, v5, v9 3292; GFX10-NEXT: v_alignbit_b32 v2, v2, v6, v10 3293; GFX10-NEXT: v_alignbit_b32 v3, v3, v7, v11 3294; GFX10-NEXT: s_setpc_b64 s[30:31] 3295; 3296; GFX11-LABEL: v_fshl_v4i32: 3297; GFX11: ; %bb.0: 3298; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3299; GFX11-NEXT: v_alignbit_b32 v4, v0, v4, 1 3300; GFX11-NEXT: v_lshrrev_b32_e32 v0, 1, v0 3301; GFX11-NEXT: v_not_b32_e32 v8, v8 3302; GFX11-NEXT: v_alignbit_b32 v5, v1, v5, 1 3303; GFX11-NEXT: v_lshrrev_b32_e32 v1, 1, v1 3304; GFX11-NEXT: v_not_b32_e32 v9, v9 3305; GFX11-NEXT: v_alignbit_b32 v6, v2, v6, 1 3306; GFX11-NEXT: v_lshrrev_b32_e32 v2, 1, v2 3307; GFX11-NEXT: v_not_b32_e32 v10, v10 3308; GFX11-NEXT: v_alignbit_b32 v7, v3, v7, 1 3309; GFX11-NEXT: v_lshrrev_b32_e32 v3, 1, v3 3310; GFX11-NEXT: v_not_b32_e32 v11, v11 3311; GFX11-NEXT: v_alignbit_b32 v0, v0, v4, v8 3312; GFX11-NEXT: v_alignbit_b32 v1, v1, v5, v9 3313; GFX11-NEXT: v_alignbit_b32 v2, v2, v6, v10 3314; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) 3315; GFX11-NEXT: v_alignbit_b32 v3, v3, v7, v11 3316; GFX11-NEXT: s_setpc_b64 s[30:31] 3317 %result = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %lhs, <4 x i32> %rhs, <4 x i32> %amt) 3318 ret <4 x i32> %result 3319} 3320 3321define amdgpu_ps i16 @s_fshl_i16(i16 inreg %lhs, i16 inreg %rhs, i16 inreg %amt) { 3322; GFX6-LABEL: s_fshl_i16: 3323; GFX6: ; %bb.0: 3324; 
GFX6-NEXT: s_and_b32 s3, s2, 15 3325; GFX6-NEXT: s_andn2_b32 s2, 15, s2 3326; GFX6-NEXT: s_and_b32 s3, 0xffff, s3 3327; GFX6-NEXT: s_bfe_u32 s1, s1, 0xf0001 3328; GFX6-NEXT: s_and_b32 s2, 0xffff, s2 3329; GFX6-NEXT: s_lshl_b32 s0, s0, s3 3330; GFX6-NEXT: s_lshr_b32 s1, s1, s2 3331; GFX6-NEXT: s_or_b32 s0, s0, s1 3332; GFX6-NEXT: ; return to shader part epilog 3333; 3334; GFX8-LABEL: s_fshl_i16: 3335; GFX8: ; %bb.0: 3336; GFX8-NEXT: s_and_b32 s3, s2, 15 3337; GFX8-NEXT: s_andn2_b32 s2, 15, s2 3338; GFX8-NEXT: s_and_b32 s1, 0xffff, s1 3339; GFX8-NEXT: s_and_b32 s3, 0xffff, s3 3340; GFX8-NEXT: s_lshr_b32 s1, s1, 1 3341; GFX8-NEXT: s_and_b32 s2, 0xffff, s2 3342; GFX8-NEXT: s_lshl_b32 s0, s0, s3 3343; GFX8-NEXT: s_lshr_b32 s1, s1, s2 3344; GFX8-NEXT: s_or_b32 s0, s0, s1 3345; GFX8-NEXT: ; return to shader part epilog 3346; 3347; GFX9-LABEL: s_fshl_i16: 3348; GFX9: ; %bb.0: 3349; GFX9-NEXT: s_and_b32 s3, s2, 15 3350; GFX9-NEXT: s_andn2_b32 s2, 15, s2 3351; GFX9-NEXT: s_and_b32 s1, 0xffff, s1 3352; GFX9-NEXT: s_and_b32 s3, 0xffff, s3 3353; GFX9-NEXT: s_lshr_b32 s1, s1, 1 3354; GFX9-NEXT: s_and_b32 s2, 0xffff, s2 3355; GFX9-NEXT: s_lshl_b32 s0, s0, s3 3356; GFX9-NEXT: s_lshr_b32 s1, s1, s2 3357; GFX9-NEXT: s_or_b32 s0, s0, s1 3358; GFX9-NEXT: ; return to shader part epilog 3359; 3360; GFX10-LABEL: s_fshl_i16: 3361; GFX10: ; %bb.0: 3362; GFX10-NEXT: s_and_b32 s3, s2, 15 3363; GFX10-NEXT: s_andn2_b32 s2, 15, s2 3364; GFX10-NEXT: s_and_b32 s1, 0xffff, s1 3365; GFX10-NEXT: s_and_b32 s3, 0xffff, s3 3366; GFX10-NEXT: s_lshr_b32 s1, s1, 1 3367; GFX10-NEXT: s_and_b32 s2, 0xffff, s2 3368; GFX10-NEXT: s_lshl_b32 s0, s0, s3 3369; GFX10-NEXT: s_lshr_b32 s1, s1, s2 3370; GFX10-NEXT: s_or_b32 s0, s0, s1 3371; GFX10-NEXT: ; return to shader part epilog 3372; 3373; GFX11-LABEL: s_fshl_i16: 3374; GFX11: ; %bb.0: 3375; GFX11-NEXT: s_and_b32 s3, s2, 15 3376; GFX11-NEXT: s_and_not1_b32 s2, 15, s2 3377; GFX11-NEXT: s_and_b32 s1, 0xffff, s1 3378; GFX11-NEXT: s_and_b32 s3, 0xffff, s3 3379; 
GFX11-NEXT: s_lshr_b32 s1, s1, 1 3380; GFX11-NEXT: s_and_b32 s2, 0xffff, s2 3381; GFX11-NEXT: s_lshl_b32 s0, s0, s3 3382; GFX11-NEXT: s_lshr_b32 s1, s1, s2 3383; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 3384; GFX11-NEXT: s_or_b32 s0, s0, s1 3385; GFX11-NEXT: ; return to shader part epilog 3386 %result = call i16 @llvm.fshl.i16(i16 %lhs, i16 %rhs, i16 %amt) 3387 ret i16 %result 3388} 3389 3390define amdgpu_ps i16 @s_fshl_i16_4(i16 inreg %lhs, i16 inreg %rhs) { 3391; GFX6-LABEL: s_fshl_i16_4: 3392; GFX6: ; %bb.0: 3393; GFX6-NEXT: s_lshl_b32 s0, s0, 4 3394; GFX6-NEXT: s_bfe_u32 s1, s1, 0x4000c 3395; GFX6-NEXT: s_or_b32 s0, s0, s1 3396; GFX6-NEXT: ; return to shader part epilog 3397; 3398; GFX8-LABEL: s_fshl_i16_4: 3399; GFX8: ; %bb.0: 3400; GFX8-NEXT: s_and_b32 s1, 0xffff, s1 3401; GFX8-NEXT: s_lshl_b32 s0, s0, 4 3402; GFX8-NEXT: s_lshr_b32 s1, s1, 12 3403; GFX8-NEXT: s_or_b32 s0, s0, s1 3404; GFX8-NEXT: ; return to shader part epilog 3405; 3406; GFX9-LABEL: s_fshl_i16_4: 3407; GFX9: ; %bb.0: 3408; GFX9-NEXT: s_and_b32 s1, 0xffff, s1 3409; GFX9-NEXT: s_lshl_b32 s0, s0, 4 3410; GFX9-NEXT: s_lshr_b32 s1, s1, 12 3411; GFX9-NEXT: s_or_b32 s0, s0, s1 3412; GFX9-NEXT: ; return to shader part epilog 3413; 3414; GFX10-LABEL: s_fshl_i16_4: 3415; GFX10: ; %bb.0: 3416; GFX10-NEXT: s_and_b32 s1, 0xffff, s1 3417; GFX10-NEXT: s_lshl_b32 s0, s0, 4 3418; GFX10-NEXT: s_lshr_b32 s1, s1, 12 3419; GFX10-NEXT: s_or_b32 s0, s0, s1 3420; GFX10-NEXT: ; return to shader part epilog 3421; 3422; GFX11-LABEL: s_fshl_i16_4: 3423; GFX11: ; %bb.0: 3424; GFX11-NEXT: s_and_b32 s1, 0xffff, s1 3425; GFX11-NEXT: s_lshl_b32 s0, s0, 4 3426; GFX11-NEXT: s_lshr_b32 s1, s1, 12 3427; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 3428; GFX11-NEXT: s_or_b32 s0, s0, s1 3429; GFX11-NEXT: ; return to shader part epilog 3430 %result = call i16 @llvm.fshl.i16(i16 %lhs, i16 %rhs, i16 4) 3431 ret i16 %result 3432} 3433 3434define amdgpu_ps i16 @s_fshl_i16_5(i16 inreg %lhs, i16 inreg %rhs) { 3435; GFX6-LABEL: 
s_fshl_i16_5: 3436; GFX6: ; %bb.0: 3437; GFX6-NEXT: s_lshl_b32 s0, s0, 5 3438; GFX6-NEXT: s_bfe_u32 s1, s1, 0x5000b 3439; GFX6-NEXT: s_or_b32 s0, s0, s1 3440; GFX6-NEXT: ; return to shader part epilog 3441; 3442; GFX8-LABEL: s_fshl_i16_5: 3443; GFX8: ; %bb.0: 3444; GFX8-NEXT: s_and_b32 s1, 0xffff, s1 3445; GFX8-NEXT: s_lshl_b32 s0, s0, 5 3446; GFX8-NEXT: s_lshr_b32 s1, s1, 11 3447; GFX8-NEXT: s_or_b32 s0, s0, s1 3448; GFX8-NEXT: ; return to shader part epilog 3449; 3450; GFX9-LABEL: s_fshl_i16_5: 3451; GFX9: ; %bb.0: 3452; GFX9-NEXT: s_and_b32 s1, 0xffff, s1 3453; GFX9-NEXT: s_lshl_b32 s0, s0, 5 3454; GFX9-NEXT: s_lshr_b32 s1, s1, 11 3455; GFX9-NEXT: s_or_b32 s0, s0, s1 3456; GFX9-NEXT: ; return to shader part epilog 3457; 3458; GFX10-LABEL: s_fshl_i16_5: 3459; GFX10: ; %bb.0: 3460; GFX10-NEXT: s_and_b32 s1, 0xffff, s1 3461; GFX10-NEXT: s_lshl_b32 s0, s0, 5 3462; GFX10-NEXT: s_lshr_b32 s1, s1, 11 3463; GFX10-NEXT: s_or_b32 s0, s0, s1 3464; GFX10-NEXT: ; return to shader part epilog 3465; 3466; GFX11-LABEL: s_fshl_i16_5: 3467; GFX11: ; %bb.0: 3468; GFX11-NEXT: s_and_b32 s1, 0xffff, s1 3469; GFX11-NEXT: s_lshl_b32 s0, s0, 5 3470; GFX11-NEXT: s_lshr_b32 s1, s1, 11 3471; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 3472; GFX11-NEXT: s_or_b32 s0, s0, s1 3473; GFX11-NEXT: ; return to shader part epilog 3474 %result = call i16 @llvm.fshl.i16(i16 %lhs, i16 %rhs, i16 5) 3475 ret i16 %result 3476} 3477 3478define i16 @v_fshl_i16(i16 %lhs, i16 %rhs, i16 %amt) { 3479; GFX6-LABEL: v_fshl_i16: 3480; GFX6: ; %bb.0: 3481; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3482; GFX6-NEXT: v_and_b32_e32 v3, 15, v2 3483; GFX6-NEXT: v_xor_b32_e32 v2, -1, v2 3484; GFX6-NEXT: v_and_b32_e32 v2, 15, v2 3485; GFX6-NEXT: v_and_b32_e32 v3, 0xffff, v3 3486; GFX6-NEXT: v_bfe_u32 v1, v1, 1, 15 3487; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2 3488; GFX6-NEXT: v_lshlrev_b32_e32 v0, v3, v0 3489; GFX6-NEXT: v_lshrrev_b32_e32 v1, v2, v1 3490; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 3491; GFX6-NEXT: 
s_setpc_b64 s[30:31] 3492; 3493; GFX8-LABEL: v_fshl_i16: 3494; GFX8: ; %bb.0: 3495; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3496; GFX8-NEXT: v_and_b32_e32 v3, 15, v2 3497; GFX8-NEXT: v_xor_b32_e32 v2, -1, v2 3498; GFX8-NEXT: v_and_b32_e32 v2, 15, v2 3499; GFX8-NEXT: v_lshrrev_b16_e32 v1, 1, v1 3500; GFX8-NEXT: v_lshlrev_b16_e32 v0, v3, v0 3501; GFX8-NEXT: v_lshrrev_b16_e32 v1, v2, v1 3502; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 3503; GFX8-NEXT: s_setpc_b64 s[30:31] 3504; 3505; GFX9-LABEL: v_fshl_i16: 3506; GFX9: ; %bb.0: 3507; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3508; GFX9-NEXT: v_and_b32_e32 v3, 15, v2 3509; GFX9-NEXT: v_xor_b32_e32 v2, -1, v2 3510; GFX9-NEXT: v_and_b32_e32 v2, 15, v2 3511; GFX9-NEXT: v_lshrrev_b16_e32 v1, 1, v1 3512; GFX9-NEXT: v_lshlrev_b16_e32 v0, v3, v0 3513; GFX9-NEXT: v_lshrrev_b16_e32 v1, v2, v1 3514; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 3515; GFX9-NEXT: s_setpc_b64 s[30:31] 3516; 3517; GFX10-LABEL: v_fshl_i16: 3518; GFX10: ; %bb.0: 3519; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3520; GFX10-NEXT: v_xor_b32_e32 v3, -1, v2 3521; GFX10-NEXT: v_and_b32_e32 v2, 15, v2 3522; GFX10-NEXT: v_lshrrev_b16 v1, 1, v1 3523; GFX10-NEXT: v_and_b32_e32 v3, 15, v3 3524; GFX10-NEXT: v_lshlrev_b16 v0, v2, v0 3525; GFX10-NEXT: v_lshrrev_b16 v1, v3, v1 3526; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 3527; GFX10-NEXT: s_setpc_b64 s[30:31] 3528; 3529; GFX11-LABEL: v_fshl_i16: 3530; GFX11: ; %bb.0: 3531; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3532; GFX11-NEXT: v_xor_b32_e32 v3, -1, v2 3533; GFX11-NEXT: v_and_b32_e32 v2, 15, v2 3534; GFX11-NEXT: v_lshrrev_b16 v1, 1, v1 3535; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 3536; GFX11-NEXT: v_and_b32_e32 v3, 15, v3 3537; GFX11-NEXT: v_lshlrev_b16 v0, v2, v0 3538; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 3539; GFX11-NEXT: v_lshrrev_b16 v1, v3, v1 3540; GFX11-NEXT: v_or_b32_e32 v0, v0, v1 3541; GFX11-NEXT: 
s_setpc_b64 s[30:31] 3542 %result = call i16 @llvm.fshl.i16(i16 %lhs, i16 %rhs, i16 %amt) 3543 ret i16 %result 3544} 3545 3546define i16 @v_fshl_i16_4(i16 %lhs, i16 %rhs) { 3547; GFX6-LABEL: v_fshl_i16_4: 3548; GFX6: ; %bb.0: 3549; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3550; GFX6-NEXT: v_lshlrev_b32_e32 v0, 4, v0 3551; GFX6-NEXT: v_bfe_u32 v1, v1, 12, 4 3552; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 3553; GFX6-NEXT: s_setpc_b64 s[30:31] 3554; 3555; GFX8-LABEL: v_fshl_i16_4: 3556; GFX8: ; %bb.0: 3557; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3558; GFX8-NEXT: v_lshlrev_b16_e32 v0, 4, v0 3559; GFX8-NEXT: v_lshrrev_b16_e32 v1, 12, v1 3560; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 3561; GFX8-NEXT: s_setpc_b64 s[30:31] 3562; 3563; GFX9-LABEL: v_fshl_i16_4: 3564; GFX9: ; %bb.0: 3565; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3566; GFX9-NEXT: v_lshlrev_b16_e32 v0, 4, v0 3567; GFX9-NEXT: v_lshrrev_b16_e32 v1, 12, v1 3568; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 3569; GFX9-NEXT: s_setpc_b64 s[30:31] 3570; 3571; GFX10-LABEL: v_fshl_i16_4: 3572; GFX10: ; %bb.0: 3573; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3574; GFX10-NEXT: v_lshlrev_b16 v0, 4, v0 3575; GFX10-NEXT: v_lshrrev_b16 v1, 12, v1 3576; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 3577; GFX10-NEXT: s_setpc_b64 s[30:31] 3578; 3579; GFX11-LABEL: v_fshl_i16_4: 3580; GFX11: ; %bb.0: 3581; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3582; GFX11-NEXT: v_lshlrev_b16 v0, 4, v0 3583; GFX11-NEXT: v_lshrrev_b16 v1, 12, v1 3584; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 3585; GFX11-NEXT: v_or_b32_e32 v0, v0, v1 3586; GFX11-NEXT: s_setpc_b64 s[30:31] 3587 %result = call i16 @llvm.fshl.i16(i16 %lhs, i16 %rhs, i16 4) 3588 ret i16 %result 3589} 3590 3591define i16 @v_fshl_i16_5(i16 %lhs, i16 %rhs) { 3592; GFX6-LABEL: v_fshl_i16_5: 3593; GFX6: ; %bb.0: 3594; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3595; GFX6-NEXT: v_lshlrev_b32_e32 v0, 5, v0 3596; GFX6-NEXT: v_bfe_u32 v1, v1, 11, 5 3597; 
GFX6-NEXT: v_or_b32_e32 v0, v0, v1 3598; GFX6-NEXT: s_setpc_b64 s[30:31] 3599; 3600; GFX8-LABEL: v_fshl_i16_5: 3601; GFX8: ; %bb.0: 3602; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3603; GFX8-NEXT: v_lshlrev_b16_e32 v0, 5, v0 3604; GFX8-NEXT: v_lshrrev_b16_e32 v1, 11, v1 3605; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 3606; GFX8-NEXT: s_setpc_b64 s[30:31] 3607; 3608; GFX9-LABEL: v_fshl_i16_5: 3609; GFX9: ; %bb.0: 3610; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3611; GFX9-NEXT: v_lshlrev_b16_e32 v0, 5, v0 3612; GFX9-NEXT: v_lshrrev_b16_e32 v1, 11, v1 3613; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 3614; GFX9-NEXT: s_setpc_b64 s[30:31] 3615; 3616; GFX10-LABEL: v_fshl_i16_5: 3617; GFX10: ; %bb.0: 3618; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3619; GFX10-NEXT: v_lshlrev_b16 v0, 5, v0 3620; GFX10-NEXT: v_lshrrev_b16 v1, 11, v1 3621; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 3622; GFX10-NEXT: s_setpc_b64 s[30:31] 3623; 3624; GFX11-LABEL: v_fshl_i16_5: 3625; GFX11: ; %bb.0: 3626; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3627; GFX11-NEXT: v_lshlrev_b16 v0, 5, v0 3628; GFX11-NEXT: v_lshrrev_b16 v1, 11, v1 3629; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 3630; GFX11-NEXT: v_or_b32_e32 v0, v0, v1 3631; GFX11-NEXT: s_setpc_b64 s[30:31] 3632 %result = call i16 @llvm.fshl.i16(i16 %lhs, i16 %rhs, i16 5) 3633 ret i16 %result 3634} 3635 3636define amdgpu_ps half @v_fshl_i16_ssv(i16 inreg %lhs, i16 inreg %rhs, i16 %amt) { 3637; GFX6-LABEL: v_fshl_i16_ssv: 3638; GFX6: ; %bb.0: 3639; GFX6-NEXT: v_and_b32_e32 v1, 15, v0 3640; GFX6-NEXT: v_xor_b32_e32 v0, -1, v0 3641; GFX6-NEXT: v_and_b32_e32 v0, 15, v0 3642; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1 3643; GFX6-NEXT: v_lshl_b32_e32 v1, s0, v1 3644; GFX6-NEXT: s_bfe_u32 s0, s1, 0xf0001 3645; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 3646; GFX6-NEXT: v_lshr_b32_e32 v0, s0, v0 3647; GFX6-NEXT: v_or_b32_e32 v0, v1, v0 3648; GFX6-NEXT: ; return to shader part epilog 3649; 3650; GFX8-LABEL: v_fshl_i16_ssv: 3651; GFX8: ; 
%bb.0: 3652; GFX8-NEXT: v_and_b32_e32 v1, 15, v0 3653; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0 3654; GFX8-NEXT: v_lshlrev_b16_e64 v1, v1, s0 3655; GFX8-NEXT: s_and_b32 s0, 0xffff, s1 3656; GFX8-NEXT: v_and_b32_e32 v0, 15, v0 3657; GFX8-NEXT: s_lshr_b32 s0, s0, 1 3658; GFX8-NEXT: v_lshrrev_b16_e64 v0, v0, s0 3659; GFX8-NEXT: v_or_b32_e32 v0, v1, v0 3660; GFX8-NEXT: ; return to shader part epilog 3661; 3662; GFX9-LABEL: v_fshl_i16_ssv: 3663; GFX9: ; %bb.0: 3664; GFX9-NEXT: v_and_b32_e32 v1, 15, v0 3665; GFX9-NEXT: v_xor_b32_e32 v0, -1, v0 3666; GFX9-NEXT: v_lshlrev_b16_e64 v1, v1, s0 3667; GFX9-NEXT: s_and_b32 s0, 0xffff, s1 3668; GFX9-NEXT: v_and_b32_e32 v0, 15, v0 3669; GFX9-NEXT: s_lshr_b32 s0, s0, 1 3670; GFX9-NEXT: v_lshrrev_b16_e64 v0, v0, s0 3671; GFX9-NEXT: v_or_b32_e32 v0, v1, v0 3672; GFX9-NEXT: ; return to shader part epilog 3673; 3674; GFX10-LABEL: v_fshl_i16_ssv: 3675; GFX10: ; %bb.0: 3676; GFX10-NEXT: v_xor_b32_e32 v1, -1, v0 3677; GFX10-NEXT: v_and_b32_e32 v0, 15, v0 3678; GFX10-NEXT: s_and_b32 s1, 0xffff, s1 3679; GFX10-NEXT: s_lshr_b32 s1, s1, 1 3680; GFX10-NEXT: v_and_b32_e32 v1, 15, v1 3681; GFX10-NEXT: v_lshlrev_b16 v0, v0, s0 3682; GFX10-NEXT: v_lshrrev_b16 v1, v1, s1 3683; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 3684; GFX10-NEXT: ; return to shader part epilog 3685; 3686; GFX11-LABEL: v_fshl_i16_ssv: 3687; GFX11: ; %bb.0: 3688; GFX11-NEXT: v_xor_b32_e32 v1, -1, v0 3689; GFX11-NEXT: v_and_b32_e32 v0, 15, v0 3690; GFX11-NEXT: s_and_b32 s1, 0xffff, s1 3691; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2) 3692; GFX11-NEXT: s_lshr_b32 s1, s1, 1 3693; GFX11-NEXT: v_and_b32_e32 v1, 15, v1 3694; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 3695; GFX11-NEXT: v_lshlrev_b16 v0, v0, s0 3696; GFX11-NEXT: v_lshrrev_b16 v1, v1, s1 3697; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 3698; GFX11-NEXT: v_or_b32_e32 v0, v0, v1 3699; GFX11-NEXT: ; return to shader part epilog 3700 %result = call i16 
@llvm.fshl.i16(i16 %lhs, i16 %rhs, i16 %amt) 3701 %cast.result = bitcast i16 %result to half 3702 ret half %cast.result 3703} 3704 3705define amdgpu_ps half @v_fshl_i16_svs(i16 inreg %lhs, i16 %rhs, i16 inreg %amt) { 3706; GFX6-LABEL: v_fshl_i16_svs: 3707; GFX6: ; %bb.0: 3708; GFX6-NEXT: s_and_b32 s2, s1, 15 3709; GFX6-NEXT: s_andn2_b32 s1, 15, s1 3710; GFX6-NEXT: s_and_b32 s2, 0xffff, s2 3711; GFX6-NEXT: v_bfe_u32 v0, v0, 1, 15 3712; GFX6-NEXT: s_and_b32 s1, 0xffff, s1 3713; GFX6-NEXT: s_lshl_b32 s0, s0, s2 3714; GFX6-NEXT: v_lshrrev_b32_e32 v0, s1, v0 3715; GFX6-NEXT: v_or_b32_e32 v0, s0, v0 3716; GFX6-NEXT: ; return to shader part epilog 3717; 3718; GFX8-LABEL: v_fshl_i16_svs: 3719; GFX8: ; %bb.0: 3720; GFX8-NEXT: s_and_b32 s2, s1, 15 3721; GFX8-NEXT: s_andn2_b32 s1, 15, s1 3722; GFX8-NEXT: s_and_b32 s2, 0xffff, s2 3723; GFX8-NEXT: v_lshrrev_b16_e32 v0, 1, v0 3724; GFX8-NEXT: s_lshl_b32 s0, s0, s2 3725; GFX8-NEXT: v_lshrrev_b16_e32 v0, s1, v0 3726; GFX8-NEXT: v_or_b32_e32 v0, s0, v0 3727; GFX8-NEXT: ; return to shader part epilog 3728; 3729; GFX9-LABEL: v_fshl_i16_svs: 3730; GFX9: ; %bb.0: 3731; GFX9-NEXT: s_and_b32 s2, s1, 15 3732; GFX9-NEXT: s_andn2_b32 s1, 15, s1 3733; GFX9-NEXT: s_and_b32 s2, 0xffff, s2 3734; GFX9-NEXT: v_lshrrev_b16_e32 v0, 1, v0 3735; GFX9-NEXT: s_lshl_b32 s0, s0, s2 3736; GFX9-NEXT: v_lshrrev_b16_e32 v0, s1, v0 3737; GFX9-NEXT: v_or_b32_e32 v0, s0, v0 3738; GFX9-NEXT: ; return to shader part epilog 3739; 3740; GFX10-LABEL: v_fshl_i16_svs: 3741; GFX10: ; %bb.0: 3742; GFX10-NEXT: v_lshrrev_b16 v0, 1, v0 3743; GFX10-NEXT: s_andn2_b32 s2, 15, s1 3744; GFX10-NEXT: s_and_b32 s1, s1, 15 3745; GFX10-NEXT: s_and_b32 s1, 0xffff, s1 3746; GFX10-NEXT: v_lshrrev_b16 v0, s2, v0 3747; GFX10-NEXT: s_lshl_b32 s0, s0, s1 3748; GFX10-NEXT: v_or_b32_e32 v0, s0, v0 3749; GFX10-NEXT: ; return to shader part epilog 3750; 3751; GFX11-LABEL: v_fshl_i16_svs: 3752; GFX11: ; %bb.0: 3753; GFX11-NEXT: v_lshrrev_b16 v0, 1, v0 3754; GFX11-NEXT: s_and_not1_b32 s2, 15, s1 
3755; GFX11-NEXT: s_and_b32 s1, s1, 15 3756; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1) 3757; GFX11-NEXT: s_and_b32 s1, 0xffff, s1 3758; GFX11-NEXT: v_lshrrev_b16 v0, s2, v0 3759; GFX11-NEXT: s_lshl_b32 s0, s0, s1 3760; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) 3761; GFX11-NEXT: v_or_b32_e32 v0, s0, v0 3762; GFX11-NEXT: ; return to shader part epilog 3763 %result = call i16 @llvm.fshl.i16(i16 %lhs, i16 %rhs, i16 %amt) 3764 %cast.result = bitcast i16 %result to half 3765 ret half %cast.result 3766} 3767 3768define amdgpu_ps half @v_fshl_i16_vss(i16 %lhs, i16 inreg %rhs, i16 inreg %amt) { 3769; GFX6-LABEL: v_fshl_i16_vss: 3770; GFX6: ; %bb.0: 3771; GFX6-NEXT: s_and_b32 s2, s1, 15 3772; GFX6-NEXT: s_andn2_b32 s1, 15, s1 3773; GFX6-NEXT: s_and_b32 s2, 0xffff, s2 3774; GFX6-NEXT: s_bfe_u32 s0, s0, 0xf0001 3775; GFX6-NEXT: s_and_b32 s1, 0xffff, s1 3776; GFX6-NEXT: v_lshlrev_b32_e32 v0, s2, v0 3777; GFX6-NEXT: s_lshr_b32 s0, s0, s1 3778; GFX6-NEXT: v_or_b32_e32 v0, s0, v0 3779; GFX6-NEXT: ; return to shader part epilog 3780; 3781; GFX8-LABEL: v_fshl_i16_vss: 3782; GFX8: ; %bb.0: 3783; GFX8-NEXT: s_and_b32 s2, s1, 15 3784; GFX8-NEXT: s_andn2_b32 s1, 15, s1 3785; GFX8-NEXT: s_and_b32 s0, 0xffff, s0 3786; GFX8-NEXT: s_lshr_b32 s0, s0, 1 3787; GFX8-NEXT: s_and_b32 s1, 0xffff, s1 3788; GFX8-NEXT: v_lshlrev_b16_e32 v0, s2, v0 3789; GFX8-NEXT: s_lshr_b32 s0, s0, s1 3790; GFX8-NEXT: v_or_b32_e32 v0, s0, v0 3791; GFX8-NEXT: ; return to shader part epilog 3792; 3793; GFX9-LABEL: v_fshl_i16_vss: 3794; GFX9: ; %bb.0: 3795; GFX9-NEXT: s_and_b32 s2, s1, 15 3796; GFX9-NEXT: s_andn2_b32 s1, 15, s1 3797; GFX9-NEXT: s_and_b32 s0, 0xffff, s0 3798; GFX9-NEXT: s_lshr_b32 s0, s0, 1 3799; GFX9-NEXT: s_and_b32 s1, 0xffff, s1 3800; GFX9-NEXT: v_lshlrev_b16_e32 v0, s2, v0 3801; GFX9-NEXT: s_lshr_b32 s0, s0, s1 3802; GFX9-NEXT: v_or_b32_e32 v0, s0, v0 3803; GFX9-NEXT: ; return to shader part epilog 3804; 3805; GFX10-LABEL: 
v_fshl_i16_vss: 3806; GFX10: ; %bb.0: 3807; GFX10-NEXT: s_and_b32 s2, s1, 15 3808; GFX10-NEXT: s_andn2_b32 s1, 15, s1 3809; GFX10-NEXT: s_and_b32 s0, 0xffff, s0 3810; GFX10-NEXT: v_lshlrev_b16 v0, s2, v0 3811; GFX10-NEXT: s_lshr_b32 s0, s0, 1 3812; GFX10-NEXT: s_and_b32 s1, 0xffff, s1 3813; GFX10-NEXT: s_lshr_b32 s0, s0, s1 3814; GFX10-NEXT: v_or_b32_e32 v0, s0, v0 3815; GFX10-NEXT: ; return to shader part epilog 3816; 3817; GFX11-LABEL: v_fshl_i16_vss: 3818; GFX11: ; %bb.0: 3819; GFX11-NEXT: s_and_b32 s2, s1, 15 3820; GFX11-NEXT: s_and_not1_b32 s1, 15, s1 3821; GFX11-NEXT: s_and_b32 s0, 0xffff, s0 3822; GFX11-NEXT: v_lshlrev_b16 v0, s2, v0 3823; GFX11-NEXT: s_lshr_b32 s0, s0, 1 3824; GFX11-NEXT: s_and_b32 s1, 0xffff, s1 3825; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 3826; GFX11-NEXT: s_lshr_b32 s0, s0, s1 3827; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) 3828; GFX11-NEXT: v_or_b32_e32 v0, s0, v0 3829; GFX11-NEXT: ; return to shader part epilog 3830 %result = call i16 @llvm.fshl.i16(i16 %lhs, i16 %rhs, i16 %amt) 3831 %cast.result = bitcast i16 %result to half 3832 ret half %cast.result 3833} 3834 3835define amdgpu_ps i32 @s_fshl_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs, <2 x i16> inreg %amt) { 3836; GFX6-LABEL: s_fshl_v2i16: 3837; GFX6: ; %bb.0: 3838; GFX6-NEXT: s_and_b32 s6, s4, 15 3839; GFX6-NEXT: s_andn2_b32 s4, 15, s4 3840; GFX6-NEXT: s_and_b32 s6, 0xffff, s6 3841; GFX6-NEXT: s_bfe_u32 s2, s2, 0xf0001 3842; GFX6-NEXT: s_and_b32 s4, 0xffff, s4 3843; GFX6-NEXT: s_lshl_b32 s0, s0, s6 3844; GFX6-NEXT: s_lshr_b32 s2, s2, s4 3845; GFX6-NEXT: s_or_b32 s0, s0, s2 3846; GFX6-NEXT: s_and_b32 s2, s5, 15 3847; GFX6-NEXT: s_andn2_b32 s4, 15, s5 3848; GFX6-NEXT: s_and_b32 s2, 0xffff, s2 3849; GFX6-NEXT: s_lshl_b32 s1, s1, s2 3850; GFX6-NEXT: s_bfe_u32 s2, s3, 0xf0001 3851; GFX6-NEXT: s_and_b32 s3, 0xffff, s4 3852; GFX6-NEXT: s_lshr_b32 s2, s2, s3 3853; GFX6-NEXT: s_or_b32 s1, s1, s2 3854; GFX6-NEXT: s_and_b32 s1, 0xffff, s1 3855; 
GFX6-NEXT: s_and_b32 s0, 0xffff, s0 3856; GFX6-NEXT: s_lshl_b32 s1, s1, 16 3857; GFX6-NEXT: s_or_b32 s0, s0, s1 3858; GFX6-NEXT: ; return to shader part epilog 3859; 3860; GFX8-LABEL: s_fshl_v2i16: 3861; GFX8: ; %bb.0: 3862; GFX8-NEXT: s_lshr_b32 s4, s1, 16 3863; GFX8-NEXT: s_lshr_b32 s5, s2, 16 3864; GFX8-NEXT: s_and_b32 s6, s2, 15 3865; GFX8-NEXT: s_andn2_b32 s2, 15, s2 3866; GFX8-NEXT: s_and_b32 s1, 0xffff, s1 3867; GFX8-NEXT: s_and_b32 s6, 0xffff, s6 3868; GFX8-NEXT: s_lshr_b32 s1, s1, 1 3869; GFX8-NEXT: s_and_b32 s2, 0xffff, s2 3870; GFX8-NEXT: s_lshr_b32 s3, s0, 16 3871; GFX8-NEXT: s_lshl_b32 s0, s0, s6 3872; GFX8-NEXT: s_lshr_b32 s1, s1, s2 3873; GFX8-NEXT: s_or_b32 s0, s0, s1 3874; GFX8-NEXT: s_and_b32 s1, s5, 15 3875; GFX8-NEXT: s_andn2_b32 s2, 15, s5 3876; GFX8-NEXT: s_and_b32 s1, 0xffff, s1 3877; GFX8-NEXT: s_lshl_b32 s1, s3, s1 3878; GFX8-NEXT: s_lshr_b32 s3, s4, 1 3879; GFX8-NEXT: s_and_b32 s2, 0xffff, s2 3880; GFX8-NEXT: s_lshr_b32 s2, s3, s2 3881; GFX8-NEXT: s_or_b32 s1, s1, s2 3882; GFX8-NEXT: s_and_b32 s1, 0xffff, s1 3883; GFX8-NEXT: s_and_b32 s0, 0xffff, s0 3884; GFX8-NEXT: s_lshl_b32 s1, s1, 16 3885; GFX8-NEXT: s_or_b32 s0, s0, s1 3886; GFX8-NEXT: ; return to shader part epilog 3887; 3888; GFX9-LABEL: s_fshl_v2i16: 3889; GFX9: ; %bb.0: 3890; GFX9-NEXT: s_and_b32 s3, s2, 0xf000f 3891; GFX9-NEXT: s_lshr_b32 s4, s0, 16 3892; GFX9-NEXT: s_lshr_b32 s5, s3, 16 3893; GFX9-NEXT: s_lshl_b32 s0, s0, s3 3894; GFX9-NEXT: s_lshl_b32 s3, s4, s5 3895; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s3 3896; GFX9-NEXT: s_lshr_b32 s3, s1, 16 3897; GFX9-NEXT: s_and_b32 s1, s1, 0xffff 3898; GFX9-NEXT: s_lshr_b32 s1, s1, 0x10001 3899; GFX9-NEXT: s_lshr_b32 s3, s3, 1 3900; GFX9-NEXT: s_andn2_b32 s2, 0xf000f, s2 3901; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s3 3902; GFX9-NEXT: s_lshr_b32 s3, s1, 16 3903; GFX9-NEXT: s_and_b32 s1, s1, 0xffff 3904; GFX9-NEXT: s_lshr_b32 s4, s2, 16 3905; GFX9-NEXT: s_lshr_b32 s1, s1, s2 3906; GFX9-NEXT: s_lshr_b32 s2, s3, s4 3907; GFX9-NEXT: 
s_pack_ll_b32_b16 s1, s1, s2 3908; GFX9-NEXT: s_or_b32 s0, s0, s1 3909; GFX9-NEXT: ; return to shader part epilog 3910; 3911; GFX10-LABEL: s_fshl_v2i16: 3912; GFX10: ; %bb.0: 3913; GFX10-NEXT: s_and_b32 s6, s1, 0xffff 3914; GFX10-NEXT: s_lshr_b32 s1, s1, 16 3915; GFX10-NEXT: s_and_b32 s3, s2, 0xf000f 3916; GFX10-NEXT: s_lshr_b32 s6, s6, 0x10001 3917; GFX10-NEXT: s_lshr_b32 s1, s1, 1 3918; GFX10-NEXT: s_andn2_b32 s2, 0xf000f, s2 3919; GFX10-NEXT: s_lshr_b32 s4, s0, 16 3920; GFX10-NEXT: s_lshr_b32 s5, s3, 16 3921; GFX10-NEXT: s_pack_ll_b32_b16 s1, s6, s1 3922; GFX10-NEXT: s_lshl_b32 s0, s0, s3 3923; GFX10-NEXT: s_lshl_b32 s3, s4, s5 3924; GFX10-NEXT: s_lshr_b32 s4, s1, 16 3925; GFX10-NEXT: s_and_b32 s1, s1, 0xffff 3926; GFX10-NEXT: s_lshr_b32 s5, s2, 16 3927; GFX10-NEXT: s_lshr_b32 s1, s1, s2 3928; GFX10-NEXT: s_lshr_b32 s2, s4, s5 3929; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s3 3930; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s2 3931; GFX10-NEXT: s_or_b32 s0, s0, s1 3932; GFX10-NEXT: ; return to shader part epilog 3933; 3934; GFX11-LABEL: s_fshl_v2i16: 3935; GFX11: ; %bb.0: 3936; GFX11-NEXT: s_and_b32 s6, s1, 0xffff 3937; GFX11-NEXT: s_lshr_b32 s1, s1, 16 3938; GFX11-NEXT: s_and_b32 s3, s2, 0xf000f 3939; GFX11-NEXT: s_lshr_b32 s6, s6, 0x10001 3940; GFX11-NEXT: s_lshr_b32 s1, s1, 1 3941; GFX11-NEXT: s_and_not1_b32 s2, 0xf000f, s2 3942; GFX11-NEXT: s_lshr_b32 s4, s0, 16 3943; GFX11-NEXT: s_lshr_b32 s5, s3, 16 3944; GFX11-NEXT: s_pack_ll_b32_b16 s1, s6, s1 3945; GFX11-NEXT: s_lshl_b32 s0, s0, s3 3946; GFX11-NEXT: s_lshl_b32 s3, s4, s5 3947; GFX11-NEXT: s_lshr_b32 s4, s1, 16 3948; GFX11-NEXT: s_and_b32 s1, s1, 0xffff 3949; GFX11-NEXT: s_lshr_b32 s5, s2, 16 3950; GFX11-NEXT: s_lshr_b32 s1, s1, s2 3951; GFX11-NEXT: s_lshr_b32 s2, s4, s5 3952; GFX11-NEXT: s_pack_ll_b32_b16 s0, s0, s3 3953; GFX11-NEXT: s_pack_ll_b32_b16 s1, s1, s2 3954; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 3955; GFX11-NEXT: s_or_b32 s0, s0, s1 3956; GFX11-NEXT: ; return to shader part epilog 3957 %result 
= call <2 x i16> @llvm.fshl.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt) 3958 %cast = bitcast <2 x i16> %result to i32 3959 ret i32 %cast 3960} 3961 3962define <2 x i16> @v_fshl_v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt) { 3963; GFX6-LABEL: v_fshl_v2i16: 3964; GFX6: ; %bb.0: 3965; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3966; GFX6-NEXT: v_and_b32_e32 v6, 15, v4 3967; GFX6-NEXT: v_xor_b32_e32 v4, -1, v4 3968; GFX6-NEXT: v_and_b32_e32 v4, 15, v4 3969; GFX6-NEXT: v_and_b32_e32 v6, 0xffff, v6 3970; GFX6-NEXT: v_bfe_u32 v2, v2, 1, 15 3971; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4 3972; GFX6-NEXT: v_lshlrev_b32_e32 v0, v6, v0 3973; GFX6-NEXT: v_lshrrev_b32_e32 v2, v4, v2 3974; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 3975; GFX6-NEXT: v_and_b32_e32 v2, 15, v5 3976; GFX6-NEXT: v_xor_b32_e32 v4, -1, v5 3977; GFX6-NEXT: v_and_b32_e32 v4, 15, v4 3978; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2 3979; GFX6-NEXT: v_lshlrev_b32_e32 v1, v2, v1 3980; GFX6-NEXT: v_bfe_u32 v2, v3, 1, 15 3981; GFX6-NEXT: v_and_b32_e32 v3, 0xffff, v4 3982; GFX6-NEXT: v_lshrrev_b32_e32 v2, v3, v2 3983; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 3984; GFX6-NEXT: s_setpc_b64 s[30:31] 3985; 3986; GFX8-LABEL: v_fshl_v2i16: 3987; GFX8: ; %bb.0: 3988; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3989; GFX8-NEXT: v_xor_b32_e32 v4, -1, v2 3990; GFX8-NEXT: v_and_b32_e32 v3, 15, v2 3991; GFX8-NEXT: v_and_b32_e32 v4, 15, v4 3992; GFX8-NEXT: v_lshrrev_b16_e32 v5, 1, v1 3993; GFX8-NEXT: v_lshlrev_b16_e32 v3, v3, v0 3994; GFX8-NEXT: v_lshrrev_b16_e32 v4, v4, v5 3995; GFX8-NEXT: v_or_b32_e32 v3, v3, v4 3996; GFX8-NEXT: v_mov_b32_e32 v4, 15 3997; GFX8-NEXT: v_and_b32_sdwa v4, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 3998; GFX8-NEXT: v_mov_b32_e32 v5, -1 3999; GFX8-NEXT: v_xor_b32_sdwa v2, v2, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 4000; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD 
src1_sel:WORD_1 4001; GFX8-NEXT: v_mov_b32_e32 v4, 1 4002; GFX8-NEXT: v_and_b32_e32 v2, 15, v2 4003; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 4004; GFX8-NEXT: v_lshrrev_b16_e32 v1, v2, v1 4005; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 4006; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0 4007; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0 4008; GFX8-NEXT: v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 4009; GFX8-NEXT: s_setpc_b64 s[30:31] 4010; 4011; GFX9-LABEL: v_fshl_v2i16: 4012; GFX9: ; %bb.0: 4013; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4014; GFX9-NEXT: v_and_b32_e32 v3, 0xf000f, v2 4015; GFX9-NEXT: v_xor_b32_e32 v2, -1, v2 4016; GFX9-NEXT: v_and_b32_e32 v2, 0xf000f, v2 4017; GFX9-NEXT: v_pk_lshrrev_b16 v1, 1, v1 op_sel_hi:[0,1] 4018; GFX9-NEXT: v_pk_lshlrev_b16 v0, v3, v0 4019; GFX9-NEXT: v_pk_lshrrev_b16 v1, v2, v1 4020; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 4021; GFX9-NEXT: s_setpc_b64 s[30:31] 4022; 4023; GFX10-LABEL: v_fshl_v2i16: 4024; GFX10: ; %bb.0: 4025; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4026; GFX10-NEXT: v_xor_b32_e32 v3, -1, v2 4027; GFX10-NEXT: v_and_b32_e32 v2, 0xf000f, v2 4028; GFX10-NEXT: v_pk_lshrrev_b16 v1, 1, v1 op_sel_hi:[0,1] 4029; GFX10-NEXT: v_and_b32_e32 v3, 0xf000f, v3 4030; GFX10-NEXT: v_pk_lshlrev_b16 v0, v2, v0 4031; GFX10-NEXT: v_pk_lshrrev_b16 v1, v3, v1 4032; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 4033; GFX10-NEXT: s_setpc_b64 s[30:31] 4034; 4035; GFX11-LABEL: v_fshl_v2i16: 4036; GFX11: ; %bb.0: 4037; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4038; GFX11-NEXT: v_xor_b32_e32 v3, -1, v2 4039; GFX11-NEXT: v_and_b32_e32 v2, 0xf000f, v2 4040; GFX11-NEXT: v_pk_lshrrev_b16 v1, 1, v1 op_sel_hi:[0,1] 4041; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 4042; GFX11-NEXT: v_and_b32_e32 v3, 0xf000f, v3 4043; GFX11-NEXT: v_pk_lshlrev_b16 v0, v2, v0 4044; GFX11-NEXT: s_delay_alu 
instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 4045; GFX11-NEXT: v_pk_lshrrev_b16 v1, v3, v1 4046; GFX11-NEXT: v_or_b32_e32 v0, v0, v1 4047; GFX11-NEXT: s_setpc_b64 s[30:31] 4048 %result = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt) 4049 ret <2 x i16> %result 4050} 4051 4052define <2 x i16> @v_fshl_v2i16_4_8(<2 x i16> %lhs, <2 x i16> %rhs) { 4053; GFX6-LABEL: v_fshl_v2i16_4_8: 4054; GFX6: ; %bb.0: 4055; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4056; GFX6-NEXT: v_bfe_u32 v2, v2, 1, 15 4057; GFX6-NEXT: v_lshlrev_b32_e32 v0, 4, v0 4058; GFX6-NEXT: v_lshrrev_b32_e32 v2, 11, v2 4059; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 4060; GFX6-NEXT: v_bfe_u32 v2, v3, 1, 15 4061; GFX6-NEXT: v_lshlrev_b32_e32 v1, 8, v1 4062; GFX6-NEXT: v_lshrrev_b32_e32 v2, 7, v2 4063; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 4064; GFX6-NEXT: s_setpc_b64 s[30:31] 4065; 4066; GFX8-LABEL: v_fshl_v2i16_4_8: 4067; GFX8: ; %bb.0: 4068; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4069; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v0 4070; GFX8-NEXT: v_lshlrev_b16_e32 v0, 4, v0 4071; GFX8-NEXT: v_lshrrev_b16_e32 v3, 12, v1 4072; GFX8-NEXT: v_or_b32_e32 v0, v0, v3 4073; GFX8-NEXT: v_mov_b32_e32 v3, 8 4074; GFX8-NEXT: v_lshlrev_b16_e32 v2, 8, v2 4075; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 4076; GFX8-NEXT: v_or_b32_e32 v1, v2, v1 4077; GFX8-NEXT: v_and_b32_e32 v1, 0xffff, v1 4078; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1 4079; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 4080; GFX8-NEXT: s_setpc_b64 s[30:31] 4081; 4082; GFX9-LABEL: v_fshl_v2i16_4_8: 4083; GFX9: ; %bb.0: 4084; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4085; GFX9-NEXT: v_mov_b32_e32 v2, 0x80004 4086; GFX9-NEXT: v_pk_lshlrev_b16 v0, v2, v0 4087; GFX9-NEXT: v_mov_b32_e32 v2, 0x8000c 4088; GFX9-NEXT: v_pk_lshrrev_b16 v1, v2, v1 4089; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 
4090; GFX9-NEXT: s_setpc_b64 s[30:31] 4091; 4092; GFX10-LABEL: v_fshl_v2i16_4_8: 4093; GFX10: ; %bb.0: 4094; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4095; GFX10-NEXT: v_pk_lshlrev_b16 v0, 0x80004, v0 4096; GFX10-NEXT: v_pk_lshrrev_b16 v1, 0x8000c, v1 4097; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 4098; GFX10-NEXT: s_setpc_b64 s[30:31] 4099; 4100; GFX11-LABEL: v_fshl_v2i16_4_8: 4101; GFX11: ; %bb.0: 4102; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4103; GFX11-NEXT: v_pk_lshlrev_b16 v0, 0x80004, v0 4104; GFX11-NEXT: v_pk_lshrrev_b16 v1, 0x8000c, v1 4105; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 4106; GFX11-NEXT: v_or_b32_e32 v0, v0, v1 4107; GFX11-NEXT: s_setpc_b64 s[30:31] 4108 %result = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> <i16 4, i16 8>) 4109 ret <2 x i16> %result 4110} 4111 4112define amdgpu_ps float @v_fshl_v2i16_ssv(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs, <2 x i16> %amt) { 4113; GFX6-LABEL: v_fshl_v2i16_ssv: 4114; GFX6: ; %bb.0: 4115; GFX6-NEXT: v_and_b32_e32 v2, 15, v0 4116; GFX6-NEXT: v_xor_b32_e32 v0, -1, v0 4117; GFX6-NEXT: v_and_b32_e32 v0, 15, v0 4118; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2 4119; GFX6-NEXT: v_lshl_b32_e32 v2, s0, v2 4120; GFX6-NEXT: s_bfe_u32 s0, s2, 0xf0001 4121; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 4122; GFX6-NEXT: v_lshr_b32_e32 v0, s0, v0 4123; GFX6-NEXT: v_or_b32_e32 v0, v2, v0 4124; GFX6-NEXT: v_and_b32_e32 v2, 15, v1 4125; GFX6-NEXT: v_xor_b32_e32 v1, -1, v1 4126; GFX6-NEXT: v_and_b32_e32 v1, 15, v1 4127; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2 4128; GFX6-NEXT: s_bfe_u32 s0, s3, 0xf0001 4129; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1 4130; GFX6-NEXT: v_lshl_b32_e32 v2, s1, v2 4131; GFX6-NEXT: v_lshr_b32_e32 v1, s0, v1 4132; GFX6-NEXT: v_or_b32_e32 v1, v2, v1 4133; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1 4134; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 4135; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 4136; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 4137; GFX6-NEXT: ; return to shader 
part epilog 4138; 4139; GFX8-LABEL: v_fshl_v2i16_ssv: 4140; GFX8: ; %bb.0: 4141; GFX8-NEXT: v_and_b32_e32 v1, 15, v0 4142; GFX8-NEXT: s_lshr_b32 s2, s0, 16 4143; GFX8-NEXT: v_xor_b32_e32 v2, -1, v0 4144; GFX8-NEXT: v_lshlrev_b16_e64 v1, v1, s0 4145; GFX8-NEXT: s_and_b32 s0, 0xffff, s1 4146; GFX8-NEXT: v_and_b32_e32 v2, 15, v2 4147; GFX8-NEXT: s_lshr_b32 s0, s0, 1 4148; GFX8-NEXT: v_lshrrev_b16_e64 v2, v2, s0 4149; GFX8-NEXT: v_or_b32_e32 v1, v1, v2 4150; GFX8-NEXT: v_mov_b32_e32 v2, 15 4151; GFX8-NEXT: v_mov_b32_e32 v3, -1 4152; GFX8-NEXT: s_lshr_b32 s3, s1, 16 4153; GFX8-NEXT: v_and_b32_sdwa v2, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 4154; GFX8-NEXT: v_xor_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 4155; GFX8-NEXT: v_and_b32_e32 v0, 15, v0 4156; GFX8-NEXT: s_lshr_b32 s0, s3, 1 4157; GFX8-NEXT: v_lshlrev_b16_e64 v2, v2, s2 4158; GFX8-NEXT: v_lshrrev_b16_e64 v0, v0, s0 4159; GFX8-NEXT: v_or_b32_e32 v0, v2, v0 4160; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0 4161; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0 4162; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 4163; GFX8-NEXT: ; return to shader part epilog 4164; 4165; GFX9-LABEL: v_fshl_v2i16_ssv: 4166; GFX9: ; %bb.0: 4167; GFX9-NEXT: v_and_b32_e32 v1, 0xf000f, v0 4168; GFX9-NEXT: v_pk_lshlrev_b16 v1, v1, s0 4169; GFX9-NEXT: s_lshr_b32 s0, s1, 16 4170; GFX9-NEXT: s_and_b32 s1, s1, 0xffff 4171; GFX9-NEXT: v_xor_b32_e32 v0, -1, v0 4172; GFX9-NEXT: s_lshr_b32 s1, s1, 0x10001 4173; GFX9-NEXT: s_lshr_b32 s0, s0, 1 4174; GFX9-NEXT: v_and_b32_e32 v0, 0xf000f, v0 4175; GFX9-NEXT: s_pack_ll_b32_b16 s0, s1, s0 4176; GFX9-NEXT: v_pk_lshrrev_b16 v0, v0, s0 4177; GFX9-NEXT: v_or_b32_e32 v0, v1, v0 4178; GFX9-NEXT: ; return to shader part epilog 4179; 4180; GFX10-LABEL: v_fshl_v2i16_ssv: 4181; GFX10: ; %bb.0: 4182; GFX10-NEXT: v_xor_b32_e32 v1, -1, v0 4183; GFX10-NEXT: s_lshr_b32 s2, s1, 16 4184; 
GFX10-NEXT: s_and_b32 s1, s1, 0xffff 4185; GFX10-NEXT: v_and_b32_e32 v0, 0xf000f, v0 4186; GFX10-NEXT: s_lshr_b32 s1, s1, 0x10001 4187; GFX10-NEXT: v_and_b32_e32 v1, 0xf000f, v1 4188; GFX10-NEXT: s_lshr_b32 s2, s2, 1 4189; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s2 4190; GFX10-NEXT: v_pk_lshlrev_b16 v0, v0, s0 4191; GFX10-NEXT: v_pk_lshrrev_b16 v1, v1, s1 4192; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 4193; GFX10-NEXT: ; return to shader part epilog 4194; 4195; GFX11-LABEL: v_fshl_v2i16_ssv: 4196; GFX11: ; %bb.0: 4197; GFX11-NEXT: v_xor_b32_e32 v1, -1, v0 4198; GFX11-NEXT: s_lshr_b32 s2, s1, 16 4199; GFX11-NEXT: s_and_b32 s1, s1, 0xffff 4200; GFX11-NEXT: v_and_b32_e32 v0, 0xf000f, v0 4201; GFX11-NEXT: s_lshr_b32 s1, s1, 0x10001 4202; GFX11-NEXT: v_and_b32_e32 v1, 0xf000f, v1 4203; GFX11-NEXT: s_lshr_b32 s2, s2, 1 4204; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 4205; GFX11-NEXT: s_pack_ll_b32_b16 s1, s1, s2 4206; GFX11-NEXT: v_pk_lshlrev_b16 v0, v0, s0 4207; GFX11-NEXT: v_pk_lshrrev_b16 v1, v1, s1 4208; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 4209; GFX11-NEXT: v_or_b32_e32 v0, v0, v1 4210; GFX11-NEXT: ; return to shader part epilog 4211 %result = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt) 4212 %cast = bitcast <2 x i16> %result to float 4213 ret float %cast 4214} 4215 4216define amdgpu_ps float @v_fshl_v2i16_svs(<2 x i16> inreg %lhs, <2 x i16> %rhs, <2 x i16> inreg %amt) { 4217; GFX6-LABEL: v_fshl_v2i16_svs: 4218; GFX6: ; %bb.0: 4219; GFX6-NEXT: s_and_b32 s4, s2, 15 4220; GFX6-NEXT: s_andn2_b32 s2, 15, s2 4221; GFX6-NEXT: s_and_b32 s4, 0xffff, s4 4222; GFX6-NEXT: v_bfe_u32 v0, v0, 1, 15 4223; GFX6-NEXT: s_and_b32 s2, 0xffff, s2 4224; GFX6-NEXT: s_lshl_b32 s0, s0, s4 4225; GFX6-NEXT: v_lshrrev_b32_e32 v0, s2, v0 4226; GFX6-NEXT: v_or_b32_e32 v0, s0, v0 4227; GFX6-NEXT: s_and_b32 s0, s3, 15 4228; GFX6-NEXT: s_andn2_b32 s2, 15, s3 4229; GFX6-NEXT: s_and_b32 s0, 0xffff, s0 4230; GFX6-NEXT: 
s_lshl_b32 s0, s1, s0 4231; GFX6-NEXT: v_bfe_u32 v1, v1, 1, 15 4232; GFX6-NEXT: s_and_b32 s1, 0xffff, s2 4233; GFX6-NEXT: v_lshrrev_b32_e32 v1, s1, v1 4234; GFX6-NEXT: v_or_b32_e32 v1, s0, v1 4235; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1 4236; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 4237; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 4238; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 4239; GFX6-NEXT: ; return to shader part epilog 4240; 4241; GFX8-LABEL: v_fshl_v2i16_svs: 4242; GFX8: ; %bb.0: 4243; GFX8-NEXT: s_and_b32 s4, s1, 15 4244; GFX8-NEXT: s_lshr_b32 s3, s1, 16 4245; GFX8-NEXT: s_andn2_b32 s1, 15, s1 4246; GFX8-NEXT: s_and_b32 s4, 0xffff, s4 4247; GFX8-NEXT: v_lshrrev_b16_e32 v1, 1, v0 4248; GFX8-NEXT: s_lshr_b32 s2, s0, 16 4249; GFX8-NEXT: s_lshl_b32 s0, s0, s4 4250; GFX8-NEXT: v_lshrrev_b16_e32 v1, s1, v1 4251; GFX8-NEXT: v_or_b32_e32 v1, s0, v1 4252; GFX8-NEXT: s_and_b32 s0, s3, 15 4253; GFX8-NEXT: v_mov_b32_e32 v2, 1 4254; GFX8-NEXT: s_andn2_b32 s1, 15, s3 4255; GFX8-NEXT: s_and_b32 s0, 0xffff, s0 4256; GFX8-NEXT: v_lshrrev_b16_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 4257; GFX8-NEXT: s_lshl_b32 s0, s2, s0 4258; GFX8-NEXT: v_lshrrev_b16_e32 v0, s1, v0 4259; GFX8-NEXT: v_or_b32_e32 v0, s0, v0 4260; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0 4261; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0 4262; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 4263; GFX8-NEXT: ; return to shader part epilog 4264; 4265; GFX9-LABEL: v_fshl_v2i16_svs: 4266; GFX9: ; %bb.0: 4267; GFX9-NEXT: s_and_b32 s2, s1, 0xf000f 4268; GFX9-NEXT: s_lshr_b32 s3, s0, 16 4269; GFX9-NEXT: s_lshr_b32 s4, s2, 16 4270; GFX9-NEXT: s_andn2_b32 s1, 0xf000f, s1 4271; GFX9-NEXT: s_lshl_b32 s0, s0, s2 4272; GFX9-NEXT: s_lshl_b32 s2, s3, s4 4273; GFX9-NEXT: v_pk_lshrrev_b16 v0, 1, v0 op_sel_hi:[0,1] 4274; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s2 4275; GFX9-NEXT: v_pk_lshrrev_b16 v0, s1, v0 4276; GFX9-NEXT: v_or_b32_e32 v0, s0, v0 
4277; GFX9-NEXT: ; return to shader part epilog 4278; 4279; GFX10-LABEL: v_fshl_v2i16_svs: 4280; GFX10: ; %bb.0: 4281; GFX10-NEXT: v_pk_lshrrev_b16 v0, 1, v0 op_sel_hi:[0,1] 4282; GFX10-NEXT: s_and_b32 s2, s1, 0xf000f 4283; GFX10-NEXT: s_andn2_b32 s1, 0xf000f, s1 4284; GFX10-NEXT: s_lshr_b32 s3, s0, 16 4285; GFX10-NEXT: s_lshr_b32 s4, s2, 16 4286; GFX10-NEXT: v_pk_lshrrev_b16 v0, s1, v0 4287; GFX10-NEXT: s_lshl_b32 s0, s0, s2 4288; GFX10-NEXT: s_lshl_b32 s1, s3, s4 4289; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s1 4290; GFX10-NEXT: v_or_b32_e32 v0, s0, v0 4291; GFX10-NEXT: ; return to shader part epilog 4292; 4293; GFX11-LABEL: v_fshl_v2i16_svs: 4294; GFX11: ; %bb.0: 4295; GFX11-NEXT: v_pk_lshrrev_b16 v0, 1, v0 op_sel_hi:[0,1] 4296; GFX11-NEXT: s_and_b32 s2, s1, 0xf000f 4297; GFX11-NEXT: s_and_not1_b32 s1, 0xf000f, s1 4298; GFX11-NEXT: s_lshr_b32 s3, s0, 16 4299; GFX11-NEXT: s_lshr_b32 s4, s2, 16 4300; GFX11-NEXT: v_pk_lshrrev_b16 v0, s1, v0 4301; GFX11-NEXT: s_lshl_b32 s0, s0, s2 4302; GFX11-NEXT: s_lshl_b32 s1, s3, s4 4303; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 4304; GFX11-NEXT: s_pack_ll_b32_b16 s0, s0, s1 4305; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) 4306; GFX11-NEXT: v_or_b32_e32 v0, s0, v0 4307; GFX11-NEXT: ; return to shader part epilog 4308 %result = call <2 x i16> @llvm.fshl.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt) 4309 %cast = bitcast <2 x i16> %result to float 4310 ret float %cast 4311} 4312 4313define amdgpu_ps float @v_fshl_v2i16_vss(<2 x i16> %lhs, <2 x i16> inreg %rhs, <2 x i16> inreg %amt) { 4314; GFX6-LABEL: v_fshl_v2i16_vss: 4315; GFX6: ; %bb.0: 4316; GFX6-NEXT: s_and_b32 s4, s2, 15 4317; GFX6-NEXT: s_andn2_b32 s2, 15, s2 4318; GFX6-NEXT: s_and_b32 s4, 0xffff, s4 4319; GFX6-NEXT: s_bfe_u32 s0, s0, 0xf0001 4320; GFX6-NEXT: s_and_b32 s2, 0xffff, s2 4321; GFX6-NEXT: v_lshlrev_b32_e32 v0, s4, v0 4322; GFX6-NEXT: s_lshr_b32 s0, s0, s2 4323; GFX6-NEXT: v_or_b32_e32 v0, s0, v0 4324; GFX6-NEXT: s_and_b32 s0, 
s3, 15 4325; GFX6-NEXT: s_andn2_b32 s2, 15, s3 4326; GFX6-NEXT: s_and_b32 s0, 0xffff, s0 4327; GFX6-NEXT: v_lshlrev_b32_e32 v1, s0, v1 4328; GFX6-NEXT: s_bfe_u32 s0, s1, 0xf0001 4329; GFX6-NEXT: s_and_b32 s1, 0xffff, s2 4330; GFX6-NEXT: s_lshr_b32 s0, s0, s1 4331; GFX6-NEXT: v_or_b32_e32 v1, s0, v1 4332; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1 4333; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 4334; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 4335; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 4336; GFX6-NEXT: ; return to shader part epilog 4337; 4338; GFX8-LABEL: v_fshl_v2i16_vss: 4339; GFX8: ; %bb.0: 4340; GFX8-NEXT: s_lshr_b32 s2, s0, 16 4341; GFX8-NEXT: s_lshr_b32 s3, s1, 16 4342; GFX8-NEXT: s_and_b32 s4, s1, 15 4343; GFX8-NEXT: s_andn2_b32 s1, 15, s1 4344; GFX8-NEXT: s_and_b32 s0, 0xffff, s0 4345; GFX8-NEXT: s_lshr_b32 s0, s0, 1 4346; GFX8-NEXT: s_and_b32 s1, 0xffff, s1 4347; GFX8-NEXT: v_lshlrev_b16_e32 v1, s4, v0 4348; GFX8-NEXT: s_lshr_b32 s0, s0, s1 4349; GFX8-NEXT: v_or_b32_e32 v1, s0, v1 4350; GFX8-NEXT: s_and_b32 s0, s3, 15 4351; GFX8-NEXT: s_andn2_b32 s1, 15, s3 4352; GFX8-NEXT: v_mov_b32_e32 v2, s0 4353; GFX8-NEXT: s_lshr_b32 s0, s2, 1 4354; GFX8-NEXT: s_and_b32 s1, 0xffff, s1 4355; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 4356; GFX8-NEXT: s_lshr_b32 s0, s0, s1 4357; GFX8-NEXT: v_or_b32_e32 v0, s0, v0 4358; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0 4359; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0 4360; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 4361; GFX8-NEXT: ; return to shader part epilog 4362; 4363; GFX9-LABEL: v_fshl_v2i16_vss: 4364; GFX9: ; %bb.0: 4365; GFX9-NEXT: s_and_b32 s2, s1, 0xf000f 4366; GFX9-NEXT: v_pk_lshlrev_b16 v0, s2, v0 4367; GFX9-NEXT: s_lshr_b32 s2, s0, 16 4368; GFX9-NEXT: s_and_b32 s0, s0, 0xffff 4369; GFX9-NEXT: s_lshr_b32 s0, s0, 0x10001 4370; GFX9-NEXT: s_lshr_b32 s2, s2, 1 4371; GFX9-NEXT: s_andn2_b32 s1, 0xf000f, s1 4372; 
GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s2 4373; GFX9-NEXT: s_lshr_b32 s2, s0, 16 4374; GFX9-NEXT: s_and_b32 s0, s0, 0xffff 4375; GFX9-NEXT: s_lshr_b32 s3, s1, 16 4376; GFX9-NEXT: s_lshr_b32 s0, s0, s1 4377; GFX9-NEXT: s_lshr_b32 s1, s2, s3 4378; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s1 4379; GFX9-NEXT: v_or_b32_e32 v0, s0, v0 4380; GFX9-NEXT: ; return to shader part epilog 4381; 4382; GFX10-LABEL: v_fshl_v2i16_vss: 4383; GFX10: ; %bb.0: 4384; GFX10-NEXT: s_and_b32 s3, s0, 0xffff 4385; GFX10-NEXT: s_lshr_b32 s0, s0, 16 4386; GFX10-NEXT: s_lshr_b32 s3, s3, 0x10001 4387; GFX10-NEXT: s_lshr_b32 s0, s0, 1 4388; GFX10-NEXT: s_and_b32 s2, s1, 0xf000f 4389; GFX10-NEXT: s_andn2_b32 s1, 0xf000f, s1 4390; GFX10-NEXT: s_pack_ll_b32_b16 s0, s3, s0 4391; GFX10-NEXT: v_pk_lshlrev_b16 v0, s2, v0 4392; GFX10-NEXT: s_lshr_b32 s2, s0, 16 4393; GFX10-NEXT: s_and_b32 s0, s0, 0xffff 4394; GFX10-NEXT: s_lshr_b32 s3, s1, 16 4395; GFX10-NEXT: s_lshr_b32 s0, s0, s1 4396; GFX10-NEXT: s_lshr_b32 s1, s2, s3 4397; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s1 4398; GFX10-NEXT: v_or_b32_e32 v0, s0, v0 4399; GFX10-NEXT: ; return to shader part epilog 4400; 4401; GFX11-LABEL: v_fshl_v2i16_vss: 4402; GFX11: ; %bb.0: 4403; GFX11-NEXT: s_and_b32 s3, s0, 0xffff 4404; GFX11-NEXT: s_lshr_b32 s0, s0, 16 4405; GFX11-NEXT: s_lshr_b32 s3, s3, 0x10001 4406; GFX11-NEXT: s_lshr_b32 s0, s0, 1 4407; GFX11-NEXT: s_and_b32 s2, s1, 0xf000f 4408; GFX11-NEXT: s_and_not1_b32 s1, 0xf000f, s1 4409; GFX11-NEXT: s_pack_ll_b32_b16 s0, s3, s0 4410; GFX11-NEXT: v_pk_lshlrev_b16 v0, s2, v0 4411; GFX11-NEXT: s_lshr_b32 s2, s0, 16 4412; GFX11-NEXT: s_and_b32 s0, s0, 0xffff 4413; GFX11-NEXT: s_lshr_b32 s3, s1, 16 4414; GFX11-NEXT: s_lshr_b32 s0, s0, s1 4415; GFX11-NEXT: s_lshr_b32 s1, s2, s3 4416; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) 4417; GFX11-NEXT: s_pack_ll_b32_b16 s0, s0, s1 4418; GFX11-NEXT: v_or_b32_e32 v0, s0, v0 4419; GFX11-NEXT: ; return to shader part epilog 4420 %result = 
call <2 x i16> @llvm.fshl.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt) 4421 %cast = bitcast <2 x i16> %result to float 4422 ret float %cast 4423} 4424 4425 4426define amdgpu_ps i48 @s_fshl_v3i16(<3 x i16> inreg %lhs, <3 x i16> inreg %rhs, <3 x i16> inreg %amt) { 4427; GFX6-LABEL: s_fshl_v3i16: 4428; GFX6: ; %bb.0: 4429; GFX6-NEXT: s_and_b32 s9, s6, 15 4430; GFX6-NEXT: s_andn2_b32 s6, 15, s6 4431; GFX6-NEXT: s_and_b32 s9, 0xffff, s9 4432; GFX6-NEXT: s_bfe_u32 s3, s3, 0xf0001 4433; GFX6-NEXT: s_and_b32 s6, 0xffff, s6 4434; GFX6-NEXT: s_lshl_b32 s0, s0, s9 4435; GFX6-NEXT: s_lshr_b32 s3, s3, s6 4436; GFX6-NEXT: s_or_b32 s0, s0, s3 4437; GFX6-NEXT: s_and_b32 s3, s7, 15 4438; GFX6-NEXT: s_andn2_b32 s6, 15, s7 4439; GFX6-NEXT: s_and_b32 s3, 0xffff, s3 4440; GFX6-NEXT: s_lshl_b32 s1, s1, s3 4441; GFX6-NEXT: s_bfe_u32 s3, s4, 0xf0001 4442; GFX6-NEXT: s_and_b32 s4, 0xffff, s6 4443; GFX6-NEXT: s_lshr_b32 s3, s3, s4 4444; GFX6-NEXT: s_or_b32 s1, s1, s3 4445; GFX6-NEXT: s_and_b32 s3, s8, 15 4446; GFX6-NEXT: s_andn2_b32 s4, 15, s8 4447; GFX6-NEXT: s_and_b32 s3, 0xffff, s3 4448; GFX6-NEXT: s_lshl_b32 s2, s2, s3 4449; GFX6-NEXT: s_bfe_u32 s3, s5, 0xf0001 4450; GFX6-NEXT: s_and_b32 s4, 0xffff, s4 4451; GFX6-NEXT: s_lshr_b32 s3, s3, s4 4452; GFX6-NEXT: s_and_b32 s1, 0xffff, s1 4453; GFX6-NEXT: s_or_b32 s2, s2, s3 4454; GFX6-NEXT: s_and_b32 s0, 0xffff, s0 4455; GFX6-NEXT: s_lshl_b32 s1, s1, 16 4456; GFX6-NEXT: s_or_b32 s0, s0, s1 4457; GFX6-NEXT: s_and_b32 s1, 0xffff, s2 4458; GFX6-NEXT: ; return to shader part epilog 4459; 4460; GFX8-LABEL: s_fshl_v3i16: 4461; GFX8: ; %bb.0: 4462; GFX8-NEXT: s_lshr_b32 s7, s2, 16 4463; GFX8-NEXT: s_lshr_b32 s8, s4, 16 4464; GFX8-NEXT: s_and_b32 s9, s4, 15 4465; GFX8-NEXT: s_andn2_b32 s4, 15, s4 4466; GFX8-NEXT: s_and_b32 s2, 0xffff, s2 4467; GFX8-NEXT: s_and_b32 s9, 0xffff, s9 4468; GFX8-NEXT: s_lshr_b32 s2, s2, 1 4469; GFX8-NEXT: s_and_b32 s4, 0xffff, s4 4470; GFX8-NEXT: s_lshr_b32 s6, s0, 16 4471; GFX8-NEXT: s_lshl_b32 s0, s0, s9 4472; 
GFX8-NEXT: s_lshr_b32 s2, s2, s4 4473; GFX8-NEXT: s_or_b32 s0, s0, s2 4474; GFX8-NEXT: s_and_b32 s2, s8, 15 4475; GFX8-NEXT: s_andn2_b32 s4, 15, s8 4476; GFX8-NEXT: s_and_b32 s2, 0xffff, s2 4477; GFX8-NEXT: s_lshl_b32 s2, s6, s2 4478; GFX8-NEXT: s_lshr_b32 s6, s7, 1 4479; GFX8-NEXT: s_and_b32 s4, 0xffff, s4 4480; GFX8-NEXT: s_lshr_b32 s4, s6, s4 4481; GFX8-NEXT: s_or_b32 s2, s2, s4 4482; GFX8-NEXT: s_and_b32 s4, s5, 15 4483; GFX8-NEXT: s_andn2_b32 s5, 15, s5 4484; GFX8-NEXT: s_and_b32 s4, 0xffff, s4 4485; GFX8-NEXT: s_and_b32 s3, 0xffff, s3 4486; GFX8-NEXT: s_lshl_b32 s1, s1, s4 4487; GFX8-NEXT: s_lshr_b32 s3, s3, 1 4488; GFX8-NEXT: s_and_b32 s4, 0xffff, s5 4489; GFX8-NEXT: s_lshr_b32 s3, s3, s4 4490; GFX8-NEXT: s_and_b32 s2, 0xffff, s2 4491; GFX8-NEXT: s_or_b32 s1, s1, s3 4492; GFX8-NEXT: s_and_b32 s0, 0xffff, s0 4493; GFX8-NEXT: s_lshl_b32 s2, s2, 16 4494; GFX8-NEXT: s_or_b32 s0, s0, s2 4495; GFX8-NEXT: s_and_b32 s1, 0xffff, s1 4496; GFX8-NEXT: ; return to shader part epilog 4497; 4498; GFX9-LABEL: s_fshl_v3i16: 4499; GFX9: ; %bb.0: 4500; GFX9-NEXT: s_and_b32 s6, s4, 0xf000f 4501; GFX9-NEXT: s_lshr_b32 s7, s0, 16 4502; GFX9-NEXT: s_lshr_b32 s8, s6, 16 4503; GFX9-NEXT: s_lshl_b32 s0, s0, s6 4504; GFX9-NEXT: s_lshl_b32 s6, s7, s8 4505; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s6 4506; GFX9-NEXT: s_lshr_b32 s6, s2, 16 4507; GFX9-NEXT: s_and_b32 s2, s2, 0xffff 4508; GFX9-NEXT: s_lshr_b32 s2, s2, 0x10001 4509; GFX9-NEXT: s_lshr_b32 s6, s6, 1 4510; GFX9-NEXT: s_andn2_b32 s4, 0xf000f, s4 4511; GFX9-NEXT: s_pack_ll_b32_b16 s2, s2, s6 4512; GFX9-NEXT: s_lshr_b32 s6, s2, 16 4513; GFX9-NEXT: s_and_b32 s2, s2, 0xffff 4514; GFX9-NEXT: s_lshr_b32 s7, s4, 16 4515; GFX9-NEXT: s_lshr_b32 s2, s2, s4 4516; GFX9-NEXT: s_lshr_b32 s4, s6, s7 4517; GFX9-NEXT: s_pack_ll_b32_b16 s2, s2, s4 4518; GFX9-NEXT: s_or_b32 s0, s0, s2 4519; GFX9-NEXT: s_and_b32 s2, s5, 0xf000f 4520; GFX9-NEXT: s_andn2_b32 s4, 0xf000f, s5 4521; GFX9-NEXT: s_lshr_b32 s5, s1, 16 4522; GFX9-NEXT: s_lshr_b32 s6, s2, 16 
4523; GFX9-NEXT: s_lshl_b32 s1, s1, s2 4524; GFX9-NEXT: s_lshl_b32 s2, s5, s6 4525; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s2 4526; GFX9-NEXT: s_lshr_b32 s2, s3, 16 4527; GFX9-NEXT: s_and_b32 s3, s3, 0xffff 4528; GFX9-NEXT: s_lshr_b32 s3, s3, 0x10001 4529; GFX9-NEXT: s_lshr_b32 s2, s2, 1 4530; GFX9-NEXT: s_pack_ll_b32_b16 s2, s3, s2 4531; GFX9-NEXT: s_lshr_b32 s3, s2, 16 4532; GFX9-NEXT: s_and_b32 s2, s2, 0xffff 4533; GFX9-NEXT: s_lshr_b32 s5, s4, 16 4534; GFX9-NEXT: s_lshr_b32 s2, s2, s4 4535; GFX9-NEXT: s_lshr_b32 s3, s3, s5 4536; GFX9-NEXT: s_pack_ll_b32_b16 s2, s2, s3 4537; GFX9-NEXT: s_or_b32 s1, s1, s2 4538; GFX9-NEXT: s_lshr_b32 s2, s0, 16 4539; GFX9-NEXT: s_and_b32 s0, s0, 0xffff 4540; GFX9-NEXT: s_lshl_b32 s2, s2, 16 4541; GFX9-NEXT: s_or_b32 s0, s0, s2 4542; GFX9-NEXT: s_and_b32 s1, s1, 0xffff 4543; GFX9-NEXT: ; return to shader part epilog 4544; 4545; GFX10-LABEL: s_fshl_v3i16: 4546; GFX10: ; %bb.0: 4547; GFX10-NEXT: s_and_b32 s9, s2, 0xffff 4548; GFX10-NEXT: s_lshr_b32 s2, s2, 16 4549; GFX10-NEXT: s_and_b32 s6, s4, 0xf000f 4550; GFX10-NEXT: s_lshr_b32 s9, s9, 0x10001 4551; GFX10-NEXT: s_lshr_b32 s2, s2, 1 4552; GFX10-NEXT: s_andn2_b32 s4, 0xf000f, s4 4553; GFX10-NEXT: s_lshr_b32 s7, s0, 16 4554; GFX10-NEXT: s_lshr_b32 s8, s6, 16 4555; GFX10-NEXT: s_pack_ll_b32_b16 s2, s9, s2 4556; GFX10-NEXT: s_lshl_b32 s0, s0, s6 4557; GFX10-NEXT: s_lshl_b32 s6, s7, s8 4558; GFX10-NEXT: s_lshr_b32 s7, s2, 16 4559; GFX10-NEXT: s_and_b32 s2, s2, 0xffff 4560; GFX10-NEXT: s_lshr_b32 s8, s4, 16 4561; GFX10-NEXT: s_lshr_b32 s2, s2, s4 4562; GFX10-NEXT: s_lshr_b32 s4, s7, s8 4563; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s6 4564; GFX10-NEXT: s_pack_ll_b32_b16 s2, s2, s4 4565; GFX10-NEXT: s_and_b32 s7, s3, 0xffff 4566; GFX10-NEXT: s_lshr_b32 s3, s3, 16 4567; GFX10-NEXT: s_or_b32 s0, s0, s2 4568; GFX10-NEXT: s_and_b32 s2, s5, 0xf000f 4569; GFX10-NEXT: s_lshr_b32 s7, s7, 0x10001 4570; GFX10-NEXT: s_lshr_b32 s3, s3, 1 4571; GFX10-NEXT: s_andn2_b32 s4, 0xf000f, s5 4572; GFX10-NEXT: 
s_lshr_b32 s5, s1, 16 4573; GFX10-NEXT: s_lshr_b32 s6, s2, 16 4574; GFX10-NEXT: s_lshl_b32 s1, s1, s2 4575; GFX10-NEXT: s_pack_ll_b32_b16 s2, s7, s3 4576; GFX10-NEXT: s_lshl_b32 s3, s5, s6 4577; GFX10-NEXT: s_lshr_b32 s5, s2, 16 4578; GFX10-NEXT: s_and_b32 s2, s2, 0xffff 4579; GFX10-NEXT: s_lshr_b32 s6, s4, 16 4580; GFX10-NEXT: s_lshr_b32 s2, s2, s4 4581; GFX10-NEXT: s_lshr_b32 s4, s5, s6 4582; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s3 4583; GFX10-NEXT: s_pack_ll_b32_b16 s2, s2, s4 4584; GFX10-NEXT: s_lshr_b32 s3, s0, 16 4585; GFX10-NEXT: s_and_b32 s0, s0, 0xffff 4586; GFX10-NEXT: s_lshl_b32 s3, s3, 16 4587; GFX10-NEXT: s_or_b32 s1, s1, s2 4588; GFX10-NEXT: s_or_b32 s0, s0, s3 4589; GFX10-NEXT: s_and_b32 s1, s1, 0xffff 4590; GFX10-NEXT: ; return to shader part epilog 4591; 4592; GFX11-LABEL: s_fshl_v3i16: 4593; GFX11: ; %bb.0: 4594; GFX11-NEXT: s_and_b32 s9, s2, 0xffff 4595; GFX11-NEXT: s_lshr_b32 s2, s2, 16 4596; GFX11-NEXT: s_and_b32 s6, s4, 0xf000f 4597; GFX11-NEXT: s_lshr_b32 s9, s9, 0x10001 4598; GFX11-NEXT: s_lshr_b32 s2, s2, 1 4599; GFX11-NEXT: s_and_not1_b32 s4, 0xf000f, s4 4600; GFX11-NEXT: s_lshr_b32 s7, s0, 16 4601; GFX11-NEXT: s_lshr_b32 s8, s6, 16 4602; GFX11-NEXT: s_pack_ll_b32_b16 s2, s9, s2 4603; GFX11-NEXT: s_lshl_b32 s0, s0, s6 4604; GFX11-NEXT: s_lshl_b32 s6, s7, s8 4605; GFX11-NEXT: s_lshr_b32 s7, s2, 16 4606; GFX11-NEXT: s_and_b32 s2, s2, 0xffff 4607; GFX11-NEXT: s_lshr_b32 s8, s4, 16 4608; GFX11-NEXT: s_lshr_b32 s2, s2, s4 4609; GFX11-NEXT: s_lshr_b32 s4, s7, s8 4610; GFX11-NEXT: s_pack_ll_b32_b16 s0, s0, s6 4611; GFX11-NEXT: s_pack_ll_b32_b16 s2, s2, s4 4612; GFX11-NEXT: s_and_b32 s7, s3, 0xffff 4613; GFX11-NEXT: s_lshr_b32 s3, s3, 16 4614; GFX11-NEXT: s_or_b32 s0, s0, s2 4615; GFX11-NEXT: s_and_b32 s2, s5, 0xf000f 4616; GFX11-NEXT: s_lshr_b32 s7, s7, 0x10001 4617; GFX11-NEXT: s_lshr_b32 s3, s3, 1 4618; GFX11-NEXT: s_and_not1_b32 s4, 0xf000f, s5 4619; GFX11-NEXT: s_lshr_b32 s5, s1, 16 4620; GFX11-NEXT: s_lshr_b32 s6, s2, 16 4621; GFX11-NEXT: 
s_lshl_b32 s1, s1, s2 4622; GFX11-NEXT: s_pack_ll_b32_b16 s2, s7, s3 4623; GFX11-NEXT: s_lshl_b32 s3, s5, s6 4624; GFX11-NEXT: s_lshr_b32 s5, s2, 16 4625; GFX11-NEXT: s_and_b32 s2, s2, 0xffff 4626; GFX11-NEXT: s_lshr_b32 s6, s4, 16 4627; GFX11-NEXT: s_lshr_b32 s2, s2, s4 4628; GFX11-NEXT: s_lshr_b32 s4, s5, s6 4629; GFX11-NEXT: s_pack_ll_b32_b16 s1, s1, s3 4630; GFX11-NEXT: s_pack_ll_b32_b16 s2, s2, s4 4631; GFX11-NEXT: s_lshr_b32 s3, s0, 16 4632; GFX11-NEXT: s_and_b32 s0, s0, 0xffff 4633; GFX11-NEXT: s_lshl_b32 s3, s3, 16 4634; GFX11-NEXT: s_or_b32 s1, s1, s2 4635; GFX11-NEXT: s_or_b32 s0, s0, s3 4636; GFX11-NEXT: s_and_b32 s1, s1, 0xffff 4637; GFX11-NEXT: ; return to shader part epilog 4638 %result = call <3 x i16> @llvm.fshl.v3i16(<3 x i16> %lhs, <3 x i16> %rhs, <3 x i16> %amt) 4639 %cast = bitcast <3 x i16> %result to i48 4640 ret i48 %cast 4641} 4642 4643define <3 x half> @v_fshl_v3i16(<3 x i16> %lhs, <3 x i16> %rhs, <3 x i16> %amt) { 4644; GFX6-LABEL: v_fshl_v3i16: 4645; GFX6: ; %bb.0: 4646; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4647; GFX6-NEXT: v_and_b32_e32 v9, 15, v6 4648; GFX6-NEXT: v_xor_b32_e32 v6, -1, v6 4649; GFX6-NEXT: v_and_b32_e32 v6, 15, v6 4650; GFX6-NEXT: v_and_b32_e32 v9, 0xffff, v9 4651; GFX6-NEXT: v_bfe_u32 v3, v3, 1, 15 4652; GFX6-NEXT: v_and_b32_e32 v6, 0xffff, v6 4653; GFX6-NEXT: v_lshlrev_b32_e32 v0, v9, v0 4654; GFX6-NEXT: v_lshrrev_b32_e32 v3, v6, v3 4655; GFX6-NEXT: v_or_b32_e32 v0, v0, v3 4656; GFX6-NEXT: v_and_b32_e32 v3, 15, v7 4657; GFX6-NEXT: v_xor_b32_e32 v6, -1, v7 4658; GFX6-NEXT: v_and_b32_e32 v6, 15, v6 4659; GFX6-NEXT: v_and_b32_e32 v3, 0xffff, v3 4660; GFX6-NEXT: v_lshlrev_b32_e32 v1, v3, v1 4661; GFX6-NEXT: v_bfe_u32 v3, v4, 1, 15 4662; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v6 4663; GFX6-NEXT: v_lshrrev_b32_e32 v3, v4, v3 4664; GFX6-NEXT: v_or_b32_e32 v1, v1, v3 4665; GFX6-NEXT: v_and_b32_e32 v3, 15, v8 4666; GFX6-NEXT: v_xor_b32_e32 v4, -1, v8 4667; GFX6-NEXT: v_and_b32_e32 v4, 15, v4 4668; GFX6-NEXT: 
v_and_b32_e32 v3, 0xffff, v3 4669; GFX6-NEXT: v_lshlrev_b32_e32 v2, v3, v2 4670; GFX6-NEXT: v_bfe_u32 v3, v5, 1, 15 4671; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4 4672; GFX6-NEXT: v_lshrrev_b32_e32 v3, v4, v3 4673; GFX6-NEXT: v_or_b32_e32 v2, v2, v3 4674; GFX6-NEXT: s_setpc_b64 s[30:31] 4675; 4676; GFX8-LABEL: v_fshl_v3i16: 4677; GFX8: ; %bb.0: 4678; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4679; GFX8-NEXT: v_xor_b32_e32 v7, -1, v4 4680; GFX8-NEXT: v_and_b32_e32 v6, 15, v4 4681; GFX8-NEXT: v_and_b32_e32 v7, 15, v7 4682; GFX8-NEXT: v_lshrrev_b16_e32 v8, 1, v2 4683; GFX8-NEXT: v_lshlrev_b16_e32 v6, v6, v0 4684; GFX8-NEXT: v_lshrrev_b16_e32 v7, v7, v8 4685; GFX8-NEXT: v_or_b32_e32 v6, v6, v7 4686; GFX8-NEXT: v_mov_b32_e32 v7, 15 4687; GFX8-NEXT: v_and_b32_sdwa v7, v4, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 4688; GFX8-NEXT: v_mov_b32_e32 v8, -1 4689; GFX8-NEXT: v_xor_b32_sdwa v4, v4, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 4690; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v7, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 4691; GFX8-NEXT: v_mov_b32_e32 v7, 1 4692; GFX8-NEXT: v_and_b32_e32 v4, 15, v4 4693; GFX8-NEXT: v_lshrrev_b16_sdwa v2, v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 4694; GFX8-NEXT: v_lshrrev_b16_e32 v2, v4, v2 4695; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 4696; GFX8-NEXT: v_and_b32_e32 v2, 15, v5 4697; GFX8-NEXT: v_xor_b32_e32 v4, -1, v5 4698; GFX8-NEXT: v_and_b32_e32 v4, 15, v4 4699; GFX8-NEXT: v_lshlrev_b16_e32 v1, v2, v1 4700; GFX8-NEXT: v_lshrrev_b16_e32 v2, 1, v3 4701; GFX8-NEXT: v_lshrrev_b16_e32 v2, v4, v2 4702; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0 4703; GFX8-NEXT: v_or_b32_e32 v1, v1, v2 4704; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0 4705; GFX8-NEXT: v_or_b32_sdwa v0, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 4706; GFX8-NEXT: v_and_b32_e32 v1, 0xffff, v1 4707; GFX8-NEXT: s_setpc_b64 s[30:31] 4708; 4709; 
GFX9-LABEL: v_fshl_v3i16: 4710; GFX9: ; %bb.0: 4711; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4712; GFX9-NEXT: v_and_b32_e32 v6, 0xf000f, v4 4713; GFX9-NEXT: v_xor_b32_e32 v4, -1, v4 4714; GFX9-NEXT: v_and_b32_e32 v4, 0xf000f, v4 4715; GFX9-NEXT: v_pk_lshrrev_b16 v2, 1, v2 op_sel_hi:[0,1] 4716; GFX9-NEXT: v_pk_lshlrev_b16 v0, v6, v0 4717; GFX9-NEXT: v_pk_lshrrev_b16 v2, v4, v2 4718; GFX9-NEXT: v_or_b32_e32 v0, v0, v2 4719; GFX9-NEXT: v_and_b32_e32 v2, 0xf000f, v5 4720; GFX9-NEXT: v_xor_b32_e32 v4, -1, v5 4721; GFX9-NEXT: v_and_b32_e32 v4, 0xf000f, v4 4722; GFX9-NEXT: v_pk_lshlrev_b16 v1, v2, v1 4723; GFX9-NEXT: v_pk_lshrrev_b16 v2, 1, v3 op_sel_hi:[0,1] 4724; GFX9-NEXT: v_pk_lshrrev_b16 v2, v4, v2 4725; GFX9-NEXT: v_or_b32_e32 v1, v1, v2 4726; GFX9-NEXT: s_setpc_b64 s[30:31] 4727; 4728; GFX10-LABEL: v_fshl_v3i16: 4729; GFX10: ; %bb.0: 4730; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4731; GFX10-NEXT: v_xor_b32_e32 v6, -1, v4 4732; GFX10-NEXT: v_xor_b32_e32 v7, -1, v5 4733; GFX10-NEXT: v_and_b32_e32 v4, 0xf000f, v4 4734; GFX10-NEXT: v_pk_lshrrev_b16 v2, 1, v2 op_sel_hi:[0,1] 4735; GFX10-NEXT: v_and_b32_e32 v5, 0xf000f, v5 4736; GFX10-NEXT: v_and_b32_e32 v6, 0xf000f, v6 4737; GFX10-NEXT: v_pk_lshrrev_b16 v3, 1, v3 op_sel_hi:[0,1] 4738; GFX10-NEXT: v_and_b32_e32 v7, 0xf000f, v7 4739; GFX10-NEXT: v_pk_lshlrev_b16 v0, v4, v0 4740; GFX10-NEXT: v_pk_lshlrev_b16 v1, v5, v1 4741; GFX10-NEXT: v_pk_lshrrev_b16 v2, v6, v2 4742; GFX10-NEXT: v_pk_lshrrev_b16 v3, v7, v3 4743; GFX10-NEXT: v_or_b32_e32 v0, v0, v2 4744; GFX10-NEXT: v_or_b32_e32 v1, v1, v3 4745; GFX10-NEXT: s_setpc_b64 s[30:31] 4746; 4747; GFX11-LABEL: v_fshl_v3i16: 4748; GFX11: ; %bb.0: 4749; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4750; GFX11-NEXT: v_xor_b32_e32 v6, -1, v4 4751; GFX11-NEXT: v_xor_b32_e32 v7, -1, v5 4752; GFX11-NEXT: v_and_b32_e32 v4, 0xf000f, v4 4753; GFX11-NEXT: v_pk_lshrrev_b16 v2, 1, v2 op_sel_hi:[0,1] 4754; GFX11-NEXT: v_and_b32_e32 v5, 0xf000f, v5 4755; 
GFX11-NEXT: v_and_b32_e32 v6, 0xf000f, v6 4756; GFX11-NEXT: v_pk_lshrrev_b16 v3, 1, v3 op_sel_hi:[0,1] 4757; GFX11-NEXT: v_and_b32_e32 v7, 0xf000f, v7 4758; GFX11-NEXT: v_pk_lshlrev_b16 v0, v4, v0 4759; GFX11-NEXT: v_pk_lshlrev_b16 v1, v5, v1 4760; GFX11-NEXT: v_pk_lshrrev_b16 v2, v6, v2 4761; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2) 4762; GFX11-NEXT: v_pk_lshrrev_b16 v3, v7, v3 4763; GFX11-NEXT: v_or_b32_e32 v0, v0, v2 4764; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) 4765; GFX11-NEXT: v_or_b32_e32 v1, v1, v3 4766; GFX11-NEXT: s_setpc_b64 s[30:31] 4767 %result = call <3 x i16> @llvm.fshl.v3i16(<3 x i16> %lhs, <3 x i16> %rhs, <3 x i16> %amt) 4768 %cast.result = bitcast <3 x i16> %result to <3 x half> 4769 ret <3 x half> %cast.result 4770} 4771 4772define amdgpu_ps <2 x i32> @s_fshl_v4i16(<4 x i16> inreg %lhs, <4 x i16> inreg %rhs, <4 x i16> inreg %amt) { 4773; GFX6-LABEL: s_fshl_v4i16: 4774; GFX6: ; %bb.0: 4775; GFX6-NEXT: s_and_b32 s12, s8, 15 4776; GFX6-NEXT: s_andn2_b32 s8, 15, s8 4777; GFX6-NEXT: s_and_b32 s12, 0xffff, s12 4778; GFX6-NEXT: s_bfe_u32 s4, s4, 0xf0001 4779; GFX6-NEXT: s_and_b32 s8, 0xffff, s8 4780; GFX6-NEXT: s_lshl_b32 s0, s0, s12 4781; GFX6-NEXT: s_lshr_b32 s4, s4, s8 4782; GFX6-NEXT: s_or_b32 s0, s0, s4 4783; GFX6-NEXT: s_and_b32 s4, s9, 15 4784; GFX6-NEXT: s_andn2_b32 s8, 15, s9 4785; GFX6-NEXT: s_and_b32 s4, 0xffff, s4 4786; GFX6-NEXT: s_lshl_b32 s1, s1, s4 4787; GFX6-NEXT: s_bfe_u32 s4, s5, 0xf0001 4788; GFX6-NEXT: s_and_b32 s5, 0xffff, s8 4789; GFX6-NEXT: s_lshr_b32 s4, s4, s5 4790; GFX6-NEXT: s_or_b32 s1, s1, s4 4791; GFX6-NEXT: s_and_b32 s4, s10, 15 4792; GFX6-NEXT: s_andn2_b32 s5, 15, s10 4793; GFX6-NEXT: s_and_b32 s4, 0xffff, s4 4794; GFX6-NEXT: s_lshl_b32 s2, s2, s4 4795; GFX6-NEXT: s_bfe_u32 s4, s6, 0xf0001 4796; GFX6-NEXT: s_and_b32 s5, 0xffff, s5 4797; GFX6-NEXT: s_lshr_b32 s4, s4, s5 4798; GFX6-NEXT: s_or_b32 s2, s2, s4 4799; GFX6-NEXT: s_and_b32 s4, s11, 15 4800; GFX6-NEXT: s_andn2_b32 s5, 
15, s11 4801; GFX6-NEXT: s_and_b32 s4, 0xffff, s4 4802; GFX6-NEXT: s_lshl_b32 s3, s3, s4 4803; GFX6-NEXT: s_bfe_u32 s4, s7, 0xf0001 4804; GFX6-NEXT: s_and_b32 s5, 0xffff, s5 4805; GFX6-NEXT: s_lshr_b32 s4, s4, s5 4806; GFX6-NEXT: s_and_b32 s1, 0xffff, s1 4807; GFX6-NEXT: s_or_b32 s3, s3, s4 4808; GFX6-NEXT: s_and_b32 s0, 0xffff, s0 4809; GFX6-NEXT: s_lshl_b32 s1, s1, 16 4810; GFX6-NEXT: s_or_b32 s0, s0, s1 4811; GFX6-NEXT: s_and_b32 s1, 0xffff, s2 4812; GFX6-NEXT: s_and_b32 s2, 0xffff, s3 4813; GFX6-NEXT: s_lshl_b32 s2, s2, 16 4814; GFX6-NEXT: s_or_b32 s1, s1, s2 4815; GFX6-NEXT: ; return to shader part epilog 4816; 4817; GFX8-LABEL: s_fshl_v4i16: 4818; GFX8: ; %bb.0: 4819; GFX8-NEXT: s_lshr_b32 s8, s2, 16 4820; GFX8-NEXT: s_lshr_b32 s10, s4, 16 4821; GFX8-NEXT: s_and_b32 s12, s4, 15 4822; GFX8-NEXT: s_andn2_b32 s4, 15, s4 4823; GFX8-NEXT: s_and_b32 s2, 0xffff, s2 4824; GFX8-NEXT: s_and_b32 s12, 0xffff, s12 4825; GFX8-NEXT: s_lshr_b32 s2, s2, 1 4826; GFX8-NEXT: s_and_b32 s4, 0xffff, s4 4827; GFX8-NEXT: s_lshr_b32 s6, s0, 16 4828; GFX8-NEXT: s_lshl_b32 s0, s0, s12 4829; GFX8-NEXT: s_lshr_b32 s2, s2, s4 4830; GFX8-NEXT: s_or_b32 s0, s0, s2 4831; GFX8-NEXT: s_and_b32 s2, s10, 15 4832; GFX8-NEXT: s_andn2_b32 s4, 15, s10 4833; GFX8-NEXT: s_and_b32 s2, 0xffff, s2 4834; GFX8-NEXT: s_lshl_b32 s2, s6, s2 4835; GFX8-NEXT: s_lshr_b32 s6, s8, 1 4836; GFX8-NEXT: s_and_b32 s4, 0xffff, s4 4837; GFX8-NEXT: s_lshr_b32 s4, s6, s4 4838; GFX8-NEXT: s_or_b32 s2, s2, s4 4839; GFX8-NEXT: s_and_b32 s4, s5, 15 4840; GFX8-NEXT: s_lshr_b32 s9, s3, 16 4841; GFX8-NEXT: s_lshr_b32 s11, s5, 16 4842; GFX8-NEXT: s_andn2_b32 s5, 15, s5 4843; GFX8-NEXT: s_and_b32 s4, 0xffff, s4 4844; GFX8-NEXT: s_and_b32 s3, 0xffff, s3 4845; GFX8-NEXT: s_lshr_b32 s7, s1, 16 4846; GFX8-NEXT: s_lshl_b32 s1, s1, s4 4847; GFX8-NEXT: s_lshr_b32 s3, s3, 1 4848; GFX8-NEXT: s_and_b32 s4, 0xffff, s5 4849; GFX8-NEXT: s_lshr_b32 s3, s3, s4 4850; GFX8-NEXT: s_or_b32 s1, s1, s3 4851; GFX8-NEXT: s_and_b32 s3, s11, 15 4852; 
GFX8-NEXT: s_andn2_b32 s4, 15, s11 4853; GFX8-NEXT: s_and_b32 s3, 0xffff, s3 4854; GFX8-NEXT: s_lshr_b32 s5, s9, 1 4855; GFX8-NEXT: s_and_b32 s4, 0xffff, s4 4856; GFX8-NEXT: s_lshl_b32 s3, s7, s3 4857; GFX8-NEXT: s_lshr_b32 s4, s5, s4 4858; GFX8-NEXT: s_and_b32 s2, 0xffff, s2 4859; GFX8-NEXT: s_or_b32 s3, s3, s4 4860; GFX8-NEXT: s_and_b32 s0, 0xffff, s0 4861; GFX8-NEXT: s_lshl_b32 s2, s2, 16 4862; GFX8-NEXT: s_or_b32 s0, s0, s2 4863; GFX8-NEXT: s_and_b32 s2, 0xffff, s3 4864; GFX8-NEXT: s_and_b32 s1, 0xffff, s1 4865; GFX8-NEXT: s_lshl_b32 s2, s2, 16 4866; GFX8-NEXT: s_or_b32 s1, s1, s2 4867; GFX8-NEXT: ; return to shader part epilog 4868; 4869; GFX9-LABEL: s_fshl_v4i16: 4870; GFX9: ; %bb.0: 4871; GFX9-NEXT: s_and_b32 s6, s4, 0xf000f 4872; GFX9-NEXT: s_lshr_b32 s7, s0, 16 4873; GFX9-NEXT: s_lshr_b32 s8, s6, 16 4874; GFX9-NEXT: s_lshl_b32 s0, s0, s6 4875; GFX9-NEXT: s_lshl_b32 s6, s7, s8 4876; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s6 4877; GFX9-NEXT: s_lshr_b32 s6, s2, 16 4878; GFX9-NEXT: s_and_b32 s2, s2, 0xffff 4879; GFX9-NEXT: s_lshr_b32 s2, s2, 0x10001 4880; GFX9-NEXT: s_lshr_b32 s6, s6, 1 4881; GFX9-NEXT: s_andn2_b32 s4, 0xf000f, s4 4882; GFX9-NEXT: s_pack_ll_b32_b16 s2, s2, s6 4883; GFX9-NEXT: s_lshr_b32 s6, s2, 16 4884; GFX9-NEXT: s_and_b32 s2, s2, 0xffff 4885; GFX9-NEXT: s_lshr_b32 s7, s4, 16 4886; GFX9-NEXT: s_lshr_b32 s2, s2, s4 4887; GFX9-NEXT: s_lshr_b32 s4, s6, s7 4888; GFX9-NEXT: s_pack_ll_b32_b16 s2, s2, s4 4889; GFX9-NEXT: s_or_b32 s0, s0, s2 4890; GFX9-NEXT: s_and_b32 s2, s5, 0xf000f 4891; GFX9-NEXT: s_andn2_b32 s4, 0xf000f, s5 4892; GFX9-NEXT: s_lshr_b32 s5, s1, 16 4893; GFX9-NEXT: s_lshr_b32 s6, s2, 16 4894; GFX9-NEXT: s_lshl_b32 s1, s1, s2 4895; GFX9-NEXT: s_lshl_b32 s2, s5, s6 4896; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s2 4897; GFX9-NEXT: s_lshr_b32 s2, s3, 16 4898; GFX9-NEXT: s_and_b32 s3, s3, 0xffff 4899; GFX9-NEXT: s_lshr_b32 s3, s3, 0x10001 4900; GFX9-NEXT: s_lshr_b32 s2, s2, 1 4901; GFX9-NEXT: s_pack_ll_b32_b16 s2, s3, s2 4902; GFX9-NEXT: 
s_lshr_b32 s3, s2, 16 4903; GFX9-NEXT: s_and_b32 s2, s2, 0xffff 4904; GFX9-NEXT: s_lshr_b32 s5, s4, 16 4905; GFX9-NEXT: s_lshr_b32 s2, s2, s4 4906; GFX9-NEXT: s_lshr_b32 s3, s3, s5 4907; GFX9-NEXT: s_pack_ll_b32_b16 s2, s2, s3 4908; GFX9-NEXT: s_or_b32 s1, s1, s2 4909; GFX9-NEXT: ; return to shader part epilog 4910; 4911; GFX10-LABEL: s_fshl_v4i16: 4912; GFX10: ; %bb.0: 4913; GFX10-NEXT: s_and_b32 s9, s2, 0xffff 4914; GFX10-NEXT: s_lshr_b32 s2, s2, 16 4915; GFX10-NEXT: s_and_b32 s6, s4, 0xf000f 4916; GFX10-NEXT: s_lshr_b32 s9, s9, 0x10001 4917; GFX10-NEXT: s_lshr_b32 s2, s2, 1 4918; GFX10-NEXT: s_andn2_b32 s4, 0xf000f, s4 4919; GFX10-NEXT: s_lshr_b32 s7, s0, 16 4920; GFX10-NEXT: s_lshr_b32 s8, s6, 16 4921; GFX10-NEXT: s_pack_ll_b32_b16 s2, s9, s2 4922; GFX10-NEXT: s_lshl_b32 s0, s0, s6 4923; GFX10-NEXT: s_lshl_b32 s6, s7, s8 4924; GFX10-NEXT: s_lshr_b32 s7, s2, 16 4925; GFX10-NEXT: s_and_b32 s2, s2, 0xffff 4926; GFX10-NEXT: s_lshr_b32 s8, s4, 16 4927; GFX10-NEXT: s_lshr_b32 s2, s2, s4 4928; GFX10-NEXT: s_lshr_b32 s4, s7, s8 4929; GFX10-NEXT: s_and_b32 s8, s3, 0xffff 4930; GFX10-NEXT: s_lshr_b32 s3, s3, 16 4931; GFX10-NEXT: s_pack_ll_b32_b16 s2, s2, s4 4932; GFX10-NEXT: s_and_b32 s4, s5, 0xf000f 4933; GFX10-NEXT: s_lshr_b32 s8, s8, 0x10001 4934; GFX10-NEXT: s_lshr_b32 s3, s3, 1 4935; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s6 4936; GFX10-NEXT: s_andn2_b32 s5, 0xf000f, s5 4937; GFX10-NEXT: s_lshr_b32 s6, s1, 16 4938; GFX10-NEXT: s_lshr_b32 s7, s4, 16 4939; GFX10-NEXT: s_pack_ll_b32_b16 s3, s8, s3 4940; GFX10-NEXT: s_lshl_b32 s1, s1, s4 4941; GFX10-NEXT: s_lshl_b32 s4, s6, s7 4942; GFX10-NEXT: s_lshr_b32 s6, s3, 16 4943; GFX10-NEXT: s_and_b32 s3, s3, 0xffff 4944; GFX10-NEXT: s_lshr_b32 s7, s5, 16 4945; GFX10-NEXT: s_lshr_b32 s3, s3, s5 4946; GFX10-NEXT: s_lshr_b32 s5, s6, s7 4947; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s4 4948; GFX10-NEXT: s_pack_ll_b32_b16 s3, s3, s5 4949; GFX10-NEXT: s_or_b32 s0, s0, s2 4950; GFX10-NEXT: s_or_b32 s1, s1, s3 4951; GFX10-NEXT: ; return 
to shader part epilog 4952; 4953; GFX11-LABEL: s_fshl_v4i16: 4954; GFX11: ; %bb.0: 4955; GFX11-NEXT: s_and_b32 s9, s2, 0xffff 4956; GFX11-NEXT: s_lshr_b32 s2, s2, 16 4957; GFX11-NEXT: s_and_b32 s6, s4, 0xf000f 4958; GFX11-NEXT: s_lshr_b32 s9, s9, 0x10001 4959; GFX11-NEXT: s_lshr_b32 s2, s2, 1 4960; GFX11-NEXT: s_and_not1_b32 s4, 0xf000f, s4 4961; GFX11-NEXT: s_lshr_b32 s7, s0, 16 4962; GFX11-NEXT: s_lshr_b32 s8, s6, 16 4963; GFX11-NEXT: s_pack_ll_b32_b16 s2, s9, s2 4964; GFX11-NEXT: s_lshl_b32 s0, s0, s6 4965; GFX11-NEXT: s_lshl_b32 s6, s7, s8 4966; GFX11-NEXT: s_lshr_b32 s7, s2, 16 4967; GFX11-NEXT: s_and_b32 s2, s2, 0xffff 4968; GFX11-NEXT: s_lshr_b32 s8, s4, 16 4969; GFX11-NEXT: s_lshr_b32 s2, s2, s4 4970; GFX11-NEXT: s_lshr_b32 s4, s7, s8 4971; GFX11-NEXT: s_and_b32 s8, s3, 0xffff 4972; GFX11-NEXT: s_lshr_b32 s3, s3, 16 4973; GFX11-NEXT: s_pack_ll_b32_b16 s2, s2, s4 4974; GFX11-NEXT: s_and_b32 s4, s5, 0xf000f 4975; GFX11-NEXT: s_lshr_b32 s8, s8, 0x10001 4976; GFX11-NEXT: s_lshr_b32 s3, s3, 1 4977; GFX11-NEXT: s_pack_ll_b32_b16 s0, s0, s6 4978; GFX11-NEXT: s_and_not1_b32 s5, 0xf000f, s5 4979; GFX11-NEXT: s_lshr_b32 s6, s1, 16 4980; GFX11-NEXT: s_lshr_b32 s7, s4, 16 4981; GFX11-NEXT: s_pack_ll_b32_b16 s3, s8, s3 4982; GFX11-NEXT: s_lshl_b32 s1, s1, s4 4983; GFX11-NEXT: s_lshl_b32 s4, s6, s7 4984; GFX11-NEXT: s_lshr_b32 s6, s3, 16 4985; GFX11-NEXT: s_and_b32 s3, s3, 0xffff 4986; GFX11-NEXT: s_lshr_b32 s7, s5, 16 4987; GFX11-NEXT: s_lshr_b32 s3, s3, s5 4988; GFX11-NEXT: s_lshr_b32 s5, s6, s7 4989; GFX11-NEXT: s_pack_ll_b32_b16 s1, s1, s4 4990; GFX11-NEXT: s_pack_ll_b32_b16 s3, s3, s5 4991; GFX11-NEXT: s_or_b32 s0, s0, s2 4992; GFX11-NEXT: s_or_b32 s1, s1, s3 4993; GFX11-NEXT: ; return to shader part epilog 4994 %result = call <4 x i16> @llvm.fshl.v4i16(<4 x i16> %lhs, <4 x i16> %rhs, <4 x i16> %amt) 4995 %cast.result = bitcast <4 x i16> %result to <2 x i32> 4996 ret <2 x i32> %cast.result 4997} 4998 4999define <4 x half> @v_fshl_v4i16(<4 x i16> %lhs, <4 x i16> 
%rhs, <4 x i16> %amt) { 5000; GFX6-LABEL: v_fshl_v4i16: 5001; GFX6: ; %bb.0: 5002; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5003; GFX6-NEXT: v_and_b32_e32 v12, 15, v8 5004; GFX6-NEXT: v_xor_b32_e32 v8, -1, v8 5005; GFX6-NEXT: v_and_b32_e32 v8, 15, v8 5006; GFX6-NEXT: v_and_b32_e32 v12, 0xffff, v12 5007; GFX6-NEXT: v_bfe_u32 v4, v4, 1, 15 5008; GFX6-NEXT: v_and_b32_e32 v8, 0xffff, v8 5009; GFX6-NEXT: v_lshlrev_b32_e32 v0, v12, v0 5010; GFX6-NEXT: v_lshrrev_b32_e32 v4, v8, v4 5011; GFX6-NEXT: v_or_b32_e32 v0, v0, v4 5012; GFX6-NEXT: v_and_b32_e32 v4, 15, v9 5013; GFX6-NEXT: v_xor_b32_e32 v8, -1, v9 5014; GFX6-NEXT: v_and_b32_e32 v8, 15, v8 5015; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4 5016; GFX6-NEXT: v_lshlrev_b32_e32 v1, v4, v1 5017; GFX6-NEXT: v_bfe_u32 v4, v5, 1, 15 5018; GFX6-NEXT: v_and_b32_e32 v5, 0xffff, v8 5019; GFX6-NEXT: v_lshrrev_b32_e32 v4, v5, v4 5020; GFX6-NEXT: v_or_b32_e32 v1, v1, v4 5021; GFX6-NEXT: v_and_b32_e32 v4, 15, v10 5022; GFX6-NEXT: v_xor_b32_e32 v5, -1, v10 5023; GFX6-NEXT: v_and_b32_e32 v5, 15, v5 5024; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4 5025; GFX6-NEXT: v_lshlrev_b32_e32 v2, v4, v2 5026; GFX6-NEXT: v_bfe_u32 v4, v6, 1, 15 5027; GFX6-NEXT: v_and_b32_e32 v5, 0xffff, v5 5028; GFX6-NEXT: v_lshrrev_b32_e32 v4, v5, v4 5029; GFX6-NEXT: v_or_b32_e32 v2, v2, v4 5030; GFX6-NEXT: v_and_b32_e32 v4, 15, v11 5031; GFX6-NEXT: v_xor_b32_e32 v5, -1, v11 5032; GFX6-NEXT: v_and_b32_e32 v5, 15, v5 5033; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4 5034; GFX6-NEXT: v_lshlrev_b32_e32 v3, v4, v3 5035; GFX6-NEXT: v_bfe_u32 v4, v7, 1, 15 5036; GFX6-NEXT: v_and_b32_e32 v5, 0xffff, v5 5037; GFX6-NEXT: v_lshrrev_b32_e32 v4, v5, v4 5038; GFX6-NEXT: v_or_b32_e32 v3, v3, v4 5039; GFX6-NEXT: s_setpc_b64 s[30:31] 5040; 5041; GFX8-LABEL: v_fshl_v4i16: 5042; GFX8: ; %bb.0: 5043; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5044; GFX8-NEXT: v_xor_b32_e32 v7, -1, v4 5045; GFX8-NEXT: v_and_b32_e32 v6, 15, v4 5046; GFX8-NEXT: v_and_b32_e32 v7, 15, v7 5047; 
GFX8-NEXT: v_lshrrev_b16_e32 v8, 1, v2 5048; GFX8-NEXT: v_lshlrev_b16_e32 v6, v6, v0 5049; GFX8-NEXT: v_lshrrev_b16_e32 v7, v7, v8 5050; GFX8-NEXT: v_or_b32_e32 v6, v6, v7 5051; GFX8-NEXT: v_mov_b32_e32 v7, 15 5052; GFX8-NEXT: v_and_b32_sdwa v8, v4, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 5053; GFX8-NEXT: v_mov_b32_e32 v9, -1 5054; GFX8-NEXT: v_xor_b32_sdwa v4, v4, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 5055; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v8, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 5056; GFX8-NEXT: v_mov_b32_e32 v8, 1 5057; GFX8-NEXT: v_and_b32_e32 v4, 15, v4 5058; GFX8-NEXT: v_lshrrev_b16_sdwa v2, v8, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 5059; GFX8-NEXT: v_lshrrev_b16_e32 v2, v4, v2 5060; GFX8-NEXT: v_xor_b32_e32 v4, -1, v5 5061; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 5062; GFX8-NEXT: v_and_b32_e32 v2, 15, v5 5063; GFX8-NEXT: v_and_b32_e32 v4, 15, v4 5064; GFX8-NEXT: v_lshrrev_b16_e32 v10, 1, v3 5065; GFX8-NEXT: v_lshlrev_b16_e32 v2, v2, v1 5066; GFX8-NEXT: v_lshrrev_b16_e32 v4, v4, v10 5067; GFX8-NEXT: v_or_b32_e32 v2, v2, v4 5068; GFX8-NEXT: v_and_b32_sdwa v4, v5, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 5069; GFX8-NEXT: v_xor_b32_sdwa v5, v5, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 5070; GFX8-NEXT: v_and_b32_e32 v5, 15, v5 5071; GFX8-NEXT: v_lshrrev_b16_sdwa v3, v8, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 5072; GFX8-NEXT: v_lshlrev_b16_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 5073; GFX8-NEXT: v_lshrrev_b16_e32 v3, v5, v3 5074; GFX8-NEXT: v_or_b32_e32 v1, v1, v3 5075; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0 5076; GFX8-NEXT: v_and_b32_e32 v1, 0xffff, v1 5077; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0 5078; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1 5079; GFX8-NEXT: v_or_b32_sdwa v0, v6, v0 dst_sel:DWORD 
dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 5080; GFX8-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 5081; GFX8-NEXT: s_setpc_b64 s[30:31] 5082; 5083; GFX9-LABEL: v_fshl_v4i16: 5084; GFX9: ; %bb.0: 5085; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5086; GFX9-NEXT: v_and_b32_e32 v6, 0xf000f, v4 5087; GFX9-NEXT: v_xor_b32_e32 v4, -1, v4 5088; GFX9-NEXT: v_and_b32_e32 v4, 0xf000f, v4 5089; GFX9-NEXT: v_pk_lshrrev_b16 v2, 1, v2 op_sel_hi:[0,1] 5090; GFX9-NEXT: v_pk_lshlrev_b16 v0, v6, v0 5091; GFX9-NEXT: v_pk_lshrrev_b16 v2, v4, v2 5092; GFX9-NEXT: v_or_b32_e32 v0, v0, v2 5093; GFX9-NEXT: v_and_b32_e32 v2, 0xf000f, v5 5094; GFX9-NEXT: v_xor_b32_e32 v4, -1, v5 5095; GFX9-NEXT: v_and_b32_e32 v4, 0xf000f, v4 5096; GFX9-NEXT: v_pk_lshlrev_b16 v1, v2, v1 5097; GFX9-NEXT: v_pk_lshrrev_b16 v2, 1, v3 op_sel_hi:[0,1] 5098; GFX9-NEXT: v_pk_lshrrev_b16 v2, v4, v2 5099; GFX9-NEXT: v_or_b32_e32 v1, v1, v2 5100; GFX9-NEXT: s_setpc_b64 s[30:31] 5101; 5102; GFX10-LABEL: v_fshl_v4i16: 5103; GFX10: ; %bb.0: 5104; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5105; GFX10-NEXT: v_xor_b32_e32 v6, -1, v4 5106; GFX10-NEXT: v_xor_b32_e32 v7, -1, v5 5107; GFX10-NEXT: v_and_b32_e32 v4, 0xf000f, v4 5108; GFX10-NEXT: v_pk_lshrrev_b16 v2, 1, v2 op_sel_hi:[0,1] 5109; GFX10-NEXT: v_and_b32_e32 v5, 0xf000f, v5 5110; GFX10-NEXT: v_and_b32_e32 v6, 0xf000f, v6 5111; GFX10-NEXT: v_pk_lshrrev_b16 v3, 1, v3 op_sel_hi:[0,1] 5112; GFX10-NEXT: v_and_b32_e32 v7, 0xf000f, v7 5113; GFX10-NEXT: v_pk_lshlrev_b16 v0, v4, v0 5114; GFX10-NEXT: v_pk_lshlrev_b16 v1, v5, v1 5115; GFX10-NEXT: v_pk_lshrrev_b16 v2, v6, v2 5116; GFX10-NEXT: v_pk_lshrrev_b16 v3, v7, v3 5117; GFX10-NEXT: v_or_b32_e32 v0, v0, v2 5118; GFX10-NEXT: v_or_b32_e32 v1, v1, v3 5119; GFX10-NEXT: s_setpc_b64 s[30:31] 5120; 5121; GFX11-LABEL: v_fshl_v4i16: 5122; GFX11: ; %bb.0: 5123; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5124; GFX11-NEXT: v_xor_b32_e32 v6, -1, v4 
5125; GFX11-NEXT: v_xor_b32_e32 v7, -1, v5 5126; GFX11-NEXT: v_and_b32_e32 v4, 0xf000f, v4 5127; GFX11-NEXT: v_pk_lshrrev_b16 v2, 1, v2 op_sel_hi:[0,1] 5128; GFX11-NEXT: v_and_b32_e32 v5, 0xf000f, v5 5129; GFX11-NEXT: v_and_b32_e32 v6, 0xf000f, v6 5130; GFX11-NEXT: v_pk_lshrrev_b16 v3, 1, v3 op_sel_hi:[0,1] 5131; GFX11-NEXT: v_and_b32_e32 v7, 0xf000f, v7 5132; GFX11-NEXT: v_pk_lshlrev_b16 v0, v4, v0 5133; GFX11-NEXT: v_pk_lshlrev_b16 v1, v5, v1 5134; GFX11-NEXT: v_pk_lshrrev_b16 v2, v6, v2 5135; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2) 5136; GFX11-NEXT: v_pk_lshrrev_b16 v3, v7, v3 5137; GFX11-NEXT: v_or_b32_e32 v0, v0, v2 5138; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) 5139; GFX11-NEXT: v_or_b32_e32 v1, v1, v3 5140; GFX11-NEXT: s_setpc_b64 s[30:31] 5141 %result = call <4 x i16> @llvm.fshl.v4i16(<4 x i16> %lhs, <4 x i16> %rhs, <4 x i16> %amt) 5142 %cast.result = bitcast <4 x i16> %result to <4 x half> 5143 ret <4 x half> %cast.result 5144} 5145 5146define amdgpu_ps i64 @s_fshl_i64(i64 inreg %lhs, i64 inreg %rhs, i64 inreg %amt) { 5147; GFX6-LABEL: s_fshl_i64: 5148; GFX6: ; %bb.0: 5149; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], s4 5150; GFX6-NEXT: s_lshr_b64 s[2:3], s[2:3], 1 5151; GFX6-NEXT: s_not_b32 s4, s4 5152; GFX6-NEXT: s_lshr_b64 s[2:3], s[2:3], s4 5153; GFX6-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] 5154; GFX6-NEXT: ; return to shader part epilog 5155; 5156; GFX8-LABEL: s_fshl_i64: 5157; GFX8: ; %bb.0: 5158; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], s4 5159; GFX8-NEXT: s_lshr_b64 s[2:3], s[2:3], 1 5160; GFX8-NEXT: s_not_b32 s4, s4 5161; GFX8-NEXT: s_lshr_b64 s[2:3], s[2:3], s4 5162; GFX8-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] 5163; GFX8-NEXT: ; return to shader part epilog 5164; 5165; GFX9-LABEL: s_fshl_i64: 5166; GFX9: ; %bb.0: 5167; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], s4 5168; GFX9-NEXT: s_lshr_b64 s[2:3], s[2:3], 1 5169; GFX9-NEXT: s_not_b32 s4, s4 5170; GFX9-NEXT: s_lshr_b64 s[2:3], s[2:3], s4 5171; GFX9-NEXT: s_or_b64 
s[0:1], s[0:1], s[2:3] 5172; GFX9-NEXT: ; return to shader part epilog 5173; 5174; GFX10-LABEL: s_fshl_i64: 5175; GFX10: ; %bb.0: 5176; GFX10-NEXT: s_lshr_b64 s[2:3], s[2:3], 1 5177; GFX10-NEXT: s_not_b32 s5, s4 5178; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s4 5179; GFX10-NEXT: s_lshr_b64 s[2:3], s[2:3], s5 5180; GFX10-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] 5181; GFX10-NEXT: ; return to shader part epilog 5182; 5183; GFX11-LABEL: s_fshl_i64: 5184; GFX11: ; %bb.0: 5185; GFX11-NEXT: s_lshr_b64 s[2:3], s[2:3], 1 5186; GFX11-NEXT: s_not_b32 s5, s4 5187; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], s4 5188; GFX11-NEXT: s_lshr_b64 s[2:3], s[2:3], s5 5189; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 5190; GFX11-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] 5191; GFX11-NEXT: ; return to shader part epilog 5192 %result = call i64 @llvm.fshl.i64(i64 %lhs, i64 %rhs, i64 %amt) 5193 ret i64 %result 5194} 5195 5196define amdgpu_ps i64 @s_fshl_i64_5(i64 inreg %lhs, i64 inreg %rhs) { 5197; GCN-LABEL: s_fshl_i64_5: 5198; GCN: ; %bb.0: 5199; GCN-NEXT: s_lshl_b64 s[0:1], s[0:1], 5 5200; GCN-NEXT: s_lshr_b32 s2, s3, 27 5201; GCN-NEXT: s_mov_b32 s3, 0 5202; GCN-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] 5203; GCN-NEXT: ; return to shader part epilog 5204; 5205; GFX11-LABEL: s_fshl_i64_5: 5206; GFX11: ; %bb.0: 5207; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], 5 5208; GFX11-NEXT: s_lshr_b32 s2, s3, 27 5209; GFX11-NEXT: s_mov_b32 s3, 0 5210; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 5211; GFX11-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] 5212; GFX11-NEXT: ; return to shader part epilog 5213 %result = call i64 @llvm.fshl.i64(i64 %lhs, i64 %rhs, i64 5) 5214 ret i64 %result 5215} 5216 5217define amdgpu_ps i64 @s_fshl_i64_32(i64 inreg %lhs, i64 inreg %rhs) { 5218; GCN-LABEL: s_fshl_i64_32: 5219; GCN: ; %bb.0: 5220; GCN-NEXT: s_mov_b32 s1, s0 5221; GCN-NEXT: s_mov_b32 s0, 0 5222; GCN-NEXT: s_mov_b32 s2, s3 5223; GCN-NEXT: s_mov_b32 s3, s0 5224; GCN-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] 5225; GCN-NEXT: ; return to 
shader part epilog 5226; 5227; GFX11-LABEL: s_fshl_i64_32: 5228; GFX11: ; %bb.0: 5229; GFX11-NEXT: s_mov_b32 s1, s0 5230; GFX11-NEXT: s_mov_b32 s0, 0 5231; GFX11-NEXT: s_mov_b32 s2, s3 5232; GFX11-NEXT: s_mov_b32 s3, s0 5233; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 5234; GFX11-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] 5235; GFX11-NEXT: ; return to shader part epilog 5236 %result = call i64 @llvm.fshl.i64(i64 %lhs, i64 %rhs, i64 32) 5237 ret i64 %result 5238} 5239 5240define amdgpu_ps i64 @s_fshl_i64_48(i64 inreg %lhs, i64 inreg %rhs) { 5241; GCN-LABEL: s_fshl_i64_48: 5242; GCN: ; %bb.0: 5243; GCN-NEXT: s_lshl_b32 s1, s0, 16 5244; GCN-NEXT: s_mov_b32 s0, 0 5245; GCN-NEXT: s_lshr_b64 s[2:3], s[2:3], 16 5246; GCN-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] 5247; GCN-NEXT: ; return to shader part epilog 5248; 5249; GFX11-LABEL: s_fshl_i64_48: 5250; GFX11: ; %bb.0: 5251; GFX11-NEXT: s_lshl_b32 s1, s0, 16 5252; GFX11-NEXT: s_mov_b32 s0, 0 5253; GFX11-NEXT: s_lshr_b64 s[2:3], s[2:3], 16 5254; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 5255; GFX11-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] 5256; GFX11-NEXT: ; return to shader part epilog 5257 %result = call i64 @llvm.fshl.i64(i64 %lhs, i64 %rhs, i64 48) 5258 ret i64 %result 5259} 5260 5261define i64 @v_fshl_i64(i64 %lhs, i64 %rhs, i64 %amt) { 5262; GFX6-LABEL: v_fshl_i64: 5263; GFX6: ; %bb.0: 5264; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5265; GFX6-NEXT: v_and_b32_e32 v5, 63, v4 5266; GFX6-NEXT: v_lshr_b64 v[2:3], v[2:3], 1 5267; GFX6-NEXT: v_not_b32_e32 v4, v4 5268; GFX6-NEXT: v_and_b32_e32 v4, 63, v4 5269; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], v5 5270; GFX6-NEXT: v_lshr_b64 v[2:3], v[2:3], v4 5271; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 5272; GFX6-NEXT: v_or_b32_e32 v1, v1, v3 5273; GFX6-NEXT: s_setpc_b64 s[30:31] 5274; 5275; GFX8-LABEL: v_fshl_i64: 5276; GFX8: ; %bb.0: 5277; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5278; GFX8-NEXT: v_and_b32_e32 v5, 63, v4 5279; GFX8-NEXT: v_lshrrev_b64 v[2:3], 1, v[2:3] 
5280; GFX8-NEXT: v_not_b32_e32 v4, v4 5281; GFX8-NEXT: v_and_b32_e32 v4, 63, v4 5282; GFX8-NEXT: v_lshlrev_b64 v[0:1], v5, v[0:1] 5283; GFX8-NEXT: v_lshrrev_b64 v[2:3], v4, v[2:3] 5284; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 5285; GFX8-NEXT: v_or_b32_e32 v1, v1, v3 5286; GFX8-NEXT: s_setpc_b64 s[30:31] 5287; 5288; GFX9-LABEL: v_fshl_i64: 5289; GFX9: ; %bb.0: 5290; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5291; GFX9-NEXT: v_and_b32_e32 v5, 63, v4 5292; GFX9-NEXT: v_lshrrev_b64 v[2:3], 1, v[2:3] 5293; GFX9-NEXT: v_not_b32_e32 v4, v4 5294; GFX9-NEXT: v_and_b32_e32 v4, 63, v4 5295; GFX9-NEXT: v_lshlrev_b64 v[0:1], v5, v[0:1] 5296; GFX9-NEXT: v_lshrrev_b64 v[2:3], v4, v[2:3] 5297; GFX9-NEXT: v_or_b32_e32 v0, v0, v2 5298; GFX9-NEXT: v_or_b32_e32 v1, v1, v3 5299; GFX9-NEXT: s_setpc_b64 s[30:31] 5300; 5301; GFX10-LABEL: v_fshl_i64: 5302; GFX10: ; %bb.0: 5303; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5304; GFX10-NEXT: v_not_b32_e32 v5, v4 5305; GFX10-NEXT: v_lshrrev_b64 v[2:3], 1, v[2:3] 5306; GFX10-NEXT: v_and_b32_e32 v4, 63, v4 5307; GFX10-NEXT: v_and_b32_e32 v5, 63, v5 5308; GFX10-NEXT: v_lshlrev_b64 v[0:1], v4, v[0:1] 5309; GFX10-NEXT: v_lshrrev_b64 v[2:3], v5, v[2:3] 5310; GFX10-NEXT: v_or_b32_e32 v0, v0, v2 5311; GFX10-NEXT: v_or_b32_e32 v1, v1, v3 5312; GFX10-NEXT: s_setpc_b64 s[30:31] 5313; 5314; GFX11-LABEL: v_fshl_i64: 5315; GFX11: ; %bb.0: 5316; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5317; GFX11-NEXT: v_not_b32_e32 v5, v4 5318; GFX11-NEXT: v_lshrrev_b64 v[2:3], 1, v[2:3] 5319; GFX11-NEXT: v_and_b32_e32 v4, 63, v4 5320; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) 5321; GFX11-NEXT: v_and_b32_e32 v5, 63, v5 5322; GFX11-NEXT: v_lshlrev_b64 v[0:1], v4, v[0:1] 5323; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 5324; GFX11-NEXT: v_lshrrev_b64 v[2:3], v5, v[2:3] 5325; GFX11-NEXT: v_or_b32_e32 v0, v0, v2 5326; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) 5327; 
GFX11-NEXT: v_or_b32_e32 v1, v1, v3 5328; GFX11-NEXT: s_setpc_b64 s[30:31] 5329 %result = call i64 @llvm.fshl.i64(i64 %lhs, i64 %rhs, i64 %amt) 5330 ret i64 %result 5331} 5332 5333define i64 @v_fshl_i64_5(i64 %lhs, i64 %rhs) { 5334; GFX6-LABEL: v_fshl_i64_5: 5335; GFX6: ; %bb.0: 5336; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5337; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 5 5338; GFX6-NEXT: v_lshrrev_b32_e32 v2, 27, v3 5339; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 5340; GFX6-NEXT: s_setpc_b64 s[30:31] 5341; 5342; GFX8-LABEL: v_fshl_i64_5: 5343; GFX8: ; %bb.0: 5344; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5345; GFX8-NEXT: v_lshlrev_b64 v[0:1], 5, v[0:1] 5346; GFX8-NEXT: v_lshrrev_b32_e32 v2, 27, v3 5347; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 5348; GFX8-NEXT: s_setpc_b64 s[30:31] 5349; 5350; GFX9-LABEL: v_fshl_i64_5: 5351; GFX9: ; %bb.0: 5352; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5353; GFX9-NEXT: v_lshlrev_b64 v[0:1], 5, v[0:1] 5354; GFX9-NEXT: v_lshrrev_b32_e32 v2, 27, v3 5355; GFX9-NEXT: v_or_b32_e32 v0, v0, v2 5356; GFX9-NEXT: s_setpc_b64 s[30:31] 5357; 5358; GFX10-LABEL: v_fshl_i64_5: 5359; GFX10: ; %bb.0: 5360; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5361; GFX10-NEXT: v_lshlrev_b64 v[0:1], 5, v[0:1] 5362; GFX10-NEXT: v_lshrrev_b32_e32 v2, 27, v3 5363; GFX10-NEXT: v_or_b32_e32 v0, v0, v2 5364; GFX10-NEXT: s_setpc_b64 s[30:31] 5365; 5366; GFX11-LABEL: v_fshl_i64_5: 5367; GFX11: ; %bb.0: 5368; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5369; GFX11-NEXT: v_lshlrev_b64 v[0:1], 5, v[0:1] 5370; GFX11-NEXT: v_lshrrev_b32_e32 v2, 27, v3 5371; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 5372; GFX11-NEXT: v_or_b32_e32 v0, v0, v2 5373; GFX11-NEXT: s_setpc_b64 s[30:31] 5374 %result = call i64 @llvm.fshl.i64(i64 %lhs, i64 %rhs, i64 5) 5375 ret i64 %result 5376} 5377 5378define i64 @v_fshl_i64_32(i64 %lhs, i64 %rhs) { 5379; GCN-LABEL: v_fshl_i64_32: 5380; GCN: ; %bb.0: 5381; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5382; 
GCN-NEXT: v_mov_b32_e32 v1, v0 5383; GCN-NEXT: v_mov_b32_e32 v0, v3 5384; GCN-NEXT: s_setpc_b64 s[30:31] 5385; 5386; GFX11-LABEL: v_fshl_i64_32: 5387; GFX11: ; %bb.0: 5388; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5389; GFX11-NEXT: v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, v3 5390; GFX11-NEXT: s_setpc_b64 s[30:31] 5391 %result = call i64 @llvm.fshl.i64(i64 %lhs, i64 %rhs, i64 32) 5392 ret i64 %result 5393} 5394 5395define i64 @v_fshl_i64_48(i64 %lhs, i64 %rhs) { 5396; GFX6-LABEL: v_fshl_i64_48: 5397; GFX6: ; %bb.0: 5398; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5399; GFX6-NEXT: v_mov_b32_e32 v4, v0 5400; GFX6-NEXT: v_lshr_b64 v[0:1], v[2:3], 16 5401; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v4 5402; GFX6-NEXT: v_or_b32_e32 v1, v2, v1 5403; GFX6-NEXT: s_setpc_b64 s[30:31] 5404; 5405; GFX8-LABEL: v_fshl_i64_48: 5406; GFX8: ; %bb.0: 5407; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5408; GFX8-NEXT: v_mov_b32_e32 v4, v0 5409; GFX8-NEXT: v_lshrrev_b64 v[0:1], 16, v[2:3] 5410; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v4 5411; GFX8-NEXT: v_or_b32_e32 v1, v2, v1 5412; GFX8-NEXT: s_setpc_b64 s[30:31] 5413; 5414; GFX9-LABEL: v_fshl_i64_48: 5415; GFX9: ; %bb.0: 5416; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5417; GFX9-NEXT: v_mov_b32_e32 v4, v0 5418; GFX9-NEXT: v_lshrrev_b64 v[0:1], 16, v[2:3] 5419; GFX9-NEXT: v_lshl_or_b32 v1, v4, 16, v1 5420; GFX9-NEXT: s_setpc_b64 s[30:31] 5421; 5422; GFX10-LABEL: v_fshl_i64_48: 5423; GFX10: ; %bb.0: 5424; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5425; GFX10-NEXT: v_mov_b32_e32 v4, v0 5426; GFX10-NEXT: v_lshrrev_b64 v[0:1], 16, v[2:3] 5427; GFX10-NEXT: v_lshl_or_b32 v1, v4, 16, v1 5428; GFX10-NEXT: s_setpc_b64 s[30:31] 5429; 5430; GFX11-LABEL: v_fshl_i64_48: 5431; GFX11: ; %bb.0: 5432; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5433; GFX11-NEXT: v_mov_b32_e32 v4, v0 5434; GFX11-NEXT: v_lshrrev_b64 v[0:1], 16, v[2:3] 5435; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 5436; GFX11-NEXT: 
v_lshl_or_b32 v1, v4, 16, v1 5437; GFX11-NEXT: s_setpc_b64 s[30:31] 5438 %result = call i64 @llvm.fshl.i64(i64 %lhs, i64 %rhs, i64 48) 5439 ret i64 %result 5440} 5441 5442define amdgpu_ps <2 x float> @v_fshl_i64_ssv(i64 inreg %lhs, i64 inreg %rhs, i64 %amt) { 5443; GFX6-LABEL: v_fshl_i64_ssv: 5444; GFX6: ; %bb.0: 5445; GFX6-NEXT: v_and_b32_e32 v1, 63, v0 5446; GFX6-NEXT: v_not_b32_e32 v0, v0 5447; GFX6-NEXT: v_lshl_b64 v[1:2], s[0:1], v1 5448; GFX6-NEXT: s_lshr_b64 s[0:1], s[2:3], 1 5449; GFX6-NEXT: v_and_b32_e32 v0, 63, v0 5450; GFX6-NEXT: v_lshr_b64 v[3:4], s[0:1], v0 5451; GFX6-NEXT: v_or_b32_e32 v0, v1, v3 5452; GFX6-NEXT: v_or_b32_e32 v1, v2, v4 5453; GFX6-NEXT: ; return to shader part epilog 5454; 5455; GFX8-LABEL: v_fshl_i64_ssv: 5456; GFX8: ; %bb.0: 5457; GFX8-NEXT: v_and_b32_e32 v1, 63, v0 5458; GFX8-NEXT: v_not_b32_e32 v0, v0 5459; GFX8-NEXT: v_lshlrev_b64 v[1:2], v1, s[0:1] 5460; GFX8-NEXT: s_lshr_b64 s[0:1], s[2:3], 1 5461; GFX8-NEXT: v_and_b32_e32 v0, 63, v0 5462; GFX8-NEXT: v_lshrrev_b64 v[3:4], v0, s[0:1] 5463; GFX8-NEXT: v_or_b32_e32 v0, v1, v3 5464; GFX8-NEXT: v_or_b32_e32 v1, v2, v4 5465; GFX8-NEXT: ; return to shader part epilog 5466; 5467; GFX9-LABEL: v_fshl_i64_ssv: 5468; GFX9: ; %bb.0: 5469; GFX9-NEXT: v_and_b32_e32 v1, 63, v0 5470; GFX9-NEXT: v_not_b32_e32 v0, v0 5471; GFX9-NEXT: v_lshlrev_b64 v[1:2], v1, s[0:1] 5472; GFX9-NEXT: s_lshr_b64 s[0:1], s[2:3], 1 5473; GFX9-NEXT: v_and_b32_e32 v0, 63, v0 5474; GFX9-NEXT: v_lshrrev_b64 v[3:4], v0, s[0:1] 5475; GFX9-NEXT: v_or_b32_e32 v0, v1, v3 5476; GFX9-NEXT: v_or_b32_e32 v1, v2, v4 5477; GFX9-NEXT: ; return to shader part epilog 5478; 5479; GFX10-LABEL: v_fshl_i64_ssv: 5480; GFX10: ; %bb.0: 5481; GFX10-NEXT: v_not_b32_e32 v1, v0 5482; GFX10-NEXT: v_and_b32_e32 v0, 63, v0 5483; GFX10-NEXT: s_lshr_b64 s[2:3], s[2:3], 1 5484; GFX10-NEXT: v_and_b32_e32 v2, 63, v1 5485; GFX10-NEXT: v_lshlrev_b64 v[0:1], v0, s[0:1] 5486; GFX10-NEXT: v_lshrrev_b64 v[2:3], v2, s[2:3] 5487; GFX10-NEXT: v_or_b32_e32 v0, 
v0, v2 5488; GFX10-NEXT: v_or_b32_e32 v1, v1, v3 5489; GFX10-NEXT: ; return to shader part epilog 5490; 5491; GFX11-LABEL: v_fshl_i64_ssv: 5492; GFX11: ; %bb.0: 5493; GFX11-NEXT: v_not_b32_e32 v1, v0 5494; GFX11-NEXT: v_and_b32_e32 v0, 63, v0 5495; GFX11-NEXT: s_lshr_b64 s[2:3], s[2:3], 1 5496; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 5497; GFX11-NEXT: v_and_b32_e32 v2, 63, v1 5498; GFX11-NEXT: v_lshlrev_b64 v[0:1], v0, s[0:1] 5499; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 5500; GFX11-NEXT: v_lshrrev_b64 v[2:3], v2, s[2:3] 5501; GFX11-NEXT: v_or_b32_e32 v0, v0, v2 5502; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) 5503; GFX11-NEXT: v_or_b32_e32 v1, v1, v3 5504; GFX11-NEXT: ; return to shader part epilog 5505 %result = call i64 @llvm.fshl.i64(i64 %lhs, i64 %rhs, i64 %amt) 5506 %cast = bitcast i64 %result to <2 x float> 5507 ret <2 x float> %cast 5508} 5509 5510define amdgpu_ps <2 x float> @v_fshl_i64_svs(i64 inreg %lhs, i64 %rhs, i64 inreg %amt) { 5511; GFX6-LABEL: v_fshl_i64_svs: 5512; GFX6: ; %bb.0: 5513; GFX6-NEXT: v_lshr_b64 v[0:1], v[0:1], 1 5514; GFX6-NEXT: s_andn2_b32 s3, 63, s2 5515; GFX6-NEXT: v_lshr_b64 v[0:1], v[0:1], s3 5516; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], s2 5517; GFX6-NEXT: v_or_b32_e32 v0, s0, v0 5518; GFX6-NEXT: v_or_b32_e32 v1, s1, v1 5519; GFX6-NEXT: ; return to shader part epilog 5520; 5521; GFX8-LABEL: v_fshl_i64_svs: 5522; GFX8: ; %bb.0: 5523; GFX8-NEXT: v_lshrrev_b64 v[0:1], 1, v[0:1] 5524; GFX8-NEXT: s_andn2_b32 s3, 63, s2 5525; GFX8-NEXT: v_lshrrev_b64 v[0:1], s3, v[0:1] 5526; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], s2 5527; GFX8-NEXT: v_or_b32_e32 v0, s0, v0 5528; GFX8-NEXT: v_or_b32_e32 v1, s1, v1 5529; GFX8-NEXT: ; return to shader part epilog 5530; 5531; GFX9-LABEL: v_fshl_i64_svs: 5532; GFX9: ; %bb.0: 5533; GFX9-NEXT: v_lshrrev_b64 v[0:1], 1, v[0:1] 5534; GFX9-NEXT: s_andn2_b32 s3, 63, s2 5535; GFX9-NEXT: v_lshrrev_b64 v[0:1], s3, v[0:1] 5536; 
GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], s2 5537; GFX9-NEXT: v_or_b32_e32 v0, s0, v0 5538; GFX9-NEXT: v_or_b32_e32 v1, s1, v1 5539; GFX9-NEXT: ; return to shader part epilog 5540; 5541; GFX10-LABEL: v_fshl_i64_svs: 5542; GFX10: ; %bb.0: 5543; GFX10-NEXT: v_lshrrev_b64 v[0:1], 1, v[0:1] 5544; GFX10-NEXT: s_andn2_b32 s3, 63, s2 5545; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s2 5546; GFX10-NEXT: v_lshrrev_b64 v[0:1], s3, v[0:1] 5547; GFX10-NEXT: v_or_b32_e32 v0, s0, v0 5548; GFX10-NEXT: v_or_b32_e32 v1, s1, v1 5549; GFX10-NEXT: ; return to shader part epilog 5550; 5551; GFX11-LABEL: v_fshl_i64_svs: 5552; GFX11: ; %bb.0: 5553; GFX11-NEXT: v_lshrrev_b64 v[0:1], 1, v[0:1] 5554; GFX11-NEXT: s_and_not1_b32 s3, 63, s2 5555; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], s2 5556; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 5557; GFX11-NEXT: v_lshrrev_b64 v[0:1], s3, v[0:1] 5558; GFX11-NEXT: v_or_b32_e32 v0, s0, v0 5559; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) 5560; GFX11-NEXT: v_or_b32_e32 v1, s1, v1 5561; GFX11-NEXT: ; return to shader part epilog 5562 %result = call i64 @llvm.fshl.i64(i64 %lhs, i64 %rhs, i64 %amt) 5563 %cast = bitcast i64 %result to <2 x float> 5564 ret <2 x float> %cast 5565} 5566 5567define amdgpu_ps <2 x float> @v_fshl_i64_vss(i64 %lhs, i64 inreg %rhs, i64 inreg %amt) { 5568; GFX6-LABEL: v_fshl_i64_vss: 5569; GFX6: ; %bb.0: 5570; GFX6-NEXT: s_and_b32 s3, s2, 63 5571; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], s3 5572; GFX6-NEXT: s_lshr_b64 s[0:1], s[0:1], 1 5573; GFX6-NEXT: s_not_b32 s2, s2 5574; GFX6-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 5575; GFX6-NEXT: v_or_b32_e32 v0, s0, v0 5576; GFX6-NEXT: v_or_b32_e32 v1, s1, v1 5577; GFX6-NEXT: ; return to shader part epilog 5578; 5579; GFX8-LABEL: v_fshl_i64_vss: 5580; GFX8: ; %bb.0: 5581; GFX8-NEXT: s_and_b32 s3, s2, 63 5582; GFX8-NEXT: v_lshlrev_b64 v[0:1], s3, v[0:1] 5583; GFX8-NEXT: s_lshr_b64 s[0:1], s[0:1], 1 5584; GFX8-NEXT: s_not_b32 s2, s2 5585; GFX8-NEXT: s_lshr_b64 s[0:1], 
s[0:1], s2 5586; GFX8-NEXT: v_or_b32_e32 v0, s0, v0 5587; GFX8-NEXT: v_or_b32_e32 v1, s1, v1 5588; GFX8-NEXT: ; return to shader part epilog 5589; 5590; GFX9-LABEL: v_fshl_i64_vss: 5591; GFX9: ; %bb.0: 5592; GFX9-NEXT: s_and_b32 s3, s2, 63 5593; GFX9-NEXT: v_lshlrev_b64 v[0:1], s3, v[0:1] 5594; GFX9-NEXT: s_lshr_b64 s[0:1], s[0:1], 1 5595; GFX9-NEXT: s_not_b32 s2, s2 5596; GFX9-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 5597; GFX9-NEXT: v_or_b32_e32 v0, s0, v0 5598; GFX9-NEXT: v_or_b32_e32 v1, s1, v1 5599; GFX9-NEXT: ; return to shader part epilog 5600; 5601; GFX10-LABEL: v_fshl_i64_vss: 5602; GFX10: ; %bb.0: 5603; GFX10-NEXT: s_and_b32 s3, s2, 63 5604; GFX10-NEXT: s_lshr_b64 s[0:1], s[0:1], 1 5605; GFX10-NEXT: v_lshlrev_b64 v[0:1], s3, v[0:1] 5606; GFX10-NEXT: s_not_b32 s2, s2 5607; GFX10-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 5608; GFX10-NEXT: v_or_b32_e32 v0, s0, v0 5609; GFX10-NEXT: v_or_b32_e32 v1, s1, v1 5610; GFX10-NEXT: ; return to shader part epilog 5611; 5612; GFX11-LABEL: v_fshl_i64_vss: 5613; GFX11: ; %bb.0: 5614; GFX11-NEXT: s_and_b32 s3, s2, 63 5615; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], 1 5616; GFX11-NEXT: v_lshlrev_b64 v[0:1], s3, v[0:1] 5617; GFX11-NEXT: s_not_b32 s2, s2 5618; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 5619; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 5620; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) 5621; GFX11-NEXT: v_or_b32_e32 v0, s0, v0 5622; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) 5623; GFX11-NEXT: v_or_b32_e32 v1, s1, v1 5624; GFX11-NEXT: ; return to shader part epilog 5625 %result = call i64 @llvm.fshl.i64(i64 %lhs, i64 %rhs, i64 %amt) 5626 %cast = bitcast i64 %result to <2 x float> 5627 ret <2 x float> %cast 5628} 5629 5630define amdgpu_ps <2 x i64> @s_fshl_v2i64(<2 x i64> inreg %lhs, <2 x i64> inreg %rhs, <2 x i64> inreg %amt) { 5631; GFX6-LABEL: s_fshl_v2i64: 5632; GFX6: ; %bb.0: 5633; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], s8 5634; GFX6-NEXT: s_lshr_b64 s[4:5], s[4:5], 1 5635; GFX6-NEXT: s_not_b32 s8, 
s8 5636; GFX6-NEXT: s_lshr_b64 s[4:5], s[4:5], s8 5637; GFX6-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5] 5638; GFX6-NEXT: s_lshr_b64 s[4:5], s[6:7], 1 5639; GFX6-NEXT: s_not_b32 s6, s10 5640; GFX6-NEXT: s_lshl_b64 s[2:3], s[2:3], s10 5641; GFX6-NEXT: s_lshr_b64 s[4:5], s[4:5], s6 5642; GFX6-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5] 5643; GFX6-NEXT: ; return to shader part epilog 5644; 5645; GFX8-LABEL: s_fshl_v2i64: 5646; GFX8: ; %bb.0: 5647; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], s8 5648; GFX8-NEXT: s_lshr_b64 s[4:5], s[4:5], 1 5649; GFX8-NEXT: s_not_b32 s8, s8 5650; GFX8-NEXT: s_lshr_b64 s[4:5], s[4:5], s8 5651; GFX8-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5] 5652; GFX8-NEXT: s_lshr_b64 s[4:5], s[6:7], 1 5653; GFX8-NEXT: s_not_b32 s6, s10 5654; GFX8-NEXT: s_lshl_b64 s[2:3], s[2:3], s10 5655; GFX8-NEXT: s_lshr_b64 s[4:5], s[4:5], s6 5656; GFX8-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5] 5657; GFX8-NEXT: ; return to shader part epilog 5658; 5659; GFX9-LABEL: s_fshl_v2i64: 5660; GFX9: ; %bb.0: 5661; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], s8 5662; GFX9-NEXT: s_lshr_b64 s[4:5], s[4:5], 1 5663; GFX9-NEXT: s_not_b32 s8, s8 5664; GFX9-NEXT: s_lshr_b64 s[4:5], s[4:5], s8 5665; GFX9-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5] 5666; GFX9-NEXT: s_lshr_b64 s[4:5], s[6:7], 1 5667; GFX9-NEXT: s_not_b32 s6, s10 5668; GFX9-NEXT: s_lshl_b64 s[2:3], s[2:3], s10 5669; GFX9-NEXT: s_lshr_b64 s[4:5], s[4:5], s6 5670; GFX9-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5] 5671; GFX9-NEXT: ; return to shader part epilog 5672; 5673; GFX10-LABEL: s_fshl_v2i64: 5674; GFX10: ; %bb.0: 5675; GFX10-NEXT: s_lshr_b64 s[4:5], s[4:5], 1 5676; GFX10-NEXT: s_not_b32 s9, s8 5677; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s8 5678; GFX10-NEXT: s_lshr_b64 s[6:7], s[6:7], 1 5679; GFX10-NEXT: s_not_b32 s8, s10 5680; GFX10-NEXT: s_lshr_b64 s[4:5], s[4:5], s9 5681; GFX10-NEXT: s_lshl_b64 s[2:3], s[2:3], s10 5682; GFX10-NEXT: s_lshr_b64 s[6:7], s[6:7], s8 5683; GFX10-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5] 5684; GFX10-NEXT: s_or_b64 s[2:3], s[2:3], 
; ---- @v_fshl_v2i64 ----------------------------------------------------------
; Reviewer note: the `define` for @v_fshl_v2i64 opens partway through the next
; physical line, after the closing expectations and body of @s_fshl_v2i64.
; Purpose: a default-calling-convention function (no `inreg` arguments) that
; passes <2 x i64> %lhs, %rhs and %amt to @llvm.fshl.v2i64 and returns the
; <2 x i64> result.
; Expected code is vector-only: for each element the amount and its v_not_b32
; complement are masked with 63, the first operand pair is shifted left by the
; masked amount, the second pair is shifted right by 1 and then by the masked
; complement, and the two halves are joined with v_or_b32. Every listing starts
; with an s_waitcnt and returns through s_setpc_b64 s[30:31].
; The expectations are autogenerated (see the note at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
s[6:7] 5685; GFX10-NEXT: ; return to shader part epilog 5686; 5687; GFX11-LABEL: s_fshl_v2i64: 5688; GFX11: ; %bb.0: 5689; GFX11-NEXT: s_lshr_b64 s[4:5], s[4:5], 1 5690; GFX11-NEXT: s_not_b32 s9, s8 5691; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], s8 5692; GFX11-NEXT: s_lshr_b64 s[6:7], s[6:7], 1 5693; GFX11-NEXT: s_not_b32 s8, s10 5694; GFX11-NEXT: s_lshr_b64 s[4:5], s[4:5], s9 5695; GFX11-NEXT: s_lshl_b64 s[2:3], s[2:3], s10 5696; GFX11-NEXT: s_lshr_b64 s[6:7], s[6:7], s8 5697; GFX11-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5] 5698; GFX11-NEXT: s_or_b64 s[2:3], s[2:3], s[6:7] 5699; GFX11-NEXT: ; return to shader part epilog 5700 %result = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %lhs, <2 x i64> %rhs, <2 x i64> %amt) 5701 ret <2 x i64> %result 5702} 5703 5704define <2 x i64> @v_fshl_v2i64(<2 x i64> %lhs, <2 x i64> %rhs, <2 x i64> %amt) { 5705; GFX6-LABEL: v_fshl_v2i64: 5706; GFX6: ; %bb.0: 5707; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5708; GFX6-NEXT: v_and_b32_e32 v9, 63, v8 5709; GFX6-NEXT: v_lshr_b64 v[4:5], v[4:5], 1 5710; GFX6-NEXT: v_not_b32_e32 v8, v8 5711; GFX6-NEXT: v_and_b32_e32 v8, 63, v8 5712; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], v9 5713; GFX6-NEXT: v_lshr_b64 v[4:5], v[4:5], v8 5714; GFX6-NEXT: v_lshr_b64 v[6:7], v[6:7], 1 5715; GFX6-NEXT: v_or_b32_e32 v0, v0, v4 5716; GFX6-NEXT: v_and_b32_e32 v4, 63, v10 5717; GFX6-NEXT: v_lshl_b64 v[2:3], v[2:3], v4 5718; GFX6-NEXT: v_not_b32_e32 v4, v10 5719; GFX6-NEXT: v_and_b32_e32 v4, 63, v4 5720; GFX6-NEXT: v_lshr_b64 v[6:7], v[6:7], v4 5721; GFX6-NEXT: v_or_b32_e32 v1, v1, v5 5722; GFX6-NEXT: v_or_b32_e32 v2, v2, v6 5723; GFX6-NEXT: v_or_b32_e32 v3, v3, v7 5724; GFX6-NEXT: s_setpc_b64 s[30:31] 5725; 5726; GFX8-LABEL: v_fshl_v2i64: 5727; GFX8: ; %bb.0: 5728; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5729; GFX8-NEXT: v_and_b32_e32 v9, 63, v8 5730; GFX8-NEXT: v_lshrrev_b64 v[4:5], 1, v[4:5] 5731; GFX8-NEXT: v_not_b32_e32 v8, v8 5732; GFX8-NEXT: v_and_b32_e32 v8, 63, v8 5733; GFX8-NEXT: v_lshlrev_b64 
v[0:1], v9, v[0:1] 5734; GFX8-NEXT: v_lshrrev_b64 v[4:5], v8, v[4:5] 5735; GFX8-NEXT: v_lshrrev_b64 v[6:7], 1, v[6:7] 5736; GFX8-NEXT: v_or_b32_e32 v0, v0, v4 5737; GFX8-NEXT: v_and_b32_e32 v4, 63, v10 5738; GFX8-NEXT: v_lshlrev_b64 v[2:3], v4, v[2:3] 5739; GFX8-NEXT: v_not_b32_e32 v4, v10 5740; GFX8-NEXT: v_and_b32_e32 v4, 63, v4 5741; GFX8-NEXT: v_lshrrev_b64 v[6:7], v4, v[6:7] 5742; GFX8-NEXT: v_or_b32_e32 v1, v1, v5 5743; GFX8-NEXT: v_or_b32_e32 v2, v2, v6 5744; GFX8-NEXT: v_or_b32_e32 v3, v3, v7 5745; GFX8-NEXT: s_setpc_b64 s[30:31] 5746; 5747; GFX9-LABEL: v_fshl_v2i64: 5748; GFX9: ; %bb.0: 5749; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5750; GFX9-NEXT: v_and_b32_e32 v9, 63, v8 5751; GFX9-NEXT: v_lshrrev_b64 v[4:5], 1, v[4:5] 5752; GFX9-NEXT: v_not_b32_e32 v8, v8 5753; GFX9-NEXT: v_and_b32_e32 v8, 63, v8 5754; GFX9-NEXT: v_lshlrev_b64 v[0:1], v9, v[0:1] 5755; GFX9-NEXT: v_lshrrev_b64 v[4:5], v8, v[4:5] 5756; GFX9-NEXT: v_lshrrev_b64 v[6:7], 1, v[6:7] 5757; GFX9-NEXT: v_or_b32_e32 v0, v0, v4 5758; GFX9-NEXT: v_and_b32_e32 v4, 63, v10 5759; GFX9-NEXT: v_lshlrev_b64 v[2:3], v4, v[2:3] 5760; GFX9-NEXT: v_not_b32_e32 v4, v10 5761; GFX9-NEXT: v_and_b32_e32 v4, 63, v4 5762; GFX9-NEXT: v_lshrrev_b64 v[6:7], v4, v[6:7] 5763; GFX9-NEXT: v_or_b32_e32 v1, v1, v5 5764; GFX9-NEXT: v_or_b32_e32 v2, v2, v6 5765; GFX9-NEXT: v_or_b32_e32 v3, v3, v7 5766; GFX9-NEXT: s_setpc_b64 s[30:31] 5767; 5768; GFX10-LABEL: v_fshl_v2i64: 5769; GFX10: ; %bb.0: 5770; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5771; GFX10-NEXT: v_not_b32_e32 v9, v8 5772; GFX10-NEXT: v_not_b32_e32 v11, v10 5773; GFX10-NEXT: v_lshrrev_b64 v[4:5], 1, v[4:5] 5774; GFX10-NEXT: v_lshrrev_b64 v[6:7], 1, v[6:7] 5775; GFX10-NEXT: v_and_b32_e32 v8, 63, v8 5776; GFX10-NEXT: v_and_b32_e32 v9, 63, v9 5777; GFX10-NEXT: v_and_b32_e32 v10, 63, v10 5778; GFX10-NEXT: v_and_b32_e32 v11, 63, v11 5779; GFX10-NEXT: v_lshlrev_b64 v[0:1], v8, v[0:1] 5780; GFX10-NEXT: v_lshrrev_b64 v[4:5], v9, v[4:5] 5781; 
; ---- @s_fshl_i128 -----------------------------------------------------------
; Reviewer note: the `define` for @s_fshl_i128 opens partway through the next
; physical line, after the closing expectations and body of @v_fshl_v2i64.
; Purpose: an amdgpu_ps function that passes three `inreg` i128 arguments
; (%lhs, %rhs, %amt) to @llvm.fshl.i128 and returns the i128 result.
; Expected code is scalar-only. Each listing masks the amount with 0x7f, forms
; both `amount - 64` and `64 - amount`, and uses s_cmp_lt_u32 against 64 and
; s_cmp_eq_u32 against 0 with s_cselect_b64 to pick between the candidate 64-bit
; shift results. The same selection is repeated for the complemented amount
; (s_andn2_b32 / s_and_not1_b32 with 0x7f) after the second operand has been
; shifted right by one bit; two s_or_b64 then combine the halves.
; The expectations are autogenerated (see the note at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
GFX10-NEXT: v_lshlrev_b64 v[2:3], v10, v[2:3] 5782; GFX10-NEXT: v_lshrrev_b64 v[6:7], v11, v[6:7] 5783; GFX10-NEXT: v_or_b32_e32 v0, v0, v4 5784; GFX10-NEXT: v_or_b32_e32 v1, v1, v5 5785; GFX10-NEXT: v_or_b32_e32 v2, v2, v6 5786; GFX10-NEXT: v_or_b32_e32 v3, v3, v7 5787; GFX10-NEXT: s_setpc_b64 s[30:31] 5788; 5789; GFX11-LABEL: v_fshl_v2i64: 5790; GFX11: ; %bb.0: 5791; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5792; GFX11-NEXT: v_not_b32_e32 v9, v8 5793; GFX11-NEXT: v_not_b32_e32 v11, v10 5794; GFX11-NEXT: v_lshrrev_b64 v[4:5], 1, v[4:5] 5795; GFX11-NEXT: v_lshrrev_b64 v[6:7], 1, v[6:7] 5796; GFX11-NEXT: v_and_b32_e32 v8, 63, v8 5797; GFX11-NEXT: v_and_b32_e32 v9, 63, v9 5798; GFX11-NEXT: v_and_b32_e32 v10, 63, v10 5799; GFX11-NEXT: v_and_b32_e32 v11, 63, v11 5800; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 5801; GFX11-NEXT: v_lshlrev_b64 v[0:1], v8, v[0:1] 5802; GFX11-NEXT: v_lshrrev_b64 v[4:5], v9, v[4:5] 5803; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 5804; GFX11-NEXT: v_lshlrev_b64 v[2:3], v10, v[2:3] 5805; GFX11-NEXT: v_lshrrev_b64 v[6:7], v11, v[6:7] 5806; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4) 5807; GFX11-NEXT: v_or_b32_e32 v0, v0, v4 5808; GFX11-NEXT: v_or_b32_e32 v1, v1, v5 5809; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4) 5810; GFX11-NEXT: v_or_b32_e32 v2, v2, v6 5811; GFX11-NEXT: v_or_b32_e32 v3, v3, v7 5812; GFX11-NEXT: s_setpc_b64 s[30:31] 5813 %result = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %lhs, <2 x i64> %rhs, <2 x i64> %amt) 5814 ret <2 x i64> %result 5815} 5816 5817define amdgpu_ps i128 @s_fshl_i128(i128 inreg %lhs, i128 inreg %rhs, i128 inreg %amt) { 5818; GFX6-LABEL: s_fshl_i128: 5819; GFX6: ; %bb.0: 5820; GFX6-NEXT: s_and_b32 s9, s8, 0x7f 5821; GFX6-NEXT: s_sub_i32 s11, s9, 64 5822; GFX6-NEXT: s_sub_i32 s14, 64, s9 5823; GFX6-NEXT: s_cmp_lt_u32 s9, 64 5824; GFX6-NEXT: 
s_cselect_b32 s18, 1, 0 5825; GFX6-NEXT: s_cmp_eq_u32 s9, 0 5826; GFX6-NEXT: s_cselect_b32 s9, 1, 0 5827; GFX6-NEXT: s_lshr_b64 s[14:15], s[0:1], s14 5828; GFX6-NEXT: s_lshl_b64 s[16:17], s[2:3], s8 5829; GFX6-NEXT: s_lshl_b64 s[12:13], s[0:1], s8 5830; GFX6-NEXT: s_or_b64 s[14:15], s[14:15], s[16:17] 5831; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], s11 5832; GFX6-NEXT: s_cmp_lg_u32 s18, 0 5833; GFX6-NEXT: s_cselect_b64 s[12:13], s[12:13], 0 5834; GFX6-NEXT: s_cselect_b64 s[0:1], s[14:15], s[0:1] 5835; GFX6-NEXT: s_cmp_lg_u32 s9, 0 5836; GFX6-NEXT: s_mov_b32 s10, 0 5837; GFX6-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1] 5838; GFX6-NEXT: s_lshr_b64 s[0:1], s[4:5], 1 5839; GFX6-NEXT: s_lshl_b32 s11, s6, 31 5840; GFX6-NEXT: s_lshr_b64 s[4:5], s[6:7], 1 5841; GFX6-NEXT: s_andn2_b32 s6, 0x7f, s8 5842; GFX6-NEXT: s_or_b64 s[0:1], s[0:1], s[10:11] 5843; GFX6-NEXT: s_not_b32 s9, s8 5844; GFX6-NEXT: s_sub_i32 s14, s6, 64 5845; GFX6-NEXT: s_sub_i32 s10, 64, s6 5846; GFX6-NEXT: s_cmp_lt_u32 s6, 64 5847; GFX6-NEXT: s_cselect_b32 s15, 1, 0 5848; GFX6-NEXT: s_cmp_eq_u32 s6, 0 5849; GFX6-NEXT: s_cselect_b32 s16, 1, 0 5850; GFX6-NEXT: s_lshr_b64 s[6:7], s[4:5], s9 5851; GFX6-NEXT: s_lshr_b64 s[8:9], s[0:1], s9 5852; GFX6-NEXT: s_lshl_b64 s[10:11], s[4:5], s10 5853; GFX6-NEXT: s_or_b64 s[8:9], s[8:9], s[10:11] 5854; GFX6-NEXT: s_lshr_b64 s[4:5], s[4:5], s14 5855; GFX6-NEXT: s_cmp_lg_u32 s15, 0 5856; GFX6-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] 5857; GFX6-NEXT: s_cmp_lg_u32 s16, 0 5858; GFX6-NEXT: s_cselect_b64 s[0:1], s[0:1], s[4:5] 5859; GFX6-NEXT: s_cmp_lg_u32 s15, 0 5860; GFX6-NEXT: s_cselect_b64 s[4:5], s[6:7], 0 5861; GFX6-NEXT: s_or_b64 s[0:1], s[12:13], s[0:1] 5862; GFX6-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5] 5863; GFX6-NEXT: ; return to shader part epilog 5864; 5865; GFX8-LABEL: s_fshl_i128: 5866; GFX8: ; %bb.0: 5867; GFX8-NEXT: s_and_b32 s9, s8, 0x7f 5868; GFX8-NEXT: s_sub_i32 s11, s9, 64 5869; GFX8-NEXT: s_sub_i32 s14, 64, s9 5870; GFX8-NEXT: s_cmp_lt_u32 s9, 64 5871; 
GFX8-NEXT: s_cselect_b32 s18, 1, 0 5872; GFX8-NEXT: s_cmp_eq_u32 s9, 0 5873; GFX8-NEXT: s_cselect_b32 s9, 1, 0 5874; GFX8-NEXT: s_lshr_b64 s[14:15], s[0:1], s14 5875; GFX8-NEXT: s_lshl_b64 s[16:17], s[2:3], s8 5876; GFX8-NEXT: s_lshl_b64 s[12:13], s[0:1], s8 5877; GFX8-NEXT: s_or_b64 s[14:15], s[14:15], s[16:17] 5878; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], s11 5879; GFX8-NEXT: s_cmp_lg_u32 s18, 0 5880; GFX8-NEXT: s_cselect_b64 s[12:13], s[12:13], 0 5881; GFX8-NEXT: s_cselect_b64 s[0:1], s[14:15], s[0:1] 5882; GFX8-NEXT: s_cmp_lg_u32 s9, 0 5883; GFX8-NEXT: s_mov_b32 s10, 0 5884; GFX8-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1] 5885; GFX8-NEXT: s_lshr_b64 s[0:1], s[4:5], 1 5886; GFX8-NEXT: s_lshl_b32 s11, s6, 31 5887; GFX8-NEXT: s_lshr_b64 s[4:5], s[6:7], 1 5888; GFX8-NEXT: s_andn2_b32 s6, 0x7f, s8 5889; GFX8-NEXT: s_or_b64 s[0:1], s[0:1], s[10:11] 5890; GFX8-NEXT: s_not_b32 s9, s8 5891; GFX8-NEXT: s_sub_i32 s14, s6, 64 5892; GFX8-NEXT: s_sub_i32 s10, 64, s6 5893; GFX8-NEXT: s_cmp_lt_u32 s6, 64 5894; GFX8-NEXT: s_cselect_b32 s15, 1, 0 5895; GFX8-NEXT: s_cmp_eq_u32 s6, 0 5896; GFX8-NEXT: s_cselect_b32 s16, 1, 0 5897; GFX8-NEXT: s_lshr_b64 s[6:7], s[4:5], s9 5898; GFX8-NEXT: s_lshr_b64 s[8:9], s[0:1], s9 5899; GFX8-NEXT: s_lshl_b64 s[10:11], s[4:5], s10 5900; GFX8-NEXT: s_or_b64 s[8:9], s[8:9], s[10:11] 5901; GFX8-NEXT: s_lshr_b64 s[4:5], s[4:5], s14 5902; GFX8-NEXT: s_cmp_lg_u32 s15, 0 5903; GFX8-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] 5904; GFX8-NEXT: s_cmp_lg_u32 s16, 0 5905; GFX8-NEXT: s_cselect_b64 s[0:1], s[0:1], s[4:5] 5906; GFX8-NEXT: s_cmp_lg_u32 s15, 0 5907; GFX8-NEXT: s_cselect_b64 s[4:5], s[6:7], 0 5908; GFX8-NEXT: s_or_b64 s[0:1], s[12:13], s[0:1] 5909; GFX8-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5] 5910; GFX8-NEXT: ; return to shader part epilog 5911; 5912; GFX9-LABEL: s_fshl_i128: 5913; GFX9: ; %bb.0: 5914; GFX9-NEXT: s_and_b32 s9, s8, 0x7f 5915; GFX9-NEXT: s_sub_i32 s11, s9, 64 5916; GFX9-NEXT: s_sub_i32 s14, 64, s9 5917; GFX9-NEXT: s_cmp_lt_u32 s9, 64 
5918; GFX9-NEXT: s_cselect_b32 s18, 1, 0 5919; GFX9-NEXT: s_cmp_eq_u32 s9, 0 5920; GFX9-NEXT: s_cselect_b32 s9, 1, 0 5921; GFX9-NEXT: s_lshr_b64 s[14:15], s[0:1], s14 5922; GFX9-NEXT: s_lshl_b64 s[16:17], s[2:3], s8 5923; GFX9-NEXT: s_lshl_b64 s[12:13], s[0:1], s8 5924; GFX9-NEXT: s_or_b64 s[14:15], s[14:15], s[16:17] 5925; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], s11 5926; GFX9-NEXT: s_cmp_lg_u32 s18, 0 5927; GFX9-NEXT: s_cselect_b64 s[12:13], s[12:13], 0 5928; GFX9-NEXT: s_cselect_b64 s[0:1], s[14:15], s[0:1] 5929; GFX9-NEXT: s_cmp_lg_u32 s9, 0 5930; GFX9-NEXT: s_mov_b32 s10, 0 5931; GFX9-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1] 5932; GFX9-NEXT: s_lshr_b64 s[0:1], s[4:5], 1 5933; GFX9-NEXT: s_lshl_b32 s11, s6, 31 5934; GFX9-NEXT: s_lshr_b64 s[4:5], s[6:7], 1 5935; GFX9-NEXT: s_andn2_b32 s6, 0x7f, s8 5936; GFX9-NEXT: s_or_b64 s[0:1], s[0:1], s[10:11] 5937; GFX9-NEXT: s_not_b32 s9, s8 5938; GFX9-NEXT: s_sub_i32 s14, s6, 64 5939; GFX9-NEXT: s_sub_i32 s10, 64, s6 5940; GFX9-NEXT: s_cmp_lt_u32 s6, 64 5941; GFX9-NEXT: s_cselect_b32 s15, 1, 0 5942; GFX9-NEXT: s_cmp_eq_u32 s6, 0 5943; GFX9-NEXT: s_cselect_b32 s16, 1, 0 5944; GFX9-NEXT: s_lshr_b64 s[6:7], s[4:5], s9 5945; GFX9-NEXT: s_lshr_b64 s[8:9], s[0:1], s9 5946; GFX9-NEXT: s_lshl_b64 s[10:11], s[4:5], s10 5947; GFX9-NEXT: s_or_b64 s[8:9], s[8:9], s[10:11] 5948; GFX9-NEXT: s_lshr_b64 s[4:5], s[4:5], s14 5949; GFX9-NEXT: s_cmp_lg_u32 s15, 0 5950; GFX9-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] 5951; GFX9-NEXT: s_cmp_lg_u32 s16, 0 5952; GFX9-NEXT: s_cselect_b64 s[0:1], s[0:1], s[4:5] 5953; GFX9-NEXT: s_cmp_lg_u32 s15, 0 5954; GFX9-NEXT: s_cselect_b64 s[4:5], s[6:7], 0 5955; GFX9-NEXT: s_or_b64 s[0:1], s[12:13], s[0:1] 5956; GFX9-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5] 5957; GFX9-NEXT: ; return to shader part epilog 5958; 5959; GFX10-LABEL: s_fshl_i128: 5960; GFX10: ; %bb.0: 5961; GFX10-NEXT: s_and_b32 s9, s8, 0x7f 5962; GFX10-NEXT: s_mov_b32 s10, 0 5963; GFX10-NEXT: s_sub_i32 s11, s9, 64 5964; GFX10-NEXT: s_sub_i32 s12, 
64, s9 5965; GFX10-NEXT: s_cmp_lt_u32 s9, 64 5966; GFX10-NEXT: s_cselect_b32 s18, 1, 0 5967; GFX10-NEXT: s_cmp_eq_u32 s9, 0 5968; GFX10-NEXT: s_cselect_b32 s9, 1, 0 5969; GFX10-NEXT: s_lshr_b64 s[12:13], s[0:1], s12 5970; GFX10-NEXT: s_lshl_b64 s[14:15], s[2:3], s8 5971; GFX10-NEXT: s_lshl_b64 s[16:17], s[0:1], s8 5972; GFX10-NEXT: s_or_b64 s[12:13], s[12:13], s[14:15] 5973; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s11 5974; GFX10-NEXT: s_cmp_lg_u32 s18, 0 5975; GFX10-NEXT: s_cselect_b64 s[14:15], s[16:17], 0 5976; GFX10-NEXT: s_cselect_b64 s[0:1], s[12:13], s[0:1] 5977; GFX10-NEXT: s_cmp_lg_u32 s9, 0 5978; GFX10-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1] 5979; GFX10-NEXT: s_lshr_b64 s[0:1], s[4:5], 1 5980; GFX10-NEXT: s_lshl_b32 s11, s6, 31 5981; GFX10-NEXT: s_lshr_b64 s[4:5], s[6:7], 1 5982; GFX10-NEXT: s_andn2_b32 s6, 0x7f, s8 5983; GFX10-NEXT: s_or_b64 s[0:1], s[0:1], s[10:11] 5984; GFX10-NEXT: s_not_b32 s10, s8 5985; GFX10-NEXT: s_sub_i32 s12, s6, 64 5986; GFX10-NEXT: s_sub_i32 s8, 64, s6 5987; GFX10-NEXT: s_cmp_lt_u32 s6, 64 5988; GFX10-NEXT: s_cselect_b32 s13, 1, 0 5989; GFX10-NEXT: s_cmp_eq_u32 s6, 0 5990; GFX10-NEXT: s_cselect_b32 s16, 1, 0 5991; GFX10-NEXT: s_lshr_b64 s[6:7], s[0:1], s10 5992; GFX10-NEXT: s_lshl_b64 s[8:9], s[4:5], s8 5993; GFX10-NEXT: s_lshr_b64 s[10:11], s[4:5], s10 5994; GFX10-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9] 5995; GFX10-NEXT: s_lshr_b64 s[4:5], s[4:5], s12 5996; GFX10-NEXT: s_cmp_lg_u32 s13, 0 5997; GFX10-NEXT: s_cselect_b64 s[4:5], s[6:7], s[4:5] 5998; GFX10-NEXT: s_cmp_lg_u32 s16, 0 5999; GFX10-NEXT: s_cselect_b64 s[0:1], s[0:1], s[4:5] 6000; GFX10-NEXT: s_cmp_lg_u32 s13, 0 6001; GFX10-NEXT: s_cselect_b64 s[4:5], s[10:11], 0 6002; GFX10-NEXT: s_or_b64 s[0:1], s[14:15], s[0:1] 6003; GFX10-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5] 6004; GFX10-NEXT: ; return to shader part epilog 6005; 6006; GFX11-LABEL: s_fshl_i128: 6007; GFX11: ; %bb.0: 6008; GFX11-NEXT: s_and_b32 s9, s8, 0x7f 6009; GFX11-NEXT: s_mov_b32 s10, 0 6010; GFX11-NEXT: 
s_sub_i32 s11, s9, 64 6011; GFX11-NEXT: s_sub_i32 s12, 64, s9 6012; GFX11-NEXT: s_cmp_lt_u32 s9, 64 6013; GFX11-NEXT: s_cselect_b32 s18, 1, 0 6014; GFX11-NEXT: s_cmp_eq_u32 s9, 0 6015; GFX11-NEXT: s_cselect_b32 s9, 1, 0 6016; GFX11-NEXT: s_lshr_b64 s[12:13], s[0:1], s12 6017; GFX11-NEXT: s_lshl_b64 s[14:15], s[2:3], s8 6018; GFX11-NEXT: s_lshl_b64 s[16:17], s[0:1], s8 6019; GFX11-NEXT: s_or_b64 s[12:13], s[12:13], s[14:15] 6020; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], s11 6021; GFX11-NEXT: s_cmp_lg_u32 s18, 0 6022; GFX11-NEXT: s_cselect_b64 s[14:15], s[16:17], 0 6023; GFX11-NEXT: s_cselect_b64 s[0:1], s[12:13], s[0:1] 6024; GFX11-NEXT: s_cmp_lg_u32 s9, 0 6025; GFX11-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1] 6026; GFX11-NEXT: s_lshr_b64 s[0:1], s[4:5], 1 6027; GFX11-NEXT: s_lshl_b32 s11, s6, 31 6028; GFX11-NEXT: s_lshr_b64 s[4:5], s[6:7], 1 6029; GFX11-NEXT: s_and_not1_b32 s6, 0x7f, s8 6030; GFX11-NEXT: s_or_b64 s[0:1], s[0:1], s[10:11] 6031; GFX11-NEXT: s_not_b32 s10, s8 6032; GFX11-NEXT: s_sub_i32 s12, s6, 64 6033; GFX11-NEXT: s_sub_i32 s8, 64, s6 6034; GFX11-NEXT: s_cmp_lt_u32 s6, 64 6035; GFX11-NEXT: s_cselect_b32 s13, 1, 0 6036; GFX11-NEXT: s_cmp_eq_u32 s6, 0 6037; GFX11-NEXT: s_cselect_b32 s16, 1, 0 6038; GFX11-NEXT: s_lshr_b64 s[6:7], s[0:1], s10 6039; GFX11-NEXT: s_lshl_b64 s[8:9], s[4:5], s8 6040; GFX11-NEXT: s_lshr_b64 s[10:11], s[4:5], s10 6041; GFX11-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9] 6042; GFX11-NEXT: s_lshr_b64 s[4:5], s[4:5], s12 6043; GFX11-NEXT: s_cmp_lg_u32 s13, 0 6044; GFX11-NEXT: s_cselect_b64 s[4:5], s[6:7], s[4:5] 6045; GFX11-NEXT: s_cmp_lg_u32 s16, 0 6046; GFX11-NEXT: s_cselect_b64 s[0:1], s[0:1], s[4:5] 6047; GFX11-NEXT: s_cmp_lg_u32 s13, 0 6048; GFX11-NEXT: s_cselect_b64 s[4:5], s[10:11], 0 6049; GFX11-NEXT: s_or_b64 s[0:1], s[14:15], s[0:1] 6050; GFX11-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5] 6051; GFX11-NEXT: ; return to shader part epilog 6052 %result = call i128 @llvm.fshl.i128(i128 %lhs, i128 %rhs, i128 %amt) 6053 ret i128 %result 6054} 
; ---- @v_fshl_i128 -----------------------------------------------------------
; Purpose: a default-calling-convention function (no `inreg` arguments) that
; passes i128 %lhs, %rhs and %amt to @llvm.fshl.i128 and returns the i128
; result.
; Expected code is vector-only. Each listing masks the amount and its
; v_not_b32 complement with 0x7f, computes `64 - n` and `n - 64` for both,
; issues the candidate 64-bit shifts, and then selects among them with
; v_cmp_gt_u32 against 64 and v_cmp_eq_u32 against 0 feeding v_cndmask_b32;
; four v_or_b32 produce the result and s_setpc_b64 s[30:31] returns.
; Visible target difference: for the complemented amount the first two
; listings build `n - 64` by adding a register loaded via `v_not_b32 ..., 63`,
; while the later ones add the literal 0xffffffc0 directly.
; Reviewer note: the last physical line of this range also carries the opening
; of the following function, @v_fshl_i128_ssv; it is reproduced unchanged.
; The expectations are autogenerated (see the note at the top of the file);
; regenerate them with utils/update_llc_test_checks.py instead of hand-editing.
6055 6056define i128 @v_fshl_i128(i128 %lhs, i128 %rhs, i128 %amt) { 6057; GFX6-LABEL: v_fshl_i128: 6058; GFX6: ; %bb.0: 6059; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6060; GFX6-NEXT: v_and_b32_e32 v15, 0x7f, v8 6061; GFX6-NEXT: v_sub_i32_e32 v9, vcc, 64, v15 6062; GFX6-NEXT: v_add_i32_e32 v17, vcc, 0xffffffc0, v15 6063; GFX6-NEXT: v_lshr_b64 v[9:10], v[0:1], v9 6064; GFX6-NEXT: v_lshl_b64 v[11:12], v[2:3], v15 6065; GFX6-NEXT: v_lshl_b64 v[13:14], v[0:1], v15 6066; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], v17 6067; GFX6-NEXT: v_or_b32_e32 v9, v9, v11 6068; GFX6-NEXT: v_or_b32_e32 v10, v10, v12 6069; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v15 6070; GFX6-NEXT: v_cndmask_b32_e32 v11, 0, v13, vcc 6071; GFX6-NEXT: v_cndmask_b32_e32 v12, 0, v14, vcc 6072; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc 6073; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v10, vcc 6074; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, 0, v15 6075; GFX6-NEXT: v_cndmask_b32_e32 v10, v0, v2, vcc 6076; GFX6-NEXT: v_cndmask_b32_e32 v13, v1, v3, vcc 6077; GFX6-NEXT: v_lshr_b64 v[0:1], v[4:5], 1 6078; GFX6-NEXT: v_lshlrev_b32_e32 v2, 31, v6 6079; GFX6-NEXT: v_not_b32_e32 v4, v8 6080; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 6081; GFX6-NEXT: v_lshr_b64 v[2:3], v[6:7], 1 6082; GFX6-NEXT: v_and_b32_e32 v14, 0x7f, v4 6083; GFX6-NEXT: v_not_b32_e32 v16, 63 6084; GFX6-NEXT: v_sub_i32_e32 v6, vcc, 64, v14 6085; GFX6-NEXT: v_add_i32_e32 v15, vcc, v14, v16 6086; GFX6-NEXT: v_lshr_b64 v[4:5], v[0:1], v14 6087; GFX6-NEXT: v_lshl_b64 v[6:7], v[2:3], v6 6088; GFX6-NEXT: v_lshr_b64 v[8:9], v[2:3], v14 6089; GFX6-NEXT: v_lshr_b64 v[2:3], v[2:3], v15 6090; GFX6-NEXT: v_or_b32_e32 v4, v4, v6 6091; GFX6-NEXT: v_or_b32_e32 v5, v5, v7 6092; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v14 6093; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 6094; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc 6095; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v14 6096; GFX6-NEXT: v_cndmask_b32_e64 v0, v2, v0, s[4:5] 6097; GFX6-NEXT: v_cndmask_b32_e64 v1, v3, v1, s[4:5] 6098; 
GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc 6099; GFX6-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc 6100; GFX6-NEXT: v_or_b32_e32 v0, v11, v0 6101; GFX6-NEXT: v_or_b32_e32 v1, v12, v1 6102; GFX6-NEXT: v_or_b32_e32 v2, v10, v2 6103; GFX6-NEXT: v_or_b32_e32 v3, v13, v3 6104; GFX6-NEXT: s_setpc_b64 s[30:31] 6105; 6106; GFX8-LABEL: v_fshl_i128: 6107; GFX8: ; %bb.0: 6108; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6109; GFX8-NEXT: v_and_b32_e32 v15, 0x7f, v8 6110; GFX8-NEXT: v_sub_u32_e32 v9, vcc, 64, v15 6111; GFX8-NEXT: v_add_u32_e32 v17, vcc, 0xffffffc0, v15 6112; GFX8-NEXT: v_lshrrev_b64 v[9:10], v9, v[0:1] 6113; GFX8-NEXT: v_lshlrev_b64 v[11:12], v15, v[2:3] 6114; GFX8-NEXT: v_lshlrev_b64 v[13:14], v15, v[0:1] 6115; GFX8-NEXT: v_lshlrev_b64 v[0:1], v17, v[0:1] 6116; GFX8-NEXT: v_or_b32_e32 v9, v9, v11 6117; GFX8-NEXT: v_or_b32_e32 v10, v10, v12 6118; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v15 6119; GFX8-NEXT: v_cndmask_b32_e32 v11, 0, v13, vcc 6120; GFX8-NEXT: v_cndmask_b32_e32 v12, 0, v14, vcc 6121; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc 6122; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v10, vcc 6123; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v15 6124; GFX8-NEXT: v_cndmask_b32_e32 v10, v0, v2, vcc 6125; GFX8-NEXT: v_cndmask_b32_e32 v13, v1, v3, vcc 6126; GFX8-NEXT: v_lshrrev_b64 v[0:1], 1, v[4:5] 6127; GFX8-NEXT: v_lshlrev_b32_e32 v2, 31, v6 6128; GFX8-NEXT: v_not_b32_e32 v4, v8 6129; GFX8-NEXT: v_or_b32_e32 v1, v1, v2 6130; GFX8-NEXT: v_lshrrev_b64 v[2:3], 1, v[6:7] 6131; GFX8-NEXT: v_and_b32_e32 v14, 0x7f, v4 6132; GFX8-NEXT: v_not_b32_e32 v16, 63 6133; GFX8-NEXT: v_sub_u32_e32 v6, vcc, 64, v14 6134; GFX8-NEXT: v_add_u32_e32 v15, vcc, v14, v16 6135; GFX8-NEXT: v_lshrrev_b64 v[4:5], v14, v[0:1] 6136; GFX8-NEXT: v_lshlrev_b64 v[6:7], v6, v[2:3] 6137; GFX8-NEXT: v_lshrrev_b64 v[8:9], v14, v[2:3] 6138; GFX8-NEXT: v_lshrrev_b64 v[2:3], v15, v[2:3] 6139; GFX8-NEXT: v_or_b32_e32 v4, v4, v6 6140; GFX8-NEXT: v_or_b32_e32 v5, v5, v7 6141; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v14 
6142; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 6143; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc 6144; GFX8-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v14 6145; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, v0, s[4:5] 6146; GFX8-NEXT: v_cndmask_b32_e64 v1, v3, v1, s[4:5] 6147; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc 6148; GFX8-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc 6149; GFX8-NEXT: v_or_b32_e32 v0, v11, v0 6150; GFX8-NEXT: v_or_b32_e32 v1, v12, v1 6151; GFX8-NEXT: v_or_b32_e32 v2, v10, v2 6152; GFX8-NEXT: v_or_b32_e32 v3, v13, v3 6153; GFX8-NEXT: s_setpc_b64 s[30:31] 6154; 6155; GFX9-LABEL: v_fshl_i128: 6156; GFX9: ; %bb.0: 6157; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6158; GFX9-NEXT: v_and_b32_e32 v15, 0x7f, v8 6159; GFX9-NEXT: v_sub_u32_e32 v9, 64, v15 6160; GFX9-NEXT: v_add_u32_e32 v16, 0xffffffc0, v15 6161; GFX9-NEXT: v_lshrrev_b64 v[9:10], v9, v[0:1] 6162; GFX9-NEXT: v_lshlrev_b64 v[11:12], v15, v[2:3] 6163; GFX9-NEXT: v_lshlrev_b64 v[13:14], v15, v[0:1] 6164; GFX9-NEXT: v_lshlrev_b64 v[0:1], v16, v[0:1] 6165; GFX9-NEXT: v_or_b32_e32 v9, v9, v11 6166; GFX9-NEXT: v_or_b32_e32 v10, v10, v12 6167; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v15 6168; GFX9-NEXT: v_cndmask_b32_e32 v11, 0, v13, vcc 6169; GFX9-NEXT: v_cndmask_b32_e32 v12, 0, v14, vcc 6170; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v9, vcc 6171; GFX9-NEXT: v_cndmask_b32_e32 v9, v1, v10, vcc 6172; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v15 6173; GFX9-NEXT: v_cndmask_b32_e32 v10, v0, v2, vcc 6174; GFX9-NEXT: v_lshrrev_b64 v[0:1], 1, v[4:5] 6175; GFX9-NEXT: v_not_b32_e32 v4, v8 6176; GFX9-NEXT: v_cndmask_b32_e32 v13, v9, v3, vcc 6177; GFX9-NEXT: v_lshrrev_b64 v[2:3], 1, v[6:7] 6178; GFX9-NEXT: v_and_b32_e32 v14, 0x7f, v4 6179; GFX9-NEXT: v_lshl_or_b32 v1, v6, 31, v1 6180; GFX9-NEXT: v_sub_u32_e32 v6, 64, v14 6181; GFX9-NEXT: v_add_u32_e32 v15, 0xffffffc0, v14 6182; GFX9-NEXT: v_lshrrev_b64 v[4:5], v14, v[0:1] 6183; GFX9-NEXT: v_lshlrev_b64 v[6:7], v6, v[2:3] 6184; GFX9-NEXT: v_lshrrev_b64 v[8:9], v14, v[2:3] 6185; 
GFX9-NEXT: v_lshrrev_b64 v[2:3], v15, v[2:3] 6186; GFX9-NEXT: v_or_b32_e32 v4, v4, v6 6187; GFX9-NEXT: v_or_b32_e32 v5, v5, v7 6188; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v14 6189; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 6190; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc 6191; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v14 6192; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, v0, s[4:5] 6193; GFX9-NEXT: v_cndmask_b32_e64 v1, v3, v1, s[4:5] 6194; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc 6195; GFX9-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc 6196; GFX9-NEXT: v_or_b32_e32 v0, v11, v0 6197; GFX9-NEXT: v_or_b32_e32 v1, v12, v1 6198; GFX9-NEXT: v_or_b32_e32 v2, v10, v2 6199; GFX9-NEXT: v_or_b32_e32 v3, v13, v3 6200; GFX9-NEXT: s_setpc_b64 s[30:31] 6201; 6202; GFX10-LABEL: v_fshl_i128: 6203; GFX10: ; %bb.0: 6204; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6205; GFX10-NEXT: v_and_b32_e32 v18, 0x7f, v8 6206; GFX10-NEXT: v_not_b32_e32 v10, v8 6207; GFX10-NEXT: v_lshrrev_b64 v[4:5], 1, v[4:5] 6208; GFX10-NEXT: v_lshrrev_b64 v[12:13], 1, v[6:7] 6209; GFX10-NEXT: v_sub_nc_u32_e32 v11, 64, v18 6210; GFX10-NEXT: v_and_b32_e32 v19, 0x7f, v10 6211; GFX10-NEXT: v_lshlrev_b64 v[8:9], v18, v[2:3] 6212; GFX10-NEXT: v_lshl_or_b32 v5, v6, 31, v5 6213; GFX10-NEXT: v_add_nc_u32_e32 v20, 0xffffffc0, v18 6214; GFX10-NEXT: v_lshrrev_b64 v[10:11], v11, v[0:1] 6215; GFX10-NEXT: v_sub_nc_u32_e32 v16, 64, v19 6216; GFX10-NEXT: v_lshlrev_b64 v[6:7], v18, v[0:1] 6217; GFX10-NEXT: v_lshrrev_b64 v[14:15], v19, v[4:5] 6218; GFX10-NEXT: v_lshlrev_b64 v[0:1], v20, v[0:1] 6219; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v18 6220; GFX10-NEXT: v_or_b32_e32 v10, v10, v8 6221; GFX10-NEXT: v_add_nc_u32_e32 v8, 0xffffffc0, v19 6222; GFX10-NEXT: v_lshlrev_b64 v[16:17], v16, v[12:13] 6223; GFX10-NEXT: v_or_b32_e32 v11, v11, v9 6224; GFX10-NEXT: v_cmp_gt_u32_e64 s4, 64, v19 6225; GFX10-NEXT: v_cndmask_b32_e32 v10, v0, v10, vcc_lo 6226; GFX10-NEXT: v_lshrrev_b64 v[8:9], v8, v[12:13] 6227; GFX10-NEXT: v_cmp_eq_u32_e64 
s5, 0, v19 6228; GFX10-NEXT: v_or_b32_e32 v14, v14, v16 6229; GFX10-NEXT: v_or_b32_e32 v15, v15, v17 6230; GFX10-NEXT: v_cndmask_b32_e32 v11, v1, v11, vcc_lo 6231; GFX10-NEXT: v_lshrrev_b64 v[0:1], v19, v[12:13] 6232; GFX10-NEXT: v_cmp_eq_u32_e64 s6, 0, v18 6233; GFX10-NEXT: v_cndmask_b32_e64 v8, v8, v14, s4 6234; GFX10-NEXT: v_cndmask_b32_e64 v9, v9, v15, s4 6235; GFX10-NEXT: v_cndmask_b32_e32 v6, 0, v6, vcc_lo 6236; GFX10-NEXT: v_cndmask_b32_e32 v7, 0, v7, vcc_lo 6237; GFX10-NEXT: v_cndmask_b32_e64 v2, v10, v2, s6 6238; GFX10-NEXT: v_cndmask_b32_e64 v3, v11, v3, s6 6239; GFX10-NEXT: v_cndmask_b32_e64 v4, v8, v4, s5 6240; GFX10-NEXT: v_cndmask_b32_e64 v5, v9, v5, s5 6241; GFX10-NEXT: v_cndmask_b32_e64 v8, 0, v0, s4 6242; GFX10-NEXT: v_cndmask_b32_e64 v9, 0, v1, s4 6243; GFX10-NEXT: v_or_b32_e32 v0, v6, v4 6244; GFX10-NEXT: v_or_b32_e32 v1, v7, v5 6245; GFX10-NEXT: v_or_b32_e32 v2, v2, v8 6246; GFX10-NEXT: v_or_b32_e32 v3, v3, v9 6247; GFX10-NEXT: s_setpc_b64 s[30:31] 6248; 6249; GFX11-LABEL: v_fshl_i128: 6250; GFX11: ; %bb.0: 6251; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6252; GFX11-NEXT: v_and_b32_e32 v18, 0x7f, v8 6253; GFX11-NEXT: v_not_b32_e32 v10, v8 6254; GFX11-NEXT: v_lshrrev_b64 v[4:5], 1, v[4:5] 6255; GFX11-NEXT: v_lshrrev_b64 v[12:13], 1, v[6:7] 6256; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 6257; GFX11-NEXT: v_sub_nc_u32_e32 v11, 64, v18 6258; GFX11-NEXT: v_and_b32_e32 v19, 0x7f, v10 6259; GFX11-NEXT: v_lshlrev_b64 v[8:9], v18, v[2:3] 6260; GFX11-NEXT: v_lshl_or_b32 v5, v6, 31, v5 6261; GFX11-NEXT: v_lshlrev_b64 v[6:7], v18, v[0:1] 6262; GFX11-NEXT: v_lshrrev_b64 v[10:11], v11, v[0:1] 6263; GFX11-NEXT: v_sub_nc_u32_e32 v16, 64, v19 6264; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v18 6265; GFX11-NEXT: v_add_nc_u32_e32 v20, 0xffffffc0, v18 6266; GFX11-NEXT: v_lshrrev_b64 v[14:15], v19, v[4:5] 6267; GFX11-NEXT: v_cmp_gt_u32_e64 s0, 64, v19 6268; GFX11-NEXT: v_or_b32_e32 v10, v10, v8 6269; GFX11-NEXT: 
v_cndmask_b32_e32 v7, 0, v7, vcc_lo 6270; GFX11-NEXT: v_add_nc_u32_e32 v8, 0xffffffc0, v19 6271; GFX11-NEXT: v_lshlrev_b64 v[16:17], v16, v[12:13] 6272; GFX11-NEXT: v_lshlrev_b64 v[0:1], v20, v[0:1] 6273; GFX11-NEXT: v_or_b32_e32 v11, v11, v9 6274; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 0, v19 6275; GFX11-NEXT: v_lshrrev_b64 v[8:9], v8, v[12:13] 6276; GFX11-NEXT: v_cndmask_b32_e32 v6, 0, v6, vcc_lo 6277; GFX11-NEXT: v_or_b32_e32 v14, v14, v16 6278; GFX11-NEXT: v_or_b32_e32 v15, v15, v17 6279; GFX11-NEXT: v_dual_cndmask_b32 v10, v0, v10 :: v_dual_cndmask_b32 v11, v1, v11 6280; GFX11-NEXT: v_lshrrev_b64 v[0:1], v19, v[12:13] 6281; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_3) 6282; GFX11-NEXT: v_cndmask_b32_e64 v8, v8, v14, s0 6283; GFX11-NEXT: v_cmp_eq_u32_e64 s2, 0, v18 6284; GFX11-NEXT: v_cndmask_b32_e64 v9, v9, v15, s0 6285; GFX11-NEXT: v_cndmask_b32_e64 v4, v8, v4, s1 6286; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_4) 6287; GFX11-NEXT: v_cndmask_b32_e64 v2, v10, v2, s2 6288; GFX11-NEXT: v_cndmask_b32_e64 v3, v11, v3, s2 6289; GFX11-NEXT: v_cndmask_b32_e64 v5, v9, v5, s1 6290; GFX11-NEXT: v_cndmask_b32_e64 v8, 0, v0, s0 6291; GFX11-NEXT: v_cndmask_b32_e64 v9, 0, v1, s0 6292; GFX11-NEXT: v_or_b32_e32 v0, v6, v4 6293; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 6294; GFX11-NEXT: v_or_b32_e32 v1, v7, v5 6295; GFX11-NEXT: v_or_b32_e32 v2, v2, v8 6296; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) 6297; GFX11-NEXT: v_or_b32_e32 v3, v3, v9 6298; GFX11-NEXT: s_setpc_b64 s[30:31] 6299 %result = call i128 @llvm.fshl.i128(i128 %lhs, i128 %rhs, i128 %amt) 6300 ret i128 %result 6301} 6302 6303define amdgpu_ps <4 x float> @v_fshl_i128_ssv(i128 inreg %lhs, i128 inreg %rhs, i128 %amt) { 6304; GFX6-LABEL: v_fshl_i128_ssv: 6305; GFX6: ; %bb.0: 6306; GFX6-NEXT: v_and_b32_e32 v7, 0x7f, v0 6307; GFX6-NEXT: v_sub_i32_e32 v1, vcc, 64, v7 6308; GFX6-NEXT: v_lshr_b64 v[1:2], 
s[0:1], v1 6309; GFX6-NEXT: v_lshl_b64 v[3:4], s[2:3], v7 6310; GFX6-NEXT: v_add_i32_e32 v9, vcc, 0xffffffc0, v7 6311; GFX6-NEXT: v_lshl_b64 v[5:6], s[0:1], v7 6312; GFX6-NEXT: v_or_b32_e32 v3, v1, v3 6313; GFX6-NEXT: v_or_b32_e32 v4, v2, v4 6314; GFX6-NEXT: v_lshl_b64 v[1:2], s[0:1], v9 6315; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v7 6316; GFX6-NEXT: v_not_b32_e32 v0, v0 6317; GFX6-NEXT: s_mov_b32 s8, 0 6318; GFX6-NEXT: v_cndmask_b32_e32 v9, 0, v5, vcc 6319; GFX6-NEXT: v_cndmask_b32_e32 v6, 0, v6, vcc 6320; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 6321; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 6322; GFX6-NEXT: v_mov_b32_e32 v3, s2 6323; GFX6-NEXT: v_mov_b32_e32 v4, s3 6324; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7 6325; GFX6-NEXT: s_lshr_b64 s[0:1], s[4:5], 1 6326; GFX6-NEXT: s_lshl_b32 s9, s6, 31 6327; GFX6-NEXT: v_and_b32_e32 v11, 0x7f, v0 6328; GFX6-NEXT: v_cndmask_b32_e32 v7, v1, v3, vcc 6329; GFX6-NEXT: v_cndmask_b32_e32 v10, v2, v4, vcc 6330; GFX6-NEXT: s_or_b64 s[0:1], s[0:1], s[8:9] 6331; GFX6-NEXT: s_lshr_b64 s[2:3], s[6:7], 1 6332; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 64, v11 6333; GFX6-NEXT: v_not_b32_e32 v8, 63 6334; GFX6-NEXT: v_lshr_b64 v[0:1], s[0:1], v11 6335; GFX6-NEXT: v_lshl_b64 v[2:3], s[2:3], v2 6336; GFX6-NEXT: v_add_i32_e32 v8, vcc, v11, v8 6337; GFX6-NEXT: v_or_b32_e32 v2, v0, v2 6338; GFX6-NEXT: v_or_b32_e32 v3, v1, v3 6339; GFX6-NEXT: v_lshr_b64 v[0:1], s[2:3], v8 6340; GFX6-NEXT: v_lshr_b64 v[4:5], s[2:3], v11 6341; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v11 6342; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 6343; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 6344; GFX6-NEXT: v_mov_b32_e32 v2, s0 6345; GFX6-NEXT: v_mov_b32_e32 v3, s1 6346; GFX6-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v11 6347; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] 6348; GFX6-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1] 6349; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc 6350; GFX6-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc 6351; GFX6-NEXT: v_or_b32_e32 v0, v9, v0 6352; 
GFX6-NEXT: v_or_b32_e32 v1, v6, v1 6353; GFX6-NEXT: v_or_b32_e32 v2, v7, v2 6354; GFX6-NEXT: v_or_b32_e32 v3, v10, v3 6355; GFX6-NEXT: ; return to shader part epilog 6356; 6357; GFX8-LABEL: v_fshl_i128_ssv: 6358; GFX8: ; %bb.0: 6359; GFX8-NEXT: v_and_b32_e32 v7, 0x7f, v0 6360; GFX8-NEXT: v_sub_u32_e32 v1, vcc, 64, v7 6361; GFX8-NEXT: v_lshrrev_b64 v[1:2], v1, s[0:1] 6362; GFX8-NEXT: v_lshlrev_b64 v[3:4], v7, s[2:3] 6363; GFX8-NEXT: v_add_u32_e32 v9, vcc, 0xffffffc0, v7 6364; GFX8-NEXT: v_lshlrev_b64 v[5:6], v7, s[0:1] 6365; GFX8-NEXT: v_or_b32_e32 v3, v1, v3 6366; GFX8-NEXT: v_or_b32_e32 v4, v2, v4 6367; GFX8-NEXT: v_lshlrev_b64 v[1:2], v9, s[0:1] 6368; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v7 6369; GFX8-NEXT: v_not_b32_e32 v0, v0 6370; GFX8-NEXT: s_mov_b32 s8, 0 6371; GFX8-NEXT: v_cndmask_b32_e32 v9, 0, v5, vcc 6372; GFX8-NEXT: v_cndmask_b32_e32 v6, 0, v6, vcc 6373; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 6374; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 6375; GFX8-NEXT: v_mov_b32_e32 v3, s2 6376; GFX8-NEXT: v_mov_b32_e32 v4, s3 6377; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7 6378; GFX8-NEXT: s_lshr_b64 s[0:1], s[4:5], 1 6379; GFX8-NEXT: s_lshl_b32 s9, s6, 31 6380; GFX8-NEXT: v_and_b32_e32 v11, 0x7f, v0 6381; GFX8-NEXT: v_cndmask_b32_e32 v7, v1, v3, vcc 6382; GFX8-NEXT: v_cndmask_b32_e32 v10, v2, v4, vcc 6383; GFX8-NEXT: s_or_b64 s[0:1], s[0:1], s[8:9] 6384; GFX8-NEXT: s_lshr_b64 s[2:3], s[6:7], 1 6385; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 64, v11 6386; GFX8-NEXT: v_not_b32_e32 v8, 63 6387; GFX8-NEXT: v_lshrrev_b64 v[0:1], v11, s[0:1] 6388; GFX8-NEXT: v_lshlrev_b64 v[2:3], v2, s[2:3] 6389; GFX8-NEXT: v_add_u32_e32 v8, vcc, v11, v8 6390; GFX8-NEXT: v_or_b32_e32 v2, v0, v2 6391; GFX8-NEXT: v_or_b32_e32 v3, v1, v3 6392; GFX8-NEXT: v_lshrrev_b64 v[0:1], v8, s[2:3] 6393; GFX8-NEXT: v_lshrrev_b64 v[4:5], v11, s[2:3] 6394; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v11 6395; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 6396; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 6397; 
GFX8-NEXT: v_mov_b32_e32 v2, s0 6398; GFX8-NEXT: v_mov_b32_e32 v3, s1 6399; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v11 6400; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] 6401; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1] 6402; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc 6403; GFX8-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc 6404; GFX8-NEXT: v_or_b32_e32 v0, v9, v0 6405; GFX8-NEXT: v_or_b32_e32 v1, v6, v1 6406; GFX8-NEXT: v_or_b32_e32 v2, v7, v2 6407; GFX8-NEXT: v_or_b32_e32 v3, v10, v3 6408; GFX8-NEXT: ; return to shader part epilog 6409; 6410; GFX9-LABEL: v_fshl_i128_ssv: 6411; GFX9: ; %bb.0: 6412; GFX9-NEXT: v_and_b32_e32 v7, 0x7f, v0 6413; GFX9-NEXT: v_sub_u32_e32 v1, 64, v7 6414; GFX9-NEXT: v_lshrrev_b64 v[1:2], v1, s[0:1] 6415; GFX9-NEXT: v_lshlrev_b64 v[3:4], v7, s[2:3] 6416; GFX9-NEXT: v_add_u32_e32 v8, 0xffffffc0, v7 6417; GFX9-NEXT: v_lshlrev_b64 v[5:6], v7, s[0:1] 6418; GFX9-NEXT: v_or_b32_e32 v3, v1, v3 6419; GFX9-NEXT: v_or_b32_e32 v4, v2, v4 6420; GFX9-NEXT: v_lshlrev_b64 v[1:2], v8, s[0:1] 6421; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v7 6422; GFX9-NEXT: v_not_b32_e32 v0, v0 6423; GFX9-NEXT: s_mov_b32 s8, 0 6424; GFX9-NEXT: v_cndmask_b32_e32 v8, 0, v5, vcc 6425; GFX9-NEXT: v_cndmask_b32_e32 v6, 0, v6, vcc 6426; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 6427; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 6428; GFX9-NEXT: v_mov_b32_e32 v4, s3 6429; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7 6430; GFX9-NEXT: s_lshr_b64 s[0:1], s[4:5], 1 6431; GFX9-NEXT: s_lshl_b32 s9, s6, 31 6432; GFX9-NEXT: v_and_b32_e32 v10, 0x7f, v0 6433; GFX9-NEXT: v_mov_b32_e32 v3, s2 6434; GFX9-NEXT: v_cndmask_b32_e32 v9, v2, v4, vcc 6435; GFX9-NEXT: s_or_b64 s[0:1], s[0:1], s[8:9] 6436; GFX9-NEXT: s_lshr_b64 s[2:3], s[6:7], 1 6437; GFX9-NEXT: v_sub_u32_e32 v2, 64, v10 6438; GFX9-NEXT: v_cndmask_b32_e32 v7, v1, v3, vcc 6439; GFX9-NEXT: v_lshrrev_b64 v[0:1], v10, s[0:1] 6440; GFX9-NEXT: v_lshlrev_b64 v[2:3], v2, s[2:3] 6441; GFX9-NEXT: v_add_u32_e32 v11, 0xffffffc0, v10 6442; GFX9-NEXT: 
v_or_b32_e32 v2, v0, v2 6443; GFX9-NEXT: v_or_b32_e32 v3, v1, v3 6444; GFX9-NEXT: v_lshrrev_b64 v[0:1], v11, s[2:3] 6445; GFX9-NEXT: v_lshrrev_b64 v[4:5], v10, s[2:3] 6446; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v10 6447; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 6448; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 6449; GFX9-NEXT: v_mov_b32_e32 v2, s0 6450; GFX9-NEXT: v_mov_b32_e32 v3, s1 6451; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v10 6452; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] 6453; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1] 6454; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc 6455; GFX9-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc 6456; GFX9-NEXT: v_or_b32_e32 v0, v8, v0 6457; GFX9-NEXT: v_or_b32_e32 v1, v6, v1 6458; GFX9-NEXT: v_or_b32_e32 v2, v7, v2 6459; GFX9-NEXT: v_or_b32_e32 v3, v9, v3 6460; GFX9-NEXT: ; return to shader part epilog 6461; 6462; GFX10-LABEL: v_fshl_i128_ssv: 6463; GFX10: ; %bb.0: 6464; GFX10-NEXT: v_and_b32_e32 v12, 0x7f, v0 6465; GFX10-NEXT: v_not_b32_e32 v2, v0 6466; GFX10-NEXT: s_mov_b32 s8, 0 6467; GFX10-NEXT: s_lshr_b64 s[4:5], s[4:5], 1 6468; GFX10-NEXT: s_lshl_b32 s9, s6, 31 6469; GFX10-NEXT: v_sub_nc_u32_e32 v3, 64, v12 6470; GFX10-NEXT: v_and_b32_e32 v13, 0x7f, v2 6471; GFX10-NEXT: v_lshlrev_b64 v[0:1], v12, s[2:3] 6472; GFX10-NEXT: s_or_b64 s[8:9], s[4:5], s[8:9] 6473; GFX10-NEXT: s_lshr_b64 s[6:7], s[6:7], 1 6474; GFX10-NEXT: v_lshrrev_b64 v[2:3], v3, s[0:1] 6475; GFX10-NEXT: v_sub_nc_u32_e32 v8, 64, v13 6476; GFX10-NEXT: v_add_nc_u32_e32 v10, 0xffffffc0, v12 6477; GFX10-NEXT: v_lshrrev_b64 v[6:7], v13, s[8:9] 6478; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v12 6479; GFX10-NEXT: v_lshlrev_b64 v[4:5], v12, s[0:1] 6480; GFX10-NEXT: v_or_b32_e32 v2, v2, v0 6481; GFX10-NEXT: v_add_nc_u32_e32 v0, 0xffffffc0, v13 6482; GFX10-NEXT: v_lshlrev_b64 v[8:9], v8, s[6:7] 6483; GFX10-NEXT: v_lshlrev_b64 v[10:11], v10, s[0:1] 6484; GFX10-NEXT: v_or_b32_e32 v3, v3, v1 6485; GFX10-NEXT: v_cmp_gt_u32_e64 s0, 64, v13 6486; GFX10-NEXT: 
v_lshrrev_b64 v[0:1], v0, s[6:7] 6487; GFX10-NEXT: v_cmp_eq_u32_e64 s1, 0, v13 6488; GFX10-NEXT: v_or_b32_e32 v6, v6, v8 6489; GFX10-NEXT: v_or_b32_e32 v7, v7, v9 6490; GFX10-NEXT: v_cndmask_b32_e32 v8, v10, v2, vcc_lo 6491; GFX10-NEXT: v_cndmask_b32_e32 v10, v11, v3, vcc_lo 6492; GFX10-NEXT: v_lshrrev_b64 v[2:3], v13, s[6:7] 6493; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v6, s0 6494; GFX10-NEXT: v_cmp_eq_u32_e64 s4, 0, v12 6495; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v7, s0 6496; GFX10-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc_lo 6497; GFX10-NEXT: v_cndmask_b32_e32 v5, 0, v5, vcc_lo 6498; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, s8, s1 6499; GFX10-NEXT: v_cndmask_b32_e64 v6, v8, s2, s4 6500; GFX10-NEXT: v_cndmask_b32_e64 v7, v10, s3, s4 6501; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s9, s1 6502; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, v2, s0 6503; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, v3, s0 6504; GFX10-NEXT: v_or_b32_e32 v0, v4, v0 6505; GFX10-NEXT: v_or_b32_e32 v1, v5, v1 6506; GFX10-NEXT: v_or_b32_e32 v2, v6, v2 6507; GFX10-NEXT: v_or_b32_e32 v3, v7, v3 6508; GFX10-NEXT: ; return to shader part epilog 6509; 6510; GFX11-LABEL: v_fshl_i128_ssv: 6511; GFX11: ; %bb.0: 6512; GFX11-NEXT: v_and_b32_e32 v12, 0x7f, v0 6513; GFX11-NEXT: v_not_b32_e32 v2, v0 6514; GFX11-NEXT: s_mov_b32 s8, 0 6515; GFX11-NEXT: s_lshr_b64 s[4:5], s[4:5], 1 6516; GFX11-NEXT: s_lshl_b32 s9, s6, 31 6517; GFX11-NEXT: v_lshlrev_b64 v[4:5], v12, s[0:1] 6518; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v12 6519; GFX11-NEXT: v_and_b32_e32 v13, 0x7f, v2 6520; GFX11-NEXT: s_or_b64 s[8:9], s[4:5], s[8:9] 6521; GFX11-NEXT: s_lshr_b64 s[6:7], s[6:7], 1 6522; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) 6523; GFX11-NEXT: v_dual_cndmask_b32 v4, 0, v4 :: v_dual_cndmask_b32 v5, 0, v5 6524; GFX11-NEXT: v_sub_nc_u32_e32 v3, 64, v12 6525; GFX11-NEXT: v_lshlrev_b64 v[0:1], v12, s[2:3] 6526; GFX11-NEXT: v_sub_nc_u32_e32 v8, 64, v13 6527; GFX11-NEXT: v_add_nc_u32_e32 v10, 0xffffffc0, v12 6528; GFX11-NEXT: v_lshrrev_b64 v[6:7], v13, 
s[8:9] 6529; GFX11-NEXT: v_lshrrev_b64 v[2:3], v3, s[0:1] 6530; GFX11-NEXT: v_cmp_eq_u32_e64 s4, 0, v12 6531; GFX11-NEXT: v_lshlrev_b64 v[8:9], v8, s[6:7] 6532; GFX11-NEXT: v_lshlrev_b64 v[10:11], v10, s[0:1] 6533; GFX11-NEXT: v_cmp_gt_u32_e64 s0, 64, v13 6534; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 0, v13 6535; GFX11-NEXT: v_or_b32_e32 v2, v2, v0 6536; GFX11-NEXT: v_add_nc_u32_e32 v0, 0xffffffc0, v13 6537; GFX11-NEXT: v_or_b32_e32 v3, v3, v1 6538; GFX11-NEXT: v_or_b32_e32 v6, v6, v8 6539; GFX11-NEXT: v_or_b32_e32 v7, v7, v9 6540; GFX11-NEXT: v_cndmask_b32_e32 v8, v10, v2, vcc_lo 6541; GFX11-NEXT: v_lshrrev_b64 v[0:1], v0, s[6:7] 6542; GFX11-NEXT: v_cndmask_b32_e32 v10, v11, v3, vcc_lo 6543; GFX11-NEXT: v_lshrrev_b64 v[2:3], v13, s[6:7] 6544; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4) 6545; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v6, s0 6546; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, v7, s0 6547; GFX11-NEXT: v_cndmask_b32_e64 v6, v8, s2, s4 6548; GFX11-NEXT: v_cndmask_b32_e64 v7, v10, s3, s4 6549; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, v2, s0 6550; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, s8, s1 6551; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s9, s1 6552; GFX11-NEXT: v_cndmask_b32_e64 v3, 0, v3, s0 6553; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 6554; GFX11-NEXT: v_or_b32_e32 v2, v6, v2 6555; GFX11-NEXT: v_or_b32_e32 v0, v4, v0 6556; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 6557; GFX11-NEXT: v_or_b32_e32 v1, v5, v1 6558; GFX11-NEXT: v_or_b32_e32 v3, v7, v3 6559; GFX11-NEXT: ; return to shader part epilog 6560 %result = call i128 @llvm.fshl.i128(i128 %lhs, i128 %rhs, i128 %amt) 6561 %cast.result = bitcast i128 %result to <4 x float> 6562 ret <4 x float> %cast.result 6563} 6564 6565define amdgpu_ps <4 x float> @v_fshl_i128_svs(i128 inreg %lhs, i128 %rhs, i128 inreg %amt) { 6566; GFX6-LABEL: v_fshl_i128_svs: 6567; GFX6: ; %bb.0: 6568; GFX6-NEXT: s_and_b32 s5, s4, 0x7f 
6569; GFX6-NEXT: s_sub_i32 s12, s5, 64 6570; GFX6-NEXT: s_sub_i32 s8, 64, s5 6571; GFX6-NEXT: s_cmp_lt_u32 s5, 64 6572; GFX6-NEXT: s_cselect_b32 s13, 1, 0 6573; GFX6-NEXT: s_cmp_eq_u32 s5, 0 6574; GFX6-NEXT: s_cselect_b32 s5, 1, 0 6575; GFX6-NEXT: s_lshr_b64 s[8:9], s[0:1], s8 6576; GFX6-NEXT: s_lshl_b64 s[10:11], s[2:3], s4 6577; GFX6-NEXT: s_lshl_b64 s[6:7], s[0:1], s4 6578; GFX6-NEXT: s_or_b64 s[8:9], s[8:9], s[10:11] 6579; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], s12 6580; GFX6-NEXT: s_cmp_lg_u32 s13, 0 6581; GFX6-NEXT: s_cselect_b64 s[6:7], s[6:7], 0 6582; GFX6-NEXT: s_cselect_b64 s[0:1], s[8:9], s[0:1] 6583; GFX6-NEXT: s_cmp_lg_u32 s5, 0 6584; GFX6-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1] 6585; GFX6-NEXT: v_lshr_b64 v[0:1], v[0:1], 1 6586; GFX6-NEXT: s_andn2_b32 s0, 0x7f, s4 6587; GFX6-NEXT: v_lshlrev_b32_e32 v4, 31, v2 6588; GFX6-NEXT: v_lshr_b64 v[2:3], v[2:3], 1 6589; GFX6-NEXT: s_sub_i32 s1, s0, 64 6590; GFX6-NEXT: s_sub_i32 s4, 64, s0 6591; GFX6-NEXT: v_or_b32_e32 v1, v1, v4 6592; GFX6-NEXT: s_cmp_lt_u32 s0, 64 6593; GFX6-NEXT: s_cselect_b32 s5, 1, 0 6594; GFX6-NEXT: s_cmp_eq_u32 s0, 0 6595; GFX6-NEXT: v_lshr_b64 v[4:5], v[0:1], s0 6596; GFX6-NEXT: v_lshl_b64 v[6:7], v[2:3], s4 6597; GFX6-NEXT: s_cselect_b32 s8, 1, 0 6598; GFX6-NEXT: v_lshr_b64 v[8:9], v[2:3], s0 6599; GFX6-NEXT: v_lshr_b64 v[2:3], v[2:3], s1 6600; GFX6-NEXT: s_and_b32 s0, 1, s5 6601; GFX6-NEXT: v_or_b32_e32 v4, v4, v6 6602; GFX6-NEXT: v_or_b32_e32 v5, v5, v7 6603; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0 6604; GFX6-NEXT: s_and_b32 s0, 1, s8 6605; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 6606; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc 6607; GFX6-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0 6608; GFX6-NEXT: v_cndmask_b32_e64 v0, v2, v0, s[0:1] 6609; GFX6-NEXT: v_cndmask_b32_e64 v1, v3, v1, s[0:1] 6610; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc 6611; GFX6-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc 6612; GFX6-NEXT: v_or_b32_e32 v0, s6, v0 6613; GFX6-NEXT: v_or_b32_e32 v1, s7, v1 6614; GFX6-NEXT: 
v_or_b32_e32 v2, s2, v2 6615; GFX6-NEXT: v_or_b32_e32 v3, s3, v3 6616; GFX6-NEXT: ; return to shader part epilog 6617; 6618; GFX8-LABEL: v_fshl_i128_svs: 6619; GFX8: ; %bb.0: 6620; GFX8-NEXT: s_and_b32 s5, s4, 0x7f 6621; GFX8-NEXT: s_sub_i32 s12, s5, 64 6622; GFX8-NEXT: s_sub_i32 s8, 64, s5 6623; GFX8-NEXT: s_cmp_lt_u32 s5, 64 6624; GFX8-NEXT: s_cselect_b32 s13, 1, 0 6625; GFX8-NEXT: s_cmp_eq_u32 s5, 0 6626; GFX8-NEXT: s_cselect_b32 s5, 1, 0 6627; GFX8-NEXT: s_lshr_b64 s[8:9], s[0:1], s8 6628; GFX8-NEXT: s_lshl_b64 s[10:11], s[2:3], s4 6629; GFX8-NEXT: s_lshl_b64 s[6:7], s[0:1], s4 6630; GFX8-NEXT: s_or_b64 s[8:9], s[8:9], s[10:11] 6631; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], s12 6632; GFX8-NEXT: s_cmp_lg_u32 s13, 0 6633; GFX8-NEXT: s_cselect_b64 s[6:7], s[6:7], 0 6634; GFX8-NEXT: s_cselect_b64 s[0:1], s[8:9], s[0:1] 6635; GFX8-NEXT: s_cmp_lg_u32 s5, 0 6636; GFX8-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1] 6637; GFX8-NEXT: v_lshrrev_b64 v[0:1], 1, v[0:1] 6638; GFX8-NEXT: s_andn2_b32 s0, 0x7f, s4 6639; GFX8-NEXT: v_lshlrev_b32_e32 v4, 31, v2 6640; GFX8-NEXT: v_lshrrev_b64 v[2:3], 1, v[2:3] 6641; GFX8-NEXT: s_sub_i32 s1, s0, 64 6642; GFX8-NEXT: s_sub_i32 s4, 64, s0 6643; GFX8-NEXT: v_or_b32_e32 v1, v1, v4 6644; GFX8-NEXT: s_cmp_lt_u32 s0, 64 6645; GFX8-NEXT: s_cselect_b32 s5, 1, 0 6646; GFX8-NEXT: s_cmp_eq_u32 s0, 0 6647; GFX8-NEXT: v_lshrrev_b64 v[4:5], s0, v[0:1] 6648; GFX8-NEXT: v_lshlrev_b64 v[6:7], s4, v[2:3] 6649; GFX8-NEXT: s_cselect_b32 s8, 1, 0 6650; GFX8-NEXT: v_lshrrev_b64 v[8:9], s0, v[2:3] 6651; GFX8-NEXT: v_lshrrev_b64 v[2:3], s1, v[2:3] 6652; GFX8-NEXT: s_and_b32 s0, 1, s5 6653; GFX8-NEXT: v_or_b32_e32 v4, v4, v6 6654; GFX8-NEXT: v_or_b32_e32 v5, v5, v7 6655; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0 6656; GFX8-NEXT: s_and_b32 s0, 1, s8 6657; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 6658; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc 6659; GFX8-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0 6660; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, v0, s[0:1] 6661; GFX8-NEXT: 
v_cndmask_b32_e64 v1, v3, v1, s[0:1] 6662; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc 6663; GFX8-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc 6664; GFX8-NEXT: v_or_b32_e32 v0, s6, v0 6665; GFX8-NEXT: v_or_b32_e32 v1, s7, v1 6666; GFX8-NEXT: v_or_b32_e32 v2, s2, v2 6667; GFX8-NEXT: v_or_b32_e32 v3, s3, v3 6668; GFX8-NEXT: ; return to shader part epilog 6669; 6670; GFX9-LABEL: v_fshl_i128_svs: 6671; GFX9: ; %bb.0: 6672; GFX9-NEXT: s_and_b32 s5, s4, 0x7f 6673; GFX9-NEXT: s_sub_i32 s12, s5, 64 6674; GFX9-NEXT: s_sub_i32 s8, 64, s5 6675; GFX9-NEXT: s_cmp_lt_u32 s5, 64 6676; GFX9-NEXT: s_cselect_b32 s13, 1, 0 6677; GFX9-NEXT: s_cmp_eq_u32 s5, 0 6678; GFX9-NEXT: s_cselect_b32 s5, 1, 0 6679; GFX9-NEXT: s_lshr_b64 s[8:9], s[0:1], s8 6680; GFX9-NEXT: s_lshl_b64 s[10:11], s[2:3], s4 6681; GFX9-NEXT: s_lshl_b64 s[6:7], s[0:1], s4 6682; GFX9-NEXT: s_or_b64 s[8:9], s[8:9], s[10:11] 6683; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], s12 6684; GFX9-NEXT: s_cmp_lg_u32 s13, 0 6685; GFX9-NEXT: s_cselect_b64 s[6:7], s[6:7], 0 6686; GFX9-NEXT: s_cselect_b64 s[0:1], s[8:9], s[0:1] 6687; GFX9-NEXT: v_lshrrev_b64 v[0:1], 1, v[0:1] 6688; GFX9-NEXT: s_cmp_lg_u32 s5, 0 6689; GFX9-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1] 6690; GFX9-NEXT: s_andn2_b32 s0, 0x7f, s4 6691; GFX9-NEXT: v_lshl_or_b32 v1, v2, 31, v1 6692; GFX9-NEXT: v_lshrrev_b64 v[2:3], 1, v[2:3] 6693; GFX9-NEXT: s_sub_i32 s1, s0, 64 6694; GFX9-NEXT: s_sub_i32 s4, 64, s0 6695; GFX9-NEXT: s_cmp_lt_u32 s0, 64 6696; GFX9-NEXT: s_cselect_b32 s5, 1, 0 6697; GFX9-NEXT: s_cmp_eq_u32 s0, 0 6698; GFX9-NEXT: v_lshrrev_b64 v[4:5], s0, v[0:1] 6699; GFX9-NEXT: v_lshlrev_b64 v[6:7], s4, v[2:3] 6700; GFX9-NEXT: s_cselect_b32 s8, 1, 0 6701; GFX9-NEXT: v_lshrrev_b64 v[8:9], s0, v[2:3] 6702; GFX9-NEXT: v_lshrrev_b64 v[2:3], s1, v[2:3] 6703; GFX9-NEXT: s_and_b32 s0, 1, s5 6704; GFX9-NEXT: v_or_b32_e32 v4, v4, v6 6705; GFX9-NEXT: v_or_b32_e32 v5, v5, v7 6706; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0 6707; GFX9-NEXT: s_and_b32 s0, 1, s8 6708; GFX9-NEXT: 
v_cndmask_b32_e32 v2, v2, v4, vcc 6709; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc 6710; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0 6711; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, v0, s[0:1] 6712; GFX9-NEXT: v_cndmask_b32_e64 v1, v3, v1, s[0:1] 6713; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc 6714; GFX9-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc 6715; GFX9-NEXT: v_or_b32_e32 v0, s6, v0 6716; GFX9-NEXT: v_or_b32_e32 v1, s7, v1 6717; GFX9-NEXT: v_or_b32_e32 v2, s2, v2 6718; GFX9-NEXT: v_or_b32_e32 v3, s3, v3 6719; GFX9-NEXT: ; return to shader part epilog 6720; 6721; GFX10-LABEL: v_fshl_i128_svs: 6722; GFX10: ; %bb.0: 6723; GFX10-NEXT: s_and_b32 s5, s4, 0x7f 6724; GFX10-NEXT: v_lshrrev_b64 v[0:1], 1, v[0:1] 6725; GFX10-NEXT: s_sub_i32 s12, s5, 64 6726; GFX10-NEXT: s_sub_i32 s6, 64, s5 6727; GFX10-NEXT: s_cmp_lt_u32 s5, 64 6728; GFX10-NEXT: s_cselect_b32 s13, 1, 0 6729; GFX10-NEXT: s_cmp_eq_u32 s5, 0 6730; GFX10-NEXT: v_lshl_or_b32 v1, v2, 31, v1 6731; GFX10-NEXT: s_cselect_b32 s5, 1, 0 6732; GFX10-NEXT: s_lshr_b64 s[6:7], s[0:1], s6 6733; GFX10-NEXT: s_lshl_b64 s[8:9], s[2:3], s4 6734; GFX10-NEXT: s_lshl_b64 s[10:11], s[0:1], s4 6735; GFX10-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9] 6736; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s12 6737; GFX10-NEXT: s_cmp_lg_u32 s13, 0 6738; GFX10-NEXT: v_lshrrev_b64 v[2:3], 1, v[2:3] 6739; GFX10-NEXT: s_cselect_b64 s[8:9], s[10:11], 0 6740; GFX10-NEXT: s_cselect_b64 s[0:1], s[6:7], s[0:1] 6741; GFX10-NEXT: s_cmp_lg_u32 s5, 0 6742; GFX10-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1] 6743; GFX10-NEXT: s_andn2_b32 s0, 0x7f, s4 6744; GFX10-NEXT: s_sub_i32 s1, 64, s0 6745; GFX10-NEXT: v_lshrrev_b64 v[4:5], s0, v[0:1] 6746; GFX10-NEXT: v_lshlrev_b64 v[6:7], s1, v[2:3] 6747; GFX10-NEXT: s_sub_i32 s1, s0, 64 6748; GFX10-NEXT: s_cmp_lt_u32 s0, 64 6749; GFX10-NEXT: v_lshrrev_b64 v[8:9], s1, v[2:3] 6750; GFX10-NEXT: s_cselect_b32 s4, 1, 0 6751; GFX10-NEXT: s_cmp_eq_u32 s0, 0 6752; GFX10-NEXT: v_or_b32_e32 v4, v4, v6 6753; GFX10-NEXT: s_cselect_b32 s5, 1, 0 
6754; GFX10-NEXT: s_and_b32 s1, 1, s4 6755; GFX10-NEXT: v_or_b32_e32 v5, v5, v7 6756; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s1 6757; GFX10-NEXT: v_lshrrev_b64 v[2:3], s0, v[2:3] 6758; GFX10-NEXT: s_and_b32 s0, 1, s5 6759; GFX10-NEXT: v_cmp_ne_u32_e64 s0, 0, s0 6760; GFX10-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc_lo 6761; GFX10-NEXT: v_cndmask_b32_e32 v5, v9, v5, vcc_lo 6762; GFX10-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc_lo 6763; GFX10-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc_lo 6764; GFX10-NEXT: v_cndmask_b32_e64 v0, v4, v0, s0 6765; GFX10-NEXT: v_cndmask_b32_e64 v1, v5, v1, s0 6766; GFX10-NEXT: v_or_b32_e32 v2, s2, v2 6767; GFX10-NEXT: v_or_b32_e32 v3, s3, v3 6768; GFX10-NEXT: v_or_b32_e32 v0, s8, v0 6769; GFX10-NEXT: v_or_b32_e32 v1, s9, v1 6770; GFX10-NEXT: ; return to shader part epilog 6771; 6772; GFX11-LABEL: v_fshl_i128_svs: 6773; GFX11: ; %bb.0: 6774; GFX11-NEXT: s_and_b32 s5, s4, 0x7f 6775; GFX11-NEXT: v_lshrrev_b64 v[0:1], 1, v[0:1] 6776; GFX11-NEXT: s_sub_i32 s12, s5, 64 6777; GFX11-NEXT: s_sub_i32 s6, 64, s5 6778; GFX11-NEXT: s_cmp_lt_u32 s5, 64 6779; GFX11-NEXT: s_cselect_b32 s13, 1, 0 6780; GFX11-NEXT: s_cmp_eq_u32 s5, 0 6781; GFX11-NEXT: v_lshl_or_b32 v1, v2, 31, v1 6782; GFX11-NEXT: s_cselect_b32 s5, 1, 0 6783; GFX11-NEXT: s_lshr_b64 s[6:7], s[0:1], s6 6784; GFX11-NEXT: s_lshl_b64 s[8:9], s[2:3], s4 6785; GFX11-NEXT: s_lshl_b64 s[10:11], s[0:1], s4 6786; GFX11-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9] 6787; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], s12 6788; GFX11-NEXT: s_cmp_lg_u32 s13, 0 6789; GFX11-NEXT: v_lshrrev_b64 v[2:3], 1, v[2:3] 6790; GFX11-NEXT: s_cselect_b64 s[8:9], s[10:11], 0 6791; GFX11-NEXT: s_cselect_b64 s[0:1], s[6:7], s[0:1] 6792; GFX11-NEXT: s_cmp_lg_u32 s5, 0 6793; GFX11-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1] 6794; GFX11-NEXT: s_and_not1_b32 s0, 0x7f, s4 6795; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 6796; GFX11-NEXT: s_sub_i32 s1, 64, s0 6797; GFX11-NEXT: v_lshrrev_b64 v[4:5], s0, v[0:1] 6798; GFX11-NEXT: v_lshlrev_b64 v[6:7], 
s1, v[2:3] 6799; GFX11-NEXT: s_sub_i32 s1, s0, 64 6800; GFX11-NEXT: s_cmp_lt_u32 s0, 64 6801; GFX11-NEXT: v_lshrrev_b64 v[8:9], s1, v[2:3] 6802; GFX11-NEXT: s_cselect_b32 s4, 1, 0 6803; GFX11-NEXT: s_cmp_eq_u32 s0, 0 6804; GFX11-NEXT: v_or_b32_e32 v4, v4, v6 6805; GFX11-NEXT: s_cselect_b32 s5, 1, 0 6806; GFX11-NEXT: s_and_b32 s1, 1, s4 6807; GFX11-NEXT: v_or_b32_e32 v5, v5, v7 6808; GFX11-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s1 6809; GFX11-NEXT: v_lshrrev_b64 v[2:3], s0, v[2:3] 6810; GFX11-NEXT: s_and_b32 s0, 1, s5 6811; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_3) 6812; GFX11-NEXT: v_cmp_ne_u32_e64 s0, 0, s0 6813; GFX11-NEXT: v_dual_cndmask_b32 v4, v8, v4 :: v_dual_cndmask_b32 v5, v9, v5 6814; GFX11-NEXT: v_dual_cndmask_b32 v2, 0, v2 :: v_dual_cndmask_b32 v3, 0, v3 6815; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3) 6816; GFX11-NEXT: v_cndmask_b32_e64 v0, v4, v0, s0 6817; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, v1, s0 6818; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4) 6819; GFX11-NEXT: v_or_b32_e32 v2, s2, v2 6820; GFX11-NEXT: v_or_b32_e32 v3, s3, v3 6821; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 6822; GFX11-NEXT: v_or_b32_e32 v0, s8, v0 6823; GFX11-NEXT: v_or_b32_e32 v1, s9, v1 6824; GFX11-NEXT: ; return to shader part epilog 6825 %result = call i128 @llvm.fshl.i128(i128 %lhs, i128 %rhs, i128 %amt) 6826 %cast.result = bitcast i128 %result to <4 x float> 6827 ret <4 x float> %cast.result 6828} 6829 6830define amdgpu_ps <4 x float> @v_fshl_i128_vss(i128 %lhs, i128 inreg %rhs, i128 inreg %amt) { 6831; GFX6-LABEL: v_fshl_i128_vss: 6832; GFX6: ; %bb.0: 6833; GFX6-NEXT: s_and_b32 s5, s4, 0x7f 6834; GFX6-NEXT: s_sub_i32 s7, s5, 64 6835; GFX6-NEXT: s_sub_i32 s8, 64, s5 6836; GFX6-NEXT: s_cmp_lt_u32 s5, 64 6837; GFX6-NEXT: s_cselect_b32 s9, 1, 0 6838; GFX6-NEXT: s_cmp_eq_u32 s5, 0 6839; GFX6-NEXT: s_mov_b32 s6, 0 6840; 
GFX6-NEXT: s_cselect_b32 s10, 1, 0 6841; GFX6-NEXT: v_lshr_b64 v[4:5], v[0:1], s8 6842; GFX6-NEXT: v_lshl_b64 v[8:9], v[0:1], s5 6843; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], s7 6844; GFX6-NEXT: s_lshr_b64 s[0:1], s[0:1], 1 6845; GFX6-NEXT: s_lshl_b32 s7, s2, 31 6846; GFX6-NEXT: v_lshl_b64 v[6:7], v[2:3], s5 6847; GFX6-NEXT: s_and_b32 s5, 1, s9 6848; GFX6-NEXT: s_or_b64 s[0:1], s[0:1], s[6:7] 6849; GFX6-NEXT: s_not_b32 s6, s4 6850; GFX6-NEXT: s_andn2_b32 s4, 0x7f, s4 6851; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5 6852; GFX6-NEXT: s_and_b32 s5, 1, s10 6853; GFX6-NEXT: s_lshr_b64 s[2:3], s[2:3], 1 6854; GFX6-NEXT: s_sub_i32 s10, s4, 64 6855; GFX6-NEXT: s_sub_i32 s8, 64, s4 6856; GFX6-NEXT: s_cmp_lt_u32 s4, 64 6857; GFX6-NEXT: v_or_b32_e32 v4, v4, v6 6858; GFX6-NEXT: v_or_b32_e32 v5, v5, v7 6859; GFX6-NEXT: s_cselect_b32 s11, 1, 0 6860; GFX6-NEXT: s_cmp_eq_u32 s4, 0 6861; GFX6-NEXT: v_cndmask_b32_e32 v6, 0, v8, vcc 6862; GFX6-NEXT: v_cndmask_b32_e32 v7, 0, v9, vcc 6863; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 6864; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 6865; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5 6866; GFX6-NEXT: s_cselect_b32 s12, 1, 0 6867; GFX6-NEXT: s_lshr_b64 s[4:5], s[2:3], s6 6868; GFX6-NEXT: s_lshr_b64 s[6:7], s[0:1], s6 6869; GFX6-NEXT: s_lshl_b64 s[8:9], s[2:3], s8 6870; GFX6-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9] 6871; GFX6-NEXT: s_lshr_b64 s[2:3], s[2:3], s10 6872; GFX6-NEXT: s_cmp_lg_u32 s11, 0 6873; GFX6-NEXT: s_cselect_b64 s[2:3], s[6:7], s[2:3] 6874; GFX6-NEXT: s_cmp_lg_u32 s12, 0 6875; GFX6-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3] 6876; GFX6-NEXT: s_cmp_lg_u32 s11, 0 6877; GFX6-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc 6878; GFX6-NEXT: v_cndmask_b32_e32 v3, v1, v3, vcc 6879; GFX6-NEXT: s_cselect_b64 s[2:3], s[4:5], 0 6880; GFX6-NEXT: v_or_b32_e32 v0, s0, v6 6881; GFX6-NEXT: v_or_b32_e32 v1, s1, v7 6882; GFX6-NEXT: v_or_b32_e32 v2, s2, v2 6883; GFX6-NEXT: v_or_b32_e32 v3, s3, v3 6884; GFX6-NEXT: ; return to shader part epilog 6885; 6886; 
GFX8-LABEL: v_fshl_i128_vss: 6887; GFX8: ; %bb.0: 6888; GFX8-NEXT: s_and_b32 s5, s4, 0x7f 6889; GFX8-NEXT: s_sub_i32 s7, s5, 64 6890; GFX8-NEXT: s_sub_i32 s8, 64, s5 6891; GFX8-NEXT: s_cmp_lt_u32 s5, 64 6892; GFX8-NEXT: s_cselect_b32 s9, 1, 0 6893; GFX8-NEXT: s_cmp_eq_u32 s5, 0 6894; GFX8-NEXT: s_mov_b32 s6, 0 6895; GFX8-NEXT: s_cselect_b32 s10, 1, 0 6896; GFX8-NEXT: v_lshrrev_b64 v[4:5], s8, v[0:1] 6897; GFX8-NEXT: v_lshlrev_b64 v[8:9], s5, v[0:1] 6898; GFX8-NEXT: v_lshlrev_b64 v[0:1], s7, v[0:1] 6899; GFX8-NEXT: s_lshr_b64 s[0:1], s[0:1], 1 6900; GFX8-NEXT: s_lshl_b32 s7, s2, 31 6901; GFX8-NEXT: v_lshlrev_b64 v[6:7], s5, v[2:3] 6902; GFX8-NEXT: s_and_b32 s5, 1, s9 6903; GFX8-NEXT: s_or_b64 s[0:1], s[0:1], s[6:7] 6904; GFX8-NEXT: s_not_b32 s6, s4 6905; GFX8-NEXT: s_andn2_b32 s4, 0x7f, s4 6906; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5 6907; GFX8-NEXT: s_and_b32 s5, 1, s10 6908; GFX8-NEXT: s_lshr_b64 s[2:3], s[2:3], 1 6909; GFX8-NEXT: s_sub_i32 s10, s4, 64 6910; GFX8-NEXT: s_sub_i32 s8, 64, s4 6911; GFX8-NEXT: s_cmp_lt_u32 s4, 64 6912; GFX8-NEXT: v_or_b32_e32 v4, v4, v6 6913; GFX8-NEXT: v_or_b32_e32 v5, v5, v7 6914; GFX8-NEXT: s_cselect_b32 s11, 1, 0 6915; GFX8-NEXT: s_cmp_eq_u32 s4, 0 6916; GFX8-NEXT: v_cndmask_b32_e32 v6, 0, v8, vcc 6917; GFX8-NEXT: v_cndmask_b32_e32 v7, 0, v9, vcc 6918; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 6919; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 6920; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5 6921; GFX8-NEXT: s_cselect_b32 s12, 1, 0 6922; GFX8-NEXT: s_lshr_b64 s[4:5], s[2:3], s6 6923; GFX8-NEXT: s_lshr_b64 s[6:7], s[0:1], s6 6924; GFX8-NEXT: s_lshl_b64 s[8:9], s[2:3], s8 6925; GFX8-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9] 6926; GFX8-NEXT: s_lshr_b64 s[2:3], s[2:3], s10 6927; GFX8-NEXT: s_cmp_lg_u32 s11, 0 6928; GFX8-NEXT: s_cselect_b64 s[2:3], s[6:7], s[2:3] 6929; GFX8-NEXT: s_cmp_lg_u32 s12, 0 6930; GFX8-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3] 6931; GFX8-NEXT: s_cmp_lg_u32 s11, 0 6932; GFX8-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc 
6933; GFX8-NEXT: v_cndmask_b32_e32 v3, v1, v3, vcc 6934; GFX8-NEXT: s_cselect_b64 s[2:3], s[4:5], 0 6935; GFX8-NEXT: v_or_b32_e32 v0, s0, v6 6936; GFX8-NEXT: v_or_b32_e32 v1, s1, v7 6937; GFX8-NEXT: v_or_b32_e32 v2, s2, v2 6938; GFX8-NEXT: v_or_b32_e32 v3, s3, v3 6939; GFX8-NEXT: ; return to shader part epilog 6940; 6941; GFX9-LABEL: v_fshl_i128_vss: 6942; GFX9: ; %bb.0: 6943; GFX9-NEXT: s_and_b32 s5, s4, 0x7f 6944; GFX9-NEXT: s_sub_i32 s7, s5, 64 6945; GFX9-NEXT: s_sub_i32 s8, 64, s5 6946; GFX9-NEXT: s_cmp_lt_u32 s5, 64 6947; GFX9-NEXT: s_cselect_b32 s9, 1, 0 6948; GFX9-NEXT: s_cmp_eq_u32 s5, 0 6949; GFX9-NEXT: s_mov_b32 s6, 0 6950; GFX9-NEXT: s_cselect_b32 s10, 1, 0 6951; GFX9-NEXT: v_lshrrev_b64 v[4:5], s8, v[0:1] 6952; GFX9-NEXT: v_lshlrev_b64 v[8:9], s5, v[0:1] 6953; GFX9-NEXT: v_lshlrev_b64 v[0:1], s7, v[0:1] 6954; GFX9-NEXT: s_lshr_b64 s[0:1], s[0:1], 1 6955; GFX9-NEXT: s_lshl_b32 s7, s2, 31 6956; GFX9-NEXT: v_lshlrev_b64 v[6:7], s5, v[2:3] 6957; GFX9-NEXT: s_and_b32 s5, 1, s9 6958; GFX9-NEXT: s_or_b64 s[0:1], s[0:1], s[6:7] 6959; GFX9-NEXT: s_not_b32 s6, s4 6960; GFX9-NEXT: s_andn2_b32 s4, 0x7f, s4 6961; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5 6962; GFX9-NEXT: s_and_b32 s5, 1, s10 6963; GFX9-NEXT: s_lshr_b64 s[2:3], s[2:3], 1 6964; GFX9-NEXT: s_sub_i32 s10, s4, 64 6965; GFX9-NEXT: s_sub_i32 s8, 64, s4 6966; GFX9-NEXT: s_cmp_lt_u32 s4, 64 6967; GFX9-NEXT: v_or_b32_e32 v4, v4, v6 6968; GFX9-NEXT: v_or_b32_e32 v5, v5, v7 6969; GFX9-NEXT: s_cselect_b32 s11, 1, 0 6970; GFX9-NEXT: s_cmp_eq_u32 s4, 0 6971; GFX9-NEXT: v_cndmask_b32_e32 v6, 0, v8, vcc 6972; GFX9-NEXT: v_cndmask_b32_e32 v7, 0, v9, vcc 6973; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc 6974; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc 6975; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5 6976; GFX9-NEXT: s_cselect_b32 s12, 1, 0 6977; GFX9-NEXT: s_lshr_b64 s[4:5], s[2:3], s6 6978; GFX9-NEXT: s_lshr_b64 s[6:7], s[0:1], s6 6979; GFX9-NEXT: s_lshl_b64 s[8:9], s[2:3], s8 6980; GFX9-NEXT: s_or_b64 s[6:7], s[6:7], 
s[8:9] 6981; GFX9-NEXT: s_lshr_b64 s[2:3], s[2:3], s10 6982; GFX9-NEXT: s_cmp_lg_u32 s11, 0 6983; GFX9-NEXT: s_cselect_b64 s[2:3], s[6:7], s[2:3] 6984; GFX9-NEXT: s_cmp_lg_u32 s12, 0 6985; GFX9-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3] 6986; GFX9-NEXT: s_cmp_lg_u32 s11, 0 6987; GFX9-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc 6988; GFX9-NEXT: v_cndmask_b32_e32 v3, v1, v3, vcc 6989; GFX9-NEXT: s_cselect_b64 s[2:3], s[4:5], 0 6990; GFX9-NEXT: v_or_b32_e32 v0, s0, v6 6991; GFX9-NEXT: v_or_b32_e32 v1, s1, v7 6992; GFX9-NEXT: v_or_b32_e32 v2, s2, v2 6993; GFX9-NEXT: v_or_b32_e32 v3, s3, v3 6994; GFX9-NEXT: ; return to shader part epilog 6995; 6996; GFX10-LABEL: v_fshl_i128_vss: 6997; GFX10: ; %bb.0: 6998; GFX10-NEXT: s_and_b32 s5, s4, 0x7f 6999; GFX10-NEXT: s_sub_i32 s6, s5, 64 7000; GFX10-NEXT: s_sub_i32 s7, 64, s5 7001; GFX10-NEXT: s_cmp_lt_u32 s5, 64 7002; GFX10-NEXT: v_lshrrev_b64 v[4:5], s7, v[0:1] 7003; GFX10-NEXT: s_cselect_b32 s8, 1, 0 7004; GFX10-NEXT: s_cmp_eq_u32 s5, 0 7005; GFX10-NEXT: v_lshlrev_b64 v[6:7], s5, v[2:3] 7006; GFX10-NEXT: s_cselect_b32 s9, 1, 0 7007; GFX10-NEXT: v_lshlrev_b64 v[8:9], s5, v[0:1] 7008; GFX10-NEXT: v_lshlrev_b64 v[0:1], s6, v[0:1] 7009; GFX10-NEXT: s_mov_b32 s6, 0 7010; GFX10-NEXT: s_lshr_b64 s[0:1], s[0:1], 1 7011; GFX10-NEXT: s_lshl_b32 s7, s2, 31 7012; GFX10-NEXT: s_and_b32 s5, 1, s8 7013; GFX10-NEXT: s_or_b64 s[0:1], s[0:1], s[6:7] 7014; GFX10-NEXT: s_andn2_b32 s6, 0x7f, s4 7015; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s5 7016; GFX10-NEXT: v_or_b32_e32 v4, v4, v6 7017; GFX10-NEXT: v_or_b32_e32 v5, v5, v7 7018; GFX10-NEXT: s_and_b32 s5, 1, s9 7019; GFX10-NEXT: s_lshr_b64 s[2:3], s[2:3], 1 7020; GFX10-NEXT: s_not_b32 s8, s4 7021; GFX10-NEXT: s_sub_i32 s10, s6, 64 7022; GFX10-NEXT: s_sub_i32 s7, 64, s6 7023; GFX10-NEXT: s_cmp_lt_u32 s6, 64 7024; GFX10-NEXT: v_cndmask_b32_e32 v6, 0, v8, vcc_lo 7025; GFX10-NEXT: s_cselect_b32 s11, 1, 0 7026; GFX10-NEXT: s_cmp_eq_u32 s6, 0 7027; GFX10-NEXT: v_cndmask_b32_e32 v7, 0, v9, vcc_lo 7028; 
GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo 7029; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo 7030; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s5 7031; GFX10-NEXT: s_cselect_b32 s12, 1, 0 7032; GFX10-NEXT: s_lshr_b64 s[4:5], s[0:1], s8 7033; GFX10-NEXT: s_lshl_b64 s[6:7], s[2:3], s7 7034; GFX10-NEXT: s_lshr_b64 s[8:9], s[2:3], s8 7035; GFX10-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7] 7036; GFX10-NEXT: s_lshr_b64 s[2:3], s[2:3], s10 7037; GFX10-NEXT: s_cmp_lg_u32 s11, 0 7038; GFX10-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc_lo 7039; GFX10-NEXT: s_cselect_b64 s[2:3], s[4:5], s[2:3] 7040; GFX10-NEXT: s_cmp_lg_u32 s12, 0 7041; GFX10-NEXT: v_cndmask_b32_e32 v3, v1, v3, vcc_lo 7042; GFX10-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3] 7043; GFX10-NEXT: s_cmp_lg_u32 s11, 0 7044; GFX10-NEXT: v_or_b32_e32 v0, s0, v6 7045; GFX10-NEXT: s_cselect_b64 s[2:3], s[8:9], 0 7046; GFX10-NEXT: v_or_b32_e32 v1, s1, v7 7047; GFX10-NEXT: v_or_b32_e32 v2, s2, v2 7048; GFX10-NEXT: v_or_b32_e32 v3, s3, v3 7049; GFX10-NEXT: ; return to shader part epilog 7050; 7051; GFX11-LABEL: v_fshl_i128_vss: 7052; GFX11: ; %bb.0: 7053; GFX11-NEXT: s_and_b32 s5, s4, 0x7f 7054; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 7055; GFX11-NEXT: s_sub_i32 s6, s5, 64 7056; GFX11-NEXT: s_sub_i32 s7, 64, s5 7057; GFX11-NEXT: s_cmp_lt_u32 s5, 64 7058; GFX11-NEXT: v_lshrrev_b64 v[4:5], s7, v[0:1] 7059; GFX11-NEXT: s_cselect_b32 s8, 1, 0 7060; GFX11-NEXT: s_cmp_eq_u32 s5, 0 7061; GFX11-NEXT: v_lshlrev_b64 v[6:7], s5, v[2:3] 7062; GFX11-NEXT: s_cselect_b32 s9, 1, 0 7063; GFX11-NEXT: v_lshlrev_b64 v[8:9], s5, v[0:1] 7064; GFX11-NEXT: v_lshlrev_b64 v[0:1], s6, v[0:1] 7065; GFX11-NEXT: s_mov_b32 s6, 0 7066; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], 1 7067; GFX11-NEXT: s_lshl_b32 s7, s2, 31 7068; GFX11-NEXT: s_and_b32 s5, 1, s8 7069; GFX11-NEXT: s_or_b64 s[0:1], s[0:1], s[6:7] 7070; GFX11-NEXT: s_and_not1_b32 s6, 0x7f, s4 7071; GFX11-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s5 7072; GFX11-NEXT: v_or_b32_e32 v4, v4, v6 7073; GFX11-NEXT: 
v_or_b32_e32 v5, v5, v7 7074; GFX11-NEXT: s_and_b32 s5, 1, s9 7075; GFX11-NEXT: s_lshr_b64 s[2:3], s[2:3], 1 7076; GFX11-NEXT: s_not_b32 s8, s4 7077; GFX11-NEXT: s_sub_i32 s10, s6, 64 7078; GFX11-NEXT: s_sub_i32 s7, 64, s6 7079; GFX11-NEXT: s_cmp_lt_u32 s6, 64 7080; GFX11-NEXT: v_dual_cndmask_b32 v6, 0, v8 :: v_dual_cndmask_b32 v7, 0, v9 7081; GFX11-NEXT: s_cselect_b32 s11, 1, 0 7082; GFX11-NEXT: s_cmp_eq_u32 s6, 0 7083; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v4 :: v_dual_cndmask_b32 v1, v1, v5 7084; GFX11-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s5 7085; GFX11-NEXT: s_cselect_b32 s12, 1, 0 7086; GFX11-NEXT: s_lshr_b64 s[4:5], s[0:1], s8 7087; GFX11-NEXT: s_lshl_b64 s[6:7], s[2:3], s7 7088; GFX11-NEXT: s_lshr_b64 s[8:9], s[2:3], s8 7089; GFX11-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7] 7090; GFX11-NEXT: s_lshr_b64 s[2:3], s[2:3], s10 7091; GFX11-NEXT: s_cmp_lg_u32 s11, 0 7092; GFX11-NEXT: v_dual_cndmask_b32 v2, v0, v2 :: v_dual_cndmask_b32 v3, v1, v3 7093; GFX11-NEXT: s_cselect_b64 s[2:3], s[4:5], s[2:3] 7094; GFX11-NEXT: s_cmp_lg_u32 s12, 0 7095; GFX11-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3] 7096; GFX11-NEXT: s_cmp_lg_u32 s11, 0 7097; GFX11-NEXT: v_or_b32_e32 v0, s0, v6 7098; GFX11-NEXT: s_cselect_b64 s[2:3], s[8:9], 0 7099; GFX11-NEXT: v_or_b32_e32 v1, s1, v7 7100; GFX11-NEXT: v_or_b32_e32 v2, s2, v2 7101; GFX11-NEXT: v_or_b32_e32 v3, s3, v3 7102; GFX11-NEXT: ; return to shader part epilog 7103 %result = call i128 @llvm.fshl.i128(i128 %lhs, i128 %rhs, i128 %amt) 7104 %cast.result = bitcast i128 %result to <4 x float> 7105 ret <4 x float> %cast.result 7106} 7107 7108define amdgpu_ps i128 @s_fshl_i128_65(i128 inreg %lhs, i128 inreg %rhs) { 7109; GFX6-LABEL: s_fshl_i128_65: 7110; GFX6: ; %bb.0: 7111; GFX6-NEXT: s_lshl_b64 s[2:3], s[0:1], 1 7112; GFX6-NEXT: s_lshr_b32 s4, s5, 31 7113; GFX6-NEXT: s_mov_b32 s5, 0 7114; GFX6-NEXT: s_lshl_b64 s[0:1], s[6:7], 1 7115; GFX6-NEXT: s_or_b64 s[0:1], s[4:5], s[0:1] 7116; GFX6-NEXT: s_lshr_b32 s4, s7, 31 7117; GFX6-NEXT: s_or_b64 s[2:3], 
s[2:3], s[4:5] 7118; GFX6-NEXT: ; return to shader part epilog 7119; 7120; GFX8-LABEL: s_fshl_i128_65: 7121; GFX8: ; %bb.0: 7122; GFX8-NEXT: s_lshl_b64 s[2:3], s[0:1], 1 7123; GFX8-NEXT: s_lshr_b32 s4, s5, 31 7124; GFX8-NEXT: s_mov_b32 s5, 0 7125; GFX8-NEXT: s_lshl_b64 s[0:1], s[6:7], 1 7126; GFX8-NEXT: s_or_b64 s[0:1], s[4:5], s[0:1] 7127; GFX8-NEXT: s_lshr_b32 s4, s7, 31 7128; GFX8-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5] 7129; GFX8-NEXT: ; return to shader part epilog 7130; 7131; GFX9-LABEL: s_fshl_i128_65: 7132; GFX9: ; %bb.0: 7133; GFX9-NEXT: s_lshl_b64 s[2:3], s[0:1], 1 7134; GFX9-NEXT: s_lshr_b32 s4, s5, 31 7135; GFX9-NEXT: s_mov_b32 s5, 0 7136; GFX9-NEXT: s_lshl_b64 s[0:1], s[6:7], 1 7137; GFX9-NEXT: s_or_b64 s[0:1], s[4:5], s[0:1] 7138; GFX9-NEXT: s_lshr_b32 s4, s7, 31 7139; GFX9-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5] 7140; GFX9-NEXT: ; return to shader part epilog 7141; 7142; GFX10-LABEL: s_fshl_i128_65: 7143; GFX10: ; %bb.0: 7144; GFX10-NEXT: s_lshr_b32 s2, s5, 31 7145; GFX10-NEXT: s_mov_b32 s3, 0 7146; GFX10-NEXT: s_lshl_b64 s[4:5], s[6:7], 1 7147; GFX10-NEXT: s_lshl_b64 s[8:9], s[0:1], 1 7148; GFX10-NEXT: s_or_b64 s[0:1], s[2:3], s[4:5] 7149; GFX10-NEXT: s_lshr_b32 s2, s7, 31 7150; GFX10-NEXT: s_or_b64 s[2:3], s[8:9], s[2:3] 7151; GFX10-NEXT: ; return to shader part epilog 7152; 7153; GFX11-LABEL: s_fshl_i128_65: 7154; GFX11: ; %bb.0: 7155; GFX11-NEXT: s_lshr_b32 s2, s5, 31 7156; GFX11-NEXT: s_mov_b32 s3, 0 7157; GFX11-NEXT: s_lshl_b64 s[4:5], s[6:7], 1 7158; GFX11-NEXT: s_lshl_b64 s[8:9], s[0:1], 1 7159; GFX11-NEXT: s_or_b64 s[0:1], s[2:3], s[4:5] 7160; GFX11-NEXT: s_lshr_b32 s2, s7, 31 7161; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 7162; GFX11-NEXT: s_or_b64 s[2:3], s[8:9], s[2:3] 7163; GFX11-NEXT: ; return to shader part epilog 7164 %result = call i128 @llvm.fshl.i128(i128 %lhs, i128 %rhs, i128 65) 7165 ret i128 %result 7166} 7167 7168define i128 @v_fshl_i128_65(i128 %lhs, i128 %rhs) { 7169; GFX6-LABEL: v_fshl_i128_65: 7170; GFX6: ; %bb.0: 7171; 
GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7172; GFX6-NEXT: v_lshl_b64 v[2:3], v[0:1], 1 7173; GFX6-NEXT: v_lshl_b64 v[0:1], v[6:7], 1 7174; GFX6-NEXT: v_lshrrev_b32_e32 v4, 31, v5 7175; GFX6-NEXT: v_or_b32_e32 v0, v4, v0 7176; GFX6-NEXT: v_lshrrev_b32_e32 v4, 31, v7 7177; GFX6-NEXT: v_or_b32_e32 v2, v2, v4 7178; GFX6-NEXT: s_setpc_b64 s[30:31] 7179; 7180; GFX8-LABEL: v_fshl_i128_65: 7181; GFX8: ; %bb.0: 7182; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7183; GFX8-NEXT: v_lshlrev_b64 v[2:3], 1, v[0:1] 7184; GFX8-NEXT: v_lshlrev_b64 v[0:1], 1, v[6:7] 7185; GFX8-NEXT: v_lshrrev_b32_e32 v4, 31, v5 7186; GFX8-NEXT: v_or_b32_e32 v0, v4, v0 7187; GFX8-NEXT: v_lshrrev_b32_e32 v4, 31, v7 7188; GFX8-NEXT: v_or_b32_e32 v2, v2, v4 7189; GFX8-NEXT: s_setpc_b64 s[30:31] 7190; 7191; GFX9-LABEL: v_fshl_i128_65: 7192; GFX9: ; %bb.0: 7193; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7194; GFX9-NEXT: v_lshlrev_b64 v[2:3], 1, v[0:1] 7195; GFX9-NEXT: v_lshlrev_b64 v[0:1], 1, v[6:7] 7196; GFX9-NEXT: v_lshrrev_b32_e32 v4, 31, v5 7197; GFX9-NEXT: v_or_b32_e32 v0, v4, v0 7198; GFX9-NEXT: v_lshrrev_b32_e32 v4, 31, v7 7199; GFX9-NEXT: v_or_b32_e32 v2, v2, v4 7200; GFX9-NEXT: s_setpc_b64 s[30:31] 7201; 7202; GFX10-LABEL: v_fshl_i128_65: 7203; GFX10: ; %bb.0: 7204; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7205; GFX10-NEXT: v_lshlrev_b64 v[2:3], 1, v[0:1] 7206; GFX10-NEXT: v_lshlrev_b64 v[0:1], 1, v[6:7] 7207; GFX10-NEXT: v_lshrrev_b32_e32 v4, 31, v5 7208; GFX10-NEXT: v_lshrrev_b32_e32 v5, 31, v7 7209; GFX10-NEXT: v_or_b32_e32 v0, v4, v0 7210; GFX10-NEXT: v_or_b32_e32 v2, v2, v5 7211; GFX10-NEXT: s_setpc_b64 s[30:31] 7212; 7213; GFX11-LABEL: v_fshl_i128_65: 7214; GFX11: ; %bb.0: 7215; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7216; GFX11-NEXT: v_lshlrev_b64 v[2:3], 1, v[0:1] 7217; GFX11-NEXT: v_lshlrev_b64 v[0:1], 1, v[6:7] 7218; GFX11-NEXT: v_lshrrev_b32_e32 v4, 31, v5 7219; GFX11-NEXT: v_lshrrev_b32_e32 v5, 31, v7 7220; GFX11-NEXT: s_delay_alu 
instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 7221; GFX11-NEXT: v_or_b32_e32 v0, v4, v0 7222; GFX11-NEXT: v_or_b32_e32 v2, v2, v5 7223; GFX11-NEXT: s_setpc_b64 s[30:31] 7224 %result = call i128 @llvm.fshl.i128(i128 %lhs, i128 %rhs, i128 65) 7225 ret i128 %result 7226} 7227 7228define amdgpu_ps <2 x i128> @s_fshl_v2i128(<2 x i128> inreg %lhs, <2 x i128> inreg %rhs, <2 x i128> inreg %amt) { 7229; GFX6-LABEL: s_fshl_v2i128: 7230; GFX6: ; %bb.0: 7231; GFX6-NEXT: s_and_b32 s17, s16, 0x7f 7232; GFX6-NEXT: s_sub_i32 s19, s17, 64 7233; GFX6-NEXT: s_sub_i32 s21, 64, s17 7234; GFX6-NEXT: s_cmp_lt_u32 s17, 64 7235; GFX6-NEXT: s_cselect_b32 s28, 1, 0 7236; GFX6-NEXT: s_cmp_eq_u32 s17, 0 7237; GFX6-NEXT: s_cselect_b32 s17, 1, 0 7238; GFX6-NEXT: s_lshr_b64 s[24:25], s[0:1], s21 7239; GFX6-NEXT: s_lshl_b64 s[26:27], s[2:3], s16 7240; GFX6-NEXT: s_lshl_b64 s[22:23], s[0:1], s16 7241; GFX6-NEXT: s_or_b64 s[24:25], s[24:25], s[26:27] 7242; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], s19 7243; GFX6-NEXT: s_cmp_lg_u32 s28, 0 7244; GFX6-NEXT: s_cselect_b64 s[22:23], s[22:23], 0 7245; GFX6-NEXT: s_cselect_b64 s[0:1], s[24:25], s[0:1] 7246; GFX6-NEXT: s_cmp_lg_u32 s17, 0 7247; GFX6-NEXT: s_mov_b32 s18, 0 7248; GFX6-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1] 7249; GFX6-NEXT: s_lshr_b64 s[0:1], s[8:9], 1 7250; GFX6-NEXT: s_lshl_b32 s19, s10, 31 7251; GFX6-NEXT: s_lshr_b64 s[8:9], s[10:11], 1 7252; GFX6-NEXT: s_andn2_b32 s10, 0x7f, s16 7253; GFX6-NEXT: s_or_b64 s[0:1], s[0:1], s[18:19] 7254; GFX6-NEXT: s_not_b32 s17, s16 7255; GFX6-NEXT: s_sub_i32 s19, s10, 64 7256; GFX6-NEXT: s_sub_i32 s21, 64, s10 7257; GFX6-NEXT: s_cmp_lt_u32 s10, 64 7258; GFX6-NEXT: s_cselect_b32 s26, 1, 0 7259; GFX6-NEXT: s_cmp_eq_u32 s10, 0 7260; GFX6-NEXT: s_cselect_b32 s27, 1, 0 7261; GFX6-NEXT: s_lshr_b64 s[10:11], s[8:9], s17 7262; GFX6-NEXT: s_lshr_b64 s[16:17], s[0:1], s17 7263; GFX6-NEXT: s_lshl_b64 s[24:25], s[8:9], s21 7264; GFX6-NEXT: s_or_b64 s[16:17], s[16:17], s[24:25] 7265; GFX6-NEXT: s_lshr_b64 
s[8:9], s[8:9], s19 7266; GFX6-NEXT: s_cmp_lg_u32 s26, 0 7267; GFX6-NEXT: s_cselect_b64 s[8:9], s[16:17], s[8:9] 7268; GFX6-NEXT: s_cmp_lg_u32 s27, 0 7269; GFX6-NEXT: s_cselect_b64 s[0:1], s[0:1], s[8:9] 7270; GFX6-NEXT: s_cmp_lg_u32 s26, 0 7271; GFX6-NEXT: s_cselect_b64 s[8:9], s[10:11], 0 7272; GFX6-NEXT: s_or_b64 s[2:3], s[2:3], s[8:9] 7273; GFX6-NEXT: s_and_b32 s8, s20, 0x7f 7274; GFX6-NEXT: s_or_b64 s[0:1], s[22:23], s[0:1] 7275; GFX6-NEXT: s_sub_i32 s19, s8, 64 7276; GFX6-NEXT: s_sub_i32 s10, 64, s8 7277; GFX6-NEXT: s_cmp_lt_u32 s8, 64 7278; GFX6-NEXT: s_cselect_b32 s21, 1, 0 7279; GFX6-NEXT: s_cmp_eq_u32 s8, 0 7280; GFX6-NEXT: s_cselect_b32 s22, 1, 0 7281; GFX6-NEXT: s_lshr_b64 s[10:11], s[4:5], s10 7282; GFX6-NEXT: s_lshl_b64 s[16:17], s[6:7], s20 7283; GFX6-NEXT: s_lshl_b64 s[8:9], s[4:5], s20 7284; GFX6-NEXT: s_or_b64 s[10:11], s[10:11], s[16:17] 7285; GFX6-NEXT: s_lshl_b64 s[4:5], s[4:5], s19 7286; GFX6-NEXT: s_cmp_lg_u32 s21, 0 7287; GFX6-NEXT: s_cselect_b64 s[8:9], s[8:9], 0 7288; GFX6-NEXT: s_cselect_b64 s[4:5], s[10:11], s[4:5] 7289; GFX6-NEXT: s_cmp_lg_u32 s22, 0 7290; GFX6-NEXT: s_cselect_b64 s[6:7], s[6:7], s[4:5] 7291; GFX6-NEXT: s_lshr_b64 s[4:5], s[12:13], 1 7292; GFX6-NEXT: s_lshl_b32 s19, s14, 31 7293; GFX6-NEXT: s_andn2_b32 s12, 0x7f, s20 7294; GFX6-NEXT: s_or_b64 s[4:5], s[4:5], s[18:19] 7295; GFX6-NEXT: s_lshr_b64 s[10:11], s[14:15], 1 7296; GFX6-NEXT: s_not_b32 s14, s20 7297; GFX6-NEXT: s_sub_i32 s18, s12, 64 7298; GFX6-NEXT: s_sub_i32 s16, 64, s12 7299; GFX6-NEXT: s_cmp_lt_u32 s12, 64 7300; GFX6-NEXT: s_cselect_b32 s19, 1, 0 7301; GFX6-NEXT: s_cmp_eq_u32 s12, 0 7302; GFX6-NEXT: s_cselect_b32 s20, 1, 0 7303; GFX6-NEXT: s_lshr_b64 s[12:13], s[10:11], s14 7304; GFX6-NEXT: s_lshr_b64 s[14:15], s[4:5], s14 7305; GFX6-NEXT: s_lshl_b64 s[16:17], s[10:11], s16 7306; GFX6-NEXT: s_or_b64 s[14:15], s[14:15], s[16:17] 7307; GFX6-NEXT: s_lshr_b64 s[10:11], s[10:11], s18 7308; GFX6-NEXT: s_cmp_lg_u32 s19, 0 7309; GFX6-NEXT: s_cselect_b64 s[10:11], 
s[14:15], s[10:11] 7310; GFX6-NEXT: s_cmp_lg_u32 s20, 0 7311; GFX6-NEXT: s_cselect_b64 s[4:5], s[4:5], s[10:11] 7312; GFX6-NEXT: s_cmp_lg_u32 s19, 0 7313; GFX6-NEXT: s_cselect_b64 s[10:11], s[12:13], 0 7314; GFX6-NEXT: s_or_b64 s[4:5], s[8:9], s[4:5] 7315; GFX6-NEXT: s_or_b64 s[6:7], s[6:7], s[10:11] 7316; GFX6-NEXT: ; return to shader part epilog 7317; 7318; GFX8-LABEL: s_fshl_v2i128: 7319; GFX8: ; %bb.0: 7320; GFX8-NEXT: s_and_b32 s17, s16, 0x7f 7321; GFX8-NEXT: s_sub_i32 s19, s17, 64 7322; GFX8-NEXT: s_sub_i32 s21, 64, s17 7323; GFX8-NEXT: s_cmp_lt_u32 s17, 64 7324; GFX8-NEXT: s_cselect_b32 s28, 1, 0 7325; GFX8-NEXT: s_cmp_eq_u32 s17, 0 7326; GFX8-NEXT: s_cselect_b32 s17, 1, 0 7327; GFX8-NEXT: s_lshr_b64 s[24:25], s[0:1], s21 7328; GFX8-NEXT: s_lshl_b64 s[26:27], s[2:3], s16 7329; GFX8-NEXT: s_lshl_b64 s[22:23], s[0:1], s16 7330; GFX8-NEXT: s_or_b64 s[24:25], s[24:25], s[26:27] 7331; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], s19 7332; GFX8-NEXT: s_cmp_lg_u32 s28, 0 7333; GFX8-NEXT: s_cselect_b64 s[22:23], s[22:23], 0 7334; GFX8-NEXT: s_cselect_b64 s[0:1], s[24:25], s[0:1] 7335; GFX8-NEXT: s_cmp_lg_u32 s17, 0 7336; GFX8-NEXT: s_mov_b32 s18, 0 7337; GFX8-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1] 7338; GFX8-NEXT: s_lshr_b64 s[0:1], s[8:9], 1 7339; GFX8-NEXT: s_lshl_b32 s19, s10, 31 7340; GFX8-NEXT: s_lshr_b64 s[8:9], s[10:11], 1 7341; GFX8-NEXT: s_andn2_b32 s10, 0x7f, s16 7342; GFX8-NEXT: s_or_b64 s[0:1], s[0:1], s[18:19] 7343; GFX8-NEXT: s_not_b32 s17, s16 7344; GFX8-NEXT: s_sub_i32 s19, s10, 64 7345; GFX8-NEXT: s_sub_i32 s21, 64, s10 7346; GFX8-NEXT: s_cmp_lt_u32 s10, 64 7347; GFX8-NEXT: s_cselect_b32 s26, 1, 0 7348; GFX8-NEXT: s_cmp_eq_u32 s10, 0 7349; GFX8-NEXT: s_cselect_b32 s27, 1, 0 7350; GFX8-NEXT: s_lshr_b64 s[10:11], s[8:9], s17 7351; GFX8-NEXT: s_lshr_b64 s[16:17], s[0:1], s17 7352; GFX8-NEXT: s_lshl_b64 s[24:25], s[8:9], s21 7353; GFX8-NEXT: s_or_b64 s[16:17], s[16:17], s[24:25] 7354; GFX8-NEXT: s_lshr_b64 s[8:9], s[8:9], s19 7355; GFX8-NEXT: s_cmp_lg_u32 
s26, 0 7356; GFX8-NEXT: s_cselect_b64 s[8:9], s[16:17], s[8:9] 7357; GFX8-NEXT: s_cmp_lg_u32 s27, 0 7358; GFX8-NEXT: s_cselect_b64 s[0:1], s[0:1], s[8:9] 7359; GFX8-NEXT: s_cmp_lg_u32 s26, 0 7360; GFX8-NEXT: s_cselect_b64 s[8:9], s[10:11], 0 7361; GFX8-NEXT: s_or_b64 s[2:3], s[2:3], s[8:9] 7362; GFX8-NEXT: s_and_b32 s8, s20, 0x7f 7363; GFX8-NEXT: s_or_b64 s[0:1], s[22:23], s[0:1] 7364; GFX8-NEXT: s_sub_i32 s19, s8, 64 7365; GFX8-NEXT: s_sub_i32 s10, 64, s8 7366; GFX8-NEXT: s_cmp_lt_u32 s8, 64 7367; GFX8-NEXT: s_cselect_b32 s21, 1, 0 7368; GFX8-NEXT: s_cmp_eq_u32 s8, 0 7369; GFX8-NEXT: s_cselect_b32 s22, 1, 0 7370; GFX8-NEXT: s_lshr_b64 s[10:11], s[4:5], s10 7371; GFX8-NEXT: s_lshl_b64 s[16:17], s[6:7], s20 7372; GFX8-NEXT: s_lshl_b64 s[8:9], s[4:5], s20 7373; GFX8-NEXT: s_or_b64 s[10:11], s[10:11], s[16:17] 7374; GFX8-NEXT: s_lshl_b64 s[4:5], s[4:5], s19 7375; GFX8-NEXT: s_cmp_lg_u32 s21, 0 7376; GFX8-NEXT: s_cselect_b64 s[8:9], s[8:9], 0 7377; GFX8-NEXT: s_cselect_b64 s[4:5], s[10:11], s[4:5] 7378; GFX8-NEXT: s_cmp_lg_u32 s22, 0 7379; GFX8-NEXT: s_cselect_b64 s[6:7], s[6:7], s[4:5] 7380; GFX8-NEXT: s_lshr_b64 s[4:5], s[12:13], 1 7381; GFX8-NEXT: s_lshl_b32 s19, s14, 31 7382; GFX8-NEXT: s_andn2_b32 s12, 0x7f, s20 7383; GFX8-NEXT: s_or_b64 s[4:5], s[4:5], s[18:19] 7384; GFX8-NEXT: s_lshr_b64 s[10:11], s[14:15], 1 7385; GFX8-NEXT: s_not_b32 s14, s20 7386; GFX8-NEXT: s_sub_i32 s18, s12, 64 7387; GFX8-NEXT: s_sub_i32 s16, 64, s12 7388; GFX8-NEXT: s_cmp_lt_u32 s12, 64 7389; GFX8-NEXT: s_cselect_b32 s19, 1, 0 7390; GFX8-NEXT: s_cmp_eq_u32 s12, 0 7391; GFX8-NEXT: s_cselect_b32 s20, 1, 0 7392; GFX8-NEXT: s_lshr_b64 s[12:13], s[10:11], s14 7393; GFX8-NEXT: s_lshr_b64 s[14:15], s[4:5], s14 7394; GFX8-NEXT: s_lshl_b64 s[16:17], s[10:11], s16 7395; GFX8-NEXT: s_or_b64 s[14:15], s[14:15], s[16:17] 7396; GFX8-NEXT: s_lshr_b64 s[10:11], s[10:11], s18 7397; GFX8-NEXT: s_cmp_lg_u32 s19, 0 7398; GFX8-NEXT: s_cselect_b64 s[10:11], s[14:15], s[10:11] 7399; GFX8-NEXT: s_cmp_lg_u32 s20, 
0 7400; GFX8-NEXT: s_cselect_b64 s[4:5], s[4:5], s[10:11] 7401; GFX8-NEXT: s_cmp_lg_u32 s19, 0 7402; GFX8-NEXT: s_cselect_b64 s[10:11], s[12:13], 0 7403; GFX8-NEXT: s_or_b64 s[4:5], s[8:9], s[4:5] 7404; GFX8-NEXT: s_or_b64 s[6:7], s[6:7], s[10:11] 7405; GFX8-NEXT: ; return to shader part epilog 7406; 7407; GFX9-LABEL: s_fshl_v2i128: 7408; GFX9: ; %bb.0: 7409; GFX9-NEXT: s_and_b32 s17, s16, 0x7f 7410; GFX9-NEXT: s_sub_i32 s19, s17, 64 7411; GFX9-NEXT: s_sub_i32 s21, 64, s17 7412; GFX9-NEXT: s_cmp_lt_u32 s17, 64 7413; GFX9-NEXT: s_cselect_b32 s28, 1, 0 7414; GFX9-NEXT: s_cmp_eq_u32 s17, 0 7415; GFX9-NEXT: s_cselect_b32 s17, 1, 0 7416; GFX9-NEXT: s_lshr_b64 s[24:25], s[0:1], s21 7417; GFX9-NEXT: s_lshl_b64 s[26:27], s[2:3], s16 7418; GFX9-NEXT: s_lshl_b64 s[22:23], s[0:1], s16 7419; GFX9-NEXT: s_or_b64 s[24:25], s[24:25], s[26:27] 7420; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], s19 7421; GFX9-NEXT: s_cmp_lg_u32 s28, 0 7422; GFX9-NEXT: s_cselect_b64 s[22:23], s[22:23], 0 7423; GFX9-NEXT: s_cselect_b64 s[0:1], s[24:25], s[0:1] 7424; GFX9-NEXT: s_cmp_lg_u32 s17, 0 7425; GFX9-NEXT: s_mov_b32 s18, 0 7426; GFX9-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1] 7427; GFX9-NEXT: s_lshr_b64 s[0:1], s[8:9], 1 7428; GFX9-NEXT: s_lshl_b32 s19, s10, 31 7429; GFX9-NEXT: s_lshr_b64 s[8:9], s[10:11], 1 7430; GFX9-NEXT: s_andn2_b32 s10, 0x7f, s16 7431; GFX9-NEXT: s_or_b64 s[0:1], s[0:1], s[18:19] 7432; GFX9-NEXT: s_not_b32 s17, s16 7433; GFX9-NEXT: s_sub_i32 s19, s10, 64 7434; GFX9-NEXT: s_sub_i32 s21, 64, s10 7435; GFX9-NEXT: s_cmp_lt_u32 s10, 64 7436; GFX9-NEXT: s_cselect_b32 s26, 1, 0 7437; GFX9-NEXT: s_cmp_eq_u32 s10, 0 7438; GFX9-NEXT: s_cselect_b32 s27, 1, 0 7439; GFX9-NEXT: s_lshr_b64 s[10:11], s[8:9], s17 7440; GFX9-NEXT: s_lshr_b64 s[16:17], s[0:1], s17 7441; GFX9-NEXT: s_lshl_b64 s[24:25], s[8:9], s21 7442; GFX9-NEXT: s_or_b64 s[16:17], s[16:17], s[24:25] 7443; GFX9-NEXT: s_lshr_b64 s[8:9], s[8:9], s19 7444; GFX9-NEXT: s_cmp_lg_u32 s26, 0 7445; GFX9-NEXT: s_cselect_b64 s[8:9], 
s[16:17], s[8:9] 7446; GFX9-NEXT: s_cmp_lg_u32 s27, 0 7447; GFX9-NEXT: s_cselect_b64 s[0:1], s[0:1], s[8:9] 7448; GFX9-NEXT: s_cmp_lg_u32 s26, 0 7449; GFX9-NEXT: s_cselect_b64 s[8:9], s[10:11], 0 7450; GFX9-NEXT: s_or_b64 s[2:3], s[2:3], s[8:9] 7451; GFX9-NEXT: s_and_b32 s8, s20, 0x7f 7452; GFX9-NEXT: s_or_b64 s[0:1], s[22:23], s[0:1] 7453; GFX9-NEXT: s_sub_i32 s19, s8, 64 7454; GFX9-NEXT: s_sub_i32 s10, 64, s8 7455; GFX9-NEXT: s_cmp_lt_u32 s8, 64 7456; GFX9-NEXT: s_cselect_b32 s21, 1, 0 7457; GFX9-NEXT: s_cmp_eq_u32 s8, 0 7458; GFX9-NEXT: s_cselect_b32 s22, 1, 0 7459; GFX9-NEXT: s_lshr_b64 s[10:11], s[4:5], s10 7460; GFX9-NEXT: s_lshl_b64 s[16:17], s[6:7], s20 7461; GFX9-NEXT: s_lshl_b64 s[8:9], s[4:5], s20 7462; GFX9-NEXT: s_or_b64 s[10:11], s[10:11], s[16:17] 7463; GFX9-NEXT: s_lshl_b64 s[4:5], s[4:5], s19 7464; GFX9-NEXT: s_cmp_lg_u32 s21, 0 7465; GFX9-NEXT: s_cselect_b64 s[8:9], s[8:9], 0 7466; GFX9-NEXT: s_cselect_b64 s[4:5], s[10:11], s[4:5] 7467; GFX9-NEXT: s_cmp_lg_u32 s22, 0 7468; GFX9-NEXT: s_cselect_b64 s[6:7], s[6:7], s[4:5] 7469; GFX9-NEXT: s_lshr_b64 s[4:5], s[12:13], 1 7470; GFX9-NEXT: s_lshl_b32 s19, s14, 31 7471; GFX9-NEXT: s_andn2_b32 s12, 0x7f, s20 7472; GFX9-NEXT: s_or_b64 s[4:5], s[4:5], s[18:19] 7473; GFX9-NEXT: s_lshr_b64 s[10:11], s[14:15], 1 7474; GFX9-NEXT: s_not_b32 s14, s20 7475; GFX9-NEXT: s_sub_i32 s18, s12, 64 7476; GFX9-NEXT: s_sub_i32 s16, 64, s12 7477; GFX9-NEXT: s_cmp_lt_u32 s12, 64 7478; GFX9-NEXT: s_cselect_b32 s19, 1, 0 7479; GFX9-NEXT: s_cmp_eq_u32 s12, 0 7480; GFX9-NEXT: s_cselect_b32 s20, 1, 0 7481; GFX9-NEXT: s_lshr_b64 s[12:13], s[10:11], s14 7482; GFX9-NEXT: s_lshr_b64 s[14:15], s[4:5], s14 7483; GFX9-NEXT: s_lshl_b64 s[16:17], s[10:11], s16 7484; GFX9-NEXT: s_or_b64 s[14:15], s[14:15], s[16:17] 7485; GFX9-NEXT: s_lshr_b64 s[10:11], s[10:11], s18 7486; GFX9-NEXT: s_cmp_lg_u32 s19, 0 7487; GFX9-NEXT: s_cselect_b64 s[10:11], s[14:15], s[10:11] 7488; GFX9-NEXT: s_cmp_lg_u32 s20, 0 7489; GFX9-NEXT: s_cselect_b64 s[4:5], 
s[4:5], s[10:11] 7490; GFX9-NEXT: s_cmp_lg_u32 s19, 0 7491; GFX9-NEXT: s_cselect_b64 s[10:11], s[12:13], 0 7492; GFX9-NEXT: s_or_b64 s[4:5], s[8:9], s[4:5] 7493; GFX9-NEXT: s_or_b64 s[6:7], s[6:7], s[10:11] 7494; GFX9-NEXT: ; return to shader part epilog 7495; 7496; GFX10-LABEL: s_fshl_v2i128: 7497; GFX10: ; %bb.0: 7498; GFX10-NEXT: s_and_b32 s17, s16, 0x7f 7499; GFX10-NEXT: s_mov_b32 s18, 0 7500; GFX10-NEXT: s_sub_i32 s19, s17, 64 7501; GFX10-NEXT: s_sub_i32 s21, 64, s17 7502; GFX10-NEXT: s_cmp_lt_u32 s17, 64 7503; GFX10-NEXT: s_cselect_b32 s28, 1, 0 7504; GFX10-NEXT: s_cmp_eq_u32 s17, 0 7505; GFX10-NEXT: s_cselect_b32 s17, 1, 0 7506; GFX10-NEXT: s_lshr_b64 s[22:23], s[0:1], s21 7507; GFX10-NEXT: s_lshl_b64 s[24:25], s[2:3], s16 7508; GFX10-NEXT: s_lshl_b64 s[26:27], s[0:1], s16 7509; GFX10-NEXT: s_or_b64 s[22:23], s[22:23], s[24:25] 7510; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s19 7511; GFX10-NEXT: s_cmp_lg_u32 s28, 0 7512; GFX10-NEXT: s_cselect_b64 s[24:25], s[26:27], 0 7513; GFX10-NEXT: s_cselect_b64 s[0:1], s[22:23], s[0:1] 7514; GFX10-NEXT: s_cmp_lg_u32 s17, 0 7515; GFX10-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1] 7516; GFX10-NEXT: s_lshr_b64 s[0:1], s[8:9], 1 7517; GFX10-NEXT: s_lshl_b32 s19, s10, 31 7518; GFX10-NEXT: s_lshr_b64 s[8:9], s[10:11], 1 7519; GFX10-NEXT: s_andn2_b32 s10, 0x7f, s16 7520; GFX10-NEXT: s_or_b64 s[0:1], s[0:1], s[18:19] 7521; GFX10-NEXT: s_not_b32 s19, s16 7522; GFX10-NEXT: s_sub_i32 s21, s10, 64 7523; GFX10-NEXT: s_sub_i32 s16, 64, s10 7524; GFX10-NEXT: s_cmp_lt_u32 s10, 64 7525; GFX10-NEXT: s_cselect_b32 s26, 1, 0 7526; GFX10-NEXT: s_cmp_eq_u32 s10, 0 7527; GFX10-NEXT: s_cselect_b32 s27, 1, 0 7528; GFX10-NEXT: s_lshr_b64 s[10:11], s[0:1], s19 7529; GFX10-NEXT: s_lshl_b64 s[16:17], s[8:9], s16 7530; GFX10-NEXT: s_lshr_b64 s[22:23], s[8:9], s19 7531; GFX10-NEXT: s_or_b64 s[10:11], s[10:11], s[16:17] 7532; GFX10-NEXT: s_lshr_b64 s[8:9], s[8:9], s21 7533; GFX10-NEXT: s_cmp_lg_u32 s26, 0 7534; GFX10-NEXT: s_cselect_b64 s[8:9], s[10:11], 
s[8:9] 7535; GFX10-NEXT: s_cmp_lg_u32 s27, 0 7536; GFX10-NEXT: s_cselect_b64 s[0:1], s[0:1], s[8:9] 7537; GFX10-NEXT: s_cmp_lg_u32 s26, 0 7538; GFX10-NEXT: s_cselect_b64 s[8:9], s[22:23], 0 7539; GFX10-NEXT: s_and_b32 s10, s20, 0x7f 7540; GFX10-NEXT: s_or_b64 s[0:1], s[24:25], s[0:1] 7541; GFX10-NEXT: s_or_b64 s[2:3], s[2:3], s[8:9] 7542; GFX10-NEXT: s_sub_i32 s19, s10, 64 7543; GFX10-NEXT: s_sub_i32 s8, 64, s10 7544; GFX10-NEXT: s_cmp_lt_u32 s10, 64 7545; GFX10-NEXT: s_cselect_b32 s21, 1, 0 7546; GFX10-NEXT: s_cmp_eq_u32 s10, 0 7547; GFX10-NEXT: s_cselect_b32 s22, 1, 0 7548; GFX10-NEXT: s_lshr_b64 s[8:9], s[4:5], s8 7549; GFX10-NEXT: s_lshl_b64 s[10:11], s[6:7], s20 7550; GFX10-NEXT: s_lshl_b64 s[16:17], s[4:5], s20 7551; GFX10-NEXT: s_or_b64 s[8:9], s[8:9], s[10:11] 7552; GFX10-NEXT: s_lshl_b64 s[4:5], s[4:5], s19 7553; GFX10-NEXT: s_cmp_lg_u32 s21, 0 7554; GFX10-NEXT: s_cselect_b64 s[10:11], s[16:17], 0 7555; GFX10-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] 7556; GFX10-NEXT: s_cmp_lg_u32 s22, 0 7557; GFX10-NEXT: s_cselect_b64 s[6:7], s[6:7], s[4:5] 7558; GFX10-NEXT: s_lshr_b64 s[4:5], s[12:13], 1 7559; GFX10-NEXT: s_lshl_b32 s19, s14, 31 7560; GFX10-NEXT: s_andn2_b32 s12, 0x7f, s20 7561; GFX10-NEXT: s_or_b64 s[4:5], s[4:5], s[18:19] 7562; GFX10-NEXT: s_lshr_b64 s[8:9], s[14:15], 1 7563; GFX10-NEXT: s_not_b32 s16, s20 7564; GFX10-NEXT: s_sub_i32 s18, s12, 64 7565; GFX10-NEXT: s_sub_i32 s14, 64, s12 7566; GFX10-NEXT: s_cmp_lt_u32 s12, 64 7567; GFX10-NEXT: s_cselect_b32 s19, 1, 0 7568; GFX10-NEXT: s_cmp_eq_u32 s12, 0 7569; GFX10-NEXT: s_cselect_b32 s20, 1, 0 7570; GFX10-NEXT: s_lshr_b64 s[12:13], s[4:5], s16 7571; GFX10-NEXT: s_lshl_b64 s[14:15], s[8:9], s14 7572; GFX10-NEXT: s_lshr_b64 s[16:17], s[8:9], s16 7573; GFX10-NEXT: s_or_b64 s[12:13], s[12:13], s[14:15] 7574; GFX10-NEXT: s_lshr_b64 s[8:9], s[8:9], s18 7575; GFX10-NEXT: s_cmp_lg_u32 s19, 0 7576; GFX10-NEXT: s_cselect_b64 s[8:9], s[12:13], s[8:9] 7577; GFX10-NEXT: s_cmp_lg_u32 s20, 0 7578; GFX10-NEXT: 
s_cselect_b64 s[4:5], s[4:5], s[8:9] 7579; GFX10-NEXT: s_cmp_lg_u32 s19, 0 7580; GFX10-NEXT: s_cselect_b64 s[8:9], s[16:17], 0 7581; GFX10-NEXT: s_or_b64 s[4:5], s[10:11], s[4:5] 7582; GFX10-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9] 7583; GFX10-NEXT: ; return to shader part epilog 7584; 7585; GFX11-LABEL: s_fshl_v2i128: 7586; GFX11: ; %bb.0: 7587; GFX11-NEXT: s_and_b32 s17, s16, 0x7f 7588; GFX11-NEXT: s_mov_b32 s18, 0 7589; GFX11-NEXT: s_sub_i32 s19, s17, 64 7590; GFX11-NEXT: s_sub_i32 s21, 64, s17 7591; GFX11-NEXT: s_cmp_lt_u32 s17, 64 7592; GFX11-NEXT: s_cselect_b32 s28, 1, 0 7593; GFX11-NEXT: s_cmp_eq_u32 s17, 0 7594; GFX11-NEXT: s_cselect_b32 s17, 1, 0 7595; GFX11-NEXT: s_lshr_b64 s[22:23], s[0:1], s21 7596; GFX11-NEXT: s_lshl_b64 s[24:25], s[2:3], s16 7597; GFX11-NEXT: s_lshl_b64 s[26:27], s[0:1], s16 7598; GFX11-NEXT: s_or_b64 s[22:23], s[22:23], s[24:25] 7599; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], s19 7600; GFX11-NEXT: s_cmp_lg_u32 s28, 0 7601; GFX11-NEXT: s_cselect_b64 s[24:25], s[26:27], 0 7602; GFX11-NEXT: s_cselect_b64 s[0:1], s[22:23], s[0:1] 7603; GFX11-NEXT: s_cmp_lg_u32 s17, 0 7604; GFX11-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1] 7605; GFX11-NEXT: s_lshr_b64 s[0:1], s[8:9], 1 7606; GFX11-NEXT: s_lshl_b32 s19, s10, 31 7607; GFX11-NEXT: s_lshr_b64 s[8:9], s[10:11], 1 7608; GFX11-NEXT: s_and_not1_b32 s10, 0x7f, s16 7609; GFX11-NEXT: s_or_b64 s[0:1], s[0:1], s[18:19] 7610; GFX11-NEXT: s_not_b32 s19, s16 7611; GFX11-NEXT: s_sub_i32 s21, s10, 64 7612; GFX11-NEXT: s_sub_i32 s16, 64, s10 7613; GFX11-NEXT: s_cmp_lt_u32 s10, 64 7614; GFX11-NEXT: s_cselect_b32 s26, 1, 0 7615; GFX11-NEXT: s_cmp_eq_u32 s10, 0 7616; GFX11-NEXT: s_cselect_b32 s27, 1, 0 7617; GFX11-NEXT: s_lshr_b64 s[10:11], s[0:1], s19 7618; GFX11-NEXT: s_lshl_b64 s[16:17], s[8:9], s16 7619; GFX11-NEXT: s_lshr_b64 s[22:23], s[8:9], s19 7620; GFX11-NEXT: s_or_b64 s[10:11], s[10:11], s[16:17] 7621; GFX11-NEXT: s_lshr_b64 s[8:9], s[8:9], s21 7622; GFX11-NEXT: s_cmp_lg_u32 s26, 0 7623; GFX11-NEXT: 
s_cselect_b64 s[8:9], s[10:11], s[8:9] 7624; GFX11-NEXT: s_cmp_lg_u32 s27, 0 7625; GFX11-NEXT: s_cselect_b64 s[0:1], s[0:1], s[8:9] 7626; GFX11-NEXT: s_cmp_lg_u32 s26, 0 7627; GFX11-NEXT: s_cselect_b64 s[8:9], s[22:23], 0 7628; GFX11-NEXT: s_and_b32 s10, s20, 0x7f 7629; GFX11-NEXT: s_or_b64 s[0:1], s[24:25], s[0:1] 7630; GFX11-NEXT: s_or_b64 s[2:3], s[2:3], s[8:9] 7631; GFX11-NEXT: s_sub_i32 s19, s10, 64 7632; GFX11-NEXT: s_sub_i32 s8, 64, s10 7633; GFX11-NEXT: s_cmp_lt_u32 s10, 64 7634; GFX11-NEXT: s_cselect_b32 s21, 1, 0 7635; GFX11-NEXT: s_cmp_eq_u32 s10, 0 7636; GFX11-NEXT: s_cselect_b32 s22, 1, 0 7637; GFX11-NEXT: s_lshr_b64 s[8:9], s[4:5], s8 7638; GFX11-NEXT: s_lshl_b64 s[10:11], s[6:7], s20 7639; GFX11-NEXT: s_lshl_b64 s[16:17], s[4:5], s20 7640; GFX11-NEXT: s_or_b64 s[8:9], s[8:9], s[10:11] 7641; GFX11-NEXT: s_lshl_b64 s[4:5], s[4:5], s19 7642; GFX11-NEXT: s_cmp_lg_u32 s21, 0 7643; GFX11-NEXT: s_cselect_b64 s[10:11], s[16:17], 0 7644; GFX11-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] 7645; GFX11-NEXT: s_cmp_lg_u32 s22, 0 7646; GFX11-NEXT: s_cselect_b64 s[6:7], s[6:7], s[4:5] 7647; GFX11-NEXT: s_lshr_b64 s[4:5], s[12:13], 1 7648; GFX11-NEXT: s_lshl_b32 s19, s14, 31 7649; GFX11-NEXT: s_and_not1_b32 s12, 0x7f, s20 7650; GFX11-NEXT: s_or_b64 s[4:5], s[4:5], s[18:19] 7651; GFX11-NEXT: s_lshr_b64 s[8:9], s[14:15], 1 7652; GFX11-NEXT: s_not_b32 s16, s20 7653; GFX11-NEXT: s_sub_i32 s18, s12, 64 7654; GFX11-NEXT: s_sub_i32 s14, 64, s12 7655; GFX11-NEXT: s_cmp_lt_u32 s12, 64 7656; GFX11-NEXT: s_cselect_b32 s19, 1, 0 7657; GFX11-NEXT: s_cmp_eq_u32 s12, 0 7658; GFX11-NEXT: s_cselect_b32 s20, 1, 0 7659; GFX11-NEXT: s_lshr_b64 s[12:13], s[4:5], s16 7660; GFX11-NEXT: s_lshl_b64 s[14:15], s[8:9], s14 7661; GFX11-NEXT: s_lshr_b64 s[16:17], s[8:9], s16 7662; GFX11-NEXT: s_or_b64 s[12:13], s[12:13], s[14:15] 7663; GFX11-NEXT: s_lshr_b64 s[8:9], s[8:9], s18 7664; GFX11-NEXT: s_cmp_lg_u32 s19, 0 7665; GFX11-NEXT: s_cselect_b64 s[8:9], s[12:13], s[8:9] 7666; GFX11-NEXT: 
s_cmp_lg_u32 s20, 0 7667; GFX11-NEXT: s_cselect_b64 s[4:5], s[4:5], s[8:9] 7668; GFX11-NEXT: s_cmp_lg_u32 s19, 0 7669; GFX11-NEXT: s_cselect_b64 s[8:9], s[16:17], 0 7670; GFX11-NEXT: s_or_b64 s[4:5], s[10:11], s[4:5] 7671; GFX11-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9] 7672; GFX11-NEXT: ; return to shader part epilog 7673 %result = call <2 x i128> @llvm.fshl.v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %amt) 7674 ret <2 x i128> %result 7675} 7676 7677define <2 x i128> @v_fshl_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %amt) { 7678; GFX6-LABEL: v_fshl_v2i128: 7679; GFX6: ; %bb.0: 7680; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7681; GFX6-NEXT: v_and_b32_e32 v23, 0x7f, v16 7682; GFX6-NEXT: v_sub_i32_e32 v17, vcc, 64, v23 7683; GFX6-NEXT: v_lshr_b64 v[17:18], v[0:1], v17 7684; GFX6-NEXT: v_lshl_b64 v[21:22], v[2:3], v23 7685; GFX6-NEXT: v_lshr_b64 v[8:9], v[8:9], 1 7686; GFX6-NEXT: v_not_b32_e32 v16, v16 7687; GFX6-NEXT: v_or_b32_e32 v21, v17, v21 7688; GFX6-NEXT: v_lshlrev_b32_e32 v17, 31, v10 7689; GFX6-NEXT: v_lshr_b64 v[10:11], v[10:11], 1 7690; GFX6-NEXT: v_and_b32_e32 v24, 0x7f, v16 7691; GFX6-NEXT: v_or_b32_e32 v9, v9, v17 7692; GFX6-NEXT: v_sub_i32_e32 v16, vcc, 64, v24 7693; GFX6-NEXT: v_or_b32_e32 v22, v18, v22 7694; GFX6-NEXT: v_lshl_b64 v[16:17], v[10:11], v16 7695; GFX6-NEXT: v_lshr_b64 v[18:19], v[8:9], v24 7696; GFX6-NEXT: v_not_b32_e32 v25, 63 7697; GFX6-NEXT: v_or_b32_e32 v18, v18, v16 7698; GFX6-NEXT: v_add_i32_e32 v16, vcc, v23, v25 7699; GFX6-NEXT: v_or_b32_e32 v19, v19, v17 7700; GFX6-NEXT: v_lshl_b64 v[16:17], v[0:1], v16 7701; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], v23 7702; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v23 7703; GFX6-NEXT: v_cndmask_b32_e32 v26, 0, v0, vcc 7704; GFX6-NEXT: v_cndmask_b32_e32 v0, v16, v21, vcc 7705; GFX6-NEXT: v_cndmask_b32_e32 v16, v17, v22, vcc 7706; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v23 7707; GFX6-NEXT: v_cndmask_b32_e64 v17, v0, v2, s[4:5] 7708; GFX6-NEXT: v_cndmask_b32_e64 v16, v16, v3, 
s[4:5] 7709; GFX6-NEXT: v_add_i32_e64 v0, s[4:5], v24, v25 7710; GFX6-NEXT: v_lshr_b64 v[2:3], v[10:11], v0 7711; GFX6-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v24 7712; GFX6-NEXT: v_cndmask_b32_e64 v2, v2, v18, s[4:5] 7713; GFX6-NEXT: v_cndmask_b32_e32 v18, 0, v1, vcc 7714; GFX6-NEXT: v_lshr_b64 v[0:1], v[10:11], v24 7715; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, 0, v24 7716; GFX6-NEXT: v_cndmask_b32_e64 v3, v3, v19, s[4:5] 7717; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc 7718; GFX6-NEXT: v_cndmask_b32_e64 v8, 0, v0, s[4:5] 7719; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v9, vcc 7720; GFX6-NEXT: v_or_b32_e32 v0, v26, v2 7721; GFX6-NEXT: v_or_b32_e32 v2, v17, v8 7722; GFX6-NEXT: v_and_b32_e32 v17, 0x7f, v20 7723; GFX6-NEXT: v_cndmask_b32_e64 v19, 0, v1, s[4:5] 7724; GFX6-NEXT: v_or_b32_e32 v1, v18, v3 7725; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 64, v17 7726; GFX6-NEXT: v_lshr_b64 v[8:9], v[4:5], v3 7727; GFX6-NEXT: v_lshl_b64 v[10:11], v[6:7], v17 7728; GFX6-NEXT: v_or_b32_e32 v3, v16, v19 7729; GFX6-NEXT: v_add_i32_e32 v16, vcc, v17, v25 7730; GFX6-NEXT: v_or_b32_e32 v10, v8, v10 7731; GFX6-NEXT: v_or_b32_e32 v11, v9, v11 7732; GFX6-NEXT: v_lshl_b64 v[8:9], v[4:5], v17 7733; GFX6-NEXT: v_lshl_b64 v[4:5], v[4:5], v16 7734; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v17 7735; GFX6-NEXT: v_cndmask_b32_e32 v16, 0, v8, vcc 7736; GFX6-NEXT: v_cndmask_b32_e32 v18, 0, v9, vcc 7737; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v10, vcc 7738; GFX6-NEXT: v_cndmask_b32_e32 v5, v5, v11, vcc 7739; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, 0, v17 7740; GFX6-NEXT: v_cndmask_b32_e32 v17, v4, v6, vcc 7741; GFX6-NEXT: v_cndmask_b32_e32 v19, v5, v7, vcc 7742; GFX6-NEXT: v_lshr_b64 v[4:5], v[12:13], 1 7743; GFX6-NEXT: v_lshlrev_b32_e32 v6, 31, v14 7744; GFX6-NEXT: v_not_b32_e32 v8, v20 7745; GFX6-NEXT: v_or_b32_e32 v5, v5, v6 7746; GFX6-NEXT: v_lshr_b64 v[6:7], v[14:15], 1 7747; GFX6-NEXT: v_and_b32_e32 v12, 0x7f, v8 7748; GFX6-NEXT: v_sub_i32_e32 v10, vcc, 64, v12 7749; GFX6-NEXT: v_lshr_b64 v[8:9], v[4:5], v12 7750; 
GFX6-NEXT: v_lshl_b64 v[10:11], v[6:7], v10 7751; GFX6-NEXT: v_add_i32_e32 v13, vcc, v12, v25 7752; GFX6-NEXT: v_or_b32_e32 v10, v8, v10 7753; GFX6-NEXT: v_or_b32_e32 v11, v9, v11 7754; GFX6-NEXT: v_lshr_b64 v[8:9], v[6:7], v12 7755; GFX6-NEXT: v_lshr_b64 v[6:7], v[6:7], v13 7756; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v12 7757; GFX6-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc 7758; GFX6-NEXT: v_cndmask_b32_e32 v7, v7, v11, vcc 7759; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v12 7760; GFX6-NEXT: v_cndmask_b32_e64 v4, v6, v4, s[4:5] 7761; GFX6-NEXT: v_cndmask_b32_e64 v5, v7, v5, s[4:5] 7762; GFX6-NEXT: v_cndmask_b32_e32 v6, 0, v8, vcc 7763; GFX6-NEXT: v_cndmask_b32_e32 v7, 0, v9, vcc 7764; GFX6-NEXT: v_or_b32_e32 v4, v16, v4 7765; GFX6-NEXT: v_or_b32_e32 v5, v18, v5 7766; GFX6-NEXT: v_or_b32_e32 v6, v17, v6 7767; GFX6-NEXT: v_or_b32_e32 v7, v19, v7 7768; GFX6-NEXT: s_setpc_b64 s[30:31] 7769; 7770; GFX8-LABEL: v_fshl_v2i128: 7771; GFX8: ; %bb.0: 7772; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7773; GFX8-NEXT: v_and_b32_e32 v23, 0x7f, v16 7774; GFX8-NEXT: v_sub_u32_e32 v17, vcc, 64, v23 7775; GFX8-NEXT: v_lshrrev_b64 v[17:18], v17, v[0:1] 7776; GFX8-NEXT: v_lshlrev_b64 v[21:22], v23, v[2:3] 7777; GFX8-NEXT: v_lshrrev_b64 v[8:9], 1, v[8:9] 7778; GFX8-NEXT: v_not_b32_e32 v16, v16 7779; GFX8-NEXT: v_or_b32_e32 v21, v17, v21 7780; GFX8-NEXT: v_lshlrev_b32_e32 v17, 31, v10 7781; GFX8-NEXT: v_lshrrev_b64 v[10:11], 1, v[10:11] 7782; GFX8-NEXT: v_and_b32_e32 v24, 0x7f, v16 7783; GFX8-NEXT: v_or_b32_e32 v9, v9, v17 7784; GFX8-NEXT: v_sub_u32_e32 v16, vcc, 64, v24 7785; GFX8-NEXT: v_or_b32_e32 v22, v18, v22 7786; GFX8-NEXT: v_lshlrev_b64 v[16:17], v16, v[10:11] 7787; GFX8-NEXT: v_lshrrev_b64 v[18:19], v24, v[8:9] 7788; GFX8-NEXT: v_not_b32_e32 v25, 63 7789; GFX8-NEXT: v_or_b32_e32 v18, v18, v16 7790; GFX8-NEXT: v_add_u32_e32 v16, vcc, v23, v25 7791; GFX8-NEXT: v_or_b32_e32 v19, v19, v17 7792; GFX8-NEXT: v_lshlrev_b64 v[16:17], v16, v[0:1] 7793; GFX8-NEXT: v_lshlrev_b64 v[0:1], 
v23, v[0:1] 7794; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v23 7795; GFX8-NEXT: v_cndmask_b32_e32 v26, 0, v0, vcc 7796; GFX8-NEXT: v_cndmask_b32_e32 v0, v16, v21, vcc 7797; GFX8-NEXT: v_cndmask_b32_e32 v16, v17, v22, vcc 7798; GFX8-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v23 7799; GFX8-NEXT: v_cndmask_b32_e64 v17, v0, v2, s[4:5] 7800; GFX8-NEXT: v_cndmask_b32_e64 v16, v16, v3, s[4:5] 7801; GFX8-NEXT: v_add_u32_e64 v0, s[4:5], v24, v25 7802; GFX8-NEXT: v_lshrrev_b64 v[2:3], v0, v[10:11] 7803; GFX8-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v24 7804; GFX8-NEXT: v_cndmask_b32_e64 v2, v2, v18, s[4:5] 7805; GFX8-NEXT: v_cndmask_b32_e32 v18, 0, v1, vcc 7806; GFX8-NEXT: v_lshrrev_b64 v[0:1], v24, v[10:11] 7807; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v24 7808; GFX8-NEXT: v_cndmask_b32_e64 v3, v3, v19, s[4:5] 7809; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc 7810; GFX8-NEXT: v_cndmask_b32_e64 v8, 0, v0, s[4:5] 7811; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v9, vcc 7812; GFX8-NEXT: v_or_b32_e32 v0, v26, v2 7813; GFX8-NEXT: v_or_b32_e32 v2, v17, v8 7814; GFX8-NEXT: v_and_b32_e32 v17, 0x7f, v20 7815; GFX8-NEXT: v_cndmask_b32_e64 v19, 0, v1, s[4:5] 7816; GFX8-NEXT: v_or_b32_e32 v1, v18, v3 7817; GFX8-NEXT: v_sub_u32_e32 v3, vcc, 64, v17 7818; GFX8-NEXT: v_lshrrev_b64 v[8:9], v3, v[4:5] 7819; GFX8-NEXT: v_lshlrev_b64 v[10:11], v17, v[6:7] 7820; GFX8-NEXT: v_or_b32_e32 v3, v16, v19 7821; GFX8-NEXT: v_add_u32_e32 v16, vcc, v17, v25 7822; GFX8-NEXT: v_or_b32_e32 v10, v8, v10 7823; GFX8-NEXT: v_or_b32_e32 v11, v9, v11 7824; GFX8-NEXT: v_lshlrev_b64 v[8:9], v17, v[4:5] 7825; GFX8-NEXT: v_lshlrev_b64 v[4:5], v16, v[4:5] 7826; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v17 7827; GFX8-NEXT: v_cndmask_b32_e32 v16, 0, v8, vcc 7828; GFX8-NEXT: v_cndmask_b32_e32 v18, 0, v9, vcc 7829; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v10, vcc 7830; GFX8-NEXT: v_cndmask_b32_e32 v5, v5, v11, vcc 7831; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v17 7832; GFX8-NEXT: v_cndmask_b32_e32 v17, v4, v6, vcc 7833; GFX8-NEXT: v_cndmask_b32_e32 v19, v5, 
v7, vcc 7834; GFX8-NEXT: v_lshrrev_b64 v[4:5], 1, v[12:13] 7835; GFX8-NEXT: v_lshlrev_b32_e32 v6, 31, v14 7836; GFX8-NEXT: v_not_b32_e32 v8, v20 7837; GFX8-NEXT: v_or_b32_e32 v5, v5, v6 7838; GFX8-NEXT: v_lshrrev_b64 v[6:7], 1, v[14:15] 7839; GFX8-NEXT: v_and_b32_e32 v12, 0x7f, v8 7840; GFX8-NEXT: v_sub_u32_e32 v10, vcc, 64, v12 7841; GFX8-NEXT: v_lshrrev_b64 v[8:9], v12, v[4:5] 7842; GFX8-NEXT: v_lshlrev_b64 v[10:11], v10, v[6:7] 7843; GFX8-NEXT: v_add_u32_e32 v13, vcc, v12, v25 7844; GFX8-NEXT: v_or_b32_e32 v10, v8, v10 7845; GFX8-NEXT: v_or_b32_e32 v11, v9, v11 7846; GFX8-NEXT: v_lshrrev_b64 v[8:9], v12, v[6:7] 7847; GFX8-NEXT: v_lshrrev_b64 v[6:7], v13, v[6:7] 7848; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v12 7849; GFX8-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc 7850; GFX8-NEXT: v_cndmask_b32_e32 v7, v7, v11, vcc 7851; GFX8-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v12 7852; GFX8-NEXT: v_cndmask_b32_e64 v4, v6, v4, s[4:5] 7853; GFX8-NEXT: v_cndmask_b32_e64 v5, v7, v5, s[4:5] 7854; GFX8-NEXT: v_cndmask_b32_e32 v6, 0, v8, vcc 7855; GFX8-NEXT: v_cndmask_b32_e32 v7, 0, v9, vcc 7856; GFX8-NEXT: v_or_b32_e32 v4, v16, v4 7857; GFX8-NEXT: v_or_b32_e32 v5, v18, v5 7858; GFX8-NEXT: v_or_b32_e32 v6, v17, v6 7859; GFX8-NEXT: v_or_b32_e32 v7, v19, v7 7860; GFX8-NEXT: s_setpc_b64 s[30:31] 7861; 7862; GFX9-LABEL: v_fshl_v2i128: 7863; GFX9: ; %bb.0: 7864; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7865; GFX9-NEXT: v_and_b32_e32 v23, 0x7f, v16 7866; GFX9-NEXT: v_lshrrev_b64 v[8:9], 1, v[8:9] 7867; GFX9-NEXT: v_sub_u32_e32 v17, 64, v23 7868; GFX9-NEXT: v_not_b32_e32 v16, v16 7869; GFX9-NEXT: v_lshrrev_b64 v[17:18], v17, v[0:1] 7870; GFX9-NEXT: v_lshlrev_b64 v[21:22], v23, v[2:3] 7871; GFX9-NEXT: v_lshl_or_b32 v9, v10, 31, v9 7872; GFX9-NEXT: v_lshrrev_b64 v[10:11], 1, v[10:11] 7873; GFX9-NEXT: v_and_b32_e32 v24, 0x7f, v16 7874; GFX9-NEXT: v_sub_u32_e32 v16, 64, v24 7875; GFX9-NEXT: v_or_b32_e32 v21, v17, v21 7876; GFX9-NEXT: v_or_b32_e32 v22, v18, v22 7877; GFX9-NEXT: v_lshlrev_b64 
v[16:17], v16, v[10:11] 7878; GFX9-NEXT: v_lshrrev_b64 v[18:19], v24, v[8:9] 7879; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v23 7880; GFX9-NEXT: v_or_b32_e32 v18, v18, v16 7881; GFX9-NEXT: v_add_u32_e32 v16, 0xffffffc0, v23 7882; GFX9-NEXT: v_or_b32_e32 v19, v19, v17 7883; GFX9-NEXT: v_lshlrev_b64 v[16:17], v16, v[0:1] 7884; GFX9-NEXT: v_lshlrev_b64 v[0:1], v23, v[0:1] 7885; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v23 7886; GFX9-NEXT: v_cndmask_b32_e32 v25, 0, v0, vcc 7887; GFX9-NEXT: v_cndmask_b32_e32 v0, v16, v21, vcc 7888; GFX9-NEXT: v_cndmask_b32_e32 v16, v17, v22, vcc 7889; GFX9-NEXT: v_cndmask_b32_e64 v17, v0, v2, s[4:5] 7890; GFX9-NEXT: v_add_u32_e32 v0, 0xffffffc0, v24 7891; GFX9-NEXT: v_cndmask_b32_e64 v16, v16, v3, s[4:5] 7892; GFX9-NEXT: v_lshrrev_b64 v[2:3], v0, v[10:11] 7893; GFX9-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v24 7894; GFX9-NEXT: v_cndmask_b32_e64 v2, v2, v18, s[4:5] 7895; GFX9-NEXT: v_cndmask_b32_e32 v18, 0, v1, vcc 7896; GFX9-NEXT: v_lshrrev_b64 v[0:1], v24, v[10:11] 7897; GFX9-NEXT: v_cndmask_b32_e64 v3, v3, v19, s[4:5] 7898; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v24 7899; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v9, vcc 7900; GFX9-NEXT: v_cndmask_b32_e64 v9, 0, v1, s[4:5] 7901; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v8, vcc 7902; GFX9-NEXT: v_cndmask_b32_e64 v8, 0, v0, s[4:5] 7903; GFX9-NEXT: v_or_b32_e32 v1, v18, v3 7904; GFX9-NEXT: v_or_b32_e32 v3, v16, v9 7905; GFX9-NEXT: v_and_b32_e32 v16, 0x7f, v20 7906; GFX9-NEXT: v_or_b32_e32 v0, v25, v2 7907; GFX9-NEXT: v_or_b32_e32 v2, v17, v8 7908; GFX9-NEXT: v_sub_u32_e32 v8, 64, v16 7909; GFX9-NEXT: v_lshrrev_b64 v[8:9], v8, v[4:5] 7910; GFX9-NEXT: v_lshlrev_b64 v[10:11], v16, v[6:7] 7911; GFX9-NEXT: v_add_u32_e32 v17, 0xffffffc0, v16 7912; GFX9-NEXT: v_or_b32_e32 v10, v8, v10 7913; GFX9-NEXT: v_or_b32_e32 v11, v9, v11 7914; GFX9-NEXT: v_lshlrev_b64 v[8:9], v16, v[4:5] 7915; GFX9-NEXT: v_lshlrev_b64 v[4:5], v17, v[4:5] 7916; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v16 7917; GFX9-NEXT: v_cndmask_b32_e32 v17, 0, 
v8, vcc 7918; GFX9-NEXT: v_cndmask_b32_e32 v18, 0, v9, vcc 7919; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v10, vcc 7920; GFX9-NEXT: v_cndmask_b32_e32 v8, v5, v11, vcc 7921; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v16 7922; GFX9-NEXT: v_cndmask_b32_e32 v16, v4, v6, vcc 7923; GFX9-NEXT: v_lshrrev_b64 v[4:5], 1, v[12:13] 7924; GFX9-NEXT: v_cndmask_b32_e32 v12, v8, v7, vcc 7925; GFX9-NEXT: v_not_b32_e32 v8, v20 7926; GFX9-NEXT: v_lshrrev_b64 v[6:7], 1, v[14:15] 7927; GFX9-NEXT: v_and_b32_e32 v13, 0x7f, v8 7928; GFX9-NEXT: v_lshl_or_b32 v5, v14, 31, v5 7929; GFX9-NEXT: v_sub_u32_e32 v10, 64, v13 7930; GFX9-NEXT: v_lshrrev_b64 v[8:9], v13, v[4:5] 7931; GFX9-NEXT: v_lshlrev_b64 v[10:11], v10, v[6:7] 7932; GFX9-NEXT: v_add_u32_e32 v14, 0xffffffc0, v13 7933; GFX9-NEXT: v_or_b32_e32 v10, v8, v10 7934; GFX9-NEXT: v_or_b32_e32 v11, v9, v11 7935; GFX9-NEXT: v_lshrrev_b64 v[8:9], v13, v[6:7] 7936; GFX9-NEXT: v_lshrrev_b64 v[6:7], v14, v[6:7] 7937; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v13 7938; GFX9-NEXT: v_cndmask_b32_e32 v6, v6, v10, vcc 7939; GFX9-NEXT: v_cndmask_b32_e32 v7, v7, v11, vcc 7940; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v13 7941; GFX9-NEXT: v_cndmask_b32_e64 v4, v6, v4, s[4:5] 7942; GFX9-NEXT: v_cndmask_b32_e64 v5, v7, v5, s[4:5] 7943; GFX9-NEXT: v_cndmask_b32_e32 v6, 0, v8, vcc 7944; GFX9-NEXT: v_cndmask_b32_e32 v7, 0, v9, vcc 7945; GFX9-NEXT: v_or_b32_e32 v4, v17, v4 7946; GFX9-NEXT: v_or_b32_e32 v5, v18, v5 7947; GFX9-NEXT: v_or_b32_e32 v6, v16, v6 7948; GFX9-NEXT: v_or_b32_e32 v7, v12, v7 7949; GFX9-NEXT: s_setpc_b64 s[30:31] 7950; 7951; GFX10-LABEL: v_fshl_v2i128: 7952; GFX10: ; %bb.0: 7953; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7954; GFX10-NEXT: v_and_b32_e32 v27, 0x7f, v16 7955; GFX10-NEXT: v_not_b32_e32 v21, v16 7956; GFX10-NEXT: v_lshrrev_b64 v[8:9], 1, v[8:9] 7957; GFX10-NEXT: v_sub_nc_u32_e32 v17, 64, v27 7958; GFX10-NEXT: v_and_b32_e32 v28, 0x7f, v21 7959; GFX10-NEXT: v_lshlrev_b64 v[18:19], v27, v[2:3] 7960; GFX10-NEXT: v_lshl_or_b32 v9, v10, 31, 
v9 7961; GFX10-NEXT: v_lshrrev_b64 v[10:11], 1, v[10:11] 7962; GFX10-NEXT: v_lshrrev_b64 v[16:17], v17, v[0:1] 7963; GFX10-NEXT: v_add_nc_u32_e32 v29, 0xffffffc0, v27 7964; GFX10-NEXT: v_sub_nc_u32_e32 v25, 64, v28 7965; GFX10-NEXT: v_lshlrev_b64 v[21:22], v27, v[0:1] 7966; GFX10-NEXT: v_lshrrev_b64 v[23:24], v28, v[8:9] 7967; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v27 7968; GFX10-NEXT: v_or_b32_e32 v18, v16, v18 7969; GFX10-NEXT: v_add_nc_u32_e32 v16, 0xffffffc0, v28 7970; GFX10-NEXT: v_lshlrev_b64 v[25:26], v25, v[10:11] 7971; GFX10-NEXT: v_lshlrev_b64 v[0:1], v29, v[0:1] 7972; GFX10-NEXT: v_or_b32_e32 v19, v17, v19 7973; GFX10-NEXT: v_cndmask_b32_e32 v21, 0, v21, vcc_lo 7974; GFX10-NEXT: v_lshrrev_b64 v[16:17], v16, v[10:11] 7975; GFX10-NEXT: v_cndmask_b32_e32 v22, 0, v22, vcc_lo 7976; GFX10-NEXT: v_or_b32_e32 v23, v23, v25 7977; GFX10-NEXT: v_cndmask_b32_e32 v18, v0, v18, vcc_lo 7978; GFX10-NEXT: v_cndmask_b32_e32 v19, v1, v19, vcc_lo 7979; GFX10-NEXT: v_or_b32_e32 v24, v24, v26 7980; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v28 7981; GFX10-NEXT: v_cmp_eq_u32_e64 s4, 0, v27 7982; GFX10-NEXT: v_cmp_eq_u32_e64 s5, 0, v28 7983; GFX10-NEXT: v_lshrrev_b64 v[0:1], v28, v[10:11] 7984; GFX10-NEXT: v_cndmask_b32_e32 v16, v16, v23, vcc_lo 7985; GFX10-NEXT: v_cndmask_b32_e32 v10, v17, v24, vcc_lo 7986; GFX10-NEXT: v_cndmask_b32_e64 v23, v19, v3, s4 7987; GFX10-NEXT: v_and_b32_e32 v24, 0x7f, v20 7988; GFX10-NEXT: v_cndmask_b32_e32 v25, 0, v1, vcc_lo 7989; GFX10-NEXT: v_cndmask_b32_e64 v3, v16, v8, s5 7990; GFX10-NEXT: v_cndmask_b32_e64 v8, v10, v9, s5 7991; GFX10-NEXT: v_not_b32_e32 v16, v20 7992; GFX10-NEXT: v_cndmask_b32_e64 v2, v18, v2, s4 7993; GFX10-NEXT: v_cndmask_b32_e32 v10, 0, v0, vcc_lo 7994; GFX10-NEXT: v_or_b32_e32 v0, v21, v3 7995; GFX10-NEXT: v_or_b32_e32 v1, v22, v8 7996; GFX10-NEXT: v_lshrrev_b64 v[8:9], 1, v[12:13] 7997; GFX10-NEXT: v_sub_nc_u32_e32 v3, 64, v24 7998; GFX10-NEXT: v_and_b32_e32 v22, 0x7f, v16 7999; GFX10-NEXT: v_or_b32_e32 v2, v2, v10 
8000; GFX10-NEXT: v_lshlrev_b64 v[12:13], v24, v[6:7] 8001; GFX10-NEXT: v_lshlrev_b64 v[16:17], v24, v[4:5] 8002; GFX10-NEXT: v_lshrrev_b64 v[10:11], v3, v[4:5] 8003; GFX10-NEXT: v_lshl_or_b32 v9, v14, 31, v9 8004; GFX10-NEXT: v_lshrrev_b64 v[14:15], 1, v[14:15] 8005; GFX10-NEXT: v_sub_nc_u32_e32 v20, 64, v22 8006; GFX10-NEXT: v_add_nc_u32_e32 v3, 0xffffffc0, v24 8007; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v24 8008; GFX10-NEXT: v_or_b32_e32 v12, v10, v12 8009; GFX10-NEXT: v_add_nc_u32_e32 v10, 0xffffffc0, v22 8010; GFX10-NEXT: v_lshrrev_b64 v[18:19], v22, v[8:9] 8011; GFX10-NEXT: v_lshlrev_b64 v[20:21], v20, v[14:15] 8012; GFX10-NEXT: v_lshlrev_b64 v[3:4], v3, v[4:5] 8013; GFX10-NEXT: v_or_b32_e32 v5, v11, v13 8014; GFX10-NEXT: v_lshrrev_b64 v[10:11], v10, v[14:15] 8015; GFX10-NEXT: v_cndmask_b32_e32 v13, 0, v16, vcc_lo 8016; GFX10-NEXT: v_cmp_gt_u32_e64 s4, 64, v22 8017; GFX10-NEXT: v_or_b32_e32 v16, v18, v20 8018; GFX10-NEXT: v_or_b32_e32 v18, v19, v21 8019; GFX10-NEXT: v_cndmask_b32_e32 v12, v3, v12, vcc_lo 8020; GFX10-NEXT: v_cndmask_b32_e32 v5, v4, v5, vcc_lo 8021; GFX10-NEXT: v_lshrrev_b64 v[3:4], v22, v[14:15] 8022; GFX10-NEXT: v_cndmask_b32_e64 v10, v10, v16, s4 8023; GFX10-NEXT: v_cmp_eq_u32_e64 s5, 0, v22 8024; GFX10-NEXT: v_cmp_eq_u32_e64 s6, 0, v24 8025; GFX10-NEXT: v_cndmask_b32_e64 v11, v11, v18, s4 8026; GFX10-NEXT: v_cndmask_b32_e32 v14, 0, v17, vcc_lo 8027; GFX10-NEXT: v_cndmask_b32_e64 v6, v12, v6, s6 8028; GFX10-NEXT: v_cndmask_b32_e64 v7, v5, v7, s6 8029; GFX10-NEXT: v_cndmask_b32_e64 v5, v10, v8, s5 8030; GFX10-NEXT: v_cndmask_b32_e64 v8, v11, v9, s5 8031; GFX10-NEXT: v_cndmask_b32_e64 v9, 0, v3, s4 8032; GFX10-NEXT: v_cndmask_b32_e64 v10, 0, v4, s4 8033; GFX10-NEXT: v_or_b32_e32 v3, v23, v25 8034; GFX10-NEXT: v_or_b32_e32 v4, v13, v5 8035; GFX10-NEXT: v_or_b32_e32 v5, v14, v8 8036; GFX10-NEXT: v_or_b32_e32 v6, v6, v9 8037; GFX10-NEXT: v_or_b32_e32 v7, v7, v10 8038; GFX10-NEXT: s_setpc_b64 s[30:31] 8039; 8040; GFX11-LABEL: v_fshl_v2i128: 
8041; GFX11: ; %bb.0: 8042; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8043; GFX11-NEXT: v_and_b32_e32 v27, 0x7f, v16 8044; GFX11-NEXT: v_not_b32_e32 v21, v16 8045; GFX11-NEXT: v_lshrrev_b64 v[8:9], 1, v[8:9] 8046; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 8047; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v27 8048; GFX11-NEXT: v_and_b32_e32 v28, 0x7f, v21 8049; GFX11-NEXT: v_lshlrev_b64 v[21:22], v27, v[0:1] 8050; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_3) 8051; GFX11-NEXT: v_lshl_or_b32 v9, v10, 31, v9 8052; GFX11-NEXT: v_lshrrev_b64 v[10:11], 1, v[10:11] 8053; GFX11-NEXT: v_cndmask_b32_e32 v22, 0, v22, vcc_lo 8054; GFX11-NEXT: v_sub_nc_u32_e32 v17, 64, v27 8055; GFX11-NEXT: v_lshlrev_b64 v[18:19], v27, v[2:3] 8056; GFX11-NEXT: v_cmp_eq_u32_e64 s0, 0, v27 8057; GFX11-NEXT: v_cndmask_b32_e32 v21, 0, v21, vcc_lo 8058; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1) 8059; GFX11-NEXT: v_lshrrev_b64 v[16:17], v17, v[0:1] 8060; GFX11-NEXT: v_or_b32_e32 v18, v16, v18 8061; GFX11-NEXT: v_add_nc_u32_e32 v29, 0xffffffc0, v27 8062; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) 8063; GFX11-NEXT: v_or_b32_e32 v19, v17, v19 8064; GFX11-NEXT: v_lshlrev_b64 v[0:1], v29, v[0:1] 8065; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 8066; GFX11-NEXT: v_dual_cndmask_b32 v18, v0, v18 :: v_dual_cndmask_b32 v19, v1, v19 8067; GFX11-NEXT: v_sub_nc_u32_e32 v25, 64, v28 8068; GFX11-NEXT: v_add_nc_u32_e32 v16, 0xffffffc0, v28 8069; GFX11-NEXT: v_lshrrev_b64 v[23:24], v28, v[8:9] 8070; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v28 8071; GFX11-NEXT: v_lshrrev_b64 v[0:1], v28, v[10:11] 8072; GFX11-NEXT: v_lshlrev_b64 v[25:26], v25, v[10:11] 8073; GFX11-NEXT: v_lshrrev_b64 v[16:17], v16, v[10:11] 8074; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 0, v28 8075; GFX11-NEXT: v_cndmask_b32_e64 v2, v18, v2, s0 8076; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) 
| instskip(SKIP_1) | instid1(VALU_DEP_2) 8077; GFX11-NEXT: v_or_b32_e32 v23, v23, v25 8078; GFX11-NEXT: v_or_b32_e32 v24, v24, v26 8079; GFX11-NEXT: v_dual_cndmask_b32 v25, 0, v1 :: v_dual_cndmask_b32 v16, v16, v23 8080; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_4) 8081; GFX11-NEXT: v_cndmask_b32_e32 v10, v17, v24, vcc_lo 8082; GFX11-NEXT: v_cndmask_b32_e64 v23, v19, v3, s0 8083; GFX11-NEXT: v_and_b32_e32 v24, 0x7f, v20 8084; GFX11-NEXT: v_cndmask_b32_e64 v3, v16, v8, s1 8085; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) 8086; GFX11-NEXT: v_cndmask_b32_e64 v8, v10, v9, s1 8087; GFX11-NEXT: v_not_b32_e32 v16, v20 8088; GFX11-NEXT: v_cndmask_b32_e32 v10, 0, v0, vcc_lo 8089; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v24 8090; GFX11-NEXT: v_or_b32_e32 v0, v21, v3 8091; GFX11-NEXT: v_or_b32_e32 v1, v22, v8 8092; GFX11-NEXT: v_lshrrev_b64 v[8:9], 1, v[12:13] 8093; GFX11-NEXT: v_sub_nc_u32_e32 v3, 64, v24 8094; GFX11-NEXT: v_and_b32_e32 v22, 0x7f, v16 8095; GFX11-NEXT: v_or_b32_e32 v2, v2, v10 8096; GFX11-NEXT: v_lshlrev_b64 v[12:13], v24, v[6:7] 8097; GFX11-NEXT: v_lshlrev_b64 v[16:17], v24, v[4:5] 8098; GFX11-NEXT: v_lshrrev_b64 v[10:11], v3, v[4:5] 8099; GFX11-NEXT: v_lshl_or_b32 v9, v14, 31, v9 8100; GFX11-NEXT: v_lshrrev_b64 v[14:15], 1, v[14:15] 8101; GFX11-NEXT: v_sub_nc_u32_e32 v20, 64, v22 8102; GFX11-NEXT: v_add_nc_u32_e32 v3, 0xffffffc0, v24 8103; GFX11-NEXT: v_cmp_gt_u32_e64 s0, 64, v22 8104; GFX11-NEXT: v_or_b32_e32 v12, v10, v12 8105; GFX11-NEXT: v_add_nc_u32_e32 v10, 0xffffffc0, v22 8106; GFX11-NEXT: v_lshrrev_b64 v[18:19], v22, v[8:9] 8107; GFX11-NEXT: v_lshlrev_b64 v[20:21], v20, v[14:15] 8108; GFX11-NEXT: v_lshlrev_b64 v[3:4], v3, v[4:5] 8109; GFX11-NEXT: v_or_b32_e32 v5, v11, v13 8110; GFX11-NEXT: v_cndmask_b32_e32 v13, 0, v16, vcc_lo 8111; GFX11-NEXT: v_lshrrev_b64 v[10:11], v10, v[14:15] 8112; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 0, v22 8113; GFX11-NEXT: v_or_b32_e32 v16, v18, v20 8114; GFX11-NEXT: v_or_b32_e32 v18, 
v19, v21 8115; GFX11-NEXT: v_dual_cndmask_b32 v12, v3, v12 :: v_dual_cndmask_b32 v5, v4, v5 8116; GFX11-NEXT: v_lshrrev_b64 v[3:4], v22, v[14:15] 8117; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_3) 8118; GFX11-NEXT: v_cndmask_b32_e64 v10, v10, v16, s0 8119; GFX11-NEXT: v_cmp_eq_u32_e64 s2, 0, v24 8120; GFX11-NEXT: v_cndmask_b32_e64 v11, v11, v18, s0 8121; GFX11-NEXT: v_cndmask_b32_e32 v14, 0, v17, vcc_lo 8122; GFX11-NEXT: v_cndmask_b32_e64 v6, v12, v6, s2 8123; GFX11-NEXT: v_cndmask_b32_e64 v7, v5, v7, s2 8124; GFX11-NEXT: v_cndmask_b32_e64 v5, v10, v8, s1 8125; GFX11-NEXT: v_cndmask_b32_e64 v8, v11, v9, s1 8126; GFX11-NEXT: v_cndmask_b32_e64 v9, 0, v3, s0 8127; GFX11-NEXT: v_cndmask_b32_e64 v10, 0, v4, s0 8128; GFX11-NEXT: v_or_b32_e32 v3, v23, v25 8129; GFX11-NEXT: v_or_b32_e32 v4, v13, v5 8130; GFX11-NEXT: v_or_b32_e32 v5, v14, v8 8131; GFX11-NEXT: v_or_b32_e32 v6, v6, v9 8132; GFX11-NEXT: v_or_b32_e32 v7, v7, v10 8133; GFX11-NEXT: s_setpc_b64 s[30:31] 8134 %result = call <2 x i128> @llvm.fshl.v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %amt) 8135 ret <2 x i128> %result 8136} 8137 8138declare i7 @llvm.fshl.i7(i7, i7, i7) #0 8139declare i8 @llvm.fshl.i8(i8, i8, i8) #0 8140declare <2 x i8> @llvm.fshl.v2i8(<2 x i8>, <2 x i8>, <2 x i8>) #0 8141declare <4 x i8> @llvm.fshl.v4i8(<4 x i8>, <4 x i8>, <4 x i8>) #0 8142 8143declare i16 @llvm.fshl.i16(i16, i16, i16) #0 8144declare <2 x i16> @llvm.fshl.v2i16(<2 x i16>, <2 x i16>, <2 x i16>) #0 8145declare <3 x i16> @llvm.fshl.v3i16(<3 x i16>, <3 x i16>, <3 x i16>) #0 8146declare <4 x i16> @llvm.fshl.v4i16(<4 x i16>, <4 x i16>, <4 x i16>) #0 8147declare <5 x i16> @llvm.fshl.v5i16(<5 x i16>, <5 x i16>, <5 x i16>) #0 8148declare <6 x i16> @llvm.fshl.v6i16(<6 x i16>, <6 x i16>, <6 x i16>) #0 8149declare <8 x i16> @llvm.fshl.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) #0 8150 8151declare i24 @llvm.fshl.i24(i24, i24, i24) #0 8152declare <2 x i24> @llvm.fshl.v2i24(<2 x i24>, <2 x i24>, <2 x 
i24>) #0 8153 8154declare i32 @llvm.fshl.i32(i32, i32, i32) #0 8155declare <2 x i32> @llvm.fshl.v2i32(<2 x i32>, <2 x i32>, <2 x i32>) #0 8156declare <3 x i32> @llvm.fshl.v3i32(<3 x i32>, <3 x i32>, <3 x i32>) #0 8157declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) #0 8158declare <5 x i32> @llvm.fshl.v5i32(<5 x i32>, <5 x i32>, <5 x i32>) #0 8159declare <16 x i32> @llvm.fshl.v16i32(<16 x i32>, <16 x i32>, <16 x i32>) #0 8160 8161declare i48 @llvm.fshl.i48(i48, i48, i48) #0 8162 8163declare i64 @llvm.fshl.i64(i64, i64, i64) #0 8164declare <2 x i64> @llvm.fshl.v2i64(<2 x i64>, <2 x i64>, <2 x i64>) #0 8165 8166declare i128 @llvm.fshl.i128(i128, i128, i128) #0 8167declare <2 x i128> @llvm.fshl.v2i128(<2 x i128>, <2 x i128>, <2 x i128>) #0 8168 8169attributes #0 = { nounwind readnone speculatable willreturn } 8170
; Attribute group #0 is the group attached to each llvm.fshl.* intrinsic declaration listed above. It carries four function attributes: nounwind, readnone, speculatable and willreturn. Keep this group when adding a declaration for a new overload so that every fshl declaration in this test stays annotated the same way.