1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti -o - %s | FileCheck -check-prefixes=GCN,GFX6 %s 3; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=fiji -o - %s | FileCheck -check-prefixes=GCN,GFX8 %s 4; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 -o - %s | FileCheck -check-prefixes=GCN,GFX9 %s 5; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 -o - %s | FileCheck -check-prefixes=GCN,GFX10 %s 6; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -o - %s | FileCheck -check-prefixes=GFX11 %s 7 8define amdgpu_ps i7 @s_fshr_i7(i7 inreg %lhs, i7 inreg %rhs, i7 inreg %amt) { 9; GFX6-LABEL: s_fshr_i7: 10; GFX6: ; %bb.0: 11; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v0, 7 12; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0 13; GFX6-NEXT: s_and_b32 s2, s2, 0x7f 14; GFX6-NEXT: s_lshl_b32 s0, s0, 1 15; GFX6-NEXT: s_and_b32 s1, s1, 0x7f 16; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 17; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 18; GFX6-NEXT: v_mul_lo_u32 v1, v0, -7 19; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1 20; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1 21; GFX6-NEXT: v_mul_hi_u32 v0, s2, v0 22; GFX6-NEXT: v_mul_lo_u32 v0, v0, 7 23; GFX6-NEXT: v_sub_i32_e32 v0, vcc, s2, v0 24; GFX6-NEXT: v_add_i32_e32 v1, vcc, -7, v0 25; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 7, v0 26; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 27; GFX6-NEXT: v_add_i32_e32 v1, vcc, -7, v0 28; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 7, v0 29; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 30; GFX6-NEXT: v_sub_i32_e32 v1, vcc, 6, v0 31; GFX6-NEXT: v_and_b32_e32 v0, 0x7f, v0 32; GFX6-NEXT: v_and_b32_e32 v1, 0x7f, v1 33; GFX6-NEXT: v_lshl_b32_e32 v1, s0, v1 34; GFX6-NEXT: v_lshr_b32_e32 v0, s1, v0 35; GFX6-NEXT: v_or_b32_e32 v0, v1, v0 36; GFX6-NEXT: v_readfirstlane_b32 s0, v0 37; GFX6-NEXT: ; return to shader part epilog 38; 39; GFX8-LABEL: s_fshr_i7: 40; GFX8: ; %bb.0: 41; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v0, 7 
42; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 43; GFX8-NEXT: s_and_b32 s2, s2, 0x7f 44; GFX8-NEXT: s_lshl_b32 s0, s0, 1 45; GFX8-NEXT: s_and_b32 s1, s1, 0x7f 46; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 47; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0 48; GFX8-NEXT: v_mul_lo_u32 v1, v0, -7 49; GFX8-NEXT: v_mul_hi_u32 v1, v0, v1 50; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v1 51; GFX8-NEXT: v_mul_hi_u32 v0, s2, v0 52; GFX8-NEXT: v_mul_lo_u32 v0, v0, 7 53; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s2, v0 54; GFX8-NEXT: v_add_u32_e32 v1, vcc, -7, v0 55; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 7, v0 56; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 57; GFX8-NEXT: v_add_u32_e32 v1, vcc, -7, v0 58; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 7, v0 59; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 60; GFX8-NEXT: v_sub_u16_e32 v1, 6, v0 61; GFX8-NEXT: v_and_b32_e32 v0, 0x7f, v0 62; GFX8-NEXT: v_and_b32_e32 v1, 0x7f, v1 63; GFX8-NEXT: v_lshlrev_b16_e64 v1, v1, s0 64; GFX8-NEXT: v_lshrrev_b16_e64 v0, v0, s1 65; GFX8-NEXT: v_or_b32_e32 v0, v1, v0 66; GFX8-NEXT: v_readfirstlane_b32 s0, v0 67; GFX8-NEXT: ; return to shader part epilog 68; 69; GFX9-LABEL: s_fshr_i7: 70; GFX9: ; %bb.0: 71; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v0, 7 72; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 73; GFX9-NEXT: s_and_b32 s2, s2, 0x7f 74; GFX9-NEXT: s_lshl_b32 s0, s0, 1 75; GFX9-NEXT: s_and_b32 s1, s1, 0x7f 76; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 77; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 78; GFX9-NEXT: v_mul_lo_u32 v1, v0, -7 79; GFX9-NEXT: v_mul_hi_u32 v1, v0, v1 80; GFX9-NEXT: v_add_u32_e32 v0, v0, v1 81; GFX9-NEXT: v_mul_hi_u32 v0, s2, v0 82; GFX9-NEXT: v_mul_lo_u32 v0, v0, 7 83; GFX9-NEXT: v_sub_u32_e32 v0, s2, v0 84; GFX9-NEXT: v_add_u32_e32 v1, -7, v0 85; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 7, v0 86; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 87; GFX9-NEXT: v_add_u32_e32 v1, -7, v0 88; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 7, v0 89; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 90; GFX9-NEXT: v_sub_u16_e32 v1, 6, v0 91; GFX9-NEXT: v_and_b32_e32 
v0, 0x7f, v0 92; GFX9-NEXT: v_and_b32_e32 v1, 0x7f, v1 93; GFX9-NEXT: v_lshlrev_b16_e64 v1, v1, s0 94; GFX9-NEXT: v_lshrrev_b16_e64 v0, v0, s1 95; GFX9-NEXT: v_or_b32_e32 v0, v1, v0 96; GFX9-NEXT: v_readfirstlane_b32 s0, v0 97; GFX9-NEXT: ; return to shader part epilog 98; 99; GFX10-LABEL: s_fshr_i7: 100; GFX10: ; %bb.0: 101; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v0, 7 102; GFX10-NEXT: s_and_b32 s2, s2, 0x7f 103; GFX10-NEXT: s_lshl_b32 s0, s0, 1 104; GFX10-NEXT: s_and_b32 s1, s1, 0x7f 105; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 106; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 107; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0 108; GFX10-NEXT: v_mul_lo_u32 v1, v0, -7 109; GFX10-NEXT: v_mul_hi_u32 v1, v0, v1 110; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v1 111; GFX10-NEXT: v_mul_hi_u32 v0, s2, v0 112; GFX10-NEXT: v_mul_lo_u32 v0, v0, 7 113; GFX10-NEXT: v_sub_nc_u32_e32 v0, s2, v0 114; GFX10-NEXT: v_add_nc_u32_e32 v1, -7, v0 115; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v0 116; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 117; GFX10-NEXT: v_add_nc_u32_e32 v1, -7, v0 118; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v0 119; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 120; GFX10-NEXT: v_sub_nc_u16 v1, 6, v0 121; GFX10-NEXT: v_and_b32_e32 v0, 0x7f, v0 122; GFX10-NEXT: v_and_b32_e32 v1, 0x7f, v1 123; GFX10-NEXT: v_lshrrev_b16 v0, v0, s1 124; GFX10-NEXT: v_lshlrev_b16 v1, v1, s0 125; GFX10-NEXT: v_or_b32_e32 v0, v1, v0 126; GFX10-NEXT: v_readfirstlane_b32 s0, v0 127; GFX10-NEXT: ; return to shader part epilog 128; 129; GFX11-LABEL: s_fshr_i7: 130; GFX11: ; %bb.0: 131; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v0, 7 132; GFX11-NEXT: s_and_b32 s2, s2, 0x7f 133; GFX11-NEXT: s_lshl_b32 s0, s0, 1 134; GFX11-NEXT: s_and_b32 s1, s1, 0x7f 135; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) 136; GFX11-NEXT: v_rcp_iflag_f32_e32 v0, v0 137; GFX11-NEXT: s_waitcnt_depctr 0xfff 138; GFX11-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 139; GFX11-NEXT: v_cvt_u32_f32_e32 v0, v0 
140; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 141; GFX11-NEXT: v_mul_lo_u32 v1, v0, -7 142; GFX11-NEXT: v_mul_hi_u32 v1, v0, v1 143; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 144; GFX11-NEXT: v_add_nc_u32_e32 v0, v0, v1 145; GFX11-NEXT: v_mul_hi_u32 v0, s2, v0 146; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 147; GFX11-NEXT: v_mul_lo_u32 v0, v0, 7 148; GFX11-NEXT: v_sub_nc_u32_e32 v0, s2, v0 149; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 150; GFX11-NEXT: v_add_nc_u32_e32 v1, -7, v0 151; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v0 152; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 153; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 154; GFX11-NEXT: v_add_nc_u32_e32 v1, -7, v0 155; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v0 156; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 157; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 158; GFX11-NEXT: v_sub_nc_u16 v1, 6, v0 159; GFX11-NEXT: v_and_b32_e32 v0, 0x7f, v0 160; GFX11-NEXT: v_and_b32_e32 v1, 0x7f, v1 161; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 162; GFX11-NEXT: v_lshrrev_b16 v0, v0, s1 163; GFX11-NEXT: v_lshlrev_b16 v1, v1, s0 164; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 165; GFX11-NEXT: v_or_b32_e32 v0, v1, v0 166; GFX11-NEXT: v_readfirstlane_b32 s0, v0 167; GFX11-NEXT: ; return to shader part epilog 168 %result = call i7 @llvm.fshr.i7(i7 %lhs, i7 %rhs, i7 %amt) 169 ret i7 %result 170} 171 172define i7 @v_fshr_i7(i7 %lhs, i7 %rhs, i7 %amt) { 173; GFX6-LABEL: v_fshr_i7: 174; GFX6: ; %bb.0: 175; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 176; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v3, 7 177; GFX6-NEXT: v_rcp_iflag_f32_e32 v3, v3 178; GFX6-NEXT: v_and_b32_e32 v2, 0x7f, v2 179; GFX6-NEXT: 
v_lshlrev_b32_e32 v0, 1, v0 180; GFX6-NEXT: v_and_b32_e32 v1, 0x7f, v1 181; GFX6-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 182; GFX6-NEXT: v_cvt_u32_f32_e32 v3, v3 183; GFX6-NEXT: v_mul_lo_u32 v4, v3, -7 184; GFX6-NEXT: v_mul_hi_u32 v4, v3, v4 185; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v4 186; GFX6-NEXT: v_mul_hi_u32 v3, v2, v3 187; GFX6-NEXT: v_mul_lo_u32 v3, v3, 7 188; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v3 189; GFX6-NEXT: v_add_i32_e32 v3, vcc, -7, v2 190; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 7, v2 191; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 192; GFX6-NEXT: v_add_i32_e32 v3, vcc, -7, v2 193; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 7, v2 194; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 195; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 6, v2 196; GFX6-NEXT: v_and_b32_e32 v2, 0x7f, v2 197; GFX6-NEXT: v_and_b32_e32 v3, 0x7f, v3 198; GFX6-NEXT: v_lshlrev_b32_e32 v0, v3, v0 199; GFX6-NEXT: v_lshrrev_b32_e32 v1, v2, v1 200; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 201; GFX6-NEXT: s_setpc_b64 s[30:31] 202; 203; GFX8-LABEL: v_fshr_i7: 204; GFX8: ; %bb.0: 205; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 206; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v3, 7 207; GFX8-NEXT: v_rcp_iflag_f32_e32 v3, v3 208; GFX8-NEXT: v_and_b32_e32 v2, 0x7f, v2 209; GFX8-NEXT: v_lshlrev_b16_e32 v0, 1, v0 210; GFX8-NEXT: v_and_b32_e32 v1, 0x7f, v1 211; GFX8-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 212; GFX8-NEXT: v_cvt_u32_f32_e32 v3, v3 213; GFX8-NEXT: v_mul_lo_u32 v4, v3, -7 214; GFX8-NEXT: v_mul_hi_u32 v4, v3, v4 215; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v4 216; GFX8-NEXT: v_mul_hi_u32 v3, v2, v3 217; GFX8-NEXT: v_mul_lo_u32 v3, v3, 7 218; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v2, v3 219; GFX8-NEXT: v_add_u32_e32 v3, vcc, -7, v2 220; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 7, v2 221; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 222; GFX8-NEXT: v_add_u32_e32 v3, vcc, -7, v2 223; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 7, v2 224; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 225; GFX8-NEXT: v_sub_u16_e32 v3, 6, v2 226; GFX8-NEXT: 
v_and_b32_e32 v2, 0x7f, v2 227; GFX8-NEXT: v_and_b32_e32 v3, 0x7f, v3 228; GFX8-NEXT: v_lshlrev_b16_e32 v0, v3, v0 229; GFX8-NEXT: v_lshrrev_b16_e32 v1, v2, v1 230; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 231; GFX8-NEXT: s_setpc_b64 s[30:31] 232; 233; GFX9-LABEL: v_fshr_i7: 234; GFX9: ; %bb.0: 235; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 236; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v3, 7 237; GFX9-NEXT: v_rcp_iflag_f32_e32 v3, v3 238; GFX9-NEXT: v_and_b32_e32 v2, 0x7f, v2 239; GFX9-NEXT: v_lshlrev_b16_e32 v0, 1, v0 240; GFX9-NEXT: v_and_b32_e32 v1, 0x7f, v1 241; GFX9-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 242; GFX9-NEXT: v_cvt_u32_f32_e32 v3, v3 243; GFX9-NEXT: v_mul_lo_u32 v4, v3, -7 244; GFX9-NEXT: v_mul_hi_u32 v4, v3, v4 245; GFX9-NEXT: v_add_u32_e32 v3, v3, v4 246; GFX9-NEXT: v_mul_hi_u32 v3, v2, v3 247; GFX9-NEXT: v_mul_lo_u32 v3, v3, 7 248; GFX9-NEXT: v_sub_u32_e32 v2, v2, v3 249; GFX9-NEXT: v_add_u32_e32 v3, -7, v2 250; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 7, v2 251; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 252; GFX9-NEXT: v_add_u32_e32 v3, -7, v2 253; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 7, v2 254; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 255; GFX9-NEXT: v_sub_u16_e32 v3, 6, v2 256; GFX9-NEXT: v_and_b32_e32 v2, 0x7f, v2 257; GFX9-NEXT: v_and_b32_e32 v3, 0x7f, v3 258; GFX9-NEXT: v_lshlrev_b16_e32 v0, v3, v0 259; GFX9-NEXT: v_lshrrev_b16_e32 v1, v2, v1 260; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 261; GFX9-NEXT: s_setpc_b64 s[30:31] 262; 263; GFX10-LABEL: v_fshr_i7: 264; GFX10: ; %bb.0: 265; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 266; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v3, 7 267; GFX10-NEXT: v_and_b32_e32 v2, 0x7f, v2 268; GFX10-NEXT: v_lshlrev_b16 v0, 1, v0 269; GFX10-NEXT: v_and_b32_e32 v1, 0x7f, v1 270; GFX10-NEXT: v_rcp_iflag_f32_e32 v3, v3 271; GFX10-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 272; GFX10-NEXT: v_cvt_u32_f32_e32 v3, v3 273; GFX10-NEXT: v_mul_lo_u32 v4, v3, -7 274; GFX10-NEXT: v_mul_hi_u32 v4, v3, v4 275; GFX10-NEXT: v_add_nc_u32_e32 v3, 
v3, v4 276; GFX10-NEXT: v_mul_hi_u32 v3, v2, v3 277; GFX10-NEXT: v_mul_lo_u32 v3, v3, 7 278; GFX10-NEXT: v_sub_nc_u32_e32 v2, v2, v3 279; GFX10-NEXT: v_add_nc_u32_e32 v3, -7, v2 280; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v2 281; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo 282; GFX10-NEXT: v_add_nc_u32_e32 v3, -7, v2 283; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v2 284; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo 285; GFX10-NEXT: v_sub_nc_u16 v3, 6, v2 286; GFX10-NEXT: v_and_b32_e32 v2, 0x7f, v2 287; GFX10-NEXT: v_and_b32_e32 v3, 0x7f, v3 288; GFX10-NEXT: v_lshrrev_b16 v1, v2, v1 289; GFX10-NEXT: v_lshlrev_b16 v0, v3, v0 290; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 291; GFX10-NEXT: s_setpc_b64 s[30:31] 292; 293; GFX11-LABEL: v_fshr_i7: 294; GFX11: ; %bb.0: 295; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 296; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v3, 7 297; GFX11-NEXT: v_and_b32_e32 v2, 0x7f, v2 298; GFX11-NEXT: v_lshlrev_b16 v0, 1, v0 299; GFX11-NEXT: v_and_b32_e32 v1, 0x7f, v1 300; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_1) 301; GFX11-NEXT: v_rcp_iflag_f32_e32 v3, v3 302; GFX11-NEXT: s_waitcnt_depctr 0xfff 303; GFX11-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 304; GFX11-NEXT: v_cvt_u32_f32_e32 v3, v3 305; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 306; GFX11-NEXT: v_mul_lo_u32 v4, v3, -7 307; GFX11-NEXT: v_mul_hi_u32 v4, v3, v4 308; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 309; GFX11-NEXT: v_add_nc_u32_e32 v3, v3, v4 310; GFX11-NEXT: v_mul_hi_u32 v3, v2, v3 311; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 312; GFX11-NEXT: v_mul_lo_u32 v3, v3, 7 313; GFX11-NEXT: v_sub_nc_u32_e32 v2, v2, v3 314; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 315; GFX11-NEXT: v_add_nc_u32_e32 v3, -7, v2 316; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v2 317; GFX11-NEXT: 
v_cndmask_b32_e32 v2, v2, v3, vcc_lo 318; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 319; GFX11-NEXT: v_add_nc_u32_e32 v3, -7, v2 320; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 7, v2 321; GFX11-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo 322; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 323; GFX11-NEXT: v_sub_nc_u16 v3, 6, v2 324; GFX11-NEXT: v_and_b32_e32 v2, 0x7f, v2 325; GFX11-NEXT: v_and_b32_e32 v3, 0x7f, v3 326; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 327; GFX11-NEXT: v_lshrrev_b16 v1, v2, v1 328; GFX11-NEXT: v_lshlrev_b16 v0, v3, v0 329; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 330; GFX11-NEXT: v_or_b32_e32 v0, v0, v1 331; GFX11-NEXT: s_setpc_b64 s[30:31] 332 %result = call i7 @llvm.fshr.i7(i7 %lhs, i7 %rhs, i7 %amt) 333 ret i7 %result 334} 335 336define amdgpu_ps i8 @s_fshr_i8(i8 inreg %lhs, i8 inreg %rhs, i8 inreg %amt) { 337; GFX6-LABEL: s_fshr_i8: 338; GFX6: ; %bb.0: 339; GFX6-NEXT: s_and_b32 s3, s2, 7 340; GFX6-NEXT: s_andn2_b32 s2, 7, s2 341; GFX6-NEXT: s_lshl_b32 s0, s0, 1 342; GFX6-NEXT: s_and_b32 s1, s1, 0xff 343; GFX6-NEXT: s_lshl_b32 s0, s0, s2 344; GFX6-NEXT: s_lshr_b32 s1, s1, s3 345; GFX6-NEXT: s_or_b32 s0, s0, s1 346; GFX6-NEXT: ; return to shader part epilog 347; 348; GFX8-LABEL: s_fshr_i8: 349; GFX8: ; %bb.0: 350; GFX8-NEXT: s_andn2_b32 s3, 7, s2 351; GFX8-NEXT: s_and_b32 s2, s2, 7 352; GFX8-NEXT: s_and_b32 s1, s1, 0xff 353; GFX8-NEXT: s_lshl_b32 s0, s0, 1 354; GFX8-NEXT: s_and_b32 s3, 0xffff, s3 355; GFX8-NEXT: s_and_b32 s1, 0xffff, s1 356; GFX8-NEXT: s_and_b32 s2, 0xffff, s2 357; GFX8-NEXT: s_lshl_b32 s0, s0, s3 358; GFX8-NEXT: s_lshr_b32 s1, s1, s2 359; GFX8-NEXT: s_or_b32 s0, s0, s1 360; GFX8-NEXT: ; return to shader part epilog 361; 362; GFX9-LABEL: s_fshr_i8: 363; GFX9: ; %bb.0: 364; GFX9-NEXT: s_andn2_b32 s3, 7, s2 365; GFX9-NEXT: s_and_b32 s2, s2, 7 366; GFX9-NEXT: s_and_b32 s1, s1, 0xff 367; GFX9-NEXT: s_lshl_b32 
s0, s0, 1 368; GFX9-NEXT: s_and_b32 s3, 0xffff, s3 369; GFX9-NEXT: s_and_b32 s1, 0xffff, s1 370; GFX9-NEXT: s_and_b32 s2, 0xffff, s2 371; GFX9-NEXT: s_lshl_b32 s0, s0, s3 372; GFX9-NEXT: s_lshr_b32 s1, s1, s2 373; GFX9-NEXT: s_or_b32 s0, s0, s1 374; GFX9-NEXT: ; return to shader part epilog 375; 376; GFX10-LABEL: s_fshr_i8: 377; GFX10: ; %bb.0: 378; GFX10-NEXT: s_andn2_b32 s3, 7, s2 379; GFX10-NEXT: s_and_b32 s1, s1, 0xff 380; GFX10-NEXT: s_and_b32 s2, s2, 7 381; GFX10-NEXT: s_lshl_b32 s0, s0, 1 382; GFX10-NEXT: s_and_b32 s3, 0xffff, s3 383; GFX10-NEXT: s_and_b32 s1, 0xffff, s1 384; GFX10-NEXT: s_and_b32 s2, 0xffff, s2 385; GFX10-NEXT: s_lshl_b32 s0, s0, s3 386; GFX10-NEXT: s_lshr_b32 s1, s1, s2 387; GFX10-NEXT: s_or_b32 s0, s0, s1 388; GFX10-NEXT: ; return to shader part epilog 389; 390; GFX11-LABEL: s_fshr_i8: 391; GFX11: ; %bb.0: 392; GFX11-NEXT: s_and_not1_b32 s3, 7, s2 393; GFX11-NEXT: s_and_b32 s1, s1, 0xff 394; GFX11-NEXT: s_and_b32 s2, s2, 7 395; GFX11-NEXT: s_lshl_b32 s0, s0, 1 396; GFX11-NEXT: s_and_b32 s3, 0xffff, s3 397; GFX11-NEXT: s_and_b32 s1, 0xffff, s1 398; GFX11-NEXT: s_and_b32 s2, 0xffff, s2 399; GFX11-NEXT: s_lshl_b32 s0, s0, s3 400; GFX11-NEXT: s_lshr_b32 s1, s1, s2 401; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 402; GFX11-NEXT: s_or_b32 s0, s0, s1 403; GFX11-NEXT: ; return to shader part epilog 404 %result = call i8 @llvm.fshr.i8(i8 %lhs, i8 %rhs, i8 %amt) 405 ret i8 %result 406} 407 408define i8 @v_fshr_i8(i8 %lhs, i8 %rhs, i8 %amt) { 409; GFX6-LABEL: v_fshr_i8: 410; GFX6: ; %bb.0: 411; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 412; GFX6-NEXT: v_and_b32_e32 v3, 7, v2 413; GFX6-NEXT: v_not_b32_e32 v2, v2 414; GFX6-NEXT: v_and_b32_e32 v2, 7, v2 415; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0 416; GFX6-NEXT: v_and_b32_e32 v1, 0xff, v1 417; GFX6-NEXT: v_lshlrev_b32_e32 v0, v2, v0 418; GFX6-NEXT: v_lshrrev_b32_e32 v1, v3, v1 419; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 420; GFX6-NEXT: s_setpc_b64 s[30:31] 421; 422; GFX8-LABEL: v_fshr_i8: 423; 
GFX8: ; %bb.0: 424; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 425; GFX8-NEXT: v_xor_b32_e32 v3, -1, v2 426; GFX8-NEXT: v_lshlrev_b16_e32 v0, 1, v0 427; GFX8-NEXT: v_and_b32_e32 v3, 7, v3 428; GFX8-NEXT: v_and_b32_e32 v2, 7, v2 429; GFX8-NEXT: v_lshlrev_b16_e32 v0, v3, v0 430; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 431; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 432; GFX8-NEXT: s_setpc_b64 s[30:31] 433; 434; GFX9-LABEL: v_fshr_i8: 435; GFX9: ; %bb.0: 436; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 437; GFX9-NEXT: v_xor_b32_e32 v3, -1, v2 438; GFX9-NEXT: v_lshlrev_b16_e32 v0, 1, v0 439; GFX9-NEXT: v_and_b32_e32 v3, 7, v3 440; GFX9-NEXT: v_and_b32_e32 v2, 7, v2 441; GFX9-NEXT: v_lshlrev_b16_e32 v0, v3, v0 442; GFX9-NEXT: v_lshrrev_b16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 443; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 444; GFX9-NEXT: s_setpc_b64 s[30:31] 445; 446; GFX10-LABEL: v_fshr_i8: 447; GFX10: ; %bb.0: 448; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 449; GFX10-NEXT: v_xor_b32_e32 v3, -1, v2 450; GFX10-NEXT: v_lshlrev_b16 v0, 1, v0 451; GFX10-NEXT: v_and_b32_e32 v2, 7, v2 452; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1 453; GFX10-NEXT: v_and_b32_e32 v3, 7, v3 454; GFX10-NEXT: v_lshrrev_b16 v1, v2, v1 455; GFX10-NEXT: v_lshlrev_b16 v0, v3, v0 456; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 457; GFX10-NEXT: s_setpc_b64 s[30:31] 458; 459; GFX11-LABEL: v_fshr_i8: 460; GFX11: ; %bb.0: 461; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 462; GFX11-NEXT: v_xor_b32_e32 v3, -1, v2 463; GFX11-NEXT: v_lshlrev_b16 v0, 1, v0 464; GFX11-NEXT: v_and_b32_e32 v2, 7, v2 465; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 466; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2) 467; GFX11-NEXT: v_and_b32_e32 v3, 7, v3 468; GFX11-NEXT: v_lshrrev_b16 v1, v2, v1 469; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | 
instid1(VALU_DEP_1) 470; GFX11-NEXT: v_lshlrev_b16 v0, v3, v0 471; GFX11-NEXT: v_or_b32_e32 v0, v0, v1 472; GFX11-NEXT: s_setpc_b64 s[30:31] 473 %result = call i8 @llvm.fshr.i8(i8 %lhs, i8 %rhs, i8 %amt) 474 ret i8 %result 475} 476 477define amdgpu_ps i8 @s_fshr_i8_4(i8 inreg %lhs, i8 inreg %rhs) { 478; GFX6-LABEL: s_fshr_i8_4: 479; GFX6: ; %bb.0: 480; GFX6-NEXT: s_lshl_b32 s0, s0, 4 481; GFX6-NEXT: s_bfe_u32 s1, s1, 0x40004 482; GFX6-NEXT: s_or_b32 s0, s0, s1 483; GFX6-NEXT: ; return to shader part epilog 484; 485; GFX8-LABEL: s_fshr_i8_4: 486; GFX8: ; %bb.0: 487; GFX8-NEXT: s_and_b32 s1, s1, 0xff 488; GFX8-NEXT: s_and_b32 s1, 0xffff, s1 489; GFX8-NEXT: s_lshl_b32 s0, s0, 4 490; GFX8-NEXT: s_lshr_b32 s1, s1, 4 491; GFX8-NEXT: s_or_b32 s0, s0, s1 492; GFX8-NEXT: ; return to shader part epilog 493; 494; GFX9-LABEL: s_fshr_i8_4: 495; GFX9: ; %bb.0: 496; GFX9-NEXT: s_and_b32 s1, s1, 0xff 497; GFX9-NEXT: s_and_b32 s1, 0xffff, s1 498; GFX9-NEXT: s_lshl_b32 s0, s0, 4 499; GFX9-NEXT: s_lshr_b32 s1, s1, 4 500; GFX9-NEXT: s_or_b32 s0, s0, s1 501; GFX9-NEXT: ; return to shader part epilog 502; 503; GFX10-LABEL: s_fshr_i8_4: 504; GFX10: ; %bb.0: 505; GFX10-NEXT: s_and_b32 s1, s1, 0xff 506; GFX10-NEXT: s_lshl_b32 s0, s0, 4 507; GFX10-NEXT: s_and_b32 s1, 0xffff, s1 508; GFX10-NEXT: s_lshr_b32 s1, s1, 4 509; GFX10-NEXT: s_or_b32 s0, s0, s1 510; GFX10-NEXT: ; return to shader part epilog 511; 512; GFX11-LABEL: s_fshr_i8_4: 513; GFX11: ; %bb.0: 514; GFX11-NEXT: s_and_b32 s1, s1, 0xff 515; GFX11-NEXT: s_lshl_b32 s0, s0, 4 516; GFX11-NEXT: s_and_b32 s1, 0xffff, s1 517; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) 518; GFX11-NEXT: s_lshr_b32 s1, s1, 4 519; GFX11-NEXT: s_or_b32 s0, s0, s1 520; GFX11-NEXT: ; return to shader part epilog 521 %result = call i8 @llvm.fshr.i8(i8 %lhs, i8 %rhs, i8 4) 522 ret i8 %result 523} 524 525define i8 @v_fshr_i8_4(i8 %lhs, i8 %rhs) { 526; GFX6-LABEL: v_fshr_i8_4: 527; GFX6: ; %bb.0: 528; GFX6-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 529; GFX6-NEXT: v_lshlrev_b32_e32 v0, 4, v0 530; GFX6-NEXT: v_bfe_u32 v1, v1, 4, 4 531; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 532; GFX6-NEXT: s_setpc_b64 s[30:31] 533; 534; GFX8-LABEL: v_fshr_i8_4: 535; GFX8: ; %bb.0: 536; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 537; GFX8-NEXT: v_mov_b32_e32 v2, 4 538; GFX8-NEXT: v_lshlrev_b16_e32 v0, 4, v0 539; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 540; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 541; GFX8-NEXT: s_setpc_b64 s[30:31] 542; 543; GFX9-LABEL: v_fshr_i8_4: 544; GFX9: ; %bb.0: 545; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 546; GFX9-NEXT: v_mov_b32_e32 v2, 4 547; GFX9-NEXT: v_lshlrev_b16_e32 v0, 4, v0 548; GFX9-NEXT: v_lshrrev_b16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 549; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 550; GFX9-NEXT: s_setpc_b64 s[30:31] 551; 552; GFX10-LABEL: v_fshr_i8_4: 553; GFX10: ; %bb.0: 554; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 555; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1 556; GFX10-NEXT: v_lshlrev_b16 v0, 4, v0 557; GFX10-NEXT: v_lshrrev_b16 v1, 4, v1 558; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 559; GFX10-NEXT: s_setpc_b64 s[30:31] 560; 561; GFX11-LABEL: v_fshr_i8_4: 562; GFX11: ; %bb.0: 563; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 564; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 565; GFX11-NEXT: v_lshlrev_b16 v0, 4, v0 566; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 567; GFX11-NEXT: v_lshrrev_b16 v1, 4, v1 568; GFX11-NEXT: v_or_b32_e32 v0, v0, v1 569; GFX11-NEXT: s_setpc_b64 s[30:31] 570 %result = call i8 @llvm.fshr.i8(i8 %lhs, i8 %rhs, i8 4) 571 ret i8 %result 572} 573 574define amdgpu_ps i8 @s_fshr_i8_5(i8 inreg %lhs, i8 inreg %rhs) { 575; GFX6-LABEL: s_fshr_i8_5: 576; GFX6: ; %bb.0: 577; GFX6-NEXT: s_lshl_b32 s0, s0, 3 578; GFX6-NEXT: s_bfe_u32 s1, s1, 0x30005 579; GFX6-NEXT: s_or_b32 s0, s0, s1 
580; GFX6-NEXT: ; return to shader part epilog 581; 582; GFX8-LABEL: s_fshr_i8_5: 583; GFX8: ; %bb.0: 584; GFX8-NEXT: s_and_b32 s1, s1, 0xff 585; GFX8-NEXT: s_and_b32 s1, 0xffff, s1 586; GFX8-NEXT: s_lshl_b32 s0, s0, 3 587; GFX8-NEXT: s_lshr_b32 s1, s1, 5 588; GFX8-NEXT: s_or_b32 s0, s0, s1 589; GFX8-NEXT: ; return to shader part epilog 590; 591; GFX9-LABEL: s_fshr_i8_5: 592; GFX9: ; %bb.0: 593; GFX9-NEXT: s_and_b32 s1, s1, 0xff 594; GFX9-NEXT: s_and_b32 s1, 0xffff, s1 595; GFX9-NEXT: s_lshl_b32 s0, s0, 3 596; GFX9-NEXT: s_lshr_b32 s1, s1, 5 597; GFX9-NEXT: s_or_b32 s0, s0, s1 598; GFX9-NEXT: ; return to shader part epilog 599; 600; GFX10-LABEL: s_fshr_i8_5: 601; GFX10: ; %bb.0: 602; GFX10-NEXT: s_and_b32 s1, s1, 0xff 603; GFX10-NEXT: s_lshl_b32 s0, s0, 3 604; GFX10-NEXT: s_and_b32 s1, 0xffff, s1 605; GFX10-NEXT: s_lshr_b32 s1, s1, 5 606; GFX10-NEXT: s_or_b32 s0, s0, s1 607; GFX10-NEXT: ; return to shader part epilog 608; 609; GFX11-LABEL: s_fshr_i8_5: 610; GFX11: ; %bb.0: 611; GFX11-NEXT: s_and_b32 s1, s1, 0xff 612; GFX11-NEXT: s_lshl_b32 s0, s0, 3 613; GFX11-NEXT: s_and_b32 s1, 0xffff, s1 614; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) 615; GFX11-NEXT: s_lshr_b32 s1, s1, 5 616; GFX11-NEXT: s_or_b32 s0, s0, s1 617; GFX11-NEXT: ; return to shader part epilog 618 %result = call i8 @llvm.fshr.i8(i8 %lhs, i8 %rhs, i8 5) 619 ret i8 %result 620} 621 622define i8 @v_fshr_i8_5(i8 %lhs, i8 %rhs) { 623; GFX6-LABEL: v_fshr_i8_5: 624; GFX6: ; %bb.0: 625; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 626; GFX6-NEXT: v_lshlrev_b32_e32 v0, 3, v0 627; GFX6-NEXT: v_bfe_u32 v1, v1, 5, 3 628; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 629; GFX6-NEXT: s_setpc_b64 s[30:31] 630; 631; GFX8-LABEL: v_fshr_i8_5: 632; GFX8: ; %bb.0: 633; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 634; GFX8-NEXT: v_mov_b32_e32 v2, 5 635; GFX8-NEXT: v_lshlrev_b16_e32 v0, 3, v0 636; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD 
src0_sel:DWORD src1_sel:BYTE_0 637; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 638; GFX8-NEXT: s_setpc_b64 s[30:31] 639; 640; GFX9-LABEL: v_fshr_i8_5: 641; GFX9: ; %bb.0: 642; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 643; GFX9-NEXT: v_mov_b32_e32 v2, 5 644; GFX9-NEXT: v_lshlrev_b16_e32 v0, 3, v0 645; GFX9-NEXT: v_lshrrev_b16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 646; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 647; GFX9-NEXT: s_setpc_b64 s[30:31] 648; 649; GFX10-LABEL: v_fshr_i8_5: 650; GFX10: ; %bb.0: 651; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 652; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1 653; GFX10-NEXT: v_lshlrev_b16 v0, 3, v0 654; GFX10-NEXT: v_lshrrev_b16 v1, 5, v1 655; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 656; GFX10-NEXT: s_setpc_b64 s[30:31] 657; 658; GFX11-LABEL: v_fshr_i8_5: 659; GFX11: ; %bb.0: 660; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 661; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 662; GFX11-NEXT: v_lshlrev_b16 v0, 3, v0 663; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 664; GFX11-NEXT: v_lshrrev_b16 v1, 5, v1 665; GFX11-NEXT: v_or_b32_e32 v0, v0, v1 666; GFX11-NEXT: s_setpc_b64 s[30:31] 667 %result = call i8 @llvm.fshr.i8(i8 %lhs, i8 %rhs, i8 5) 668 ret i8 %result 669} 670 671define amdgpu_ps i16 @s_fshr_v2i8(i16 inreg %lhs.arg, i16 inreg %rhs.arg, i16 inreg %amt.arg) { 672; GFX6-LABEL: s_fshr_v2i8: 673; GFX6: ; %bb.0: 674; GFX6-NEXT: s_lshr_b32 s3, s0, 8 675; GFX6-NEXT: s_lshr_b32 s4, s2, 8 676; GFX6-NEXT: s_and_b32 s5, s2, 7 677; GFX6-NEXT: s_andn2_b32 s2, 7, s2 678; GFX6-NEXT: s_lshl_b32 s0, s0, 1 679; GFX6-NEXT: s_lshl_b32 s0, s0, s2 680; GFX6-NEXT: s_and_b32 s2, s1, 0xff 681; GFX6-NEXT: s_lshr_b32 s2, s2, s5 682; GFX6-NEXT: s_or_b32 s0, s0, s2 683; GFX6-NEXT: s_and_b32 s2, s4, 7 684; GFX6-NEXT: s_andn2_b32 s4, 7, s4 685; GFX6-NEXT: s_lshl_b32 s3, s3, 1 686; GFX6-NEXT: s_bfe_u32 s1, s1, 0x80008 687; GFX6-NEXT: s_lshl_b32 s3, s3, s4 688; GFX6-NEXT: 
s_lshr_b32 s1, s1, s2 689; GFX6-NEXT: s_or_b32 s1, s3, s1 690; GFX6-NEXT: s_and_b32 s1, s1, 0xff 691; GFX6-NEXT: s_and_b32 s0, s0, 0xff 692; GFX6-NEXT: s_lshl_b32 s1, s1, 8 693; GFX6-NEXT: s_or_b32 s0, s0, s1 694; GFX6-NEXT: ; return to shader part epilog 695; 696; GFX8-LABEL: s_fshr_v2i8: 697; GFX8: ; %bb.0: 698; GFX8-NEXT: s_lshr_b32 s4, s1, 8 699; GFX8-NEXT: s_lshr_b32 s5, s2, 8 700; GFX8-NEXT: s_andn2_b32 s6, 7, s2 701; GFX8-NEXT: s_and_b32 s2, s2, 7 702; GFX8-NEXT: s_and_b32 s1, s1, 0xff 703; GFX8-NEXT: s_lshr_b32 s3, s0, 8 704; GFX8-NEXT: s_lshl_b32 s0, s0, 1 705; GFX8-NEXT: s_and_b32 s6, 0xffff, s6 706; GFX8-NEXT: s_and_b32 s1, 0xffff, s1 707; GFX8-NEXT: s_and_b32 s2, 0xffff, s2 708; GFX8-NEXT: s_lshl_b32 s0, s0, s6 709; GFX8-NEXT: s_lshr_b32 s1, s1, s2 710; GFX8-NEXT: s_andn2_b32 s2, 7, s5 711; GFX8-NEXT: s_or_b32 s0, s0, s1 712; GFX8-NEXT: s_lshl_b32 s1, s3, 1 713; GFX8-NEXT: s_and_b32 s2, 0xffff, s2 714; GFX8-NEXT: s_lshl_b32 s1, s1, s2 715; GFX8-NEXT: s_and_b32 s2, s5, 7 716; GFX8-NEXT: s_and_b32 s3, s4, 0xff 717; GFX8-NEXT: s_and_b32 s3, 0xffff, s3 718; GFX8-NEXT: s_and_b32 s2, 0xffff, s2 719; GFX8-NEXT: s_lshr_b32 s2, s3, s2 720; GFX8-NEXT: s_or_b32 s1, s1, s2 721; GFX8-NEXT: s_and_b32 s1, s1, 0xff 722; GFX8-NEXT: s_and_b32 s0, s0, 0xff 723; GFX8-NEXT: s_lshl_b32 s1, s1, 8 724; GFX8-NEXT: s_or_b32 s0, s0, s1 725; GFX8-NEXT: ; return to shader part epilog 726; 727; GFX9-LABEL: s_fshr_v2i8: 728; GFX9: ; %bb.0: 729; GFX9-NEXT: s_lshr_b32 s4, s1, 8 730; GFX9-NEXT: s_lshr_b32 s5, s2, 8 731; GFX9-NEXT: s_andn2_b32 s6, 7, s2 732; GFX9-NEXT: s_and_b32 s2, s2, 7 733; GFX9-NEXT: s_and_b32 s1, s1, 0xff 734; GFX9-NEXT: s_lshr_b32 s3, s0, 8 735; GFX9-NEXT: s_lshl_b32 s0, s0, 1 736; GFX9-NEXT: s_and_b32 s6, 0xffff, s6 737; GFX9-NEXT: s_and_b32 s1, 0xffff, s1 738; GFX9-NEXT: s_and_b32 s2, 0xffff, s2 739; GFX9-NEXT: s_lshl_b32 s0, s0, s6 740; GFX9-NEXT: s_lshr_b32 s1, s1, s2 741; GFX9-NEXT: s_andn2_b32 s2, 7, s5 742; GFX9-NEXT: s_or_b32 s0, s0, s1 743; GFX9-NEXT: 
s_lshl_b32 s1, s3, 1 744; GFX9-NEXT: s_and_b32 s2, 0xffff, s2 745; GFX9-NEXT: s_lshl_b32 s1, s1, s2 746; GFX9-NEXT: s_and_b32 s2, s5, 7 747; GFX9-NEXT: s_and_b32 s3, s4, 0xff 748; GFX9-NEXT: s_and_b32 s3, 0xffff, s3 749; GFX9-NEXT: s_and_b32 s2, 0xffff, s2 750; GFX9-NEXT: s_lshr_b32 s2, s3, s2 751; GFX9-NEXT: s_or_b32 s1, s1, s2 752; GFX9-NEXT: s_and_b32 s1, s1, 0xff 753; GFX9-NEXT: s_and_b32 s0, s0, 0xff 754; GFX9-NEXT: s_lshl_b32 s1, s1, 8 755; GFX9-NEXT: s_or_b32 s0, s0, s1 756; GFX9-NEXT: ; return to shader part epilog 757; 758; GFX10-LABEL: s_fshr_v2i8: 759; GFX10: ; %bb.0: 760; GFX10-NEXT: s_andn2_b32 s5, 7, s2 761; GFX10-NEXT: s_lshr_b32 s3, s0, 8 762; GFX10-NEXT: s_lshr_b32 s4, s1, 8 763; GFX10-NEXT: s_lshl_b32 s0, s0, 1 764; GFX10-NEXT: s_and_b32 s5, 0xffff, s5 765; GFX10-NEXT: s_lshr_b32 s6, s2, 8 766; GFX10-NEXT: s_lshl_b32 s0, s0, s5 767; GFX10-NEXT: s_andn2_b32 s5, 7, s6 768; GFX10-NEXT: s_and_b32 s4, s4, 0xff 769; GFX10-NEXT: s_and_b32 s6, s6, 7 770; GFX10-NEXT: s_and_b32 s1, s1, 0xff 771; GFX10-NEXT: s_and_b32 s2, s2, 7 772; GFX10-NEXT: s_lshl_b32 s3, s3, 1 773; GFX10-NEXT: s_and_b32 s5, 0xffff, s5 774; GFX10-NEXT: s_and_b32 s4, 0xffff, s4 775; GFX10-NEXT: s_and_b32 s6, 0xffff, s6 776; GFX10-NEXT: s_and_b32 s1, 0xffff, s1 777; GFX10-NEXT: s_and_b32 s2, 0xffff, s2 778; GFX10-NEXT: s_lshl_b32 s3, s3, s5 779; GFX10-NEXT: s_lshr_b32 s4, s4, s6 780; GFX10-NEXT: s_lshr_b32 s1, s1, s2 781; GFX10-NEXT: s_or_b32 s2, s3, s4 782; GFX10-NEXT: s_or_b32 s0, s0, s1 783; GFX10-NEXT: s_and_b32 s1, s2, 0xff 784; GFX10-NEXT: s_and_b32 s0, s0, 0xff 785; GFX10-NEXT: s_lshl_b32 s1, s1, 8 786; GFX10-NEXT: s_or_b32 s0, s0, s1 787; GFX10-NEXT: ; return to shader part epilog 788; 789; GFX11-LABEL: s_fshr_v2i8: 790; GFX11: ; %bb.0: 791; GFX11-NEXT: s_and_not1_b32 s5, 7, s2 792; GFX11-NEXT: s_lshr_b32 s3, s0, 8 793; GFX11-NEXT: s_lshr_b32 s4, s1, 8 794; GFX11-NEXT: s_lshl_b32 s0, s0, 1 795; GFX11-NEXT: s_and_b32 s5, 0xffff, s5 796; GFX11-NEXT: s_lshr_b32 s6, s2, 8 797; 
GFX11-NEXT: s_lshl_b32 s0, s0, s5 798; GFX11-NEXT: s_and_not1_b32 s5, 7, s6 799; GFX11-NEXT: s_and_b32 s4, s4, 0xff 800; GFX11-NEXT: s_and_b32 s6, s6, 7 801; GFX11-NEXT: s_and_b32 s1, s1, 0xff 802; GFX11-NEXT: s_and_b32 s2, s2, 7 803; GFX11-NEXT: s_lshl_b32 s3, s3, 1 804; GFX11-NEXT: s_and_b32 s5, 0xffff, s5 805; GFX11-NEXT: s_and_b32 s4, 0xffff, s4 806; GFX11-NEXT: s_and_b32 s6, 0xffff, s6 807; GFX11-NEXT: s_and_b32 s1, 0xffff, s1 808; GFX11-NEXT: s_and_b32 s2, 0xffff, s2 809; GFX11-NEXT: s_lshl_b32 s3, s3, s5 810; GFX11-NEXT: s_lshr_b32 s4, s4, s6 811; GFX11-NEXT: s_lshr_b32 s1, s1, s2 812; GFX11-NEXT: s_or_b32 s2, s3, s4 813; GFX11-NEXT: s_or_b32 s0, s0, s1 814; GFX11-NEXT: s_and_b32 s1, s2, 0xff 815; GFX11-NEXT: s_and_b32 s0, s0, 0xff 816; GFX11-NEXT: s_lshl_b32 s1, s1, 8 817; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 818; GFX11-NEXT: s_or_b32 s0, s0, s1 819; GFX11-NEXT: ; return to shader part epilog 820 %lhs = bitcast i16 %lhs.arg to <2 x i8> 821 %rhs = bitcast i16 %rhs.arg to <2 x i8> 822 %amt = bitcast i16 %amt.arg to <2 x i8> 823 %result = call <2 x i8> @llvm.fshr.v2i8(<2 x i8> %lhs, <2 x i8> %rhs, <2 x i8> %amt) 824 %cast.result = bitcast <2 x i8> %result to i16 825 ret i16 %cast.result 826} 827 828define i16 @v_fshr_v2i8(i16 %lhs.arg, i16 %rhs.arg, i16 %amt.arg) { 829; GFX6-LABEL: v_fshr_v2i8: 830; GFX6: ; %bb.0: 831; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 832; GFX6-NEXT: v_lshrrev_b32_e32 v4, 8, v2 833; GFX6-NEXT: v_and_b32_e32 v5, 7, v2 834; GFX6-NEXT: v_not_b32_e32 v2, v2 835; GFX6-NEXT: v_lshrrev_b32_e32 v3, 8, v0 836; GFX6-NEXT: v_and_b32_e32 v2, 7, v2 837; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0 838; GFX6-NEXT: v_lshlrev_b32_e32 v0, v2, v0 839; GFX6-NEXT: v_and_b32_e32 v2, 0xff, v1 840; GFX6-NEXT: v_lshrrev_b32_e32 v2, v5, v2 841; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 842; GFX6-NEXT: v_and_b32_e32 v2, 7, v4 843; GFX6-NEXT: v_not_b32_e32 v4, v4 844; GFX6-NEXT: v_and_b32_e32 v4, 7, v4 845; GFX6-NEXT: v_lshlrev_b32_e32 v3, 1, v3 846; 
GFX6-NEXT: v_bfe_u32 v1, v1, 8, 8 847; GFX6-NEXT: v_lshlrev_b32_e32 v3, v4, v3 848; GFX6-NEXT: v_lshrrev_b32_e32 v1, v2, v1 849; GFX6-NEXT: v_or_b32_e32 v1, v3, v1 850; GFX6-NEXT: v_and_b32_e32 v1, 0xff, v1 851; GFX6-NEXT: v_and_b32_e32 v0, 0xff, v0 852; GFX6-NEXT: v_lshlrev_b32_e32 v1, 8, v1 853; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 854; GFX6-NEXT: s_setpc_b64 s[30:31] 855; 856; GFX8-LABEL: v_fshr_v2i8: 857; GFX8: ; %bb.0: 858; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 859; GFX8-NEXT: v_xor_b32_e32 v6, -1, v2 860; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v0 861; GFX8-NEXT: v_lshrrev_b32_e32 v5, 8, v2 862; GFX8-NEXT: v_lshlrev_b16_e32 v0, 1, v0 863; GFX8-NEXT: v_and_b32_e32 v6, 7, v6 864; GFX8-NEXT: v_and_b32_e32 v2, 7, v2 865; GFX8-NEXT: v_lshrrev_b32_e32 v4, 8, v1 866; GFX8-NEXT: v_lshlrev_b16_e32 v0, v6, v0 867; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 868; GFX8-NEXT: v_xor_b32_e32 v2, -1, v5 869; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 870; GFX8-NEXT: v_lshlrev_b16_e32 v1, 1, v3 871; GFX8-NEXT: v_and_b32_e32 v2, 7, v2 872; GFX8-NEXT: v_lshlrev_b16_e32 v1, v2, v1 873; GFX8-NEXT: v_and_b32_e32 v2, 7, v5 874; GFX8-NEXT: v_lshrrev_b16_sdwa v2, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 875; GFX8-NEXT: v_or_b32_e32 v1, v1, v2 876; GFX8-NEXT: v_and_b32_e32 v1, 0xff, v1 877; GFX8-NEXT: v_lshlrev_b16_e32 v1, 8, v1 878; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 879; GFX8-NEXT: s_setpc_b64 s[30:31] 880; 881; GFX9-LABEL: v_fshr_v2i8: 882; GFX9: ; %bb.0: 883; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 884; GFX9-NEXT: v_xor_b32_e32 v6, -1, v2 885; GFX9-NEXT: v_lshrrev_b32_e32 v3, 8, v0 886; GFX9-NEXT: v_lshrrev_b32_e32 v5, 8, v2 887; GFX9-NEXT: v_lshlrev_b16_e32 v0, 1, v0 888; GFX9-NEXT: v_and_b32_e32 v6, 7, v6 889; GFX9-NEXT: v_and_b32_e32 v2, 7, v2 890; GFX9-NEXT: v_lshrrev_b32_e32 v4, 8, v1 891; GFX9-NEXT: 
v_lshlrev_b16_e32 v0, v6, v0 892; GFX9-NEXT: v_lshrrev_b16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 893; GFX9-NEXT: v_xor_b32_e32 v2, -1, v5 894; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 895; GFX9-NEXT: v_lshlrev_b16_e32 v1, 1, v3 896; GFX9-NEXT: v_and_b32_e32 v2, 7, v2 897; GFX9-NEXT: v_lshlrev_b16_e32 v1, v2, v1 898; GFX9-NEXT: v_and_b32_e32 v2, 7, v5 899; GFX9-NEXT: v_lshrrev_b16_sdwa v2, v2, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 900; GFX9-NEXT: v_or_b32_e32 v1, v1, v2 901; GFX9-NEXT: v_and_b32_e32 v1, 0xff, v1 902; GFX9-NEXT: v_lshlrev_b16_e32 v1, 8, v1 903; GFX9-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 904; GFX9-NEXT: s_setpc_b64 s[30:31] 905; 906; GFX10-LABEL: v_fshr_v2i8: 907; GFX10: ; %bb.0: 908; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 909; GFX10-NEXT: v_lshrrev_b32_e32 v3, 8, v2 910; GFX10-NEXT: v_lshrrev_b32_e32 v4, 8, v0 911; GFX10-NEXT: v_lshrrev_b32_e32 v5, 8, v1 912; GFX10-NEXT: v_xor_b32_e32 v7, -1, v2 913; GFX10-NEXT: v_lshlrev_b16 v0, 1, v0 914; GFX10-NEXT: v_xor_b32_e32 v6, -1, v3 915; GFX10-NEXT: v_lshlrev_b16 v4, 1, v4 916; GFX10-NEXT: v_and_b32_e32 v3, 7, v3 917; GFX10-NEXT: v_and_b32_e32 v5, 0xff, v5 918; GFX10-NEXT: v_and_b32_e32 v2, 7, v2 919; GFX10-NEXT: v_and_b32_e32 v6, 7, v6 920; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1 921; GFX10-NEXT: v_and_b32_e32 v7, 7, v7 922; GFX10-NEXT: v_lshrrev_b16 v3, v3, v5 923; GFX10-NEXT: v_lshlrev_b16 v4, v6, v4 924; GFX10-NEXT: v_lshrrev_b16 v1, v2, v1 925; GFX10-NEXT: v_lshlrev_b16 v0, v7, v0 926; GFX10-NEXT: v_or_b32_e32 v2, v4, v3 927; GFX10-NEXT: v_mov_b32_e32 v3, 0xff 928; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 929; GFX10-NEXT: v_and_b32_sdwa v1, v2, v3 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 930; GFX10-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 931; GFX10-NEXT: s_setpc_b64 s[30:31] 932; 
933; GFX11-LABEL: v_fshr_v2i8: 934; GFX11: ; %bb.0: 935; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 936; GFX11-NEXT: v_lshrrev_b32_e32 v3, 8, v2 937; GFX11-NEXT: v_lshrrev_b32_e32 v4, 8, v0 938; GFX11-NEXT: v_lshrrev_b32_e32 v5, 8, v1 939; GFX11-NEXT: v_xor_b32_e32 v7, -1, v2 940; GFX11-NEXT: v_lshlrev_b16 v0, 1, v0 941; GFX11-NEXT: v_xor_b32_e32 v6, -1, v3 942; GFX11-NEXT: v_lshlrev_b16 v4, 1, v4 943; GFX11-NEXT: v_and_b32_e32 v3, 7, v3 944; GFX11-NEXT: v_and_b32_e32 v5, 0xff, v5 945; GFX11-NEXT: v_and_b32_e32 v2, 7, v2 946; GFX11-NEXT: v_and_b32_e32 v6, 7, v6 947; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 948; GFX11-NEXT: v_and_b32_e32 v7, 7, v7 949; GFX11-NEXT: v_lshrrev_b16 v3, v3, v5 950; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 951; GFX11-NEXT: v_lshlrev_b16 v4, v6, v4 952; GFX11-NEXT: v_lshrrev_b16 v1, v2, v1 953; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) 954; GFX11-NEXT: v_lshlrev_b16 v0, v7, v0 955; GFX11-NEXT: v_or_b32_e32 v2, v4, v3 956; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 957; GFX11-NEXT: v_or_b32_e32 v0, v0, v1 958; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v2 959; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 960; GFX11-NEXT: v_and_b32_e32 v0, 0xff, v0 961; GFX11-NEXT: v_lshlrev_b16 v1, 8, v1 962; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 963; GFX11-NEXT: v_or_b32_e32 v0, v0, v1 964; GFX11-NEXT: s_setpc_b64 s[30:31] 965 %lhs = bitcast i16 %lhs.arg to <2 x i8> 966 %rhs = bitcast i16 %rhs.arg to <2 x i8> 967 %amt = bitcast i16 %amt.arg to <2 x i8> 968 %result = call <2 x i8> @llvm.fshr.v2i8(<2 x i8> %lhs, <2 x i8> %rhs, <2 x i8> %amt) 969 %cast.result = bitcast <2 x i8> %result to i16 970 ret i16 %cast.result 971} 972; s_fshr_v4i8: funnel shift right on <4 x i8> in an amdgpu_ps function whose three i32 arguments are all marked inreg; the body bitcasts each argument to <4 x i8>, calls llvm.fshr.v4i8 and bitcasts the result back to i32. The assertion comments that follow are script-generated (see the file header), so regenerate them instead of editing by hand. 973define amdgpu_ps i32 @s_fshr_v4i8(i32 inreg %lhs.arg, i32 inreg %rhs.arg, i32 inreg %amt.arg) { 974; GFX6-LABEL: s_fshr_v4i8: 975; GFX6: ; %bb.0: 976; GFX6-NEXT: 
s_lshr_b32 s3, s0, 8 977; GFX6-NEXT: s_lshr_b32 s4, s0, 16 978; GFX6-NEXT: s_lshr_b32 s5, s0, 24 979; GFX6-NEXT: s_lshr_b32 s7, s2, 8 980; GFX6-NEXT: s_lshr_b32 s8, s2, 16 981; GFX6-NEXT: s_lshr_b32 s9, s2, 24 982; GFX6-NEXT: s_and_b32 s10, s2, 7 983; GFX6-NEXT: s_andn2_b32 s2, 7, s2 984; GFX6-NEXT: s_lshl_b32 s0, s0, 1 985; GFX6-NEXT: s_lshl_b32 s0, s0, s2 986; GFX6-NEXT: s_and_b32 s2, s1, 0xff 987; GFX6-NEXT: s_lshr_b32 s2, s2, s10 988; GFX6-NEXT: s_or_b32 s0, s0, s2 989; GFX6-NEXT: s_and_b32 s2, s7, 7 990; GFX6-NEXT: s_andn2_b32 s7, 7, s7 991; GFX6-NEXT: s_lshl_b32 s3, s3, 1 992; GFX6-NEXT: s_lshl_b32 s3, s3, s7 993; GFX6-NEXT: s_bfe_u32 s7, s1, 0x80008 994; GFX6-NEXT: s_lshr_b32 s2, s7, s2 995; GFX6-NEXT: s_lshr_b32 s6, s1, 24 996; GFX6-NEXT: s_or_b32 s2, s3, s2 997; GFX6-NEXT: s_and_b32 s3, s8, 7 998; GFX6-NEXT: s_andn2_b32 s7, 7, s8 999; GFX6-NEXT: s_lshl_b32 s4, s4, 1 1000; GFX6-NEXT: s_bfe_u32 s1, s1, 0x80010 1001; GFX6-NEXT: s_lshl_b32 s4, s4, s7 1002; GFX6-NEXT: s_lshr_b32 s1, s1, s3 1003; GFX6-NEXT: s_or_b32 s1, s4, s1 1004; GFX6-NEXT: s_and_b32 s3, s9, 7 1005; GFX6-NEXT: s_andn2_b32 s4, 7, s9 1006; GFX6-NEXT: s_lshl_b32 s5, s5, 1 1007; GFX6-NEXT: s_and_b32 s2, s2, 0xff 1008; GFX6-NEXT: s_lshl_b32 s4, s5, s4 1009; GFX6-NEXT: s_lshr_b32 s3, s6, s3 1010; GFX6-NEXT: s_and_b32 s0, s0, 0xff 1011; GFX6-NEXT: s_lshl_b32 s2, s2, 8 1012; GFX6-NEXT: s_and_b32 s1, s1, 0xff 1013; GFX6-NEXT: s_or_b32 s3, s4, s3 1014; GFX6-NEXT: s_or_b32 s0, s0, s2 1015; GFX6-NEXT: s_lshl_b32 s1, s1, 16 1016; GFX6-NEXT: s_or_b32 s0, s0, s1 1017; GFX6-NEXT: s_and_b32 s1, s3, 0xff 1018; GFX6-NEXT: s_lshl_b32 s1, s1, 24 1019; GFX6-NEXT: s_or_b32 s0, s0, s1 1020; GFX6-NEXT: ; return to shader part epilog 1021; 1022; GFX8-LABEL: s_fshr_v4i8: 1023; GFX8: ; %bb.0: 1024; GFX8-NEXT: s_lshr_b32 s6, s1, 8 1025; GFX8-NEXT: s_lshr_b32 s7, s1, 16 1026; GFX8-NEXT: s_lshr_b32 s8, s1, 24 1027; GFX8-NEXT: s_lshr_b32 s9, s2, 8 1028; GFX8-NEXT: s_lshr_b32 s10, s2, 16 1029; GFX8-NEXT: s_lshr_b32 s11, s2, 
24 1030; GFX8-NEXT: s_andn2_b32 s12, 7, s2 1031; GFX8-NEXT: s_and_b32 s2, s2, 7 1032; GFX8-NEXT: s_and_b32 s1, s1, 0xff 1033; GFX8-NEXT: s_lshr_b32 s3, s0, 8 1034; GFX8-NEXT: s_lshr_b32 s4, s0, 16 1035; GFX8-NEXT: s_lshr_b32 s5, s0, 24 1036; GFX8-NEXT: s_lshl_b32 s0, s0, 1 1037; GFX8-NEXT: s_and_b32 s12, 0xffff, s12 1038; GFX8-NEXT: s_and_b32 s1, 0xffff, s1 1039; GFX8-NEXT: s_and_b32 s2, 0xffff, s2 1040; GFX8-NEXT: s_lshl_b32 s0, s0, s12 1041; GFX8-NEXT: s_lshr_b32 s1, s1, s2 1042; GFX8-NEXT: s_andn2_b32 s2, 7, s9 1043; GFX8-NEXT: s_or_b32 s0, s0, s1 1044; GFX8-NEXT: s_lshl_b32 s1, s3, 1 1045; GFX8-NEXT: s_and_b32 s2, 0xffff, s2 1046; GFX8-NEXT: s_lshl_b32 s1, s1, s2 1047; GFX8-NEXT: s_and_b32 s2, s9, 7 1048; GFX8-NEXT: s_and_b32 s3, s6, 0xff 1049; GFX8-NEXT: s_and_b32 s3, 0xffff, s3 1050; GFX8-NEXT: s_and_b32 s2, 0xffff, s2 1051; GFX8-NEXT: s_lshr_b32 s2, s3, s2 1052; GFX8-NEXT: s_andn2_b32 s3, 7, s10 1053; GFX8-NEXT: s_or_b32 s1, s1, s2 1054; GFX8-NEXT: s_lshl_b32 s2, s4, 1 1055; GFX8-NEXT: s_and_b32 s3, 0xffff, s3 1056; GFX8-NEXT: s_lshl_b32 s2, s2, s3 1057; GFX8-NEXT: s_and_b32 s3, s10, 7 1058; GFX8-NEXT: s_and_b32 s4, s7, 0xff 1059; GFX8-NEXT: s_and_b32 s4, 0xffff, s4 1060; GFX8-NEXT: s_and_b32 s3, 0xffff, s3 1061; GFX8-NEXT: s_lshr_b32 s3, s4, s3 1062; GFX8-NEXT: s_andn2_b32 s4, 7, s11 1063; GFX8-NEXT: s_or_b32 s2, s2, s3 1064; GFX8-NEXT: s_lshl_b32 s3, s5, 1 1065; GFX8-NEXT: s_and_b32 s4, 0xffff, s4 1066; GFX8-NEXT: s_lshl_b32 s3, s3, s4 1067; GFX8-NEXT: s_and_b32 s4, s11, 7 1068; GFX8-NEXT: s_and_b32 s1, s1, 0xff 1069; GFX8-NEXT: s_and_b32 s4, 0xffff, s4 1070; GFX8-NEXT: s_and_b32 s0, s0, 0xff 1071; GFX8-NEXT: s_lshl_b32 s1, s1, 8 1072; GFX8-NEXT: s_lshr_b32 s4, s8, s4 1073; GFX8-NEXT: s_or_b32 s0, s0, s1 1074; GFX8-NEXT: s_and_b32 s1, s2, 0xff 1075; GFX8-NEXT: s_or_b32 s3, s3, s4 1076; GFX8-NEXT: s_lshl_b32 s1, s1, 16 1077; GFX8-NEXT: s_or_b32 s0, s0, s1 1078; GFX8-NEXT: s_and_b32 s1, s3, 0xff 1079; GFX8-NEXT: s_lshl_b32 s1, s1, 24 1080; GFX8-NEXT: 
s_or_b32 s0, s0, s1 1081; GFX8-NEXT: ; return to shader part epilog 1082; 1083; GFX9-LABEL: s_fshr_v4i8: 1084; GFX9: ; %bb.0: 1085; GFX9-NEXT: s_lshr_b32 s6, s1, 8 1086; GFX9-NEXT: s_lshr_b32 s7, s1, 16 1087; GFX9-NEXT: s_lshr_b32 s8, s1, 24 1088; GFX9-NEXT: s_lshr_b32 s9, s2, 8 1089; GFX9-NEXT: s_lshr_b32 s10, s2, 16 1090; GFX9-NEXT: s_lshr_b32 s11, s2, 24 1091; GFX9-NEXT: s_andn2_b32 s12, 7, s2 1092; GFX9-NEXT: s_and_b32 s2, s2, 7 1093; GFX9-NEXT: s_and_b32 s1, s1, 0xff 1094; GFX9-NEXT: s_lshr_b32 s3, s0, 8 1095; GFX9-NEXT: s_lshr_b32 s4, s0, 16 1096; GFX9-NEXT: s_lshr_b32 s5, s0, 24 1097; GFX9-NEXT: s_lshl_b32 s0, s0, 1 1098; GFX9-NEXT: s_and_b32 s12, 0xffff, s12 1099; GFX9-NEXT: s_and_b32 s1, 0xffff, s1 1100; GFX9-NEXT: s_and_b32 s2, 0xffff, s2 1101; GFX9-NEXT: s_lshl_b32 s0, s0, s12 1102; GFX9-NEXT: s_lshr_b32 s1, s1, s2 1103; GFX9-NEXT: s_andn2_b32 s2, 7, s9 1104; GFX9-NEXT: s_or_b32 s0, s0, s1 1105; GFX9-NEXT: s_lshl_b32 s1, s3, 1 1106; GFX9-NEXT: s_and_b32 s2, 0xffff, s2 1107; GFX9-NEXT: s_lshl_b32 s1, s1, s2 1108; GFX9-NEXT: s_and_b32 s2, s9, 7 1109; GFX9-NEXT: s_and_b32 s3, s6, 0xff 1110; GFX9-NEXT: s_and_b32 s3, 0xffff, s3 1111; GFX9-NEXT: s_and_b32 s2, 0xffff, s2 1112; GFX9-NEXT: s_lshr_b32 s2, s3, s2 1113; GFX9-NEXT: s_andn2_b32 s3, 7, s10 1114; GFX9-NEXT: s_or_b32 s1, s1, s2 1115; GFX9-NEXT: s_lshl_b32 s2, s4, 1 1116; GFX9-NEXT: s_and_b32 s3, 0xffff, s3 1117; GFX9-NEXT: s_lshl_b32 s2, s2, s3 1118; GFX9-NEXT: s_and_b32 s3, s10, 7 1119; GFX9-NEXT: s_and_b32 s4, s7, 0xff 1120; GFX9-NEXT: s_and_b32 s4, 0xffff, s4 1121; GFX9-NEXT: s_and_b32 s3, 0xffff, s3 1122; GFX9-NEXT: s_lshr_b32 s3, s4, s3 1123; GFX9-NEXT: s_andn2_b32 s4, 7, s11 1124; GFX9-NEXT: s_or_b32 s2, s2, s3 1125; GFX9-NEXT: s_lshl_b32 s3, s5, 1 1126; GFX9-NEXT: s_and_b32 s4, 0xffff, s4 1127; GFX9-NEXT: s_lshl_b32 s3, s3, s4 1128; GFX9-NEXT: s_and_b32 s4, s11, 7 1129; GFX9-NEXT: s_and_b32 s1, s1, 0xff 1130; GFX9-NEXT: s_and_b32 s4, 0xffff, s4 1131; GFX9-NEXT: s_and_b32 s0, s0, 0xff 1132; 
GFX9-NEXT: s_lshl_b32 s1, s1, 8 1133; GFX9-NEXT: s_lshr_b32 s4, s8, s4 1134; GFX9-NEXT: s_or_b32 s0, s0, s1 1135; GFX9-NEXT: s_and_b32 s1, s2, 0xff 1136; GFX9-NEXT: s_or_b32 s3, s3, s4 1137; GFX9-NEXT: s_lshl_b32 s1, s1, 16 1138; GFX9-NEXT: s_or_b32 s0, s0, s1 1139; GFX9-NEXT: s_and_b32 s1, s3, 0xff 1140; GFX9-NEXT: s_lshl_b32 s1, s1, 24 1141; GFX9-NEXT: s_or_b32 s0, s0, s1 1142; GFX9-NEXT: ; return to shader part epilog 1143; 1144; GFX10-LABEL: s_fshr_v4i8: 1145; GFX10: ; %bb.0: 1146; GFX10-NEXT: s_lshr_b32 s6, s1, 8 1147; GFX10-NEXT: s_lshr_b32 s7, s1, 16 1148; GFX10-NEXT: s_lshr_b32 s8, s1, 24 1149; GFX10-NEXT: s_lshr_b32 s9, s2, 8 1150; GFX10-NEXT: s_lshr_b32 s10, s2, 16 1151; GFX10-NEXT: s_lshr_b32 s11, s2, 24 1152; GFX10-NEXT: s_andn2_b32 s12, 7, s2 1153; GFX10-NEXT: s_and_b32 s1, s1, 0xff 1154; GFX10-NEXT: s_and_b32 s2, s2, 7 1155; GFX10-NEXT: s_and_b32 s1, 0xffff, s1 1156; GFX10-NEXT: s_and_b32 s2, 0xffff, s2 1157; GFX10-NEXT: s_lshr_b32 s3, s0, 8 1158; GFX10-NEXT: s_lshr_b32 s1, s1, s2 1159; GFX10-NEXT: s_andn2_b32 s2, 7, s9 1160; GFX10-NEXT: s_and_b32 s6, s6, 0xff 1161; GFX10-NEXT: s_and_b32 s9, s9, 7 1162; GFX10-NEXT: s_lshr_b32 s4, s0, 16 1163; GFX10-NEXT: s_lshr_b32 s5, s0, 24 1164; GFX10-NEXT: s_lshl_b32 s0, s0, 1 1165; GFX10-NEXT: s_and_b32 s12, 0xffff, s12 1166; GFX10-NEXT: s_lshl_b32 s3, s3, 1 1167; GFX10-NEXT: s_and_b32 s2, 0xffff, s2 1168; GFX10-NEXT: s_and_b32 s6, 0xffff, s6 1169; GFX10-NEXT: s_and_b32 s9, 0xffff, s9 1170; GFX10-NEXT: s_lshl_b32 s0, s0, s12 1171; GFX10-NEXT: s_lshl_b32 s2, s3, s2 1172; GFX10-NEXT: s_lshr_b32 s3, s6, s9 1173; GFX10-NEXT: s_or_b32 s0, s0, s1 1174; GFX10-NEXT: s_or_b32 s1, s2, s3 1175; GFX10-NEXT: s_andn2_b32 s2, 7, s10 1176; GFX10-NEXT: s_lshl_b32 s3, s4, 1 1177; GFX10-NEXT: s_and_b32 s4, s7, 0xff 1178; GFX10-NEXT: s_and_b32 s6, s10, 7 1179; GFX10-NEXT: s_and_b32 s2, 0xffff, s2 1180; GFX10-NEXT: s_and_b32 s4, 0xffff, s4 1181; GFX10-NEXT: s_and_b32 s6, 0xffff, s6 1182; GFX10-NEXT: s_lshl_b32 s2, s3, s2 1183; 
GFX10-NEXT: s_lshr_b32 s3, s4, s6 1184; GFX10-NEXT: s_lshl_b32 s4, s5, 1 1185; GFX10-NEXT: s_andn2_b32 s5, 7, s11 1186; GFX10-NEXT: s_and_b32 s6, s11, 7 1187; GFX10-NEXT: s_and_b32 s5, 0xffff, s5 1188; GFX10-NEXT: s_and_b32 s6, 0xffff, s6 1189; GFX10-NEXT: s_lshl_b32 s4, s4, s5 1190; GFX10-NEXT: s_lshr_b32 s5, s8, s6 1191; GFX10-NEXT: s_or_b32 s2, s2, s3 1192; GFX10-NEXT: s_and_b32 s1, s1, 0xff 1193; GFX10-NEXT: s_or_b32 s3, s4, s5 1194; GFX10-NEXT: s_and_b32 s0, s0, 0xff 1195; GFX10-NEXT: s_lshl_b32 s1, s1, 8 1196; GFX10-NEXT: s_and_b32 s2, s2, 0xff 1197; GFX10-NEXT: s_or_b32 s0, s0, s1 1198; GFX10-NEXT: s_lshl_b32 s1, s2, 16 1199; GFX10-NEXT: s_and_b32 s2, s3, 0xff 1200; GFX10-NEXT: s_or_b32 s0, s0, s1 1201; GFX10-NEXT: s_lshl_b32 s1, s2, 24 1202; GFX10-NEXT: s_or_b32 s0, s0, s1 1203; GFX10-NEXT: ; return to shader part epilog 1204; 1205; GFX11-LABEL: s_fshr_v4i8: 1206; GFX11: ; %bb.0: 1207; GFX11-NEXT: s_lshr_b32 s6, s1, 8 1208; GFX11-NEXT: s_lshr_b32 s7, s1, 16 1209; GFX11-NEXT: s_lshr_b32 s8, s1, 24 1210; GFX11-NEXT: s_lshr_b32 s9, s2, 8 1211; GFX11-NEXT: s_lshr_b32 s10, s2, 16 1212; GFX11-NEXT: s_lshr_b32 s11, s2, 24 1213; GFX11-NEXT: s_and_not1_b32 s12, 7, s2 1214; GFX11-NEXT: s_and_b32 s1, s1, 0xff 1215; GFX11-NEXT: s_and_b32 s2, s2, 7 1216; GFX11-NEXT: s_and_b32 s1, 0xffff, s1 1217; GFX11-NEXT: s_and_b32 s2, 0xffff, s2 1218; GFX11-NEXT: s_lshr_b32 s3, s0, 8 1219; GFX11-NEXT: s_lshr_b32 s1, s1, s2 1220; GFX11-NEXT: s_and_not1_b32 s2, 7, s9 1221; GFX11-NEXT: s_and_b32 s6, s6, 0xff 1222; GFX11-NEXT: s_and_b32 s9, s9, 7 1223; GFX11-NEXT: s_lshr_b32 s4, s0, 16 1224; GFX11-NEXT: s_lshr_b32 s5, s0, 24 1225; GFX11-NEXT: s_lshl_b32 s0, s0, 1 1226; GFX11-NEXT: s_and_b32 s12, 0xffff, s12 1227; GFX11-NEXT: s_lshl_b32 s3, s3, 1 1228; GFX11-NEXT: s_and_b32 s2, 0xffff, s2 1229; GFX11-NEXT: s_and_b32 s6, 0xffff, s6 1230; GFX11-NEXT: s_and_b32 s9, 0xffff, s9 1231; GFX11-NEXT: s_lshl_b32 s0, s0, s12 1232; GFX11-NEXT: s_lshl_b32 s2, s3, s2 1233; GFX11-NEXT: s_lshr_b32 s3, 
s6, s9 1234; GFX11-NEXT: s_or_b32 s0, s0, s1 1235; GFX11-NEXT: s_or_b32 s1, s2, s3 1236; GFX11-NEXT: s_and_not1_b32 s2, 7, s10 1237; GFX11-NEXT: s_lshl_b32 s3, s4, 1 1238; GFX11-NEXT: s_and_b32 s4, s7, 0xff 1239; GFX11-NEXT: s_and_b32 s6, s10, 7 1240; GFX11-NEXT: s_and_b32 s2, 0xffff, s2 1241; GFX11-NEXT: s_and_b32 s4, 0xffff, s4 1242; GFX11-NEXT: s_and_b32 s6, 0xffff, s6 1243; GFX11-NEXT: s_lshl_b32 s2, s3, s2 1244; GFX11-NEXT: s_lshr_b32 s3, s4, s6 1245; GFX11-NEXT: s_lshl_b32 s4, s5, 1 1246; GFX11-NEXT: s_and_not1_b32 s5, 7, s11 1247; GFX11-NEXT: s_and_b32 s6, s11, 7 1248; GFX11-NEXT: s_and_b32 s5, 0xffff, s5 1249; GFX11-NEXT: s_and_b32 s6, 0xffff, s6 1250; GFX11-NEXT: s_lshl_b32 s4, s4, s5 1251; GFX11-NEXT: s_lshr_b32 s5, s8, s6 1252; GFX11-NEXT: s_or_b32 s2, s2, s3 1253; GFX11-NEXT: s_and_b32 s1, s1, 0xff 1254; GFX11-NEXT: s_or_b32 s3, s4, s5 1255; GFX11-NEXT: s_and_b32 s0, s0, 0xff 1256; GFX11-NEXT: s_lshl_b32 s1, s1, 8 1257; GFX11-NEXT: s_and_b32 s2, s2, 0xff 1258; GFX11-NEXT: s_or_b32 s0, s0, s1 1259; GFX11-NEXT: s_lshl_b32 s1, s2, 16 1260; GFX11-NEXT: s_and_b32 s2, s3, 0xff 1261; GFX11-NEXT: s_or_b32 s0, s0, s1 1262; GFX11-NEXT: s_lshl_b32 s1, s2, 24 1263; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 1264; GFX11-NEXT: s_or_b32 s0, s0, s1 1265; GFX11-NEXT: ; return to shader part epilog 1266 %lhs = bitcast i32 %lhs.arg to <4 x i8> 1267 %rhs = bitcast i32 %rhs.arg to <4 x i8> 1268 %amt = bitcast i32 %amt.arg to <4 x i8> 1269 %result = call <4 x i8> @llvm.fshr.v4i8(<4 x i8> %lhs, <4 x i8> %rhs, <4 x i8> %amt) 1270 %cast.result = bitcast <4 x i8> %result to i32 1271 ret i32 %cast.result 1272} 1273; v_fshr_v4i8: funnel shift right on <4 x i8> with plain (non-inreg) i32 arguments; the body bitcasts each argument to <4 x i8>, calls llvm.fshr.v4i8 and bitcasts the result back to i32. The assertion comments that follow are script-generated (see the file header), so regenerate them instead of editing by hand. 1274define i32 @v_fshr_v4i8(i32 %lhs.arg, i32 %rhs.arg, i32 %amt.arg) { 1275; GFX6-LABEL: v_fshr_v4i8: 1276; GFX6: ; %bb.0: 1277; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1278; GFX6-NEXT: v_lshrrev_b32_e32 v7, 8, v2 1279; GFX6-NEXT: v_lshrrev_b32_e32 v8, 16, v2 1280; GFX6-NEXT: v_lshrrev_b32_e32 v9, 24, v2 1281; GFX6-NEXT: v_and_b32_e32 v10, 7, v2 1282; 
GFX6-NEXT: v_not_b32_e32 v2, v2 1283; GFX6-NEXT: v_lshrrev_b32_e32 v3, 8, v0 1284; GFX6-NEXT: v_lshrrev_b32_e32 v4, 16, v0 1285; GFX6-NEXT: v_lshrrev_b32_e32 v5, 24, v0 1286; GFX6-NEXT: v_and_b32_e32 v2, 7, v2 1287; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0 1288; GFX6-NEXT: v_lshlrev_b32_e32 v0, v2, v0 1289; GFX6-NEXT: v_and_b32_e32 v2, 0xff, v1 1290; GFX6-NEXT: v_lshrrev_b32_e32 v2, v10, v2 1291; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 1292; GFX6-NEXT: v_and_b32_e32 v2, 7, v7 1293; GFX6-NEXT: v_not_b32_e32 v7, v7 1294; GFX6-NEXT: v_and_b32_e32 v7, 7, v7 1295; GFX6-NEXT: v_lshlrev_b32_e32 v3, 1, v3 1296; GFX6-NEXT: v_lshlrev_b32_e32 v3, v7, v3 1297; GFX6-NEXT: v_bfe_u32 v7, v1, 8, 8 1298; GFX6-NEXT: v_lshrrev_b32_e32 v2, v2, v7 1299; GFX6-NEXT: v_not_b32_e32 v7, v8 1300; GFX6-NEXT: v_lshrrev_b32_e32 v6, 24, v1 1301; GFX6-NEXT: v_or_b32_e32 v2, v3, v2 1302; GFX6-NEXT: v_and_b32_e32 v3, 7, v8 1303; GFX6-NEXT: v_and_b32_e32 v7, 7, v7 1304; GFX6-NEXT: v_lshlrev_b32_e32 v4, 1, v4 1305; GFX6-NEXT: v_bfe_u32 v1, v1, 16, 8 1306; GFX6-NEXT: v_lshlrev_b32_e32 v4, v7, v4 1307; GFX6-NEXT: v_lshrrev_b32_e32 v1, v3, v1 1308; GFX6-NEXT: v_or_b32_e32 v1, v4, v1 1309; GFX6-NEXT: v_not_b32_e32 v4, v9 1310; GFX6-NEXT: v_and_b32_e32 v3, 7, v9 1311; GFX6-NEXT: v_and_b32_e32 v4, 7, v4 1312; GFX6-NEXT: v_lshlrev_b32_e32 v5, 1, v5 1313; GFX6-NEXT: v_and_b32_e32 v2, 0xff, v2 1314; GFX6-NEXT: v_lshlrev_b32_e32 v4, v4, v5 1315; GFX6-NEXT: v_lshrrev_b32_e32 v3, v3, v6 1316; GFX6-NEXT: v_and_b32_e32 v0, 0xff, v0 1317; GFX6-NEXT: v_lshlrev_b32_e32 v2, 8, v2 1318; GFX6-NEXT: v_and_b32_e32 v1, 0xff, v1 1319; GFX6-NEXT: v_or_b32_e32 v3, v4, v3 1320; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 1321; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 1322; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 1323; GFX6-NEXT: v_and_b32_e32 v1, 0xff, v3 1324; GFX6-NEXT: v_lshlrev_b32_e32 v1, 24, v1 1325; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 1326; GFX6-NEXT: s_setpc_b64 s[30:31] 1327; 1328; GFX8-LABEL: v_fshr_v4i8: 1329; GFX8: ; %bb.0: 1330; 
GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1331; GFX8-NEXT: v_xor_b32_e32 v7, -1, v2 1332; GFX8-NEXT: v_lshlrev_b16_e32 v6, 1, v0 1333; GFX8-NEXT: v_and_b32_e32 v7, 7, v7 1334; GFX8-NEXT: v_lshlrev_b16_e32 v6, v7, v6 1335; GFX8-NEXT: v_and_b32_e32 v7, 7, v2 1336; GFX8-NEXT: v_lshrrev_b32_e32 v5, 8, v2 1337; GFX8-NEXT: v_lshrrev_b16_sdwa v7, v7, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 1338; GFX8-NEXT: v_lshrrev_b32_e32 v3, 8, v0 1339; GFX8-NEXT: v_or_b32_e32 v6, v6, v7 1340; GFX8-NEXT: v_xor_b32_e32 v7, -1, v5 1341; GFX8-NEXT: v_lshrrev_b32_e32 v4, 8, v1 1342; GFX8-NEXT: v_lshlrev_b16_e32 v3, 1, v3 1343; GFX8-NEXT: v_and_b32_e32 v7, 7, v7 1344; GFX8-NEXT: v_and_b32_e32 v5, 7, v5 1345; GFX8-NEXT: v_lshlrev_b16_e32 v3, v7, v3 1346; GFX8-NEXT: v_lshrrev_b16_sdwa v4, v5, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 1347; GFX8-NEXT: v_mov_b32_e32 v7, -1 1348; GFX8-NEXT: v_or_b32_e32 v3, v3, v4 1349; GFX8-NEXT: v_mov_b32_e32 v4, 1 1350; GFX8-NEXT: v_xor_b32_sdwa v9, v2, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1351; GFX8-NEXT: v_lshlrev_b16_sdwa v5, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 1352; GFX8-NEXT: v_and_b32_e32 v9, 7, v9 1353; GFX8-NEXT: v_mov_b32_e32 v8, 0xff 1354; GFX8-NEXT: v_lshlrev_b16_e32 v5, v9, v5 1355; GFX8-NEXT: v_mov_b32_e32 v9, 7 1356; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3 1357; GFX8-NEXT: v_xor_b32_sdwa v4, v2, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD 1358; GFX8-NEXT: v_and_b32_sdwa v10, v2, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1359; GFX8-NEXT: v_and_b32_sdwa v8, v1, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1360; GFX8-NEXT: v_and_b32_e32 v4, 7, v4 1361; GFX8-NEXT: v_and_b32_sdwa v2, v2, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD 1362; 
GFX8-NEXT: v_lshrrev_b16_e32 v8, v10, v8 1363; GFX8-NEXT: v_lshlrev_b16_e32 v0, v4, v0 1364; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3 1365; GFX8-NEXT: v_or_b32_e32 v5, v5, v8 1366; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 1367; GFX8-NEXT: v_mov_b32_e32 v1, 8 1368; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 1369; GFX8-NEXT: v_and_b32_e32 v2, 0xff, v5 1370; GFX8-NEXT: v_or_b32_sdwa v1, v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 1371; GFX8-NEXT: v_lshlrev_b32_e32 v2, 16, v2 1372; GFX8-NEXT: v_and_b32_e32 v0, 0xff, v0 1373; GFX8-NEXT: v_or_b32_e32 v1, v1, v2 1374; GFX8-NEXT: v_lshlrev_b32_e32 v0, 24, v0 1375; GFX8-NEXT: v_or_b32_e32 v0, v1, v0 1376; GFX8-NEXT: s_setpc_b64 s[30:31] 1377; 1378; GFX9-LABEL: v_fshr_v4i8: 1379; GFX9: ; %bb.0: 1380; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1381; GFX9-NEXT: v_xor_b32_e32 v7, -1, v2 1382; GFX9-NEXT: v_lshlrev_b16_e32 v6, 1, v0 1383; GFX9-NEXT: v_and_b32_e32 v7, 7, v7 1384; GFX9-NEXT: v_lshlrev_b16_e32 v6, v7, v6 1385; GFX9-NEXT: v_and_b32_e32 v7, 7, v2 1386; GFX9-NEXT: v_lshrrev_b32_e32 v5, 8, v2 1387; GFX9-NEXT: v_lshrrev_b16_sdwa v7, v7, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 1388; GFX9-NEXT: v_lshrrev_b32_e32 v3, 8, v0 1389; GFX9-NEXT: v_or_b32_e32 v6, v6, v7 1390; GFX9-NEXT: v_xor_b32_e32 v7, -1, v5 1391; GFX9-NEXT: v_lshrrev_b32_e32 v4, 8, v1 1392; GFX9-NEXT: v_lshlrev_b16_e32 v3, 1, v3 1393; GFX9-NEXT: v_and_b32_e32 v7, 7, v7 1394; GFX9-NEXT: v_and_b32_e32 v5, 7, v5 1395; GFX9-NEXT: v_lshlrev_b16_e32 v3, v7, v3 1396; GFX9-NEXT: v_lshrrev_b16_sdwa v4, v5, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 1397; GFX9-NEXT: v_mov_b32_e32 v7, -1 1398; GFX9-NEXT: v_or_b32_e32 v3, v3, v4 1399; GFX9-NEXT: v_mov_b32_e32 v4, 1 1400; GFX9-NEXT: v_xor_b32_sdwa v9, v2, v7 dst_sel:DWORD dst_unused:UNUSED_PAD 
src0_sel:WORD_1 src1_sel:DWORD 1401; GFX9-NEXT: v_lshlrev_b16_sdwa v5, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 1402; GFX9-NEXT: v_and_b32_e32 v9, 7, v9 1403; GFX9-NEXT: v_mov_b32_e32 v8, 0xff 1404; GFX9-NEXT: v_lshlrev_b16_e32 v5, v9, v5 1405; GFX9-NEXT: v_mov_b32_e32 v9, 7 1406; GFX9-NEXT: v_lshlrev_b16_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3 1407; GFX9-NEXT: v_xor_b32_sdwa v4, v2, v7 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD 1408; GFX9-NEXT: v_and_b32_sdwa v10, v2, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1409; GFX9-NEXT: v_and_b32_sdwa v11, v1, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1410; GFX9-NEXT: v_and_b32_e32 v4, 7, v4 1411; GFX9-NEXT: v_and_b32_sdwa v2, v2, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD 1412; GFX9-NEXT: v_lshrrev_b16_e32 v10, v10, v11 1413; GFX9-NEXT: v_lshlrev_b16_e32 v0, v4, v0 1414; GFX9-NEXT: v_lshrrev_b16_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_3 1415; GFX9-NEXT: v_or_b32_e32 v5, v5, v10 1416; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 1417; GFX9-NEXT: v_mov_b32_e32 v1, 8 1418; GFX9-NEXT: v_lshlrev_b32_sdwa v1, v1, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 1419; GFX9-NEXT: v_and_b32_e32 v2, 0xff, v5 1420; GFX9-NEXT: v_and_b32_e32 v0, 0xff, v0 1421; GFX9-NEXT: v_and_or_b32 v1, v6, v8, v1 1422; GFX9-NEXT: v_lshlrev_b32_e32 v2, 16, v2 1423; GFX9-NEXT: v_lshlrev_b32_e32 v0, 24, v0 1424; GFX9-NEXT: v_or3_b32 v0, v1, v2, v0 1425; GFX9-NEXT: s_setpc_b64 s[30:31] 1426; 1427; GFX10-LABEL: v_fshr_v4i8: 1428; GFX10: ; %bb.0: 1429; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1430; GFX10-NEXT: v_lshrrev_b32_e32 v5, 8, v2 1431; GFX10-NEXT: v_lshrrev_b32_e32 v4, 8, v0 1432; GFX10-NEXT: v_xor_b32_e32 v8, -1, v2 1433; GFX10-NEXT: v_mov_b32_e32 v3, -1 1434; GFX10-NEXT: v_lshrrev_b32_e32 v6, 16, v0 1435; 
GFX10-NEXT: v_xor_b32_e32 v10, -1, v5 1436; GFX10-NEXT: v_lshrrev_b32_e32 v7, 24, v0 1437; GFX10-NEXT: v_lshrrev_b32_e32 v9, 8, v1 1438; GFX10-NEXT: v_lshlrev_b16 v0, 1, v0 1439; GFX10-NEXT: v_and_b32_e32 v8, 7, v8 1440; GFX10-NEXT: v_lshlrev_b16 v4, 1, v4 1441; GFX10-NEXT: v_and_b32_e32 v10, 7, v10 1442; GFX10-NEXT: v_mov_b32_e32 v14, 0xff 1443; GFX10-NEXT: v_lshrrev_b32_e32 v11, 24, v1 1444; GFX10-NEXT: v_lshlrev_b16 v0, v8, v0 1445; GFX10-NEXT: v_and_b32_e32 v8, 0xff, v9 1446; GFX10-NEXT: v_lshlrev_b16 v4, v10, v4 1447; GFX10-NEXT: v_xor_b32_sdwa v9, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1448; GFX10-NEXT: v_mov_b32_e32 v10, 7 1449; GFX10-NEXT: v_xor_b32_sdwa v3, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD 1450; GFX10-NEXT: v_and_b32_e32 v12, 7, v2 1451; GFX10-NEXT: v_and_b32_e32 v13, 0xff, v1 1452; GFX10-NEXT: v_and_b32_e32 v5, 7, v5 1453; GFX10-NEXT: v_lshlrev_b16 v6, 1, v6 1454; GFX10-NEXT: v_and_b32_e32 v9, 7, v9 1455; GFX10-NEXT: v_and_b32_sdwa v15, v2, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1456; GFX10-NEXT: v_and_b32_sdwa v1, v1, v14 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 1457; GFX10-NEXT: v_lshlrev_b16 v7, 1, v7 1458; GFX10-NEXT: v_and_b32_e32 v3, 7, v3 1459; GFX10-NEXT: v_and_b32_sdwa v2, v2, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_3 src1_sel:DWORD 1460; GFX10-NEXT: v_lshrrev_b16 v5, v5, v8 1461; GFX10-NEXT: v_lshlrev_b16 v6, v9, v6 1462; GFX10-NEXT: v_lshrrev_b16 v1, v15, v1 1463; GFX10-NEXT: v_lshlrev_b16 v3, v3, v7 1464; GFX10-NEXT: v_lshrrev_b16 v2, v2, v11 1465; GFX10-NEXT: v_lshrrev_b16 v7, v12, v13 1466; GFX10-NEXT: v_or_b32_e32 v4, v4, v5 1467; GFX10-NEXT: v_mov_b32_e32 v5, 8 1468; GFX10-NEXT: v_or_b32_e32 v1, v6, v1 1469; GFX10-NEXT: v_or_b32_e32 v2, v3, v2 1470; GFX10-NEXT: v_or_b32_e32 v0, v0, v7 1471; GFX10-NEXT: v_lshlrev_b32_sdwa v3, v5, v4 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD 
src1_sel:BYTE_0 1472; GFX10-NEXT: v_and_b32_e32 v1, 0xff, v1 1473; GFX10-NEXT: v_and_b32_e32 v2, 0xff, v2 1474; GFX10-NEXT: v_and_or_b32 v0, 0xff, v0, v3 1475; GFX10-NEXT: v_lshlrev_b32_e32 v1, 16, v1 1476; GFX10-NEXT: v_lshlrev_b32_e32 v2, 24, v2 1477; GFX10-NEXT: v_or3_b32 v0, v0, v1, v2 1478; GFX10-NEXT: s_setpc_b64 s[30:31] 1479; 1480; GFX11-LABEL: v_fshr_v4i8: 1481; GFX11: ; %bb.0: 1482; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1483; GFX11-NEXT: v_lshrrev_b32_e32 v6, 8, v1 1484; GFX11-NEXT: v_lshrrev_b32_e32 v7, 8, v2 1485; GFX11-NEXT: v_lshrrev_b32_e32 v3, 8, v0 1486; GFX11-NEXT: v_lshrrev_b32_e32 v11, 16, v2 1487; GFX11-NEXT: v_lshrrev_b32_e32 v13, 24, v2 1488; GFX11-NEXT: v_and_b32_e32 v6, 0xff, v6 1489; GFX11-NEXT: v_xor_b32_e32 v12, -1, v7 1490; GFX11-NEXT: v_and_b32_e32 v7, 7, v7 1491; GFX11-NEXT: v_lshrrev_b32_e32 v4, 16, v0 1492; GFX11-NEXT: v_lshrrev_b32_e32 v5, 24, v0 1493; GFX11-NEXT: v_lshrrev_b32_e32 v8, 16, v1 1494; GFX11-NEXT: v_lshlrev_b16 v3, 1, v3 1495; GFX11-NEXT: v_and_b32_e32 v12, 7, v12 1496; GFX11-NEXT: v_xor_b32_e32 v14, -1, v11 1497; GFX11-NEXT: v_lshrrev_b16 v6, v7, v6 1498; GFX11-NEXT: v_xor_b32_e32 v7, -1, v13 1499; GFX11-NEXT: v_lshrrev_b32_e32 v9, 24, v1 1500; GFX11-NEXT: v_xor_b32_e32 v10, -1, v2 1501; GFX11-NEXT: v_lshlrev_b16 v3, v12, v3 1502; GFX11-NEXT: v_lshlrev_b16 v4, 1, v4 1503; GFX11-NEXT: v_and_b32_e32 v12, 7, v14 1504; GFX11-NEXT: v_and_b32_e32 v11, 7, v11 1505; GFX11-NEXT: v_and_b32_e32 v8, 0xff, v8 1506; GFX11-NEXT: v_lshlrev_b16 v5, 1, v5 1507; GFX11-NEXT: v_and_b32_e32 v7, 7, v7 1508; GFX11-NEXT: v_and_b32_e32 v13, 7, v13 1509; GFX11-NEXT: v_lshlrev_b16 v0, 1, v0 1510; GFX11-NEXT: v_and_b32_e32 v10, 7, v10 1511; GFX11-NEXT: v_and_b32_e32 v2, 7, v2 1512; GFX11-NEXT: v_and_b32_e32 v1, 0xff, v1 1513; GFX11-NEXT: v_or_b32_e32 v3, v3, v6 1514; GFX11-NEXT: v_lshlrev_b16 v4, v12, v4 1515; GFX11-NEXT: v_lshrrev_b16 v6, v11, v8 1516; GFX11-NEXT: v_lshlrev_b16 v5, v7, v5 1517; GFX11-NEXT: v_lshrrev_b16 v7, v13, v9 
1518; GFX11-NEXT: v_lshlrev_b16 v0, v10, v0 1519; GFX11-NEXT: v_lshrrev_b16 v1, v2, v1 1520; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v3 1521; GFX11-NEXT: v_or_b32_e32 v3, v4, v6 1522; GFX11-NEXT: v_or_b32_e32 v4, v5, v7 1523; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 1524; GFX11-NEXT: v_or_b32_e32 v0, v0, v1 1525; GFX11-NEXT: v_lshlrev_b32_e32 v1, 8, v2 1526; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 1527; GFX11-NEXT: v_and_b32_e32 v2, 0xff, v3 1528; GFX11-NEXT: v_and_b32_e32 v3, 0xff, v4 1529; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 1530; GFX11-NEXT: v_and_or_b32 v0, 0xff, v0, v1 1531; GFX11-NEXT: v_lshlrev_b32_e32 v1, 16, v2 1532; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1) 1533; GFX11-NEXT: v_lshlrev_b32_e32 v2, 24, v3 1534; GFX11-NEXT: v_or3_b32 v0, v0, v1, v2 1535; GFX11-NEXT: s_setpc_b64 s[30:31] 1536 %lhs = bitcast i32 %lhs.arg to <4 x i8> 1537 %rhs = bitcast i32 %rhs.arg to <4 x i8> 1538 %amt = bitcast i32 %amt.arg to <4 x i8> 1539 %result = call <4 x i8> @llvm.fshr.v4i8(<4 x i8> %lhs, <4 x i8> %rhs, <4 x i8> %amt) 1540 %cast.result = bitcast <4 x i8> %result to i32 1541 ret i32 %cast.result 1542} 1543 1544define amdgpu_ps i24 @s_fshr_i24(i24 inreg %lhs, i24 inreg %rhs, i24 inreg %amt) { 1545; GFX6-LABEL: s_fshr_i24: 1546; GFX6: ; %bb.0: 1547; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 1548; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0 1549; GFX6-NEXT: v_not_b32_e32 v1, 23 1550; GFX6-NEXT: s_and_b32 s2, s2, 0xffffff 1551; GFX6-NEXT: s_lshl_b32 s0, s0, 1 1552; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 1553; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 1554; GFX6-NEXT: s_and_b32 s1, s1, 0xffffff 1555; GFX6-NEXT: v_mul_lo_u32 v2, v0, v1 1556; GFX6-NEXT: v_mul_hi_u32 v2, v0, v2 1557; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2 1558; GFX6-NEXT: v_mul_hi_u32 v0, s2, v0 1559; GFX6-NEXT: v_mul_lo_u32 v0, v0, 24 1560; GFX6-NEXT: 
v_sub_i32_e32 v0, vcc, s2, v0 1561; GFX6-NEXT: v_add_i32_e32 v2, vcc, v0, v1 1562; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 1563; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 1564; GFX6-NEXT: v_add_i32_e32 v1, vcc, v0, v1 1565; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 1566; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 1567; GFX6-NEXT: v_sub_i32_e32 v1, vcc, 23, v0 1568; GFX6-NEXT: v_and_b32_e32 v0, 0xffffff, v0 1569; GFX6-NEXT: v_and_b32_e32 v1, 0xffffff, v1 1570; GFX6-NEXT: v_lshl_b32_e32 v1, s0, v1 1571; GFX6-NEXT: v_lshr_b32_e32 v0, s1, v0 1572; GFX6-NEXT: v_or_b32_e32 v0, v1, v0 1573; GFX6-NEXT: v_readfirstlane_b32 s0, v0 1574; GFX6-NEXT: ; return to shader part epilog 1575; 1576; GFX8-LABEL: s_fshr_i24: 1577; GFX8: ; %bb.0: 1578; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 1579; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 1580; GFX8-NEXT: v_not_b32_e32 v1, 23 1581; GFX8-NEXT: s_and_b32 s2, s2, 0xffffff 1582; GFX8-NEXT: s_lshl_b32 s0, s0, 1 1583; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 1584; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0 1585; GFX8-NEXT: s_and_b32 s1, s1, 0xffffff 1586; GFX8-NEXT: v_mul_lo_u32 v2, v0, v1 1587; GFX8-NEXT: v_mul_hi_u32 v2, v0, v2 1588; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 1589; GFX8-NEXT: v_mul_hi_u32 v0, s2, v0 1590; GFX8-NEXT: v_mul_lo_u32 v0, v0, 24 1591; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s2, v0 1592; GFX8-NEXT: v_add_u32_e32 v2, vcc, v0, v1 1593; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 1594; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 1595; GFX8-NEXT: v_add_u32_e32 v1, vcc, v0, v1 1596; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 1597; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 1598; GFX8-NEXT: v_sub_u32_e32 v1, vcc, 23, v0 1599; GFX8-NEXT: v_and_b32_e32 v0, 0xffffff, v0 1600; GFX8-NEXT: v_and_b32_e32 v1, 0xffffff, v1 1601; GFX8-NEXT: v_lshlrev_b32_e64 v1, v1, s0 1602; GFX8-NEXT: v_lshrrev_b32_e64 v0, v0, s1 1603; GFX8-NEXT: v_or_b32_e32 v0, v1, v0 1604; GFX8-NEXT: v_readfirstlane_b32 s0, v0 1605; GFX8-NEXT: ; return to shader part epilog 
1606; 1607; GFX9-LABEL: s_fshr_i24: 1608; GFX9: ; %bb.0: 1609; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 1610; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 1611; GFX9-NEXT: v_not_b32_e32 v1, 23 1612; GFX9-NEXT: s_and_b32 s2, s2, 0xffffff 1613; GFX9-NEXT: s_and_b32 s1, s1, 0xffffff 1614; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 1615; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 1616; GFX9-NEXT: s_lshl_b32 s0, s0, 1 1617; GFX9-NEXT: v_mul_lo_u32 v1, v0, v1 1618; GFX9-NEXT: v_mul_hi_u32 v1, v0, v1 1619; GFX9-NEXT: v_add_u32_e32 v0, v0, v1 1620; GFX9-NEXT: v_mul_hi_u32 v0, s2, v0 1621; GFX9-NEXT: v_mul_lo_u32 v0, v0, 24 1622; GFX9-NEXT: v_sub_u32_e32 v0, s2, v0 1623; GFX9-NEXT: v_add_u32_e32 v1, 0xffffffe8, v0 1624; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 1625; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 1626; GFX9-NEXT: v_add_u32_e32 v1, 0xffffffe8, v0 1627; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 1628; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 1629; GFX9-NEXT: v_sub_u32_e32 v1, 23, v0 1630; GFX9-NEXT: v_and_b32_e32 v0, 0xffffff, v0 1631; GFX9-NEXT: v_and_b32_e32 v1, 0xffffff, v1 1632; GFX9-NEXT: v_lshrrev_b32_e64 v0, v0, s1 1633; GFX9-NEXT: v_lshl_or_b32 v0, s0, v1, v0 1634; GFX9-NEXT: v_readfirstlane_b32 s0, v0 1635; GFX9-NEXT: ; return to shader part epilog 1636; 1637; GFX10-LABEL: s_fshr_i24: 1638; GFX10: ; %bb.0: 1639; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 1640; GFX10-NEXT: s_and_b32 s2, s2, 0xffffff 1641; GFX10-NEXT: s_and_b32 s1, s1, 0xffffff 1642; GFX10-NEXT: s_lshl_b32 s0, s0, 1 1643; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 1644; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 1645; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0 1646; GFX10-NEXT: v_mul_lo_u32 v1, 0xffffffe8, v0 1647; GFX10-NEXT: v_mul_hi_u32 v1, v0, v1 1648; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v1 1649; GFX10-NEXT: v_mul_hi_u32 v0, s2, v0 1650; GFX10-NEXT: v_mul_lo_u32 v0, v0, 24 1651; GFX10-NEXT: v_sub_nc_u32_e32 v0, s2, v0 1652; GFX10-NEXT: v_add_nc_u32_e32 v1, 0xffffffe8, v0 1653; GFX10-NEXT: v_cmp_le_u32_e32 
vcc_lo, 24, v0 1654; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 1655; GFX10-NEXT: v_add_nc_u32_e32 v1, 0xffffffe8, v0 1656; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 1657; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 1658; GFX10-NEXT: v_sub_nc_u32_e32 v1, 23, v0 1659; GFX10-NEXT: v_and_b32_e32 v0, 0xffffff, v0 1660; GFX10-NEXT: v_and_b32_e32 v1, 0xffffff, v1 1661; GFX10-NEXT: v_lshrrev_b32_e64 v0, v0, s1 1662; GFX10-NEXT: v_lshl_or_b32 v0, s0, v1, v0 1663; GFX10-NEXT: v_readfirstlane_b32 s0, v0 1664; GFX10-NEXT: ; return to shader part epilog 1665; 1666; GFX11-LABEL: s_fshr_i24: 1667; GFX11: ; %bb.0: 1668; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 1669; GFX11-NEXT: s_and_b32 s2, s2, 0xffffff 1670; GFX11-NEXT: s_and_b32 s1, s1, 0xffffff 1671; GFX11-NEXT: s_lshl_b32 s0, s0, 1 1672; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1) 1673; GFX11-NEXT: v_rcp_iflag_f32_e32 v0, v0 1674; GFX11-NEXT: s_waitcnt_depctr 0xfff 1675; GFX11-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 1676; GFX11-NEXT: v_cvt_u32_f32_e32 v0, v0 1677; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1678; GFX11-NEXT: v_mul_lo_u32 v1, 0xffffffe8, v0 1679; GFX11-NEXT: v_mul_hi_u32 v1, v0, v1 1680; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1681; GFX11-NEXT: v_add_nc_u32_e32 v0, v0, v1 1682; GFX11-NEXT: v_mul_hi_u32 v0, s2, v0 1683; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1684; GFX11-NEXT: v_mul_lo_u32 v0, v0, 24 1685; GFX11-NEXT: v_sub_nc_u32_e32 v0, s2, v0 1686; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 1687; GFX11-NEXT: v_add_nc_u32_e32 v1, 0xffffffe8, v0 1688; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 1689; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 1690; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 1691; GFX11-NEXT: v_add_nc_u32_e32 v1, 0xffffffe8, v0 
1692; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 1693; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo 1694; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 1695; GFX11-NEXT: v_sub_nc_u32_e32 v1, 23, v0 1696; GFX11-NEXT: v_and_b32_e32 v0, 0xffffff, v0 1697; GFX11-NEXT: v_and_b32_e32 v1, 0xffffff, v1 1698; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 1699; GFX11-NEXT: v_lshrrev_b32_e64 v0, v0, s1 1700; GFX11-NEXT: v_lshl_or_b32 v0, s0, v1, v0 1701; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 1702; GFX11-NEXT: v_readfirstlane_b32 s0, v0 1703; GFX11-NEXT: ; return to shader part epilog 1704 %result = call i24 @llvm.fshr.i24(i24 %lhs, i24 %rhs, i24 %amt) 1705 ret i24 %result 1706} 1707 1708define i24 @v_fshr_i24(i24 %lhs, i24 %rhs, i24 %amt) { 1709; GFX6-LABEL: v_fshr_i24: 1710; GFX6: ; %bb.0: 1711; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1712; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v3, 24 1713; GFX6-NEXT: v_rcp_iflag_f32_e32 v3, v3 1714; GFX6-NEXT: v_not_b32_e32 v4, 23 1715; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v2 1716; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0 1717; GFX6-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 1718; GFX6-NEXT: v_cvt_u32_f32_e32 v3, v3 1719; GFX6-NEXT: v_and_b32_e32 v1, 0xffffff, v1 1720; GFX6-NEXT: v_mul_lo_u32 v5, v3, v4 1721; GFX6-NEXT: v_mul_hi_u32 v5, v3, v5 1722; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v5 1723; GFX6-NEXT: v_mul_hi_u32 v3, v2, v3 1724; GFX6-NEXT: v_mul_lo_u32 v3, v3, 24 1725; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v3 1726; GFX6-NEXT: v_add_i32_e32 v3, vcc, v2, v4 1727; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 1728; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 1729; GFX6-NEXT: v_add_i32_e32 v3, vcc, 0xffffffe8, v2 1730; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 1731; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 1732; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 23, v2 1733; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v2 1734; GFX6-NEXT: v_and_b32_e32 v3, 0xffffff, v3 1735; 
GFX6-NEXT: v_lshlrev_b32_e32 v0, v3, v0 1736; GFX6-NEXT: v_lshrrev_b32_e32 v1, v2, v1 1737; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 1738; GFX6-NEXT: s_setpc_b64 s[30:31] 1739; 1740; GFX8-LABEL: v_fshr_i24: 1741; GFX8: ; %bb.0: 1742; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1743; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v3, 24 1744; GFX8-NEXT: v_rcp_iflag_f32_e32 v3, v3 1745; GFX8-NEXT: v_not_b32_e32 v4, 23 1746; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v2 1747; GFX8-NEXT: v_lshlrev_b32_e32 v0, 1, v0 1748; GFX8-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 1749; GFX8-NEXT: v_cvt_u32_f32_e32 v3, v3 1750; GFX8-NEXT: v_and_b32_e32 v1, 0xffffff, v1 1751; GFX8-NEXT: v_mul_lo_u32 v5, v3, v4 1752; GFX8-NEXT: v_mul_hi_u32 v5, v3, v5 1753; GFX8-NEXT: v_add_u32_e32 v3, vcc, v3, v5 1754; GFX8-NEXT: v_mul_hi_u32 v3, v2, v3 1755; GFX8-NEXT: v_mul_lo_u32 v3, v3, 24 1756; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v2, v3 1757; GFX8-NEXT: v_add_u32_e32 v3, vcc, v2, v4 1758; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 1759; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 1760; GFX8-NEXT: v_add_u32_e32 v3, vcc, 0xffffffe8, v2 1761; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 1762; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 1763; GFX8-NEXT: v_sub_u32_e32 v3, vcc, 23, v2 1764; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v2 1765; GFX8-NEXT: v_and_b32_e32 v3, 0xffffff, v3 1766; GFX8-NEXT: v_lshlrev_b32_e32 v0, v3, v0 1767; GFX8-NEXT: v_lshrrev_b32_e32 v1, v2, v1 1768; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 1769; GFX8-NEXT: s_setpc_b64 s[30:31] 1770; 1771; GFX9-LABEL: v_fshr_i24: 1772; GFX9: ; %bb.0: 1773; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1774; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v3, 24 1775; GFX9-NEXT: v_rcp_iflag_f32_e32 v3, v3 1776; GFX9-NEXT: v_not_b32_e32 v4, 23 1777; GFX9-NEXT: v_and_b32_e32 v2, 0xffffff, v2 1778; GFX9-NEXT: v_and_b32_e32 v1, 0xffffff, v1 1779; GFX9-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 1780; GFX9-NEXT: v_cvt_u32_f32_e32 v3, v3 1781; GFX9-NEXT: v_lshlrev_b32_e32 v0, 1, v0 1782; GFX9-NEXT: 
v_mul_lo_u32 v4, v3, v4 1783; GFX9-NEXT: v_mul_hi_u32 v4, v3, v4 1784; GFX9-NEXT: v_add_u32_e32 v3, v3, v4 1785; GFX9-NEXT: v_mul_hi_u32 v3, v2, v3 1786; GFX9-NEXT: v_mul_lo_u32 v3, v3, 24 1787; GFX9-NEXT: v_sub_u32_e32 v2, v2, v3 1788; GFX9-NEXT: v_add_u32_e32 v3, 0xffffffe8, v2 1789; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 1790; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 1791; GFX9-NEXT: v_add_u32_e32 v3, 0xffffffe8, v2 1792; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 1793; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 1794; GFX9-NEXT: v_sub_u32_e32 v3, 23, v2 1795; GFX9-NEXT: v_and_b32_e32 v2, 0xffffff, v2 1796; GFX9-NEXT: v_and_b32_e32 v3, 0xffffff, v3 1797; GFX9-NEXT: v_lshrrev_b32_e32 v1, v2, v1 1798; GFX9-NEXT: v_lshl_or_b32 v0, v0, v3, v1 1799; GFX9-NEXT: s_setpc_b64 s[30:31] 1800; 1801; GFX10-LABEL: v_fshr_i24: 1802; GFX10: ; %bb.0: 1803; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1804; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v3, 24 1805; GFX10-NEXT: v_and_b32_e32 v2, 0xffffff, v2 1806; GFX10-NEXT: v_and_b32_e32 v1, 0xffffff, v1 1807; GFX10-NEXT: v_lshlrev_b32_e32 v0, 1, v0 1808; GFX10-NEXT: v_rcp_iflag_f32_e32 v3, v3 1809; GFX10-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 1810; GFX10-NEXT: v_cvt_u32_f32_e32 v3, v3 1811; GFX10-NEXT: v_mul_lo_u32 v4, 0xffffffe8, v3 1812; GFX10-NEXT: v_mul_hi_u32 v4, v3, v4 1813; GFX10-NEXT: v_add_nc_u32_e32 v3, v3, v4 1814; GFX10-NEXT: v_mul_hi_u32 v3, v2, v3 1815; GFX10-NEXT: v_mul_lo_u32 v3, v3, 24 1816; GFX10-NEXT: v_sub_nc_u32_e32 v2, v2, v3 1817; GFX10-NEXT: v_add_nc_u32_e32 v3, 0xffffffe8, v2 1818; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v2 1819; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo 1820; GFX10-NEXT: v_add_nc_u32_e32 v3, 0xffffffe8, v2 1821; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v2 1822; GFX10-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo 1823; GFX10-NEXT: v_sub_nc_u32_e32 v3, 23, v2 1824; GFX10-NEXT: v_and_b32_e32 v2, 0xffffff, v2 1825; GFX10-NEXT: v_and_b32_e32 v3, 0xffffff, v3 1826; GFX10-NEXT: 
v_lshrrev_b32_e32 v1, v2, v1 1827; GFX10-NEXT: v_lshl_or_b32 v0, v0, v3, v1 1828; GFX10-NEXT: s_setpc_b64 s[30:31] 1829; 1830; GFX11-LABEL: v_fshr_i24: 1831; GFX11: ; %bb.0: 1832; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1833; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v3, 24 1834; GFX11-NEXT: v_and_b32_e32 v2, 0xffffff, v2 1835; GFX11-NEXT: v_lshlrev_b32_e32 v0, 1, v0 1836; GFX11-NEXT: v_and_b32_e32 v1, 0xffffff, v1 1837; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_2) | instid1(VALU_DEP_1) 1838; GFX11-NEXT: v_rcp_iflag_f32_e32 v3, v3 1839; GFX11-NEXT: s_waitcnt_depctr 0xfff 1840; GFX11-NEXT: v_mul_f32_e32 v3, 0x4f7ffffe, v3 1841; GFX11-NEXT: v_cvt_u32_f32_e32 v3, v3 1842; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1843; GFX11-NEXT: v_mul_lo_u32 v4, 0xffffffe8, v3 1844; GFX11-NEXT: v_mul_hi_u32 v4, v3, v4 1845; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1846; GFX11-NEXT: v_add_nc_u32_e32 v3, v3, v4 1847; GFX11-NEXT: v_mul_hi_u32 v3, v2, v3 1848; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 1849; GFX11-NEXT: v_mul_lo_u32 v3, v3, 24 1850; GFX11-NEXT: v_sub_nc_u32_e32 v2, v2, v3 1851; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 1852; GFX11-NEXT: v_add_nc_u32_e32 v3, 0xffffffe8, v2 1853; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v2 1854; GFX11-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo 1855; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 1856; GFX11-NEXT: v_add_nc_u32_e32 v3, 0xffffffe8, v2 1857; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v2 1858; GFX11-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc_lo 1859; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 1860; GFX11-NEXT: v_sub_nc_u32_e32 v3, 23, v2 1861; GFX11-NEXT: v_and_b32_e32 v2, 0xffffff, v2 1862; GFX11-NEXT: v_and_b32_e32 v3, 0xffffff, v3 1863; GFX11-NEXT: s_delay_alu 
instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 1864; GFX11-NEXT: v_lshrrev_b32_e32 v1, v2, v1 1865; GFX11-NEXT: v_lshl_or_b32 v0, v0, v3, v1 1866; GFX11-NEXT: s_setpc_b64 s[30:31] 1867 %result = call i24 @llvm.fshr.i24(i24 %lhs, i24 %rhs, i24 %amt) 1868 ret i24 %result 1869} 1870 1871define amdgpu_ps i48 @s_fshr_v2i24(i48 inreg %lhs.arg, i48 inreg %rhs.arg, i48 inreg %amt.arg) { 1872; GFX6-LABEL: s_fshr_v2i24: 1873; GFX6: ; %bb.0: 1874; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v2, 24 1875; GFX6-NEXT: v_rcp_iflag_f32_e32 v2, v2 1876; GFX6-NEXT: s_lshr_b32 s7, s1, 8 1877; GFX6-NEXT: s_bfe_u32 s9, s0, 0x80008 1878; GFX6-NEXT: s_and_b32 s1, s1, 0xff 1879; GFX6-NEXT: v_mul_f32_e32 v2, 0x4f7ffffe, v2 1880; GFX6-NEXT: v_cvt_u32_f32_e32 v2, v2 1881; GFX6-NEXT: v_mov_b32_e32 v0, s0 1882; GFX6-NEXT: v_not_b32_e32 v3, 23 1883; GFX6-NEXT: s_lshr_b32 s6, s0, 16 1884; GFX6-NEXT: s_and_b32 s8, s0, 0xff 1885; GFX6-NEXT: s_lshl_b32 s9, s9, 8 1886; GFX6-NEXT: v_alignbit_b32 v0, s1, v0, 24 1887; GFX6-NEXT: s_and_b32 s0, s7, 0xff 1888; GFX6-NEXT: s_lshr_b32 s1, s2, 16 1889; GFX6-NEXT: s_lshr_b32 s7, s3, 8 1890; GFX6-NEXT: s_bfe_u32 s10, s2, 0x80008 1891; GFX6-NEXT: v_mul_lo_u32 v4, v2, v3 1892; GFX6-NEXT: s_or_b32 s8, s8, s9 1893; GFX6-NEXT: s_and_b32 s9, s2, 0xff 1894; GFX6-NEXT: s_lshl_b32 s10, s10, 8 1895; GFX6-NEXT: s_and_b32 s1, s1, 0xff 1896; GFX6-NEXT: s_and_b32 s3, s3, 0xff 1897; GFX6-NEXT: v_mov_b32_e32 v1, s2 1898; GFX6-NEXT: s_and_b32 s2, s7, 0xff 1899; GFX6-NEXT: s_or_b32 s9, s9, s10 1900; GFX6-NEXT: s_and_b32 s1, 0xffff, s1 1901; GFX6-NEXT: v_alignbit_b32 v1, s3, v1, 24 1902; GFX6-NEXT: s_and_b32 s2, 0xffff, s2 1903; GFX6-NEXT: s_and_b32 s9, 0xffff, s9 1904; GFX6-NEXT: s_lshl_b32 s1, s1, 16 1905; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1 1906; GFX6-NEXT: s_lshl_b32 s2, s2, 16 1907; GFX6-NEXT: s_or_b32 s1, s9, s1 1908; GFX6-NEXT: v_or_b32_e32 v1, s2, v1 1909; GFX6-NEXT: s_lshr_b32 s2, s4, 16 1910; GFX6-NEXT: s_bfe_u32 s9, s4, 0x80008 1911; GFX6-NEXT: v_mul_hi_u32 v4, v2, 
v4 1912; GFX6-NEXT: s_and_b32 s7, s4, 0xff 1913; GFX6-NEXT: s_lshl_b32 s9, s9, 8 1914; GFX6-NEXT: s_and_b32 s2, s2, 0xff 1915; GFX6-NEXT: s_or_b32 s7, s7, s9 1916; GFX6-NEXT: s_and_b32 s2, 0xffff, s2 1917; GFX6-NEXT: s_and_b32 s7, 0xffff, s7 1918; GFX6-NEXT: s_lshl_b32 s2, s2, 16 1919; GFX6-NEXT: s_or_b32 s2, s7, s2 1920; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v4 1921; GFX6-NEXT: v_mul_hi_u32 v4, s2, v2 1922; GFX6-NEXT: s_lshr_b32 s3, s5, 8 1923; GFX6-NEXT: s_and_b32 s5, s5, 0xff 1924; GFX6-NEXT: v_mov_b32_e32 v5, s4 1925; GFX6-NEXT: s_and_b32 s3, s3, 0xff 1926; GFX6-NEXT: v_alignbit_b32 v5, s5, v5, 24 1927; GFX6-NEXT: s_and_b32 s3, 0xffff, s3 1928; GFX6-NEXT: v_and_b32_e32 v5, 0xffff, v5 1929; GFX6-NEXT: v_mul_lo_u32 v4, v4, 24 1930; GFX6-NEXT: s_lshl_b32 s3, s3, 16 1931; GFX6-NEXT: v_or_b32_e32 v5, s3, v5 1932; GFX6-NEXT: v_mul_hi_u32 v2, v5, v2 1933; GFX6-NEXT: v_sub_i32_e32 v4, vcc, s2, v4 1934; GFX6-NEXT: v_add_i32_e32 v6, vcc, v4, v3 1935; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 1936; GFX6-NEXT: v_mul_lo_u32 v2, v2, 24 1937; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc 1938; GFX6-NEXT: v_add_i32_e32 v6, vcc, v4, v3 1939; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 1940; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc 1941; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v5, v2 1942; GFX6-NEXT: v_sub_i32_e32 v6, vcc, 23, v4 1943; GFX6-NEXT: v_add_i32_e32 v5, vcc, v2, v3 1944; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 1945; GFX6-NEXT: s_and_b32 s6, s6, 0xff 1946; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v5, vcc 1947; GFX6-NEXT: s_and_b32 s8, 0xffff, s8 1948; GFX6-NEXT: s_and_b32 s6, 0xffff, s6 1949; GFX6-NEXT: v_add_i32_e32 v3, vcc, v2, v3 1950; GFX6-NEXT: s_lshl_b32 s2, s6, 17 1951; GFX6-NEXT: s_lshl_b32 s3, s8, 1 1952; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 1953; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 1954; GFX6-NEXT: s_and_b32 s0, 0xffff, s0 1955; GFX6-NEXT: s_or_b32 s2, s2, s3 1956; GFX6-NEXT: v_and_b32_e32 v6, 0xffffff, v6 1957; GFX6-NEXT: v_and_b32_e32 v4, 0xffffff, v4 1958; 
GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 1959; GFX6-NEXT: v_lshl_b32_e32 v6, s2, v6 1960; GFX6-NEXT: v_lshr_b32_e32 v4, s1, v4 1961; GFX6-NEXT: v_sub_i32_e32 v3, vcc, 23, v2 1962; GFX6-NEXT: s_lshl_b32 s0, s0, 17 1963; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0 1964; GFX6-NEXT: v_or_b32_e32 v4, v6, v4 1965; GFX6-NEXT: v_or_b32_e32 v0, s0, v0 1966; GFX6-NEXT: v_and_b32_e32 v3, 0xffffff, v3 1967; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v2 1968; GFX6-NEXT: v_lshlrev_b32_e32 v0, v3, v0 1969; GFX6-NEXT: v_lshrrev_b32_e32 v1, v2, v1 1970; GFX6-NEXT: v_bfe_u32 v2, v4, 8, 8 1971; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 1972; GFX6-NEXT: v_and_b32_e32 v1, 0xff, v4 1973; GFX6-NEXT: v_lshlrev_b32_e32 v2, 8, v2 1974; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 1975; GFX6-NEXT: v_bfe_u32 v2, v4, 16, 8 1976; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2 1977; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 1978; GFX6-NEXT: v_and_b32_e32 v2, 0xff, v0 1979; GFX6-NEXT: v_lshlrev_b32_e32 v2, 24, v2 1980; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 1981; GFX6-NEXT: v_bfe_u32 v2, v0, 8, 8 1982; GFX6-NEXT: v_bfe_u32 v0, v0, 16, 8 1983; GFX6-NEXT: v_lshlrev_b32_e32 v0, 8, v0 1984; GFX6-NEXT: v_or_b32_e32 v0, v2, v0 1985; GFX6-NEXT: v_readfirstlane_b32 s0, v1 1986; GFX6-NEXT: v_readfirstlane_b32 s1, v0 1987; GFX6-NEXT: ; return to shader part epilog 1988; 1989; GFX8-LABEL: s_fshr_v2i24: 1990; GFX8: ; %bb.0: 1991; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 1992; GFX8-NEXT: s_lshr_b32 s9, s1, 8 1993; GFX8-NEXT: s_and_b32 s1, s1, 0xff 1994; GFX8-NEXT: v_rcp_iflag_f32_e32 v0, v0 1995; GFX8-NEXT: s_lshr_b32 s6, s0, 8 1996; GFX8-NEXT: s_lshr_b32 s8, s0, 24 1997; GFX8-NEXT: s_lshl_b32 s1, s1, 8 1998; GFX8-NEXT: s_and_b32 s6, s6, 0xff 1999; GFX8-NEXT: s_or_b32 s1, s8, s1 2000; GFX8-NEXT: s_lshr_b32 s8, s2, 8 2001; GFX8-NEXT: s_lshr_b32 s7, s0, 16 2002; GFX8-NEXT: s_and_b32 s0, s0, 0xff 2003; GFX8-NEXT: s_lshl_b32 s6, s6, 8 2004; GFX8-NEXT: s_and_b32 s8, s8, 0xff 2005; GFX8-NEXT: s_or_b32 s0, s0, s6 2006; GFX8-NEXT: s_and_b32 s6, s7, 0xff 
2007; GFX8-NEXT: s_and_b32 s7, s9, 0xff 2008; GFX8-NEXT: s_lshr_b32 s9, s2, 16 2009; GFX8-NEXT: s_lshr_b32 s10, s2, 24 2010; GFX8-NEXT: s_and_b32 s2, s2, 0xff 2011; GFX8-NEXT: s_lshl_b32 s8, s8, 8 2012; GFX8-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 2013; GFX8-NEXT: s_or_b32 s2, s2, s8 2014; GFX8-NEXT: s_and_b32 s8, s9, 0xff 2015; GFX8-NEXT: v_cvt_u32_f32_e32 v0, v0 2016; GFX8-NEXT: s_and_b32 s8, 0xffff, s8 2017; GFX8-NEXT: s_lshr_b32 s11, s3, 8 2018; GFX8-NEXT: s_and_b32 s2, 0xffff, s2 2019; GFX8-NEXT: s_lshl_b32 s8, s8, 16 2020; GFX8-NEXT: s_and_b32 s3, s3, 0xff 2021; GFX8-NEXT: s_or_b32 s2, s2, s8 2022; GFX8-NEXT: s_lshl_b32 s3, s3, 8 2023; GFX8-NEXT: s_and_b32 s8, s11, 0xff 2024; GFX8-NEXT: v_not_b32_e32 v1, 23 2025; GFX8-NEXT: s_or_b32 s3, s10, s3 2026; GFX8-NEXT: s_and_b32 s8, 0xffff, s8 2027; GFX8-NEXT: v_mul_lo_u32 v2, v0, v1 2028; GFX8-NEXT: s_and_b32 s3, 0xffff, s3 2029; GFX8-NEXT: s_lshl_b32 s8, s8, 16 2030; GFX8-NEXT: s_or_b32 s3, s3, s8 2031; GFX8-NEXT: s_lshr_b32 s8, s4, 8 2032; GFX8-NEXT: s_and_b32 s8, s8, 0xff 2033; GFX8-NEXT: s_lshr_b32 s9, s4, 16 2034; GFX8-NEXT: s_lshr_b32 s10, s4, 24 2035; GFX8-NEXT: s_and_b32 s4, s4, 0xff 2036; GFX8-NEXT: s_lshl_b32 s8, s8, 8 2037; GFX8-NEXT: v_mul_hi_u32 v2, v0, v2 2038; GFX8-NEXT: s_or_b32 s4, s4, s8 2039; GFX8-NEXT: s_and_b32 s8, s9, 0xff 2040; GFX8-NEXT: s_and_b32 s8, 0xffff, s8 2041; GFX8-NEXT: s_and_b32 s4, 0xffff, s4 2042; GFX8-NEXT: s_lshl_b32 s8, s8, 16 2043; GFX8-NEXT: s_or_b32 s4, s4, s8 2044; GFX8-NEXT: v_add_u32_e32 v0, vcc, v0, v2 2045; GFX8-NEXT: v_mul_hi_u32 v2, s4, v0 2046; GFX8-NEXT: s_lshr_b32 s11, s5, 8 2047; GFX8-NEXT: s_and_b32 s5, s5, 0xff 2048; GFX8-NEXT: s_lshl_b32 s5, s5, 8 2049; GFX8-NEXT: v_mul_lo_u32 v2, v2, 24 2050; GFX8-NEXT: s_and_b32 s8, s11, 0xff 2051; GFX8-NEXT: s_or_b32 s5, s10, s5 2052; GFX8-NEXT: s_and_b32 s8, 0xffff, s8 2053; GFX8-NEXT: s_and_b32 s5, 0xffff, s5 2054; GFX8-NEXT: s_lshl_b32 s8, s8, 16 2055; GFX8-NEXT: s_or_b32 s5, s5, s8 2056; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 
s4, v2 2057; GFX8-NEXT: v_add_u32_e32 v3, vcc, v2, v1 2058; GFX8-NEXT: v_mul_hi_u32 v0, s5, v0 2059; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 2060; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 2061; GFX8-NEXT: v_add_u32_e32 v3, vcc, v2, v1 2062; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 2063; GFX8-NEXT: v_mul_lo_u32 v0, v0, 24 2064; GFX8-NEXT: s_and_b32 s0, 0xffff, s0 2065; GFX8-NEXT: s_and_b32 s6, 0xffff, s6 2066; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v3, vcc 2067; GFX8-NEXT: v_sub_u32_e32 v3, vcc, 23, v2 2068; GFX8-NEXT: s_lshl_b32 s4, s6, 17 2069; GFX8-NEXT: s_lshl_b32 s0, s0, 1 2070; GFX8-NEXT: s_or_b32 s0, s4, s0 2071; GFX8-NEXT: v_and_b32_e32 v3, 0xffffff, v3 2072; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v2 2073; GFX8-NEXT: v_lshlrev_b32_e64 v3, v3, s0 2074; GFX8-NEXT: v_lshrrev_b32_e64 v2, v2, s2 2075; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s5, v0 2076; GFX8-NEXT: v_or_b32_e32 v2, v3, v2 2077; GFX8-NEXT: v_add_u32_e32 v3, vcc, v0, v1 2078; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 2079; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v3, vcc 2080; GFX8-NEXT: v_add_u32_e32 v1, vcc, v0, v1 2081; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 2082; GFX8-NEXT: s_and_b32 s1, 0xffff, s1 2083; GFX8-NEXT: s_and_b32 s7, 0xffff, s7 2084; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc 2085; GFX8-NEXT: v_sub_u32_e32 v1, vcc, 23, v0 2086; GFX8-NEXT: s_lshl_b32 s0, s7, 17 2087; GFX8-NEXT: s_lshl_b32 s1, s1, 1 2088; GFX8-NEXT: s_or_b32 s0, s0, s1 2089; GFX8-NEXT: v_and_b32_e32 v1, 0xffffff, v1 2090; GFX8-NEXT: v_and_b32_e32 v0, 0xffffff, v0 2091; GFX8-NEXT: v_lshlrev_b32_e64 v1, v1, s0 2092; GFX8-NEXT: v_lshrrev_b32_e64 v0, v0, s3 2093; GFX8-NEXT: v_or_b32_e32 v0, v1, v0 2094; GFX8-NEXT: v_mov_b32_e32 v1, 8 2095; GFX8-NEXT: v_lshlrev_b32_sdwa v3, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 2096; GFX8-NEXT: v_mov_b32_e32 v4, 16 2097; GFX8-NEXT: v_or_b32_sdwa v3, v2, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD 2098; GFX8-NEXT: v_lshlrev_b32_sdwa v2, 
v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 2099; GFX8-NEXT: v_or_b32_e32 v2, v3, v2 2100; GFX8-NEXT: v_and_b32_e32 v3, 0xff, v0 2101; GFX8-NEXT: v_lshlrev_b32_e32 v3, 24, v3 2102; GFX8-NEXT: v_lshlrev_b32_sdwa v1, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 2103; GFX8-NEXT: v_or_b32_e32 v2, v2, v3 2104; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD 2105; GFX8-NEXT: v_readfirstlane_b32 s0, v2 2106; GFX8-NEXT: v_readfirstlane_b32 s1, v0 2107; GFX8-NEXT: ; return to shader part epilog 2108; 2109; GFX9-LABEL: s_fshr_v2i24: 2110; GFX9: ; %bb.0: 2111; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 2112; GFX9-NEXT: s_lshr_b32 s9, s1, 8 2113; GFX9-NEXT: s_and_b32 s1, s1, 0xff 2114; GFX9-NEXT: v_rcp_iflag_f32_e32 v0, v0 2115; GFX9-NEXT: s_lshr_b32 s6, s0, 8 2116; GFX9-NEXT: s_lshr_b32 s8, s0, 24 2117; GFX9-NEXT: s_lshl_b32 s1, s1, 8 2118; GFX9-NEXT: s_and_b32 s6, s6, 0xff 2119; GFX9-NEXT: s_or_b32 s1, s8, s1 2120; GFX9-NEXT: s_lshr_b32 s8, s2, 8 2121; GFX9-NEXT: s_lshr_b32 s7, s0, 16 2122; GFX9-NEXT: s_and_b32 s0, s0, 0xff 2123; GFX9-NEXT: s_lshl_b32 s6, s6, 8 2124; GFX9-NEXT: s_and_b32 s8, s8, 0xff 2125; GFX9-NEXT: s_or_b32 s0, s0, s6 2126; GFX9-NEXT: s_and_b32 s6, s7, 0xff 2127; GFX9-NEXT: s_and_b32 s7, s9, 0xff 2128; GFX9-NEXT: s_lshr_b32 s9, s2, 16 2129; GFX9-NEXT: s_lshr_b32 s10, s2, 24 2130; GFX9-NEXT: s_and_b32 s2, s2, 0xff 2131; GFX9-NEXT: s_lshl_b32 s8, s8, 8 2132; GFX9-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 2133; GFX9-NEXT: s_or_b32 s2, s2, s8 2134; GFX9-NEXT: s_and_b32 s8, s9, 0xff 2135; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0 2136; GFX9-NEXT: s_and_b32 s8, 0xffff, s8 2137; GFX9-NEXT: s_lshr_b32 s11, s3, 8 2138; GFX9-NEXT: s_and_b32 s2, 0xffff, s2 2139; GFX9-NEXT: s_lshl_b32 s8, s8, 16 2140; GFX9-NEXT: s_and_b32 s3, s3, 0xff 2141; GFX9-NEXT: s_or_b32 s2, s2, s8 2142; GFX9-NEXT: s_lshl_b32 s3, s3, 8 2143; GFX9-NEXT: s_and_b32 s8, s11, 0xff 2144; GFX9-NEXT: 
v_not_b32_e32 v1, 23 2145; GFX9-NEXT: s_or_b32 s3, s10, s3 2146; GFX9-NEXT: s_and_b32 s8, 0xffff, s8 2147; GFX9-NEXT: v_mul_lo_u32 v1, v0, v1 2148; GFX9-NEXT: s_and_b32 s3, 0xffff, s3 2149; GFX9-NEXT: s_lshl_b32 s8, s8, 16 2150; GFX9-NEXT: s_or_b32 s3, s3, s8 2151; GFX9-NEXT: s_lshr_b32 s8, s4, 8 2152; GFX9-NEXT: s_and_b32 s8, s8, 0xff 2153; GFX9-NEXT: s_lshr_b32 s9, s4, 16 2154; GFX9-NEXT: s_lshr_b32 s10, s4, 24 2155; GFX9-NEXT: s_and_b32 s4, s4, 0xff 2156; GFX9-NEXT: s_lshl_b32 s8, s8, 8 2157; GFX9-NEXT: v_mul_hi_u32 v1, v0, v1 2158; GFX9-NEXT: s_or_b32 s4, s4, s8 2159; GFX9-NEXT: s_and_b32 s8, s9, 0xff 2160; GFX9-NEXT: s_and_b32 s8, 0xffff, s8 2161; GFX9-NEXT: s_and_b32 s4, 0xffff, s4 2162; GFX9-NEXT: s_lshl_b32 s8, s8, 16 2163; GFX9-NEXT: s_or_b32 s4, s4, s8 2164; GFX9-NEXT: v_add_u32_e32 v0, v0, v1 2165; GFX9-NEXT: v_mul_hi_u32 v1, s4, v0 2166; GFX9-NEXT: s_lshr_b32 s11, s5, 8 2167; GFX9-NEXT: s_and_b32 s5, s5, 0xff 2168; GFX9-NEXT: s_lshl_b32 s5, s5, 8 2169; GFX9-NEXT: s_and_b32 s8, s11, 0xff 2170; GFX9-NEXT: s_or_b32 s5, s10, s5 2171; GFX9-NEXT: s_and_b32 s8, 0xffff, s8 2172; GFX9-NEXT: s_and_b32 s5, 0xffff, s5 2173; GFX9-NEXT: v_mul_lo_u32 v1, v1, 24 2174; GFX9-NEXT: s_lshl_b32 s8, s8, 16 2175; GFX9-NEXT: s_or_b32 s5, s5, s8 2176; GFX9-NEXT: v_mul_hi_u32 v0, s5, v0 2177; GFX9-NEXT: v_sub_u32_e32 v1, s4, v1 2178; GFX9-NEXT: v_add_u32_e32 v2, 0xffffffe8, v1 2179; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 2180; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 2181; GFX9-NEXT: v_mul_lo_u32 v0, v0, 24 2182; GFX9-NEXT: v_add_u32_e32 v2, 0xffffffe8, v1 2183; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v1 2184; GFX9-NEXT: s_and_b32 s0, 0xffff, s0 2185; GFX9-NEXT: s_and_b32 s6, 0xffff, s6 2186; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc 2187; GFX9-NEXT: v_sub_u32_e32 v2, 23, v1 2188; GFX9-NEXT: s_lshl_b32 s4, s6, 17 2189; GFX9-NEXT: s_lshl_b32 s0, s0, 1 2190; GFX9-NEXT: v_and_b32_e32 v1, 0xffffff, v1 2191; GFX9-NEXT: s_or_b32 s0, s4, s0 2192; GFX9-NEXT: v_and_b32_e32 v2, 
0xffffff, v2 2193; GFX9-NEXT: v_lshrrev_b32_e64 v1, v1, s2 2194; GFX9-NEXT: v_sub_u32_e32 v0, s5, v0 2195; GFX9-NEXT: v_lshl_or_b32 v1, s0, v2, v1 2196; GFX9-NEXT: v_add_u32_e32 v2, 0xffffffe8, v0 2197; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 2198; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 2199; GFX9-NEXT: v_add_u32_e32 v2, 0xffffffe8, v0 2200; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v0 2201; GFX9-NEXT: s_and_b32 s1, 0xffff, s1 2202; GFX9-NEXT: s_and_b32 s7, 0xffff, s7 2203; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 2204; GFX9-NEXT: v_sub_u32_e32 v2, 23, v0 2205; GFX9-NEXT: s_lshl_b32 s0, s7, 17 2206; GFX9-NEXT: s_lshl_b32 s1, s1, 1 2207; GFX9-NEXT: v_and_b32_e32 v0, 0xffffff, v0 2208; GFX9-NEXT: s_or_b32 s0, s0, s1 2209; GFX9-NEXT: v_and_b32_e32 v2, 0xffffff, v2 2210; GFX9-NEXT: v_lshrrev_b32_e64 v0, v0, s3 2211; GFX9-NEXT: v_mov_b32_e32 v3, 8 2212; GFX9-NEXT: v_lshl_or_b32 v0, s0, v2, v0 2213; GFX9-NEXT: v_mov_b32_e32 v2, 0xff 2214; GFX9-NEXT: v_lshlrev_b32_sdwa v3, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 2215; GFX9-NEXT: v_and_or_b32 v2, v1, v2, v3 2216; GFX9-NEXT: v_mov_b32_e32 v3, 16 2217; GFX9-NEXT: v_lshlrev_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 2218; GFX9-NEXT: v_and_b32_e32 v3, 0xff, v0 2219; GFX9-NEXT: v_lshlrev_b32_e32 v3, 24, v3 2220; GFX9-NEXT: v_or3_b32 v1, v2, v1, v3 2221; GFX9-NEXT: v_bfe_u32 v2, v0, 8, 8 2222; GFX9-NEXT: v_bfe_u32 v0, v0, 16, 8 2223; GFX9-NEXT: v_lshl_or_b32 v0, v0, 8, v2 2224; GFX9-NEXT: v_readfirstlane_b32 s0, v1 2225; GFX9-NEXT: v_readfirstlane_b32 s1, v0 2226; GFX9-NEXT: ; return to shader part epilog 2227; 2228; GFX10-LABEL: s_fshr_v2i24: 2229; GFX10: ; %bb.0: 2230; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 2231; GFX10-NEXT: s_lshr_b32 s14, s4, 8 2232; GFX10-NEXT: s_lshr_b32 s15, s4, 16 2233; GFX10-NEXT: s_and_b32 s14, s14, 0xff 2234; GFX10-NEXT: s_lshr_b32 s16, s4, 24 2235; GFX10-NEXT: v_rcp_iflag_f32_e32 v0, v0 2236; GFX10-NEXT: s_and_b32 s4, 
s4, 0xff 2237; GFX10-NEXT: s_and_b32 s15, s15, 0xff 2238; GFX10-NEXT: s_lshl_b32 s14, s14, 8 2239; GFX10-NEXT: s_and_b32 s15, 0xffff, s15 2240; GFX10-NEXT: s_or_b32 s4, s4, s14 2241; GFX10-NEXT: s_lshr_b32 s17, s5, 8 2242; GFX10-NEXT: s_and_b32 s5, s5, 0xff 2243; GFX10-NEXT: s_lshl_b32 s14, s15, 16 2244; GFX10-NEXT: s_and_b32 s4, 0xffff, s4 2245; GFX10-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 2246; GFX10-NEXT: s_lshl_b32 s5, s5, 8 2247; GFX10-NEXT: s_and_b32 s15, s17, 0xff 2248; GFX10-NEXT: s_or_b32 s4, s4, s14 2249; GFX10-NEXT: s_or_b32 s5, s16, s5 2250; GFX10-NEXT: v_cvt_u32_f32_e32 v0, v0 2251; GFX10-NEXT: s_and_b32 s14, 0xffff, s15 2252; GFX10-NEXT: s_and_b32 s5, 0xffff, s5 2253; GFX10-NEXT: s_lshl_b32 s14, s14, 16 2254; GFX10-NEXT: s_lshr_b32 s9, s1, 8 2255; GFX10-NEXT: v_mul_lo_u32 v1, 0xffffffe8, v0 2256; GFX10-NEXT: s_or_b32 s5, s5, s14 2257; GFX10-NEXT: s_and_b32 s1, s1, 0xff 2258; GFX10-NEXT: s_lshr_b32 s10, s2, 8 2259; GFX10-NEXT: s_lshr_b32 s8, s0, 24 2260; GFX10-NEXT: s_lshr_b32 s11, s2, 16 2261; GFX10-NEXT: s_lshl_b32 s1, s1, 8 2262; GFX10-NEXT: s_and_b32 s9, s9, 0xff 2263; GFX10-NEXT: v_mul_hi_u32 v1, v0, v1 2264; GFX10-NEXT: s_and_b32 s10, s10, 0xff 2265; GFX10-NEXT: s_lshr_b32 s12, s2, 24 2266; GFX10-NEXT: s_and_b32 s2, s2, 0xff 2267; GFX10-NEXT: s_or_b32 s1, s8, s1 2268; GFX10-NEXT: s_and_b32 s8, 0xffff, s9 2269; GFX10-NEXT: s_lshl_b32 s9, s10, 8 2270; GFX10-NEXT: s_lshr_b32 s6, s0, 8 2271; GFX10-NEXT: v_add_nc_u32_e32 v0, v0, v1 2272; GFX10-NEXT: s_or_b32 s2, s2, s9 2273; GFX10-NEXT: s_lshr_b32 s13, s3, 8 2274; GFX10-NEXT: s_and_b32 s2, 0xffff, s2 2275; GFX10-NEXT: s_and_b32 s3, s3, 0xff 2276; GFX10-NEXT: v_mul_hi_u32 v1, s4, v0 2277; GFX10-NEXT: v_mul_hi_u32 v0, s5, v0 2278; GFX10-NEXT: s_and_b32 s6, s6, 0xff 2279; GFX10-NEXT: s_lshl_b32 s3, s3, 8 2280; GFX10-NEXT: s_lshr_b32 s7, s0, 16 2281; GFX10-NEXT: s_and_b32 s0, s0, 0xff 2282; GFX10-NEXT: s_lshl_b32 s6, s6, 8 2283; GFX10-NEXT: s_or_b32 s3, s12, s3 2284; GFX10-NEXT: v_mul_lo_u32 v1, v1, 24 
2285; GFX10-NEXT: v_mul_lo_u32 v0, v0, 24 2286; GFX10-NEXT: s_and_b32 s7, s7, 0xff 2287; GFX10-NEXT: s_or_b32 s0, s0, s6 2288; GFX10-NEXT: s_and_b32 s3, 0xffff, s3 2289; GFX10-NEXT: s_and_b32 s7, 0xffff, s7 2290; GFX10-NEXT: s_and_b32 s0, 0xffff, s0 2291; GFX10-NEXT: s_and_b32 s1, 0xffff, s1 2292; GFX10-NEXT: v_sub_nc_u32_e32 v1, s4, v1 2293; GFX10-NEXT: v_sub_nc_u32_e32 v0, s5, v0 2294; GFX10-NEXT: s_and_b32 s4, s11, 0xff 2295; GFX10-NEXT: s_lshl_b32 s0, s0, 1 2296; GFX10-NEXT: s_and_b32 s4, 0xffff, s4 2297; GFX10-NEXT: v_add_nc_u32_e32 v2, 0xffffffe8, v1 2298; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1 2299; GFX10-NEXT: s_lshl_b32 s4, s4, 16 2300; GFX10-NEXT: s_lshl_b32 s1, s1, 1 2301; GFX10-NEXT: s_or_b32 s2, s2, s4 2302; GFX10-NEXT: s_and_b32 s4, s13, 0xff 2303; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc_lo 2304; GFX10-NEXT: v_add_nc_u32_e32 v2, 0xffffffe8, v0 2305; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 2306; GFX10-NEXT: s_and_b32 s4, 0xffff, s4 2307; GFX10-NEXT: v_add_nc_u32_e32 v3, 0xffffffe8, v1 2308; GFX10-NEXT: s_lshl_b32 s4, s4, 16 2309; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo 2310; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1 2311; GFX10-NEXT: s_or_b32 s3, s3, s4 2312; GFX10-NEXT: s_lshl_b32 s4, s7, 17 2313; GFX10-NEXT: v_add_nc_u32_e32 v2, 0xffffffe8, v0 2314; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc_lo 2315; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 2316; GFX10-NEXT: s_or_b32 s0, s4, s0 2317; GFX10-NEXT: v_sub_nc_u32_e32 v3, 23, v1 2318; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo 2319; GFX10-NEXT: v_and_b32_e32 v1, 0xffffff, v1 2320; GFX10-NEXT: v_and_b32_e32 v2, 0xffffff, v3 2321; GFX10-NEXT: v_sub_nc_u32_e32 v3, 23, v0 2322; GFX10-NEXT: v_and_b32_e32 v0, 0xffffff, v0 2323; GFX10-NEXT: v_lshrrev_b32_e64 v1, v1, s2 2324; GFX10-NEXT: s_lshl_b32 s2, s8, 17 2325; GFX10-NEXT: v_and_b32_e32 v3, 0xffffff, v3 2326; GFX10-NEXT: v_lshrrev_b32_e64 v0, v0, s3 2327; GFX10-NEXT: v_lshl_or_b32 v1, s0, v2, v1 2328; GFX10-NEXT: s_or_b32 
s0, s2, s1 2329; GFX10-NEXT: v_mov_b32_e32 v2, 8 2330; GFX10-NEXT: v_lshl_or_b32 v0, s0, v3, v0 2331; GFX10-NEXT: v_mov_b32_e32 v3, 16 2332; GFX10-NEXT: v_lshlrev_b32_sdwa v2, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_1 2333; GFX10-NEXT: v_and_b32_e32 v4, 0xff, v0 2334; GFX10-NEXT: v_and_or_b32 v2, 0xff, v1, v2 2335; GFX10-NEXT: v_lshlrev_b32_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_2 2336; GFX10-NEXT: v_lshlrev_b32_e32 v3, 24, v4 2337; GFX10-NEXT: v_bfe_u32 v4, v0, 8, 8 2338; GFX10-NEXT: v_bfe_u32 v0, v0, 16, 8 2339; GFX10-NEXT: v_or3_b32 v1, v2, v1, v3 2340; GFX10-NEXT: v_lshl_or_b32 v0, v0, 8, v4 2341; GFX10-NEXT: v_readfirstlane_b32 s0, v1 2342; GFX10-NEXT: v_readfirstlane_b32 s1, v0 2343; GFX10-NEXT: ; return to shader part epilog 2344; 2345; GFX11-LABEL: s_fshr_v2i24: 2346; GFX11: ; %bb.0: 2347; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v0, 24 2348; GFX11-NEXT: s_lshr_b32 s14, s4, 8 2349; GFX11-NEXT: s_lshr_b32 s15, s4, 16 2350; GFX11-NEXT: s_and_b32 s14, s14, 0xff 2351; GFX11-NEXT: s_lshr_b32 s16, s4, 24 2352; GFX11-NEXT: v_rcp_iflag_f32_e32 v0, v0 2353; GFX11-NEXT: s_and_b32 s4, s4, 0xff 2354; GFX11-NEXT: s_and_b32 s15, s15, 0xff 2355; GFX11-NEXT: s_lshl_b32 s14, s14, 8 2356; GFX11-NEXT: s_and_b32 s15, 0xffff, s15 2357; GFX11-NEXT: s_or_b32 s4, s4, s14 2358; GFX11-NEXT: s_lshr_b32 s17, s5, 8 2359; GFX11-NEXT: s_and_b32 s5, s5, 0xff 2360; GFX11-NEXT: s_lshl_b32 s14, s15, 16 2361; GFX11-NEXT: s_and_b32 s4, 0xffff, s4 2362; GFX11-NEXT: s_waitcnt_depctr 0xfff 2363; GFX11-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 2364; GFX11-NEXT: s_lshl_b32 s5, s5, 8 2365; GFX11-NEXT: s_and_b32 s15, s17, 0xff 2366; GFX11-NEXT: s_or_b32 s4, s4, s14 2367; GFX11-NEXT: s_or_b32 s5, s16, s5 2368; GFX11-NEXT: v_cvt_u32_f32_e32 v0, v0 2369; GFX11-NEXT: s_and_b32 s14, 0xffff, s15 2370; GFX11-NEXT: s_and_b32 s5, 0xffff, s5 2371; GFX11-NEXT: s_lshl_b32 s14, s14, 16 2372; GFX11-NEXT: s_lshr_b32 s10, s2, 8 2373; GFX11-NEXT: 
v_mul_lo_u32 v1, 0xffffffe8, v0 2374; GFX11-NEXT: s_or_b32 s5, s5, s14 2375; GFX11-NEXT: s_lshr_b32 s9, s1, 8 2376; GFX11-NEXT: s_and_b32 s1, s1, 0xff 2377; GFX11-NEXT: s_lshr_b32 s11, s2, 16 2378; GFX11-NEXT: s_and_b32 s10, s10, 0xff 2379; GFX11-NEXT: s_lshr_b32 s6, s0, 8 2380; GFX11-NEXT: s_lshr_b32 s8, s0, 24 2381; GFX11-NEXT: v_mul_hi_u32 v1, v0, v1 2382; GFX11-NEXT: s_lshr_b32 s12, s2, 24 2383; GFX11-NEXT: s_and_b32 s2, s2, 0xff 2384; GFX11-NEXT: s_lshl_b32 s1, s1, 8 2385; GFX11-NEXT: s_and_b32 s9, s9, 0xff 2386; GFX11-NEXT: s_and_b32 s11, s11, 0xff 2387; GFX11-NEXT: s_and_b32 s6, s6, 0xff 2388; GFX11-NEXT: s_or_b32 s1, s8, s1 2389; GFX11-NEXT: v_add_nc_u32_e32 v0, v0, v1 2390; GFX11-NEXT: s_and_b32 s8, 0xffff, s9 2391; GFX11-NEXT: s_and_b32 s9, 0xffff, s11 2392; GFX11-NEXT: s_lshr_b32 s7, s0, 16 2393; GFX11-NEXT: s_and_b32 s0, s0, 0xff 2394; GFX11-NEXT: v_mul_hi_u32 v1, s4, v0 2395; GFX11-NEXT: v_mul_hi_u32 v0, s5, v0 2396; GFX11-NEXT: s_lshr_b32 s13, s3, 8 2397; GFX11-NEXT: s_and_b32 s3, s3, 0xff 2398; GFX11-NEXT: s_lshl_b32 s6, s6, 8 2399; GFX11-NEXT: s_and_b32 s7, s7, 0xff 2400; GFX11-NEXT: s_lshl_b32 s3, s3, 8 2401; GFX11-NEXT: s_and_b32 s13, s13, 0xff 2402; GFX11-NEXT: v_mul_lo_u32 v1, v1, 24 2403; GFX11-NEXT: v_mul_lo_u32 v0, v0, 24 2404; GFX11-NEXT: s_or_b32 s0, s0, s6 2405; GFX11-NEXT: s_and_b32 s7, 0xffff, s7 2406; GFX11-NEXT: s_or_b32 s3, s12, s3 2407; GFX11-NEXT: s_and_b32 s0, 0xffff, s0 2408; GFX11-NEXT: s_and_b32 s3, 0xffff, s3 2409; GFX11-NEXT: s_lshl_b32 s0, s0, 1 2410; GFX11-NEXT: v_sub_nc_u32_e32 v1, s4, v1 2411; GFX11-NEXT: v_sub_nc_u32_e32 v0, s5, v0 2412; GFX11-NEXT: s_lshl_b32 s4, s10, 8 2413; GFX11-NEXT: s_and_b32 s10, 0xffff, s13 2414; GFX11-NEXT: s_or_b32 s2, s2, s4 2415; GFX11-NEXT: v_add_nc_u32_e32 v2, 0xffffffe8, v1 2416; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1 2417; GFX11-NEXT: s_lshl_b32 s4, s9, 16 2418; GFX11-NEXT: s_and_b32 s2, 0xffff, s2 2419; GFX11-NEXT: s_lshl_b32 s5, s10, 16 2420; GFX11-NEXT: s_or_b32 s2, s2, s4 2421; 
GFX11-NEXT: v_dual_cndmask_b32 v1, v1, v2 :: v_dual_add_nc_u32 v2, 0xffffffe8, v0 2422; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 2423; GFX11-NEXT: s_lshl_b32 s4, s7, 17 2424; GFX11-NEXT: s_and_b32 s1, 0xffff, s1 2425; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_4) | instid1(VALU_DEP_2) 2426; GFX11-NEXT: v_add_nc_u32_e32 v3, 0xffffffe8, v1 2427; GFX11-NEXT: s_or_b32 s0, s4, s0 2428; GFX11-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc_lo 2429; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v1 2430; GFX11-NEXT: s_lshl_b32 s1, s1, 1 2431; GFX11-NEXT: v_dual_cndmask_b32 v1, v1, v3 :: v_dual_add_nc_u32 v2, 0xffffffe8, v0 2432; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v0 2433; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3) 2434; GFX11-NEXT: v_sub_nc_u32_e32 v3, 23, v1 2435; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v2 :: v_dual_and_b32 v1, 0xffffff, v1 2436; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 2437; GFX11-NEXT: v_and_b32_e32 v2, 0xffffff, v3 2438; GFX11-NEXT: v_lshrrev_b32_e64 v1, v1, s2 2439; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_3) 2440; GFX11-NEXT: v_sub_nc_u32_e32 v3, 23, v0 2441; GFX11-NEXT: v_and_b32_e32 v0, 0xffffff, v0 2442; GFX11-NEXT: s_or_b32 s2, s3, s5 2443; GFX11-NEXT: v_lshl_or_b32 v1, s0, v2, v1 2444; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 2445; GFX11-NEXT: v_and_b32_e32 v2, 0xffffff, v3 2446; GFX11-NEXT: v_lshrrev_b32_e64 v0, v0, s2 2447; GFX11-NEXT: s_lshl_b32 s0, s8, 17 2448; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 2449; GFX11-NEXT: s_or_b32 s0, s0, s1 2450; GFX11-NEXT: v_bfe_u32 v3, v1, 16, 8 2451; GFX11-NEXT: v_lshl_or_b32 v0, s0, v2, v0 2452; GFX11-NEXT: v_bfe_u32 v2, v1, 8, 8 2453; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 2454; GFX11-NEXT: v_lshlrev_b32_e32 v3, 16, v3 2455; GFX11-NEXT: 
v_and_b32_e32 v4, 0xff, v0 2456; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) 2457; GFX11-NEXT: v_lshlrev_b32_e32 v2, 8, v2 2458; GFX11-NEXT: v_lshlrev_b32_e32 v4, 24, v4 2459; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3) 2460; GFX11-NEXT: v_and_or_b32 v1, 0xff, v1, v2 2461; GFX11-NEXT: v_bfe_u32 v2, v0, 8, 8 2462; GFX11-NEXT: v_bfe_u32 v0, v0, 16, 8 2463; GFX11-NEXT: v_or3_b32 v1, v1, v3, v4 2464; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 2465; GFX11-NEXT: v_lshl_or_b32 v0, v0, 8, v2 2466; GFX11-NEXT: v_readfirstlane_b32 s0, v1 2467; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) 2468; GFX11-NEXT: v_readfirstlane_b32 s1, v0 2469; GFX11-NEXT: ; return to shader part epilog 2470 %lhs = bitcast i48 %lhs.arg to <2 x i24> 2471 %rhs = bitcast i48 %rhs.arg to <2 x i24> 2472 %amt = bitcast i48 %amt.arg to <2 x i24> 2473 %result = call <2 x i24> @llvm.fshr.v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) 2474 %cast.result = bitcast <2 x i24> %result to i48 2475 ret i48 %cast.result 2476} 2477 2478define <2 x i24> @v_fshr_v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) { 2479; GFX6-LABEL: v_fshr_v2i24: 2480; GFX6: ; %bb.0: 2481; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2482; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v6, 24 2483; GFX6-NEXT: v_rcp_iflag_f32_e32 v6, v6 2484; GFX6-NEXT: v_not_b32_e32 v7, 23 2485; GFX6-NEXT: v_and_b32_e32 v4, 0xffffff, v4 2486; GFX6-NEXT: v_and_b32_e32 v5, 0xffffff, v5 2487; GFX6-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 2488; GFX6-NEXT: v_cvt_u32_f32_e32 v6, v6 2489; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0 2490; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v2 2491; GFX6-NEXT: v_lshlrev_b32_e32 v1, 1, v1 2492; GFX6-NEXT: v_mul_lo_u32 v8, v6, v7 2493; GFX6-NEXT: v_and_b32_e32 v3, 0xffffff, v3 2494; GFX6-NEXT: v_mul_hi_u32 v8, v6, v8 2495; GFX6-NEXT: v_add_i32_e32 v6, vcc, v6, v8 2496; GFX6-NEXT: v_mul_hi_u32 v8, v4, v6 2497; 
GFX6-NEXT: v_mul_hi_u32 v6, v5, v6 2498; GFX6-NEXT: v_mul_lo_u32 v8, v8, 24 2499; GFX6-NEXT: v_mul_lo_u32 v6, v6, 24 2500; GFX6-NEXT: v_sub_i32_e32 v4, vcc, v4, v8 2501; GFX6-NEXT: v_add_i32_e32 v8, vcc, v4, v7 2502; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 2503; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc 2504; GFX6-NEXT: v_add_i32_e32 v8, vcc, v4, v7 2505; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 2506; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc 2507; GFX6-NEXT: v_sub_i32_e32 v8, vcc, 23, v4 2508; GFX6-NEXT: v_and_b32_e32 v8, 0xffffff, v8 2509; GFX6-NEXT: v_and_b32_e32 v4, 0xffffff, v4 2510; GFX6-NEXT: v_lshlrev_b32_e32 v0, v8, v0 2511; GFX6-NEXT: v_lshrrev_b32_e32 v2, v4, v2 2512; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 2513; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v5, v6 2514; GFX6-NEXT: v_add_i32_e32 v4, vcc, v2, v7 2515; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 2516; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 2517; GFX6-NEXT: v_add_i32_e32 v4, vcc, 0xffffffe8, v2 2518; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 2519; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 2520; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 23, v2 2521; GFX6-NEXT: v_and_b32_e32 v4, 0xffffff, v4 2522; GFX6-NEXT: v_and_b32_e32 v2, 0xffffff, v2 2523; GFX6-NEXT: v_lshlrev_b32_e32 v1, v4, v1 2524; GFX6-NEXT: v_lshrrev_b32_e32 v2, v2, v3 2525; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 2526; GFX6-NEXT: s_setpc_b64 s[30:31] 2527; 2528; GFX8-LABEL: v_fshr_v2i24: 2529; GFX8: ; %bb.0: 2530; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2531; GFX8-NEXT: v_cvt_f32_ubyte0_e32 v6, 24 2532; GFX8-NEXT: v_rcp_iflag_f32_e32 v6, v6 2533; GFX8-NEXT: v_not_b32_e32 v7, 23 2534; GFX8-NEXT: v_and_b32_e32 v4, 0xffffff, v4 2535; GFX8-NEXT: v_and_b32_e32 v5, 0xffffff, v5 2536; GFX8-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 2537; GFX8-NEXT: v_cvt_u32_f32_e32 v6, v6 2538; GFX8-NEXT: v_lshlrev_b32_e32 v0, 1, v0 2539; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v2 2540; GFX8-NEXT: v_lshlrev_b32_e32 v1, 1, v1 2541; GFX8-NEXT: v_mul_lo_u32 v8, v6, v7 
2542; GFX8-NEXT: v_and_b32_e32 v3, 0xffffff, v3 2543; GFX8-NEXT: v_mul_hi_u32 v8, v6, v8 2544; GFX8-NEXT: v_add_u32_e32 v6, vcc, v6, v8 2545; GFX8-NEXT: v_mul_hi_u32 v8, v4, v6 2546; GFX8-NEXT: v_mul_hi_u32 v6, v5, v6 2547; GFX8-NEXT: v_mul_lo_u32 v8, v8, 24 2548; GFX8-NEXT: v_mul_lo_u32 v6, v6, 24 2549; GFX8-NEXT: v_sub_u32_e32 v4, vcc, v4, v8 2550; GFX8-NEXT: v_add_u32_e32 v8, vcc, v4, v7 2551; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 2552; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc 2553; GFX8-NEXT: v_add_u32_e32 v8, vcc, v4, v7 2554; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 2555; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v8, vcc 2556; GFX8-NEXT: v_sub_u32_e32 v8, vcc, 23, v4 2557; GFX8-NEXT: v_and_b32_e32 v8, 0xffffff, v8 2558; GFX8-NEXT: v_and_b32_e32 v4, 0xffffff, v4 2559; GFX8-NEXT: v_lshlrev_b32_e32 v0, v8, v0 2560; GFX8-NEXT: v_lshrrev_b32_e32 v2, v4, v2 2561; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 2562; GFX8-NEXT: v_sub_u32_e32 v2, vcc, v5, v6 2563; GFX8-NEXT: v_add_u32_e32 v4, vcc, v2, v7 2564; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 2565; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 2566; GFX8-NEXT: v_add_u32_e32 v4, vcc, 0xffffffe8, v2 2567; GFX8-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 2568; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 2569; GFX8-NEXT: v_sub_u32_e32 v4, vcc, 23, v2 2570; GFX8-NEXT: v_and_b32_e32 v4, 0xffffff, v4 2571; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v2 2572; GFX8-NEXT: v_lshlrev_b32_e32 v1, v4, v1 2573; GFX8-NEXT: v_lshrrev_b32_e32 v2, v2, v3 2574; GFX8-NEXT: v_or_b32_e32 v1, v1, v2 2575; GFX8-NEXT: s_setpc_b64 s[30:31] 2576; 2577; GFX9-LABEL: v_fshr_v2i24: 2578; GFX9: ; %bb.0: 2579; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2580; GFX9-NEXT: v_cvt_f32_ubyte0_e32 v6, 24 2581; GFX9-NEXT: v_rcp_iflag_f32_e32 v6, v6 2582; GFX9-NEXT: v_not_b32_e32 v7, 23 2583; GFX9-NEXT: v_and_b32_e32 v4, 0xffffff, v4 2584; GFX9-NEXT: v_and_b32_e32 v5, 0xffffff, v5 2585; GFX9-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 2586; GFX9-NEXT: v_cvt_u32_f32_e32 v6, v6 
2587; GFX9-NEXT: v_and_b32_e32 v2, 0xffffff, v2 2588; GFX9-NEXT: v_lshlrev_b32_e32 v0, 1, v0 2589; GFX9-NEXT: v_and_b32_e32 v3, 0xffffff, v3 2590; GFX9-NEXT: v_mul_lo_u32 v7, v6, v7 2591; GFX9-NEXT: v_lshlrev_b32_e32 v1, 1, v1 2592; GFX9-NEXT: v_mul_hi_u32 v7, v6, v7 2593; GFX9-NEXT: v_add_u32_e32 v6, v6, v7 2594; GFX9-NEXT: v_mul_hi_u32 v7, v4, v6 2595; GFX9-NEXT: v_mul_hi_u32 v6, v5, v6 2596; GFX9-NEXT: v_mul_lo_u32 v7, v7, 24 2597; GFX9-NEXT: v_mul_lo_u32 v6, v6, 24 2598; GFX9-NEXT: v_sub_u32_e32 v4, v4, v7 2599; GFX9-NEXT: v_sub_u32_e32 v5, v5, v6 2600; GFX9-NEXT: v_add_u32_e32 v6, 0xffffffe8, v4 2601; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 2602; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc 2603; GFX9-NEXT: v_add_u32_e32 v6, 0xffffffe8, v4 2604; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v4 2605; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc 2606; GFX9-NEXT: v_sub_u32_e32 v6, 23, v4 2607; GFX9-NEXT: v_and_b32_e32 v4, 0xffffff, v4 2608; GFX9-NEXT: v_and_b32_e32 v6, 0xffffff, v6 2609; GFX9-NEXT: v_lshrrev_b32_e32 v2, v4, v2 2610; GFX9-NEXT: v_lshl_or_b32 v0, v0, v6, v2 2611; GFX9-NEXT: v_add_u32_e32 v2, 0xffffffe8, v5 2612; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v5 2613; GFX9-NEXT: v_cndmask_b32_e32 v2, v5, v2, vcc 2614; GFX9-NEXT: v_add_u32_e32 v4, 0xffffffe8, v2 2615; GFX9-NEXT: v_cmp_le_u32_e32 vcc, 24, v2 2616; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 2617; GFX9-NEXT: v_sub_u32_e32 v4, 23, v2 2618; GFX9-NEXT: v_and_b32_e32 v2, 0xffffff, v2 2619; GFX9-NEXT: v_and_b32_e32 v4, 0xffffff, v4 2620; GFX9-NEXT: v_lshrrev_b32_e32 v2, v2, v3 2621; GFX9-NEXT: v_lshl_or_b32 v1, v1, v4, v2 2622; GFX9-NEXT: s_setpc_b64 s[30:31] 2623; 2624; GFX10-LABEL: v_fshr_v2i24: 2625; GFX10: ; %bb.0: 2626; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2627; GFX10-NEXT: v_cvt_f32_ubyte0_e32 v6, 24 2628; GFX10-NEXT: v_and_b32_e32 v4, 0xffffff, v4 2629; GFX10-NEXT: v_and_b32_e32 v5, 0xffffff, v5 2630; GFX10-NEXT: v_and_b32_e32 v2, 0xffffff, v2 2631; GFX10-NEXT: v_and_b32_e32 v3, 
0xffffff, v3 2632; GFX10-NEXT: v_rcp_iflag_f32_e32 v6, v6 2633; GFX10-NEXT: v_lshlrev_b32_e32 v0, 1, v0 2634; GFX10-NEXT: v_lshlrev_b32_e32 v1, 1, v1 2635; GFX10-NEXT: v_mul_f32_e32 v6, 0x4f7ffffe, v6 2636; GFX10-NEXT: v_cvt_u32_f32_e32 v6, v6 2637; GFX10-NEXT: v_mul_lo_u32 v7, 0xffffffe8, v6 2638; GFX10-NEXT: v_mul_hi_u32 v7, v6, v7 2639; GFX10-NEXT: v_add_nc_u32_e32 v6, v6, v7 2640; GFX10-NEXT: v_mul_hi_u32 v7, v4, v6 2641; GFX10-NEXT: v_mul_hi_u32 v6, v5, v6 2642; GFX10-NEXT: v_mul_lo_u32 v7, v7, 24 2643; GFX10-NEXT: v_mul_lo_u32 v6, v6, 24 2644; GFX10-NEXT: v_sub_nc_u32_e32 v4, v4, v7 2645; GFX10-NEXT: v_sub_nc_u32_e32 v5, v5, v6 2646; GFX10-NEXT: v_add_nc_u32_e32 v6, 0xffffffe8, v4 2647; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v4 2648; GFX10-NEXT: v_add_nc_u32_e32 v7, 0xffffffe8, v5 2649; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc_lo 2650; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v5 2651; GFX10-NEXT: v_add_nc_u32_e32 v6, 0xffffffe8, v4 2652; GFX10-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc_lo 2653; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v4 2654; GFX10-NEXT: v_add_nc_u32_e32 v7, 0xffffffe8, v5 2655; GFX10-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc_lo 2656; GFX10-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v5 2657; GFX10-NEXT: v_sub_nc_u32_e32 v6, 23, v4 2658; GFX10-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc_lo 2659; GFX10-NEXT: v_and_b32_e32 v4, 0xffffff, v4 2660; GFX10-NEXT: v_and_b32_e32 v6, 0xffffff, v6 2661; GFX10-NEXT: v_sub_nc_u32_e32 v7, 23, v5 2662; GFX10-NEXT: v_and_b32_e32 v5, 0xffffff, v5 2663; GFX10-NEXT: v_lshrrev_b32_e32 v2, v4, v2 2664; GFX10-NEXT: v_and_b32_e32 v4, 0xffffff, v7 2665; GFX10-NEXT: v_lshrrev_b32_e32 v3, v5, v3 2666; GFX10-NEXT: v_lshl_or_b32 v0, v0, v6, v2 2667; GFX10-NEXT: v_lshl_or_b32 v1, v1, v4, v3 2668; GFX10-NEXT: s_setpc_b64 s[30:31] 2669; 2670; GFX11-LABEL: v_fshr_v2i24: 2671; GFX11: ; %bb.0: 2672; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2673; GFX11-NEXT: v_cvt_f32_ubyte0_e32 v6, 24 2674; GFX11-NEXT: v_and_b32_e32 v4, 
0xffffff, v4 2675; GFX11-NEXT: v_and_b32_e32 v2, 0xffffff, v2 2676; GFX11-NEXT: v_and_b32_e32 v3, 0xffffff, v3 2677; GFX11-NEXT: v_lshlrev_b32_e32 v0, 1, v0 2678; GFX11-NEXT: v_rcp_iflag_f32_e32 v6, v6 2679; GFX11-NEXT: s_waitcnt_depctr 0xfff 2680; GFX11-NEXT: v_dual_mul_f32 v6, 0x4f7ffffe, v6 :: v_dual_lshlrev_b32 v1, 1, v1 2681; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 2682; GFX11-NEXT: v_cvt_u32_f32_e32 v6, v6 2683; GFX11-NEXT: v_mul_lo_u32 v7, 0xffffffe8, v6 2684; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 2685; GFX11-NEXT: v_mul_hi_u32 v7, v6, v7 2686; GFX11-NEXT: v_add_nc_u32_e32 v6, v6, v7 2687; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 2688; GFX11-NEXT: v_mul_hi_u32 v7, v4, v6 2689; GFX11-NEXT: v_mul_lo_u32 v7, v7, 24 2690; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 2691; GFX11-NEXT: v_sub_nc_u32_e32 v4, v4, v7 2692; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v4 2693; GFX11-NEXT: v_and_b32_e32 v5, 0xffffff, v5 2694; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 2695; GFX11-NEXT: v_mul_hi_u32 v6, v5, v6 2696; GFX11-NEXT: v_mul_lo_u32 v6, v6, 24 2697; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1) 2698; GFX11-NEXT: v_sub_nc_u32_e32 v5, v5, v6 2699; GFX11-NEXT: v_add_nc_u32_e32 v6, 0xffffffe8, v4 2700; GFX11-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc_lo 2701; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) 2702; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v5 2703; GFX11-NEXT: v_add_nc_u32_e32 v6, 0xffffffe8, v4 2704; GFX11-NEXT: v_add_nc_u32_e32 v7, 0xffffffe8, v5 2705; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) 2706; GFX11-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc_lo 2707; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v4 2708; GFX11-NEXT: v_dual_cndmask_b32 v4, v4, 
v6 :: v_dual_add_nc_u32 v7, 0xffffffe8, v5 2709; GFX11-NEXT: v_cmp_le_u32_e32 vcc_lo, 24, v5 2710; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3) 2711; GFX11-NEXT: v_sub_nc_u32_e32 v6, 23, v4 2712; GFX11-NEXT: v_dual_cndmask_b32 v5, v5, v7 :: v_dual_and_b32 v4, 0xffffff, v4 2713; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 2714; GFX11-NEXT: v_and_b32_e32 v6, 0xffffff, v6 2715; GFX11-NEXT: v_sub_nc_u32_e32 v7, 23, v5 2716; GFX11-NEXT: v_and_b32_e32 v5, 0xffffff, v5 2717; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) 2718; GFX11-NEXT: v_lshrrev_b32_e32 v2, v4, v2 2719; GFX11-NEXT: v_and_b32_e32 v4, 0xffffff, v7 2720; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 2721; GFX11-NEXT: v_lshrrev_b32_e32 v3, v5, v3 2722; GFX11-NEXT: v_lshl_or_b32 v0, v0, v6, v2 2723; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) 2724; GFX11-NEXT: v_lshl_or_b32 v1, v1, v4, v3 2725; GFX11-NEXT: s_setpc_b64 s[30:31] 2726 %result = call <2 x i24> @llvm.fshr.v2i24(<2 x i24> %lhs, <2 x i24> %rhs, <2 x i24> %amt) 2727 ret <2 x i24> %result 2728} 2729 2730define amdgpu_ps i32 @s_fshr_i32(i32 inreg %lhs, i32 inreg %rhs, i32 inreg %amt) { 2731; GFX6-LABEL: s_fshr_i32: 2732; GFX6: ; %bb.0: 2733; GFX6-NEXT: v_mov_b32_e32 v0, s1 2734; GFX6-NEXT: v_mov_b32_e32 v1, s2 2735; GFX6-NEXT: v_alignbit_b32 v0, s0, v0, v1 2736; GFX6-NEXT: v_readfirstlane_b32 s0, v0 2737; GFX6-NEXT: ; return to shader part epilog 2738; 2739; GFX8-LABEL: s_fshr_i32: 2740; GFX8: ; %bb.0: 2741; GFX8-NEXT: v_mov_b32_e32 v0, s1 2742; GFX8-NEXT: v_mov_b32_e32 v1, s2 2743; GFX8-NEXT: v_alignbit_b32 v0, s0, v0, v1 2744; GFX8-NEXT: v_readfirstlane_b32 s0, v0 2745; GFX8-NEXT: ; return to shader part epilog 2746; 2747; GFX9-LABEL: s_fshr_i32: 2748; GFX9: ; %bb.0: 2749; GFX9-NEXT: v_mov_b32_e32 v0, s1 2750; GFX9-NEXT: v_mov_b32_e32 v1, s2 2751; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, v1 2752; 
GFX9-NEXT: v_readfirstlane_b32 s0, v0 2753; GFX9-NEXT: ; return to shader part epilog 2754; 2755; GFX10-LABEL: s_fshr_i32: 2756; GFX10: ; %bb.0: 2757; GFX10-NEXT: v_mov_b32_e32 v0, s2 2758; GFX10-NEXT: v_alignbit_b32 v0, s0, s1, v0 2759; GFX10-NEXT: v_readfirstlane_b32 s0, v0 2760; GFX10-NEXT: ; return to shader part epilog 2761; 2762; GFX11-LABEL: s_fshr_i32: 2763; GFX11: ; %bb.0: 2764; GFX11-NEXT: v_mov_b32_e32 v0, s2 2765; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 2766; GFX11-NEXT: v_alignbit_b32 v0, s0, s1, v0 2767; GFX11-NEXT: v_readfirstlane_b32 s0, v0 2768; GFX11-NEXT: ; return to shader part epilog 2769 %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 %amt) 2770 ret i32 %result 2771} 2772 2773define amdgpu_ps i32 @s_fshr_i32_5(i32 inreg %lhs, i32 inreg %rhs) { 2774; GFX6-LABEL: s_fshr_i32_5: 2775; GFX6: ; %bb.0: 2776; GFX6-NEXT: v_mov_b32_e32 v0, s1 2777; GFX6-NEXT: v_alignbit_b32 v0, s0, v0, 5 2778; GFX6-NEXT: v_readfirstlane_b32 s0, v0 2779; GFX6-NEXT: ; return to shader part epilog 2780; 2781; GFX8-LABEL: s_fshr_i32_5: 2782; GFX8: ; %bb.0: 2783; GFX8-NEXT: v_mov_b32_e32 v0, s1 2784; GFX8-NEXT: v_alignbit_b32 v0, s0, v0, 5 2785; GFX8-NEXT: v_readfirstlane_b32 s0, v0 2786; GFX8-NEXT: ; return to shader part epilog 2787; 2788; GFX9-LABEL: s_fshr_i32_5: 2789; GFX9: ; %bb.0: 2790; GFX9-NEXT: v_mov_b32_e32 v0, s1 2791; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, 5 2792; GFX9-NEXT: v_readfirstlane_b32 s0, v0 2793; GFX9-NEXT: ; return to shader part epilog 2794; 2795; GFX10-LABEL: s_fshr_i32_5: 2796; GFX10: ; %bb.0: 2797; GFX10-NEXT: v_alignbit_b32 v0, s0, s1, 5 2798; GFX10-NEXT: v_readfirstlane_b32 s0, v0 2799; GFX10-NEXT: ; return to shader part epilog 2800; 2801; GFX11-LABEL: s_fshr_i32_5: 2802; GFX11: ; %bb.0: 2803; GFX11-NEXT: v_alignbit_b32 v0, s0, s1, 5 2804; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 2805; GFX11-NEXT: v_readfirstlane_b32 s0, v0 2806; GFX11-NEXT: ; return to shader part epilog 2807 %result = 
call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 5) 2808 ret i32 %result 2809} 2810 2811define amdgpu_ps i32 @s_fshr_i32_8(i32 inreg %lhs, i32 inreg %rhs) { 2812; GFX6-LABEL: s_fshr_i32_8: 2813; GFX6: ; %bb.0: 2814; GFX6-NEXT: v_mov_b32_e32 v0, s1 2815; GFX6-NEXT: v_alignbit_b32 v0, s0, v0, 8 2816; GFX6-NEXT: v_readfirstlane_b32 s0, v0 2817; GFX6-NEXT: ; return to shader part epilog 2818; 2819; GFX8-LABEL: s_fshr_i32_8: 2820; GFX8: ; %bb.0: 2821; GFX8-NEXT: v_mov_b32_e32 v0, s1 2822; GFX8-NEXT: v_alignbit_b32 v0, s0, v0, 8 2823; GFX8-NEXT: v_readfirstlane_b32 s0, v0 2824; GFX8-NEXT: ; return to shader part epilog 2825; 2826; GFX9-LABEL: s_fshr_i32_8: 2827; GFX9: ; %bb.0: 2828; GFX9-NEXT: v_mov_b32_e32 v0, s1 2829; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, 8 2830; GFX9-NEXT: v_readfirstlane_b32 s0, v0 2831; GFX9-NEXT: ; return to shader part epilog 2832; 2833; GFX10-LABEL: s_fshr_i32_8: 2834; GFX10: ; %bb.0: 2835; GFX10-NEXT: v_alignbit_b32 v0, s0, s1, 8 2836; GFX10-NEXT: v_readfirstlane_b32 s0, v0 2837; GFX10-NEXT: ; return to shader part epilog 2838; 2839; GFX11-LABEL: s_fshr_i32_8: 2840; GFX11: ; %bb.0: 2841; GFX11-NEXT: v_alignbit_b32 v0, s0, s1, 8 2842; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 2843; GFX11-NEXT: v_readfirstlane_b32 s0, v0 2844; GFX11-NEXT: ; return to shader part epilog 2845 %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 8) 2846 ret i32 %result 2847} 2848 2849define i32 @v_fshr_i32(i32 %lhs, i32 %rhs, i32 %amt) { 2850; GCN-LABEL: v_fshr_i32: 2851; GCN: ; %bb.0: 2852; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2853; GCN-NEXT: v_alignbit_b32 v0, v0, v1, v2 2854; GCN-NEXT: s_setpc_b64 s[30:31] 2855; 2856; GFX11-LABEL: v_fshr_i32: 2857; GFX11: ; %bb.0: 2858; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2859; GFX11-NEXT: v_alignbit_b32 v0, v0, v1, v2 2860; GFX11-NEXT: s_setpc_b64 s[30:31] 2861 %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 %amt) 2862 ret i32 %result 2863} 2864 2865define i32 @v_fshr_i32_5(i32 %lhs, 
i32 %rhs) { 2866; GCN-LABEL: v_fshr_i32_5: 2867; GCN: ; %bb.0: 2868; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2869; GCN-NEXT: v_alignbit_b32 v0, v0, v1, 5 2870; GCN-NEXT: s_setpc_b64 s[30:31] 2871; 2872; GFX11-LABEL: v_fshr_i32_5: 2873; GFX11: ; %bb.0: 2874; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2875; GFX11-NEXT: v_alignbit_b32 v0, v0, v1, 5 2876; GFX11-NEXT: s_setpc_b64 s[30:31] 2877 %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 5) 2878 ret i32 %result 2879} 2880 2881define i32 @v_fshr_i32_8(i32 %lhs, i32 %rhs) { 2882; GCN-LABEL: v_fshr_i32_8: 2883; GCN: ; %bb.0: 2884; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2885; GCN-NEXT: v_alignbit_b32 v0, v0, v1, 8 2886; GCN-NEXT: s_setpc_b64 s[30:31] 2887; 2888; GFX11-LABEL: v_fshr_i32_8: 2889; GFX11: ; %bb.0: 2890; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2891; GFX11-NEXT: v_alignbit_b32 v0, v0, v1, 8 2892; GFX11-NEXT: s_setpc_b64 s[30:31] 2893 %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 8) 2894 ret i32 %result 2895} 2896 2897define amdgpu_ps float @v_fshr_i32_ssv(i32 inreg %lhs, i32 inreg %rhs, i32 %amt) { 2898; GFX6-LABEL: v_fshr_i32_ssv: 2899; GFX6: ; %bb.0: 2900; GFX6-NEXT: v_mov_b32_e32 v1, s1 2901; GFX6-NEXT: v_alignbit_b32 v0, s0, v1, v0 2902; GFX6-NEXT: ; return to shader part epilog 2903; 2904; GFX8-LABEL: v_fshr_i32_ssv: 2905; GFX8: ; %bb.0: 2906; GFX8-NEXT: v_mov_b32_e32 v1, s1 2907; GFX8-NEXT: v_alignbit_b32 v0, s0, v1, v0 2908; GFX8-NEXT: ; return to shader part epilog 2909; 2910; GFX9-LABEL: v_fshr_i32_ssv: 2911; GFX9: ; %bb.0: 2912; GFX9-NEXT: v_mov_b32_e32 v1, s1 2913; GFX9-NEXT: v_alignbit_b32 v0, s0, v1, v0 2914; GFX9-NEXT: ; return to shader part epilog 2915; 2916; GFX10-LABEL: v_fshr_i32_ssv: 2917; GFX10: ; %bb.0: 2918; GFX10-NEXT: v_alignbit_b32 v0, s0, s1, v0 2919; GFX10-NEXT: ; return to shader part epilog 2920; 2921; GFX11-LABEL: v_fshr_i32_ssv: 2922; GFX11: ; %bb.0: 2923; GFX11-NEXT: v_alignbit_b32 v0, s0, s1, v0 2924; GFX11-NEXT: 
; return to shader part epilog 2925 %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 %amt) 2926 %cast.result = bitcast i32 %result to float 2927 ret float %cast.result 2928} 2929 2930define amdgpu_ps float @v_fshr_i32_svs(i32 inreg %lhs, i32 %rhs, i32 inreg %amt) { 2931; GFX6-LABEL: v_fshr_i32_svs: 2932; GFX6: ; %bb.0: 2933; GFX6-NEXT: v_mov_b32_e32 v1, s1 2934; GFX6-NEXT: v_alignbit_b32 v0, s0, v0, v1 2935; GFX6-NEXT: ; return to shader part epilog 2936; 2937; GFX8-LABEL: v_fshr_i32_svs: 2938; GFX8: ; %bb.0: 2939; GFX8-NEXT: v_mov_b32_e32 v1, s1 2940; GFX8-NEXT: v_alignbit_b32 v0, s0, v0, v1 2941; GFX8-NEXT: ; return to shader part epilog 2942; 2943; GFX9-LABEL: v_fshr_i32_svs: 2944; GFX9: ; %bb.0: 2945; GFX9-NEXT: v_mov_b32_e32 v1, s1 2946; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, v1 2947; GFX9-NEXT: ; return to shader part epilog 2948; 2949; GFX10-LABEL: v_fshr_i32_svs: 2950; GFX10: ; %bb.0: 2951; GFX10-NEXT: v_alignbit_b32 v0, s0, v0, s1 2952; GFX10-NEXT: ; return to shader part epilog 2953; 2954; GFX11-LABEL: v_fshr_i32_svs: 2955; GFX11: ; %bb.0: 2956; GFX11-NEXT: v_alignbit_b32 v0, s0, v0, s1 2957; GFX11-NEXT: ; return to shader part epilog 2958 %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 %amt) 2959 %cast.result = bitcast i32 %result to float 2960 ret float %cast.result 2961} 2962 2963define amdgpu_ps float @v_fshr_i32_vss(i32 inreg %lhs, i32 inreg %rhs, i32 inreg %amt) { 2964; GFX6-LABEL: v_fshr_i32_vss: 2965; GFX6: ; %bb.0: 2966; GFX6-NEXT: v_mov_b32_e32 v0, s1 2967; GFX6-NEXT: v_mov_b32_e32 v1, s2 2968; GFX6-NEXT: v_alignbit_b32 v0, s0, v0, v1 2969; GFX6-NEXT: ; return to shader part epilog 2970; 2971; GFX8-LABEL: v_fshr_i32_vss: 2972; GFX8: ; %bb.0: 2973; GFX8-NEXT: v_mov_b32_e32 v0, s1 2974; GFX8-NEXT: v_mov_b32_e32 v1, s2 2975; GFX8-NEXT: v_alignbit_b32 v0, s0, v0, v1 2976; GFX8-NEXT: ; return to shader part epilog 2977; 2978; GFX9-LABEL: v_fshr_i32_vss: 2979; GFX9: ; %bb.0: 2980; GFX9-NEXT: v_mov_b32_e32 v0, s1 2981; GFX9-NEXT: 
v_mov_b32_e32 v1, s2 2982; GFX9-NEXT: v_alignbit_b32 v0, s0, v0, v1 2983; GFX9-NEXT: ; return to shader part epilog 2984; 2985; GFX10-LABEL: v_fshr_i32_vss: 2986; GFX10: ; %bb.0: 2987; GFX10-NEXT: v_mov_b32_e32 v0, s2 2988; GFX10-NEXT: v_alignbit_b32 v0, s0, s1, v0 2989; GFX10-NEXT: ; return to shader part epilog 2990; 2991; GFX11-LABEL: v_fshr_i32_vss: 2992; GFX11: ; %bb.0: 2993; GFX11-NEXT: v_mov_b32_e32 v0, s2 2994; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 2995; GFX11-NEXT: v_alignbit_b32 v0, s0, s1, v0 2996; GFX11-NEXT: ; return to shader part epilog 2997 %result = call i32 @llvm.fshr.i32(i32 %lhs, i32 %rhs, i32 %amt) 2998 %cast.result = bitcast i32 %result to float 2999 ret float %cast.result 3000} 3001 3002define <2 x i32> @v_fshr_v2i32(<2 x i32> %lhs, <2 x i32> %rhs, <2 x i32> %amt) { 3003; GCN-LABEL: v_fshr_v2i32: 3004; GCN: ; %bb.0: 3005; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3006; GCN-NEXT: v_alignbit_b32 v0, v0, v2, v4 3007; GCN-NEXT: v_alignbit_b32 v1, v1, v3, v5 3008; GCN-NEXT: s_setpc_b64 s[30:31] 3009; 3010; GFX11-LABEL: v_fshr_v2i32: 3011; GFX11: ; %bb.0: 3012; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3013; GFX11-NEXT: v_alignbit_b32 v0, v0, v2, v4 3014; GFX11-NEXT: v_alignbit_b32 v1, v1, v3, v5 3015; GFX11-NEXT: s_setpc_b64 s[30:31] 3016 %result = call <2 x i32> @llvm.fshr.v2i32(<2 x i32> %lhs, <2 x i32> %rhs, <2 x i32> %amt) 3017 ret <2 x i32> %result 3018} 3019 3020define <3 x i32> @v_fshr_v3i32(<3 x i32> %lhs, <3 x i32> %rhs, <3 x i32> %amt) { 3021; GCN-LABEL: v_fshr_v3i32: 3022; GCN: ; %bb.0: 3023; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3024; GCN-NEXT: v_alignbit_b32 v0, v0, v3, v6 3025; GCN-NEXT: v_alignbit_b32 v1, v1, v4, v7 3026; GCN-NEXT: v_alignbit_b32 v2, v2, v5, v8 3027; GCN-NEXT: s_setpc_b64 s[30:31] 3028; 3029; GFX11-LABEL: v_fshr_v3i32: 3030; GFX11: ; %bb.0: 3031; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3032; GFX11-NEXT: v_alignbit_b32 v0, v0, v3, v6 3033; GFX11-NEXT: v_alignbit_b32 v1, 
v1, v4, v7 3034; GFX11-NEXT: v_alignbit_b32 v2, v2, v5, v8 3035; GFX11-NEXT: s_setpc_b64 s[30:31] 3036 %result = call <3 x i32> @llvm.fshr.v3i32(<3 x i32> %lhs, <3 x i32> %rhs, <3 x i32> %amt) 3037 ret <3 x i32> %result 3038} 3039 3040define <4 x i32> @v_fshr_v4i32(<4 x i32> %lhs, <4 x i32> %rhs, <4 x i32> %amt) { 3041; GCN-LABEL: v_fshr_v4i32: 3042; GCN: ; %bb.0: 3043; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3044; GCN-NEXT: v_alignbit_b32 v0, v0, v4, v8 3045; GCN-NEXT: v_alignbit_b32 v1, v1, v5, v9 3046; GCN-NEXT: v_alignbit_b32 v2, v2, v6, v10 3047; GCN-NEXT: v_alignbit_b32 v3, v3, v7, v11 3048; GCN-NEXT: s_setpc_b64 s[30:31] 3049; 3050; GFX11-LABEL: v_fshr_v4i32: 3051; GFX11: ; %bb.0: 3052; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3053; GFX11-NEXT: v_alignbit_b32 v0, v0, v4, v8 3054; GFX11-NEXT: v_alignbit_b32 v1, v1, v5, v9 3055; GFX11-NEXT: v_alignbit_b32 v2, v2, v6, v10 3056; GFX11-NEXT: v_alignbit_b32 v3, v3, v7, v11 3057; GFX11-NEXT: s_setpc_b64 s[30:31] 3058 %result = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %lhs, <4 x i32> %rhs, <4 x i32> %amt) 3059 ret <4 x i32> %result 3060} 3061 3062define amdgpu_ps i16 @s_fshr_i16(i16 inreg %lhs, i16 inreg %rhs, i16 inreg %amt) { 3063; GFX6-LABEL: s_fshr_i16: 3064; GFX6: ; %bb.0: 3065; GFX6-NEXT: s_and_b32 s3, s2, 15 3066; GFX6-NEXT: s_andn2_b32 s2, 15, s2 3067; GFX6-NEXT: s_lshl_b32 s0, s0, 1 3068; GFX6-NEXT: s_and_b32 s2, 0xffff, s2 3069; GFX6-NEXT: s_lshl_b32 s0, s0, s2 3070; GFX6-NEXT: s_and_b32 s2, 0xffff, s3 3071; GFX6-NEXT: s_and_b32 s1, s1, 0xffff 3072; GFX6-NEXT: s_lshr_b32 s1, s1, s2 3073; GFX6-NEXT: s_or_b32 s0, s0, s1 3074; GFX6-NEXT: ; return to shader part epilog 3075; 3076; GFX8-LABEL: s_fshr_i16: 3077; GFX8: ; %bb.0: 3078; GFX8-NEXT: s_and_b32 s3, s2, 15 3079; GFX8-NEXT: s_andn2_b32 s2, 15, s2 3080; GFX8-NEXT: s_lshl_b32 s0, s0, 1 3081; GFX8-NEXT: s_and_b32 s2, 0xffff, s2 3082; GFX8-NEXT: s_lshl_b32 s0, s0, s2 3083; GFX8-NEXT: s_and_b32 s1, 0xffff, s1 3084; GFX8-NEXT: s_and_b32 
s2, 0xffff, s3 3085; GFX8-NEXT: s_lshr_b32 s1, s1, s2 3086; GFX8-NEXT: s_or_b32 s0, s0, s1 3087; GFX8-NEXT: ; return to shader part epilog 3088; 3089; GFX9-LABEL: s_fshr_i16: 3090; GFX9: ; %bb.0: 3091; GFX9-NEXT: s_and_b32 s3, s2, 15 3092; GFX9-NEXT: s_andn2_b32 s2, 15, s2 3093; GFX9-NEXT: s_lshl_b32 s0, s0, 1 3094; GFX9-NEXT: s_and_b32 s2, 0xffff, s2 3095; GFX9-NEXT: s_lshl_b32 s0, s0, s2 3096; GFX9-NEXT: s_and_b32 s1, 0xffff, s1 3097; GFX9-NEXT: s_and_b32 s2, 0xffff, s3 3098; GFX9-NEXT: s_lshr_b32 s1, s1, s2 3099; GFX9-NEXT: s_or_b32 s0, s0, s1 3100; GFX9-NEXT: ; return to shader part epilog 3101; 3102; GFX10-LABEL: s_fshr_i16: 3103; GFX10: ; %bb.0: 3104; GFX10-NEXT: s_and_b32 s3, s2, 15 3105; GFX10-NEXT: s_andn2_b32 s2, 15, s2 3106; GFX10-NEXT: s_lshl_b32 s0, s0, 1 3107; GFX10-NEXT: s_and_b32 s2, 0xffff, s2 3108; GFX10-NEXT: s_and_b32 s1, 0xffff, s1 3109; GFX10-NEXT: s_and_b32 s3, 0xffff, s3 3110; GFX10-NEXT: s_lshl_b32 s0, s0, s2 3111; GFX10-NEXT: s_lshr_b32 s1, s1, s3 3112; GFX10-NEXT: s_or_b32 s0, s0, s1 3113; GFX10-NEXT: ; return to shader part epilog 3114; 3115; GFX11-LABEL: s_fshr_i16: 3116; GFX11: ; %bb.0: 3117; GFX11-NEXT: s_and_b32 s3, s2, 15 3118; GFX11-NEXT: s_and_not1_b32 s2, 15, s2 3119; GFX11-NEXT: s_lshl_b32 s0, s0, 1 3120; GFX11-NEXT: s_and_b32 s2, 0xffff, s2 3121; GFX11-NEXT: s_and_b32 s1, 0xffff, s1 3122; GFX11-NEXT: s_and_b32 s3, 0xffff, s3 3123; GFX11-NEXT: s_lshl_b32 s0, s0, s2 3124; GFX11-NEXT: s_lshr_b32 s1, s1, s3 3125; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 3126; GFX11-NEXT: s_or_b32 s0, s0, s1 3127; GFX11-NEXT: ; return to shader part epilog 3128 %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 %amt) 3129 ret i16 %result 3130} 3131 3132define amdgpu_ps i16 @s_fshr_i16_4(i16 inreg %lhs, i16 inreg %rhs) { 3133; GFX6-LABEL: s_fshr_i16_4: 3134; GFX6: ; %bb.0: 3135; GFX6-NEXT: s_lshl_b32 s0, s0, 12 3136; GFX6-NEXT: s_bfe_u32 s1, s1, 0xc0004 3137; GFX6-NEXT: s_or_b32 s0, s0, s1 3138; GFX6-NEXT: ; return to shader part epilog 
3139; 3140; GFX8-LABEL: s_fshr_i16_4: 3141; GFX8: ; %bb.0: 3142; GFX8-NEXT: s_and_b32 s1, 0xffff, s1 3143; GFX8-NEXT: s_lshl_b32 s0, s0, 12 3144; GFX8-NEXT: s_lshr_b32 s1, s1, 4 3145; GFX8-NEXT: s_or_b32 s0, s0, s1 3146; GFX8-NEXT: ; return to shader part epilog 3147; 3148; GFX9-LABEL: s_fshr_i16_4: 3149; GFX9: ; %bb.0: 3150; GFX9-NEXT: s_and_b32 s1, 0xffff, s1 3151; GFX9-NEXT: s_lshl_b32 s0, s0, 12 3152; GFX9-NEXT: s_lshr_b32 s1, s1, 4 3153; GFX9-NEXT: s_or_b32 s0, s0, s1 3154; GFX9-NEXT: ; return to shader part epilog 3155; 3156; GFX10-LABEL: s_fshr_i16_4: 3157; GFX10: ; %bb.0: 3158; GFX10-NEXT: s_and_b32 s1, 0xffff, s1 3159; GFX10-NEXT: s_lshl_b32 s0, s0, 12 3160; GFX10-NEXT: s_lshr_b32 s1, s1, 4 3161; GFX10-NEXT: s_or_b32 s0, s0, s1 3162; GFX10-NEXT: ; return to shader part epilog 3163; 3164; GFX11-LABEL: s_fshr_i16_4: 3165; GFX11: ; %bb.0: 3166; GFX11-NEXT: s_and_b32 s1, 0xffff, s1 3167; GFX11-NEXT: s_lshl_b32 s0, s0, 12 3168; GFX11-NEXT: s_lshr_b32 s1, s1, 4 3169; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 3170; GFX11-NEXT: s_or_b32 s0, s0, s1 3171; GFX11-NEXT: ; return to shader part epilog 3172 %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 4) 3173 ret i16 %result 3174} 3175 3176define amdgpu_ps i16 @s_fshr_i16_5(i16 inreg %lhs, i16 inreg %rhs) { 3177; GFX6-LABEL: s_fshr_i16_5: 3178; GFX6: ; %bb.0: 3179; GFX6-NEXT: s_lshl_b32 s0, s0, 11 3180; GFX6-NEXT: s_bfe_u32 s1, s1, 0xb0005 3181; GFX6-NEXT: s_or_b32 s0, s0, s1 3182; GFX6-NEXT: ; return to shader part epilog 3183; 3184; GFX8-LABEL: s_fshr_i16_5: 3185; GFX8: ; %bb.0: 3186; GFX8-NEXT: s_and_b32 s1, 0xffff, s1 3187; GFX8-NEXT: s_lshl_b32 s0, s0, 11 3188; GFX8-NEXT: s_lshr_b32 s1, s1, 5 3189; GFX8-NEXT: s_or_b32 s0, s0, s1 3190; GFX8-NEXT: ; return to shader part epilog 3191; 3192; GFX9-LABEL: s_fshr_i16_5: 3193; GFX9: ; %bb.0: 3194; GFX9-NEXT: s_and_b32 s1, 0xffff, s1 3195; GFX9-NEXT: s_lshl_b32 s0, s0, 11 3196; GFX9-NEXT: s_lshr_b32 s1, s1, 5 3197; GFX9-NEXT: s_or_b32 s0, s0, s1 3198; 
GFX9-NEXT: ; return to shader part epilog 3199; 3200; GFX10-LABEL: s_fshr_i16_5: 3201; GFX10: ; %bb.0: 3202; GFX10-NEXT: s_and_b32 s1, 0xffff, s1 3203; GFX10-NEXT: s_lshl_b32 s0, s0, 11 3204; GFX10-NEXT: s_lshr_b32 s1, s1, 5 3205; GFX10-NEXT: s_or_b32 s0, s0, s1 3206; GFX10-NEXT: ; return to shader part epilog 3207; 3208; GFX11-LABEL: s_fshr_i16_5: 3209; GFX11: ; %bb.0: 3210; GFX11-NEXT: s_and_b32 s1, 0xffff, s1 3211; GFX11-NEXT: s_lshl_b32 s0, s0, 11 3212; GFX11-NEXT: s_lshr_b32 s1, s1, 5 3213; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 3214; GFX11-NEXT: s_or_b32 s0, s0, s1 3215; GFX11-NEXT: ; return to shader part epilog 3216 %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 5) 3217 ret i16 %result 3218} 3219 3220define i16 @v_fshr_i16(i16 %lhs, i16 %rhs, i16 %amt) { 3221; GFX6-LABEL: v_fshr_i16: 3222; GFX6: ; %bb.0: 3223; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3224; GFX6-NEXT: v_and_b32_e32 v3, 15, v2 3225; GFX6-NEXT: v_xor_b32_e32 v2, -1, v2 3226; GFX6-NEXT: v_and_b32_e32 v2, 15, v2 3227; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0 3228; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2 3229; GFX6-NEXT: v_lshlrev_b32_e32 v0, v2, v0 3230; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v3 3231; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1 3232; GFX6-NEXT: v_lshrrev_b32_e32 v1, v2, v1 3233; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 3234; GFX6-NEXT: s_setpc_b64 s[30:31] 3235; 3236; GFX8-LABEL: v_fshr_i16: 3237; GFX8: ; %bb.0: 3238; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3239; GFX8-NEXT: v_and_b32_e32 v3, 15, v2 3240; GFX8-NEXT: v_xor_b32_e32 v2, -1, v2 3241; GFX8-NEXT: v_and_b32_e32 v2, 15, v2 3242; GFX8-NEXT: v_lshlrev_b16_e32 v0, 1, v0 3243; GFX8-NEXT: v_lshlrev_b16_e32 v0, v2, v0 3244; GFX8-NEXT: v_lshrrev_b16_e32 v1, v3, v1 3245; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 3246; GFX8-NEXT: s_setpc_b64 s[30:31] 3247; 3248; GFX9-LABEL: v_fshr_i16: 3249; GFX9: ; %bb.0: 3250; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3251; GFX9-NEXT: v_and_b32_e32 v3, 15, v2 3252; 
GFX9-NEXT: v_xor_b32_e32 v2, -1, v2 3253; GFX9-NEXT: v_and_b32_e32 v2, 15, v2 3254; GFX9-NEXT: v_lshlrev_b16_e32 v0, 1, v0 3255; GFX9-NEXT: v_lshlrev_b16_e32 v0, v2, v0 3256; GFX9-NEXT: v_lshrrev_b16_e32 v1, v3, v1 3257; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 3258; GFX9-NEXT: s_setpc_b64 s[30:31] 3259; 3260; GFX10-LABEL: v_fshr_i16: 3261; GFX10: ; %bb.0: 3262; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3263; GFX10-NEXT: v_xor_b32_e32 v3, -1, v2 3264; GFX10-NEXT: v_lshlrev_b16 v0, 1, v0 3265; GFX10-NEXT: v_and_b32_e32 v2, 15, v2 3266; GFX10-NEXT: v_and_b32_e32 v3, 15, v3 3267; GFX10-NEXT: v_lshrrev_b16 v1, v2, v1 3268; GFX10-NEXT: v_lshlrev_b16 v0, v3, v0 3269; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 3270; GFX10-NEXT: s_setpc_b64 s[30:31] 3271; 3272; GFX11-LABEL: v_fshr_i16: 3273; GFX11: ; %bb.0: 3274; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3275; GFX11-NEXT: v_xor_b32_e32 v3, -1, v2 3276; GFX11-NEXT: v_lshlrev_b16 v0, 1, v0 3277; GFX11-NEXT: v_and_b32_e32 v2, 15, v2 3278; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) 3279; GFX11-NEXT: v_and_b32_e32 v3, 15, v3 3280; GFX11-NEXT: v_lshrrev_b16 v1, v2, v1 3281; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 3282; GFX11-NEXT: v_lshlrev_b16 v0, v3, v0 3283; GFX11-NEXT: v_or_b32_e32 v0, v0, v1 3284; GFX11-NEXT: s_setpc_b64 s[30:31] 3285 %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 %amt) 3286 ret i16 %result 3287} 3288 3289define i16 @v_fshr_i16_4(i16 %lhs, i16 %rhs) { 3290; GFX6-LABEL: v_fshr_i16_4: 3291; GFX6: ; %bb.0: 3292; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3293; GFX6-NEXT: v_lshlrev_b32_e32 v0, 12, v0 3294; GFX6-NEXT: v_bfe_u32 v1, v1, 4, 12 3295; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 3296; GFX6-NEXT: s_setpc_b64 s[30:31] 3297; 3298; GFX8-LABEL: v_fshr_i16_4: 3299; GFX8: ; %bb.0: 3300; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3301; GFX8-NEXT: v_lshlrev_b16_e32 v0, 12, v0 3302; GFX8-NEXT: 
v_lshrrev_b16_e32 v1, 4, v1 3303; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 3304; GFX8-NEXT: s_setpc_b64 s[30:31] 3305; 3306; GFX9-LABEL: v_fshr_i16_4: 3307; GFX9: ; %bb.0: 3308; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3309; GFX9-NEXT: v_lshlrev_b16_e32 v0, 12, v0 3310; GFX9-NEXT: v_lshrrev_b16_e32 v1, 4, v1 3311; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 3312; GFX9-NEXT: s_setpc_b64 s[30:31] 3313; 3314; GFX10-LABEL: v_fshr_i16_4: 3315; GFX10: ; %bb.0: 3316; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3317; GFX10-NEXT: v_lshlrev_b16 v0, 12, v0 3318; GFX10-NEXT: v_lshrrev_b16 v1, 4, v1 3319; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 3320; GFX10-NEXT: s_setpc_b64 s[30:31] 3321; 3322; GFX11-LABEL: v_fshr_i16_4: 3323; GFX11: ; %bb.0: 3324; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3325; GFX11-NEXT: v_lshlrev_b16 v0, 12, v0 3326; GFX11-NEXT: v_lshrrev_b16 v1, 4, v1 3327; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 3328; GFX11-NEXT: v_or_b32_e32 v0, v0, v1 3329; GFX11-NEXT: s_setpc_b64 s[30:31] 3330 %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 4) 3331 ret i16 %result 3332} 3333 3334define i16 @v_fshr_i16_5(i16 %lhs, i16 %rhs) { 3335; GFX6-LABEL: v_fshr_i16_5: 3336; GFX6: ; %bb.0: 3337; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3338; GFX6-NEXT: v_lshlrev_b32_e32 v0, 11, v0 3339; GFX6-NEXT: v_bfe_u32 v1, v1, 5, 11 3340; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 3341; GFX6-NEXT: s_setpc_b64 s[30:31] 3342; 3343; GFX8-LABEL: v_fshr_i16_5: 3344; GFX8: ; %bb.0: 3345; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3346; GFX8-NEXT: v_lshlrev_b16_e32 v0, 11, v0 3347; GFX8-NEXT: v_lshrrev_b16_e32 v1, 5, v1 3348; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 3349; GFX8-NEXT: s_setpc_b64 s[30:31] 3350; 3351; GFX9-LABEL: v_fshr_i16_5: 3352; GFX9: ; %bb.0: 3353; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3354; GFX9-NEXT: v_lshlrev_b16_e32 v0, 11, v0 3355; GFX9-NEXT: v_lshrrev_b16_e32 v1, 5, v1 3356; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 3357; GFX9-NEXT: 
s_setpc_b64 s[30:31] 3358; 3359; GFX10-LABEL: v_fshr_i16_5: 3360; GFX10: ; %bb.0: 3361; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3362; GFX10-NEXT: v_lshlrev_b16 v0, 11, v0 3363; GFX10-NEXT: v_lshrrev_b16 v1, 5, v1 3364; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 3365; GFX10-NEXT: s_setpc_b64 s[30:31] 3366; 3367; GFX11-LABEL: v_fshr_i16_5: 3368; GFX11: ; %bb.0: 3369; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3370; GFX11-NEXT: v_lshlrev_b16 v0, 11, v0 3371; GFX11-NEXT: v_lshrrev_b16 v1, 5, v1 3372; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 3373; GFX11-NEXT: v_or_b32_e32 v0, v0, v1 3374; GFX11-NEXT: s_setpc_b64 s[30:31] 3375 %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 5) 3376 ret i16 %result 3377} 3378 3379define amdgpu_ps half @v_fshr_i16_ssv(i16 inreg %lhs, i16 inreg %rhs, i16 %amt) { 3380; GFX6-LABEL: v_fshr_i16_ssv: 3381; GFX6: ; %bb.0: 3382; GFX6-NEXT: v_and_b32_e32 v1, 15, v0 3383; GFX6-NEXT: v_xor_b32_e32 v0, -1, v0 3384; GFX6-NEXT: v_and_b32_e32 v0, 15, v0 3385; GFX6-NEXT: s_lshl_b32 s0, s0, 1 3386; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 3387; GFX6-NEXT: v_lshl_b32_e32 v0, s0, v0 3388; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1 3389; GFX6-NEXT: s_and_b32 s0, s1, 0xffff 3390; GFX6-NEXT: v_lshr_b32_e32 v1, s0, v1 3391; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 3392; GFX6-NEXT: ; return to shader part epilog 3393; 3394; GFX8-LABEL: v_fshr_i16_ssv: 3395; GFX8: ; %bb.0: 3396; GFX8-NEXT: v_and_b32_e32 v1, 15, v0 3397; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0 3398; GFX8-NEXT: v_and_b32_e32 v0, 15, v0 3399; GFX8-NEXT: s_lshl_b32 s0, s0, 1 3400; GFX8-NEXT: v_lshlrev_b16_e64 v0, v0, s0 3401; GFX8-NEXT: v_lshrrev_b16_e64 v1, v1, s1 3402; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 3403; GFX8-NEXT: ; return to shader part epilog 3404; 3405; GFX9-LABEL: v_fshr_i16_ssv: 3406; GFX9: ; %bb.0: 3407; GFX9-NEXT: v_and_b32_e32 v1, 15, v0 3408; GFX9-NEXT: v_xor_b32_e32 v0, -1, v0 3409; GFX9-NEXT: v_and_b32_e32 v0, 15, v0 3410; GFX9-NEXT: s_lshl_b32 s0, s0, 1 3411; 
GFX9-NEXT: v_lshlrev_b16_e64 v0, v0, s0 3412; GFX9-NEXT: v_lshrrev_b16_e64 v1, v1, s1 3413; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 3414; GFX9-NEXT: ; return to shader part epilog 3415; 3416; GFX10-LABEL: v_fshr_i16_ssv: 3417; GFX10: ; %bb.0: 3418; GFX10-NEXT: v_xor_b32_e32 v1, -1, v0 3419; GFX10-NEXT: v_and_b32_e32 v0, 15, v0 3420; GFX10-NEXT: s_lshl_b32 s0, s0, 1 3421; GFX10-NEXT: v_and_b32_e32 v1, 15, v1 3422; GFX10-NEXT: v_lshrrev_b16 v0, v0, s1 3423; GFX10-NEXT: v_lshlrev_b16 v1, v1, s0 3424; GFX10-NEXT: v_or_b32_e32 v0, v1, v0 3425; GFX10-NEXT: ; return to shader part epilog 3426; 3427; GFX11-LABEL: v_fshr_i16_ssv: 3428; GFX11: ; %bb.0: 3429; GFX11-NEXT: v_xor_b32_e32 v1, -1, v0 3430; GFX11-NEXT: v_and_b32_e32 v0, 15, v0 3431; GFX11-NEXT: s_lshl_b32 s0, s0, 1 3432; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 3433; GFX11-NEXT: v_and_b32_e32 v1, 15, v1 3434; GFX11-NEXT: v_lshrrev_b16 v0, v0, s1 3435; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 3436; GFX11-NEXT: v_lshlrev_b16 v1, v1, s0 3437; GFX11-NEXT: v_or_b32_e32 v0, v1, v0 3438; GFX11-NEXT: ; return to shader part epilog 3439 %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 %amt) 3440 %cast.result = bitcast i16 %result to half 3441 ret half %cast.result 3442} 3443 3444define amdgpu_ps half @v_fshr_i16_svs(i16 inreg %lhs, i16 %rhs, i16 inreg %amt) { 3445; GFX6-LABEL: v_fshr_i16_svs: 3446; GFX6: ; %bb.0: 3447; GFX6-NEXT: s_and_b32 s2, s1, 15 3448; GFX6-NEXT: s_andn2_b32 s1, 15, s1 3449; GFX6-NEXT: s_lshl_b32 s0, s0, 1 3450; GFX6-NEXT: s_and_b32 s1, 0xffff, s1 3451; GFX6-NEXT: s_lshl_b32 s0, s0, s1 3452; GFX6-NEXT: s_and_b32 s1, 0xffff, s2 3453; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 3454; GFX6-NEXT: v_lshrrev_b32_e32 v0, s1, v0 3455; GFX6-NEXT: v_or_b32_e32 v0, s0, v0 3456; GFX6-NEXT: ; return to shader part epilog 3457; 3458; GFX8-LABEL: v_fshr_i16_svs: 3459; GFX8: ; %bb.0: 3460; GFX8-NEXT: s_and_b32 s2, s1, 15 3461; 
GFX8-NEXT: s_andn2_b32 s1, 15, s1 3462; GFX8-NEXT: s_lshl_b32 s0, s0, 1 3463; GFX8-NEXT: s_and_b32 s1, 0xffff, s1 3464; GFX8-NEXT: s_lshl_b32 s0, s0, s1 3465; GFX8-NEXT: v_lshrrev_b16_e32 v0, s2, v0 3466; GFX8-NEXT: v_or_b32_e32 v0, s0, v0 3467; GFX8-NEXT: ; return to shader part epilog 3468; 3469; GFX9-LABEL: v_fshr_i16_svs: 3470; GFX9: ; %bb.0: 3471; GFX9-NEXT: s_and_b32 s2, s1, 15 3472; GFX9-NEXT: s_andn2_b32 s1, 15, s1 3473; GFX9-NEXT: s_lshl_b32 s0, s0, 1 3474; GFX9-NEXT: s_and_b32 s1, 0xffff, s1 3475; GFX9-NEXT: s_lshl_b32 s0, s0, s1 3476; GFX9-NEXT: v_lshrrev_b16_e32 v0, s2, v0 3477; GFX9-NEXT: v_or_b32_e32 v0, s0, v0 3478; GFX9-NEXT: ; return to shader part epilog 3479; 3480; GFX10-LABEL: v_fshr_i16_svs: 3481; GFX10: ; %bb.0: 3482; GFX10-NEXT: s_and_b32 s2, s1, 15 3483; GFX10-NEXT: s_andn2_b32 s1, 15, s1 3484; GFX10-NEXT: v_lshrrev_b16 v0, s2, v0 3485; GFX10-NEXT: s_lshl_b32 s0, s0, 1 3486; GFX10-NEXT: s_and_b32 s1, 0xffff, s1 3487; GFX10-NEXT: s_lshl_b32 s0, s0, s1 3488; GFX10-NEXT: v_or_b32_e32 v0, s0, v0 3489; GFX10-NEXT: ; return to shader part epilog 3490; 3491; GFX11-LABEL: v_fshr_i16_svs: 3492; GFX11: ; %bb.0: 3493; GFX11-NEXT: s_and_b32 s2, s1, 15 3494; GFX11-NEXT: s_and_not1_b32 s1, 15, s1 3495; GFX11-NEXT: v_lshrrev_b16 v0, s2, v0 3496; GFX11-NEXT: s_lshl_b32 s0, s0, 1 3497; GFX11-NEXT: s_and_b32 s1, 0xffff, s1 3498; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 3499; GFX11-NEXT: s_lshl_b32 s0, s0, s1 3500; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) 3501; GFX11-NEXT: v_or_b32_e32 v0, s0, v0 3502; GFX11-NEXT: ; return to shader part epilog 3503 %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 %amt) 3504 %cast.result = bitcast i16 %result to half 3505 ret half %cast.result 3506} 3507 3508define amdgpu_ps half @v_fshr_i16_vss(i16 %lhs, i16 inreg %rhs, i16 inreg %amt) { 3509; GFX6-LABEL: v_fshr_i16_vss: 3510; GFX6: ; %bb.0: 3511; GFX6-NEXT: s_and_b32 s2, s1, 15 3512; GFX6-NEXT: s_andn2_b32 s1, 15, s1 3513; GFX6-NEXT: 
v_lshlrev_b32_e32 v0, 1, v0 3514; GFX6-NEXT: s_and_b32 s1, 0xffff, s1 3515; GFX6-NEXT: v_lshlrev_b32_e32 v0, s1, v0 3516; GFX6-NEXT: s_and_b32 s1, 0xffff, s2 3517; GFX6-NEXT: s_and_b32 s0, s0, 0xffff 3518; GFX6-NEXT: s_lshr_b32 s0, s0, s1 3519; GFX6-NEXT: v_or_b32_e32 v0, s0, v0 3520; GFX6-NEXT: ; return to shader part epilog 3521; 3522; GFX8-LABEL: v_fshr_i16_vss: 3523; GFX8: ; %bb.0: 3524; GFX8-NEXT: s_and_b32 s2, s1, 15 3525; GFX8-NEXT: s_andn2_b32 s1, 15, s1 3526; GFX8-NEXT: v_lshlrev_b16_e32 v0, 1, v0 3527; GFX8-NEXT: v_lshlrev_b16_e32 v0, s1, v0 3528; GFX8-NEXT: s_and_b32 s0, 0xffff, s0 3529; GFX8-NEXT: s_and_b32 s1, 0xffff, s2 3530; GFX8-NEXT: s_lshr_b32 s0, s0, s1 3531; GFX8-NEXT: v_or_b32_e32 v0, s0, v0 3532; GFX8-NEXT: ; return to shader part epilog 3533; 3534; GFX9-LABEL: v_fshr_i16_vss: 3535; GFX9: ; %bb.0: 3536; GFX9-NEXT: s_and_b32 s2, s1, 15 3537; GFX9-NEXT: s_andn2_b32 s1, 15, s1 3538; GFX9-NEXT: v_lshlrev_b16_e32 v0, 1, v0 3539; GFX9-NEXT: v_lshlrev_b16_e32 v0, s1, v0 3540; GFX9-NEXT: s_and_b32 s0, 0xffff, s0 3541; GFX9-NEXT: s_and_b32 s1, 0xffff, s2 3542; GFX9-NEXT: s_lshr_b32 s0, s0, s1 3543; GFX9-NEXT: v_or_b32_e32 v0, s0, v0 3544; GFX9-NEXT: ; return to shader part epilog 3545; 3546; GFX10-LABEL: v_fshr_i16_vss: 3547; GFX10: ; %bb.0: 3548; GFX10-NEXT: v_lshlrev_b16 v0, 1, v0 3549; GFX10-NEXT: s_andn2_b32 s2, 15, s1 3550; GFX10-NEXT: s_and_b32 s1, s1, 15 3551; GFX10-NEXT: s_and_b32 s0, 0xffff, s0 3552; GFX10-NEXT: s_and_b32 s1, 0xffff, s1 3553; GFX10-NEXT: v_lshlrev_b16 v0, s2, v0 3554; GFX10-NEXT: s_lshr_b32 s0, s0, s1 3555; GFX10-NEXT: v_or_b32_e32 v0, s0, v0 3556; GFX10-NEXT: ; return to shader part epilog 3557; 3558; GFX11-LABEL: v_fshr_i16_vss: 3559; GFX11: ; %bb.0: 3560; GFX11-NEXT: v_lshlrev_b16 v0, 1, v0 3561; GFX11-NEXT: s_and_not1_b32 s2, 15, s1 3562; GFX11-NEXT: s_and_b32 s1, s1, 15 3563; GFX11-NEXT: s_and_b32 s0, 0xffff, s0 3564; GFX11-NEXT: s_and_b32 s1, 0xffff, s1 3565; GFX11-NEXT: v_lshlrev_b16 v0, s2, v0 3566; GFX11-NEXT: 
s_lshr_b32 s0, s0, s1 3567; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) 3568; GFX11-NEXT: v_or_b32_e32 v0, s0, v0 3569; GFX11-NEXT: ; return to shader part epilog 3570 %result = call i16 @llvm.fshr.i16(i16 %lhs, i16 %rhs, i16 %amt) 3571 %cast.result = bitcast i16 %result to half 3572 ret half %cast.result 3573} 3574 3575define amdgpu_ps i32 @s_fshr_v2i16(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs, <2 x i16> inreg %amt) { 3576; GFX6-LABEL: s_fshr_v2i16: 3577; GFX6: ; %bb.0: 3578; GFX6-NEXT: s_lshl_b32 s5, s5, 16 3579; GFX6-NEXT: s_and_b32 s4, s4, 0xffff 3580; GFX6-NEXT: s_or_b32 s4, s5, s4 3581; GFX6-NEXT: s_bfe_u32 s5, s2, 0xf0001 3582; GFX6-NEXT: s_lshl_b32 s0, s0, 1 3583; GFX6-NEXT: s_lshr_b32 s5, s5, 14 3584; GFX6-NEXT: s_or_b32 s0, s0, s5 3585; GFX6-NEXT: s_bfe_u32 s5, s3, 0xf0001 3586; GFX6-NEXT: s_lshl_b32 s1, s1, 1 3587; GFX6-NEXT: s_lshr_b32 s5, s5, 14 3588; GFX6-NEXT: s_xor_b32 s4, s4, -1 3589; GFX6-NEXT: s_or_b32 s1, s1, s5 3590; GFX6-NEXT: s_lshl_b32 s2, s2, 1 3591; GFX6-NEXT: s_lshr_b32 s5, s4, 16 3592; GFX6-NEXT: s_and_b32 s6, s4, 15 3593; GFX6-NEXT: s_andn2_b32 s4, 15, s4 3594; GFX6-NEXT: s_and_b32 s6, 0xffff, s6 3595; GFX6-NEXT: s_bfe_u32 s2, s2, 0xf0001 3596; GFX6-NEXT: s_and_b32 s4, 0xffff, s4 3597; GFX6-NEXT: s_lshl_b32 s0, s0, s6 3598; GFX6-NEXT: s_lshr_b32 s2, s2, s4 3599; GFX6-NEXT: s_or_b32 s0, s0, s2 3600; GFX6-NEXT: s_and_b32 s2, s5, 15 3601; GFX6-NEXT: s_lshl_b32 s3, s3, 1 3602; GFX6-NEXT: s_andn2_b32 s4, 15, s5 3603; GFX6-NEXT: s_and_b32 s2, 0xffff, s2 3604; GFX6-NEXT: s_lshl_b32 s1, s1, s2 3605; GFX6-NEXT: s_bfe_u32 s2, s3, 0xf0001 3606; GFX6-NEXT: s_and_b32 s3, 0xffff, s4 3607; GFX6-NEXT: s_lshr_b32 s2, s2, s3 3608; GFX6-NEXT: s_or_b32 s1, s1, s2 3609; GFX6-NEXT: s_and_b32 s1, 0xffff, s1 3610; GFX6-NEXT: s_and_b32 s0, 0xffff, s0 3611; GFX6-NEXT: s_lshl_b32 s1, s1, 16 3612; GFX6-NEXT: s_or_b32 s0, s0, s1 3613; GFX6-NEXT: ; return to shader part epilog 3614; 3615; GFX8-LABEL: s_fshr_v2i16: 3616; GFX8: ; %bb.0: 3617; 
GFX8-NEXT: s_and_b32 s5, 0xffff, s1 3618; GFX8-NEXT: s_lshr_b32 s3, s0, 16 3619; GFX8-NEXT: s_lshr_b32 s4, s1, 16 3620; GFX8-NEXT: s_lshl_b32 s0, s0, 1 3621; GFX8-NEXT: s_lshr_b32 s5, s5, 15 3622; GFX8-NEXT: s_or_b32 s0, s0, s5 3623; GFX8-NEXT: s_lshl_b32 s3, s3, 1 3624; GFX8-NEXT: s_lshr_b32 s5, s4, 15 3625; GFX8-NEXT: s_lshl_b32 s1, s1, 1 3626; GFX8-NEXT: s_xor_b32 s2, s2, -1 3627; GFX8-NEXT: s_or_b32 s3, s3, s5 3628; GFX8-NEXT: s_lshr_b32 s5, s2, 16 3629; GFX8-NEXT: s_and_b32 s6, s2, 15 3630; GFX8-NEXT: s_andn2_b32 s2, 15, s2 3631; GFX8-NEXT: s_and_b32 s1, 0xffff, s1 3632; GFX8-NEXT: s_and_b32 s6, 0xffff, s6 3633; GFX8-NEXT: s_lshr_b32 s1, s1, 1 3634; GFX8-NEXT: s_and_b32 s2, 0xffff, s2 3635; GFX8-NEXT: s_lshl_b32 s0, s0, s6 3636; GFX8-NEXT: s_lshr_b32 s1, s1, s2 3637; GFX8-NEXT: s_or_b32 s0, s0, s1 3638; GFX8-NEXT: s_and_b32 s1, s5, 15 3639; GFX8-NEXT: s_lshl_b32 s4, s4, 1 3640; GFX8-NEXT: s_and_b32 s1, 0xffff, s1 3641; GFX8-NEXT: s_andn2_b32 s2, 15, s5 3642; GFX8-NEXT: s_lshl_b32 s1, s3, s1 3643; GFX8-NEXT: s_and_b32 s3, 0xffff, s4 3644; GFX8-NEXT: s_lshr_b32 s3, s3, 1 3645; GFX8-NEXT: s_and_b32 s2, 0xffff, s2 3646; GFX8-NEXT: s_lshr_b32 s2, s3, s2 3647; GFX8-NEXT: s_or_b32 s1, s1, s2 3648; GFX8-NEXT: s_and_b32 s1, 0xffff, s1 3649; GFX8-NEXT: s_and_b32 s0, 0xffff, s0 3650; GFX8-NEXT: s_lshl_b32 s1, s1, 16 3651; GFX8-NEXT: s_or_b32 s0, s0, s1 3652; GFX8-NEXT: ; return to shader part epilog 3653; 3654; GFX9-LABEL: s_fshr_v2i16: 3655; GFX9: ; %bb.0: 3656; GFX9-NEXT: s_lshr_b32 s4, s0, 16 3657; GFX9-NEXT: s_lshl_b32 s0, s0, 0x10001 3658; GFX9-NEXT: s_lshl_b32 s4, s4, 1 3659; GFX9-NEXT: s_and_b32 s3, s2, 0xf000f 3660; GFX9-NEXT: s_andn2_b32 s2, 0xf000f, s2 3661; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s4 3662; GFX9-NEXT: s_lshr_b32 s4, s0, 16 3663; GFX9-NEXT: s_lshr_b32 s5, s2, 16 3664; GFX9-NEXT: s_lshl_b32 s0, s0, s2 3665; GFX9-NEXT: s_lshl_b32 s2, s4, s5 3666; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s2 3667; GFX9-NEXT: s_lshr_b32 s2, s1, 16 3668; GFX9-NEXT: s_and_b32 
s1, s1, 0xffff 3669; GFX9-NEXT: s_lshr_b32 s4, s3, 16 3670; GFX9-NEXT: s_lshr_b32 s1, s1, s3 3671; GFX9-NEXT: s_lshr_b32 s2, s2, s4 3672; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s2 3673; GFX9-NEXT: s_or_b32 s0, s0, s1 3674; GFX9-NEXT: ; return to shader part epilog 3675; 3676; GFX10-LABEL: s_fshr_v2i16: 3677; GFX10: ; %bb.0: 3678; GFX10-NEXT: s_lshr_b32 s3, s0, 16 3679; GFX10-NEXT: s_lshl_b32 s0, s0, 0x10001 3680; GFX10-NEXT: s_lshl_b32 s3, s3, 1 3681; GFX10-NEXT: s_and_b32 s4, s2, 0xf000f 3682; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s3 3683; GFX10-NEXT: s_andn2_b32 s2, 0xf000f, s2 3684; GFX10-NEXT: s_lshr_b32 s3, s0, 16 3685; GFX10-NEXT: s_lshr_b32 s5, s2, 16 3686; GFX10-NEXT: s_lshl_b32 s0, s0, s2 3687; GFX10-NEXT: s_lshl_b32 s2, s3, s5 3688; GFX10-NEXT: s_lshr_b32 s3, s1, 16 3689; GFX10-NEXT: s_and_b32 s1, s1, 0xffff 3690; GFX10-NEXT: s_lshr_b32 s5, s4, 16 3691; GFX10-NEXT: s_lshr_b32 s1, s1, s4 3692; GFX10-NEXT: s_lshr_b32 s3, s3, s5 3693; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s2 3694; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s3 3695; GFX10-NEXT: s_or_b32 s0, s0, s1 3696; GFX10-NEXT: ; return to shader part epilog 3697; 3698; GFX11-LABEL: s_fshr_v2i16: 3699; GFX11: ; %bb.0: 3700; GFX11-NEXT: s_lshr_b32 s3, s0, 16 3701; GFX11-NEXT: s_lshl_b32 s0, s0, 0x10001 3702; GFX11-NEXT: s_lshl_b32 s3, s3, 1 3703; GFX11-NEXT: s_and_b32 s4, s2, 0xf000f 3704; GFX11-NEXT: s_pack_ll_b32_b16 s0, s0, s3 3705; GFX11-NEXT: s_and_not1_b32 s2, 0xf000f, s2 3706; GFX11-NEXT: s_lshr_b32 s3, s0, 16 3707; GFX11-NEXT: s_lshr_b32 s5, s2, 16 3708; GFX11-NEXT: s_lshl_b32 s0, s0, s2 3709; GFX11-NEXT: s_lshl_b32 s2, s3, s5 3710; GFX11-NEXT: s_lshr_b32 s3, s1, 16 3711; GFX11-NEXT: s_and_b32 s1, s1, 0xffff 3712; GFX11-NEXT: s_lshr_b32 s5, s4, 16 3713; GFX11-NEXT: s_lshr_b32 s1, s1, s4 3714; GFX11-NEXT: s_lshr_b32 s3, s3, s5 3715; GFX11-NEXT: s_pack_ll_b32_b16 s0, s0, s2 3716; GFX11-NEXT: s_pack_ll_b32_b16 s1, s1, s3 3717; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 3718; GFX11-NEXT: s_or_b32 s0, s0, 
s1 3719; GFX11-NEXT: ; return to shader part epilog 3720 %result = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt) 3721 %cast = bitcast <2 x i16> %result to i32 3722 ret i32 %cast 3723} 3724 3725define <2 x i16> @v_fshr_v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt) { 3726; GFX6-LABEL: v_fshr_v2i16: 3727; GFX6: ; %bb.0: 3728; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3729; GFX6-NEXT: v_lshlrev_b32_e32 v5, 16, v5 3730; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4 3731; GFX6-NEXT: v_or_b32_e32 v4, v5, v4 3732; GFX6-NEXT: v_bfe_u32 v5, v2, 1, 15 3733; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0 3734; GFX6-NEXT: v_lshrrev_b32_e32 v5, 14, v5 3735; GFX6-NEXT: v_or_b32_e32 v0, v0, v5 3736; GFX6-NEXT: v_bfe_u32 v5, v3, 1, 15 3737; GFX6-NEXT: v_lshlrev_b32_e32 v1, 1, v1 3738; GFX6-NEXT: v_lshrrev_b32_e32 v5, 14, v5 3739; GFX6-NEXT: v_xor_b32_e32 v4, -1, v4 3740; GFX6-NEXT: v_or_b32_e32 v1, v1, v5 3741; GFX6-NEXT: v_lshrrev_b32_e32 v5, 16, v4 3742; GFX6-NEXT: v_and_b32_e32 v6, 15, v4 3743; GFX6-NEXT: v_xor_b32_e32 v4, -1, v4 3744; GFX6-NEXT: v_lshlrev_b32_e32 v2, 1, v2 3745; GFX6-NEXT: v_and_b32_e32 v4, 15, v4 3746; GFX6-NEXT: v_and_b32_e32 v6, 0xffff, v6 3747; GFX6-NEXT: v_bfe_u32 v2, v2, 1, 15 3748; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4 3749; GFX6-NEXT: v_lshlrev_b32_e32 v0, v6, v0 3750; GFX6-NEXT: v_lshrrev_b32_e32 v2, v4, v2 3751; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 3752; GFX6-NEXT: v_and_b32_e32 v2, 15, v5 3753; GFX6-NEXT: v_xor_b32_e32 v4, -1, v5 3754; GFX6-NEXT: v_lshlrev_b32_e32 v3, 1, v3 3755; GFX6-NEXT: v_and_b32_e32 v4, 15, v4 3756; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2 3757; GFX6-NEXT: v_lshlrev_b32_e32 v1, v2, v1 3758; GFX6-NEXT: v_bfe_u32 v2, v3, 1, 15 3759; GFX6-NEXT: v_and_b32_e32 v3, 0xffff, v4 3760; GFX6-NEXT: v_lshrrev_b32_e32 v2, v3, v2 3761; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 3762; GFX6-NEXT: s_setpc_b64 s[30:31] 3763; 3764; GFX8-LABEL: v_fshr_v2i16: 3765; GFX8: ; %bb.0: 3766; GFX8-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) 3767; GFX8-NEXT: v_lshlrev_b16_e32 v3, 1, v0 3768; GFX8-NEXT: v_lshrrev_b16_e32 v4, 15, v1 3769; GFX8-NEXT: v_or_b32_e32 v3, v3, v4 3770; GFX8-NEXT: v_mov_b32_e32 v4, 1 3771; GFX8-NEXT: v_mov_b32_e32 v5, 15 3772; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 3773; GFX8-NEXT: v_lshrrev_b16_sdwa v6, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 3774; GFX8-NEXT: v_xor_b32_e32 v2, -1, v2 3775; GFX8-NEXT: v_or_b32_e32 v0, v0, v6 3776; GFX8-NEXT: v_lshlrev_b16_e32 v6, 1, v1 3777; GFX8-NEXT: v_lshlrev_b16_sdwa v1, v4, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 3778; GFX8-NEXT: v_and_b32_e32 v4, 15, v2 3779; GFX8-NEXT: v_xor_b32_e32 v7, -1, v2 3780; GFX8-NEXT: v_and_b32_e32 v7, 15, v7 3781; GFX8-NEXT: v_lshlrev_b16_e32 v3, v4, v3 3782; GFX8-NEXT: v_lshrrev_b16_e32 v4, 1, v6 3783; GFX8-NEXT: v_lshrrev_b16_e32 v4, v7, v4 3784; GFX8-NEXT: v_or_b32_e32 v3, v3, v4 3785; GFX8-NEXT: v_and_b32_sdwa v4, v2, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 3786; GFX8-NEXT: v_mov_b32_e32 v5, -1 3787; GFX8-NEXT: v_xor_b32_sdwa v2, v2, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 3788; GFX8-NEXT: v_and_b32_e32 v2, 15, v2 3789; GFX8-NEXT: v_lshrrev_b16_e32 v1, 1, v1 3790; GFX8-NEXT: v_lshlrev_b16_e32 v0, v4, v0 3791; GFX8-NEXT: v_lshrrev_b16_e32 v1, v2, v1 3792; GFX8-NEXT: v_or_b32_e32 v0, v0, v1 3793; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0 3794; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0 3795; GFX8-NEXT: v_or_b32_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 3796; GFX8-NEXT: s_setpc_b64 s[30:31] 3797; 3798; GFX9-LABEL: v_fshr_v2i16: 3799; GFX9: ; %bb.0: 3800; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3801; GFX9-NEXT: v_and_b32_e32 v3, 0xf000f, v2 3802; GFX9-NEXT: v_xor_b32_e32 v2, -1, v2 3803; GFX9-NEXT: v_and_b32_e32 v2, 0xf000f, v2 3804; GFX9-NEXT: 
v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1] 3805; GFX9-NEXT: v_pk_lshlrev_b16 v0, v2, v0 3806; GFX9-NEXT: v_pk_lshrrev_b16 v1, v3, v1 3807; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 3808; GFX9-NEXT: s_setpc_b64 s[30:31] 3809; 3810; GFX10-LABEL: v_fshr_v2i16: 3811; GFX10: ; %bb.0: 3812; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3813; GFX10-NEXT: v_xor_b32_e32 v3, -1, v2 3814; GFX10-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1] 3815; GFX10-NEXT: v_and_b32_e32 v2, 0xf000f, v2 3816; GFX10-NEXT: v_and_b32_e32 v3, 0xf000f, v3 3817; GFX10-NEXT: v_pk_lshrrev_b16 v1, v2, v1 3818; GFX10-NEXT: v_pk_lshlrev_b16 v0, v3, v0 3819; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 3820; GFX10-NEXT: s_setpc_b64 s[30:31] 3821; 3822; GFX11-LABEL: v_fshr_v2i16: 3823; GFX11: ; %bb.0: 3824; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3825; GFX11-NEXT: v_xor_b32_e32 v3, -1, v2 3826; GFX11-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1] 3827; GFX11-NEXT: v_and_b32_e32 v2, 0xf000f, v2 3828; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) 3829; GFX11-NEXT: v_and_b32_e32 v3, 0xf000f, v3 3830; GFX11-NEXT: v_pk_lshrrev_b16 v1, v2, v1 3831; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 3832; GFX11-NEXT: v_pk_lshlrev_b16 v0, v3, v0 3833; GFX11-NEXT: v_or_b32_e32 v0, v0, v1 3834; GFX11-NEXT: s_setpc_b64 s[30:31] 3835 %result = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt) 3836 ret <2 x i16> %result 3837} 3838 3839define <2 x i16> @v_fshr_v2i16_4_8(<2 x i16> %lhs, <2 x i16> %rhs) { 3840; GFX6-LABEL: v_fshr_v2i16_4_8: 3841; GFX6: ; %bb.0: 3842; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3843; GFX6-NEXT: v_bfe_u32 v2, v2, 1, 15 3844; GFX6-NEXT: v_lshlrev_b32_e32 v0, 12, v0 3845; GFX6-NEXT: v_lshrrev_b32_e32 v2, 3, v2 3846; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 3847; GFX6-NEXT: v_bfe_u32 v2, v3, 1, 15 3848; GFX6-NEXT: v_lshlrev_b32_e32 v1, 8, v1 3849; GFX6-NEXT: v_lshrrev_b32_e32 v2, 7, v2 
3850; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 3851; GFX6-NEXT: s_setpc_b64 s[30:31] 3852; 3853; GFX8-LABEL: v_fshr_v2i16_4_8: 3854; GFX8: ; %bb.0: 3855; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3856; GFX8-NEXT: v_lshrrev_b32_e32 v2, 16, v0 3857; GFX8-NEXT: v_lshlrev_b16_e32 v0, 12, v0 3858; GFX8-NEXT: v_lshrrev_b16_e32 v3, 4, v1 3859; GFX8-NEXT: v_or_b32_e32 v0, v0, v3 3860; GFX8-NEXT: v_mov_b32_e32 v3, 8 3861; GFX8-NEXT: v_lshlrev_b16_e32 v2, 8, v2 3862; GFX8-NEXT: v_lshrrev_b16_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 3863; GFX8-NEXT: v_or_b32_e32 v1, v2, v1 3864; GFX8-NEXT: v_and_b32_e32 v1, 0xffff, v1 3865; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1 3866; GFX8-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 3867; GFX8-NEXT: s_setpc_b64 s[30:31] 3868; 3869; GFX9-LABEL: v_fshr_v2i16_4_8: 3870; GFX9: ; %bb.0: 3871; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3872; GFX9-NEXT: v_mov_b32_e32 v2, 0x8000c 3873; GFX9-NEXT: v_pk_lshlrev_b16 v0, v2, v0 3874; GFX9-NEXT: v_mov_b32_e32 v2, 0x80004 3875; GFX9-NEXT: v_pk_lshrrev_b16 v1, v2, v1 3876; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 3877; GFX9-NEXT: s_setpc_b64 s[30:31] 3878; 3879; GFX10-LABEL: v_fshr_v2i16_4_8: 3880; GFX10: ; %bb.0: 3881; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3882; GFX10-NEXT: v_pk_lshlrev_b16 v0, 0x8000c, v0 3883; GFX10-NEXT: v_pk_lshrrev_b16 v1, 0x80004, v1 3884; GFX10-NEXT: v_or_b32_e32 v0, v0, v1 3885; GFX10-NEXT: s_setpc_b64 s[30:31] 3886; 3887; GFX11-LABEL: v_fshr_v2i16_4_8: 3888; GFX11: ; %bb.0: 3889; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3890; GFX11-NEXT: v_pk_lshlrev_b16 v0, 0x8000c, v0 3891; GFX11-NEXT: v_pk_lshrrev_b16 v1, 0x80004, v1 3892; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 3893; GFX11-NEXT: v_or_b32_e32 v0, v0, v1 3894; GFX11-NEXT: s_setpc_b64 s[30:31] 3895 %result = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> <i16 4, i16 8>) 3896 
ret <2 x i16> %result 3897} 3898 3899define amdgpu_ps float @v_fshr_v2i16_ssv(<2 x i16> inreg %lhs, <2 x i16> inreg %rhs, <2 x i16> %amt) { 3900; GFX6-LABEL: v_fshr_v2i16_ssv: 3901; GFX6: ; %bb.0: 3902; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 3903; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 3904; GFX6-NEXT: v_or_b32_e32 v0, v1, v0 3905; GFX6-NEXT: s_bfe_u32 s4, s2, 0xf0001 3906; GFX6-NEXT: v_xor_b32_e32 v0, -1, v0 3907; GFX6-NEXT: s_lshl_b32 s0, s0, 1 3908; GFX6-NEXT: s_lshr_b32 s4, s4, 14 3909; GFX6-NEXT: v_lshrrev_b32_e32 v1, 16, v0 3910; GFX6-NEXT: v_and_b32_e32 v2, 15, v0 3911; GFX6-NEXT: v_xor_b32_e32 v0, -1, v0 3912; GFX6-NEXT: s_or_b32 s0, s0, s4 3913; GFX6-NEXT: s_lshl_b32 s2, s2, 1 3914; GFX6-NEXT: v_and_b32_e32 v0, 15, v0 3915; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2 3916; GFX6-NEXT: v_lshl_b32_e32 v2, s0, v2 3917; GFX6-NEXT: s_bfe_u32 s0, s2, 0xf0001 3918; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 3919; GFX6-NEXT: v_lshr_b32_e32 v0, s0, v0 3920; GFX6-NEXT: s_bfe_u32 s4, s3, 0xf0001 3921; GFX6-NEXT: v_or_b32_e32 v0, v2, v0 3922; GFX6-NEXT: v_and_b32_e32 v2, 15, v1 3923; GFX6-NEXT: v_xor_b32_e32 v1, -1, v1 3924; GFX6-NEXT: s_lshl_b32 s1, s1, 1 3925; GFX6-NEXT: s_lshr_b32 s4, s4, 14 3926; GFX6-NEXT: s_lshl_b32 s3, s3, 1 3927; GFX6-NEXT: v_and_b32_e32 v1, 15, v1 3928; GFX6-NEXT: s_or_b32 s1, s1, s4 3929; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2 3930; GFX6-NEXT: s_bfe_u32 s0, s3, 0xf0001 3931; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1 3932; GFX6-NEXT: v_lshl_b32_e32 v2, s1, v2 3933; GFX6-NEXT: v_lshr_b32_e32 v1, s0, v1 3934; GFX6-NEXT: v_or_b32_e32 v1, v2, v1 3935; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1 3936; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 3937; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 3938; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 3939; GFX6-NEXT: ; return to shader part epilog 3940; 3941; GFX8-LABEL: v_fshr_v2i16_ssv: 3942; GFX8: ; %bb.0: 3943; GFX8-NEXT: s_and_b32 s4, 0xffff, s1 3944; GFX8-NEXT: s_lshr_b32 s2, s0, 16 3945; GFX8-NEXT: s_lshl_b32 s0, s0, 1 3946; 
GFX8-NEXT: s_lshr_b32 s4, s4, 15 3947; GFX8-NEXT: v_xor_b32_e32 v0, -1, v0 3948; GFX8-NEXT: s_lshr_b32 s3, s1, 16 3949; GFX8-NEXT: s_or_b32 s0, s0, s4 3950; GFX8-NEXT: s_lshl_b32 s1, s1, 1 3951; GFX8-NEXT: v_and_b32_e32 v1, 15, v0 3952; GFX8-NEXT: v_xor_b32_e32 v2, -1, v0 3953; GFX8-NEXT: v_lshlrev_b16_e64 v1, v1, s0 3954; GFX8-NEXT: s_and_b32 s0, 0xffff, s1 3955; GFX8-NEXT: v_and_b32_e32 v2, 15, v2 3956; GFX8-NEXT: s_lshr_b32 s0, s0, 1 3957; GFX8-NEXT: v_lshrrev_b16_e64 v2, v2, s0 3958; GFX8-NEXT: s_lshr_b32 s4, s3, 15 3959; GFX8-NEXT: s_lshl_b32 s3, s3, 1 3960; GFX8-NEXT: v_or_b32_e32 v1, v1, v2 3961; GFX8-NEXT: v_mov_b32_e32 v2, 15 3962; GFX8-NEXT: v_mov_b32_e32 v3, -1 3963; GFX8-NEXT: s_lshl_b32 s2, s2, 1 3964; GFX8-NEXT: v_and_b32_sdwa v2, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 3965; GFX8-NEXT: v_xor_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 3966; GFX8-NEXT: s_and_b32 s0, 0xffff, s3 3967; GFX8-NEXT: s_or_b32 s2, s2, s4 3968; GFX8-NEXT: v_and_b32_e32 v0, 15, v0 3969; GFX8-NEXT: s_lshr_b32 s0, s0, 1 3970; GFX8-NEXT: v_lshlrev_b16_e64 v2, v2, s2 3971; GFX8-NEXT: v_lshrrev_b16_e64 v0, v0, s0 3972; GFX8-NEXT: v_or_b32_e32 v0, v2, v0 3973; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0 3974; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0 3975; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 3976; GFX8-NEXT: ; return to shader part epilog 3977; 3978; GFX9-LABEL: v_fshr_v2i16_ssv: 3979; GFX9: ; %bb.0: 3980; GFX9-NEXT: s_lshr_b32 s2, s0, 16 3981; GFX9-NEXT: v_and_b32_e32 v1, 0xf000f, v0 3982; GFX9-NEXT: v_xor_b32_e32 v0, -1, v0 3983; GFX9-NEXT: s_lshl_b32 s0, s0, 0x10001 3984; GFX9-NEXT: s_lshl_b32 s2, s2, 1 3985; GFX9-NEXT: v_and_b32_e32 v0, 0xf000f, v0 3986; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s2 3987; GFX9-NEXT: v_pk_lshlrev_b16 v0, v0, s0 3988; GFX9-NEXT: v_pk_lshrrev_b16 v1, v1, s1 3989; GFX9-NEXT: v_or_b32_e32 v0, v0, v1 3990; GFX9-NEXT: ; 
return to shader part epilog 3991; 3992; GFX10-LABEL: v_fshr_v2i16_ssv: 3993; GFX10: ; %bb.0: 3994; GFX10-NEXT: v_xor_b32_e32 v1, -1, v0 3995; GFX10-NEXT: s_lshr_b32 s2, s0, 16 3996; GFX10-NEXT: v_and_b32_e32 v0, 0xf000f, v0 3997; GFX10-NEXT: s_lshl_b32 s0, s0, 0x10001 3998; GFX10-NEXT: s_lshl_b32 s2, s2, 1 3999; GFX10-NEXT: v_and_b32_e32 v1, 0xf000f, v1 4000; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s2 4001; GFX10-NEXT: v_pk_lshrrev_b16 v0, v0, s1 4002; GFX10-NEXT: v_pk_lshlrev_b16 v1, v1, s0 4003; GFX10-NEXT: v_or_b32_e32 v0, v1, v0 4004; GFX10-NEXT: ; return to shader part epilog 4005; 4006; GFX11-LABEL: v_fshr_v2i16_ssv: 4007; GFX11: ; %bb.0: 4008; GFX11-NEXT: v_xor_b32_e32 v1, -1, v0 4009; GFX11-NEXT: s_lshr_b32 s2, s0, 16 4010; GFX11-NEXT: v_and_b32_e32 v0, 0xf000f, v0 4011; GFX11-NEXT: s_lshl_b32 s0, s0, 0x10001 4012; GFX11-NEXT: s_lshl_b32 s2, s2, 1 4013; GFX11-NEXT: v_and_b32_e32 v1, 0xf000f, v1 4014; GFX11-NEXT: s_pack_ll_b32_b16 s0, s0, s2 4015; GFX11-NEXT: v_pk_lshrrev_b16 v0, v0, s1 4016; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 4017; GFX11-NEXT: v_pk_lshlrev_b16 v1, v1, s0 4018; GFX11-NEXT: v_or_b32_e32 v0, v1, v0 4019; GFX11-NEXT: ; return to shader part epilog 4020 %result = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt) 4021 %cast = bitcast <2 x i16> %result to float 4022 ret float %cast 4023} 4024 4025define amdgpu_ps float @v_fshr_v2i16_svs(<2 x i16> inreg %lhs, <2 x i16> %rhs, <2 x i16> inreg %amt) { 4026; GFX6-LABEL: v_fshr_v2i16_svs: 4027; GFX6: ; %bb.0: 4028; GFX6-NEXT: v_bfe_u32 v2, v0, 1, 15 4029; GFX6-NEXT: s_lshl_b32 s3, s3, 16 4030; GFX6-NEXT: s_and_b32 s2, s2, 0xffff 4031; GFX6-NEXT: s_lshl_b32 s0, s0, 1 4032; GFX6-NEXT: v_lshrrev_b32_e32 v2, 14, v2 4033; GFX6-NEXT: v_bfe_u32 v3, v1, 1, 15 4034; GFX6-NEXT: s_or_b32 s2, s3, s2 4035; GFX6-NEXT: v_or_b32_e32 v2, s0, v2 4036; GFX6-NEXT: s_lshl_b32 s0, s1, 1 4037; GFX6-NEXT: v_lshrrev_b32_e32 v3, 14, v3 4038; GFX6-NEXT: 
v_or_b32_e32 v3, s0, v3 4039; GFX6-NEXT: s_xor_b32 s0, s2, -1 4040; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0 4041; GFX6-NEXT: s_lshr_b32 s1, s0, 16 4042; GFX6-NEXT: s_and_b32 s2, s0, 15 4043; GFX6-NEXT: s_andn2_b32 s0, 15, s0 4044; GFX6-NEXT: v_bfe_u32 v0, v0, 1, 15 4045; GFX6-NEXT: s_and_b32 s0, 0xffff, s0 4046; GFX6-NEXT: s_and_b32 s2, 0xffff, s2 4047; GFX6-NEXT: v_lshrrev_b32_e32 v0, s0, v0 4048; GFX6-NEXT: s_and_b32 s0, s1, 15 4049; GFX6-NEXT: v_lshlrev_b32_e32 v1, 1, v1 4050; GFX6-NEXT: v_lshlrev_b32_e32 v2, s2, v2 4051; GFX6-NEXT: s_andn2_b32 s1, 15, s1 4052; GFX6-NEXT: s_and_b32 s0, 0xffff, s0 4053; GFX6-NEXT: v_or_b32_e32 v0, v2, v0 4054; GFX6-NEXT: v_lshlrev_b32_e32 v2, s0, v3 4055; GFX6-NEXT: v_bfe_u32 v1, v1, 1, 15 4056; GFX6-NEXT: s_and_b32 s0, 0xffff, s1 4057; GFX6-NEXT: v_lshrrev_b32_e32 v1, s0, v1 4058; GFX6-NEXT: v_or_b32_e32 v1, v2, v1 4059; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1 4060; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 4061; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 4062; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 4063; GFX6-NEXT: ; return to shader part epilog 4064; 4065; GFX8-LABEL: v_fshr_v2i16_svs: 4066; GFX8: ; %bb.0: 4067; GFX8-NEXT: s_lshr_b32 s2, s0, 16 4068; GFX8-NEXT: s_lshl_b32 s0, s0, 1 4069; GFX8-NEXT: v_lshrrev_b16_e32 v1, 15, v0 4070; GFX8-NEXT: v_mov_b32_e32 v2, 15 4071; GFX8-NEXT: v_or_b32_e32 v1, s0, v1 4072; GFX8-NEXT: s_lshl_b32 s0, s2, 1 4073; GFX8-NEXT: v_lshrrev_b16_sdwa v2, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 4074; GFX8-NEXT: v_or_b32_e32 v2, s0, v2 4075; GFX8-NEXT: v_lshlrev_b16_e32 v3, 1, v0 4076; GFX8-NEXT: v_mov_b32_e32 v4, 1 4077; GFX8-NEXT: s_xor_b32 s0, s1, -1 4078; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 4079; GFX8-NEXT: s_lshr_b32 s1, s0, 16 4080; GFX8-NEXT: s_and_b32 s2, s0, 15 4081; GFX8-NEXT: s_andn2_b32 s0, 15, s0 4082; GFX8-NEXT: v_lshrrev_b16_e32 v3, 1, v3 4083; GFX8-NEXT: v_lshrrev_b16_e32 v3, s0, v3 4084; 
GFX8-NEXT: s_and_b32 s0, s1, 15 4085; GFX8-NEXT: s_andn2_b32 s1, 15, s1 4086; GFX8-NEXT: v_lshrrev_b16_e32 v0, 1, v0 4087; GFX8-NEXT: v_lshlrev_b16_e32 v2, s0, v2 4088; GFX8-NEXT: v_lshrrev_b16_e32 v0, s1, v0 4089; GFX8-NEXT: v_or_b32_e32 v0, v2, v0 4090; GFX8-NEXT: v_lshlrev_b16_e32 v1, s2, v1 4091; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0 4092; GFX8-NEXT: v_or_b32_e32 v1, v1, v3 4093; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0 4094; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 4095; GFX8-NEXT: ; return to shader part epilog 4096; 4097; GFX9-LABEL: v_fshr_v2i16_svs: 4098; GFX9: ; %bb.0: 4099; GFX9-NEXT: s_lshr_b32 s3, s0, 16 4100; GFX9-NEXT: s_lshl_b32 s0, s0, 0x10001 4101; GFX9-NEXT: s_lshl_b32 s3, s3, 1 4102; GFX9-NEXT: s_and_b32 s2, s1, 0xf000f 4103; GFX9-NEXT: s_andn2_b32 s1, 0xf000f, s1 4104; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s3 4105; GFX9-NEXT: s_lshr_b32 s3, s0, 16 4106; GFX9-NEXT: s_lshr_b32 s4, s1, 16 4107; GFX9-NEXT: s_lshl_b32 s0, s0, s1 4108; GFX9-NEXT: s_lshl_b32 s1, s3, s4 4109; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s1 4110; GFX9-NEXT: v_pk_lshrrev_b16 v0, s2, v0 4111; GFX9-NEXT: v_or_b32_e32 v0, s0, v0 4112; GFX9-NEXT: ; return to shader part epilog 4113; 4114; GFX10-LABEL: v_fshr_v2i16_svs: 4115; GFX10: ; %bb.0: 4116; GFX10-NEXT: s_lshr_b32 s2, s0, 16 4117; GFX10-NEXT: s_lshl_b32 s0, s0, 0x10001 4118; GFX10-NEXT: s_lshl_b32 s2, s2, 1 4119; GFX10-NEXT: s_and_b32 s3, s1, 0xf000f 4120; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s2 4121; GFX10-NEXT: s_andn2_b32 s1, 0xf000f, s1 4122; GFX10-NEXT: s_lshr_b32 s2, s0, 16 4123; GFX10-NEXT: s_lshr_b32 s4, s1, 16 4124; GFX10-NEXT: v_pk_lshrrev_b16 v0, s3, v0 4125; GFX10-NEXT: s_lshl_b32 s0, s0, s1 4126; GFX10-NEXT: s_lshl_b32 s1, s2, s4 4127; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s1 4128; GFX10-NEXT: v_or_b32_e32 v0, s0, v0 4129; GFX10-NEXT: ; return to shader part epilog 4130; 4131; GFX11-LABEL: v_fshr_v2i16_svs: 4132; GFX11: ; %bb.0: 4133; GFX11-NEXT: 
s_lshr_b32 s2, s0, 16 4134; GFX11-NEXT: s_lshl_b32 s0, s0, 0x10001 4135; GFX11-NEXT: s_lshl_b32 s2, s2, 1 4136; GFX11-NEXT: s_and_b32 s3, s1, 0xf000f 4137; GFX11-NEXT: s_pack_ll_b32_b16 s0, s0, s2 4138; GFX11-NEXT: s_and_not1_b32 s1, 0xf000f, s1 4139; GFX11-NEXT: s_lshr_b32 s2, s0, 16 4140; GFX11-NEXT: s_lshr_b32 s4, s1, 16 4141; GFX11-NEXT: v_pk_lshrrev_b16 v0, s3, v0 4142; GFX11-NEXT: s_lshl_b32 s0, s0, s1 4143; GFX11-NEXT: s_lshl_b32 s1, s2, s4 4144; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 4145; GFX11-NEXT: s_pack_ll_b32_b16 s0, s0, s1 4146; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) 4147; GFX11-NEXT: v_or_b32_e32 v0, s0, v0 4148; GFX11-NEXT: ; return to shader part epilog 4149 %result = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt) 4150 %cast = bitcast <2 x i16> %result to float 4151 ret float %cast 4152} 4153 4154define amdgpu_ps float @v_fshr_v2i16_vss(<2 x i16> %lhs, <2 x i16> inreg %rhs, <2 x i16> inreg %amt) { 4155; GFX6-LABEL: v_fshr_v2i16_vss: 4156; GFX6: ; %bb.0: 4157; GFX6-NEXT: s_lshl_b32 s3, s3, 16 4158; GFX6-NEXT: s_and_b32 s2, s2, 0xffff 4159; GFX6-NEXT: s_or_b32 s2, s3, s2 4160; GFX6-NEXT: s_bfe_u32 s3, s0, 0xf0001 4161; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0 4162; GFX6-NEXT: s_lshr_b32 s3, s3, 14 4163; GFX6-NEXT: v_or_b32_e32 v0, s3, v0 4164; GFX6-NEXT: s_bfe_u32 s3, s1, 0xf0001 4165; GFX6-NEXT: v_lshlrev_b32_e32 v1, 1, v1 4166; GFX6-NEXT: s_lshr_b32 s3, s3, 14 4167; GFX6-NEXT: s_xor_b32 s2, s2, -1 4168; GFX6-NEXT: v_or_b32_e32 v1, s3, v1 4169; GFX6-NEXT: s_lshl_b32 s0, s0, 1 4170; GFX6-NEXT: s_lshr_b32 s3, s2, 16 4171; GFX6-NEXT: s_and_b32 s4, s2, 15 4172; GFX6-NEXT: s_andn2_b32 s2, 15, s2 4173; GFX6-NEXT: s_and_b32 s4, 0xffff, s4 4174; GFX6-NEXT: s_bfe_u32 s0, s0, 0xf0001 4175; GFX6-NEXT: s_and_b32 s2, 0xffff, s2 4176; GFX6-NEXT: v_lshlrev_b32_e32 v0, s4, v0 4177; GFX6-NEXT: s_lshr_b32 s0, s0, s2 4178; GFX6-NEXT: v_or_b32_e32 v0, s0, v0 4179; GFX6-NEXT: s_and_b32 s0, s3, 15 4180; 
GFX6-NEXT: s_lshl_b32 s1, s1, 1 4181; GFX6-NEXT: s_andn2_b32 s2, 15, s3 4182; GFX6-NEXT: s_and_b32 s0, 0xffff, s0 4183; GFX6-NEXT: v_lshlrev_b32_e32 v1, s0, v1 4184; GFX6-NEXT: s_bfe_u32 s0, s1, 0xf0001 4185; GFX6-NEXT: s_and_b32 s1, 0xffff, s2 4186; GFX6-NEXT: s_lshr_b32 s0, s0, s1 4187; GFX6-NEXT: v_or_b32_e32 v1, s0, v1 4188; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1 4189; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 4190; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 4191; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 4192; GFX6-NEXT: ; return to shader part epilog 4193; 4194; GFX8-LABEL: v_fshr_v2i16_vss: 4195; GFX8: ; %bb.0: 4196; GFX8-NEXT: s_and_b32 s3, 0xffff, s0 4197; GFX8-NEXT: s_lshr_b32 s2, s0, 16 4198; GFX8-NEXT: v_lshlrev_b16_e32 v1, 1, v0 4199; GFX8-NEXT: s_lshr_b32 s3, s3, 15 4200; GFX8-NEXT: v_mov_b32_e32 v2, 1 4201; GFX8-NEXT: v_or_b32_e32 v1, s3, v1 4202; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 4203; GFX8-NEXT: s_lshr_b32 s3, s2, 15 4204; GFX8-NEXT: s_lshl_b32 s0, s0, 1 4205; GFX8-NEXT: s_xor_b32 s1, s1, -1 4206; GFX8-NEXT: v_or_b32_e32 v0, s3, v0 4207; GFX8-NEXT: s_lshr_b32 s3, s1, 16 4208; GFX8-NEXT: s_and_b32 s4, s1, 15 4209; GFX8-NEXT: s_andn2_b32 s1, 15, s1 4210; GFX8-NEXT: s_and_b32 s0, 0xffff, s0 4211; GFX8-NEXT: s_lshr_b32 s0, s0, 1 4212; GFX8-NEXT: s_and_b32 s1, 0xffff, s1 4213; GFX8-NEXT: v_lshlrev_b16_e32 v1, s4, v1 4214; GFX8-NEXT: s_lshr_b32 s0, s0, s1 4215; GFX8-NEXT: s_lshl_b32 s2, s2, 1 4216; GFX8-NEXT: v_or_b32_e32 v1, s0, v1 4217; GFX8-NEXT: s_and_b32 s0, s3, 15 4218; GFX8-NEXT: s_andn2_b32 s1, 15, s3 4219; GFX8-NEXT: v_lshlrev_b16_e32 v0, s0, v0 4220; GFX8-NEXT: s_and_b32 s0, 0xffff, s2 4221; GFX8-NEXT: s_lshr_b32 s0, s0, 1 4222; GFX8-NEXT: s_and_b32 s1, 0xffff, s1 4223; GFX8-NEXT: s_lshr_b32 s0, s0, s1 4224; GFX8-NEXT: v_or_b32_e32 v0, s0, v0 4225; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0 4226; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0 4227; GFX8-NEXT: v_or_b32_sdwa v0, v1, v0 
dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 4228; GFX8-NEXT: ; return to shader part epilog 4229; 4230; GFX9-LABEL: v_fshr_v2i16_vss: 4231; GFX9: ; %bb.0: 4232; GFX9-NEXT: s_and_b32 s2, s1, 0xf000f 4233; GFX9-NEXT: s_andn2_b32 s1, 0xf000f, s1 4234; GFX9-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1] 4235; GFX9-NEXT: v_pk_lshlrev_b16 v0, s1, v0 4236; GFX9-NEXT: s_lshr_b32 s1, s0, 16 4237; GFX9-NEXT: s_and_b32 s0, s0, 0xffff 4238; GFX9-NEXT: s_lshr_b32 s3, s2, 16 4239; GFX9-NEXT: s_lshr_b32 s0, s0, s2 4240; GFX9-NEXT: s_lshr_b32 s1, s1, s3 4241; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s1 4242; GFX9-NEXT: v_or_b32_e32 v0, s0, v0 4243; GFX9-NEXT: ; return to shader part epilog 4244; 4245; GFX10-LABEL: v_fshr_v2i16_vss: 4246; GFX10: ; %bb.0: 4247; GFX10-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1] 4248; GFX10-NEXT: s_and_b32 s2, s1, 0xf000f 4249; GFX10-NEXT: s_andn2_b32 s1, 0xf000f, s1 4250; GFX10-NEXT: s_lshr_b32 s3, s0, 16 4251; GFX10-NEXT: s_and_b32 s0, s0, 0xffff 4252; GFX10-NEXT: s_lshr_b32 s4, s2, 16 4253; GFX10-NEXT: v_pk_lshlrev_b16 v0, s1, v0 4254; GFX10-NEXT: s_lshr_b32 s0, s0, s2 4255; GFX10-NEXT: s_lshr_b32 s1, s3, s4 4256; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s1 4257; GFX10-NEXT: v_or_b32_e32 v0, s0, v0 4258; GFX10-NEXT: ; return to shader part epilog 4259; 4260; GFX11-LABEL: v_fshr_v2i16_vss: 4261; GFX11: ; %bb.0: 4262; GFX11-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1] 4263; GFX11-NEXT: s_and_b32 s2, s1, 0xf000f 4264; GFX11-NEXT: s_and_not1_b32 s1, 0xf000f, s1 4265; GFX11-NEXT: s_lshr_b32 s3, s0, 16 4266; GFX11-NEXT: s_and_b32 s0, s0, 0xffff 4267; GFX11-NEXT: s_lshr_b32 s4, s2, 16 4268; GFX11-NEXT: v_pk_lshlrev_b16 v0, s1, v0 4269; GFX11-NEXT: s_lshr_b32 s0, s0, s2 4270; GFX11-NEXT: s_lshr_b32 s1, s3, s4 4271; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 4272; GFX11-NEXT: s_pack_ll_b32_b16 s0, s0, s1 4273; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) 4274; GFX11-NEXT: v_or_b32_e32 v0, s0, v0 4275; 
GFX11-NEXT: ; return to shader part epilog 4276 %result = call <2 x i16> @llvm.fshr.v2i16(<2 x i16> %lhs, <2 x i16> %rhs, <2 x i16> %amt) 4277 %cast = bitcast <2 x i16> %result to float 4278 ret float %cast 4279} 4280 4281define amdgpu_ps i48 @s_fshr_v3i16(<3 x i16> inreg %lhs, <3 x i16> inreg %rhs, <3 x i16> inreg %amt) { 4282; GFX6-LABEL: s_fshr_v3i16: 4283; GFX6: ; %bb.0: 4284; GFX6-NEXT: s_and_b32 s7, s7, 0xffff 4285; GFX6-NEXT: s_and_b32 s6, s6, 0xffff 4286; GFX6-NEXT: s_lshl_b32 s7, s7, 16 4287; GFX6-NEXT: s_or_b32 s6, s6, s7 4288; GFX6-NEXT: s_and_b32 s7, s8, 0xffff 4289; GFX6-NEXT: s_bfe_u32 s8, s3, 0xf0001 4290; GFX6-NEXT: s_lshl_b32 s0, s0, 1 4291; GFX6-NEXT: s_lshr_b32 s8, s8, 14 4292; GFX6-NEXT: s_or_b32 s0, s0, s8 4293; GFX6-NEXT: s_bfe_u32 s8, s4, 0xf0001 4294; GFX6-NEXT: s_lshl_b32 s1, s1, 1 4295; GFX6-NEXT: s_lshr_b32 s8, s8, 14 4296; GFX6-NEXT: s_xor_b32 s6, s6, -1 4297; GFX6-NEXT: s_or_b32 s1, s1, s8 4298; GFX6-NEXT: s_lshl_b32 s3, s3, 1 4299; GFX6-NEXT: s_lshr_b32 s8, s6, 16 4300; GFX6-NEXT: s_and_b32 s9, s6, 15 4301; GFX6-NEXT: s_andn2_b32 s6, 15, s6 4302; GFX6-NEXT: s_and_b32 s9, 0xffff, s9 4303; GFX6-NEXT: s_bfe_u32 s3, s3, 0xf0001 4304; GFX6-NEXT: s_and_b32 s6, 0xffff, s6 4305; GFX6-NEXT: s_lshl_b32 s0, s0, s9 4306; GFX6-NEXT: s_lshr_b32 s3, s3, s6 4307; GFX6-NEXT: s_or_b32 s0, s0, s3 4308; GFX6-NEXT: s_and_b32 s3, s8, 15 4309; GFX6-NEXT: s_lshl_b32 s4, s4, 1 4310; GFX6-NEXT: s_andn2_b32 s6, 15, s8 4311; GFX6-NEXT: s_and_b32 s3, 0xffff, s3 4312; GFX6-NEXT: s_lshl_b32 s1, s1, s3 4313; GFX6-NEXT: s_bfe_u32 s3, s4, 0xf0001 4314; GFX6-NEXT: s_and_b32 s4, 0xffff, s6 4315; GFX6-NEXT: s_lshr_b32 s3, s3, s4 4316; GFX6-NEXT: s_or_b32 s1, s1, s3 4317; GFX6-NEXT: s_bfe_u32 s3, s5, 0xf0001 4318; GFX6-NEXT: s_lshl_b32 s2, s2, 1 4319; GFX6-NEXT: s_lshr_b32 s3, s3, 14 4320; GFX6-NEXT: s_xor_b32 s4, s7, -1 4321; GFX6-NEXT: s_or_b32 s2, s2, s3 4322; GFX6-NEXT: s_lshl_b32 s3, s5, 1 4323; GFX6-NEXT: s_and_b32 s5, s4, 15 4324; GFX6-NEXT: s_andn2_b32 s4, 15, s4 
4325; GFX6-NEXT: s_and_b32 s5, 0xffff, s5 4326; GFX6-NEXT: s_bfe_u32 s3, s3, 0xf0001 4327; GFX6-NEXT: s_and_b32 s4, 0xffff, s4 4328; GFX6-NEXT: s_lshl_b32 s2, s2, s5 4329; GFX6-NEXT: s_lshr_b32 s3, s3, s4 4330; GFX6-NEXT: s_and_b32 s1, 0xffff, s1 4331; GFX6-NEXT: s_or_b32 s2, s2, s3 4332; GFX6-NEXT: s_and_b32 s0, 0xffff, s0 4333; GFX6-NEXT: s_lshl_b32 s1, s1, 16 4334; GFX6-NEXT: s_or_b32 s0, s0, s1 4335; GFX6-NEXT: s_and_b32 s1, 0xffff, s2 4336; GFX6-NEXT: ; return to shader part epilog 4337; 4338; GFX8-LABEL: s_fshr_v3i16: 4339; GFX8: ; %bb.0: 4340; GFX8-NEXT: s_and_b32 s8, 0xffff, s2 4341; GFX8-NEXT: s_lshr_b32 s6, s0, 16 4342; GFX8-NEXT: s_lshr_b32 s7, s2, 16 4343; GFX8-NEXT: s_lshl_b32 s0, s0, 1 4344; GFX8-NEXT: s_lshr_b32 s8, s8, 15 4345; GFX8-NEXT: s_or_b32 s0, s0, s8 4346; GFX8-NEXT: s_lshl_b32 s6, s6, 1 4347; GFX8-NEXT: s_lshr_b32 s8, s7, 15 4348; GFX8-NEXT: s_lshl_b32 s2, s2, 1 4349; GFX8-NEXT: s_xor_b32 s4, s4, -1 4350; GFX8-NEXT: s_or_b32 s6, s6, s8 4351; GFX8-NEXT: s_lshr_b32 s8, s4, 16 4352; GFX8-NEXT: s_and_b32 s9, s4, 15 4353; GFX8-NEXT: s_andn2_b32 s4, 15, s4 4354; GFX8-NEXT: s_and_b32 s2, 0xffff, s2 4355; GFX8-NEXT: s_and_b32 s9, 0xffff, s9 4356; GFX8-NEXT: s_lshr_b32 s2, s2, 1 4357; GFX8-NEXT: s_and_b32 s4, 0xffff, s4 4358; GFX8-NEXT: s_lshl_b32 s0, s0, s9 4359; GFX8-NEXT: s_lshr_b32 s2, s2, s4 4360; GFX8-NEXT: s_or_b32 s0, s0, s2 4361; GFX8-NEXT: s_and_b32 s2, s8, 15 4362; GFX8-NEXT: s_lshl_b32 s7, s7, 1 4363; GFX8-NEXT: s_and_b32 s2, 0xffff, s2 4364; GFX8-NEXT: s_andn2_b32 s4, 15, s8 4365; GFX8-NEXT: s_lshl_b32 s2, s6, s2 4366; GFX8-NEXT: s_and_b32 s6, 0xffff, s7 4367; GFX8-NEXT: s_lshr_b32 s6, s6, 1 4368; GFX8-NEXT: s_and_b32 s4, 0xffff, s4 4369; GFX8-NEXT: s_lshr_b32 s4, s6, s4 4370; GFX8-NEXT: s_or_b32 s2, s2, s4 4371; GFX8-NEXT: s_and_b32 s4, 0xffff, s3 4372; GFX8-NEXT: s_lshl_b32 s1, s1, 1 4373; GFX8-NEXT: s_lshr_b32 s4, s4, 15 4374; GFX8-NEXT: s_or_b32 s1, s1, s4 4375; GFX8-NEXT: s_lshl_b32 s3, s3, 1 4376; GFX8-NEXT: s_xor_b32 s4, s5, -1 
4377; GFX8-NEXT: s_and_b32 s5, s4, 15 4378; GFX8-NEXT: s_andn2_b32 s4, 15, s4 4379; GFX8-NEXT: s_and_b32 s3, 0xffff, s3 4380; GFX8-NEXT: s_and_b32 s5, 0xffff, s5 4381; GFX8-NEXT: s_lshr_b32 s3, s3, 1 4382; GFX8-NEXT: s_and_b32 s4, 0xffff, s4 4383; GFX8-NEXT: s_lshl_b32 s1, s1, s5 4384; GFX8-NEXT: s_lshr_b32 s3, s3, s4 4385; GFX8-NEXT: s_and_b32 s2, 0xffff, s2 4386; GFX8-NEXT: s_or_b32 s1, s1, s3 4387; GFX8-NEXT: s_and_b32 s0, 0xffff, s0 4388; GFX8-NEXT: s_lshl_b32 s2, s2, 16 4389; GFX8-NEXT: s_or_b32 s0, s0, s2 4390; GFX8-NEXT: s_and_b32 s1, 0xffff, s1 4391; GFX8-NEXT: ; return to shader part epilog 4392; 4393; GFX9-LABEL: s_fshr_v3i16: 4394; GFX9: ; %bb.0: 4395; GFX9-NEXT: s_lshr_b32 s7, s0, 16 4396; GFX9-NEXT: s_lshl_b32 s0, s0, 0x10001 4397; GFX9-NEXT: s_lshl_b32 s7, s7, 1 4398; GFX9-NEXT: s_and_b32 s6, s4, 0xf000f 4399; GFX9-NEXT: s_andn2_b32 s4, 0xf000f, s4 4400; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s7 4401; GFX9-NEXT: s_lshr_b32 s7, s0, 16 4402; GFX9-NEXT: s_lshr_b32 s8, s4, 16 4403; GFX9-NEXT: s_lshl_b32 s0, s0, s4 4404; GFX9-NEXT: s_lshl_b32 s4, s7, s8 4405; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s4 4406; GFX9-NEXT: s_lshr_b32 s4, s2, 16 4407; GFX9-NEXT: s_and_b32 s2, s2, 0xffff 4408; GFX9-NEXT: s_lshr_b32 s7, s6, 16 4409; GFX9-NEXT: s_lshr_b32 s2, s2, s6 4410; GFX9-NEXT: s_lshr_b32 s4, s4, s7 4411; GFX9-NEXT: s_pack_ll_b32_b16 s2, s2, s4 4412; GFX9-NEXT: s_or_b32 s0, s0, s2 4413; GFX9-NEXT: s_and_b32 s2, s5, 0xf000f 4414; GFX9-NEXT: s_andn2_b32 s4, 0xf000f, s5 4415; GFX9-NEXT: s_lshr_b32 s5, s1, 16 4416; GFX9-NEXT: s_lshl_b32 s1, s1, 0x10001 4417; GFX9-NEXT: s_lshl_b32 s5, s5, 1 4418; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s5 4419; GFX9-NEXT: s_lshr_b32 s5, s1, 16 4420; GFX9-NEXT: s_lshr_b32 s6, s4, 16 4421; GFX9-NEXT: s_lshl_b32 s1, s1, s4 4422; GFX9-NEXT: s_lshl_b32 s4, s5, s6 4423; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s4 4424; GFX9-NEXT: s_lshr_b32 s4, s3, 16 4425; GFX9-NEXT: s_and_b32 s3, s3, 0xffff 4426; GFX9-NEXT: s_lshr_b32 s5, s2, 16 4427; GFX9-NEXT: 
s_lshr_b32 s2, s3, s2 4428; GFX9-NEXT: s_lshr_b32 s3, s4, s5 4429; GFX9-NEXT: s_pack_ll_b32_b16 s2, s2, s3 4430; GFX9-NEXT: s_or_b32 s1, s1, s2 4431; GFX9-NEXT: s_lshr_b32 s2, s0, 16 4432; GFX9-NEXT: s_and_b32 s0, s0, 0xffff 4433; GFX9-NEXT: s_lshl_b32 s2, s2, 16 4434; GFX9-NEXT: s_or_b32 s0, s0, s2 4435; GFX9-NEXT: s_and_b32 s1, s1, 0xffff 4436; GFX9-NEXT: ; return to shader part epilog 4437; 4438; GFX10-LABEL: s_fshr_v3i16: 4439; GFX10: ; %bb.0: 4440; GFX10-NEXT: s_lshr_b32 s6, s0, 16 4441; GFX10-NEXT: s_lshl_b32 s0, s0, 0x10001 4442; GFX10-NEXT: s_lshl_b32 s6, s6, 1 4443; GFX10-NEXT: s_and_b32 s7, s4, 0xf000f 4444; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s6 4445; GFX10-NEXT: s_andn2_b32 s4, 0xf000f, s4 4446; GFX10-NEXT: s_lshr_b32 s6, s0, 16 4447; GFX10-NEXT: s_lshr_b32 s8, s4, 16 4448; GFX10-NEXT: s_lshl_b32 s0, s0, s4 4449; GFX10-NEXT: s_lshl_b32 s4, s6, s8 4450; GFX10-NEXT: s_lshr_b32 s6, s2, 16 4451; GFX10-NEXT: s_and_b32 s2, s2, 0xffff 4452; GFX10-NEXT: s_lshr_b32 s8, s7, 16 4453; GFX10-NEXT: s_lshr_b32 s2, s2, s7 4454; GFX10-NEXT: s_lshr_b32 s6, s6, s8 4455; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s4 4456; GFX10-NEXT: s_pack_ll_b32_b16 s2, s2, s6 4457; GFX10-NEXT: s_and_b32 s4, s5, 0xf000f 4458; GFX10-NEXT: s_or_b32 s0, s0, s2 4459; GFX10-NEXT: s_lshr_b32 s2, s1, 16 4460; GFX10-NEXT: s_lshl_b32 s1, s1, 0x10001 4461; GFX10-NEXT: s_lshl_b32 s2, s2, 1 4462; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s2 4463; GFX10-NEXT: s_andn2_b32 s2, 0xf000f, s5 4464; GFX10-NEXT: s_lshr_b32 s5, s1, 16 4465; GFX10-NEXT: s_lshr_b32 s6, s2, 16 4466; GFX10-NEXT: s_lshl_b32 s1, s1, s2 4467; GFX10-NEXT: s_lshl_b32 s2, s5, s6 4468; GFX10-NEXT: s_lshr_b32 s5, s3, 16 4469; GFX10-NEXT: s_and_b32 s3, s3, 0xffff 4470; GFX10-NEXT: s_lshr_b32 s6, s4, 16 4471; GFX10-NEXT: s_lshr_b32 s3, s3, s4 4472; GFX10-NEXT: s_lshr_b32 s4, s5, s6 4473; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s2 4474; GFX10-NEXT: s_pack_ll_b32_b16 s2, s3, s4 4475; GFX10-NEXT: s_lshr_b32 s3, s0, 16 4476; GFX10-NEXT: s_and_b32 s0, 
s0, 0xffff 4477; GFX10-NEXT: s_lshl_b32 s3, s3, 16 4478; GFX10-NEXT: s_or_b32 s1, s1, s2 4479; GFX10-NEXT: s_or_b32 s0, s0, s3 4480; GFX10-NEXT: s_and_b32 s1, s1, 0xffff 4481; GFX10-NEXT: ; return to shader part epilog 4482; 4483; GFX11-LABEL: s_fshr_v3i16: 4484; GFX11: ; %bb.0: 4485; GFX11-NEXT: s_lshr_b32 s6, s0, 16 4486; GFX11-NEXT: s_lshl_b32 s0, s0, 0x10001 4487; GFX11-NEXT: s_lshl_b32 s6, s6, 1 4488; GFX11-NEXT: s_and_b32 s7, s4, 0xf000f 4489; GFX11-NEXT: s_pack_ll_b32_b16 s0, s0, s6 4490; GFX11-NEXT: s_and_not1_b32 s4, 0xf000f, s4 4491; GFX11-NEXT: s_lshr_b32 s6, s0, 16 4492; GFX11-NEXT: s_lshr_b32 s8, s4, 16 4493; GFX11-NEXT: s_lshl_b32 s0, s0, s4 4494; GFX11-NEXT: s_lshl_b32 s4, s6, s8 4495; GFX11-NEXT: s_lshr_b32 s6, s2, 16 4496; GFX11-NEXT: s_and_b32 s2, s2, 0xffff 4497; GFX11-NEXT: s_lshr_b32 s8, s7, 16 4498; GFX11-NEXT: s_lshr_b32 s2, s2, s7 4499; GFX11-NEXT: s_lshr_b32 s6, s6, s8 4500; GFX11-NEXT: s_pack_ll_b32_b16 s0, s0, s4 4501; GFX11-NEXT: s_pack_ll_b32_b16 s2, s2, s6 4502; GFX11-NEXT: s_and_b32 s4, s5, 0xf000f 4503; GFX11-NEXT: s_or_b32 s0, s0, s2 4504; GFX11-NEXT: s_lshr_b32 s2, s1, 16 4505; GFX11-NEXT: s_lshl_b32 s1, s1, 0x10001 4506; GFX11-NEXT: s_lshl_b32 s2, s2, 1 4507; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 4508; GFX11-NEXT: s_pack_ll_b32_b16 s1, s1, s2 4509; GFX11-NEXT: s_and_not1_b32 s2, 0xf000f, s5 4510; GFX11-NEXT: s_lshr_b32 s5, s1, 16 4511; GFX11-NEXT: s_lshr_b32 s6, s2, 16 4512; GFX11-NEXT: s_lshl_b32 s1, s1, s2 4513; GFX11-NEXT: s_lshl_b32 s2, s5, s6 4514; GFX11-NEXT: s_lshr_b32 s5, s3, 16 4515; GFX11-NEXT: s_and_b32 s3, s3, 0xffff 4516; GFX11-NEXT: s_lshr_b32 s6, s4, 16 4517; GFX11-NEXT: s_lshr_b32 s3, s3, s4 4518; GFX11-NEXT: s_lshr_b32 s4, s5, s6 4519; GFX11-NEXT: s_pack_ll_b32_b16 s1, s1, s2 4520; GFX11-NEXT: s_pack_ll_b32_b16 s2, s3, s4 4521; GFX11-NEXT: s_lshr_b32 s3, s0, 16 4522; GFX11-NEXT: s_and_b32 s0, s0, 0xffff 4523; GFX11-NEXT: s_lshl_b32 s3, s3, 16 4524; GFX11-NEXT: s_or_b32 s1, s1, s2 4525; GFX11-NEXT: s_or_b32 
s0, s0, s3 4526; GFX11-NEXT: s_and_b32 s1, s1, 0xffff 4527; GFX11-NEXT: ; return to shader part epilog 4528 %result = call <3 x i16> @llvm.fshr.v3i16(<3 x i16> %lhs, <3 x i16> %rhs, <3 x i16> %amt) 4529 %cast = bitcast <3 x i16> %result to i48 4530 ret i48 %cast 4531} 4532 4533define <3 x half> @v_fshr_v3i16(<3 x i16> %lhs, <3 x i16> %rhs, <3 x i16> %amt) { 4534; GFX6-LABEL: v_fshr_v3i16: 4535; GFX6: ; %bb.0: 4536; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4537; GFX6-NEXT: v_and_b32_e32 v7, 0xffff, v7 4538; GFX6-NEXT: v_and_b32_e32 v6, 0xffff, v6 4539; GFX6-NEXT: v_lshlrev_b32_e32 v7, 16, v7 4540; GFX6-NEXT: v_or_b32_e32 v6, v6, v7 4541; GFX6-NEXT: v_and_b32_e32 v7, 0xffff, v8 4542; GFX6-NEXT: v_bfe_u32 v8, v3, 1, 15 4543; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0 4544; GFX6-NEXT: v_lshrrev_b32_e32 v8, 14, v8 4545; GFX6-NEXT: v_or_b32_e32 v0, v0, v8 4546; GFX6-NEXT: v_bfe_u32 v8, v4, 1, 15 4547; GFX6-NEXT: v_lshlrev_b32_e32 v1, 1, v1 4548; GFX6-NEXT: v_lshrrev_b32_e32 v8, 14, v8 4549; GFX6-NEXT: v_xor_b32_e32 v6, -1, v6 4550; GFX6-NEXT: v_or_b32_e32 v1, v1, v8 4551; GFX6-NEXT: v_lshrrev_b32_e32 v8, 16, v6 4552; GFX6-NEXT: v_and_b32_e32 v9, 15, v6 4553; GFX6-NEXT: v_xor_b32_e32 v6, -1, v6 4554; GFX6-NEXT: v_lshlrev_b32_e32 v3, 1, v3 4555; GFX6-NEXT: v_and_b32_e32 v6, 15, v6 4556; GFX6-NEXT: v_and_b32_e32 v9, 0xffff, v9 4557; GFX6-NEXT: v_bfe_u32 v3, v3, 1, 15 4558; GFX6-NEXT: v_and_b32_e32 v6, 0xffff, v6 4559; GFX6-NEXT: v_lshlrev_b32_e32 v0, v9, v0 4560; GFX6-NEXT: v_lshrrev_b32_e32 v3, v6, v3 4561; GFX6-NEXT: v_or_b32_e32 v0, v0, v3 4562; GFX6-NEXT: v_and_b32_e32 v3, 15, v8 4563; GFX6-NEXT: v_xor_b32_e32 v6, -1, v8 4564; GFX6-NEXT: v_lshlrev_b32_e32 v4, 1, v4 4565; GFX6-NEXT: v_and_b32_e32 v6, 15, v6 4566; GFX6-NEXT: v_and_b32_e32 v3, 0xffff, v3 4567; GFX6-NEXT: v_lshlrev_b32_e32 v1, v3, v1 4568; GFX6-NEXT: v_bfe_u32 v3, v4, 1, 15 4569; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v6 4570; GFX6-NEXT: v_lshrrev_b32_e32 v3, v4, v3 4571; GFX6-NEXT: v_or_b32_e32 v1, v1, 
v3 4572; GFX6-NEXT: v_bfe_u32 v3, v5, 1, 15 4573; GFX6-NEXT: v_lshlrev_b32_e32 v2, 1, v2 4574; GFX6-NEXT: v_lshrrev_b32_e32 v3, 14, v3 4575; GFX6-NEXT: v_xor_b32_e32 v4, -1, v7 4576; GFX6-NEXT: v_or_b32_e32 v2, v2, v3 4577; GFX6-NEXT: v_lshlrev_b32_e32 v3, 1, v5 4578; GFX6-NEXT: v_and_b32_e32 v5, 15, v4 4579; GFX6-NEXT: v_xor_b32_e32 v4, -1, v4 4580; GFX6-NEXT: v_and_b32_e32 v4, 15, v4 4581; GFX6-NEXT: v_and_b32_e32 v5, 0xffff, v5 4582; GFX6-NEXT: v_bfe_u32 v3, v3, 1, 15 4583; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4 4584; GFX6-NEXT: v_lshlrev_b32_e32 v2, v5, v2 4585; GFX6-NEXT: v_lshrrev_b32_e32 v3, v4, v3 4586; GFX6-NEXT: v_or_b32_e32 v2, v2, v3 4587; GFX6-NEXT: s_setpc_b64 s[30:31] 4588; 4589; GFX8-LABEL: v_fshr_v3i16: 4590; GFX8: ; %bb.0: 4591; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4592; GFX8-NEXT: v_lshlrev_b16_e32 v6, 1, v0 4593; GFX8-NEXT: v_lshrrev_b16_e32 v7, 15, v2 4594; GFX8-NEXT: v_or_b32_e32 v6, v6, v7 4595; GFX8-NEXT: v_mov_b32_e32 v7, 1 4596; GFX8-NEXT: v_mov_b32_e32 v8, 15 4597; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v7, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 4598; GFX8-NEXT: v_lshrrev_b16_sdwa v9, v8, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 4599; GFX8-NEXT: v_xor_b32_e32 v4, -1, v4 4600; GFX8-NEXT: v_or_b32_e32 v0, v0, v9 4601; GFX8-NEXT: v_lshlrev_b16_e32 v9, 1, v2 4602; GFX8-NEXT: v_lshlrev_b16_sdwa v2, v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 4603; GFX8-NEXT: v_and_b32_e32 v7, 15, v4 4604; GFX8-NEXT: v_xor_b32_e32 v10, -1, v4 4605; GFX8-NEXT: v_and_b32_e32 v10, 15, v10 4606; GFX8-NEXT: v_lshlrev_b16_e32 v6, v7, v6 4607; GFX8-NEXT: v_lshrrev_b16_e32 v7, 1, v9 4608; GFX8-NEXT: v_lshrrev_b16_e32 v7, v10, v7 4609; GFX8-NEXT: v_or_b32_e32 v6, v6, v7 4610; GFX8-NEXT: v_and_b32_sdwa v7, v4, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 4611; GFX8-NEXT: v_mov_b32_e32 v8, -1 4612; GFX8-NEXT: v_xor_b32_sdwa v4, v4, v8 dst_sel:DWORD 
dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 4613; GFX8-NEXT: v_and_b32_e32 v4, 15, v4 4614; GFX8-NEXT: v_lshrrev_b16_e32 v2, 1, v2 4615; GFX8-NEXT: v_lshlrev_b16_e32 v0, v7, v0 4616; GFX8-NEXT: v_lshrrev_b16_e32 v2, v4, v2 4617; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 4618; GFX8-NEXT: v_lshlrev_b16_e32 v1, 1, v1 4619; GFX8-NEXT: v_lshrrev_b16_e32 v2, 15, v3 4620; GFX8-NEXT: v_or_b32_e32 v1, v1, v2 4621; GFX8-NEXT: v_lshlrev_b16_e32 v2, 1, v3 4622; GFX8-NEXT: v_xor_b32_e32 v3, -1, v5 4623; GFX8-NEXT: v_and_b32_e32 v4, 15, v3 4624; GFX8-NEXT: v_xor_b32_e32 v3, -1, v3 4625; GFX8-NEXT: v_and_b32_e32 v3, 15, v3 4626; GFX8-NEXT: v_lshrrev_b16_e32 v2, 1, v2 4627; GFX8-NEXT: v_lshlrev_b16_e32 v1, v4, v1 4628; GFX8-NEXT: v_lshrrev_b16_e32 v2, v3, v2 4629; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0 4630; GFX8-NEXT: v_or_b32_e32 v1, v1, v2 4631; GFX8-NEXT: v_lshlrev_b32_e32 v0, 16, v0 4632; GFX8-NEXT: v_or_b32_sdwa v0, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 4633; GFX8-NEXT: v_and_b32_e32 v1, 0xffff, v1 4634; GFX8-NEXT: s_setpc_b64 s[30:31] 4635; 4636; GFX9-LABEL: v_fshr_v3i16: 4637; GFX9: ; %bb.0: 4638; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4639; GFX9-NEXT: v_and_b32_e32 v6, 0xf000f, v4 4640; GFX9-NEXT: v_xor_b32_e32 v4, -1, v4 4641; GFX9-NEXT: v_and_b32_e32 v4, 0xf000f, v4 4642; GFX9-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1] 4643; GFX9-NEXT: v_pk_lshlrev_b16 v0, v4, v0 4644; GFX9-NEXT: v_pk_lshrrev_b16 v2, v6, v2 4645; GFX9-NEXT: v_xor_b32_e32 v4, -1, v5 4646; GFX9-NEXT: v_or_b32_e32 v0, v0, v2 4647; GFX9-NEXT: v_and_b32_e32 v2, 0xf000f, v5 4648; GFX9-NEXT: v_and_b32_e32 v4, 0xf000f, v4 4649; GFX9-NEXT: v_pk_lshlrev_b16 v1, 1, v1 op_sel_hi:[0,1] 4650; GFX9-NEXT: v_pk_lshlrev_b16 v1, v4, v1 4651; GFX9-NEXT: v_pk_lshrrev_b16 v2, v2, v3 4652; GFX9-NEXT: v_or_b32_e32 v1, v1, v2 4653; GFX9-NEXT: s_setpc_b64 s[30:31] 4654; 4655; GFX10-LABEL: v_fshr_v3i16: 4656; GFX10: ; %bb.0: 4657; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) 4658; GFX10-NEXT: v_xor_b32_e32 v6, -1, v4 4659; GFX10-NEXT: v_xor_b32_e32 v7, -1, v5 4660; GFX10-NEXT: v_and_b32_e32 v4, 0xf000f, v4 4661; GFX10-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1] 4662; GFX10-NEXT: v_and_b32_e32 v5, 0xf000f, v5 4663; GFX10-NEXT: v_and_b32_e32 v6, 0xf000f, v6 4664; GFX10-NEXT: v_pk_lshlrev_b16 v1, 1, v1 op_sel_hi:[0,1] 4665; GFX10-NEXT: v_and_b32_e32 v7, 0xf000f, v7 4666; GFX10-NEXT: v_pk_lshrrev_b16 v2, v4, v2 4667; GFX10-NEXT: v_pk_lshrrev_b16 v3, v5, v3 4668; GFX10-NEXT: v_pk_lshlrev_b16 v0, v6, v0 4669; GFX10-NEXT: v_pk_lshlrev_b16 v1, v7, v1 4670; GFX10-NEXT: v_or_b32_e32 v0, v0, v2 4671; GFX10-NEXT: v_or_b32_e32 v1, v1, v3 4672; GFX10-NEXT: s_setpc_b64 s[30:31] 4673; 4674; GFX11-LABEL: v_fshr_v3i16: 4675; GFX11: ; %bb.0: 4676; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4677; GFX11-NEXT: v_xor_b32_e32 v6, -1, v4 4678; GFX11-NEXT: v_xor_b32_e32 v7, -1, v5 4679; GFX11-NEXT: v_and_b32_e32 v4, 0xf000f, v4 4680; GFX11-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1] 4681; GFX11-NEXT: v_and_b32_e32 v5, 0xf000f, v5 4682; GFX11-NEXT: v_and_b32_e32 v6, 0xf000f, v6 4683; GFX11-NEXT: v_pk_lshlrev_b16 v1, 1, v1 op_sel_hi:[0,1] 4684; GFX11-NEXT: v_and_b32_e32 v7, 0xf000f, v7 4685; GFX11-NEXT: v_pk_lshrrev_b16 v2, v4, v2 4686; GFX11-NEXT: v_pk_lshrrev_b16 v3, v5, v3 4687; GFX11-NEXT: v_pk_lshlrev_b16 v0, v6, v0 4688; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2) 4689; GFX11-NEXT: v_pk_lshlrev_b16 v1, v7, v1 4690; GFX11-NEXT: v_or_b32_e32 v0, v0, v2 4691; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) 4692; GFX11-NEXT: v_or_b32_e32 v1, v1, v3 4693; GFX11-NEXT: s_setpc_b64 s[30:31] 4694 %result = call <3 x i16> @llvm.fshr.v3i16(<3 x i16> %lhs, <3 x i16> %rhs, <3 x i16> %amt) 4695 %cast.result = bitcast <3 x i16> %result to <3 x half> 4696 ret <3 x half> %cast.result 4697} 4698 4699define amdgpu_ps <2 x i32> @s_fshr_v4i16(<4 x i16> inreg %lhs, <4 x i16> inreg %rhs, <4 x i16> inreg %amt) { 
4700; GFX6-LABEL: s_fshr_v4i16: 4701; GFX6: ; %bb.0: 4702; GFX6-NEXT: s_lshl_b32 s9, s9, 16 4703; GFX6-NEXT: s_and_b32 s8, s8, 0xffff 4704; GFX6-NEXT: s_or_b32 s8, s9, s8 4705; GFX6-NEXT: s_lshl_b32 s9, s11, 16 4706; GFX6-NEXT: s_and_b32 s10, s10, 0xffff 4707; GFX6-NEXT: s_or_b32 s9, s9, s10 4708; GFX6-NEXT: s_bfe_u32 s10, s4, 0xf0001 4709; GFX6-NEXT: s_lshl_b32 s0, s0, 1 4710; GFX6-NEXT: s_lshr_b32 s10, s10, 14 4711; GFX6-NEXT: s_or_b32 s0, s0, s10 4712; GFX6-NEXT: s_bfe_u32 s10, s5, 0xf0001 4713; GFX6-NEXT: s_lshl_b32 s1, s1, 1 4714; GFX6-NEXT: s_lshr_b32 s10, s10, 14 4715; GFX6-NEXT: s_xor_b32 s8, s8, -1 4716; GFX6-NEXT: s_or_b32 s1, s1, s10 4717; GFX6-NEXT: s_lshl_b32 s4, s4, 1 4718; GFX6-NEXT: s_lshr_b32 s10, s8, 16 4719; GFX6-NEXT: s_and_b32 s11, s8, 15 4720; GFX6-NEXT: s_andn2_b32 s8, 15, s8 4721; GFX6-NEXT: s_and_b32 s11, 0xffff, s11 4722; GFX6-NEXT: s_bfe_u32 s4, s4, 0xf0001 4723; GFX6-NEXT: s_and_b32 s8, 0xffff, s8 4724; GFX6-NEXT: s_lshl_b32 s0, s0, s11 4725; GFX6-NEXT: s_lshr_b32 s4, s4, s8 4726; GFX6-NEXT: s_or_b32 s0, s0, s4 4727; GFX6-NEXT: s_and_b32 s4, s10, 15 4728; GFX6-NEXT: s_lshl_b32 s5, s5, 1 4729; GFX6-NEXT: s_andn2_b32 s8, 15, s10 4730; GFX6-NEXT: s_and_b32 s4, 0xffff, s4 4731; GFX6-NEXT: s_lshl_b32 s1, s1, s4 4732; GFX6-NEXT: s_bfe_u32 s4, s5, 0xf0001 4733; GFX6-NEXT: s_and_b32 s5, 0xffff, s8 4734; GFX6-NEXT: s_lshr_b32 s4, s4, s5 4735; GFX6-NEXT: s_or_b32 s1, s1, s4 4736; GFX6-NEXT: s_and_b32 s1, 0xffff, s1 4737; GFX6-NEXT: s_and_b32 s0, 0xffff, s0 4738; GFX6-NEXT: s_lshl_b32 s1, s1, 16 4739; GFX6-NEXT: s_or_b32 s0, s0, s1 4740; GFX6-NEXT: s_lshl_b32 s1, s2, 1 4741; GFX6-NEXT: s_bfe_u32 s2, s6, 0xf0001 4742; GFX6-NEXT: s_lshr_b32 s2, s2, 14 4743; GFX6-NEXT: s_or_b32 s1, s1, s2 4744; GFX6-NEXT: s_lshl_b32 s2, s3, 1 4745; GFX6-NEXT: s_bfe_u32 s3, s7, 0xf0001 4746; GFX6-NEXT: s_lshr_b32 s3, s3, 14 4747; GFX6-NEXT: s_xor_b32 s5, s9, -1 4748; GFX6-NEXT: s_or_b32 s2, s2, s3 4749; GFX6-NEXT: s_lshl_b32 s3, s6, 1 4750; GFX6-NEXT: s_lshl_b32 s4, 
s7, 1 4751; GFX6-NEXT: s_lshr_b32 s6, s5, 16 4752; GFX6-NEXT: s_and_b32 s7, s5, 15 4753; GFX6-NEXT: s_andn2_b32 s5, 15, s5 4754; GFX6-NEXT: s_and_b32 s7, 0xffff, s7 4755; GFX6-NEXT: s_bfe_u32 s3, s3, 0xf0001 4756; GFX6-NEXT: s_and_b32 s5, 0xffff, s5 4757; GFX6-NEXT: s_lshl_b32 s1, s1, s7 4758; GFX6-NEXT: s_lshr_b32 s3, s3, s5 4759; GFX6-NEXT: s_or_b32 s1, s1, s3 4760; GFX6-NEXT: s_and_b32 s3, s6, 15 4761; GFX6-NEXT: s_andn2_b32 s5, 15, s6 4762; GFX6-NEXT: s_and_b32 s3, 0xffff, s3 4763; GFX6-NEXT: s_lshl_b32 s2, s2, s3 4764; GFX6-NEXT: s_bfe_u32 s3, s4, 0xf0001 4765; GFX6-NEXT: s_and_b32 s4, 0xffff, s5 4766; GFX6-NEXT: s_lshr_b32 s3, s3, s4 4767; GFX6-NEXT: s_or_b32 s2, s2, s3 4768; GFX6-NEXT: s_and_b32 s2, 0xffff, s2 4769; GFX6-NEXT: s_and_b32 s1, 0xffff, s1 4770; GFX6-NEXT: s_lshl_b32 s2, s2, 16 4771; GFX6-NEXT: s_or_b32 s1, s1, s2 4772; GFX6-NEXT: ; return to shader part epilog 4773; 4774; GFX8-LABEL: s_fshr_v4i16: 4775; GFX8: ; %bb.0: 4776; GFX8-NEXT: s_and_b32 s8, 0xffff, s2 4777; GFX8-NEXT: s_lshr_b32 s6, s0, 16 4778; GFX8-NEXT: s_lshr_b32 s7, s2, 16 4779; GFX8-NEXT: s_lshl_b32 s0, s0, 1 4780; GFX8-NEXT: s_lshr_b32 s8, s8, 15 4781; GFX8-NEXT: s_or_b32 s0, s0, s8 4782; GFX8-NEXT: s_lshl_b32 s6, s6, 1 4783; GFX8-NEXT: s_lshr_b32 s8, s7, 15 4784; GFX8-NEXT: s_lshl_b32 s2, s2, 1 4785; GFX8-NEXT: s_xor_b32 s4, s4, -1 4786; GFX8-NEXT: s_or_b32 s6, s6, s8 4787; GFX8-NEXT: s_lshr_b32 s8, s4, 16 4788; GFX8-NEXT: s_and_b32 s9, s4, 15 4789; GFX8-NEXT: s_andn2_b32 s4, 15, s4 4790; GFX8-NEXT: s_and_b32 s2, 0xffff, s2 4791; GFX8-NEXT: s_and_b32 s9, 0xffff, s9 4792; GFX8-NEXT: s_lshr_b32 s2, s2, 1 4793; GFX8-NEXT: s_and_b32 s4, 0xffff, s4 4794; GFX8-NEXT: s_lshl_b32 s0, s0, s9 4795; GFX8-NEXT: s_lshr_b32 s2, s2, s4 4796; GFX8-NEXT: s_or_b32 s0, s0, s2 4797; GFX8-NEXT: s_and_b32 s2, s8, 15 4798; GFX8-NEXT: s_lshl_b32 s7, s7, 1 4799; GFX8-NEXT: s_and_b32 s2, 0xffff, s2 4800; GFX8-NEXT: s_andn2_b32 s4, 15, s8 4801; GFX8-NEXT: s_lshl_b32 s2, s6, s2 4802; GFX8-NEXT: s_and_b32 s6, 
0xffff, s7 4803; GFX8-NEXT: s_lshr_b32 s6, s6, 1 4804; GFX8-NEXT: s_and_b32 s4, 0xffff, s4 4805; GFX8-NEXT: s_lshr_b32 s4, s6, s4 4806; GFX8-NEXT: s_or_b32 s2, s2, s4 4807; GFX8-NEXT: s_and_b32 s2, 0xffff, s2 4808; GFX8-NEXT: s_and_b32 s0, 0xffff, s0 4809; GFX8-NEXT: s_lshl_b32 s2, s2, 16 4810; GFX8-NEXT: s_and_b32 s6, 0xffff, s3 4811; GFX8-NEXT: s_or_b32 s0, s0, s2 4812; GFX8-NEXT: s_lshr_b32 s2, s1, 16 4813; GFX8-NEXT: s_lshr_b32 s4, s3, 16 4814; GFX8-NEXT: s_lshl_b32 s1, s1, 1 4815; GFX8-NEXT: s_lshr_b32 s6, s6, 15 4816; GFX8-NEXT: s_or_b32 s1, s1, s6 4817; GFX8-NEXT: s_lshl_b32 s2, s2, 1 4818; GFX8-NEXT: s_lshr_b32 s6, s4, 15 4819; GFX8-NEXT: s_lshl_b32 s3, s3, 1 4820; GFX8-NEXT: s_xor_b32 s5, s5, -1 4821; GFX8-NEXT: s_or_b32 s2, s2, s6 4822; GFX8-NEXT: s_lshr_b32 s6, s5, 16 4823; GFX8-NEXT: s_and_b32 s7, s5, 15 4824; GFX8-NEXT: s_andn2_b32 s5, 15, s5 4825; GFX8-NEXT: s_and_b32 s3, 0xffff, s3 4826; GFX8-NEXT: s_and_b32 s7, 0xffff, s7 4827; GFX8-NEXT: s_lshr_b32 s3, s3, 1 4828; GFX8-NEXT: s_and_b32 s5, 0xffff, s5 4829; GFX8-NEXT: s_lshl_b32 s1, s1, s7 4830; GFX8-NEXT: s_lshr_b32 s3, s3, s5 4831; GFX8-NEXT: s_or_b32 s1, s1, s3 4832; GFX8-NEXT: s_and_b32 s3, s6, 15 4833; GFX8-NEXT: s_lshl_b32 s4, s4, 1 4834; GFX8-NEXT: s_and_b32 s3, 0xffff, s3 4835; GFX8-NEXT: s_andn2_b32 s5, 15, s6 4836; GFX8-NEXT: s_lshl_b32 s2, s2, s3 4837; GFX8-NEXT: s_and_b32 s3, 0xffff, s4 4838; GFX8-NEXT: s_lshr_b32 s3, s3, 1 4839; GFX8-NEXT: s_and_b32 s4, 0xffff, s5 4840; GFX8-NEXT: s_lshr_b32 s3, s3, s4 4841; GFX8-NEXT: s_or_b32 s2, s2, s3 4842; GFX8-NEXT: s_and_b32 s2, 0xffff, s2 4843; GFX8-NEXT: s_and_b32 s1, 0xffff, s1 4844; GFX8-NEXT: s_lshl_b32 s2, s2, 16 4845; GFX8-NEXT: s_or_b32 s1, s1, s2 4846; GFX8-NEXT: ; return to shader part epilog 4847; 4848; GFX9-LABEL: s_fshr_v4i16: 4849; GFX9: ; %bb.0: 4850; GFX9-NEXT: s_lshr_b32 s7, s0, 16 4851; GFX9-NEXT: s_lshl_b32 s0, s0, 0x10001 4852; GFX9-NEXT: s_lshl_b32 s7, s7, 1 4853; GFX9-NEXT: s_and_b32 s6, s4, 0xf000f 4854; GFX9-NEXT: 
s_andn2_b32 s4, 0xf000f, s4 4855; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s7 4856; GFX9-NEXT: s_lshr_b32 s7, s0, 16 4857; GFX9-NEXT: s_lshr_b32 s8, s4, 16 4858; GFX9-NEXT: s_lshl_b32 s0, s0, s4 4859; GFX9-NEXT: s_lshl_b32 s4, s7, s8 4860; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s4 4861; GFX9-NEXT: s_lshr_b32 s4, s2, 16 4862; GFX9-NEXT: s_and_b32 s2, s2, 0xffff 4863; GFX9-NEXT: s_lshr_b32 s7, s6, 16 4864; GFX9-NEXT: s_lshr_b32 s2, s2, s6 4865; GFX9-NEXT: s_lshr_b32 s4, s4, s7 4866; GFX9-NEXT: s_pack_ll_b32_b16 s2, s2, s4 4867; GFX9-NEXT: s_or_b32 s0, s0, s2 4868; GFX9-NEXT: s_and_b32 s2, s5, 0xf000f 4869; GFX9-NEXT: s_andn2_b32 s4, 0xf000f, s5 4870; GFX9-NEXT: s_lshr_b32 s5, s1, 16 4871; GFX9-NEXT: s_lshl_b32 s1, s1, 0x10001 4872; GFX9-NEXT: s_lshl_b32 s5, s5, 1 4873; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s5 4874; GFX9-NEXT: s_lshr_b32 s5, s1, 16 4875; GFX9-NEXT: s_lshr_b32 s6, s4, 16 4876; GFX9-NEXT: s_lshl_b32 s1, s1, s4 4877; GFX9-NEXT: s_lshl_b32 s4, s5, s6 4878; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s4 4879; GFX9-NEXT: s_lshr_b32 s4, s3, 16 4880; GFX9-NEXT: s_and_b32 s3, s3, 0xffff 4881; GFX9-NEXT: s_lshr_b32 s5, s2, 16 4882; GFX9-NEXT: s_lshr_b32 s2, s3, s2 4883; GFX9-NEXT: s_lshr_b32 s3, s4, s5 4884; GFX9-NEXT: s_pack_ll_b32_b16 s2, s2, s3 4885; GFX9-NEXT: s_or_b32 s1, s1, s2 4886; GFX9-NEXT: ; return to shader part epilog 4887; 4888; GFX10-LABEL: s_fshr_v4i16: 4889; GFX10: ; %bb.0: 4890; GFX10-NEXT: s_lshr_b32 s6, s0, 16 4891; GFX10-NEXT: s_lshl_b32 s0, s0, 0x10001 4892; GFX10-NEXT: s_lshl_b32 s6, s6, 1 4893; GFX10-NEXT: s_and_b32 s7, s4, 0xf000f 4894; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s6 4895; GFX10-NEXT: s_andn2_b32 s4, 0xf000f, s4 4896; GFX10-NEXT: s_lshr_b32 s6, s0, 16 4897; GFX10-NEXT: s_lshr_b32 s8, s4, 16 4898; GFX10-NEXT: s_lshl_b32 s0, s0, s4 4899; GFX10-NEXT: s_lshl_b32 s4, s6, s8 4900; GFX10-NEXT: s_lshr_b32 s6, s2, 16 4901; GFX10-NEXT: s_pack_ll_b32_b16 s0, s0, s4 4902; GFX10-NEXT: s_lshr_b32 s4, s1, 16 4903; GFX10-NEXT: s_and_b32 s2, s2, 0xffff 
4904; GFX10-NEXT: s_lshr_b32 s8, s7, 16 4905; GFX10-NEXT: s_lshl_b32 s1, s1, 0x10001 4906; GFX10-NEXT: s_lshl_b32 s4, s4, 1 4907; GFX10-NEXT: s_lshr_b32 s2, s2, s7 4908; GFX10-NEXT: s_lshr_b32 s6, s6, s8 4909; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s4 4910; GFX10-NEXT: s_andn2_b32 s4, 0xf000f, s5 4911; GFX10-NEXT: s_pack_ll_b32_b16 s2, s2, s6 4912; GFX10-NEXT: s_and_b32 s6, s5, 0xf000f 4913; GFX10-NEXT: s_lshr_b32 s5, s1, 16 4914; GFX10-NEXT: s_lshr_b32 s7, s4, 16 4915; GFX10-NEXT: s_lshl_b32 s1, s1, s4 4916; GFX10-NEXT: s_lshl_b32 s4, s5, s7 4917; GFX10-NEXT: s_lshr_b32 s5, s3, 16 4918; GFX10-NEXT: s_and_b32 s3, s3, 0xffff 4919; GFX10-NEXT: s_lshr_b32 s7, s6, 16 4920; GFX10-NEXT: s_lshr_b32 s3, s3, s6 4921; GFX10-NEXT: s_lshr_b32 s5, s5, s7 4922; GFX10-NEXT: s_pack_ll_b32_b16 s1, s1, s4 4923; GFX10-NEXT: s_pack_ll_b32_b16 s3, s3, s5 4924; GFX10-NEXT: s_or_b32 s0, s0, s2 4925; GFX10-NEXT: s_or_b32 s1, s1, s3 4926; GFX10-NEXT: ; return to shader part epilog 4927; 4928; GFX11-LABEL: s_fshr_v4i16: 4929; GFX11: ; %bb.0: 4930; GFX11-NEXT: s_lshr_b32 s6, s0, 16 4931; GFX11-NEXT: s_lshl_b32 s0, s0, 0x10001 4932; GFX11-NEXT: s_lshl_b32 s6, s6, 1 4933; GFX11-NEXT: s_and_b32 s7, s4, 0xf000f 4934; GFX11-NEXT: s_pack_ll_b32_b16 s0, s0, s6 4935; GFX11-NEXT: s_and_not1_b32 s4, 0xf000f, s4 4936; GFX11-NEXT: s_lshr_b32 s6, s0, 16 4937; GFX11-NEXT: s_lshr_b32 s8, s4, 16 4938; GFX11-NEXT: s_lshl_b32 s0, s0, s4 4939; GFX11-NEXT: s_lshl_b32 s4, s6, s8 4940; GFX11-NEXT: s_lshr_b32 s6, s2, 16 4941; GFX11-NEXT: s_pack_ll_b32_b16 s0, s0, s4 4942; GFX11-NEXT: s_lshr_b32 s4, s1, 16 4943; GFX11-NEXT: s_and_b32 s2, s2, 0xffff 4944; GFX11-NEXT: s_lshr_b32 s8, s7, 16 4945; GFX11-NEXT: s_lshl_b32 s1, s1, 0x10001 4946; GFX11-NEXT: s_lshl_b32 s4, s4, 1 4947; GFX11-NEXT: s_lshr_b32 s2, s2, s7 4948; GFX11-NEXT: s_lshr_b32 s6, s6, s8 4949; GFX11-NEXT: s_pack_ll_b32_b16 s1, s1, s4 4950; GFX11-NEXT: s_and_not1_b32 s4, 0xf000f, s5 4951; GFX11-NEXT: s_pack_ll_b32_b16 s2, s2, s6 4952; GFX11-NEXT: s_and_b32 
s6, s5, 0xf000f 4953; GFX11-NEXT: s_lshr_b32 s5, s1, 16 4954; GFX11-NEXT: s_lshr_b32 s7, s4, 16 4955; GFX11-NEXT: s_lshl_b32 s1, s1, s4 4956; GFX11-NEXT: s_lshl_b32 s4, s5, s7 4957; GFX11-NEXT: s_lshr_b32 s5, s3, 16 4958; GFX11-NEXT: s_and_b32 s3, s3, 0xffff 4959; GFX11-NEXT: s_lshr_b32 s7, s6, 16 4960; GFX11-NEXT: s_lshr_b32 s3, s3, s6 4961; GFX11-NEXT: s_lshr_b32 s5, s5, s7 4962; GFX11-NEXT: s_pack_ll_b32_b16 s1, s1, s4 4963; GFX11-NEXT: s_pack_ll_b32_b16 s3, s3, s5 4964; GFX11-NEXT: s_or_b32 s0, s0, s2 4965; GFX11-NEXT: s_or_b32 s1, s1, s3 4966; GFX11-NEXT: ; return to shader part epilog 4967 %result = call <4 x i16> @llvm.fshr.v4i16(<4 x i16> %lhs, <4 x i16> %rhs, <4 x i16> %amt) 4968 %cast.result = bitcast <4 x i16> %result to <2 x i32> 4969 ret <2 x i32> %cast.result 4970} 4971 4972define <4 x half> @v_fshr_v4i16(<4 x i16> %lhs, <4 x i16> %rhs, <4 x i16> %amt) { 4973; GFX6-LABEL: v_fshr_v4i16: 4974; GFX6: ; %bb.0: 4975; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4976; GFX6-NEXT: v_lshlrev_b32_e32 v9, 16, v9 4977; GFX6-NEXT: v_and_b32_e32 v8, 0xffff, v8 4978; GFX6-NEXT: v_or_b32_e32 v8, v9, v8 4979; GFX6-NEXT: v_lshlrev_b32_e32 v9, 16, v11 4980; GFX6-NEXT: v_and_b32_e32 v10, 0xffff, v10 4981; GFX6-NEXT: v_or_b32_e32 v9, v9, v10 4982; GFX6-NEXT: v_bfe_u32 v10, v4, 1, 15 4983; GFX6-NEXT: v_lshlrev_b32_e32 v0, 1, v0 4984; GFX6-NEXT: v_lshrrev_b32_e32 v10, 14, v10 4985; GFX6-NEXT: v_or_b32_e32 v0, v0, v10 4986; GFX6-NEXT: v_bfe_u32 v10, v5, 1, 15 4987; GFX6-NEXT: v_lshlrev_b32_e32 v1, 1, v1 4988; GFX6-NEXT: v_lshrrev_b32_e32 v10, 14, v10 4989; GFX6-NEXT: v_xor_b32_e32 v8, -1, v8 4990; GFX6-NEXT: v_or_b32_e32 v1, v1, v10 4991; GFX6-NEXT: v_lshrrev_b32_e32 v10, 16, v8 4992; GFX6-NEXT: v_and_b32_e32 v11, 15, v8 4993; GFX6-NEXT: v_xor_b32_e32 v8, -1, v8 4994; GFX6-NEXT: v_lshlrev_b32_e32 v4, 1, v4 4995; GFX6-NEXT: v_and_b32_e32 v8, 15, v8 4996; GFX6-NEXT: v_and_b32_e32 v11, 0xffff, v11 4997; GFX6-NEXT: v_bfe_u32 v4, v4, 1, 15 4998; GFX6-NEXT: v_and_b32_e32 v8, 
0xffff, v8 4999; GFX6-NEXT: v_lshlrev_b32_e32 v0, v11, v0 5000; GFX6-NEXT: v_lshrrev_b32_e32 v4, v8, v4 5001; GFX6-NEXT: v_or_b32_e32 v0, v0, v4 5002; GFX6-NEXT: v_and_b32_e32 v4, 15, v10 5003; GFX6-NEXT: v_xor_b32_e32 v8, -1, v10 5004; GFX6-NEXT: v_lshlrev_b32_e32 v5, 1, v5 5005; GFX6-NEXT: v_and_b32_e32 v8, 15, v8 5006; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4 5007; GFX6-NEXT: v_lshlrev_b32_e32 v1, v4, v1 5008; GFX6-NEXT: v_bfe_u32 v4, v5, 1, 15 5009; GFX6-NEXT: v_and_b32_e32 v5, 0xffff, v8 5010; GFX6-NEXT: v_lshrrev_b32_e32 v4, v5, v4 5011; GFX6-NEXT: v_or_b32_e32 v1, v1, v4 5012; GFX6-NEXT: v_bfe_u32 v4, v6, 1, 15 5013; GFX6-NEXT: v_lshlrev_b32_e32 v2, 1, v2 5014; GFX6-NEXT: v_lshrrev_b32_e32 v4, 14, v4 5015; GFX6-NEXT: v_or_b32_e32 v2, v2, v4 5016; GFX6-NEXT: v_bfe_u32 v4, v7, 1, 15 5017; GFX6-NEXT: v_lshlrev_b32_e32 v3, 1, v3 5018; GFX6-NEXT: v_lshrrev_b32_e32 v4, 14, v4 5019; GFX6-NEXT: v_or_b32_e32 v3, v3, v4 5020; GFX6-NEXT: v_lshlrev_b32_e32 v4, 1, v6 5021; GFX6-NEXT: v_xor_b32_e32 v6, -1, v9 5022; GFX6-NEXT: v_lshlrev_b32_e32 v5, 1, v7 5023; GFX6-NEXT: v_lshrrev_b32_e32 v7, 16, v6 5024; GFX6-NEXT: v_and_b32_e32 v8, 15, v6 5025; GFX6-NEXT: v_xor_b32_e32 v6, -1, v6 5026; GFX6-NEXT: v_and_b32_e32 v6, 15, v6 5027; GFX6-NEXT: v_and_b32_e32 v8, 0xffff, v8 5028; GFX6-NEXT: v_bfe_u32 v4, v4, 1, 15 5029; GFX6-NEXT: v_and_b32_e32 v6, 0xffff, v6 5030; GFX6-NEXT: v_lshlrev_b32_e32 v2, v8, v2 5031; GFX6-NEXT: v_lshrrev_b32_e32 v4, v6, v4 5032; GFX6-NEXT: v_or_b32_e32 v2, v2, v4 5033; GFX6-NEXT: v_and_b32_e32 v4, 15, v7 5034; GFX6-NEXT: v_xor_b32_e32 v6, -1, v7 5035; GFX6-NEXT: v_and_b32_e32 v6, 15, v6 5036; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4 5037; GFX6-NEXT: v_lshlrev_b32_e32 v3, v4, v3 5038; GFX6-NEXT: v_bfe_u32 v4, v5, 1, 15 5039; GFX6-NEXT: v_and_b32_e32 v5, 0xffff, v6 5040; GFX6-NEXT: v_lshrrev_b32_e32 v4, v5, v4 5041; GFX6-NEXT: v_or_b32_e32 v3, v3, v4 5042; GFX6-NEXT: s_setpc_b64 s[30:31] 5043; 5044; GFX8-LABEL: v_fshr_v4i16: 5045; GFX8: ; %bb.0: 5046; 
GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5047; GFX8-NEXT: v_lshlrev_b16_e32 v6, 1, v0 5048; GFX8-NEXT: v_lshrrev_b16_e32 v7, 15, v2 5049; GFX8-NEXT: v_or_b32_e32 v6, v6, v7 5050; GFX8-NEXT: v_mov_b32_e32 v7, 1 5051; GFX8-NEXT: v_mov_b32_e32 v8, 15 5052; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v7, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 5053; GFX8-NEXT: v_lshrrev_b16_sdwa v9, v8, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 5054; GFX8-NEXT: v_xor_b32_e32 v4, -1, v4 5055; GFX8-NEXT: v_or_b32_e32 v0, v0, v9 5056; GFX8-NEXT: v_lshlrev_b16_e32 v9, 1, v2 5057; GFX8-NEXT: v_xor_b32_e32 v11, -1, v4 5058; GFX8-NEXT: v_and_b32_e32 v10, 15, v4 5059; GFX8-NEXT: v_and_b32_e32 v11, 15, v11 5060; GFX8-NEXT: v_lshrrev_b16_e32 v9, 1, v9 5061; GFX8-NEXT: v_lshlrev_b16_e32 v6, v10, v6 5062; GFX8-NEXT: v_lshrrev_b16_e32 v9, v11, v9 5063; GFX8-NEXT: v_mov_b32_e32 v10, -1 5064; GFX8-NEXT: v_lshlrev_b16_sdwa v2, v7, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 5065; GFX8-NEXT: v_or_b32_e32 v6, v6, v9 5066; GFX8-NEXT: v_and_b32_sdwa v9, v4, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 5067; GFX8-NEXT: v_xor_b32_sdwa v4, v4, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 5068; GFX8-NEXT: v_and_b32_e32 v4, 15, v4 5069; GFX8-NEXT: v_lshrrev_b16_e32 v2, 1, v2 5070; GFX8-NEXT: v_lshlrev_b16_e32 v0, v9, v0 5071; GFX8-NEXT: v_lshrrev_b16_e32 v2, v4, v2 5072; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 5073; GFX8-NEXT: v_lshlrev_b16_e32 v2, 1, v1 5074; GFX8-NEXT: v_lshrrev_b16_e32 v4, 15, v3 5075; GFX8-NEXT: v_and_b32_e32 v0, 0xffff, v0 5076; GFX8-NEXT: v_or_b32_e32 v2, v2, v4 5077; GFX8-NEXT: v_lshlrev_b16_sdwa v1, v7, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 5078; GFX8-NEXT: v_lshrrev_b16_sdwa v4, v8, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 5079; GFX8-NEXT: v_xor_b32_e32 v5, -1, v5 5080; GFX8-NEXT: 
v_lshlrev_b32_e32 v0, 16, v0 5081; GFX8-NEXT: v_or_b32_e32 v1, v1, v4 5082; GFX8-NEXT: v_lshlrev_b16_e32 v4, 1, v3 5083; GFX8-NEXT: v_lshlrev_b16_sdwa v3, v7, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 5084; GFX8-NEXT: v_xor_b32_e32 v7, -1, v5 5085; GFX8-NEXT: v_or_b32_sdwa v0, v6, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 5086; GFX8-NEXT: v_and_b32_e32 v6, 15, v5 5087; GFX8-NEXT: v_and_b32_e32 v7, 15, v7 5088; GFX8-NEXT: v_lshrrev_b16_e32 v4, 1, v4 5089; GFX8-NEXT: v_lshlrev_b16_e32 v2, v6, v2 5090; GFX8-NEXT: v_lshrrev_b16_e32 v4, v7, v4 5091; GFX8-NEXT: v_or_b32_e32 v2, v2, v4 5092; GFX8-NEXT: v_and_b32_sdwa v4, v5, v8 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 5093; GFX8-NEXT: v_xor_b32_sdwa v5, v5, v10 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 5094; GFX8-NEXT: v_and_b32_e32 v5, 15, v5 5095; GFX8-NEXT: v_lshrrev_b16_e32 v3, 1, v3 5096; GFX8-NEXT: v_lshlrev_b16_e32 v1, v4, v1 5097; GFX8-NEXT: v_lshrrev_b16_e32 v3, v5, v3 5098; GFX8-NEXT: v_or_b32_e32 v1, v1, v3 5099; GFX8-NEXT: v_and_b32_e32 v1, 0xffff, v1 5100; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1 5101; GFX8-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD 5102; GFX8-NEXT: s_setpc_b64 s[30:31] 5103; 5104; GFX9-LABEL: v_fshr_v4i16: 5105; GFX9: ; %bb.0: 5106; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5107; GFX9-NEXT: v_and_b32_e32 v6, 0xf000f, v4 5108; GFX9-NEXT: v_xor_b32_e32 v4, -1, v4 5109; GFX9-NEXT: v_and_b32_e32 v4, 0xf000f, v4 5110; GFX9-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1] 5111; GFX9-NEXT: v_pk_lshlrev_b16 v0, v4, v0 5112; GFX9-NEXT: v_pk_lshrrev_b16 v2, v6, v2 5113; GFX9-NEXT: v_xor_b32_e32 v4, -1, v5 5114; GFX9-NEXT: v_or_b32_e32 v0, v0, v2 5115; GFX9-NEXT: v_and_b32_e32 v2, 0xf000f, v5 5116; GFX9-NEXT: v_and_b32_e32 v4, 0xf000f, v4 5117; GFX9-NEXT: v_pk_lshlrev_b16 v1, 1, v1 op_sel_hi:[0,1] 5118; GFX9-NEXT: v_pk_lshlrev_b16 v1, 
v4, v1 5119; GFX9-NEXT: v_pk_lshrrev_b16 v2, v2, v3 5120; GFX9-NEXT: v_or_b32_e32 v1, v1, v2 5121; GFX9-NEXT: s_setpc_b64 s[30:31] 5122; 5123; GFX10-LABEL: v_fshr_v4i16: 5124; GFX10: ; %bb.0: 5125; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5126; GFX10-NEXT: v_xor_b32_e32 v6, -1, v4 5127; GFX10-NEXT: v_xor_b32_e32 v7, -1, v5 5128; GFX10-NEXT: v_and_b32_e32 v4, 0xf000f, v4 5129; GFX10-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1] 5130; GFX10-NEXT: v_and_b32_e32 v5, 0xf000f, v5 5131; GFX10-NEXT: v_and_b32_e32 v6, 0xf000f, v6 5132; GFX10-NEXT: v_pk_lshlrev_b16 v1, 1, v1 op_sel_hi:[0,1] 5133; GFX10-NEXT: v_and_b32_e32 v7, 0xf000f, v7 5134; GFX10-NEXT: v_pk_lshrrev_b16 v2, v4, v2 5135; GFX10-NEXT: v_pk_lshrrev_b16 v3, v5, v3 5136; GFX10-NEXT: v_pk_lshlrev_b16 v0, v6, v0 5137; GFX10-NEXT: v_pk_lshlrev_b16 v1, v7, v1 5138; GFX10-NEXT: v_or_b32_e32 v0, v0, v2 5139; GFX10-NEXT: v_or_b32_e32 v1, v1, v3 5140; GFX10-NEXT: s_setpc_b64 s[30:31] 5141; 5142; GFX11-LABEL: v_fshr_v4i16: 5143; GFX11: ; %bb.0: 5144; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5145; GFX11-NEXT: v_xor_b32_e32 v6, -1, v4 5146; GFX11-NEXT: v_xor_b32_e32 v7, -1, v5 5147; GFX11-NEXT: v_and_b32_e32 v4, 0xf000f, v4 5148; GFX11-NEXT: v_pk_lshlrev_b16 v0, 1, v0 op_sel_hi:[0,1] 5149; GFX11-NEXT: v_and_b32_e32 v5, 0xf000f, v5 5150; GFX11-NEXT: v_and_b32_e32 v6, 0xf000f, v6 5151; GFX11-NEXT: v_pk_lshlrev_b16 v1, 1, v1 op_sel_hi:[0,1] 5152; GFX11-NEXT: v_and_b32_e32 v7, 0xf000f, v7 5153; GFX11-NEXT: v_pk_lshrrev_b16 v2, v4, v2 5154; GFX11-NEXT: v_pk_lshrrev_b16 v3, v5, v3 5155; GFX11-NEXT: v_pk_lshlrev_b16 v0, v6, v0 5156; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2) 5157; GFX11-NEXT: v_pk_lshlrev_b16 v1, v7, v1 5158; GFX11-NEXT: v_or_b32_e32 v0, v0, v2 5159; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) 5160; GFX11-NEXT: v_or_b32_e32 v1, v1, v3 5161; GFX11-NEXT: s_setpc_b64 s[30:31] 5162 %result = call <4 x i16> @llvm.fshr.v4i16(<4 x i16> %lhs, <4 x i16> 
%rhs, <4 x i16> %amt) 5163 %cast.result = bitcast <4 x i16> %result to <4 x half> 5164 ret <4 x half> %cast.result 5165} 5166 5167define amdgpu_ps i64 @s_fshr_i64(i64 inreg %lhs, i64 inreg %rhs, i64 inreg %amt) { 5168; GFX6-LABEL: s_fshr_i64: 5169; GFX6: ; %bb.0: 5170; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 5171; GFX6-NEXT: s_not_b32 s5, s4 5172; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], s5 5173; GFX6-NEXT: s_lshr_b64 s[2:3], s[2:3], s4 5174; GFX6-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] 5175; GFX6-NEXT: ; return to shader part epilog 5176; 5177; GFX8-LABEL: s_fshr_i64: 5178; GFX8: ; %bb.0: 5179; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 5180; GFX8-NEXT: s_not_b32 s5, s4 5181; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], s5 5182; GFX8-NEXT: s_lshr_b64 s[2:3], s[2:3], s4 5183; GFX8-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] 5184; GFX8-NEXT: ; return to shader part epilog 5185; 5186; GFX9-LABEL: s_fshr_i64: 5187; GFX9: ; %bb.0: 5188; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 5189; GFX9-NEXT: s_not_b32 s5, s4 5190; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], s5 5191; GFX9-NEXT: s_lshr_b64 s[2:3], s[2:3], s4 5192; GFX9-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] 5193; GFX9-NEXT: ; return to shader part epilog 5194; 5195; GFX10-LABEL: s_fshr_i64: 5196; GFX10: ; %bb.0: 5197; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 5198; GFX10-NEXT: s_not_b32 s5, s4 5199; GFX10-NEXT: s_lshr_b64 s[2:3], s[2:3], s4 5200; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s5 5201; GFX10-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] 5202; GFX10-NEXT: ; return to shader part epilog 5203; 5204; GFX11-LABEL: s_fshr_i64: 5205; GFX11: ; %bb.0: 5206; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 5207; GFX11-NEXT: s_not_b32 s5, s4 5208; GFX11-NEXT: s_lshr_b64 s[2:3], s[2:3], s4 5209; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], s5 5210; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 5211; GFX11-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] 5212; GFX11-NEXT: ; return to shader part epilog 5213 %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 %amt) 5214 ret i64 
%result 5215} 5216 5217define amdgpu_ps i64 @s_fshr_i64_5(i64 inreg %lhs, i64 inreg %rhs) { 5218; GCN-LABEL: s_fshr_i64_5: 5219; GCN: ; %bb.0: 5220; GCN-NEXT: s_lshl_b32 s1, s0, 27 5221; GCN-NEXT: s_mov_b32 s0, 0 5222; GCN-NEXT: s_lshr_b64 s[2:3], s[2:3], 5 5223; GCN-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] 5224; GCN-NEXT: ; return to shader part epilog 5225; 5226; GFX11-LABEL: s_fshr_i64_5: 5227; GFX11: ; %bb.0: 5228; GFX11-NEXT: s_lshl_b32 s1, s0, 27 5229; GFX11-NEXT: s_mov_b32 s0, 0 5230; GFX11-NEXT: s_lshr_b64 s[2:3], s[2:3], 5 5231; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 5232; GFX11-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] 5233; GFX11-NEXT: ; return to shader part epilog 5234 %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 5) 5235 ret i64 %result 5236} 5237 5238define amdgpu_ps i64 @s_fshr_i64_32(i64 inreg %lhs, i64 inreg %rhs) { 5239; GCN-LABEL: s_fshr_i64_32: 5240; GCN: ; %bb.0: 5241; GCN-NEXT: s_mov_b32 s1, s0 5242; GCN-NEXT: s_mov_b32 s0, 0 5243; GCN-NEXT: s_mov_b32 s2, s3 5244; GCN-NEXT: s_mov_b32 s3, s0 5245; GCN-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] 5246; GCN-NEXT: ; return to shader part epilog 5247; 5248; GFX11-LABEL: s_fshr_i64_32: 5249; GFX11: ; %bb.0: 5250; GFX11-NEXT: s_mov_b32 s1, s0 5251; GFX11-NEXT: s_mov_b32 s0, 0 5252; GFX11-NEXT: s_mov_b32 s2, s3 5253; GFX11-NEXT: s_mov_b32 s3, s0 5254; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 5255; GFX11-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] 5256; GFX11-NEXT: ; return to shader part epilog 5257 %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 32) 5258 ret i64 %result 5259} 5260 5261define amdgpu_ps i64 @s_fshr_i64_48(i64 inreg %lhs, i64 inreg %rhs) { 5262; GCN-LABEL: s_fshr_i64_48: 5263; GCN: ; %bb.0: 5264; GCN-NEXT: s_lshl_b64 s[0:1], s[0:1], 16 5265; GCN-NEXT: s_lshr_b32 s2, s3, 16 5266; GCN-NEXT: s_mov_b32 s3, 0 5267; GCN-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] 5268; GCN-NEXT: ; return to shader part epilog 5269; 5270; GFX11-LABEL: s_fshr_i64_48: 5271; GFX11: ; %bb.0: 5272; 
GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], 16 5273; GFX11-NEXT: s_lshr_b32 s2, s3, 16 5274; GFX11-NEXT: s_mov_b32 s3, 0 5275; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 5276; GFX11-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] 5277; GFX11-NEXT: ; return to shader part epilog 5278 %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 48) 5279 ret i64 %result 5280} 5281 5282define i64 @v_fshr_i64(i64 %lhs, i64 %rhs, i64 %amt) { 5283; GFX6-LABEL: v_fshr_i64: 5284; GFX6: ; %bb.0: 5285; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5286; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 1 5287; GFX6-NEXT: v_not_b32_e32 v5, v4 5288; GFX6-NEXT: v_and_b32_e32 v5, 63, v5 5289; GFX6-NEXT: v_and_b32_e32 v4, 63, v4 5290; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], v5 5291; GFX6-NEXT: v_lshr_b64 v[2:3], v[2:3], v4 5292; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 5293; GFX6-NEXT: v_or_b32_e32 v1, v1, v3 5294; GFX6-NEXT: s_setpc_b64 s[30:31] 5295; 5296; GFX8-LABEL: v_fshr_i64: 5297; GFX8: ; %bb.0: 5298; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5299; GFX8-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 5300; GFX8-NEXT: v_not_b32_e32 v5, v4 5301; GFX8-NEXT: v_and_b32_e32 v5, 63, v5 5302; GFX8-NEXT: v_and_b32_e32 v4, 63, v4 5303; GFX8-NEXT: v_lshlrev_b64 v[0:1], v5, v[0:1] 5304; GFX8-NEXT: v_lshrrev_b64 v[2:3], v4, v[2:3] 5305; GFX8-NEXT: v_or_b32_e32 v0, v0, v2 5306; GFX8-NEXT: v_or_b32_e32 v1, v1, v3 5307; GFX8-NEXT: s_setpc_b64 s[30:31] 5308; 5309; GFX9-LABEL: v_fshr_i64: 5310; GFX9: ; %bb.0: 5311; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5312; GFX9-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 5313; GFX9-NEXT: v_not_b32_e32 v5, v4 5314; GFX9-NEXT: v_and_b32_e32 v5, 63, v5 5315; GFX9-NEXT: v_and_b32_e32 v4, 63, v4 5316; GFX9-NEXT: v_lshlrev_b64 v[0:1], v5, v[0:1] 5317; GFX9-NEXT: v_lshrrev_b64 v[2:3], v4, v[2:3] 5318; GFX9-NEXT: v_or_b32_e32 v0, v0, v2 5319; GFX9-NEXT: v_or_b32_e32 v1, v1, v3 5320; GFX9-NEXT: s_setpc_b64 s[30:31] 5321; 5322; GFX10-LABEL: v_fshr_i64: 5323; GFX10: ; %bb.0: 5324; 
GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5325; GFX10-NEXT: v_not_b32_e32 v5, v4 5326; GFX10-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 5327; GFX10-NEXT: v_and_b32_e32 v4, 63, v4 5328; GFX10-NEXT: v_and_b32_e32 v5, 63, v5 5329; GFX10-NEXT: v_lshrrev_b64 v[2:3], v4, v[2:3] 5330; GFX10-NEXT: v_lshlrev_b64 v[0:1], v5, v[0:1] 5331; GFX10-NEXT: v_or_b32_e32 v0, v0, v2 5332; GFX10-NEXT: v_or_b32_e32 v1, v1, v3 5333; GFX10-NEXT: s_setpc_b64 s[30:31] 5334; 5335; GFX11-LABEL: v_fshr_i64: 5336; GFX11: ; %bb.0: 5337; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5338; GFX11-NEXT: v_not_b32_e32 v5, v4 5339; GFX11-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 5340; GFX11-NEXT: v_and_b32_e32 v4, 63, v4 5341; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2) 5342; GFX11-NEXT: v_and_b32_e32 v5, 63, v5 5343; GFX11-NEXT: v_lshrrev_b64 v[2:3], v4, v[2:3] 5344; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 5345; GFX11-NEXT: v_lshlrev_b64 v[0:1], v5, v[0:1] 5346; GFX11-NEXT: v_or_b32_e32 v0, v0, v2 5347; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) 5348; GFX11-NEXT: v_or_b32_e32 v1, v1, v3 5349; GFX11-NEXT: s_setpc_b64 s[30:31] 5350 %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 %amt) 5351 ret i64 %result 5352} 5353 5354define i64 @v_fshr_i64_5(i64 %lhs, i64 %rhs) { 5355; GFX6-LABEL: v_fshr_i64_5: 5356; GFX6: ; %bb.0: 5357; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5358; GFX6-NEXT: v_mov_b32_e32 v4, v0 5359; GFX6-NEXT: v_lshr_b64 v[0:1], v[2:3], 5 5360; GFX6-NEXT: v_lshlrev_b32_e32 v2, 27, v4 5361; GFX6-NEXT: v_or_b32_e32 v1, v2, v1 5362; GFX6-NEXT: s_setpc_b64 s[30:31] 5363; 5364; GFX8-LABEL: v_fshr_i64_5: 5365; GFX8: ; %bb.0: 5366; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5367; GFX8-NEXT: v_mov_b32_e32 v4, v0 5368; GFX8-NEXT: v_lshrrev_b64 v[0:1], 5, v[2:3] 5369; GFX8-NEXT: v_lshlrev_b32_e32 v2, 27, v4 5370; GFX8-NEXT: v_or_b32_e32 v1, v2, v1 5371; GFX8-NEXT: s_setpc_b64 s[30:31] 
5372; 5373; GFX9-LABEL: v_fshr_i64_5: 5374; GFX9: ; %bb.0: 5375; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5376; GFX9-NEXT: v_mov_b32_e32 v4, v0 5377; GFX9-NEXT: v_lshrrev_b64 v[0:1], 5, v[2:3] 5378; GFX9-NEXT: v_lshl_or_b32 v1, v4, 27, v1 5379; GFX9-NEXT: s_setpc_b64 s[30:31] 5380; 5381; GFX10-LABEL: v_fshr_i64_5: 5382; GFX10: ; %bb.0: 5383; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5384; GFX10-NEXT: v_mov_b32_e32 v4, v0 5385; GFX10-NEXT: v_lshrrev_b64 v[0:1], 5, v[2:3] 5386; GFX10-NEXT: v_lshl_or_b32 v1, v4, 27, v1 5387; GFX10-NEXT: s_setpc_b64 s[30:31] 5388; 5389; GFX11-LABEL: v_fshr_i64_5: 5390; GFX11: ; %bb.0: 5391; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5392; GFX11-NEXT: v_mov_b32_e32 v4, v0 5393; GFX11-NEXT: v_lshrrev_b64 v[0:1], 5, v[2:3] 5394; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 5395; GFX11-NEXT: v_lshl_or_b32 v1, v4, 27, v1 5396; GFX11-NEXT: s_setpc_b64 s[30:31] 5397 %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 5) 5398 ret i64 %result 5399} 5400 5401define i64 @v_fshr_i64_32(i64 %lhs, i64 %rhs) { 5402; GCN-LABEL: v_fshr_i64_32: 5403; GCN: ; %bb.0: 5404; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5405; GCN-NEXT: v_mov_b32_e32 v1, v0 5406; GCN-NEXT: v_mov_b32_e32 v0, v3 5407; GCN-NEXT: s_setpc_b64 s[30:31] 5408; 5409; GFX11-LABEL: v_fshr_i64_32: 5410; GFX11: ; %bb.0: 5411; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5412; GFX11-NEXT: v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, v3 5413; GFX11-NEXT: s_setpc_b64 s[30:31] 5414 %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 32) 5415 ret i64 %result 5416} 5417 5418define i64 @v_fshr_i64_48(i64 %lhs, i64 %rhs) { 5419; GFX6-LABEL: v_fshr_i64_48: 5420; GFX6: ; %bb.0: 5421; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5422; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 16 5423; GFX6-NEXT: v_lshrrev_b32_e32 v2, 16, v3 5424; GFX6-NEXT: v_or_b32_e32 v0, v0, v2 5425; GFX6-NEXT: s_setpc_b64 s[30:31] 5426; 5427; GFX8-LABEL: v_fshr_i64_48: 5428; 
GFX8: ; %bb.0: 5429; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5430; GFX8-NEXT: v_lshlrev_b64 v[0:1], 16, v[0:1] 5431; GFX8-NEXT: v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 5432; GFX8-NEXT: s_setpc_b64 s[30:31] 5433; 5434; GFX9-LABEL: v_fshr_i64_48: 5435; GFX9: ; %bb.0: 5436; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5437; GFX9-NEXT: v_lshlrev_b64 v[0:1], 16, v[0:1] 5438; GFX9-NEXT: v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 5439; GFX9-NEXT: s_setpc_b64 s[30:31] 5440; 5441; GFX10-LABEL: v_fshr_i64_48: 5442; GFX10: ; %bb.0: 5443; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5444; GFX10-NEXT: v_lshlrev_b64 v[0:1], 16, v[0:1] 5445; GFX10-NEXT: v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 5446; GFX10-NEXT: s_setpc_b64 s[30:31] 5447; 5448; GFX11-LABEL: v_fshr_i64_48: 5449; GFX11: ; %bb.0: 5450; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5451; GFX11-NEXT: v_lshlrev_b64 v[0:1], 16, v[0:1] 5452; GFX11-NEXT: v_lshrrev_b32_e32 v2, 16, v3 5453; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 5454; GFX11-NEXT: v_or_b32_e32 v0, v0, v2 5455; GFX11-NEXT: s_setpc_b64 s[30:31] 5456 %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 48) 5457 ret i64 %result 5458} 5459 5460define amdgpu_ps <2 x float> @v_fshr_i64_ssv(i64 inreg %lhs, i64 inreg %rhs, i64 %amt) { 5461; GFX6-LABEL: v_fshr_i64_ssv: 5462; GFX6: ; %bb.0: 5463; GFX6-NEXT: v_not_b32_e32 v1, v0 5464; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 5465; GFX6-NEXT: v_and_b32_e32 v1, 63, v1 5466; GFX6-NEXT: v_and_b32_e32 v0, 63, v0 5467; GFX6-NEXT: v_lshl_b64 v[1:2], s[0:1], v1 5468; GFX6-NEXT: v_lshr_b64 v[3:4], s[2:3], v0 5469; GFX6-NEXT: v_or_b32_e32 v0, v1, v3 5470; GFX6-NEXT: v_or_b32_e32 v1, v2, v4 5471; GFX6-NEXT: ; return to shader part epilog 5472; 5473; GFX8-LABEL: v_fshr_i64_ssv: 5474; GFX8: ; %bb.0: 5475; GFX8-NEXT: v_not_b32_e32 v1, v0 5476; 
GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 5477; GFX8-NEXT: v_and_b32_e32 v1, 63, v1 5478; GFX8-NEXT: v_and_b32_e32 v0, 63, v0 5479; GFX8-NEXT: v_lshlrev_b64 v[1:2], v1, s[0:1] 5480; GFX8-NEXT: v_lshrrev_b64 v[3:4], v0, s[2:3] 5481; GFX8-NEXT: v_or_b32_e32 v0, v1, v3 5482; GFX8-NEXT: v_or_b32_e32 v1, v2, v4 5483; GFX8-NEXT: ; return to shader part epilog 5484; 5485; GFX9-LABEL: v_fshr_i64_ssv: 5486; GFX9: ; %bb.0: 5487; GFX9-NEXT: v_not_b32_e32 v1, v0 5488; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 5489; GFX9-NEXT: v_and_b32_e32 v1, 63, v1 5490; GFX9-NEXT: v_and_b32_e32 v0, 63, v0 5491; GFX9-NEXT: v_lshlrev_b64 v[1:2], v1, s[0:1] 5492; GFX9-NEXT: v_lshrrev_b64 v[3:4], v0, s[2:3] 5493; GFX9-NEXT: v_or_b32_e32 v0, v1, v3 5494; GFX9-NEXT: v_or_b32_e32 v1, v2, v4 5495; GFX9-NEXT: ; return to shader part epilog 5496; 5497; GFX10-LABEL: v_fshr_i64_ssv: 5498; GFX10: ; %bb.0: 5499; GFX10-NEXT: v_not_b32_e32 v1, v0 5500; GFX10-NEXT: v_and_b32_e32 v0, 63, v0 5501; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 5502; GFX10-NEXT: v_and_b32_e32 v2, 63, v1 5503; GFX10-NEXT: v_lshrrev_b64 v[0:1], v0, s[2:3] 5504; GFX10-NEXT: v_lshlrev_b64 v[2:3], v2, s[0:1] 5505; GFX10-NEXT: v_or_b32_e32 v0, v2, v0 5506; GFX10-NEXT: v_or_b32_e32 v1, v3, v1 5507; GFX10-NEXT: ; return to shader part epilog 5508; 5509; GFX11-LABEL: v_fshr_i64_ssv: 5510; GFX11: ; %bb.0: 5511; GFX11-NEXT: v_not_b32_e32 v1, v0 5512; GFX11-NEXT: v_and_b32_e32 v0, 63, v0 5513; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 5514; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 5515; GFX11-NEXT: v_and_b32_e32 v2, 63, v1 5516; GFX11-NEXT: v_lshrrev_b64 v[0:1], v0, s[2:3] 5517; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1) 5518; GFX11-NEXT: v_lshlrev_b64 v[2:3], v2, s[0:1] 5519; GFX11-NEXT: v_or_b32_e32 v0, v2, v0 5520; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) 5521; GFX11-NEXT: v_or_b32_e32 v1, v3, v1 5522; GFX11-NEXT: ; return to shader part epilog 5523 %result = 
call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 %amt) 5524 %cast = bitcast i64 %result to <2 x float> 5525 ret <2 x float> %cast 5526} 5527 5528define amdgpu_ps <2 x float> @v_fshr_i64_svs(i64 inreg %lhs, i64 %rhs, i64 inreg %amt) { 5529; GFX6-LABEL: v_fshr_i64_svs: 5530; GFX6: ; %bb.0: 5531; GFX6-NEXT: s_not_b32 s3, s2 5532; GFX6-NEXT: s_and_b32 s2, s2, 63 5533; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 5534; GFX6-NEXT: v_lshr_b64 v[0:1], v[0:1], s2 5535; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], s3 5536; GFX6-NEXT: v_or_b32_e32 v0, s0, v0 5537; GFX6-NEXT: v_or_b32_e32 v1, s1, v1 5538; GFX6-NEXT: ; return to shader part epilog 5539; 5540; GFX8-LABEL: v_fshr_i64_svs: 5541; GFX8: ; %bb.0: 5542; GFX8-NEXT: s_not_b32 s3, s2 5543; GFX8-NEXT: s_and_b32 s2, s2, 63 5544; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 5545; GFX8-NEXT: v_lshrrev_b64 v[0:1], s2, v[0:1] 5546; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], s3 5547; GFX8-NEXT: v_or_b32_e32 v0, s0, v0 5548; GFX8-NEXT: v_or_b32_e32 v1, s1, v1 5549; GFX8-NEXT: ; return to shader part epilog 5550; 5551; GFX9-LABEL: v_fshr_i64_svs: 5552; GFX9: ; %bb.0: 5553; GFX9-NEXT: s_not_b32 s3, s2 5554; GFX9-NEXT: s_and_b32 s2, s2, 63 5555; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 5556; GFX9-NEXT: v_lshrrev_b64 v[0:1], s2, v[0:1] 5557; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], s3 5558; GFX9-NEXT: v_or_b32_e32 v0, s0, v0 5559; GFX9-NEXT: v_or_b32_e32 v1, s1, v1 5560; GFX9-NEXT: ; return to shader part epilog 5561; 5562; GFX10-LABEL: v_fshr_i64_svs: 5563; GFX10: ; %bb.0: 5564; GFX10-NEXT: s_and_b32 s3, s2, 63 5565; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 5566; GFX10-NEXT: v_lshrrev_b64 v[0:1], s3, v[0:1] 5567; GFX10-NEXT: s_not_b32 s2, s2 5568; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s2 5569; GFX10-NEXT: v_or_b32_e32 v0, s0, v0 5570; GFX10-NEXT: v_or_b32_e32 v1, s1, v1 5571; GFX10-NEXT: ; return to shader part epilog 5572; 5573; GFX11-LABEL: v_fshr_i64_svs: 5574; GFX11: ; %bb.0: 5575; GFX11-NEXT: s_and_b32 s3, s2, 63 5576; GFX11-NEXT: s_lshl_b64 s[0:1], 
s[0:1], 1 5577; GFX11-NEXT: v_lshrrev_b64 v[0:1], s3, v[0:1] 5578; GFX11-NEXT: s_not_b32 s2, s2 5579; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 5580; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], s2 5581; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1) 5582; GFX11-NEXT: v_or_b32_e32 v0, s0, v0 5583; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) 5584; GFX11-NEXT: v_or_b32_e32 v1, s1, v1 5585; GFX11-NEXT: ; return to shader part epilog 5586 %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 %amt) 5587 %cast = bitcast i64 %result to <2 x float> 5588 ret <2 x float> %cast 5589} 5590 5591define amdgpu_ps <2 x float> @v_fshr_i64_vss(i64 %lhs, i64 inreg %rhs, i64 inreg %amt) { 5592; GFX6-LABEL: v_fshr_i64_vss: 5593; GFX6: ; %bb.0: 5594; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 1 5595; GFX6-NEXT: s_andn2_b32 s3, 63, s2 5596; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], s3 5597; GFX6-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 5598; GFX6-NEXT: v_or_b32_e32 v0, s0, v0 5599; GFX6-NEXT: v_or_b32_e32 v1, s1, v1 5600; GFX6-NEXT: ; return to shader part epilog 5601; 5602; GFX8-LABEL: v_fshr_i64_vss: 5603; GFX8: ; %bb.0: 5604; GFX8-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 5605; GFX8-NEXT: s_andn2_b32 s3, 63, s2 5606; GFX8-NEXT: v_lshlrev_b64 v[0:1], s3, v[0:1] 5607; GFX8-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 5608; GFX8-NEXT: v_or_b32_e32 v0, s0, v0 5609; GFX8-NEXT: v_or_b32_e32 v1, s1, v1 5610; GFX8-NEXT: ; return to shader part epilog 5611; 5612; GFX9-LABEL: v_fshr_i64_vss: 5613; GFX9: ; %bb.0: 5614; GFX9-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 5615; GFX9-NEXT: s_andn2_b32 s3, 63, s2 5616; GFX9-NEXT: v_lshlrev_b64 v[0:1], s3, v[0:1] 5617; GFX9-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 5618; GFX9-NEXT: v_or_b32_e32 v0, s0, v0 5619; GFX9-NEXT: v_or_b32_e32 v1, s1, v1 5620; GFX9-NEXT: ; return to shader part epilog 5621; 5622; GFX10-LABEL: v_fshr_i64_vss: 5623; GFX10: ; %bb.0: 5624; GFX10-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 5625; GFX10-NEXT: s_andn2_b32 s3, 63, s2 5626; GFX10-NEXT: 
s_lshr_b64 s[0:1], s[0:1], s2 5627; GFX10-NEXT: v_lshlrev_b64 v[0:1], s3, v[0:1] 5628; GFX10-NEXT: v_or_b32_e32 v0, s0, v0 5629; GFX10-NEXT: v_or_b32_e32 v1, s1, v1 5630; GFX10-NEXT: ; return to shader part epilog 5631; 5632; GFX11-LABEL: v_fshr_i64_vss: 5633; GFX11: ; %bb.0: 5634; GFX11-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 5635; GFX11-NEXT: s_and_not1_b32 s3, 63, s2 5636; GFX11-NEXT: s_lshr_b64 s[0:1], s[0:1], s2 5637; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1) 5638; GFX11-NEXT: v_lshlrev_b64 v[0:1], s3, v[0:1] 5639; GFX11-NEXT: v_or_b32_e32 v0, s0, v0 5640; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) 5641; GFX11-NEXT: v_or_b32_e32 v1, s1, v1 5642; GFX11-NEXT: ; return to shader part epilog 5643 %result = call i64 @llvm.fshr.i64(i64 %lhs, i64 %rhs, i64 %amt) 5644 %cast = bitcast i64 %result to <2 x float> 5645 ret <2 x float> %cast 5646} 5647 5648define amdgpu_ps <2 x i64> @s_fshr_v2i64(<2 x i64> inreg %lhs, <2 x i64> inreg %rhs, <2 x i64> inreg %amt) { 5649; GFX6-LABEL: s_fshr_v2i64: 5650; GFX6: ; %bb.0: 5651; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 5652; GFX6-NEXT: s_not_b32 s9, s8 5653; GFX6-NEXT: s_lshl_b64 s[0:1], s[0:1], s9 5654; GFX6-NEXT: s_lshr_b64 s[4:5], s[4:5], s8 5655; GFX6-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5] 5656; GFX6-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 5657; GFX6-NEXT: s_not_b32 s4, s10 5658; GFX6-NEXT: s_lshl_b64 s[2:3], s[2:3], s4 5659; GFX6-NEXT: s_lshr_b64 s[4:5], s[6:7], s10 5660; GFX6-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5] 5661; GFX6-NEXT: ; return to shader part epilog 5662; 5663; GFX8-LABEL: s_fshr_v2i64: 5664; GFX8: ; %bb.0: 5665; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 5666; GFX8-NEXT: s_not_b32 s9, s8 5667; GFX8-NEXT: s_lshl_b64 s[0:1], s[0:1], s9 5668; GFX8-NEXT: s_lshr_b64 s[4:5], s[4:5], s8 5669; GFX8-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5] 5670; GFX8-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 5671; GFX8-NEXT: s_not_b32 s4, s10 5672; GFX8-NEXT: s_lshl_b64 s[2:3], s[2:3], s4 5673; GFX8-NEXT: s_lshr_b64 
s[4:5], s[6:7], s10 5674; GFX8-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5] 5675; GFX8-NEXT: ; return to shader part epilog 5676; 5677; GFX9-LABEL: s_fshr_v2i64: 5678; GFX9: ; %bb.0: 5679; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 5680; GFX9-NEXT: s_not_b32 s9, s8 5681; GFX9-NEXT: s_lshl_b64 s[0:1], s[0:1], s9 5682; GFX9-NEXT: s_lshr_b64 s[4:5], s[4:5], s8 5683; GFX9-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5] 5684; GFX9-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 5685; GFX9-NEXT: s_not_b32 s4, s10 5686; GFX9-NEXT: s_lshl_b64 s[2:3], s[2:3], s4 5687; GFX9-NEXT: s_lshr_b64 s[4:5], s[6:7], s10 5688; GFX9-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5] 5689; GFX9-NEXT: ; return to shader part epilog 5690; 5691; GFX10-LABEL: s_fshr_v2i64: 5692; GFX10: ; %bb.0: 5693; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 5694; GFX10-NEXT: s_not_b32 s9, s8 5695; GFX10-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 5696; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s9 5697; GFX10-NEXT: s_not_b32 s9, s10 5698; GFX10-NEXT: s_lshr_b64 s[4:5], s[4:5], s8 5699; GFX10-NEXT: s_lshl_b64 s[2:3], s[2:3], s9 5700; GFX10-NEXT: s_lshr_b64 s[6:7], s[6:7], s10 5701; GFX10-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5] 5702; GFX10-NEXT: s_or_b64 s[2:3], s[2:3], s[6:7] 5703; GFX10-NEXT: ; return to shader part epilog 5704; 5705; GFX11-LABEL: s_fshr_v2i64: 5706; GFX11: ; %bb.0: 5707; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 5708; GFX11-NEXT: s_not_b32 s9, s8 5709; GFX11-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 5710; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], s9 5711; GFX11-NEXT: s_not_b32 s9, s10 5712; GFX11-NEXT: s_lshr_b64 s[4:5], s[4:5], s8 5713; GFX11-NEXT: s_lshl_b64 s[2:3], s[2:3], s9 5714; GFX11-NEXT: s_lshr_b64 s[6:7], s[6:7], s10 5715; GFX11-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5] 5716; GFX11-NEXT: s_or_b64 s[2:3], s[2:3], s[6:7] 5717; GFX11-NEXT: ; return to shader part epilog 5718 %result = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %lhs, <2 x i64> %rhs, <2 x i64> %amt) 5719 ret <2 x i64> %result 5720} 5721 5722define <2 x i64> @v_fshr_v2i64(<2 x i64> %lhs, <2 x 
i64> %rhs, <2 x i64> %amt) { 5723; GFX6-LABEL: v_fshr_v2i64: 5724; GFX6: ; %bb.0: 5725; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5726; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 1 5727; GFX6-NEXT: v_not_b32_e32 v9, v8 5728; GFX6-NEXT: v_and_b32_e32 v9, 63, v9 5729; GFX6-NEXT: v_and_b32_e32 v8, 63, v8 5730; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], v9 5731; GFX6-NEXT: v_lshr_b64 v[4:5], v[4:5], v8 5732; GFX6-NEXT: v_lshl_b64 v[2:3], v[2:3], 1 5733; GFX6-NEXT: v_or_b32_e32 v0, v0, v4 5734; GFX6-NEXT: v_not_b32_e32 v4, v10 5735; GFX6-NEXT: v_and_b32_e32 v4, 63, v4 5736; GFX6-NEXT: v_lshl_b64 v[2:3], v[2:3], v4 5737; GFX6-NEXT: v_and_b32_e32 v4, 63, v10 5738; GFX6-NEXT: v_lshr_b64 v[6:7], v[6:7], v4 5739; GFX6-NEXT: v_or_b32_e32 v1, v1, v5 5740; GFX6-NEXT: v_or_b32_e32 v2, v2, v6 5741; GFX6-NEXT: v_or_b32_e32 v3, v3, v7 5742; GFX6-NEXT: s_setpc_b64 s[30:31] 5743; 5744; GFX8-LABEL: v_fshr_v2i64: 5745; GFX8: ; %bb.0: 5746; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5747; GFX8-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 5748; GFX8-NEXT: v_not_b32_e32 v9, v8 5749; GFX8-NEXT: v_and_b32_e32 v9, 63, v9 5750; GFX8-NEXT: v_and_b32_e32 v8, 63, v8 5751; GFX8-NEXT: v_lshlrev_b64 v[0:1], v9, v[0:1] 5752; GFX8-NEXT: v_lshrrev_b64 v[4:5], v8, v[4:5] 5753; GFX8-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] 5754; GFX8-NEXT: v_or_b32_e32 v0, v0, v4 5755; GFX8-NEXT: v_not_b32_e32 v4, v10 5756; GFX8-NEXT: v_and_b32_e32 v4, 63, v4 5757; GFX8-NEXT: v_lshlrev_b64 v[2:3], v4, v[2:3] 5758; GFX8-NEXT: v_and_b32_e32 v4, 63, v10 5759; GFX8-NEXT: v_lshrrev_b64 v[6:7], v4, v[6:7] 5760; GFX8-NEXT: v_or_b32_e32 v1, v1, v5 5761; GFX8-NEXT: v_or_b32_e32 v2, v2, v6 5762; GFX8-NEXT: v_or_b32_e32 v3, v3, v7 5763; GFX8-NEXT: s_setpc_b64 s[30:31] 5764; 5765; GFX9-LABEL: v_fshr_v2i64: 5766; GFX9: ; %bb.0: 5767; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5768; GFX9-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 5769; GFX9-NEXT: v_not_b32_e32 v9, v8 5770; GFX9-NEXT: v_and_b32_e32 v9, 63, v9 5771; GFX9-NEXT: 
v_and_b32_e32 v8, 63, v8 5772; GFX9-NEXT: v_lshlrev_b64 v[0:1], v9, v[0:1] 5773; GFX9-NEXT: v_lshrrev_b64 v[4:5], v8, v[4:5] 5774; GFX9-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] 5775; GFX9-NEXT: v_or_b32_e32 v0, v0, v4 5776; GFX9-NEXT: v_not_b32_e32 v4, v10 5777; GFX9-NEXT: v_and_b32_e32 v4, 63, v4 5778; GFX9-NEXT: v_lshlrev_b64 v[2:3], v4, v[2:3] 5779; GFX9-NEXT: v_and_b32_e32 v4, 63, v10 5780; GFX9-NEXT: v_lshrrev_b64 v[6:7], v4, v[6:7] 5781; GFX9-NEXT: v_or_b32_e32 v1, v1, v5 5782; GFX9-NEXT: v_or_b32_e32 v2, v2, v6 5783; GFX9-NEXT: v_or_b32_e32 v3, v3, v7 5784; GFX9-NEXT: s_setpc_b64 s[30:31] 5785; 5786; GFX10-LABEL: v_fshr_v2i64: 5787; GFX10: ; %bb.0: 5788; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5789; GFX10-NEXT: v_not_b32_e32 v9, v8 5790; GFX10-NEXT: v_not_b32_e32 v11, v10 5791; GFX10-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 5792; GFX10-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] 5793; GFX10-NEXT: v_and_b32_e32 v8, 63, v8 5794; GFX10-NEXT: v_and_b32_e32 v9, 63, v9 5795; GFX10-NEXT: v_and_b32_e32 v11, 63, v11 5796; GFX10-NEXT: v_and_b32_e32 v10, 63, v10 5797; GFX10-NEXT: v_lshrrev_b64 v[4:5], v8, v[4:5] 5798; GFX10-NEXT: v_lshlrev_b64 v[0:1], v9, v[0:1] 5799; GFX10-NEXT: v_lshlrev_b64 v[2:3], v11, v[2:3] 5800; GFX10-NEXT: v_lshrrev_b64 v[6:7], v10, v[6:7] 5801; GFX10-NEXT: v_or_b32_e32 v0, v0, v4 5802; GFX10-NEXT: v_or_b32_e32 v1, v1, v5 5803; GFX10-NEXT: v_or_b32_e32 v2, v2, v6 5804; GFX10-NEXT: v_or_b32_e32 v3, v3, v7 5805; GFX10-NEXT: s_setpc_b64 s[30:31] 5806; 5807; GFX11-LABEL: v_fshr_v2i64: 5808; GFX11: ; %bb.0: 5809; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5810; GFX11-NEXT: v_not_b32_e32 v9, v8 5811; GFX11-NEXT: v_not_b32_e32 v11, v10 5812; GFX11-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 5813; GFX11-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] 5814; GFX11-NEXT: v_and_b32_e32 v8, 63, v8 5815; GFX11-NEXT: v_and_b32_e32 v9, 63, v9 5816; GFX11-NEXT: v_and_b32_e32 v11, 63, v11 5817; GFX11-NEXT: v_and_b32_e32 v10, 63, v10 5818; GFX11-NEXT: s_delay_alu 
instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 5819; GFX11-NEXT: v_lshrrev_b64 v[4:5], v8, v[4:5] 5820; GFX11-NEXT: v_lshlrev_b64 v[0:1], v9, v[0:1] 5821; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 5822; GFX11-NEXT: v_lshlrev_b64 v[2:3], v11, v[2:3] 5823; GFX11-NEXT: v_lshrrev_b64 v[6:7], v10, v[6:7] 5824; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4) 5825; GFX11-NEXT: v_or_b32_e32 v0, v0, v4 5826; GFX11-NEXT: v_or_b32_e32 v1, v1, v5 5827; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4) 5828; GFX11-NEXT: v_or_b32_e32 v2, v2, v6 5829; GFX11-NEXT: v_or_b32_e32 v3, v3, v7 5830; GFX11-NEXT: s_setpc_b64 s[30:31] 5831 %result = call <2 x i64> @llvm.fshr.v2i64(<2 x i64> %lhs, <2 x i64> %rhs, <2 x i64> %amt) 5832 ret <2 x i64> %result 5833} 5834 5835define amdgpu_ps i128 @s_fshr_i128(i128 inreg %lhs, i128 inreg %rhs, i128 inreg %amt) { 5836; GFX6-LABEL: s_fshr_i128: 5837; GFX6: ; %bb.0: 5838; GFX6-NEXT: s_lshl_b64 s[10:11], s[0:1], 1 5839; GFX6-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 5840; GFX6-NEXT: s_lshr_b32 s0, s1, 31 5841; GFX6-NEXT: s_mov_b32 s1, 0 5842; GFX6-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1] 5843; GFX6-NEXT: s_andn2_b32 s2, 0x7f, s8 5844; GFX6-NEXT: s_not_b32 s9, s8 5845; GFX6-NEXT: s_sub_i32 s16, s2, 64 5846; GFX6-NEXT: s_sub_i32 s12, 64, s2 5847; GFX6-NEXT: s_cmp_lt_u32 s2, 64 5848; GFX6-NEXT: s_cselect_b32 s17, 1, 0 5849; GFX6-NEXT: s_cmp_eq_u32 s2, 0 5850; GFX6-NEXT: s_cselect_b32 s18, 1, 0 5851; GFX6-NEXT: s_lshr_b64 s[12:13], s[10:11], s12 5852; GFX6-NEXT: s_lshl_b64 s[14:15], s[0:1], s9 5853; GFX6-NEXT: s_lshl_b64 s[2:3], s[10:11], s9 5854; GFX6-NEXT: s_or_b64 s[12:13], s[12:13], s[14:15] 5855; GFX6-NEXT: s_lshl_b64 s[10:11], s[10:11], s16 5856; GFX6-NEXT: s_cmp_lg_u32 s17, 0 5857; GFX6-NEXT: s_cselect_b64 s[2:3], s[2:3], 0 5858; GFX6-NEXT: s_cselect_b64 s[10:11], s[12:13], s[10:11] 5859; GFX6-NEXT: s_cmp_lg_u32 s18, 0 5860; GFX6-NEXT: 
s_cselect_b64 s[10:11], s[0:1], s[10:11] 5861; GFX6-NEXT: s_and_b32 s0, s8, 0x7f 5862; GFX6-NEXT: s_sub_i32 s14, s0, 64 5863; GFX6-NEXT: s_sub_i32 s12, 64, s0 5864; GFX6-NEXT: s_cmp_lt_u32 s0, 64 5865; GFX6-NEXT: s_cselect_b32 s15, 1, 0 5866; GFX6-NEXT: s_cmp_eq_u32 s0, 0 5867; GFX6-NEXT: s_cselect_b32 s16, 1, 0 5868; GFX6-NEXT: s_lshr_b64 s[0:1], s[6:7], s8 5869; GFX6-NEXT: s_lshr_b64 s[8:9], s[4:5], s8 5870; GFX6-NEXT: s_lshl_b64 s[12:13], s[6:7], s12 5871; GFX6-NEXT: s_or_b64 s[8:9], s[8:9], s[12:13] 5872; GFX6-NEXT: s_lshr_b64 s[6:7], s[6:7], s14 5873; GFX6-NEXT: s_cmp_lg_u32 s15, 0 5874; GFX6-NEXT: s_cselect_b64 s[6:7], s[8:9], s[6:7] 5875; GFX6-NEXT: s_cmp_lg_u32 s16, 0 5876; GFX6-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7] 5877; GFX6-NEXT: s_cmp_lg_u32 s15, 0 5878; GFX6-NEXT: s_cselect_b64 s[6:7], s[0:1], 0 5879; GFX6-NEXT: s_or_b64 s[0:1], s[2:3], s[4:5] 5880; GFX6-NEXT: s_or_b64 s[2:3], s[10:11], s[6:7] 5881; GFX6-NEXT: ; return to shader part epilog 5882; 5883; GFX8-LABEL: s_fshr_i128: 5884; GFX8: ; %bb.0: 5885; GFX8-NEXT: s_lshl_b64 s[10:11], s[0:1], 1 5886; GFX8-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 5887; GFX8-NEXT: s_lshr_b32 s0, s1, 31 5888; GFX8-NEXT: s_mov_b32 s1, 0 5889; GFX8-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1] 5890; GFX8-NEXT: s_andn2_b32 s2, 0x7f, s8 5891; GFX8-NEXT: s_not_b32 s9, s8 5892; GFX8-NEXT: s_sub_i32 s16, s2, 64 5893; GFX8-NEXT: s_sub_i32 s12, 64, s2 5894; GFX8-NEXT: s_cmp_lt_u32 s2, 64 5895; GFX8-NEXT: s_cselect_b32 s17, 1, 0 5896; GFX8-NEXT: s_cmp_eq_u32 s2, 0 5897; GFX8-NEXT: s_cselect_b32 s18, 1, 0 5898; GFX8-NEXT: s_lshr_b64 s[12:13], s[10:11], s12 5899; GFX8-NEXT: s_lshl_b64 s[14:15], s[0:1], s9 5900; GFX8-NEXT: s_lshl_b64 s[2:3], s[10:11], s9 5901; GFX8-NEXT: s_or_b64 s[12:13], s[12:13], s[14:15] 5902; GFX8-NEXT: s_lshl_b64 s[10:11], s[10:11], s16 5903; GFX8-NEXT: s_cmp_lg_u32 s17, 0 5904; GFX8-NEXT: s_cselect_b64 s[2:3], s[2:3], 0 5905; GFX8-NEXT: s_cselect_b64 s[10:11], s[12:13], s[10:11] 5906; GFX8-NEXT: s_cmp_lg_u32 s18, 0 
5907; GFX8-NEXT: s_cselect_b64 s[10:11], s[0:1], s[10:11] 5908; GFX8-NEXT: s_and_b32 s0, s8, 0x7f 5909; GFX8-NEXT: s_sub_i32 s14, s0, 64 5910; GFX8-NEXT: s_sub_i32 s12, 64, s0 5911; GFX8-NEXT: s_cmp_lt_u32 s0, 64 5912; GFX8-NEXT: s_cselect_b32 s15, 1, 0 5913; GFX8-NEXT: s_cmp_eq_u32 s0, 0 5914; GFX8-NEXT: s_cselect_b32 s16, 1, 0 5915; GFX8-NEXT: s_lshr_b64 s[0:1], s[6:7], s8 5916; GFX8-NEXT: s_lshr_b64 s[8:9], s[4:5], s8 5917; GFX8-NEXT: s_lshl_b64 s[12:13], s[6:7], s12 5918; GFX8-NEXT: s_or_b64 s[8:9], s[8:9], s[12:13] 5919; GFX8-NEXT: s_lshr_b64 s[6:7], s[6:7], s14 5920; GFX8-NEXT: s_cmp_lg_u32 s15, 0 5921; GFX8-NEXT: s_cselect_b64 s[6:7], s[8:9], s[6:7] 5922; GFX8-NEXT: s_cmp_lg_u32 s16, 0 5923; GFX8-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7] 5924; GFX8-NEXT: s_cmp_lg_u32 s15, 0 5925; GFX8-NEXT: s_cselect_b64 s[6:7], s[0:1], 0 5926; GFX8-NEXT: s_or_b64 s[0:1], s[2:3], s[4:5] 5927; GFX8-NEXT: s_or_b64 s[2:3], s[10:11], s[6:7] 5928; GFX8-NEXT: ; return to shader part epilog 5929; 5930; GFX9-LABEL: s_fshr_i128: 5931; GFX9: ; %bb.0: 5932; GFX9-NEXT: s_lshl_b64 s[10:11], s[0:1], 1 5933; GFX9-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 5934; GFX9-NEXT: s_lshr_b32 s0, s1, 31 5935; GFX9-NEXT: s_mov_b32 s1, 0 5936; GFX9-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1] 5937; GFX9-NEXT: s_andn2_b32 s2, 0x7f, s8 5938; GFX9-NEXT: s_not_b32 s9, s8 5939; GFX9-NEXT: s_sub_i32 s16, s2, 64 5940; GFX9-NEXT: s_sub_i32 s12, 64, s2 5941; GFX9-NEXT: s_cmp_lt_u32 s2, 64 5942; GFX9-NEXT: s_cselect_b32 s17, 1, 0 5943; GFX9-NEXT: s_cmp_eq_u32 s2, 0 5944; GFX9-NEXT: s_cselect_b32 s18, 1, 0 5945; GFX9-NEXT: s_lshr_b64 s[12:13], s[10:11], s12 5946; GFX9-NEXT: s_lshl_b64 s[14:15], s[0:1], s9 5947; GFX9-NEXT: s_lshl_b64 s[2:3], s[10:11], s9 5948; GFX9-NEXT: s_or_b64 s[12:13], s[12:13], s[14:15] 5949; GFX9-NEXT: s_lshl_b64 s[10:11], s[10:11], s16 5950; GFX9-NEXT: s_cmp_lg_u32 s17, 0 5951; GFX9-NEXT: s_cselect_b64 s[2:3], s[2:3], 0 5952; GFX9-NEXT: s_cselect_b64 s[10:11], s[12:13], s[10:11] 5953; GFX9-NEXT: 
s_cmp_lg_u32 s18, 0 5954; GFX9-NEXT: s_cselect_b64 s[10:11], s[0:1], s[10:11] 5955; GFX9-NEXT: s_and_b32 s0, s8, 0x7f 5956; GFX9-NEXT: s_sub_i32 s14, s0, 64 5957; GFX9-NEXT: s_sub_i32 s12, 64, s0 5958; GFX9-NEXT: s_cmp_lt_u32 s0, 64 5959; GFX9-NEXT: s_cselect_b32 s15, 1, 0 5960; GFX9-NEXT: s_cmp_eq_u32 s0, 0 5961; GFX9-NEXT: s_cselect_b32 s16, 1, 0 5962; GFX9-NEXT: s_lshr_b64 s[0:1], s[6:7], s8 5963; GFX9-NEXT: s_lshr_b64 s[8:9], s[4:5], s8 5964; GFX9-NEXT: s_lshl_b64 s[12:13], s[6:7], s12 5965; GFX9-NEXT: s_or_b64 s[8:9], s[8:9], s[12:13] 5966; GFX9-NEXT: s_lshr_b64 s[6:7], s[6:7], s14 5967; GFX9-NEXT: s_cmp_lg_u32 s15, 0 5968; GFX9-NEXT: s_cselect_b64 s[6:7], s[8:9], s[6:7] 5969; GFX9-NEXT: s_cmp_lg_u32 s16, 0 5970; GFX9-NEXT: s_cselect_b64 s[4:5], s[4:5], s[6:7] 5971; GFX9-NEXT: s_cmp_lg_u32 s15, 0 5972; GFX9-NEXT: s_cselect_b64 s[6:7], s[0:1], 0 5973; GFX9-NEXT: s_or_b64 s[0:1], s[2:3], s[4:5] 5974; GFX9-NEXT: s_or_b64 s[2:3], s[10:11], s[6:7] 5975; GFX9-NEXT: ; return to shader part epilog 5976; 5977; GFX10-LABEL: s_fshr_i128: 5978; GFX10: ; %bb.0: 5979; GFX10-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 5980; GFX10-NEXT: s_lshr_b32 s10, s1, 31 5981; GFX10-NEXT: s_mov_b32 s11, 0 5982; GFX10-NEXT: s_andn2_b32 s9, 0x7f, s8 5983; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 5984; GFX10-NEXT: s_or_b64 s[2:3], s[2:3], s[10:11] 5985; GFX10-NEXT: s_not_b32 s14, s8 5986; GFX10-NEXT: s_sub_i32 s16, s9, 64 5987; GFX10-NEXT: s_sub_i32 s10, 64, s9 5988; GFX10-NEXT: s_cmp_lt_u32 s9, 64 5989; GFX10-NEXT: s_cselect_b32 s17, 1, 0 5990; GFX10-NEXT: s_cmp_eq_u32 s9, 0 5991; GFX10-NEXT: s_cselect_b32 s9, 1, 0 5992; GFX10-NEXT: s_lshr_b64 s[10:11], s[0:1], s10 5993; GFX10-NEXT: s_lshl_b64 s[12:13], s[2:3], s14 5994; GFX10-NEXT: s_lshl_b64 s[14:15], s[0:1], s14 5995; GFX10-NEXT: s_or_b64 s[10:11], s[10:11], s[12:13] 5996; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s16 5997; GFX10-NEXT: s_cmp_lg_u32 s17, 0 5998; GFX10-NEXT: s_cselect_b64 s[12:13], s[14:15], 0 5999; GFX10-NEXT: s_cselect_b64 s[0:1], 
s[10:11], s[0:1] 6000; GFX10-NEXT: s_cmp_lg_u32 s9, 0 6001; GFX10-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1] 6002; GFX10-NEXT: s_and_b32 s0, s8, 0x7f 6003; GFX10-NEXT: s_sub_i32 s14, s0, 64 6004; GFX10-NEXT: s_sub_i32 s9, 64, s0 6005; GFX10-NEXT: s_cmp_lt_u32 s0, 64 6006; GFX10-NEXT: s_cselect_b32 s15, 1, 0 6007; GFX10-NEXT: s_cmp_eq_u32 s0, 0 6008; GFX10-NEXT: s_cselect_b32 s16, 1, 0 6009; GFX10-NEXT: s_lshr_b64 s[0:1], s[4:5], s8 6010; GFX10-NEXT: s_lshl_b64 s[10:11], s[6:7], s9 6011; GFX10-NEXT: s_lshr_b64 s[8:9], s[6:7], s8 6012; GFX10-NEXT: s_or_b64 s[0:1], s[0:1], s[10:11] 6013; GFX10-NEXT: s_lshr_b64 s[6:7], s[6:7], s14 6014; GFX10-NEXT: s_cmp_lg_u32 s15, 0 6015; GFX10-NEXT: s_cselect_b64 s[0:1], s[0:1], s[6:7] 6016; GFX10-NEXT: s_cmp_lg_u32 s16, 0 6017; GFX10-NEXT: s_cselect_b64 s[0:1], s[4:5], s[0:1] 6018; GFX10-NEXT: s_cmp_lg_u32 s15, 0 6019; GFX10-NEXT: s_cselect_b64 s[4:5], s[8:9], 0 6020; GFX10-NEXT: s_or_b64 s[0:1], s[12:13], s[0:1] 6021; GFX10-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5] 6022; GFX10-NEXT: ; return to shader part epilog 6023; 6024; GFX11-LABEL: s_fshr_i128: 6025; GFX11: ; %bb.0: 6026; GFX11-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 6027; GFX11-NEXT: s_lshr_b32 s10, s1, 31 6028; GFX11-NEXT: s_mov_b32 s11, 0 6029; GFX11-NEXT: s_and_not1_b32 s9, 0x7f, s8 6030; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 6031; GFX11-NEXT: s_or_b64 s[2:3], s[2:3], s[10:11] 6032; GFX11-NEXT: s_not_b32 s14, s8 6033; GFX11-NEXT: s_sub_i32 s16, s9, 64 6034; GFX11-NEXT: s_sub_i32 s10, 64, s9 6035; GFX11-NEXT: s_cmp_lt_u32 s9, 64 6036; GFX11-NEXT: s_cselect_b32 s17, 1, 0 6037; GFX11-NEXT: s_cmp_eq_u32 s9, 0 6038; GFX11-NEXT: s_cselect_b32 s9, 1, 0 6039; GFX11-NEXT: s_lshr_b64 s[10:11], s[0:1], s10 6040; GFX11-NEXT: s_lshl_b64 s[12:13], s[2:3], s14 6041; GFX11-NEXT: s_lshl_b64 s[14:15], s[0:1], s14 6042; GFX11-NEXT: s_or_b64 s[10:11], s[10:11], s[12:13] 6043; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], s16 6044; GFX11-NEXT: s_cmp_lg_u32 s17, 0 6045; GFX11-NEXT: s_cselect_b64 s[12:13], 
s[14:15], 0 6046; GFX11-NEXT: s_cselect_b64 s[0:1], s[10:11], s[0:1] 6047; GFX11-NEXT: s_cmp_lg_u32 s9, 0 6048; GFX11-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1] 6049; GFX11-NEXT: s_and_b32 s0, s8, 0x7f 6050; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 6051; GFX11-NEXT: s_sub_i32 s14, s0, 64 6052; GFX11-NEXT: s_sub_i32 s9, 64, s0 6053; GFX11-NEXT: s_cmp_lt_u32 s0, 64 6054; GFX11-NEXT: s_cselect_b32 s15, 1, 0 6055; GFX11-NEXT: s_cmp_eq_u32 s0, 0 6056; GFX11-NEXT: s_cselect_b32 s16, 1, 0 6057; GFX11-NEXT: s_lshr_b64 s[0:1], s[4:5], s8 6058; GFX11-NEXT: s_lshl_b64 s[10:11], s[6:7], s9 6059; GFX11-NEXT: s_lshr_b64 s[8:9], s[6:7], s8 6060; GFX11-NEXT: s_or_b64 s[0:1], s[0:1], s[10:11] 6061; GFX11-NEXT: s_lshr_b64 s[6:7], s[6:7], s14 6062; GFX11-NEXT: s_cmp_lg_u32 s15, 0 6063; GFX11-NEXT: s_cselect_b64 s[0:1], s[0:1], s[6:7] 6064; GFX11-NEXT: s_cmp_lg_u32 s16, 0 6065; GFX11-NEXT: s_cselect_b64 s[0:1], s[4:5], s[0:1] 6066; GFX11-NEXT: s_cmp_lg_u32 s15, 0 6067; GFX11-NEXT: s_cselect_b64 s[4:5], s[8:9], 0 6068; GFX11-NEXT: s_or_b64 s[0:1], s[12:13], s[0:1] 6069; GFX11-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5] 6070; GFX11-NEXT: ; return to shader part epilog 6071 %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 %amt) 6072 ret i128 %result 6073} 6074 6075define i128 @v_fshr_i128(i128 %lhs, i128 %rhs, i128 %amt) { 6076; GFX6-LABEL: v_fshr_i128: 6077; GFX6: ; %bb.0: 6078; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6079; GFX6-NEXT: v_lshl_b64 v[2:3], v[2:3], 1 6080; GFX6-NEXT: v_lshl_b64 v[9:10], v[0:1], 1 6081; GFX6-NEXT: v_lshrrev_b32_e32 v0, 31, v1 6082; GFX6-NEXT: v_or_b32_e32 v2, v2, v0 6083; GFX6-NEXT: v_not_b32_e32 v0, v8 6084; GFX6-NEXT: v_and_b32_e32 v15, 0x7f, v0 6085; GFX6-NEXT: v_sub_i32_e32 v0, vcc, 64, v15 6086; GFX6-NEXT: v_not_b32_e32 v16, 63 6087; GFX6-NEXT: v_lshr_b64 v[0:1], v[9:10], v0 6088; GFX6-NEXT: v_lshl_b64 v[11:12], v[2:3], v15 6089; GFX6-NEXT: v_add_i32_e32 v17, vcc, v15, v16 6090; GFX6-NEXT: v_lshl_b64 v[13:14], v[9:10], v15 6091; 
GFX6-NEXT: v_or_b32_e32 v11, v0, v11 6092; GFX6-NEXT: v_or_b32_e32 v12, v1, v12 6093; GFX6-NEXT: v_lshl_b64 v[0:1], v[9:10], v17 6094; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v15 6095; GFX6-NEXT: v_cndmask_b32_e32 v10, 0, v13, vcc 6096; GFX6-NEXT: v_cndmask_b32_e32 v13, 0, v14, vcc 6097; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v11, vcc 6098; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v12, vcc 6099; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, 0, v15 6100; GFX6-NEXT: v_and_b32_e32 v14, 0x7f, v8 6101; GFX6-NEXT: v_cndmask_b32_e32 v11, v0, v2, vcc 6102; GFX6-NEXT: v_cndmask_b32_e32 v12, v1, v3, vcc 6103; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 64, v14 6104; GFX6-NEXT: v_lshr_b64 v[0:1], v[4:5], v14 6105; GFX6-NEXT: v_lshl_b64 v[2:3], v[6:7], v2 6106; GFX6-NEXT: v_add_i32_e32 v15, vcc, v14, v16 6107; GFX6-NEXT: v_or_b32_e32 v2, v0, v2 6108; GFX6-NEXT: v_or_b32_e32 v3, v1, v3 6109; GFX6-NEXT: v_lshr_b64 v[0:1], v[6:7], v15 6110; GFX6-NEXT: v_lshr_b64 v[8:9], v[6:7], v14 6111; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v14 6112; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 6113; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 6114; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v14 6115; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[4:5] 6116; GFX6-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[4:5] 6117; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc 6118; GFX6-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc 6119; GFX6-NEXT: v_or_b32_e32 v0, v10, v0 6120; GFX6-NEXT: v_or_b32_e32 v1, v13, v1 6121; GFX6-NEXT: v_or_b32_e32 v2, v11, v2 6122; GFX6-NEXT: v_or_b32_e32 v3, v12, v3 6123; GFX6-NEXT: s_setpc_b64 s[30:31] 6124; 6125; GFX8-LABEL: v_fshr_i128: 6126; GFX8: ; %bb.0: 6127; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6128; GFX8-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] 6129; GFX8-NEXT: v_lshlrev_b64 v[9:10], 1, v[0:1] 6130; GFX8-NEXT: v_lshrrev_b32_e32 v0, 31, v1 6131; GFX8-NEXT: v_or_b32_e32 v2, v2, v0 6132; GFX8-NEXT: v_not_b32_e32 v0, v8 6133; GFX8-NEXT: v_and_b32_e32 v15, 0x7f, v0 6134; GFX8-NEXT: v_sub_u32_e32 v0, vcc, 64, v15 6135; 
GFX8-NEXT: v_not_b32_e32 v16, 63 6136; GFX8-NEXT: v_lshrrev_b64 v[0:1], v0, v[9:10] 6137; GFX8-NEXT: v_lshlrev_b64 v[11:12], v15, v[2:3] 6138; GFX8-NEXT: v_add_u32_e32 v17, vcc, v15, v16 6139; GFX8-NEXT: v_lshlrev_b64 v[13:14], v15, v[9:10] 6140; GFX8-NEXT: v_or_b32_e32 v11, v0, v11 6141; GFX8-NEXT: v_or_b32_e32 v12, v1, v12 6142; GFX8-NEXT: v_lshlrev_b64 v[0:1], v17, v[9:10] 6143; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v15 6144; GFX8-NEXT: v_cndmask_b32_e32 v10, 0, v13, vcc 6145; GFX8-NEXT: v_cndmask_b32_e32 v13, 0, v14, vcc 6146; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v11, vcc 6147; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v12, vcc 6148; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v15 6149; GFX8-NEXT: v_and_b32_e32 v14, 0x7f, v8 6150; GFX8-NEXT: v_cndmask_b32_e32 v11, v0, v2, vcc 6151; GFX8-NEXT: v_cndmask_b32_e32 v12, v1, v3, vcc 6152; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 64, v14 6153; GFX8-NEXT: v_lshrrev_b64 v[0:1], v14, v[4:5] 6154; GFX8-NEXT: v_lshlrev_b64 v[2:3], v2, v[6:7] 6155; GFX8-NEXT: v_add_u32_e32 v15, vcc, v14, v16 6156; GFX8-NEXT: v_or_b32_e32 v2, v0, v2 6157; GFX8-NEXT: v_or_b32_e32 v3, v1, v3 6158; GFX8-NEXT: v_lshrrev_b64 v[0:1], v15, v[6:7] 6159; GFX8-NEXT: v_lshrrev_b64 v[8:9], v14, v[6:7] 6160; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v14 6161; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 6162; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 6163; GFX8-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v14 6164; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[4:5] 6165; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[4:5] 6166; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc 6167; GFX8-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc 6168; GFX8-NEXT: v_or_b32_e32 v0, v10, v0 6169; GFX8-NEXT: v_or_b32_e32 v1, v13, v1 6170; GFX8-NEXT: v_or_b32_e32 v2, v11, v2 6171; GFX8-NEXT: v_or_b32_e32 v3, v12, v3 6172; GFX8-NEXT: s_setpc_b64 s[30:31] 6173; 6174; GFX9-LABEL: v_fshr_i128: 6175; GFX9: ; %bb.0: 6176; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6177; GFX9-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] 6178; 
GFX9-NEXT: v_lshlrev_b64 v[9:10], 1, v[0:1] 6179; GFX9-NEXT: v_lshrrev_b32_e32 v0, 31, v1 6180; GFX9-NEXT: v_or_b32_e32 v2, v2, v0 6181; GFX9-NEXT: v_not_b32_e32 v0, v8 6182; GFX9-NEXT: v_and_b32_e32 v15, 0x7f, v0 6183; GFX9-NEXT: v_sub_u32_e32 v0, 64, v15 6184; GFX9-NEXT: v_lshrrev_b64 v[0:1], v0, v[9:10] 6185; GFX9-NEXT: v_lshlrev_b64 v[11:12], v15, v[2:3] 6186; GFX9-NEXT: v_add_u32_e32 v16, 0xffffffc0, v15 6187; GFX9-NEXT: v_lshlrev_b64 v[13:14], v15, v[9:10] 6188; GFX9-NEXT: v_or_b32_e32 v11, v0, v11 6189; GFX9-NEXT: v_or_b32_e32 v12, v1, v12 6190; GFX9-NEXT: v_lshlrev_b64 v[0:1], v16, v[9:10] 6191; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v15 6192; GFX9-NEXT: v_cndmask_b32_e32 v10, 0, v13, vcc 6193; GFX9-NEXT: v_cndmask_b32_e32 v13, 0, v14, vcc 6194; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v11, vcc 6195; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v12, vcc 6196; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v15 6197; GFX9-NEXT: v_and_b32_e32 v14, 0x7f, v8 6198; GFX9-NEXT: v_cndmask_b32_e32 v11, v0, v2, vcc 6199; GFX9-NEXT: v_sub_u32_e32 v2, 64, v14 6200; GFX9-NEXT: v_cndmask_b32_e32 v12, v1, v3, vcc 6201; GFX9-NEXT: v_lshrrev_b64 v[0:1], v14, v[4:5] 6202; GFX9-NEXT: v_lshlrev_b64 v[2:3], v2, v[6:7] 6203; GFX9-NEXT: v_add_u32_e32 v15, 0xffffffc0, v14 6204; GFX9-NEXT: v_or_b32_e32 v2, v0, v2 6205; GFX9-NEXT: v_or_b32_e32 v3, v1, v3 6206; GFX9-NEXT: v_lshrrev_b64 v[0:1], v15, v[6:7] 6207; GFX9-NEXT: v_lshrrev_b64 v[8:9], v14, v[6:7] 6208; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v14 6209; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 6210; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 6211; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v14 6212; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v4, s[4:5] 6213; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v5, s[4:5] 6214; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc 6215; GFX9-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc 6216; GFX9-NEXT: v_or_b32_e32 v0, v10, v0 6217; GFX9-NEXT: v_or_b32_e32 v1, v13, v1 6218; GFX9-NEXT: v_or_b32_e32 v2, v11, v2 6219; GFX9-NEXT: v_or_b32_e32 
v3, v12, v3 6220; GFX9-NEXT: s_setpc_b64 s[30:31] 6221; 6222; GFX10-LABEL: v_fshr_i128: 6223; GFX10: ; %bb.0: 6224; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6225; GFX10-NEXT: v_not_b32_e32 v9, v8 6226; GFX10-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] 6227; GFX10-NEXT: v_lshrrev_b32_e32 v10, 31, v1 6228; GFX10-NEXT: v_and_b32_e32 v19, 0x7f, v8 6229; GFX10-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 6230; GFX10-NEXT: v_and_b32_e32 v18, 0x7f, v9 6231; GFX10-NEXT: v_or_b32_e32 v2, v2, v10 6232; GFX10-NEXT: v_sub_nc_u32_e32 v16, 64, v19 6233; GFX10-NEXT: v_add_nc_u32_e32 v21, 0xffffffc0, v19 6234; GFX10-NEXT: v_sub_nc_u32_e32 v10, 64, v18 6235; GFX10-NEXT: v_add_nc_u32_e32 v20, 0xffffffc0, v18 6236; GFX10-NEXT: v_lshlrev_b64 v[8:9], v18, v[2:3] 6237; GFX10-NEXT: v_lshrrev_b64 v[12:13], v19, v[4:5] 6238; GFX10-NEXT: v_lshlrev_b64 v[16:17], v16, v[6:7] 6239; GFX10-NEXT: v_lshrrev_b64 v[10:11], v10, v[0:1] 6240; GFX10-NEXT: v_lshlrev_b64 v[14:15], v18, v[0:1] 6241; GFX10-NEXT: v_lshlrev_b64 v[0:1], v20, v[0:1] 6242; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v18 6243; GFX10-NEXT: v_cmp_gt_u32_e64 s4, 64, v19 6244; GFX10-NEXT: v_or_b32_e32 v12, v12, v16 6245; GFX10-NEXT: v_or_b32_e32 v10, v10, v8 6246; GFX10-NEXT: v_or_b32_e32 v11, v11, v9 6247; GFX10-NEXT: v_lshrrev_b64 v[8:9], v21, v[6:7] 6248; GFX10-NEXT: v_or_b32_e32 v13, v13, v17 6249; GFX10-NEXT: v_cmp_eq_u32_e64 s5, 0, v19 6250; GFX10-NEXT: v_cndmask_b32_e32 v10, v0, v10, vcc_lo 6251; GFX10-NEXT: v_cndmask_b32_e32 v11, v1, v11, vcc_lo 6252; GFX10-NEXT: v_lshrrev_b64 v[0:1], v19, v[6:7] 6253; GFX10-NEXT: v_cndmask_b32_e64 v8, v8, v12, s4 6254; GFX10-NEXT: v_cmp_eq_u32_e64 s6, 0, v18 6255; GFX10-NEXT: v_cndmask_b32_e64 v6, v9, v13, s4 6256; GFX10-NEXT: v_cndmask_b32_e32 v14, 0, v14, vcc_lo 6257; GFX10-NEXT: v_cndmask_b32_e32 v7, 0, v15, vcc_lo 6258; GFX10-NEXT: v_cndmask_b32_e64 v4, v8, v4, s5 6259; GFX10-NEXT: v_cndmask_b32_e64 v2, v10, v2, s6 6260; GFX10-NEXT: v_cndmask_b32_e64 v3, v11, v3, s6 6261; GFX10-NEXT: 
v_cndmask_b32_e64 v5, v6, v5, s5 6262; GFX10-NEXT: v_cndmask_b32_e64 v6, 0, v0, s4 6263; GFX10-NEXT: v_cndmask_b32_e64 v8, 0, v1, s4 6264; GFX10-NEXT: v_or_b32_e32 v0, v14, v4 6265; GFX10-NEXT: v_or_b32_e32 v1, v7, v5 6266; GFX10-NEXT: v_or_b32_e32 v2, v2, v6 6267; GFX10-NEXT: v_or_b32_e32 v3, v3, v8 6268; GFX10-NEXT: s_setpc_b64 s[30:31] 6269; 6270; GFX11-LABEL: v_fshr_i128: 6271; GFX11: ; %bb.0: 6272; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6273; GFX11-NEXT: v_not_b32_e32 v9, v8 6274; GFX11-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] 6275; GFX11-NEXT: v_lshrrev_b32_e32 v10, 31, v1 6276; GFX11-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 6277; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) 6278; GFX11-NEXT: v_and_b32_e32 v18, 0x7f, v9 6279; GFX11-NEXT: v_or_b32_e32 v2, v2, v10 6280; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_4) 6281; GFX11-NEXT: v_sub_nc_u32_e32 v10, 64, v18 6282; GFX11-NEXT: v_lshlrev_b64 v[14:15], v18, v[0:1] 6283; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v18 6284; GFX11-NEXT: v_and_b32_e32 v19, 0x7f, v8 6285; GFX11-NEXT: v_add_nc_u32_e32 v20, 0xffffffc0, v18 6286; GFX11-NEXT: v_lshlrev_b64 v[8:9], v18, v[2:3] 6287; GFX11-NEXT: v_lshrrev_b64 v[10:11], v10, v[0:1] 6288; GFX11-NEXT: v_cndmask_b32_e32 v14, 0, v14, vcc_lo 6289; GFX11-NEXT: v_sub_nc_u32_e32 v16, 64, v19 6290; GFX11-NEXT: v_lshlrev_b64 v[0:1], v20, v[0:1] 6291; GFX11-NEXT: v_lshrrev_b64 v[12:13], v19, v[4:5] 6292; GFX11-NEXT: v_cmp_gt_u32_e64 s0, 64, v19 6293; GFX11-NEXT: v_or_b32_e32 v10, v10, v8 6294; GFX11-NEXT: v_add_nc_u32_e32 v21, 0xffffffc0, v19 6295; GFX11-NEXT: v_lshlrev_b64 v[16:17], v16, v[6:7] 6296; GFX11-NEXT: v_or_b32_e32 v11, v11, v9 6297; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 0, v19 6298; GFX11-NEXT: v_cndmask_b32_e32 v10, v0, v10, vcc_lo 6299; GFX11-NEXT: v_lshrrev_b64 v[8:9], v21, v[6:7] 6300; GFX11-NEXT: v_or_b32_e32 v12, v12, v16 6301; GFX11-NEXT: v_or_b32_e32 v13, v13, v17 6302; GFX11-NEXT: 
v_cndmask_b32_e32 v11, v1, v11, vcc_lo 6303; GFX11-NEXT: v_lshrrev_b64 v[0:1], v19, v[6:7] 6304; GFX11-NEXT: v_cmp_eq_u32_e64 s2, 0, v18 6305; GFX11-NEXT: v_cndmask_b32_e64 v8, v8, v12, s0 6306; GFX11-NEXT: v_cndmask_b32_e64 v6, v9, v13, s0 6307; GFX11-NEXT: v_cndmask_b32_e32 v7, 0, v15, vcc_lo 6308; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) 6309; GFX11-NEXT: v_cndmask_b32_e64 v2, v10, v2, s2 6310; GFX11-NEXT: v_cndmask_b32_e64 v3, v11, v3, s2 6311; GFX11-NEXT: v_cndmask_b32_e64 v4, v8, v4, s1 6312; GFX11-NEXT: v_cndmask_b32_e64 v5, v6, v5, s1 6313; GFX11-NEXT: v_cndmask_b32_e64 v6, 0, v0, s0 6314; GFX11-NEXT: v_cndmask_b32_e64 v8, 0, v1, s0 6315; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 6316; GFX11-NEXT: v_or_b32_e32 v0, v14, v4 6317; GFX11-NEXT: v_or_b32_e32 v1, v7, v5 6318; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 6319; GFX11-NEXT: v_or_b32_e32 v2, v2, v6 6320; GFX11-NEXT: v_or_b32_e32 v3, v3, v8 6321; GFX11-NEXT: s_setpc_b64 s[30:31] 6322 %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 %amt) 6323 ret i128 %result 6324} 6325 6326define amdgpu_ps <4 x float> @v_fshr_i128_ssv(i128 inreg %lhs, i128 inreg %rhs, i128 %amt) { 6327; GFX6-LABEL: v_fshr_i128_ssv: 6328; GFX6: ; %bb.0: 6329; GFX6-NEXT: v_not_b32_e32 v1, v0 6330; GFX6-NEXT: s_lshl_b64 s[8:9], s[0:1], 1 6331; GFX6-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 6332; GFX6-NEXT: s_lshr_b32 s0, s1, 31 6333; GFX6-NEXT: s_mov_b32 s1, 0 6334; GFX6-NEXT: v_and_b32_e32 v7, 0x7f, v1 6335; GFX6-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1] 6336; GFX6-NEXT: v_sub_i32_e32 v1, vcc, 64, v7 6337; GFX6-NEXT: v_not_b32_e32 v8, 63 6338; GFX6-NEXT: v_lshr_b64 v[1:2], s[8:9], v1 6339; GFX6-NEXT: v_lshl_b64 v[3:4], s[0:1], v7 6340; GFX6-NEXT: v_add_i32_e32 v9, vcc, v7, v8 6341; GFX6-NEXT: v_lshl_b64 v[5:6], s[8:9], v7 6342; GFX6-NEXT: v_or_b32_e32 v3, v1, v3 6343; GFX6-NEXT: v_or_b32_e32 v4, v2, v4 6344; GFX6-NEXT: v_lshl_b64 v[1:2], s[8:9], v9 
6345; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v7 6346; GFX6-NEXT: v_cndmask_b32_e32 v9, 0, v5, vcc 6347; GFX6-NEXT: v_cndmask_b32_e32 v6, 0, v6, vcc 6348; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 6349; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 6350; GFX6-NEXT: v_mov_b32_e32 v3, s0 6351; GFX6-NEXT: v_mov_b32_e32 v4, s1 6352; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7 6353; GFX6-NEXT: v_and_b32_e32 v11, 0x7f, v0 6354; GFX6-NEXT: v_cndmask_b32_e32 v7, v1, v3, vcc 6355; GFX6-NEXT: v_cndmask_b32_e32 v10, v2, v4, vcc 6356; GFX6-NEXT: v_sub_i32_e32 v2, vcc, 64, v11 6357; GFX6-NEXT: v_lshr_b64 v[0:1], s[4:5], v11 6358; GFX6-NEXT: v_lshl_b64 v[2:3], s[6:7], v2 6359; GFX6-NEXT: v_add_i32_e32 v8, vcc, v11, v8 6360; GFX6-NEXT: v_or_b32_e32 v2, v0, v2 6361; GFX6-NEXT: v_or_b32_e32 v3, v1, v3 6362; GFX6-NEXT: v_lshr_b64 v[0:1], s[6:7], v8 6363; GFX6-NEXT: v_lshr_b64 v[4:5], s[6:7], v11 6364; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v11 6365; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 6366; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 6367; GFX6-NEXT: v_mov_b32_e32 v2, s4 6368; GFX6-NEXT: v_mov_b32_e32 v3, s5 6369; GFX6-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v11 6370; GFX6-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] 6371; GFX6-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1] 6372; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc 6373; GFX6-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc 6374; GFX6-NEXT: v_or_b32_e32 v0, v9, v0 6375; GFX6-NEXT: v_or_b32_e32 v1, v6, v1 6376; GFX6-NEXT: v_or_b32_e32 v2, v7, v2 6377; GFX6-NEXT: v_or_b32_e32 v3, v10, v3 6378; GFX6-NEXT: ; return to shader part epilog 6379; 6380; GFX8-LABEL: v_fshr_i128_ssv: 6381; GFX8: ; %bb.0: 6382; GFX8-NEXT: v_not_b32_e32 v1, v0 6383; GFX8-NEXT: s_lshl_b64 s[8:9], s[0:1], 1 6384; GFX8-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 6385; GFX8-NEXT: s_lshr_b32 s0, s1, 31 6386; GFX8-NEXT: s_mov_b32 s1, 0 6387; GFX8-NEXT: v_and_b32_e32 v7, 0x7f, v1 6388; GFX8-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1] 6389; GFX8-NEXT: v_sub_u32_e32 v1, vcc, 64, v7 6390; 
GFX8-NEXT: v_not_b32_e32 v8, 63 6391; GFX8-NEXT: v_lshrrev_b64 v[1:2], v1, s[8:9] 6392; GFX8-NEXT: v_lshlrev_b64 v[3:4], v7, s[0:1] 6393; GFX8-NEXT: v_add_u32_e32 v9, vcc, v7, v8 6394; GFX8-NEXT: v_lshlrev_b64 v[5:6], v7, s[8:9] 6395; GFX8-NEXT: v_or_b32_e32 v3, v1, v3 6396; GFX8-NEXT: v_or_b32_e32 v4, v2, v4 6397; GFX8-NEXT: v_lshlrev_b64 v[1:2], v9, s[8:9] 6398; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v7 6399; GFX8-NEXT: v_cndmask_b32_e32 v9, 0, v5, vcc 6400; GFX8-NEXT: v_cndmask_b32_e32 v6, 0, v6, vcc 6401; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 6402; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 6403; GFX8-NEXT: v_mov_b32_e32 v3, s0 6404; GFX8-NEXT: v_mov_b32_e32 v4, s1 6405; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7 6406; GFX8-NEXT: v_and_b32_e32 v11, 0x7f, v0 6407; GFX8-NEXT: v_cndmask_b32_e32 v7, v1, v3, vcc 6408; GFX8-NEXT: v_cndmask_b32_e32 v10, v2, v4, vcc 6409; GFX8-NEXT: v_sub_u32_e32 v2, vcc, 64, v11 6410; GFX8-NEXT: v_lshrrev_b64 v[0:1], v11, s[4:5] 6411; GFX8-NEXT: v_lshlrev_b64 v[2:3], v2, s[6:7] 6412; GFX8-NEXT: v_add_u32_e32 v8, vcc, v11, v8 6413; GFX8-NEXT: v_or_b32_e32 v2, v0, v2 6414; GFX8-NEXT: v_or_b32_e32 v3, v1, v3 6415; GFX8-NEXT: v_lshrrev_b64 v[0:1], v8, s[6:7] 6416; GFX8-NEXT: v_lshrrev_b64 v[4:5], v11, s[6:7] 6417; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v11 6418; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 6419; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 6420; GFX8-NEXT: v_mov_b32_e32 v2, s4 6421; GFX8-NEXT: v_mov_b32_e32 v3, s5 6422; GFX8-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v11 6423; GFX8-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] 6424; GFX8-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1] 6425; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v4, vcc 6426; GFX8-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc 6427; GFX8-NEXT: v_or_b32_e32 v0, v9, v0 6428; GFX8-NEXT: v_or_b32_e32 v1, v6, v1 6429; GFX8-NEXT: v_or_b32_e32 v2, v7, v2 6430; GFX8-NEXT: v_or_b32_e32 v3, v10, v3 6431; GFX8-NEXT: ; return to shader part epilog 6432; 6433; GFX9-LABEL: v_fshr_i128_ssv: 6434; 
GFX9: ; %bb.0: 6435; GFX9-NEXT: v_not_b32_e32 v1, v0 6436; GFX9-NEXT: s_lshl_b64 s[8:9], s[0:1], 1 6437; GFX9-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 6438; GFX9-NEXT: s_lshr_b32 s0, s1, 31 6439; GFX9-NEXT: s_mov_b32 s1, 0 6440; GFX9-NEXT: v_and_b32_e32 v7, 0x7f, v1 6441; GFX9-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1] 6442; GFX9-NEXT: v_sub_u32_e32 v1, 64, v7 6443; GFX9-NEXT: v_lshrrev_b64 v[1:2], v1, s[8:9] 6444; GFX9-NEXT: v_lshlrev_b64 v[3:4], v7, s[0:1] 6445; GFX9-NEXT: v_add_u32_e32 v8, 0xffffffc0, v7 6446; GFX9-NEXT: v_lshlrev_b64 v[5:6], v7, s[8:9] 6447; GFX9-NEXT: v_or_b32_e32 v3, v1, v3 6448; GFX9-NEXT: v_or_b32_e32 v4, v2, v4 6449; GFX9-NEXT: v_lshlrev_b64 v[1:2], v8, s[8:9] 6450; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v7 6451; GFX9-NEXT: v_cndmask_b32_e32 v8, 0, v5, vcc 6452; GFX9-NEXT: v_cndmask_b32_e32 v6, 0, v6, vcc 6453; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 6454; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 6455; GFX9-NEXT: v_mov_b32_e32 v4, s1 6456; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v7 6457; GFX9-NEXT: v_and_b32_e32 v10, 0x7f, v0 6458; GFX9-NEXT: v_mov_b32_e32 v3, s0 6459; GFX9-NEXT: v_cndmask_b32_e32 v9, v2, v4, vcc 6460; GFX9-NEXT: v_sub_u32_e32 v2, 64, v10 6461; GFX9-NEXT: v_cndmask_b32_e32 v7, v1, v3, vcc 6462; GFX9-NEXT: v_lshrrev_b64 v[0:1], v10, s[4:5] 6463; GFX9-NEXT: v_lshlrev_b64 v[2:3], v2, s[6:7] 6464; GFX9-NEXT: v_add_u32_e32 v11, 0xffffffc0, v10 6465; GFX9-NEXT: v_or_b32_e32 v2, v0, v2 6466; GFX9-NEXT: v_or_b32_e32 v3, v1, v3 6467; GFX9-NEXT: v_lshrrev_b64 v[0:1], v11, s[6:7] 6468; GFX9-NEXT: v_lshrrev_b64 v[4:5], v10, s[6:7] 6469; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v10 6470; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc 6471; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v3, vcc 6472; GFX9-NEXT: v_mov_b32_e32 v2, s4 6473; GFX9-NEXT: v_mov_b32_e32 v3, s5 6474; GFX9-NEXT: v_cmp_eq_u32_e64 s[0:1], 0, v10 6475; GFX9-NEXT: v_cndmask_b32_e64 v0, v0, v2, s[0:1] 6476; GFX9-NEXT: v_cndmask_b32_e64 v1, v1, v3, s[0:1] 6477; GFX9-NEXT: v_cndmask_b32_e32 
v2, 0, v4, vcc 6478; GFX9-NEXT: v_cndmask_b32_e32 v3, 0, v5, vcc 6479; GFX9-NEXT: v_or_b32_e32 v0, v8, v0 6480; GFX9-NEXT: v_or_b32_e32 v1, v6, v1 6481; GFX9-NEXT: v_or_b32_e32 v2, v7, v2 6482; GFX9-NEXT: v_or_b32_e32 v3, v9, v3 6483; GFX9-NEXT: ; return to shader part epilog 6484; 6485; GFX10-LABEL: v_fshr_i128_ssv: 6486; GFX10: ; %bb.0: 6487; GFX10-NEXT: v_not_b32_e32 v1, v0 6488; GFX10-NEXT: v_and_b32_e32 v13, 0x7f, v0 6489; GFX10-NEXT: s_mov_b32 s9, 0 6490; GFX10-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 6491; GFX10-NEXT: s_lshr_b32 s8, s1, 31 6492; GFX10-NEXT: v_and_b32_e32 v12, 0x7f, v1 6493; GFX10-NEXT: v_sub_nc_u32_e32 v8, 64, v13 6494; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 6495; GFX10-NEXT: s_or_b64 s[8:9], s[2:3], s[8:9] 6496; GFX10-NEXT: v_add_nc_u32_e32 v14, 0xffffffc0, v13 6497; GFX10-NEXT: v_sub_nc_u32_e32 v2, 64, v12 6498; GFX10-NEXT: v_lshlrev_b64 v[0:1], v12, s[8:9] 6499; GFX10-NEXT: v_add_nc_u32_e32 v10, 0xffffffc0, v12 6500; GFX10-NEXT: v_lshrrev_b64 v[4:5], v13, s[4:5] 6501; GFX10-NEXT: v_lshlrev_b64 v[8:9], v8, s[6:7] 6502; GFX10-NEXT: v_lshrrev_b64 v[2:3], v2, s[0:1] 6503; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v12 6504; GFX10-NEXT: v_lshlrev_b64 v[10:11], v10, s[0:1] 6505; GFX10-NEXT: v_lshlrev_b64 v[6:7], v12, s[0:1] 6506; GFX10-NEXT: v_cmp_gt_u32_e64 s0, 64, v13 6507; GFX10-NEXT: v_or_b32_e32 v4, v4, v8 6508; GFX10-NEXT: v_or_b32_e32 v2, v2, v0 6509; GFX10-NEXT: v_or_b32_e32 v3, v3, v1 6510; GFX10-NEXT: v_lshrrev_b64 v[0:1], v14, s[6:7] 6511; GFX10-NEXT: v_or_b32_e32 v5, v5, v9 6512; GFX10-NEXT: v_cmp_eq_u32_e64 s1, 0, v13 6513; GFX10-NEXT: v_cndmask_b32_e32 v8, v10, v2, vcc_lo 6514; GFX10-NEXT: v_cndmask_b32_e32 v10, v11, v3, vcc_lo 6515; GFX10-NEXT: v_lshrrev_b64 v[2:3], v13, s[6:7] 6516; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v4, s0 6517; GFX10-NEXT: v_cmp_eq_u32_e64 s2, 0, v12 6518; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v5, s0 6519; GFX10-NEXT: v_cndmask_b32_e32 v6, 0, v6, vcc_lo 6520; GFX10-NEXT: v_cndmask_b32_e32 v4, 0, v7, vcc_lo 6521; 
GFX10-NEXT: v_cndmask_b32_e64 v0, v0, s4, s1 6522; GFX10-NEXT: v_cndmask_b32_e64 v5, v8, s8, s2 6523; GFX10-NEXT: v_cndmask_b32_e64 v7, v10, s9, s2 6524; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, s5, s1 6525; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, v2, s0 6526; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, v3, s0 6527; GFX10-NEXT: v_or_b32_e32 v0, v6, v0 6528; GFX10-NEXT: v_or_b32_e32 v1, v4, v1 6529; GFX10-NEXT: v_or_b32_e32 v2, v5, v2 6530; GFX10-NEXT: v_or_b32_e32 v3, v7, v3 6531; GFX10-NEXT: ; return to shader part epilog 6532; 6533; GFX11-LABEL: v_fshr_i128_ssv: 6534; GFX11: ; %bb.0: 6535; GFX11-NEXT: v_not_b32_e32 v1, v0 6536; GFX11-NEXT: s_lshr_b32 s8, s1, 31 6537; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 6538; GFX11-NEXT: s_mov_b32 s9, 0 6539; GFX11-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 6540; GFX11-NEXT: v_and_b32_e32 v12, 0x7f, v1 6541; GFX11-NEXT: s_or_b64 s[8:9], s[2:3], s[8:9] 6542; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) 6543; GFX11-NEXT: v_lshlrev_b64 v[6:7], v12, s[0:1] 6544; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v12 6545; GFX11-NEXT: v_and_b32_e32 v13, 0x7f, v0 6546; GFX11-NEXT: v_sub_nc_u32_e32 v2, 64, v12 6547; GFX11-NEXT: v_lshlrev_b64 v[0:1], v12, s[8:9] 6548; GFX11-NEXT: v_add_nc_u32_e32 v10, 0xffffffc0, v12 6549; GFX11-NEXT: v_cndmask_b32_e32 v6, 0, v6, vcc_lo 6550; GFX11-NEXT: v_sub_nc_u32_e32 v8, 64, v13 6551; GFX11-NEXT: v_lshrrev_b64 v[2:3], v2, s[0:1] 6552; GFX11-NEXT: v_add_nc_u32_e32 v14, 0xffffffc0, v13 6553; GFX11-NEXT: v_lshrrev_b64 v[4:5], v13, s[4:5] 6554; GFX11-NEXT: v_lshlrev_b64 v[10:11], v10, s[0:1] 6555; GFX11-NEXT: v_lshlrev_b64 v[8:9], v8, s[6:7] 6556; GFX11-NEXT: v_cmp_gt_u32_e64 s0, 64, v13 6557; GFX11-NEXT: v_or_b32_e32 v2, v2, v0 6558; GFX11-NEXT: v_or_b32_e32 v3, v3, v1 6559; GFX11-NEXT: v_lshrrev_b64 v[0:1], v14, s[6:7] 6560; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 0, v13 6561; GFX11-NEXT: v_or_b32_e32 v4, v4, v8 6562; GFX11-NEXT: v_or_b32_e32 v5, v5, v9 6563; GFX11-NEXT: v_cndmask_b32_e32 v8, v10, v2, vcc_lo 6564; GFX11-NEXT: 
v_cndmask_b32_e32 v10, v11, v3, vcc_lo 6565; GFX11-NEXT: v_lshrrev_b64 v[2:3], v13, s[6:7] 6566; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v4, s0 6567; GFX11-NEXT: v_cmp_eq_u32_e64 s2, 0, v12 6568; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, v5, s0 6569; GFX11-NEXT: v_cndmask_b32_e32 v4, 0, v7, vcc_lo 6570; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 6571; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, s4, s1 6572; GFX11-NEXT: v_cndmask_b32_e64 v5, v8, s8, s2 6573; GFX11-NEXT: v_cndmask_b32_e64 v7, v10, s9, s2 6574; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, s5, s1 6575; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, v2, s0 6576; GFX11-NEXT: v_cndmask_b32_e64 v3, 0, v3, s0 6577; GFX11-NEXT: v_or_b32_e32 v0, v6, v0 6578; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 6579; GFX11-NEXT: v_or_b32_e32 v1, v4, v1 6580; GFX11-NEXT: v_or_b32_e32 v2, v5, v2 6581; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) 6582; GFX11-NEXT: v_or_b32_e32 v3, v7, v3 6583; GFX11-NEXT: ; return to shader part epilog 6584 %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 %amt) 6585 %cast.result = bitcast i128 %result to <4 x float> 6586 ret <4 x float> %cast.result 6587} 6588; i128 funnel shift right (llvm.fshr.i128) with %lhs and %amt marked inreg and %rhs not inreg; the i128 result is bitcast to a 4 x float vector for the return value. 6589define amdgpu_ps <4 x float> @v_fshr_i128_svs(i128 inreg %lhs, i128 %rhs, i128 inreg %amt) { 6590; GFX6-LABEL: v_fshr_i128_svs: 6591; GFX6: ; %bb.0: 6592; GFX6-NEXT: s_lshl_b64 s[6:7], s[0:1], 1 6593; GFX6-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 6594; GFX6-NEXT: s_lshr_b32 s0, s1, 31 6595; GFX6-NEXT: s_mov_b32 s1, 0 6596; GFX6-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1] 6597; GFX6-NEXT: s_andn2_b32 s2, 0x7f, s4 6598; GFX6-NEXT: s_not_b32 s5, s4 6599; GFX6-NEXT: s_sub_i32 s12, s2, 64 6600; GFX6-NEXT: s_sub_i32 s8, 64, s2 6601; GFX6-NEXT: s_cmp_lt_u32 s2, 64 6602; GFX6-NEXT: s_cselect_b32 s13, 1, 0 6603; GFX6-NEXT: s_cmp_eq_u32 s2, 0 6604; GFX6-NEXT: s_cselect_b32 s14, 1, 0 6605; GFX6-NEXT: s_lshr_b64 s[8:9], s[6:7], s8 6606; GFX6-NEXT: s_lshl_b64 s[10:11], s[0:1], s5 6607; 
GFX6-NEXT: s_lshl_b64 s[2:3], s[6:7], s5 6608; GFX6-NEXT: s_or_b64 s[8:9], s[8:9], s[10:11] 6609; GFX6-NEXT: s_lshl_b64 s[6:7], s[6:7], s12 6610; GFX6-NEXT: s_cmp_lg_u32 s13, 0 6611; GFX6-NEXT: s_cselect_b64 s[2:3], s[2:3], 0 6612; GFX6-NEXT: s_cselect_b64 s[6:7], s[8:9], s[6:7] 6613; GFX6-NEXT: s_cmp_lg_u32 s14, 0 6614; GFX6-NEXT: s_cselect_b64 s[6:7], s[0:1], s[6:7] 6615; GFX6-NEXT: s_and_b32 s0, s4, 0x7f 6616; GFX6-NEXT: s_sub_i32 s1, s0, 64 6617; GFX6-NEXT: s_sub_i32 s4, 64, s0 6618; GFX6-NEXT: s_cmp_lt_u32 s0, 64 6619; GFX6-NEXT: s_cselect_b32 s5, 1, 0 6620; GFX6-NEXT: s_cmp_eq_u32 s0, 0 6621; GFX6-NEXT: v_lshr_b64 v[4:5], v[0:1], s0 6622; GFX6-NEXT: v_lshl_b64 v[6:7], v[2:3], s4 6623; GFX6-NEXT: s_cselect_b32 s8, 1, 0 6624; GFX6-NEXT: v_lshr_b64 v[8:9], v[2:3], s0 6625; GFX6-NEXT: v_lshr_b64 v[2:3], v[2:3], s1 6626; GFX6-NEXT: s_and_b32 s0, 1, s5 6627; GFX6-NEXT: v_or_b32_e32 v4, v4, v6 6628; GFX6-NEXT: v_or_b32_e32 v5, v5, v7 6629; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0 6630; GFX6-NEXT: s_and_b32 s0, 1, s8 6631; GFX6-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 6632; GFX6-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc 6633; GFX6-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0 6634; GFX6-NEXT: v_cndmask_b32_e64 v0, v2, v0, s[0:1] 6635; GFX6-NEXT: v_cndmask_b32_e64 v1, v3, v1, s[0:1] 6636; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc 6637; GFX6-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc 6638; GFX6-NEXT: v_or_b32_e32 v0, s2, v0 6639; GFX6-NEXT: v_or_b32_e32 v1, s3, v1 6640; GFX6-NEXT: v_or_b32_e32 v2, s6, v2 6641; GFX6-NEXT: v_or_b32_e32 v3, s7, v3 6642; GFX6-NEXT: ; return to shader part epilog 6643; 6644; GFX8-LABEL: v_fshr_i128_svs: 6645; GFX8: ; %bb.0: 6646; GFX8-NEXT: s_lshl_b64 s[6:7], s[0:1], 1 6647; GFX8-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 6648; GFX8-NEXT: s_lshr_b32 s0, s1, 31 6649; GFX8-NEXT: s_mov_b32 s1, 0 6650; GFX8-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1] 6651; GFX8-NEXT: s_andn2_b32 s2, 0x7f, s4 6652; GFX8-NEXT: s_not_b32 s5, s4 6653; GFX8-NEXT: s_sub_i32 s12, s2, 64 6654; 
GFX8-NEXT: s_sub_i32 s8, 64, s2 6655; GFX8-NEXT: s_cmp_lt_u32 s2, 64 6656; GFX8-NEXT: s_cselect_b32 s13, 1, 0 6657; GFX8-NEXT: s_cmp_eq_u32 s2, 0 6658; GFX8-NEXT: s_cselect_b32 s14, 1, 0 6659; GFX8-NEXT: s_lshr_b64 s[8:9], s[6:7], s8 6660; GFX8-NEXT: s_lshl_b64 s[10:11], s[0:1], s5 6661; GFX8-NEXT: s_lshl_b64 s[2:3], s[6:7], s5 6662; GFX8-NEXT: s_or_b64 s[8:9], s[8:9], s[10:11] 6663; GFX8-NEXT: s_lshl_b64 s[6:7], s[6:7], s12 6664; GFX8-NEXT: s_cmp_lg_u32 s13, 0 6665; GFX8-NEXT: s_cselect_b64 s[2:3], s[2:3], 0 6666; GFX8-NEXT: s_cselect_b64 s[6:7], s[8:9], s[6:7] 6667; GFX8-NEXT: s_cmp_lg_u32 s14, 0 6668; GFX8-NEXT: s_cselect_b64 s[6:7], s[0:1], s[6:7] 6669; GFX8-NEXT: s_and_b32 s0, s4, 0x7f 6670; GFX8-NEXT: s_sub_i32 s1, s0, 64 6671; GFX8-NEXT: s_sub_i32 s4, 64, s0 6672; GFX8-NEXT: s_cmp_lt_u32 s0, 64 6673; GFX8-NEXT: s_cselect_b32 s5, 1, 0 6674; GFX8-NEXT: s_cmp_eq_u32 s0, 0 6675; GFX8-NEXT: v_lshrrev_b64 v[4:5], s0, v[0:1] 6676; GFX8-NEXT: v_lshlrev_b64 v[6:7], s4, v[2:3] 6677; GFX8-NEXT: s_cselect_b32 s8, 1, 0 6678; GFX8-NEXT: v_lshrrev_b64 v[8:9], s0, v[2:3] 6679; GFX8-NEXT: v_lshrrev_b64 v[2:3], s1, v[2:3] 6680; GFX8-NEXT: s_and_b32 s0, 1, s5 6681; GFX8-NEXT: v_or_b32_e32 v4, v4, v6 6682; GFX8-NEXT: v_or_b32_e32 v5, v5, v7 6683; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0 6684; GFX8-NEXT: s_and_b32 s0, 1, s8 6685; GFX8-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 6686; GFX8-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc 6687; GFX8-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0 6688; GFX8-NEXT: v_cndmask_b32_e64 v0, v2, v0, s[0:1] 6689; GFX8-NEXT: v_cndmask_b32_e64 v1, v3, v1, s[0:1] 6690; GFX8-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc 6691; GFX8-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc 6692; GFX8-NEXT: v_or_b32_e32 v0, s2, v0 6693; GFX8-NEXT: v_or_b32_e32 v1, s3, v1 6694; GFX8-NEXT: v_or_b32_e32 v2, s6, v2 6695; GFX8-NEXT: v_or_b32_e32 v3, s7, v3 6696; GFX8-NEXT: ; return to shader part epilog 6697; 6698; GFX9-LABEL: v_fshr_i128_svs: 6699; GFX9: ; %bb.0: 6700; GFX9-NEXT: s_lshl_b64 s[6:7], 
s[0:1], 1 6701; GFX9-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 6702; GFX9-NEXT: s_lshr_b32 s0, s1, 31 6703; GFX9-NEXT: s_mov_b32 s1, 0 6704; GFX9-NEXT: s_or_b64 s[0:1], s[2:3], s[0:1] 6705; GFX9-NEXT: s_andn2_b32 s2, 0x7f, s4 6706; GFX9-NEXT: s_not_b32 s5, s4 6707; GFX9-NEXT: s_sub_i32 s12, s2, 64 6708; GFX9-NEXT: s_sub_i32 s8, 64, s2 6709; GFX9-NEXT: s_cmp_lt_u32 s2, 64 6710; GFX9-NEXT: s_cselect_b32 s13, 1, 0 6711; GFX9-NEXT: s_cmp_eq_u32 s2, 0 6712; GFX9-NEXT: s_cselect_b32 s14, 1, 0 6713; GFX9-NEXT: s_lshr_b64 s[8:9], s[6:7], s8 6714; GFX9-NEXT: s_lshl_b64 s[10:11], s[0:1], s5 6715; GFX9-NEXT: s_lshl_b64 s[2:3], s[6:7], s5 6716; GFX9-NEXT: s_or_b64 s[8:9], s[8:9], s[10:11] 6717; GFX9-NEXT: s_lshl_b64 s[6:7], s[6:7], s12 6718; GFX9-NEXT: s_cmp_lg_u32 s13, 0 6719; GFX9-NEXT: s_cselect_b64 s[2:3], s[2:3], 0 6720; GFX9-NEXT: s_cselect_b64 s[6:7], s[8:9], s[6:7] 6721; GFX9-NEXT: s_cmp_lg_u32 s14, 0 6722; GFX9-NEXT: s_cselect_b64 s[6:7], s[0:1], s[6:7] 6723; GFX9-NEXT: s_and_b32 s0, s4, 0x7f 6724; GFX9-NEXT: s_sub_i32 s1, s0, 64 6725; GFX9-NEXT: s_sub_i32 s4, 64, s0 6726; GFX9-NEXT: s_cmp_lt_u32 s0, 64 6727; GFX9-NEXT: s_cselect_b32 s5, 1, 0 6728; GFX9-NEXT: s_cmp_eq_u32 s0, 0 6729; GFX9-NEXT: v_lshrrev_b64 v[4:5], s0, v[0:1] 6730; GFX9-NEXT: v_lshlrev_b64 v[6:7], s4, v[2:3] 6731; GFX9-NEXT: s_cselect_b32 s8, 1, 0 6732; GFX9-NEXT: v_lshrrev_b64 v[8:9], s0, v[2:3] 6733; GFX9-NEXT: v_lshrrev_b64 v[2:3], s1, v[2:3] 6734; GFX9-NEXT: s_and_b32 s0, 1, s5 6735; GFX9-NEXT: v_or_b32_e32 v4, v4, v6 6736; GFX9-NEXT: v_or_b32_e32 v5, v5, v7 6737; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s0 6738; GFX9-NEXT: s_and_b32 s0, 1, s8 6739; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc 6740; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc 6741; GFX9-NEXT: v_cmp_ne_u32_e64 s[0:1], 0, s0 6742; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, v0, s[0:1] 6743; GFX9-NEXT: v_cndmask_b32_e64 v1, v3, v1, s[0:1] 6744; GFX9-NEXT: v_cndmask_b32_e32 v2, 0, v8, vcc 6745; GFX9-NEXT: v_cndmask_b32_e32 v3, 0, v9, vcc 6746; 
GFX9-NEXT: v_or_b32_e32 v0, s2, v0 6747; GFX9-NEXT: v_or_b32_e32 v1, s3, v1 6748; GFX9-NEXT: v_or_b32_e32 v2, s6, v2 6749; GFX9-NEXT: v_or_b32_e32 v3, s7, v3 6750; GFX9-NEXT: ; return to shader part epilog 6751; 6752; GFX10-LABEL: v_fshr_i128_svs: 6753; GFX10: ; %bb.0: 6754; GFX10-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 6755; GFX10-NEXT: s_lshr_b32 s6, s1, 31 6756; GFX10-NEXT: s_mov_b32 s7, 0 6757; GFX10-NEXT: s_andn2_b32 s5, 0x7f, s4 6758; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 6759; GFX10-NEXT: s_or_b64 s[2:3], s[2:3], s[6:7] 6760; GFX10-NEXT: s_not_b32 s10, s4 6761; GFX10-NEXT: s_sub_i32 s12, s5, 64 6762; GFX10-NEXT: s_sub_i32 s6, 64, s5 6763; GFX10-NEXT: s_cmp_lt_u32 s5, 64 6764; GFX10-NEXT: s_cselect_b32 s13, 1, 0 6765; GFX10-NEXT: s_cmp_eq_u32 s5, 0 6766; GFX10-NEXT: s_cselect_b32 s5, 1, 0 6767; GFX10-NEXT: s_lshr_b64 s[6:7], s[0:1], s6 6768; GFX10-NEXT: s_lshl_b64 s[8:9], s[2:3], s10 6769; GFX10-NEXT: s_lshl_b64 s[10:11], s[0:1], s10 6770; GFX10-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9] 6771; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s12 6772; GFX10-NEXT: s_cmp_lg_u32 s13, 0 6773; GFX10-NEXT: s_cselect_b64 s[8:9], s[10:11], 0 6774; GFX10-NEXT: s_cselect_b64 s[0:1], s[6:7], s[0:1] 6775; GFX10-NEXT: s_cmp_lg_u32 s5, 0 6776; GFX10-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1] 6777; GFX10-NEXT: s_and_b32 s0, s4, 0x7f 6778; GFX10-NEXT: s_sub_i32 s1, 64, s0 6779; GFX10-NEXT: v_lshrrev_b64 v[4:5], s0, v[0:1] 6780; GFX10-NEXT: v_lshlrev_b64 v[6:7], s1, v[2:3] 6781; GFX10-NEXT: s_sub_i32 s1, s0, 64 6782; GFX10-NEXT: s_cmp_lt_u32 s0, 64 6783; GFX10-NEXT: v_lshrrev_b64 v[8:9], s1, v[2:3] 6784; GFX10-NEXT: s_cselect_b32 s4, 1, 0 6785; GFX10-NEXT: s_cmp_eq_u32 s0, 0 6786; GFX10-NEXT: v_or_b32_e32 v4, v4, v6 6787; GFX10-NEXT: s_cselect_b32 s5, 1, 0 6788; GFX10-NEXT: s_and_b32 s1, 1, s4 6789; GFX10-NEXT: v_or_b32_e32 v5, v5, v7 6790; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s1 6791; GFX10-NEXT: v_lshrrev_b64 v[2:3], s0, v[2:3] 6792; GFX10-NEXT: s_and_b32 s0, 1, s5 6793; GFX10-NEXT: 
v_cmp_ne_u32_e64 s0, 0, s0 6794; GFX10-NEXT: v_cndmask_b32_e32 v4, v8, v4, vcc_lo 6795; GFX10-NEXT: v_cndmask_b32_e32 v5, v9, v5, vcc_lo 6796; GFX10-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc_lo 6797; GFX10-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc_lo 6798; GFX10-NEXT: v_cndmask_b32_e64 v0, v4, v0, s0 6799; GFX10-NEXT: v_cndmask_b32_e64 v1, v5, v1, s0 6800; GFX10-NEXT: v_or_b32_e32 v2, s2, v2 6801; GFX10-NEXT: v_or_b32_e32 v3, s3, v3 6802; GFX10-NEXT: v_or_b32_e32 v0, s8, v0 6803; GFX10-NEXT: v_or_b32_e32 v1, s9, v1 6804; GFX10-NEXT: ; return to shader part epilog 6805; 6806; GFX11-LABEL: v_fshr_i128_svs: 6807; GFX11: ; %bb.0: 6808; GFX11-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 6809; GFX11-NEXT: s_lshr_b32 s6, s1, 31 6810; GFX11-NEXT: s_mov_b32 s7, 0 6811; GFX11-NEXT: s_and_not1_b32 s5, 0x7f, s4 6812; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 6813; GFX11-NEXT: s_or_b64 s[2:3], s[2:3], s[6:7] 6814; GFX11-NEXT: s_not_b32 s10, s4 6815; GFX11-NEXT: s_sub_i32 s12, s5, 64 6816; GFX11-NEXT: s_sub_i32 s6, 64, s5 6817; GFX11-NEXT: s_cmp_lt_u32 s5, 64 6818; GFX11-NEXT: s_cselect_b32 s13, 1, 0 6819; GFX11-NEXT: s_cmp_eq_u32 s5, 0 6820; GFX11-NEXT: s_cselect_b32 s5, 1, 0 6821; GFX11-NEXT: s_lshr_b64 s[6:7], s[0:1], s6 6822; GFX11-NEXT: s_lshl_b64 s[8:9], s[2:3], s10 6823; GFX11-NEXT: s_lshl_b64 s[10:11], s[0:1], s10 6824; GFX11-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9] 6825; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], s12 6826; GFX11-NEXT: s_cmp_lg_u32 s13, 0 6827; GFX11-NEXT: s_cselect_b64 s[8:9], s[10:11], 0 6828; GFX11-NEXT: s_cselect_b64 s[0:1], s[6:7], s[0:1] 6829; GFX11-NEXT: s_cmp_lg_u32 s5, 0 6830; GFX11-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1] 6831; GFX11-NEXT: s_and_b32 s0, s4, 0x7f 6832; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 6833; GFX11-NEXT: s_sub_i32 s1, 64, s0 6834; GFX11-NEXT: v_lshrrev_b64 v[4:5], s0, v[0:1] 6835; GFX11-NEXT: v_lshlrev_b64 v[6:7], s1, v[2:3] 6836; GFX11-NEXT: s_sub_i32 s1, s0, 64 6837; GFX11-NEXT: s_cmp_lt_u32 s0, 64 6838; GFX11-NEXT: v_lshrrev_b64 v[8:9], 
s1, v[2:3] 6839; GFX11-NEXT: s_cselect_b32 s4, 1, 0 6840; GFX11-NEXT: s_cmp_eq_u32 s0, 0 6841; GFX11-NEXT: v_or_b32_e32 v4, v4, v6 6842; GFX11-NEXT: s_cselect_b32 s5, 1, 0 6843; GFX11-NEXT: s_and_b32 s1, 1, s4 6844; GFX11-NEXT: v_or_b32_e32 v5, v5, v7 6845; GFX11-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s1 6846; GFX11-NEXT: v_lshrrev_b64 v[2:3], s0, v[2:3] 6847; GFX11-NEXT: s_and_b32 s0, 1, s5 6848; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_3) 6849; GFX11-NEXT: v_cmp_ne_u32_e64 s0, 0, s0 6850; GFX11-NEXT: v_dual_cndmask_b32 v4, v8, v4 :: v_dual_cndmask_b32 v5, v9, v5 6851; GFX11-NEXT: v_dual_cndmask_b32 v2, 0, v2 :: v_dual_cndmask_b32 v3, 0, v3 6852; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3) 6853; GFX11-NEXT: v_cndmask_b32_e64 v0, v4, v0, s0 6854; GFX11-NEXT: v_cndmask_b32_e64 v1, v5, v1, s0 6855; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4) 6856; GFX11-NEXT: v_or_b32_e32 v2, s2, v2 6857; GFX11-NEXT: v_or_b32_e32 v3, s3, v3 6858; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 6859; GFX11-NEXT: v_or_b32_e32 v0, s8, v0 6860; GFX11-NEXT: v_or_b32_e32 v1, s9, v1 6861; GFX11-NEXT: ; return to shader part epilog 6862 %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 %amt) 6863 %cast.result = bitcast i128 %result to <4 x float> 6864 ret <4 x float> %cast.result 6865} 6866; i128 funnel shift right (llvm.fshr.i128) with %rhs and %amt marked inreg and %lhs not inreg; the i128 result is bitcast to a 4 x float vector for the return value. 6867define amdgpu_ps <4 x float> @v_fshr_i128_vss(i128 %lhs, i128 inreg %rhs, i128 inreg %amt) { 6868; GFX6-LABEL: v_fshr_i128_vss: 6869; GFX6: ; %bb.0: 6870; GFX6-NEXT: v_lshl_b64 v[2:3], v[2:3], 1 6871; GFX6-NEXT: v_lshl_b64 v[4:5], v[0:1], 1 6872; GFX6-NEXT: v_lshrrev_b32_e32 v0, 31, v1 6873; GFX6-NEXT: s_andn2_b32 s5, 0x7f, s4 6874; GFX6-NEXT: v_or_b32_e32 v2, v2, v0 6875; GFX6-NEXT: s_sub_i32 s6, s5, 64 6876; GFX6-NEXT: s_sub_i32 s7, 64, s5 6877; GFX6-NEXT: s_cmp_lt_u32 s5, 64 6878; GFX6-NEXT: v_lshr_b64 v[0:1], v[4:5], s7 6879; GFX6-NEXT: 
v_lshl_b64 v[6:7], v[2:3], s5 6880; GFX6-NEXT: s_cselect_b32 s8, 1, 0 6881; GFX6-NEXT: s_cmp_eq_u32 s5, 0 6882; GFX6-NEXT: s_cselect_b32 s9, 1, 0 6883; GFX6-NEXT: v_lshl_b64 v[8:9], v[4:5], s5 6884; GFX6-NEXT: v_or_b32_e32 v6, v0, v6 6885; GFX6-NEXT: v_or_b32_e32 v7, v1, v7 6886; GFX6-NEXT: v_lshl_b64 v[0:1], v[4:5], s6 6887; GFX6-NEXT: s_and_b32 s5, 1, s8 6888; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5 6889; GFX6-NEXT: s_and_b32 s5, 1, s9 6890; GFX6-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc 6891; GFX6-NEXT: v_cndmask_b32_e32 v5, 0, v9, vcc 6892; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc 6893; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc 6894; GFX6-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5 6895; GFX6-NEXT: s_and_b32 s5, s4, 0x7f 6896; GFX6-NEXT: s_sub_i32 s10, s5, 64 6897; GFX6-NEXT: s_sub_i32 s8, 64, s5 6898; GFX6-NEXT: s_cmp_lt_u32 s5, 64 6899; GFX6-NEXT: s_cselect_b32 s11, 1, 0 6900; GFX6-NEXT: s_cmp_eq_u32 s5, 0 6901; GFX6-NEXT: s_cselect_b32 s12, 1, 0 6902; GFX6-NEXT: s_lshr_b64 s[6:7], s[2:3], s4 6903; GFX6-NEXT: s_lshr_b64 s[4:5], s[0:1], s4 6904; GFX6-NEXT: s_lshl_b64 s[8:9], s[2:3], s8 6905; GFX6-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9] 6906; GFX6-NEXT: s_lshr_b64 s[2:3], s[2:3], s10 6907; GFX6-NEXT: s_cmp_lg_u32 s11, 0 6908; GFX6-NEXT: s_cselect_b64 s[2:3], s[4:5], s[2:3] 6909; GFX6-NEXT: s_cmp_lg_u32 s12, 0 6910; GFX6-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3] 6911; GFX6-NEXT: s_cmp_lg_u32 s11, 0 6912; GFX6-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc 6913; GFX6-NEXT: v_cndmask_b32_e32 v3, v1, v3, vcc 6914; GFX6-NEXT: s_cselect_b64 s[2:3], s[6:7], 0 6915; GFX6-NEXT: v_or_b32_e32 v0, s0, v4 6916; GFX6-NEXT: v_or_b32_e32 v1, s1, v5 6917; GFX6-NEXT: v_or_b32_e32 v2, s2, v2 6918; GFX6-NEXT: v_or_b32_e32 v3, s3, v3 6919; GFX6-NEXT: ; return to shader part epilog 6920; 6921; GFX8-LABEL: v_fshr_i128_vss: 6922; GFX8: ; %bb.0: 6923; GFX8-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] 6924; GFX8-NEXT: v_lshlrev_b64 v[4:5], 1, v[0:1] 6925; GFX8-NEXT: v_lshrrev_b32_e32 v0, 31, v1 6926; 
GFX8-NEXT: s_andn2_b32 s5, 0x7f, s4 6927; GFX8-NEXT: v_or_b32_e32 v2, v2, v0 6928; GFX8-NEXT: s_sub_i32 s6, s5, 64 6929; GFX8-NEXT: s_sub_i32 s7, 64, s5 6930; GFX8-NEXT: s_cmp_lt_u32 s5, 64 6931; GFX8-NEXT: v_lshrrev_b64 v[0:1], s7, v[4:5] 6932; GFX8-NEXT: v_lshlrev_b64 v[6:7], s5, v[2:3] 6933; GFX8-NEXT: s_cselect_b32 s8, 1, 0 6934; GFX8-NEXT: s_cmp_eq_u32 s5, 0 6935; GFX8-NEXT: s_cselect_b32 s9, 1, 0 6936; GFX8-NEXT: v_lshlrev_b64 v[8:9], s5, v[4:5] 6937; GFX8-NEXT: v_or_b32_e32 v6, v0, v6 6938; GFX8-NEXT: v_or_b32_e32 v7, v1, v7 6939; GFX8-NEXT: v_lshlrev_b64 v[0:1], s6, v[4:5] 6940; GFX8-NEXT: s_and_b32 s5, 1, s8 6941; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5 6942; GFX8-NEXT: s_and_b32 s5, 1, s9 6943; GFX8-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc 6944; GFX8-NEXT: v_cndmask_b32_e32 v5, 0, v9, vcc 6945; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc 6946; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc 6947; GFX8-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5 6948; GFX8-NEXT: s_and_b32 s5, s4, 0x7f 6949; GFX8-NEXT: s_sub_i32 s10, s5, 64 6950; GFX8-NEXT: s_sub_i32 s8, 64, s5 6951; GFX8-NEXT: s_cmp_lt_u32 s5, 64 6952; GFX8-NEXT: s_cselect_b32 s11, 1, 0 6953; GFX8-NEXT: s_cmp_eq_u32 s5, 0 6954; GFX8-NEXT: s_cselect_b32 s12, 1, 0 6955; GFX8-NEXT: s_lshr_b64 s[6:7], s[2:3], s4 6956; GFX8-NEXT: s_lshr_b64 s[4:5], s[0:1], s4 6957; GFX8-NEXT: s_lshl_b64 s[8:9], s[2:3], s8 6958; GFX8-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9] 6959; GFX8-NEXT: s_lshr_b64 s[2:3], s[2:3], s10 6960; GFX8-NEXT: s_cmp_lg_u32 s11, 0 6961; GFX8-NEXT: s_cselect_b64 s[2:3], s[4:5], s[2:3] 6962; GFX8-NEXT: s_cmp_lg_u32 s12, 0 6963; GFX8-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3] 6964; GFX8-NEXT: s_cmp_lg_u32 s11, 0 6965; GFX8-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc 6966; GFX8-NEXT: v_cndmask_b32_e32 v3, v1, v3, vcc 6967; GFX8-NEXT: s_cselect_b64 s[2:3], s[6:7], 0 6968; GFX8-NEXT: v_or_b32_e32 v0, s0, v4 6969; GFX8-NEXT: v_or_b32_e32 v1, s1, v5 6970; GFX8-NEXT: v_or_b32_e32 v2, s2, v2 6971; GFX8-NEXT: v_or_b32_e32 v3, s3, v3 
6972; GFX8-NEXT: ; return to shader part epilog 6973; 6974; GFX9-LABEL: v_fshr_i128_vss: 6975; GFX9: ; %bb.0: 6976; GFX9-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] 6977; GFX9-NEXT: v_lshlrev_b64 v[4:5], 1, v[0:1] 6978; GFX9-NEXT: v_lshrrev_b32_e32 v0, 31, v1 6979; GFX9-NEXT: s_andn2_b32 s5, 0x7f, s4 6980; GFX9-NEXT: v_or_b32_e32 v2, v2, v0 6981; GFX9-NEXT: s_sub_i32 s6, s5, 64 6982; GFX9-NEXT: s_sub_i32 s7, 64, s5 6983; GFX9-NEXT: s_cmp_lt_u32 s5, 64 6984; GFX9-NEXT: v_lshrrev_b64 v[0:1], s7, v[4:5] 6985; GFX9-NEXT: v_lshlrev_b64 v[6:7], s5, v[2:3] 6986; GFX9-NEXT: s_cselect_b32 s8, 1, 0 6987; GFX9-NEXT: s_cmp_eq_u32 s5, 0 6988; GFX9-NEXT: s_cselect_b32 s9, 1, 0 6989; GFX9-NEXT: v_lshlrev_b64 v[8:9], s5, v[4:5] 6990; GFX9-NEXT: v_or_b32_e32 v6, v0, v6 6991; GFX9-NEXT: v_or_b32_e32 v7, v1, v7 6992; GFX9-NEXT: v_lshlrev_b64 v[0:1], s6, v[4:5] 6993; GFX9-NEXT: s_and_b32 s5, 1, s8 6994; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5 6995; GFX9-NEXT: s_and_b32 s5, 1, s9 6996; GFX9-NEXT: v_cndmask_b32_e32 v4, 0, v8, vcc 6997; GFX9-NEXT: v_cndmask_b32_e32 v5, 0, v9, vcc 6998; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v6, vcc 6999; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v7, vcc 7000; GFX9-NEXT: v_cmp_ne_u32_e64 vcc, 0, s5 7001; GFX9-NEXT: s_and_b32 s5, s4, 0x7f 7002; GFX9-NEXT: s_sub_i32 s10, s5, 64 7003; GFX9-NEXT: s_sub_i32 s8, 64, s5 7004; GFX9-NEXT: s_cmp_lt_u32 s5, 64 7005; GFX9-NEXT: s_cselect_b32 s11, 1, 0 7006; GFX9-NEXT: s_cmp_eq_u32 s5, 0 7007; GFX9-NEXT: s_cselect_b32 s12, 1, 0 7008; GFX9-NEXT: s_lshr_b64 s[6:7], s[2:3], s4 7009; GFX9-NEXT: s_lshr_b64 s[4:5], s[0:1], s4 7010; GFX9-NEXT: s_lshl_b64 s[8:9], s[2:3], s8 7011; GFX9-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9] 7012; GFX9-NEXT: s_lshr_b64 s[2:3], s[2:3], s10 7013; GFX9-NEXT: s_cmp_lg_u32 s11, 0 7014; GFX9-NEXT: s_cselect_b64 s[2:3], s[4:5], s[2:3] 7015; GFX9-NEXT: s_cmp_lg_u32 s12, 0 7016; GFX9-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3] 7017; GFX9-NEXT: s_cmp_lg_u32 s11, 0 7018; GFX9-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc 7019; 
GFX9-NEXT: v_cndmask_b32_e32 v3, v1, v3, vcc 7020; GFX9-NEXT: s_cselect_b64 s[2:3], s[6:7], 0 7021; GFX9-NEXT: v_or_b32_e32 v0, s0, v4 7022; GFX9-NEXT: v_or_b32_e32 v1, s1, v5 7023; GFX9-NEXT: v_or_b32_e32 v2, s2, v2 7024; GFX9-NEXT: v_or_b32_e32 v3, s3, v3 7025; GFX9-NEXT: ; return to shader part epilog 7026; 7027; GFX10-LABEL: v_fshr_i128_vss: 7028; GFX10: ; %bb.0: 7029; GFX10-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] 7030; GFX10-NEXT: v_lshrrev_b32_e32 v4, 31, v1 7031; GFX10-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 7032; GFX10-NEXT: s_andn2_b32 s5, 0x7f, s4 7033; GFX10-NEXT: s_sub_i32 s6, s5, 64 7034; GFX10-NEXT: v_or_b32_e32 v2, v2, v4 7035; GFX10-NEXT: s_sub_i32 s7, 64, s5 7036; GFX10-NEXT: s_cmp_lt_u32 s5, 64 7037; GFX10-NEXT: v_lshrrev_b64 v[4:5], s7, v[0:1] 7038; GFX10-NEXT: s_cselect_b32 s8, 1, 0 7039; GFX10-NEXT: s_cmp_eq_u32 s5, 0 7040; GFX10-NEXT: v_lshlrev_b64 v[6:7], s5, v[2:3] 7041; GFX10-NEXT: s_cselect_b32 s9, 1, 0 7042; GFX10-NEXT: v_lshlrev_b64 v[8:9], s5, v[0:1] 7043; GFX10-NEXT: s_and_b32 s5, 1, s8 7044; GFX10-NEXT: v_lshlrev_b64 v[0:1], s6, v[0:1] 7045; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s5 7046; GFX10-NEXT: s_and_b32 s5, s4, 0x7f 7047; GFX10-NEXT: v_or_b32_e32 v4, v4, v6 7048; GFX10-NEXT: v_or_b32_e32 v5, v5, v7 7049; GFX10-NEXT: s_and_b32 s6, 1, s9 7050; GFX10-NEXT: s_sub_i32 s10, s5, 64 7051; GFX10-NEXT: s_sub_i32 s8, 64, s5 7052; GFX10-NEXT: s_cmp_lt_u32 s5, 64 7053; GFX10-NEXT: v_cndmask_b32_e32 v6, 0, v8, vcc_lo 7054; GFX10-NEXT: s_cselect_b32 s11, 1, 0 7055; GFX10-NEXT: s_cmp_eq_u32 s5, 0 7056; GFX10-NEXT: v_cndmask_b32_e32 v7, 0, v9, vcc_lo 7057; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v4, vcc_lo 7058; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v5, vcc_lo 7059; GFX10-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s6 7060; GFX10-NEXT: s_cselect_b32 s12, 1, 0 7061; GFX10-NEXT: s_lshr_b64 s[6:7], s[0:1], s4 7062; GFX10-NEXT: s_lshl_b64 s[8:9], s[2:3], s8 7063; GFX10-NEXT: s_lshr_b64 s[4:5], s[2:3], s4 7064; GFX10-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9] 7065; 
GFX10-NEXT: s_lshr_b64 s[2:3], s[2:3], s10 7066; GFX10-NEXT: s_cmp_lg_u32 s11, 0 7067; GFX10-NEXT: v_cndmask_b32_e32 v2, v0, v2, vcc_lo 7068; GFX10-NEXT: s_cselect_b64 s[2:3], s[6:7], s[2:3] 7069; GFX10-NEXT: s_cmp_lg_u32 s12, 0 7070; GFX10-NEXT: v_cndmask_b32_e32 v3, v1, v3, vcc_lo 7071; GFX10-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3] 7072; GFX10-NEXT: s_cmp_lg_u32 s11, 0 7073; GFX10-NEXT: v_or_b32_e32 v0, s0, v6 7074; GFX10-NEXT: s_cselect_b64 s[2:3], s[4:5], 0 7075; GFX10-NEXT: v_or_b32_e32 v1, s1, v7 7076; GFX10-NEXT: v_or_b32_e32 v2, s2, v2 7077; GFX10-NEXT: v_or_b32_e32 v3, s3, v3 7078; GFX10-NEXT: ; return to shader part epilog 7079; 7080; GFX11-LABEL: v_fshr_i128_vss: 7081; GFX11: ; %bb.0: 7082; GFX11-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] 7083; GFX11-NEXT: v_lshrrev_b32_e32 v4, 31, v1 7084; GFX11-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 7085; GFX11-NEXT: s_and_not1_b32 s5, 0x7f, s4 7086; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2) 7087; GFX11-NEXT: s_sub_i32 s6, s5, 64 7088; GFX11-NEXT: v_or_b32_e32 v2, v2, v4 7089; GFX11-NEXT: s_sub_i32 s7, 64, s5 7090; GFX11-NEXT: s_cmp_lt_u32 s5, 64 7091; GFX11-NEXT: v_lshrrev_b64 v[4:5], s7, v[0:1] 7092; GFX11-NEXT: s_cselect_b32 s8, 1, 0 7093; GFX11-NEXT: s_cmp_eq_u32 s5, 0 7094; GFX11-NEXT: v_lshlrev_b64 v[6:7], s5, v[2:3] 7095; GFX11-NEXT: s_cselect_b32 s9, 1, 0 7096; GFX11-NEXT: v_lshlrev_b64 v[8:9], s5, v[0:1] 7097; GFX11-NEXT: s_and_b32 s5, 1, s8 7098; GFX11-NEXT: v_lshlrev_b64 v[0:1], s6, v[0:1] 7099; GFX11-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s5 7100; GFX11-NEXT: s_and_b32 s5, s4, 0x7f 7101; GFX11-NEXT: v_or_b32_e32 v4, v4, v6 7102; GFX11-NEXT: v_or_b32_e32 v5, v5, v7 7103; GFX11-NEXT: s_and_b32 s6, 1, s9 7104; GFX11-NEXT: s_sub_i32 s10, s5, 64 7105; GFX11-NEXT: s_sub_i32 s8, 64, s5 7106; GFX11-NEXT: s_cmp_lt_u32 s5, 64 7107; GFX11-NEXT: v_dual_cndmask_b32 v6, 0, v8 :: v_dual_cndmask_b32 v7, 0, v9 7108; GFX11-NEXT: s_cselect_b32 s11, 1, 0 7109; GFX11-NEXT: s_cmp_eq_u32 s5, 0 
7110; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v4 :: v_dual_cndmask_b32 v1, v1, v5 7111; GFX11-NEXT: v_cmp_ne_u32_e64 vcc_lo, 0, s6 7112; GFX11-NEXT: s_cselect_b32 s12, 1, 0 7113; GFX11-NEXT: s_lshr_b64 s[6:7], s[0:1], s4 7114; GFX11-NEXT: s_lshl_b64 s[8:9], s[2:3], s8 7115; GFX11-NEXT: s_lshr_b64 s[4:5], s[2:3], s4 7116; GFX11-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9] 7117; GFX11-NEXT: s_lshr_b64 s[2:3], s[2:3], s10 7118; GFX11-NEXT: s_cmp_lg_u32 s11, 0 7119; GFX11-NEXT: v_dual_cndmask_b32 v2, v0, v2 :: v_dual_cndmask_b32 v3, v1, v3 7120; GFX11-NEXT: s_cselect_b64 s[2:3], s[6:7], s[2:3] 7121; GFX11-NEXT: s_cmp_lg_u32 s12, 0 7122; GFX11-NEXT: s_cselect_b64 s[0:1], s[0:1], s[2:3] 7123; GFX11-NEXT: s_cmp_lg_u32 s11, 0 7124; GFX11-NEXT: v_or_b32_e32 v0, s0, v6 7125; GFX11-NEXT: s_cselect_b64 s[2:3], s[4:5], 0 7126; GFX11-NEXT: v_or_b32_e32 v1, s1, v7 7127; GFX11-NEXT: v_or_b32_e32 v2, s2, v2 7128; GFX11-NEXT: v_or_b32_e32 v3, s3, v3 7129; GFX11-NEXT: ; return to shader part epilog 7130 %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 %amt) 7131 %cast.result = bitcast i128 %result to <4 x float> 7132 ret <4 x float> %cast.result 7133} 7134; i128 funnel shift right (llvm.fshr.i128) of two inreg operands by the constant amount 65; the i128 result is returned directly. 7135define amdgpu_ps i128 @s_fshr_i128_65(i128 inreg %lhs, i128 inreg %rhs) { 7136; GFX6-LABEL: s_fshr_i128_65: 7137; GFX6: ; %bb.0: 7138; GFX6-NEXT: s_mov_b32 s4, 0 7139; GFX6-NEXT: s_lshl_b32 s5, s0, 31 7140; GFX6-NEXT: s_lshl_b32 s3, s2, 31 7141; GFX6-NEXT: s_mov_b32 s2, s4 7142; GFX6-NEXT: s_lshr_b64 s[0:1], s[0:1], 1 7143; GFX6-NEXT: s_or_b64 s[2:3], s[2:3], s[0:1] 7144; GFX6-NEXT: s_lshr_b64 s[0:1], s[6:7], 1 7145; GFX6-NEXT: s_or_b64 s[0:1], s[4:5], s[0:1] 7146; GFX6-NEXT: ; return to shader part epilog 7147; 7148; GFX8-LABEL: s_fshr_i128_65: 7149; GFX8: ; %bb.0: 7150; GFX8-NEXT: s_mov_b32 s4, 0 7151; GFX8-NEXT: s_lshl_b32 s5, s0, 31 7152; GFX8-NEXT: s_lshl_b32 s3, s2, 31 7153; GFX8-NEXT: s_mov_b32 s2, s4 7154; GFX8-NEXT: s_lshr_b64 s[0:1], s[0:1], 1 7155; GFX8-NEXT: s_or_b64 s[2:3], s[2:3], s[0:1] 7156; GFX8-NEXT: 
s_lshr_b64 s[0:1], s[6:7], 1 7157; GFX8-NEXT: s_or_b64 s[0:1], s[4:5], s[0:1] 7158; GFX8-NEXT: ; return to shader part epilog 7159; 7160; GFX9-LABEL: s_fshr_i128_65: 7161; GFX9: ; %bb.0: 7162; GFX9-NEXT: s_mov_b32 s4, 0 7163; GFX9-NEXT: s_lshl_b32 s5, s0, 31 7164; GFX9-NEXT: s_lshl_b32 s3, s2, 31 7165; GFX9-NEXT: s_mov_b32 s2, s4 7166; GFX9-NEXT: s_lshr_b64 s[0:1], s[0:1], 1 7167; GFX9-NEXT: s_or_b64 s[2:3], s[2:3], s[0:1] 7168; GFX9-NEXT: s_lshr_b64 s[0:1], s[6:7], 1 7169; GFX9-NEXT: s_or_b64 s[0:1], s[4:5], s[0:1] 7170; GFX9-NEXT: ; return to shader part epilog 7171; 7172; GFX10-LABEL: s_fshr_i128_65: 7173; GFX10: ; %bb.0: 7174; GFX10-NEXT: s_mov_b32 s4, 0 7175; GFX10-NEXT: s_lshl_b32 s5, s0, 31 7176; GFX10-NEXT: s_lshl_b32 s3, s2, 31 7177; GFX10-NEXT: s_mov_b32 s2, s4 7178; GFX10-NEXT: s_lshr_b64 s[6:7], s[6:7], 1 7179; GFX10-NEXT: s_lshr_b64 s[8:9], s[0:1], 1 7180; GFX10-NEXT: s_or_b64 s[0:1], s[4:5], s[6:7] 7181; GFX10-NEXT: s_or_b64 s[2:3], s[2:3], s[8:9] 7182; GFX10-NEXT: ; return to shader part epilog 7183; 7184; GFX11-LABEL: s_fshr_i128_65: 7185; GFX11: ; %bb.0: 7186; GFX11-NEXT: s_mov_b32 s4, 0 7187; GFX11-NEXT: s_lshl_b32 s5, s0, 31 7188; GFX11-NEXT: s_lshl_b32 s3, s2, 31 7189; GFX11-NEXT: s_mov_b32 s2, s4 7190; GFX11-NEXT: s_lshr_b64 s[6:7], s[6:7], 1 7191; GFX11-NEXT: s_lshr_b64 s[8:9], s[0:1], 1 7192; GFX11-NEXT: s_or_b64 s[0:1], s[4:5], s[6:7] 7193; GFX11-NEXT: s_or_b64 s[2:3], s[2:3], s[8:9] 7194; GFX11-NEXT: ; return to shader part epilog 7195 %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 65) 7196 ret i128 %result 7197} 7198; i128 funnel shift right (llvm.fshr.i128) by the constant amount 65, declared without amdgpu_ps and without inreg on either operand; the i128 result is returned directly. 7199define i128 @v_fshr_i128_65(i128 %lhs, i128 %rhs) { 7200; GFX6-LABEL: v_fshr_i128_65: 7201; GFX6: ; %bb.0: 7202; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7203; GFX6-NEXT: v_lshlrev_b32_e32 v4, 31, v0 7204; GFX6-NEXT: v_lshlrev_b32_e32 v5, 31, v2 7205; GFX6-NEXT: v_lshr_b64 v[2:3], v[0:1], 1 7206; GFX6-NEXT: v_lshr_b64 v[0:1], v[6:7], 1 7207; GFX6-NEXT: v_or_b32_e32 v3, v5, v3 7208; GFX6-NEXT: 
v_or_b32_e32 v1, v4, v1 7209; GFX6-NEXT: s_setpc_b64 s[30:31] 7210; 7211; GFX8-LABEL: v_fshr_i128_65: 7212; GFX8: ; %bb.0: 7213; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7214; GFX8-NEXT: v_lshlrev_b32_e32 v4, 31, v0 7215; GFX8-NEXT: v_lshlrev_b32_e32 v5, 31, v2 7216; GFX8-NEXT: v_lshrrev_b64 v[2:3], 1, v[0:1] 7217; GFX8-NEXT: v_lshrrev_b64 v[0:1], 1, v[6:7] 7218; GFX8-NEXT: v_or_b32_e32 v3, v5, v3 7219; GFX8-NEXT: v_or_b32_e32 v1, v4, v1 7220; GFX8-NEXT: s_setpc_b64 s[30:31] 7221; 7222; GFX9-LABEL: v_fshr_i128_65: 7223; GFX9: ; %bb.0: 7224; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7225; GFX9-NEXT: v_mov_b32_e32 v8, v2 7226; GFX9-NEXT: v_lshrrev_b64 v[2:3], 1, v[0:1] 7227; GFX9-NEXT: v_lshrrev_b64 v[4:5], 1, v[6:7] 7228; GFX9-NEXT: v_lshl_or_b32 v3, v8, 31, v3 7229; GFX9-NEXT: v_lshl_or_b32 v1, v0, 31, v5 7230; GFX9-NEXT: v_mov_b32_e32 v0, v4 7231; GFX9-NEXT: s_setpc_b64 s[30:31] 7232; 7233; GFX10-LABEL: v_fshr_i128_65: 7234; GFX10: ; %bb.0: 7235; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7236; GFX10-NEXT: v_mov_b32_e32 v8, v2 7237; GFX10-NEXT: v_lshrrev_b64 v[4:5], 1, v[6:7] 7238; GFX10-NEXT: v_lshrrev_b64 v[2:3], 1, v[0:1] 7239; GFX10-NEXT: v_lshl_or_b32 v1, v0, 31, v5 7240; GFX10-NEXT: v_lshl_or_b32 v3, v8, 31, v3 7241; GFX10-NEXT: v_mov_b32_e32 v0, v4 7242; GFX10-NEXT: s_setpc_b64 s[30:31] 7243; 7244; GFX11-LABEL: v_fshr_i128_65: 7245; GFX11: ; %bb.0: 7246; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7247; GFX11-NEXT: v_mov_b32_e32 v8, v2 7248; GFX11-NEXT: v_lshrrev_b64 v[4:5], 1, v[6:7] 7249; GFX11-NEXT: v_lshrrev_b64 v[2:3], 1, v[0:1] 7250; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) 7251; GFX11-NEXT: v_lshl_or_b32 v1, v0, 31, v5 7252; GFX11-NEXT: v_lshl_or_b32 v3, v8, 31, v3 7253; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) 7254; GFX11-NEXT: v_mov_b32_e32 v0, v4 7255; GFX11-NEXT: s_setpc_b64 s[30:31] 7256 %result = call i128 @llvm.fshr.i128(i128 %lhs, i128 %rhs, i128 65) 7257 ret 
i128 %result 7258} 7259 7260define amdgpu_ps <2 x i128> @s_fshr_v2i128(<2 x i128> inreg %lhs, <2 x i128> inreg %rhs, <2 x i128> inreg %amt) { 7261; GFX6-LABEL: s_fshr_v2i128: 7262; GFX6: ; %bb.0: 7263; GFX6-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 7264; GFX6-NEXT: s_lshr_b32 s22, s1, 31 7265; GFX6-NEXT: s_mov_b32 s23, 0 7266; GFX6-NEXT: s_lshl_b64 s[18:19], s[0:1], 1 7267; GFX6-NEXT: s_or_b64 s[0:1], s[2:3], s[22:23] 7268; GFX6-NEXT: s_andn2_b32 s2, 0x7f, s16 7269; GFX6-NEXT: s_not_b32 s17, s16 7270; GFX6-NEXT: s_sub_i32 s21, s2, 64 7271; GFX6-NEXT: s_sub_i32 s22, 64, s2 7272; GFX6-NEXT: s_cmp_lt_u32 s2, 64 7273; GFX6-NEXT: s_cselect_b32 s28, 1, 0 7274; GFX6-NEXT: s_cmp_eq_u32 s2, 0 7275; GFX6-NEXT: s_cselect_b32 s29, 1, 0 7276; GFX6-NEXT: s_lshr_b64 s[24:25], s[18:19], s22 7277; GFX6-NEXT: s_lshl_b64 s[26:27], s[0:1], s17 7278; GFX6-NEXT: s_lshl_b64 s[2:3], s[18:19], s17 7279; GFX6-NEXT: s_or_b64 s[24:25], s[24:25], s[26:27] 7280; GFX6-NEXT: s_lshl_b64 s[18:19], s[18:19], s21 7281; GFX6-NEXT: s_cmp_lg_u32 s28, 0 7282; GFX6-NEXT: s_cselect_b64 s[2:3], s[2:3], 0 7283; GFX6-NEXT: s_cselect_b64 s[18:19], s[24:25], s[18:19] 7284; GFX6-NEXT: s_cmp_lg_u32 s29, 0 7285; GFX6-NEXT: s_cselect_b64 s[18:19], s[0:1], s[18:19] 7286; GFX6-NEXT: s_and_b32 s0, s16, 0x7f 7287; GFX6-NEXT: s_sub_i32 s21, s0, 64 7288; GFX6-NEXT: s_sub_i32 s22, 64, s0 7289; GFX6-NEXT: s_cmp_lt_u32 s0, 64 7290; GFX6-NEXT: s_cselect_b32 s26, 1, 0 7291; GFX6-NEXT: s_cmp_eq_u32 s0, 0 7292; GFX6-NEXT: s_cselect_b32 s27, 1, 0 7293; GFX6-NEXT: s_lshr_b64 s[0:1], s[10:11], s16 7294; GFX6-NEXT: s_lshr_b64 s[16:17], s[8:9], s16 7295; GFX6-NEXT: s_lshl_b64 s[24:25], s[10:11], s22 7296; GFX6-NEXT: s_or_b64 s[16:17], s[16:17], s[24:25] 7297; GFX6-NEXT: s_lshr_b64 s[10:11], s[10:11], s21 7298; GFX6-NEXT: s_cmp_lg_u32 s26, 0 7299; GFX6-NEXT: s_cselect_b64 s[10:11], s[16:17], s[10:11] 7300; GFX6-NEXT: s_cmp_lg_u32 s27, 0 7301; GFX6-NEXT: s_cselect_b64 s[8:9], s[8:9], s[10:11] 7302; GFX6-NEXT: s_cmp_lg_u32 s26, 0 7303; 
GFX6-NEXT: s_cselect_b64 s[10:11], s[0:1], 0 7304; GFX6-NEXT: s_lshl_b64 s[6:7], s[6:7], 1 7305; GFX6-NEXT: s_lshr_b32 s22, s5, 31 7306; GFX6-NEXT: s_or_b64 s[0:1], s[2:3], s[8:9] 7307; GFX6-NEXT: s_lshl_b64 s[8:9], s[4:5], 1 7308; GFX6-NEXT: s_or_b64 s[4:5], s[6:7], s[22:23] 7309; GFX6-NEXT: s_andn2_b32 s6, 0x7f, s20 7310; GFX6-NEXT: s_or_b64 s[2:3], s[18:19], s[10:11] 7311; GFX6-NEXT: s_not_b32 s16, s20 7312; GFX6-NEXT: s_sub_i32 s18, s6, 64 7313; GFX6-NEXT: s_sub_i32 s10, 64, s6 7314; GFX6-NEXT: s_cmp_lt_u32 s6, 64 7315; GFX6-NEXT: s_cselect_b32 s19, 1, 0 7316; GFX6-NEXT: s_cmp_eq_u32 s6, 0 7317; GFX6-NEXT: s_cselect_b32 s21, 1, 0 7318; GFX6-NEXT: s_lshl_b64 s[6:7], s[8:9], s16 7319; GFX6-NEXT: s_lshr_b64 s[10:11], s[8:9], s10 7320; GFX6-NEXT: s_lshl_b64 s[16:17], s[4:5], s16 7321; GFX6-NEXT: s_or_b64 s[10:11], s[10:11], s[16:17] 7322; GFX6-NEXT: s_lshl_b64 s[8:9], s[8:9], s18 7323; GFX6-NEXT: s_cmp_lg_u32 s19, 0 7324; GFX6-NEXT: s_cselect_b64 s[6:7], s[6:7], 0 7325; GFX6-NEXT: s_cselect_b64 s[8:9], s[10:11], s[8:9] 7326; GFX6-NEXT: s_cmp_lg_u32 s21, 0 7327; GFX6-NEXT: s_cselect_b64 s[8:9], s[4:5], s[8:9] 7328; GFX6-NEXT: s_and_b32 s4, s20, 0x7f 7329; GFX6-NEXT: s_sub_i32 s18, s4, 64 7330; GFX6-NEXT: s_sub_i32 s16, 64, s4 7331; GFX6-NEXT: s_cmp_lt_u32 s4, 64 7332; GFX6-NEXT: s_cselect_b32 s19, 1, 0 7333; GFX6-NEXT: s_cmp_eq_u32 s4, 0 7334; GFX6-NEXT: s_cselect_b32 s21, 1, 0 7335; GFX6-NEXT: s_lshr_b64 s[10:11], s[12:13], s20 7336; GFX6-NEXT: s_lshl_b64 s[16:17], s[14:15], s16 7337; GFX6-NEXT: s_lshr_b64 s[4:5], s[14:15], s20 7338; GFX6-NEXT: s_or_b64 s[10:11], s[10:11], s[16:17] 7339; GFX6-NEXT: s_lshr_b64 s[14:15], s[14:15], s18 7340; GFX6-NEXT: s_cmp_lg_u32 s19, 0 7341; GFX6-NEXT: s_cselect_b64 s[10:11], s[10:11], s[14:15] 7342; GFX6-NEXT: s_cmp_lg_u32 s21, 0 7343; GFX6-NEXT: s_cselect_b64 s[10:11], s[12:13], s[10:11] 7344; GFX6-NEXT: s_cmp_lg_u32 s19, 0 7345; GFX6-NEXT: s_cselect_b64 s[12:13], s[4:5], 0 7346; GFX6-NEXT: s_or_b64 s[4:5], s[6:7], s[10:11] 7347; 
GFX6-NEXT: s_or_b64 s[6:7], s[8:9], s[12:13] 7348; GFX6-NEXT: ; return to shader part epilog 7349; 7350; GFX8-LABEL: s_fshr_v2i128: 7351; GFX8: ; %bb.0: 7352; GFX8-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 7353; GFX8-NEXT: s_lshr_b32 s22, s1, 31 7354; GFX8-NEXT: s_mov_b32 s23, 0 7355; GFX8-NEXT: s_lshl_b64 s[18:19], s[0:1], 1 7356; GFX8-NEXT: s_or_b64 s[0:1], s[2:3], s[22:23] 7357; GFX8-NEXT: s_andn2_b32 s2, 0x7f, s16 7358; GFX8-NEXT: s_not_b32 s17, s16 7359; GFX8-NEXT: s_sub_i32 s21, s2, 64 7360; GFX8-NEXT: s_sub_i32 s22, 64, s2 7361; GFX8-NEXT: s_cmp_lt_u32 s2, 64 7362; GFX8-NEXT: s_cselect_b32 s28, 1, 0 7363; GFX8-NEXT: s_cmp_eq_u32 s2, 0 7364; GFX8-NEXT: s_cselect_b32 s29, 1, 0 7365; GFX8-NEXT: s_lshr_b64 s[24:25], s[18:19], s22 7366; GFX8-NEXT: s_lshl_b64 s[26:27], s[0:1], s17 7367; GFX8-NEXT: s_lshl_b64 s[2:3], s[18:19], s17 7368; GFX8-NEXT: s_or_b64 s[24:25], s[24:25], s[26:27] 7369; GFX8-NEXT: s_lshl_b64 s[18:19], s[18:19], s21 7370; GFX8-NEXT: s_cmp_lg_u32 s28, 0 7371; GFX8-NEXT: s_cselect_b64 s[2:3], s[2:3], 0 7372; GFX8-NEXT: s_cselect_b64 s[18:19], s[24:25], s[18:19] 7373; GFX8-NEXT: s_cmp_lg_u32 s29, 0 7374; GFX8-NEXT: s_cselect_b64 s[18:19], s[0:1], s[18:19] 7375; GFX8-NEXT: s_and_b32 s0, s16, 0x7f 7376; GFX8-NEXT: s_sub_i32 s21, s0, 64 7377; GFX8-NEXT: s_sub_i32 s22, 64, s0 7378; GFX8-NEXT: s_cmp_lt_u32 s0, 64 7379; GFX8-NEXT: s_cselect_b32 s26, 1, 0 7380; GFX8-NEXT: s_cmp_eq_u32 s0, 0 7381; GFX8-NEXT: s_cselect_b32 s27, 1, 0 7382; GFX8-NEXT: s_lshr_b64 s[0:1], s[10:11], s16 7383; GFX8-NEXT: s_lshr_b64 s[16:17], s[8:9], s16 7384; GFX8-NEXT: s_lshl_b64 s[24:25], s[10:11], s22 7385; GFX8-NEXT: s_or_b64 s[16:17], s[16:17], s[24:25] 7386; GFX8-NEXT: s_lshr_b64 s[10:11], s[10:11], s21 7387; GFX8-NEXT: s_cmp_lg_u32 s26, 0 7388; GFX8-NEXT: s_cselect_b64 s[10:11], s[16:17], s[10:11] 7389; GFX8-NEXT: s_cmp_lg_u32 s27, 0 7390; GFX8-NEXT: s_cselect_b64 s[8:9], s[8:9], s[10:11] 7391; GFX8-NEXT: s_cmp_lg_u32 s26, 0 7392; GFX8-NEXT: s_cselect_b64 s[10:11], s[0:1], 0 
7393; GFX8-NEXT: s_lshl_b64 s[6:7], s[6:7], 1 7394; GFX8-NEXT: s_lshr_b32 s22, s5, 31 7395; GFX8-NEXT: s_or_b64 s[0:1], s[2:3], s[8:9] 7396; GFX8-NEXT: s_lshl_b64 s[8:9], s[4:5], 1 7397; GFX8-NEXT: s_or_b64 s[4:5], s[6:7], s[22:23] 7398; GFX8-NEXT: s_andn2_b32 s6, 0x7f, s20 7399; GFX8-NEXT: s_or_b64 s[2:3], s[18:19], s[10:11] 7400; GFX8-NEXT: s_not_b32 s16, s20 7401; GFX8-NEXT: s_sub_i32 s18, s6, 64 7402; GFX8-NEXT: s_sub_i32 s10, 64, s6 7403; GFX8-NEXT: s_cmp_lt_u32 s6, 64 7404; GFX8-NEXT: s_cselect_b32 s19, 1, 0 7405; GFX8-NEXT: s_cmp_eq_u32 s6, 0 7406; GFX8-NEXT: s_cselect_b32 s21, 1, 0 7407; GFX8-NEXT: s_lshl_b64 s[6:7], s[8:9], s16 7408; GFX8-NEXT: s_lshr_b64 s[10:11], s[8:9], s10 7409; GFX8-NEXT: s_lshl_b64 s[16:17], s[4:5], s16 7410; GFX8-NEXT: s_or_b64 s[10:11], s[10:11], s[16:17] 7411; GFX8-NEXT: s_lshl_b64 s[8:9], s[8:9], s18 7412; GFX8-NEXT: s_cmp_lg_u32 s19, 0 7413; GFX8-NEXT: s_cselect_b64 s[6:7], s[6:7], 0 7414; GFX8-NEXT: s_cselect_b64 s[8:9], s[10:11], s[8:9] 7415; GFX8-NEXT: s_cmp_lg_u32 s21, 0 7416; GFX8-NEXT: s_cselect_b64 s[8:9], s[4:5], s[8:9] 7417; GFX8-NEXT: s_and_b32 s4, s20, 0x7f 7418; GFX8-NEXT: s_sub_i32 s18, s4, 64 7419; GFX8-NEXT: s_sub_i32 s16, 64, s4 7420; GFX8-NEXT: s_cmp_lt_u32 s4, 64 7421; GFX8-NEXT: s_cselect_b32 s19, 1, 0 7422; GFX8-NEXT: s_cmp_eq_u32 s4, 0 7423; GFX8-NEXT: s_cselect_b32 s21, 1, 0 7424; GFX8-NEXT: s_lshr_b64 s[10:11], s[12:13], s20 7425; GFX8-NEXT: s_lshl_b64 s[16:17], s[14:15], s16 7426; GFX8-NEXT: s_lshr_b64 s[4:5], s[14:15], s20 7427; GFX8-NEXT: s_or_b64 s[10:11], s[10:11], s[16:17] 7428; GFX8-NEXT: s_lshr_b64 s[14:15], s[14:15], s18 7429; GFX8-NEXT: s_cmp_lg_u32 s19, 0 7430; GFX8-NEXT: s_cselect_b64 s[10:11], s[10:11], s[14:15] 7431; GFX8-NEXT: s_cmp_lg_u32 s21, 0 7432; GFX8-NEXT: s_cselect_b64 s[10:11], s[12:13], s[10:11] 7433; GFX8-NEXT: s_cmp_lg_u32 s19, 0 7434; GFX8-NEXT: s_cselect_b64 s[12:13], s[4:5], 0 7435; GFX8-NEXT: s_or_b64 s[4:5], s[6:7], s[10:11] 7436; GFX8-NEXT: s_or_b64 s[6:7], s[8:9], s[12:13] 
7437; GFX8-NEXT: ; return to shader part epilog 7438; 7439; GFX9-LABEL: s_fshr_v2i128: 7440; GFX9: ; %bb.0: 7441; GFX9-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 7442; GFX9-NEXT: s_lshr_b32 s22, s1, 31 7443; GFX9-NEXT: s_mov_b32 s23, 0 7444; GFX9-NEXT: s_lshl_b64 s[18:19], s[0:1], 1 7445; GFX9-NEXT: s_or_b64 s[0:1], s[2:3], s[22:23] 7446; GFX9-NEXT: s_andn2_b32 s2, 0x7f, s16 7447; GFX9-NEXT: s_not_b32 s17, s16 7448; GFX9-NEXT: s_sub_i32 s21, s2, 64 7449; GFX9-NEXT: s_sub_i32 s22, 64, s2 7450; GFX9-NEXT: s_cmp_lt_u32 s2, 64 7451; GFX9-NEXT: s_cselect_b32 s28, 1, 0 7452; GFX9-NEXT: s_cmp_eq_u32 s2, 0 7453; GFX9-NEXT: s_cselect_b32 s29, 1, 0 7454; GFX9-NEXT: s_lshr_b64 s[24:25], s[18:19], s22 7455; GFX9-NEXT: s_lshl_b64 s[26:27], s[0:1], s17 7456; GFX9-NEXT: s_lshl_b64 s[2:3], s[18:19], s17 7457; GFX9-NEXT: s_or_b64 s[24:25], s[24:25], s[26:27] 7458; GFX9-NEXT: s_lshl_b64 s[18:19], s[18:19], s21 7459; GFX9-NEXT: s_cmp_lg_u32 s28, 0 7460; GFX9-NEXT: s_cselect_b64 s[2:3], s[2:3], 0 7461; GFX9-NEXT: s_cselect_b64 s[18:19], s[24:25], s[18:19] 7462; GFX9-NEXT: s_cmp_lg_u32 s29, 0 7463; GFX9-NEXT: s_cselect_b64 s[18:19], s[0:1], s[18:19] 7464; GFX9-NEXT: s_and_b32 s0, s16, 0x7f 7465; GFX9-NEXT: s_sub_i32 s21, s0, 64 7466; GFX9-NEXT: s_sub_i32 s22, 64, s0 7467; GFX9-NEXT: s_cmp_lt_u32 s0, 64 7468; GFX9-NEXT: s_cselect_b32 s26, 1, 0 7469; GFX9-NEXT: s_cmp_eq_u32 s0, 0 7470; GFX9-NEXT: s_cselect_b32 s27, 1, 0 7471; GFX9-NEXT: s_lshr_b64 s[0:1], s[10:11], s16 7472; GFX9-NEXT: s_lshr_b64 s[16:17], s[8:9], s16 7473; GFX9-NEXT: s_lshl_b64 s[24:25], s[10:11], s22 7474; GFX9-NEXT: s_or_b64 s[16:17], s[16:17], s[24:25] 7475; GFX9-NEXT: s_lshr_b64 s[10:11], s[10:11], s21 7476; GFX9-NEXT: s_cmp_lg_u32 s26, 0 7477; GFX9-NEXT: s_cselect_b64 s[10:11], s[16:17], s[10:11] 7478; GFX9-NEXT: s_cmp_lg_u32 s27, 0 7479; GFX9-NEXT: s_cselect_b64 s[8:9], s[8:9], s[10:11] 7480; GFX9-NEXT: s_cmp_lg_u32 s26, 0 7481; GFX9-NEXT: s_cselect_b64 s[10:11], s[0:1], 0 7482; GFX9-NEXT: s_lshl_b64 s[6:7], s[6:7], 1 
7483; GFX9-NEXT: s_lshr_b32 s22, s5, 31 7484; GFX9-NEXT: s_or_b64 s[0:1], s[2:3], s[8:9] 7485; GFX9-NEXT: s_lshl_b64 s[8:9], s[4:5], 1 7486; GFX9-NEXT: s_or_b64 s[4:5], s[6:7], s[22:23] 7487; GFX9-NEXT: s_andn2_b32 s6, 0x7f, s20 7488; GFX9-NEXT: s_or_b64 s[2:3], s[18:19], s[10:11] 7489; GFX9-NEXT: s_not_b32 s16, s20 7490; GFX9-NEXT: s_sub_i32 s18, s6, 64 7491; GFX9-NEXT: s_sub_i32 s10, 64, s6 7492; GFX9-NEXT: s_cmp_lt_u32 s6, 64 7493; GFX9-NEXT: s_cselect_b32 s19, 1, 0 7494; GFX9-NEXT: s_cmp_eq_u32 s6, 0 7495; GFX9-NEXT: s_cselect_b32 s21, 1, 0 7496; GFX9-NEXT: s_lshl_b64 s[6:7], s[8:9], s16 7497; GFX9-NEXT: s_lshr_b64 s[10:11], s[8:9], s10 7498; GFX9-NEXT: s_lshl_b64 s[16:17], s[4:5], s16 7499; GFX9-NEXT: s_or_b64 s[10:11], s[10:11], s[16:17] 7500; GFX9-NEXT: s_lshl_b64 s[8:9], s[8:9], s18 7501; GFX9-NEXT: s_cmp_lg_u32 s19, 0 7502; GFX9-NEXT: s_cselect_b64 s[6:7], s[6:7], 0 7503; GFX9-NEXT: s_cselect_b64 s[8:9], s[10:11], s[8:9] 7504; GFX9-NEXT: s_cmp_lg_u32 s21, 0 7505; GFX9-NEXT: s_cselect_b64 s[8:9], s[4:5], s[8:9] 7506; GFX9-NEXT: s_and_b32 s4, s20, 0x7f 7507; GFX9-NEXT: s_sub_i32 s18, s4, 64 7508; GFX9-NEXT: s_sub_i32 s16, 64, s4 7509; GFX9-NEXT: s_cmp_lt_u32 s4, 64 7510; GFX9-NEXT: s_cselect_b32 s19, 1, 0 7511; GFX9-NEXT: s_cmp_eq_u32 s4, 0 7512; GFX9-NEXT: s_cselect_b32 s21, 1, 0 7513; GFX9-NEXT: s_lshr_b64 s[10:11], s[12:13], s20 7514; GFX9-NEXT: s_lshl_b64 s[16:17], s[14:15], s16 7515; GFX9-NEXT: s_lshr_b64 s[4:5], s[14:15], s20 7516; GFX9-NEXT: s_or_b64 s[10:11], s[10:11], s[16:17] 7517; GFX9-NEXT: s_lshr_b64 s[14:15], s[14:15], s18 7518; GFX9-NEXT: s_cmp_lg_u32 s19, 0 7519; GFX9-NEXT: s_cselect_b64 s[10:11], s[10:11], s[14:15] 7520; GFX9-NEXT: s_cmp_lg_u32 s21, 0 7521; GFX9-NEXT: s_cselect_b64 s[10:11], s[12:13], s[10:11] 7522; GFX9-NEXT: s_cmp_lg_u32 s19, 0 7523; GFX9-NEXT: s_cselect_b64 s[12:13], s[4:5], 0 7524; GFX9-NEXT: s_or_b64 s[4:5], s[6:7], s[10:11] 7525; GFX9-NEXT: s_or_b64 s[6:7], s[8:9], s[12:13] 7526; GFX9-NEXT: ; return to shader part 
epilog 7527; 7528; GFX10-LABEL: s_fshr_v2i128: 7529; GFX10: ; %bb.0: 7530; GFX10-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 7531; GFX10-NEXT: s_lshr_b32 s18, s1, 31 7532; GFX10-NEXT: s_mov_b32 s19, 0 7533; GFX10-NEXT: s_andn2_b32 s17, 0x7f, s16 7534; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 7535; GFX10-NEXT: s_or_b64 s[2:3], s[2:3], s[18:19] 7536; GFX10-NEXT: s_not_b32 s18, s16 7537; GFX10-NEXT: s_sub_i32 s21, s17, 64 7538; GFX10-NEXT: s_sub_i32 s22, 64, s17 7539; GFX10-NEXT: s_cmp_lt_u32 s17, 64 7540; GFX10-NEXT: s_cselect_b32 s28, 1, 0 7541; GFX10-NEXT: s_cmp_eq_u32 s17, 0 7542; GFX10-NEXT: s_cselect_b32 s17, 1, 0 7543; GFX10-NEXT: s_lshr_b64 s[22:23], s[0:1], s22 7544; GFX10-NEXT: s_lshl_b64 s[24:25], s[2:3], s18 7545; GFX10-NEXT: s_lshl_b64 s[26:27], s[0:1], s18 7546; GFX10-NEXT: s_or_b64 s[22:23], s[22:23], s[24:25] 7547; GFX10-NEXT: s_lshl_b64 s[0:1], s[0:1], s21 7548; GFX10-NEXT: s_cmp_lg_u32 s28, 0 7549; GFX10-NEXT: s_cselect_b64 s[24:25], s[26:27], 0 7550; GFX10-NEXT: s_cselect_b64 s[0:1], s[22:23], s[0:1] 7551; GFX10-NEXT: s_cmp_lg_u32 s17, 0 7552; GFX10-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1] 7553; GFX10-NEXT: s_and_b32 s0, s16, 0x7f 7554; GFX10-NEXT: s_sub_i32 s18, s0, 64 7555; GFX10-NEXT: s_sub_i32 s17, 64, s0 7556; GFX10-NEXT: s_cmp_lt_u32 s0, 64 7557; GFX10-NEXT: s_cselect_b32 s21, 1, 0 7558; GFX10-NEXT: s_cmp_eq_u32 s0, 0 7559; GFX10-NEXT: s_cselect_b32 s26, 1, 0 7560; GFX10-NEXT: s_lshr_b64 s[0:1], s[8:9], s16 7561; GFX10-NEXT: s_lshl_b64 s[22:23], s[10:11], s17 7562; GFX10-NEXT: s_lshr_b64 s[16:17], s[10:11], s16 7563; GFX10-NEXT: s_or_b64 s[0:1], s[0:1], s[22:23] 7564; GFX10-NEXT: s_lshr_b64 s[10:11], s[10:11], s18 7565; GFX10-NEXT: s_cmp_lg_u32 s21, 0 7566; GFX10-NEXT: s_cselect_b64 s[0:1], s[0:1], s[10:11] 7567; GFX10-NEXT: s_cmp_lg_u32 s26, 0 7568; GFX10-NEXT: s_cselect_b64 s[0:1], s[8:9], s[0:1] 7569; GFX10-NEXT: s_cmp_lg_u32 s21, 0 7570; GFX10-NEXT: s_cselect_b64 s[8:9], s[16:17], 0 7571; GFX10-NEXT: s_lshl_b64 s[6:7], s[6:7], 1 7572; GFX10-NEXT: 
s_or_b64 s[2:3], s[2:3], s[8:9] 7573; GFX10-NEXT: s_lshr_b32 s18, s5, 31 7574; GFX10-NEXT: s_andn2_b32 s8, 0x7f, s20 7575; GFX10-NEXT: s_or_b64 s[0:1], s[24:25], s[0:1] 7576; GFX10-NEXT: s_lshl_b64 s[4:5], s[4:5], 1 7577; GFX10-NEXT: s_or_b64 s[6:7], s[6:7], s[18:19] 7578; GFX10-NEXT: s_not_b32 s16, s20 7579; GFX10-NEXT: s_sub_i32 s18, s8, 64 7580; GFX10-NEXT: s_sub_i32 s9, 64, s8 7581; GFX10-NEXT: s_cmp_lt_u32 s8, 64 7582; GFX10-NEXT: s_cselect_b32 s19, 1, 0 7583; GFX10-NEXT: s_cmp_eq_u32 s8, 0 7584; GFX10-NEXT: s_cselect_b32 s21, 1, 0 7585; GFX10-NEXT: s_lshr_b64 s[8:9], s[4:5], s9 7586; GFX10-NEXT: s_lshl_b64 s[10:11], s[6:7], s16 7587; GFX10-NEXT: s_lshl_b64 s[16:17], s[4:5], s16 7588; GFX10-NEXT: s_or_b64 s[8:9], s[8:9], s[10:11] 7589; GFX10-NEXT: s_lshl_b64 s[4:5], s[4:5], s18 7590; GFX10-NEXT: s_cmp_lg_u32 s19, 0 7591; GFX10-NEXT: s_cselect_b64 s[10:11], s[16:17], 0 7592; GFX10-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] 7593; GFX10-NEXT: s_cmp_lg_u32 s21, 0 7594; GFX10-NEXT: s_cselect_b64 s[6:7], s[6:7], s[4:5] 7595; GFX10-NEXT: s_and_b32 s4, s20, 0x7f 7596; GFX10-NEXT: s_sub_i32 s18, s4, 64 7597; GFX10-NEXT: s_sub_i32 s8, 64, s4 7598; GFX10-NEXT: s_cmp_lt_u32 s4, 64 7599; GFX10-NEXT: s_cselect_b32 s19, 1, 0 7600; GFX10-NEXT: s_cmp_eq_u32 s4, 0 7601; GFX10-NEXT: s_cselect_b32 s21, 1, 0 7602; GFX10-NEXT: s_lshr_b64 s[4:5], s[12:13], s20 7603; GFX10-NEXT: s_lshl_b64 s[8:9], s[14:15], s8 7604; GFX10-NEXT: s_lshr_b64 s[16:17], s[14:15], s20 7605; GFX10-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9] 7606; GFX10-NEXT: s_lshr_b64 s[8:9], s[14:15], s18 7607; GFX10-NEXT: s_cmp_lg_u32 s19, 0 7608; GFX10-NEXT: s_cselect_b64 s[4:5], s[4:5], s[8:9] 7609; GFX10-NEXT: s_cmp_lg_u32 s21, 0 7610; GFX10-NEXT: s_cselect_b64 s[4:5], s[12:13], s[4:5] 7611; GFX10-NEXT: s_cmp_lg_u32 s19, 0 7612; GFX10-NEXT: s_cselect_b64 s[8:9], s[16:17], 0 7613; GFX10-NEXT: s_or_b64 s[4:5], s[10:11], s[4:5] 7614; GFX10-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9] 7615; GFX10-NEXT: ; return to shader part epilog 
7616; 7617; GFX11-LABEL: s_fshr_v2i128: 7618; GFX11: ; %bb.0: 7619; GFX11-NEXT: s_lshl_b64 s[2:3], s[2:3], 1 7620; GFX11-NEXT: s_lshr_b32 s18, s1, 31 7621; GFX11-NEXT: s_mov_b32 s19, 0 7622; GFX11-NEXT: s_and_not1_b32 s17, 0x7f, s16 7623; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], 1 7624; GFX11-NEXT: s_or_b64 s[2:3], s[2:3], s[18:19] 7625; GFX11-NEXT: s_not_b32 s18, s16 7626; GFX11-NEXT: s_sub_i32 s21, s17, 64 7627; GFX11-NEXT: s_sub_i32 s22, 64, s17 7628; GFX11-NEXT: s_cmp_lt_u32 s17, 64 7629; GFX11-NEXT: s_cselect_b32 s28, 1, 0 7630; GFX11-NEXT: s_cmp_eq_u32 s17, 0 7631; GFX11-NEXT: s_cselect_b32 s17, 1, 0 7632; GFX11-NEXT: s_lshr_b64 s[22:23], s[0:1], s22 7633; GFX11-NEXT: s_lshl_b64 s[24:25], s[2:3], s18 7634; GFX11-NEXT: s_lshl_b64 s[26:27], s[0:1], s18 7635; GFX11-NEXT: s_or_b64 s[22:23], s[22:23], s[24:25] 7636; GFX11-NEXT: s_lshl_b64 s[0:1], s[0:1], s21 7637; GFX11-NEXT: s_cmp_lg_u32 s28, 0 7638; GFX11-NEXT: s_cselect_b64 s[24:25], s[26:27], 0 7639; GFX11-NEXT: s_cselect_b64 s[0:1], s[22:23], s[0:1] 7640; GFX11-NEXT: s_cmp_lg_u32 s17, 0 7641; GFX11-NEXT: s_cselect_b64 s[2:3], s[2:3], s[0:1] 7642; GFX11-NEXT: s_and_b32 s0, s16, 0x7f 7643; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 7644; GFX11-NEXT: s_sub_i32 s18, s0, 64 7645; GFX11-NEXT: s_sub_i32 s17, 64, s0 7646; GFX11-NEXT: s_cmp_lt_u32 s0, 64 7647; GFX11-NEXT: s_cselect_b32 s21, 1, 0 7648; GFX11-NEXT: s_cmp_eq_u32 s0, 0 7649; GFX11-NEXT: s_cselect_b32 s26, 1, 0 7650; GFX11-NEXT: s_lshr_b64 s[0:1], s[8:9], s16 7651; GFX11-NEXT: s_lshl_b64 s[22:23], s[10:11], s17 7652; GFX11-NEXT: s_lshr_b64 s[16:17], s[10:11], s16 7653; GFX11-NEXT: s_or_b64 s[0:1], s[0:1], s[22:23] 7654; GFX11-NEXT: s_lshr_b64 s[10:11], s[10:11], s18 7655; GFX11-NEXT: s_cmp_lg_u32 s21, 0 7656; GFX11-NEXT: s_cselect_b64 s[0:1], s[0:1], s[10:11] 7657; GFX11-NEXT: s_cmp_lg_u32 s26, 0 7658; GFX11-NEXT: s_cselect_b64 s[0:1], s[8:9], s[0:1] 7659; GFX11-NEXT: s_cmp_lg_u32 s21, 0 7660; GFX11-NEXT: s_cselect_b64 s[8:9], s[16:17], 0 7661; 
GFX11-NEXT: s_lshl_b64 s[6:7], s[6:7], 1 7662; GFX11-NEXT: s_or_b64 s[2:3], s[2:3], s[8:9] 7663; GFX11-NEXT: s_lshr_b32 s18, s5, 31 7664; GFX11-NEXT: s_and_not1_b32 s8, 0x7f, s20 7665; GFX11-NEXT: s_or_b64 s[0:1], s[24:25], s[0:1] 7666; GFX11-NEXT: s_lshl_b64 s[4:5], s[4:5], 1 7667; GFX11-NEXT: s_or_b64 s[6:7], s[6:7], s[18:19] 7668; GFX11-NEXT: s_not_b32 s16, s20 7669; GFX11-NEXT: s_sub_i32 s18, s8, 64 7670; GFX11-NEXT: s_sub_i32 s9, 64, s8 7671; GFX11-NEXT: s_cmp_lt_u32 s8, 64 7672; GFX11-NEXT: s_cselect_b32 s19, 1, 0 7673; GFX11-NEXT: s_cmp_eq_u32 s8, 0 7674; GFX11-NEXT: s_cselect_b32 s21, 1, 0 7675; GFX11-NEXT: s_lshr_b64 s[8:9], s[4:5], s9 7676; GFX11-NEXT: s_lshl_b64 s[10:11], s[6:7], s16 7677; GFX11-NEXT: s_lshl_b64 s[16:17], s[4:5], s16 7678; GFX11-NEXT: s_or_b64 s[8:9], s[8:9], s[10:11] 7679; GFX11-NEXT: s_lshl_b64 s[4:5], s[4:5], s18 7680; GFX11-NEXT: s_cmp_lg_u32 s19, 0 7681; GFX11-NEXT: s_cselect_b64 s[10:11], s[16:17], 0 7682; GFX11-NEXT: s_cselect_b64 s[4:5], s[8:9], s[4:5] 7683; GFX11-NEXT: s_cmp_lg_u32 s21, 0 7684; GFX11-NEXT: s_cselect_b64 s[6:7], s[6:7], s[4:5] 7685; GFX11-NEXT: s_and_b32 s4, s20, 0x7f 7686; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) 7687; GFX11-NEXT: s_sub_i32 s18, s4, 64 7688; GFX11-NEXT: s_sub_i32 s8, 64, s4 7689; GFX11-NEXT: s_cmp_lt_u32 s4, 64 7690; GFX11-NEXT: s_cselect_b32 s19, 1, 0 7691; GFX11-NEXT: s_cmp_eq_u32 s4, 0 7692; GFX11-NEXT: s_cselect_b32 s21, 1, 0 7693; GFX11-NEXT: s_lshr_b64 s[4:5], s[12:13], s20 7694; GFX11-NEXT: s_lshl_b64 s[8:9], s[14:15], s8 7695; GFX11-NEXT: s_lshr_b64 s[16:17], s[14:15], s20 7696; GFX11-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9] 7697; GFX11-NEXT: s_lshr_b64 s[8:9], s[14:15], s18 7698; GFX11-NEXT: s_cmp_lg_u32 s19, 0 7699; GFX11-NEXT: s_cselect_b64 s[4:5], s[4:5], s[8:9] 7700; GFX11-NEXT: s_cmp_lg_u32 s21, 0 7701; GFX11-NEXT: s_cselect_b64 s[4:5], s[12:13], s[4:5] 7702; GFX11-NEXT: s_cmp_lg_u32 s19, 0 7703; GFX11-NEXT: s_cselect_b64 s[8:9], s[16:17], 0 7704; GFX11-NEXT: s_or_b64 s[4:5], 
s[10:11], s[4:5] 7705; GFX11-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9] 7706; GFX11-NEXT: ; return to shader part epilog 7707 %result = call <2 x i128> @llvm.fshr.v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %amt) 7708 ret <2 x i128> %result 7709} 7710 7711define <2 x i128> @v_fshr_v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %amt) { 7712; GFX6-LABEL: v_fshr_v2i128: 7713; GFX6: ; %bb.0: 7714; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7715; GFX6-NEXT: v_lshl_b64 v[2:3], v[2:3], 1 7716; GFX6-NEXT: v_lshl_b64 v[17:18], v[0:1], 1 7717; GFX6-NEXT: v_lshrrev_b32_e32 v0, 31, v1 7718; GFX6-NEXT: v_or_b32_e32 v2, v2, v0 7719; GFX6-NEXT: v_not_b32_e32 v0, v16 7720; GFX6-NEXT: v_and_b32_e32 v19, 0x7f, v0 7721; GFX6-NEXT: v_sub_i32_e32 v0, vcc, 64, v19 7722; GFX6-NEXT: v_lshr_b64 v[0:1], v[17:18], v0 7723; GFX6-NEXT: v_lshl_b64 v[21:22], v[2:3], v19 7724; GFX6-NEXT: v_and_b32_e32 v25, 0x7f, v16 7725; GFX6-NEXT: v_or_b32_e32 v23, v0, v21 7726; GFX6-NEXT: v_sub_i32_e32 v0, vcc, 64, v25 7727; GFX6-NEXT: v_or_b32_e32 v24, v1, v22 7728; GFX6-NEXT: v_lshl_b64 v[0:1], v[10:11], v0 7729; GFX6-NEXT: v_lshr_b64 v[21:22], v[8:9], v25 7730; GFX6-NEXT: v_not_b32_e32 v26, 63 7731; GFX6-NEXT: v_or_b32_e32 v21, v21, v0 7732; GFX6-NEXT: v_add_i32_e32 v0, vcc, v19, v26 7733; GFX6-NEXT: v_or_b32_e32 v22, v22, v1 7734; GFX6-NEXT: v_lshl_b64 v[0:1], v[17:18], v0 7735; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v19 7736; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v23, vcc 7737; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v24, vcc 7738; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v19 7739; GFX6-NEXT: v_cndmask_b32_e64 v2, v0, v2, s[4:5] 7740; GFX6-NEXT: v_cndmask_b32_e64 v3, v1, v3, s[4:5] 7741; GFX6-NEXT: v_add_i32_e64 v0, s[4:5], v25, v26 7742; GFX6-NEXT: v_lshl_b64 v[16:17], v[17:18], v19 7743; GFX6-NEXT: v_lshr_b64 v[0:1], v[10:11], v0 7744; GFX6-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v25 7745; GFX6-NEXT: v_cndmask_b32_e32 v16, 0, v16, vcc 7746; GFX6-NEXT: v_cndmask_b32_e64 v18, v0, v21, s[4:5] 7747; 
GFX6-NEXT: v_cndmask_b32_e64 v19, v1, v22, s[4:5] 7748; GFX6-NEXT: v_cndmask_b32_e32 v17, 0, v17, vcc 7749; GFX6-NEXT: v_lshr_b64 v[0:1], v[10:11], v25 7750; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, 0, v25 7751; GFX6-NEXT: v_cndmask_b32_e32 v8, v18, v8, vcc 7752; GFX6-NEXT: v_cndmask_b32_e32 v9, v19, v9, vcc 7753; GFX6-NEXT: v_lshl_b64 v[6:7], v[6:7], 1 7754; GFX6-NEXT: v_cndmask_b32_e64 v10, 0, v0, s[4:5] 7755; GFX6-NEXT: v_cndmask_b32_e64 v18, 0, v1, s[4:5] 7756; GFX6-NEXT: v_or_b32_e32 v0, v16, v8 7757; GFX6-NEXT: v_or_b32_e32 v1, v17, v9 7758; GFX6-NEXT: v_lshl_b64 v[8:9], v[4:5], 1 7759; GFX6-NEXT: v_lshrrev_b32_e32 v4, 31, v5 7760; GFX6-NEXT: v_or_b32_e32 v6, v6, v4 7761; GFX6-NEXT: v_not_b32_e32 v4, v20 7762; GFX6-NEXT: v_and_b32_e32 v16, 0x7f, v4 7763; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 64, v16 7764; GFX6-NEXT: v_or_b32_e32 v2, v2, v10 7765; GFX6-NEXT: v_lshr_b64 v[4:5], v[8:9], v4 7766; GFX6-NEXT: v_lshl_b64 v[10:11], v[6:7], v16 7767; GFX6-NEXT: v_add_i32_e32 v17, vcc, v16, v26 7768; GFX6-NEXT: v_or_b32_e32 v10, v4, v10 7769; GFX6-NEXT: v_or_b32_e32 v11, v5, v11 7770; GFX6-NEXT: v_lshl_b64 v[4:5], v[8:9], v16 7771; GFX6-NEXT: v_lshl_b64 v[8:9], v[8:9], v17 7772; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v16 7773; GFX6-NEXT: v_or_b32_e32 v3, v3, v18 7774; GFX6-NEXT: v_cndmask_b32_e32 v17, 0, v4, vcc 7775; GFX6-NEXT: v_cndmask_b32_e32 v18, 0, v5, vcc 7776; GFX6-NEXT: v_cndmask_b32_e32 v4, v8, v10, vcc 7777; GFX6-NEXT: v_cndmask_b32_e32 v5, v9, v11, vcc 7778; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, 0, v16 7779; GFX6-NEXT: v_and_b32_e32 v10, 0x7f, v20 7780; GFX6-NEXT: v_cndmask_b32_e32 v8, v4, v6, vcc 7781; GFX6-NEXT: v_cndmask_b32_e32 v9, v5, v7, vcc 7782; GFX6-NEXT: v_sub_i32_e32 v6, vcc, 64, v10 7783; GFX6-NEXT: v_lshr_b64 v[4:5], v[12:13], v10 7784; GFX6-NEXT: v_lshl_b64 v[6:7], v[14:15], v6 7785; GFX6-NEXT: v_add_i32_e32 v11, vcc, v10, v26 7786; GFX6-NEXT: v_or_b32_e32 v16, v4, v6 7787; GFX6-NEXT: v_or_b32_e32 v19, v5, v7 7788; GFX6-NEXT: v_lshr_b64 v[6:7], v[14:15], v11 
7789; GFX6-NEXT: v_lshr_b64 v[4:5], v[14:15], v10 7790; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v10 7791; GFX6-NEXT: v_cndmask_b32_e32 v6, v6, v16, vcc 7792; GFX6-NEXT: v_cndmask_b32_e32 v7, v7, v19, vcc 7793; GFX6-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v10 7794; GFX6-NEXT: v_cndmask_b32_e64 v6, v6, v12, s[4:5] 7795; GFX6-NEXT: v_cndmask_b32_e64 v7, v7, v13, s[4:5] 7796; GFX6-NEXT: v_cndmask_b32_e32 v10, 0, v4, vcc 7797; GFX6-NEXT: v_cndmask_b32_e32 v11, 0, v5, vcc 7798; GFX6-NEXT: v_or_b32_e32 v4, v17, v6 7799; GFX6-NEXT: v_or_b32_e32 v5, v18, v7 7800; GFX6-NEXT: v_or_b32_e32 v6, v8, v10 7801; GFX6-NEXT: v_or_b32_e32 v7, v9, v11 7802; GFX6-NEXT: s_setpc_b64 s[30:31] 7803; 7804; GFX8-LABEL: v_fshr_v2i128: 7805; GFX8: ; %bb.0: 7806; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7807; GFX8-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] 7808; GFX8-NEXT: v_lshlrev_b64 v[17:18], 1, v[0:1] 7809; GFX8-NEXT: v_lshrrev_b32_e32 v0, 31, v1 7810; GFX8-NEXT: v_or_b32_e32 v2, v2, v0 7811; GFX8-NEXT: v_not_b32_e32 v0, v16 7812; GFX8-NEXT: v_and_b32_e32 v19, 0x7f, v0 7813; GFX8-NEXT: v_sub_u32_e32 v0, vcc, 64, v19 7814; GFX8-NEXT: v_lshrrev_b64 v[0:1], v0, v[17:18] 7815; GFX8-NEXT: v_lshlrev_b64 v[21:22], v19, v[2:3] 7816; GFX8-NEXT: v_and_b32_e32 v25, 0x7f, v16 7817; GFX8-NEXT: v_or_b32_e32 v23, v0, v21 7818; GFX8-NEXT: v_sub_u32_e32 v0, vcc, 64, v25 7819; GFX8-NEXT: v_or_b32_e32 v24, v1, v22 7820; GFX8-NEXT: v_lshlrev_b64 v[0:1], v0, v[10:11] 7821; GFX8-NEXT: v_lshrrev_b64 v[21:22], v25, v[8:9] 7822; GFX8-NEXT: v_not_b32_e32 v26, 63 7823; GFX8-NEXT: v_or_b32_e32 v21, v21, v0 7824; GFX8-NEXT: v_add_u32_e32 v0, vcc, v19, v26 7825; GFX8-NEXT: v_or_b32_e32 v22, v22, v1 7826; GFX8-NEXT: v_lshlrev_b64 v[0:1], v0, v[17:18] 7827; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v19 7828; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v23, vcc 7829; GFX8-NEXT: v_cndmask_b32_e32 v1, v1, v24, vcc 7830; GFX8-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v19 7831; GFX8-NEXT: v_cndmask_b32_e64 v2, v0, v2, s[4:5] 7832; GFX8-NEXT: 
v_cndmask_b32_e64 v3, v1, v3, s[4:5] 7833; GFX8-NEXT: v_add_u32_e64 v0, s[4:5], v25, v26 7834; GFX8-NEXT: v_lshlrev_b64 v[16:17], v19, v[17:18] 7835; GFX8-NEXT: v_lshrrev_b64 v[0:1], v0, v[10:11] 7836; GFX8-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v25 7837; GFX8-NEXT: v_cndmask_b32_e32 v16, 0, v16, vcc 7838; GFX8-NEXT: v_cndmask_b32_e64 v18, v0, v21, s[4:5] 7839; GFX8-NEXT: v_cndmask_b32_e64 v19, v1, v22, s[4:5] 7840; GFX8-NEXT: v_cndmask_b32_e32 v17, 0, v17, vcc 7841; GFX8-NEXT: v_lshrrev_b64 v[0:1], v25, v[10:11] 7842; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v25 7843; GFX8-NEXT: v_cndmask_b32_e32 v8, v18, v8, vcc 7844; GFX8-NEXT: v_cndmask_b32_e32 v9, v19, v9, vcc 7845; GFX8-NEXT: v_lshlrev_b64 v[6:7], 1, v[6:7] 7846; GFX8-NEXT: v_cndmask_b32_e64 v10, 0, v0, s[4:5] 7847; GFX8-NEXT: v_cndmask_b32_e64 v18, 0, v1, s[4:5] 7848; GFX8-NEXT: v_or_b32_e32 v0, v16, v8 7849; GFX8-NEXT: v_or_b32_e32 v1, v17, v9 7850; GFX8-NEXT: v_lshlrev_b64 v[8:9], 1, v[4:5] 7851; GFX8-NEXT: v_lshrrev_b32_e32 v4, 31, v5 7852; GFX8-NEXT: v_or_b32_e32 v6, v6, v4 7853; GFX8-NEXT: v_not_b32_e32 v4, v20 7854; GFX8-NEXT: v_and_b32_e32 v16, 0x7f, v4 7855; GFX8-NEXT: v_sub_u32_e32 v4, vcc, 64, v16 7856; GFX8-NEXT: v_or_b32_e32 v2, v2, v10 7857; GFX8-NEXT: v_lshrrev_b64 v[4:5], v4, v[8:9] 7858; GFX8-NEXT: v_lshlrev_b64 v[10:11], v16, v[6:7] 7859; GFX8-NEXT: v_add_u32_e32 v17, vcc, v16, v26 7860; GFX8-NEXT: v_or_b32_e32 v10, v4, v10 7861; GFX8-NEXT: v_or_b32_e32 v11, v5, v11 7862; GFX8-NEXT: v_lshlrev_b64 v[4:5], v16, v[8:9] 7863; GFX8-NEXT: v_lshlrev_b64 v[8:9], v17, v[8:9] 7864; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v16 7865; GFX8-NEXT: v_or_b32_e32 v3, v3, v18 7866; GFX8-NEXT: v_cndmask_b32_e32 v17, 0, v4, vcc 7867; GFX8-NEXT: v_cndmask_b32_e32 v18, 0, v5, vcc 7868; GFX8-NEXT: v_cndmask_b32_e32 v4, v8, v10, vcc 7869; GFX8-NEXT: v_cndmask_b32_e32 v5, v9, v11, vcc 7870; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v16 7871; GFX8-NEXT: v_and_b32_e32 v10, 0x7f, v20 7872; GFX8-NEXT: v_cndmask_b32_e32 v8, v4, v6, vcc 
7873; GFX8-NEXT: v_cndmask_b32_e32 v9, v5, v7, vcc 7874; GFX8-NEXT: v_sub_u32_e32 v6, vcc, 64, v10 7875; GFX8-NEXT: v_lshrrev_b64 v[4:5], v10, v[12:13] 7876; GFX8-NEXT: v_lshlrev_b64 v[6:7], v6, v[14:15] 7877; GFX8-NEXT: v_add_u32_e32 v11, vcc, v10, v26 7878; GFX8-NEXT: v_or_b32_e32 v16, v4, v6 7879; GFX8-NEXT: v_or_b32_e32 v19, v5, v7 7880; GFX8-NEXT: v_lshrrev_b64 v[6:7], v11, v[14:15] 7881; GFX8-NEXT: v_lshrrev_b64 v[4:5], v10, v[14:15] 7882; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v10 7883; GFX8-NEXT: v_cndmask_b32_e32 v6, v6, v16, vcc 7884; GFX8-NEXT: v_cndmask_b32_e32 v7, v7, v19, vcc 7885; GFX8-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v10 7886; GFX8-NEXT: v_cndmask_b32_e64 v6, v6, v12, s[4:5] 7887; GFX8-NEXT: v_cndmask_b32_e64 v7, v7, v13, s[4:5] 7888; GFX8-NEXT: v_cndmask_b32_e32 v10, 0, v4, vcc 7889; GFX8-NEXT: v_cndmask_b32_e32 v11, 0, v5, vcc 7890; GFX8-NEXT: v_or_b32_e32 v4, v17, v6 7891; GFX8-NEXT: v_or_b32_e32 v5, v18, v7 7892; GFX8-NEXT: v_or_b32_e32 v6, v8, v10 7893; GFX8-NEXT: v_or_b32_e32 v7, v9, v11 7894; GFX8-NEXT: s_setpc_b64 s[30:31] 7895; 7896; GFX9-LABEL: v_fshr_v2i128: 7897; GFX9: ; %bb.0: 7898; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7899; GFX9-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] 7900; GFX9-NEXT: v_lshlrev_b64 v[17:18], 1, v[0:1] 7901; GFX9-NEXT: v_lshrrev_b32_e32 v0, 31, v1 7902; GFX9-NEXT: v_or_b32_e32 v2, v2, v0 7903; GFX9-NEXT: v_not_b32_e32 v0, v16 7904; GFX9-NEXT: v_and_b32_e32 v19, 0x7f, v0 7905; GFX9-NEXT: v_sub_u32_e32 v0, 64, v19 7906; GFX9-NEXT: v_lshrrev_b64 v[0:1], v0, v[17:18] 7907; GFX9-NEXT: v_lshlrev_b64 v[21:22], v19, v[2:3] 7908; GFX9-NEXT: v_and_b32_e32 v25, 0x7f, v16 7909; GFX9-NEXT: v_or_b32_e32 v23, v0, v21 7910; GFX9-NEXT: v_sub_u32_e32 v0, 64, v25 7911; GFX9-NEXT: v_or_b32_e32 v24, v1, v22 7912; GFX9-NEXT: v_lshlrev_b64 v[0:1], v0, v[10:11] 7913; GFX9-NEXT: v_lshrrev_b64 v[21:22], v25, v[8:9] 7914; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v19 7915; GFX9-NEXT: v_or_b32_e32 v21, v21, v0 7916; GFX9-NEXT: v_add_u32_e32 
v0, 0xffffffc0, v19 7917; GFX9-NEXT: v_or_b32_e32 v22, v22, v1 7918; GFX9-NEXT: v_lshlrev_b64 v[0:1], v0, v[17:18] 7919; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v19 7920; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v23, vcc 7921; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v24, vcc 7922; GFX9-NEXT: v_cndmask_b32_e64 v2, v0, v2, s[4:5] 7923; GFX9-NEXT: v_add_u32_e32 v0, 0xffffffc0, v25 7924; GFX9-NEXT: v_lshlrev_b64 v[16:17], v19, v[17:18] 7925; GFX9-NEXT: v_cndmask_b32_e64 v3, v1, v3, s[4:5] 7926; GFX9-NEXT: v_lshrrev_b64 v[0:1], v0, v[10:11] 7927; GFX9-NEXT: v_cmp_gt_u32_e64 s[4:5], 64, v25 7928; GFX9-NEXT: v_cndmask_b32_e32 v16, 0, v16, vcc 7929; GFX9-NEXT: v_cndmask_b32_e64 v18, v0, v21, s[4:5] 7930; GFX9-NEXT: v_cndmask_b32_e64 v19, v1, v22, s[4:5] 7931; GFX9-NEXT: v_cndmask_b32_e32 v17, 0, v17, vcc 7932; GFX9-NEXT: v_lshrrev_b64 v[0:1], v25, v[10:11] 7933; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v25 7934; GFX9-NEXT: v_cndmask_b32_e32 v8, v18, v8, vcc 7935; GFX9-NEXT: v_cndmask_b32_e32 v9, v19, v9, vcc 7936; GFX9-NEXT: v_lshlrev_b64 v[6:7], 1, v[6:7] 7937; GFX9-NEXT: v_cndmask_b32_e64 v10, 0, v0, s[4:5] 7938; GFX9-NEXT: v_cndmask_b32_e64 v11, 0, v1, s[4:5] 7939; GFX9-NEXT: v_or_b32_e32 v0, v16, v8 7940; GFX9-NEXT: v_or_b32_e32 v1, v17, v9 7941; GFX9-NEXT: v_lshlrev_b64 v[8:9], 1, v[4:5] 7942; GFX9-NEXT: v_lshrrev_b32_e32 v4, 31, v5 7943; GFX9-NEXT: v_or_b32_e32 v6, v6, v4 7944; GFX9-NEXT: v_not_b32_e32 v4, v20 7945; GFX9-NEXT: v_and_b32_e32 v16, 0x7f, v4 7946; GFX9-NEXT: v_sub_u32_e32 v4, 64, v16 7947; GFX9-NEXT: v_or_b32_e32 v2, v2, v10 7948; GFX9-NEXT: v_or_b32_e32 v3, v3, v11 7949; GFX9-NEXT: v_lshrrev_b64 v[4:5], v4, v[8:9] 7950; GFX9-NEXT: v_lshlrev_b64 v[10:11], v16, v[6:7] 7951; GFX9-NEXT: v_add_u32_e32 v17, 0xffffffc0, v16 7952; GFX9-NEXT: v_or_b32_e32 v10, v4, v10 7953; GFX9-NEXT: v_or_b32_e32 v11, v5, v11 7954; GFX9-NEXT: v_lshlrev_b64 v[4:5], v16, v[8:9] 7955; GFX9-NEXT: v_lshlrev_b64 v[8:9], v17, v[8:9] 7956; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v16 7957; GFX9-NEXT: 
v_cndmask_b32_e32 v17, 0, v4, vcc 7958; GFX9-NEXT: v_cndmask_b32_e32 v18, 0, v5, vcc 7959; GFX9-NEXT: v_cndmask_b32_e32 v4, v8, v10, vcc 7960; GFX9-NEXT: v_cndmask_b32_e32 v5, v9, v11, vcc 7961; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v16 7962; GFX9-NEXT: v_and_b32_e32 v10, 0x7f, v20 7963; GFX9-NEXT: v_cndmask_b32_e32 v8, v4, v6, vcc 7964; GFX9-NEXT: v_sub_u32_e32 v6, 64, v10 7965; GFX9-NEXT: v_cndmask_b32_e32 v9, v5, v7, vcc 7966; GFX9-NEXT: v_lshrrev_b64 v[4:5], v10, v[12:13] 7967; GFX9-NEXT: v_lshlrev_b64 v[6:7], v6, v[14:15] 7968; GFX9-NEXT: v_add_u32_e32 v11, 0xffffffc0, v10 7969; GFX9-NEXT: v_or_b32_e32 v16, v4, v6 7970; GFX9-NEXT: v_or_b32_e32 v19, v5, v7 7971; GFX9-NEXT: v_lshrrev_b64 v[6:7], v11, v[14:15] 7972; GFX9-NEXT: v_lshrrev_b64 v[4:5], v10, v[14:15] 7973; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v10 7974; GFX9-NEXT: v_cndmask_b32_e32 v6, v6, v16, vcc 7975; GFX9-NEXT: v_cndmask_b32_e32 v7, v7, v19, vcc 7976; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v10 7977; GFX9-NEXT: v_cndmask_b32_e64 v6, v6, v12, s[4:5] 7978; GFX9-NEXT: v_cndmask_b32_e64 v7, v7, v13, s[4:5] 7979; GFX9-NEXT: v_cndmask_b32_e32 v10, 0, v4, vcc 7980; GFX9-NEXT: v_cndmask_b32_e32 v11, 0, v5, vcc 7981; GFX9-NEXT: v_or_b32_e32 v4, v17, v6 7982; GFX9-NEXT: v_or_b32_e32 v5, v18, v7 7983; GFX9-NEXT: v_or_b32_e32 v6, v8, v10 7984; GFX9-NEXT: v_or_b32_e32 v7, v9, v11 7985; GFX9-NEXT: s_setpc_b64 s[30:31] 7986; 7987; GFX10-LABEL: v_fshr_v2i128: 7988; GFX10: ; %bb.0: 7989; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7990; GFX10-NEXT: v_not_b32_e32 v17, v16 7991; GFX10-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] 7992; GFX10-NEXT: v_and_b32_e32 v26, 0x7f, v16 7993; GFX10-NEXT: v_lshlrev_b64 v[6:7], 1, v[6:7] 7994; GFX10-NEXT: v_and_b32_e32 v25, 0x7f, v17 7995; GFX10-NEXT: v_lshrrev_b32_e32 v17, 31, v1 7996; GFX10-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 7997; GFX10-NEXT: v_add_nc_u32_e32 v27, 0xffffffc0, v26 7998; GFX10-NEXT: v_cmp_gt_u32_e64 s4, 64, v26 7999; GFX10-NEXT: v_sub_nc_u32_e32 v18, 64, v25 
8000; GFX10-NEXT: v_or_b32_e32 v2, v2, v17 8001; GFX10-NEXT: v_add_nc_u32_e32 v19, 0xffffffc0, v25 8002; GFX10-NEXT: v_lshlrev_b64 v[23:24], v25, v[0:1] 8003; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v25 8004; GFX10-NEXT: v_lshrrev_b64 v[17:18], v18, v[0:1] 8005; GFX10-NEXT: v_lshlrev_b64 v[21:22], v25, v[2:3] 8006; GFX10-NEXT: v_lshlrev_b64 v[0:1], v19, v[0:1] 8007; GFX10-NEXT: v_cndmask_b32_e32 v23, 0, v23, vcc_lo 8008; GFX10-NEXT: v_cndmask_b32_e32 v24, 0, v24, vcc_lo 8009; GFX10-NEXT: v_or_b32_e32 v22, v18, v22 8010; GFX10-NEXT: v_sub_nc_u32_e32 v18, 64, v26 8011; GFX10-NEXT: v_or_b32_e32 v21, v17, v21 8012; GFX10-NEXT: v_lshrrev_b64 v[16:17], v26, v[8:9] 8013; GFX10-NEXT: v_cndmask_b32_e32 v22, v1, v22, vcc_lo 8014; GFX10-NEXT: v_lshlrev_b64 v[18:19], v18, v[10:11] 8015; GFX10-NEXT: v_cndmask_b32_e32 v21, v0, v21, vcc_lo 8016; GFX10-NEXT: v_lshrrev_b64 v[0:1], v27, v[10:11] 8017; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v25 8018; GFX10-NEXT: v_or_b32_e32 v16, v16, v18 8019; GFX10-NEXT: v_or_b32_e32 v17, v17, v19 8020; GFX10-NEXT: v_cndmask_b32_e32 v18, v21, v2, vcc_lo 8021; GFX10-NEXT: v_cndmask_b32_e32 v22, v22, v3, vcc_lo 8022; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v26 8023; GFX10-NEXT: v_cndmask_b32_e64 v0, v0, v16, s4 8024; GFX10-NEXT: v_not_b32_e32 v16, v20 8025; GFX10-NEXT: v_cndmask_b32_e64 v1, v1, v17, s4 8026; GFX10-NEXT: v_lshrrev_b64 v[2:3], v26, v[10:11] 8027; GFX10-NEXT: v_cndmask_b32_e32 v0, v0, v8, vcc_lo 8028; GFX10-NEXT: v_and_b32_e32 v25, 0x7f, v16 8029; GFX10-NEXT: v_lshrrev_b32_e32 v8, 31, v5 8030; GFX10-NEXT: v_lshlrev_b64 v[4:5], 1, v[4:5] 8031; GFX10-NEXT: v_cndmask_b32_e32 v1, v1, v9, vcc_lo 8032; GFX10-NEXT: v_or_b32_e32 v0, v23, v0 8033; GFX10-NEXT: v_sub_nc_u32_e32 v9, 64, v25 8034; GFX10-NEXT: v_or_b32_e32 v6, v6, v8 8035; GFX10-NEXT: v_and_b32_e32 v23, 0x7f, v20 8036; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, v2, s4 8037; GFX10-NEXT: v_cndmask_b32_e64 v26, 0, v3, s4 8038; GFX10-NEXT: v_lshrrev_b64 v[8:9], v9, v[4:5] 8039; GFX10-NEXT: 
v_lshlrev_b64 v[10:11], v25, v[6:7] 8040; GFX10-NEXT: v_sub_nc_u32_e32 v20, 64, v23 8041; GFX10-NEXT: v_add_nc_u32_e32 v3, 0xffffffc0, v25 8042; GFX10-NEXT: v_or_b32_e32 v2, v18, v2 8043; GFX10-NEXT: v_lshlrev_b64 v[16:17], v25, v[4:5] 8044; GFX10-NEXT: v_lshrrev_b64 v[18:19], v23, v[12:13] 8045; GFX10-NEXT: v_or_b32_e32 v10, v8, v10 8046; GFX10-NEXT: v_add_nc_u32_e32 v8, 0xffffffc0, v23 8047; GFX10-NEXT: v_lshlrev_b64 v[20:21], v20, v[14:15] 8048; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v25 8049; GFX10-NEXT: v_lshlrev_b64 v[3:4], v3, v[4:5] 8050; GFX10-NEXT: v_or_b32_e32 v5, v9, v11 8051; GFX10-NEXT: v_lshrrev_b64 v[8:9], v8, v[14:15] 8052; GFX10-NEXT: v_cmp_gt_u32_e64 s4, 64, v23 8053; GFX10-NEXT: v_cndmask_b32_e32 v11, 0, v16, vcc_lo 8054; GFX10-NEXT: v_or_b32_e32 v16, v18, v20 8055; GFX10-NEXT: v_or_b32_e32 v18, v19, v21 8056; GFX10-NEXT: v_cndmask_b32_e32 v10, v3, v10, vcc_lo 8057; GFX10-NEXT: v_cndmask_b32_e32 v5, v4, v5, vcc_lo 8058; GFX10-NEXT: v_lshrrev_b64 v[3:4], v23, v[14:15] 8059; GFX10-NEXT: v_cndmask_b32_e64 v8, v8, v16, s4 8060; GFX10-NEXT: v_cmp_eq_u32_e64 s5, 0, v23 8061; GFX10-NEXT: v_cmp_eq_u32_e64 s6, 0, v25 8062; GFX10-NEXT: v_cndmask_b32_e64 v9, v9, v18, s4 8063; GFX10-NEXT: v_cndmask_b32_e32 v14, 0, v17, vcc_lo 8064; GFX10-NEXT: v_or_b32_e32 v1, v24, v1 8065; GFX10-NEXT: v_cndmask_b32_e64 v6, v10, v6, s6 8066; GFX10-NEXT: v_cndmask_b32_e64 v7, v5, v7, s6 8067; GFX10-NEXT: v_cndmask_b32_e64 v5, v8, v12, s5 8068; GFX10-NEXT: v_cndmask_b32_e64 v8, v9, v13, s5 8069; GFX10-NEXT: v_cndmask_b32_e64 v9, 0, v3, s4 8070; GFX10-NEXT: v_cndmask_b32_e64 v10, 0, v4, s4 8071; GFX10-NEXT: v_or_b32_e32 v3, v22, v26 8072; GFX10-NEXT: v_or_b32_e32 v4, v11, v5 8073; GFX10-NEXT: v_or_b32_e32 v5, v14, v8 8074; GFX10-NEXT: v_or_b32_e32 v6, v6, v9 8075; GFX10-NEXT: v_or_b32_e32 v7, v7, v10 8076; GFX10-NEXT: s_setpc_b64 s[30:31] 8077; 8078; GFX11-LABEL: v_fshr_v2i128: 8079; GFX11: ; %bb.0: 8080; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8081; GFX11-NEXT: 
v_not_b32_e32 v17, v16 8082; GFX11-NEXT: v_lshlrev_b64 v[2:3], 1, v[2:3] 8083; GFX11-NEXT: v_lshlrev_b64 v[6:7], 1, v[6:7] 8084; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_3) 8085; GFX11-NEXT: v_and_b32_e32 v25, 0x7f, v17 8086; GFX11-NEXT: v_lshrrev_b32_e32 v17, 31, v1 8087; GFX11-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1] 8088; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v25 8089; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 8090; GFX11-NEXT: v_or_b32_e32 v2, v2, v17 8091; GFX11-NEXT: v_lshlrev_b64 v[23:24], v25, v[0:1] 8092; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2) 8093; GFX11-NEXT: v_dual_cndmask_b32 v23, 0, v23 :: v_dual_and_b32 v26, 0x7f, v16 8094; GFX11-NEXT: v_cndmask_b32_e32 v24, 0, v24, vcc_lo 8095; GFX11-NEXT: v_sub_nc_u32_e32 v18, 64, v25 8096; GFX11-NEXT: v_lshlrev_b64 v[21:22], v25, v[2:3] 8097; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) 8098; GFX11-NEXT: v_cmp_gt_u32_e64 s0, 64, v26 8099; GFX11-NEXT: v_lshrrev_b64 v[17:18], v18, v[0:1] 8100; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_3) 8101; GFX11-NEXT: v_or_b32_e32 v22, v18, v22 8102; GFX11-NEXT: v_add_nc_u32_e32 v19, 0xffffffc0, v25 8103; GFX11-NEXT: v_or_b32_e32 v21, v17, v21 8104; GFX11-NEXT: v_sub_nc_u32_e32 v18, 64, v26 8105; GFX11-NEXT: v_lshrrev_b64 v[16:17], v26, v[8:9] 8106; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_3) 8107; GFX11-NEXT: v_lshlrev_b64 v[0:1], v19, v[0:1] 8108; GFX11-NEXT: v_lshlrev_b64 v[18:19], v18, v[10:11] 8109; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_4) 8110; GFX11-NEXT: v_dual_cndmask_b32 v22, v1, v22 :: v_dual_cndmask_b32 v21, v0, v21 8111; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v25 8112; GFX11-NEXT: v_add_nc_u32_e32 v27, 0xffffffc0, v26 8113; GFX11-NEXT: v_or_b32_e32 v16, v16, v18 8114; GFX11-NEXT: 
v_or_b32_e32 v17, v17, v19 8115; GFX11-NEXT: v_cndmask_b32_e32 v22, v22, v3, vcc_lo 8116; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1) 8117; GFX11-NEXT: v_lshrrev_b64 v[0:1], v27, v[10:11] 8118; GFX11-NEXT: v_cndmask_b32_e64 v0, v0, v16, s0 8119; GFX11-NEXT: v_not_b32_e32 v16, v20 8120; GFX11-NEXT: v_cndmask_b32_e32 v18, v21, v2, vcc_lo 8121; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v26 8122; GFX11-NEXT: v_cndmask_b32_e64 v1, v1, v17, s0 8123; GFX11-NEXT: v_lshrrev_b64 v[2:3], v26, v[10:11] 8124; GFX11-NEXT: v_and_b32_e32 v25, 0x7f, v16 8125; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4) 8126; GFX11-NEXT: v_dual_cndmask_b32 v0, v0, v8 :: v_dual_cndmask_b32 v1, v1, v9 8127; GFX11-NEXT: v_lshrrev_b32_e32 v8, 31, v5 8128; GFX11-NEXT: v_lshlrev_b64 v[4:5], 1, v[4:5] 8129; GFX11-NEXT: v_sub_nc_u32_e32 v9, 64, v25 8130; GFX11-NEXT: v_cndmask_b32_e64 v26, 0, v3, s0 8131; GFX11-NEXT: v_add_nc_u32_e32 v3, 0xffffffc0, v25 8132; GFX11-NEXT: v_or_b32_e32 v6, v6, v8 8133; GFX11-NEXT: v_or_b32_e32 v0, v23, v0 8134; GFX11-NEXT: v_lshrrev_b64 v[8:9], v9, v[4:5] 8135; GFX11-NEXT: v_lshlrev_b64 v[16:17], v25, v[4:5] 8136; GFX11-NEXT: v_lshlrev_b64 v[3:4], v3, v[4:5] 8137; GFX11-NEXT: v_lshlrev_b64 v[10:11], v25, v[6:7] 8138; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v25 8139; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, v2, s0 8140; GFX11-NEXT: v_cmp_eq_u32_e64 s2, 0, v25 8141; GFX11-NEXT: v_or_b32_e32 v1, v24, v1 8142; GFX11-NEXT: v_or_b32_e32 v10, v8, v10 8143; GFX11-NEXT: v_and_b32_e32 v23, 0x7f, v20 8144; GFX11-NEXT: v_or_b32_e32 v2, v18, v2 8145; GFX11-NEXT: v_or_b32_e32 v5, v9, v11 8146; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 8147; GFX11-NEXT: v_dual_cndmask_b32 v11, 0, v16 :: v_dual_cndmask_b32 v10, v3, v10 8148; GFX11-NEXT: v_sub_nc_u32_e32 v20, 64, v23 8149; GFX11-NEXT: v_add_nc_u32_e32 v8, 0xffffffc0, v23 8150; GFX11-NEXT: v_lshrrev_b64 v[18:19], v23, v[12:13] 
8151; GFX11-NEXT: v_cmp_gt_u32_e64 s0, 64, v23 8152; GFX11-NEXT: v_cndmask_b32_e32 v5, v4, v5, vcc_lo 8153; GFX11-NEXT: v_lshlrev_b64 v[20:21], v20, v[14:15] 8154; GFX11-NEXT: v_lshrrev_b64 v[8:9], v8, v[14:15] 8155; GFX11-NEXT: v_lshrrev_b64 v[3:4], v23, v[14:15] 8156; GFX11-NEXT: v_cndmask_b32_e32 v14, 0, v17, vcc_lo 8157; GFX11-NEXT: v_cmp_eq_u32_e64 s1, 0, v23 8158; GFX11-NEXT: v_cndmask_b32_e64 v6, v10, v6, s2 8159; GFX11-NEXT: v_or_b32_e32 v16, v18, v20 8160; GFX11-NEXT: v_or_b32_e32 v18, v19, v21 8161; GFX11-NEXT: v_cndmask_b32_e64 v7, v5, v7, s2 8162; GFX11-NEXT: v_cndmask_b32_e64 v10, 0, v4, s0 8163; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 8164; GFX11-NEXT: v_cndmask_b32_e64 v8, v8, v16, s0 8165; GFX11-NEXT: v_cndmask_b32_e64 v9, v9, v18, s0 8166; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) 8167; GFX11-NEXT: v_or_b32_e32 v7, v7, v10 8168; GFX11-NEXT: v_cndmask_b32_e64 v5, v8, v12, s1 8169; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_2) | instid1(VALU_DEP_4) 8170; GFX11-NEXT: v_cndmask_b32_e64 v8, v9, v13, s1 8171; GFX11-NEXT: v_cndmask_b32_e64 v9, 0, v3, s0 8172; GFX11-NEXT: v_or_b32_e32 v3, v22, v26 8173; GFX11-NEXT: v_or_b32_e32 v4, v11, v5 8174; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4) 8175; GFX11-NEXT: v_or_b32_e32 v5, v14, v8 8176; GFX11-NEXT: v_or_b32_e32 v6, v6, v9 8177; GFX11-NEXT: s_setpc_b64 s[30:31] 8178 %result = call <2 x i128> @llvm.fshr.v2i128(<2 x i128> %lhs, <2 x i128> %rhs, <2 x i128> %amt) 8179 ret <2 x i128> %result 8180} 8181 8182declare i7 @llvm.fshr.i7(i7, i7, i7) #0 8183declare i8 @llvm.fshr.i8(i8, i8, i8) #0 8184declare <2 x i8> @llvm.fshr.v2i8(<2 x i8>, <2 x i8>, <2 x i8>) #0 8185declare <4 x i8> @llvm.fshr.v4i8(<4 x i8>, <4 x i8>, <4 x i8>) #0 8186 8187declare i16 @llvm.fshr.i16(i16, i16, i16) #0 8188declare <2 x i16> @llvm.fshr.v2i16(<2 x i16>, <2 x i16>, <2 x i16>) #0 8189declare <3 x i16> 
@llvm.fshr.v3i16(<3 x i16>, <3 x i16>, <3 x i16>) #0 8190declare <4 x i16> @llvm.fshr.v4i16(<4 x i16>, <4 x i16>, <4 x i16>) #0 8191declare <5 x i16> @llvm.fshr.v5i16(<5 x i16>, <5 x i16>, <5 x i16>) #0 8192declare <6 x i16> @llvm.fshr.v6i16(<6 x i16>, <6 x i16>, <6 x i16>) #0 8193declare <8 x i16> @llvm.fshr.v8i16(<8 x i16>, <8 x i16>, <8 x i16>) #0 8194 8195declare i24 @llvm.fshr.i24(i24, i24, i24) #0 8196declare <2 x i24> @llvm.fshr.v2i24(<2 x i24>, <2 x i24>, <2 x i24>) #0 8197 8198declare i32 @llvm.fshr.i32(i32, i32, i32) #0 8199declare <2 x i32> @llvm.fshr.v2i32(<2 x i32>, <2 x i32>, <2 x i32>) #0 8200declare <3 x i32> @llvm.fshr.v3i32(<3 x i32>, <3 x i32>, <3 x i32>) #0 8201declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>) #0 8202declare <5 x i32> @llvm.fshr.v5i32(<5 x i32>, <5 x i32>, <5 x i32>) #0 8203declare <16 x i32> @llvm.fshr.v16i32(<16 x i32>, <16 x i32>, <16 x i32>) #0 8204 8205declare i48 @llvm.fshr.i48(i48, i48, i48) #0 8206 8207declare i64 @llvm.fshr.i64(i64, i64, i64) #0 8208declare <2 x i64> @llvm.fshr.v2i64(<2 x i64>, <2 x i64>, <2 x i64>) #0 8209 8210declare i128 @llvm.fshr.i128(i128, i128, i128) #0 8211declare <2 x i128> @llvm.fshr.v2i128(<2 x i128>, <2 x i128>, <2 x i128>) #0 8212 8213attributes #0 = { nounwind readnone speculatable willreturn } 8214
; Every @llvm.fshr.* declaration on the line above (scalar i24/i32/i48/i64/i128
; and the listed vector forms) is tagged with attribute group #0, which that
; same line defines as: nounwind readnone speculatable willreturn.
; NOTE(review): the bare integers fused into the text (8190 ... 8214) look like
; line numbers left over from extraction rather than IR tokens -- confirm and
; strip them before feeding this file to llc/FileCheck.