1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6 %s 3; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8 %s 4; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s 5; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s 6; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s 7 8define i8 @v_ashr_i8(i8 %value, i8 %amount) { 9; GFX6-LABEL: v_ashr_i8: 10; GFX6: ; %bb.0: 11; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12; GFX6-NEXT: v_and_b32_e32 v1, 0xff, v1 13; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 8 14; GFX6-NEXT: v_ashrrev_i32_e32 v0, v1, v0 15; GFX6-NEXT: s_setpc_b64 s[30:31] 16; 17; GFX8-LABEL: v_ashr_i8: 18; GFX8: ; %bb.0: 19; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 20; GFX8-NEXT: v_lshlrev_b16_e32 v0, 8, v0 21; GFX8-NEXT: v_ashrrev_i16_sdwa v0, v1, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_1 22; GFX8-NEXT: s_setpc_b64 s[30:31] 23; 24; GFX9-LABEL: v_ashr_i8: 25; GFX9: ; %bb.0: 26; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 27; GFX9-NEXT: v_ashrrev_i16_sdwa v0, v1, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:BYTE_0 28; GFX9-NEXT: s_setpc_b64 s[30:31] 29; 30; GFX10PLUS-LABEL: v_ashr_i8: 31; GFX10PLUS: ; %bb.0: 32; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 33; GFX10PLUS-NEXT: v_and_b32_e32 v1, 0xff, v1 34; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 8 35; GFX10PLUS-NEXT: v_ashrrev_i16 v0, v1, v0 36; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] 37 %result = ashr i8 %value, %amount 38 ret i8 %result 39} 40 41define i8 @v_ashr_i8_7(i8 %value) { 42; GFX6-LABEL: v_ashr_i8_7: 43; GFX6: ; %bb.0: 44; 
GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 45; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 8 46; GFX6-NEXT: v_ashrrev_i32_e32 v0, 7, v0 47; GFX6-NEXT: s_setpc_b64 s[30:31] 48; 49; GFX8-LABEL: v_ashr_i8_7: 50; GFX8: ; %bb.0: 51; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 52; GFX8-NEXT: v_lshlrev_b16_e32 v0, 8, v0 53; GFX8-NEXT: v_ashrrev_i16_e32 v0, 15, v0 54; GFX8-NEXT: s_setpc_b64 s[30:31] 55; 56; GFX9-LABEL: v_ashr_i8_7: 57; GFX9: ; %bb.0: 58; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 59; GFX9-NEXT: v_mov_b32_e32 v1, 7 60; GFX9-NEXT: v_ashrrev_i16_sdwa v0, v1, sext(v0) dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 61; GFX9-NEXT: s_setpc_b64 s[30:31] 62; 63; GFX10PLUS-LABEL: v_ashr_i8_7: 64; GFX10PLUS: ; %bb.0: 65; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 66; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 8 67; GFX10PLUS-NEXT: v_ashrrev_i16 v0, 7, v0 68; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] 69 %result = ashr i8 %value, 7 70 ret i8 %result 71} 72 73define amdgpu_ps i8 @s_ashr_i8(i8 inreg %value, i8 inreg %amount) { 74; GFX6-LABEL: s_ashr_i8: 75; GFX6: ; %bb.0: 76; GFX6-NEXT: s_sext_i32_i8 s0, s0 77; GFX6-NEXT: s_ashr_i32 s0, s0, s1 78; GFX6-NEXT: ; return to shader part epilog 79; 80; GFX8-LABEL: s_ashr_i8: 81; GFX8: ; %bb.0: 82; GFX8-NEXT: s_sext_i32_i8 s0, s0 83; GFX8-NEXT: s_sext_i32_i8 s1, s1 84; GFX8-NEXT: s_ashr_i32 s0, s0, s1 85; GFX8-NEXT: ; return to shader part epilog 86; 87; GFX9-LABEL: s_ashr_i8: 88; GFX9: ; %bb.0: 89; GFX9-NEXT: s_sext_i32_i8 s0, s0 90; GFX9-NEXT: s_sext_i32_i8 s1, s1 91; GFX9-NEXT: s_ashr_i32 s0, s0, s1 92; GFX9-NEXT: ; return to shader part epilog 93; 94; GFX10PLUS-LABEL: s_ashr_i8: 95; GFX10PLUS: ; %bb.0: 96; GFX10PLUS-NEXT: s_sext_i32_i8 s0, s0 97; GFX10PLUS-NEXT: s_sext_i32_i8 s1, s1 98; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, s1 99; GFX10PLUS-NEXT: ; return to shader part epilog 100 %result = ashr i8 %value, %amount 101 ret i8 %result 102} 103 104define amdgpu_ps i8 @s_ashr_i8_7(i8 inreg %value) 
{ 105; GCN-LABEL: s_ashr_i8_7: 106; GCN: ; %bb.0: 107; GCN-NEXT: s_sext_i32_i8 s0, s0 108; GCN-NEXT: s_ashr_i32 s0, s0, 7 109; GCN-NEXT: ; return to shader part epilog 110; 111; GFX10PLUS-LABEL: s_ashr_i8_7: 112; GFX10PLUS: ; %bb.0: 113; GFX10PLUS-NEXT: s_sext_i32_i8 s0, s0 114; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, 7 115; GFX10PLUS-NEXT: ; return to shader part epilog 116 %result = ashr i8 %value, 7 117 ret i8 %result 118} 119 120 121define i24 @v_ashr_i24(i24 %value, i24 %amount) { 122; GCN-LABEL: v_ashr_i24: 123; GCN: ; %bb.0: 124; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 125; GCN-NEXT: v_and_b32_e32 v1, 0xffffff, v1 126; GCN-NEXT: v_bfe_i32 v0, v0, 0, 24 127; GCN-NEXT: v_ashrrev_i32_e32 v0, v1, v0 128; GCN-NEXT: s_setpc_b64 s[30:31] 129; 130; GFX10PLUS-LABEL: v_ashr_i24: 131; GFX10PLUS: ; %bb.0: 132; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 133; GFX10PLUS-NEXT: v_and_b32_e32 v1, 0xffffff, v1 134; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 24 135; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v0, v1, v0 136; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] 137 %result = ashr i24 %value, %amount 138 ret i24 %result 139} 140 141define i24 @v_ashr_i24_7(i24 %value) { 142; GCN-LABEL: v_ashr_i24_7: 143; GCN: ; %bb.0: 144; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 145; GCN-NEXT: v_bfe_i32 v0, v0, 0, 24 146; GCN-NEXT: v_ashrrev_i32_e32 v0, 7, v0 147; GCN-NEXT: s_setpc_b64 s[30:31] 148; 149; GFX10PLUS-LABEL: v_ashr_i24_7: 150; GFX10PLUS: ; %bb.0: 151; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 152; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 24 153; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v0, 7, v0 154; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] 155 %result = ashr i24 %value, 7 156 ret i24 %result 157} 158 159define amdgpu_ps i24 @s_ashr_i24(i24 inreg %value, i24 inreg %amount) { 160; GCN-LABEL: s_ashr_i24: 161; GCN: ; %bb.0: 162; GCN-NEXT: s_bfe_i32 s0, s0, 0x180000 163; GCN-NEXT: s_ashr_i32 s0, s0, s1 164; GCN-NEXT: ; return to shader part epilog 165; 166; GFX10PLUS-LABEL: 
s_ashr_i24: 167; GFX10PLUS: ; %bb.0: 168; GFX10PLUS-NEXT: s_bfe_i32 s0, s0, 0x180000 169; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, s1 170; GFX10PLUS-NEXT: ; return to shader part epilog 171 %result = ashr i24 %value, %amount 172 ret i24 %result 173} 174 175define amdgpu_ps i24 @s_ashr_i24_7(i24 inreg %value) { 176; GCN-LABEL: s_ashr_i24_7: 177; GCN: ; %bb.0: 178; GCN-NEXT: s_bfe_i32 s0, s0, 0x180000 179; GCN-NEXT: s_ashr_i32 s0, s0, 7 180; GCN-NEXT: ; return to shader part epilog 181; 182; GFX10PLUS-LABEL: s_ashr_i24_7: 183; GFX10PLUS: ; %bb.0: 184; GFX10PLUS-NEXT: s_bfe_i32 s0, s0, 0x180000 185; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, 7 186; GFX10PLUS-NEXT: ; return to shader part epilog 187 %result = ashr i24 %value, 7 188 ret i24 %result 189} 190 191define i32 @v_ashr_i32(i32 %value, i32 %amount) { 192; GCN-LABEL: v_ashr_i32: 193; GCN: ; %bb.0: 194; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 195; GCN-NEXT: v_ashrrev_i32_e32 v0, v1, v0 196; GCN-NEXT: s_setpc_b64 s[30:31] 197; 198; GFX10PLUS-LABEL: v_ashr_i32: 199; GFX10PLUS: ; %bb.0: 200; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 201; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v0, v1, v0 202; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] 203 %result = ashr i32 %value, %amount 204 ret i32 %result 205} 206 207define i32 @v_ashr_i32_31(i32 %value) { 208; GCN-LABEL: v_ashr_i32_31: 209; GCN: ; %bb.0: 210; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 211; GCN-NEXT: v_ashrrev_i32_e32 v0, 31, v0 212; GCN-NEXT: s_setpc_b64 s[30:31] 213; 214; GFX10PLUS-LABEL: v_ashr_i32_31: 215; GFX10PLUS: ; %bb.0: 216; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 217; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v0, 31, v0 218; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] 219 %result = ashr i32 %value, 31 220 ret i32 %result 221} 222 223define amdgpu_ps i32 @s_ashr_i32(i32 inreg %value, i32 inreg %amount) { 224; GCN-LABEL: s_ashr_i32: 225; GCN: ; %bb.0: 226; GCN-NEXT: s_ashr_i32 s0, s0, s1 227; GCN-NEXT: ; return to shader part epilog 228; 229; 
GFX10PLUS-LABEL: s_ashr_i32: 230; GFX10PLUS: ; %bb.0: 231; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, s1 232; GFX10PLUS-NEXT: ; return to shader part epilog 233 %result = ashr i32 %value, %amount 234 ret i32 %result 235} 236 237define amdgpu_ps i32 @s_ashr_i32_31(i32 inreg %value) { 238; GCN-LABEL: s_ashr_i32_31: 239; GCN: ; %bb.0: 240; GCN-NEXT: s_ashr_i32 s0, s0, 31 241; GCN-NEXT: ; return to shader part epilog 242; 243; GFX10PLUS-LABEL: s_ashr_i32_31: 244; GFX10PLUS: ; %bb.0: 245; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, 31 246; GFX10PLUS-NEXT: ; return to shader part epilog 247 %result = ashr i32 %value, 31 248 ret i32 %result 249} 250 251define amdgpu_ps float @ashr_i32_sv(i32 inreg %value, i32 %amount) { 252; GFX6-LABEL: ashr_i32_sv: 253; GFX6: ; %bb.0: 254; GFX6-NEXT: v_ashr_i32_e32 v0, s0, v0 255; GFX6-NEXT: ; return to shader part epilog 256; 257; GFX8-LABEL: ashr_i32_sv: 258; GFX8: ; %bb.0: 259; GFX8-NEXT: v_ashrrev_i32_e64 v0, v0, s0 260; GFX8-NEXT: ; return to shader part epilog 261; 262; GFX9-LABEL: ashr_i32_sv: 263; GFX9: ; %bb.0: 264; GFX9-NEXT: v_ashrrev_i32_e64 v0, v0, s0 265; GFX9-NEXT: ; return to shader part epilog 266; 267; GFX10PLUS-LABEL: ashr_i32_sv: 268; GFX10PLUS: ; %bb.0: 269; GFX10PLUS-NEXT: v_ashrrev_i32_e64 v0, v0, s0 270; GFX10PLUS-NEXT: ; return to shader part epilog 271 %result = ashr i32 %value, %amount 272 %cast = bitcast i32 %result to float 273 ret float %cast 274} 275 276define amdgpu_ps float @ashr_i32_vs(i32 %value, i32 inreg %amount) { 277; GCN-LABEL: ashr_i32_vs: 278; GCN: ; %bb.0: 279; GCN-NEXT: v_ashrrev_i32_e32 v0, s0, v0 280; GCN-NEXT: ; return to shader part epilog 281; 282; GFX10PLUS-LABEL: ashr_i32_vs: 283; GFX10PLUS: ; %bb.0: 284; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v0, s0, v0 285; GFX10PLUS-NEXT: ; return to shader part epilog 286 %result = ashr i32 %value, %amount 287 %cast = bitcast i32 %result to float 288 ret float %cast 289} 290 291define <2 x i32> @v_ashr_v2i32(<2 x i32> %value, <2 x i32> %amount) { 292; GCN-LABEL: 
v_ashr_v2i32: 293; GCN: ; %bb.0: 294; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 295; GCN-NEXT: v_ashrrev_i32_e32 v0, v2, v0 296; GCN-NEXT: v_ashrrev_i32_e32 v1, v3, v1 297; GCN-NEXT: s_setpc_b64 s[30:31] 298; 299; GFX10PLUS-LABEL: v_ashr_v2i32: 300; GFX10PLUS: ; %bb.0: 301; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 302; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v0, v2, v0 303; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v1, v3, v1 304; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] 305 %result = ashr <2 x i32> %value, %amount 306 ret <2 x i32> %result 307} 308 309define <2 x i32> @v_ashr_v2i32_31(<2 x i32> %value) { 310; GCN-LABEL: v_ashr_v2i32_31: 311; GCN: ; %bb.0: 312; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 313; GCN-NEXT: v_ashrrev_i32_e32 v0, 31, v0 314; GCN-NEXT: v_ashrrev_i32_e32 v1, 31, v1 315; GCN-NEXT: s_setpc_b64 s[30:31] 316; 317; GFX10PLUS-LABEL: v_ashr_v2i32_31: 318; GFX10PLUS: ; %bb.0: 319; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 320; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v0, 31, v0 321; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v1, 31, v1 322; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] 323 %result = ashr <2 x i32> %value, <i32 31, i32 31> 324 ret <2 x i32> %result 325} 326 327define amdgpu_ps <2 x i32> @s_ashr_v2i32(<2 x i32> inreg %value, <2 x i32> inreg %amount) { 328; GCN-LABEL: s_ashr_v2i32: 329; GCN: ; %bb.0: 330; GCN-NEXT: s_ashr_i32 s0, s0, s2 331; GCN-NEXT: s_ashr_i32 s1, s1, s3 332; GCN-NEXT: ; return to shader part epilog 333; 334; GFX10PLUS-LABEL: s_ashr_v2i32: 335; GFX10PLUS: ; %bb.0: 336; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, s2 337; GFX10PLUS-NEXT: s_ashr_i32 s1, s1, s3 338; GFX10PLUS-NEXT: ; return to shader part epilog 339 %result = ashr <2 x i32> %value, %amount 340 ret <2 x i32> %result 341} 342 343define <3 x i32> @v_ashr_v3i32(<3 x i32> %value, <3 x i32> %amount) { 344; GCN-LABEL: v_ashr_v3i32: 345; GCN: ; %bb.0: 346; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 347; GCN-NEXT: v_ashrrev_i32_e32 v0, v3, v0 348; GCN-NEXT: 
v_ashrrev_i32_e32 v1, v4, v1 349; GCN-NEXT: v_ashrrev_i32_e32 v2, v5, v2 350; GCN-NEXT: s_setpc_b64 s[30:31] 351; 352; GFX10PLUS-LABEL: v_ashr_v3i32: 353; GFX10PLUS: ; %bb.0: 354; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 355; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v0, v3, v0 356; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v1, v4, v1 357; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v2, v5, v2 358; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] 359 %result = ashr <3 x i32> %value, %amount 360 ret <3 x i32> %result 361} 362 363define amdgpu_ps <3 x i32> @s_ashr_v3i32(<3 x i32> inreg %value, <3 x i32> inreg %amount) { 364; GCN-LABEL: s_ashr_v3i32: 365; GCN: ; %bb.0: 366; GCN-NEXT: s_ashr_i32 s0, s0, s3 367; GCN-NEXT: s_ashr_i32 s1, s1, s4 368; GCN-NEXT: s_ashr_i32 s2, s2, s5 369; GCN-NEXT: ; return to shader part epilog 370; 371; GFX10PLUS-LABEL: s_ashr_v3i32: 372; GFX10PLUS: ; %bb.0: 373; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, s3 374; GFX10PLUS-NEXT: s_ashr_i32 s1, s1, s4 375; GFX10PLUS-NEXT: s_ashr_i32 s2, s2, s5 376; GFX10PLUS-NEXT: ; return to shader part epilog 377 %result = ashr <3 x i32> %value, %amount 378 ret <3 x i32> %result 379} 380 381define <4 x i32> @v_ashr_v4i32(<4 x i32> %value, <4 x i32> %amount) { 382; GCN-LABEL: v_ashr_v4i32: 383; GCN: ; %bb.0: 384; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 385; GCN-NEXT: v_ashrrev_i32_e32 v0, v4, v0 386; GCN-NEXT: v_ashrrev_i32_e32 v1, v5, v1 387; GCN-NEXT: v_ashrrev_i32_e32 v2, v6, v2 388; GCN-NEXT: v_ashrrev_i32_e32 v3, v7, v3 389; GCN-NEXT: s_setpc_b64 s[30:31] 390; 391; GFX10PLUS-LABEL: v_ashr_v4i32: 392; GFX10PLUS: ; %bb.0: 393; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 394; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v0, v4, v0 395; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v1, v5, v1 396; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v2, v6, v2 397; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v3, v7, v3 398; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] 399 %result = ashr <4 x i32> %value, %amount 400 ret <4 x i32> %result 401} 402 403define amdgpu_ps <4 x 
i32> @s_ashr_v4i32(<4 x i32> inreg %value, <4 x i32> inreg %amount) { 404; GCN-LABEL: s_ashr_v4i32: 405; GCN: ; %bb.0: 406; GCN-NEXT: s_ashr_i32 s0, s0, s4 407; GCN-NEXT: s_ashr_i32 s1, s1, s5 408; GCN-NEXT: s_ashr_i32 s2, s2, s6 409; GCN-NEXT: s_ashr_i32 s3, s3, s7 410; GCN-NEXT: ; return to shader part epilog 411; 412; GFX10PLUS-LABEL: s_ashr_v4i32: 413; GFX10PLUS: ; %bb.0: 414; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, s4 415; GFX10PLUS-NEXT: s_ashr_i32 s1, s1, s5 416; GFX10PLUS-NEXT: s_ashr_i32 s2, s2, s6 417; GFX10PLUS-NEXT: s_ashr_i32 s3, s3, s7 418; GFX10PLUS-NEXT: ; return to shader part epilog 419 %result = ashr <4 x i32> %value, %amount 420 ret <4 x i32> %result 421} 422 423define <5 x i32> @v_ashr_v5i32(<5 x i32> %value, <5 x i32> %amount) { 424; GCN-LABEL: v_ashr_v5i32: 425; GCN: ; %bb.0: 426; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 427; GCN-NEXT: v_ashrrev_i32_e32 v0, v5, v0 428; GCN-NEXT: v_ashrrev_i32_e32 v1, v6, v1 429; GCN-NEXT: v_ashrrev_i32_e32 v2, v7, v2 430; GCN-NEXT: v_ashrrev_i32_e32 v3, v8, v3 431; GCN-NEXT: v_ashrrev_i32_e32 v4, v9, v4 432; GCN-NEXT: s_setpc_b64 s[30:31] 433; 434; GFX10PLUS-LABEL: v_ashr_v5i32: 435; GFX10PLUS: ; %bb.0: 436; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 437; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v0, v5, v0 438; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v1, v6, v1 439; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v2, v7, v2 440; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v3, v8, v3 441; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v4, v9, v4 442; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] 443 %result = ashr <5 x i32> %value, %amount 444 ret <5 x i32> %result 445} 446 447define amdgpu_ps <5 x i32> @s_ashr_v5i32(<5 x i32> inreg %value, <5 x i32> inreg %amount) { 448; GCN-LABEL: s_ashr_v5i32: 449; GCN: ; %bb.0: 450; GCN-NEXT: s_ashr_i32 s0, s0, s5 451; GCN-NEXT: s_ashr_i32 s1, s1, s6 452; GCN-NEXT: s_ashr_i32 s2, s2, s7 453; GCN-NEXT: s_ashr_i32 s3, s3, s8 454; GCN-NEXT: s_ashr_i32 s4, s4, s9 455; GCN-NEXT: ; return to shader part epilog 456; 457; 
GFX10PLUS-LABEL: s_ashr_v5i32: 458; GFX10PLUS: ; %bb.0: 459; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, s5 460; GFX10PLUS-NEXT: s_ashr_i32 s1, s1, s6 461; GFX10PLUS-NEXT: s_ashr_i32 s2, s2, s7 462; GFX10PLUS-NEXT: s_ashr_i32 s3, s3, s8 463; GFX10PLUS-NEXT: s_ashr_i32 s4, s4, s9 464; GFX10PLUS-NEXT: ; return to shader part epilog 465 %result = ashr <5 x i32> %value, %amount 466 ret <5 x i32> %result 467} 468 469define <16 x i32> @v_ashr_v16i32(<16 x i32> %value, <16 x i32> %amount) { 470; GCN-LABEL: v_ashr_v16i32: 471; GCN: ; %bb.0: 472; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 473; GCN-NEXT: v_ashrrev_i32_e32 v0, v16, v0 474; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s32 475; GCN-NEXT: v_ashrrev_i32_e32 v1, v17, v1 476; GCN-NEXT: v_ashrrev_i32_e32 v2, v18, v2 477; GCN-NEXT: v_ashrrev_i32_e32 v3, v19, v3 478; GCN-NEXT: v_ashrrev_i32_e32 v4, v20, v4 479; GCN-NEXT: v_ashrrev_i32_e32 v5, v21, v5 480; GCN-NEXT: v_ashrrev_i32_e32 v6, v22, v6 481; GCN-NEXT: v_ashrrev_i32_e32 v7, v23, v7 482; GCN-NEXT: v_ashrrev_i32_e32 v8, v24, v8 483; GCN-NEXT: v_ashrrev_i32_e32 v9, v25, v9 484; GCN-NEXT: v_ashrrev_i32_e32 v10, v26, v10 485; GCN-NEXT: v_ashrrev_i32_e32 v11, v27, v11 486; GCN-NEXT: v_ashrrev_i32_e32 v12, v28, v12 487; GCN-NEXT: v_ashrrev_i32_e32 v13, v29, v13 488; GCN-NEXT: v_ashrrev_i32_e32 v14, v30, v14 489; GCN-NEXT: s_waitcnt vmcnt(0) 490; GCN-NEXT: v_ashrrev_i32_e32 v15, v16, v15 491; GCN-NEXT: s_setpc_b64 s[30:31] 492; 493; GFX10-LABEL: v_ashr_v16i32: 494; GFX10: ; %bb.0: 495; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 496; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32 497; GFX10-NEXT: v_ashrrev_i32_e32 v0, v16, v0 498; GFX10-NEXT: v_ashrrev_i32_e32 v1, v17, v1 499; GFX10-NEXT: v_ashrrev_i32_e32 v2, v18, v2 500; GFX10-NEXT: v_ashrrev_i32_e32 v3, v19, v3 501; GFX10-NEXT: v_ashrrev_i32_e32 v4, v20, v4 502; GFX10-NEXT: v_ashrrev_i32_e32 v5, v21, v5 503; GFX10-NEXT: v_ashrrev_i32_e32 v6, v22, v6 504; GFX10-NEXT: v_ashrrev_i32_e32 v7, v23, v7 505; 
GFX10-NEXT: v_ashrrev_i32_e32 v8, v24, v8 506; GFX10-NEXT: v_ashrrev_i32_e32 v9, v25, v9 507; GFX10-NEXT: v_ashrrev_i32_e32 v10, v26, v10 508; GFX10-NEXT: v_ashrrev_i32_e32 v11, v27, v11 509; GFX10-NEXT: v_ashrrev_i32_e32 v12, v28, v12 510; GFX10-NEXT: v_ashrrev_i32_e32 v13, v29, v13 511; GFX10-NEXT: v_ashrrev_i32_e32 v14, v30, v14 512; GFX10-NEXT: s_waitcnt vmcnt(0) 513; GFX10-NEXT: v_ashrrev_i32_e32 v15, v31, v15 514; GFX10-NEXT: s_setpc_b64 s[30:31] 515; 516; GFX11-LABEL: v_ashr_v16i32: 517; GFX11: ; %bb.0: 518; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 519; GFX11-NEXT: scratch_load_b32 v31, off, s32 520; GFX11-NEXT: v_ashrrev_i32_e32 v0, v16, v0 521; GFX11-NEXT: v_ashrrev_i32_e32 v1, v17, v1 522; GFX11-NEXT: v_ashrrev_i32_e32 v2, v18, v2 523; GFX11-NEXT: v_ashrrev_i32_e32 v3, v19, v3 524; GFX11-NEXT: v_ashrrev_i32_e32 v4, v20, v4 525; GFX11-NEXT: v_ashrrev_i32_e32 v5, v21, v5 526; GFX11-NEXT: v_ashrrev_i32_e32 v6, v22, v6 527; GFX11-NEXT: v_ashrrev_i32_e32 v7, v23, v7 528; GFX11-NEXT: v_ashrrev_i32_e32 v8, v24, v8 529; GFX11-NEXT: v_ashrrev_i32_e32 v9, v25, v9 530; GFX11-NEXT: v_ashrrev_i32_e32 v10, v26, v10 531; GFX11-NEXT: v_ashrrev_i32_e32 v11, v27, v11 532; GFX11-NEXT: v_ashrrev_i32_e32 v12, v28, v12 533; GFX11-NEXT: v_ashrrev_i32_e32 v13, v29, v13 534; GFX11-NEXT: v_ashrrev_i32_e32 v14, v30, v14 535; GFX11-NEXT: s_waitcnt vmcnt(0) 536; GFX11-NEXT: v_ashrrev_i32_e32 v15, v31, v15 537; GFX11-NEXT: s_setpc_b64 s[30:31] 538 %result = ashr <16 x i32> %value, %amount 539 ret <16 x i32> %result 540} 541 542define amdgpu_ps <16 x i32> @s_ashr_v16i32(<16 x i32> inreg %value, <16 x i32> inreg %amount) { 543; GCN-LABEL: s_ashr_v16i32: 544; GCN: ; %bb.0: 545; GCN-NEXT: s_ashr_i32 s0, s0, s16 546; GCN-NEXT: s_ashr_i32 s1, s1, s17 547; GCN-NEXT: s_ashr_i32 s2, s2, s18 548; GCN-NEXT: s_ashr_i32 s3, s3, s19 549; GCN-NEXT: s_ashr_i32 s4, s4, s20 550; GCN-NEXT: s_ashr_i32 s5, s5, s21 551; GCN-NEXT: s_ashr_i32 s6, s6, s22 552; GCN-NEXT: s_ashr_i32 s7, s7, s23 553; 
GCN-NEXT: s_ashr_i32 s8, s8, s24 554; GCN-NEXT: s_ashr_i32 s9, s9, s25 555; GCN-NEXT: s_ashr_i32 s10, s10, s26 556; GCN-NEXT: s_ashr_i32 s11, s11, s27 557; GCN-NEXT: s_ashr_i32 s12, s12, s28 558; GCN-NEXT: s_ashr_i32 s13, s13, s29 559; GCN-NEXT: s_ashr_i32 s14, s14, s30 560; GCN-NEXT: s_ashr_i32 s15, s15, s31 561; GCN-NEXT: ; return to shader part epilog 562; 563; GFX10PLUS-LABEL: s_ashr_v16i32: 564; GFX10PLUS: ; %bb.0: 565; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, s16 566; GFX10PLUS-NEXT: s_ashr_i32 s1, s1, s17 567; GFX10PLUS-NEXT: s_ashr_i32 s2, s2, s18 568; GFX10PLUS-NEXT: s_ashr_i32 s3, s3, s19 569; GFX10PLUS-NEXT: s_ashr_i32 s4, s4, s20 570; GFX10PLUS-NEXT: s_ashr_i32 s5, s5, s21 571; GFX10PLUS-NEXT: s_ashr_i32 s6, s6, s22 572; GFX10PLUS-NEXT: s_ashr_i32 s7, s7, s23 573; GFX10PLUS-NEXT: s_ashr_i32 s8, s8, s24 574; GFX10PLUS-NEXT: s_ashr_i32 s9, s9, s25 575; GFX10PLUS-NEXT: s_ashr_i32 s10, s10, s26 576; GFX10PLUS-NEXT: s_ashr_i32 s11, s11, s27 577; GFX10PLUS-NEXT: s_ashr_i32 s12, s12, s28 578; GFX10PLUS-NEXT: s_ashr_i32 s13, s13, s29 579; GFX10PLUS-NEXT: s_ashr_i32 s14, s14, s30 580; GFX10PLUS-NEXT: s_ashr_i32 s15, s15, s31 581; GFX10PLUS-NEXT: ; return to shader part epilog 582 %result = ashr <16 x i32> %value, %amount 583 ret <16 x i32> %result 584} 585 586define i16 @v_ashr_i16(i16 %value, i16 %amount) { 587; GFX6-LABEL: v_ashr_i16: 588; GFX6: ; %bb.0: 589; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 590; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1 591; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16 592; GFX6-NEXT: v_ashrrev_i32_e32 v0, v1, v0 593; GFX6-NEXT: s_setpc_b64 s[30:31] 594; 595; GFX8-LABEL: v_ashr_i16: 596; GFX8: ; %bb.0: 597; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 598; GFX8-NEXT: v_ashrrev_i16_e32 v0, v1, v0 599; GFX8-NEXT: s_setpc_b64 s[30:31] 600; 601; GFX9-LABEL: v_ashr_i16: 602; GFX9: ; %bb.0: 603; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 604; GFX9-NEXT: v_ashrrev_i16_e32 v0, v1, v0 605; GFX9-NEXT: s_setpc_b64 s[30:31] 606; 607; 
GFX10PLUS-LABEL: v_ashr_i16: 608; GFX10PLUS: ; %bb.0: 609; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 610; GFX10PLUS-NEXT: v_ashrrev_i16 v0, v1, v0 611; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] 612 %result = ashr i16 %value, %amount 613 ret i16 %result 614} 615 616define i16 @v_ashr_i16_15(i16 %value) { 617; GFX6-LABEL: v_ashr_i16_15: 618; GFX6: ; %bb.0: 619; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 620; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16 621; GFX6-NEXT: v_ashrrev_i32_e32 v0, 15, v0 622; GFX6-NEXT: s_setpc_b64 s[30:31] 623; 624; GFX8-LABEL: v_ashr_i16_15: 625; GFX8: ; %bb.0: 626; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 627; GFX8-NEXT: v_ashrrev_i16_e32 v0, 15, v0 628; GFX8-NEXT: s_setpc_b64 s[30:31] 629; 630; GFX9-LABEL: v_ashr_i16_15: 631; GFX9: ; %bb.0: 632; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 633; GFX9-NEXT: v_ashrrev_i16_e32 v0, 15, v0 634; GFX9-NEXT: s_setpc_b64 s[30:31] 635; 636; GFX10PLUS-LABEL: v_ashr_i16_15: 637; GFX10PLUS: ; %bb.0: 638; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 639; GFX10PLUS-NEXT: v_ashrrev_i16 v0, 15, v0 640; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] 641 %result = ashr i16 %value, 15 642 ret i16 %result 643} 644 645define amdgpu_ps i16 @s_ashr_i16(i16 inreg %value, i16 inreg %amount) { 646; GFX6-LABEL: s_ashr_i16: 647; GFX6: ; %bb.0: 648; GFX6-NEXT: s_sext_i32_i16 s0, s0 649; GFX6-NEXT: s_ashr_i32 s0, s0, s1 650; GFX6-NEXT: ; return to shader part epilog 651; 652; GFX8-LABEL: s_ashr_i16: 653; GFX8: ; %bb.0: 654; GFX8-NEXT: s_sext_i32_i16 s0, s0 655; GFX8-NEXT: s_sext_i32_i16 s1, s1 656; GFX8-NEXT: s_ashr_i32 s0, s0, s1 657; GFX8-NEXT: ; return to shader part epilog 658; 659; GFX9-LABEL: s_ashr_i16: 660; GFX9: ; %bb.0: 661; GFX9-NEXT: s_sext_i32_i16 s0, s0 662; GFX9-NEXT: s_sext_i32_i16 s1, s1 663; GFX9-NEXT: s_ashr_i32 s0, s0, s1 664; GFX9-NEXT: ; return to shader part epilog 665; 666; GFX10PLUS-LABEL: s_ashr_i16: 667; GFX10PLUS: ; %bb.0: 668; GFX10PLUS-NEXT: s_sext_i32_i16 s0, s0 
669; GFX10PLUS-NEXT: s_sext_i32_i16 s1, s1 670; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, s1 671; GFX10PLUS-NEXT: ; return to shader part epilog 672 %result = ashr i16 %value, %amount 673 ret i16 %result 674} 675 676define amdgpu_ps i16 @s_ashr_i16_15(i16 inreg %value) { 677; GCN-LABEL: s_ashr_i16_15: 678; GCN: ; %bb.0: 679; GCN-NEXT: s_sext_i32_i16 s0, s0 680; GCN-NEXT: s_ashr_i32 s0, s0, 15 681; GCN-NEXT: ; return to shader part epilog 682; 683; GFX10PLUS-LABEL: s_ashr_i16_15: 684; GFX10PLUS: ; %bb.0: 685; GFX10PLUS-NEXT: s_sext_i32_i16 s0, s0 686; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, 15 687; GFX10PLUS-NEXT: ; return to shader part epilog 688 %result = ashr i16 %value, 15 689 ret i16 %result 690} 691 692define amdgpu_ps half @ashr_i16_sv(i16 inreg %value, i16 %amount) { 693; GFX6-LABEL: ashr_i16_sv: 694; GFX6: ; %bb.0: 695; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 696; GFX6-NEXT: s_sext_i32_i16 s0, s0 697; GFX6-NEXT: v_ashr_i32_e32 v0, s0, v0 698; GFX6-NEXT: ; return to shader part epilog 699; 700; GFX8-LABEL: ashr_i16_sv: 701; GFX8: ; %bb.0: 702; GFX8-NEXT: v_ashrrev_i16_e64 v0, v0, s0 703; GFX8-NEXT: ; return to shader part epilog 704; 705; GFX9-LABEL: ashr_i16_sv: 706; GFX9: ; %bb.0: 707; GFX9-NEXT: v_ashrrev_i16_e64 v0, v0, s0 708; GFX9-NEXT: ; return to shader part epilog 709; 710; GFX10PLUS-LABEL: ashr_i16_sv: 711; GFX10PLUS: ; %bb.0: 712; GFX10PLUS-NEXT: v_ashrrev_i16 v0, v0, s0 713; GFX10PLUS-NEXT: ; return to shader part epilog 714 %result = ashr i16 %value, %amount 715 %cast = bitcast i16 %result to half 716 ret half %cast 717} 718 719define amdgpu_ps half @ashr_i16_vs(i16 %value, i16 inreg %amount) { 720; GFX6-LABEL: ashr_i16_vs: 721; GFX6: ; %bb.0: 722; GFX6-NEXT: s_and_b32 s0, s0, 0xffff 723; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16 724; GFX6-NEXT: v_ashrrev_i32_e32 v0, s0, v0 725; GFX6-NEXT: ; return to shader part epilog 726; 727; GFX8-LABEL: ashr_i16_vs: 728; GFX8: ; %bb.0: 729; GFX8-NEXT: v_ashrrev_i16_e32 v0, s0, v0 730; GFX8-NEXT: ; return to shader part 
epilog 731; 732; GFX9-LABEL: ashr_i16_vs: 733; GFX9: ; %bb.0: 734; GFX9-NEXT: v_ashrrev_i16_e32 v0, s0, v0 735; GFX9-NEXT: ; return to shader part epilog 736; 737; GFX10PLUS-LABEL: ashr_i16_vs: 738; GFX10PLUS: ; %bb.0: 739; GFX10PLUS-NEXT: v_ashrrev_i16 v0, s0, v0 740; GFX10PLUS-NEXT: ; return to shader part epilog 741 %result = ashr i16 %value, %amount 742 %cast = bitcast i16 %result to half 743 ret half %cast 744} 745 746define <2 x i16> @v_ashr_v2i16(<2 x i16> %value, <2 x i16> %amount) { 747; GFX6-LABEL: v_ashr_v2i16: 748; GFX6: ; %bb.0: 749; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 750; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2 751; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16 752; GFX6-NEXT: v_ashrrev_i32_e32 v0, v2, v0 753; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v3 754; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16 755; GFX6-NEXT: v_ashrrev_i32_e32 v1, v2, v1 756; GFX6-NEXT: s_setpc_b64 s[30:31] 757; 758; GFX8-LABEL: v_ashr_v2i16: 759; GFX8: ; %bb.0: 760; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 761; GFX8-NEXT: v_ashrrev_i16_e32 v2, v1, v0 762; GFX8-NEXT: v_ashrrev_i16_sdwa v0, v1, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 763; GFX8-NEXT: v_or_b32_e32 v0, v2, v0 764; GFX8-NEXT: s_setpc_b64 s[30:31] 765; 766; GFX9-LABEL: v_ashr_v2i16: 767; GFX9: ; %bb.0: 768; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 769; GFX9-NEXT: v_pk_ashrrev_i16 v0, v1, v0 770; GFX9-NEXT: s_setpc_b64 s[30:31] 771; 772; GFX10PLUS-LABEL: v_ashr_v2i16: 773; GFX10PLUS: ; %bb.0: 774; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 775; GFX10PLUS-NEXT: v_pk_ashrrev_i16 v0, v1, v0 776; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] 777 %result = ashr <2 x i16> %value, %amount 778 ret <2 x i16> %result 779} 780 781define <2 x i16> @v_ashr_v2i16_15(<2 x i16> %value) { 782; GFX6-LABEL: v_ashr_v2i16_15: 783; GFX6: ; %bb.0: 784; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 785; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16 786; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16 787; 
GFX6-NEXT: v_ashrrev_i32_e32 v0, 15, v0 788; GFX6-NEXT: v_ashrrev_i32_e32 v1, 15, v1 789; GFX6-NEXT: s_setpc_b64 s[30:31] 790; 791; GFX8-LABEL: v_ashr_v2i16_15: 792; GFX8: ; %bb.0: 793; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 794; GFX8-NEXT: v_mov_b32_e32 v2, 15 795; GFX8-NEXT: v_ashrrev_i16_e32 v1, 15, v0 796; GFX8-NEXT: v_ashrrev_i16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 797; GFX8-NEXT: v_or_b32_e32 v0, v1, v0 798; GFX8-NEXT: s_setpc_b64 s[30:31] 799; 800; GFX9-LABEL: v_ashr_v2i16_15: 801; GFX9: ; %bb.0: 802; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 803; GFX9-NEXT: v_pk_ashrrev_i16 v0, 15, v0 op_sel_hi:[0,1] 804; GFX9-NEXT: s_setpc_b64 s[30:31] 805; 806; GFX10PLUS-LABEL: v_ashr_v2i16_15: 807; GFX10PLUS: ; %bb.0: 808; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 809; GFX10PLUS-NEXT: v_pk_ashrrev_i16 v0, 15, v0 op_sel_hi:[0,1] 810; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] 811 %result = ashr <2 x i16> %value, <i16 15, i16 15> 812 ret <2 x i16> %result 813} 814 815define amdgpu_ps i32 @s_ashr_v2i16(<2 x i16> inreg %value, <2 x i16> inreg %amount) { 816; GFX6-LABEL: s_ashr_v2i16: 817; GFX6: ; %bb.0: 818; GFX6-NEXT: s_sext_i32_i16 s1, s1 819; GFX6-NEXT: s_sext_i32_i16 s0, s0 820; GFX6-NEXT: s_ashr_i32 s1, s1, s3 821; GFX6-NEXT: s_ashr_i32 s0, s0, s2 822; GFX6-NEXT: s_and_b32 s1, s1, 0xffff 823; GFX6-NEXT: s_and_b32 s0, s0, 0xffff 824; GFX6-NEXT: s_lshl_b32 s1, s1, 16 825; GFX6-NEXT: s_or_b32 s0, s0, s1 826; GFX6-NEXT: ; return to shader part epilog 827; 828; GFX8-LABEL: s_ashr_v2i16: 829; GFX8: ; %bb.0: 830; GFX8-NEXT: s_sext_i32_i16 s2, s0 831; GFX8-NEXT: s_bfe_i32 s0, s0, 0x100010 832; GFX8-NEXT: s_sext_i32_i16 s3, s1 833; GFX8-NEXT: s_bfe_i32 s1, s1, 0x100010 834; GFX8-NEXT: s_ashr_i32 s2, s2, s3 835; GFX8-NEXT: s_ashr_i32 s0, s0, s1 836; GFX8-NEXT: s_lshl_b32 s0, s0, 16 837; GFX8-NEXT: s_and_b32 s1, s2, 0xffff 838; GFX8-NEXT: s_or_b32 s0, s0, s1 839; GFX8-NEXT: ; return to shader part epilog 
840; 841; GFX9-LABEL: s_ashr_v2i16: 842; GFX9: ; %bb.0: 843; GFX9-NEXT: s_sext_i32_i16 s2, s0 844; GFX9-NEXT: s_ashr_i32 s0, s0, 16 845; GFX9-NEXT: s_sext_i32_i16 s3, s1 846; GFX9-NEXT: s_ashr_i32 s1, s1, 16 847; GFX9-NEXT: s_ashr_i32 s2, s2, s3 848; GFX9-NEXT: s_ashr_i32 s0, s0, s1 849; GFX9-NEXT: s_pack_ll_b32_b16 s0, s2, s0 850; GFX9-NEXT: ; return to shader part epilog 851; 852; GFX10PLUS-LABEL: s_ashr_v2i16: 853; GFX10PLUS: ; %bb.0: 854; GFX10PLUS-NEXT: s_sext_i32_i16 s2, s0 855; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, 16 856; GFX10PLUS-NEXT: s_sext_i32_i16 s3, s1 857; GFX10PLUS-NEXT: s_ashr_i32 s1, s1, 16 858; GFX10PLUS-NEXT: s_ashr_i32 s2, s2, s3 859; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, s1 860; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s0, s2, s0 861; GFX10PLUS-NEXT: ; return to shader part epilog 862 %result = ashr <2 x i16> %value, %amount 863 %cast = bitcast <2 x i16> %result to i32 864 ret i32 %cast 865} 866 867define amdgpu_ps float @ashr_v2i16_sv(<2 x i16> inreg %value, <2 x i16> %amount) { 868; GFX6-LABEL: ashr_v2i16_sv: 869; GFX6: ; %bb.0: 870; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 871; GFX6-NEXT: s_sext_i32_i16 s0, s0 872; GFX6-NEXT: v_ashr_i32_e32 v0, s0, v0 873; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1 874; GFX6-NEXT: s_sext_i32_i16 s0, s1 875; GFX6-NEXT: v_ashr_i32_e32 v1, s0, v1 876; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1 877; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 878; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 879; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 880; GFX6-NEXT: ; return to shader part epilog 881; 882; GFX8-LABEL: ashr_v2i16_sv: 883; GFX8: ; %bb.0: 884; GFX8-NEXT: s_lshr_b32 s1, s0, 16 885; GFX8-NEXT: v_mov_b32_e32 v2, s1 886; GFX8-NEXT: v_ashrrev_i16_e64 v1, v0, s0 887; GFX8-NEXT: v_ashrrev_i16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD 888; GFX8-NEXT: v_or_b32_e32 v0, v1, v0 889; GFX8-NEXT: ; return to shader part epilog 890; 891; GFX9-LABEL: ashr_v2i16_sv: 892; GFX9: ; %bb.0: 893; GFX9-NEXT: v_pk_ashrrev_i16 
v0, v0, s0 894; GFX9-NEXT: ; return to shader part epilog 895; 896; GFX10PLUS-LABEL: ashr_v2i16_sv: 897; GFX10PLUS: ; %bb.0: 898; GFX10PLUS-NEXT: v_pk_ashrrev_i16 v0, v0, s0 899; GFX10PLUS-NEXT: ; return to shader part epilog 900 %result = ashr <2 x i16> %value, %amount 901 %cast = bitcast <2 x i16> %result to float 902 ret float %cast 903} 904 905define amdgpu_ps float @ashr_v2i16_vs(<2 x i16> %value, <2 x i16> inreg %amount) { 906; GFX6-LABEL: ashr_v2i16_vs: 907; GFX6: ; %bb.0: 908; GFX6-NEXT: s_and_b32 s0, s0, 0xffff 909; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 16 910; GFX6-NEXT: v_ashrrev_i32_e32 v0, s0, v0 911; GFX6-NEXT: s_and_b32 s0, s1, 0xffff 912; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 16 913; GFX6-NEXT: v_ashrrev_i32_e32 v1, s0, v1 914; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1 915; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 916; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 917; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 918; GFX6-NEXT: ; return to shader part epilog 919; 920; GFX8-LABEL: ashr_v2i16_vs: 921; GFX8: ; %bb.0: 922; GFX8-NEXT: s_lshr_b32 s1, s0, 16 923; GFX8-NEXT: v_mov_b32_e32 v2, s1 924; GFX8-NEXT: v_ashrrev_i16_e32 v1, s0, v0 925; GFX8-NEXT: v_ashrrev_i16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 926; GFX8-NEXT: v_or_b32_e32 v0, v1, v0 927; GFX8-NEXT: ; return to shader part epilog 928; 929; GFX9-LABEL: ashr_v2i16_vs: 930; GFX9: ; %bb.0: 931; GFX9-NEXT: v_pk_ashrrev_i16 v0, s0, v0 932; GFX9-NEXT: ; return to shader part epilog 933; 934; GFX10PLUS-LABEL: ashr_v2i16_vs: 935; GFX10PLUS: ; %bb.0: 936; GFX10PLUS-NEXT: v_pk_ashrrev_i16 v0, s0, v0 937; GFX10PLUS-NEXT: ; return to shader part epilog 938 %result = ashr <2 x i16> %value, %amount 939 %cast = bitcast <2 x i16> %result to float 940 ret float %cast 941} 942 943; FIXME 944; define <3 x i16> @v_ashr_v3i16(<3 x i16> %value, <3 x i16> %amount) { 945; %result = ashr <3 x i16> %value, %amount 946; ret <3 x i16> %result 947; } 948 949; define amdgpu_ps <3 x i16> @s_ashr_v3i16(<3 x i16> 
; (continuation of a commented-out, still disabled <3 x i16> test whose
; "define" line starts before this point)
;     inreg %value, <3 x i16> inreg %amount) {
;   %result = ashr <3 x i16> %value, %amount
;   ret <3 x i16> %result
; }

; <4 x i16> ashr with both operands as ordinary arguments; the result is
; bitcast to <2 x float> for the return. Expected GFX6 code handles the four
; elements one at a time (mask the amount, sign-extend the value with
; v_bfe_i32 ..., 0, 16, shift as 32-bit) and then repacks two registers;
; GFX9 and later use one v_pk_ashrrev_i16 per result register.
define <2 x float> @v_ashr_v4i16(<4 x i16> %value, <4 x i16> %amount) {
; GFX6-LABEL: v_ashr_v4i16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_and_b32_e32 v4, 0xffff, v4
; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 16
; GFX6-NEXT:    v_ashrrev_i32_e32 v0, v4, v0
; GFX6-NEXT:    v_and_b32_e32 v4, 0xffff, v5
; GFX6-NEXT:    v_bfe_i32 v1, v1, 0, 16
; GFX6-NEXT:    v_ashrrev_i32_e32 v1, v4, v1
; GFX6-NEXT:    v_and_b32_e32 v4, 0xffff, v6
; GFX6-NEXT:    v_bfe_i32 v2, v2, 0, 16
; GFX6-NEXT:    v_ashrrev_i32_e32 v2, v4, v2
; GFX6-NEXT:    v_and_b32_e32 v4, 0xffff, v7
; GFX6-NEXT:    v_bfe_i32 v3, v3, 0, 16
; GFX6-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX6-NEXT:    v_ashrrev_i32_e32 v3, v4, v3
; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX6-NEXT:    v_and_b32_e32 v1, 0xffff, v2
; GFX6-NEXT:    v_and_b32_e32 v2, 0xffff, v3
; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX6-NEXT:    v_or_b32_e32 v1, v1, v2
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_ashr_v4i16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_ashrrev_i16_e32 v4, v2, v0
; GFX8-NEXT:    v_ashrrev_i16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_ashrrev_i16_e32 v2, v3, v1
; GFX8-NEXT:    v_ashrrev_i16_sdwa v1, v3, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_or_b32_e32 v0, v4, v0
; GFX8-NEXT:    v_or_b32_e32 v1, v2, v1
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_ashr_v4i16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_ashrrev_i16 v0, v2, v0
; GFX9-NEXT:    v_pk_ashrrev_i16 v1, v3, v1
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_ashr_v4i16:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_pk_ashrrev_i16 v0, v2, v0
; GFX10PLUS-NEXT:    v_pk_ashrrev_i16 v1, v3, v1
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %result = ashr <4 x i16> %value, %amount
  %cast = bitcast <4 x i16> %result to <2 x float>
  ret <2 x float> %cast
}

; <4 x i16> ashr with both operands passed inreg; the result is bitcast to
; <2 x i32> for the return. The expected code is entirely scalar (s_*
; instructions): each element is sign-extended to 32 bits, shifted with
; s_ashr_i32 and the halves are repacked (and/lshl/or on GFX6 and GFX8,
; s_pack_ll_b32_b16 on GFX9 and later).
define amdgpu_ps <2 x i32> @s_ashr_v4i16(<4 x i16> inreg %value, <4 x i16> inreg %amount) {
; GFX6-LABEL: s_ashr_v4i16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_sext_i32_i16 s1, s1
; GFX6-NEXT:    s_sext_i32_i16 s0, s0
; GFX6-NEXT:    s_ashr_i32 s1, s1, s5
; GFX6-NEXT:    s_ashr_i32 s0, s0, s4
; GFX6-NEXT:    s_sext_i32_i16 s2, s2
; GFX6-NEXT:    s_sext_i32_i16 s3, s3
; GFX6-NEXT:    s_and_b32 s1, s1, 0xffff
; GFX6-NEXT:    s_ashr_i32 s2, s2, s6
; GFX6-NEXT:    s_ashr_i32 s3, s3, s7
; GFX6-NEXT:    s_and_b32 s0, s0, 0xffff
; GFX6-NEXT:    s_lshl_b32 s1, s1, 16
; GFX6-NEXT:    s_or_b32 s0, s0, s1
; GFX6-NEXT:    s_and_b32 s1, s2, 0xffff
; GFX6-NEXT:    s_and_b32 s2, s3, 0xffff
; GFX6-NEXT:    s_lshl_b32 s2, s2, 16
; GFX6-NEXT:    s_or_b32 s1, s1, s2
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_ashr_v4i16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_sext_i32_i16 s4, s0
; GFX8-NEXT:    s_bfe_i32 s0, s0, 0x100010
; GFX8-NEXT:    s_sext_i32_i16 s5, s1
; GFX8-NEXT:    s_bfe_i32 s1, s1, 0x100010
; GFX8-NEXT:    s_sext_i32_i16 s6, s2
; GFX8-NEXT:    s_bfe_i32 s2, s2, 0x100010
; GFX8-NEXT:    s_sext_i32_i16 s7, s3
; GFX8-NEXT:    s_bfe_i32 s3, s3, 0x100010
; GFX8-NEXT:    s_ashr_i32 s4, s4, s6
; GFX8-NEXT:    s_ashr_i32 s0, s0, s2
; GFX8-NEXT:    s_ashr_i32 s2, s5, s7
; GFX8-NEXT:    s_ashr_i32 s1, s1, s3
; GFX8-NEXT:    s_lshl_b32 s0, s0, 16
; GFX8-NEXT:    s_and_b32 s3, s4, 0xffff
; GFX8-NEXT:    s_lshl_b32 s1, s1, 16
; GFX8-NEXT:    s_and_b32 s2, s2, 0xffff
; GFX8-NEXT:    s_or_b32 s0, s0, s3
; GFX8-NEXT:    s_or_b32 s1, s1, s2
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: s_ashr_v4i16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_sext_i32_i16 s4, s0
; GFX9-NEXT:    s_ashr_i32 s0, s0, 16
; GFX9-NEXT:    s_sext_i32_i16 s5, s2
; GFX9-NEXT:    s_ashr_i32 s2, s2, 16
; GFX9-NEXT:    s_ashr_i32 s4, s4, s5
; GFX9-NEXT:    s_ashr_i32 s0, s0, s2
; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s4, s0
; GFX9-NEXT:    s_sext_i32_i16 s2, s1
; GFX9-NEXT:    s_ashr_i32 s1, s1, 16
; GFX9-NEXT:    s_sext_i32_i16 s4, s3
; GFX9-NEXT:    s_ashr_i32 s3, s3, 16
; GFX9-NEXT:    s_ashr_i32 s2, s2, s4
; GFX9-NEXT:    s_ashr_i32 s1, s1, s3
; GFX9-NEXT:    s_pack_ll_b32_b16 s1, s2, s1
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_ashr_v4i16:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_sext_i32_i16 s4, s0
; GFX10PLUS-NEXT:    s_ashr_i32 s0, s0, 16
; GFX10PLUS-NEXT:    s_sext_i32_i16 s5, s2
; GFX10PLUS-NEXT:    s_ashr_i32 s2, s2, 16
; GFX10PLUS-NEXT:    s_ashr_i32 s4, s4, s5
; GFX10PLUS-NEXT:    s_ashr_i32 s0, s0, s2
; GFX10PLUS-NEXT:    s_sext_i32_i16 s2, s1
; GFX10PLUS-NEXT:    s_ashr_i32 s1, s1, 16
; GFX10PLUS-NEXT:    s_sext_i32_i16 s5, s3
; GFX10PLUS-NEXT:    s_ashr_i32 s3, s3, 16
; GFX10PLUS-NEXT:    s_ashr_i32 s2, s2, s5
; GFX10PLUS-NEXT:    s_ashr_i32 s1, s1, s3
; GFX10PLUS-NEXT:    s_pack_ll_b32_b16 s0, s4, s0
; GFX10PLUS-NEXT:    s_pack_ll_b32_b16 s1, s2, s1
; GFX10PLUS-NEXT:    ; return to shader part epilog
  %result = ashr <4 x i16> %value, %amount
  %cast = bitcast <4 x i16> %result to <2 x i32>
  ret <2 x i32> %cast
}

; FIXME: the <5 x i16> variants below are disabled (commented out). No reason
; is recorded next to them - TODO confirm the blocker before enabling.
; define <5 x i16> @v_ashr_v5i16(<5 x i16> %value, <5 x i16> %amount) {
;   %result = ashr <5 x i16> %value, %amount
;   ret <5 x i16> %result
; }

; define amdgpu_ps <5 x i16> @s_ashr_v5i16(<5 x i16> inreg %value, <5 x i16> inreg %amount) {
;   %result = ashr <5 x i16> %value, %amount
;   ret <5 x i16> %result
; }

;
; Disabled (commented-out) <6 x i16> variants; no reason is recorded next to
; them - TODO confirm the blocker before enabling.
; define <3 x float> @v_ashr_v6i16(<6 x i16> %value, <6 x i16> %amount) {
;   %result = ashr <6 x i16> %value, %amount
;   %cast = bitcast <6 x i16> %result to <3 x float>
;   ret <3 x float> %cast
; }

; define amdgpu_ps <3 x i32> @s_ashr_v6i16(<6 x i16> inreg %value, <6 x i16> inreg %amount) {
;   %result = ashr <6 x i16> %value, %amount
;   %cast = bitcast <6 x i16> %result to <3 x i32>
;   ret <3 x i32> %cast
; }

; <8 x i16> ashr with both operands as ordinary arguments; the result is
; bitcast to <4 x float> for the return. Expected GFX6 code shifts the eight
; elements individually as sign-extended 32-bit values and repacks four
; registers; GFX9 and later use four v_pk_ashrrev_i16 instructions.
define <4 x float> @v_ashr_v8i16(<8 x i16> %value, <8 x i16> %amount) {
; GFX6-LABEL: v_ashr_v8i16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_and_b32_e32 v8, 0xffff, v8
; GFX6-NEXT:    v_bfe_i32 v0, v0, 0, 16
; GFX6-NEXT:    v_ashrrev_i32_e32 v0, v8, v0
; GFX6-NEXT:    v_and_b32_e32 v8, 0xffff, v9
; GFX6-NEXT:    v_bfe_i32 v1, v1, 0, 16
; GFX6-NEXT:    v_ashrrev_i32_e32 v1, v8, v1
; GFX6-NEXT:    v_and_b32_e32 v8, 0xffff, v10
; GFX6-NEXT:    v_bfe_i32 v2, v2, 0, 16
; GFX6-NEXT:    v_ashrrev_i32_e32 v2, v8, v2
; GFX6-NEXT:    v_and_b32_e32 v8, 0xffff, v11
; GFX6-NEXT:    v_bfe_i32 v3, v3, 0, 16
; GFX6-NEXT:    v_ashrrev_i32_e32 v3, v8, v3
; GFX6-NEXT:    v_and_b32_e32 v8, 0xffff, v12
; GFX6-NEXT:    v_bfe_i32 v4, v4, 0, 16
; GFX6-NEXT:    v_ashrrev_i32_e32 v4, v8, v4
; GFX6-NEXT:    v_and_b32_e32 v8, 0xffff, v13
; GFX6-NEXT:    v_bfe_i32 v5, v5, 0, 16
; GFX6-NEXT:    v_and_b32_e32 v1, 0xffff, v1
; GFX6-NEXT:    v_ashrrev_i32_e32 v5, v8, v5
; GFX6-NEXT:    v_and_b32_e32 v8, 0xffff, v14
; GFX6-NEXT:    v_bfe_i32 v6, v6, 0, 16
; GFX6-NEXT:    v_and_b32_e32 v0, 0xffff, v0
; GFX6-NEXT:    v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT:    v_ashrrev_i32_e32 v6, v8, v6
; GFX6-NEXT:    v_and_b32_e32 v8, 0xffff, v15
; GFX6-NEXT:    v_bfe_i32 v7, v7, 0, 16
; GFX6-NEXT:    v_or_b32_e32 v0, v0, v1
; GFX6-NEXT:    v_and_b32_e32 v1, 0xffff, v2
; GFX6-NEXT:    v_and_b32_e32 v2, 0xffff, v3
; GFX6-NEXT:    v_ashrrev_i32_e32 v7, v8, v7
; GFX6-NEXT:    v_lshlrev_b32_e32 v2, 16, v2
; GFX6-NEXT:    v_and_b32_e32 v3, 0xffff, v5
; GFX6-NEXT:    v_or_b32_e32 v1, v1, v2
; GFX6-NEXT:    v_and_b32_e32 v2, 0xffff, v4
; GFX6-NEXT:    v_lshlrev_b32_e32 v3, 16, v3
; GFX6-NEXT:    v_and_b32_e32 v4, 0xffff, v7
; GFX6-NEXT:    v_or_b32_e32 v2, v2, v3
; GFX6-NEXT:    v_and_b32_e32 v3, 0xffff, v6
; GFX6-NEXT:    v_lshlrev_b32_e32 v4, 16, v4
; GFX6-NEXT:    v_or_b32_e32 v3, v3, v4
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_ashr_v8i16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_ashrrev_i16_e32 v8, v4, v0
; GFX8-NEXT:    v_ashrrev_i16_sdwa v0, v4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_ashrrev_i16_e32 v4, v5, v1
; GFX8-NEXT:    v_ashrrev_i16_sdwa v1, v5, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_or_b32_e32 v1, v4, v1
; GFX8-NEXT:    v_ashrrev_i16_e32 v4, v6, v2
; GFX8-NEXT:    v_ashrrev_i16_sdwa v2, v6, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_or_b32_e32 v2, v4, v2
; GFX8-NEXT:    v_ashrrev_i16_e32 v4, v7, v3
; GFX8-NEXT:    v_ashrrev_i16_sdwa v3, v7, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT:    v_or_b32_e32 v0, v8, v0
; GFX8-NEXT:    v_or_b32_e32 v3, v4, v3
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_ashr_v8i16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_pk_ashrrev_i16 v0, v4, v0
; GFX9-NEXT:    v_pk_ashrrev_i16 v1, v5, v1
; GFX9-NEXT:    v_pk_ashrrev_i16 v2, v6, v2
; GFX9-NEXT:    v_pk_ashrrev_i16 v3, v7, v3
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_ashr_v8i16:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_pk_ashrrev_i16 v0, v4, v0
; GFX10PLUS-NEXT:    v_pk_ashrrev_i16 v1, v5, v1
; GFX10PLUS-NEXT:    v_pk_ashrrev_i16 v2, v6, v2
; GFX10PLUS-NEXT:    v_pk_ashrrev_i16 v3, v7, v3
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %result = ashr <8 x i16> %value, %amount
  %cast = bitcast <8 x i16> %result to <4 x float>
  ret <4 x float> %cast
}

; <8 x i16> ashr with both operands passed inreg; the result is bitcast to
; <4 x i32> for the return. The expected code is entirely scalar: each
; element is sign-extended, shifted with s_ashr_i32 and the halves are
; repacked (and/lshl/or on GFX6 and GFX8, s_pack_ll_b32_b16 on GFX9 and
; later).
define amdgpu_ps <4 x i32> @s_ashr_v8i16(<8 x i16> inreg %value, <8 x i16> inreg %amount) {
; GFX6-LABEL: s_ashr_v8i16:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_sext_i32_i16 s1, s1
; GFX6-NEXT:    s_sext_i32_i16 s0, s0
; GFX6-NEXT:    s_ashr_i32 s1, s1, s9
; GFX6-NEXT:    s_ashr_i32 s0, s0, s8
; GFX6-NEXT:    s_sext_i32_i16 s2, s2
; GFX6-NEXT:    s_sext_i32_i16 s3, s3
; GFX6-NEXT:    s_and_b32 s1, s1, 0xffff
; GFX6-NEXT:    s_ashr_i32 s2, s2, s10
; GFX6-NEXT:    s_ashr_i32 s3, s3, s11
; GFX6-NEXT:    s_sext_i32_i16 s5, s5
; GFX6-NEXT:    s_and_b32 s0, s0, 0xffff
; GFX6-NEXT:    s_lshl_b32 s1, s1, 16
; GFX6-NEXT:    s_sext_i32_i16 s4, s4
; GFX6-NEXT:    s_ashr_i32 s5, s5, s13
; GFX6-NEXT:    s_sext_i32_i16 s7, s7
; GFX6-NEXT:    s_or_b32 s0, s0, s1
; GFX6-NEXT:    s_and_b32 s1, s2, 0xffff
; GFX6-NEXT:    s_and_b32 s2, s3, 0xffff
; GFX6-NEXT:    s_ashr_i32 s4, s4, s12
; GFX6-NEXT:    s_sext_i32_i16 s6, s6
; GFX6-NEXT:    s_ashr_i32 s7, s7, s15
; GFX6-NEXT:    s_lshl_b32 s2, s2, 16
; GFX6-NEXT:    s_and_b32 s3, s5, 0xffff
; GFX6-NEXT:    s_ashr_i32 s6, s6, s14
; GFX6-NEXT:    s_or_b32 s1, s1, s2
; GFX6-NEXT:    s_and_b32 s2, s4, 0xffff
; GFX6-NEXT:    s_lshl_b32 s3, s3, 16
; GFX6-NEXT:    s_and_b32 s4, s7, 0xffff
; GFX6-NEXT:    s_or_b32 s2, s2, s3
; GFX6-NEXT:    s_and_b32 s3, s6, 0xffff
; GFX6-NEXT:    s_lshl_b32 s4, s4, 16
; GFX6-NEXT:    s_or_b32 s3, s3, s4
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_ashr_v8i16:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_sext_i32_i16 s8, s0
; GFX8-NEXT:    s_bfe_i32 s0, s0, 0x100010
; GFX8-NEXT:    s_sext_i32_i16 s9, s1
; GFX8-NEXT:    s_bfe_i32 s1, s1, 0x100010
; GFX8-NEXT:    s_sext_i32_i16 s12, s4
; GFX8-NEXT:    s_bfe_i32 s4, s4, 0x100010
; GFX8-NEXT:    s_sext_i32_i16 s13, s5
; GFX8-NEXT:    s_bfe_i32 s5, s5, 0x100010
; GFX8-NEXT:    s_sext_i32_i16 s10, s2
; GFX8-NEXT:    s_bfe_i32 s2, s2, 0x100010
; GFX8-NEXT:    s_sext_i32_i16 s14, s6
; GFX8-NEXT:    s_bfe_i32 s6, s6, 0x100010
; GFX8-NEXT:    s_ashr_i32 s0, s0, s4
; GFX8-NEXT:    s_ashr_i32 s4, s9, s13
; GFX8-NEXT:    s_ashr_i32 s1, s1, s5
; GFX8-NEXT:    s_sext_i32_i16 s11, s3
; GFX8-NEXT:    s_bfe_i32 s3, s3, 0x100010
; GFX8-NEXT:    s_sext_i32_i16 s15, s7
; GFX8-NEXT:    s_bfe_i32 s7, s7, 0x100010
; GFX8-NEXT:    s_ashr_i32 s5, s10, s14
; GFX8-NEXT:    s_ashr_i32 s2, s2, s6
; GFX8-NEXT:    s_lshl_b32 s1, s1, 16
; GFX8-NEXT:    s_and_b32 s4, s4, 0xffff
; GFX8-NEXT:    s_ashr_i32 s8, s8, s12
; GFX8-NEXT:    s_ashr_i32 s6, s11, s15
; GFX8-NEXT:    s_ashr_i32 s3, s3, s7
; GFX8-NEXT:    s_or_b32 s1, s1, s4
; GFX8-NEXT:    s_lshl_b32 s2, s2, 16
; GFX8-NEXT:    s_and_b32 s4, s5, 0xffff
; GFX8-NEXT:    s_lshl_b32 s0, s0, 16
; GFX8-NEXT:    s_and_b32 s7, s8, 0xffff
; GFX8-NEXT:    s_or_b32 s2, s2, s4
; GFX8-NEXT:    s_lshl_b32 s3, s3, 16
; GFX8-NEXT:    s_and_b32 s4, s6, 0xffff
; GFX8-NEXT:    s_or_b32 s0, s0, s7
; GFX8-NEXT:    s_or_b32 s3, s3, s4
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: s_ashr_v8i16:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_sext_i32_i16 s8, s0
; GFX9-NEXT:    s_ashr_i32 s0, s0, 16
; GFX9-NEXT:    s_sext_i32_i16 s9, s4
; GFX9-NEXT:    s_ashr_i32 s4, s4, 16
; GFX9-NEXT:    s_ashr_i32 s8, s8, s9
; GFX9-NEXT:    s_ashr_i32 s0, s0, s4
; GFX9-NEXT:    s_pack_ll_b32_b16 s0, s8, s0
; GFX9-NEXT:    s_sext_i32_i16 s4, s1
; GFX9-NEXT:    s_ashr_i32 s1, s1, 16
; GFX9-NEXT:    s_sext_i32_i16 s8, s5
; GFX9-NEXT:    s_ashr_i32 s5, s5, 16
; GFX9-NEXT:    s_ashr_i32 s4, s4, s8
; GFX9-NEXT:    s_ashr_i32 s1, s1, s5
; GFX9-NEXT:    s_pack_ll_b32_b16 s1, s4, s1
; GFX9-NEXT:    s_sext_i32_i16 s4, s2
; GFX9-NEXT:    s_ashr_i32 s2, s2, 16
; GFX9-NEXT:    s_sext_i32_i16 s5, s6
; GFX9-NEXT:    s_ashr_i32 s6, s6, 16
; GFX9-NEXT:    s_ashr_i32 s4, s4, s5
; GFX9-NEXT:    s_ashr_i32 s2, s2, s6
; GFX9-NEXT:    s_pack_ll_b32_b16 s2, s4, s2
; GFX9-NEXT:    s_sext_i32_i16 s4, s3
; GFX9-NEXT:    s_ashr_i32 s3, s3, 16
; GFX9-NEXT:    s_sext_i32_i16 s5, s7
; GFX9-NEXT:    s_ashr_i32 s6, s7, 16
; GFX9-NEXT:    s_ashr_i32 s4, s4, s5
; GFX9-NEXT:    s_ashr_i32 s3, s3, s6
; GFX9-NEXT:    s_pack_ll_b32_b16 s3, s4, s3
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_ashr_v8i16:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_sext_i32_i16 s8, s0
; GFX10PLUS-NEXT:    s_ashr_i32 s0, s0, 16
; GFX10PLUS-NEXT:    s_sext_i32_i16 s9, s4
; GFX10PLUS-NEXT:    s_ashr_i32 s4, s4, 16
; GFX10PLUS-NEXT:    s_ashr_i32 s8, s8, s9
; GFX10PLUS-NEXT:    s_ashr_i32 s0, s0, s4
; GFX10PLUS-NEXT:    s_sext_i32_i16 s4, s1
; GFX10PLUS-NEXT:    s_ashr_i32 s1, s1, 16
; GFX10PLUS-NEXT:    s_sext_i32_i16 s9, s5
; GFX10PLUS-NEXT:    s_ashr_i32 s5, s5, 16
; GFX10PLUS-NEXT:    s_ashr_i32 s4, s4, s9
; GFX10PLUS-NEXT:    s_ashr_i32 s1, s1, s5
; GFX10PLUS-NEXT:    s_sext_i32_i16 s5, s6
; GFX10PLUS-NEXT:    s_pack_ll_b32_b16 s1, s4, s1
; GFX10PLUS-NEXT:    s_sext_i32_i16 s4, s2
; GFX10PLUS-NEXT:    s_ashr_i32 s2, s2, 16
; GFX10PLUS-NEXT:    s_ashr_i32 s6, s6, 16
; GFX10PLUS-NEXT:    s_ashr_i32 s4, s4, s5
; GFX10PLUS-NEXT:    s_ashr_i32 s2, s2, s6
; GFX10PLUS-NEXT:    s_sext_i32_i16 s5, s3
; GFX10PLUS-NEXT:    s_ashr_i32 s3, s3, 16
; GFX10PLUS-NEXT:    s_sext_i32_i16 s6, s7
; GFX10PLUS-NEXT:    s_ashr_i32 s7, s7, 16
; GFX10PLUS-NEXT:    s_ashr_i32 s5, s5, s6
; GFX10PLUS-NEXT:    s_ashr_i32 s3, s3, s7
; GFX10PLUS-NEXT:    s_pack_ll_b32_b16 s0, s8, s0
; GFX10PLUS-NEXT:    s_pack_ll_b32_b16 s2, s4, s2
; GFX10PLUS-NEXT:    s_pack_ll_b32_b16 s3, s5, s3
; GFX10PLUS-NEXT:    ; return to shader part epilog
  %result = ashr <8 x i16> %value, %amount
  %cast = bitcast <8 x i16> %result to <4 x i32>
  ret <4 x i32> %cast
}

; i64 ashr by a variable amount, both operands ordinary arguments. Expected
; code is a single 64-bit shift: v_ashr_i64 on GFX6, v_ashrrev_i64 (amount
; operand first) on the later targets. Only v2 of the amount is read.
define i64 @v_ashr_i64(i64 %value, i64 %amount) {
; GFX6-LABEL: v_ashr_i64:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_ashr_i64 v[0:1], v[0:1], v2
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_ashr_i64:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_ashrrev_i64 v[0:1], v2, v[0:1]
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_ashr_i64:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_ashrrev_i64 v[0:1], v2, v[0:1]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_ashr_i64:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_ashrrev_i64 v[0:1], v2, v[0:1]
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %result = ashr i64 %value, %amount
  ret i64 %result
}

; i64 ashr by the constant 63. Expected code computes the high input word
; shifted right by 31 once and copies it into both result words.
define i64 @v_ashr_i64_63(i64 %value) {
; GCN-LABEL: v_ashr_i64_63:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_ashrrev_i32_e32 v0, 31, v1
; GCN-NEXT:    v_mov_b32_e32 v1, v0
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_ashr_i64_63:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_ashrrev_i32_e32 v0, 31, v1
; GFX10PLUS-NEXT:    v_mov_b32_e32 v1, v0
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %result = ashr i64 %value, 63
  ret i64 %result
}

; i64 ashr by the constant 33. Expected code uses two 32-bit shifts of the
; high input word: by 1 for the low result word and by 31 for the high one.
define i64 @v_ashr_i64_33(i64 %value) {
; GCN-LABEL: v_ashr_i64_33:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_ashrrev_i32_e32 v2, 31, v1
; GCN-NEXT:    v_ashrrev_i32_e32 v0, 1, v1
; GCN-NEXT:    v_mov_b32_e32 v1, v2
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_ashr_i64_33:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_ashrrev_i32_e32 v0, 1, v1
; GFX10PLUS-NEXT:    v_ashrrev_i32_e32 v1, 31, v1
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %result = ashr i64 %value, 33
  ret i64 %result
}

; i64 ashr by the constant 32. Expected code copies the high input word into
; the low result word and shifts it right by 31 for the high result word.
define i64 @v_ashr_i64_32(i64 %value) {
; GCN-LABEL: v_ashr_i64_32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v0, v1
; GCN-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_ashr_i64_32:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_mov_b32_e32 v0, v1
; GFX10PLUS-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %result = ashr i64 %value, 32
  ret i64 %result
}

; i64 ashr by the constant 31. Unlike the 32/33/63 cases above, the expected
; code keeps a full 64-bit shift with an immediate amount.
define i64 @v_ashr_i64_31(i64 %value) {
; GFX6-LABEL: v_ashr_i64_31:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_ashr_i64 v[0:1], v[0:1], 31
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_ashr_i64_31:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_ashrrev_i64 v[0:1], 31, v[0:1]
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_ashr_i64_31:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_ashrrev_i64 v[0:1], 31, v[0:1]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_ashr_i64_31:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_ashrrev_i64 v[0:1], 31, v[0:1]
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %result = ashr i64 %value, 31
  ret i64 %result
}

; i64 ashr by a variable amount with both operands passed inreg. Expected
; code is a single s_ashr_i64 on every target; only s2 of the amount is read.
define amdgpu_ps i64 @s_ashr_i64(i64 inreg %value, i64 inreg %amount) {
; GCN-LABEL: s_ashr_i64:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_ashr_i64 s[0:1], s[0:1], s2
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_ashr_i64:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_ashr_i64 s[0:1], s[0:1], s2
; GFX10PLUS-NEXT:    ; return to shader part epilog
  %result = ashr i64 %value, %amount
  ret i64 %result
}

; Scalar (inreg) i64 ashr by the constant 63: the high input word shifted
; right by 31 is written to both result words.
define amdgpu_ps i64 @s_ashr_i64_63(i64 inreg %value) {
; GCN-LABEL: s_ashr_i64_63:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_ashr_i32 s0, s1, 31
; GCN-NEXT:    s_mov_b32 s1, s0
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_ashr_i64_63:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_ashr_i32 s0, s1, 31
; GFX10PLUS-NEXT:    s_mov_b32 s1, s0
; GFX10PLUS-NEXT:    ; return to shader part epilog
  %result = ashr i64 %value, 63
  ret i64 %result
}

; Scalar (inreg) i64 ashr by the constant 33: two 32-bit shifts of the high
; input word, by 1 (low result word) and by 31 (high result word).
define amdgpu_ps i64 @s_ashr_i64_33(i64 inreg %value) {
; GCN-LABEL: s_ashr_i64_33:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_ashr_i32 s2, s1, 31
; GCN-NEXT:    s_ashr_i32 s0, s1, 1
; GCN-NEXT:    s_mov_b32 s1, s2
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_ashr_i64_33:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_ashr_i32 s0, s1, 1
; GFX10PLUS-NEXT:    s_ashr_i32 s1, s1, 31
; GFX10PLUS-NEXT:    ; return to shader part epilog
  %result = ashr i64 %value, 33
  ret i64 %result
}

; Scalar (inreg) i64 ashr by the constant 32: the high input word is copied
; to the low result word and shifted right by 31 for the high result word.
define amdgpu_ps i64 @s_ashr_i64_32(i64 inreg %value) {
; GCN-LABEL: s_ashr_i64_32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_mov_b32 s0, s1
; GCN-NEXT:    s_ashr_i32 s1, s1, 31
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_ashr_i64_32:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_mov_b32 s0, s1
; GFX10PLUS-NEXT:    s_ashr_i32 s1, s1, 31
; GFX10PLUS-NEXT:    ; return to shader part epilog
  %result = ashr i64 %value, 32
  ret i64 %result
}

; Scalar (inreg) i64 ashr by the constant 31: expected code keeps a single
; s_ashr_i64 with an immediate amount.
define amdgpu_ps i64 @s_ashr_i64_31(i64 inreg %value) {
; GCN-LABEL: s_ashr_i64_31:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_ashr_i64 s[0:1], s[0:1], 31
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_ashr_i64_31:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_ashr_i64 s[0:1], s[0:1], 31
; GFX10PLUS-NEXT:    ; return to shader part epilog
  %result = ashr i64 %value, 31
  ret i64 %result
}

; i64 ashr with the value passed inreg and the amount as an ordinary
; argument; the result is bitcast to <2 x float>. Expected code is one
; vector 64-bit shift reading the value from s[0:1] and the amount from v0.
define amdgpu_ps <2 x float> @ashr_i64_sv(i64 inreg %value, i64 %amount) {
; GFX6-LABEL: ashr_i64_sv:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_ashr_i64 v[0:1], s[0:1], v0
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: ashr_i64_sv:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_ashrrev_i64 v[0:1], v0, s[0:1]
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: ashr_i64_sv:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_ashrrev_i64 v[0:1], v0, s[0:1]
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: ashr_i64_sv:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    v_ashrrev_i64 v[0:1], v0, s[0:1]
; GFX10PLUS-NEXT:    ; return to shader part epilog
  %result = ashr i64 %value, %amount
  %cast = bitcast i64 %result to <2 x float>
  ret <2 x float> %cast
}

; i64 ashr with the value as an ordinary argument and the amount passed
; inreg; the result is bitcast to <2 x float>. Expected code is one vector
; 64-bit shift reading the value from v[0:1] and the amount from s0.
define amdgpu_ps <2 x float> @ashr_i64_vs(i64 %value, i64 inreg %amount) {
; GFX6-LABEL: ashr_i64_vs:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    v_ashr_i64 v[0:1], v[0:1], s0
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: ashr_i64_vs:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    v_ashrrev_i64 v[0:1], s0, v[0:1]
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: ashr_i64_vs:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    v_ashrrev_i64 v[0:1], s0, v[0:1]
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: ashr_i64_vs:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    v_ashrrev_i64 v[0:1], s0, v[0:1]
; GFX10PLUS-NEXT:    ; return to shader part epilog
  %result = ashr i64 %value, %amount
  %cast = bitcast i64 %result to <2 x float>
  ret <2 x float> %cast
}

; <2 x i64> ashr by a variable amount, both operands ordinary arguments.
; Expected code is two independent 64-bit shifts, taking the amounts from v4
; and v6.
define <2 x i64> @v_ashr_v2i64(<2 x i64> %value, <2 x i64> %amount) {
; GFX6-LABEL: v_ashr_v2i64:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_ashr_i64 v[0:1], v[0:1], v4
; GFX6-NEXT:    v_ashr_i64 v[2:3], v[2:3], v6
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_ashr_v2i64:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_ashrrev_i64 v[0:1], v4, v[0:1]
; GFX8-NEXT:    v_ashrrev_i64 v[2:3], v6, v[2:3]
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_ashr_v2i64:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_ashrrev_i64 v[0:1], v4, v[0:1]
; GFX9-NEXT:    v_ashrrev_i64 v[2:3], v6, v[2:3]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_ashr_v2i64:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_ashrrev_i64 v[0:1], v4, v[0:1]
; GFX10PLUS-NEXT:    v_ashrrev_i64 v[2:3], v6, v[2:3]
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %result = ashr <2 x i64> %value, %amount
  ret <2 x i64> %result
}

; <2 x i64> ashr by the splat constant 31. Expected code is two 64-bit
; shifts with an immediate amount.
define <2 x i64> @v_ashr_v2i64_31(<2 x i64> %value) {
; GFX6-LABEL: v_ashr_v2i64_31:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_ashr_i64 v[0:1], v[0:1], 31
; GFX6-NEXT:    v_ashr_i64 v[2:3], v[2:3], 31
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_ashr_v2i64_31:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_ashrrev_i64 v[0:1], 31, v[0:1]
; GFX8-NEXT:    v_ashrrev_i64 v[2:3], 31, v[2:3]
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_ashr_v2i64_31:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_ashrrev_i64 v[0:1], 31, v[0:1]
; GFX9-NEXT:    v_ashrrev_i64 v[2:3], 31, v[2:3]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_ashr_v2i64_31:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_ashrrev_i64 v[0:1], 31, v[0:1]
; GFX10PLUS-NEXT:    v_ashrrev_i64 v[2:3], 31, v[2:3]
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %result = ashr <2 x i64> %value, <i64 31, i64 31>
  ret <2 x i64> %result
}

; <2 x i64> ashr with both operands passed inreg. Expected code is two
; s_ashr_i64 instructions, taking the amounts from s4 and s6.
define amdgpu_ps <2 x i64> @s_ashr_v2i64(<2 x i64> inreg %value, <2 x i64> inreg %amount) {
; GCN-LABEL: s_ashr_v2i64:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_ashr_i64 s[0:1], s[0:1], s4
; GCN-NEXT:    s_ashr_i64 s[2:3], s[2:3], s6
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_ashr_v2i64:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_ashr_i64 s[0:1], s[0:1], s4
; GFX10PLUS-NEXT:    s_ashr_i64 s[2:3], s[2:3], s6
; GFX10PLUS-NEXT:    ; return to shader part epilog
  %result = ashr <2 x i64> %value, %amount
  ret <2 x i64> %result
}

; i65 ashr by a variable amount, both operands ordinary arguments. The
; expected code sign-extends bit 0 of the third value register
; (v_bfe_i32 v4, v2, 0, 1), builds the shifted words from 64-bit shifts by
; the amount, by 64 - amount and by amount - 64 (0xffffffc0 added), and
; selects between them on "amount < 64" and "amount == 0". GFX10 and GFX11
; have separate check prefixes for this function; their expected code
; differs in the scalar register holding the "amount == 0" compare result
; (s4 versus s0).
define i65 @v_ashr_i65(i65 %value, i65 %amount) {
; GFX6-LABEL: v_ashr_i65:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_bfe_i32 v4, v2, 0, 1
; GFX6-NEXT:    v_ashrrev_i32_e32 v5, 31, v4
; GFX6-NEXT:    v_sub_i32_e32 v8, vcc, 64, v3
; GFX6-NEXT:    v_lshr_b64 v[6:7], v[0:1], v3
; GFX6-NEXT:    v_lshl_b64 v[8:9], v[4:5], v8
; GFX6-NEXT:    v_add_i32_e32 v2, vcc, 0xffffffc0, v3
; GFX6-NEXT:    v_ashr_i64 v[10:11], v[4:5], v3
; GFX6-NEXT:    v_or_b32_e32 v6, v6, v8
; GFX6-NEXT:    v_ashrrev_i32_e32 v8, 31, v5
; GFX6-NEXT:    v_ashr_i64 v[4:5], v[4:5], v2
; GFX6-NEXT:    v_or_b32_e32 v7, v7, v9
; GFX6-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v3
; GFX6-NEXT:    v_cndmask_b32_e32 v2, v4, v6, vcc
; GFX6-NEXT:    v_cndmask_b32_e32 v4, v5, v7, vcc
; GFX6-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v3
; GFX6-NEXT:    v_cndmask_b32_e64 v0, v2, v0, s[4:5]
; GFX6-NEXT:    v_cndmask_b32_e64 v1, v4, v1, s[4:5]
; GFX6-NEXT:    v_cndmask_b32_e32 v2, v8, v10, vcc
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_ashr_i65:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_bfe_i32 v4, v2, 0, 1
; GFX8-NEXT:    v_ashrrev_i32_e32 v5, 31, v4
; GFX8-NEXT:    v_sub_u32_e32 v8, vcc, 64, v3
; GFX8-NEXT:    v_lshrrev_b64 v[6:7], v3, v[0:1]
; GFX8-NEXT:    v_lshlrev_b64 v[8:9], v8, v[4:5]
; GFX8-NEXT:    v_add_u32_e32 v2, vcc, 0xffffffc0, v3
; GFX8-NEXT:    v_ashrrev_i64 v[10:11], v3, v[4:5]
; GFX8-NEXT:    v_or_b32_e32 v6, v6, v8
; GFX8-NEXT:    v_ashrrev_i32_e32 v8, 31, v5
; GFX8-NEXT:    v_ashrrev_i64 v[4:5], v2, v[4:5]
; GFX8-NEXT:    v_or_b32_e32 v7, v7, v9
; GFX8-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v3
; GFX8-NEXT:    v_cndmask_b32_e32 v2, v4, v6, vcc
; GFX8-NEXT:    v_cndmask_b32_e32 v4, v5, v7, vcc
; GFX8-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v3
; GFX8-NEXT:    v_cndmask_b32_e64 v0, v2, v0, s[4:5]
; GFX8-NEXT:    v_cndmask_b32_e64 v1, v4, v1, s[4:5]
; GFX8-NEXT:    v_cndmask_b32_e32 v2, v8, v10, vcc
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_ashr_i65:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_bfe_i32 v4, v2, 0, 1
; GFX9-NEXT:    v_ashrrev_i32_e32 v5, 31, v4
; GFX9-NEXT:    v_sub_u32_e32 v8, 64, v3
; GFX9-NEXT:    v_lshrrev_b64 v[6:7], v3, v[0:1]
; GFX9-NEXT:    v_lshlrev_b64 v[8:9], v8, v[4:5]
; GFX9-NEXT:    v_add_u32_e32 v2, 0xffffffc0, v3
; GFX9-NEXT:    v_ashrrev_i64 v[10:11], v3, v[4:5]
; GFX9-NEXT:    v_or_b32_e32 v6, v6, v8
; GFX9-NEXT:    v_ashrrev_i32_e32 v8, 31, v5
; GFX9-NEXT:    v_ashrrev_i64 v[4:5], v2, v[4:5]
; GFX9-NEXT:    v_or_b32_e32 v7, v7, v9
; GFX9-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v3
; GFX9-NEXT:    v_cndmask_b32_e32 v2, v4, v6, vcc
; GFX9-NEXT:    v_cndmask_b32_e32 v4, v5, v7, vcc
; GFX9-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v3
; GFX9-NEXT:    v_cndmask_b32_e64 v0, v2, v0, s[4:5]
; GFX9-NEXT:    v_cndmask_b32_e64 v1, v4, v1, s[4:5]
; GFX9-NEXT:    v_cndmask_b32_e32 v2, v8, v10, vcc
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_ashr_i65:
; GFX10:       ; %bb.0:
; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT:    v_bfe_i32 v4, v2, 0, 1
; GFX10-NEXT:    v_sub_nc_u32_e32 v2, 64, v3
; GFX10-NEXT:    v_add_nc_u32_e32 v10, 0xffffffc0, v3
; GFX10-NEXT:    v_lshrrev_b64 v[6:7], v3, v[0:1]
; GFX10-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v3
; GFX10-NEXT:    v_ashrrev_i32_e32 v5, 31, v4
; GFX10-NEXT:    v_cmp_eq_u32_e64 s4, 0, v3
; GFX10-NEXT:    v_lshlrev_b64 v[8:9], v2, v[4:5]
; GFX10-NEXT:    v_ashrrev_i64 v[10:11], v10, v[4:5]
; GFX10-NEXT:    v_or_b32_e32 v2, v6, v8
; GFX10-NEXT:    v_or_b32_e32 v8, v7, v9
; GFX10-NEXT:    v_ashrrev_i64 v[6:7], v3, v[4:5]
; GFX10-NEXT:    v_ashrrev_i32_e32 v3, 31, v5
; GFX10-NEXT:    v_cndmask_b32_e32 v2, v10, v2, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e32 v4, v11, v8, vcc_lo
; GFX10-NEXT:    v_cndmask_b32_e64 v0, v2, v0, s4
; GFX10-NEXT:    v_cndmask_b32_e64 v1, v4, v1, s4
; GFX10-NEXT:    v_cndmask_b32_e32 v2, v3, v6, vcc_lo
; GFX10-NEXT:    s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_ashr_i65:
; GFX11:       ; %bb.0:
; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT:    v_bfe_i32 v4, v2, 0, 1
; GFX11-NEXT:    v_sub_nc_u32_e32 v2, 64, v3
; GFX11-NEXT:    v_add_nc_u32_e32 v10, 0xffffffc0, v3
; GFX11-NEXT:    v_lshrrev_b64 v[6:7], v3, v[0:1]
; GFX11-NEXT:    v_cmp_gt_u32_e32 vcc_lo, 64, v3
; GFX11-NEXT:    v_ashrrev_i32_e32 v5, 31, v4
; GFX11-NEXT:    v_cmp_eq_u32_e64 s0, 0, v3
; GFX11-NEXT:    v_lshlrev_b64 v[8:9], v2, v[4:5]
; GFX11-NEXT:    v_ashrrev_i64 v[10:11], v10, v[4:5]
; GFX11-NEXT:    v_or_b32_e32 v2, v6, v8
; GFX11-NEXT:    v_or_b32_e32 v8, v7, v9
; GFX11-NEXT:    v_ashrrev_i64 v[6:7], v3, v[4:5]
; GFX11-NEXT:    v_ashrrev_i32_e32 v3, 31, v5
; GFX11-NEXT:    v_cndmask_b32_e32 v2, v10, v2, vcc_lo
; GFX11-NEXT:    v_cndmask_b32_e32 v4, v11, v8, vcc_lo
; GFX11-NEXT:    v_cndmask_b32_e64 v0, v2, v0, s0
; GFX11-NEXT:    v_cndmask_b32_e64 v1, v4, v1, s0
; GFX11-NEXT:    v_cndmask_b32_e32 v2, v3, v6, vcc_lo
; GFX11-NEXT:    s_setpc_b64 s[30:31]
  %result = ashr i65 %value, %amount
  ret i65 %result
}

; i65 ashr by the constant 33. The expected code sign-extends bit 0 of the
; third value register, shifts that extended pair left by 31 and ORs the low
; word with the second input word shifted right by 1; the top result word is
; the sign word shifted right by 1.
define i65 @v_ashr_i65_33(i65 %value) {
; GFX6-LABEL: v_ashr_i65_33:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT:    v_mov_b32_e32 v3, v1
; GFX6-NEXT:    v_bfe_i32 v1, v2, 0, 1
; GFX6-NEXT:    v_ashrrev_i32_e32 v2, 31, v1
; GFX6-NEXT:    v_lshl_b64 v[0:1], v[1:2], 31
; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 1, v3
; GFX6-NEXT:    v_or_b32_e32 v0, v3, v0
; GFX6-NEXT:    v_ashrrev_i32_e32 v2, 1, v2
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_ashr_i65_33:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v3, v1
; GFX8-NEXT:    v_bfe_i32 v1, v2, 0, 1
; GFX8-NEXT:    v_ashrrev_i32_e32 v2, 31, v1
; GFX8-NEXT:    v_lshlrev_b64 v[0:1], 31, v[1:2]
; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 1, v3
; GFX8-NEXT:    v_or_b32_e32 v0, v3, v0
; GFX8-NEXT:    v_ashrrev_i32_e32 v2, 1, v2
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_ashr_i65_33:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v3, v1
; GFX9-NEXT:    v_bfe_i32 v1, v2, 0, 1
; GFX9-NEXT:    v_ashrrev_i32_e32 v2, 31, v1
; GFX9-NEXT:    v_lshlrev_b64 v[0:1], 31, v[1:2]
; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 1, v3
; GFX9-NEXT:    v_or_b32_e32 v0, v3, v0
; GFX9-NEXT:    v_ashrrev_i32_e32 v2, 1, v2
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_ashr_i65_33:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_mov_b32_e32 v3, v1
; GFX10PLUS-NEXT:    v_bfe_i32 v1, v2, 0, 1
; GFX10PLUS-NEXT:    v_lshrrev_b32_e32 v3, 1, v3
; GFX10PLUS-NEXT:    v_ashrrev_i32_e32 v2, 31, v1
; GFX10PLUS-NEXT:    v_lshlrev_b64 v[0:1], 31, v[1:2]
; GFX10PLUS-NEXT:    v_ashrrev_i32_e32 v2, 1, v2
; GFX10PLUS-NEXT:    v_or_b32_e32 v0, v3, v0
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %result = ashr i65 %value, 33
  ret i65 %result
}

define amdgpu_ps i65 @s_ashr_i65(i65 inreg %value, i65 inreg %amount) {
; GCN-LABEL: s_ashr_i65:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_bfe_i64 s[4:5], s[2:3], 0x10000
; GCN-NEXT:    s_sub_i32 s10, s3, 64
; GCN-NEXT:    s_sub_i32 s8, 64, s3
; GCN-NEXT:    s_cmp_lt_u32 s3, 64
; GCN-NEXT:    s_cselect_b32 s11, 1, 0
; GCN-NEXT:    s_cmp_eq_u32 s3, 0
; GCN-NEXT:    s_cselect_b32 s12, 1, 0
; GCN-NEXT:    s_ashr_i64 s[6:7], s[4:5], s3
; GCN-NEXT:    s_lshr_b64 s[2:3], s[0:1], s3
; GCN-NEXT:    s_lshl_b64 s[8:9], s[4:5], s8
; GCN-NEXT:    s_or_b64 s[2:3], s[2:3], s[8:9]
; GCN-NEXT:    s_ashr_i32 s7, s5, 31
; GCN-NEXT:    s_ashr_i64 s[4:5], s[4:5], s10
; GCN-NEXT:    s_cmp_lg_u32 s11, 0
; GCN-NEXT:    s_cselect_b64 s[2:3], s[2:3], s[4:5]
; GCN-NEXT:    s_cmp_lg_u32 s12, 0
; GCN-NEXT:    s_cselect_b64 s[0:1], s[0:1], s[2:3]
; GCN-NEXT:    s_cmp_lg_u32 s11, 0
; GCN-NEXT:    s_cselect_b32 s2, s6, s7
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_ashr_i65:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_bfe_i64 s[4:5], s[2:3], 0x10000
; GFX10PLUS-NEXT:    s_sub_i32 s10, s3, 64
; GFX10PLUS-NEXT:    s_sub_i32 s2, 64, s3
; GFX10PLUS-NEXT:    s_cmp_lt_u32 s3, 64
; GFX10PLUS-NEXT:    s_cselect_b32 s11, 1, 0
; GFX10PLUS-NEXT:    s_cmp_eq_u32 s3, 0
; GFX10PLUS-NEXT:    s_cselect_b32 s12, 1, 0
; GFX10PLUS-NEXT:    s_lshr_b64 s[6:7], s[0:1], s3
; GFX10PLUS-NEXT:    s_lshl_b64 s[8:9], s[4:5], s2
; GFX10PLUS-NEXT:    s_ashr_i64 s[2:3], s[4:5], s3
; GFX10PLUS-NEXT:    s_or_b64 s[6:7], s[6:7], s[8:9]
; GFX10PLUS-NEXT:    s_ashr_i32 s3, s5, 31
; GFX10PLUS-NEXT:    s_ashr_i64 s[4:5], s[4:5], s10
; GFX10PLUS-NEXT:    s_cmp_lg_u32 s11, 0
; GFX10PLUS-NEXT:    s_cselect_b64
s[4:5], s[6:7], s[4:5] 1875; GFX10PLUS-NEXT: s_cmp_lg_u32 s12, 0 1876; GFX10PLUS-NEXT: s_cselect_b64 s[0:1], s[0:1], s[4:5] 1877; GFX10PLUS-NEXT: s_cmp_lg_u32 s11, 0 1878; GFX10PLUS-NEXT: s_cselect_b32 s2, s2, s3 1879; GFX10PLUS-NEXT: ; return to shader part epilog 1880 %result = ashr i65 %value, %amount 1881 ret i65 %result 1882} 1883 1884define amdgpu_ps i65 @s_ashr_i65_33(i65 inreg %value) { 1885; GCN-LABEL: s_ashr_i65_33: 1886; GCN: ; %bb.0: 1887; GCN-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x10000 1888; GCN-NEXT: s_lshr_b32 s0, s1, 1 1889; GCN-NEXT: s_mov_b32 s1, 0 1890; GCN-NEXT: s_lshl_b64 s[4:5], s[2:3], 31 1891; GCN-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5] 1892; GCN-NEXT: s_ashr_i32 s2, s3, 1 1893; GCN-NEXT: ; return to shader part epilog 1894; 1895; GFX10PLUS-LABEL: s_ashr_i65_33: 1896; GFX10PLUS: ; %bb.0: 1897; GFX10PLUS-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x10000 1898; GFX10PLUS-NEXT: s_lshr_b32 s0, s1, 1 1899; GFX10PLUS-NEXT: s_mov_b32 s1, 0 1900; GFX10PLUS-NEXT: s_lshl_b64 s[4:5], s[2:3], 31 1901; GFX10PLUS-NEXT: s_ashr_i32 s2, s3, 1 1902; GFX10PLUS-NEXT: s_or_b64 s[0:1], s[0:1], s[4:5] 1903; GFX10PLUS-NEXT: ; return to shader part epilog 1904 %result = ashr i65 %value, 33 1905 ret i65 %result 1906} 1907 1908; FIXME: Argument lowering asserts 1909; define <2 x i65> @v_ashr_v2i65(<2 x i65> %value, <2 x i65> %amount) { 1910; %result = ashr <2 x i65> %value, %amount 1911; ret <2 x i65> %result 1912; } 1913 1914; define amdgpu_ps <2 x i65> @s_ashr_v2i65(<2 x i65> inreg %value, <2 x i65> inreg %amount) { 1915; %result = ashr <2 x i65> %value, %amount 1916; ret <2 x i65> %result 1917; } 1918