; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s

; Codegen tests for the IR 'shl' instruction, compiled with -global-isel for
; five AMDGPU processors (see the run lines above).
;
; Naming used by the functions in this file:
;   v_*   default calling convention, plain arguments; the expected code
;         operates on v-registers.
;   s_*   amdgpu_ps with 'inreg' arguments; the expected code operates on
;         s-registers.
;   *_sv  amdgpu_ps, 'inreg' value shifted by a non-inreg amount.
;   *_vs  amdgpu_ps, non-inreg value shifted by an 'inreg' amount.
;   *_<N> the shift amount is the constant N.
;
; Do not hand-edit the assertion lines; regenerate them with the script named
; in the first line of this file.

; ---- i8 ----

define i8 @v_shl_i8(i8 %value, i8 %amount) {
; GFX6-LABEL: v_shl_i8:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_and_b32_e32 v1, 0xff, v1
; GFX6-NEXT: v_lshlrev_b32_e32 v0, v1, v0
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_shl_i8:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_shl_i8:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_lshlrev_b16_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_shl_i8:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT: v_and_b32_e32 v1, 0xff, v1
; GFX10PLUS-NEXT: v_lshlrev_b16 v0, v1, v0
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
  %result = shl i8 %value, %amount
  ret i8 %result
}

define i8 @v_shl_i8_7(i8 %value) {
; GFX6-LABEL: v_shl_i8_7:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 7, v0
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_shl_i8_7:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_lshlrev_b16_e32 v0, 7, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_shl_i8_7:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_lshlrev_b16_e32 v0, 7, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_shl_i8_7:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT: v_lshlrev_b16 v0, 7, v0
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
  %result = shl i8 %value, 7
  ret i8 %result
}

define amdgpu_ps i8 @s_shl_i8(i8 inreg %value, i8 inreg %amount) {
; GFX6-LABEL: s_shl_i8:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_lshl_b32 s0, s0, s1
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_shl_i8:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_and_b32 s0, s0, 0xff
; GFX8-NEXT: s_lshl_b32 s0, s0, s1
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: s_shl_i8:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_and_b32 s0, s0, 0xff
; GFX9-NEXT: s_lshl_b32 s0, s0, s1
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_shl_i8:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_and_b32 s0, s0, 0xff
; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, s1
; GFX10PLUS-NEXT: ; return to shader part epilog
  %result = shl i8 %value, %amount
  ret i8 %result
}

define amdgpu_ps i8 @s_shl_i8_7(i8 inreg %value) {
; GCN-LABEL: s_shl_i8_7:
; GCN: ; %bb.0:
; GCN-NEXT: s_lshl_b32 s0, s0, 7
; GCN-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_shl_i8_7:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 7
; GFX10PLUS-NEXT: ; return to shader part epilog
  %result = shl i8 %value, 7
  ret i8 %result
}


; ---- i24 ----

define i24 @v_shl_i24(i24 %value, i24 %amount) {
; GCN-LABEL: v_shl_i24:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_and_b32_e32 v1, 0xffffff, v1
; GCN-NEXT: v_lshlrev_b32_e32 v0, v1, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_shl_i24:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT: v_and_b32_e32 v1, 0xffffff, v1
; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v0, v1, v0
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
  %result = shl i24 %value, %amount
  ret i24 %result
}

define i24 @v_shl_i24_7(i24 %value) {
; GCN-LABEL: v_shl_i24_7:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_lshlrev_b32_e32 v0, 7, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_shl_i24_7:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v0, 7, v0
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
  %result = shl i24 %value, 7
  ret i24 %result
}

define amdgpu_ps i24 @s_shl_i24(i24 inreg %value, i24 inreg %amount) {
; GCN-LABEL: s_shl_i24:
; GCN: ; %bb.0:
; GCN-NEXT: s_lshl_b32 s0, s0, s1
; GCN-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_shl_i24:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, s1
; GFX10PLUS-NEXT: ; return to shader part epilog
  %result = shl i24 %value, %amount
  ret i24 %result
}

define amdgpu_ps i24 @s_shl_i24_7(i24 inreg %value) {
; GCN-LABEL: s_shl_i24_7:
; GCN: ; %bb.0:
; GCN-NEXT: s_lshl_b32 s0, s0, 7
; GCN-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_shl_i24_7:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 7
; GFX10PLUS-NEXT: ; return to shader part epilog
  %result = shl i24 %value, 7
  ret i24 %result
}

; ---- i32 ----

define i32 @v_shl_i32(i32 %value, i32 %amount) {
; GCN-LABEL: v_shl_i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_lshlrev_b32_e32 v0, v1, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_shl_i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v0, v1, v0
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
  %result = shl i32 %value, %amount
  ret i32 %result
}

define i32 @v_shl_i32_31(i32 %value) {
; GCN-LABEL: v_shl_i32_31:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_lshlrev_b32_e32 v0, 31, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_shl_i32_31:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v0, 31, v0
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
  %result = shl i32 %value, 31
  ret i32 %result
}

define amdgpu_ps i32 @s_shl_i32(i32 inreg %value, i32 inreg %amount) {
; GCN-LABEL: s_shl_i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_lshl_b32 s0, s0, s1
; GCN-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_shl_i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, s1
; GFX10PLUS-NEXT: ; return to shader part epilog
  %result = shl i32 %value, %amount
  ret i32 %result
}

define amdgpu_ps i32 @s_shl_i32_31(i32 inreg %value) {
; GCN-LABEL: s_shl_i32_31:
; GCN: ; %bb.0:
; GCN-NEXT: s_lshl_b32 s0, s0, 31
; GCN-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_shl_i32_31:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 31
; GFX10PLUS-NEXT: ; return to shader part epilog
  %result = shl i32 %value, 31
  ret i32 %result
}

; Mixed operands: the result is bitcast to float so it is returned as a
; non-integer value from the amdgpu_ps function.
define amdgpu_ps float @shl_i32_sv(i32 inreg %value, i32 %amount) {
; GFX6-LABEL: shl_i32_sv:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_lshl_b32_e32 v0, s0, v0
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: shl_i32_sv:
; GFX8: ; %bb.0:
; GFX8-NEXT: v_lshlrev_b32_e64 v0, v0, s0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: shl_i32_sv:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_lshlrev_b32_e64 v0, v0, s0
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: shl_i32_sv:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: v_lshlrev_b32_e64 v0, v0, s0
; GFX10PLUS-NEXT: ; return to shader part epilog
  %result = shl i32 %value, %amount
  %cast = bitcast i32 %result to float
  ret float %cast
}

define amdgpu_ps float @shl_i32_vs(i32 %value, i32 inreg %amount) {
; GCN-LABEL: shl_i32_vs:
; GCN: ; %bb.0:
; GCN-NEXT: v_lshlrev_b32_e32 v0, s0, v0
; GCN-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: shl_i32_vs:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v0, s0, v0
; GFX10PLUS-NEXT: ; return to shader part epilog
  %result = shl i32 %value, %amount
  %cast = bitcast i32 %result to float
  ret float %cast
}

; ---- vectors of i32 ----

define <2 x i32> @v_shl_v2i32(<2 x i32> %value, <2 x i32> %amount) {
; GCN-LABEL: v_shl_v2i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_lshlrev_b32_e32 v0, v2, v0
; GCN-NEXT: v_lshlrev_b32_e32 v1, v3, v1
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_shl_v2i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v0, v2, v0
; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v1, v3, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
  %result = shl <2 x i32> %value, %amount
  ret <2 x i32> %result
}

define <2 x i32> @v_shl_v2i32_31(<2 x i32> %value) {
; GCN-LABEL: v_shl_v2i32_31:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_lshlrev_b32_e32 v0, 31, v0
; GCN-NEXT: v_lshlrev_b32_e32 v1, 31, v1
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_shl_v2i32_31:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v0, 31, v0
; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v1, 31, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
  %result = shl <2 x i32> %value, <i32 31, i32 31>
  ret <2 x i32> %result
}

define amdgpu_ps <2 x i32> @s_shl_v2i32(<2 x i32> inreg %value, <2 x i32> inreg %amount) {
; GCN-LABEL: s_shl_v2i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_lshl_b32 s0, s0, s2
; GCN-NEXT: s_lshl_b32 s1, s1, s3
; GCN-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_shl_v2i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, s2
; GFX10PLUS-NEXT: s_lshl_b32 s1, s1, s3
; GFX10PLUS-NEXT: ; return to shader part epilog
  %result = shl <2 x i32> %value, %amount
  ret <2 x i32> %result
}

define <3 x i32> @v_shl_v3i32(<3 x i32> %value, <3 x i32> %amount) {
; GCN-LABEL: v_shl_v3i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_lshlrev_b32_e32 v0, v3, v0
; GCN-NEXT: v_lshlrev_b32_e32 v1, v4, v1
; GCN-NEXT: v_lshlrev_b32_e32 v2, v5, v2
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_shl_v3i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v0, v3, v0
; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v1, v4, v1
; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v2, v5, v2
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
  %result = shl <3 x i32> %value, %amount
  ret <3 x i32> %result
}

define amdgpu_ps <3 x i32> @s_shl_v3i32(<3 x i32> inreg %value, <3 x i32> inreg %amount) {
; GCN-LABEL: s_shl_v3i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_lshl_b32 s0, s0, s3
; GCN-NEXT: s_lshl_b32 s1, s1, s4
; GCN-NEXT: s_lshl_b32 s2, s2, s5
; GCN-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_shl_v3i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, s3
; GFX10PLUS-NEXT: s_lshl_b32 s1, s1, s4
; GFX10PLUS-NEXT: s_lshl_b32 s2, s2, s5
; GFX10PLUS-NEXT: ; return to shader part epilog
  %result = shl <3 x i32> %value, %amount
  ret <3 x i32> %result
}

define <4 x i32> @v_shl_v4i32(<4 x i32> %value, <4 x i32> %amount) {
; GCN-LABEL: v_shl_v4i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_lshlrev_b32_e32 v0, v4, v0
; GCN-NEXT: v_lshlrev_b32_e32 v1, v5, v1
; GCN-NEXT: v_lshlrev_b32_e32 v2, v6, v2
; GCN-NEXT: v_lshlrev_b32_e32 v3, v7, v3
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_shl_v4i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v0, v4, v0
; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v1, v5, v1
; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v2, v6, v2
; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v3, v7, v3
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
  %result = shl <4 x i32> %value, %amount
  ret <4 x i32> %result
}

define amdgpu_ps <4 x i32> @s_shl_v4i32(<4 x i32> inreg %value, <4 x i32> inreg %amount) {
; GCN-LABEL: s_shl_v4i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_lshl_b32 s0, s0, s4
; GCN-NEXT: s_lshl_b32 s1, s1, s5
; GCN-NEXT: s_lshl_b32 s2, s2, s6
; GCN-NEXT: s_lshl_b32 s3, s3, s7
; GCN-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_shl_v4i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, s4
; GFX10PLUS-NEXT: s_lshl_b32 s1, s1, s5
; GFX10PLUS-NEXT: s_lshl_b32 s2, s2, s6
; GFX10PLUS-NEXT: s_lshl_b32 s3, s3, s7
; GFX10PLUS-NEXT: ; return to shader part epilog
  %result = shl <4 x i32> %value, %amount
  ret <4 x i32> %result
}

define <5 x i32> @v_shl_v5i32(<5 x i32> %value, <5 x i32> %amount) {
; GCN-LABEL: v_shl_v5i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_lshlrev_b32_e32 v0, v5, v0
; GCN-NEXT: v_lshlrev_b32_e32 v1, v6, v1
; GCN-NEXT: v_lshlrev_b32_e32 v2, v7, v2
; GCN-NEXT: v_lshlrev_b32_e32 v3, v8, v3
; GCN-NEXT: v_lshlrev_b32_e32 v4, v9, v4
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_shl_v5i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v0, v5, v0
; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v1, v6, v1
; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v2, v7, v2
; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v3, v8, v3
; GFX10PLUS-NEXT: v_lshlrev_b32_e32 v4, v9, v4
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
  %result = shl <5 x i32> %value, %amount
  ret <5 x i32> %result
}

define amdgpu_ps <5 x i32> @s_shl_v5i32(<5 x i32> inreg %value, <5 x i32> inreg %amount) {
; GCN-LABEL: s_shl_v5i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_lshl_b32 s0, s0, s5
; GCN-NEXT: s_lshl_b32 s1, s1, s6
; GCN-NEXT: s_lshl_b32 s2, s2, s7
; GCN-NEXT: s_lshl_b32 s3, s3, s8
; GCN-NEXT: s_lshl_b32 s4, s4, s9
; GCN-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_shl_v5i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, s5
; GFX10PLUS-NEXT: s_lshl_b32 s1, s1, s6
; GFX10PLUS-NEXT: s_lshl_b32 s2, s2, s7
; GFX10PLUS-NEXT: s_lshl_b32 s3, s3, s8
; GFX10PLUS-NEXT: s_lshl_b32 s4, s4, s9
; GFX10PLUS-NEXT: ; return to shader part epilog
  %result = shl <5 x i32> %value, %amount
  ret <5 x i32> %result
}

; The expected code below loads one dword relative to s32 before the final
; shift, in addition to using v0-v30.
define <16 x i32> @v_shl_v16i32(<16 x i32> %value, <16 x i32> %amount) {
; GCN-LABEL: v_shl_v16i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT: v_lshlrev_b32_e32 v0, v16, v0
; GCN-NEXT: buffer_load_dword v16, off, s[0:3], s32
; GCN-NEXT: v_lshlrev_b32_e32 v1, v17, v1
; GCN-NEXT: v_lshlrev_b32_e32 v2, v18, v2
; GCN-NEXT: v_lshlrev_b32_e32 v3, v19, v3
; GCN-NEXT: v_lshlrev_b32_e32 v4, v20, v4
; GCN-NEXT: v_lshlrev_b32_e32 v5, v21, v5
; GCN-NEXT: v_lshlrev_b32_e32 v6, v22, v6
; GCN-NEXT: v_lshlrev_b32_e32 v7, v23, v7
; GCN-NEXT: v_lshlrev_b32_e32 v8, v24, v8
; GCN-NEXT: v_lshlrev_b32_e32 v9, v25, v9
; GCN-NEXT: v_lshlrev_b32_e32 v10, v26, v10
; GCN-NEXT: v_lshlrev_b32_e32 v11, v27, v11
; GCN-NEXT: v_lshlrev_b32_e32 v12, v28, v12
; GCN-NEXT: v_lshlrev_b32_e32 v13, v29, v13
; GCN-NEXT: v_lshlrev_b32_e32 v14, v30, v14
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: v_lshlrev_b32_e32 v15, v16, v15
; GCN-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_shl_v16i32:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: buffer_load_dword v31, off, s[0:3], s32
; GFX10-NEXT: v_lshlrev_b32_e32 v0, v16, v0
; GFX10-NEXT: v_lshlrev_b32_e32 v1, v17, v1
; GFX10-NEXT: v_lshlrev_b32_e32 v2, v18, v2
; GFX10-NEXT: v_lshlrev_b32_e32 v3, v19, v3
; GFX10-NEXT: v_lshlrev_b32_e32 v4, v20, v4
; GFX10-NEXT: v_lshlrev_b32_e32 v5, v21, v5
; GFX10-NEXT: v_lshlrev_b32_e32 v6, v22, v6
; GFX10-NEXT: v_lshlrev_b32_e32 v7, v23, v7
; GFX10-NEXT: v_lshlrev_b32_e32 v8, v24, v8
; GFX10-NEXT: v_lshlrev_b32_e32 v9, v25, v9
; GFX10-NEXT: v_lshlrev_b32_e32 v10, v26, v10
; GFX10-NEXT: v_lshlrev_b32_e32 v11, v27, v11
; GFX10-NEXT: v_lshlrev_b32_e32 v12, v28, v12
; GFX10-NEXT: v_lshlrev_b32_e32 v13, v29, v13
; GFX10-NEXT: v_lshlrev_b32_e32 v14, v30, v14
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: v_lshlrev_b32_e32 v15, v31, v15
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_shl_v16i32:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: scratch_load_b32 v31, off, s32
; GFX11-NEXT: v_lshlrev_b32_e32 v0, v16, v0
; GFX11-NEXT: v_lshlrev_b32_e32 v1, v17, v1
; GFX11-NEXT: v_lshlrev_b32_e32 v2, v18, v2
; GFX11-NEXT: v_lshlrev_b32_e32 v3, v19, v3
; GFX11-NEXT: v_lshlrev_b32_e32 v4, v20, v4
; GFX11-NEXT: v_lshlrev_b32_e32 v5, v21, v5
; GFX11-NEXT: v_lshlrev_b32_e32 v6, v22, v6
; GFX11-NEXT: v_lshlrev_b32_e32 v7, v23, v7
; GFX11-NEXT: v_lshlrev_b32_e32 v8, v24, v8
; GFX11-NEXT: v_lshlrev_b32_e32 v9, v25, v9
; GFX11-NEXT: v_lshlrev_b32_e32 v10, v26, v10
; GFX11-NEXT: v_lshlrev_b32_e32 v11, v27, v11
; GFX11-NEXT: v_lshlrev_b32_e32 v12, v28, v12
; GFX11-NEXT: v_lshlrev_b32_e32 v13, v29, v13
; GFX11-NEXT: v_lshlrev_b32_e32 v14, v30, v14
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: v_lshlrev_b32_e32 v15, v31, v15
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %result = shl <16 x i32> %value, %amount
  ret <16 x i32> %result
}

define amdgpu_ps <16 x i32> @s_shl_v16i32(<16 x i32> inreg %value, <16 x i32> inreg %amount) {
; GCN-LABEL: s_shl_v16i32:
; GCN: ; %bb.0:
; GCN-NEXT: s_lshl_b32 s0, s0, s16
; GCN-NEXT: s_lshl_b32 s1, s1, s17
; GCN-NEXT: s_lshl_b32 s2, s2, s18
; GCN-NEXT: s_lshl_b32 s3, s3, s19
; GCN-NEXT: s_lshl_b32 s4, s4, s20
; GCN-NEXT: s_lshl_b32 s5, s5, s21
; GCN-NEXT: s_lshl_b32 s6, s6, s22
; GCN-NEXT: s_lshl_b32 s7, s7, s23
; GCN-NEXT: s_lshl_b32 s8, s8, s24
; GCN-NEXT: s_lshl_b32 s9, s9, s25
; GCN-NEXT: s_lshl_b32 s10, s10, s26
; GCN-NEXT: s_lshl_b32 s11, s11, s27
; GCN-NEXT: s_lshl_b32 s12, s12, s28
; GCN-NEXT: s_lshl_b32 s13, s13, s29
; GCN-NEXT: s_lshl_b32 s14, s14, s30
; GCN-NEXT: s_lshl_b32 s15, s15, s31
; GCN-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_shl_v16i32:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, s16
; GFX10PLUS-NEXT: s_lshl_b32 s1, s1, s17
; GFX10PLUS-NEXT: s_lshl_b32 s2, s2, s18
; GFX10PLUS-NEXT: s_lshl_b32 s3, s3, s19
; GFX10PLUS-NEXT: s_lshl_b32 s4, s4, s20
; GFX10PLUS-NEXT: s_lshl_b32 s5, s5, s21
; GFX10PLUS-NEXT: s_lshl_b32 s6, s6, s22
; GFX10PLUS-NEXT: s_lshl_b32 s7, s7, s23
; GFX10PLUS-NEXT: s_lshl_b32 s8, s8, s24
; GFX10PLUS-NEXT: s_lshl_b32 s9, s9, s25
; GFX10PLUS-NEXT: s_lshl_b32 s10, s10, s26
; GFX10PLUS-NEXT: s_lshl_b32 s11, s11, s27
; GFX10PLUS-NEXT: s_lshl_b32 s12, s12, s28
; GFX10PLUS-NEXT: s_lshl_b32 s13, s13, s29
; GFX10PLUS-NEXT: s_lshl_b32 s14, s14, s30
; GFX10PLUS-NEXT: s_lshl_b32 s15, s15, s31
; GFX10PLUS-NEXT: ; return to shader part epilog
  %result = shl <16 x i32> %value, %amount
  ret <16 x i32> %result
}

; ---- i16 ----

define i16 @v_shl_i16(i16 %value, i16 %amount) {
; GFX6-LABEL: v_shl_i16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX6-NEXT: v_lshlrev_b32_e32 v0, v1, v0
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_shl_i16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_lshlrev_b16_e32 v0, v1, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_shl_i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_lshlrev_b16_e32 v0, v1, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_shl_i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT: v_lshlrev_b16 v0, v1, v0
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
  %result = shl i16 %value, %amount
  ret i16 %result
}

define i16 @v_shl_i16_15(i16 %value) {
; GFX6-LABEL: v_shl_i16_15:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 15, v0
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_shl_i16_15:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_lshlrev_b16_e32 v0, 15, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_shl_i16_15:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_lshlrev_b16_e32 v0, 15, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_shl_i16_15:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT: v_lshlrev_b16 v0, 15, v0
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
  %result = shl i16 %value, 15
  ret i16 %result
}

define amdgpu_ps i16 @s_shl_i16(i16 inreg %value, i16 inreg %amount) {
; GFX6-LABEL: s_shl_i16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_lshl_b32 s0, s0, s1
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_shl_i16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
; GFX8-NEXT: s_lshl_b32 s0, s0, s1
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: s_shl_i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_and_b32 s0, s0, 0xffff
; GFX9-NEXT: s_lshl_b32 s0, s0, s1
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_shl_i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_and_b32 s0, s0, 0xffff
; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, s1
; GFX10PLUS-NEXT: ; return to shader part epilog
  %result = shl i16 %value, %amount
  ret i16 %result
}

define amdgpu_ps i16 @s_shl_i16_15(i16 inreg %value) {
; GCN-LABEL: s_shl_i16_15:
; GCN: ; %bb.0:
; GCN-NEXT: s_lshl_b32 s0, s0, 15
; GCN-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_shl_i16_15:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 15
; GFX10PLUS-NEXT: ; return to shader part epilog
  %result = shl i16 %value, 15
  ret i16 %result
}

define amdgpu_ps half @shl_i16_sv(i16 inreg %value, i16 %amount) {
; GFX6-LABEL: shl_i16_sv:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX6-NEXT: v_lshl_b32_e32 v0, s0, v0
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: shl_i16_sv:
; GFX8: ; %bb.0:
; GFX8-NEXT: v_lshlrev_b16_e64 v0, v0, s0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: shl_i16_sv:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_lshlrev_b16_e64 v0, v0, s0
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: shl_i16_sv:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: v_lshlrev_b16 v0, v0, s0
; GFX10PLUS-NEXT: ; return to shader part epilog
  %result = shl i16 %value, %amount
  %cast = bitcast i16 %result to half
  ret half %cast
}

define amdgpu_ps half @shl_i16_vs(i16 %value, i16 inreg %amount) {
; GFX6-LABEL: shl_i16_vs:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_and_b32 s0, s0, 0xffff
; GFX6-NEXT: v_lshlrev_b32_e32 v0, s0, v0
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: shl_i16_vs:
; GFX8: ; %bb.0:
; GFX8-NEXT: v_lshlrev_b16_e32 v0, s0, v0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: shl_i16_vs:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_lshlrev_b16_e32 v0, s0, v0
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: shl_i16_vs:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: v_lshlrev_b16 v0, s0, v0
; GFX10PLUS-NEXT: ; return to shader part epilog
  %result = shl i16 %value, %amount
  %cast = bitcast i16 %result to half
  ret half %cast
}

; ---- <2 x i16> ----

define <2 x i16> @v_shl_v2i16(<2 x i16> %value, <2 x i16> %amount) {
; GFX6-LABEL: v_shl_v2i16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v2
; GFX6-NEXT: v_lshlrev_b32_e32 v0, v2, v0
; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v3
; GFX6-NEXT: v_lshlrev_b32_e32 v1, v2, v1
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_shl_v2i16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_lshlrev_b16_e32 v2, v1, v0
; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v1, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_or_b32_e32 v0, v2, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_shl_v2i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_lshlrev_b16 v0, v1, v0
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_shl_v2i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v0, v1, v0
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
  %result = shl <2 x i16> %value, %amount
  ret <2 x i16> %result
}

define <2 x i16> @v_shl_v2i16_15(<2 x i16> %value) {
; GFX6-LABEL: v_shl_v2i16_15:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_lshlrev_b32_e32 v0, 15, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 15, v1
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_shl_v2i16_15:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_mov_b32_e32 v2, 15
; GFX8-NEXT: v_lshlrev_b16_e32 v1, 15, v0
; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_shl_v2i16_15:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_lshlrev_b16 v0, 15, v0 op_sel_hi:[0,1]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_shl_v2i16_15:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v0, 15, v0 op_sel_hi:[0,1]
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
  %result = shl <2 x i16> %value, <i16 15, i16 15>
  ret <2 x i16> %result
}

define amdgpu_ps i32 @s_shl_v2i16(<2 x i16> inreg %value, <2 x i16> inreg %amount) {
; GFX6-LABEL: s_shl_v2i16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_lshl_b32 s1, s1, s3
; GFX6-NEXT: s_lshl_b32 s0, s0, s2
; GFX6-NEXT: s_and_b32 s1, s1, 0xffff
; GFX6-NEXT: s_and_b32 s0, s0, 0xffff
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_shl_v2i16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_lshr_b32 s2, s0, 16
; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
; GFX8-NEXT: s_lshr_b32 s3, s1, 16
; GFX8-NEXT: s_lshl_b32 s0, s0, s1
; GFX8-NEXT: s_lshl_b32 s1, s2, s3
; GFX8-NEXT: s_lshl_b32 s1, s1, 16
; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
; GFX8-NEXT: s_or_b32 s0, s1, s0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: s_shl_v2i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_lshr_b32 s2, s0, 16
; GFX9-NEXT: s_lshr_b32 s3, s1, 16
; GFX9-NEXT: s_lshl_b32 s0, s0, s1
; GFX9-NEXT: s_lshl_b32 s1, s2, s3
; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s1
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_shl_v2i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_lshr_b32 s2, s0, 16
; GFX10PLUS-NEXT: s_lshr_b32 s3, s1, 16
; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, s1
; GFX10PLUS-NEXT: s_lshl_b32 s1, s2, s3
; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s0, s0, s1
; GFX10PLUS-NEXT: ; return to shader part epilog
  %result = shl <2 x i16> %value, %amount
  %cast = bitcast <2 x i16> %result to i32
  ret i32 %cast
}

define amdgpu_ps float @shl_v2i16_sv(<2 x i16> inreg %value, <2 x i16> %amount) {
; GFX6-LABEL: shl_v2i16_sv:
; GFX6: ; %bb.0:
; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX6-NEXT: v_lshl_b32_e32 v1, s1, v1
; GFX6-NEXT: v_lshl_b32_e32 v0, s0, v0
; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: shl_v2i16_sv:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_lshr_b32 s1, s0, 16
; GFX8-NEXT: v_mov_b32_e32 v2, s1
; GFX8-NEXT: v_lshlrev_b16_e64 v1, v0, s0
; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: shl_v2i16_sv:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_pk_lshlrev_b16 v0, v0, s0
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: shl_v2i16_sv:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v0, v0, s0
; GFX10PLUS-NEXT: ; return to shader part epilog
  %result = shl <2 x i16> %value, %amount
  %cast = bitcast <2 x i16> %result to float
  ret float %cast
}

define amdgpu_ps float @shl_v2i16_vs(<2 x i16> %value, <2 x i16> inreg %amount) {
; GFX6-LABEL: shl_v2i16_vs:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_and_b32 s0, s0, 0xffff
; GFX6-NEXT: v_lshlrev_b32_e32 v0, s0, v0
; GFX6-NEXT: s_and_b32 s0, s1, 0xffff
; GFX6-NEXT: v_lshlrev_b32_e32 v1, s0, v1
; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: shl_v2i16_vs:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_lshr_b32 s1, s0, 16
; GFX8-NEXT: v_mov_b32_e32 v2, s1
; GFX8-NEXT: v_lshlrev_b16_e32 v1, s0, v0
; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
; GFX8-NEXT: v_or_b32_e32 v0, v1, v0
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: shl_v2i16_vs:
; GFX9: ; %bb.0:
; GFX9-NEXT: v_pk_lshlrev_b16 v0, s0, v0
; GFX9-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: shl_v2i16_vs:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v0, s0, v0
; GFX10PLUS-NEXT: ; return to shader part epilog
  %result = shl <2 x i16> %value, %amount
  %cast = bitcast <2 x i16> %result to float
  ret float %cast
}

; FIXME
; The two <3 x i16> cases below are commented out and carry no assertions.
; NOTE(review): the reason is not recorded in this file; presumably this type
; is not handled yet. Confirm before re-enabling and regenerating.
; define <3 x i16> @v_shl_v3i16(<3 x i16> %value, <3 x i16> %amount) {
;   %result = shl <3 x i16> %value, %amount
;   ret <3 x i16> %result
; }

; define amdgpu_ps <3 x i16> @s_shl_v3i16(<3 x i16> inreg %value, <3 x i16> inreg %amount) {
;   %result = shl <3 x i16> %value, %amount
;   ret <3 x i16> %result
; }

; <4 x i16> shift with plain (non-inreg) arguments; the result is bitcast to
; <2 x float> before being returned.
define <2 x float> @v_shl_v4i16(<4 x i16> %value, <4 x i16> %amount) {
; GFX6-LABEL: v_shl_v4i16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v4
; GFX6-NEXT: v_lshlrev_b32_e32 v0, v4, v0
; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v5
; GFX6-NEXT: v_lshlrev_b32_e32 v1, v4, v1
; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v6
; GFX6-NEXT: v_lshlrev_b32_e32 v2, v4, v2
; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v7
; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1
; GFX6-NEXT: v_lshlrev_b32_e32 v3, v4, v3
; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0
; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1
; GFX6-NEXT: v_or_b32_e32 v0, v0, v1
; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v2
; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v3
; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2
; GFX6-NEXT: v_or_b32_e32 v1, v1, v2
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_shl_v4i16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_lshlrev_b16_e32 v4, v2, v0
; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_lshlrev_b16_e32 v2, v3, v1
; GFX8-NEXT: v_lshlrev_b16_sdwa v1, v3, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1
; GFX8-NEXT: v_or_b32_e32 v0, v4, v0
; GFX8-NEXT: v_or_b32_e32 v1, v2, v1
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_shl_v4i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_pk_lshlrev_b16 v0, v2, v0
; GFX9-NEXT: v_pk_lshlrev_b16 v1, v3, v1
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_shl_v4i16:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v0, v2, v0
; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v1, v3, v1
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
  %result = shl <4 x i16> %value, %amount
  %cast = bitcast <4 x i16> %result to <2 x float>
  ret <2 x float> %cast
}

define amdgpu_ps <2 x i32> @s_shl_v4i16(<4 x i16> inreg %value, <4 x i16> inreg %amount) {
; GFX6-LABEL: s_shl_v4i16:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_lshl_b32 s1, s1, s5
; GFX6-NEXT: s_lshl_b32 s0, s0, s4
; GFX6-NEXT: s_and_b32 s1, s1, 0xffff
; GFX6-NEXT: s_lshl_b32 s2, s2, s6
; GFX6-NEXT: s_lshl_b32 s3, s3, s7
; GFX6-NEXT: s_and_b32 s0, s0, 0xffff
; GFX6-NEXT: s_lshl_b32 s1, s1, 16
; GFX6-NEXT: s_or_b32 s0, s0, s1
; GFX6-NEXT: s_and_b32 s1, s2, 0xffff
; GFX6-NEXT: s_and_b32 s2, s3, 0xffff
; GFX6-NEXT: s_lshl_b32 s2, s2, 16
; GFX6-NEXT: s_or_b32 s1, s1, s2
; GFX6-NEXT: ; return to shader part epilog
;
; GFX8-LABEL: s_shl_v4i16:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_lshr_b32 s4, s0, 16
; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
; GFX8-NEXT: s_lshr_b32 s6, s2, 16
; GFX8-NEXT: s_lshr_b32 s5, s1, 16
; GFX8-NEXT: s_and_b32 s1, s1, 0xffff
; GFX8-NEXT: s_lshr_b32 s7, s3, 16
; GFX8-NEXT: s_lshl_b32 s0, s0, s2
; GFX8-NEXT: s_lshl_b32 s2, s4, s6
; GFX8-NEXT: s_lshl_b32 s1, s1, s3
; GFX8-NEXT: s_lshl_b32 s3, s5, s7
; GFX8-NEXT: s_lshl_b32 s2, s2, 16
; GFX8-NEXT: s_and_b32 s0, s0, 0xffff
; GFX8-NEXT: s_or_b32 s0, s2, s0
; GFX8-NEXT: s_lshl_b32 s2, s3, 16
; GFX8-NEXT: s_and_b32 s1, s1, 0xffff
; GFX8-NEXT: s_or_b32 s1, s2, s1
; GFX8-NEXT: ; return to shader part epilog
;
; GFX9-LABEL: s_shl_v4i16:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_lshr_b32 s4, s0, 16
; GFX9-NEXT: s_lshr_b32 s5, s2, 16
; GFX9-NEXT: s_lshl_b32 s0, s0, s2
; GFX9-NEXT: s_lshl_b32 s2, s4, s5
; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s2
; GFX9-NEXT: s_lshr_b32 s2, s1, 16
; GFX9-NEXT: s_lshr_b32 s4, s3, 16
; GFX9-NEXT: s_lshl_b32 s1, s1, s3
; GFX9-NEXT: s_lshl_b32 s2, s2, s4
; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s2
; GFX9-NEXT: ; return to shader part epilog
;
;
GFX10PLUS-LABEL: s_shl_v4i16: 1011; GFX10PLUS: ; %bb.0: 1012; GFX10PLUS-NEXT: s_lshr_b32 s4, s0, 16 1013; GFX10PLUS-NEXT: s_lshr_b32 s5, s2, 16 1014; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, s2 1015; GFX10PLUS-NEXT: s_lshl_b32 s2, s4, s5 1016; GFX10PLUS-NEXT: s_lshr_b32 s4, s1, 16 1017; GFX10PLUS-NEXT: s_lshr_b32 s5, s3, 16 1018; GFX10PLUS-NEXT: s_lshl_b32 s1, s1, s3 1019; GFX10PLUS-NEXT: s_lshl_b32 s3, s4, s5 1020; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s0, s0, s2 1021; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s1, s1, s3 1022; GFX10PLUS-NEXT: ; return to shader part epilog 1023 %result = shl <4 x i16> %value, %amount 1024 %cast = bitcast <4 x i16> %result to <2 x i32> 1025 ret <2 x i32> %cast 1026} 1027 1028; FIXME 1029; define <5 x i16> @v_shl_v5i16(<5 x i16> %value, <5 x i16> %amount) { 1030; %result = shl <5 x i16> %value, %amount 1031; ret <5 x i16> %result 1032; } 1033 1034; define amdgpu_ps <5 x i16> @s_shl_v5i16(<5 x i16> inreg %value, <5 x i16> inreg %amount) { 1035; %result = shl <5 x i16> %value, %amount 1036; ret <5 x i16> %result 1037; } 1038 1039; define <3 x float> @v_shl_v6i16(<6 x i16> %value, <6 x i16> %amount) { 1040; %result = shl <6 x i16> %value, %amount 1041; %cast = bitcast <6 x i16> %result to <3 x float> 1042; ret <3 x float> %cast 1043; } 1044 1045; define amdgpu_ps <3 x i32> @s_shl_v6i16(<6 x i16> inreg %value, <6 x i16> inreg %amount) { 1046; %result = shl <6 x i16> %value, %amount 1047; %cast = bitcast <6 x i16> %result to <3 x i32> 1048; ret <3 x i32> %cast 1049; } 1050 1051define <4 x float> @v_shl_v8i16(<8 x i16> %value, <8 x i16> %amount) { 1052; GFX6-LABEL: v_shl_v8i16: 1053; GFX6: ; %bb.0: 1054; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1055; GFX6-NEXT: v_and_b32_e32 v8, 0xffff, v8 1056; GFX6-NEXT: v_lshlrev_b32_e32 v0, v8, v0 1057; GFX6-NEXT: v_and_b32_e32 v8, 0xffff, v9 1058; GFX6-NEXT: v_lshlrev_b32_e32 v1, v8, v1 1059; GFX6-NEXT: v_and_b32_e32 v8, 0xffff, v10 1060; GFX6-NEXT: v_lshlrev_b32_e32 v2, v8, v2 1061; GFX6-NEXT: v_and_b32_e32 
v8, 0xffff, v11 1062; GFX6-NEXT: v_lshlrev_b32_e32 v3, v8, v3 1063; GFX6-NEXT: v_and_b32_e32 v8, 0xffff, v12 1064; GFX6-NEXT: v_lshlrev_b32_e32 v4, v8, v4 1065; GFX6-NEXT: v_and_b32_e32 v8, 0xffff, v13 1066; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1 1067; GFX6-NEXT: v_lshlrev_b32_e32 v5, v8, v5 1068; GFX6-NEXT: v_and_b32_e32 v8, 0xffff, v14 1069; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 1070; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 1071; GFX6-NEXT: v_lshlrev_b32_e32 v6, v8, v6 1072; GFX6-NEXT: v_and_b32_e32 v8, 0xffff, v15 1073; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 1074; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v2 1075; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v3 1076; GFX6-NEXT: v_lshlrev_b32_e32 v7, v8, v7 1077; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2 1078; GFX6-NEXT: v_and_b32_e32 v3, 0xffff, v5 1079; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 1080; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v4 1081; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3 1082; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v7 1083; GFX6-NEXT: v_or_b32_e32 v2, v2, v3 1084; GFX6-NEXT: v_and_b32_e32 v3, 0xffff, v6 1085; GFX6-NEXT: v_lshlrev_b32_e32 v4, 16, v4 1086; GFX6-NEXT: v_or_b32_e32 v3, v3, v4 1087; GFX6-NEXT: s_setpc_b64 s[30:31] 1088; 1089; GFX8-LABEL: v_shl_v8i16: 1090; GFX8: ; %bb.0: 1091; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1092; GFX8-NEXT: v_lshlrev_b16_e32 v8, v4, v0 1093; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v4, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 1094; GFX8-NEXT: v_lshlrev_b16_e32 v4, v5, v1 1095; GFX8-NEXT: v_lshlrev_b16_sdwa v1, v5, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 1096; GFX8-NEXT: v_or_b32_e32 v1, v4, v1 1097; GFX8-NEXT: v_lshlrev_b16_e32 v4, v6, v2 1098; GFX8-NEXT: v_lshlrev_b16_sdwa v2, v6, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 1099; GFX8-NEXT: v_or_b32_e32 v2, v4, v2 1100; GFX8-NEXT: v_lshlrev_b16_e32 v4, v7, v3 1101; GFX8-NEXT: v_lshlrev_b16_sdwa v3, v7, v3 dst_sel:WORD_1 
dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:WORD_1 1102; GFX8-NEXT: v_or_b32_e32 v0, v8, v0 1103; GFX8-NEXT: v_or_b32_e32 v3, v4, v3 1104; GFX8-NEXT: s_setpc_b64 s[30:31] 1105; 1106; GFX9-LABEL: v_shl_v8i16: 1107; GFX9: ; %bb.0: 1108; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1109; GFX9-NEXT: v_pk_lshlrev_b16 v0, v4, v0 1110; GFX9-NEXT: v_pk_lshlrev_b16 v1, v5, v1 1111; GFX9-NEXT: v_pk_lshlrev_b16 v2, v6, v2 1112; GFX9-NEXT: v_pk_lshlrev_b16 v3, v7, v3 1113; GFX9-NEXT: s_setpc_b64 s[30:31] 1114; 1115; GFX10PLUS-LABEL: v_shl_v8i16: 1116; GFX10PLUS: ; %bb.0: 1117; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1118; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v0, v4, v0 1119; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v1, v5, v1 1120; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v2, v6, v2 1121; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v3, v7, v3 1122; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] 1123 %result = shl <8 x i16> %value, %amount 1124 %cast = bitcast <8 x i16> %result to <4 x float> 1125 ret <4 x float> %cast 1126} 1127 1128define amdgpu_ps <4 x i32> @s_shl_v8i16(<8 x i16> inreg %value, <8 x i16> inreg %amount) { 1129; GFX6-LABEL: s_shl_v8i16: 1130; GFX6: ; %bb.0: 1131; GFX6-NEXT: s_lshl_b32 s1, s1, s9 1132; GFX6-NEXT: s_lshl_b32 s0, s0, s8 1133; GFX6-NEXT: s_and_b32 s1, s1, 0xffff 1134; GFX6-NEXT: s_lshl_b32 s2, s2, s10 1135; GFX6-NEXT: s_lshl_b32 s3, s3, s11 1136; GFX6-NEXT: s_and_b32 s0, s0, 0xffff 1137; GFX6-NEXT: s_lshl_b32 s1, s1, 16 1138; GFX6-NEXT: s_lshl_b32 s5, s5, s13 1139; GFX6-NEXT: s_or_b32 s0, s0, s1 1140; GFX6-NEXT: s_and_b32 s1, s2, 0xffff 1141; GFX6-NEXT: s_and_b32 s2, s3, 0xffff 1142; GFX6-NEXT: s_lshl_b32 s4, s4, s12 1143; GFX6-NEXT: s_lshl_b32 s7, s7, s15 1144; GFX6-NEXT: s_lshl_b32 s2, s2, 16 1145; GFX6-NEXT: s_and_b32 s3, s5, 0xffff 1146; GFX6-NEXT: s_lshl_b32 s6, s6, s14 1147; GFX6-NEXT: s_or_b32 s1, s1, s2 1148; GFX6-NEXT: s_and_b32 s2, s4, 0xffff 1149; GFX6-NEXT: s_lshl_b32 s3, s3, 16 1150; GFX6-NEXT: s_and_b32 s4, s7, 0xffff 1151; GFX6-NEXT: s_or_b32 s2, 
s2, s3 1152; GFX6-NEXT: s_and_b32 s3, s6, 0xffff 1153; GFX6-NEXT: s_lshl_b32 s4, s4, 16 1154; GFX6-NEXT: s_or_b32 s3, s3, s4 1155; GFX6-NEXT: ; return to shader part epilog 1156; 1157; GFX8-LABEL: s_shl_v8i16: 1158; GFX8: ; %bb.0: 1159; GFX8-NEXT: s_lshr_b32 s8, s0, 16 1160; GFX8-NEXT: s_and_b32 s0, s0, 0xffff 1161; GFX8-NEXT: s_lshr_b32 s12, s4, 16 1162; GFX8-NEXT: s_lshr_b32 s9, s1, 16 1163; GFX8-NEXT: s_and_b32 s1, s1, 0xffff 1164; GFX8-NEXT: s_lshr_b32 s13, s5, 16 1165; GFX8-NEXT: s_lshl_b32 s0, s0, s4 1166; GFX8-NEXT: s_lshl_b32 s4, s8, s12 1167; GFX8-NEXT: s_lshr_b32 s10, s2, 16 1168; GFX8-NEXT: s_and_b32 s2, s2, 0xffff 1169; GFX8-NEXT: s_lshr_b32 s14, s6, 16 1170; GFX8-NEXT: s_lshl_b32 s1, s1, s5 1171; GFX8-NEXT: s_lshl_b32 s5, s9, s13 1172; GFX8-NEXT: s_lshl_b32 s4, s4, 16 1173; GFX8-NEXT: s_and_b32 s0, s0, 0xffff 1174; GFX8-NEXT: s_lshr_b32 s11, s3, 16 1175; GFX8-NEXT: s_and_b32 s3, s3, 0xffff 1176; GFX8-NEXT: s_lshr_b32 s15, s7, 16 1177; GFX8-NEXT: s_lshl_b32 s2, s2, s6 1178; GFX8-NEXT: s_lshl_b32 s6, s10, s14 1179; GFX8-NEXT: s_or_b32 s0, s4, s0 1180; GFX8-NEXT: s_lshl_b32 s4, s5, 16 1181; GFX8-NEXT: s_and_b32 s1, s1, 0xffff 1182; GFX8-NEXT: s_lshl_b32 s3, s3, s7 1183; GFX8-NEXT: s_lshl_b32 s7, s11, s15 1184; GFX8-NEXT: s_or_b32 s1, s4, s1 1185; GFX8-NEXT: s_lshl_b32 s4, s6, 16 1186; GFX8-NEXT: s_and_b32 s2, s2, 0xffff 1187; GFX8-NEXT: s_or_b32 s2, s4, s2 1188; GFX8-NEXT: s_lshl_b32 s4, s7, 16 1189; GFX8-NEXT: s_and_b32 s3, s3, 0xffff 1190; GFX8-NEXT: s_or_b32 s3, s4, s3 1191; GFX8-NEXT: ; return to shader part epilog 1192; 1193; GFX9-LABEL: s_shl_v8i16: 1194; GFX9: ; %bb.0: 1195; GFX9-NEXT: s_lshr_b32 s8, s0, 16 1196; GFX9-NEXT: s_lshr_b32 s9, s4, 16 1197; GFX9-NEXT: s_lshl_b32 s0, s0, s4 1198; GFX9-NEXT: s_lshl_b32 s4, s8, s9 1199; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s4 1200; GFX9-NEXT: s_lshr_b32 s4, s1, 16 1201; GFX9-NEXT: s_lshr_b32 s8, s5, 16 1202; GFX9-NEXT: s_lshl_b32 s1, s1, s5 1203; GFX9-NEXT: s_lshl_b32 s4, s4, s8 1204; GFX9-NEXT: 
s_pack_ll_b32_b16 s1, s1, s4 1205; GFX9-NEXT: s_lshr_b32 s4, s2, 16 1206; GFX9-NEXT: s_lshr_b32 s5, s6, 16 1207; GFX9-NEXT: s_lshl_b32 s2, s2, s6 1208; GFX9-NEXT: s_lshl_b32 s4, s4, s5 1209; GFX9-NEXT: s_pack_ll_b32_b16 s2, s2, s4 1210; GFX9-NEXT: s_lshr_b32 s4, s3, 16 1211; GFX9-NEXT: s_lshr_b32 s5, s7, 16 1212; GFX9-NEXT: s_lshl_b32 s3, s3, s7 1213; GFX9-NEXT: s_lshl_b32 s4, s4, s5 1214; GFX9-NEXT: s_pack_ll_b32_b16 s3, s3, s4 1215; GFX9-NEXT: ; return to shader part epilog 1216; 1217; GFX10PLUS-LABEL: s_shl_v8i16: 1218; GFX10PLUS: ; %bb.0: 1219; GFX10PLUS-NEXT: s_lshr_b32 s8, s0, 16 1220; GFX10PLUS-NEXT: s_lshr_b32 s9, s4, 16 1221; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, s4 1222; GFX10PLUS-NEXT: s_lshl_b32 s4, s8, s9 1223; GFX10PLUS-NEXT: s_lshr_b32 s8, s1, 16 1224; GFX10PLUS-NEXT: s_lshr_b32 s9, s5, 16 1225; GFX10PLUS-NEXT: s_lshl_b32 s1, s1, s5 1226; GFX10PLUS-NEXT: s_lshl_b32 s5, s8, s9 1227; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s0, s0, s4 1228; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s1, s1, s5 1229; GFX10PLUS-NEXT: s_lshr_b32 s4, s2, 16 1230; GFX10PLUS-NEXT: s_lshr_b32 s5, s6, 16 1231; GFX10PLUS-NEXT: s_lshl_b32 s2, s2, s6 1232; GFX10PLUS-NEXT: s_lshl_b32 s4, s4, s5 1233; GFX10PLUS-NEXT: s_lshr_b32 s5, s3, 16 1234; GFX10PLUS-NEXT: s_lshr_b32 s6, s7, 16 1235; GFX10PLUS-NEXT: s_lshl_b32 s3, s3, s7 1236; GFX10PLUS-NEXT: s_lshl_b32 s5, s5, s6 1237; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s2, s2, s4 1238; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s3, s3, s5 1239; GFX10PLUS-NEXT: ; return to shader part epilog 1240 %result = shl <8 x i16> %value, %amount 1241 %cast = bitcast <8 x i16> %result to <4 x i32> 1242 ret <4 x i32> %cast 1243} 1244 1245define i64 @v_shl_i64(i64 %value, i64 %amount) { 1246; GFX6-LABEL: v_shl_i64: 1247; GFX6: ; %bb.0: 1248; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1249; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], v2 1250; GFX6-NEXT: s_setpc_b64 s[30:31] 1251; 1252; GFX8-LABEL: v_shl_i64: 1253; GFX8: ; %bb.0: 1254; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) 1255; GFX8-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1] 1256; GFX8-NEXT: s_setpc_b64 s[30:31] 1257; 1258; GFX9-LABEL: v_shl_i64: 1259; GFX9: ; %bb.0: 1260; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1261; GFX9-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1] 1262; GFX9-NEXT: s_setpc_b64 s[30:31] 1263; 1264; GFX10PLUS-LABEL: v_shl_i64: 1265; GFX10PLUS: ; %bb.0: 1266; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1267; GFX10PLUS-NEXT: v_lshlrev_b64 v[0:1], v2, v[0:1] 1268; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] 1269 %result = shl i64 %value, %amount 1270 ret i64 %result 1271} 1272 1273define i64 @v_shl_i64_63(i64 %value) { 1274; GCN-LABEL: v_shl_i64_63: 1275; GCN: ; %bb.0: 1276; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1277; GCN-NEXT: v_lshlrev_b32_e32 v1, 31, v0 1278; GCN-NEXT: v_mov_b32_e32 v0, 0 1279; GCN-NEXT: s_setpc_b64 s[30:31] 1280; 1281; GFX10-LABEL: v_shl_i64_63: 1282; GFX10: ; %bb.0: 1283; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1284; GFX10-NEXT: v_lshlrev_b32_e32 v1, 31, v0 1285; GFX10-NEXT: v_mov_b32_e32 v0, 0 1286; GFX10-NEXT: s_setpc_b64 s[30:31] 1287; 1288; GFX11-LABEL: v_shl_i64_63: 1289; GFX11: ; %bb.0: 1290; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1291; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_lshlrev_b32 v1, 31, v0 1292; GFX11-NEXT: s_setpc_b64 s[30:31] 1293 %result = shl i64 %value, 63 1294 ret i64 %result 1295} 1296 1297define i64 @v_shl_i64_33(i64 %value) { 1298; GCN-LABEL: v_shl_i64_33: 1299; GCN: ; %bb.0: 1300; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1301; GCN-NEXT: v_lshlrev_b32_e32 v1, 1, v0 1302; GCN-NEXT: v_mov_b32_e32 v0, 0 1303; GCN-NEXT: s_setpc_b64 s[30:31] 1304; 1305; GFX10-LABEL: v_shl_i64_33: 1306; GFX10: ; %bb.0: 1307; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1308; GFX10-NEXT: v_lshlrev_b32_e32 v1, 1, v0 1309; GFX10-NEXT: v_mov_b32_e32 v0, 0 1310; GFX10-NEXT: s_setpc_b64 s[30:31] 1311; 1312; GFX11-LABEL: v_shl_i64_33: 1313; GFX11: ; %bb.0: 1314; GFX11-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1315; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_lshlrev_b32 v1, 1, v0 1316; GFX11-NEXT: s_setpc_b64 s[30:31] 1317 %result = shl i64 %value, 33 1318 ret i64 %result 1319} 1320 1321define i64 @v_shl_i64_32(i64 %value) { 1322; GCN-LABEL: v_shl_i64_32: 1323; GCN: ; %bb.0: 1324; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1325; GCN-NEXT: v_mov_b32_e32 v1, v0 1326; GCN-NEXT: v_mov_b32_e32 v0, 0 1327; GCN-NEXT: s_setpc_b64 s[30:31] 1328; 1329; GFX10-LABEL: v_shl_i64_32: 1330; GFX10: ; %bb.0: 1331; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1332; GFX10-NEXT: v_mov_b32_e32 v1, v0 1333; GFX10-NEXT: v_mov_b32_e32 v0, 0 1334; GFX10-NEXT: s_setpc_b64 s[30:31] 1335; 1336; GFX11-LABEL: v_shl_i64_32: 1337; GFX11: ; %bb.0: 1338; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1339; GFX11-NEXT: v_dual_mov_b32 v1, v0 :: v_dual_mov_b32 v0, 0 1340; GFX11-NEXT: s_setpc_b64 s[30:31] 1341 %result = shl i64 %value, 32 1342 ret i64 %result 1343} 1344 1345define i64 @v_shl_i64_31(i64 %value) { 1346; GFX6-LABEL: v_shl_i64_31: 1347; GFX6: ; %bb.0: 1348; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1349; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 31 1350; GFX6-NEXT: s_setpc_b64 s[30:31] 1351; 1352; GFX8-LABEL: v_shl_i64_31: 1353; GFX8: ; %bb.0: 1354; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1355; GFX8-NEXT: v_lshlrev_b64 v[0:1], 31, v[0:1] 1356; GFX8-NEXT: s_setpc_b64 s[30:31] 1357; 1358; GFX9-LABEL: v_shl_i64_31: 1359; GFX9: ; %bb.0: 1360; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1361; GFX9-NEXT: v_lshlrev_b64 v[0:1], 31, v[0:1] 1362; GFX9-NEXT: s_setpc_b64 s[30:31] 1363; 1364; GFX10PLUS-LABEL: v_shl_i64_31: 1365; GFX10PLUS: ; %bb.0: 1366; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1367; GFX10PLUS-NEXT: v_lshlrev_b64 v[0:1], 31, v[0:1] 1368; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] 1369 %result = shl i64 %value, 31 1370 ret i64 %result 1371} 1372 1373define amdgpu_ps i64 @s_shl_i64(i64 inreg %value, i64 
inreg %amount) { 1374; GCN-LABEL: s_shl_i64: 1375; GCN: ; %bb.0: 1376; GCN-NEXT: s_lshl_b64 s[0:1], s[0:1], s2 1377; GCN-NEXT: ; return to shader part epilog 1378; 1379; GFX10PLUS-LABEL: s_shl_i64: 1380; GFX10PLUS: ; %bb.0: 1381; GFX10PLUS-NEXT: s_lshl_b64 s[0:1], s[0:1], s2 1382; GFX10PLUS-NEXT: ; return to shader part epilog 1383 %result = shl i64 %value, %amount 1384 ret i64 %result 1385} 1386 1387define amdgpu_ps i64 @s_shl_i64_63(i64 inreg %value) { 1388; GCN-LABEL: s_shl_i64_63: 1389; GCN: ; %bb.0: 1390; GCN-NEXT: s_lshl_b32 s1, s0, 31 1391; GCN-NEXT: s_mov_b32 s0, 0 1392; GCN-NEXT: ; return to shader part epilog 1393; 1394; GFX10PLUS-LABEL: s_shl_i64_63: 1395; GFX10PLUS: ; %bb.0: 1396; GFX10PLUS-NEXT: s_lshl_b32 s1, s0, 31 1397; GFX10PLUS-NEXT: s_mov_b32 s0, 0 1398; GFX10PLUS-NEXT: ; return to shader part epilog 1399 %result = shl i64 %value, 63 1400 ret i64 %result 1401} 1402 1403define amdgpu_ps i64 @s_shl_i64_33(i64 inreg %value) { 1404; GCN-LABEL: s_shl_i64_33: 1405; GCN: ; %bb.0: 1406; GCN-NEXT: s_lshl_b32 s1, s0, 1 1407; GCN-NEXT: s_mov_b32 s0, 0 1408; GCN-NEXT: ; return to shader part epilog 1409; 1410; GFX10PLUS-LABEL: s_shl_i64_33: 1411; GFX10PLUS: ; %bb.0: 1412; GFX10PLUS-NEXT: s_lshl_b32 s1, s0, 1 1413; GFX10PLUS-NEXT: s_mov_b32 s0, 0 1414; GFX10PLUS-NEXT: ; return to shader part epilog 1415 %result = shl i64 %value, 33 1416 ret i64 %result 1417} 1418 1419define amdgpu_ps i64 @s_shl_i64_32(i64 inreg %value) { 1420; GCN-LABEL: s_shl_i64_32: 1421; GCN: ; %bb.0: 1422; GCN-NEXT: s_mov_b32 s1, s0 1423; GCN-NEXT: s_mov_b32 s0, 0 1424; GCN-NEXT: ; return to shader part epilog 1425; 1426; GFX10PLUS-LABEL: s_shl_i64_32: 1427; GFX10PLUS: ; %bb.0: 1428; GFX10PLUS-NEXT: s_mov_b32 s1, s0 1429; GFX10PLUS-NEXT: s_mov_b32 s0, 0 1430; GFX10PLUS-NEXT: ; return to shader part epilog 1431 %result = shl i64 %value, 32 1432 ret i64 %result 1433} 1434 1435define amdgpu_ps i64 @s_shl_i64_31(i64 inreg %value) { 1436; GCN-LABEL: s_shl_i64_31: 1437; GCN: ; %bb.0: 1438; 
GCN-NEXT: s_lshl_b64 s[0:1], s[0:1], 31 1439; GCN-NEXT: ; return to shader part epilog 1440; 1441; GFX10PLUS-LABEL: s_shl_i64_31: 1442; GFX10PLUS: ; %bb.0: 1443; GFX10PLUS-NEXT: s_lshl_b64 s[0:1], s[0:1], 31 1444; GFX10PLUS-NEXT: ; return to shader part epilog 1445 %result = shl i64 %value, 31 1446 ret i64 %result 1447} 1448 1449define amdgpu_ps <2 x float> @shl_i64_sv(i64 inreg %value, i64 %amount) { 1450; GFX6-LABEL: shl_i64_sv: 1451; GFX6: ; %bb.0: 1452; GFX6-NEXT: v_lshl_b64 v[0:1], s[0:1], v0 1453; GFX6-NEXT: ; return to shader part epilog 1454; 1455; GFX8-LABEL: shl_i64_sv: 1456; GFX8: ; %bb.0: 1457; GFX8-NEXT: v_lshlrev_b64 v[0:1], v0, s[0:1] 1458; GFX8-NEXT: ; return to shader part epilog 1459; 1460; GFX9-LABEL: shl_i64_sv: 1461; GFX9: ; %bb.0: 1462; GFX9-NEXT: v_lshlrev_b64 v[0:1], v0, s[0:1] 1463; GFX9-NEXT: ; return to shader part epilog 1464; 1465; GFX10PLUS-LABEL: shl_i64_sv: 1466; GFX10PLUS: ; %bb.0: 1467; GFX10PLUS-NEXT: v_lshlrev_b64 v[0:1], v0, s[0:1] 1468; GFX10PLUS-NEXT: ; return to shader part epilog 1469 %result = shl i64 %value, %amount 1470 %cast = bitcast i64 %result to <2 x float> 1471 ret <2 x float> %cast 1472} 1473 1474define amdgpu_ps <2 x float> @shl_i64_vs(i64 %value, i64 inreg %amount) { 1475; GFX6-LABEL: shl_i64_vs: 1476; GFX6: ; %bb.0: 1477; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], s0 1478; GFX6-NEXT: ; return to shader part epilog 1479; 1480; GFX8-LABEL: shl_i64_vs: 1481; GFX8: ; %bb.0: 1482; GFX8-NEXT: v_lshlrev_b64 v[0:1], s0, v[0:1] 1483; GFX8-NEXT: ; return to shader part epilog 1484; 1485; GFX9-LABEL: shl_i64_vs: 1486; GFX9: ; %bb.0: 1487; GFX9-NEXT: v_lshlrev_b64 v[0:1], s0, v[0:1] 1488; GFX9-NEXT: ; return to shader part epilog 1489; 1490; GFX10PLUS-LABEL: shl_i64_vs: 1491; GFX10PLUS: ; %bb.0: 1492; GFX10PLUS-NEXT: v_lshlrev_b64 v[0:1], s0, v[0:1] 1493; GFX10PLUS-NEXT: ; return to shader part epilog 1494 %result = shl i64 %value, %amount 1495 %cast = bitcast i64 %result to <2 x float> 1496 ret <2 x float> %cast 1497} 1498 
; Element-wise shl of <2 x i64> by a variable <2 x i64> amount, default
; calling convention.
define <2 x i64> @v_shl_v2i64(<2 x i64> %value, <2 x i64> %amount) {
; GFX6-LABEL: v_shl_v2i64:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], v4
; GFX6-NEXT: v_lshl_b64 v[2:3], v[2:3], v6
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_shl_v2i64:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_lshlrev_b64 v[0:1], v4, v[0:1]
; GFX8-NEXT: v_lshlrev_b64 v[2:3], v6, v[2:3]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_shl_v2i64:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_lshlrev_b64 v[0:1], v4, v[0:1]
; GFX9-NEXT: v_lshlrev_b64 v[2:3], v6, v[2:3]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_shl_v2i64:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT: v_lshlrev_b64 v[0:1], v4, v[0:1]
; GFX10PLUS-NEXT: v_lshlrev_b64 v[2:3], v6, v[2:3]
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
  %result = shl <2 x i64> %value, %amount
  ret <2 x i64> %result
}

; Element-wise shl of <2 x i64> by the constant splat <31, 31>.
define <2 x i64> @v_shl_v2i64_31(<2 x i64> %value) {
; GFX6-LABEL: v_shl_v2i64_31:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 31
; GFX6-NEXT: v_lshl_b64 v[2:3], v[2:3], 31
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_shl_v2i64_31:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_lshlrev_b64 v[0:1], 31, v[0:1]
; GFX8-NEXT: v_lshlrev_b64 v[2:3], 31, v[2:3]
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_shl_v2i64_31:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_lshlrev_b64 v[0:1], 31, v[0:1]
; GFX9-NEXT: v_lshlrev_b64 v[2:3], 31, v[2:3]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_shl_v2i64_31:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT: v_lshlrev_b64 v[0:1], 31, v[0:1]
; GFX10PLUS-NEXT: v_lshlrev_b64 v[2:3], 31, v[2:3]
; GFX10PLUS-NEXT: s_setpc_b64 s[30:31]
  %result = shl <2 x i64> %value, <i64 31, i64 31>
  ret <2 x i64> %result
}

; Same <2 x i64> variable shift as an amdgpu_ps function with both operands
; marked inreg.
define amdgpu_ps <2 x i64> @s_shl_v2i64(<2 x i64> inreg %value, <2 x i64> inreg %amount) {
; GCN-LABEL: s_shl_v2i64:
; GCN: ; %bb.0:
; GCN-NEXT: s_lshl_b64 s[0:1], s[0:1], s4
; GCN-NEXT: s_lshl_b64 s[2:3], s[2:3], s6
; GCN-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_shl_v2i64:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_lshl_b64 s[0:1], s[0:1], s4
; GFX10PLUS-NEXT: s_lshl_b64 s[2:3], s[2:3], s6
; GFX10PLUS-NEXT: ; return to shader part epilog
  %result = shl <2 x i64> %value, %amount
  ret <2 x i64> %result
}

; shl of the non-power-of-two width i65 by a variable i65 amount, default
; calling convention. The checks expect 64-bit shifts combined with compares
; against 64 and 0 and conditional selects.
define i65 @v_shl_i65(i65 %value, i65 %amount) {
; GFX6-LABEL: v_shl_i65:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_sub_i32_e32 v4, vcc, 64, v3
; GFX6-NEXT: v_lshr_b64 v[4:5], v[0:1], v4
; GFX6-NEXT: v_lshl_b64 v[5:6], v[2:3], v3
; GFX6-NEXT: v_add_i32_e32 v8, vcc, 0xffffffc0, v3
; GFX6-NEXT: v_lshl_b64 v[6:7], v[0:1], v3
; GFX6-NEXT: v_or_b32_e32 v9, v4, v5
; GFX6-NEXT: v_lshl_b64 v[4:5], v[0:1], v8
; GFX6-NEXT: v_cmp_gt_u32_e32 vcc, 64, v3
; GFX6-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v1, 0, v7, vcc
; GFX6-NEXT: v_cndmask_b32_e32 v4, v4, v9, vcc
; GFX6-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
; GFX6-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_shl_i65:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_sub_u32_e32 v4, vcc, 64, v3
; GFX8-NEXT: v_lshrrev_b64 v[4:5], v4, v[0:1]
; GFX8-NEXT: v_lshlrev_b64 v[5:6], v3, v[2:3]
; GFX8-NEXT: v_add_u32_e32 v8, vcc, 0xffffffc0, v3
; GFX8-NEXT: v_lshlrev_b64 v[6:7], v3, v[0:1]
; GFX8-NEXT: v_or_b32_e32 v9, v4, v5
; GFX8-NEXT: v_lshlrev_b64 v[4:5], v8, v[0:1]
; GFX8-NEXT: v_cmp_gt_u32_e32 vcc, 64, v3
; GFX8-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc
; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v7, vcc
; GFX8-NEXT: v_cndmask_b32_e32 v4, v4, v9, vcc
; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
; GFX8-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_shl_i65:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_sub_u32_e32 v4, 64, v3
; GFX9-NEXT: v_lshrrev_b64 v[4:5], v4, v[0:1]
; GFX9-NEXT: v_lshlrev_b64 v[5:6], v3, v[2:3]
; GFX9-NEXT: v_add_u32_e32 v8, 0xffffffc0, v3
; GFX9-NEXT: v_lshlrev_b64 v[6:7], v3, v[0:1]
; GFX9-NEXT: v_or_b32_e32 v9, v4, v5
; GFX9-NEXT: v_lshlrev_b64 v[4:5], v8, v[0:1]
; GFX9-NEXT: v_cmp_gt_u32_e32 vcc, 64, v3
; GFX9-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v7, vcc
; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v9, vcc
; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v3
; GFX9-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_shl_i65:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_sub_nc_u32_e32 v6, 64, v3
; GFX10-NEXT: v_lshlrev_b64 v[4:5], v3, v[2:3]
; GFX10-NEXT: v_add_nc_u32_e32 v8, 0xffffffc0, v3
; GFX10-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v3
; GFX10-NEXT: v_lshrrev_b64 v[5:6], v6, v[0:1]
; GFX10-NEXT: v_lshlrev_b64 v[6:7], v3, v[0:1]
; GFX10-NEXT: v_lshlrev_b64 v[8:9], v8, v[0:1]
; GFX10-NEXT: v_or_b32_e32 v1, v5, v4
; GFX10-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e32 v4, v8, v1, vcc_lo
; GFX10-NEXT: v_cndmask_b32_e32 v1, 0, v7, vcc_lo
; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
; GFX10-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc_lo
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_shl_i65:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_sub_nc_u32_e32 v6, 64, v3
; GFX11-NEXT: v_lshlrev_b64 v[4:5], v3, v[2:3]
; GFX11-NEXT: v_add_nc_u32_e32 v8, 0xffffffc0, v3
; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, 64, v3
; GFX11-NEXT: v_lshrrev_b64 v[5:6], v6, v[0:1]
; GFX11-NEXT: v_lshlrev_b64 v[6:7], v3, v[0:1]
; GFX11-NEXT: v_lshlrev_b64 v[8:9], v8, v[0:1]
; GFX11-NEXT: v_or_b32_e32 v1, v5, v4
; GFX11-NEXT: v_cndmask_b32_e32 v0, 0, v6, vcc_lo
; GFX11-NEXT: v_dual_cndmask_b32 v4, v8, v1 :: v_dual_cndmask_b32 v1, 0, v7
; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
; GFX11-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc_lo
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %result = shl i65 %value, %amount
  ret i65 %result
}

; shl of an i65 by the constant 33, default calling convention.
define i65 @v_shl_i65_33(i65 %value) {
; GFX6-LABEL: v_shl_i65_33:
; GFX6: ; %bb.0:
; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX6-NEXT: v_lshlrev_b32_e32 v4, 1, v0
; GFX6-NEXT: v_lshr_b64 v[2:3], v[0:1], 31
; GFX6-NEXT: v_mov_b32_e32 v0, 0
; GFX6-NEXT: v_mov_b32_e32 v1, v4
; GFX6-NEXT: s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_shl_i65_33:
; GFX8: ; %bb.0:
; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT: v_lshlrev_b32_e32 v4, 1, v0
; GFX8-NEXT: v_lshrrev_b64 v[2:3], 31, v[0:1]
; GFX8-NEXT: v_mov_b32_e32 v0, 0
; GFX8-NEXT: v_mov_b32_e32 v1, v4
; GFX8-NEXT: s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_shl_i65_33:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: v_lshlrev_b32_e32 v4, 1, v0
; GFX9-NEXT: v_lshrrev_b64 v[2:3], 31, v[0:1]
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: v_mov_b32_e32 v1, v4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: v_shl_i65_33:
; GFX10: ; %bb.0:
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-NEXT: v_lshlrev_b32_e32 v4, 1, v0
; GFX10-NEXT: v_lshrrev_b64 v[2:3], 31, v[0:1]
; GFX10-NEXT: v_mov_b32_e32 v0, 0
; GFX10-NEXT: v_mov_b32_e32 v1, v4
; GFX10-NEXT: s_setpc_b64 s[30:31]
;
; GFX11-LABEL: v_shl_i65_33:
; GFX11: ; %bb.0:
; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX11-NEXT: v_lshlrev_b32_e32 v4, 1, v0
; GFX11-NEXT: v_lshrrev_b64 v[2:3], 31, v[0:1]
; GFX11-NEXT: v_dual_mov_b32 v0, 0 :: v_dual_mov_b32 v1, v4
; GFX11-NEXT: s_setpc_b64 s[30:31]
  %result = shl i65 %value, 33
  ret i65 %result
}

; Same variable i65 shift as an amdgpu_ps function with both operands marked
; inreg.
define amdgpu_ps i65 @s_shl_i65(i65 inreg %value, i65 inreg %amount) {
; GCN-LABEL: s_shl_i65:
; GCN: ; %bb.0:
; GCN-NEXT: s_sub_i32 s10, s3, 64
; GCN-NEXT: s_sub_i32 s6, 64, s3
; GCN-NEXT: s_cmp_lt_u32 s3, 64
; GCN-NEXT: s_cselect_b32 s11, 1, 0
; GCN-NEXT: s_cmp_eq_u32 s3, 0
; GCN-NEXT: s_cselect_b32 s12, 1, 0
; GCN-NEXT: s_lshr_b64 s[6:7], s[0:1], s6
; GCN-NEXT: s_lshl_b64 s[8:9], s[2:3], s3
; GCN-NEXT: s_lshl_b64 s[4:5], s[0:1], s3
; GCN-NEXT: s_or_b64 s[6:7], s[6:7], s[8:9]
; GCN-NEXT: s_lshl_b64 s[8:9], s[0:1], s10
; GCN-NEXT: s_cmp_lg_u32 s11, 0
; GCN-NEXT: s_cselect_b64 s[0:1], s[4:5], 0
; GCN-NEXT: s_cselect_b32 s3, s6, s8
; GCN-NEXT: s_cmp_lg_u32 s12, 0
; GCN-NEXT: s_cselect_b32 s2, s2, s3
; GCN-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_shl_i65:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_sub_i32 s10, s3, 64
; GFX10PLUS-NEXT: s_sub_i32 s4, 64, s3
; GFX10PLUS-NEXT: s_cmp_lt_u32 s3, 64
; GFX10PLUS-NEXT: s_cselect_b32 s11, 1, 0
; GFX10PLUS-NEXT: s_cmp_eq_u32 s3, 0
; GFX10PLUS-NEXT: s_cselect_b32 s12, 1, 0
; GFX10PLUS-NEXT: s_lshr_b64 s[4:5], s[0:1], s4
; GFX10PLUS-NEXT: s_lshl_b64 s[6:7], s[2:3], s3
; GFX10PLUS-NEXT: s_lshl_b64 s[8:9], s[0:1], s3
; GFX10PLUS-NEXT: s_or_b64 s[4:5], s[4:5], s[6:7]
; GFX10PLUS-NEXT: s_lshl_b64 s[6:7], s[0:1], s10
; GFX10PLUS-NEXT: s_cmp_lg_u32 s11, 0
; GFX10PLUS-NEXT: s_cselect_b64 s[0:1], s[8:9], 0
; GFX10PLUS-NEXT: s_cselect_b32 s3, s4, s6
; GFX10PLUS-NEXT: s_cmp_lg_u32 s12, 0
; GFX10PLUS-NEXT: s_cselect_b32 s2, s2, s3
; GFX10PLUS-NEXT: ; return to shader part epilog
  %result = shl i65 %value, %amount
  ret i65 %result
}

; inreg amdgpu_ps variant of the constant-33 i65 shift.
define amdgpu_ps i65 @s_shl_i65_33(i65 inreg %value) {
; GCN-LABEL: s_shl_i65_33:
; GCN: ; %bb.0:
; GCN-NEXT: s_lshl_b32 s4, s0, 1
; GCN-NEXT: s_mov_b32 s6, 0
; GCN-NEXT: s_lshl_b32 s7, s2, 1
; GCN-NEXT: s_lshr_b64 s[0:1], s[0:1], 31
; GCN-NEXT: s_or_b64 s[2:3], s[6:7], s[0:1]
; GCN-NEXT: s_mov_b32 s0, 0
; GCN-NEXT: s_mov_b32 s1, s4
; GCN-NEXT: ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_shl_i65_33:
; GFX10PLUS: ; %bb.0:
; GFX10PLUS-NEXT: s_mov_b32 s4, 0
; GFX10PLUS-NEXT: s_lshl_b32 s5, s2, 1
; GFX10PLUS-NEXT: s_lshr_b64 s[2:3], s[0:1], 31
; GFX10PLUS-NEXT: s_lshl_b32 s1, s0, 1
; GFX10PLUS-NEXT: s_or_b64 s[2:3], s[4:5], s[2:3]
; GFX10PLUS-NEXT: s_mov_b32 s0, 0
; GFX10PLUS-NEXT: ; return to shader part epilog
  %result = shl i65 %value, 33
  ret i65 %result
}

; FIXME: Argument lowering asserts
; The <2 x i65> cases below stay commented out until that is resolved.
; define <2 x i65> @v_shl_v2i65(<2 x i65> %value, <2 x i65> %amount) {
;   %result = shl <2 x i65> %value, %amount
;   ret <2 x i65> %result
; }

; define amdgpu_ps <2 x i65> @s_shl_v2i65(<2 x i65> inreg %value, <2 x i65> inreg %amount) {
;   %result = shl <2 x i65> %value, %amount
;   ret <2 x i65> %result
; }