1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,GFX6 %s 3; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=fiji < %s | FileCheck -check-prefixes=GCN,GFX8 %s 4; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s 5; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1010 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX10 %s 6; RUN: llc -global-isel -mtriple=amdgcn-amd-amdpal -mcpu=gfx1100 -amdgpu-enable-delay-alu=0 < %s | FileCheck -check-prefixes=GFX10PLUS,GFX11 %s 7 8define i8 @v_sext_inreg_i8_4(i8 %value) { 9; GCN-LABEL: v_sext_inreg_i8_4: 10; GCN: ; %bb.0: 11; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12; GCN-NEXT: v_bfe_i32 v0, v0, 0, 4 13; GCN-NEXT: s_setpc_b64 s[30:31] 14; 15; GFX10PLUS-LABEL: v_sext_inreg_i8_4: 16; GFX10PLUS: ; %bb.0: 17; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 18; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 4 19; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] 20 %shl = shl i8 %value, 4 21 %ashr = ashr i8 %shl, 4 22 ret i8 %ashr 23} 24 25define i8 @v_sext_inreg_i8_7(i8 %value) { 26; GCN-LABEL: v_sext_inreg_i8_7: 27; GCN: ; %bb.0: 28; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 29; GCN-NEXT: v_bfe_i32 v0, v0, 0, 1 30; GCN-NEXT: s_setpc_b64 s[30:31] 31; 32; GFX10PLUS-LABEL: v_sext_inreg_i8_7: 33; GFX10PLUS: ; %bb.0: 34; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 35; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 1 36; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] 37 %shl = shl i8 %value, 7 38 %ashr = ashr i8 %shl, 7 39 ret i8 %ashr 40} 41 42define amdgpu_ps i8 @s_sext_inreg_i8(i8 inreg %value) { 43; GFX6-LABEL: s_sext_inreg_i8: 44; GFX6: ; %bb.0: 45; GFX6-NEXT: s_bfe_i32 s0, s0, 0x50000 46; GFX6-NEXT: ; return to shader part epilog 47; 48; GFX8-LABEL: s_sext_inreg_i8: 49; GFX8: ; %bb.0: 50; GFX8-NEXT: s_lshl_b32 s0, s0, 3 51; GFX8-NEXT: 
s_sext_i32_i8 s0, s0
; GFX8-NEXT:    s_ashr_i32 s0, s0, 3
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: s_sext_inreg_i8:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_lshl_b32 s0, s0, 3
; GFX9-NEXT:    s_sext_i32_i8 s0, s0
; GFX9-NEXT:    s_ashr_i32 s0, s0, 3
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_sext_inreg_i8:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_lshl_b32 s0, s0, 3
; GFX10PLUS-NEXT:    s_sext_i32_i8 s0, s0
; GFX10PLUS-NEXT:    s_ashr_i32 s0, s0, 3
; GFX10PLUS-NEXT:    ; return to shader part epilog
  %shl = shl i8 %value, 3
  %ashr = ashr i8 %shl, 3
  ret i8 %ashr
}

define amdgpu_ps i8 @s_sext_inreg_i8_6(i8 inreg %value) {
; GFX6-LABEL: s_sext_inreg_i8_6:
; GFX6:       ; %bb.0:
; GFX6-NEXT:    s_bfe_i32 s0, s0, 0x20000
; GFX6-NEXT:    ; return to shader part epilog
;
; GFX8-LABEL: s_sext_inreg_i8_6:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_lshl_b32 s0, s0, 6
; GFX8-NEXT:    s_sext_i32_i8 s0, s0
; GFX8-NEXT:    s_ashr_i32 s0, s0, 6
; GFX8-NEXT:    ; return to shader part epilog
;
; GFX9-LABEL: s_sext_inreg_i8_6:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_lshl_b32 s0, s0, 6
; GFX9-NEXT:    s_sext_i32_i8 s0, s0
; GFX9-NEXT:    s_ashr_i32 s0, s0, 6
; GFX9-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_sext_inreg_i8_6:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_lshl_b32 s0, s0, 6
; GFX10PLUS-NEXT:    s_sext_i32_i8 s0, s0
; GFX10PLUS-NEXT:    s_ashr_i32 s0, s0, 6
; GFX10PLUS-NEXT:    ; return to shader part epilog
  %shl = shl i8 %value, 6
  %ashr = ashr i8 %shl, 6
  ret i8 %ashr
}

; Sign-extend the low 12 bits of an i24 in place: shl by 12, then ashr by 12
; of the shifted value. The ashr consumes %shl so the pair forms the
; sext_inreg idiom this file is meant to cover.
; NOTE(review): CHECK lines were updated by hand to mirror v_sext_inreg_i24_7;
; re-run utils/update_llc_test_checks.py to confirm.
define i24 @v_sext_inreg_i24_12(i24 %value) {
; GCN-LABEL: v_sext_inreg_i24_12:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_bfe_i32 v0, v0, 0, 12
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_sext_inreg_i24_12:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_bfe_i32 v0, v0, 0, 12
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %shl = shl i24 %value, 12
  %ashr = ashr i24 %shl, 12
  ret i24 %ashr
}

define i24 @v_sext_inreg_i24_7(i24 %value) {
; GCN-LABEL: v_sext_inreg_i24_7:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_bfe_i32 v0, v0, 0, 17
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_sext_inreg_i24_7:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_bfe_i32 v0, v0, 0, 17
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %shl = shl i24 %value, 7
  %ashr = ashr i24 %shl, 7
  ret i24 %ashr
}

define amdgpu_ps i24 @s_sext_inreg_i24_8(i24 inreg %value) {
; GCN-LABEL: s_sext_inreg_i24_8:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_sext_i32_i16 s0, s0
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_sext_inreg_i24_8:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_sext_i32_i16 s0, s0
; GFX10PLUS-NEXT:    ; return to shader part epilog
  %shl = shl i24 %value, 8
  %ashr = ashr i24 %shl, 8
  ret i24 %ashr
}

define amdgpu_ps i24 @s_sext_inreg_i24_7(i24 inreg %value) {
; GCN-LABEL: s_sext_inreg_i24_7:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_bfe_i32 s0, s0, 0x110000
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_sext_inreg_i24_7:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_bfe_i32 s0, s0, 0x110000
; GFX10PLUS-NEXT:    ; return to shader part epilog
  %shl = shl i24 %value, 7
  %ashr = ashr i24 %shl, 7
  ret i24 %ashr
}

define i32 @v_sext_inreg_i32_3(i32 %value) {
; GCN-LABEL: v_sext_inreg_i32_3:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_bfe_i32 v0, v0, 0, 29
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_sext_inreg_i32_3:
; GFX10PLUS:       ; %bb.0:
;
GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_bfe_i32 v0, v0, 0, 29
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %shl = shl i32 %value, 3
  %ashr = ashr i32 %shl, 3
  ret i32 %ashr
}

; Sign-extend bit 0 of an i32 across the whole register: shl by 31, then ashr
; by 31 of the shifted value. The ashr consumes %shl so the pair forms the
; sext_inreg idiom this file is meant to cover.
; NOTE(review): CHECK lines were updated by hand to mirror
; v_sext_inreg_v2i32_31; re-run utils/update_llc_test_checks.py to confirm.
define i32 @v_sext_inreg_i32_31(i32 %value) {
; GCN-LABEL: v_sext_inreg_i32_31:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_bfe_i32 v0, v0, 0, 1
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_sext_inreg_i32_31:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_bfe_i32 v0, v0, 0, 1
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %shl = shl i32 %value, 31
  %ashr = ashr i32 %shl, 31
  ret i32 %ashr
}

define amdgpu_ps i32 @s_sext_inreg_i32_2(i32 inreg %value) {
; GCN-LABEL: s_sext_inreg_i32_2:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_bfe_i32 s0, s0, 0x1e0000
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_sext_inreg_i32_2:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_bfe_i32 s0, s0, 0x1e0000
; GFX10PLUS-NEXT:    ; return to shader part epilog
  %shl = shl i32 %value, 2
  %ashr = ashr i32 %shl, 2
  ret i32 %ashr
}

define amdgpu_ps i32 @s_sext_inreg_i32_31(i32 inreg %value) {
; GCN-LABEL: s_sext_inreg_i32_31:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_bfe_i32 s0, s0, 0x10000
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_sext_inreg_i32_31:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_bfe_i32 s0, s0, 0x10000
; GFX10PLUS-NEXT:    ; return to shader part epilog
  %shl = shl i32 %value, 31
  %ashr = ashr i32 %shl, 31
  ret i32 %ashr
}

define <2 x i32> @v_sext_inreg_v2i32_14(<2 x i32> %value) {
; GCN-LABEL: v_sext_inreg_v2i32_14:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_bfe_i32 v0, v0, 0, 18
; GCN-NEXT:    v_bfe_i32 v1, v1, 0, 18
; GCN-NEXT:    s_setpc_b64
s[30:31] 241; 242; GFX10PLUS-LABEL: v_sext_inreg_v2i32_14: 243; GFX10PLUS: ; %bb.0: 244; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 245; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 18 246; GFX10PLUS-NEXT: v_bfe_i32 v1, v1, 0, 18 247; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] 248 %shl = shl <2 x i32> %value, <i32 14, i32 14> 249 %ashr = ashr <2 x i32> %shl, <i32 14, i32 14> 250 ret <2 x i32> %ashr 251} 252 253define <2 x i32> @v_sext_inreg_v2i32_31(<2 x i32> %value) { 254; GCN-LABEL: v_sext_inreg_v2i32_31: 255; GCN: ; %bb.0: 256; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 257; GCN-NEXT: v_bfe_i32 v0, v0, 0, 1 258; GCN-NEXT: v_bfe_i32 v1, v1, 0, 1 259; GCN-NEXT: s_setpc_b64 s[30:31] 260; 261; GFX10PLUS-LABEL: v_sext_inreg_v2i32_31: 262; GFX10PLUS: ; %bb.0: 263; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 264; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 1 265; GFX10PLUS-NEXT: v_bfe_i32 v1, v1, 0, 1 266; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] 267 %shl = shl <2 x i32> %value, <i32 31, i32 31> 268 %shr = ashr <2 x i32> %shl, <i32 31, i32 31> 269 ret <2 x i32> %shr 270} 271 272define amdgpu_ps <2 x i32> @s_sext_inreg_v2i32_22(<2 x i32> inreg %value) { 273; GCN-LABEL: s_sext_inreg_v2i32_22: 274; GCN: ; %bb.0: 275; GCN-NEXT: s_bfe_i32 s0, s0, 0xa0000 276; GCN-NEXT: s_bfe_i32 s1, s1, 0xa0000 277; GCN-NEXT: ; return to shader part epilog 278; 279; GFX10PLUS-LABEL: s_sext_inreg_v2i32_22: 280; GFX10PLUS: ; %bb.0: 281; GFX10PLUS-NEXT: s_bfe_i32 s0, s0, 0xa0000 282; GFX10PLUS-NEXT: s_bfe_i32 s1, s1, 0xa0000 283; GFX10PLUS-NEXT: ; return to shader part epilog 284 %shl = shl <2 x i32> %value, <i32 22, i32 22> 285 %ashr = ashr <2 x i32> %shl, <i32 22, i32 22> 286 ret <2 x i32> %ashr 287} 288 289define <3 x i32> @v_sext_inreg_v3i32_16(<3 x i32> %value, <3 x i32> %amount) { 290; GCN-LABEL: v_sext_inreg_v3i32_16: 291; GCN: ; %bb.0: 292; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 293; GCN-NEXT: v_bfe_i32 v0, v0, 0, 16 294; GCN-NEXT: v_bfe_i32 v1, v1, 0, 16 295; 
GCN-NEXT: v_bfe_i32 v2, v2, 0, 16 296; GCN-NEXT: s_setpc_b64 s[30:31] 297; 298; GFX10PLUS-LABEL: v_sext_inreg_v3i32_16: 299; GFX10PLUS: ; %bb.0: 300; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 301; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 16 302; GFX10PLUS-NEXT: v_bfe_i32 v1, v1, 0, 16 303; GFX10PLUS-NEXT: v_bfe_i32 v2, v2, 0, 16 304; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] 305 %shl = shl <3 x i32> %value, <i32 16, i32 16, i32 16> 306 %ashr = ashr <3 x i32> %shl, <i32 16, i32 16, i32 16> 307 ret <3 x i32> %ashr 308} 309 310define amdgpu_ps <3 x i32> @s_sext_inreg_v3i32_22(<3 x i32> inreg %value) { 311; GCN-LABEL: s_sext_inreg_v3i32_22: 312; GCN: ; %bb.0: 313; GCN-NEXT: s_bfe_i32 s0, s0, 0xa0000 314; GCN-NEXT: s_bfe_i32 s1, s1, 0xa0000 315; GCN-NEXT: s_bfe_i32 s2, s2, 0xa0000 316; GCN-NEXT: ; return to shader part epilog 317; 318; GFX10PLUS-LABEL: s_sext_inreg_v3i32_22: 319; GFX10PLUS: ; %bb.0: 320; GFX10PLUS-NEXT: s_bfe_i32 s0, s0, 0xa0000 321; GFX10PLUS-NEXT: s_bfe_i32 s1, s1, 0xa0000 322; GFX10PLUS-NEXT: s_bfe_i32 s2, s2, 0xa0000 323; GFX10PLUS-NEXT: ; return to shader part epilog 324 %shl = shl <3 x i32> %value, <i32 22, i32 22, i32 22> 325 %ashr = ashr <3 x i32> %shl, <i32 22, i32 22, i32 22> 326 ret <3 x i32> %ashr 327} 328 329define <4 x i32> @v_sext_inreg_v4i32_6(<4 x i32> %value) { 330; GCN-LABEL: v_sext_inreg_v4i32_6: 331; GCN: ; %bb.0: 332; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 333; GCN-NEXT: v_bfe_i32 v0, v0, 0, 26 334; GCN-NEXT: v_bfe_i32 v1, v1, 0, 26 335; GCN-NEXT: v_bfe_i32 v2, v2, 0, 26 336; GCN-NEXT: v_bfe_i32 v3, v3, 0, 26 337; GCN-NEXT: s_setpc_b64 s[30:31] 338; 339; GFX10PLUS-LABEL: v_sext_inreg_v4i32_6: 340; GFX10PLUS: ; %bb.0: 341; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 342; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 26 343; GFX10PLUS-NEXT: v_bfe_i32 v1, v1, 0, 26 344; GFX10PLUS-NEXT: v_bfe_i32 v2, v2, 0, 26 345; GFX10PLUS-NEXT: v_bfe_i32 v3, v3, 0, 26 346; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] 347 %shl = shl <4 x 
i32> %value, <i32 6, i32 6, i32 6, i32 6>
  %ashr = ashr <4 x i32> %shl, <i32 6, i32 6, i32 6, i32 6>
  ret <4 x i32> %ashr
}

define amdgpu_ps <4 x i32> @s_sext_inreg_v4i32_13(<4 x i32> inreg %value) {
; GCN-LABEL: s_sext_inreg_v4i32_13:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_bfe_i32 s0, s0, 0x130000
; GCN-NEXT:    s_bfe_i32 s1, s1, 0x130000
; GCN-NEXT:    s_bfe_i32 s2, s2, 0x130000
; GCN-NEXT:    s_bfe_i32 s3, s3, 0x130000
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_sext_inreg_v4i32_13:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_bfe_i32 s0, s0, 0x130000
; GFX10PLUS-NEXT:    s_bfe_i32 s1, s1, 0x130000
; GFX10PLUS-NEXT:    s_bfe_i32 s2, s2, 0x130000
; GFX10PLUS-NEXT:    s_bfe_i32 s3, s3, 0x130000
; GFX10PLUS-NEXT:    ; return to shader part epilog
  %shl = shl <4 x i32> %value, <i32 13, i32 13, i32 13, i32 13>
  %ashr = ashr <4 x i32> %shl, <i32 13, i32 13, i32 13, i32 13>
  ret <4 x i32> %ashr
}

define <5 x i32> @v_sext_inreg_v5i32_30(<5 x i32> %value) {
; GCN-LABEL: v_sext_inreg_v5i32_30:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_bfe_i32 v0, v0, 0, 2
; GCN-NEXT:    v_bfe_i32 v1, v1, 0, 2
; GCN-NEXT:    v_bfe_i32 v2, v2, 0, 2
; GCN-NEXT:    v_bfe_i32 v3, v3, 0, 2
; GCN-NEXT:    v_bfe_i32 v4, v4, 0, 2
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_sext_inreg_v5i32_30:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_bfe_i32 v0, v0, 0, 2
; GFX10PLUS-NEXT:    v_bfe_i32 v1, v1, 0, 2
; GFX10PLUS-NEXT:    v_bfe_i32 v2, v2, 0, 2
; GFX10PLUS-NEXT:    v_bfe_i32 v3, v3, 0, 2
; GFX10PLUS-NEXT:    v_bfe_i32 v4, v4, 0, 2
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %shl = shl <5 x i32> %value, <i32 30, i32 30, i32 30, i32 30, i32 30>
  %ashr = ashr <5 x i32> %shl, <i32 30, i32 30, i32 30, i32 30, i32 30>
  ret <5 x i32> %ashr
}

; Sign-extend the low 13 bits of each of five scalar i32 lanes: shl by 19,
; then ashr by 19 of the shifted vector. The ashr consumes %shl so the pair
; forms the sext_inreg idiom this file is meant to cover.
; NOTE(review): CHECK lines were updated by hand to mirror
; s_sext_inreg_v4i32_13 (0xd0000 = width 13, offset 0); re-run
; utils/update_llc_test_checks.py to confirm.
define amdgpu_ps <5 x i32> @s_sext_inreg_v5i32_19(<5 x i32> inreg %value) {
; GCN-LABEL: s_sext_inreg_v5i32_19:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_bfe_i32 s0, s0, 0xd0000
; GCN-NEXT:    s_bfe_i32 s1, s1, 0xd0000
; GCN-NEXT:    s_bfe_i32 s2, s2, 0xd0000
; GCN-NEXT:    s_bfe_i32 s3, s3, 0xd0000
; GCN-NEXT:    s_bfe_i32 s4, s4, 0xd0000
; GCN-NEXT:    ; return to shader part epilog
;
; GFX10PLUS-LABEL: s_sext_inreg_v5i32_19:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_bfe_i32 s0, s0, 0xd0000
; GFX10PLUS-NEXT:    s_bfe_i32 s1, s1, 0xd0000
; GFX10PLUS-NEXT:    s_bfe_i32 s2, s2, 0xd0000
; GFX10PLUS-NEXT:    s_bfe_i32 s3, s3, 0xd0000
; GFX10PLUS-NEXT:    s_bfe_i32 s4, s4, 0xd0000
; GFX10PLUS-NEXT:    ; return to shader part epilog
  %shl = shl <5 x i32> %value, <i32 19, i32 19, i32 19, i32 19, i32 19>
  %ashr = ashr <5 x i32> %shl, <i32 19, i32 19, i32 19, i32 19, i32 19>
  ret <5 x i32> %ashr
}

define <16 x i32> @v_sext_inreg_v16i32_27(<16 x i32> %value) {
; GCN-LABEL: v_sext_inreg_v16i32_27:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_bfe_i32 v0, v0, 0, 5
; GCN-NEXT:    v_bfe_i32 v1, v1, 0, 5
; GCN-NEXT:    v_bfe_i32 v2, v2, 0, 5
; GCN-NEXT:    v_bfe_i32 v3, v3, 0, 5
; GCN-NEXT:    v_bfe_i32 v4, v4, 0, 5
; GCN-NEXT:    v_bfe_i32 v5, v5, 0, 5
; GCN-NEXT:    v_bfe_i32 v6, v6, 0, 5
; GCN-NEXT:    v_bfe_i32 v7, v7, 0, 5
; GCN-NEXT:    v_bfe_i32 v8, v8, 0, 5
; GCN-NEXT:    v_bfe_i32 v9, v9, 0, 5
; GCN-NEXT:    v_bfe_i32 v10, v10, 0, 5
; GCN-NEXT:    v_bfe_i32 v11, v11, 0, 5
; GCN-NEXT:    v_bfe_i32 v12, v12, 0, 5
; GCN-NEXT:    v_bfe_i32 v13, v13, 0, 5
; GCN-NEXT:    v_bfe_i32 v14, v14, 0, 5
; GCN-NEXT:    v_bfe_i32 v15, v15, 0, 5
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_sext_inreg_v16i32_27:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_bfe_i32 v0, v0, 0, 5
; GFX10PLUS-NEXT:    v_bfe_i32 v1, v1, 0, 5
; GFX10PLUS-NEXT:    v_bfe_i32 v2, v2, 0, 5
; GFX10PLUS-NEXT:    v_bfe_i32
v3, v3, 0, 5 450; GFX10PLUS-NEXT: v_bfe_i32 v4, v4, 0, 5 451; GFX10PLUS-NEXT: v_bfe_i32 v5, v5, 0, 5 452; GFX10PLUS-NEXT: v_bfe_i32 v6, v6, 0, 5 453; GFX10PLUS-NEXT: v_bfe_i32 v7, v7, 0, 5 454; GFX10PLUS-NEXT: v_bfe_i32 v8, v8, 0, 5 455; GFX10PLUS-NEXT: v_bfe_i32 v9, v9, 0, 5 456; GFX10PLUS-NEXT: v_bfe_i32 v10, v10, 0, 5 457; GFX10PLUS-NEXT: v_bfe_i32 v11, v11, 0, 5 458; GFX10PLUS-NEXT: v_bfe_i32 v12, v12, 0, 5 459; GFX10PLUS-NEXT: v_bfe_i32 v13, v13, 0, 5 460; GFX10PLUS-NEXT: v_bfe_i32 v14, v14, 0, 5 461; GFX10PLUS-NEXT: v_bfe_i32 v15, v15, 0, 5 462; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] 463 %shl = shl <16 x i32> %value, <i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27> 464 %ashr = ashr <16 x i32> %shl, <i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27, i32 27> 465 ret <16 x i32> %ashr 466} 467 468define amdgpu_ps <16 x i32> @s_sext_inreg_v16i32_3(<16 x i32> inreg %value) { 469; GCN-LABEL: s_sext_inreg_v16i32_3: 470; GCN: ; %bb.0: 471; GCN-NEXT: s_bfe_i32 s0, s0, 0x1d0000 472; GCN-NEXT: s_bfe_i32 s1, s1, 0x1d0000 473; GCN-NEXT: s_bfe_i32 s2, s2, 0x1d0000 474; GCN-NEXT: s_bfe_i32 s3, s3, 0x1d0000 475; GCN-NEXT: s_bfe_i32 s4, s4, 0x1d0000 476; GCN-NEXT: s_bfe_i32 s5, s5, 0x1d0000 477; GCN-NEXT: s_bfe_i32 s6, s6, 0x1d0000 478; GCN-NEXT: s_bfe_i32 s7, s7, 0x1d0000 479; GCN-NEXT: s_bfe_i32 s8, s8, 0x1d0000 480; GCN-NEXT: s_bfe_i32 s9, s9, 0x1d0000 481; GCN-NEXT: s_bfe_i32 s10, s10, 0x1d0000 482; GCN-NEXT: s_bfe_i32 s11, s11, 0x1d0000 483; GCN-NEXT: s_bfe_i32 s12, s12, 0x1d0000 484; GCN-NEXT: s_bfe_i32 s13, s13, 0x1d0000 485; GCN-NEXT: s_bfe_i32 s14, s14, 0x1d0000 486; GCN-NEXT: s_bfe_i32 s15, s15, 0x1d0000 487; GCN-NEXT: ; return to shader part epilog 488; 489; GFX10PLUS-LABEL: s_sext_inreg_v16i32_3: 490; GFX10PLUS: ; %bb.0: 491; GFX10PLUS-NEXT: s_bfe_i32 s0, s0, 0x1d0000 492; GFX10PLUS-NEXT: s_bfe_i32 s1, s1, 0x1d0000 493; 
GFX10PLUS-NEXT: s_bfe_i32 s2, s2, 0x1d0000 494; GFX10PLUS-NEXT: s_bfe_i32 s3, s3, 0x1d0000 495; GFX10PLUS-NEXT: s_bfe_i32 s4, s4, 0x1d0000 496; GFX10PLUS-NEXT: s_bfe_i32 s5, s5, 0x1d0000 497; GFX10PLUS-NEXT: s_bfe_i32 s6, s6, 0x1d0000 498; GFX10PLUS-NEXT: s_bfe_i32 s7, s7, 0x1d0000 499; GFX10PLUS-NEXT: s_bfe_i32 s8, s8, 0x1d0000 500; GFX10PLUS-NEXT: s_bfe_i32 s9, s9, 0x1d0000 501; GFX10PLUS-NEXT: s_bfe_i32 s10, s10, 0x1d0000 502; GFX10PLUS-NEXT: s_bfe_i32 s11, s11, 0x1d0000 503; GFX10PLUS-NEXT: s_bfe_i32 s12, s12, 0x1d0000 504; GFX10PLUS-NEXT: s_bfe_i32 s13, s13, 0x1d0000 505; GFX10PLUS-NEXT: s_bfe_i32 s14, s14, 0x1d0000 506; GFX10PLUS-NEXT: s_bfe_i32 s15, s15, 0x1d0000 507; GFX10PLUS-NEXT: ; return to shader part epilog 508 %shl = shl <16 x i32> %value, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> 509 %ashr = ashr <16 x i32> %shl, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3> 510 ret <16 x i32> %ashr 511} 512 513define i16 @v_sext_inreg_i16_4(i16 %value) { 514; GFX6-LABEL: v_sext_inreg_i16_4: 515; GFX6: ; %bb.0: 516; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 517; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 12 518; GFX6-NEXT: s_setpc_b64 s[30:31] 519; 520; GFX8-LABEL: v_sext_inreg_i16_4: 521; GFX8: ; %bb.0: 522; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 523; GFX8-NEXT: v_lshlrev_b16_e32 v0, 4, v0 524; GFX8-NEXT: v_ashrrev_i16_e32 v0, 4, v0 525; GFX8-NEXT: s_setpc_b64 s[30:31] 526; 527; GFX9-LABEL: v_sext_inreg_i16_4: 528; GFX9: ; %bb.0: 529; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 530; GFX9-NEXT: v_bfe_i32 v0, v0, 0, 12 531; GFX9-NEXT: s_setpc_b64 s[30:31] 532; 533; GFX10PLUS-LABEL: v_sext_inreg_i16_4: 534; GFX10PLUS: ; %bb.0: 535; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 536; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 12 537; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] 538 %shl = shl i16 %value, 4 539 %ashr = 
ashr i16 %shl, 4 540 ret i16 %ashr 541} 542 543define i16 @v_sext_inreg_i16_15(i16 %value) { 544; GFX6-LABEL: v_sext_inreg_i16_15: 545; GFX6: ; %bb.0: 546; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 547; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 1 548; GFX6-NEXT: s_setpc_b64 s[30:31] 549; 550; GFX8-LABEL: v_sext_inreg_i16_15: 551; GFX8: ; %bb.0: 552; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 553; GFX8-NEXT: v_lshlrev_b16_e32 v0, 15, v0 554; GFX8-NEXT: v_ashrrev_i16_e32 v0, 15, v0 555; GFX8-NEXT: s_setpc_b64 s[30:31] 556; 557; GFX9-LABEL: v_sext_inreg_i16_15: 558; GFX9: ; %bb.0: 559; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 560; GFX9-NEXT: v_bfe_i32 v0, v0, 0, 1 561; GFX9-NEXT: s_setpc_b64 s[30:31] 562; 563; GFX10PLUS-LABEL: v_sext_inreg_i16_15: 564; GFX10PLUS: ; %bb.0: 565; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 566; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 1 567; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] 568 %shl = shl i16 %value, 15 569 %ashr = ashr i16 %shl, 15 570 ret i16 %ashr 571} 572 573define amdgpu_ps i16 @s_sext_inreg_i16_9(i16 inreg %value) { 574; GFX6-LABEL: s_sext_inreg_i16_9: 575; GFX6: ; %bb.0: 576; GFX6-NEXT: s_bfe_i32 s0, s0, 0x70000 577; GFX6-NEXT: ; return to shader part epilog 578; 579; GFX8-LABEL: s_sext_inreg_i16_9: 580; GFX8: ; %bb.0: 581; GFX8-NEXT: s_lshl_b32 s0, s0, 9 582; GFX8-NEXT: s_sext_i32_i16 s0, s0 583; GFX8-NEXT: s_ashr_i32 s0, s0, 9 584; GFX8-NEXT: ; return to shader part epilog 585; 586; GFX9-LABEL: s_sext_inreg_i16_9: 587; GFX9: ; %bb.0: 588; GFX9-NEXT: s_lshl_b32 s0, s0, 9 589; GFX9-NEXT: s_sext_i32_i16 s0, s0 590; GFX9-NEXT: s_ashr_i32 s0, s0, 9 591; GFX9-NEXT: ; return to shader part epilog 592; 593; GFX10PLUS-LABEL: s_sext_inreg_i16_9: 594; GFX10PLUS: ; %bb.0: 595; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 9 596; GFX10PLUS-NEXT: s_sext_i32_i16 s0, s0 597; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, 9 598; GFX10PLUS-NEXT: ; return to shader part epilog 599 %shl = shl i16 %value, 9 600 %ashr = ashr i16 %shl, 9 601 ret 
i16 %ashr 602} 603 604define amdgpu_ps i16 @s_sext_inreg_i16_15(i16 inreg %value) { 605; GFX6-LABEL: s_sext_inreg_i16_15: 606; GFX6: ; %bb.0: 607; GFX6-NEXT: s_bfe_i32 s0, s0, 0x10000 608; GFX6-NEXT: ; return to shader part epilog 609; 610; GFX8-LABEL: s_sext_inreg_i16_15: 611; GFX8: ; %bb.0: 612; GFX8-NEXT: s_lshl_b32 s0, s0, 15 613; GFX8-NEXT: s_sext_i32_i16 s0, s0 614; GFX8-NEXT: s_ashr_i32 s0, s0, 15 615; GFX8-NEXT: ; return to shader part epilog 616; 617; GFX9-LABEL: s_sext_inreg_i16_15: 618; GFX9: ; %bb.0: 619; GFX9-NEXT: s_lshl_b32 s0, s0, 15 620; GFX9-NEXT: s_sext_i32_i16 s0, s0 621; GFX9-NEXT: s_ashr_i32 s0, s0, 15 622; GFX9-NEXT: ; return to shader part epilog 623; 624; GFX10PLUS-LABEL: s_sext_inreg_i16_15: 625; GFX10PLUS: ; %bb.0: 626; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 15 627; GFX10PLUS-NEXT: s_sext_i32_i16 s0, s0 628; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, 15 629; GFX10PLUS-NEXT: ; return to shader part epilog 630 %shl = shl i16 %value, 15 631 %ashr = ashr i16 %shl, 15 632 ret i16 %ashr 633} 634 635define <2 x i16> @v_sext_inreg_v2i16_8(<2 x i16> %value) { 636; GFX6-LABEL: v_sext_inreg_v2i16_8: 637; GFX6: ; %bb.0: 638; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 639; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 8 640; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 8 641; GFX6-NEXT: s_setpc_b64 s[30:31] 642; 643; GFX8-LABEL: v_sext_inreg_v2i16_8: 644; GFX8: ; %bb.0: 645; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 646; GFX8-NEXT: v_lshrrev_b32_e32 v1, 16, v0 647; GFX8-NEXT: v_lshlrev_b16_e32 v1, 8, v1 648; GFX8-NEXT: v_ashrrev_i16_e32 v1, 8, v1 649; GFX8-NEXT: v_lshlrev_b16_e32 v0, 8, v0 650; GFX8-NEXT: v_lshlrev_b32_e32 v1, 16, v1 651; GFX8-NEXT: v_or_b32_sdwa v0, sext(v0), v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD 652; GFX8-NEXT: s_setpc_b64 s[30:31] 653; 654; GFX9-LABEL: v_sext_inreg_v2i16_8: 655; GFX9: ; %bb.0: 656; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 657; GFX9-NEXT: v_pk_lshlrev_b16 v0, 8, v0 op_sel_hi:[0,1] 658; GFX9-NEXT: 
v_pk_ashrrev_i16 v0, 8, v0 op_sel_hi:[0,1] 659; GFX9-NEXT: s_setpc_b64 s[30:31] 660; 661; GFX10PLUS-LABEL: v_sext_inreg_v2i16_8: 662; GFX10PLUS: ; %bb.0: 663; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 664; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v0, 8, v0 op_sel_hi:[0,1] 665; GFX10PLUS-NEXT: v_pk_ashrrev_i16 v0, 8, v0 op_sel_hi:[0,1] 666; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] 667 %shl = shl <2 x i16> %value, <i16 8, i16 8> 668 %ashr = ashr <2 x i16> %shl, <i16 8, i16 8> 669 ret <2 x i16> %ashr 670} 671 672define <2 x i16> @v_sext_inreg_v2i16_15(<2 x i16> %value) { 673; GFX6-LABEL: v_sext_inreg_v2i16_15: 674; GFX6: ; %bb.0: 675; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 676; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 1 677; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 1 678; GFX6-NEXT: s_setpc_b64 s[30:31] 679; 680; GFX8-LABEL: v_sext_inreg_v2i16_15: 681; GFX8: ; %bb.0: 682; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 683; GFX8-NEXT: v_mov_b32_e32 v2, 15 684; GFX8-NEXT: v_lshlrev_b16_e32 v1, 15, v0 685; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 686; GFX8-NEXT: v_ashrrev_i16_e32 v1, 15, v1 687; GFX8-NEXT: v_ashrrev_i16_sdwa v0, v2, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 688; GFX8-NEXT: v_or_b32_e32 v0, v1, v0 689; GFX8-NEXT: s_setpc_b64 s[30:31] 690; 691; GFX9-LABEL: v_sext_inreg_v2i16_15: 692; GFX9: ; %bb.0: 693; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 694; GFX9-NEXT: v_pk_lshlrev_b16 v0, 15, v0 op_sel_hi:[0,1] 695; GFX9-NEXT: v_pk_ashrrev_i16 v0, 15, v0 op_sel_hi:[0,1] 696; GFX9-NEXT: s_setpc_b64 s[30:31] 697; 698; GFX10PLUS-LABEL: v_sext_inreg_v2i16_15: 699; GFX10PLUS: ; %bb.0: 700; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 701; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v0, 15, v0 op_sel_hi:[0,1] 702; GFX10PLUS-NEXT: v_pk_ashrrev_i16 v0, 15, v0 op_sel_hi:[0,1] 703; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] 704 %shl = shl <2 x i16> %value, <i16 15, i16 15> 705 
%ashr = ashr <2 x i16> %shl, <i16 15, i16 15> 706 ret <2 x i16> %ashr 707} 708 709define amdgpu_ps i32 @s_sext_inreg_v2i16_11(<2 x i16> inreg %value) { 710; GFX6-LABEL: s_sext_inreg_v2i16_11: 711; GFX6: ; %bb.0: 712; GFX6-NEXT: s_bfe_i32 s1, s1, 0x50000 713; GFX6-NEXT: s_bfe_i32 s0, s0, 0x50000 714; GFX6-NEXT: s_and_b32 s1, s1, 0xffff 715; GFX6-NEXT: s_and_b32 s0, s0, 0xffff 716; GFX6-NEXT: s_lshl_b32 s1, s1, 16 717; GFX6-NEXT: s_or_b32 s0, s0, s1 718; GFX6-NEXT: ; return to shader part epilog 719; 720; GFX8-LABEL: s_sext_inreg_v2i16_11: 721; GFX8: ; %bb.0: 722; GFX8-NEXT: s_lshr_b32 s1, s0, 16 723; GFX8-NEXT: s_lshl_b32 s0, s0, 11 724; GFX8-NEXT: s_lshl_b32 s1, s1, 11 725; GFX8-NEXT: s_sext_i32_i16 s0, s0 726; GFX8-NEXT: s_sext_i32_i16 s1, s1 727; GFX8-NEXT: s_ashr_i32 s0, s0, 11 728; GFX8-NEXT: s_ashr_i32 s1, s1, 11 729; GFX8-NEXT: s_lshl_b32 s1, s1, 16 730; GFX8-NEXT: s_and_b32 s0, s0, 0xffff 731; GFX8-NEXT: s_or_b32 s0, s1, s0 732; GFX8-NEXT: ; return to shader part epilog 733; 734; GFX9-LABEL: s_sext_inreg_v2i16_11: 735; GFX9: ; %bb.0: 736; GFX9-NEXT: s_lshr_b32 s1, s0, 16 737; GFX9-NEXT: s_lshl_b32 s0, s0, 0xb000b 738; GFX9-NEXT: s_lshl_b32 s1, s1, 11 739; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s1 740; GFX9-NEXT: s_sext_i32_i16 s1, s0 741; GFX9-NEXT: s_ashr_i32 s0, s0, 16 742; GFX9-NEXT: s_sext_i32_i16 s2, 0xb000b 743; GFX9-NEXT: s_ashr_i32 s1, s1, s2 744; GFX9-NEXT: s_ashr_i32 s0, s0, 11 745; GFX9-NEXT: s_pack_ll_b32_b16 s0, s1, s0 746; GFX9-NEXT: ; return to shader part epilog 747; 748; GFX10PLUS-LABEL: s_sext_inreg_v2i16_11: 749; GFX10PLUS: ; %bb.0: 750; GFX10PLUS-NEXT: s_lshr_b32 s1, s0, 16 751; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 0xb000b 752; GFX10PLUS-NEXT: s_lshl_b32 s1, s1, 11 753; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s0, s0, s1 754; GFX10PLUS-NEXT: s_sext_i32_i16 s1, 0xb000b 755; GFX10PLUS-NEXT: s_sext_i32_i16 s2, s0 756; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, 16 757; GFX10PLUS-NEXT: s_ashr_i32 s1, s2, s1 758; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, 11 759; 
GFX10PLUS-NEXT: s_pack_ll_b32_b16 s0, s1, s0 760; GFX10PLUS-NEXT: ; return to shader part epilog 761 %shl = shl <2 x i16> %value, <i16 11, i16 11> 762 %ashr = ashr <2 x i16> %shl, <i16 11, i16 11> 763 %cast = bitcast <2 x i16> %ashr to i32 764 ret i32 %cast 765} 766 767; FIXME 768; define <3 x i16> @v_sext_inreg_v3i16_4(<3 x i16> %value) { 769; %shl = shl <3 x i16> %value, <i16 4, i16 4, i16 4> 770; %ashr = ashr <3 x i16> %shl, <i16 4, i16 4, i16 4> 771; ret <3 x i16> %ashr 772; } 773 774; define amdgpu_ps <3 x i16> @s_sext_inreg_v3i16_4(<3 x i16> inreg %value) { 775; %shl = shl <3 x i16> %value, <i16 4, i16 4, i16 4> 776; %ashr = ashr <3 x i16> %shl, <i16 4, i16 4, i16 4> 777; ret <3 x i16> %ashr 778; } 779 780define <2 x float> @v_sext_inreg_v4i16_3(<4 x i16> %value) { 781; GFX6-LABEL: v_sext_inreg_v4i16_3: 782; GFX6: ; %bb.0: 783; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 784; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 13 785; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 13 786; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v1 787; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 13 788; GFX6-NEXT: v_bfe_i32 v3, v3, 0, 13 789; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 790; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 791; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 792; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v2 793; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v3 794; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2 795; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 796; GFX6-NEXT: s_setpc_b64 s[30:31] 797; 798; GFX8-LABEL: v_sext_inreg_v4i16_3: 799; GFX8: ; %bb.0: 800; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 801; GFX8-NEXT: v_mov_b32_e32 v3, 3 802; GFX8-NEXT: v_lshlrev_b16_e32 v2, 3, v0 803; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v3, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 804; GFX8-NEXT: v_lshlrev_b16_e32 v4, 3, v1 805; GFX8-NEXT: v_lshlrev_b16_sdwa v1, v3, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 806; GFX8-NEXT: v_ashrrev_i16_e32 v2, 3, v2 807; GFX8-NEXT: v_ashrrev_i16_sdwa v0, 
v3, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 808; GFX8-NEXT: v_or_b32_e32 v0, v2, v0 809; GFX8-NEXT: v_ashrrev_i16_e32 v2, 3, v4 810; GFX8-NEXT: v_ashrrev_i16_sdwa v1, v3, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 811; GFX8-NEXT: v_or_b32_e32 v1, v2, v1 812; GFX8-NEXT: s_setpc_b64 s[30:31] 813; 814; GFX9-LABEL: v_sext_inreg_v4i16_3: 815; GFX9: ; %bb.0: 816; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 817; GFX9-NEXT: v_pk_lshlrev_b16 v0, 3, v0 op_sel_hi:[0,1] 818; GFX9-NEXT: v_pk_lshlrev_b16 v1, 3, v1 op_sel_hi:[0,1] 819; GFX9-NEXT: v_pk_ashrrev_i16 v0, 3, v0 op_sel_hi:[0,1] 820; GFX9-NEXT: v_pk_ashrrev_i16 v1, 3, v1 op_sel_hi:[0,1] 821; GFX9-NEXT: s_setpc_b64 s[30:31] 822; 823; GFX10PLUS-LABEL: v_sext_inreg_v4i16_3: 824; GFX10PLUS: ; %bb.0: 825; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 826; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v0, 3, v0 op_sel_hi:[0,1] 827; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v1, 3, v1 op_sel_hi:[0,1] 828; GFX10PLUS-NEXT: v_pk_ashrrev_i16 v0, 3, v0 op_sel_hi:[0,1] 829; GFX10PLUS-NEXT: v_pk_ashrrev_i16 v1, 3, v1 op_sel_hi:[0,1] 830; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] 831 %shl = shl <4 x i16> %value, <i16 3, i16 3, i16 3, i16 3> 832 %ashr = ashr <4 x i16> %shl, <i16 3, i16 3, i16 3, i16 3> 833 %cast = bitcast <4 x i16> %ashr to <2 x float> 834 ret <2 x float> %cast 835} 836 837define amdgpu_ps <2 x i32> @s_sext_inreg_v4i16_14(<4 x i16> inreg %value) { 838; GFX6-LABEL: s_sext_inreg_v4i16_14: 839; GFX6: ; %bb.0: 840; GFX6-NEXT: s_bfe_i32 s1, s1, 0x20000 841; GFX6-NEXT: s_bfe_i32 s0, s0, 0x20000 842; GFX6-NEXT: s_and_b32 s1, s1, 0xffff 843; GFX6-NEXT: s_bfe_i32 s2, s2, 0x20000 844; GFX6-NEXT: s_bfe_i32 s3, s3, 0x20000 845; GFX6-NEXT: s_and_b32 s0, s0, 0xffff 846; GFX6-NEXT: s_lshl_b32 s1, s1, 16 847; GFX6-NEXT: s_or_b32 s0, s0, s1 848; GFX6-NEXT: s_and_b32 s1, s2, 0xffff 849; GFX6-NEXT: s_and_b32 s2, s3, 0xffff 850; GFX6-NEXT: s_lshl_b32 s2, s2, 16 851; GFX6-NEXT: s_or_b32 s1, s1, 
s2 852; GFX6-NEXT: ; return to shader part epilog 853; 854; GFX8-LABEL: s_sext_inreg_v4i16_14: 855; GFX8: ; %bb.0: 856; GFX8-NEXT: s_lshr_b32 s2, s0, 16 857; GFX8-NEXT: s_lshr_b32 s3, s1, 16 858; GFX8-NEXT: s_lshl_b32 s0, s0, 14 859; GFX8-NEXT: s_lshl_b32 s2, s2, 14 860; GFX8-NEXT: s_lshl_b32 s1, s1, 14 861; GFX8-NEXT: s_lshl_b32 s3, s3, 14 862; GFX8-NEXT: s_sext_i32_i16 s0, s0 863; GFX8-NEXT: s_sext_i32_i16 s2, s2 864; GFX8-NEXT: s_sext_i32_i16 s1, s1 865; GFX8-NEXT: s_sext_i32_i16 s3, s3 866; GFX8-NEXT: s_ashr_i32 s0, s0, 14 867; GFX8-NEXT: s_ashr_i32 s2, s2, 14 868; GFX8-NEXT: s_ashr_i32 s1, s1, 14 869; GFX8-NEXT: s_ashr_i32 s3, s3, 14 870; GFX8-NEXT: s_lshl_b32 s2, s2, 16 871; GFX8-NEXT: s_and_b32 s0, s0, 0xffff 872; GFX8-NEXT: s_or_b32 s0, s2, s0 873; GFX8-NEXT: s_lshl_b32 s2, s3, 16 874; GFX8-NEXT: s_and_b32 s1, s1, 0xffff 875; GFX8-NEXT: s_or_b32 s1, s2, s1 876; GFX8-NEXT: ; return to shader part epilog 877; 878; GFX9-LABEL: s_sext_inreg_v4i16_14: 879; GFX9: ; %bb.0: 880; GFX9-NEXT: s_lshr_b32 s2, s0, 16 881; GFX9-NEXT: s_lshl_b32 s0, s0, 0xe000e 882; GFX9-NEXT: s_lshl_b32 s2, s2, 14 883; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s2 884; GFX9-NEXT: s_sext_i32_i16 s2, s0 885; GFX9-NEXT: s_ashr_i32 s0, s0, 16 886; GFX9-NEXT: s_sext_i32_i16 s3, 0xe000e 887; GFX9-NEXT: s_ashr_i32 s2, s2, s3 888; GFX9-NEXT: s_ashr_i32 s0, s0, 14 889; GFX9-NEXT: s_pack_ll_b32_b16 s0, s2, s0 890; GFX9-NEXT: s_lshr_b32 s2, s1, 16 891; GFX9-NEXT: s_lshl_b32 s1, s1, 0xe000e 892; GFX9-NEXT: s_lshl_b32 s2, s2, 14 893; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s2 894; GFX9-NEXT: s_sext_i32_i16 s2, s1 895; GFX9-NEXT: s_ashr_i32 s1, s1, 16 896; GFX9-NEXT: s_ashr_i32 s2, s2, s3 897; GFX9-NEXT: s_ashr_i32 s1, s1, 14 898; GFX9-NEXT: s_pack_ll_b32_b16 s1, s2, s1 899; GFX9-NEXT: ; return to shader part epilog 900; 901; GFX10PLUS-LABEL: s_sext_inreg_v4i16_14: 902; GFX10PLUS: ; %bb.0: 903; GFX10PLUS-NEXT: s_lshr_b32 s2, s0, 16 904; GFX10PLUS-NEXT: s_lshr_b32 s4, s1, 16 905; GFX10PLUS-NEXT: s_lshl_b32 s0, 
s0, 0xe000e 906; GFX10PLUS-NEXT: s_lshl_b32 s2, s2, 14 907; GFX10PLUS-NEXT: s_lshl_b32 s1, s1, 0xe000e 908; GFX10PLUS-NEXT: s_lshl_b32 s4, s4, 14 909; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s0, s0, s2 910; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s1, s1, s4 911; GFX10PLUS-NEXT: s_sext_i32_i16 s2, 0xe000e 912; GFX10PLUS-NEXT: s_sext_i32_i16 s3, s0 913; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, 16 914; GFX10PLUS-NEXT: s_sext_i32_i16 s4, s1 915; GFX10PLUS-NEXT: s_ashr_i32 s1, s1, 16 916; GFX10PLUS-NEXT: s_ashr_i32 s3, s3, s2 917; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, 14 918; GFX10PLUS-NEXT: s_ashr_i32 s2, s4, s2 919; GFX10PLUS-NEXT: s_ashr_i32 s1, s1, 14 920; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s0, s3, s0 921; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s1, s2, s1 922; GFX10PLUS-NEXT: ; return to shader part epilog 923 %shl = shl <4 x i16> %value, <i16 14, i16 14, i16 14, i16 14> 924 %ashr = ashr <4 x i16> %shl, <i16 14, i16 14, i16 14, i16 14> 925 %cast = bitcast <4 x i16> %ashr to <2 x i32> 926 ret <2 x i32> %cast 927} 928 929; FIXME 930; define <5 x i16> @v_sext_inreg_v5i16(<5 x i16> %value) { 931; %shl = shl <5 x i16> %value, %amount 932; ret <5 x i16> %result 933; } 934 935; define amdgpu_ps <5 x i16> @s_sext_inreg_v5i16(<5 x i16> inreg %value) { 936; %shl = shl <5 x i16> %value, %amount 937; ret <5 x i16> %result 938; } 939 940; define <3 x float> @v_sext_inreg_v6i16(<6 x i16> %value) { 941; %shl = shl <6 x i16> %value, %amount 942; %cast = bitcast <6 x i16> %result to <3 x float> 943; ret <3 x float> %cast 944; } 945 946; define amdgpu_ps <3 x i32> @s_sext_inreg_v6i16(<6 x i16> inreg %value) { 947; %shl = shl <6 x i16> %value, %amount 948; %cast = bitcast <6 x i16> %result to <3 x i32> 949; ret <3 x i32> %cast 950; } 951 952define <4 x float> @v_sext_inreg_v8i16_11(<8 x i16> %value) { 953; GFX6-LABEL: v_sext_inreg_v8i16_11: 954; GFX6: ; %bb.0: 955; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 956; GFX6-NEXT: v_bfe_i32 v1, v1, 0, 5 957; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 5 958; GFX6-NEXT: 
v_and_b32_e32 v1, 0xffff, v1 959; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 5 960; GFX6-NEXT: v_bfe_i32 v3, v3, 0, 5 961; GFX6-NEXT: v_and_b32_e32 v0, 0xffff, v0 962; GFX6-NEXT: v_lshlrev_b32_e32 v1, 16, v1 963; GFX6-NEXT: v_bfe_i32 v5, v5, 0, 5 964; GFX6-NEXT: v_or_b32_e32 v0, v0, v1 965; GFX6-NEXT: v_and_b32_e32 v1, 0xffff, v2 966; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v3 967; GFX6-NEXT: v_bfe_i32 v4, v4, 0, 5 968; GFX6-NEXT: v_bfe_i32 v7, v7, 0, 5 969; GFX6-NEXT: v_lshlrev_b32_e32 v2, 16, v2 970; GFX6-NEXT: v_and_b32_e32 v3, 0xffff, v5 971; GFX6-NEXT: v_bfe_i32 v6, v6, 0, 5 972; GFX6-NEXT: v_or_b32_e32 v1, v1, v2 973; GFX6-NEXT: v_and_b32_e32 v2, 0xffff, v4 974; GFX6-NEXT: v_lshlrev_b32_e32 v3, 16, v3 975; GFX6-NEXT: v_and_b32_e32 v4, 0xffff, v7 976; GFX6-NEXT: v_or_b32_e32 v2, v2, v3 977; GFX6-NEXT: v_and_b32_e32 v3, 0xffff, v6 978; GFX6-NEXT: v_lshlrev_b32_e32 v4, 16, v4 979; GFX6-NEXT: v_or_b32_e32 v3, v3, v4 980; GFX6-NEXT: s_setpc_b64 s[30:31] 981; 982; GFX8-LABEL: v_sext_inreg_v8i16_11: 983; GFX8: ; %bb.0: 984; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 985; GFX8-NEXT: v_mov_b32_e32 v5, 11 986; GFX8-NEXT: v_lshlrev_b16_e32 v4, 11, v0 987; GFX8-NEXT: v_lshlrev_b16_sdwa v0, v5, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 988; GFX8-NEXT: v_lshlrev_b16_e32 v6, 11, v1 989; GFX8-NEXT: v_lshlrev_b16_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 990; GFX8-NEXT: v_ashrrev_i16_e32 v4, 11, v4 991; GFX8-NEXT: v_ashrrev_i16_sdwa v0, v5, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 992; GFX8-NEXT: v_lshlrev_b16_e32 v7, 11, v2 993; GFX8-NEXT: v_lshlrev_b16_sdwa v2, v5, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 994; GFX8-NEXT: v_or_b32_e32 v0, v4, v0 995; GFX8-NEXT: v_ashrrev_i16_e32 v4, 11, v6 996; GFX8-NEXT: v_ashrrev_i16_sdwa v1, v5, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 997; GFX8-NEXT: v_lshlrev_b16_e32 v8, 11, v3 998; 
GFX8-NEXT: v_lshlrev_b16_sdwa v3, v5, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1 999; GFX8-NEXT: v_or_b32_e32 v1, v4, v1 1000; GFX8-NEXT: v_ashrrev_i16_e32 v4, 11, v7 1001; GFX8-NEXT: v_ashrrev_i16_sdwa v2, v5, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 1002; GFX8-NEXT: v_or_b32_e32 v2, v4, v2 1003; GFX8-NEXT: v_ashrrev_i16_e32 v4, 11, v8 1004; GFX8-NEXT: v_ashrrev_i16_sdwa v3, v5, v3 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD 1005; GFX8-NEXT: v_or_b32_e32 v3, v4, v3 1006; GFX8-NEXT: s_setpc_b64 s[30:31] 1007; 1008; GFX9-LABEL: v_sext_inreg_v8i16_11: 1009; GFX9: ; %bb.0: 1010; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1011; GFX9-NEXT: v_pk_lshlrev_b16 v0, 11, v0 op_sel_hi:[0,1] 1012; GFX9-NEXT: v_pk_lshlrev_b16 v1, 11, v1 op_sel_hi:[0,1] 1013; GFX9-NEXT: v_pk_lshlrev_b16 v2, 11, v2 op_sel_hi:[0,1] 1014; GFX9-NEXT: v_pk_lshlrev_b16 v3, 11, v3 op_sel_hi:[0,1] 1015; GFX9-NEXT: v_pk_ashrrev_i16 v0, 11, v0 op_sel_hi:[0,1] 1016; GFX9-NEXT: v_pk_ashrrev_i16 v1, 11, v1 op_sel_hi:[0,1] 1017; GFX9-NEXT: v_pk_ashrrev_i16 v2, 11, v2 op_sel_hi:[0,1] 1018; GFX9-NEXT: v_pk_ashrrev_i16 v3, 11, v3 op_sel_hi:[0,1] 1019; GFX9-NEXT: s_setpc_b64 s[30:31] 1020; 1021; GFX10PLUS-LABEL: v_sext_inreg_v8i16_11: 1022; GFX10PLUS: ; %bb.0: 1023; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1024; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v0, 11, v0 op_sel_hi:[0,1] 1025; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v1, 11, v1 op_sel_hi:[0,1] 1026; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v2, 11, v2 op_sel_hi:[0,1] 1027; GFX10PLUS-NEXT: v_pk_lshlrev_b16 v3, 11, v3 op_sel_hi:[0,1] 1028; GFX10PLUS-NEXT: v_pk_ashrrev_i16 v0, 11, v0 op_sel_hi:[0,1] 1029; GFX10PLUS-NEXT: v_pk_ashrrev_i16 v1, 11, v1 op_sel_hi:[0,1] 1030; GFX10PLUS-NEXT: v_pk_ashrrev_i16 v2, 11, v2 op_sel_hi:[0,1] 1031; GFX10PLUS-NEXT: v_pk_ashrrev_i16 v3, 11, v3 op_sel_hi:[0,1] 1032; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] 1033 %shl = shl <8 x i16> %value, <i16 11, i16 
11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11> 1034 %ashr = ashr <8 x i16> %shl, <i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11, i16 11> 1035 %cast = bitcast <8 x i16> %ashr to <4 x float> 1036 ret <4 x float> %cast 1037} 1038 1039define amdgpu_ps <4 x i32> @s_sext_inreg_v8i16_5(<8 x i16> inreg %value) { 1040; GFX6-LABEL: s_sext_inreg_v8i16_5: 1041; GFX6: ; %bb.0: 1042; GFX6-NEXT: s_bfe_i32 s1, s1, 0xb0000 1043; GFX6-NEXT: s_bfe_i32 s0, s0, 0xb0000 1044; GFX6-NEXT: s_and_b32 s1, s1, 0xffff 1045; GFX6-NEXT: s_bfe_i32 s2, s2, 0xb0000 1046; GFX6-NEXT: s_bfe_i32 s3, s3, 0xb0000 1047; GFX6-NEXT: s_and_b32 s0, s0, 0xffff 1048; GFX6-NEXT: s_lshl_b32 s1, s1, 16 1049; GFX6-NEXT: s_bfe_i32 s5, s5, 0xb0000 1050; GFX6-NEXT: s_or_b32 s0, s0, s1 1051; GFX6-NEXT: s_and_b32 s1, s2, 0xffff 1052; GFX6-NEXT: s_and_b32 s2, s3, 0xffff 1053; GFX6-NEXT: s_bfe_i32 s4, s4, 0xb0000 1054; GFX6-NEXT: s_bfe_i32 s7, s7, 0xb0000 1055; GFX6-NEXT: s_lshl_b32 s2, s2, 16 1056; GFX6-NEXT: s_and_b32 s3, s5, 0xffff 1057; GFX6-NEXT: s_bfe_i32 s6, s6, 0xb0000 1058; GFX6-NEXT: s_or_b32 s1, s1, s2 1059; GFX6-NEXT: s_and_b32 s2, s4, 0xffff 1060; GFX6-NEXT: s_lshl_b32 s3, s3, 16 1061; GFX6-NEXT: s_and_b32 s4, s7, 0xffff 1062; GFX6-NEXT: s_or_b32 s2, s2, s3 1063; GFX6-NEXT: s_and_b32 s3, s6, 0xffff 1064; GFX6-NEXT: s_lshl_b32 s4, s4, 16 1065; GFX6-NEXT: s_or_b32 s3, s3, s4 1066; GFX6-NEXT: ; return to shader part epilog 1067; 1068; GFX8-LABEL: s_sext_inreg_v8i16_5: 1069; GFX8: ; %bb.0: 1070; GFX8-NEXT: s_lshr_b32 s4, s0, 16 1071; GFX8-NEXT: s_lshr_b32 s5, s1, 16 1072; GFX8-NEXT: s_lshl_b32 s0, s0, 5 1073; GFX8-NEXT: s_lshl_b32 s4, s4, 5 1074; GFX8-NEXT: s_lshr_b32 s6, s2, 16 1075; GFX8-NEXT: s_lshl_b32 s1, s1, 5 1076; GFX8-NEXT: s_lshl_b32 s5, s5, 5 1077; GFX8-NEXT: s_sext_i32_i16 s0, s0 1078; GFX8-NEXT: s_sext_i32_i16 s4, s4 1079; GFX8-NEXT: s_lshr_b32 s7, s3, 16 1080; GFX8-NEXT: s_lshl_b32 s2, s2, 5 1081; GFX8-NEXT: s_lshl_b32 s6, s6, 5 1082; GFX8-NEXT: s_sext_i32_i16 s1, s1 1083; GFX8-NEXT: 
s_sext_i32_i16 s5, s5 1084; GFX8-NEXT: s_ashr_i32 s0, s0, 5 1085; GFX8-NEXT: s_ashr_i32 s4, s4, 5 1086; GFX8-NEXT: s_lshl_b32 s3, s3, 5 1087; GFX8-NEXT: s_lshl_b32 s7, s7, 5 1088; GFX8-NEXT: s_sext_i32_i16 s2, s2 1089; GFX8-NEXT: s_sext_i32_i16 s6, s6 1090; GFX8-NEXT: s_ashr_i32 s1, s1, 5 1091; GFX8-NEXT: s_ashr_i32 s5, s5, 5 1092; GFX8-NEXT: s_lshl_b32 s4, s4, 16 1093; GFX8-NEXT: s_and_b32 s0, s0, 0xffff 1094; GFX8-NEXT: s_sext_i32_i16 s3, s3 1095; GFX8-NEXT: s_sext_i32_i16 s7, s7 1096; GFX8-NEXT: s_ashr_i32 s2, s2, 5 1097; GFX8-NEXT: s_ashr_i32 s6, s6, 5 1098; GFX8-NEXT: s_or_b32 s0, s4, s0 1099; GFX8-NEXT: s_lshl_b32 s4, s5, 16 1100; GFX8-NEXT: s_and_b32 s1, s1, 0xffff 1101; GFX8-NEXT: s_ashr_i32 s3, s3, 5 1102; GFX8-NEXT: s_ashr_i32 s7, s7, 5 1103; GFX8-NEXT: s_or_b32 s1, s4, s1 1104; GFX8-NEXT: s_lshl_b32 s4, s6, 16 1105; GFX8-NEXT: s_and_b32 s2, s2, 0xffff 1106; GFX8-NEXT: s_or_b32 s2, s4, s2 1107; GFX8-NEXT: s_lshl_b32 s4, s7, 16 1108; GFX8-NEXT: s_and_b32 s3, s3, 0xffff 1109; GFX8-NEXT: s_or_b32 s3, s4, s3 1110; GFX8-NEXT: ; return to shader part epilog 1111; 1112; GFX9-LABEL: s_sext_inreg_v8i16_5: 1113; GFX9: ; %bb.0: 1114; GFX9-NEXT: s_lshr_b32 s4, s0, 16 1115; GFX9-NEXT: s_lshl_b32 s0, s0, 0x50005 1116; GFX9-NEXT: s_lshl_b32 s4, s4, 5 1117; GFX9-NEXT: s_pack_ll_b32_b16 s0, s0, s4 1118; GFX9-NEXT: s_sext_i32_i16 s4, s0 1119; GFX9-NEXT: s_ashr_i32 s0, s0, 16 1120; GFX9-NEXT: s_sext_i32_i16 s5, 0x50005 1121; GFX9-NEXT: s_ashr_i32 s4, s4, s5 1122; GFX9-NEXT: s_ashr_i32 s0, s0, 5 1123; GFX9-NEXT: s_pack_ll_b32_b16 s0, s4, s0 1124; GFX9-NEXT: s_lshr_b32 s4, s1, 16 1125; GFX9-NEXT: s_lshl_b32 s1, s1, 0x50005 1126; GFX9-NEXT: s_lshl_b32 s4, s4, 5 1127; GFX9-NEXT: s_pack_ll_b32_b16 s1, s1, s4 1128; GFX9-NEXT: s_sext_i32_i16 s4, s1 1129; GFX9-NEXT: s_ashr_i32 s1, s1, 16 1130; GFX9-NEXT: s_ashr_i32 s4, s4, s5 1131; GFX9-NEXT: s_ashr_i32 s1, s1, 5 1132; GFX9-NEXT: s_pack_ll_b32_b16 s1, s4, s1 1133; GFX9-NEXT: s_lshr_b32 s4, s2, 16 1134; GFX9-NEXT: s_lshl_b32 s2, s2, 
0x50005 1135; GFX9-NEXT: s_lshl_b32 s4, s4, 5 1136; GFX9-NEXT: s_pack_ll_b32_b16 s2, s2, s4 1137; GFX9-NEXT: s_sext_i32_i16 s4, s2 1138; GFX9-NEXT: s_ashr_i32 s2, s2, 16 1139; GFX9-NEXT: s_ashr_i32 s4, s4, s5 1140; GFX9-NEXT: s_ashr_i32 s2, s2, 5 1141; GFX9-NEXT: s_pack_ll_b32_b16 s2, s4, s2 1142; GFX9-NEXT: s_lshr_b32 s4, s3, 16 1143; GFX9-NEXT: s_lshl_b32 s3, s3, 0x50005 1144; GFX9-NEXT: s_lshl_b32 s4, s4, 5 1145; GFX9-NEXT: s_pack_ll_b32_b16 s3, s3, s4 1146; GFX9-NEXT: s_sext_i32_i16 s4, s3 1147; GFX9-NEXT: s_ashr_i32 s3, s3, 16 1148; GFX9-NEXT: s_ashr_i32 s4, s4, s5 1149; GFX9-NEXT: s_ashr_i32 s3, s3, 5 1150; GFX9-NEXT: s_pack_ll_b32_b16 s3, s4, s3 1151; GFX9-NEXT: ; return to shader part epilog 1152; 1153; GFX10PLUS-LABEL: s_sext_inreg_v8i16_5: 1154; GFX10PLUS: ; %bb.0: 1155; GFX10PLUS-NEXT: s_lshr_b32 s4, s0, 16 1156; GFX10PLUS-NEXT: s_lshr_b32 s6, s1, 16 1157; GFX10PLUS-NEXT: s_lshl_b32 s0, s0, 0x50005 1158; GFX10PLUS-NEXT: s_lshl_b32 s4, s4, 5 1159; GFX10PLUS-NEXT: s_lshl_b32 s1, s1, 0x50005 1160; GFX10PLUS-NEXT: s_lshl_b32 s6, s6, 5 1161; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s0, s0, s4 1162; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s1, s1, s6 1163; GFX10PLUS-NEXT: s_lshr_b32 s6, s2, 16 1164; GFX10PLUS-NEXT: s_sext_i32_i16 s4, s0 1165; GFX10PLUS-NEXT: s_sext_i32_i16 s5, 0x50005 1166; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, 16 1167; GFX10PLUS-NEXT: s_lshl_b32 s2, s2, 0x50005 1168; GFX10PLUS-NEXT: s_lshl_b32 s6, s6, 5 1169; GFX10PLUS-NEXT: s_ashr_i32 s4, s4, s5 1170; GFX10PLUS-NEXT: s_ashr_i32 s0, s0, 5 1171; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s2, s2, s6 1172; GFX10PLUS-NEXT: s_lshr_b32 s6, s3, 16 1173; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s0, s4, s0 1174; GFX10PLUS-NEXT: s_sext_i32_i16 s4, s1 1175; GFX10PLUS-NEXT: s_ashr_i32 s1, s1, 16 1176; GFX10PLUS-NEXT: s_lshl_b32 s3, s3, 0x50005 1177; GFX10PLUS-NEXT: s_lshl_b32 s6, s6, 5 1178; GFX10PLUS-NEXT: s_ashr_i32 s4, s4, s5 1179; GFX10PLUS-NEXT: s_ashr_i32 s1, s1, 5 1180; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s3, s3, s6 1181; 
GFX10PLUS-NEXT: s_pack_ll_b32_b16 s1, s4, s1 1182; GFX10PLUS-NEXT: s_sext_i32_i16 s4, s2 1183; GFX10PLUS-NEXT: s_ashr_i32 s2, s2, 16 1184; GFX10PLUS-NEXT: s_sext_i32_i16 s6, s3 1185; GFX10PLUS-NEXT: s_ashr_i32 s3, s3, 16 1186; GFX10PLUS-NEXT: s_ashr_i32 s4, s4, s5 1187; GFX10PLUS-NEXT: s_ashr_i32 s2, s2, 5 1188; GFX10PLUS-NEXT: s_ashr_i32 s5, s6, s5 1189; GFX10PLUS-NEXT: s_ashr_i32 s3, s3, 5 1190; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s2, s4, s2 1191; GFX10PLUS-NEXT: s_pack_ll_b32_b16 s3, s5, s3 1192; GFX10PLUS-NEXT: ; return to shader part epilog 1193 %shl = shl <8 x i16> %value, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5> 1194 %ashr = ashr <8 x i16> %shl, <i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5, i16 5> 1195 %cast = bitcast <8 x i16> %ashr to <4 x i32> 1196 ret <4 x i32> %cast 1197} 1198 1199define i64 @v_sext_inreg_i64_23(i64 %value) { 1200; GCN-LABEL: v_sext_inreg_i64_23: 1201; GCN: ; %bb.0: 1202; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1203; GCN-NEXT: v_bfe_i32 v1, v0, 0, 9 1204; GCN-NEXT: s_setpc_b64 s[30:31] 1205; 1206; GFX10PLUS-LABEL: v_sext_inreg_i64_23: 1207; GFX10PLUS: ; %bb.0: 1208; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1209; GFX10PLUS-NEXT: v_bfe_i32 v1, v0, 0, 9 1210; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] 1211 %shl = shl i64 %value, 23 1212 %ashr = ashr i64 %shl, 23 1213 ret i64 %ashr 1214} 1215 1216define i64 @v_sext_inreg_i64_40(i64 %value) { 1217; GCN-LABEL: v_sext_inreg_i64_40: 1218; GCN: ; %bb.0: 1219; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1220; GCN-NEXT: v_bfe_i32 v0, v0, 0, 24 1221; GCN-NEXT: v_ashrrev_i32_e32 v1, 31, v0 1222; GCN-NEXT: s_setpc_b64 s[30:31] 1223; 1224; GFX10PLUS-LABEL: v_sext_inreg_i64_40: 1225; GFX10PLUS: ; %bb.0: 1226; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1227; GFX10PLUS-NEXT: v_bfe_i32 v0, v0, 0, 24 1228; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v1, 31, v0 1229; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] 1230 %shl = shl i64 %value, 40 1231 %ashr = ashr i64 %shl, 
40
  ret i64 %ashr
}

define i64 @v_sext_inreg_i64_63(i64 %value) {
; GCN-LABEL: v_sext_inreg_i64_63:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_bfe_i32 v0, v0, 0, 1
; GCN-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_sext_inreg_i64_63:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_bfe_i32 v0, v0, 0, 1
; GFX10PLUS-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %shl = shl i64 %value, 63
  %ashr = ashr i64 %shl, 63
  ret i64 %ashr
}

define i64 @v_sext_inreg_i64_33(i64 %value) {
; GCN-LABEL: v_sext_inreg_i64_33:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_bfe_i32 v0, v0, 0, 31
; GCN-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_sext_inreg_i64_33:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_bfe_i32 v0, v0, 0, 31
; GFX10PLUS-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %shl = shl i64 %value, 33
  %ashr = ashr i64 %shl, 33
  ret i64 %ashr
}

; shl+ashr by 32 is a sign extension of the low 32 bits of %value. The ashr
; must consume %shl (as s_sext_inreg_i64_32 does); it previously read %value,
; which left %shl dead and tested a plain `ashr i64 %value, 32` instead.
; The assertions below were adjusted by hand to the expected sext_inreg output;
; re-run utils/update_llc_test_checks.py to confirm them.
define i64 @v_sext_inreg_i64_32(i64 %value) {
; GCN-LABEL: v_sext_inreg_i64_32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GCN-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
; GCN-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_sext_inreg_i64_32:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_ashrrev_i32_e32 v1, 31, v0
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
  %shl = shl i64 %value, 32
  %ashr = ashr i64 %shl, 32
  ret i64 %ashr
}

1292define i64 @v_sext_inreg_i64_31(i64 %value) { 1293; GCN-LABEL: v_sext_inreg_i64_31: 1294; GCN: ; %bb.0: 1295; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1296; GCN-NEXT: v_bfe_i32 v1, v0, 0, 1 1297; GCN-NEXT: s_setpc_b64 s[30:31] 1298; 1299; GFX10PLUS-LABEL: v_sext_inreg_i64_31: 1300; GFX10PLUS: ; %bb.0: 1301; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1302; GFX10PLUS-NEXT: v_bfe_i32 v1, v0, 0, 1 1303; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] 1304 %shl = shl i64 %value, 31 1305 %ashr = ashr i64 %shl, 31 1306 ret i64 %ashr 1307} 1308 1309define amdgpu_ps i64 @s_sext_inreg_i64_3(i64 inreg %value) { 1310; GCN-LABEL: s_sext_inreg_i64_3: 1311; GCN: ; %bb.0: 1312; GCN-NEXT: s_bfe_i64 s[0:1], s[0:1], 0x3d0000 1313; GCN-NEXT: ; return to shader part epilog 1314; 1315; GFX10PLUS-LABEL: s_sext_inreg_i64_3: 1316; GFX10PLUS: ; %bb.0: 1317; GFX10PLUS-NEXT: s_bfe_i64 s[0:1], s[0:1], 0x3d0000 1318; GFX10PLUS-NEXT: ; return to shader part epilog 1319 %shl = shl i64 %value, 3 1320 %ashr = ashr i64 %shl, 3 1321 ret i64 %ashr 1322} 1323 1324define amdgpu_ps i64 @s_sext_inreg_i64_63(i64 inreg %value) { 1325; GCN-LABEL: s_sext_inreg_i64_63: 1326; GCN: ; %bb.0: 1327; GCN-NEXT: s_bfe_i64 s[0:1], s[0:1], 0x10000 1328; GCN-NEXT: ; return to shader part epilog 1329; 1330; GFX10PLUS-LABEL: s_sext_inreg_i64_63: 1331; GFX10PLUS: ; %bb.0: 1332; GFX10PLUS-NEXT: s_bfe_i64 s[0:1], s[0:1], 0x10000 1333; GFX10PLUS-NEXT: ; return to shader part epilog 1334 %shl = shl i64 %value, 63 1335 %ashr = ashr i64 %shl, 63 1336 ret i64 %ashr 1337} 1338 1339define amdgpu_ps i64 @s_sext_inreg_i64_33(i64 inreg %value) { 1340; GCN-LABEL: s_sext_inreg_i64_33: 1341; GCN: ; %bb.0: 1342; GCN-NEXT: s_bfe_i64 s[0:1], s[0:1], 0x1f0000 1343; GCN-NEXT: ; return to shader part epilog 1344; 1345; GFX10PLUS-LABEL: s_sext_inreg_i64_33: 1346; GFX10PLUS: ; %bb.0: 1347; GFX10PLUS-NEXT: s_bfe_i64 s[0:1], s[0:1], 0x1f0000 1348; GFX10PLUS-NEXT: ; return to shader part epilog 1349 %shl = shl i64 %value, 33 1350 
%ashr = ashr i64 %shl, 33 1351 ret i64 %ashr 1352} 1353 1354define amdgpu_ps i64 @s_sext_inreg_i64_32(i64 inreg %value) { 1355; GCN-LABEL: s_sext_inreg_i64_32: 1356; GCN: ; %bb.0: 1357; GCN-NEXT: s_ashr_i32 s1, s0, 31 1358; GCN-NEXT: ; return to shader part epilog 1359; 1360; GFX10PLUS-LABEL: s_sext_inreg_i64_32: 1361; GFX10PLUS: ; %bb.0: 1362; GFX10PLUS-NEXT: s_ashr_i32 s1, s0, 31 1363; GFX10PLUS-NEXT: ; return to shader part epilog 1364 %shl = shl i64 %value, 32 1365 %ashr = ashr i64 %shl, 32 1366 ret i64 %ashr 1367} 1368 1369define amdgpu_ps i64 @s_sext_inreg_i64_31(i64 inreg %value) { 1370; GCN-LABEL: s_sext_inreg_i64_31: 1371; GCN: ; %bb.0: 1372; GCN-NEXT: s_bfe_i64 s[0:1], s[0:1], 0x210000 1373; GCN-NEXT: ; return to shader part epilog 1374; 1375; GFX10PLUS-LABEL: s_sext_inreg_i64_31: 1376; GFX10PLUS: ; %bb.0: 1377; GFX10PLUS-NEXT: s_bfe_i64 s[0:1], s[0:1], 0x210000 1378; GFX10PLUS-NEXT: ; return to shader part epilog 1379 %shl = shl i64 %value, 31 1380 %ashr = ashr i64 %shl, 31 1381 ret i64 %ashr 1382} 1383 1384define <2 x i64> @v_sext_inreg_v2i64_16(<2 x i64> %value) { 1385; GCN-LABEL: v_sext_inreg_v2i64_16: 1386; GCN: ; %bb.0: 1387; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1388; GCN-NEXT: v_bfe_i32 v1, v0, 0, 16 1389; GCN-NEXT: v_bfe_i32 v3, v2, 0, 16 1390; GCN-NEXT: s_setpc_b64 s[30:31] 1391; 1392; GFX10PLUS-LABEL: v_sext_inreg_v2i64_16: 1393; GFX10PLUS: ; %bb.0: 1394; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1395; GFX10PLUS-NEXT: v_bfe_i32 v1, v0, 0, 16 1396; GFX10PLUS-NEXT: v_bfe_i32 v3, v2, 0, 16 1397; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] 1398 %shl = shl <2 x i64> %value, <i64 16, i64 16> 1399 %ashr = ashr <2 x i64> %shl, <i64 16, i64 16> 1400 ret <2 x i64> %ashr 1401} 1402 1403define <2 x i64> @v_sext_inreg_v2i64_31(<2 x i64> %value) { 1404; GCN-LABEL: v_sext_inreg_v2i64_31: 1405; GCN: ; %bb.0: 1406; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1407; GCN-NEXT: v_bfe_i32 v1, v0, 0, 1 1408; GCN-NEXT: v_bfe_i32 v3, v2, 0, 1 
1409; GCN-NEXT: s_setpc_b64 s[30:31] 1410; 1411; GFX10PLUS-LABEL: v_sext_inreg_v2i64_31: 1412; GFX10PLUS: ; %bb.0: 1413; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1414; GFX10PLUS-NEXT: v_bfe_i32 v1, v0, 0, 1 1415; GFX10PLUS-NEXT: v_bfe_i32 v3, v2, 0, 1 1416; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] 1417 %shl = shl <2 x i64> %value, <i64 31, i64 31> 1418 %ashr = ashr <2 x i64> %shl, <i64 31, i64 31> 1419 ret <2 x i64> %ashr 1420} 1421 1422define amdgpu_ps <2 x i64> @s_sext_inreg_v2i64_30(<2 x i64> inreg %value) { 1423; GCN-LABEL: s_sext_inreg_v2i64_30: 1424; GCN: ; %bb.0: 1425; GCN-NEXT: s_bfe_i64 s[0:1], s[0:1], 0x220000 1426; GCN-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x220000 1427; GCN-NEXT: ; return to shader part epilog 1428; 1429; GFX10PLUS-LABEL: s_sext_inreg_v2i64_30: 1430; GFX10PLUS: ; %bb.0: 1431; GFX10PLUS-NEXT: s_bfe_i64 s[0:1], s[0:1], 0x220000 1432; GFX10PLUS-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x220000 1433; GFX10PLUS-NEXT: ; return to shader part epilog 1434 %shl = shl <2 x i64> %value, <i64 30, i64 30> 1435 %ashr = ashr <2 x i64> %shl, <i64 30, i64 30> 1436 ret <2 x i64> %ashr 1437} 1438 1439define i65 @v_sext_inreg_i65_22(i65 %value) { 1440; GFX6-LABEL: v_sext_inreg_i65_22: 1441; GFX6: ; %bb.0: 1442; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1443; GFX6-NEXT: v_lshl_b64 v[2:3], v[2:3], 22 1444; GFX6-NEXT: v_lshrrev_b32_e32 v3, 10, v1 1445; GFX6-NEXT: v_or_b32_e32 v2, v2, v3 1446; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 1 1447; GFX6-NEXT: v_lshr_b64 v[0:1], v[0:1], 0 1448; GFX6-NEXT: v_ashrrev_i32_e32 v3, 31, v2 1449; GFX6-NEXT: v_bfe_u32 v1, v1, 0, 10 1450; GFX6-NEXT: v_lshlrev_b32_e32 v4, 10, v2 1451; GFX6-NEXT: v_ashr_i64 v[2:3], v[2:3], 22 1452; GFX6-NEXT: v_or_b32_e32 v1, v1, v4 1453; GFX6-NEXT: s_setpc_b64 s[30:31] 1454; 1455; GFX8-LABEL: v_sext_inreg_i65_22: 1456; GFX8: ; %bb.0: 1457; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1458; GFX8-NEXT: v_lshlrev_b64 v[2:3], 22, v[2:3] 1459; GFX8-NEXT: v_lshrrev_b32_e32 v3, 10, v1 1460; 
GFX8-NEXT: v_or_b32_e32 v2, v2, v3 1461; GFX8-NEXT: v_bfe_i32 v2, v2, 0, 1 1462; GFX8-NEXT: v_lshrrev_b64 v[0:1], 0, v[0:1] 1463; GFX8-NEXT: v_ashrrev_i32_e32 v3, 31, v2 1464; GFX8-NEXT: v_bfe_u32 v1, v1, 0, 10 1465; GFX8-NEXT: v_lshlrev_b32_e32 v4, 10, v2 1466; GFX8-NEXT: v_ashrrev_i64 v[2:3], 22, v[2:3] 1467; GFX8-NEXT: v_or_b32_e32 v1, v1, v4 1468; GFX8-NEXT: s_setpc_b64 s[30:31] 1469; 1470; GFX9-LABEL: v_sext_inreg_i65_22: 1471; GFX9: ; %bb.0: 1472; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1473; GFX9-NEXT: v_lshlrev_b64 v[2:3], 22, v[2:3] 1474; GFX9-NEXT: v_lshrrev_b32_e32 v3, 10, v1 1475; GFX9-NEXT: v_or_b32_e32 v2, v2, v3 1476; GFX9-NEXT: v_lshrrev_b64 v[0:1], 0, v[0:1] 1477; GFX9-NEXT: v_bfe_i32 v2, v2, 0, 1 1478; GFX9-NEXT: v_ashrrev_i32_e32 v3, 31, v2 1479; GFX9-NEXT: v_bfe_u32 v1, v1, 0, 10 1480; GFX9-NEXT: v_lshl_or_b32 v1, v2, 10, v1 1481; GFX9-NEXT: v_ashrrev_i64 v[2:3], 22, v[2:3] 1482; GFX9-NEXT: s_setpc_b64 s[30:31] 1483; 1484; GFX10PLUS-LABEL: v_sext_inreg_i65_22: 1485; GFX10PLUS: ; %bb.0: 1486; GFX10PLUS-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1487; GFX10PLUS-NEXT: v_lshlrev_b64 v[2:3], 22, v[2:3] 1488; GFX10PLUS-NEXT: v_lshrrev_b32_e32 v3, 10, v1 1489; GFX10PLUS-NEXT: v_lshrrev_b64 v[0:1], 0, v[0:1] 1490; GFX10PLUS-NEXT: v_or_b32_e32 v2, v2, v3 1491; GFX10PLUS-NEXT: v_bfe_u32 v1, v1, 0, 10 1492; GFX10PLUS-NEXT: v_bfe_i32 v2, v2, 0, 1 1493; GFX10PLUS-NEXT: v_ashrrev_i32_e32 v3, 31, v2 1494; GFX10PLUS-NEXT: v_lshl_or_b32 v1, v2, 10, v1 1495; GFX10PLUS-NEXT: v_ashrrev_i64 v[2:3], 22, v[2:3] 1496; GFX10PLUS-NEXT: s_setpc_b64 s[30:31] 1497 %shl = shl i65 %value, 22 1498 %ashr = ashr i65 %shl, 22 1499 ret i65 %ashr 1500} 1501 1502define i65 @v_sext_inreg_i65_33(i65 %value) { 1503; GFX6-LABEL: v_sext_inreg_i65_33: 1504; GFX6: ; %bb.0: 1505; GFX6-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1506; GFX6-NEXT: v_mov_b32_e32 v3, v1 1507; GFX6-NEXT: v_bfe_i32 v1, v2, 0, 1 1508; GFX6-NEXT: v_ashrrev_i32_e32 v2, 31, v1 1509; GFX6-NEXT: 
v_lshl_b64 v[0:1], v[1:2], 31
; GFX6-NEXT:    v_lshrrev_b32_e32 v3, 1, v3
; GFX6-NEXT:    v_or_b32_e32 v0, v3, v0
; GFX6-NEXT:    v_ashrrev_i32_e32 v2, 1, v2
; GFX6-NEXT:    s_setpc_b64 s[30:31]
;
; GFX8-LABEL: v_sext_inreg_i65_33:
; GFX8:       ; %bb.0:
; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX8-NEXT:    v_mov_b32_e32 v3, v1
; GFX8-NEXT:    v_bfe_i32 v1, v2, 0, 1
; GFX8-NEXT:    v_ashrrev_i32_e32 v2, 31, v1
; GFX8-NEXT:    v_lshlrev_b64 v[0:1], 31, v[1:2]
; GFX8-NEXT:    v_lshrrev_b32_e32 v3, 1, v3
; GFX8-NEXT:    v_or_b32_e32 v0, v3, v0
; GFX8-NEXT:    v_ashrrev_i32_e32 v2, 1, v2
; GFX8-NEXT:    s_setpc_b64 s[30:31]
;
; GFX9-LABEL: v_sext_inreg_i65_33:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v3, v1
; GFX9-NEXT:    v_bfe_i32 v1, v2, 0, 1
; GFX9-NEXT:    v_ashrrev_i32_e32 v2, 31, v1
; GFX9-NEXT:    v_lshlrev_b64 v[0:1], 31, v[1:2]
; GFX9-NEXT:    v_lshrrev_b32_e32 v3, 1, v3
; GFX9-NEXT:    v_or_b32_e32 v0, v3, v0
; GFX9-NEXT:    v_ashrrev_i32_e32 v2, 1, v2
; GFX9-NEXT:    s_setpc_b64 s[30:31]
;
; GFX10PLUS-LABEL: v_sext_inreg_i65_33:
; GFX10PLUS:       ; %bb.0:
; GFX10PLUS-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10PLUS-NEXT:    v_mov_b32_e32 v3, v1
; GFX10PLUS-NEXT:    v_bfe_i32 v1, v2, 0, 1
; GFX10PLUS-NEXT:    v_lshrrev_b32_e32 v3, 1, v3
; GFX10PLUS-NEXT:    v_ashrrev_i32_e32 v2, 31, v1
; GFX10PLUS-NEXT:    v_lshlrev_b64 v[0:1], 31, v[1:2]
; GFX10PLUS-NEXT:    v_ashrrev_i32_e32 v2, 1, v2
; GFX10PLUS-NEXT:    v_or_b32_e32 v0, v3, v0
; GFX10PLUS-NEXT:    s_setpc_b64 s[30:31]
; NOTE(review): %shl is dead in this function -- the ashr below reads %value,
; not %shl -- so the assertions above cover a plain `ashr i65 %value, 33`
; rather than the shl+ashr sign-extend pattern that the function name and
; s_sext_inreg_i65_33 use. Changing the operand to %shl looks like the intent,
; but it requires regenerating the assertions with
; utils/update_llc_test_checks.py.
  %shl = shl i65 %value, 33
  %ashr = ashr i65 %value, 33
  ret i65 %ashr
}

define amdgpu_ps i65 @s_sext_inreg_i65_18(i65 inreg %value) {
; GCN-LABEL: s_sext_inreg_i65_18:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_lshl_b64 s[2:3], s[2:3], 18
; GCN-NEXT:    s_lshr_b32 s4, s1, 14
; GCN-NEXT:    s_mov_b32 s5, 0
; 
GCN-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5] 1562; GCN-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x10000 1563; GCN-NEXT: s_bfe_u64 s[0:1], s[0:1], 0x2e0000 1564; GCN-NEXT: s_lshl_b32 s7, s2, 14 1565; GCN-NEXT: s_mov_b32 s6, s5 1566; GCN-NEXT: s_or_b64 s[0:1], s[0:1], s[6:7] 1567; GCN-NEXT: s_ashr_i64 s[2:3], s[2:3], 18 1568; GCN-NEXT: ; return to shader part epilog 1569; 1570; GFX10PLUS-LABEL: s_sext_inreg_i65_18: 1571; GFX10PLUS: ; %bb.0: 1572; GFX10PLUS-NEXT: s_lshl_b64 s[2:3], s[2:3], 18 1573; GFX10PLUS-NEXT: s_lshr_b32 s4, s1, 14 1574; GFX10PLUS-NEXT: s_mov_b32 s5, 0 1575; GFX10PLUS-NEXT: s_bfe_u64 s[0:1], s[0:1], 0x2e0000 1576; GFX10PLUS-NEXT: s_or_b64 s[2:3], s[2:3], s[4:5] 1577; GFX10PLUS-NEXT: s_mov_b32 s6, s5 1578; GFX10PLUS-NEXT: s_bfe_i64 s[2:3], s[2:3], 0x10000 1579; GFX10PLUS-NEXT: s_lshl_b32 s7, s2, 14 1580; GFX10PLUS-NEXT: s_ashr_i64 s[2:3], s[2:3], 18 1581; GFX10PLUS-NEXT: s_or_b64 s[0:1], s[0:1], s[6:7] 1582; GFX10PLUS-NEXT: ; return to shader part epilog 1583 %shl = shl i65 %value, 18 1584 %ashr = ashr i65 %shl, 18 1585 ret i65 %ashr 1586} 1587 1588define amdgpu_ps i65 @s_sext_inreg_i65_33(i65 inreg %value) { 1589; GCN-LABEL: s_sext_inreg_i65_33: 1590; GCN: ; %bb.0: 1591; GCN-NEXT: s_lshl_b32 s3, s2, 1 1592; GCN-NEXT: s_mov_b32 s2, 0 1593; GCN-NEXT: s_lshr_b64 s[4:5], s[0:1], 31 1594; GCN-NEXT: s_or_b64 s[4:5], s[2:3], s[4:5] 1595; GCN-NEXT: s_bfe_i64 s[4:5], s[4:5], 0x10000 1596; GCN-NEXT: s_bfe_u32 s0, s0, 0x1f0000 1597; GCN-NEXT: s_mov_b32 s1, s2 1598; GCN-NEXT: s_lshl_b64 s[2:3], s[4:5], 31 1599; GCN-NEXT: s_or_b64 s[0:1], s[0:1], s[2:3] 1600; GCN-NEXT: s_ashr_i32 s2, s5, 1 1601; GCN-NEXT: ; return to shader part epilog 1602; 1603; GFX10PLUS-LABEL: s_sext_inreg_i65_33: 1604; GFX10PLUS: ; %bb.0: 1605; GFX10PLUS-NEXT: s_lshl_b32 s3, s2, 1 1606; GFX10PLUS-NEXT: s_mov_b32 s2, 0 1607; GFX10PLUS-NEXT: s_lshr_b64 s[4:5], s[0:1], 31 1608; GFX10PLUS-NEXT: s_bfe_u32 s0, s0, 0x1f0000 1609; GFX10PLUS-NEXT: s_or_b64 s[4:5], s[2:3], s[4:5] 1610; GFX10PLUS-NEXT: 
s_mov_b32 s1, s2
; GFX10PLUS-NEXT:    s_bfe_i64 s[4:5], s[4:5], 0x10000
; GFX10PLUS-NEXT:    s_lshl_b64 s[2:3], s[4:5], 31
; GFX10PLUS-NEXT:    s_or_b64 s[0:1], s[0:1], s[2:3]
; GFX10PLUS-NEXT:    s_ashr_i32 s2, s5, 1
; GFX10PLUS-NEXT:    ; return to shader part epilog
  %shl = shl i65 %value, 33
  %ashr = ashr i65 %shl, 33
  ret i65 %ashr
}

; FIXME: Argument lowering asserts
; define <2 x i65> @v_sext_inreg_v2i65_36(<2 x i65> %value) {
;   %shl = shl <2 x i65> %value, <i65 36, i65 36>
;   %ashr = ashr <2 x i65> %shl, <i65 36, i65 36>
;   ret <2 x i65> %ashr
; }

; define amdgpu_ps <2 x i65> @s_sext_inreg_v2i65_36(<2 x i65> inreg %value) {
;   %shl = shl <2 x i65> %value, <i65 36, i65 36>
;   %ashr = ashr <2 x i65> %shl, <i65 36, i65 36>
;   ret <2 x i65> %ashr
; }
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX10: {{.*}}
; GFX11: {{.*}}